@caupulican/pi-adaptative 0.80.30 → 0.80.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -102,7 +102,7 @@ const AUTO_LEARN_DEFAULTS = {
102
102
  maxConcurrentLearners: 1,
103
103
  applyHighConfidence: false,
104
104
  reflectionReview: true,
105
- reflectionMinToolCalls: 8,
105
+ reflectionMinToolCalls: 5,
106
106
  reflectionCooldownMinutes: 24 * 60,
107
107
  };
108
108
  const AUTONOMY_AUTO_LEARN_PRESETS = {
@@ -117,7 +117,7 @@ const AUTONOMY_AUTO_LEARN_PRESETS = {
117
117
  maxConcurrentLearners: 1,
118
118
  applyHighConfidence: false,
119
119
  reflectionReview: true,
120
- reflectionMinToolCalls: 8,
120
+ reflectionMinToolCalls: 5,
121
121
  reflectionCooldownMinutes: 24 * 60,
122
122
  },
123
123
  balanced: {
@@ -130,7 +130,7 @@ const AUTONOMY_AUTO_LEARN_PRESETS = {
130
130
  maxConcurrentLearners: 1,
131
131
  applyHighConfidence: false,
132
132
  reflectionReview: true,
133
- reflectionMinToolCalls: 8,
133
+ reflectionMinToolCalls: 5,
134
134
  reflectionCooldownMinutes: 24 * 60,
135
135
  },
136
136
  full: {
@@ -143,13 +143,14 @@ const AUTONOMY_AUTO_LEARN_PRESETS = {
143
143
  maxConcurrentLearners: 1,
144
144
  applyHighConfidence: true,
145
145
  reflectionReview: true,
146
- reflectionMinToolCalls: 8,
146
+ reflectionMinToolCalls: 5,
147
147
  reflectionCooldownMinutes: 24 * 60,
148
148
  },
149
149
  };
150
150
  const AUTONOMY_MODES = ["off", "safe", "balanced", "full"];
151
151
  const AUTO_LEARN_RESERVATION_MS = 2 * 60 * 1000;
152
152
  const AUTO_LEARN_THINKING_LEVEL = "xhigh";
153
+ const AUTO_LEARN_COMPLEX_TASK_TOOL_CALLS = 5;
153
154
  export const AUTO_LEARN_HISTORY_RETENTION_MS = 7 * 24 * 60 * 60 * 1000;
154
155
  function definedStringSet(values) {
155
156
  const set = new Set();
@@ -4020,16 +4021,16 @@ export class InteractiveMode {
4020
4021
  ? `\n\nLatest completed turn digest (bounded; use only as current-session evidence, not as longitudinal proof):\n<turn_digest>\n${options.turnDigest}\n</turn_digest>`
4021
4022
  : "";
4022
4023
  const objective = options.kind === "reflection"
4023
- ? "review the latest completed turn for durable memory, skill, validation, and tooling-improvement cues, then run one bounded continuous-learning pass if the learning tools are available"
4024
+ ? "review the latest completed turn for durable memory, skill, validation, tooling, and code-baked self-improvement cues, then run one bounded continuous-learning pass if the learning tools are available"
4024
4025
  : "run one bounded continuous-learning pass for this Pi tenant";
4025
- return `You are Pi Auto Learn running as a background learner.\n\nObjective: ${objective}.\nTrigger: ${reason}.\n\n${authorityBlock}\n\nRequired workflow:\n1. Query existing durable memory/rules first when tools allow it. Memory confrontation is mandatory before accepting, merging, upgrading, or rejecting learning candidates.\n2. Run the available Auto Learn tooling, preferably learning_run_auto, with applyHighConfidence=${settings.applyHighConfidence}. Process candidate validation in vectorized chunks/batches; avoid scalar per-candidate memory queries except for final selected writes.\n3. Apply the learning validation tree to each candidate chunk: (a) Why is this good for the user? (b) Is it unique, or similar to existing memory/skills/agents so it should merge or upgrade existing knowledge? (c) Will this make Pi a better agent? Candidates that cannot answer all three are noise.\n4. Treat the latest-turn digest as current-session evidence only; do not auto-commit one-off cues unless deterministic tooling and memory confrontation corroborate them.\n5. In mode=full, apply safe memory/skill/user-extension/authorized-source improvements under the standing grant above; otherwise keep them proposal-gated.\n6. Never cross hard-stop boundaries from the authority policy.\n7. If the learning tools are unavailable, report BLOCKED with the missing tool names and do not improvise.\n8. Finish with PASS, BLOCKED, or FAIL and concise evidence, including chunk counts, merge/upgrade decisions, and cleanup/purge status.${reflectionBlock}`;
4026
+ return `You are Pi Auto Learn running as a background learner.\n\nObjective: ${objective}.\nTrigger: ${reason}.\n\n${authorityBlock}\n\nRequired workflow:\n1. Query existing durable memory/rules first when tools allow it. Memory confrontation is mandatory before accepting, merging, upgrading, or rejecting learning candidates.\n2. Run the available Auto Learn tooling, preferably learning_run_auto, with applyHighConfidence=${settings.applyHighConfidence}. Process candidate validation in vectorized chunks/batches; avoid scalar per-candidate memory queries except for final selected writes.\n3. Apply the learning validation tree to each candidate chunk: (a) Why is this good for the user? (b) Is it unique, or similar to existing memory/skills/agents so it should merge or upgrade existing knowledge? (c) Will this make Pi a better agent? Candidates that cannot answer all three are noise.\n4. Hermes-style learning cycle: after a complex task (${AUTO_LEARN_COMPLEX_TASK_TOOL_CALLS}+ tool calls), user correction, repeated steering pattern, non-trivial fix/workaround/debugging path, loaded-skill defect, trigger gap, tool gap, or harness workflow defect, actively create or update durable learning artifacts. Memory stores compact facts/preferences/state; skills/prompts/agents/extensions/source store procedural behavior. When a lesson changes how Pi should act on a future class of task, memory alone is not completion.\n5. Skill update preference order: (1) patch the currently loaded or consulted skill that governed the task; (2) patch an existing class-level umbrella skill/agent/prompt; (3) add a support file under references/, templates/, or scripts/ and add a SKILL.md pointer; (4) create a new class-level umbrella skill only when no existing artifact fits. Never create one-off PR/error/codename/session skills.\n6. Behavioral self-improvement is code-baked by default: prefer the lowest durable executable layer that fixes the behavior — patch an existing skill/prompt/agent/extension/tool, tune an approved setting, or edit the authorized Pi source when source authority is available. Use Automata only for concise facts/evidence pointers that support the baked change.\n7. Do not harden transient or environment-dependent failures into durable behavior: missing binaries, fresh-install package gaps, credentials not configured, path mismatches, one-off task narratives, or negative tool-broken claims should become setup/troubleshooting fixes only when the fix itself is reusable.\n8. Treat the latest-turn digest as current-session evidence only; do not auto-commit one-off cues unless deterministic tooling and memory confrontation corroborate them.\n9. In mode=full, apply safe memory/skill/user-extension/authorized-source improvements under the standing grant above; otherwise keep them proposal-gated.\n10. Never cross hard-stop boundaries from the authority policy.\n11. If the learning tools are unavailable, report BLOCKED with the missing tool names and do not improvise.\n12. Finish with PASS, BLOCKED, or FAIL and concise evidence, including chunk counts, merge/upgrade/code-bake decisions, changed paths/settings, validation, and cleanup/purge status.${reflectionBlock}`;
4026
4027
  }
4027
4028
  reserveAutoLearnRun(params) {
4028
4029
  return this.withAutoLearnStateLock((current) => {
4029
4030
  const now = Date.now();
4030
4031
  const state = this.pruneAutoLearnHistoryFromState(current, now);
4031
4032
  const tenant = this.getAutoLearnTenantKey();
4032
- if (params.cooldownKind === "reflection") {
4033
+ if (params.cooldownKind === "reflection" && !params.bypassReflectionCooldown) {
4033
4034
  const lastReflection = state.lastReflectionByTenant?.[tenant] ?? 0;
4034
4035
  const cooldownMs = params.settings.reflectionCooldownMinutes * 60 * 1000;
4035
4036
  if (Math.max(0, lastReflection + cooldownMs - now) > 0) {
@@ -4168,6 +4169,7 @@ export class InteractiveMode {
4168
4169
  settings,
4169
4170
  force,
4170
4171
  cooldownKind: options.cooldownKind,
4172
+ bypassReflectionCooldown: options.bypassReflectionCooldown,
4171
4173
  runId,
4172
4174
  modelPattern,
4173
4175
  reason,
@@ -4332,6 +4334,10 @@ export class InteractiveMode {
4332
4334
  .map((message) => this.getAgentMessagePlainText(message))
4333
4335
  .join("\n");
4334
4336
  const correctionSignal = /\b(next time|for future|from now on|remember this|don't|do not|avoid|instead|you should|should have|you forgot|you missed|not what i asked|wrong again)\b/i.test(userText);
4337
+ const behavioralSelfImprovementSignal = /\b(harness|pi|agent|autonomy|autonomous|self[- ]?improv(?:e|ement|ing)?|steer(?:ing)?|trigger(?:s)?|skill(?:s)?|code[- ]?bak(?:e|ed)|bake(?:d)? into code|not (?:automata|memory)|reference agent|hermes)\b/i.test(userText) &&
4338
+ /\b(improve|automatic(?:ally)?|autonomous|trigger|fire|skill|steer|self[- ]?improv(?:e|ement|ing)?|code[- ]?bak(?:e|ed)|bake(?:d)?|too much|less)\b/i.test(userText);
4339
+ const complexTaskSignal = toolCalls >= AUTO_LEARN_COMPLEX_TASK_TOOL_CALLS;
4340
+ const bypassCooldown = correctionSignal || behavioralSelfImprovementSignal || complexTaskSignal;
4335
4341
  const base = { messageCount, contextPercent, cooldownRemainingMs, runningCount, toolCalls };
4336
4342
  if (!settings.enabled)
4337
4343
  return { ...base, shouldRun: false, reason: "disabled" };
@@ -4344,14 +4350,34 @@ export class InteractiveMode {
4344
4350
  reason: `max tenant learners running (${runningCount}/${settings.maxConcurrentLearners})`,
4345
4351
  };
4346
4352
  }
4347
- if (cooldownRemainingMs > 0)
4353
+ if (cooldownRemainingMs > 0 && !bypassCooldown) {
4348
4354
  return { ...base, shouldRun: false, reason: "reflection cooldown" };
4355
+ }
4356
+ if (behavioralSelfImprovementSignal) {
4357
+ return {
4358
+ ...base,
4359
+ shouldRun: true,
4360
+ reason: "reflection behavioral self-improvement signal",
4361
+ digest: this.buildAutonomyReviewDigest(messages),
4362
+ bypassCooldown: true,
4363
+ };
4364
+ }
4349
4365
  if (correctionSignal) {
4350
4366
  return {
4351
4367
  ...base,
4352
4368
  shouldRun: true,
4353
4369
  reason: "reflection correction signal",
4354
4370
  digest: this.buildAutonomyReviewDigest(messages),
4371
+ bypassCooldown: true,
4372
+ };
4373
+ }
4374
+ if (complexTaskSignal) {
4375
+ return {
4376
+ ...base,
4377
+ shouldRun: true,
4378
+ reason: `reflection complex task learning signal (${toolCalls}/${AUTO_LEARN_COMPLEX_TASK_TOOL_CALLS} tool calls)`,
4379
+ digest: this.buildAutonomyReviewDigest(messages),
4380
+ bypassCooldown: true,
4355
4381
  };
4356
4382
  }
4357
4383
  if (autonomy.mode === "full") {
@@ -4395,6 +4421,7 @@ export class InteractiveMode {
4395
4421
  cooldownKind: "reflection",
4396
4422
  promptKind: "reflection",
4397
4423
  turnDigest: decision.digest,
4424
+ bypassReflectionCooldown: decision.bypassCooldown,
4398
4425
  });
4399
4426
  if (!message.startsWith("Auto Learn started"))
4400
4427
  this.showStatus(message);