npm - instar - Versions diffs - 1.2.56 → 1.2.58 - Mend

instar 1.2.56 → 1.2.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

package/.claude/skills/autonomous/SKILL.md +11 -2
package/.claude/skills/autonomous/hooks/autonomous-stop-hook.sh +42 -2
package/.claude/skills/autonomous/scripts/setup-autonomous.sh +6 -0
package/dist/commands/server.d.ts.map +1 -1
package/dist/commands/server.js +31 -2
package/dist/commands/server.js.map +1 -1
package/dist/core/CompletionEvaluator.d.ts +46 -0
package/dist/core/CompletionEvaluator.d.ts.map +1 -0
package/dist/core/CompletionEvaluator.js +86 -0
package/dist/core/CompletionEvaluator.js.map +1 -0
package/dist/core/PostUpdateMigrator.d.ts.map +1 -1
package/dist/core/PostUpdateMigrator.js +5 -3
package/dist/core/PostUpdateMigrator.js.map +1 -1
package/dist/core/SessionManager.d.ts +6 -0
package/dist/core/SessionManager.d.ts.map +1 -1
package/dist/core/SessionManager.js +10 -0
package/dist/core/SessionManager.js.map +1 -1
package/dist/core/frameworkSessionLaunch.d.ts +34 -3
package/dist/core/frameworkSessionLaunch.d.ts.map +1 -1
package/dist/core/frameworkSessionLaunch.js +42 -3
package/dist/core/frameworkSessionLaunch.js.map +1 -1
package/dist/core/types.d.ts +12 -0
package/dist/core/types.d.ts.map +1 -1
package/dist/core/types.js.map +1 -1
package/dist/server/AgentServer.d.ts +1 -0
package/dist/server/AgentServer.d.ts.map +1 -1
package/dist/server/AgentServer.js +1 -0
package/dist/server/AgentServer.js.map +1 -1
package/dist/server/CapabilityIndex.d.ts.map +1 -1
package/dist/server/CapabilityIndex.js +1 -0
package/dist/server/CapabilityIndex.js.map +1 -1
package/dist/server/routes.d.ts +2 -0
package/dist/server/routes.d.ts.map +1 -1
package/dist/server/routes.js +21 -0
package/dist/server/routes.js.map +1 -1
package/dist/threadline/PipeSessionSpawner.d.ts +10 -0
package/dist/threadline/PipeSessionSpawner.d.ts.map +1 -1
package/dist/threadline/PipeSessionSpawner.js +6 -0
package/dist/threadline/PipeSessionSpawner.js.map +1 -1
package/dist/threadline/ThreadlineBootstrap.d.ts.map +1 -1
package/dist/threadline/ThreadlineBootstrap.js +5 -16
package/dist/threadline/ThreadlineBootstrap.js.map +1 -1
package/dist/threadline/mcpEntry.d.ts +25 -0
package/dist/threadline/mcpEntry.d.ts.map +1 -0
package/dist/threadline/mcpEntry.js +38 -0
package/dist/threadline/mcpEntry.js.map +1 -0
package/package.json +1 -1
package/src/data/builtin-manifest.json +64 -64
package/upgrades/1.2.57.md +64 -0
package/upgrades/1.2.58.md +77 -0
package/upgrades/side-effects/codex-multiagent-threadline.md +69 -0
package/upgrades/side-effects/goal-completion-evaluator.md +69 -0

package/.claude/skills/autonomous/SKILL.md CHANGED

@@ -162,7 +162,16 @@ The stop hook will catch every attempt to exit and feed your task list back. Eac
 ## Step 4: Completion
-When ALL tasks are genuinely done:
+**Preferred: a verifiable completion CONDITION (independent judge, like /goal).**
+Pass `--completion-condition "<measurable end-state>"` when starting (e.g. "all tests in
+test/auth pass and `npm test` exits 0"). Each turn, an INDEPENDENT model judges the condition
+against what you've SURFACED in the conversation — so *run the real checks and show the
+evidence in your output*. When the judge confirms it, the hook exits automatically. You do not
+self-declare done. If the judge can't be reached, the run keeps going (fail-safe). This mirrors
+the framework `/goal` feature and is harder to fool than a self-declared promise.
+**Legacy fallback: self-declared promise.** If no condition is set, when ALL tasks are
+genuinely done:
 1. Verify every task is complete (re-read the list)
 2. Run `npx tsc --noEmit` — zero errors
@@ -171,7 +180,7 @@ When ALL tasks are genuinely done:
 5. Send final report via messaging
 6. Output: `<promise>ALL_TASKS_COMPLETE</promise>`
-The stop hook will detect the promise and allow exit.
+The stop hook detects the promise and allows exit.
 ---

package/.claude/skills/autonomous/hooks/autonomous-stop-hook.sh CHANGED

@@ -145,6 +145,7 @@ ITERATION=$(fm_get iteration)
 DURATION_SECONDS=$(fm_get duration_seconds)
 STARTED_AT=$(fm_get started_at)
 COMPLETION_PROMISE=$(fm_get completion_promise)
+COMPLETION_CONDITION=$(fm_get completion_condition)
 # Validate recorded session_id is a real UUID. Claude sometimes writes a custom
 # string instead of $CLAUDE_CODE_SESSION_ID; non-UUID values are treated as
@@ -267,7 +268,37 @@ if [[ -f ".instar/autonomous-emergency-stop" ]]; then
   exit 0
 fi
-# Completion promise (genuine completion)
+# Completion CONDITION — independent evaluator (mirrors /goal). Authoritative when
+# set; the self-declared promise below is the legacy fallback. FAIL-SAFE: if the
+# evaluator is unreachable or unsure, we keep working — never a false "done".
+EVAL_REASON=""
+if [[ -n "$COMPLETION_CONDITION" ]] && [[ -n "$TRANSCRIPT_PATH" ]] && [[ -f "$TRANSCRIPT_PATH" ]]; then
+  EVAL_MET=""
+  if [[ -n "${INSTAR_HOOK_EVAL_OVERRIDE:-}" ]]; then
+    # Test seam: "met" | "not-met" short-circuits the live evaluator call.
+    [[ "$INSTAR_HOOK_EVAL_OVERRIDE" == "met" ]] && EVAL_MET="true"
+    EVAL_REASON="override:$INSTAR_HOOK_EVAL_OVERRIDE"
+  else
+    EVAL_TAIL=$(grep '"role":"assistant"' "$TRANSCRIPT_PATH" 2>/dev/null | tail -6 \
+      | jq -r '.message.content | map(select(.type=="text")) | map(.text) | join("\n")' 2>/dev/null \
+      | tail -c 8000 || echo "")
+    EVAL_PORT=$(python3 -c "import json;print(json.load(open('.instar/config.json')).get('port',4040))" 2>/dev/null || echo 4040)
+    EVAL_AUTH=$(python3 -c "import json;print(json.load(open('.instar/config.json')).get('authToken',''))" 2>/dev/null || echo "")
+    EVAL_RESP=$(jq -nc --arg c "$COMPLETION_CONDITION" --arg t "$EVAL_TAIL" '{condition:$c,transcriptTail:$t}' \
+      | curl -s -m 35 -H "Authorization: Bearer $EVAL_AUTH" -H 'Content-Type: application/json' \
+        --data-binary @- "http://localhost:${EVAL_PORT}/autonomous/evaluate-completion" 2>/dev/null || echo "")
+    EVAL_MET=$(printf '%s' "$EVAL_RESP" | jq -r '.met // empty' 2>/dev/null || echo "")
+    EVAL_REASON=$(printf '%s' "$EVAL_RESP" | jq -r '.reason // empty' 2>/dev/null || echo "")
+  fi
+  if [[ "$EVAL_MET" == "true" ]]; then
+    echo "✅ Autonomous mode: completion condition met (independent evaluator): ${EVAL_REASON}"
+    rm -f "$STATE_FILE"
+    exit 0
+  fi
+  # Not met / unreachable → keep working; EVAL_REASON (if any) becomes next-turn guidance.
+fi
+# Completion promise (genuine completion — legacy/self-declared fallback)
 if [[ -n "$TRANSCRIPT_PATH" ]] && [[ -f "$TRANSCRIPT_PATH" ]]; then
   LAST_LINE=$(grep '"role":"assistant"' "$TRANSCRIPT_PATH" 2>/dev/null | tail -1 || echo "")
   if [[ -n "$LAST_LINE" ]]; then
@@ -393,7 +424,16 @@ else
   TIME_MSG="no time limit"
 fi
-SYSTEM_MSG="🔄 Autonomous iteration $NEXT_ITERATION ($TIME_MSG) | Complete ALL tasks, then output <promise>$COMPLETION_PROMISE</promise> | Do NOT defer to future self — if you can do it now, DO IT NOW${REPORT_DIRECTIVE}"
+# When a completion CONDITION is set, an independent judge decides "done" — steer
+# toward the condition + feed back the judge's latest reason (mirrors /goal). When
+# only a legacy promise is set, keep the self-declared-promise directive.
+if [[ -n "$COMPLETION_CONDITION" ]]; then
+  GUIDANCE=""
+  [[ -n "$EVAL_REASON" ]] && GUIDANCE=" | Not done yet: ${EVAL_REASON}"
+  SYSTEM_MSG="🔄 Autonomous iteration $NEXT_ITERATION ($TIME_MSG) | Keep working until this is TRUE: ${COMPLETION_CONDITION}${GUIDANCE} | An independent check decides done from what you SURFACE — run the real checks and show the evidence. Do NOT defer — do it now${REPORT_DIRECTIVE}"
+else
+  SYSTEM_MSG="🔄 Autonomous iteration $NEXT_ITERATION ($TIME_MSG) | Complete ALL tasks, then output <promise>$COMPLETION_PROMISE</promise> | Do NOT defer to future self — if you can do it now, DO IT NOW${REPORT_DIRECTIVE}"
+fi
 # Block exit and feed prompt back
 jq -n \

package/.claude/skills/autonomous/scripts/setup-autonomous.sh CHANGED

@@ -14,6 +14,7 @@ REPORT_CHANNEL="telegram"   # channel that owns this job; recovery note routes h
 LEVEL_UP="false"
 TASKS=""
 COMPLETION_PROMISE=""
+COMPLETION_CONDITION=""   # verifiable end-state; an independent judge decides "done" (mirrors /goal). Preferred over the self-declared promise.
 REPORT_INTERVAL="30m"
 while [[ $# -gt 0 ]]; do
@@ -46,6 +47,10 @@ while [[ $# -gt 0 ]]; do
       COMPLETION_PROMISE="$2"
       shift 2
       ;;
+    --completion-condition)
+      COMPLETION_CONDITION="$2"
+      shift 2
+      ;;
     --report-interval)
       REPORT_INTERVAL="$2"
       shift 2
@@ -153,6 +158,7 @@ report_interval: "$REPORT_INTERVAL"
 last_report_at: ""
 level_up: $LEVEL_UP
 completion_promise: "$COMPLETION_PROMISE"
+completion_condition: "$COMPLETION_CONDITION"
 ---
 # Autonomous Session

package/dist/commands/server.d.ts.map CHANGED

	@@ -1 +1 @@
1	- {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/commands/server.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;~~AAqQH~~,UAAU,YAAY;IACpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;2DACuD;IACvD,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAiqDD,wBAAsB,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,~~CA0xLtE~~;AAED,wBAAsB,UAAU,CAAC,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAsDzE;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,aAAa,CAAC,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAuD5E"}
1	+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/commands/server.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAsQH,UAAU,YAAY;IACpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;2DACuD;IACvD,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAiqDD,wBAAsB,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CA0zLtE;AAED,wBAAsB,UAAU,CAAC,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAsDzE;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,aAAa,CAAC,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAuD5E"}

package/dist/commands/server.js CHANGED

@@ -91,6 +91,7 @@ import { pickupGitSyncMessages } from '../messaging/GitSyncTransport.js';
 import { DeliveryRetryManager } from '../messaging/DeliveryRetryManager.js';
 import { SpawnRequestManager } from '../messaging/SpawnRequestManager.js';
 import { ThreadlineRouter } from '../threadline/ThreadlineRouter.js';
+import { resolveThreadlineMcpEntry } from '../threadline/mcpEntry.js';
 import { ThreadResumeMap } from '../threadline/ThreadResumeMap.js';
 import { ListenerSessionManager } from '../threadline/ListenerSessionManager.js';
 import { SystemReviewer } from '../monitoring/SystemReviewer.js';
@@ -2116,7 +2117,17 @@ export async function startServer(options) {
                 console.log(pc.yellow(`  Git sync setup: ${err instanceof Error ? err.message : String(err)}`));
             }
         }
-        const sessionManager = new SessionManager(config.sessions, state);
+        // Per-agent Codex threadline MCP override. Codex reads a SHARED
+        // ~/.codex/config.toml whose [mcp_servers."threadline"] is last-writer-wins
+        // across every codex agent on the machine — so a codex worker could load a
+        // DIFFERENT agent's threadline identity and its threadline_send would be
+        // misaddressed. Pin this agent's own entry per-spawn (the launch builders
+        // emit `-c mcp_servers.threadline.*`). Only when threadline is configured;
+        // ignored by non-codex launches. See CODEX-MULTIAGENT-THREADLINE-SPEC.
+        const codexThreadlineMcp = config.threadline
+            ? resolveThreadlineMcpEntry(config.sessions.projectDir, config.stateDir, config.projectName)
+            : undefined;
+        const sessionManager = new SessionManager(codexThreadlineMcp ? { ...config.sessions, codexThreadlineMcp } : config.sessions, state);
         // Input Guard is constructed later (after sharedIntelligence is available)
         // so the topic coherence reviewer can route through the IntelligenceProvider
         // abstraction instead of calling Anthropic directly.
@@ -5970,6 +5981,12 @@ export async function startServer(options) {
                     // §4.5: honor SpawnRequestManager's provenance tag so drain-spawned
                     // sessions are distinguishable from inline-spawned ones in logs/stream.
                     triggeredBy: opts?.triggeredBy ?? 'spawn-request',
+                    // This is the Threadline inbound-reply spawn: the worker must call
+                    // the threadline_send MCP tool to reply, which a codex worker can only
+                    // do under full bypass (codex cancels MCP calls in any sandbox). Jobs
+                    // do NOT set this and stay sandboxed. Bounded: Threadline only accepts
+                    // messages from trusted agents.
+                    codexAllowMcpTools: true,
                 });
                 return session.id;
             },
@@ -6160,6 +6177,9 @@ export async function startServer(options) {
                     minIqsBand: pipeConfig?.minIqsBand ?? 70,
                     framework: pipeFramework,
                     binaryPath: pipeBinaryPath,
+                    // Same per-agent codex threadline MCP override as SessionManager, so a
+                    // codex pipe-reply worker uses THIS agent's threadline MCP.
+                    ...(codexThreadlineMcp ? { codexThreadlineMcp } : {}),
                 });
                 console.log(pc.dim(`  Pipe sessions: enabled (model: ${pipeConfig?.model ?? 'sonnet'}, max: ${pipeConfig?.maxConcurrent ?? 5})`));
             }
@@ -6579,6 +6599,15 @@ export async function startServer(options) {
         else {
             console.log(pc.yellow('  Discovery evaluator: inactive (no IntelligenceProvider)'));
         }
+        // Independent autonomous-completion judge (mirrors /goal). Reuses the
+        // framework-aware sharedIntelligence; falls back to the self-declared promise
+        // when absent (the stop-hook handles that).
+        let completionEvaluator;
+        if (sharedIntelligence) {
+            const { CompletionEvaluator } = await import('../core/CompletionEvaluator.js');
+            completionEvaluator = new CompletionEvaluator({ intelligence: sharedIntelligence });
+            console.log(pc.green('  Completion evaluator: active (independent /goal-style judge)'));
+        }
         // Register feature-discovery probe for self-knowledge tree (Phase 4: Agent Integration)
         if (selfKnowledgeTree && featureRegistry) {
             selfKnowledgeTree.probes.register('feature-discovery', async () => {
@@ -6911,7 +6940,7 @@ export async function startServer(options) {
                 },
             });
         }
-        const server = new AgentServer({ config, sessionManager, state, scheduler, telegram, relationships, feedback, feedbackAnomalyDetector, dispatches, updateChecker, autoUpdater, autoDispatcher, quotaTracker, quotaManager, publisher, viewer, tunnel, evolution, watchdog, topicMemory, triageNurse, projectMapper, coherenceGate: scopeVerifier, contextHierarchy, canonicalState, operationGate, sentinel, adaptiveTrust, memoryMonitor, orphanReaper, coherenceMonitor, commitmentTracker, semanticMemory, activitySentinel, rateLimitSentinel, messageRouter, summarySentinel, spawnManager, systemReviewer, capabilityMapper, selfKnowledgeTree, coverageAuditor, topicResumeMap: _topicResumeMap ?? undefined, sessionRefresh: _sessionRefresh ?? undefined, autonomyManager, trustElevationTracker, autonomousEvolution, coordinator: coordinator.enabled ? coordinator : undefined, localSigningKeyPem, whatsapp: whatsappAdapter, slack: slackAdapter, imessage: imessageAdapter, whatsappBusinessBackend, messageBridge, hookEventReceiver, worktreeMonitor, subagentTracker, instructionsVerifier, handshakeManager: threadlineHandshake, threadlineRouter, threadResumeMap, topicLinkageHandler: topicLinkageHandler ?? undefined, threadlineRelayClient, threadlineReplyWaiters, listenerManager: listenerManager ?? undefined, responseReviewGate, messagingToneGate, outboundDedupGate, telemetryHeartbeat, pasteManager, featureRegistry, discoveryEvaluator, unifiedTrust, liveConfig, sharedStateLedger, ledgerSessionRegistry, worktreeManager, oidcEnrolledRepos: parallelDevConfig?.oidcEnrolledRepos, initiativeTracker, projectRoundRunner, projectDriftChecker, machineHeartbeat, proxyCoordinator, topicIntentStore, telegramBridgeConfig, telegramBridge: telegramBridge ?? undefined, threadlineObservability, workingMemory, taskFlowRegistry, threadlineFlowBridge });
+        const server = new AgentServer({ config, sessionManager, state, scheduler, telegram, relationships, feedback, feedbackAnomalyDetector, dispatches, updateChecker, autoUpdater, autoDispatcher, quotaTracker, quotaManager, publisher, viewer, tunnel, evolution, watchdog, topicMemory, triageNurse, projectMapper, coherenceGate: scopeVerifier, contextHierarchy, canonicalState, operationGate, sentinel, adaptiveTrust, memoryMonitor, orphanReaper, coherenceMonitor, commitmentTracker, semanticMemory, activitySentinel, rateLimitSentinel, messageRouter, summarySentinel, spawnManager, systemReviewer, capabilityMapper, selfKnowledgeTree, coverageAuditor, topicResumeMap: _topicResumeMap ?? undefined, sessionRefresh: _sessionRefresh ?? undefined, autonomyManager, trustElevationTracker, autonomousEvolution, coordinator: coordinator.enabled ? coordinator : undefined, localSigningKeyPem, whatsapp: whatsappAdapter, slack: slackAdapter, imessage: imessageAdapter, whatsappBusinessBackend, messageBridge, hookEventReceiver, worktreeMonitor, subagentTracker, instructionsVerifier, handshakeManager: threadlineHandshake, threadlineRouter, threadResumeMap, topicLinkageHandler: topicLinkageHandler ?? undefined, threadlineRelayClient, threadlineReplyWaiters, listenerManager: listenerManager ?? undefined, responseReviewGate, messagingToneGate, outboundDedupGate, telemetryHeartbeat, pasteManager, featureRegistry, discoveryEvaluator, completionEvaluator, unifiedTrust, liveConfig, sharedStateLedger, ledgerSessionRegistry, worktreeManager, oidcEnrolledRepos: parallelDevConfig?.oidcEnrolledRepos, initiativeTracker, projectRoundRunner, projectDriftChecker, machineHeartbeat, proxyCoordinator, topicIntentStore, telegramBridgeConfig, telegramBridge: telegramBridge ?? undefined, threadlineObservability, workingMemory, taskFlowRegistry, threadlineFlowBridge });
         // Boot-recovery (tunnel-failure-resilience spec Part 6): if the agent
         // died mid-relay-episode, the persisted tunnel.json carries
         // rotationPending=true. Rotate the dashboard PIN + authToken BEFORE