instar 1.2.56 → 1.2.58
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/.claude/skills/autonomous/SKILL.md +11 -2
- package/.claude/skills/autonomous/hooks/autonomous-stop-hook.sh +42 -2
- package/.claude/skills/autonomous/scripts/setup-autonomous.sh +6 -0
- package/dist/commands/server.d.ts.map +1 -1
- package/dist/commands/server.js +31 -2
- package/dist/commands/server.js.map +1 -1
- package/dist/core/CompletionEvaluator.d.ts +46 -0
- package/dist/core/CompletionEvaluator.d.ts.map +1 -0
- package/dist/core/CompletionEvaluator.js +86 -0
- package/dist/core/CompletionEvaluator.js.map +1 -0
- package/dist/core/PostUpdateMigrator.d.ts.map +1 -1
- package/dist/core/PostUpdateMigrator.js +5 -3
- package/dist/core/PostUpdateMigrator.js.map +1 -1
- package/dist/core/SessionManager.d.ts +6 -0
- package/dist/core/SessionManager.d.ts.map +1 -1
- package/dist/core/SessionManager.js +10 -0
- package/dist/core/SessionManager.js.map +1 -1
- package/dist/core/frameworkSessionLaunch.d.ts +34 -3
- package/dist/core/frameworkSessionLaunch.d.ts.map +1 -1
- package/dist/core/frameworkSessionLaunch.js +42 -3
- package/dist/core/frameworkSessionLaunch.js.map +1 -1
- package/dist/core/types.d.ts +12 -0
- package/dist/core/types.d.ts.map +1 -1
- package/dist/core/types.js.map +1 -1
- package/dist/server/AgentServer.d.ts +1 -0
- package/dist/server/AgentServer.d.ts.map +1 -1
- package/dist/server/AgentServer.js +1 -0
- package/dist/server/AgentServer.js.map +1 -1
- package/dist/server/CapabilityIndex.d.ts.map +1 -1
- package/dist/server/CapabilityIndex.js +1 -0
- package/dist/server/CapabilityIndex.js.map +1 -1
- package/dist/server/routes.d.ts +2 -0
- package/dist/server/routes.d.ts.map +1 -1
- package/dist/server/routes.js +21 -0
- package/dist/server/routes.js.map +1 -1
- package/dist/threadline/PipeSessionSpawner.d.ts +10 -0
- package/dist/threadline/PipeSessionSpawner.d.ts.map +1 -1
- package/dist/threadline/PipeSessionSpawner.js +6 -0
- package/dist/threadline/PipeSessionSpawner.js.map +1 -1
- package/dist/threadline/ThreadlineBootstrap.d.ts.map +1 -1
- package/dist/threadline/ThreadlineBootstrap.js +5 -16
- package/dist/threadline/ThreadlineBootstrap.js.map +1 -1
- package/dist/threadline/mcpEntry.d.ts +25 -0
- package/dist/threadline/mcpEntry.d.ts.map +1 -0
- package/dist/threadline/mcpEntry.js +38 -0
- package/dist/threadline/mcpEntry.js.map +1 -0
- package/package.json +1 -1
- package/src/data/builtin-manifest.json +64 -64
- package/upgrades/1.2.57.md +64 -0
- package/upgrades/1.2.58.md +77 -0
- package/upgrades/side-effects/codex-multiagent-threadline.md +69 -0
- package/upgrades/side-effects/goal-completion-evaluator.md +69 -0
|
@@ -162,7 +162,16 @@ The stop hook will catch every attempt to exit and feed your task list back. Eac
|
|
|
162
162
|
|
|
163
163
|
## Step 4: Completion
|
|
164
164
|
|
|
165
|
-
|
|
165
|
+
**Preferred: a verifiable completion CONDITION (independent judge, like /goal).**
|
|
166
|
+
Pass `--completion-condition "<measurable end-state>"` when starting (e.g. "all tests in
|
|
167
|
+
test/auth pass and `npm test` exits 0"). Each turn, an INDEPENDENT model judges the condition
|
|
168
|
+
against what you've SURFACED in the conversation — so *run the real checks and show the
|
|
169
|
+
evidence in your output*. When the judge confirms it, the hook exits automatically. You do not
|
|
170
|
+
self-declare done. If the judge can't be reached, the run keeps going (fail-safe). This mirrors
|
|
171
|
+
the framework `/goal` feature and is harder to fool than a self-declared promise.
|
|
172
|
+
|
|
173
|
+
**Legacy fallback: self-declared promise.** If no condition is set, when ALL tasks are
|
|
174
|
+
genuinely done:
|
|
166
175
|
|
|
167
176
|
1. Verify every task is complete (re-read the list)
|
|
168
177
|
2. Run `npx tsc --noEmit` — zero errors
|
|
@@ -171,7 +180,7 @@ When ALL tasks are genuinely done:
|
|
|
171
180
|
5. Send final report via messaging
|
|
172
181
|
6. Output: `<promise>ALL_TASKS_COMPLETE</promise>`
|
|
173
182
|
|
|
174
|
-
The stop hook
|
|
183
|
+
The stop hook detects the promise and allows exit.
|
|
175
184
|
|
|
176
185
|
---
|
|
177
186
|
|
|
@@ -145,6 +145,7 @@ ITERATION=$(fm_get iteration)
|
|
|
145
145
|
DURATION_SECONDS=$(fm_get duration_seconds)
|
|
146
146
|
STARTED_AT=$(fm_get started_at)
|
|
147
147
|
COMPLETION_PROMISE=$(fm_get completion_promise)
|
|
148
|
+
COMPLETION_CONDITION=$(fm_get completion_condition)
|
|
148
149
|
|
|
149
150
|
# Validate recorded session_id is a real UUID. Claude sometimes writes a custom
|
|
150
151
|
# string instead of $CLAUDE_CODE_SESSION_ID; non-UUID values are treated as
|
|
@@ -267,7 +268,37 @@ if [[ -f ".instar/autonomous-emergency-stop" ]]; then
|
|
|
267
268
|
exit 0
|
|
268
269
|
fi
|
|
269
270
|
|
|
270
|
-
# Completion
|
|
271
|
+
# Completion CONDITION — independent evaluator (mirrors /goal). Authoritative when
|
|
272
|
+
# set; the self-declared promise below is the legacy fallback. FAIL-SAFE: if the
|
|
273
|
+
# evaluator is unreachable or unsure, we keep working — never a false "done".
|
|
274
|
+
EVAL_REASON=""
|
|
275
|
+
if [[ -n "$COMPLETION_CONDITION" ]] && [[ -n "$TRANSCRIPT_PATH" ]] && [[ -f "$TRANSCRIPT_PATH" ]]; then
|
|
276
|
+
EVAL_MET=""
|
|
277
|
+
if [[ -n "${INSTAR_HOOK_EVAL_OVERRIDE:-}" ]]; then
|
|
278
|
+
# Test seam: "met" | "not-met" short-circuits the live evaluator call.
|
|
279
|
+
[[ "$INSTAR_HOOK_EVAL_OVERRIDE" == "met" ]] && EVAL_MET="true"
|
|
280
|
+
EVAL_REASON="override:$INSTAR_HOOK_EVAL_OVERRIDE"
|
|
281
|
+
else
|
|
282
|
+
EVAL_TAIL=$(grep '"role":"assistant"' "$TRANSCRIPT_PATH" 2>/dev/null | tail -6 \
|
|
283
|
+
| jq -r '.message.content | map(select(.type=="text")) | map(.text) | join("\n")' 2>/dev/null \
|
|
284
|
+
| tail -c 8000 || echo "")
|
|
285
|
+
EVAL_PORT=$(python3 -c "import json;print(json.load(open('.instar/config.json')).get('port',4040))" 2>/dev/null || echo 4040)
|
|
286
|
+
EVAL_AUTH=$(python3 -c "import json;print(json.load(open('.instar/config.json')).get('authToken',''))" 2>/dev/null || echo "")
|
|
287
|
+
EVAL_RESP=$(jq -nc --arg c "$COMPLETION_CONDITION" --arg t "$EVAL_TAIL" '{condition:$c,transcriptTail:$t}' \
|
|
288
|
+
| curl -s -m 35 -H "Authorization: Bearer $EVAL_AUTH" -H 'Content-Type: application/json' \
|
|
289
|
+
--data-binary @- "http://localhost:${EVAL_PORT}/autonomous/evaluate-completion" 2>/dev/null || echo "")
|
|
290
|
+
EVAL_MET=$(printf '%s' "$EVAL_RESP" | jq -r '.met // empty' 2>/dev/null || echo "")
|
|
291
|
+
EVAL_REASON=$(printf '%s' "$EVAL_RESP" | jq -r '.reason // empty' 2>/dev/null || echo "")
|
|
292
|
+
fi
|
|
293
|
+
if [[ "$EVAL_MET" == "true" ]]; then
|
|
294
|
+
echo "✅ Autonomous mode: completion condition met (independent evaluator): ${EVAL_REASON}"
|
|
295
|
+
rm -f "$STATE_FILE"
|
|
296
|
+
exit 0
|
|
297
|
+
fi
|
|
298
|
+
# Not met / unreachable → keep working; EVAL_REASON (if any) becomes next-turn guidance.
|
|
299
|
+
fi
|
|
300
|
+
|
|
301
|
+
# Completion promise (genuine completion — legacy/self-declared fallback)
|
|
271
302
|
if [[ -n "$TRANSCRIPT_PATH" ]] && [[ -f "$TRANSCRIPT_PATH" ]]; then
|
|
272
303
|
LAST_LINE=$(grep '"role":"assistant"' "$TRANSCRIPT_PATH" 2>/dev/null | tail -1 || echo "")
|
|
273
304
|
if [[ -n "$LAST_LINE" ]]; then
|
|
@@ -393,7 +424,16 @@ else
|
|
|
393
424
|
TIME_MSG="no time limit"
|
|
394
425
|
fi
|
|
395
426
|
|
|
396
|
-
|
|
427
|
+
# When a completion CONDITION is set, an independent judge decides "done" — steer
|
|
428
|
+
# toward the condition + feed back the judge's latest reason (mirrors /goal). When
|
|
429
|
+
# only a legacy promise is set, keep the self-declared-promise directive.
|
|
430
|
+
if [[ -n "$COMPLETION_CONDITION" ]]; then
|
|
431
|
+
GUIDANCE=""
|
|
432
|
+
[[ -n "$EVAL_REASON" ]] && GUIDANCE=" | Not done yet: ${EVAL_REASON}"
|
|
433
|
+
SYSTEM_MSG="🔄 Autonomous iteration $NEXT_ITERATION ($TIME_MSG) | Keep working until this is TRUE: ${COMPLETION_CONDITION}${GUIDANCE} | An independent check decides done from what you SURFACE — run the real checks and show the evidence. Do NOT defer — do it now${REPORT_DIRECTIVE}"
|
|
434
|
+
else
|
|
435
|
+
SYSTEM_MSG="🔄 Autonomous iteration $NEXT_ITERATION ($TIME_MSG) | Complete ALL tasks, then output <promise>$COMPLETION_PROMISE</promise> | Do NOT defer to future self — if you can do it now, DO IT NOW${REPORT_DIRECTIVE}"
|
|
436
|
+
fi
|
|
397
437
|
|
|
398
438
|
# Block exit and feed prompt back
|
|
399
439
|
jq -n \
|
|
@@ -14,6 +14,7 @@ REPORT_CHANNEL="telegram" # channel that owns this job; recovery note routes h
|
|
|
14
14
|
LEVEL_UP="false"
|
|
15
15
|
TASKS=""
|
|
16
16
|
COMPLETION_PROMISE=""
|
|
17
|
+
COMPLETION_CONDITION="" # verifiable end-state; an independent judge decides "done" (mirrors /goal). Preferred over the self-declared promise.
|
|
17
18
|
REPORT_INTERVAL="30m"
|
|
18
19
|
|
|
19
20
|
while [[ $# -gt 0 ]]; do
|
|
@@ -46,6 +47,10 @@ while [[ $# -gt 0 ]]; do
|
|
|
46
47
|
COMPLETION_PROMISE="$2"
|
|
47
48
|
shift 2
|
|
48
49
|
;;
|
|
50
|
+
--completion-condition)
|
|
51
|
+
COMPLETION_CONDITION="$2"
|
|
52
|
+
shift 2
|
|
53
|
+
;;
|
|
49
54
|
--report-interval)
|
|
50
55
|
REPORT_INTERVAL="$2"
|
|
51
56
|
shift 2
|
|
@@ -153,6 +158,7 @@ report_interval: "$REPORT_INTERVAL"
|
|
|
153
158
|
last_report_at: ""
|
|
154
159
|
level_up: $LEVEL_UP
|
|
155
160
|
completion_promise: "$COMPLETION_PROMISE"
|
|
161
|
+
completion_condition: "$COMPLETION_CONDITION"
|
|
156
162
|
---
|
|
157
163
|
|
|
158
164
|
# Autonomous Session
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/commands/server.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/commands/server.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAsQH,UAAU,YAAY;IACpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;2DACuD;IACvD,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAiqDD,wBAAsB,WAAW,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CA0zLtE;AAED,wBAAsB,UAAU,CAAC,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAsDzE;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,aAAa,CAAC,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAuD5E"}
|
package/dist/commands/server.js
CHANGED
|
@@ -91,6 +91,7 @@ import { pickupGitSyncMessages } from '../messaging/GitSyncTransport.js';
|
|
|
91
91
|
import { DeliveryRetryManager } from '../messaging/DeliveryRetryManager.js';
|
|
92
92
|
import { SpawnRequestManager } from '../messaging/SpawnRequestManager.js';
|
|
93
93
|
import { ThreadlineRouter } from '../threadline/ThreadlineRouter.js';
|
|
94
|
+
import { resolveThreadlineMcpEntry } from '../threadline/mcpEntry.js';
|
|
94
95
|
import { ThreadResumeMap } from '../threadline/ThreadResumeMap.js';
|
|
95
96
|
import { ListenerSessionManager } from '../threadline/ListenerSessionManager.js';
|
|
96
97
|
import { SystemReviewer } from '../monitoring/SystemReviewer.js';
|
|
@@ -2116,7 +2117,17 @@ export async function startServer(options) {
|
|
|
2116
2117
|
console.log(pc.yellow(` Git sync setup: ${err instanceof Error ? err.message : String(err)}`));
|
|
2117
2118
|
}
|
|
2118
2119
|
}
|
|
2119
|
-
|
|
2120
|
+
// Per-agent Codex threadline MCP override. Codex reads a SHARED
|
|
2121
|
+
// ~/.codex/config.toml whose [mcp_servers."threadline"] is last-writer-wins
|
|
2122
|
+
// across every codex agent on the machine — so a codex worker could load a
|
|
2123
|
+
// DIFFERENT agent's threadline identity and its threadline_send would be
|
|
2124
|
+
// misaddressed. Pin this agent's own entry per-spawn (the launch builders
|
|
2125
|
+
// emit `-c mcp_servers.threadline.*`). Only when threadline is configured;
|
|
2126
|
+
// ignored by non-codex launches. See CODEX-MULTIAGENT-THREADLINE-SPEC.
|
|
2127
|
+
const codexThreadlineMcp = config.threadline
|
|
2128
|
+
? resolveThreadlineMcpEntry(config.sessions.projectDir, config.stateDir, config.projectName)
|
|
2129
|
+
: undefined;
|
|
2130
|
+
const sessionManager = new SessionManager(codexThreadlineMcp ? { ...config.sessions, codexThreadlineMcp } : config.sessions, state);
|
|
2120
2131
|
// Input Guard is constructed later (after sharedIntelligence is available)
|
|
2121
2132
|
// so the topic coherence reviewer can route through the IntelligenceProvider
|
|
2122
2133
|
// abstraction instead of calling Anthropic directly.
|
|
@@ -5970,6 +5981,12 @@ export async function startServer(options) {
|
|
|
5970
5981
|
// §4.5: honor SpawnRequestManager's provenance tag so drain-spawned
|
|
5971
5982
|
// sessions are distinguishable from inline-spawned ones in logs/stream.
|
|
5972
5983
|
triggeredBy: opts?.triggeredBy ?? 'spawn-request',
|
|
5984
|
+
// This is the Threadline inbound-reply spawn: the worker must call
|
|
5985
|
+
// the threadline_send MCP tool to reply, which a codex worker can only
|
|
5986
|
+
// do under full bypass (codex cancels MCP calls in any sandbox). Jobs
|
|
5987
|
+
// do NOT set this and stay sandboxed. Bounded: Threadline only accepts
|
|
5988
|
+
// messages from trusted agents.
|
|
5989
|
+
codexAllowMcpTools: true,
|
|
5973
5990
|
});
|
|
5974
5991
|
return session.id;
|
|
5975
5992
|
},
|
|
@@ -6160,6 +6177,9 @@ export async function startServer(options) {
|
|
|
6160
6177
|
minIqsBand: pipeConfig?.minIqsBand ?? 70,
|
|
6161
6178
|
framework: pipeFramework,
|
|
6162
6179
|
binaryPath: pipeBinaryPath,
|
|
6180
|
+
// Same per-agent codex threadline MCP override as SessionManager, so a
|
|
6181
|
+
// codex pipe-reply worker uses THIS agent's threadline MCP.
|
|
6182
|
+
...(codexThreadlineMcp ? { codexThreadlineMcp } : {}),
|
|
6163
6183
|
});
|
|
6164
6184
|
console.log(pc.dim(` Pipe sessions: enabled (model: ${pipeConfig?.model ?? 'sonnet'}, max: ${pipeConfig?.maxConcurrent ?? 5})`));
|
|
6165
6185
|
}
|
|
@@ -6579,6 +6599,15 @@ export async function startServer(options) {
|
|
|
6579
6599
|
else {
|
|
6580
6600
|
console.log(pc.yellow(' Discovery evaluator: inactive (no IntelligenceProvider)'));
|
|
6581
6601
|
}
|
|
6602
|
+
// Independent autonomous-completion judge (mirrors /goal). Reuses the
|
|
6603
|
+
// framework-aware sharedIntelligence; falls back to the self-declared promise
|
|
6604
|
+
// when absent (the stop-hook handles that).
|
|
6605
|
+
let completionEvaluator;
|
|
6606
|
+
if (sharedIntelligence) {
|
|
6607
|
+
const { CompletionEvaluator } = await import('../core/CompletionEvaluator.js');
|
|
6608
|
+
completionEvaluator = new CompletionEvaluator({ intelligence: sharedIntelligence });
|
|
6609
|
+
console.log(pc.green(' Completion evaluator: active (independent /goal-style judge)'));
|
|
6610
|
+
}
|
|
6582
6611
|
// Register feature-discovery probe for self-knowledge tree (Phase 4: Agent Integration)
|
|
6583
6612
|
if (selfKnowledgeTree && featureRegistry) {
|
|
6584
6613
|
selfKnowledgeTree.probes.register('feature-discovery', async () => {
|
|
@@ -6911,7 +6940,7 @@ export async function startServer(options) {
|
|
|
6911
6940
|
},
|
|
6912
6941
|
});
|
|
6913
6942
|
}
|
|
6914
|
-
const server = new AgentServer({ config, sessionManager, state, scheduler, telegram, relationships, feedback, feedbackAnomalyDetector, dispatches, updateChecker, autoUpdater, autoDispatcher, quotaTracker, quotaManager, publisher, viewer, tunnel, evolution, watchdog, topicMemory, triageNurse, projectMapper, coherenceGate: scopeVerifier, contextHierarchy, canonicalState, operationGate, sentinel, adaptiveTrust, memoryMonitor, orphanReaper, coherenceMonitor, commitmentTracker, semanticMemory, activitySentinel, rateLimitSentinel, messageRouter, summarySentinel, spawnManager, systemReviewer, capabilityMapper, selfKnowledgeTree, coverageAuditor, topicResumeMap: _topicResumeMap ?? undefined, sessionRefresh: _sessionRefresh ?? undefined, autonomyManager, trustElevationTracker, autonomousEvolution, coordinator: coordinator.enabled ? coordinator : undefined, localSigningKeyPem, whatsapp: whatsappAdapter, slack: slackAdapter, imessage: imessageAdapter, whatsappBusinessBackend, messageBridge, hookEventReceiver, worktreeMonitor, subagentTracker, instructionsVerifier, handshakeManager: threadlineHandshake, threadlineRouter, threadResumeMap, topicLinkageHandler: topicLinkageHandler ?? undefined, threadlineRelayClient, threadlineReplyWaiters, listenerManager: listenerManager ?? undefined, responseReviewGate, messagingToneGate, outboundDedupGate, telemetryHeartbeat, pasteManager, featureRegistry, discoveryEvaluator, unifiedTrust, liveConfig, sharedStateLedger, ledgerSessionRegistry, worktreeManager, oidcEnrolledRepos: parallelDevConfig?.oidcEnrolledRepos, initiativeTracker, projectRoundRunner, projectDriftChecker, machineHeartbeat, proxyCoordinator, topicIntentStore, telegramBridgeConfig, telegramBridge: telegramBridge ?? undefined, threadlineObservability, workingMemory, taskFlowRegistry, threadlineFlowBridge });
|
|
6943
|
+
const server = new AgentServer({ config, sessionManager, state, scheduler, telegram, relationships, feedback, feedbackAnomalyDetector, dispatches, updateChecker, autoUpdater, autoDispatcher, quotaTracker, quotaManager, publisher, viewer, tunnel, evolution, watchdog, topicMemory, triageNurse, projectMapper, coherenceGate: scopeVerifier, contextHierarchy, canonicalState, operationGate, sentinel, adaptiveTrust, memoryMonitor, orphanReaper, coherenceMonitor, commitmentTracker, semanticMemory, activitySentinel, rateLimitSentinel, messageRouter, summarySentinel, spawnManager, systemReviewer, capabilityMapper, selfKnowledgeTree, coverageAuditor, topicResumeMap: _topicResumeMap ?? undefined, sessionRefresh: _sessionRefresh ?? undefined, autonomyManager, trustElevationTracker, autonomousEvolution, coordinator: coordinator.enabled ? coordinator : undefined, localSigningKeyPem, whatsapp: whatsappAdapter, slack: slackAdapter, imessage: imessageAdapter, whatsappBusinessBackend, messageBridge, hookEventReceiver, worktreeMonitor, subagentTracker, instructionsVerifier, handshakeManager: threadlineHandshake, threadlineRouter, threadResumeMap, topicLinkageHandler: topicLinkageHandler ?? undefined, threadlineRelayClient, threadlineReplyWaiters, listenerManager: listenerManager ?? undefined, responseReviewGate, messagingToneGate, outboundDedupGate, telemetryHeartbeat, pasteManager, featureRegistry, discoveryEvaluator, completionEvaluator, unifiedTrust, liveConfig, sharedStateLedger, ledgerSessionRegistry, worktreeManager, oidcEnrolledRepos: parallelDevConfig?.oidcEnrolledRepos, initiativeTracker, projectRoundRunner, projectDriftChecker, machineHeartbeat, proxyCoordinator, topicIntentStore, telegramBridgeConfig, telegramBridge: telegramBridge ?? undefined, threadlineObservability, workingMemory, taskFlowRegistry, threadlineFlowBridge });
|
|
6915
6944
|
// Boot-recovery (tunnel-failure-resilience spec Part 6): if the agent
|
|
6916
6945
|
// died mid-relay-episode, the persisted tunnel.json carries
|
|
6917
6946
|
// rotationPending=true. Rotate the dashboard PIN + authToken BEFORE
|