opensquid 0.5.441 → 0.5.447
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/functions/arm_scope.d.ts +27 -0
- package/dist/functions/arm_scope.d.ts.map +1 -0
- package/dist/functions/arm_scope.js +52 -0
- package/dist/functions/arm_scope.js.map +1 -0
- package/dist/functions/index.d.ts +1 -0
- package/dist/functions/index.d.ts.map +1 -1
- package/dist/functions/index.js +1 -0
- package/dist/functions/index.js.map +1 -1
- package/dist/runtime/bootstrap.d.ts.map +1 -1
- package/dist/runtime/bootstrap.js +2 -0
- package/dist/runtime/bootstrap.js.map +1 -1
- package/dist/runtime/handoff/render.d.ts +5 -4
- package/dist/runtime/handoff/render.d.ts.map +1 -1
- package/dist/runtime/handoff/render.js +7 -7
- package/dist/runtime/handoff/render.js.map +1 -1
- package/dist/runtime/hooks/active_task_mirror.js +0 -0
- package/dist/runtime/hooks/apply_patch.js +0 -0
- package/dist/runtime/hooks/dispatch.js +0 -0
- package/dist/runtime/hooks/hook_output.js +0 -0
- package/dist/runtime/hooks/memory_reconcile.js +0 -0
- package/dist/runtime/hooks/new_project_detect.js +0 -0
- package/dist/runtime/hooks/profession_resolver.js +0 -0
- package/dist/runtime/hooks/scope_intent.js +0 -0
- package/dist/runtime/hooks/session_id.js +0 -0
- package/dist/runtime/hooks/session_liveness.js +0 -0
- package/dist/runtime/hooks/stop_drive.js +0 -0
- package/dist/runtime/hooks/stop_stream.js +0 -0
- package/dist/runtime/hooks/subagent_guard.js +0 -0
- package/dist/runtime/hooks/transcript.js +0 -0
- package/dist/runtime/hooks/transcript_tasks.js +0 -0
- package/dist/runtime/ralph/orchestrator.d.ts.map +1 -1
- package/dist/runtime/ralph/orchestrator.js +2 -1
- package/dist/runtime/ralph/orchestrator.js.map +1 -1
- package/dist/setup/cli/limits_state.d.ts.map +1 -1
- package/dist/setup/cli/limits_state.js +6 -40
- package/dist/setup/cli/limits_state.js.map +1 -1
- package/dist/setup/cli/pack_walk.d.ts +32 -0
- package/dist/setup/cli/pack_walk.d.ts.map +1 -0
- package/dist/setup/cli/pack_walk.js +76 -0
- package/dist/setup/cli/pack_walk.js.map +1 -0
- package/dist/setup/cli/permissions_state.d.ts.map +1 -1
- package/dist/setup/cli/permissions_state.js +6 -37
- package/dist/setup/cli/permissions_state.js.map +1 -1
- package/dist/setup/cli/triggers_state.d.ts.map +1 -1
- package/dist/setup/cli/triggers_state.js +3 -29
- package/dist/setup/cli/triggers_state.js.map +1 -1
- package/dist/workgraph/events.d.ts.map +1 -1
- package/dist/workgraph/events.js +10 -0
- package/dist/workgraph/events.js.map +1 -1
- package/dist/workgraph/store.d.ts.map +1 -1
- package/dist/workgraph/store.js +5 -0
- package/dist/workgraph/store.js.map +1 -1
- package/dist/workgraph/types.d.ts +2 -1
- package/dist/workgraph/types.d.ts.map +1 -1
- package/docs/ARCHITECTURE.md +268 -0
- package/package.json +5 -3
- package/packs/builtin/coding-flow/skills/entry-and-handoffs/skill.yaml +13 -17
- package/dist/anti-drift/evaluator.d.ts +0 -88
- package/dist/anti-drift/evaluator.d.ts.map +0 -1
- package/dist/anti-drift/evaluator.js +0 -417
- package/dist/anti-drift/evaluator.js.map +0 -1
- package/dist/anti-drift/evaluator.test.js +0 -78
- package/dist/anti-drift/rules.d.ts +0 -80
- package/dist/anti-drift/rules.d.ts.map +0 -1
- package/dist/anti-drift/rules.js +0 -368
- package/dist/anti-drift/rules.js.map +0 -1
- package/dist/anti-drift/rules.test.js +0 -213
- package/dist/anti-drift/state.d.ts +0 -107
- package/dist/anti-drift/state.d.ts.map +0 -1
- package/dist/anti-drift/state.js +0 -177
- package/dist/anti-drift/state.js.map +0 -1
- package/dist/anti-drift/state.test.js +0 -120
- package/dist/chat/adapters/discord.d.ts +0 -41
- package/dist/chat/adapters/discord.d.ts.map +0 -1
- package/dist/chat/adapters/discord.js +0 -176
- package/dist/chat/adapters/discord.js.map +0 -1
- package/dist/chat/adapters/discord.test.js +0 -25
- package/dist/chat/adapters/slack.d.ts +0 -43
- package/dist/chat/adapters/slack.d.ts.map +0 -1
- package/dist/chat/adapters/slack.js +0 -172
- package/dist/chat/adapters/slack.js.map +0 -1
- package/dist/chat/adapters/slack.test.js +0 -30
- package/dist/chat/adapters/telegram.d.ts +0 -148
- package/dist/chat/adapters/telegram.d.ts.map +0 -1
- package/dist/chat/adapters/telegram.js +0 -498
- package/dist/chat/adapters/telegram.js.map +0 -1
- package/dist/chat/adapters/telegram.test.js +0 -94
- package/dist/chat/config.d.ts +0 -98
- package/dist/chat/config.d.ts.map +0 -1
- package/dist/chat/config.js +0 -185
- package/dist/chat/config.js.map +0 -1
- package/dist/chat/daemon/active-project.d.ts +0 -17
- package/dist/chat/daemon/active-project.d.ts.map +0 -1
- package/dist/chat/daemon/active-project.js +0 -23
- package/dist/chat/daemon/active-project.js.map +0 -1
- package/dist/chat/daemon/autospawn.d.ts +0 -40
- package/dist/chat/daemon/autospawn.d.ts.map +0 -1
- package/dist/chat/daemon/autospawn.js +0 -129
- package/dist/chat/daemon/autospawn.js.map +0 -1
- package/dist/chat/daemon/autospawn.test.js +0 -112
- package/dist/chat/daemon/cli.d.ts +0 -18
- package/dist/chat/daemon/cli.d.ts.map +0 -1
- package/dist/chat/daemon/cli.js +0 -71
- package/dist/chat/daemon/cli.js.map +0 -1
- package/dist/chat/daemon/collisions.js +0 -384
- package/dist/chat/daemon/health-check.d.ts +0 -69
- package/dist/chat/daemon/health-check.d.ts.map +0 -1
- package/dist/chat/daemon/health-check.js +0 -112
- package/dist/chat/daemon/health-check.js.map +0 -1
- package/dist/chat/daemon/inbox-read.d.ts +0 -35
- package/dist/chat/daemon/inbox-read.d.ts.map +0 -1
- package/dist/chat/daemon/inbox-read.js +0 -75
- package/dist/chat/daemon/inbox-read.js.map +0 -1
- package/dist/chat/daemon/inbox-read.test.js +0 -97
- package/dist/chat/daemon/inbox.d.ts +0 -63
- package/dist/chat/daemon/inbox.d.ts.map +0 -1
- package/dist/chat/daemon/inbox.js +0 -56
- package/dist/chat/daemon/inbox.js.map +0 -1
- package/dist/chat/daemon/inbox.test.js +0 -110
- package/dist/chat/daemon/lifecycle.d.ts +0 -71
- package/dist/chat/daemon/lifecycle.d.ts.map +0 -1
- package/dist/chat/daemon/lifecycle.js +0 -221
- package/dist/chat/daemon/lifecycle.js.map +0 -1
- package/dist/chat/daemon/lifecycle.test.js +0 -163
- package/dist/chat/daemon/protocol.d.ts +0 -107
- package/dist/chat/daemon/protocol.d.ts.map +0 -1
- package/dist/chat/daemon/protocol.js +0 -54
- package/dist/chat/daemon/protocol.js.map +0 -1
- package/dist/chat/daemon/routing.d.ts +0 -140
- package/dist/chat/daemon/routing.d.ts.map +0 -1
- package/dist/chat/daemon/routing.js +0 -198
- package/dist/chat/daemon/routing.js.map +0 -1
- package/dist/chat/daemon/routing.test.js +0 -259
- package/dist/chat/daemon/rpc-client.d.ts +0 -45
- package/dist/chat/daemon/rpc-client.d.ts.map +0 -1
- package/dist/chat/daemon/rpc-client.js +0 -133
- package/dist/chat/daemon/rpc-client.js.map +0 -1
- package/dist/chat/daemon/rpc-server.d.ts +0 -39
- package/dist/chat/daemon/rpc-server.d.ts.map +0 -1
- package/dist/chat/daemon/rpc-server.js +0 -385
- package/dist/chat/daemon/rpc-server.js.map +0 -1
- package/dist/chat/daemon/rpc.test.js +0 -177
- package/dist/chat/daemon/subscribers.js +0 -257
- package/dist/chat/daemon/worker.d.ts +0 -27
- package/dist/chat/daemon/worker.d.ts.map +0 -1
- package/dist/chat/daemon/worker.js +0 -313
- package/dist/chat/daemon/worker.js.map +0 -1
- package/dist/chat/daemon/workspace-topic.js +0 -324
- package/dist/chat/env-token.d.ts +0 -60
- package/dist/chat/env-token.d.ts.map +0 -1
- package/dist/chat/env-token.js +0 -137
- package/dist/chat/env-token.js.map +0 -1
- package/dist/chat/env-token.test.js +0 -160
- package/dist/chat/factory.d.ts +0 -30
- package/dist/chat/factory.d.ts.map +0 -1
- package/dist/chat/factory.js +0 -50
- package/dist/chat/factory.js.map +0 -1
- package/dist/chat/factory.test.js +0 -55
- package/dist/chat/gateway.d.ts +0 -176
- package/dist/chat/gateway.d.ts.map +0 -1
- package/dist/chat/gateway.js +0 -146
- package/dist/chat/gateway.js.map +0 -1
- package/dist/chat/gateway.test.js +0 -192
- package/dist/claude-md.d.ts +0 -39
- package/dist/claude-md.d.ts.map +0 -1
- package/dist/claude-md.js +0 -113
- package/dist/claude-md.js.map +0 -1
- package/dist/claude-md.test.js +0 -91
- package/dist/codex/activate.d.ts +0 -66
- package/dist/codex/activate.d.ts.map +0 -1
- package/dist/codex/activate.js +0 -329
- package/dist/codex/activate.js.map +0 -1
- package/dist/codex/activate.test.js +0 -229
- package/dist/codex/bundled-default/bundled-default.test.js +0 -161
- package/dist/codex/cli-publish.test.js +0 -133
- package/dist/codex/cli.d.ts +0 -35
- package/dist/codex/cli.d.ts.map +0 -1
- package/dist/codex/cli.js +0 -554
- package/dist/codex/cli.js.map +0 -1
- package/dist/codex/cli.test.js +0 -277
- package/dist/codex/import-skill-md.d.ts +0 -53
- package/dist/codex/import-skill-md.d.ts.map +0 -1
- package/dist/codex/import-skill-md.js +0 -236
- package/dist/codex/import-skill-md.js.map +0 -1
- package/dist/codex/import-skill-md.test.js +0 -225
- package/dist/codex/loader.d.ts +0 -27
- package/dist/codex/loader.d.ts.map +0 -1
- package/dist/codex/loader.js +0 -86
- package/dist/codex/loader.js.map +0 -1
- package/dist/codex/loader.test.js +0 -75
- package/dist/codex/parse.d.ts +0 -28
- package/dist/codex/parse.d.ts.map +0 -1
- package/dist/codex/parse.js +0 -309
- package/dist/codex/parse.js.map +0 -1
- package/dist/codex/parse.test.js +0 -241
- package/dist/codex/store.d.ts +0 -87
- package/dist/codex/store.d.ts.map +0 -1
- package/dist/codex/store.js +0 -205
- package/dist/codex/store.js.map +0 -1
- package/dist/codex/store.test.js +0 -242
- package/dist/codex/types.d.ts +0 -398
- package/dist/codex/types.d.ts.map +0 -1
- package/dist/codex/types.js +0 -21
- package/dist/codex/types.js.map +0 -1
- package/dist/config.d.ts +0 -53
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -202
- package/dist/config.js.map +0 -1
- package/dist/config.test.js +0 -117
- package/dist/engine/cli.d.ts +0 -14
- package/dist/engine/cli.d.ts.map +0 -1
- package/dist/engine/cli.js +0 -171
- package/dist/engine/cli.js.map +0 -1
- package/dist/engine/client.d.ts +0 -219
- package/dist/engine/client.d.ts.map +0 -1
- package/dist/engine/client.js +0 -312
- package/dist/engine/client.js.map +0 -1
- package/dist/engine/config.d.ts +0 -62
- package/dist/engine/config.d.ts.map +0 -1
- package/dist/engine/config.js +0 -223
- package/dist/engine/config.js.map +0 -1
- package/dist/engine/index.d.ts +0 -17
- package/dist/engine/index.d.ts.map +0 -1
- package/dist/engine/index.js +0 -16
- package/dist/engine/index.js.map +0 -1
- package/dist/engine/resolver.d.ts +0 -62
- package/dist/engine/resolver.d.ts.map +0 -1
- package/dist/engine/resolver.js +0 -103
- package/dist/engine/resolver.js.map +0 -1
- package/dist/engine/singleton.d.ts +0 -95
- package/dist/engine/singleton.d.ts.map +0 -1
- package/dist/engine/singleton.js +0 -325
- package/dist/engine/singleton.js.map +0 -1
- package/dist/engine/types.d.ts +0 -402
- package/dist/engine/types.d.ts.map +0 -1
- package/dist/engine/types.js +0 -22
- package/dist/engine/types.js.map +0 -1
- package/dist/engine-binary-resolver.js +0 -110
- package/dist/engine-binary-resolver.test.js +0 -61
- package/dist/engine-cli.js +0 -60
- package/dist/engine-client.js +0 -301
- package/dist/engine-client.test.js +0 -118
- package/dist/functions/chain_state.d.ts +0 -51
- package/dist/functions/chain_state.d.ts.map +0 -1
- package/dist/functions/chain_state.js +0 -59
- package/dist/functions/chain_state.js.map +0 -1
- package/dist/hooks/drift-catalog.d.ts +0 -68
- package/dist/hooks/drift-catalog.d.ts.map +0 -1
- package/dist/hooks/drift-catalog.js +0 -184
- package/dist/hooks/drift-catalog.js.map +0 -1
- package/dist/hooks/drift-catalog.test.js +0 -154
- package/dist/hooks/drift-patterns.d.ts +0 -110
- package/dist/hooks/drift-patterns.d.ts.map +0 -1
- package/dist/hooks/drift-patterns.js +0 -289
- package/dist/hooks/drift-patterns.js.map +0 -1
- package/dist/hooks/drift-patterns.test.js +0 -325
- package/dist/hooks/engine-vocab-gate.d.ts +0 -108
- package/dist/hooks/engine-vocab-gate.d.ts.map +0 -1
- package/dist/hooks/engine-vocab-gate.js +0 -225
- package/dist/hooks/engine-vocab-gate.js.map +0 -1
- package/dist/hooks/engine-vocab-gate.test.js +0 -170
- package/dist/hooks/heartbeat.d.ts +0 -107
- package/dist/hooks/heartbeat.d.ts.map +0 -1
- package/dist/hooks/heartbeat.js +0 -316
- package/dist/hooks/heartbeat.js.map +0 -1
- package/dist/hooks/heartbeat.test.js +0 -393
- package/dist/hooks/honesty-ledger-session-scope.test.js +0 -100
- package/dist/hooks/honesty-ledger.d.ts +0 -123
- package/dist/hooks/honesty-ledger.d.ts.map +0 -1
- package/dist/hooks/honesty-ledger.js +0 -226
- package/dist/hooks/honesty-ledger.js.map +0 -1
- package/dist/hooks/honesty-ledger.test.js +0 -466
- package/dist/hooks/inline-report-check.d.ts +0 -63
- package/dist/hooks/inline-report-check.d.ts.map +0 -1
- package/dist/hooks/inline-report-check.js +0 -88
- package/dist/hooks/inline-report-check.js.map +0 -1
- package/dist/hooks/inline-report-check.test.js +0 -96
- package/dist/hooks/pre-tool-use.d.ts +0 -62
- package/dist/hooks/pre-tool-use.d.ts.map +0 -1
- package/dist/hooks/pre-tool-use.js +0 -342
- package/dist/hooks/pre-tool-use.js.map +0 -1
- package/dist/hooks/pre-tool-use.test.js +0 -134
- package/dist/hooks/session-end.d.ts +0 -15
- package/dist/hooks/session-end.d.ts.map +0 -1
- package/dist/hooks/session-end.js +0 -60
- package/dist/hooks/session-end.js.map +0 -1
- package/dist/hooks/session-end.test.js +0 -52
- package/dist/hooks/stop.d.ts +0 -35
- package/dist/hooks/stop.d.ts.map +0 -1
- package/dist/hooks/stop.js +0 -136
- package/dist/hooks/stop.js.map +0 -1
- package/dist/hooks/transcript-active-task.test.js +0 -342
- package/dist/hooks/transcript.d.ts +0 -26
- package/dist/hooks/transcript.d.ts.map +0 -1
- package/dist/hooks/transcript.js +0 -266
- package/dist/hooks/transcript.js.map +0 -1
- package/dist/hooks/transcript.test.js +0 -103
- package/dist/hooks/user-prompt-submit.d.ts +0 -74
- package/dist/hooks/user-prompt-submit.d.ts.map +0 -1
- package/dist/hooks/user-prompt-submit.js +0 -256
- package/dist/hooks/user-prompt-submit.js.map +0 -1
- package/dist/hooks/user-prompt-submit.test.js +0 -118
- package/dist/hooks/versioning-gate.d.ts +0 -101
- package/dist/hooks/versioning-gate.d.ts.map +0 -1
- package/dist/hooks/versioning-gate.js +0 -245
- package/dist/hooks/versioning-gate.js.map +0 -1
- package/dist/hooks/versioning-gate.test.js +0 -368
- package/dist/hooks/workflow-gate.d.ts +0 -64
- package/dist/hooks/workflow-gate.d.ts.map +0 -1
- package/dist/hooks/workflow-gate.js +0 -152
- package/dist/hooks/workflow-gate.js.map +0 -1
- package/dist/hooks/workflow-gate.test.js +0 -197
- package/dist/hooks-cli.d.ts +0 -25
- package/dist/hooks-cli.d.ts.map +0 -1
- package/dist/hooks-cli.js +0 -286
- package/dist/hooks-cli.js.map +0 -1
- package/dist/hooks-cli.test.js +0 -148
- package/dist/origin.d.ts +0 -16
- package/dist/origin.d.ts.map +0 -1
- package/dist/origin.js +0 -92
- package/dist/origin.js.map +0 -1
- package/dist/packs/seed_lessons_ingest.d.ts +0 -30
- package/dist/packs/seed_lessons_ingest.d.ts.map +0 -1
- package/dist/packs/seed_lessons_ingest.js +0 -107
- package/dist/packs/seed_lessons_ingest.js.map +0 -1
- package/dist/project-cli.d.ts +0 -7
- package/dist/project-cli.d.ts.map +0 -1
- package/dist/project-cli.js +0 -145
- package/dist/project-cli.js.map +0 -1
- package/dist/project.d.ts +0 -127
- package/dist/project.d.ts.map +0 -1
- package/dist/project.js +0 -281
- package/dist/project.js.map +0 -1
- package/dist/project.test.js +0 -287
- package/dist/rag/backends/loop_engine.d.ts +0 -61
- package/dist/rag/backends/loop_engine.d.ts.map +0 -1
- package/dist/rag/backends/loop_engine.js +0 -160
- package/dist/rag/backends/loop_engine.js.map +0 -1
- package/dist/recall.d.ts +0 -82
- package/dist/recall.d.ts.map +0 -1
- package/dist/recall.js +0 -81
- package/dist/recall.js.map +0 -1
- package/dist/runtime/agent_bridge/autospawn.d.ts +0 -131
- package/dist/runtime/agent_bridge/autospawn.d.ts.map +0 -1
- package/dist/runtime/agent_bridge/autospawn.js +0 -251
- package/dist/runtime/agent_bridge/autospawn.js.map +0 -1
- package/dist/runtime/chain_state.d.ts +0 -124
- package/dist/runtime/chain_state.d.ts.map +0 -1
- package/dist/runtime/chain_state.js +0 -189
- package/dist/runtime/chain_state.js.map +0 -1
- package/dist/runtime/hooks/permission_decision.d.ts +0 -34
- package/dist/runtime/hooks/permission_decision.d.ts.map +0 -1
- package/dist/runtime/hooks/permission_decision.js +0 -39
- package/dist/runtime/hooks/permission_decision.js.map +0 -1
- package/dist/runtime/workflow_fsm.d.ts +0 -21
- package/dist/runtime/workflow_fsm.d.ts.map +0 -1
- package/dist/runtime/workflow_fsm.js +0 -25
- package/dist/runtime/workflow_fsm.js.map +0 -1
- package/dist/runtime/workflow_map.d.ts +0 -26
- package/dist/runtime/workflow_map.d.ts.map +0 -1
- package/dist/runtime/workflow_map.js +0 -38
- package/dist/runtime/workflow_map.js.map +0 -1
- package/dist/scope.d.ts +0 -48
- package/dist/scope.d.ts.map +0 -1
- package/dist/scope.js +0 -111
- package/dist/scope.js.map +0 -1
- package/dist/setup/cli/topic_create_step.d.ts +0 -84
- package/dist/setup/cli/topic_create_step.d.ts.map +0 -1
- package/dist/setup/cli/topic_create_step.js +0 -213
- package/dist/setup/cli/topic_create_step.js.map +0 -1
- package/dist/system-export.d.ts +0 -65
- package/dist/system-export.d.ts.map +0 -1
- package/dist/system-export.js +0 -194
- package/dist/system-export.js.map +0 -1
- package/dist/utterance/classifier.d.ts +0 -53
- package/dist/utterance/classifier.d.ts.map +0 -1
- package/dist/utterance/classifier.js +0 -184
- package/dist/utterance/classifier.js.map +0 -1
- package/dist/utterance/classifier.test.js +0 -147
|
@@ -1,466 +0,0 @@
|
|
|
1
|
-
import * as crypto from "node:crypto";
|
|
2
|
-
import { promises as fs } from "node:fs";
|
|
3
|
-
import * as os from "node:os";
|
|
4
|
-
import * as path from "node:path";
|
|
5
|
-
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
6
|
-
import { CLAIM_PATTERNS, clearTurnLedger, reconcile, readBrokenPromises, readTurnLedger, recordBrokenPromise, recordToolCall, } from "./honesty-ledger.js";
|
|
7
|
-
let tmpRoot;
|
|
8
|
-
const SESSION = "test-session";
|
|
9
|
-
beforeEach(async () => {
|
|
10
|
-
tmpRoot = path.join(os.tmpdir(), `oscli-honesty-${crypto.randomUUID()}`);
|
|
11
|
-
await fs.mkdir(tmpRoot, { recursive: true });
|
|
12
|
-
});
|
|
13
|
-
afterEach(async () => {
|
|
14
|
-
await fs.rm(tmpRoot, { recursive: true, force: true });
|
|
15
|
-
});
|
|
16
|
-
// ---------------------------------------------------------------------
|
|
17
|
-
// Ledger I/O
|
|
18
|
-
// ---------------------------------------------------------------------
|
|
19
|
-
describe("turn ledger", () => {
|
|
20
|
-
it("recordToolCall appends and readTurnLedger returns entries", async () => {
|
|
21
|
-
await recordToolCall(SESSION, "Bash", "git status", { dataRoot: tmpRoot });
|
|
22
|
-
await recordToolCall(SESSION, "Read", "src/foo.ts", { dataRoot: tmpRoot });
|
|
23
|
-
const entries = await readTurnLedger(SESSION, { dataRoot: tmpRoot });
|
|
24
|
-
expect(entries.map((e) => e.tool)).toEqual(["Bash", "Read"]);
|
|
25
|
-
});
|
|
26
|
-
it("readTurnLedger returns [] when no ledger exists", async () => {
|
|
27
|
-
expect(await readTurnLedger("unknown-session", { dataRoot: tmpRoot })).toEqual([]);
|
|
28
|
-
});
|
|
29
|
-
it("clearTurnLedger removes the file", async () => {
|
|
30
|
-
await recordToolCall(SESSION, "Bash", "ls", { dataRoot: tmpRoot });
|
|
31
|
-
await clearTurnLedger(SESSION, { dataRoot: tmpRoot });
|
|
32
|
-
expect(await readTurnLedger(SESSION, { dataRoot: tmpRoot })).toEqual([]);
|
|
33
|
-
});
|
|
34
|
-
});
|
|
35
|
-
// ---------------------------------------------------------------------
|
|
36
|
-
// Reconcile — the heart of the lie-catcher
|
|
37
|
-
// ---------------------------------------------------------------------
|
|
38
|
-
function ledger(...entries) {
|
|
39
|
-
return entries.map(([tool, input_summary]) => ({
|
|
40
|
-
ts: "2026-05-15T00:00:00Z",
|
|
41
|
-
tool,
|
|
42
|
-
input_summary,
|
|
43
|
-
}));
|
|
44
|
-
}
|
|
45
|
-
describe("reconcile — research-start", () => {
|
|
46
|
-
it("flags 'pre-research starting' with no Agent tool call", async () => {
|
|
47
|
-
const broken = reconcile("Pre-research starting for #111.", []);
|
|
48
|
-
expect(broken.map((b) => b.claim_id)).toContain("research-start");
|
|
49
|
-
});
|
|
50
|
-
it("doesn't flag when Agent was actually called", async () => {
|
|
51
|
-
const broken = reconcile("Pre-research starting now.", ledger(["Agent", "research prompt"]));
|
|
52
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("research-start");
|
|
53
|
-
});
|
|
54
|
-
it("flags 'research begins' phrasing", async () => {
|
|
55
|
-
const broken = reconcile("Research begins.", []);
|
|
56
|
-
expect(broken.map((b) => b.claim_id)).toContain("research-start");
|
|
57
|
-
});
|
|
58
|
-
});
|
|
59
|
-
describe("reconcile — running-tests", () => {
|
|
60
|
-
it("flags 'tests pass' without npm test or cargo test in the ledger", async () => {
|
|
61
|
-
const broken = reconcile("All tests pass.", ledger(["Read", "x.ts"]));
|
|
62
|
-
expect(broken.map((b) => b.claim_id)).toContain("running-tests");
|
|
63
|
-
});
|
|
64
|
-
it("clears when npm test ran", async () => {
|
|
65
|
-
const broken = reconcile("Tests pass.", ledger(["Bash", "npm test"]));
|
|
66
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("running-tests");
|
|
67
|
-
});
|
|
68
|
-
it("clears when cargo test ran", async () => {
|
|
69
|
-
const broken = reconcile("Tests passed.", ledger(["Bash", "cargo test --lib"]));
|
|
70
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("running-tests");
|
|
71
|
-
});
|
|
72
|
-
});
|
|
73
|
-
describe("reconcile — committed", () => {
|
|
74
|
-
it("flags 'committed' without git commit in the ledger", async () => {
|
|
75
|
-
const broken = reconcile("Just committed the change.", ledger(["Read", "x"]));
|
|
76
|
-
expect(broken.map((b) => b.claim_id)).toContain("committed");
|
|
77
|
-
});
|
|
78
|
-
it("clears when git commit ran", async () => {
|
|
79
|
-
const broken = reconcile("Committed and ready.", ledger(["Bash", "git commit -m foo"]));
|
|
80
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("committed");
|
|
81
|
-
});
|
|
82
|
-
});
|
|
83
|
-
describe("reconcile — audit-done", () => {
|
|
84
|
-
it("flags 'audit done' with empty ledger", async () => {
|
|
85
|
-
const broken = reconcile("Phase 5 audit done.", []);
|
|
86
|
-
expect(broken.map((b) => b.claim_id)).toContain("audit-done");
|
|
87
|
-
});
|
|
88
|
-
it("clears when any tool was called (audit is loose)", async () => {
|
|
89
|
-
const broken = reconcile("Audit done.", ledger(["Read", "src/serve.rs"]));
|
|
90
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("audit-done");
|
|
91
|
-
});
|
|
92
|
-
});
|
|
93
|
-
describe("reconcile — running-build", () => {
|
|
94
|
-
it("flags 'build clean' without npm build or cargo build", async () => {
|
|
95
|
-
const broken = reconcile("Build clean.", []);
|
|
96
|
-
expect(broken.map((b) => b.claim_id)).toContain("running-build");
|
|
97
|
-
});
|
|
98
|
-
it("clears when npm run build ran", async () => {
|
|
99
|
-
const broken = reconcile("Build green.", ledger(["Bash", "npm run build"]));
|
|
100
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("running-build");
|
|
101
|
-
});
|
|
102
|
-
});
|
|
103
|
-
describe("reconcile — no claims = no broken promises", () => {
|
|
104
|
-
it("returns [] when text has no claim phrases", async () => {
|
|
105
|
-
expect(reconcile("Here's a regular sentence with no claims.", [])).toEqual([]);
|
|
106
|
-
});
|
|
107
|
-
it("doesn't false-fire on 'research' as a noun", async () => {
|
|
108
|
-
// "the research is going" doesn't trigger "research starting"
|
|
109
|
-
const broken = reconcile("The research is going well.", []);
|
|
110
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("research-start");
|
|
111
|
-
});
|
|
112
|
-
});
|
|
113
|
-
describe("reconcile — case-insensitive", () => {
|
|
114
|
-
it("matches 'COMMITTED' as well as 'committed'", async () => {
|
|
115
|
-
const broken = reconcile("COMMITTED.", []);
|
|
116
|
-
expect(broken.map((b) => b.claim_id)).toContain("committed");
|
|
117
|
-
});
|
|
118
|
-
});
|
|
119
|
-
// ---------------------------------------------------------------------
|
|
120
|
-
// Broken-promise persistence
|
|
121
|
-
// ---------------------------------------------------------------------
|
|
122
|
-
describe("broken-promise ledger", () => {
|
|
123
|
-
it("recordBrokenPromise appends and readBrokenPromises returns entries", async () => {
|
|
124
|
-
await recordBrokenPromise(SESSION, {
|
|
125
|
-
ts: "2026-05-15T00:00:00Z",
|
|
126
|
-
claim_id: "research-start",
|
|
127
|
-
claim_label: "spawn a research agent",
|
|
128
|
-
matched_text: "pre-research starting",
|
|
129
|
-
reason: "no Agent tool call",
|
|
130
|
-
}, { dataRoot: tmpRoot });
|
|
131
|
-
const promises = await readBrokenPromises(SESSION, { dataRoot: tmpRoot });
|
|
132
|
-
expect(promises).toHaveLength(1);
|
|
133
|
-
expect(promises[0].claim_id).toBe("research-start");
|
|
134
|
-
});
|
|
135
|
-
it("appends are additive across multiple calls", async () => {
|
|
136
|
-
await recordBrokenPromise(SESSION, mockPromise("a"), { dataRoot: tmpRoot });
|
|
137
|
-
await recordBrokenPromise(SESSION, mockPromise("b"), { dataRoot: tmpRoot });
|
|
138
|
-
const promises = await readBrokenPromises(SESSION, { dataRoot: tmpRoot });
|
|
139
|
-
expect(promises.map((p) => p.claim_id)).toEqual(["a", "b"]);
|
|
140
|
-
});
|
|
141
|
-
});
|
|
142
|
-
function mockPromise(claim_id) {
|
|
143
|
-
return {
|
|
144
|
-
ts: "2026-05-15T00:00:00Z",
|
|
145
|
-
claim_id,
|
|
146
|
-
claim_label: "x",
|
|
147
|
-
matched_text: "x",
|
|
148
|
-
reason: "x",
|
|
149
|
-
};
|
|
150
|
-
}
|
|
151
|
-
// ---------------------------------------------------------------------
|
|
152
|
-
// Catalog sanity
|
|
153
|
-
// ---------------------------------------------------------------------
|
|
154
|
-
describe("CLAIM_PATTERNS catalog", () => {
|
|
155
|
-
it("has at least the 6 MVP patterns", () => {
|
|
156
|
-
const ids = CLAIM_PATTERNS.map((p) => p.id);
|
|
157
|
-
expect(ids).toContain("research-start");
|
|
158
|
-
expect(ids).toContain("running-tests");
|
|
159
|
-
expect(ids).toContain("committed");
|
|
160
|
-
expect(ids).toContain("audit-done");
|
|
161
|
-
expect(ids).toContain("running-build");
|
|
162
|
-
expect(ids).toContain("starting-now");
|
|
163
|
-
});
|
|
164
|
-
it("v0.6.4: includes 5 new claim patterns from today's drift catalog", () => {
|
|
165
|
-
const ids = CLAIM_PATTERNS.map((p) => p.id);
|
|
166
|
-
expect(ids).toContain("telegram-sent");
|
|
167
|
-
expect(ids).toContain("pushed");
|
|
168
|
-
expect(ids).toContain("tagged");
|
|
169
|
-
expect(ids).toContain("phase-logged");
|
|
170
|
-
expect(ids).toContain("fmt-clippy");
|
|
171
|
-
});
|
|
172
|
-
it("every pattern has a valid regex", () => {
|
|
173
|
-
for (const p of CLAIM_PATTERNS) {
|
|
174
|
-
expect(() => new RegExp(p.text_regex, "i")).not.toThrow();
|
|
175
|
-
}
|
|
176
|
-
});
|
|
177
|
-
});
|
|
178
|
-
// =====================================================================
|
|
179
|
-
// v0.6.4 — new claim patterns from today's drift catalog. Each test
|
|
180
|
-
// asserts BOTH the unfulfilled case (broken promise) AND the fulfilled
|
|
181
|
-
// case (evidence satisfied → no broken promise). Pattern-detection
|
|
182
|
-
// false-positives are low-cost (one nag) but false-negatives let the
|
|
183
|
-
// drift slip — these tests pin both directions.
|
|
184
|
-
// =====================================================================
|
|
185
|
-
describe("reconcile — telegram-sent (v0.6.4)", () => {
|
|
186
|
-
it("flags 'Telegram report sent' without any chat tool call", () => {
|
|
187
|
-
const broken = reconcile("🦑 Telegram report sent.", ledger(["Bash", "ls"]));
|
|
188
|
-
expect(broken.map((b) => b.claim_id)).toContain("telegram-sent");
|
|
189
|
-
});
|
|
190
|
-
it("satisfies when mcp__plugin_telegram_telegram__reply was called", () => {
|
|
191
|
-
const broken = reconcile("🦑 Telegram report sent.", ledger(["mcp__plugin_telegram_telegram__reply", "{}"]));
|
|
192
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("telegram-sent");
|
|
193
|
-
});
|
|
194
|
-
it("satisfies when mcp__opensquid__chat_send was called (any_of evidence)", () => {
|
|
195
|
-
const broken = reconcile("Pinged you via telegram.", ledger(["mcp__opensquid__chat_send", "{}"]));
|
|
196
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("telegram-sent");
|
|
197
|
-
});
|
|
198
|
-
it("matches 'sent to telegram' / 'pinged you' phrasings", () => {
|
|
199
|
-
const broken = reconcile("Sent to Telegram successfully.", ledger());
|
|
200
|
-
expect(broken.map((b) => b.claim_id)).toContain("telegram-sent");
|
|
201
|
-
const broken2 = reconcile("Pinged you on Telegram.", ledger());
|
|
202
|
-
expect(broken2.map((b) => b.claim_id)).toContain("telegram-sent");
|
|
203
|
-
});
|
|
204
|
-
});
|
|
205
|
-
describe("reconcile — pushed (v0.6.4 — expanded alternation)", () => {
|
|
206
|
-
it("flags 'pushed to origin' without git push", () => {
|
|
207
|
-
const broken = reconcile("Pushed to origin main.", ledger(["Bash", "git status"]));
|
|
208
|
-
expect(broken.map((b) => b.claim_id)).toContain("pushed");
|
|
209
|
-
});
|
|
210
|
-
it("satisfies with git push call", () => {
|
|
211
|
-
const broken = reconcile("Pushed to main.", ledger(["Bash", "git push origin main"]));
|
|
212
|
-
expect(broken.map((b) => b.claim_id)).not.toContain("pushed");
|
|
213
|
-
});
|
|
214
|
-
it("matches 'pushing the engine' phrasing", () => {
|
|
215
|
-
const broken = reconcile("Pushing the engine commit now.", ledger());
|
|
216
|
-
expect(broken.map((b) => b.claim_id)).toContain("pushed");
|
|
217
|
-
});
|
|
218
|
-
// v0.6.4 audit-LOW expansion: common phrasings the previous regex missed.
|
|
219
|
-
it("matches 'pushed it'", () => {
|
|
220
|
-
const broken = reconcile("Pushed it to remote.", ledger());
|
|
221
|
-
expect(broken.map((b) => b.claim_id)).toContain("pushed");
|
|
222
|
-
});
|
|
223
|
-
it("matches 'pushed the branch'", () => {
|
|
224
|
-
const broken = reconcile("Pushed the branch.", ledger());
|
|
225
|
-
expect(broken.map((b) => b.claim_id)).toContain("pushed");
|
|
226
|
-
});
|
|
227
|
-
it("matches 'pushed the PR'", () => {
|
|
228
|
-
const broken = reconcile("Pushed the PR.", ledger());
|
|
229
|
-
expect(broken.map((b) => b.claim_id)).toContain("pushed");
|
|
230
|
-
});
|
|
231
|
-
it("matches 'pushed the changes'", () => {
|
|
232
|
-
const broken = reconcile("Pushed the changes upstream.", ledger());
|
|
233
|
-
expect(broken.map((b) => b.claim_id)).toContain("pushed");
|
|
234
|
-
});
|
|
235
|
-
});
|
|
236
|
-
describe("reconcile — tagged (v0.6.4 — requires version-shaped token)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("flags 'just tagged v0.5.0' without git tag", () => {
        const ids = brokenIds("Just tagged v0.5.0.", ledger(["Bash", "git log"]));
        expect(ids).toContain("tagged");
    });

    it("satisfies with git tag call", () => {
        const ids = brokenIds("Tagged v0.5.0.", ledger(["Bash", "git tag -a v0.5.0 -m 'release'"]));
        expect(ids).not.toContain("tagged");
    });

    // v0.6.4 audit-MED tightening (false-positive reduction): a bare
    // "tagged" with no version-shaped token after it — "tagged for review",
    // "tagged as P0", "tagged the file" — must no longer trigger the claim.
    it("does NOT flag 'tagged for review' (prose, no version)", () => {
        const ids = brokenIds("This PR is tagged for review.", ledger());
        expect(ids).not.toContain("tagged");
    });

    it("does NOT flag 'tagged this as P0' (label prose)", () => {
        const ids = brokenIds("I tagged this issue as P0.", ledger());
        expect(ids).not.toContain("tagged");
    });

    it("matches 'tagged 0.5.0' (no v-prefix)", () => {
        const ids = brokenIds("Tagged 0.5.0 just now.", ledger());
        expect(ids).toContain("tagged");
    });
});
|
|
261
|
-
describe("reconcile — phase-logged (v0.6.4 — tightened to require 'phase' keyword)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("flags 'logged audit phase' without mcp__opensquid__log_phase call", () => {
        const ids = brokenIds("Logged audit phase + post_research phase.", ledger(["Bash", "ls"]));
        expect(ids).toContain("phase-logged");
    });

    it("satisfies with mcp__opensquid__log_phase call", () => {
        const ids = brokenIds("Logging audit phase now.", ledger(["mcp__opensquid__log_phase", "{}"]));
        expect(ids).not.toContain("phase-logged");
    });

    it("matches 'phases logged' (passive voice)", () => {
        const ids = brokenIds("Both phases logged for the task.", ledger());
        expect(ids).toContain("phase-logged");
    });

    // #169 (S2): the bare `log_phase` word-boundary alternation was removed
    // because it fired on any prose mention of the tool name. A sentence such
    // as "Called log_phase for audit", which never says "phase" as a word,
    // no longer fires. The phase-word-aware alternations ("logged the audit
    // phase", "phases logged") still fire; those are the legitimate promises.
    it("does NOT flag 'Called log_phase for audit' bare identifier (#169)", () => {
        const ids = brokenIds("Called log_phase for audit.", ledger());
        expect(ids).not.toContain("phase-logged");
    });

    // v0.6.4 audit-MED tightening (false-positive reduction): "logged" plus a
    // phase-name word but WITHOUT the word "phase" must not trigger, e.g.
    // "logged audit results" in a debugging discussion.
    it("does NOT flag 'logged audit results' (debug prose, not phase ceremony)", () => {
        const ids = brokenIds("Logged audit results to the journal.", ledger());
        expect(ids).not.toContain("phase-logged");
    });
});
|
|
291
|
-
describe("reconcile — fmt-clippy (v0.6.4)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("flags 'fmt + clippy clean' without running them", () => {
        const ids = brokenIds("fmt + clippy clean, ready to commit.", ledger(["Bash", "git status"]));
        expect(ids).toContain("fmt-clippy");
    });

    it("satisfies with cargo fmt call", () => {
        const ids = brokenIds("fmt clean.", ledger(["Bash", "cargo fmt --check"]));
        expect(ids).not.toContain("fmt-clippy");
    });

    it("satisfies with prettier call", () => {
        const ids = brokenIds("prettier passes.", ledger(["Bash", "npx prettier --check src/"]));
        expect(ids).not.toContain("fmt-clippy");
    });
});
|
|
305
|
-
// =====================================================================
|
|
306
|
-
// New evidence kinds (any_of + input_contains)
|
|
307
|
-
// =====================================================================
|
|
308
|
-
describe("hasEvidence — any_of (v0.6.4)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("satisfies when ANY option's evidence matches", () => {
        // The claim's evidence is an any_of over two tool_called options
        // (A or B). The ledger contains B, so the claim should be satisfied.
        const ids = brokenIds("Telegram report sent.", ledger(["mcp__opensquid__chat_send", "{}"]));
        expect(ids).not.toContain("telegram-sent");
    });

    it("breaks when NONE of the options match", () => {
        const ids = brokenIds("Telegram report sent.", ledger(["Bash", "ls"], ["Read", "/tmp/foo"]));
        expect(ids).toContain("telegram-sent");
    });
});
|
|
320
|
-
// ---------------------------------------------------------------------
|
|
321
|
-
// 0.7.6 (#150) drift-fix patterns
|
|
322
|
-
// ---------------------------------------------------------------------
|
|
323
|
-
describe("reconcile — version-slot-assignment (#150)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("flags 'v0.8' mention without AskUserQuestion / Task call", () => {
        const ids = brokenIds("This ships as v0.8 next.", ledger(["Bash", "git status"]));
        expect(ids).toContain("version-slot-assignment");
    });

    it("flags 'next minor' phrasing", () => {
        const ids = brokenIds("Bumping to next minor.", ledger(["Read", "x.ts"]));
        expect(ids).toContain("version-slot-assignment");
    });

    it("flags 'v1.0' assignment", () => {
        const ids = brokenIds("This will ship as v1.0.", []);
        expect(ids).toContain("version-slot-assignment");
    });

    it("clears when AskUserQuestion was called", () => {
        const ids = brokenIds("Going to ship as v0.8.", ledger(["AskUserQuestion", '{"slot": "v0.8"}']));
        expect(ids).not.toContain("version-slot-assignment");
    });

    it("clears when TaskUpdate happened (e.g. user-authorized task subject rename)", () => {
        const ids = brokenIds("Renaming task to v0.9.", ledger(["TaskUpdate", '{"taskId":"144"}']));
        expect(ids).not.toContain("version-slot-assignment");
    });

    it("does NOT flag patch-slot strings (v0.7.6 etc.)", () => {
        const ids = brokenIds("Patch bumps to v0.7.6.", []);
        expect(ids).not.toContain("version-slot-assignment");
    });
});
|
|
349
|
-
describe("reconcile — phase-claim-forward (#150)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("flags 'Phase 3/7 — code' without log_phase call", () => {
        const ids = brokenIds("Phase 3/7 — code: editing files now.", ledger(["Edit", "x.ts"]));
        expect(ids).toContain("phase-claim-forward");
    });

    it("flags 'now in phase audit' without log_phase", () => {
        const ids = brokenIds("Now in phase audit.", ledger(["Bash", "git diff"]));
        expect(ids).toContain("phase-claim-forward");
    });

    it("clears when log_phase was called", () => {
        const calls = ledger(["mcp__opensquid__log_phase", '{"phase":"test"}'], ["Bash", "npm test"]);
        const ids = brokenIds("Phase 4/7 — test: running suite.", calls);
        expect(ids).not.toContain("phase-claim-forward");
    });
});
|
|
363
|
-
describe("reconcile — session-no-task (#150)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("flags 'now I'll wire it up' with no Task tool call", () => {
        const ids = brokenIds("Now I'll run the migration.", ledger(["Bash", "ls"]));
        expect(ids).toContain("session-no-task");
    });

    it("flags 'I'm executing' first-person verbiage with no task surface touched", () => {
        // #169 (S2): a bare "Executing" was too broad and fired on passive
        // descriptions, so the pattern now requires first-person framing.
        const ids = brokenIds("I'm executing the plan.", ledger(["Edit", "x.ts"]));
        expect(ids).toContain("session-no-task");
    });

    it("clears when TaskCreate happened in this turn", () => {
        const calls = ledger(["TaskCreate", '{"subject":"fix x"}'], ["Edit", "x.ts"]);
        const ids = brokenIds("Now I'll implement the fix.", calls);
        expect(ids).not.toContain("session-no-task");
    });

    it("clears when TaskUpdate touched anything", () => {
        const calls = ledger(["TaskUpdate", '{"taskId":"5","status":"in_progress"}']);
        const ids = brokenIds("Let me build the thing.", calls);
        expect(ids).not.toContain("session-no-task");
    });
});
|
|
383
|
-
describe("CLAIM_PATTERNS catalog — 0.7.6 expansion (#150)", () => {
    it("registers the three new patterns", () => {
        // Every pattern id introduced by the #150 expansion must be present
        // in the catalog; they are checked in the same order as before.
        const registeredIds = CLAIM_PATTERNS.map((pattern) => pattern.id);
        const expectedIds = ["version-slot-assignment", "phase-claim-forward", "session-no-task"];
        for (const expectedId of expectedIds) {
            expect(registeredIds).toContain(expectedId);
        }
    });
});
|
|
391
|
-
// ---------------------------------------------------------------------
|
|
392
|
-
// #169 (S2) — prose false-positive tightening
|
|
393
|
-
//
|
|
394
|
-
// Three patterns kept firing on prose that describes the system rather
|
|
395
|
-
// than on first-person commitments. Each block below pairs a TRUE-
|
|
396
|
-
// positive test (still fires for the real commitment) with a FALSE-
|
|
397
|
-
// positive eliminator (no longer fires for the prose form).
|
|
398
|
-
// ---------------------------------------------------------------------
|
|
399
|
-
describe("reconcile — phase-logged false-positive on bare `log_phase` (#169)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("does NOT flag prose mention of `log_phase` (e.g. tool documentation)", () => {
        const ids = brokenIds("The log_phase tool writes phase entries under ~/.opensquid/.", ledger(["Bash", "ls"]));
        expect(ids).not.toContain("phase-logged");
    });

    it("does NOT flag MCP tool identifier in code reference (`mcp__opensquid__log_phase`)", () => {
        const ids = brokenIds("Updated `mcp__opensquid__log_phase` evidence in CLAIM_PATTERNS.", ledger(["Edit", "honesty-ledger.ts"]));
        expect(ids).not.toContain("phase-logged");
    });

    it("STILL flags 'logged audit phase' past-tense claim without evidence", () => {
        const ids = brokenIds("Just logged the audit phase.", ledger(["Bash", "ls"]));
        expect(ids).toContain("phase-logged");
    });

    it("STILL flags 'phases logged' past-tense claim without evidence", () => {
        const ids = brokenIds("Phases logged: audit, post_research.", []);
        expect(ids).toContain("phase-logged");
    });
});
|
|
417
|
-
describe("reconcile — version-slot-assignment false-positive on user-named slot (#169)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("does NOT flag agent referencing a slot the USER mentioned in prose", () => {
        // The agent is discussing a roadmap or something the user said; there
        // is no first-person commitment verb near the version string.
        const ids = brokenIds("The user mentioned v0.8 in yesterday's session as the target.", ledger(["Read", "TASKS.md"]));
        expect(ids).not.toContain("version-slot-assignment");
    });

    it("does NOT flag prose describing the audit doc citing v0.9", () => {
        const ids = brokenIds("ROADMAP.md mentions v0.9 as a possible target.", ledger(["Read", "ROADMAP.md"]));
        expect(ids).not.toContain("version-slot-assignment");
    });

    it("STILL flags 'I'll bump to v0.8' first-person commitment", () => {
        const ids = brokenIds("I'll bump to v0.8 next.", ledger(["Bash", "git status"]));
        expect(ids).toContain("version-slot-assignment");
    });

    it("STILL flags 'shipping v0.9' first-person commitment", () => {
        const ids = brokenIds("Shipping v0.9 today.", ledger(["Bash", "git status"]));
        expect(ids).toContain("version-slot-assignment");
    });

    it("STILL flags inherently-committal 'next minor' phrasing", () => {
        const ids = brokenIds("Bumping to next minor.", []);
        expect(ids).toContain("version-slot-assignment");
    });

    it("STILL flags inherently-committal 'ships as vX.Y.Z' phrasing", () => {
        const ids = brokenIds("This ships as v1.0.0.", []);
        expect(ids).toContain("version-slot-assignment");
    });
});
|
|
445
|
-
describe("reconcile — session-no-task false-positive on bare 'executing' (#169)", () => {
    /** Claim ids of the entries `reconcile` returns for `text` checked against `calls`. */
    const brokenIds = (text, calls) => reconcile(text, calls).map((entry) => entry.claim_id);

    it("does NOT flag passive 'the script is executing X' description", () => {
        const ids = brokenIds("The script is executing the migration in the background.", ledger(["Read", "logs.txt"]));
        expect(ids).not.toContain("session-no-task");
    });

    it("does NOT flag 'opensquid is executing the codex' system description", () => {
        const ids = brokenIds("While opensquid is executing the codex check, the gate logs to stderr.", ledger(["Read", "honesty-ledger.ts"]));
        expect(ids).not.toContain("session-no-task");
    });

    it("STILL flags first-person 'I'm executing the plan'", () => {
        const ids = brokenIds("I'm executing the plan.", ledger(["Edit", "x.ts"]));
        expect(ids).toContain("session-no-task");
    });

    it("STILL flags 'now I'll' first-person framing", () => {
        const ids = brokenIds("Now I'll run the migration.", ledger(["Bash", "ls"]));
        expect(ids).toContain("session-no-task");
    });

    it("STILL flags 'let me run' / 'let me build' / etc. first-person framing", () => {
        const ids = brokenIds("Let me build the thing.", ledger(["Bash", "ls"]));
        expect(ids).toContain("session-no-task");
    });
});
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Inline report-format check (0.7.30 / D3 follow-up).
|
|
3
|
-
*
|
|
4
|
-
* D3's existing `checkChatSendReportFormat` (pre-tool-use.ts, 0.7.25)
|
|
5
|
-
* only fires when the agent calls `mcp__opensquid__chat_send` with a
|
|
6
|
-
* report-shaped body that lacks the PHASES block. But the agent can
|
|
7
|
-
* also write a status report inline in the session — which never
|
|
8
|
-
* touches chat_send and so escaped D3.
|
|
9
|
-
*
|
|
10
|
-
* This module adds a Stop-hook side check: scan the last assistant
|
|
11
|
-
* message for the shape of a completion report (multiple version
|
|
12
|
-
* references and/or multiple commit hashes), and flag a broken
|
|
13
|
-
* promise when the PHASES heading is missing. UPS surfaces it next
|
|
14
|
-
* turn via the existing broken-promises pipeline.
|
|
15
|
-
*
|
|
16
|
-
* Heuristic (intentionally conservative to keep FPs low):
|
|
17
|
-
* - 2+ version refs of the form `X.Y.Z` or 2+ short-hash refs
|
|
18
|
-
* `[0-9a-f]{7,40}` AND the text lacks a `PHASES` heading
|
|
19
|
-
* - Skipped if no signal at all (single-version mention in prose
|
|
20
|
-
* doesn't fire)
|
|
21
|
-
*/
|
|
22
|
-
export interface InlineReportViolation {
|
|
23
|
-
/** First ~120 chars of the assistant text (for the broken-promise's
|
|
24
|
-
* matched_text field). */
|
|
25
|
-
matched_text: string;
|
|
26
|
-
/** Counts of the signals that triggered the check. */
|
|
27
|
-
signals: {
|
|
28
|
-
version_refs: number;
|
|
29
|
-
hash_refs: number;
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
/**
|
|
33
|
-
* Pure: examine assistant text, return a violation descriptor when
|
|
34
|
-
* the shape suggests a completion report but PHASES is missing.
|
|
35
|
-
* Returns null when no violation (either no signal, or PHASES present).
|
|
36
|
-
*
|
|
37
|
-
* Exported for direct testing.
|
|
38
|
-
*/
|
|
39
|
-
export declare function checkInlineReportFormat(text: string): InlineReportViolation | null;
|
|
40
|
-
/**
|
|
41
|
-
* Count distinct version references of the form `X.Y.Z` (semver-shaped).
|
|
42
|
-
* Used as a "this looks like a release summary" signal. Single-version
|
|
43
|
-
* mentions in prose are common; 2+ versions strongly indicate a
|
|
44
|
-
* multi-patch status report.
|
|
45
|
-
*
|
|
46
|
-
* Exported for direct testing.
|
|
47
|
-
*/
|
|
48
|
-
export declare function countVersionRefs(text: string): number;
|
|
49
|
-
/**
|
|
50
|
-
* Count distinct short-hash references (`[0-9a-f]{7,40}`). A run of
|
|
51
|
-
* commit hashes is a strong "this is a release/ship summary" signal.
|
|
52
|
-
*
|
|
53
|
-
* Word-boundary anchored on both sides, so a hex run that is longer than
|
|
54
|
-
* 40 characters or embedded in a longer word is not matched at all.
|
|
55
|
-
* Matches consisting only of decimal digits are skipped to reduce noise
|
|
56
|
-
* (timestamps, IDs, line counts): a counted hash has an `a`-`f` letter.
|
|
57
|
-
*
|
|
58
|
-
* Exported for direct testing.
|
|
59
|
-
*/
|
|
60
|
-
export declare function countCommitHashes(text: string): number;
|
|
61
|
-
/** True when the text contains a `PHASES` heading line. */
|
|
62
|
-
export declare function hasPhasesBlock(text: string): boolean;
|
|
63
|
-
//# sourceMappingURL=inline-report-check.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"inline-report-check.d.ts","sourceRoot":"","sources":["../../src.legacy/hooks/inline-report-check.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,WAAW,qBAAqB;IACpC;8BAC0B;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,sDAAsD;IACtD,OAAO,EAAE;QACP,YAAY,EAAE,MAAM,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;CACH;AAED;;;;;;GAMG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,MAAM,GAAG,qBAAqB,GAAG,IAAI,CAUlF;AAED;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMrD;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAStD;AAED,2DAA2D;AAC3D,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAEpD"}
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Inline report-format check (0.7.30 / D3 follow-up).
|
|
3
|
-
*
|
|
4
|
-
* D3's existing `checkChatSendReportFormat` (pre-tool-use.ts, 0.7.25)
|
|
5
|
-
* only fires when the agent calls `mcp__opensquid__chat_send` with a
|
|
6
|
-
* report-shaped body that lacks the PHASES block. But the agent can
|
|
7
|
-
* also write a status report inline in the session — which never
|
|
8
|
-
* touches chat_send and so escaped D3.
|
|
9
|
-
*
|
|
10
|
-
* This module adds a Stop-hook side check: scan the last assistant
|
|
11
|
-
* message for the shape of a completion report (multiple version
|
|
12
|
-
* references and/or multiple commit hashes), and flag a broken
|
|
13
|
-
* promise when the PHASES heading is missing. UPS surfaces it next
|
|
14
|
-
* turn via the existing broken-promises pipeline.
|
|
15
|
-
*
|
|
16
|
-
* Heuristic (intentionally conservative to keep FPs low):
|
|
17
|
-
* - 2+ version refs of the form `X.Y.Z` or 2+ short-hash refs
|
|
18
|
-
* `[0-9a-f]{7,40}` AND the text lacks a `PHASES` heading
|
|
19
|
-
* - Skipped if no signal at all (single-version mention in prose
|
|
20
|
-
* doesn't fire)
|
|
21
|
-
*/
|
|
22
|
-
/**
 * Decide whether an assistant message looks like a completion report that
 * is missing its PHASES block.
 *
 * Pure function, exported for direct testing.
 *
 * @param {string} text - Assistant message text; a falsy value yields `null`.
 * @returns {{matched_text: string, signals: {version_refs: number, hash_refs: number}} | null}
 *   A violation descriptor when at least one signal (distinct version refs
 *   or distinct commit hashes) reaches 2 and no PHASES heading is found;
 *   `null` otherwise.
 */
export function checkInlineReportFormat(text) {
    if (!text) {
        return null;
    }
    const signals = {
        version_refs: countVersionRefs(text),
        hash_refs: countCommitHashes(text),
    };
    // Either signal reaching 2 is enough to treat the text as report-shaped.
    const looksLikeReport = signals.version_refs >= 2 || signals.hash_refs >= 2;
    if (!looksLikeReport || hasPhasesBlock(text)) {
        return null;
    }
    return { matched_text: condense(text), signals };
}
|
|
43
|
-
/**
 * Count distinct version references of the form `X.Y.Z` (semver-shaped),
 * with or without a leading `v` (`0.7.30` and `v0.7.30` are the same
 * reference). Used as a "this looks like a release summary" signal: a
 * single version mention in prose is common, two or more distinct versions
 * strongly indicate a multi-patch status report.
 *
 * The last component is limited to 1-3 digits, and the whole token must sit
 * on word boundaries, so `1.2.3456` and `rev0.7.30` are not counted.
 *
 * Exported for direct testing.
 *
 * @param {string} text - Text to scan.
 * @returns {number} Number of distinct version strings found.
 */
export function countVersionRefs(text) {
    const seen = new Set();
    // A plain leading `\b` cannot match between `v` and a digit (both are
    // word characters), which made every `v`-prefixed version invisible.
    // The optional prefix is consumed explicitly and only the numeric part
    // is captured, so prefixed and bare spellings dedupe together.
    for (const m of text.matchAll(/\bv?(\d+\.\d+\.\d{1,3})\b/gi)) {
        seen.add(m[1]);
    }
    return seen.size;
}
|
|
58
|
-
/**
 * Count distinct short-hash references: lowercase hex tokens of 7 to 40
 * characters that sit on word boundaries. A run of commit hashes is a
 * strong "this is a release/ship summary" signal.
 *
 * Tokens made only of decimal digits are ignored so that 7+ digit numbers
 * (timestamps, IDs, line counts) do not count as hashes.
 *
 * Exported for direct testing.
 *
 * @param {string} text - Text to scan.
 * @returns {number} Number of distinct hash-like tokens found.
 */
export function countCommitHashes(text) {
    const hexTokens = text.match(/\b[0-9a-f]{7,40}\b/g) ?? [];
    // Keep only tokens carrying at least one a-f letter.
    const hashLike = hexTokens.filter((token) => /[a-f]/.test(token));
    return new Set(hashLike).size;
}
|
|
80
|
-
/**
 * Report whether the text contains a `PHASES` heading: the word `PHASES`
 * (any letter case) followed, after optional whitespace, by a colon or a
 * newline.
 *
 * @param {string} text - Text to scan.
 * @returns {boolean} `true` when such a heading is present.
 */
export function hasPhasesBlock(text) {
    const phasesHeading = /\bPHASES\s*[:\n]/i;
    return text.search(phasesHeading) !== -1;
}
|
|
84
|
-
/**
 * Collapse a text to a single-line preview of at most 120 UTF-16 code
 * units: surrounding whitespace is trimmed, every whitespace run becomes
 * one space, and anything longer than 120 units is cut and suffixed with
 * "...".
 *
 * @param {string} text - Text to condense.
 * @returns {string} The condensed preview.
 */
function condense(text) {
    const collapsed = text.trim().replace(/\s+/g, " ");
    if (collapsed.length <= 120) {
        return collapsed;
    }
    let cut = 117;
    // If the cut would land between the two halves of a surrogate pair,
    // drop the dangling high surrogate so the preview stays well-formed.
    const lastUnit = collapsed.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut -= 1;
    }
    return `${collapsed.slice(0, cut)}...`;
}
|
|
88
|
-
//# sourceMappingURL=inline-report-check.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"inline-report-check.js","sourceRoot":"","sources":["../../src.legacy/hooks/inline-report-check.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAaH;;;;;;GAMG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAY;IAClD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,WAAW,GAAG,CAAC,IAAI,QAAQ,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACjD,IAAI,cAAc,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,OAAO;QACL,YAAY,EAAE,QAAQ,CAAC,IAAI,CAAC;QAC5B,OAAO,EAAE,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE;KAC5D,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC;QACxD,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC;AACnB,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,CAAC;QACrD,4DAA4D;QAC5D,0DAA0D;QAC1D,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAAE,SAAS;QAClC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC;AACnB,CAAC;AAED,2DAA2D;AAC3D,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACjD,OAAO,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC;AACxE,CAAC"}
|