muonroi-cli 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/cli/cost-forensics.d.ts +3 -0
- package/dist/src/cli/cost-forensics.js +11 -0
- package/dist/src/cli/cost-forensics.test.js +1 -0
- package/dist/src/cli/experience-report.d.ts +20 -0
- package/dist/src/cli/experience-report.js +76 -0
- package/dist/src/cli/experience-report.test.d.ts +5 -0
- package/dist/src/cli/experience-report.test.js +63 -0
- package/dist/src/ee/artifact-cache.d.ts +56 -0
- package/dist/src/ee/artifact-cache.js +155 -0
- package/dist/src/ee/artifact-cache.test.d.ts +1 -0
- package/dist/src/ee/artifact-cache.test.js +69 -0
- package/dist/src/ee/search.js +7 -5
- package/dist/src/ee/search.test.d.ts +1 -0
- package/dist/src/ee/search.test.js +23 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +24 -1
- package/dist/src/gsd/directives.d.ts +22 -0
- package/dist/src/gsd/directives.js +34 -10
- package/dist/src/index.js +9 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
- package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
- package/dist/src/mcp/client-pool.d.ts +9 -2
- package/dist/src/mcp/client-pool.js +60 -21
- package/dist/src/orchestrator/compaction.d.ts +2 -0
- package/dist/src/orchestrator/compaction.js +14 -1
- package/dist/src/orchestrator/compaction.test.js +25 -1
- package/dist/src/orchestrator/message-processor.js +49 -7
- package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
- package/dist/src/orchestrator/scope-reminder.js +16 -0
- package/dist/src/orchestrator/scope-reminder.test.js +22 -1
- package/dist/src/orchestrator/session-experience.d.ts +89 -0
- package/dist/src/orchestrator/session-experience.js +169 -0
- package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
- package/dist/src/orchestrator/session-experience.test.js +72 -0
- package/dist/src/orchestrator/stream-runner.js +7 -0
- package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
- package/dist/src/orchestrator/subagent-compactor.js +30 -8
- package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
- package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
- package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
- package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
- package/dist/src/pil/__tests__/pipeline.test.js +17 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
- package/dist/src/pil/layer3-ee-injection.d.ts +28 -0
- package/dist/src/pil/layer3-ee-injection.js +125 -4
- package/dist/src/pil/layer4-gsd.js +3 -2
- package/dist/src/pil/layer6-output.js +18 -7
- package/dist/src/pil/pipeline.js +26 -9
- package/dist/src/pil/session-experience-injection.d.ts +34 -0
- package/dist/src/pil/session-experience-injection.js +54 -0
- package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
- package/dist/src/pil/session-experience-injection.test.js +79 -0
- package/dist/src/storage/interaction-log.d.ts +1 -1
- package/dist/src/storage/interaction-log.js +17 -4
- package/dist/src/storage/session-experience-store.d.ts +63 -0
- package/dist/src/storage/session-experience-store.js +164 -0
- package/dist/src/storage/session-experience-store.test.d.ts +5 -0
- package/dist/src/storage/session-experience-store.test.js +86 -0
- package/dist/src/storage/ui-interaction-log.js +4 -2
- package/dist/src/tools/registry-ee-query.test.js +24 -1
- package/dist/src/tools/registry.js +20 -2
- package/dist/src/types/index.d.ts +6 -0
- package/dist/src/ui/app.js +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, test, vi } from "vitest";
|
|
2
|
+
import { surfaceCompactionArtifacts } from "../layer3-ee-injection.js";
|
|
3
|
+
// Issue #4 — targeted complement to layer3's checkpoint arm on meta turns.
|
|
4
|
+
// layer3 (now run on meta after issue #2) surfaces checkpoints via a FIXED
|
|
5
|
+
// recency query; this arm searches by the meta question (ctx.raw) to surface the
|
|
6
|
+
// elided tool-artifacts relevant to it, and DEFERS when layer3 already injected a
|
|
7
|
+
// checkpoint block. Mock the EE search + the audit log so the test stays offline.
|
|
8
|
+
// Replace the EE bridge with a stub whose search resolves to an empty list by
// default; individual tests override the resolved value as needed.
vi.mock("../../ee/bridge.js", () => {
    const searchByText = vi.fn().mockResolvedValue([]);
    return { searchByText };
});
// Replace the interaction audit log with a no-op spy.
vi.mock("../../storage/interaction-log.js", () => {
    const logInteraction = vi.fn();
    return { logInteraction };
});
|
|
14
|
+
import { searchByText } from "../../ee/bridge.js";
|
|
15
|
+
function makeCtx(overrides = {}) {
|
|
16
|
+
return {
|
|
17
|
+
raw: "compaction cần cải thiện gì trong CLI",
|
|
18
|
+
enriched: "compaction cần cải thiện gì trong CLI",
|
|
19
|
+
taskType: "general",
|
|
20
|
+
domain: null,
|
|
21
|
+
confidence: 0.85,
|
|
22
|
+
outputStyle: "balanced",
|
|
23
|
+
tokenBudget: 2000,
|
|
24
|
+
metrics: null,
|
|
25
|
+
layers: [],
|
|
26
|
+
sessionId: "sess-meta-1",
|
|
27
|
+
...overrides,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
// All fixtures are filed under the behavioral collection.
const BEHAVIORAL_COLLECTION = "experience-behavioral";
/** Assembles a search hit with the { id, score, payload.text, collection } shape used below. */
const behavioralPoint = (id, score, text) => ({
    id,
    score,
    payload: { text },
    collection: BEHAVIORAL_COLLECTION,
});
// An elided tool-artifact record: carries the concrete id (call_7) to rehydrate.
const artifactPoint = behavioralPoint("art1", 0.9, "tool-artifact id=call_7 toolName=read_file elided 4200 chars: src/orchestrator/compaction.ts createCompactionSummaryMessage ...");
// A compaction checkpoint summary record.
const checkpointPoint = behavioralPoint("cp1", 0.8, "Context checkpoint summary ✔ DONE: extended IMPORTANT_TOOL_NAMES; tests 16/16");
// A high-scoring but generic behavioral hit with no checkpoint/artifact vocabulary.
const genericPoint = behavioralPoint("gen1", 0.97, "Always run the full test suite before pushing");
|
|
50
|
+
describe("surfaceCompactionArtifacts (issue #4 — meta-turn auto-surface)", () => {
    const LAYER_NAME = "ee-meta-artifacts";
    const search = vi.mocked(searchByText);
    // Looks up the forensics entry this step appends to ctx.layers.
    const metaLayer = (ctx) => ctx.layers.find((entry) => entry.name === LAYER_NAME);

    // Every test starts from a search stub that resolves to no hits.
    beforeEach(() => {
        search.mockReset();
        search.mockResolvedValue([]);
    });

    test("auto-surfaces [artifact] + checkpoint refs (and the rehydrate instruction) into enriched", async () => {
        search.mockResolvedValue([artifactPoint, checkpointPoint]);
        const result = await surfaceCompactionArtifacts(makeCtx());
        // Artifact-typed line, the rehydrate instruction, and the concrete
        // tool-artifact id the agent can fetch.
        for (const needle of ["[artifact]", "ee.query tool", "call_7"]) {
            expect(result.enriched).toContain(needle);
        }
        const layer = metaLayer(result);
        expect(layer?.applied).toBe(true);
        expect(layer?.delta).toContain("artifacts=2");
        // Only the behavioral collection is searched.
        expect(search).toHaveBeenCalledWith(expect.stringContaining("tool-artifact"), ["experience-behavioral"], expect.any(Number), expect.any(Object));
    });

    test("no sessionId → unchanged, no EE call (no prior compaction to rehydrate)", async () => {
        const input = makeCtx({ sessionId: undefined });
        const result = await surfaceCompactionArtifacts(input);
        expect(result.enriched).toBe(input.enriched);
        expect(metaLayer(result)?.delta).toBe("no-session");
        expect(search).not.toHaveBeenCalled();
    });

    test("search failure is fail-open + recorded (delta=error=…, enriched unchanged)", async () => {
        search.mockRejectedValue(new Error("EE down"));
        const input = makeCtx();
        const result = await surfaceCompactionArtifacts(input);
        expect(result.enriched).toBe(input.enriched);
        expect(metaLayer(result)?.delta).toMatch(/^error=/);
    });

    test("generic behavioral hits are filtered out (not mislabelled as artifacts)", async () => {
        search.mockResolvedValue([genericPoint]);
        const input = makeCtx();
        const result = await surfaceCompactionArtifacts(input);
        expect(result.enriched).toBe(input.enriched);
        expect(metaLayer(result)?.delta).toBe("no-artifacts");
    });

    test("defers to layer3 — skips with NO EE call when a checkpoint block is already present", async () => {
        // Simulate an earlier step having left a checkpoint marker in enriched:
        // this step must neither duplicate the block nor issue a search.
        const enriched = `${makeCtx().raw}\n[task checkpoints …]\n<!-- ee-checkpoint-injected:0123456789abcdef -->`;
        const result = await surfaceCompactionArtifacts(makeCtx({ enriched }));
        expect(metaLayer(result)?.delta).toBe("already-surfaced");
        expect(result.enriched).toBe(enriched);
        expect(search).not.toHaveBeenCalled();
    });

    test("idempotent — a second pass on its own output defers (marker it wrote is seen)", async () => {
        search.mockResolvedValue([artifactPoint]);
        const firstPass = await surfaceCompactionArtifacts(makeCtx());
        expect(firstPass.enriched).toContain("[artifact]");
        expect(search).toHaveBeenCalledTimes(1);
        const secondPass = await surfaceCompactionArtifacts(makeCtx({ enriched: firstPass.enriched }));
        expect(metaLayer(secondPass)?.delta).toBe("already-surfaced");
        // The text did not grow and no second round-trip was made.
        expect(secondPass.enriched).toBe(firstPass.enriched);
        expect(search).toHaveBeenCalledTimes(1);
    });
});
|
|
112
|
+
//# sourceMappingURL=surface-compaction-artifacts.test.js.map
|
|
@@ -15,4 +15,32 @@
|
|
|
15
15
|
* and PIL Layer 3 are active on the same pipeline run.
|
|
16
16
|
*/
|
|
17
17
|
import type { PipelineContext } from "./types.js";
|
|
18
|
+
/**
 * One-line feedback prompt meant to sit directly under injected experience
 * entries, next to the `[id:..]` handles it refers to. It asks the agent to
 * rate any recall it acted on via `ee_feedback(id, followed|ignored|noise)`.
 */
export declare const RECALL_FEEDBACK_NUDGE = "\u21B3 Acted on one of the above [id:..]? Rate it: ee_feedback(id, followed|ignored|noise). Unrated recalls degrade future recall.";
export declare function layer3EeInjection(ctx: PipelineContext): Promise<PipelineContext>;
/**
 * Meta-turn step that complements Layer 3's checkpoint arm (issue #4).
 *
 * Searches by the meta question (`ctx.raw`) so the elided tool-artifacts and
 * checkpoints relevant to it are appended to `enriched` with their
 * `[artifact] … id=X` references, without waiting on a manual `ee_query`.
 *
 * Skips without searching when `ctx.sessionId` is missing or when `enriched`
 * already carries an `ee-checkpoint-injected` marker. Fail-open: on error,
 * no session, no match, or an already-surfaced block, it resolves to `ctx`
 * with `enriched` untouched. Every outcome appends an `ee-meta-artifacts`
 * entry to `layers`.
 */
export declare function surfaceCompactionArtifacts(ctx: PipelineContext): Promise<PipelineContext>;
|
|
@@ -45,6 +45,15 @@ const PIL_PRINCIPLES_FLOOR = Math.max(0, PIL_SCORE_FLOOR - 0.15);
|
|
|
45
45
|
// hitCount threshold for promoting a behavioral point to T1 "proven" reflex.
|
|
46
46
|
// Mirrors the EE evolution promotion rule (3 confirmed hits → T1).
|
|
47
47
|
const T1_HIT_THRESHOLD = 3;
|
|
48
|
+
/**
 * Feedback prompt pushed after the injected experience block whenever at
 * least one principle or behavioral point was injected. It rides next to the
 * `[id:..]` handles so the request to rate a recall survives even if earlier
 * instructions were compacted away on a long session.
 */
export const RECALL_FEEDBACK_NUDGE = "↳ Acted on one of the above [id:..]? Rate it: ee_feedback(id, followed|ignored|noise). Unrated recalls degrade future recall.";
|
|
48
57
|
/**
|
|
49
58
|
* Extract all sha16 values from `<!-- bb-context-injected:<sha16> -->` markers
|
|
50
59
|
* already present in the enriched context string.
|
|
@@ -119,7 +128,7 @@ async function queryEeBridge(raw) {
|
|
|
119
128
|
const [principleRaw, behavioralRaw, checkpointRaw] = await Promise.all([
|
|
120
129
|
searchByText(raw, ["experience-principles"], 3, signal),
|
|
121
130
|
searchByText(raw, ["experience-behavioral"], 4, signal),
|
|
122
|
-
searchByText(
|
|
131
|
+
searchByText('Context checkpoint summary OR "compaction checkpoint" recent Progress DONE elided OR tool-artifact OR "tool result id="', ["experience-behavioral"], 3, signal).catch(() => []),
|
|
123
132
|
]);
|
|
124
133
|
const principlePoints = principleRaw.filter((p) => (p.score ?? 0) >= PIL_PRINCIPLES_FLOOR);
|
|
125
134
|
const behavioralPoints = behavioralRaw.filter((p) => (p.score ?? 0) >= PIL_SCORE_FLOOR);
|
|
@@ -161,14 +170,16 @@ function formatExperienceHints(points) {
|
|
|
161
170
|
function formatTaskCheckpoints(points) {
|
|
162
171
|
if (points.length === 0)
|
|
163
172
|
return "";
|
|
164
|
-
const lines = points
|
|
173
|
+
const lines = points
|
|
174
|
+
.map((p) => {
|
|
165
175
|
const t = extractPointText(p);
|
|
166
176
|
// Idea 4: surface tool-artifact refs so agent sees "elided high-value, query for full"
|
|
167
177
|
if (/tool-artifact|tool result id=|elided.*id=/.test(t.toLowerCase())) {
|
|
168
178
|
return `- [artifact] ${t.slice(0, 160)} [id:${p.id}]`;
|
|
169
179
|
}
|
|
170
180
|
return `- ${t.slice(0, 180)} [id:${p.id}]`;
|
|
171
|
-
})
|
|
181
|
+
})
|
|
182
|
+
.filter((l) => l !== "- ");
|
|
172
183
|
if (lines.length === 0)
|
|
173
184
|
return "";
|
|
174
185
|
return `[task checkpoints — prior compactions: use to answer "task finished?", "compacted yet?". Artifacts: use ee.query tool with "tool-artifact id=XXX" for full elided tool output.] \n${lines.join("\n")}`;
|
|
@@ -282,17 +293,27 @@ export async function layer3EeInjection(ctx) {
|
|
|
282
293
|
const text = extractPointText(p);
|
|
283
294
|
return text.length === 0 || !checkpointMarkerShas.has(payloadSha16(text));
|
|
284
295
|
})
|
|
285
|
-
:
|
|
296
|
+
: result.checkpointPoints || [];
|
|
286
297
|
const allPoints = [...deduplicatedPrinciples, ...deduplicatedBehavioral, ...deduplicatedCheckpoints];
|
|
287
298
|
// STALE-01: Register injected point IDs for prompt-stale reconciliation.
|
|
288
299
|
updateLastSurfacedState(allPoints.map((p) => String(p.id)));
|
|
289
300
|
// CQ-16b: Emit experience_injected StreamChunk so TUI can show collapsible block.
|
|
301
|
+
// Carry per-point {id, title, tier} so the TUI can show WHAT was injected, not
|
|
302
|
+
// just how many (the data already exists here; previously only the count + ids
|
|
303
|
+
// reached the client and the title was never serialized).
|
|
304
|
+
const pointTitle = (p) => (extractPointText(p).split("\n")[0] ?? "").replace(/\s+/g, " ").trim().slice(0, 100);
|
|
305
|
+
const injectedPoints = [
|
|
306
|
+
...deduplicatedPrinciples.map((p) => ({ id: String(p.id), title: pointTitle(p), tier: "principle" })),
|
|
307
|
+
...deduplicatedBehavioral.map((p) => ({ id: String(p.id), title: pointTitle(p), tier: "behavioral" })),
|
|
308
|
+
...deduplicatedCheckpoints.map((p) => ({ id: String(p.id), title: pointTitle(p), tier: "checkpoint" })),
|
|
309
|
+
];
|
|
290
310
|
try {
|
|
291
311
|
const injectedChunk = {
|
|
292
312
|
type: "experience_injected",
|
|
293
313
|
experienceInjected: {
|
|
294
314
|
pointCount: totalPoints + deduplicatedCheckpoints.length,
|
|
295
315
|
pointIds: allPoints.map((p) => String(p.id)),
|
|
316
|
+
points: injectedPoints,
|
|
296
317
|
scoreFloor: PIL_SCORE_FLOOR,
|
|
297
318
|
taskType: ctx.taskType ?? undefined,
|
|
298
319
|
domain: ctx.domain ?? undefined,
|
|
@@ -322,6 +343,16 @@ export async function layer3EeInjection(ctx) {
|
|
|
322
343
|
// Idea 5: raised from 0.08 to 0.12 for higher fidelity on critical progress + artifact refs.
|
|
323
344
|
parts.push(truncateToBudget(cpText + "\n" + marker, Math.floor(ctx.tokenBudget * 0.12)));
|
|
324
345
|
}
|
|
346
|
+
// Close the recall feedback loop at the injection site: passively-injected
|
|
347
|
+
// experience (the agent did not ee_query for it) otherwise carries no feedback
|
|
348
|
+
// prompt, so it goes unrated and EE cannot learn if the injection was gold or
|
|
349
|
+
// noise. The front-loaded native-capabilities instruction can be compacted away
|
|
350
|
+
// on long sessions; this nudge rides next to the [id:..] handles it refers to.
|
|
351
|
+
// Gated on rateable experience (principles/behavioral) — checkpoints are task
|
|
352
|
+
// artifacts, not recall verdicts.
|
|
353
|
+
if (deduplicatedPrinciples.length + deduplicatedBehavioral.length > 0) {
|
|
354
|
+
parts.push(RECALL_FEEDBACK_NUDGE);
|
|
355
|
+
}
|
|
325
356
|
const injected = parts.join("\n");
|
|
326
357
|
try {
|
|
327
358
|
if (ctx.sessionId) {
|
|
@@ -359,4 +390,94 @@ export async function layer3EeInjection(ctx) {
|
|
|
359
390
|
],
|
|
360
391
|
};
|
|
361
392
|
}
|
|
393
|
+
/**
 * Case-insensitive test for text that reads like a compaction checkpoint or
 * an elided tool-artifact record. It matches any of: "context checkpoint
 * summary", "compaction checkpoint", "tool-artifact", "tool result id=",
 * "elided", "progress … done" (no letters in between), or a "✔" mark.
 * Applied to search hits so generic behavioral records are not rendered as
 * `[artifact]`/checkpoint lines.
 */
const CHECKPOINT_LIKE_RE = /context checkpoint summary|compaction checkpoint|tool-artifact|tool result id=|elided|progress[^a-z]*done|✔/i;
|
|
400
|
+
/**
 * Issue #4 — meta-turn step that complements Layer 3's checkpoint arm by
 * searching with the meta question itself.
 *
 * Flow:
 *  1. No `ctx.sessionId` → skip ("no-session"), no search.
 *  2. `ctx.enriched` already carries a checkpoint marker → skip
 *     ("already-surfaced"), no search.
 *  3. Search the `experience-behavioral` collection (limit 5, bounded by
 *     `PIL_SEARCH_TIMEOUT_MS`) with `ctx.raw` plus checkpoint/artifact
 *     vocabulary. Keep hits scoring at least `PIL_SCORE_FLOOR * 0.7` whose
 *     text matches `CHECKPOINT_LIKE_RE`.
 *  4. Render the survivors with `formatTaskCheckpoints`, truncate to 12% of
 *     `ctx.tokenBudget`, append an `ee-checkpoint-injected` marker, and
 *     append the block to `enriched`.
 *
 * Fail-open: a search error is logged via `logEeFailure` and reported as an
 * `error=…` delta; a failed audit write is logged to stderr and does not
 * undo the injection.
 *
 * @param ctx - Pipeline context; not mutated.
 * @returns A copy of `ctx` with one extra `ee-meta-artifacts` entry in
 *   `layers`, and an extended `enriched` only when a block was injected.
 */
export async function surfaceCompactionArtifacts(ctx) {
    const LAYER_NAME = "ee-meta-artifacts";
    // Every non-injecting outcome returns ctx untouched except for the layer entry.
    const skipped = (delta) => ({
        ...ctx,
        layers: [...ctx.layers, { name: LAYER_NAME, applied: false, delta }],
    });
    // Captured once so the audit write below uses the same value this guard
    // validated, with no second check needed.
    const sessionId = ctx.sessionId;
    if (!sessionId)
        return skipped("no-session");
    // A checkpoint/artifact block is already present: don't duplicate it or
    // pay for another search round-trip.
    if (extractCheckpointMarkerShas(ctx.enriched).size > 0)
        return skipped("already-surfaced");
    let points;
    try {
        const signal = AbortSignal.timeout(PIL_SEARCH_TIMEOUT_MS);
        // Lead with the meta question, then add checkpoint/artifact vocabulary
        // so the single search lands on compaction records.
        const query = `${ctx.raw}\nContext checkpoint summary tool-artifact "tool result id=" elided Progress DONE`;
        const hits = await searchByText(query, ["experience-behavioral"], 5, signal);
        const minScore = PIL_SCORE_FLOOR * 0.7;
        points = hits.filter((p) => (p.score ?? 0) >= minScore && CHECKPOINT_LIKE_RE.test(extractPointText(p)));
    }
    catch (err) {
        logEeFailure("pil.meta.surfaceCompactionArtifacts", classifyEeError(err), err, { budgetMs: PIL_SEARCH_TIMEOUT_MS });
        return skipped(`error=${String(err)}`);
    }
    const cpText = points.length > 0 ? formatTaskCheckpoints(points) : "";
    if (!cpText)
        return skipped("no-artifacts");
    // The marker goes on AFTER truncation so it always survives into
    // `enriched`; it is what makes the defer check fire on a later pass.
    const blockSha = payloadSha16(cpText);
    const body = truncateToBudget(cpText, Math.floor(ctx.tokenBudget * 0.12));
    const block = `${body}\n<!-- ee-checkpoint-injected:${blockSha} -->`;
    try {
        logInteraction(sessionId, "ee_injection", {
            eventSubtype: "injected",
            data: {
                phase: "pil_meta_artifacts",
                role: "knowledge_retriever",
                checkpointCount: points.length,
                pointIds: points.map((p) => String(p.id)),
                injectedChars: block.length,
            },
        });
    }
    catch (err) {
        // Surfacing succeeded; only the audit write failed. Anything can be
        // thrown, so don't assume the value has a `.message`.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[pil.meta.surfaceCompactionArtifacts] interaction log failed: ${reason}`);
    }
    return {
        ...ctx,
        enriched: `${ctx.enriched}\n${block}`,
        layers: [
            ...ctx.layers,
            { name: LAYER_NAME, applied: true, delta: `artifacts=${points.length} chars=${block.length}` },
        ],
    };
}
|
|
362
483
|
//# sourceMappingURL=layer3-ee-injection.js.map
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
*/
|
|
19
19
|
import { routeTask } from "../ee/bridge.js";
|
|
20
20
|
import { scoreComplexity } from "../gsd/complexity.js";
|
|
21
|
-
import { buildDirective } from "../gsd/directives.js";
|
|
21
|
+
import { buildDirective, mentionsEcosystemScope } from "../gsd/directives.js";
|
|
22
22
|
import { detectGrayAreas } from "../gsd/gray-areas.js";
|
|
23
23
|
import { detectGsdPhase } from "../gsd/types.js";
|
|
24
24
|
import { classifyEeError, logEeFailure } from "../utils/ee-logger.js";
|
|
@@ -96,7 +96,8 @@ export async function layer4Gsd(ctx) {
|
|
|
96
96
|
: isMetaAnalysisPrompt(ctx.raw) ||
|
|
97
97
|
(ctx.taskType === "general" && ctx.intentKind === "task") ||
|
|
98
98
|
(isQuestionLike(ctx.raw) && !isImplementationIntent(ctx.raw));
|
|
99
|
-
const
|
|
99
|
+
const ecosystem = mentionsEcosystemScope(ctx.raw);
|
|
100
|
+
const directive = buildDirective({ complexity, phase, grayAreas, informational, ecosystem });
|
|
100
101
|
const budgetChars = Math.floor(ctx.tokenBudget * DIRECTIVE_BUDGET_FRACTION);
|
|
101
102
|
const trimmed = truncateToBudget(directive.text, budgetChars);
|
|
102
103
|
return {
|
|
@@ -258,12 +258,16 @@ export function applyPilSuffix(systemPrompt, ctx, responseToolsActive = false) {
|
|
|
258
258
|
* OUTPUT RULES (graceful — exactly what code-heavy tasks already do), so a false
|
|
259
259
|
* positive on an analysis turn only forgoes structured JSON, never breaks output.
|
|
260
260
|
*
|
|
261
|
-
* High-signal verbs only (implement/edit/wire/rewrite/rename/scaffold/refactor
|
|
262
|
-
* "make the change", "apply the fix/patch", VI equivalents
|
|
263
|
-
* are excluded — too common in analysis ("explain
|
|
264
|
-
* analyze/plan/debug-investigation turns keep their
|
|
261
|
+
* High-signal verbs only (implement/edit/wire/rewrite/rename/scaffold/refactor/
|
|
262
|
+
* improve, "make the change", "apply the fix/patch", VI equivalents incl. "cải
|
|
263
|
+
* thiện"). Bare "fix"/"replace" are excluded — too common in analysis ("explain
|
|
264
|
+
* the fix") — so pure analyze/plan/debug-investigation turns keep their
|
|
265
|
+
* structured output. "improve(ment)" / "cải thiện" added after session
|
|
266
|
+
* 2b7a10219499: "lên plan rồi improvement … cải thiện Compaction" was an
|
|
267
|
+
* implement turn the model mis-classified as a `report`, so a terminal
|
|
268
|
+
* respond_plan ended it on a plan (edits done but uncommitted/unreported).
|
|
265
269
|
*/
|
|
266
|
-
const IMPLEMENTATION_INTENT_RE = /\b(implement|edit|wire(?:\s+up)?|rewrite|rename|scaffold|refactor)\b|\bmake\s+(the\s+)?(change|edit|modification)s?\b|\bapply\s+(the\s+)?(fix|change|patch|edit|diff)\b|(?:^|\s)(triển\s*khai|trien\s*khai|chỉnh\s*sửa|chinh\s*sua|viết\s*lại|viet\s*lai|đổi\s*tên|doi\s*ten)\b/i;
|
|
270
|
+
const IMPLEMENTATION_INTENT_RE = /\b(implement|edit|wire(?:\s+up)?|rewrite|rename|scaffold|refactor)\b|\bimprove(?:ment)?\b|\bmake\s+(the\s+)?(change|edit|modification)s?\b|\bapply\s+(the\s+)?(fix|change|patch|edit|diff)\b|(?:^|\s)(triển\s*khai|trien\s*khai|chỉnh\s*sửa|chinh\s*sua|viết\s*lại|viet\s*lai|đổi\s*tên|doi\s*ten|cải\s*thiện|cai\s*thien)\b/i;
|
|
267
271
|
export function isImplementationIntent(raw) {
|
|
268
272
|
return !!raw && IMPLEMENTATION_INTENT_RE.test(raw);
|
|
269
273
|
}
|
|
@@ -340,6 +344,15 @@ export function getResponseToolSet(ctx, providerId) {
|
|
|
340
344
|
// - report → keep the structured tool (its value IS the structure).
|
|
341
345
|
// Only when the model didn't emit a deliverable (null → legacy cascade / model
|
|
342
346
|
// omitted the word) do we fall back to the legacy regex predicates.
|
|
347
|
+
// Implementation intent ALWAYS suppresses a terminal respond_* — checked
|
|
348
|
+
// BEFORE the deliverable branch so a mis-classified `report` can't bypass it
|
|
349
|
+
// (session 2b7a10219499: a "plan rồi improvement" implement turn got
|
|
350
|
+
// deliverable=report → the report-exception below kept respond_plan → the
|
|
351
|
+
// model stated a plan and ended the turn with edits done but uncommitted).
|
|
352
|
+
// A respond_* tool lets the model "answer" and stop before edits land, so any
|
|
353
|
+
// implement turn must fall through to the markdown OUTPUT RULES instead.
|
|
354
|
+
if (isImplementationIntent(ctx.raw))
|
|
355
|
+
return {};
|
|
343
356
|
if (ctx.deliverableKind) {
|
|
344
357
|
if (ctx.deliverableKind === "code")
|
|
345
358
|
return {};
|
|
@@ -347,8 +360,6 @@ export function getResponseToolSet(ctx, providerId) {
|
|
|
347
360
|
return {};
|
|
348
361
|
}
|
|
349
362
|
else {
|
|
350
|
-
if (isImplementationIntent(ctx.raw))
|
|
351
|
-
return {};
|
|
352
363
|
if (ctx.taskType !== "general" && !prefersStructuredReport(ctx.raw))
|
|
353
364
|
return {};
|
|
354
365
|
}
|
package/dist/src/pil/pipeline.js
CHANGED
|
@@ -22,11 +22,12 @@ import { isDiscoveryEnabled } from "./config.js";
|
|
|
22
22
|
import { scoreComplexitySize } from "./layer1_5-complexity-size.js";
|
|
23
23
|
import { layer1Intent } from "./layer1-intent.js";
|
|
24
24
|
import { layer2Personality } from "./layer2-personality.js";
|
|
25
|
-
import { layer3EeInjection } from "./layer3-ee-injection.js";
|
|
25
|
+
import { layer3EeInjection, surfaceCompactionArtifacts } from "./layer3-ee-injection.js";
|
|
26
26
|
import { layer4Gsd } from "./layer4-gsd.js";
|
|
27
27
|
import { layer5Context } from "./layer5-context.js";
|
|
28
28
|
import { isMetaAnalysisPrompt, layer6Output } from "./layer6-output.js";
|
|
29
29
|
import { PipelineContextSchema } from "./schema.js";
|
|
30
|
+
import { injectSessionExperience, isSelfExperiencePrompt } from "./session-experience-injection.js";
|
|
30
31
|
import { bumpSessionTurn } from "./session-state.js";
|
|
31
32
|
import { setPilLastResult } from "./store.js";
|
|
32
33
|
import { resolveAfter } from "./timeout.js";
|
|
@@ -144,15 +145,21 @@ async function runLayers(ctx, options) {
|
|
|
144
145
|
}
|
|
145
146
|
if (ctx.taskType !== null) {
|
|
146
147
|
await timed("layer2-personality", layer2Personality);
|
|
148
|
+
// Issue #2: meta-analysis turns used to skip layer3 (EE recall) + layer5
|
|
149
|
+
// (context) to cut overhead — but that starved exactly the self-evaluation
|
|
150
|
+
// turns where behavioral/principle recall matters most. Run the full
|
|
151
|
+
// sequence for every taskType-bearing turn now. In the live (interactive)
|
|
152
|
+
// path there is no pipeline timeout (see runPipeline), and each EE layer is
|
|
153
|
+
// internally timeout-bounded, so meta turns just carry the same EE budget as
|
|
154
|
+
// a normal turn.
|
|
155
|
+
await timed("layer3-ee-injection", layer3EeInjection);
|
|
156
|
+
await timed("layer4-gsd-structuring", layer4Gsd);
|
|
157
|
+
await timed("layer5-context-enrichment", layer5Context);
|
|
147
158
|
if (isMetaAnalysisPrompt(ctx.raw)) {
|
|
148
|
-
//
|
|
149
|
-
// to
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
else {
|
|
153
|
-
await timed("layer3-ee-injection", layer3EeInjection);
|
|
154
|
-
await timed("layer4-gsd-structuring", layer4Gsd);
|
|
155
|
-
await timed("layer5-context-enrichment", layer5Context);
|
|
159
|
+
// Issue #4 (targeted complement): surface the elided tool-artifacts
|
|
160
|
+
// RELEVANT to this meta question. Defers to layer3 — it only fires when
|
|
161
|
+
// layer3's fixed-query checkpoint arm surfaced no checkpoint block.
|
|
162
|
+
await timed("ee-meta-artifacts", surfaceCompactionArtifacts);
|
|
156
163
|
}
|
|
157
164
|
}
|
|
158
165
|
else {
|
|
@@ -171,6 +178,16 @@ async function runLayers(ctx, options) {
|
|
|
171
178
|
],
|
|
172
179
|
};
|
|
173
180
|
}
|
|
181
|
+
// Felt-experience routing: a first-person "cảm nhận trong CLI / are you
|
|
182
|
+
// blind?" question gets the live session-experience snapshot so the agent
|
|
183
|
+
// answers from what actually happened to it this session — not by reading the
|
|
184
|
+
// compaction/PIL source. Runs REGARDLESS of taskType: such questions often
|
|
185
|
+
// classify to null (not a coding task), and gating it behind the taskType
|
|
186
|
+
// branch silently skipped it on exactly those prompts. Narrow (gated on
|
|
187
|
+
// isSelfExperiencePrompt) so non-experience turns add no layer.
|
|
188
|
+
if (isSelfExperiencePrompt(ctx.raw)) {
|
|
189
|
+
await timed("session-experience", async (c) => injectSessionExperience(c));
|
|
190
|
+
}
|
|
174
191
|
await timed("layer6-output", layer6Output);
|
|
175
192
|
const suffixCharsMatch = ctx.layers.find((l) => l.name === "output-optimization")?.delta?.match(/chars=(\d+)/);
|
|
176
193
|
const suffixChars = suffixCharsMatch ? parseInt(suffixCharsMatch[1], 10) : 0;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/pil/session-experience-injection.ts
|
|
3
|
+
*
|
|
4
|
+
* Felt-experience routing. When the user asks how the agent is *doing* inside
|
|
5
|
+
* this CLI session — "cảm nhận trong CLI", "bạn có bị mù context không",
|
|
6
|
+
* "how do you feel working in here", "did you struggle" — the agent should
|
|
7
|
+
* answer from what ACTUALLY happened to it this session, not by reading the
|
|
8
|
+
* compaction/PIL source and theorizing about mechanisms (the backwards behaviour
|
|
9
|
+
* in session ce816796a57d: no compaction fired, no ee_query call, no blindness —
|
|
10
|
+
* yet the agent answered by grepping the anti-mù code).
|
|
11
|
+
*
|
|
12
|
+
* This step injects the live session-experience snapshot into the enriched
|
|
13
|
+
* prompt and tells the agent to ground its answer in that data. It is narrow on
|
|
14
|
+
* purpose: a generic "đánh giá / cải thiện CLI" evaluation still goes the
|
|
15
|
+
* code-reading route — only first-person *experience* questions get the snapshot.
|
|
16
|
+
*
|
|
17
|
+
 * Synchronous, non-mutating, additive, fail-open: records a `session-experience` layer
|
|
18
|
+
* marker either way for forensics.
|
|
19
|
+
*/
|
|
20
|
+
import type { PipelineContext } from "./types.js";
|
|
21
|
+
/**
|
|
22
|
+
* Narrow detector for "how do you (the agent) feel / are you blind / did you
|
|
23
|
+
* struggle in this session" questions. Deliberately keyed on introspective
|
|
24
|
+
* vocabulary (feeling / experience / blind / struggle) rather than the broad
|
|
25
|
+
* meta-analysis regex, so plain "evaluate the CLI" prompts are NOT captured.
|
|
26
|
+
*/
|
|
27
|
+
export declare const SELF_EXPERIENCE_RE: RegExp;
|
|
28
|
+
export declare function isSelfExperiencePrompt(raw: string): boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Append the live session-experience snapshot when the prompt is a first-person
|
|
31
|
+
* experience question. No-op (but marker-recorded) otherwise, and idempotent if
|
|
32
|
+
* the snapshot is already present.
|
|
33
|
+
*/
|
|
34
|
+
export declare function injectSessionExperience(ctx: PipelineContext): PipelineContext;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/pil/session-experience-injection.ts
|
|
3
|
+
*
|
|
4
|
+
* Felt-experience routing. When the user asks how the agent is *doing* inside
|
|
5
|
+
* this CLI session — "cảm nhận trong CLI", "bạn có bị mù context không",
|
|
6
|
+
* "how do you feel working in here", "did you struggle" — the agent should
|
|
7
|
+
* answer from what ACTUALLY happened to it this session, not by reading the
|
|
8
|
+
* compaction/PIL source and theorizing about mechanisms (the backwards behaviour
|
|
9
|
+
* in session ce816796a57d: no compaction fired, no ee_query call, no blindness —
|
|
10
|
+
* yet the agent answered by grepping the anti-mù code).
|
|
11
|
+
*
|
|
12
|
+
* This step injects the live session-experience snapshot into the enriched
|
|
13
|
+
* prompt and tells the agent to ground its answer in that data. It is narrow on
|
|
14
|
+
* purpose: a generic "đánh giá / cải thiện CLI" evaluation still goes the
|
|
15
|
+
* code-reading route — only first-person *experience* questions get the snapshot.
|
|
16
|
+
*
|
|
17
|
+
 * Synchronous, non-mutating, additive, fail-open: records a `session-experience` layer
|
|
18
|
+
* marker either way for forensics.
|
|
19
|
+
*/
|
|
20
|
+
import { formatSessionExperience } from "../orchestrator/session-experience.js";
|
|
21
|
+
/**
|
|
22
|
+
* Narrow detector for "how do you (the agent) feel / are you blind / did you
|
|
23
|
+
* struggle in this session" questions. Deliberately keyed on introspective
|
|
24
|
+
* vocabulary (feeling / experience / blind / struggle) rather than the broad
|
|
25
|
+
* meta-analysis regex, so plain "evaluate the CLI" prompts are NOT captured.
|
|
26
|
+
*/
|
|
27
|
+
export const SELF_EXPERIENCE_RE = /cảm nhận|cảm thấy|cảm giác|trải nghiệm|(bị\s*)?mù\s*context|bị\s*mù|how (do|does) (you|it) feel|how are you (doing|feeling)|your (own |felt )?experience|are you (feeling\s+)?blind|do you feel blind|did you (struggle|have (a |any )?(trouble|difficulty|hard time|problem))|có (gặp\s+)?khó khăn|gặp (vấn đề|khó khăn)/i;
/**
 * Report whether a raw user prompt is a first-person "felt experience"
 * question, i.e. whether it matches SELF_EXPERIENCE_RE.
 *
 * @param raw The unmodified user prompt. Any non-string value yields `false`.
 * @returns `true` when the prompt contains one of the introspective phrases.
 */
export function isSelfExperiencePrompt(raw) {
    if (typeof raw !== "string")
        return false;
    // The Vietnamese alternatives in the pattern are written with precomposed
    // (NFC) letters. Input that arrives decomposed (NFD: base letter followed by
    // combining tone/diacritic marks) is a different code-unit sequence and would
    // not match, so bring the prompt to NFC first. ASCII-only prompts are
    // unaffected by normalization.
    return SELF_EXPERIENCE_RE.test(raw.normalize("NFC"));
}
|
|
31
|
+
const MARKER = "[session experience —";
|
|
32
|
+
/**
|
|
33
|
+
* Append the live session-experience snapshot when the prompt is a first-person
|
|
34
|
+
* experience question. No-op (but marker-recorded) otherwise, and idempotent if
|
|
35
|
+
* the snapshot is already present.
|
|
36
|
+
*/
|
|
37
|
+
export function injectSessionExperience(ctx) {
    // Every exit path returns a copy of the context with exactly one
    // `session-experience` layer record appended; the input is never mutated.
    const withLayer = (base, applied, delta) => ({
        ...base,
        layers: [...ctx.layers, { name: "session-experience", applied, delta }],
    });
    // Not a first-person experience question: record the skip and leave the
    // enriched prompt untouched.
    if (!isSelfExperiencePrompt(ctx.raw)) {
        return withLayer(ctx, false, "not-self-experience");
    }
    // The snapshot header is already in the enriched prompt: do not append twice.
    if (ctx.enriched.includes(MARKER)) {
        return withLayer(ctx, false, "already-injected");
    }
    // Append the snapshot on its own line and report how many characters were added.
    const appended = `\n${formatSessionExperience()}`;
    return withLayer({ ...ctx, enriched: `${ctx.enriched}${appended}` }, true, `chars=${appended.length}`);
}
|
|
54
|
+
//# sourceMappingURL=session-experience-injection.js.map
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Felt-experience routing: a "cảm nhận trong CLI" / "are you blind?" question
|
|
3
|
+
* gets the live session-experience snapshot injected so the agent answers from
|
|
4
|
+
* lived data, while a plain "evaluate the CLI" prompt does NOT.
|
|
5
|
+
*/
|
|
6
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
7
|
+
import { __resetSessionExperienceForTests, recordCompaction, recordElision, } from "../orchestrator/session-experience.js";
|
|
8
|
+
import { injectSessionExperience, isSelfExperiencePrompt } from "./session-experience-injection.js";
|
|
9
|
+
/**
 * Build a minimal pipeline-context fixture whose raw and enriched prompt are
 * both `raw`. A fresh object (with a fresh empty `layers` array) is created on
 * every call so tests cannot leak state into each other.
 */
function baseCtx(raw) {
    const fixture = {
        raw,
        enriched: raw,
        taskType: "analyze",
        domain: null,
        confidence: 1,
        outputStyle: null,
        tokenBudget: 8000,
        metrics: null,
        layers: [],
    };
    return fixture;
}
|
|
22
|
+
describe("isSelfExperiencePrompt", () => {
|
|
23
|
+
it("matches first-person experience / blindness / struggle questions (VI + EN)", () => {
|
|
24
|
+
for (const p of [
|
|
25
|
+
"cảm nhận trong cli thế nào",
|
|
26
|
+
"bạn có bị mù context hay cảm thấy có vấn đề gì khi làm việc trong các turn này không",
|
|
27
|
+
"how do you feel working in this session",
|
|
28
|
+
"did you struggle with anything this session",
|
|
29
|
+
"are you blind to earlier context?",
|
|
30
|
+
"bạn có gặp khó khăn gì không",
|
|
31
|
+
]) {
|
|
32
|
+
expect(isSelfExperiencePrompt(p)).toBe(true);
|
|
33
|
+
}
|
|
34
|
+
});
|
|
35
|
+
it("does NOT match plain evaluate/improve-the-CLI prompts", () => {
|
|
36
|
+
for (const p of [
|
|
37
|
+
"đánh giá agent bên trong cli và đề xuất cải thiện",
|
|
38
|
+
"phân tích pipeline PIL",
|
|
39
|
+
"improve the compaction subsystem",
|
|
40
|
+
"review the council code",
|
|
41
|
+
]) {
|
|
42
|
+
expect(isSelfExperiencePrompt(p)).toBe(false);
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
describe("injectSessionExperience", () => {
|
|
47
|
+
afterEach(() => __resetSessionExperienceForTests());
|
|
48
|
+
it("injects an intact-context snapshot on a fresh session and steers away from source-reading", () => {
|
|
49
|
+
const out = injectSessionExperience(baseCtx("bạn có bị mù context không trong session này"));
|
|
50
|
+
expect(out.enriched).toContain("[session experience —");
|
|
51
|
+
expect(out.enriched).toContain("context is intact this session");
|
|
52
|
+
expect(out.enriched).toMatch(/not by reading the CLI source/i);
|
|
53
|
+
expect(out.layers.at(-1)).toMatchObject({ name: "session-experience", applied: true });
|
|
54
|
+
});
|
|
55
|
+
it("reflects real counters when the session actually compacted/elided", () => {
|
|
56
|
+
recordCompaction(5);
|
|
57
|
+
recordElision("call_z", "read_file", 7000, 5);
|
|
58
|
+
const out = injectSessionExperience(baseCtx("cảm nhận của bạn trong cli ra sao"));
|
|
59
|
+
expect(out.enriched).toContain("fired 1x");
|
|
60
|
+
expect(out.enriched).toContain("Tool outputs elided: 1");
|
|
61
|
+
});
|
|
62
|
+
it("is a marker-recorded no-op for non-experience prompts", () => {
|
|
63
|
+
const out = injectSessionExperience(baseCtx("đánh giá tổng thể CLI và cải thiện"));
|
|
64
|
+
expect(out.enriched).not.toContain("[session experience —");
|
|
65
|
+
expect(out.layers.at(-1)).toMatchObject({
|
|
66
|
+
name: "session-experience",
|
|
67
|
+
applied: false,
|
|
68
|
+
delta: "not-self-experience",
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
it("is idempotent — does not double-inject", () => {
|
|
72
|
+
const once = injectSessionExperience(baseCtx("cảm nhận trong cli"));
|
|
73
|
+
const twice = injectSessionExperience(once);
|
|
74
|
+
const occurrences = twice.enriched.split("[session experience —").length - 1;
|
|
75
|
+
expect(occurrences).toBe(1);
|
|
76
|
+
expect(twice.layers.at(-1)).toMatchObject({ applied: false, delta: "already-injected" });
|
|
77
|
+
});
|
|
78
|
+
});
|
|
79
|
+
//# sourceMappingURL=session-experience-injection.test.js.map
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Fire-and-forget interaction logging for detailed user-agent event tracking.
|
|
5
5
|
* All calls are fail-open — logging never breaks the main flow.
|
|
6
6
|
*/
|
|
7
|
-
export type InteractionEventType = "user_message" | "agent_response" | "tool_call" | "tool_result" | "compaction" | "routing" | "pil" | "error" | "model_switch" | "council" | "ee_intercept" | "ee_judge" | "ee_injection" | "ui_interaction" | "stream_retry" | "f6_synthesis" | "grounding_flag" | "stall_rescue" | "stream_start" | "text_tool_resteer";
|
|
7
|
+
export type InteractionEventType = "user_message" | "agent_response" | "tool_call" | "tool_result" | "compaction" | "routing" | "pil" | "error" | "model_switch" | "council" | "ee_intercept" | "ee_judge" | "ee_injection" | "ui_interaction" | "stream_retry" | "f6_synthesis" | "grounding_flag" | "stall_rescue" | "stream_start" | "text_tool_resteer" | "session_experience";
|
|
8
8
|
export interface EEInjectionRow {
|
|
9
9
|
session_id: string;
|
|
10
10
|
event_subtype: string | null;
|