@davidorex/pi-agent-dispatch 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.md +37 -0
- package/dist/attested-commit.d.ts +32 -0
- package/dist/attested-commit.d.ts.map +1 -0
- package/dist/attested-commit.js +61 -0
- package/dist/attested-commit.js.map +1 -0
- package/dist/auth-gate.d.ts +92 -0
- package/dist/auth-gate.d.ts.map +1 -0
- package/dist/auth-gate.js +210 -0
- package/dist/auth-gate.js.map +1 -0
- package/dist/author-agent-spec-tool.d.ts +33 -0
- package/dist/author-agent-spec-tool.d.ts.map +1 -0
- package/dist/author-agent-spec-tool.js +98 -0
- package/dist/author-agent-spec-tool.js.map +1 -0
- package/dist/author-tool-grant-tool.d.ts +47 -0
- package/dist/author-tool-grant-tool.d.ts.map +1 -0
- package/dist/author-tool-grant-tool.js +87 -0
- package/dist/author-tool-grant-tool.js.map +1 -0
- package/dist/call-agent-tool.d.ts +42 -0
- package/dist/call-agent-tool.d.ts.map +1 -0
- package/dist/call-agent-tool.js +90 -0
- package/dist/call-agent-tool.js.map +1 -0
- package/dist/capability-composer.d.ts +11 -0
- package/dist/capability-composer.d.ts.map +1 -0
- package/dist/capability-composer.js +35 -0
- package/dist/capability-composer.js.map +1 -0
- package/dist/commit-attested-tool.d.ts +29 -0
- package/dist/commit-attested-tool.d.ts.map +1 -0
- package/dist/commit-attested-tool.js +45 -0
- package/dist/commit-attested-tool.js.map +1 -0
- package/dist/composite-loader.d.ts +36 -0
- package/dist/composite-loader.d.ts.map +1 -0
- package/dist/composite-loader.js +137 -0
- package/dist/composite-loader.js.map +1 -0
- package/dist/composites/command-allowlist.d.ts +29 -0
- package/dist/composites/command-allowlist.d.ts.map +1 -0
- package/dist/composites/command-allowlist.js +36 -0
- package/dist/composites/command-allowlist.js.map +1 -0
- package/dist/composites/git-log.d.ts +31 -0
- package/dist/composites/git-log.d.ts.map +1 -0
- package/dist/composites/git-log.js +39 -0
- package/dist/composites/git-log.js.map +1 -0
- package/dist/composites/grep-paths.d.ts +26 -0
- package/dist/composites/grep-paths.d.ts.map +1 -0
- package/dist/composites/grep-paths.js +34 -0
- package/dist/composites/grep-paths.js.map +1 -0
- package/dist/composites/read-files.d.ts +24 -0
- package/dist/composites/read-files.d.ts.map +1 -0
- package/dist/composites/read-files.js +35 -0
- package/dist/composites/read-files.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +77 -0
- package/dist/index.js.map +1 -0
- package/dist/operation-vocab.d.ts +25 -0
- package/dist/operation-vocab.d.ts.map +1 -0
- package/dist/operation-vocab.js +78 -0
- package/dist/operation-vocab.js.map +1 -0
- package/dist/read-truncation-gate.d.ts +143 -0
- package/dist/read-truncation-gate.d.ts.map +1 -0
- package/dist/read-truncation-gate.js +175 -0
- package/dist/read-truncation-gate.js.map +1 -0
- package/dist/real-check-runner.d.ts +66 -0
- package/dist/real-check-runner.d.ts.map +1 -0
- package/dist/real-check-runner.js +133 -0
- package/dist/real-check-runner.js.map +1 -0
- package/dist/run-real-checks-tool.d.ts +28 -0
- package/dist/run-real-checks-tool.d.ts.map +1 -0
- package/dist/run-real-checks-tool.js +47 -0
- package/dist/run-real-checks-tool.js.map +1 -0
- package/dist/run-work-order-loop-tool.d.ts +35 -0
- package/dist/run-work-order-loop-tool.d.ts.map +1 -0
- package/dist/run-work-order-loop-tool.js +46 -0
- package/dist/run-work-order-loop-tool.js.map +1 -0
- package/dist/verified-identity.d.ts +54 -0
- package/dist/verified-identity.d.ts.map +1 -0
- package/dist/verified-identity.js +133 -0
- package/dist/verified-identity.js.map +1 -0
- package/dist/work-order-loop.d.ts +82 -0
- package/dist/work-order-loop.d.ts.map +1 -0
- package/dist/work-order-loop.js +149 -0
- package/dist/work-order-loop.js.map +1 -0
- package/package.json +59 -0
- package/skill-narrative.md +53 -0
- package/skills/pi-agent-dispatch/SKILL.md +138 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* read-truncation-gate — pi.on('tool_result', handler) registered from
|
|
3
|
+
* the pi-agent-dispatch extension factory that intercepts pi's built-in
|
|
4
|
+
* `read` tool responses when the underlying TruncationResult signals
|
|
5
|
+
* truncation, and REPLACES the content payload entirely with a single
|
|
6
|
+
* text item carrying a hard-refusal directive.
|
|
7
|
+
*
|
|
8
|
+
* Canonical model: the pi.on('tool_result') boundary mirrors the auth-
|
|
9
|
+
* gate's tool_call boundary — it runs at the pi-dispatch layer, after
|
|
10
|
+
* the tool's execute() body has produced its result, and before the
|
|
11
|
+
* LLM sees the result. Returning a ToolResultEventResult with `content`
|
|
12
|
+
* set REPLACES the corresponding event field entirely (no merge); the
|
|
13
|
+
* agent never sees the original (truncated head) content. The directive
|
|
14
|
+
* IS the content, so the agent cannot skim past it — there is no head
|
|
15
|
+
* to skim. The phrasing pattern (⚠️ prefix, INCOMPLETE / "do NOT"
|
|
16
|
+
* framing, explicit-next-action guidance naming concrete tools + params)
|
|
17
|
+
* mirrors pi-context's serializeForRead overCapDirective canon at
|
|
18
|
+
* read-element.ts:222-245, which proved itself across the typed-read
|
|
19
|
+
* surfaces (read-block-page / read-schema / read-samples-catalog).
|
|
20
|
+
*
|
|
21
|
+
* Why pi's built-in marker is insufficient: pi's read tool appends a
|
|
22
|
+
* single text marker at end-of-content on truncation (the marker IS
|
|
23
|
+
* structured + carries a continuation hint via `offset=N`), but its
|
|
24
|
+
* end-of-content position is trivially skimmable — LLMs scan content
|
|
25
|
+
* for the substantive bit they want and miss the marker. Empirically,
|
|
26
|
+
* an in-pi LLM reading a large file gets the head and proceeds with
|
|
27
|
+
* incomplete content as if it had the whole file. This handler closes
|
|
28
|
+
* that gap by making the directive the entire visible response.
|
|
29
|
+
*
|
|
30
|
+
* Coexists with auth-gate: that handler registers on pi.on('tool_call')
|
|
31
|
+
* for the substrate-write authorization surface; this handler registers
|
|
32
|
+
* on pi.on('tool_result') for the read-output integrity surface. The
|
|
33
|
+
* two events are orthogonal and the handlers do not interfere.
|
|
34
|
+
*
|
|
35
|
+
* No-op for non-read tools and for non-truncated read results — handler
|
|
36
|
+
* returns undefined, leaving the original event payload untouched.
|
|
37
|
+
*/
|
|
38
|
+
import type { ExtensionAPI, ExtensionContext, ToolResultEvent } from "@earendil-works/pi-coding-agent";
|
|
39
|
+
/**
|
|
40
|
+
* Local mirror of pi's internal `ToolResultEventResult` shape (not
|
|
41
|
+
* re-exported from the SDK index.d.ts at present; declared at
|
|
42
|
+
* `node_modules/@earendil-works/pi-coding-agent/dist/core/extensions/types.d.ts:726-730`).
|
|
43
|
+
* Carrying a local interface keeps the handler-return contract typed at
|
|
44
|
+
* the dispatch module without reaching into the SDK's internal-types
|
|
45
|
+
* subpath, which is not part of its public surface contract.
|
|
46
|
+
*
|
|
47
|
+
* Field semantics per the SDK: any field set REPLACES the corresponding
|
|
48
|
+
* event field entirely (no merge); omitted fields are left untouched.
|
|
49
|
+
*/
|
|
50
|
+
interface ToolResultGateOutcome {
|
|
51
|
+
content?: {
|
|
52
|
+
type: "text";
|
|
53
|
+
text: string;
|
|
54
|
+
}[];
|
|
55
|
+
details?: unknown;
|
|
56
|
+
isError?: boolean;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Minimal projection of pi's TruncationResult carrying the fields the
|
|
60
|
+
* directive renders. Mirrors the canonical shape declared at
|
|
61
|
+
* `node_modules/@earendil-works/pi-coding-agent/dist/core/tools/truncate.d.ts`
|
|
62
|
+
* — kept as a local interface (rather than a deep import) so the
|
|
63
|
+
* dispatch module is not coupled to an internal pi tool-types path
|
|
64
|
+
* that may shift across pi versions; the field set is the public
|
|
65
|
+
* contract on details.truncation.
|
|
66
|
+
*/
|
|
67
|
+
export interface TruncationProjection {
|
|
68
|
+
truncated: boolean;
|
|
69
|
+
truncatedBy?: "lines" | "bytes" | null;
|
|
70
|
+
outputLines?: number;
|
|
71
|
+
totalLines?: number;
|
|
72
|
+
outputBytes?: number;
|
|
73
|
+
totalBytes?: number;
|
|
74
|
+
maxLines?: number;
|
|
75
|
+
maxBytes?: number;
|
|
76
|
+
lastLinePartial?: boolean;
|
|
77
|
+
firstLineExceedsLimit?: boolean;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Pure function that constructs the directive text from the pi read
|
|
81
|
+
* tool call input + the structured TruncationResult. No I/O; identical
|
|
82
|
+
* inputs yield identical output. Exported separately from the handler
|
|
83
|
+
* so unit tests can drive directive composition in isolation.
|
|
84
|
+
*
|
|
85
|
+
* Renders:
|
|
86
|
+
* - the file path (from `input.path`; defaults to "<unknown>" if the
|
|
87
|
+
* input shape is unexpectedly empty — defensive guard, not a
|
|
88
|
+
* contract);
|
|
89
|
+
* - the truncation accounting (outputLines / totalLines and
|
|
90
|
+
* outputBytes / totalBytes; numeric fields rendered with a default
|
|
91
|
+
* of "?" if absent so a partial TruncationResult still produces a
|
|
92
|
+
* readable directive);
|
|
93
|
+
* - variant-conditional next-action guidance:
|
|
94
|
+
* * firstLineExceedsLimit=true: pagination via `offset=N` is
|
|
95
|
+
* operationally meaningless (offset=0 re-fires the same
|
|
96
|
+
* truncation; offset>=1 jumps past the unreadable line losing
|
|
97
|
+
* content); the directive instead names `grep` for targeted
|
|
98
|
+
* search + bash `sed -n '<line>p' <path> | head -c <bytes>`
|
|
99
|
+
* for byte-range slicing of the over-long single line.
|
|
100
|
+
* * truncatedBy='bytes' (without firstLineExceedsLimit): the
|
|
101
|
+
* byte cap fired (not line cap); paginating by line offset may
|
|
102
|
+
* again exceed the byte cap on the next page with no agent
|
|
103
|
+
* signal — directive surfaces this risk + prefers `grep`.
|
|
104
|
+
* * truncatedBy='lines' (default): line cap fired; offset-by-
|
|
105
|
+
* outputLines pagination works predictably.
|
|
106
|
+
* - lastLinePartial=true clause (when set): warns the agent the
|
|
107
|
+
* terminal characters of the returned content were cut mid-content
|
|
108
|
+
* and should not be trusted (mid-JSON-object, mid-source-
|
|
109
|
+
* expression, mid-log-entry); placed AFTER the truncation summary
|
|
110
|
+
* + BEFORE the next-action clauses.
|
|
111
|
+
*
|
|
112
|
+
* The "The truncated head is NOT returned" sentence enforces the
|
|
113
|
+
* hard-refusal semantic at the language level — it signals the agent
|
|
114
|
+
* that the directive IS the entire response payload, not a header
|
|
115
|
+
* followed by content.
|
|
116
|
+
*/
|
|
117
|
+
export declare function buildTruncationDirective(input: Record<string, unknown> | undefined, truncation: TruncationProjection): string;
|
|
118
|
+
/**
|
|
119
|
+
* The pi.on('tool_result') handler. Exported separately from the
|
|
120
|
+
* registration helper so unit tests can invoke it directly with a mock
|
|
121
|
+
* event and mock context, without driving a real pi extension factory.
|
|
122
|
+
*
|
|
123
|
+
* Returns:
|
|
124
|
+
* - `undefined` (pass-through) when toolName !== "read";
|
|
125
|
+
* - `undefined` (pass-through) when details.truncation is absent or
|
|
126
|
+
* truncation.truncated is false (the read was complete);
|
|
127
|
+
* - `{ content, isError }` REPLACING the event payload when the read
|
|
128
|
+
* was truncated. `isError` is set false because the read itself
|
|
129
|
+
* succeeded — only its completeness is at issue; raising isError
|
|
130
|
+
* would surface as a tool failure to the LLM and trigger error-
|
|
131
|
+
* handling paths instead of the canonical "use offset / use grep"
|
|
132
|
+
* guidance the directive carries.
|
|
133
|
+
*/
|
|
134
|
+
export declare function readTruncationGateHandler(event: ToolResultEvent, _ctx: ExtensionContext): Promise<ToolResultGateOutcome | undefined>;
|
|
135
|
+
/**
|
|
136
|
+
* Register the read-truncation-gate handler on a pi extension API.
|
|
137
|
+
* Single line at the factory call-site. Idempotent registration is the
|
|
138
|
+
* responsibility of the caller (the extension factory runs once per pi
|
|
139
|
+
* process; double-registration would produce duplicate replacements).
|
|
140
|
+
*/
|
|
141
|
+
export declare function registerReadTruncationGate(pi: ExtensionAPI): void;
|
|
142
|
+
export {};
|
|
143
|
+
//# sourceMappingURL=read-truncation-gate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"read-truncation-gate.d.ts","sourceRoot":"","sources":["../src/read-truncation-gate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAEvG;;;;;;;;;;GAUG;AACH,UAAU,qBAAqB;IAC9B,OAAO,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC3C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACpC,SAAS,EAAE,OAAO,CAAC;IACnB,WAAW,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,IAAI,CAAC;IACvC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,qBAAqB,CAAC,EAAE,OAAO,CAAC;CAChC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,wBAAgB,wBAAwB,CACvC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,EAC1C,UAAU,EAAE,oBAAoB,GAC9B,MAAM,CA6DR;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,yBAAyB,CAC9C,KAAK,EAAE,eAAe,EACtB,IAAI,EAAE,gBAAgB,GACpB,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAc5C;AAED;;;;;GAKG;AACH,wBAAgB,0BAA0B,CAAC,EAAE,EAAE,YAAY,GAAG,IAAI,CAEjE"}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* read-truncation-gate — pi.on('tool_result', handler) registered from
|
|
3
|
+
* the pi-agent-dispatch extension factory that intercepts pi's built-in
|
|
4
|
+
* `read` tool responses when the underlying TruncationResult signals
|
|
5
|
+
* truncation, and REPLACES the content payload entirely with a single
|
|
6
|
+
* text item carrying a hard-refusal directive.
|
|
7
|
+
*
|
|
8
|
+
* Canonical model: the pi.on('tool_result') boundary mirrors the auth-
|
|
9
|
+
* gate's tool_call boundary — it runs at the pi-dispatch layer, after
|
|
10
|
+
* the tool's execute() body has produced its result, and before the
|
|
11
|
+
* LLM sees the result. Returning a ToolResultEventResult with `content`
|
|
12
|
+
* set REPLACES the corresponding event field entirely (no merge); the
|
|
13
|
+
* agent never sees the original (truncated head) content. The directive
|
|
14
|
+
* IS the content, so the agent cannot skim past it — there is no head
|
|
15
|
+
* to skim. The phrasing pattern (⚠️ prefix, INCOMPLETE / "do NOT"
|
|
16
|
+
* framing, explicit-next-action guidance naming concrete tools + params)
|
|
17
|
+
* mirrors pi-context's serializeForRead overCapDirective canon at
|
|
18
|
+
* read-element.ts:222-245, which proved itself across the typed-read
|
|
19
|
+
* surfaces (read-block-page / read-schema / read-samples-catalog).
|
|
20
|
+
*
|
|
21
|
+
* Why pi's built-in marker is insufficient: pi's read tool appends a
|
|
22
|
+
* single text marker at end-of-content on truncation (the marker IS
|
|
23
|
+
* structured + carries a continuation hint via `offset=N`), but its
|
|
24
|
+
* end-of-content position is trivially skimmable — LLMs scan content
|
|
25
|
+
* for the substantive bit they want and miss the marker. Empirically,
|
|
26
|
+
* an in-pi LLM reading a large file gets the head and proceeds with
|
|
27
|
+
* incomplete content as if it had the whole file. This handler closes
|
|
28
|
+
* that gap by making the directive the entire visible response.
|
|
29
|
+
*
|
|
30
|
+
* Coexists with auth-gate: that handler registers on pi.on('tool_call')
|
|
31
|
+
* for the substrate-write authorization surface; this handler registers
|
|
32
|
+
* on pi.on('tool_result') for the read-output integrity surface. The
|
|
33
|
+
* two events are orthogonal and the handlers do not interfere.
|
|
34
|
+
*
|
|
35
|
+
* No-op for non-read tools and for non-truncated read results — handler
|
|
36
|
+
* returns undefined, leaving the original event payload untouched.
|
|
37
|
+
*/
|
|
38
|
+
/**
 * Compose the hard-refusal directive text that stands in for a truncated
 * `read` tool result. Pure function: no I/O, identical inputs yield
 * identical output, so unit tests can drive it in isolation.
 *
 * The directive is assembled from three parts, in this order:
 *   1. a summary naming the file and the truncation accounting
 *      (outputLines / totalLines, outputBytes / totalBytes);
 *   2. an optional partial-last-line warning (only when
 *      `truncation.lastLinePartial` is truthy);
 *   3. variant-conditional next-action guidance:
 *      - `firstLineExceedsLimit === true`: pagination is suppressed and
 *        the text names `grep` plus a `sed | head -c` byte-slice command;
 *      - `truncatedBy === "bytes"`: a byte-cap risk warning followed by
 *        the shared continuation guidance;
 *      - anything else: the shared continuation guidance only.
 *
 * Continuation offset: the hint is `input.offset + outputLines` when the
 * truncated read was itself issued with a positive integer `offset`, so a
 * truncated second (or later) page points forward instead of back at a
 * range the agent has already requested. When no usable offset is present
 * the hint is `outputLines`, unchanged from the previous behavior.
 * NOTE(review): whether pi's `offset` is 0- or 1-based is not visible
 * here; adding the starting offset is correct under either convention.
 *
 * @param {Record<string, unknown> | undefined} input - The read tool call
 *   input. `path` is used when it is a non-empty string (otherwise
 *   "<unknown>"); `offset` is used when it is a positive integer.
 * @param {object} truncation - Truncation details. Missing numeric fields
 *   render as "?" so a partial object still produces a readable directive.
 * @returns {string} The directive text.
 */
export function buildTruncationDirective(input, truncation) {
    const path = typeof input?.path === "string" && input.path.length > 0 ? input.path : "<unknown>";
    // POSIX single-quote the path for the suggested shell command so that
    // spaces, metacharacters and the "<unknown>" placeholder (which would
    // otherwise parse as a redirection) cannot alter the command. An embedded
    // single quote is written as '\'' (close quote, escaped quote, reopen).
    const shellPath = `'${path.replace(/'/g, "'\\''")}'`;
    const totalLines = truncation.totalLines ?? "?";
    const totalBytes = truncation.totalBytes ?? "?";
    const outputLines = truncation.outputLines ?? "?";
    const outputBytes = truncation.outputBytes ?? "?";
    const maxBytes = truncation.maxBytes ?? "?";
    // Account for where the truncated read started; ignore anything that is
    // not a positive integer so malformed input degrades to the old output.
    const requestedOffset = input?.offset;
    const startOffset = Number.isInteger(requestedOffset) && requestedOffset > 0 ? requestedOffset : 0;
    const nextOffset = Number.isFinite(truncation.outputLines)
        ? startOffset + truncation.outputLines
        : outputLines;
    const summary = `⚠️ READ TRUNCATED — file \`${path}\` is ${totalLines} lines / ${totalBytes} bytes total; ` +
        `only the first ${outputLines} lines / ${outputBytes} bytes were returned. ` +
        `The content below is INCOMPLETE; do NOT proceed as if you have the full file.`;
    // Integrity warning goes between the summary and the next actions so it
    // is read before the agent composes a follow-up read.
    const partialLineClause = truncation.lastLinePartial
        ? `\n\nWARNING: the last returned line was cut mid-content; do not trust its trailing ` +
            `characters. The next read with \`offset=${nextOffset}\` should start at the line AFTER ` +
            `the partial one OR re-read the partial line in full via a smaller \`limit=\`.`
        : "";
    // Guidance shared by the bytes-cap and lines-cap variants.
    const continuation = `To continue from where the read stopped: call \`read\` again with \`offset=${nextOffset}\` ` +
        `(and \`limit=...\` if desired).\n` +
        `To find specific content without reading the rest of the file: use \`grep\` with a pattern + path.\n\n` +
        `The truncated head is NOT returned in this response — re-issue the read with ` +
        `\`offset=${nextOffset}\` or use \`grep\` for targeted search.`;
    let nextActions;
    if (truncation.firstLineExceedsLimit === true) {
        // Dominant signal: line-offset pagination cannot help, so no
        // continuation clause is emitted at all.
        nextActions =
            `\n\nThe file's first line ALONE exceeds the read byte cap (${outputBytes} of ${maxBytes} ` +
                `bytes returned; \`firstLineExceedsLimit=true\`). Pagination via the read tool cannot help: ` +
                `re-issuing with a zero line offset re-fires this same truncation, and any positive line ` +
                `offset jumps past the unreadable line losing its content entirely. Useful next actions:\n` +
                `  - use \`grep\` with a pattern + path to find specific content within the over-long line;\n` +
                `  - use \`bash\` to byte-slice the single line, e.g. \`sed -n '1p' ${shellPath} | head -c ${maxBytes}\`, ` +
                `adjusting the line number + byte count as needed.\n\n` +
                `The truncated head is NOT returned in this response — use \`grep\` or byte-range slicing as above.`;
    }
    else if (truncation.truncatedBy === "bytes") {
        nextActions =
            `\n\nTruncation fired on the BYTES cap (not line count); paginating by line offset may again ` +
                `exceed the byte cap on the next page. If the lines are long, prefer \`grep\` for targeted search.\n` +
                continuation;
    }
    else {
        nextActions = `\n\n${continuation}`;
    }
    return summary + partialLineClause + nextActions;
}
|
|
135
|
+
/**
 * `tool_result` event handler that swaps a truncated `read` result for the
 * truncation directive. Kept separate from the registration helper so it
 * can be invoked directly with a mock event.
 *
 * Outcome by case:
 *   - tool is not "read"                       -> resolves `undefined`
 *     (pass-through);
 *   - `details.truncation` is missing, or its `truncated` flag is falsy
 *                                              -> resolves `undefined`
 *     (pass-through);
 *   - otherwise                                -> resolves
 *     `{ content, isError: false }`, where `content` is a single text item
 *     holding the directive built from `event.input` and the truncation
 *     details. `isError` is false because the read itself succeeded; only
 *     its completeness is in question.
 *
 * @param {object} event - The tool-result event (`toolName`, `input`,
 *   `details` are read).
 * @param {object} _ctx - Extension context; not used by this handler.
 * @returns {Promise<{content: {type: "text", text: string}[], isError: boolean} | undefined>}
 */
export async function readTruncationGateHandler(event, _ctx) {
    // Only `read` results are inspected; for any other tool the details
    // object is never touched.
    const truncation = event.toolName === "read" ? event.details?.truncation : undefined;
    if (truncation?.truncated) {
        const text = buildTruncationDirective(event.input, truncation);
        return {
            content: [{ type: "text", text }],
            isError: false,
        };
    }
    // Pass-through: leave the original event payload as it is.
    return undefined;
}
|
|
166
|
+
/**
 * Attach the read-truncation gate to a pi extension API by subscribing
 * `readTruncationGateHandler` to the "tool_result" event.
 *
 * This function does not guard against repeated calls: each call adds
 * another subscription, so the caller is responsible for invoking it once.
 *
 * @param {object} pi - Extension API exposing `on(eventName, handler)`.
 * @returns {void}
 */
export function registerReadTruncationGate(pi) {
    const eventName = "tool_result";
    pi.on(eventName, readTruncationGateHandler);
}
|
|
175
|
+
//# sourceMappingURL=read-truncation-gate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"read-truncation-gate.js","sourceRoot":"","sources":["../src/read-truncation-gate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AA2CH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,MAAM,UAAU,wBAAwB,CACvC,KAA0C,EAC1C,UAAgC;IAEhC,MAAM,IAAI,GAAG,OAAO,KAAK,EAAE,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC;IACjG,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,IAAI,GAAG,CAAC;IAChD,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,IAAI,GAAG,CAAC;IAChD,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,GAAG,CAAC;IAClD,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,GAAG,CAAC;IAClD,MAAM,QAAQ,GAAG,UAAU,CAAC,QAAQ,IAAI,GAAG,CAAC;IAC5C,gFAAgF;IAChF,6EAA6E;IAC7E,0EAA0E;IAC1E,4DAA4D;IAC5D,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,IAAI,GAAG,CAAC;IAEjD,MAAM,OAAO,GACZ,8BAA8B,IAAI,SAAS,UAAU,YAAY,UAAU,gBAAgB;QAC3F,kBAAkB,WAAW,YAAY,WAAW,wBAAwB;QAC5E,+EAA+E,CAAC;IAEjF,6EAA6E;IAC7E,4EAA4E;IAC5E,8BAA8B;IAC9B,MAAM,iBAAiB,GAAG,UAAU,CAAC,eAAe;QACnD,CAAC,CAAC,qFAAqF;YACtF,2CAA2C,UAAU,oCAAoC;YACzF,+EAA+E;QAChF,CAAC,CAAC,EAAE,CAAC;IAEN,sEAAsE;IACtE,qEAAqE;IACrE,oEAAoE;IACpE,sCAAsC;IACtC,IAAI,WAAmB,CAAC;IACxB,IAAI,UAAU,CAAC,qBAAqB,KAAK,IAAI,EAAE,CAAC;QAC/C,WAAW;YACV,8DAA8D,WAAW,OAAO,QAAQ,GAAG;gBAC3F,6FAA6F;gBAC7F,0FAA0F;gBAC1F,2FAA2F;gBAC3F,8FAA8F;gBAC9F,sEAAsE,IAAI,cAAc,QAAQ,MAAM;gBACtG,uDAAuD;gBACvD,oGAAoG,CAAC;IACvG,CAAC;SAAM,IAAI,UAAU,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;QAC/C,WAAW;YACV,8FAA8F;gBAC9F,qGAAqG;gBACrG,8EAA8E,UAAU,KAAK;gBAC7F,mCAAmC;gBACnC,wGAAwG;gBACxG,+EAA+E;gBAC/E,YAAY,UAAU,yCAAyC,CAAC;IAClE,CAAC;SAAM,CAAC;QACP,WAAW;YACV,kFAAkF,UAAU,KAAK;gBACjG,mCAAmC;gBACnC,wGAAwG;gBACxG,+EAA+E;gBAC/E,YAAY,UAAU,yCAAyC,CAAC;IAClE,CAAC;IAED,OAAO,OAAO,GAAG,iBAAiB,GAAG,WAAW,CAAC;AAClD,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC9C,KAAsB,EACtB,IAAsB;IAEtB,IAAI,KAAK,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;QAC/B,OAAO,CAAC,iDAAiD;IAC1D,CAAC;IACD,MAAM,OAAO,GAAG,KAAK,CAAC,OAA4D,CAAC;IACnF,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,CAAC;IACvC,IAAI,CAAC,UAAU,EAAE,SAAS,EAAE,CAAC;QAC
5B,OAAO,CAAC,gEAAgE;IACzE,CAAC;IACD,MAAM,SAAS,GAAG,wBAAwB,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IACpE,OAAO;QACN,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC5C,OAAO,EAAE,KAAK;KACd,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,0BAA0B,CAAC,EAAgB;IAC1D,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,yBAAyB,CAAC,CAAC;AACjD,CAAC"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* real-check-runner — deterministic verdict gate for agent-authored work-orders
|
|
3
|
+
* (TASK-090 / DEC-0018 / FGAP-102 / DEC-0047 clause 5). Runs the real-checks
|
|
4
|
+
* declared in a work-order's `real_check_criteria`: build/check/test exit,
|
|
5
|
+
* runtime-demo invocation + expected-substring presence, adversarial-probe
|
|
6
|
+
* grep-based evidence enumeration. Returns a structured RealCheckResult; the
|
|
7
|
+
* orchestrator (or `run-real-checks` Pi tool) interprets the verdict —
|
|
8
|
+
* never the executing agent's self-report.
|
|
9
|
+
*
|
|
10
|
+
* The runner buffers full stdout/stderr (no truncation) and surfaces the
|
|
11
|
+
* raw shell exit code so callers can diagnose. Timeouts terminate the
|
|
12
|
+
* spawned shell via SIGTERM; the captured streams up to that point are
|
|
13
|
+
* preserved and a non-zero exit code is reported.
|
|
14
|
+
*/
|
|
15
|
+
export interface BuildCheckTestResult {
|
|
16
|
+
passed: boolean;
|
|
17
|
+
exit_code: number;
|
|
18
|
+
stdout: string;
|
|
19
|
+
stderr: string;
|
|
20
|
+
duration_ms: number;
|
|
21
|
+
}
|
|
22
|
+
export interface RuntimeDemoResult {
|
|
23
|
+
passed: boolean;
|
|
24
|
+
output: string;
|
|
25
|
+
expected: string;
|
|
26
|
+
duration_ms: number;
|
|
27
|
+
}
|
|
28
|
+
export interface AdversarialProbeResult {
|
|
29
|
+
passed: boolean;
|
|
30
|
+
per_target: Array<{
|
|
31
|
+
target: string;
|
|
32
|
+
hits: string;
|
|
33
|
+
}>;
|
|
34
|
+
duration_ms: number;
|
|
35
|
+
}
|
|
36
|
+
export interface RealCheckCriteria {
|
|
37
|
+
build_check_test?: boolean;
|
|
38
|
+
runtime_demo?: {
|
|
39
|
+
invocation: string;
|
|
40
|
+
expected: string;
|
|
41
|
+
};
|
|
42
|
+
adversarial_probe?: {
|
|
43
|
+
targets: string[];
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
export interface RealCheckResult {
|
|
47
|
+
passed: boolean;
|
|
48
|
+
work_order_id: string;
|
|
49
|
+
details: {
|
|
50
|
+
build_check_test?: BuildCheckTestResult;
|
|
51
|
+
runtime_demo?: RuntimeDemoResult;
|
|
52
|
+
adversarial_probe?: AdversarialProbeResult;
|
|
53
|
+
};
|
|
54
|
+
total_duration_ms: number;
|
|
55
|
+
timestamp: string;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Run all declared real-checks for a work-order. The criteria object selects
|
|
59
|
+
* which checks fire; absent fields are skipped (not failed). Aggregate
|
|
60
|
+
* `passed` is the AND of every declared check; an empty criteria block
|
|
61
|
+
* passes trivially (no gate declared).
|
|
62
|
+
*/
|
|
63
|
+
export declare function runRealChecks(cwd: string, work_order_id: string, criteria: RealCheckCriteria, opts?: {
|
|
64
|
+
max_check_time_ms?: number;
|
|
65
|
+
}): Promise<RealCheckResult>;
|
|
66
|
+
//# sourceMappingURL=real-check-runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"real-check-runner.d.ts","sourceRoot":"","sources":["../src/real-check-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAIH,MAAM,WAAW,oBAAoB;IACpC,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,iBAAiB;IACjC,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACpD,WAAW,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,iBAAiB;IACjC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,YAAY,CAAC,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IACxD,iBAAiB,CAAC,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;CAC1C;AAED,MAAM,WAAW,eAAe;IAC/B,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE;QACR,gBAAgB,CAAC,EAAE,oBAAoB,CAAC;QACxC,YAAY,CAAC,EAAE,iBAAiB,CAAC;QACjC,iBAAiB,CAAC,EAAE,sBAAsB,CAAC;KAC3C,CAAC;IACF,iBAAiB,EAAE,MAAM,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;CAClB;AA0GD;;;;;GAKG;AACH,wBAAsB,aAAa,CAClC,GAAG,EAAE,MAAM,EACX,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,iBAAiB,EAC3B,IAAI,CAAC,EAAE;IAAE,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAAE,GACnC,OAAO,CAAC,eAAe,CAAC,CA4B1B"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* real-check-runner — deterministic verdict gate for agent-authored work-orders
|
|
3
|
+
* (TASK-090 / DEC-0018 / FGAP-102 / DEC-0047 clause 5). Runs the real-checks
|
|
4
|
+
* declared in a work-order's `real_check_criteria`: build/check/test exit,
|
|
5
|
+
* runtime-demo invocation + expected-substring presence, adversarial-probe
|
|
6
|
+
* grep-based evidence enumeration. Returns a structured RealCheckResult; the
|
|
7
|
+
* orchestrator (or `run-real-checks` Pi tool) interprets the verdict —
|
|
8
|
+
* never the executing agent's self-report.
|
|
9
|
+
*
|
|
10
|
+
* The runner buffers full stdout/stderr (no truncation) and surfaces the
|
|
11
|
+
* raw shell exit code so callers can diagnose. Timeouts terminate the
|
|
12
|
+
* spawned shell via SIGTERM; the captured streams up to that point are
|
|
13
|
+
* preserved and a non-zero exit code is reported.
|
|
14
|
+
*/
|
|
15
|
+
import { spawn } from "node:child_process";
|
|
16
|
+
/**
 * Run a shell command via `sh -c <cmd>`, buffering full stdout/stderr.
 *
 * Settles once the child's stdio has closed, whatever the exit code; rejects
 * only when the process cannot be spawned. On timeout the shell is sent
 * SIGTERM and, as soon as it has exited, both pipes are torn down so the
 * promise settles even when a grandchild forked by the shell is still holding
 * them open. Everything captured up to that point is preserved.
 *
 * @param {string} cwd - Working directory for the spawned shell.
 * @param {string} cmd - Command line passed verbatim to `sh -c`.
 * @param {number} timeoutMs - Milliseconds to wait before sending SIGTERM.
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string, durationMs: number}>}
 *   `exitCode` is the shell's own status when it has one, 124 when the shell
 *   was terminated by a signal (the timeout path), and 1 when neither a code
 *   nor a signal was reported.
 */
function runShell(cwd, cmd, timeoutMs) {
    return new Promise((resolve, reject) => {
        const start = Date.now();
        let child;
        try {
            child = spawn("sh", ["-c", cmd], { cwd, stdio: ["ignore", "pipe", "pipe"] });
        }
        catch (err) {
            reject(err);
            return;
        }
        let stdout = "";
        let stderr = "";
        // Let the stream decode: calling toString() on each Buffer chunk mangles
        // any multi-byte UTF-8 character that straddles a chunk boundary.
        child.stdout?.setEncoding("utf8");
        child.stderr?.setEncoding("utf8");
        child.stdout?.on("data", (chunk) => {
            stdout += chunk;
        });
        child.stderr?.on("data", (chunk) => {
            stderr += chunk;
        });
        // "close" only fires after both pipes are closed, and a process forked by
        // the shell inherits them. Destroying our ends lets "close" fire without
        // waiting for such a survivor.
        const releasePipes = () => {
            child.stdout?.destroy();
            child.stderr?.destroy();
        };
        let timedOut = false;
        const timer = setTimeout(() => {
            timedOut = true;
            if (child.exitCode !== null || child.signalCode !== null) {
                // shell already gone; only inherited pipes keep us waiting
                releasePipes();
                return;
            }
            try {
                child.kill("SIGTERM");
            }
            catch {
                // child may have already exited between timer fire and kill
            }
        }, timeoutMs);
        child.on("error", (err) => {
            clearTimeout(timer);
            reject(err);
        });
        child.on("exit", () => {
            if (timedOut) {
                releasePipes();
            }
        });
        child.on("close", (code, signal) => {
            clearTimeout(timer);
            const durationMs = Date.now() - start;
            // when killed via SIGTERM (timeout), code is null + signal is set;
            // surface a synthetic non-zero exit so callers see a failure.
            const exitCode = code ?? (signal ? 124 : 1);
            resolve({ exitCode, stdout, stderr, durationMs });
        });
    });
}
|
|
63
|
+
/**
 * Build/check/test gate: runs `npm run check` followed by `npm test` in `cwd`
 * and reports the combined outcome.
 *
 * @param {string} cwd - Directory the npm scripts are run from.
 * @param {number} timeoutMs - Timeout forwarded to the shell runner.
 * @returns {Promise<{passed: boolean, exit_code: number, stdout: string, stderr: string, duration_ms: number}>}
 *   `passed` is true only when the chained command exits with status 0.
 */
async function runBCT(cwd, timeoutMs) {
    // One `sh -c` invocation: `&&` keeps the scripts sequential and skips
    // `npm test` when `npm run check` fails.
    const { exitCode, stdout, stderr, durationMs } = await runShell(cwd, "npm run check && npm test", timeoutMs);
    const passed = exitCode === 0;
    return { passed, exit_code: exitCode, stdout, stderr, duration_ms: durationMs };
}
|
|
74
|
+
/**
 * Runtime-demo gate: executes `demo.invocation` through the shell runner and
 * checks its stdout for `demo.expected`.
 *
 * @param {string} cwd - Directory the invocation is run from.
 * @param {{invocation: string, expected: string}} demo - Command to run and the
 *   substring its stdout must contain.
 * @param {number} timeoutMs - Timeout forwarded to the shell runner.
 * @returns {Promise<{passed: boolean, output: string, expected: string, duration_ms: number}>}
 */
async function runRD(cwd, demo, timeoutMs) {
    const run = await runShell(cwd, demo.invocation, timeoutMs);
    // A zero exit alone is not enough: the expected substring must also appear
    // in stdout, which catches an invocation that exits 0 yet prints something
    // other than the demonstrated feature (DEC-0018).
    let passed = false;
    if (run.exitCode === 0) {
        passed = run.stdout.includes(demo.expected);
    }
    return {
        passed,
        output: run.stdout,
        expected: demo.expected,
        duration_ms: run.durationMs,
    };
}
|
|
87
|
+
/**
 * Adversarial-probe gate: greps the tree under `cwd` once per target and
 * records the raw matches as evidence.
 *
 * Targets come from the work-order, so each one is wrapped in single quotes
 * (with embedded single quotes escaped) before it reaches `sh -c`. Double
 * quoting would still let the shell expand `$(...)`, backticks and `$VAR`
 * inside the pattern. `-e` keeps a target that starts with `-` from being
 * read as a grep option, and `--` ends option parsing before the path.
 *
 * @param {string} cwd - Directory searched recursively.
 * @param {string[]} targets - Patterns to search for, one grep run each.
 * @param {number} timeoutMs - Timeout forwarded to the shell runner for each grep.
 * @returns {Promise<{passed: boolean, per_target: Array<{target: string, hits: string}>, duration_ms: number}>}
 *   `passed` is true only when there is at least one target and every target
 *   produced non-empty grep output.
 */
async function runAP(cwd, targets, timeoutMs) {
    const start = Date.now();
    const per_target = [];
    for (const target of targets) {
        // POSIX single quotes disable every expansion; a literal ' is written
        // by closing the quote, emitting \', and reopening it.
        const quoted = `'${String(target).replaceAll("'", "'\\''")}'`;
        // grep -rn over cwd; exit 1 (no match) is data, not error — captured stdout
        // is the evidence enumeration the orchestrator inspects.
        const r = await runShell(cwd, `grep -rn -e ${quoted} -- .`, timeoutMs);
        per_target.push({ target, hits: r.stdout });
    }
    const passed = per_target.length > 0 && per_target.every((t) => t.hits.length > 0);
    return {
        passed,
        per_target,
        duration_ms: Date.now() - start,
    };
}
|
|
103
|
+
/**
 * Execute every real-check a work-order declares and fold the outcomes into a
 * single verdict.
 *
 * A check runs only when its criteria field is present (`build_check_test`
 * must be exactly `true`; `adversarial_probe` needs a non-empty `targets`
 * list). Checks that are not declared are skipped rather than failed, so a
 * criteria object declaring nothing yields `passed: true`.
 *
 * @param {string} cwd - Directory every check is run from.
 * @param {string} work_order_id - Copied unchanged into the result.
 * @param {object} criteria - The work-order's real-check criteria.
 * @param {{max_check_time_ms?: number}} [opts] - Per-check timeout; 600000 ms when omitted.
 * @returns {Promise<object>} `{ passed, work_order_id, details, total_duration_ms, timestamp }`.
 */
export async function runRealChecks(cwd, work_order_id, criteria, opts) {
    const perCheckTimeout = opts?.max_check_time_ms ?? 600_000;
    const startedAt = Date.now();
    const timestamp = new Date(startedAt).toISOString();
    const details = {};

    if (criteria.build_check_test === true) {
        details.build_check_test = await runBCT(cwd, perCheckTimeout);
    }

    if (criteria.runtime_demo) {
        details.runtime_demo = await runRD(cwd, criteria.runtime_demo, perCheckTimeout);
    }

    const probeTargets = criteria.adversarial_probe?.targets;
    if (probeTargets && probeTargets.length > 0) {
        details.adversarial_probe = await runAP(cwd, probeTargets, perCheckTimeout);
    }

    // Verdict is the conjunction over the checks that actually ran.
    let passed = true;
    for (const outcome of [details.build_check_test, details.runtime_demo, details.adversarial_probe]) {
        if (outcome !== undefined && !outcome.passed) {
            passed = false;
            break;
        }
    }

    return {
        passed,
        work_order_id,
        details,
        total_duration_ms: Date.now() - startedAt,
        timestamp,
    };
}
|
|
133
|
+
//# sourceMappingURL=real-check-runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"real-check-runner.js","sourceRoot":"","sources":["../src/real-check-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAqB,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAgD9D;;;;;GAKG;AACH,SAAS,QAAQ,CAAC,GAAW,EAAE,GAAW,EAAE,SAAiB;IAC5D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,IAAI,KAAmB,CAAC;QACxB,IAAI,CAAC;YACJ,KAAK,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,MAAM,CAAC,GAAG,CAAC,CAAC;YACZ,OAAO;QACR,CAAC;QAED,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YAC1C,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YAC1C,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC7B,IAAI,CAAC;gBACJ,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,CAAC;YAAC,MAAM,CAAC;gBACR,4DAA4D;YAC7D,CAAC;QACF,CAAC,EAAE,SAAS,CAAC,CAAC;QAEd,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACzB,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,CAAC,GAAG,CAAC,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YAClC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YACtC,mEAAmE;YACnE,8DAA8D;YAC9D,MAAM,QAAQ,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC5C,OAAO,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QACnD,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,MAAM,CAAC,GAAW,EAAE,SAAiB;IACnD,uEAAuE;IACvE,MAAM,CAAC,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,2BAA2B,EAAE,SAAS,CAAC,CAAC;IACtE,OAAO;QACN,MAAM,EAAE,CAAC,CAAC,QAAQ,KAAK,CAAC;QACxB,SAAS,EAAE,CAAC,CAAC,QAAQ;QACrB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,WAAW,EAAE,CAAC,CAAC,UAAU;KACzB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,KAAK,CACnB,GAAW,EACX,IAA
8C,EAC9C,SAAiB;IAEjB,MAAM,CAAC,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;IAC1D,4EAA4E;IAC5E,2EAA2E;IAC3E,qDAAqD;IACrD,MAAM,MAAM,GAAG,CAAC,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACpE,OAAO;QACN,MAAM;QACN,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,WAAW,EAAE,CAAC,CAAC,UAAU;KACzB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,KAAK,CAAC,GAAW,EAAE,OAAiB,EAAE,SAAiB;IACrE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,UAAU,GAA4C,EAAE,CAAC;IAC/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,4EAA4E;QAC5E,yDAAyD;QACzD,MAAM,CAAC,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,YAAY,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACjF,UAAU,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACnF,OAAO;QACN,MAAM;QACN,UAAU;QACV,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC/B,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAClC,GAAW,EACX,aAAqB,EACrB,QAA2B,EAC3B,IAAqC;IAErC,MAAM,SAAS,GAAG,IAAI,EAAE,iBAAiB,IAAI,OAAO,CAAC;IACrD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;IAChD,MAAM,OAAO,GAA+B,EAAE,CAAC;IAE/C,IAAI,QAAQ,CAAC,gBAAgB,KAAK,IAAI,EAAE,CAAC;QACxC,OAAO,CAAC,gBAAgB,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;QAC3B,OAAO,CAAC,YAAY,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,QAAQ,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,QAAQ,CAAC,iBAAiB,EAAE,OAAO,IAAI,QAAQ,CAAC,iBAAiB,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1F,OAAO,CAAC,iBAAiB,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,QAAQ,CAAC,iBAAiB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAC7F,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,gBAAgB,EAAE,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,iBAAiB,CAAC,CAAC,MAAM,CAClG,CAAC,CAAC,EAA0E,EAAE,CAAC,CAAC,KAAK,SAAS,CAC9F,CAAC;IACF,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,
CAAC,CAAC;IAE/C,OAAO;QACN,MAAM;QACN,aAAa;QACb,OAAO;QACP,iBAAiB,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;QACrC,SAAS;KACT,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* run-real-checks Pi tool — loads a work-order by id from the substrate
|
|
3
|
+
* `work-orders` block (TASK-088 schema) and invokes the real-check-runner
|
|
4
|
+
* (TASK-090). The returned RealCheckResult carries the deterministic
|
|
5
|
+
* verdict the orchestrator inspects — never the executing agent's
|
|
6
|
+
* self-report (FGAP-102 + DEC-0047 clause 5).
|
|
7
|
+
*/
|
|
8
|
+
import { Type } from "@earendil-works/pi-ai";
|
|
9
|
+
import type { AgentToolResult, AgentToolUpdateCallback, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
10
|
+
import { type RealCheckResult } from "./real-check-runner.js";
|
|
11
|
+
/**
* Error thrown by the `run-real-checks` tool when no entry in the `work-orders`
* block has the requested id. The implementation sets `name` to
* "WorkOrderNotFoundError" and embeds the missing id in the message.
*/
export declare class WorkOrderNotFoundError extends Error {
|
|
12
|
+
/** @param workOrderId - The work-order id that could not be found. */
constructor(workOrderId: string);
|
|
13
|
+
}
|
|
14
|
+
/**
* Pi tool descriptor for `run-real-checks`: static metadata, the parameter
* schema, and the `execute` handler that runs a work-order's real-checks.
*/
export declare const runRealChecksTool: {
|
|
15
|
+
name: string;
|
|
16
|
+
label: string;
|
|
17
|
+
description: string;
|
|
18
|
+
promptSnippet: string;
|
|
19
|
+
/** Parameter schema: a required work-order id and an optional per-check timeout. */
parameters: Type.TObject<{
|
|
20
|
+
work_order_id: Type.TString;
|
|
21
|
+
max_check_time_ms: Type.TOptional<Type.TNumber>;
|
|
22
|
+
}>;
|
|
23
|
+
/**
* Reads the `work-orders` block under `ctx.cwd`, finds the entry whose `id`
* equals `params.work_order_id`, and runs its `real_check_criteria` (an empty
* criteria object when the work-order declares none). Throws
* WorkOrderNotFoundError when no entry matches. The implementation in this
* package does not read `_toolCallId`, `_signal` or `_onUpdate`.
*
* @returns The RealCheckResult as `details`, plus a one-line PASSED/FAILED text summary.
*/
execute(_toolCallId: string, params: {
|
|
24
|
+
work_order_id: string;
|
|
25
|
+
max_check_time_ms?: number;
|
|
26
|
+
}, _signal: AbortSignal, _onUpdate: AgentToolUpdateCallback, ctx: ExtensionContext): Promise<AgentToolResult<RealCheckResult>>;
|
|
27
|
+
};
|
|
28
|
+
//# sourceMappingURL=run-real-checks-tool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-real-checks-tool.d.ts","sourceRoot":"","sources":["../src/run-real-checks-tool.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,IAAI,EAAE,MAAM,uBAAuB,CAAC;AAC7C,OAAO,KAAK,EAAE,eAAe,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AAClH,OAAO,EAA0B,KAAK,eAAe,EAAiB,MAAM,wBAAwB,CAAC;AAOrG,qBAAa,sBAAuB,SAAQ,KAAK;gBACpC,WAAW,EAAE,MAAM;CAI/B;AAED,eAAO,MAAM,iBAAiB;;;;;;;;;yBAef,MAAM,UACX;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,iBAAiB,CAAC,EAAE,MAAM,CAAA;KAAE,WACpD,WAAW,aACT,uBAAuB,OAC7B,gBAAgB,GACnB,OAAO,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;CAmB5C,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* run-real-checks Pi tool — loads a work-order by id from the substrate
|
|
3
|
+
* `work-orders` block (TASK-088 schema) and invokes the real-check-runner
|
|
4
|
+
* (TASK-090). The returned RealCheckResult carries the deterministic
|
|
5
|
+
* verdict the orchestrator inspects — never the executing agent's
|
|
6
|
+
* self-report (FGAP-102 + DEC-0047 clause 5).
|
|
7
|
+
*/
|
|
8
|
+
import { readBlock } from "@davidorex/pi-context/block-api";
|
|
9
|
+
import { Type } from "@earendil-works/pi-ai";
|
|
10
|
+
import { runRealChecks } from "./real-check-runner.js";
|
|
11
|
+
/**
 * Raised by the `run-real-checks` tool when the requested work-order id has no
 * matching entry in the `work-orders` block.
 */
export class WorkOrderNotFoundError extends Error {
    /**
     * @param {string} workOrderId - The id that was looked up and not found.
     */
    constructor(workOrderId) {
        const message = `run-real-checks: work-order '${workOrderId}' not found in work-orders block`;
        super(message);
        // Override the inherited "Error" so logs and callers can tell this failure apart.
        this.name = "WorkOrderNotFoundError";
    }
}
|
|
17
|
+
/**
 * Pi tool `run-real-checks`: looks a work-order up by id in the `work-orders`
 * block and runs the real-checks it declares, returning the runner's
 * structured result alongside a one-line text summary.
 */
export const runRealChecksTool = {
    name: "run-real-checks",
    label: "Run Real Checks",
    description: "Run the deterministic real-checks declared on a work-order (build/check/test exit + runtime-demo + adversarial-probe). Returns a structured RealCheckResult. NEVER LLM self-report; verdict is the actual exit code.",
    promptSnippet: "Run a work-order's declared real-checks for verdict gating.",
    parameters: Type.Object({
        work_order_id: Type.String({
            description: "ID of the work-order whose real_check_criteria to run (e.g. 'WO-NNN').",
        }),
        max_check_time_ms: Type.Optional(Type.Number({ description: "Max total time per check in milliseconds. Defaults to 600000 (10 minutes)." })),
    }),
    /**
     * @param {string} _toolCallId - Unused.
     * @param {{work_order_id: string, max_check_time_ms?: number}} params - Tool arguments.
     * @param {AbortSignal} _signal - Unused.
     * @param {Function} _onUpdate - Unused.
     * @param {{cwd: string}} ctx - Supplies the directory holding the block and in which checks run.
     * @returns {Promise<{details: object, content: Array<{type: string, text: string}>}>}
     * @throws {WorkOrderNotFoundError} When no work-order has the requested id.
     */
    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
        const block = readBlock(ctx.cwd, "work-orders");
        const workOrder = block.work_orders.find((candidate) => candidate.id === params.work_order_id);
        if (!workOrder) {
            throw new WorkOrderNotFoundError(params.work_order_id);
        }
        // A work-order with no criteria declares no gate; the runner treats {} as a pass.
        const criteria = workOrder.real_check_criteria ?? {};
        const runnerOptions = { max_check_time_ms: params.max_check_time_ms };
        const result = await runRealChecks(ctx.cwd, workOrder.id, criteria, runnerOptions);
        const verdict = result.passed ? "PASSED" : "FAILED";
        const summary = {
            type: "text",
            text: `run-real-checks ${workOrder.id}: ${verdict} (${result.total_duration_ms}ms)`,
        };
        return {
            details: result,
            content: [summary],
        };
    },
};
|
|
47
|
+
//# sourceMappingURL=run-real-checks-tool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-real-checks-tool.js","sourceRoot":"","sources":["../src/run-real-checks-tool.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,iCAAiC,CAAC;AAC5D,OAAO,EAAE,IAAI,EAAE,MAAM,uBAAuB,CAAC;AAE7C,OAAO,EAAgD,aAAa,EAAE,MAAM,wBAAwB,CAAC;AAOrG,MAAM,OAAO,sBAAuB,SAAQ,KAAK;IAChD,YAAY,WAAmB;QAC9B,KAAK,CAAC,gCAAgC,WAAW,kCAAkC,CAAC,CAAC;QACrF,IAAI,CAAC,IAAI,GAAG,wBAAwB,CAAC;IACtC,CAAC;CACD;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAChC,IAAI,EAAE,iBAAiB;IACvB,KAAK,EAAE,iBAAiB;IACxB,WAAW,EACV,sNAAsN;IACvN,aAAa,EAAE,6DAA6D;IAC5E,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC;QACvB,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC;YAC1B,WAAW,EAAE,wEAAwE;SACrF,CAAC;QACF,iBAAiB,EAAE,IAAI,CAAC,QAAQ,CAC/B,IAAI,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,4EAA4E,EAAE,CAAC,CAC1G;KACD,CAAC;IACF,KAAK,CAAC,OAAO,CACZ,WAAmB,EACnB,MAA6D,EAC7D,OAAoB,EACpB,SAAkC,EAClC,GAAqB;QAErB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,aAAa,CAAiC,CAAC;QAC/E,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,MAAM,CAAC,aAAa,CAAC,CAAC;QACvE,IAAI,CAAC,EAAE;YAAE,MAAM,IAAI,sBAAsB,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEhE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,mBAAmB,IAAI,EAAE,EAAE;YAChF,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;SAC3C,CAAC,CAAC;QAEH,OAAO;YACN,OAAO,EAAE,MAAM;YACf,OAAO,EAAE;gBACR;oBACC,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,mBAAmB,EAAE,CAAC,EAAE,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,iBAAiB,KAAK;iBACxG;aACD;SACD,CAAC;IACH,CAAC;CACD,CAAC"}
|