golden-hoop-spell-opencode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +184 -0
- package/package.json +51 -0
- package/shared/SPIKE_RESULTS.md +597 -0
- package/shared/agents/ghs-context-haiku.md.template +124 -0
- package/shared/agents/ghs-plan-designer.md.template +128 -0
- package/shared/agents/ghs-plan-reviewer.md.template +170 -0
- package/shared/assets/features.json +67 -0
- package/shared/assets/progress.md +35 -0
- package/shared/ghs.default.json +7 -0
- package/shared/ghs.default.json.notes.md +34 -0
- package/shared/ghs.json.example +7 -0
- package/shared/opencode.json.example +11 -0
- package/shared/references/coding-agent.md +533 -0
- package/shared/references/context-snapshot-guide.md +98 -0
- package/shared/references/examples.md +299 -0
- package/shared/references/plan-designer.md +163 -0
- package/shared/references/plan-reviewer.md +193 -0
- package/shared/references/sprint-agent.md +261 -0
- package/src/index.ts +9 -0
- package/src/lib/assets.ts +31 -0
- package/src/lib/codegraph.ts +66 -0
- package/src/lib/config.ts +278 -0
- package/src/lib/nonce.ts +56 -0
- package/src/lib/parse.ts +175 -0
- package/src/lib/paths.ts +26 -0
- package/src/lib/project.ts +28 -0
- package/src/lib/scripts/append-progress-session.ts +178 -0
- package/src/lib/scripts/append-sprint.ts +121 -0
- package/src/lib/scripts/archive-sprint.ts +583 -0
- package/src/lib/scripts/init-project.ts +291 -0
- package/src/lib/scripts/parallel-utils.ts +380 -0
- package/src/lib/scripts/parse-completion-signal.ts +584 -0
- package/src/lib/scripts/parse-delimited-output.ts +632 -0
- package/src/lib/scripts/resolve-project-dir.ts +130 -0
- package/src/lib/scripts/status.ts +292 -0
- package/src/lib/scripts/update-feature-status.ts +169 -0
- package/src/lib/scripts/validate-structure.ts +290 -0
- package/src/lib/state.ts +305 -0
- package/src/plugin.ts +76 -0
- package/src/prompts/context-codegraph.ts +65 -0
- package/src/prompts/context-grep.ts +68 -0
- package/src/prompts/feature-impl.ts +78 -0
- package/src/prompts/plan-designer.ts +59 -0
- package/src/prompts/plan-reviewer.ts +61 -0
- package/src/prompts/sprint-planning.ts +47 -0
- package/src/tools/archive.ts +278 -0
- package/src/tools/code.ts +448 -0
- package/src/tools/config.ts +182 -0
- package/src/tools/force-archive.ts +195 -0
- package/src/tools/init.ts +193 -0
- package/src/tools/plan-finalize.ts +333 -0
- package/src/tools/plan-review.ts +759 -0
- package/src/tools/plan-start.ts +232 -0
- package/src/tools/sprint.ts +213 -0
- package/src/tools/status.ts +51 -0
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
// Port of golden-hoop-spell/plugin/shared/scripts/parse_delimited_output.py.
|
|
2
|
+
//
|
|
3
|
+
// Behavior source-of-truth:
|
|
4
|
+
// /Users/tom/github/golden-hoop-spell/plugin/shared/scripts/parse_delimited_output.py
|
|
5
|
+
//
|
|
6
|
+
// Faithful port notes (plan §3.4 D4 — line-by-line port):
|
|
7
|
+
// - The Python source is both a library (`parse_raw` + strategy helpers) and
|
|
8
|
+
// a CLI wrapper. We port the *library* core verbatim; the CLI layer
|
|
9
|
+
// (argparse / stdin / file IO) is intentionally omitted because the
|
|
10
|
+
// OpenCode plugin consumes this as an in-process TS module, not a
|
|
11
|
+
// subprocess. The plan-dispatcher tools (`ghs-plan-review` etc.) call
|
|
12
|
+
// `parseDelimitedOutput()` directly.
|
|
13
|
+
// - The 4-strategy cascade (exact_delimiter → normalized_delimiter →
|
|
14
|
+
// code_fence → whole_body) and the empty-vs-malformed distinction are
|
|
15
|
+
// preserved exactly.
|
|
16
|
+
// - Regex port hazards (plan §5 risk row "JS 正则与 Python re 的细微差异"):
|
|
17
|
+
// * Python inline flag group `(?i:_?START)` has no JS equivalent.
|
|
18
|
+
// We approximate by compiling the whole token pattern with the `/i`
|
|
19
|
+
// flag. Functionally equivalent for the inputs we see (the token name
|
|
20
|
+
// and the optional `_START` suffix are the only parts that need
|
|
21
|
+
// case-insensitivity; the bracket character classes are unaffected).
|
|
22
|
+
// * Python `re.DOTALL` → JS `/s` flag. `_strip_thinking` and the
|
|
23
|
+
// code-fence pattern both rely on `.` matching newlines — both use
|
|
24
|
+
// `/s` here.
|
|
25
|
+
// * Python `re.MULTILINE` → JS `/m` flag (completion-signal stripper,
|
|
26
|
+
// code-fence line anchoring).
|
|
27
|
+
// * Python `\b` is Unicode-aware; JS `\b` is ASCII-only. The completion
|
|
28
|
+
// signal is always ASCII uppercase (`PLAN DESIGN COMPLETE`, etc.) so
|
|
29
|
+
// the boundary semantics coincide for every real input.
|
|
30
|
+
// * Python `re.escape` escapes a superset of JS special chars, but for
|
|
31
|
+
// the inputs we pass (literal delimiters like `<<<PLAN_START>>>` and
|
|
32
|
+
// signal phrases) the escaped forms are identical. We use a small
|
|
33
|
+
// `escapeRegex` helper that escapes every char JS treats as special.
|
|
34
|
+
// - JSON output: the Python CLI serialises with `json.dumps(result,
|
|
35
|
+
// ensure_ascii=False, indent=2)`. The equivalence test compares the
|
|
36
|
+
// *parsed* result object (not the serialised string), so we return a plain
|
|
37
|
+
// object; a `serializeResult()` helper is provided for callers that need
|
|
38
|
+
// the exact byte stream (uses `JSON.stringify(..., null, 2)`).
|
|
39
|
+
// - Style follows s1-feat-008: no `process.exit`, no `console.log`,
|
|
40
|
+
// functions are pure (no FS / subprocess side effects).
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Escape every character that has special meaning in a JavaScript regular
|
|
44
|
+
* expression, so a literal string can be embedded inside a `RegExp`.
|
|
45
|
+
*
|
|
46
|
+
* This mirrors the intent of Python's `re.escape`: the escaped result matches
|
|
47
|
+
* the input verbatim. JS escapes a slightly smaller set of metacharacters
|
|
48
|
+
* than Python, but for the inputs this module passes (ASCII delimiters and
|
|
49
|
+
* signal phrases) the escaped forms are byte-identical.
|
|
50
|
+
*/
|
|
51
|
+
function escapeRegex(literal: string): string {
|
|
52
|
+
// Escape anything that is not a word character. This is a conservative
|
|
53
|
+
// superset of the JS regex metacharacters and is safe — escaping a normal
|
|
54
|
+
// char is a no-op for matching purposes.
|
|
55
|
+
return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Constants — mirror the Python module-level globals.
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
/** Token name per `kind` value (used to derive the delimiter labels). */
|
|
63
|
+
const TOKEN_BY_KIND: Record<string, string> = {
|
|
64
|
+
plan: "PLAN",
|
|
65
|
+
review: "REVIEW",
|
|
66
|
+
context_snapshot: "CONTEXT_SNAPSHOT",
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
/** Default min length when `minLength` is not provided. */
|
|
70
|
+
const DEFAULT_MIN_LENGTH = 200;
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Regex that pulls `Verdict: PASS` or `Verdict: FAIL` out of a review signal
|
|
74
|
+
* line.
|
|
75
|
+
*
|
|
76
|
+
* Python: `re.compile(r"Verdict:\s*(PASS|FAIL)")` (no flags).
|
|
77
|
+
*/
|
|
78
|
+
const VERDICT_RE = /Verdict:\s*(PASS|FAIL)/;
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Marker that plan-designer prints at the end of its response; everything
|
|
82
|
+
* after it is metadata that must not leak into the extracted content.
|
|
83
|
+
*/
|
|
84
|
+
const ADDITIONAL_FILES_READ_MARKER = "ADDITIONAL FILES READ:";
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Regex variants used by STRATEGY 2 normalized_delimiter. Kept as plain
|
|
88
|
+
* strings so the token name can be interpolated before compilation, mirroring
|
|
89
|
+
* the Python module-level constants.
|
|
90
|
+
*/
|
|
91
|
+
const _NORMALIZED_TOKEN_LEFT = "[<《「〖]+\\s*";
|
|
92
|
+
const _NORMALIZED_TOKEN_RIGHT = "\\s*[>》」〗]+";
|
|
93
|
+
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
// Result types.
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
export type ParseStatus = "ok" | "fallback_used" | "empty" | "malformed";
|
|
99
|
+
|
|
100
|
+
export type ParseStrategy =
|
|
101
|
+
| "exact_delimiter"
|
|
102
|
+
| "normalized_delimiter"
|
|
103
|
+
| "code_fence"
|
|
104
|
+
| "whole_body"
|
|
105
|
+
| "none";
|
|
106
|
+
|
|
107
|
+
export type Verdict = "PASS" | "FAIL" | null;
|
|
108
|
+
|
|
109
|
+
export interface ParseResultMeta {
|
|
110
|
+
kind: string;
|
|
111
|
+
input_length: number;
|
|
112
|
+
content_length: number;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
export interface ParseResult {
|
|
116
|
+
status: ParseStatus;
|
|
117
|
+
content: string;
|
|
118
|
+
strategy: ParseStrategy;
|
|
119
|
+
completion_signal: string | null;
|
|
120
|
+
verdict: Verdict;
|
|
121
|
+
warnings: string[];
|
|
122
|
+
meta: ParseResultMeta;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
// Helpers — 1:1 ports of the Python `_strip_*` / `_extract_*` functions.
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Strip legacy `<thinking>...</thinking>` and `<antml:thinking>...</antml:thinking>`
|
|
131
|
+
* blocks.
|
|
132
|
+
*
|
|
133
|
+
* Port of `_strip_thinking`. Python compiles
|
|
134
|
+
* `r"<(?:antml:)?thinking>.*?</(?:antml:)?thinking>"` with `re.DOTALL |
|
|
135
|
+
* re.IGNORECASE`. JS equivalent: `/.../gis` (the `s` flag makes `.` match
|
|
136
|
+
* newlines; `i` is case-insensitive; the non-greedy `.*?` is preserved).
|
|
137
|
+
*
|
|
138
|
+
* The second regex strips a stray unclosed opening tag (rare but seen) by
|
|
139
|
+
* removing everything after it.
|
|
140
|
+
*/
|
|
141
|
+
function stripThinking(text: string): string {
|
|
142
|
+
const closed = text.replace(
|
|
143
|
+
/<(?:antml:)?thinking>.*?<\/(?:antml:)?thinking>/gis,
|
|
144
|
+
"",
|
|
145
|
+
);
|
|
146
|
+
return closed.replace(/<(?:antml:)?thinking>.*/gis, "");
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Strip the completion-signal line from text, returning `[cleanedText,
|
|
151
|
+
* signalLine]`.
|
|
152
|
+
*
|
|
153
|
+
* Port of `_strip_completion_signal`. Python compiles
|
|
154
|
+
* `r"^[ \t]*" + re.escape(signal) + r"\b.*$"` with `re.MULTILINE`. The entire
|
|
155
|
+
* matching line (including any trailing variables like
|
|
156
|
+
* `| Verdict: PASS | Severe: 0 ...`) is captured and returned.
|
|
157
|
+
*
|
|
158
|
+
* JS note: `.*` without the `s` flag stops at the first newline, matching
|
|
159
|
+
* Python's default (no DOTALL) behaviour. `$` with the `m` flag matches end
|
|
160
|
+
* of line. `\b` is ASCII-only in JS but the signal is always ASCII, so the
|
|
161
|
+
* word boundary coincides.
|
|
162
|
+
*/
|
|
163
|
+
function stripCompletionSignal(
|
|
164
|
+
text: string,
|
|
165
|
+
signal: string,
|
|
166
|
+
): [string, string | null] {
|
|
167
|
+
if (!signal) {
|
|
168
|
+
return [text, null];
|
|
169
|
+
}
|
|
170
|
+
const pattern = new RegExp(
|
|
171
|
+
"^[ \\t]*" + escapeRegex(signal) + "\\b.*$",
|
|
172
|
+
"m",
|
|
173
|
+
);
|
|
174
|
+
const match = pattern.exec(text);
|
|
175
|
+
if (!match) {
|
|
176
|
+
return [text, null];
|
|
177
|
+
}
|
|
178
|
+
const signalLine = match[0].trim();
|
|
179
|
+
// Replace the first occurrence only — `pattern` has no `g` flag, but
|
|
180
|
+
// `String.replace` with a non-global RegExp replaces the first match, which
|
|
181
|
+
// matches Python's `pattern.sub("", text)` behaviour when there is exactly
|
|
182
|
+
// one signal line. Python's `re.sub` without a count replaces *all*
|
|
183
|
+
// non-overlapping matches; we mirror that by using the global flag for the
|
|
184
|
+
// substitution pass.
|
|
185
|
+
const globalPattern = new RegExp(
|
|
186
|
+
"^[ \\t]*" + escapeRegex(signal) + "\\b.*$",
|
|
187
|
+
"gm",
|
|
188
|
+
);
|
|
189
|
+
const cleaned = text.replace(globalPattern, "");
|
|
190
|
+
return [cleaned, signalLine];
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Strip everything from the `ADDITIONAL FILES READ:` marker onward.
|
|
195
|
+
*
|
|
196
|
+
* Port of `_strip_additional_files_read`. Uses `String.indexOf` + slice to
|
|
197
|
+
* mirror Python's `text.find(marker)` + slice semantics exactly (a regex
|
|
198
|
+
* would also work but the source uses plain string ops).
|
|
199
|
+
*/
|
|
200
|
+
function stripAdditionalFilesRead(text: string): string {
|
|
201
|
+
const idx = text.indexOf(ADDITIONAL_FILES_READ_MARKER);
|
|
202
|
+
if (idx === -1) {
|
|
203
|
+
return text;
|
|
204
|
+
}
|
|
205
|
+
return text.slice(0, idx).replace(/\s+$/, "");
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Extract `Verdict: PASS|FAIL` from the completion-signal line first, then
|
|
210
|
+
* the raw text tail.
|
|
211
|
+
*
|
|
212
|
+
* Port of `_extract_verdict`. Scans the signal line first; if not found
|
|
213
|
+
* there, falls back to the trailing ~600 chars of the raw input.
|
|
214
|
+
*/
|
|
215
|
+
function extractVerdict(
|
|
216
|
+
signalLine: string | null,
|
|
217
|
+
rawText: string,
|
|
218
|
+
): Verdict {
|
|
219
|
+
if (signalLine) {
|
|
220
|
+
const m = VERDICT_RE.exec(signalLine);
|
|
221
|
+
if (m) {
|
|
222
|
+
return m[1] as "PASS" | "FAIL";
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
const tail = rawText.length > 600 ? rawText.slice(-600) : rawText;
|
|
226
|
+
const m = VERDICT_RE.exec(tail);
|
|
227
|
+
if (m) {
|
|
228
|
+
return m[1] as "PASS" | "FAIL";
|
|
229
|
+
}
|
|
230
|
+
return null;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// ---------------------------------------------------------------------------
|
|
234
|
+
// Strategies. Each returns `[content, warnings]` or `[null, warnings]`.
|
|
235
|
+
// Callers must validate `content.trim().length >= minLength` before accepting.
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
type StrategyResult = [string | null, string[]];
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* STRATEGY 1: literal `<<<X_START>>>...<<<X_END>>>` extraction.
|
|
242
|
+
*
|
|
243
|
+
* Port of `_strategy_exact`. When the raw text contains multiple candidate
|
|
244
|
+
* pairs (e.g. a code fence that quotes the delimiters as string literals
|
|
245
|
+
* followed by the real pair), pick the pair with the largest inner span.
|
|
246
|
+
* `String.indexOf` mirrors Python's `str.find` (-1 sentinel on miss).
|
|
247
|
+
*/
|
|
248
|
+
function strategyExact(
|
|
249
|
+
text: string,
|
|
250
|
+
startToken: string,
|
|
251
|
+
endToken: string,
|
|
252
|
+
): StrategyResult {
|
|
253
|
+
const warnings: string[] = [];
|
|
254
|
+
const startPositions: number[] = [];
|
|
255
|
+
let searchFrom = 0;
|
|
256
|
+
while (true) {
|
|
257
|
+
const idx = text.indexOf(startToken, searchFrom);
|
|
258
|
+
if (idx === -1) {
|
|
259
|
+
break;
|
|
260
|
+
}
|
|
261
|
+
startPositions.push(idx);
|
|
262
|
+
searchFrom = idx + startToken.length;
|
|
263
|
+
}
|
|
264
|
+
if (startPositions.length === 0) {
|
|
265
|
+
return [null, warnings];
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
let bestContent: string | null = null;
|
|
269
|
+
for (const startIdx of startPositions) {
|
|
270
|
+
const contentStart = startIdx + startToken.length;
|
|
271
|
+
const endIdx = text.indexOf(endToken, contentStart);
|
|
272
|
+
if (endIdx === -1) {
|
|
273
|
+
continue;
|
|
274
|
+
}
|
|
275
|
+
const candidate = text.slice(contentStart, endIdx);
|
|
276
|
+
if (bestContent === null || candidate.length > bestContent.length) {
|
|
277
|
+
bestContent = candidate;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
if (bestContent === null) {
|
|
281
|
+
return [null, warnings];
|
|
282
|
+
}
|
|
283
|
+
return [bestContent, warnings];
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
/**
|
|
287
|
+
* STRATEGY 2: tolerate bracket punctuation / whitespace / case variations.
|
|
288
|
+
*
|
|
289
|
+
* Port of `_strategy_normalized`. Matches the kind-specific token only (e.g.
|
|
290
|
+
* `PLAN`) so a response that mixes multiple kinds does not
|
|
291
|
+
* cross-contaminate.
|
|
292
|
+
*
|
|
293
|
+
* Python uses inline `(?i:_?START)` / `(?i:_?END)` flags; JS has no inline
|
|
294
|
+
* flags, so the whole token pattern is compiled with the global `/i` flag.
|
|
295
|
+
* For the inputs this strategy sees (ASCII token names + bracket decoration)
|
|
296
|
+
* the behaviour is identical.
|
|
297
|
+
*/
|
|
298
|
+
function strategyNormalized(
|
|
299
|
+
text: string,
|
|
300
|
+
tokenName: string,
|
|
301
|
+
): StrategyResult {
|
|
302
|
+
const warnings: string[] = [];
|
|
303
|
+
const tokenRe = escapeRegex(tokenName);
|
|
304
|
+
|
|
305
|
+
// `[<《「〖]+` then the token then optional `_?START` then `[>》」〗]+`.
|
|
306
|
+
// Whitespace tolerance via `\s*` on both sides (Python source).
|
|
307
|
+
const startPattern = new RegExp(
|
|
308
|
+
_NORMALIZED_TOKEN_LEFT + tokenRe + "_?START" + _NORMALIZED_TOKEN_RIGHT,
|
|
309
|
+
"i",
|
|
310
|
+
);
|
|
311
|
+
const endPattern = new RegExp(
|
|
312
|
+
_NORMALIZED_TOKEN_LEFT + tokenRe + "_?END" + _NORMALIZED_TOKEN_RIGHT,
|
|
313
|
+
"i",
|
|
314
|
+
);
|
|
315
|
+
|
|
316
|
+
const startMatch = startPattern.exec(text);
|
|
317
|
+
if (!startMatch) {
|
|
318
|
+
return [null, warnings];
|
|
319
|
+
}
|
|
320
|
+
// Search for the END marker from the end of the START match onward.
|
|
321
|
+
endPattern.lastIndex = startMatch.index! + startMatch[0].length;
|
|
322
|
+
const endMatch = endPattern.exec(text);
|
|
323
|
+
if (!endMatch) {
|
|
324
|
+
return [null, warnings];
|
|
325
|
+
}
|
|
326
|
+
const contentStart = startMatch.index! + startMatch[0].length;
|
|
327
|
+
const content = text.slice(contentStart, endMatch.index);
|
|
328
|
+
warnings.push(
|
|
329
|
+
`delimiter normalized: matched START at ${startMatch.index!}-${startMatch.index! + startMatch[0].length}, ` +
|
|
330
|
+
`END at ${endMatch.index}-${endMatch.index + endMatch[0].length}`,
|
|
331
|
+
);
|
|
332
|
+
return [content, warnings];
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
/**
|
|
336
|
+
* STRATEGY 3: take the largest fenced code block.
|
|
337
|
+
*
|
|
338
|
+
* Port of `_strategy_code_fence`. Python pattern:
|
|
339
|
+
* r"(?m)^(?P<fence>`{3,}|~{3,})[^\n]*\n(?P<body>.*?)(?P=fence)[^\n]*$"
|
|
340
|
+
* with `re.DOTALL`. JS port: the named backreference `(?P=fence)` becomes a
|
|
341
|
+
* numbered backreference (`\1`); `re.DOTALL` → `/s`; `(?m)` → `/m`.
|
|
342
|
+
*
|
|
343
|
+
* If the fence itself contains delimiters, defer to STRATEGY 1 on the fenced
|
|
344
|
+
* content so an exact-match win is not masked by fence wrapping.
|
|
345
|
+
*/
|
|
346
|
+
function strategyCodeFence(
|
|
347
|
+
text: string,
|
|
348
|
+
startToken: string,
|
|
349
|
+
endToken: string,
|
|
350
|
+
): StrategyResult {
|
|
351
|
+
const warnings: string[] = [];
|
|
352
|
+
const fencePattern =
|
|
353
|
+
/^(?:(`{3,}|~{3,})[^\n]*\n(.*?)(\1)[^\n]*)$/gms;
|
|
354
|
+
const blocks: RegExpExecArray[] = [];
|
|
355
|
+
let m: RegExpExecArray | null;
|
|
356
|
+
while ((m = fencePattern.exec(text)) !== null) {
|
|
357
|
+
blocks.push(m);
|
|
358
|
+
// Guard against zero-width matches looping forever.
|
|
359
|
+
if (m.index === fencePattern.lastIndex) {
|
|
360
|
+
fencePattern.lastIndex++;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
if (blocks.length === 0) {
|
|
364
|
+
return [null, warnings];
|
|
365
|
+
}
|
|
366
|
+
// Pick the largest block by body (group 2) length.
|
|
367
|
+
let largest = blocks[0];
|
|
368
|
+
for (const b of blocks) {
|
|
369
|
+
if (b[2].length > largest[2].length) {
|
|
370
|
+
largest = b;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
const body = largest[2];
|
|
374
|
+
// If the fenced body contains literal delimiters, try STRATEGY 1 on it.
|
|
375
|
+
const inner = strategyExact(body, startToken, endToken);
|
|
376
|
+
if (inner[0] !== null) {
|
|
377
|
+
warnings.push("code_fence: inner exact-delimiter match used");
|
|
378
|
+
return [inner[0], warnings];
|
|
379
|
+
}
|
|
380
|
+
warnings.push("code_fence: largest fenced block returned as content");
|
|
381
|
+
return [body, warnings];
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
/**
|
|
385
|
+
* STRATEGY 4: take the whole body after stripping thinking / signal / extras.
|
|
386
|
+
*
|
|
387
|
+
* Port of `_strategy_whole_body`. Returns `null` when nothing usable is left
|
|
388
|
+
* after stripping, so the orchestrator does not classify this as a
|
|
389
|
+
* too-short hit (which would yield `empty`); it falls through to `malformed`.
|
|
390
|
+
*/
|
|
391
|
+
function strategyWholeBody(
|
|
392
|
+
text: string,
|
|
393
|
+
completionSignal: string | null,
|
|
394
|
+
): StrategyResult {
|
|
395
|
+
const warnings: string[] = ["whole_body fallback engaged"];
|
|
396
|
+
let cleaned = stripThinking(text);
|
|
397
|
+
// Strip completion signal (we use raw_text for verdict extraction in the
|
|
398
|
+
// orchestrator, so we ignore the returned signal line here — mirrors
|
|
399
|
+
// Python which rebinds `cleaned, _ = ...`).
|
|
400
|
+
const [stripped] = stripCompletionSignal(cleaned, completionSignal ?? "");
|
|
401
|
+
cleaned = stripped;
|
|
402
|
+
cleaned = stripAdditionalFilesRead(cleaned);
|
|
403
|
+
if (cleaned.trim().length === 0) {
|
|
404
|
+
warnings.push("whole_body: nothing left after stripping");
|
|
405
|
+
return [null, warnings];
|
|
406
|
+
}
|
|
407
|
+
return [cleaned, warnings];
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
// ---------------------------------------------------------------------------
|
|
411
|
+
// Orchestrator — port of `parse_raw`.
|
|
412
|
+
// ---------------------------------------------------------------------------
|
|
413
|
+
|
|
414
|
+
/** Options accepted by {@link parseRaw}. */
|
|
415
|
+
export interface ParseRawOptions {
|
|
416
|
+
kind: string;
|
|
417
|
+
startToken: string;
|
|
418
|
+
endToken: string;
|
|
419
|
+
minLength: number;
|
|
420
|
+
completionSignal: string | null;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
/**
|
|
424
|
+
* Parse `rawText` and return the result object.
|
|
425
|
+
*
|
|
426
|
+
* Faithful port of `parse_raw` in the Python source. Tries the 4 strategies
|
|
427
|
+
* in priority order; the first one that yields content whose stripped length
|
|
428
|
+
* is `>= minLength` wins. If a strategy finds something but it is too short,
|
|
429
|
+
* `foundShortContent` becomes true so the final status is `empty` (rather
|
|
430
|
+
* than `malformed`).
|
|
431
|
+
*
|
|
432
|
+
* The completion signal is pre-extracted from the *whole* raw text regardless
|
|
433
|
+
* of which strategy wins, so the dispatcher always sees a stable
|
|
434
|
+
* `completion_signal` field.
|
|
435
|
+
*/
|
|
436
|
+
export function parseRaw(rawText: string, opts: ParseRawOptions): ParseResult {
|
|
437
|
+
const { kind, startToken, endToken } = opts;
|
|
438
|
+
const minLength = opts.minLength;
|
|
439
|
+
const completionSignal = opts.completionSignal;
|
|
440
|
+
const warnings: string[] = [];
|
|
441
|
+
const tokenName = TOKEN_BY_KIND[kind] ?? "";
|
|
442
|
+
|
|
443
|
+
// Pre-extract completion signal from the WHOLE raw text.
|
|
444
|
+
const [, capturedSignalLine] = stripCompletionSignal(
|
|
445
|
+
rawText,
|
|
446
|
+
completionSignal ?? "",
|
|
447
|
+
);
|
|
448
|
+
|
|
449
|
+
// Build the strategy list in priority order. Each entry is `[name, runner]`.
|
|
450
|
+
type StrategyEntry = [ParseStrategy, () => StrategyResult];
|
|
451
|
+
const strategies: StrategyEntry[] = [
|
|
452
|
+
["exact_delimiter", () => strategyExact(rawText, startToken, endToken)],
|
|
453
|
+
];
|
|
454
|
+
if (tokenName) {
|
|
455
|
+
strategies.push([
|
|
456
|
+
"normalized_delimiter",
|
|
457
|
+
() => strategyNormalized(rawText, tokenName),
|
|
458
|
+
]);
|
|
459
|
+
}
|
|
460
|
+
strategies.push([
|
|
461
|
+
"code_fence",
|
|
462
|
+
() => strategyCodeFence(rawText, startToken, endToken),
|
|
463
|
+
]);
|
|
464
|
+
strategies.push([
|
|
465
|
+
"whole_body",
|
|
466
|
+
() => strategyWholeBody(rawText, completionSignal),
|
|
467
|
+
]);
|
|
468
|
+
|
|
469
|
+
let foundShortContent = false;
|
|
470
|
+
|
|
471
|
+
for (const [strategyName, runner] of strategies) {
|
|
472
|
+
const [content, stratWarnings] = runner();
|
|
473
|
+
if (content === null) {
|
|
474
|
+
continue;
|
|
475
|
+
}
|
|
476
|
+
warnings.push(...stratWarnings);
|
|
477
|
+
if (content.trim().length >= minLength) {
|
|
478
|
+
// Success — strip completion signal from the extracted content too so
|
|
479
|
+
// a strategy that includes the trailing signal line still produces
|
|
480
|
+
// clean output.
|
|
481
|
+
const [strippedContent] = stripCompletionSignal(
|
|
482
|
+
content,
|
|
483
|
+
completionSignal ?? "",
|
|
484
|
+
);
|
|
485
|
+
let finalContent = strippedContent;
|
|
486
|
+
finalContent = stripAdditionalFilesRead(finalContent);
|
|
487
|
+
const trimmed = finalContent.trim();
|
|
488
|
+
const status: ParseStatus =
|
|
489
|
+
strategyName === "exact_delimiter" ? "ok" : "fallback_used";
|
|
490
|
+
const verdict: Verdict =
|
|
491
|
+
kind === "review"
|
|
492
|
+
? extractVerdict(capturedSignalLine, rawText)
|
|
493
|
+
: null;
|
|
494
|
+
return {
|
|
495
|
+
status,
|
|
496
|
+
content: trimmed,
|
|
497
|
+
strategy: strategyName,
|
|
498
|
+
completion_signal: capturedSignalLine,
|
|
499
|
+
verdict,
|
|
500
|
+
warnings,
|
|
501
|
+
meta: {
|
|
502
|
+
kind,
|
|
503
|
+
input_length: rawText.length,
|
|
504
|
+
// Python computes `len(content.strip())` AFTER the post-strip pass,
|
|
505
|
+
// i.e. on the same `trimmed` value we return as `content`.
|
|
506
|
+
content_length: trimmed.length,
|
|
507
|
+
},
|
|
508
|
+
};
|
|
509
|
+
}
|
|
510
|
+
// Strategy found something but too short.
|
|
511
|
+
foundShortContent = true;
|
|
512
|
+
warnings.push(
|
|
513
|
+
`${strategyName}: extracted content too short ` +
|
|
514
|
+
`(${content.trim().length} < ${minLength})`,
|
|
515
|
+
);
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
// No strategy produced acceptable content.
|
|
519
|
+
const status: ParseStatus = foundShortContent ? "empty" : "malformed";
|
|
520
|
+
const verdict: Verdict =
|
|
521
|
+
kind === "review"
|
|
522
|
+
? extractVerdict(capturedSignalLine, rawText)
|
|
523
|
+
: null;
|
|
524
|
+
return {
|
|
525
|
+
status,
|
|
526
|
+
content: "",
|
|
527
|
+
strategy: "none",
|
|
528
|
+
completion_signal: capturedSignalLine,
|
|
529
|
+
verdict,
|
|
530
|
+
warnings,
|
|
531
|
+
meta: {
|
|
532
|
+
kind,
|
|
533
|
+
input_length: rawText.length,
|
|
534
|
+
content_length: 0,
|
|
535
|
+
},
|
|
536
|
+
};
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// ---------------------------------------------------------------------------
|
|
540
|
+
// Public convenience API.
|
|
541
|
+
// ---------------------------------------------------------------------------
|
|
542
|
+
|
|
543
|
+
/**
|
|
544
|
+
* Default options resolver — mirrors the Python CLI's `_resolve_tokens`.
|
|
545
|
+
*
|
|
546
|
+
* Given a `kind`, derives the `<<<KIND_START>>>` / `<<<KIND_END>>>` token
|
|
547
|
+
* pair. `generic` kind requires explicit `startToken` / `endToken`.
|
|
548
|
+
*/
|
|
549
|
+
function resolveTokens(
|
|
550
|
+
kind: string,
|
|
551
|
+
startToken?: string,
|
|
552
|
+
endToken?: string,
|
|
553
|
+
): { startToken: string; endToken: string } {
|
|
554
|
+
if (startToken && endToken) {
|
|
555
|
+
return { startToken, endToken };
|
|
556
|
+
}
|
|
557
|
+
if (kind === "generic") {
|
|
558
|
+
throw new Error(
|
|
559
|
+
"startToken and endToken are required when kind=generic",
|
|
560
|
+
);
|
|
561
|
+
}
|
|
562
|
+
const tokenName = TOKEN_BY_KIND[kind];
|
|
563
|
+
if (tokenName === undefined) {
|
|
564
|
+
throw new Error(`unknown kind value: ${JSON.stringify(kind)}`);
|
|
565
|
+
}
|
|
566
|
+
return {
|
|
567
|
+
startToken: `<<<${tokenName}_START>>>`,
|
|
568
|
+
endToken: `<<<${tokenName}_END>>>`,
|
|
569
|
+
};
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
/** Arguments accepted by {@link parseDelimitedOutput}. */
|
|
573
|
+
export interface ParseDelimitedOutputArgs {
|
|
574
|
+
/** Delimiter family. One of `plan` / `review` / `context_snapshot` / `generic`. */
|
|
575
|
+
kind?: string;
|
|
576
|
+
/** Explicit start delimiter (overrides `kind`; required for `generic`). */
|
|
577
|
+
startToken?: string;
|
|
578
|
+
/** Explicit end delimiter (overrides `kind`; required for `generic`). */
|
|
579
|
+
endToken?: string;
|
|
580
|
+
/** Minimum acceptable stripped content length (default 200). */
|
|
581
|
+
minLength?: number;
|
|
582
|
+
/** Completion-signal prefix to detect and strip (line-anchored). */
|
|
583
|
+
completionSignal?: string | null;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
/**
|
|
587
|
+
* Parse delimiter-based subagent output with multiple fallback strategies.
|
|
588
|
+
*
|
|
589
|
+
* This is the primary public entry point — a thin wrapper around
|
|
590
|
+
* {@link parseRaw} that resolves the kind-specific tokens and applies the
|
|
591
|
+
* default min length. Behaviour is equivalent to invoking the Python
|
|
592
|
+
* `parse_delimited_output.py` CLI with the same arguments.
|
|
593
|
+
*
|
|
594
|
+
* @example
|
|
595
|
+
* parseDelimitedOutput("<<<PLAN_START>>>\n...\n<<<PLAN_END>>>", {
|
|
596
|
+
* kind: "plan",
|
|
597
|
+
* completionSignal: "PLAN DESIGN COMPLETE",
|
|
598
|
+
* });
|
|
599
|
+
*
|
|
600
|
+
* @param text - raw text emitted by the subagent.
|
|
601
|
+
* @param args - parsing options (all optional; sensible defaults apply).
|
|
602
|
+
*/
|
|
603
|
+
export function parseDelimitedOutput(
|
|
604
|
+
text: string,
|
|
605
|
+
args: ParseDelimitedOutputArgs = {},
|
|
606
|
+
): ParseResult {
|
|
607
|
+
const kind = args.kind ?? "generic";
|
|
608
|
+
const { startToken, endToken } = resolveTokens(
|
|
609
|
+
kind,
|
|
610
|
+
args.startToken,
|
|
611
|
+
args.endToken,
|
|
612
|
+
);
|
|
613
|
+
return parseRaw(text, {
|
|
614
|
+
kind,
|
|
615
|
+
startToken,
|
|
616
|
+
endToken,
|
|
617
|
+
minLength: args.minLength ?? DEFAULT_MIN_LENGTH,
|
|
618
|
+
completionSignal: args.completionSignal ?? null,
|
|
619
|
+
});
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
/**
|
|
623
|
+
* Serialise a {@link ParseResult} to the exact JSON byte stream the Python
|
|
624
|
+
* CLI emits (`json.dumps(result, ensure_ascii=False, indent=2)`).
|
|
625
|
+
*
|
|
626
|
+
* Provided for callers that need byte-level equivalence with the source
|
|
627
|
+
* script's stdout (e.g. the equivalence test suite). Runtime tool callers
|
|
628
|
+
* consume the {@link ParseResult} object directly.
|
|
629
|
+
*/
|
|
630
|
+
export function serializeResult(result: ParseResult): string {
|
|
631
|
+
return JSON.stringify(result, null, 2);
|
|
632
|
+
}
|