imprint-mcp 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/README.md +165 -201
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/package.json +3 -2
  61. package/prompts/audit-agent.md +71 -0
  62. package/prompts/build-planning.md +74 -0
  63. package/prompts/compile-agent.md +131 -27
  64. package/prompts/prereq-builder.md +64 -0
  65. package/prompts/prereq-planner.md +34 -0
  66. package/prompts/tool-planning.md +39 -0
  67. package/src/cli.ts +109 -2
  68. package/src/imprint/agent.ts +5 -0
  69. package/src/imprint/audit.ts +996 -0
  70. package/src/imprint/backend-ladder.ts +1214 -184
  71. package/src/imprint/build-plan.ts +1051 -0
  72. package/src/imprint/cdp-browser-fetch.ts +589 -0
  73. package/src/imprint/cdp-jar-cache.ts +320 -0
  74. package/src/imprint/chromium.ts +135 -0
  75. package/src/imprint/claude-cli-compile.ts +125 -25
  76. package/src/imprint/codex-cli-compile.ts +26 -23
  77. package/src/imprint/compile-agent-types.ts +38 -0
  78. package/src/imprint/compile-agent.ts +63 -25
  79. package/src/imprint/compile-tools.ts +1656 -64
  80. package/src/imprint/compile.ts +13 -1
  81. package/src/imprint/concurrency.ts +87 -0
  82. package/src/imprint/cron.ts +1 -0
  83. package/src/imprint/doctor.ts +39 -0
  84. package/src/imprint/freeform-redact.ts +5 -4
  85. package/src/imprint/integrations.ts +2 -2
  86. package/src/imprint/llm.ts +56 -8
  87. package/src/imprint/mcp-compile-server.ts +43 -10
  88. package/src/imprint/mcp-maintenance.ts +9 -101
  89. package/src/imprint/mcp-server.ts +73 -7
  90. package/src/imprint/multi-progress.ts +7 -2
  91. package/src/imprint/param-grounding.ts +367 -0
  92. package/src/imprint/paths.ts +29 -0
  93. package/src/imprint/playbook-runner.ts +101 -40
  94. package/src/imprint/prereq-builder.ts +651 -0
  95. package/src/imprint/probe-backends.ts +6 -3
  96. package/src/imprint/record.ts +10 -1
  97. package/src/imprint/redact.ts +30 -2
  98. package/src/imprint/replay-capture.ts +19 -18
  99. package/src/imprint/runtime.ts +19 -10
  100. package/src/imprint/session-diff.ts +79 -2
  101. package/src/imprint/session-merge.ts +9 -5
  102. package/src/imprint/stealth-chromium.ts +81 -0
  103. package/src/imprint/stealth-fetch.ts +309 -29
  104. package/src/imprint/stealth-token-cache.ts +88 -0
  105. package/src/imprint/teach-plan.ts +251 -0
  106. package/src/imprint/teach-state.ts +10 -0
  107. package/src/imprint/teach.ts +456 -142
  108. package/src/imprint/tool-candidates.ts +72 -14
  109. package/src/imprint/tool-plan.ts +313 -0
  110. package/src/imprint/tracing.ts +135 -6
  111. package/src/imprint/types.ts +61 -3
  112. package/examples/google-flights/search_google_flights/index.ts +0 -101
  113. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  114. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  115. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  116. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  117. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  118. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  119. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  120. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  121. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  122. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  123. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  124. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  125. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  126. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -0,0 +1,34 @@
1
+ You are the PLANNER for ONE shared TypeScript module that several generated tools (compiled from the same browser recording of one site) will import. A second agent will WRITE the module + its test by following your plan. Your job is to remove the guesswork before any code exists: decode the recorded data, fix the algorithm, and call out every strict-typing hazard up front. A precise plan is what makes the implementation pass on the first attempt instead of burning verification cycles.
2
+
3
+ ## Input
4
+
5
+ You receive `{ site, url, module, availableDependencies, sources }`:
6
+
7
+ - `module` — `{ path, kind, purpose, exportSignatures, spec, dependsOn }`. The implementer must produce exactly these exports.
8
+ - `sources[]` — recorded requests that ground the behavior: `{ seq, method, url, requestHeaders, requestBody, status, mimeType, responseBody }`. These are the ground truth — decode them, do not guess.
9
+ - `availableDependencies[]` — already-built shared modules this one may import.
10
+
11
+ ## Output
12
+
13
+ Return a concise **Markdown** plan — no JSON, and do not wrap the whole response in a code fence. Use exactly these sections:
14
+
15
+ ### Data shape
16
+ Decode the ACTUAL recorded `sources`. State the precise shape the module operates on and where the target data lives. When the body is an RPC envelope — an anti-XSSI guard (e.g. `)]}'`), length-prefixed frames, `["wrb.fr", "<rpcid>", "<payload>"]` rows, or doubly/triply-encoded JSON strings — give the exact unwrapping steps AND a decoded sample with real indices (e.g. "strip the first line; each frame is `<len>\n<json>`; in the `wrb.fr` row, element [2] is a JSON string → `JSON.parse` it → the hotel name is at `[0][1][3]`, the price at `[0][1][7][0]`"). For a `request-transform`, identify the signing/dynamic param, its position in the recorded URL/body, and the apparent algorithm (HMAC/MD5/CRC32/base64/etc.) inferred from the recording.
17
+
18
+ ### Algorithm
19
+ Step by step, what each export in `exportSignatures` does to turn the recorded input into the required output. Name exact fields and indices. Ground every step in `sources`.
20
+
21
+ ### Typing hazards
22
+ The module is typechecked with `tsc` under `strict` + `noUncheckedIndexedAccess`, as a gate SEPARATE from the test (a passing test still fails the build on a type error). Enumerate the specific spots that yield `T | undefined` — indexed access (`arr[i]`), regex captures (`re.exec(s)` → `m[1]`, `s.match(re)` → `m[1]`), and split results (`s.split(d)[n]`) — and the exact guard or assertion to use at each (`const m = re.exec(s); if (!m?.[1]) return …`, or `m[1]!` when the structure guarantees presence). Be exhaustive: this is the single most common reason implementations fail.
23
+
24
+ ### Test plan
25
+ Which recorded `seq` to load (from `module.sourceSeqs` if provided; otherwise choose from the `seq` values present in `sources[]`) and the concrete recorded values to assert — at least 3 meaningful assertions on real data, no tautologies. For a `request-transform`, name the param to strip and re-sign and the exact expected value from the recording.
26
+
27
+ ### Risks
28
+ Ambiguities, multiple plausible interpretations, or anything the recording doesn't fully pin down — each with your best-guess resolution so the implementer isn't blocked.
29
+
30
+ ## Rules
31
+
32
+ 1. Ground everything in the provided `sources`. Decode real values; never invent fields the recording doesn't show.
33
+ 2. No production code — pseudocode, field paths, and exact type-guard snippets only. The implementer writes the module.
34
+ 3. Be specific and concise. Skip generic advice; every line should be something the implementer couldn't trivially infer from the signatures alone.
@@ -0,0 +1,39 @@
1
+ You are the PLANNER for ONE MCP tool that a second agent will COMPILE from a browser recording of a single site. The tool replays the site's API: it takes typed parameters, issues one or more HTTP requests, and parses the response into a structured result. Your job is to remove the guesswork before any code exists — map every parameter to the exact recorded field, fix how each request is constructed and signed, and pin down exactly where the result data lives in the response. A precise plan is what makes the compile pass on the first attempt instead of burning verification cycles.
2
+
3
+ ## Input
4
+
5
+ You receive `{ site, url, tool, sharedContext?, planGuidance?, assignedModules, requests }`:
6
+
7
+ - `tool` — `{ toolName, description, expectedOutput, likelyParams, requestSeqs, dependencySeqs }`. The compiled tool must expose these parameters and produce `expectedOutput`. `likelyParams` are the detector's best guess — confirm or correct each against the recorded requests.
8
+ - `planGuidance?` — present when a global build plan ran first: `{ parserGuidance, paramChecklist, authRecipe, loadBearingSeqs }` for THIS tool. Treat it as prior guidance and reconcile it with the recorded data; if the recording contradicts it, prefer the recording and say so.
9
+ - `assignedModules[]` — verified shared modules this tool MUST import instead of re-implementing: `{ path, kind, importPath, exportSignatures, purpose }`. A `request-transform` module reproduces the site's request signing/construction; a `parser-helper` extracts data from the response. Reference each by its exact `importPath`.
10
+ - `requests[]` — the recorded requests in scope for this tool: `{ seq, method, url, headers, body, status, mimeType, responsePreview, ... }`. These are the ground truth — decode them, do not guess. `responsePreview` is truncated; note where the full body must be read.
11
+
12
+ ## Output
13
+
14
+ Return a concise **Markdown** plan — no JSON, and do not wrap the whole response in a code fence. Use exactly these sections:
15
+
16
+ ### Parameters
17
+ For EACH tool parameter, name the exact recorded field it maps to: the query-string key, JSON body path, header, or path segment in a specific recorded `seq`, with the recorded value as evidence (e.g. "`origin` → query param `from` in seq 12, recorded value `SFO`"). Flag any entry in `likelyParams` that does not appear in the recording (it may be derived, optional, or wrong) and state your resolution. Note defaults where the recording shows one.
18
+
19
+ For EACH parameter that should influence the request, also emit a short **verification anchor**: the recorded `seq`(s) that demonstrate that parameter's effect, and the exact request location it controls — the field name, array index, or position the compiler must reproduce (e.g. "anchor: seq 12 query `from=SFO`, seq 19 query `from=LAX` — controls query key `from`"; for positional/array bodies, give the index). For a parameter that selects among request variants, give the anchor seq for each variant so the compiler wires the parameter to drive the variation rather than hardcoding one variant. The anchor is what lets the compiler verify, before finishing, that the constructed request reproduces the parameter's encoding instead of advertising a parameter it never applies.
20
+
21
+ **Hard rule: a parameter with no recorded anchor must not be exposed.** If you cannot point to at least one recorded `seq` and exact location demonstrating a parameter's effect, do not list it as a tool parameter — note it under Edge cases as dropped-for-lack-of-evidence and why. A narrower tool that does exactly what it advertises beats one exposing a parameter that nothing in the recording can verify.
22
+
23
+ ### Requests
24
+ The request(s) the tool issues, in order: method, URL (with which parts are templated from parameters vs constant), body shape, and required headers. If a value is signed or dynamically constructed and an `assignedModules` `request-transform` covers it, say to call that module by its `importPath` rather than re-deriving the algorithm. If there is no assigned module, describe the construction/signing from the recording. Note dependency requests (`dependencySeqs`) that must run first to mint a token or id, and what they produce.
25
+
26
+ ### Response parsing
27
+ The exact location of the result data: the `seq` whose response carries it, the precise JSON path(s) to the array/object, and the per-item fields to extract for `expectedOutput`. If the body is an RPC envelope (anti-XSSI prefix, length-prefixed frames, doubly-encoded JSON strings), give the exact unwrapping steps. If an `assignedModules` `parser-helper` covers this, say to call it by its `importPath` and what it returns.
28
+
29
+ ### Shared modules
30
+ The verbatim `importPath` of every module in `assignedModules` the tool must import, each with one line on what it provides. If `assignedModules` is empty, write "none".
31
+
32
+ ### Edge cases
33
+ Empty results, optional parameters omitted, pagination, error/zero-result responses, and any value the recording doesn't fully pin down — each with your best-guess resolution so the compiler isn't blocked.
34
+
35
+ ## Rules
36
+
37
+ 1. Ground every mapping in the provided `requests`. Decode real recorded values; do NOT invent fields absent from the recorded data.
38
+ 2. No production code — field paths, exact indices, and the `importPath` to call only. The compiler writes the tool.
39
+ 3. Be specific and concise. Skip generic advice; every line should be something the compiler couldn't trivially infer from the parameter names alone.
package/src/cli.ts CHANGED
@@ -7,7 +7,7 @@ import { parseArgs } from 'node:util';
7
7
  import { IS_COMPILED_BINARY } from './imprint/is-compiled.ts';
8
8
  import type { ProviderName } from './imprint/llm.ts';
9
9
  import { isDebug } from './imprint/log.ts';
10
- import { shutdownTracing, traced } from './imprint/tracing.ts';
10
+ import { shutdownTracing, tracedWithCostRollup } from './imprint/tracing.ts';
11
11
  import { VERSION } from './imprint/version.ts';
12
12
 
13
13
  /** Load .env from the project root (next to src/) if present.
@@ -66,6 +66,7 @@ RUN
66
66
  mcp-server <site> Serve one site's tools as MCP (stdio default).
67
67
  cron <site> Polling daemon for ~/.imprint/<site>/<toolName>/cron.json.
68
68
  playbook <site> Run a playbook directly (debugging).
69
+ audit <site> Exercise every generated tool and score it (≥95% gate).
69
70
 
70
71
  OTHER
71
72
  doctor Check that the environment is set up correctly.
@@ -352,6 +353,35 @@ export const VERB_HELP: Record<string, VerbHelp> = {
352
353
  ],
353
354
  example: 'imprint mcp-server southwest',
354
355
  },
356
+ audit: {
357
+ summary:
358
+ "Drive a headless agent against a site's MCP tools, exercise each one, and compute a deterministic accuracy score. Verdicts come from the agent; the score is computed by imprint.",
359
+ usage: [
360
+ 'imprint audit <site> [--min-score <n>] [--out <path>] [--model <name>] [--timeout <duration>] [--json]',
361
+ ],
362
+ flags: [
363
+ {
364
+ name: '--min-score <n>',
365
+ description: 'Pass threshold as a percentage of gradeable invocations (default 95).',
366
+ },
367
+ {
368
+ name: '--out <path>',
369
+ description:
370
+ 'Where to write the JSON report (default ~/.imprint/<site>/.audit-report.json).',
371
+ },
372
+ {
373
+ name: '--model <name>',
374
+ description: 'Override the auditor model (default Opus via claude-cli).',
375
+ },
376
+ {
377
+ name: '--timeout <duration>',
378
+ description:
379
+ 'Audit-session wall-clock cap. Accepts 20m, 1h, 300s, or plain ms. Default 45m.',
380
+ },
381
+ { name: '--json', description: 'Print the machine-readable report to stdout.' },
382
+ ],
383
+ example: 'imprint audit google-flights --min-score 95',
384
+ },
355
385
  mcp: {
356
386
  summary:
357
387
  'Audit, disable, re-enable, and delete Imprint MCP registrations and stale teach state.',
@@ -916,6 +946,75 @@ async function main(argv: string[]): Promise<number> {
916
946
  return 0;
917
947
  }
918
948
 
949
+ case 'audit': {
950
+ const site = requirePositional(argv, 'audit', 'a <site> argument');
951
+ if (site === null) return 2;
952
+ const { values } = parseArgs({
953
+ args: argv.slice(2),
954
+ options: {
955
+ 'min-score': { type: 'string' },
956
+ out: { type: 'string' },
957
+ model: { type: 'string' },
958
+ timeout: { type: 'string' },
959
+ json: { type: 'boolean' },
960
+ },
961
+ allowPositionals: false,
962
+ });
963
+
964
+ let minScore = 95;
965
+ if (values['min-score'] !== undefined) {
966
+ const parsed = Number(values['min-score']);
967
+ if (!Number.isFinite(parsed) || parsed < 0 || parsed > 100) {
968
+ console.error(
969
+ `error: invalid --min-score "${values['min-score']}"\n→ use a number between 0 and 100`,
970
+ );
971
+ return 2;
972
+ }
973
+ minScore = parsed;
974
+ }
975
+
976
+ let auditTimeoutMs: number | undefined;
977
+ if (values.timeout) {
978
+ auditTimeoutMs = parseDuration(values.timeout) ?? undefined;
979
+ if (auditTimeoutMs === undefined) {
980
+ console.error(
981
+ `error: invalid --timeout "${values.timeout}"\n→ use format: 20m, 1h, 300s, or plain milliseconds`,
982
+ );
983
+ return 2;
984
+ }
985
+ }
986
+
987
+ const { runAudit } = await import('./imprint/audit.ts');
988
+ const { localAuditReportPath } = await import('./imprint/paths.ts');
989
+ const outPath = values.out ?? localAuditReportPath(site);
990
+ const score = await tracedWithCostRollup(
991
+ 'cli.audit',
992
+ 'AGENT',
993
+ {
994
+ 'imprint.site': site,
995
+ 'imprint.min_score': minScore,
996
+ 'imprint.model': values.model ?? 'auto',
997
+ },
998
+ () =>
999
+ runAudit({
1000
+ site,
1001
+ minScore,
1002
+ outPath,
1003
+ model: values.model,
1004
+ timeoutMs: auditTimeoutMs,
1005
+ json: values.json,
1006
+ }),
1007
+ );
1008
+ // Exit codes distinguish the outcomes: 0 pass, 1 fail (fix the code),
1009
+ // 2 inconclusive (the site blocked us; note 2 is also returned above for invalid arguments), 3 timeout (audit didn't finish).
1010
+ // (if-chain rather than switch: a `case '<word>':` here would be misread as
1011
+ // a CLI verb by the verb/help drift-guard test.)
1012
+ if (score.verdict === 'pass') return 0;
1013
+ if (score.verdict === 'fail') return 1;
1014
+ if (score.verdict === 'timeout') return 3;
1015
+ return 2;
1016
+ }
1017
+
919
1018
  case 'cron': {
920
1019
  const site = requirePositional(argv, 'cron', 'a <site> argument');
921
1020
  if (site === null) return 2;
@@ -1138,7 +1237,7 @@ async function main(argv: string[]): Promise<number> {
1138
1237
 
1139
1238
  try {
1140
1239
  const { teach } = await import('./imprint/teach.ts');
1141
- await traced(
1240
+ await tracedWithCostRollup(
1142
1241
  'cli.teach',
1143
1242
  'AGENT',
1144
1243
  {
@@ -1195,6 +1294,8 @@ async function main(argv: string[]): Promise<number> {
1195
1294
  'example-dir': { type: 'string' },
1196
1295
  'candidate-json': { type: 'string' },
1197
1296
  'shared-context-json': { type: 'string' },
1297
+ 'build-plan-path': { type: 'string' },
1298
+ 'shared-modules-json': { type: 'string' },
1198
1299
  },
1199
1300
  allowPositionals: false,
1200
1301
  });
@@ -1209,17 +1310,23 @@ async function main(argv: string[]): Promise<number> {
1209
1310
  const { ToolCandidateSchema, SharedCompileContextSchema } = await import(
1210
1311
  './imprint/tool-candidates.ts'
1211
1312
  );
1313
+ const { SharedModuleManifestSchema } = await import('./imprint/build-plan.ts');
1212
1314
  const candidate = values['candidate-json']
1213
1315
  ? ToolCandidateSchema.parse(JSON.parse(values['candidate-json']))
1214
1316
  : undefined;
1215
1317
  const sharedContext = values['shared-context-json']
1216
1318
  ? SharedCompileContextSchema.parse(JSON.parse(values['shared-context-json']))
1217
1319
  : undefined;
1320
+ const sharedModules = values['shared-modules-json']
1321
+ ? SharedModuleManifestSchema.parse(JSON.parse(values['shared-modules-json']))
1322
+ : undefined;
1218
1323
  await runCompileMcpServer({
1219
1324
  sessionPath: values['session-path'],
1220
1325
  toolDir,
1221
1326
  candidate,
1222
1327
  sharedContext,
1328
+ buildPlanPath: values['build-plan-path'],
1329
+ sharedModules,
1223
1330
  });
1224
1331
  return 0;
1225
1332
  }
@@ -77,6 +77,9 @@ interface AgentLoopOptions {
77
77
  llm: ToolUseProvider;
78
78
  /** called before each LLM call and tool dispatch with structured progress */
79
79
  onProgress?: (p: AgentProgress) => void;
80
+ /** called after each turn with the full conversation log so far, so callers
81
+ * can flush incrementally (e.g. write .compile-log.json to disk). */
82
+ onConversationUpdate?: (log: ConversationLogEntry[]) => void;
80
83
  /** called when the wall-clock deadline is reached; return ms to extend or null to time out */
81
84
  onDeadlineReached?: OnDeadlineReached;
82
85
  }
@@ -461,6 +464,8 @@ export async function runAgentLoop(opts: AgentLoopOptions): Promise<AgentResult>
461
464
  },
462
465
  );
463
466
 
467
+ opts.onConversationUpdate?.(conversationLog);
468
+
464
469
  if (turnOutcome.action === 'return') return turnOutcome.result;
465
470
 
466
471
  // Loop continues...