imprint-mcp 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -201
- package/examples/discoverandgo/README.md +1 -1
- package/examples/echo/README.md +1 -1
- package/examples/google-flights/README.md +28 -0
- package/examples/google-flights/_shared/batchexecute.ts +63 -0
- package/examples/google-flights/_shared/flights_request.ts +95 -0
- package/examples/google-flights/_shared/package.json +9 -0
- package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
- package/examples/google-flights/get_flight_booking_details/package.json +9 -0
- package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
- package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
- package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
- package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
- package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
- package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
- package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
- package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
- package/examples/google-flights/lookup_airport/index.ts +101 -0
- package/examples/google-flights/lookup_airport/package.json +9 -0
- package/examples/google-flights/lookup_airport/parser.ts +66 -0
- package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
- package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
- package/examples/google-flights/lookup_airport/workflow.json +57 -0
- package/examples/google-flights/search_flights/index.ts +219 -0
- package/examples/google-flights/search_flights/package.json +9 -0
- package/examples/google-flights/search_flights/parser.ts +169 -0
- package/examples/google-flights/search_flights/playbook.yaml +184 -0
- package/examples/google-flights/search_flights/request-transform.ts +119 -0
- package/examples/google-flights/search_flights/workflow.json +143 -0
- package/examples/google-hotels/README.md +29 -0
- package/examples/google-hotels/_shared/batchexecute.ts +73 -0
- package/examples/google-hotels/_shared/freq.ts +158 -0
- package/examples/google-hotels/_shared/package.json +9 -0
- package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
- package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
- package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
- package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
- package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
- package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
- package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
- package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
- package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
- package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
- package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
- package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
- package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
- package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
- package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
- package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
- package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
- package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
- package/examples/google-hotels/search_hotels/index.ts +207 -0
- package/examples/google-hotels/search_hotels/package.json +9 -0
- package/examples/google-hotels/search_hotels/parser.ts +260 -0
- package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
- package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
- package/examples/google-hotels/search_hotels/workflow.json +127 -0
- package/package.json +3 -2
- package/prompts/audit-agent.md +71 -0
- package/prompts/build-planning.md +74 -0
- package/prompts/compile-agent.md +131 -27
- package/prompts/prereq-builder.md +64 -0
- package/prompts/prereq-planner.md +34 -0
- package/prompts/tool-planning.md +39 -0
- package/src/cli.ts +109 -2
- package/src/imprint/agent.ts +5 -0
- package/src/imprint/audit.ts +996 -0
- package/src/imprint/backend-ladder.ts +1214 -184
- package/src/imprint/build-plan.ts +1051 -0
- package/src/imprint/cdp-browser-fetch.ts +589 -0
- package/src/imprint/cdp-jar-cache.ts +320 -0
- package/src/imprint/chromium.ts +135 -0
- package/src/imprint/claude-cli-compile.ts +125 -25
- package/src/imprint/codex-cli-compile.ts +26 -23
- package/src/imprint/compile-agent-types.ts +38 -0
- package/src/imprint/compile-agent.ts +63 -25
- package/src/imprint/compile-tools.ts +1656 -64
- package/src/imprint/compile.ts +13 -1
- package/src/imprint/concurrency.ts +87 -0
- package/src/imprint/cron.ts +1 -0
- package/src/imprint/doctor.ts +39 -0
- package/src/imprint/freeform-redact.ts +5 -4
- package/src/imprint/integrations.ts +2 -2
- package/src/imprint/llm.ts +56 -8
- package/src/imprint/mcp-compile-server.ts +43 -10
- package/src/imprint/mcp-maintenance.ts +9 -101
- package/src/imprint/mcp-server.ts +73 -7
- package/src/imprint/multi-progress.ts +7 -2
- package/src/imprint/param-grounding.ts +367 -0
- package/src/imprint/paths.ts +29 -0
- package/src/imprint/playbook-runner.ts +101 -40
- package/src/imprint/prereq-builder.ts +651 -0
- package/src/imprint/probe-backends.ts +6 -3
- package/src/imprint/record.ts +10 -1
- package/src/imprint/redact.ts +30 -2
- package/src/imprint/replay-capture.ts +19 -18
- package/src/imprint/runtime.ts +19 -10
- package/src/imprint/session-diff.ts +79 -2
- package/src/imprint/session-merge.ts +9 -5
- package/src/imprint/stealth-chromium.ts +81 -0
- package/src/imprint/stealth-fetch.ts +309 -29
- package/src/imprint/stealth-token-cache.ts +88 -0
- package/src/imprint/teach-plan.ts +251 -0
- package/src/imprint/teach-state.ts +10 -0
- package/src/imprint/teach.ts +456 -142
- package/src/imprint/tool-candidates.ts +72 -14
- package/src/imprint/tool-plan.ts +313 -0
- package/src/imprint/tracing.ts +135 -6
- package/src/imprint/types.ts +61 -3
- package/examples/google-flights/search_google_flights/index.ts +0 -101
- package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
- package/examples/google-flights/search_google_flights/parser.ts +0 -189
- package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
- package/examples/google-flights/search_google_flights/workflow.json +0 -48
- package/examples/google-hotels/search_google_hotels/index.ts +0 -194
- package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
- package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
- package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
- package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
package/src/imprint/compile.ts
CHANGED
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
import { dirname, join as pathJoin } from 'node:path';
|
|
20
20
|
import type { OnDeadlineReached } from './agent.ts';
|
|
21
21
|
import { inferAppApiHosts } from './app-api-hosts.ts';
|
|
22
|
+
import type { SharedModuleManifestEntry } from './build-plan.ts';
|
|
22
23
|
import { type CompileAgentProgress, compileAgent } from './compile-agent.ts';
|
|
23
24
|
import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
|
|
24
25
|
import { type LLMOptions, extractJsonArray, resolveProvider } from './llm.ts';
|
|
@@ -82,6 +83,14 @@ interface GenerateOptions extends CompileOptions {
|
|
|
82
83
|
classifications?: ClassifiedValue[];
|
|
83
84
|
/** Credential values extracted during teach, passed to integration tests via env var. */
|
|
84
85
|
teachCredentials?: { site: string; values: Record<string, string> };
|
|
86
|
+
/** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
|
|
87
|
+
buildPlanPath?: string;
|
|
88
|
+
/** Shared-module build manifest for this site (verified flags). */
|
|
89
|
+
sharedModules?: SharedModuleManifestEntry[];
|
|
90
|
+
/** Per-tool implementation plan (param→field mapping, request construction,
|
|
91
|
+
* response parsing, shared-module imports). Injected into the agent's initial
|
|
92
|
+
* message so the compile follows it. */
|
|
93
|
+
toolPlan?: string;
|
|
85
94
|
}
|
|
86
95
|
|
|
87
96
|
interface GenerateResult {
|
|
@@ -122,6 +131,9 @@ export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
|
|
|
122
131
|
sharedContext: opts.sharedContext,
|
|
123
132
|
classifications: opts.classifications,
|
|
124
133
|
teachCredentials: opts.teachCredentials,
|
|
134
|
+
buildPlanPath: opts.buildPlanPath,
|
|
135
|
+
sharedModules: opts.sharedModules,
|
|
136
|
+
toolPlan: opts.toolPlan,
|
|
125
137
|
});
|
|
126
138
|
|
|
127
139
|
setSpanAttributes(span, {
|
|
@@ -274,7 +286,7 @@ const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
|
|
|
274
286
|
const HEADER_TRUNCATE_LIMIT = 200;
|
|
275
287
|
// Per-request body cap for triage. Triage only needs enough body to distinguish
|
|
276
288
|
// data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
|
|
277
|
-
// site can total >1MB and blow the 200K-token cap on `claude-opus-4-
|
|
289
|
+
// site can total >1MB and blow the 200K-token cap on `claude-opus-4-8`.
|
|
278
290
|
const TRIAGE_BODY_LIMIT = 500;
|
|
279
291
|
|
|
280
292
|
export interface TriageResult {
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bounded-concurrency fan-out helpers shared across the teach pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Lives in its own module (rather than teach.ts) so leaf modules like
|
|
5
|
+
* teach-plan.ts can reuse it without importing teach.ts, which would create an
|
|
6
|
+
* import cycle (teach.ts → teach-plan.ts → teach.ts). teach.ts re-exports both
|
|
7
|
+
* for backwards compatibility with existing callers + tests.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Run `fn` over `items` with at most `concurrency` calls in flight and return
 * the results in input order.
 *
 * The number of workers is clamped to at least 1 (and at most `items.length`).
 * A `concurrency` of 0, a negative number, or NaN would otherwise start no
 * workers at all and resolve to an array of holes without ever calling `fn`;
 * such values now degrade to sequential execution.
 *
 * Entries of `items` that are `undefined` are skipped: `fn` is not called for
 * them and their result slot is left unset.
 *
 * @param items - Inputs to process.
 * @param concurrency - Maximum number of `fn` calls in flight; values below 1
 *   (or NaN) are treated as 1.
 * @param fn - Async mapper applied to each item.
 * @returns Results positioned at the same index as their input.
 * @throws The first error raised by `fn`. No new items are started after a
 *   failure, and the error is rethrown only once in-flight calls have settled.
 *   Use mapLimitSettled when you need per-item success/failure.
 */
export async function mapLimit<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  // Guard against 0 / negative / NaN, which would silently run nothing.
  const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency));
  let next = 0;
  // Tracked separately from `firstError` so a rejection whose reason is
  // `undefined` still counts as a failure.
  let failed = false;
  let firstError: unknown;

  const worker = async (): Promise<void> => {
    while (next < items.length && !failed) {
      const index = next++;
      const item = items[index];
      if (item === undefined) continue;
      try {
        results[index] = await fn(item);
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
      }
    }
  };

  const workers = Array.from({ length: Math.min(limit, items.length) }, () => worker());
  await Promise.allSettled(workers);
  if (failed) throw firstError;
  return results;
}
|
|
37
|
+
|
|
38
|
+
/** Outcome of one item in mapLimitSettled: either the mapped value or the
 * error that `fn` raised for it. */
type SettledResult<R> = { ok: true; value: R } | { ok: false; error: unknown };

/**
 * Run `fn` over `items` with at most `concurrency` calls in flight and never
 * throw: each processed item resolves to a tagged success/failure entry at the
 * same index as its input.
 *
 * The number of workers is clamped to at least 1 (and at most `items.length`).
 * A `concurrency` of 0, a negative number, or NaN would otherwise start no
 * workers and resolve to an array of holes without ever calling `fn`; such
 * values now degrade to sequential execution.
 *
 * Entries of `items` that are `undefined` are skipped: `fn` is not called for
 * them and their result slot is left unset.
 *
 * @param items - Inputs to process.
 * @param concurrency - Maximum number of `fn` calls in flight; values below 1
 *   (or NaN) are treated as 1.
 * @param fn - Async mapper applied to each item.
 * @returns One `{ ok: true, value }` or `{ ok: false, error }` entry per
 *   processed item, in input order.
 */
export async function mapLimitSettled<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<SettledResult<R>[]> {
  const results = new Array<SettledResult<R>>(items.length);
  // Guard against 0 / negative / NaN, which would silently run nothing.
  const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency));
  let next = 0;

  const worker = async (): Promise<void> => {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      if (item === undefined) continue;
      try {
        results[index] = { ok: true, value: await fn(item) };
      } catch (err) {
        results[index] = { ok: false, error: err };
      }
    }
  };

  const workers = Array.from({ length: Math.min(limit, items.length) }, () => worker());
  await Promise.allSettled(workers);
  return results;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Raised by withTimeout when the deadline passes before the awaited work
 * settles. Having its own class (and `name`) lets callers distinguish a timeout
 * from a genuine failure of the work itself.
 *
 * The message reports the deadline in whole seconds, rounded from `ms`.
 */
export class TimeoutError extends Error {
  constructor(label: string, ms: number) {
    const seconds = Math.round(ms / 1000);
    super(`${label} exceeded ${seconds}s timeout`);
    this.name = 'TimeoutError';
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Await `work`, but give up after `ms` milliseconds.
 *
 * Only the waiting is bounded: the underlying work (e.g. a CLI child) is NOT
 * cancelled when the deadline passes — the caller simply stops awaiting it and
 * decides how to degrade. The timer is always cleared once the race settles,
 * whichever side wins.
 *
 * @param work - Promise to await.
 * @param ms - Deadline in milliseconds.
 * @param label - Human-readable name of the work, used in the timeout message.
 * @returns The value `work` resolves to, if it settles first.
 * @throws TimeoutError if the deadline elapses first; otherwise whatever
 *   `work` rejects with.
 */
export async function withTimeout<T>(work: Promise<T>, ms: number, label: string): Promise<T> {
  let handle: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new TimeoutError(label, ms));
    }, ms);
  });
  try {
    const settled = await Promise.race([work, deadline]);
    return settled;
  } finally {
    if (handle) clearTimeout(handle);
  }
}
|
package/src/imprint/cron.ts
CHANGED
|
@@ -242,6 +242,7 @@ async function runCronImpl(opts: RunCronOptions): Promise<void> {
|
|
|
242
242
|
if (
|
|
243
243
|
ladder.includes('fetch') ||
|
|
244
244
|
ladder.includes('fetch-bootstrap') ||
|
|
245
|
+
ladder.includes('cdp-replay') ||
|
|
245
246
|
ladder.includes('stealth-fetch')
|
|
246
247
|
) {
|
|
247
248
|
const validator = buildZodValidator(tool.workflow.parameters);
|
package/src/imprint/doctor.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/** `imprint doctor` — check that the environment can actually run imprint.
|
|
2
2
|
* Reports pass/fail per prerequisite plus a one-line fix when failed. */
|
|
3
3
|
|
|
4
|
+
import { spawnSync } from 'node:child_process';
|
|
4
5
|
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
5
6
|
import { homedir } from 'node:os';
|
|
6
7
|
import { join as pathJoin } from 'node:path';
|
|
@@ -20,6 +21,7 @@ export function doctor(): CheckResult[] {
|
|
|
20
21
|
checkBun(),
|
|
21
22
|
checkChromium(),
|
|
22
23
|
checkPlaywrightChromium(),
|
|
24
|
+
checkVirtualDisplay(),
|
|
23
25
|
checkLLMProvider(),
|
|
24
26
|
checkPushOptional(),
|
|
25
27
|
checkClaudeCode(),
|
|
@@ -87,6 +89,43 @@ function checkPlaywrightChromium(): CheckResult {
|
|
|
87
89
|
};
|
|
88
90
|
}
|
|
89
91
|
|
|
92
|
+
/** Report whether an `Xvfb` executable is resolvable, by asking `sh` to run
 * `command -v Xvfb` and checking for a zero exit status. Any exception from
 * spawning the probe is treated as "not available". */
function hasXvfbBinary(): boolean {
  try {
    const probe = spawnSync('sh', ['-c', 'command -v Xvfb'], { stdio: 'ignore' });
    return probe.status === 0;
  } catch {
    return false;
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Advisory check for a display usable by a headed browser replay. It never
 * fails: every branch reports `ok: true`.
 *
 * Background: the trusted-browser replay (playbook rung's cdp-browser
 * transport) runs Chrome HEADLESS by default and needs NO display — the
 * `HeadlessChrome` UA token is stripped so anti-bot services don't edge-block
 * it. A display only matters as a fallback on a GPU-less Linux host, where
 * headless WebGL reports SwiftShader and the replay must run HEADED under Xvfb
 * (launchChromium auto-starts it when a headed launch finds no `$DISPLAY`).
 * macOS/Windows need nothing.
 *
 * Outcomes, in order:
 *  - non-Linux platform: nothing needed;
 *  - `$DISPLAY` set: reported as-is;
 *  - no `$DISPLAY` but Xvfb found: fallback available;
 *  - neither: still ok, with a `fix` hint for installing Xvfb.
 */
function checkVirtualDisplay(): CheckResult {
  const name = 'Display (headed replay)';
  const pass = (detail: string): CheckResult => ({ name, ok: true, detail });

  const { platform } = process;
  if (platform !== 'linux') {
    return pass(`${platform}: native window server (no Xvfb needed)`);
  }

  const display = process.env.DISPLAY;
  if (display) {
    return pass(`$DISPLAY=${display}`);
  }

  if (hasXvfbBinary()) {
    return pass('no $DISPLAY; Xvfb present — headed-replay fallback available for GPU-less hosts');
  }

  // Still ok: advisory — default replay is headless; Xvfb is only a GPU-less fallback.
  return {
    ...pass(
      'Linux, no $DISPLAY and no Xvfb — default replay is headless (fine); install Xvfb only if a GPU-less host gets bot-flagged',
    ),
    fix: 'GPU-less host bot-flagged? install the headed-replay fallback: apt-get install xvfb (or export DISPLAY=:0)',
  };
}
|
|
128
|
+
|
|
90
129
|
function checkLLMProvider(): CheckResult {
|
|
91
130
|
const statuses = getProviderStatuses();
|
|
92
131
|
const detected = statuses.filter((s) => s.detected);
|
|
@@ -73,10 +73,11 @@ const FREEFORM_POLICIES: PolicyName[] = [
|
|
|
73
73
|
Policies.PGP_PRIVATE_KEY,
|
|
74
74
|
Policies.PASSWORD_ASSIGNMENT,
|
|
75
75
|
Policies.ENVIRONMENT_VARIABLE_SECRET,
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
76
|
+
// NOTE: the GENERIC_* catch-alls (GENERIC_PASSWORD/TOKEN/CREDENTIAL/SECRET) are
|
|
77
|
+
// intentionally omitted — they match on value shape alone and fire on benign
|
|
78
|
+
// data (e.g. `id=1234567890`), corrupting/over-redacting structured payloads.
|
|
79
|
+
// Real secrets are still covered by the keyword-anchored and specific policies
|
|
80
|
+
// above and below (PASSWORD_ASSIGNMENT, OAUTH_*, private keys, cloud tokens, PII).
|
|
80
81
|
Policies.OAUTH_CLIENT_SECRET,
|
|
81
82
|
Policies.OAUTH_REFRESH_TOKEN,
|
|
82
83
|
Policies.OAUTH_ACCESS_TOKEN,
|
|
@@ -87,7 +87,7 @@ export function generatePasteSnippet(opts: {
|
|
|
87
87
|
|
|
88
88
|
switch (platform) {
|
|
89
89
|
case 'claude-code':
|
|
90
|
-
return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → stealth-fetch → playbook).`;
|
|
90
|
+
return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}. The backend ladder handles browser/API state and bot detection automatically (fetch → gated fetch-bootstrap → cdp-replay → stealth-fetch → playbook).`;
|
|
91
91
|
|
|
92
92
|
case 'codex':
|
|
93
93
|
return `Add the ${toolName} tool: run \`${shellCmd}\` to register ${descLower}. Parameters: ${paramList}.`;
|
|
@@ -352,7 +352,7 @@ ${yamlStringify(p, { lineWidth: 0 }).trim()}
|
|
|
352
352
|
// Backend ladder explanation.
|
|
353
353
|
const backendBlock = `## Backend Ladder
|
|
354
354
|
|
|
355
|
-
The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then stealth-fetch for bot-defense state, then playbook for full DOM replay.
|
|
355
|
+
The MCP server automatically escalates from fetch API replay to gated fetch-bootstrap when browser-minted state is declared, then cdp-replay (API requests run inside a live trusted Chrome so a protected POST refreshes its anti-bot token between calls), then stealth-fetch for bot-defense state, then playbook for full DOM replay.
|
|
356
356
|
Bot detection is handled transparently.`;
|
|
357
357
|
|
|
358
358
|
// Scheduling block (optional).
|
package/src/imprint/llm.ts
CHANGED
|
@@ -6,6 +6,7 @@ import {
|
|
|
6
6
|
llmSpanAttributes,
|
|
7
7
|
resolveTraceTokenCount,
|
|
8
8
|
setSpanAttributes,
|
|
9
|
+
totalPromptTokens,
|
|
9
10
|
traceLlmIoEnabled,
|
|
10
11
|
traceLlmMessages,
|
|
11
12
|
traced,
|
|
@@ -17,6 +18,16 @@ interface AnalyzeResult {
|
|
|
17
18
|
text: string;
|
|
18
19
|
inputTokens: number | null;
|
|
19
20
|
outputTokens: number | null;
|
|
21
|
+
/**
|
|
22
|
+
* Prompt-cache token counts, when the provider reports them. `inputTokens` is
|
|
23
|
+
* the *uncached* input only (the Anthropic/CLI `usage.input_tokens`); the bulk
|
|
24
|
+
* of a cache-hit call lives here. Threaded through so `llm.analyze` cost is
|
|
25
|
+
* cache-aware (cache reads bill at 0.1×, writes at 1.25×) instead of charging
|
|
26
|
+
* the whole prompt at the full input rate. Null/undefined for providers that
|
|
27
|
+
* don't expose usage (codex-cli, cursor-cli).
|
|
28
|
+
*/
|
|
29
|
+
cacheReadInputTokens?: number | null;
|
|
30
|
+
cacheCreationInputTokens?: number | null;
|
|
20
31
|
durationMs: number;
|
|
21
32
|
stopReason: string | null;
|
|
22
33
|
}
|
|
@@ -129,6 +140,8 @@ class AnthropicApiProvider implements LLMProvider {
|
|
|
129
140
|
text,
|
|
130
141
|
inputTokens: response.usage.input_tokens,
|
|
131
142
|
outputTokens: response.usage.output_tokens,
|
|
143
|
+
cacheReadInputTokens: response.usage.cache_read_input_tokens ?? null,
|
|
144
|
+
cacheCreationInputTokens: response.usage.cache_creation_input_tokens ?? null,
|
|
132
145
|
durationMs: Date.now() - t0,
|
|
133
146
|
stopReason: response.stop_reason ?? null,
|
|
134
147
|
};
|
|
@@ -256,7 +269,15 @@ class ClaudeCliProvider implements LLMProvider {
|
|
|
256
269
|
);
|
|
257
270
|
}
|
|
258
271
|
|
|
259
|
-
let parsed: {
|
|
272
|
+
let parsed: {
|
|
273
|
+
result?: string;
|
|
274
|
+
usage?: {
|
|
275
|
+
input_tokens?: number;
|
|
276
|
+
output_tokens?: number;
|
|
277
|
+
cache_read_input_tokens?: number;
|
|
278
|
+
cache_creation_input_tokens?: number;
|
|
279
|
+
};
|
|
280
|
+
};
|
|
260
281
|
try {
|
|
261
282
|
parsed = JSON.parse(stdout);
|
|
262
283
|
} catch (parseErr) {
|
|
@@ -273,6 +294,8 @@ class ClaudeCliProvider implements LLMProvider {
|
|
|
273
294
|
text: parsed.result,
|
|
274
295
|
inputTokens: parsed.usage?.input_tokens ?? null,
|
|
275
296
|
outputTokens: parsed.usage?.output_tokens ?? null,
|
|
297
|
+
cacheReadInputTokens: parsed.usage?.cache_read_input_tokens ?? null,
|
|
298
|
+
cacheCreationInputTokens: parsed.usage?.cache_creation_input_tokens ?? null,
|
|
276
299
|
durationMs: Date.now() - t0,
|
|
277
300
|
stopReason: null,
|
|
278
301
|
};
|
|
@@ -437,7 +460,20 @@ async function traceAnalyze(
|
|
|
437
460
|
},
|
|
438
461
|
async (span) => {
|
|
439
462
|
const result = await fn();
|
|
440
|
-
|
|
463
|
+
// Providers report `inputTokens` as the *uncached* input only; the cached
|
|
464
|
+
// portion lives in the cache fields. `llmCostAttributes` expects the TOTAL
|
|
465
|
+
// prompt tokens (it derives uncached = total − cacheRead − cacheWrite), so
|
|
466
|
+
// sum them here. A real total is also large enough to clear the
|
|
467
|
+
// resolveTraceTokenCount sanity check, so cache-hit calls stop falling back
|
|
468
|
+
// to the chars/4 estimate.
|
|
469
|
+
const cacheReadTokens = result.cacheReadInputTokens ?? undefined;
|
|
470
|
+
const cacheWriteTokens = result.cacheCreationInputTokens ?? undefined;
|
|
471
|
+
const totalInputTokens = totalPromptTokens(
|
|
472
|
+
result.inputTokens,
|
|
473
|
+
cacheReadTokens,
|
|
474
|
+
cacheWriteTokens,
|
|
475
|
+
);
|
|
476
|
+
const inputTokens = resolveTraceTokenCount(totalInputTokens, details?.inputText);
|
|
441
477
|
const outputTokens = resolveTraceTokenCount(result.outputTokens, result.text);
|
|
442
478
|
setSpanAttributes(span, {
|
|
443
479
|
...llmSpanAttributes({
|
|
@@ -445,6 +481,8 @@ async function traceAnalyze(
|
|
|
445
481
|
model,
|
|
446
482
|
inputTokens: inputTokens.tokens,
|
|
447
483
|
outputTokens: outputTokens.tokens,
|
|
484
|
+
cacheReadTokens,
|
|
485
|
+
cacheWriteTokens,
|
|
448
486
|
tokenCountsEstimated:
|
|
449
487
|
inputTokens.source === 'estimated' || outputTokens.source === 'estimated',
|
|
450
488
|
inputTokenSource: inputTokens.source,
|
|
@@ -512,12 +550,20 @@ async function traceMessageWithTools(
|
|
|
512
550
|
return `[${b.type}]`;
|
|
513
551
|
})
|
|
514
552
|
.join('\n');
|
|
553
|
+
const cacheReadTokens = response.usage.cache_read_input_tokens ?? undefined;
|
|
554
|
+
const cacheWriteTokens = response.usage.cache_creation_input_tokens ?? undefined;
|
|
515
555
|
setSpanAttributes(span, {
|
|
516
556
|
...llmSpanAttributes({
|
|
517
557
|
provider,
|
|
518
558
|
model,
|
|
519
|
-
inputTokens:
|
|
559
|
+
inputTokens: totalPromptTokens(
|
|
560
|
+
response.usage.input_tokens,
|
|
561
|
+
cacheReadTokens,
|
|
562
|
+
cacheWriteTokens,
|
|
563
|
+
),
|
|
520
564
|
outputTokens: response.usage.output_tokens,
|
|
565
|
+
cacheReadTokens,
|
|
566
|
+
cacheWriteTokens,
|
|
521
567
|
stopReason: response.stop_reason,
|
|
522
568
|
outputMessages: captureIo
|
|
523
569
|
? traceLlmMessages([{ role: 'assistant', content: outputText }])
|
|
@@ -824,7 +870,7 @@ export function detectTeachProvider(): ProviderName {
|
|
|
824
870
|
}
|
|
825
871
|
|
|
826
872
|
function createProvider(name: ProviderName, opts: LLMOptions = {}): LLMProvider {
|
|
827
|
-
const model = opts.model ?? process.env.ANTHROPIC_MODEL ?? 'claude-opus-4-
|
|
873
|
+
const model = opts.model ?? process.env.ANTHROPIC_MODEL ?? 'claude-opus-4-8';
|
|
828
874
|
const temperature = opts.temperature ?? 0;
|
|
829
875
|
const maxTokens = opts.maxTokens ?? 8192;
|
|
830
876
|
|
|
@@ -865,11 +911,11 @@ export function preferredAgentModel(provider: ProviderName): string {
|
|
|
865
911
|
switch (provider) {
|
|
866
912
|
case 'anthropic-api':
|
|
867
913
|
case 'claude-cli':
|
|
868
|
-
return 'claude-opus-4-
|
|
914
|
+
return 'claude-opus-4-8';
|
|
869
915
|
case 'codex-cli':
|
|
870
916
|
return 'gpt-5.5';
|
|
871
917
|
case 'cursor-cli':
|
|
872
|
-
return 'claude-opus-4-
|
|
918
|
+
return 'claude-opus-4-8'; // best-effort; cursor passes through
|
|
873
919
|
}
|
|
874
920
|
}
|
|
875
921
|
|
|
@@ -883,7 +929,8 @@ export function availableModelsForProvider(provider: ProviderName): ModelOption[
|
|
|
883
929
|
case 'anthropic-api':
|
|
884
930
|
case 'claude-cli':
|
|
885
931
|
return [
|
|
886
|
-
{ model: 'claude-opus-4-
|
|
932
|
+
{ model: 'claude-opus-4-8', isDefault: true },
|
|
933
|
+
{ model: 'claude-opus-4-7', isDefault: false },
|
|
887
934
|
{ model: 'claude-sonnet-4-6', isDefault: false },
|
|
888
935
|
{ model: 'claude-haiku-4-5', isDefault: false },
|
|
889
936
|
{ model: 'claude-opus-4-6', isDefault: false },
|
|
@@ -908,7 +955,8 @@ export function availableModelsForProvider(provider: ProviderName): ModelOption[
|
|
|
908
955
|
];
|
|
909
956
|
case 'cursor-cli':
|
|
910
957
|
return [
|
|
911
|
-
{ model: 'claude-opus-4-
|
|
958
|
+
{ model: 'claude-opus-4-8', isDefault: true },
|
|
959
|
+
{ model: 'claude-opus-4-7', isDefault: false },
|
|
912
960
|
{ model: 'claude-sonnet-4-6', isDefault: false },
|
|
913
961
|
{ model: 'claude-haiku-4-5', isDefault: false },
|
|
914
962
|
{ model: 'gpt-5.5', isDefault: false },
|
|
@@ -24,7 +24,13 @@ import {
|
|
|
24
24
|
ListToolsRequestSchema,
|
|
25
25
|
type Tool,
|
|
26
26
|
} from '@modelcontextprotocol/sdk/types.js';
|
|
27
|
-
import {
|
|
27
|
+
import { type SharedModuleManifestEntry, resolvePlanSliceFromFile } from './build-plan.ts';
|
|
28
|
+
import {
|
|
29
|
+
applyLiveVerification,
|
|
30
|
+
applyParamVerification,
|
|
31
|
+
buildCompileTools,
|
|
32
|
+
externalVerification,
|
|
33
|
+
} from './compile-tools.ts';
|
|
28
34
|
import { loadJsonFile } from './load-json.ts';
|
|
29
35
|
import { createLog } from './log.ts';
|
|
30
36
|
import { redactSession } from './redact.ts';
|
|
@@ -43,6 +49,10 @@ interface RunCompileMcpServerOptions {
|
|
|
43
49
|
maxVerificationCycles?: number;
|
|
44
50
|
candidate?: ToolCandidate;
|
|
45
51
|
sharedContext?: SharedCompileContext;
|
|
52
|
+
/** Absolute path to the multi-tool build plan sidecar (.build-plan.json). */
|
|
53
|
+
buildPlanPath?: string;
|
|
54
|
+
/** Shared-module build manifest for this site (verified flags). */
|
|
55
|
+
sharedModules?: SharedModuleManifestEntry[];
|
|
46
56
|
}
|
|
47
57
|
|
|
48
58
|
const DONE_SENTINEL = '.compile-done.json';
|
|
@@ -67,12 +77,24 @@ export async function runCompileMcpServer(opts: RunCompileMcpServerOptions): Pro
|
|
|
67
77
|
session = redactSession(session).session;
|
|
68
78
|
}
|
|
69
79
|
|
|
70
|
-
// Build the
|
|
80
|
+
// Build the read/write tools (same as the in-process loop). When a build
|
|
81
|
+
// plan is present, buildCompileTools also exposes read_build_plan.
|
|
71
82
|
const compileTools = buildCompileTools(session, opts.toolDir, opts.sessionPath, {
|
|
72
83
|
candidate: opts.candidate,
|
|
73
84
|
sharedContext: opts.sharedContext,
|
|
85
|
+
buildPlanPath: opts.buildPlanPath,
|
|
86
|
+
sharedModules: opts.sharedModules,
|
|
74
87
|
});
|
|
75
88
|
|
|
89
|
+
// Resolve the shared modules + producer→consumer token contracts the plan
|
|
90
|
+
// assigned this tool, so verification can assert modules are imported and
|
|
91
|
+
// require a chained test for each producer-sourced token param.
|
|
92
|
+
const { assignedSharedModules, tokenParams, emittedTokens } = resolvePlanSliceFromFile(
|
|
93
|
+
opts.buildPlanPath,
|
|
94
|
+
opts.candidate?.toolName,
|
|
95
|
+
opts.sharedModules,
|
|
96
|
+
);
|
|
97
|
+
|
|
76
98
|
// The custom done/give_up tools live alongside in MCP space.
|
|
77
99
|
const doneTool: Tool = {
|
|
78
100
|
name: 'done',
|
|
@@ -136,25 +158,36 @@ export async function runCompileMcpServer(opts: RunCompileMcpServerOptions): Pro
|
|
|
136
158
|
if (name === 'done') {
|
|
137
159
|
const summary = (args as { summary?: string }).summary ?? 'Task completed';
|
|
138
160
|
log(`done() called: ${summary}`);
|
|
139
|
-
const { failures, warnings } =
|
|
140
|
-
opts.toolDir,
|
|
141
|
-
session,
|
|
142
|
-
opts.sessionPath,
|
|
143
|
-
{
|
|
161
|
+
const { failures, warnings, paramVerification, liveVerification } =
|
|
162
|
+
await externalVerification(opts.toolDir, session, opts.sessionPath, {
|
|
144
163
|
expectedToolName: opts.candidate?.toolName,
|
|
145
164
|
likelyParams: opts.candidate?.likelyParams,
|
|
146
165
|
candidateRequestSeqs: opts.candidate?.requestSeqs,
|
|
147
|
-
|
|
148
|
-
|
|
166
|
+
// Widen Fix B's variation pool to dependency requests so a token that
|
|
167
|
+
// varies only across them and is frozen as a literal in the tool's
|
|
168
|
+
// request is caught (the cross-request session-token leak case).
|
|
169
|
+
dependencyRequestSeqs: opts.candidate?.dependencySeqs,
|
|
170
|
+
assignedSharedModules,
|
|
171
|
+
tokenParams,
|
|
172
|
+
emittedTokens,
|
|
173
|
+
});
|
|
149
174
|
if (warnings.length > 0) {
|
|
150
175
|
log(`verification warnings (non-blocking):\n${warnings.join('\n')}`);
|
|
151
176
|
}
|
|
152
177
|
if (failures.length === 0) {
|
|
178
|
+
// Persist per-parameter verified flags + the live-verification stamp
|
|
179
|
+
// onto workflow.json. Audit and teach read the stamp.
|
|
180
|
+
applyLiveVerification(opts.toolDir, liveVerification);
|
|
181
|
+
const paramWarnings = applyParamVerification(opts.toolDir, paramVerification);
|
|
182
|
+
if (paramWarnings.length > 0) {
|
|
183
|
+
log(`parameter verification:\n${paramWarnings.join('\n')}`);
|
|
184
|
+
}
|
|
185
|
+
const allWarnings = [...warnings, ...paramWarnings];
|
|
153
186
|
const sentinel = pathJoin(opts.toolDir, DONE_SENTINEL);
|
|
154
187
|
writeFileSync(
|
|
155
188
|
sentinel,
|
|
156
189
|
JSON.stringify(
|
|
157
|
-
{ summary, verification: 'passed', warnings, timestamp: Date.now() },
|
|
190
|
+
{ summary, verification: 'passed', warnings: allWarnings, timestamp: Date.now() },
|
|
158
191
|
null,
|
|
159
192
|
2,
|
|
160
193
|
),
|