@kontourai/flow-agents 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/actions/trust-verify/action.yml +4 -2
- package/.github/workflows/ci.yml +12 -0
- package/.github/workflows/runtime-compat.yml +1 -1
- package/CHANGELOG.md +29 -0
- package/README.md +3 -3
- package/build/src/cli/workflow-sidecar.d.ts +16 -0
- package/build/src/cli/workflow-sidecar.js +72 -12
- package/build/src/lib/flow-resolver.d.ts +29 -0
- package/build/src/lib/flow-resolver.js +71 -0
- package/context/scripts/telemetry/lib/config.sh +15 -0
- package/context/scripts/telemetry/telemetry.conf +4 -0
- package/context/scripts/telemetry/telemetry.sh +23 -1
- package/docs/design/flowrun-eventsourcing-design.md +216 -0
- package/docs/design/workflowrun-observability-design.md +431 -0
- package/evals/ci/antigaming-suite.sh +2 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/integration/test_command_log_concurrency.sh +114 -0
- package/evals/integration/test_command_log_fork_classification.sh +134 -0
- package/evals/integration/test_kit_identity_trust.sh +393 -0
- package/evals/integration/test_usage_cost.sh +119 -0
- package/evals/integration/test_verify_cli.sh +23 -0
- package/evals/run.sh +2 -0
- package/integrations/strands/flow_agents_strands/hooks.py +126 -1
- package/integrations/strands/flow_agents_strands/telemetry.py +172 -0
- package/integrations/strands/tests/test_usage.py +129 -0
- package/integrations/strands-ts/src/hooks.ts +135 -1
- package/integrations/strands-ts/src/telemetry.ts +170 -0
- package/integrations/strands-ts/test/test-usage.ts +85 -0
- package/package.json +5 -5
- package/scripts/hooks/evidence-capture.js +75 -13
- package/scripts/hooks/stop-goal-fit.js +76 -23
- package/scripts/repair-command-log.js +115 -0
- package/scripts/telemetry/lib/config.sh +15 -0
- package/scripts/telemetry/lib/pricing.sh +42 -0
- package/scripts/telemetry/lib/usage.sh +108 -0
- package/scripts/telemetry/pricing.golden.json +15 -0
- package/scripts/telemetry/pricing.json +31 -0
- package/scripts/telemetry/telemetry.conf +4 -0
- package/scripts/telemetry/telemetry.sh +23 -1
- package/src/cli/workflow-sidecar.ts +73 -11
- package/src/lib/flow-resolver.ts +85 -0
|
@@ -125,6 +125,70 @@ function readKitFlows(flowAgentsDir: string): KitFlowEntry[] {
|
|
|
125
125
|
return results;
|
|
126
126
|
}
|
|
127
127
|
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// Usage extraction — map a Strands model-call event onto the documented
|
|
130
|
+
// Anthropic usage object, defensively across SDK shapes.
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
/**
 * Narrow an arbitrary value to a string-keyed record.
 *
 * @param value - Anything.
 * @returns The same reference when `value` is a non-null object (arrays
 *   included), otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Read the first finite numeric value stored under any of `keys`.
 *
 * Keys are tried in the order given; values that are not numbers, or that are
 * NaN / ±Infinity, are skipped.
 *
 * @param obj - Record to read from (may be `undefined`).
 * @param keys - Candidate property names, most preferred first.
 * @returns The first finite number found, or `0` when there is none.
 */
function numField(obj: Record<string, unknown> | undefined, ...keys: string[]): number {
  if (!obj) {
    return 0;
  }
  let found = 0;
  // `some` stops at the first key that yields a usable number.
  keys.some((name) => {
    const raw = obj[name];
    if (typeof raw === "number" && Number.isFinite(raw)) {
      found = raw;
      return true;
    }
    return false;
  });
  return found;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Read the first non-empty string stored under any of `keys`.
 *
 * @param obj - Record to read from (may be `undefined`).
 * @param keys - Candidate property names, most preferred first.
 * @returns The first non-empty string found, or `undefined` when there is none.
 */
function strField(obj: Record<string, unknown> | undefined, ...keys: string[]): string | undefined {
  if (!obj) {
    return undefined;
  }
  const hits: string[] = [];
  // `some` stops at the first key that yields a non-empty string.
  keys.some((name) => {
    const raw = obj[name];
    if (typeof raw === "string" && raw !== "") {
      hits.push(raw);
      return true;
    }
    return false;
  });
  return hits.length > 0 ? hits[0] : undefined;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Pull token usage and the model name out of a Strands model-call event.
 *
 * The event itself and its `usage`, `response`, `result`, `message`, `output`
 * and `modelResponse` members are inspected in that order. The first container
 * that has an object-valued `usage` member — or that itself carries
 * `input_tokens` / `inputTokens` — supplies the counts; the first container
 * with a string `model` or `modelId` supplies the model name.
 *
 * @param event - The model-call event to inspect.
 * @returns The four token counts plus the model name (`"unknown"` when none is
 *   found), or `null` when no usage object exists or every count is zero.
 */
export function extractModelUsage(
  event: StrandsEvent
): { model: string; input: number; output: number; cacheCreation: number; cacheRead: number } | null {
  // Places where an SDK might surface usage, most direct first.
  const searchOrder: unknown[] = [
    event,
    event.usage,
    event.response,
    event.result,
    event.message,
    event.output,
    event.modelResponse,
  ];

  let usage: Record<string, unknown> | undefined;
  let modelCarrier: Record<string, unknown> | undefined;
  for (const entry of searchOrder) {
    const record = asRecord(entry);
    if (record === undefined) {
      continue;
    }

    if (usage === undefined) {
      const nested = asRecord(record.usage);
      if (nested !== undefined) {
        usage = nested;
      } else if ("input_tokens" in record || "inputTokens" in record) {
        // The container is itself the usage object.
        usage = record;
      }
    }

    const namesModel = typeof record.model === "string" || typeof record.modelId === "string";
    if (modelCarrier === undefined && namesModel) {
      modelCarrier = record;
    }
  }
  if (usage === undefined) {
    return null;
  }

  // Accept both snake_case and camelCase spellings of each counter.
  const counts = {
    input: numField(usage, "input_tokens", "inputTokens"),
    output: numField(usage, "output_tokens", "outputTokens"),
    cacheCreation: numField(usage, "cache_creation_input_tokens", "cacheCreationInputTokens"),
    cacheRead: numField(usage, "cache_read_input_tokens", "cacheReadInputTokens"),
  };
  if (Object.values(counts).every((count) => count === 0)) {
    return null;
  }

  let model = strField(modelCarrier, "model", "modelId");
  if (model === undefined) {
    model = strField(usage, "model");
  }
  return { model: model ?? "unknown", ...counts };
}
|
|
191
|
+
|
|
128
192
|
function buildKitFlowsHint(flows: KitFlowEntry[]): string {
|
|
129
193
|
if (flows.length === 0) return "";
|
|
130
194
|
const lines = ["KIT FLOWS: the following kit flows are activated for this workspace:"];
|
|
@@ -164,6 +228,11 @@ export class FlowAgentsHooks {
|
|
|
164
228
|
private readonly policyGate: PolicyGate;
|
|
165
229
|
private readonly _workspace: string;
|
|
166
230
|
private _sessionStartMs: number | null = null;
|
|
231
|
+
// Per-model token accumulator, summed across model-call events for the session.
|
|
232
|
+
private _usageByModel = new Map<
|
|
233
|
+
string,
|
|
234
|
+
{ input: number; output: number; cacheCreation: number; cacheRead: number }
|
|
235
|
+
>();
|
|
167
236
|
|
|
168
237
|
constructor(options: FlowAgentsHooksOptions = {}) {
|
|
169
238
|
this._workspace = findRepoRoot(options.workspace ?? process.cwd());
|
|
@@ -248,6 +317,15 @@ export class FlowAgentsHooks {
|
|
|
248
317
|
registry.addCallback(AfterInvocationEvent, (event) => this.onAfterInvocation(event));
|
|
249
318
|
registry.addCallback(BeforeToolCallEvent, (event) => this.onBeforeToolCall(event));
|
|
250
319
|
registry.addCallback(AfterToolCallEvent, (event) => this.onAfterToolCall(event));
|
|
320
|
+
|
|
321
|
+
// AfterModelCallEvent carries per-call token usage (the SDK's documented
|
|
322
|
+
// usage source). Optional — only registered if the installed SDK exposes it,
|
|
323
|
+
// so older SDKs still work (usage is simply not collected there).
|
|
324
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-explicit-any
|
|
325
|
+
const AfterModelCallEvent = (require("strands-agents") as any).AfterModelCallEvent as EventClass | undefined;
|
|
326
|
+
if (AfterModelCallEvent) {
|
|
327
|
+
registry.addCallback(AfterModelCallEvent, (event) => this.onAfterModelCall(event));
|
|
328
|
+
}
|
|
251
329
|
}
|
|
252
330
|
|
|
253
331
|
// --------------------------------------------------------------------------
|
|
@@ -262,13 +340,68 @@ export class FlowAgentsHooks {
|
|
|
262
340
|
this.sink.emitUserPromptSubmit();
|
|
263
341
|
}
|
|
264
342
|
|
|
265
|
-
/** AfterInvocationEvent → stop / session.end */
|
|
343
|
+
/**
 * AfterInvocationEvent → flush accumulated usage as session.usage (when any
 * was collected), then stop / session.end.
 *
 * The duration is measured from `_sessionStartMs`, or reported as 0 when no
 * start time was recorded. The per-model accumulator is emptied after the
 * usage event has been emitted.
 */
onAfterInvocation(_event: StrandsEvent): void {
  const startedAt = this._sessionStartMs;
  const durationMs = startedAt === null ? 0 : Date.now() - startedAt;

  if (this._usageByModel.size !== 0) {
    const byModel: Array<{
      model: string;
      inputTokens: number;
      outputTokens: number;
      cacheCreationInputTokens: number;
      cacheReadInputTokens: number;
    }> = [];
    const totals = { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };

    // One pass: build the per-model breakdown and the session-wide totals.
    for (const [model, tally] of this._usageByModel) {
      byModel.push({
        model,
        inputTokens: tally.input,
        outputTokens: tally.output,
        cacheCreationInputTokens: tally.cacheCreation,
        cacheReadInputTokens: tally.cacheRead,
      });
      totals.input += tally.input;
      totals.output += tally.output;
      totals.cacheCreation += tally.cacheCreation;
      totals.cacheRead += tally.cacheRead;
    }

    this.sink.emitUsage({
      // A top-level model is only named when exactly one model was used.
      model: byModel.length === 1 ? byModel[0].model : undefined,
      durationS: durationMs / 1000,
      inputTokens: totals.input,
      outputTokens: totals.output,
      cacheCreationInputTokens: totals.cacheCreation,
      cacheReadInputTokens: totals.cacheRead,
      byModel,
    });
    this._usageByModel.clear();
  }

  this.sink.emitSessionEnd(durationMs);
}
|
|
271
379
|
|
|
380
|
+
/**
 * AfterModelCallEvent → add this call's token counts to the per-model tally.
 *
 * Usage is obtained through `extractModelUsage`; when that returns `null`
 * nothing is recorded for the call. Counts for the same model name are summed
 * across calls until the accumulator is cleared.
 */
onAfterModelCall(event: StrandsEvent): void {
  const usage = extractModelUsage(event);
  if (usage === null) {
    return;
  }

  let tally = this._usageByModel.get(usage.model);
  if (tally === undefined) {
    // First call seen for this model: start from an all-zero bucket.
    tally = { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
    this._usageByModel.set(usage.model, tally);
  }
  tally.input += usage.input;
  tally.output += usage.output;
  tally.cacheCreation += usage.cacheCreation;
  tally.cacheRead += usage.cacheRead;
}
|
|
404
|
+
|
|
272
405
|
/**
|
|
273
406
|
* BeforeToolCallEvent → preToolUse / tool.invoke + config-protection policy gate.
|
|
274
407
|
*
|
|
@@ -307,6 +440,7 @@ export class FlowAgentsHooks {
|
|
|
307
440
|
/**
 * Call once after constructing / wiring the hooks.
 *
 * Records the session start time, discards any token usage accumulated so
 * far, and asks the sink to emit its session-start event.
 */
emitSessionStart(): void {
  const now = Date.now();
  this._sessionStartMs = now;
  this._usageByModel.clear();
  this.sink.emitSessionStart();
}
|
|
312
446
|
}
|
|
@@ -12,8 +12,13 @@
|
|
|
12
12
|
|
|
13
13
|
import fs from "node:fs";
|
|
14
14
|
import path from "node:path";
|
|
15
|
+
import { fileURLToPath } from "node:url";
|
|
15
16
|
import { randomUUID } from "node:crypto";
|
|
16
17
|
|
|
18
|
+
// ESM has no __dirname; derive it (this package is "type":"module").
|
|
19
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
20
|
+
const __dirname = path.dirname(__filename);
|
|
21
|
+
|
|
17
22
|
// ---------------------------------------------------------------------------
|
|
18
23
|
// Strands TS → canonical event-name mapping
|
|
19
24
|
// Mirrors STRANDS_TO_CANONICAL in integrations/strands/flow_agents_strands/telemetry.py
|
|
@@ -248,4 +253,169 @@ export class TelemetrySink {
|
|
|
248
253
|
/**
 * Emit a `userPromptSubmit` event.
 *
 * @param extra - Optional additional fields, passed through to `emit` unchanged.
 * @returns Whatever `emit` returns for the event.
 */
emitUserPromptSubmit(extra?: Record<string, unknown>): TelemetryEvent {
  const emitted = this.emit("userPromptSubmit", extra);
  return emitted;
}
|
|
256
|
+
|
|
257
|
+
/**
 * Emit a `session.usage` event carrying token counts and an estimated cost.
 *
 * The top-level token fields come from `usage` itself; `by_model` is built
 * from `usage.byModel`, with a cost estimated per entry. When a per-model
 * breakdown is present the top-level cost is the rounded sum of those
 * estimates, otherwise it is priced from `usage.model` and the top-level
 * counts. The event is appended to `this.logFile` as one JSON line; a failed
 * write is ignored so telemetry never blocks agent work.
 *
 * @param usage - Totals, optional model / duration, optional per-model entries.
 * @returns The event object that was built (returned even if the write failed).
 */
emitUsage(usage: UsageInput): TelemetryEvent {
  const event = this.buildBaseEvent("session.usage");
  event.event_id = `${event.event_id}-usage`;
  event.hook = { ...event.hook, event_name: "usage" };

  const perModel: Array<{
    model: string;
    input_tokens: number;
    output_tokens: number;
    cache_creation_input_tokens: number;
    cache_read_input_tokens: number;
    estimated_cost_usd: number;
  }> = [];
  let perModelCost = 0;
  for (const entry of usage.byModel ?? []) {
    const counts = normalizeTokens(entry);
    const estimate = costForModel(entry.model, counts);
    perModel.push({
      model: entry.model,
      input_tokens: counts.input,
      output_tokens: counts.output,
      cache_creation_input_tokens: counts.cacheCreation,
      cache_read_input_tokens: counts.cacheRead,
      estimated_cost_usd: estimate
    });
    perModelCost += estimate;
  }

  const totals = normalizeTokens(usage);
  const hasBreakdown = perModel.length > 0;
  let cost: number;
  if (hasBreakdown) {
    cost = round6(perModelCost);
  } else {
    cost = costForModel(usage.model, totals);
  }

  event.usage = {
    model: usage.model ?? this.runtime,
    duration_s: usage.durationS ?? null,
    input_tokens: totals.input,
    output_tokens: totals.output,
    cache_creation_input_tokens: totals.cacheCreation,
    cache_read_input_tokens: totals.cacheRead,
    estimated_cost_usd: cost,
    pricing_version: pricingVersion(),
    by_model: hasBreakdown ? perModel : null
  };

  const line = JSON.stringify(event) + "\n";
  try {
    fs.appendFileSync(this.logFile, line, "utf8");
  } catch {
    // fail-open: telemetry must never block agent work
  }
  return event;
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// ---------------------------------------------------------------------------
|
|
311
|
+
// Usage / cost — mirror of scripts/telemetry/pricing.json (per 1M tokens, USD)
|
|
312
|
+
// ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
export interface TokenCounts {
|
|
315
|
+
inputTokens?: number;
|
|
316
|
+
outputTokens?: number;
|
|
317
|
+
cacheCreationInputTokens?: number;
|
|
318
|
+
cacheReadInputTokens?: number;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
export interface UsageInput extends TokenCounts {
|
|
322
|
+
model?: string;
|
|
323
|
+
durationS?: number;
|
|
324
|
+
byModel?: Array<TokenCounts & { model: string }>;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
interface NormalizedTokens {
|
|
328
|
+
input: number;
|
|
329
|
+
output: number;
|
|
330
|
+
cacheCreation: number;
|
|
331
|
+
cacheRead: number;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
// Pricing is read from the single-source registry (scripts/telemetry/pricing.json),
|
|
335
|
+
// never hand-maintained here. Resolution: TELEMETRY_PRICING_FILE /
|
|
336
|
+
// FLOW_AGENTS_PRICING_FILE env path, else the repo-relative registry, else a
|
|
337
|
+
// minimal fallback. Tokens are exact regardless; the console recomputes cost
|
|
338
|
+
// authoritatively, so a missing file only degrades the sink's stamped estimate.
|
|
339
|
+
interface PricingVersionBlock {
|
|
340
|
+
cache_multipliers: { write_5m: number; write_1h: number; read: number };
|
|
341
|
+
models: Record<string, { input: number; output: number }>;
|
|
342
|
+
default: { input: number; output: number };
|
|
343
|
+
zero_cost_models: string[];
|
|
344
|
+
}
|
|
345
|
+
interface PricingRegistry {
|
|
346
|
+
current_version: string;
|
|
347
|
+
versions: Record<string, PricingVersionBlock>;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
const FALLBACK_REGISTRY: PricingRegistry = {
|
|
351
|
+
current_version: "fallback",
|
|
352
|
+
versions: {
|
|
353
|
+
fallback: {
|
|
354
|
+
cache_multipliers: { write_5m: 1.25, write_1h: 2.0, read: 0.1 },
|
|
355
|
+
models: {},
|
|
356
|
+
default: { input: 5.0, output: 25.0 },
|
|
357
|
+
zero_cost_models: ["<synthetic>", "synthetic", "unknown", ""]
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
};
|
|
361
|
+
|
|
362
|
+
// Memoised result of loadRegistry(); null until the first call.
let cachedRegistry: PricingRegistry | null = null;

/**
 * Load the pricing registry once and cache it for later calls.
 *
 * Candidate files are tried in order: the TELEMETRY_PRICING_FILE and
 * FLOW_AGENTS_PRICING_FILE env paths, then two locations relative to this
 * module. The first file that parses as JSON and has a string
 * `current_version` plus a truthy `versions` wins; unreadable or malformed
 * files are skipped. When none qualifies, FALLBACK_REGISTRY is used.
 *
 * @returns The cached registry (never null).
 */
function loadRegistry(): PricingRegistry {
  if (cachedRegistry !== null) {
    return cachedRegistry;
  }

  const fromEnv = [process.env.TELEMETRY_PRICING_FILE, process.env.FLOW_AGENTS_PRICING_FILE];
  const bundled = [
    "../../../scripts/telemetry/pricing.json",
    "../../../../scripts/telemetry/pricing.json"
  ].map((relative) => path.join(__dirname, relative));

  for (const candidate of [...fromEnv, ...bundled]) {
    if (!candidate) {
      continue; // env var unset or empty
    }
    try {
      const parsed = JSON.parse(fs.readFileSync(candidate, "utf8"));
      const looksValid = parsed && typeof parsed.current_version === "string" && parsed.versions;
      if (!looksValid) {
        continue;
      }
      cachedRegistry = parsed as PricingRegistry;
      return cachedRegistry;
    } catch {
      // try next candidate
    }
  }

  cachedRegistry = FALLBACK_REGISTRY;
  return cachedRegistry;
}
|
|
385
|
+
|
|
386
|
+
/** Return the `current_version` label of the loaded pricing registry. */
function pricingVersion(): string {
  const { current_version: version } = loadRegistry();
  return version;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Coerce an optional count to a usable number.
 *
 * @param value - A number, or `undefined`.
 * @returns `value` when it is a finite number, otherwise `0`.
 */
function num(value: number | undefined): number {
  if (typeof value !== "number") {
    return 0;
  }
  return Number.isFinite(value) ? value : 0;
}
|
|
393
|
+
|
|
394
|
+
/**
 * Round to six decimal places.
 *
 * @param value - Number to round.
 * @returns `value` scaled by one million, rounded with `Math.round`, and scaled back.
 */
function round6(value: number): number {
  const scale = 1_000_000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
|
397
|
+
|
|
398
|
+
/**
 * Convert optional camelCase token counts into a fully populated structure.
 *
 * @param tokens - Counts as supplied by a caller; any field may be missing.
 * @returns All four counts, each passed through `num`.
 */
function normalizeTokens(tokens: TokenCounts): NormalizedTokens {
  const { inputTokens, outputTokens, cacheCreationInputTokens, cacheReadInputTokens } = tokens;
  const normalized: NormalizedTokens = {
    input: num(inputTokens),
    output: num(outputTokens),
    cacheCreation: num(cacheCreationInputTokens),
    cacheRead: num(cacheReadInputTokens)
  };
  return normalized;
}
|
|
406
|
+
|
|
407
|
+
/**
 * Estimate the USD cost of a set of token counts for one model.
 *
 * Rates are per one million tokens and come from the registry's current
 * version block. Cache-creation tokens are charged at the input rate times
 * the `write_5m` multiplier, cache-read tokens at the input rate times the
 * `read` multiplier.
 *
 * The registry may come from a user-supplied file of which `loadRegistry`
 * only checks the top-level shape, so every field of the version block is
 * validated here and replaced by the built-in fallback value when missing or
 * malformed. The caller (`emitUsage`) prices outside its fail-open `try`, so
 * this function must not throw on a partial pricing file.
 *
 * @param model - Model name; surrounding whitespace is ignored and
 *   `undefined` is treated as the empty string.
 * @param tokens - Normalized token counts.
 * @returns The estimate rounded to six decimals; `0` for models listed in
 *   `zero_cost_models`.
 */
function costForModel(model: string | undefined, tokens: NormalizedTokens): number {
  const fallback = FALLBACK_REGISTRY.versions.fallback;
  const registry = loadRegistry();
  // Partial<> because a file-supplied block is not guaranteed to be complete.
  const block: Partial<PricingVersionBlock> =
    registry.versions[registry.current_version] ?? fallback;

  const finite = (v: unknown): v is number => typeof v === "number" && Number.isFinite(v);
  const asRate = (candidate: unknown): { input: number; output: number } | undefined => {
    const rec = candidate as { input?: unknown; output?: unknown } | null | undefined;
    return rec && finite(rec.input) && finite(rec.output)
      ? { input: rec.input, output: rec.output }
      : undefined;
  };

  const key = (model ?? "").trim();
  const zeroCost = Array.isArray(block.zero_cost_models)
    ? block.zero_cost_models
    : fallback.zero_cost_models;
  if (zeroCost.includes(key)) return 0;

  // Own-property lookup: a model named e.g. "constructor" must not resolve to
  // an inherited Object.prototype member and produce a NaN cost.
  const models = block.models;
  const listed =
    models && Object.prototype.hasOwnProperty.call(models, key) ? models[key] : undefined;
  const rate = asRate(listed) ?? asRate(block.default) ?? fallback.default;

  const cm = block.cache_multipliers ?? fallback.cache_multipliers;
  const writeMultiplier = finite(cm.write_5m) ? cm.write_5m : fallback.cache_multipliers.write_5m;
  const readMultiplier = finite(cm.read) ? cm.read : fallback.cache_multipliers.read;

  return round6(
    (tokens.input * rate.input +
      tokens.output * rate.output +
      tokens.cacheCreation * rate.input * writeMultiplier +
      tokens.cacheRead * rate.input * readMultiplier) /
      1_000_000
  );
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { test } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import os from "node:os";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
import { TelemetrySink } from "../src/telemetry.js";
|
|
8
|
+
import { extractModelUsage } from "../src/hooks.js";
|
|
9
|
+
|
|
10
|
+
const here = path.dirname(fileURLToPath(import.meta.url));
|
|
11
|
+
const tmpSink = () => new TelemetrySink({ workspace: fs.mkdtempSync(path.join(os.tmpdir(), "ts-usage-")) });
|
|
12
|
+
|
|
13
|
+
test("emitUsage writes tokens + cost + pricing_version + by_model", () => {
|
|
14
|
+
const ev = tmpSink().emitUsage({
|
|
15
|
+
model: "claude-opus-4-8",
|
|
16
|
+
inputTokens: 1000,
|
|
17
|
+
outputTokens: 2000,
|
|
18
|
+
cacheReadInputTokens: 500000,
|
|
19
|
+
byModel: [{ model: "claude-opus-4-8", inputTokens: 1000, outputTokens: 2000, cacheReadInputTokens: 500000 }]
|
|
20
|
+
} as any);
|
|
21
|
+
const u = ev.usage as any;
|
|
22
|
+
assert.equal(u.input_tokens, 1000);
|
|
23
|
+
assert.equal(u.output_tokens, 2000);
|
|
24
|
+
assert.equal(u.cache_read_input_tokens, 500000);
|
|
25
|
+
assert.equal(u.pricing_version, "2026-06-28");
|
|
26
|
+
assert.equal(u.estimated_cost_usd, 0.305); // (1000*5 + 2000*25 + 500000*5*0.1)/1e6
|
|
27
|
+
assert.equal(u.by_model[0].model, "claude-opus-4-8");
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("emitUsage multi-model sums + prices each", () => {
|
|
31
|
+
const ev = tmpSink().emitUsage({
|
|
32
|
+
outputTokens: 2000,
|
|
33
|
+
byModel: [
|
|
34
|
+
{ model: "claude-opus-4-8", outputTokens: 1000 },
|
|
35
|
+
{ model: "claude-haiku-4-5", outputTokens: 1000 }
|
|
36
|
+
]
|
|
37
|
+
} as any);
|
|
38
|
+
const u = ev.usage as any;
|
|
39
|
+
const costs: Record<string, number> = Object.fromEntries(u.by_model.map((m: any) => [m.model, m.estimated_cost_usd]));
|
|
40
|
+
assert.equal(costs["claude-opus-4-8"], 0.025);
|
|
41
|
+
assert.equal(costs["claude-haiku-4-5"], 0.005);
|
|
42
|
+
assert.equal(u.estimated_cost_usd, 0.03);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test("extractModelUsage reads usage from varied event shapes", () => {
|
|
46
|
+
assert.deepEqual(
|
|
47
|
+
extractModelUsage({ model: "claude-opus-4-8", usage: { input_tokens: 10, output_tokens: 20, cache_read_input_tokens: 30 } } as any),
|
|
48
|
+
{ model: "claude-opus-4-8", input: 10, output: 20, cacheCreation: 0, cacheRead: 30 }
|
|
49
|
+
);
|
|
50
|
+
// camelCase + modelId
|
|
51
|
+
const camel = extractModelUsage({ modelId: "claude-haiku-4-5", usage: { inputTokens: 5, outputTokens: 6 } } as any);
|
|
52
|
+
assert.equal(camel?.model, "claude-haiku-4-5");
|
|
53
|
+
assert.equal(camel?.input, 5);
|
|
54
|
+
// nested response carrier
|
|
55
|
+
const nested = extractModelUsage({ response: { model: "claude-fable-5", usage: { output_tokens: 100 } } } as any);
|
|
56
|
+
assert.equal(nested?.model, "claude-fable-5");
|
|
57
|
+
assert.equal(nested?.output, 100);
|
|
58
|
+
// no usage / all-zero → null
|
|
59
|
+
assert.equal(extractModelUsage({ model: "x" } as any), null);
|
|
60
|
+
assert.equal(extractModelUsage({ model: "x", usage: { input_tokens: 0, output_tokens: 0 } } as any), null);
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
test("cross-runtime golden vectors (TS sink prices identically)", () => {
|
|
64
|
+
const candidates = [
|
|
65
|
+
path.join(here, "../../../../scripts/telemetry/pricing.golden.json"),
|
|
66
|
+
path.join(here, "../../../scripts/telemetry/pricing.golden.json"),
|
|
67
|
+
path.join(process.cwd(), "../../scripts/telemetry/pricing.golden.json")
|
|
68
|
+
];
|
|
69
|
+
const file = candidates.find((p) => fs.existsSync(p));
|
|
70
|
+
assert.ok(file, "pricing.golden.json not found");
|
|
71
|
+
const golden = JSON.parse(fs.readFileSync(file!, "utf8"));
|
|
72
|
+
const sink = tmpSink();
|
|
73
|
+
for (const c of golden.cases) {
|
|
74
|
+
const ev = sink.emitUsage({
|
|
75
|
+
byModel: [{
|
|
76
|
+
model: c.model,
|
|
77
|
+
inputTokens: c.tokens.input,
|
|
78
|
+
outputTokens: c.tokens.output,
|
|
79
|
+
cacheCreationInputTokens: c.tokens.cache_creation,
|
|
80
|
+
cacheReadInputTokens: c.tokens.cache_read
|
|
81
|
+
}]
|
|
82
|
+
} as any);
|
|
83
|
+
assert.equal((ev.usage as any).estimated_cost_usd, c.expected_cost_usd, `golden ${c.name} (${c.model})`);
|
|
84
|
+
}
|
|
85
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kontourai/flow-agents",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "Flow Agents — a Kontour product that applies Flow and Veritas discipline as a portable process layer inside the agent tools you already use: Claude Code, Codex, Kiro, opencode, pi, and GitHub Actions — with framework adapters (AWS Strands preview) on the same policy-engine contract.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"agents",
|
|
@@ -136,15 +136,15 @@
|
|
|
136
136
|
"kit": "npm run build --silent && node build/src/cli.js kit"
|
|
137
137
|
},
|
|
138
138
|
"devDependencies": {
|
|
139
|
-
"@types/node": "^
|
|
140
|
-
"promptfoo": "^0.121.
|
|
139
|
+
"@types/node": "^26.0.1",
|
|
140
|
+
"promptfoo": "^0.121.17",
|
|
141
141
|
"typescript": "^6.0.3"
|
|
142
142
|
},
|
|
143
143
|
"dependencies": {
|
|
144
|
-
"@kontourai/flow": "~1.
|
|
144
|
+
"@kontourai/flow": "~1.4.1"
|
|
145
145
|
},
|
|
146
146
|
"optionalDependencies": {
|
|
147
|
-
"hachure": "^0.
|
|
147
|
+
"hachure": "^0.5.1",
|
|
148
148
|
"@kontourai/surface": "^1.2.0"
|
|
149
149
|
}
|
|
150
150
|
}
|
|
@@ -121,6 +121,58 @@ function readLastChainState(logFile) {
|
|
|
121
121
|
}
|
|
122
122
|
return { seq: -1, hash: CHAIN_GENESIS };
|
|
123
123
|
}
|
|
124
|
+
|
|
125
|
+
// ─── Concurrency-safe append (lockfile) ──────────────────────────────────────
|
|
126
|
+
//
|
|
127
|
+
// The chain link is a read-(last hash)→compute→append critical section. Without
|
|
128
|
+
// mutual exclusion, two capture processes writing to the SAME command-log
|
|
129
|
+
// concurrently (e.g. parallel agents in one workspace) can both read the same
|
|
130
|
+
// prevHash and append entries with an identical seq/prevHash — forking the chain
|
|
131
|
+
// and tripping the tamper-evidence verifier on a benign race. We serialize the
|
|
132
|
+
// section with an atomic create-exclusive lockfile.
|
|
133
|
+
//
|
|
134
|
+
// FAIL-OPEN, like the rest of this hook: if the lock cannot be acquired we still
|
|
135
|
+
// append (capture must NEVER block the agent or drop evidence), accepting the
|
|
136
|
+
// small residual race rather than losing the record. A crashed holder's stale
|
|
137
|
+
// lock is stolen after LOCK_STALE_MS so a dead process can't wedge capture.
|
|
138
|
+
const LOCK_RETRY_MS = 5; // backoff between attempts
|
|
139
|
+
const LOCK_MAX_TRIES = 200; // ~1s total acquisition budget
|
|
140
|
+
const LOCK_STALE_MS = 10000; // steal a lock older than this (crashed holder)
|
|
141
|
+
|
|
142
|
+
/**
 * Block the calling thread for up to `ms` milliseconds without busy-spinning,
 * by waiting on a private shared-memory cell that nothing ever notifies.
 * Best-effort: if the wait throws, the function returns immediately.
 */
function sleepSync(ms) {
  try {
    const cell = new Int32Array(new SharedArrayBuffer(4));
    Atomics.wait(cell, 0, 0, ms);
  } catch {
    /* SharedArrayBuffer/Atomics unavailable — skip the backoff */
  }
}
|
|
147
|
+
|
|
148
|
+
/**
 * Try to take an exclusive lock by creating `lockFile` with the 'wx' flag
 * (the open fails if the file already exists).
 *
 * Makes at most LOCK_MAX_TRIES attempts. When the file already exists it is
 * removed and the attempt repeated if its mtime is older than LOCK_STALE_MS;
 * otherwise the function sleeps LOCK_RETRY_MS before trying again. Any open
 * error other than EEXIST ends the attempt immediately.
 *
 * @returns The open file descriptor on success, or null on failure (the
 *   caller is expected to fail open).
 */
function acquireLock(lockFile) {
  let attempt = 0;
  while (attempt < LOCK_MAX_TRIES) {
    attempt += 1;

    let fd;
    try {
      fd = fs.openSync(lockFile, 'wx');
    } catch (err) {
      const alreadyHeld = Boolean(err) && err.code === 'EEXIST';
      if (!alreadyHeld) return null; // unexpected — fail open

      // Lock held: decide between stealing a stale lock and backing off.
      let ageMs;
      try {
        const st = fs.statSync(lockFile);
        ageMs = Date.now() - st.mtimeMs;
      } catch {
        continue; // lock vanished between open and stat — retry immediately
      }
      if (ageMs > LOCK_STALE_MS) {
        try { fs.unlinkSync(lockFile); } catch { /* someone else removed it first */ }
        continue;
      }
      sleepSync(LOCK_RETRY_MS);
      continue;
    }

    // We own the lock; record our pid in it for whoever inspects the file.
    try { fs.writeSync(fd, String(process.pid)); } catch { /* pid is advisory only */ }
    return fd;
  }
  return null;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Release a lock obtained from acquireLock: close the descriptor, then delete
 * the lock file. Best-effort — each step is attempted independently and any
 * error (already closed, already removed) is ignored.
 */
function releaseLock(fd, lockFile) {
  const steps = [
    () => fs.closeSync(fd),
    () => fs.unlinkSync(lockFile),
  ];
  for (const step of steps) {
    try {
      step();
    } catch {
      /* already closed / already removed */
    }
  }
}
|
|
124
176
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
125
177
|
|
|
126
178
|
function parseJson(raw) {
|
|
@@ -305,20 +357,30 @@ function run(rawInput) {
|
|
|
305
357
|
const logFile = path.join(artifactDir, 'command-log.jsonl');
|
|
306
358
|
fs.mkdirSync(artifactDir, { recursive: true });
|
|
307
359
|
|
|
308
|
-
//
|
|
309
|
-
//
|
|
310
|
-
//
|
|
311
|
-
|
|
360
|
+
// Serialize the read→compute→append critical section so concurrent captures
|
|
361
|
+
// (parallel agents sharing this log) cannot fork the hash-chain. Fail-open:
|
|
362
|
+
// a null fd means we could not lock — we still append rather than drop the
|
|
363
|
+
// record. The lock is always released in finally.
|
|
364
|
+
const lockFile = logFile + '.lock';
|
|
365
|
+
const lockFd = acquireLock(lockFile);
|
|
312
366
|
try {
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
367
|
+
// Hash-chain integrity: compute _chain before appending. Fail-open: any
|
|
368
|
+
// error in chain computation falls back to the plain record (no _chain).
|
|
369
|
+
// A chain failure must NEVER block capture or corrupt the log.
|
|
370
|
+
let recordToWrite = record;
|
|
371
|
+
try {
|
|
372
|
+
const { seq: prevSeq, hash: prevHash } = readLastChainState(logFile);
|
|
373
|
+
const seq = prevSeq + 1;
|
|
374
|
+
const hash = computeChainHash(prevHash, record);
|
|
375
|
+
// Spread record fields then add _chain so the chain field is appended last
|
|
376
|
+
// (cosmetic ordering; canonicalJsonForChain excludes it during hashing).
|
|
377
|
+
recordToWrite = { ...record, _chain: { seq, prevHash, hash } };
|
|
378
|
+
} catch { /* chain computation failed — write plain record, do not block */ }
|
|
379
|
+
|
|
380
|
+
fs.appendFileSync(logFile, JSON.stringify(recordToWrite) + '\n');
|
|
381
|
+
} finally {
|
|
382
|
+
if (lockFd !== null) releaseLock(lockFd, lockFile);
|
|
383
|
+
}
|
|
322
384
|
} catch { /* fail-open: capture never blocks or corrupts */ }
|
|
323
385
|
return rawInput;
|
|
324
386
|
}
|