@kontourai/flow-agents 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/actions/trust-verify/action.yml +4 -2
- package/.github/workflows/ci.yml +12 -0
- package/.github/workflows/runtime-compat.yml +1 -1
- package/CHANGELOG.md +29 -0
- package/README.md +3 -3
- package/build/src/cli/workflow-sidecar.d.ts +16 -0
- package/build/src/cli/workflow-sidecar.js +72 -12
- package/build/src/lib/flow-resolver.d.ts +29 -0
- package/build/src/lib/flow-resolver.js +71 -0
- package/context/scripts/telemetry/lib/config.sh +15 -0
- package/context/scripts/telemetry/telemetry.conf +4 -0
- package/context/scripts/telemetry/telemetry.sh +23 -1
- package/docs/design/flowrun-eventsourcing-design.md +216 -0
- package/docs/design/workflowrun-observability-design.md +431 -0
- package/evals/ci/antigaming-suite.sh +2 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/integration/test_command_log_concurrency.sh +114 -0
- package/evals/integration/test_command_log_fork_classification.sh +134 -0
- package/evals/integration/test_kit_identity_trust.sh +393 -0
- package/evals/integration/test_usage_cost.sh +119 -0
- package/evals/integration/test_verify_cli.sh +23 -0
- package/evals/run.sh +2 -0
- package/integrations/strands/flow_agents_strands/hooks.py +126 -1
- package/integrations/strands/flow_agents_strands/telemetry.py +172 -0
- package/integrations/strands/tests/test_usage.py +129 -0
- package/integrations/strands-ts/src/hooks.ts +135 -1
- package/integrations/strands-ts/src/telemetry.ts +170 -0
- package/integrations/strands-ts/test/test-usage.ts +85 -0
- package/package.json +5 -5
- package/scripts/hooks/evidence-capture.js +75 -13
- package/scripts/hooks/stop-goal-fit.js +76 -23
- package/scripts/repair-command-log.js +115 -0
- package/scripts/telemetry/lib/config.sh +15 -0
- package/scripts/telemetry/lib/pricing.sh +42 -0
- package/scripts/telemetry/lib/usage.sh +108 -0
- package/scripts/telemetry/pricing.golden.json +15 -0
- package/scripts/telemetry/pricing.json +31 -0
- package/scripts/telemetry/telemetry.conf +4 -0
- package/scripts/telemetry/telemetry.sh +23 -1
- package/src/cli/workflow-sidecar.ts +73 -11
- package/src/lib/flow-resolver.ts +85 -0
|
@@ -803,17 +803,30 @@ function canonicalJsonForVerify(record) {
|
|
|
803
803
|
|
|
804
804
|
/**
|
|
805
805
|
* Verify the hash chain of command-log.jsonl.
|
|
806
|
-
* Returns { status, brokenAt } where:
|
|
807
|
-
* status = "ok" | "legacy" | "broken"
|
|
806
|
+
* Returns { status, brokenAt, forkAt } where:
|
|
807
|
+
* status = "ok" | "legacy" | "broken" | "forked"
|
|
808
808
|
* brokenAt = index (0-based) of the first broken entry, or null
|
|
809
|
+
* forkAt = index (0-based) of the first concurrent-fork sibling, or null
|
|
810
|
+
*
|
|
811
|
+
* "forked" is a BENIGN concurrent-append race, not tampering: two PostToolUse
|
|
812
|
+
* captures appended off the same parent tip (e.g. parallel agents sharing one
|
|
813
|
+
* log) before the writer lock (flow-agents#232) serialized them. It is
|
|
814
|
+
* distinguished from "broken" because:
|
|
815
|
+
* - every entry's hash is still self-consistent (no content was edited),
|
|
816
|
+
* - every entry's parent is reachable (nothing was reordered or removed), and
|
|
817
|
+
* - the only anomaly is a parent claimed by >1 capture-sourced sibling.
|
|
818
|
+
* Tamper — a content edit (self-hash mismatch), a reorder, or a deletion
|
|
819
|
+
* (unreachable parent) — still returns "broken". A fork cannot be used to
|
|
820
|
+
* launder a content edit: editing a record breaks its self-hash, which is
|
|
821
|
+
* checked before fork classification.
|
|
809
822
|
*/
|
|
810
823
|
function verifyCommandLogChain(artifactDir) {
|
|
811
824
|
const file = path.join(artifactDir, 'command-log.jsonl');
|
|
812
825
|
let raw = '';
|
|
813
|
-
try { raw = fs.readFileSync(file, 'utf8'); } catch { return { status: 'legacy', brokenAt: null }; }
|
|
826
|
+
try { raw = fs.readFileSync(file, 'utf8'); } catch { return { status: 'legacy', brokenAt: null, forkAt: null }; }
|
|
814
827
|
|
|
815
828
|
const lines = raw.split('\n').filter(l => l.trim());
|
|
816
|
-
if (lines.length === 0) return { status: 'legacy', brokenAt: null };
|
|
829
|
+
if (lines.length === 0) return { status: 'legacy', brokenAt: null, forkAt: null };
|
|
817
830
|
|
|
818
831
|
// Parse all entries, tolerating unparseable lines (they count as legacy/unchained).
|
|
819
832
|
const entries = [];
|
|
@@ -823,18 +836,25 @@ function verifyCommandLogChain(artifactDir) {
|
|
|
823
836
|
if (entry && typeof entry === 'object') entries.push(entry);
|
|
824
837
|
} catch { /* skip malformed lines */ }
|
|
825
838
|
}
|
|
826
|
-
if (entries.length === 0) return { status: 'legacy', brokenAt: null };
|
|
839
|
+
if (entries.length === 0) return { status: 'legacy', brokenAt: null, forkAt: null };
|
|
827
840
|
|
|
828
841
|
// Classify: are there any chained entries?
|
|
829
842
|
const hasAnyChain = entries.some(e => e._chain && typeof e._chain.hash === 'string');
|
|
830
|
-
if (!hasAnyChain) return { status: 'legacy', brokenAt: null };
|
|
831
|
-
|
|
832
|
-
//
|
|
833
|
-
//
|
|
834
|
-
//
|
|
835
|
-
|
|
843
|
+
if (!hasAnyChain) return { status: 'legacy', brokenAt: null, forkAt: null };
|
|
844
|
+
|
|
845
|
+
// Walk in file order. A chained entry is ACCEPTED when both:
|
|
846
|
+
// (a) self-consistent: hash === sha256(prevHash + canonicalJson(record)),
|
|
847
|
+
// so a content edit (e.g. flipping exitCode) without rehashing fails; and
|
|
848
|
+
// (b) reachable: prevHash is genesis or the hash of any prior accepted entry.
|
|
849
|
+
// We track the SET of reachable hashes (not just the latest tip) so that
|
|
850
|
+
// concurrent-fork siblings — which share a still-reachable parent — are
|
|
851
|
+
// tolerated, while a reorder/deletion (parent not reachable) is caught.
|
|
852
|
+
const reachable = new Set([CHAIN_GENESIS_VERIFY]);
|
|
853
|
+
const parentSources = new Map(); // prevHash -> [source, ...] (fork detection)
|
|
836
854
|
let prevWasChained = false;
|
|
837
|
-
let
|
|
855
|
+
let forked = false;
|
|
856
|
+
let firstForkAt = null;
|
|
857
|
+
|
|
838
858
|
for (let i = 0; i < entries.length; i++) {
|
|
839
859
|
const entry = entries[i];
|
|
840
860
|
const chain = entry._chain;
|
|
@@ -842,26 +862,43 @@ function verifyCommandLogChain(artifactDir) {
|
|
|
842
862
|
// Legacy entry without _chain. If we have already seen a chained entry,
|
|
843
863
|
// a gap in the chain (a legacy entry in the middle) counts as broken
|
|
844
864
|
// (it could indicate a removed chained entry was replaced by a legacy one).
|
|
845
|
-
if (prevWasChained) return { status: 'broken', brokenAt: i };
|
|
865
|
+
if (prevWasChained) return { status: 'broken', brokenAt: i, forkAt: null };
|
|
846
866
|
// Before any chained entry: tolerate (legacy prefix).
|
|
847
867
|
continue;
|
|
848
868
|
}
|
|
849
869
|
|
|
850
|
-
//
|
|
851
|
-
|
|
852
|
-
|
|
870
|
+
// (a) Self-consistency. A content edit without rehashing fails here.
|
|
871
|
+
if (typeof chain.prevHash !== 'string') return { status: 'broken', brokenAt: i, forkAt: null };
|
|
872
|
+
const selfHash = crypto.createHash('sha256')
|
|
873
|
+
.update(chain.prevHash + canonicalJsonForVerify(entry), 'utf8')
|
|
853
874
|
.digest('hex');
|
|
854
|
-
if (chain.hash !==
|
|
855
|
-
|
|
856
|
-
//
|
|
857
|
-
|
|
875
|
+
if (chain.hash !== selfHash) return { status: 'broken', brokenAt: i, forkAt: null };
|
|
876
|
+
|
|
877
|
+
// (b) Reachability. An unreachable parent means a reorder or a removed
|
|
878
|
+
// predecessor — structural tamper, not a benign concurrent append.
|
|
879
|
+
if (!reachable.has(chain.prevHash)) return { status: 'broken', brokenAt: i, forkAt: null };
|
|
880
|
+
|
|
881
|
+
// Fork detection: a parent claimed by more than one entry is a fork. It is
|
|
882
|
+
// benign only when EVERY sibling on that parent is a PostToolUse capture
|
|
883
|
+
// (two captures racing on the same tip). Any non-capture sibling on a
|
|
884
|
+
// shared parent is treated as tamper (conservative).
|
|
885
|
+
const sources = parentSources.get(chain.prevHash) || [];
|
|
886
|
+
sources.push(entry.source);
|
|
887
|
+
parentSources.set(chain.prevHash, sources);
|
|
888
|
+
if (sources.length > 1) {
|
|
889
|
+
if (!sources.every(s => s === 'postToolUse-capture')) {
|
|
890
|
+
return { status: 'broken', brokenAt: i, forkAt: null };
|
|
891
|
+
}
|
|
892
|
+
if (firstForkAt === null) firstForkAt = i;
|
|
893
|
+
forked = true;
|
|
894
|
+
}
|
|
858
895
|
|
|
859
|
-
|
|
896
|
+
reachable.add(chain.hash);
|
|
860
897
|
prevWasChained = true;
|
|
861
|
-
chainedCount += 1;
|
|
862
898
|
}
|
|
863
899
|
|
|
864
|
-
return { status: '
|
|
900
|
+
if (forked) return { status: 'forked', brokenAt: null, forkAt: firstForkAt };
|
|
901
|
+
return { status: 'ok', brokenAt: null, forkAt: null };
|
|
865
902
|
}
|
|
866
903
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
867
904
|
|
|
@@ -1065,6 +1102,11 @@ function captureCrossReference(root, artifactDir, activeFlowStep) {
|
|
|
1065
1102
|
//
|
|
1066
1103
|
// ok → proceed normally (chain is valid, log is trustworthy).
|
|
1067
1104
|
// legacy → proceed normally (pre-B2 log, no chain to verify, existing behavior).
|
|
1105
|
+
// forked → benign concurrent-append race (not tampering): emit a loud but
|
|
1106
|
+
// NON-blocking advisory and keep trusting the records. The capture
|
|
1107
|
+
// contradiction teeth still run (the records are genuine, just not
|
|
1108
|
+
// linearly ordered); the operator can re-linearize with the repair
|
|
1109
|
+
// tool. This is what stops honest parallel work from being trapped.
|
|
1068
1110
|
// broken → emit a loud warning and treat ALL claimed-pass commands relying on
|
|
1069
1111
|
// this log as NOT_VERIFIED/blocking — do not let them sail through.
|
|
1070
1112
|
let chainBroken = false;
|
|
@@ -1079,6 +1121,17 @@ function captureCrossReference(root, artifactDir, activeFlowStep) {
|
|
|
1079
1121
|
'This is tamper-EVIDENCE (hash-chain broken); alteration, removal, or reordering detected. ' +
|
|
1080
1122
|
'NOT_VERIFIED: cannot confirm or deny claimed passes.'
|
|
1081
1123
|
);
|
|
1124
|
+
} else if (chainResult.status === 'forked') {
|
|
1125
|
+
// NOT a hard block: this string must not match HARD_BLOCK/FULL_BLOCK. A
|
|
1126
|
+
// concurrent fork is benign — no content was edited and nothing was
|
|
1127
|
+
// removed — so honest parallel work proceeds. We surface it loudly and
|
|
1128
|
+
// point at the deterministic repair.
|
|
1129
|
+
const forkIdx = chainResult.forkAt !== null ? ` (entry ${chainResult.forkAt})` : '';
|
|
1130
|
+
warnings.push(
|
|
1131
|
+
`${base} command-log shows a concurrent-capture fork${forkIdx} — two PostToolUse captures appended off the same parent ` +
|
|
1132
|
+
'(parallel writers before the writer lock). This is NOT tampering: every record is self-consistent and reachable. ' +
|
|
1133
|
+
'Records remain trusted; re-linearize with: node scripts/repair-command-log.js <artifact-dir>'
|
|
1134
|
+
);
|
|
1082
1135
|
}
|
|
1083
1136
|
}
|
|
1084
1137
|
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
/**
|
|
4
|
+
* repair-command-log.js — deterministic re-linearization of a concurrent-fork
|
|
5
|
+
* command-log.jsonl.
|
|
6
|
+
*
|
|
7
|
+
* A "fork" happens when two PostToolUse captures append off the same parent tip
|
|
8
|
+
* (parallel writers before the writer lock, flow-agents#232). The records are
|
|
9
|
+
* all genuine and self-consistent; only their linear order is ambiguous. This
|
|
10
|
+
* tool produces THE canonical order — sort chained entries by (capturedAt, then
|
|
11
|
+
* hash) — and re-chains them, so any party re-running it gets the identical
|
|
12
|
+
* result. It is therefore a verifiable repair, not a judgement call.
|
|
13
|
+
*
|
|
14
|
+
* SAFETY: it refuses to run unless verifyCommandLogChain() reports "forked".
|
|
15
|
+
* - "broken" (real tamper: edited content, reorder, deletion) → REFUSE. The
|
|
16
|
+
* repair must never be usable to launder tampering.
|
|
17
|
+
* - "ok" / "legacy" → nothing to do.
|
|
18
|
+
* No record content is altered — only the _chain wrappers and line order. The
|
|
19
|
+
* original is backed up, and an in-chain `chain-repair` marker records that the
|
|
20
|
+
* re-linearization happened (so the repair is itself auditable).
|
|
21
|
+
*
|
|
22
|
+
* Usage: node scripts/repair-command-log.js <artifact-dir> [--reason "..."]
|
|
23
|
+
*/
|
|
24
|
+
const fs = require('fs');
|
|
25
|
+
const path = require('path');
|
|
26
|
+
const crypto = require('crypto');
|
|
27
|
+
|
|
28
|
+
const gate = require(path.join(__dirname, 'hooks', 'stop-goal-fit.js'));
|
|
29
|
+
const GENESIS = gate.CHAIN_GENESIS_VERIFY;
|
|
30
|
+
|
|
31
|
+
/**
 * Build the canonical string form of a record that is fed into the chain hash.
 *
 * The `_chain` wrapper is left out and the remaining top-level keys are emitted
 * in sorted order. Nested values are serialized by JSON.stringify as-is, so
 * only the top level is re-ordered.
 *
 * @param {object} rec - A command-log record, with or without `_chain`.
 * @returns {string} JSON text of the record content with sorted top-level keys.
 */
function canon(rec) {
  const canonical = {};
  Object.keys(rec)
    .filter((key) => key !== '_chain')
    .sort()
    .forEach((key) => {
      canonical[key] = rec[key];
    });
  return JSON.stringify(canonical);
}
|
|
37
|
+
/**
 * Compute the chain hash for a record given its parent hash.
 *
 * The digest is sha256 over the parent hash string concatenated with the
 * canonical form of the record, returned as lowercase hex.
 *
 * @param {string} prev - Hash of the parent entry (or the genesis value).
 * @param {object} rec - Record content to link.
 * @returns {string} Hex-encoded sha256 digest.
 */
function hashLink(prev, rec) {
  const hasher = crypto.createHash('sha256');
  hasher.update(prev + canon(rec), 'utf8');
  return hasher.digest('hex');
}
|
|
40
|
+
|
|
41
|
+
/**
 * CLI entry point: re-linearize a concurrent-fork command-log.jsonl.
 *
 * Flow:
 *   1. Ask the gate for the chain verdict and refuse anything but "forked"
 *      ("ok"/"legacy" exit quietly, every other status exits 1).
 *   2. Keep the un-chained prefix verbatim and collect the chained records
 *      without their `_chain` wrapper.
 *   3. Sort the records by (capturedAt, content hash), re-chain them from
 *      genesis and append a `chain-repair` marker entry.
 *   4. Back up the original, write the new log, and re-verify it.
 *
 * Differences from a naive rewrite, all aimed at keeping the repair auditable:
 *   - lines that parse to a non-object (e.g. `null`) are handled like
 *     unparseable lines instead of crashing on `e._chain`;
 *   - an existing backup is never overwritten, a timestamped name is used
 *     for later repairs so the first pre-repair snapshot survives;
 *   - lines dropped from the chained region are reported on stderr;
 *   - if the rewritten log does not verify as "ok", the original is restored
 *     from the backup before exiting non-zero.
 *
 * Exit codes: 2 for usage errors, 1 for a refused or failed repair.
 */
function main() {
  const dir = process.argv[2];
  if (!dir) {
    console.error('usage: repair-command-log.js <artifact-dir> [--reason "..."]');
    process.exit(2);
  }
  const reasonIdx = process.argv.indexOf('--reason');
  const reason = reasonIdx !== -1
    ? (process.argv[reasonIdx + 1] || '')
    : 'deterministic concurrent-fork re-linearization';

  const verdict = gate.verifyCommandLogChain(dir);
  if (verdict.status === 'ok' || verdict.status === 'legacy') {
    console.log(`nothing to repair: chain status is "${verdict.status}"`);
    return;
  }
  if (verdict.status !== 'forked') {
    console.error(`REFUSING to repair: chain status is "${verdict.status}" (entry ${verdict.brokenAt}). ` +
      'This tool only re-linearizes benign concurrent forks; it will not touch a tampered chain.');
    process.exit(1);
  }

  const file = path.join(dir, 'command-log.jsonl');
  const lines = fs.readFileSync(file, 'utf8').split('\n').filter((l) => l.trim());

  // Preserve legacy prefix verbatim; collect the chained records (content only).
  const legacyPrefix = [];
  const records = [];
  let dropped = 0;
  let started = false;
  for (const line of lines) {
    let e;
    let usable = true;
    try { e = JSON.parse(line); } catch { usable = false; }
    // A line such as `null` or `42` parses but is not a record.
    if (usable && (e === null || typeof e !== 'object')) usable = false;
    if (!usable) {
      if (!started) legacyPrefix.push(line); else dropped += 1;
      continue;
    }
    const isChained = e._chain && typeof e._chain.hash === 'string';
    if (!started && !isChained) { legacyPrefix.push(line); continue; }
    started = true;
    const rec = { ...e };
    delete rec._chain;
    records.push(rec);
  }

  // Canonical deterministic order: capturedAt asc, then a stable content hash.
  records.sort((a, b) => {
    const ta = String(a.capturedAt || '');
    const tb = String(b.capturedAt || '');
    if (ta !== tb) return ta < tb ? -1 : 1;
    const ha = crypto.createHash('sha256').update(canon(a)).digest('hex');
    const hb = crypto.createHash('sha256').update(canon(b)).digest('hex');
    return ha < hb ? -1 : ha > hb ? 1 : 0;
  });

  // Re-chain from genesis.
  const out = [...legacyPrefix];
  let prev = GENESIS;
  let seq = 0;
  for (const rec of records) {
    const h = hashLink(prev, rec);
    out.push(JSON.stringify({ ...rec, _chain: { seq, prevHash: prev, hash: h } }));
    prev = h;
    seq += 1;
  }
  // Append an in-chain repair marker so the re-linearization is itself auditable.
  const marker = {
    command: '(chain-repair marker)',
    observedResult: `re-linearized ${records.length} entries from concurrent fork`,
    exitCode: 0,
    capturedAt: new Date().toISOString(),
    source: 'chain-repair',
    repair: { reason, entries: records.length, forkAt: verdict.forkAt },
  };
  const mh = hashLink(prev, marker);
  out.push(JSON.stringify({ ...marker, _chain: { seq, prevHash: prev, hash: mh } }));

  // Never clobber an earlier backup: the first pre-repair snapshot is the
  // evidence of what the log looked like before any repair touched it.
  let backup = `${file}.prebackup-repair`;
  if (fs.existsSync(backup)) backup = `${backup}.${Date.now()}`;
  fs.copyFileSync(file, backup, fs.constants.COPYFILE_EXCL);
  fs.writeFileSync(file, out.join('\n') + '\n');

  if (dropped > 0) {
    console.error(`warning: dropped ${dropped} unparseable line(s) found after the chain start ` +
      `(still present in backup ${path.basename(backup)})`);
  }

  const after = gate.verifyCommandLogChain(dir);
  console.log(`repaired: re-linearized ${records.length} entries (legacy prefix: ${legacyPrefix.length}); ` +
    `chain status now "${after.status}". backup: ${path.basename(backup)}`);
  if (after.status !== 'ok') {
    // Put the forked original back so the log is not left in a state this
    // tool would refuse to touch on the next run.
    fs.copyFileSync(backup, file);
    console.error('repair did not produce a clean chain; original log restored from backup');
    process.exit(1);
  }
}
|
|
114
|
+
|
|
115
|
+
main();
|
|
@@ -38,6 +38,11 @@ CONSOLE_TELEMETRY_URL="${CONSOLE_TELEMETRY_URL:-${CONSOLE_URL:-}}"
|
|
|
38
38
|
CONSOLE_TELEMETRY_ENDPOINT_URL="${CONSOLE_TELEMETRY_ENDPOINT_URL:-}"
|
|
39
39
|
CONSOLE_TELEMETRY_TOKEN="${CONSOLE_TELEMETRY_TOKEN:-${CONSOLE_AUTH_TOKEN:-}}"
|
|
40
40
|
CONSOLE_TENANT_ID="${CONSOLE_TENANT_ID:-}"
|
|
41
|
+
# Pricing registry source (consumed by lib/pricing.sh). Explicit file/URL win;
|
|
42
|
+
# otherwise the URL is derived from the console below so all runtimes read one
|
|
43
|
+
# live pricing source. Falls back to the bundled pricing.json offline.
|
|
44
|
+
TELEMETRY_PRICING_FILE="${TELEMETRY_PRICING_FILE:-${FLOW_AGENTS_PRICING_FILE:-}}"
|
|
45
|
+
TELEMETRY_PRICING_URL="${TELEMETRY_PRICING_URL:-${FLOW_AGENTS_PRICING_URL:-}}"
|
|
41
46
|
|
|
42
47
|
# Load config file if it exists
|
|
43
48
|
if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
|
|
@@ -78,6 +83,9 @@ if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
|
|
|
78
83
|
console_telemetry_token) CONSOLE_TELEMETRY_TOKEN="$value" ;;
|
|
79
84
|
console_tenant_id) CONSOLE_TENANT_ID="$value" ;;
|
|
80
85
|
console_telemetry_redact) CONSOLE_TELEMETRY_REDACT="$value" ;;
|
|
86
|
+
console_pricing_url) TELEMETRY_PRICING_URL="$value" ;;
|
|
87
|
+
pricing_url) TELEMETRY_PRICING_URL="$value" ;;
|
|
88
|
+
pricing_file) TELEMETRY_PRICING_FILE="$value" ;;
|
|
81
89
|
esac
|
|
82
90
|
fi
|
|
83
91
|
done < "$TELEMETRY_CONFIG_FILE"
|
|
@@ -85,5 +93,12 @@ fi
|
|
|
85
93
|
|
|
86
94
|
CONSOLE_TELEMETRY_REDACT="${CONSOLE_TELEMETRY_REDACT:-${TELEMETRY_CHANNEL_ANALYTICS_REDACT}}"
|
|
87
95
|
|
|
96
|
+
# Derive the live pricing source from the console when not set explicitly, the
|
|
97
|
+
# same way the transport derives /api/telemetry/records. One live source for
|
|
98
|
+
# bash/Python/TS runtimes; lib/pricing.sh caches it and falls back to bundled.
|
|
99
|
+
if [[ -z "${TELEMETRY_PRICING_URL:-}" && -n "${CONSOLE_TELEMETRY_URL:-}" ]]; then
|
|
100
|
+
TELEMETRY_PRICING_URL="${CONSOLE_TELEMETRY_URL%/}/api/telemetry/pricing"
|
|
101
|
+
fi
|
|
102
|
+
|
|
88
103
|
# Ensure directories exist
|
|
89
104
|
mkdir -p "$TELEMETRY_DATA_DIR" "$TELEMETRY_SESSION_DIR" 2>/dev/null
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# pricing.sh — single-source pricing registry loader.
|
|
3
|
+
#
|
|
4
|
+
# Resolves the versioned pricing registry (pricing.json) from, in priority:
|
|
5
|
+
# 1. explicit local file TELEMETRY_PRICING_FILE / FLOW_AGENTS_PRICING_FILE
|
|
6
|
+
# 2. remote URL (cached) TELEMETRY_PRICING_URL / FLOW_AGENTS_PRICING_URL
|
|
7
|
+
# 3. bundled snapshot <telemetry>/pricing.json
|
|
8
|
+
# This is the one source every runtime + the console read from — local for
|
|
9
|
+
# air-gapped use, remote for a single live registry shared across machines.
|
|
10
|
+
|
|
11
|
+
PRICING_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
12
|
+
|
|
13
|
+
# Echo the raw registry JSON. Returns non-zero if nothing resolvable.
|
|
14
|
+
# Echo the raw pricing registry JSON on stdout; return non-zero if no source
# can be resolved.
#
# Resolution order:
#   1. explicit local file  (TELEMETRY_PRICING_FILE / FLOW_AGENTS_PRICING_FILE)
#   2. remote URL, cached   (TELEMETRY_PRICING_URL / FLOW_AGENTS_PRICING_URL),
#      only when curl is available; cache lives in ${TMPDIR:-/tmp} and is
#      reused while younger than TELEMETRY_PRICING_TTL_SEC (default 3600)
#   3. bundled snapshot     (pricing.json one directory above this library)
pricing_registry() {
  local f="${TELEMETRY_PRICING_FILE:-${FLOW_AGENTS_PRICING_FILE:-}}"
  if [[ -n "$f" && -f "$f" ]]; then cat "$f"; return 0; fi

  local url="${TELEMETRY_PRICING_URL:-${FLOW_AGENTS_PRICING_URL:-}}"
  if [[ -n "$url" ]] && command -v curl >/dev/null 2>&1; then
    local cache="${TMPDIR:-/tmp}/flow-agents-pricing-cache.json"
    local ttl="${TELEMETRY_PRICING_TTL_SEC:-3600}"
    # A non-numeric TTL would break the integer comparison below.
    [[ "$ttl" =~ ^[0-9]+$ ]] || ttl=3600
    if [[ -f "$cache" ]]; then
      local mtime now age
      # GNU form first: on GNU coreutils `stat -f` means --file-system, so the
      # BSD form prints a filesystem block to stdout and exits non-zero, which
      # would leave non-numeric text in mtime. The GNU form fails with no
      # stdout on BSD/macOS, so this order is safe on both.
      mtime=$(stat -c %Y "$cache" 2>/dev/null || stat -f %m "$cache" 2>/dev/null || echo 0)
      [[ "$mtime" =~ ^[0-9]+$ ]] || mtime=0
      now=$(date +%s)
      age=$(( now - mtime ))
      if [[ "$age" -lt "$ttl" ]]; then cat "$cache"; return 0; fi
    fi
    # Per-process temp name so concurrent refreshes do not write the same file.
    local tmp="${cache}.tmp.$$"
    if curl -fsS --max-time 5 "$url" -o "$tmp" 2>/dev/null && [[ -s "$tmp" ]]; then
      local valid=1
      # When jq is available, refuse to cache a body that is not a JSON object
      # (e.g. an HTML page served with status 200).
      if command -v jq >/dev/null 2>&1; then
        jq -e 'type == "object"' "$tmp" >/dev/null 2>&1 || valid=0
      fi
      if [[ "$valid" -eq 1 ]] && mv "$tmp" "$cache" 2>/dev/null; then
        cat "$cache"
        return 0
      fi
    fi
    rm -f "$tmp" 2>/dev/null
    [[ -f "$cache" ]] && { cat "$cache"; return 0; }  # stale cache beats nothing
  fi

  local bundled
  bundled="$(cd "${PRICING_LIB_DIR}/.." && pwd)/pricing.json"
  [[ -f "$bundled" ]] && { cat "$bundled"; return 0; }
  return 1
}
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# usage.sh — Session usage metric functions
|
|
3
3
|
|
|
4
|
+
# Module directory, resolved once at source time (cwd-independent).
|
|
5
|
+
USAGE_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
|
|
7
|
+
# Single-source pricing registry loader (local / remote / bundled).
|
|
8
|
+
source "${USAGE_LIB_DIR}/pricing.sh"
|
|
9
|
+
|
|
4
10
|
# Resolve model from agent-spec.json
|
|
5
11
|
usage_get_model() {
|
|
6
12
|
local agent_name="$1"
|
|
@@ -27,3 +33,105 @@ usage_count_delegations() {
|
|
|
27
33
|
[[ ! -f "$jsonl_path" ]] && echo 0 && return
|
|
28
34
|
grep -c "\"session_id\":\"${session_id}\".*\"event_type\":\"agent.delegate\"" "$jsonl_path" 2>/dev/null || echo 0
|
|
29
35
|
}
|
|
36
|
+
|
|
37
|
+
# Parse a runtime transcript (JSONL) into real per-model token + cost usage.
|
|
38
|
+
# Ground truth lives in each assistant message's `.message.usage` block:
|
|
39
|
+
# input_tokens (uncached), output_tokens, cache_creation_input_tokens,
|
|
40
|
+
# cache_read_input_tokens — plus `.message.model`.
|
|
41
|
+
# Cost is derived from the versioned pricing registry: cache writes bill at
|
|
42
|
+
# input*write_5m, cache reads at input*read. Cost uses the registry's
|
|
43
|
+
# current_version (override with arg $2) and the result stamps `pricing_version`
|
|
44
|
+
# so the console can reproduce or recompute it. Emits a compact JSON object:
|
|
45
|
+
# { by_model: [ {model, input_tokens, output_tokens,
|
|
46
|
+
# cache_creation_input_tokens, cache_read_input_tokens,
|
|
47
|
+
# estimated_cost_usd} ],
|
|
48
|
+
# input_tokens, output_tokens, cache_creation_input_tokens,
|
|
49
|
+
# cache_read_input_tokens, estimated_cost_usd, pricing_version }
|
|
50
|
+
# Prints nothing (non-zero) when the transcript is missing/unparseable so the
|
|
51
|
+
# caller can fall back to null usage. Never blocks agent work.
|
|
52
|
+
# Expected transcript usage path (Claude Code / Anthropic usage object). Bumped
|
|
53
|
+
# if the on-disk schema changes so drift is logged rather than silently zeroed.
|
|
54
|
+
USAGE_TRANSCRIPT_SCHEMA="message.usage.input_tokens"
|
|
55
|
+
|
|
56
|
+
# Append a one-line schema-drift warning (transcript carried usage data we could
|
|
57
|
+
# not parse). Goes to TELEMETRY_DRIFT_LOG if set, else stderr. Never fatal.
|
|
58
|
+
# Emit a single-line schema-drift warning for a transcript whose usage data
# could not be parsed at the expected path (USAGE_TRANSCRIPT_SCHEMA).
#   $1  path of the transcript that triggered the warning
# The line is appended to TELEMETRY_DRIFT_LOG when that variable is non-empty;
# if it is empty, or the append fails, the line goes to stderr instead.
usage_log_drift() {
  local src="$1"
  local warning="[telemetry] pricing/usage drift: ${src} has usage data but expected path '${USAGE_TRANSCRIPT_SCHEMA}' parsed 0 tokens — transcript schema may have changed"
  local sink="${TELEMETRY_DRIFT_LOG:-}"
  if [[ -z "$sink" ]]; then
    echo "$warning" >&2
    return
  fi
  echo "$warning" >> "$sink" 2>/dev/null || echo "$warning" >&2
}
|
|
67
|
+
|
|
68
|
+
# Parse a JSONL transcript into per-model token totals and an estimated cost.
#   $1  transcript path
#   $2  optional pricing version; defaults to the registry's current_version
# Prints one compact-free JSON object (by_model, token totals,
# estimated_cost_usd, pricing_version) on success. Returns 1 and prints
# nothing when the transcript is missing, jq is unavailable, the pricing
# registry cannot be resolved, the requested version is absent, jq fails, or
# the parsed token total is zero (logging a drift warning if the transcript
# nevertheless mentions "input_tokens").
#
# Zero-cost models are matched by EXACT name. jq's `inside`/`contains` compare
# strings by substring, which would make any model whose name is a fragment of
# a listed entry (e.g. "known" inside "unknown") free.
# Transcript lines that are not objects, or whose `message`/`usage` is not an
# object, are skipped rather than aborting the whole parse.
usage_parse_transcript() {
  local transcript="$1" version="${2:-}"
  [[ -z "$transcript" || ! -f "$transcript" ]] && return 1
  command -v jq >/dev/null 2>&1 || return 1
  local registry
  registry="$(pricing_registry)" || return 1
  [[ -z "$registry" ]] && return 1

  local out
  out="$(jq -n --argjson registry "$registry" --arg version "$version" '
    $registry as $reg
    | (if $version == "" then ($reg.current_version) else $version end) as $ver
    | ($reg.versions[$ver]) as $p
    | if $p == null then empty else . end
    | ($p.cache_multipliers) as $cm
    | (reduce inputs as $l ({};
        (if ($l | type) == "object" and ($l.message | type) == "object"
         then $l.message else {} end) as $msg
        | ($msg.usage) as $u
        | if ($u | type) == "object" then
            (($msg.model) // "unknown") as $m
            | .[$m].input = ((.[$m].input // 0) + (($u.input_tokens) // 0))
            | .[$m].output = ((.[$m].output // 0) + (($u.output_tokens) // 0))
            | .[$m].cache_creation = ((.[$m].cache_creation // 0) + (($u.cache_creation_input_tokens) // 0))
            | .[$m].cache_read = ((.[$m].cache_read // 0) + (($u.cache_read_input_tokens) // 0))
          else . end)) as $agg
    | ($agg | to_entries
        | map(
            .key as $m | .value as $u
            | (($p.models[$m]) // $p.default) as $rate
            | (if any(($p.zero_cost_models // [])[]; . == $m) then 0 else 1 end) as $billable
            | {
                model: $m,
                input_tokens: ($u.input // 0),
                output_tokens: ($u.output // 0),
                cache_creation_input_tokens: ($u.cache_creation // 0),
                cache_read_input_tokens: ($u.cache_read // 0),
                estimated_cost_usd: (
                  $billable * (
                    ($u.input // 0) * $rate.input
                    + ($u.output // 0) * $rate.output
                    + ($u.cache_creation // 0) * $rate.input * $cm.write_5m
                    + ($u.cache_read // 0) * $rate.input * $cm.read
                  ) / 1000000
                )
              })) as $by_model
    | {
        by_model: $by_model,
        input_tokens: ([$by_model[].input_tokens] | add // 0),
        output_tokens: ([$by_model[].output_tokens] | add // 0),
        cache_creation_input_tokens: ([$by_model[].cache_creation_input_tokens] | add // 0),
        cache_read_input_tokens: ([$by_model[].cache_read_input_tokens] | add // 0),
        estimated_cost_usd: (([$by_model[].estimated_cost_usd] | add // 0) * 1000000 | round / 1000000),
        pricing_version: $ver
      }
  ' < "$transcript" 2>/dev/null)"

  [[ -z "$out" ]] && return 1

  # Drift / emptiness check: if we parsed zero tokens but the transcript clearly
  # contains usage data, the schema drifted — warn and fall back to null usage.
  local total
  total="$(printf '%s' "$out" | jq -r '((.input_tokens // 0) + (.output_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0))' 2>/dev/null)"
  if [[ -z "$total" || "$total" == "0" ]]; then
    if grep -q '"input_tokens"' "$transcript" 2>/dev/null; then
      usage_log_drift "$transcript"
    fi
    return 1
  fi

  printf '%s\n' "$out"
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Cross-runtime cost golden vectors. Keep IN SYNC with console-telemetry/test/golden-vectors.json (identical content). Asserted by the flow-agents bash usage tests, the Python sink tests, and the console-telemetry package so every runtime that prices tokens produces the SAME cost. If these drift between repos, a runtime's cost math has diverged.",
|
|
3
|
+
"pricing_version": "2026-06-28",
|
|
4
|
+
"cases": [
|
|
5
|
+
{ "name": "opus cache-read-dominated", "model": "claude-opus-4-8", "tokens": { "input": 1000, "output": 2000, "cache_creation": 0, "cache_read": 500000 }, "expected_cost_usd": 0.305 },
|
|
6
|
+
{ "name": "opus output only", "model": "claude-opus-4-8", "tokens": { "input": 0, "output": 1000, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 0.025 },
|
|
7
|
+
{ "name": "fable output", "model": "claude-fable-5", "tokens": { "input": 0, "output": 100, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 0.005 },
|
|
8
|
+
{ "name": "haiku output", "model": "claude-haiku-4-5", "tokens": { "input": 0, "output": 1000, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 0.005 },
|
|
9
|
+
{ "name": "sonnet input 1M", "model": "claude-sonnet-4-6", "tokens": { "input": 1000000, "output": 0, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 3.0 },
|
|
10
|
+
{ "name": "opus cache-write 5m tier", "model": "claude-opus-4-8", "tokens": { "input": 0, "output": 0, "cache_creation": 1000000, "cache_read": 0 }, "expected_cost_usd": 6.25 },
|
|
11
|
+
{ "name": "opus billion-scale", "model": "claude-opus-4-8", "tokens": { "input": 200000, "output": 1600000, "cache_creation": 9000000, "cache_read": 1000000000 }, "expected_cost_usd": 597.25 },
|
|
12
|
+
{ "name": "synthetic is free", "model": "<synthetic>", "tokens": { "input": 999, "output": 999, "cache_creation": 999, "cache_read": 999 }, "expected_cost_usd": 0 },
|
|
13
|
+
{ "name": "unknown model uses default rate", "model": "some-unlisted-model", "tokens": { "input": 1000000, "output": 0, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 5.0 }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "2.0",
|
|
3
|
+
"current_version": "2026-06-28",
|
|
4
|
+
"source": "Anthropic public list pricing; cache multipliers per prompt-caching docs",
|
|
5
|
+
"versions": {
|
|
6
|
+
"2026-06-28": {
|
|
7
|
+
"effective_date": "2026-06-28",
|
|
8
|
+
"currency": "USD",
|
|
9
|
+
"unit": "per_1m_tokens",
|
|
10
|
+
"cache_multipliers": {
|
|
11
|
+
"write_5m": 1.25,
|
|
12
|
+
"write_1h": 2.0,
|
|
13
|
+
"read": 0.1
|
|
14
|
+
},
|
|
15
|
+
"models": {
|
|
16
|
+
"claude-fable-5": { "input": 10.0, "output": 50.0 },
|
|
17
|
+
"claude-mythos-5": { "input": 10.0, "output": 50.0 },
|
|
18
|
+
"claude-opus-4-8": { "input": 5.0, "output": 25.0 },
|
|
19
|
+
"claude-opus-4-7": { "input": 5.0, "output": 25.0 },
|
|
20
|
+
"claude-opus-4-6": { "input": 5.0, "output": 25.0 },
|
|
21
|
+
"claude-opus-4-5": { "input": 5.0, "output": 25.0 },
|
|
22
|
+
"claude-opus-4-1": { "input": 15.0, "output": 75.0 },
|
|
23
|
+
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0 },
|
|
24
|
+
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0 },
|
|
25
|
+
"claude-haiku-4-5": { "input": 1.0, "output": 5.0 }
|
|
26
|
+
},
|
|
27
|
+
"default": { "input": 5.0, "output": 25.0 },
|
|
28
|
+
"zero_cost_models": ["<synthetic>", "synthetic", "unknown", ""]
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -8,6 +8,10 @@ channel.analytics.redact=tool.input,tool.output,turn.prompt_text,delegation.targ
|
|
|
8
8
|
# The transport derives /api/telemetry/records from console_telemetry_url.
|
|
9
9
|
# console_telemetry_token=
|
|
10
10
|
# console_tenant_id=
|
|
11
|
+
# Live pricing registry source. If unset, derived from console_telemetry_url as
|
|
12
|
+
# <console>/api/telemetry/pricing so bash/Python/TS runtimes read one live
|
|
13
|
+
# source; lib/pricing.sh caches it and falls back to bundled pricing.json.
|
|
14
|
+
# console_pricing_url=https://console.kontourai.io/api/telemetry/pricing
|
|
11
15
|
enrich_system=true
|
|
12
16
|
enrich_workspace=true
|
|
13
17
|
enrich_auth=true
|
|
@@ -309,13 +309,35 @@ add_stop_data_and_emit_usage() {
|
|
|
309
309
|
tool_count=$(usage_count_tool_calls "$session_id" "$full_log")
|
|
310
310
|
delegation_count=$(usage_count_delegations "$session_id" "$full_log")
|
|
311
311
|
|
|
312
|
+
# Ground-truth token + cost usage from the runtime transcript, when the
|
|
313
|
+
# runtime exposes one (Claude Code, Codex, etc. set hook.transcript_path).
|
|
314
|
+
# Tokens are source-of-truth; estimated_cost_usd is derived from pricing.json
|
|
315
|
+
# (recomputed authoritatively console-side, so pricing updates are retroactive).
|
|
316
|
+
local transcript_path transcript_usage
|
|
317
|
+
transcript_path=$(echo "$event" | jq -r '.hook.transcript_path // ""')
|
|
318
|
+
transcript_usage=$(usage_parse_transcript "$transcript_path")
|
|
319
|
+
[[ -z "$transcript_usage" ]] && transcript_usage='null'
|
|
320
|
+
|
|
312
321
|
local usage_event
|
|
313
322
|
usage_event=$(echo "$event" | jq -c \
|
|
314
323
|
--arg m "$model" \
|
|
315
324
|
--argjson tc "$tool_count" \
|
|
316
325
|
--argjson dc "$delegation_count" \
|
|
326
|
+
--argjson tu "$transcript_usage" \
|
|
317
327
|
'.event_type = "session.usage" | .event_id = (.event_id + "-usage") | . + {
|
|
318
|
-
usage: {
|
|
328
|
+
usage: ({
|
|
329
|
+
model: $m,
|
|
330
|
+
duration_s: .session.duration_s,
|
|
331
|
+
tool_invocations: $tc,
|
|
332
|
+
delegations: $dc,
|
|
333
|
+
input_tokens: ($tu.input_tokens // null),
|
|
334
|
+
output_tokens: ($tu.output_tokens // null),
|
|
335
|
+
cache_creation_input_tokens: ($tu.cache_creation_input_tokens // null),
|
|
336
|
+
cache_read_input_tokens: ($tu.cache_read_input_tokens // null),
|
|
337
|
+
estimated_cost_usd: ($tu.estimated_cost_usd // null),
|
|
338
|
+
pricing_version: ($tu.pricing_version // null),
|
|
339
|
+
by_model: ($tu.by_model // null)
|
|
340
|
+
})
|
|
319
341
|
}')
|
|
320
342
|
transport_emit "$usage_event"
|
|
321
343
|
fi
|