@kontourai/flow-agents 2.0.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/actions/trust-verify/action.yml +4 -2
- package/.github/workflows/ci.yml +16 -4
- package/.github/workflows/docs-pages.yml +1 -1
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +2 -2
- package/.github/workflows/runtime-compat.yml +2 -2
- package/.github/workflows/trust-reconcile.yml +1 -1
- package/CHANGELOG.md +28 -0
- package/README.md +3 -3
- package/build/src/cli/workflow-sidecar.js +8 -2
- package/context/scripts/telemetry/lib/config.sh +15 -0
- package/context/scripts/telemetry/telemetry.conf +4 -0
- package/context/scripts/telemetry/telemetry.sh +23 -1
- package/docs/design/flowrun-eventsourcing-design.md +216 -0
- package/docs/design/workflowrun-observability-design.md +431 -0
- package/evals/ci/antigaming-suite.sh +1 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/integration/test_command_log_concurrency.sh +114 -0
- package/evals/integration/test_gate_lockdown.sh +21 -6
- package/evals/integration/test_usage_cost.sh +119 -0
- package/evals/integration/test_verify_cli.sh +23 -0
- package/integrations/strands/flow_agents_strands/hooks.py +126 -1
- package/integrations/strands/flow_agents_strands/telemetry.py +172 -0
- package/integrations/strands/tests/test_usage.py +129 -0
- package/integrations/strands-ts/src/hooks.ts +135 -1
- package/integrations/strands-ts/src/telemetry.ts +170 -0
- package/integrations/strands-ts/test/test-usage.ts +85 -0
- package/package.json +2 -2
- package/scripts/ci/trust-reconcile.js +7 -23
- package/scripts/hooks/evidence-capture.js +85 -50
- package/scripts/hooks/stop-goal-fit.js +18 -45
- package/scripts/lib/command-log-chain.js +73 -0
- package/scripts/repair-command-log.js +8 -15
- package/scripts/telemetry/lib/config.sh +15 -0
- package/scripts/telemetry/lib/pricing.sh +42 -0
- package/scripts/telemetry/lib/usage.sh +108 -0
- package/scripts/telemetry/pricing.golden.json +15 -0
- package/scripts/telemetry/pricing.json +31 -0
- package/scripts/telemetry/telemetry.conf +4 -0
- package/scripts/telemetry/telemetry.sh +23 -1
- package/src/cli/workflow-sidecar.ts +8 -2
|
@@ -29,6 +29,16 @@ const path = require('path');
|
|
|
29
29
|
const { spawnSync } = require('child_process');
|
|
30
30
|
const crypto = require('crypto');
|
|
31
31
|
|
|
32
|
+
// Hash-chain primitives + the exit-code-laundering heuristic come from ONE shared
|
|
33
|
+
// module, so this verifier can never drift from the writer (evidence-capture.js).
|
|
34
|
+
// CHAIN_GENESIS is re-aliased to CHAIN_GENESIS_VERIFY to preserve the long-standing
|
|
35
|
+
// export name consumed by repair-command-log.js and the fork-classification eval.
|
|
36
|
+
const {
|
|
37
|
+
CHAIN_GENESIS: CHAIN_GENESIS_VERIFY,
|
|
38
|
+
canonicalJsonForChain,
|
|
39
|
+
hasLaunderingOperator,
|
|
40
|
+
} = require('../lib/command-log-chain.js');
|
|
41
|
+
|
|
32
42
|
const MAX_STDIN = 1024 * 1024;
|
|
33
43
|
const ACTIVE_STATUSES = new Set([
|
|
34
44
|
'planning',
|
|
@@ -733,36 +743,10 @@ function claimAcknowledgesFailure(status, value) {
|
|
|
733
743
|
|| v === 'fail' || v === 'failed' || v === 'not_verified' || v === 'failing';
|
|
734
744
|
}
|
|
735
745
|
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
*
|
|
741
|
-
* R6 extended logic (identical patterns used by scripts/ci/trust-reconcile.js — centralize
|
|
742
|
-
* as a follow-up if drift becomes a maintenance concern):
|
|
743
|
-
* - ANY || operator is flagged. A legitimate verification command never needs || — its
|
|
744
|
-
* only purpose in a verification command is to mask the real exit code (e.g.
|
|
745
|
-
* `npm test || exit 0`, `npm test || echo ok`, `npm test || /bin/true`, `npm test || (exit 0)`).
|
|
746
|
-
* - | true (single pipe into true — always exits 0)
|
|
747
|
-
* - Trailing ; or newline followed by: true : exit 0 /bin/true
|
|
748
|
-
*
|
|
749
|
-
* Fix D: applied in captureCrossReference's satisfied path and capturedFailReconciliation.
|
|
750
|
-
*/
|
|
751
|
-
function hasLaunderingOperator(cmd) {
|
|
752
|
-
// ANY || in a claimed verification command is an exit-code mask.
|
|
753
|
-
// Legitimate verification commands never need || — its only purpose there is to
|
|
754
|
-
// suppress the real exit code (|| exit 0, || echo ok, || /bin/true, || (exit 0), etc.).
|
|
755
|
-
if (/\|\|/.test(cmd)) return true;
|
|
756
|
-
// | true — single-pipe into true: `cmd | true` always exits 0 regardless of left-side exit code.
|
|
757
|
-
if (/\|\s*true\b/.test(cmd)) return true;
|
|
758
|
-
// Trailing ; or \n followed by exit-neutralizing commands (same threat, appended after the real cmd):
|
|
759
|
-
// ; true ; : ; exit 0 ; /bin/true (and \n variants)
|
|
760
|
-
if (/[;\n]\s*true\b/.test(cmd)) return true;
|
|
761
|
-
if (/[;\n]\s*:\s*(?:$|\s|;)/.test(cmd)) return true;
|
|
762
|
-
if (/[;\n]\s*exit\s+0\b/.test(cmd)) return true;
|
|
763
|
-
if (/[;\n]\s*\/bin\/true\b/.test(cmd)) return true;
|
|
764
|
-
return false;
|
|
765
|
-
}
|
|
746
|
+
// hasLaunderingOperator (the exit-code-mask heuristic) is imported from
|
|
747
|
+
// ../lib/command-log-chain.js so this verifier and scripts/ci/trust-reconcile.js
|
|
748
|
+
// share one normative definition. Applied in captureCrossReference's satisfied
|
|
749
|
+
// path and capturedFailReconciliation.
|
|
766
750
|
|
|
767
751
|
// ─── Hash-chain integrity verification (Increment B2, tamper-EVIDENCE) ────────
|
|
768
752
|
//
|
|
@@ -786,20 +770,9 @@ function hasLaunderingOperator(cmd) {
|
|
|
786
770
|
// The genesis prevHash is a fixed arbitrary sentinel — NOT the SHA256 of any
|
|
787
771
|
// specific input string. The comment in evidence-capture.js previously (and
|
|
788
772
|
// incorrectly) claimed it was sha256("flow-agents:command-log:genesis"); it is not.
|
|
789
|
-
//
|
|
790
|
-
//
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
/**
|
|
794
|
-
* Canonical JSON for chain verification: record WITHOUT `_chain`, keys sorted.
|
|
795
|
-
* Must be byte-identical to canonicalJsonForChain() in evidence-capture.js.
|
|
796
|
-
*/
|
|
797
|
-
function canonicalJsonForVerify(record) {
|
|
798
|
-
const keys = Object.keys(record).filter(k => k !== '_chain').sort();
|
|
799
|
-
const obj = {};
|
|
800
|
-
for (const k of keys) obj[k] = record[k];
|
|
801
|
-
return JSON.stringify(obj);
|
|
802
|
-
}
|
|
773
|
+
// Both the genesis (CHAIN_GENESIS_VERIFY, imported above) and the canonical-JSON
|
|
774
|
+
// helper (canonicalJsonForChain) come from ../lib/command-log-chain.js, the single
|
|
775
|
+
// source the writer in evidence-capture.js imports too — so they cannot diverge.
|
|
803
776
|
|
|
804
777
|
/**
|
|
805
778
|
* Verify the hash chain of command-log.jsonl.
|
|
@@ -870,7 +843,7 @@ function verifyCommandLogChain(artifactDir) {
|
|
|
870
843
|
// (a) Self-consistency. A content edit without rehashing fails here.
|
|
871
844
|
if (typeof chain.prevHash !== 'string') return { status: 'broken', brokenAt: i, forkAt: null };
|
|
872
845
|
const selfHash = crypto.createHash('sha256')
|
|
873
|
-
.update(chain.prevHash +
|
|
846
|
+
.update(chain.prevHash + canonicalJsonForChain(entry), 'utf8')
|
|
874
847
|
.digest('hex');
|
|
875
848
|
if (chain.hash !== selfHash) return { status: 'broken', brokenAt: i, forkAt: null };
|
|
876
849
|
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
//
|
|
3
|
+
// Single normative source for the command-log hash-chain primitives and the
|
|
4
|
+
// exit-code-laundering heuristic.
|
|
5
|
+
//
|
|
6
|
+
// These were previously copy-pasted across the writer (hooks/evidence-capture.js),
|
|
7
|
+
// the verifier (hooks/stop-goal-fit.js), the repair tool (repair-command-log.js),
|
|
8
|
+
// and CI reconcile (ci/trust-reconcile.js) under "keep byte-identical" comments —
|
|
9
|
+
// the most security-sensitive path in the bundle, since the chain's integrity
|
|
10
|
+
// claim rests on writer and verifier canonicalizing identically. The copies had
|
|
11
|
+
// ALREADY drifted (ci/trust-reconcile's hasLaunderingOperator was missing the
|
|
12
|
+
// trailing `/bin/true` check), which is exactly the failure mode duplication
|
|
13
|
+
// invites. Importing from one module makes that divergence structurally impossible.
|
|
14
|
+
//
|
|
15
|
+
const crypto = require('crypto');
|
|
16
|
+
|
|
17
|
+
// The genesis prevHash is a FIXED ARBITRARY SENTINEL — NOT the SHA256 of any
|
|
18
|
+
// specific input string. (An earlier comment incorrectly claimed it was
|
|
19
|
+
// sha256("flow-agents:command-log:genesis"); that is wrong.) Writer and verifier
|
|
20
|
+
// MUST share this exact value — existing chained logs depend on it.
|
|
21
|
+
//
|
|
22
|
+
// HONEST FRAMING: this makes alteration DETECTABLE, not impossible. An agent that
|
|
23
|
+
// rewrites all hashes can still forge the chain. The real tamper-proof boundary is
|
|
24
|
+
// the signed checkpoint (B1). We do not oversell this boundary.
|
|
25
|
+
const CHAIN_GENESIS = 'a3f9e2b7d5c84f1e6a0d2c3b9f7e1a4d8c6b5f2e9a0d3c7b1f4e8a2d6c0b9f3';
|
|
26
|
+
|
|
27
|
+
/**
 * Serialize a command-log record into the canonical text that is hashed into
 * its chain link.
 *
 * The record's own `_chain` field is excluded (a link must not feed into its
 * own hash) and the remaining top-level keys are written in sorted order, so
 * the result does not depend on key insertion order. Nested values are passed
 * to JSON.stringify untouched.
 *
 * @param {object} record - command-log entry, with or without `_chain`.
 * @returns {string} canonical JSON text for the record.
 */
function canonicalJsonForChain(record) {
  const canonical = {};
  Object.keys(record)
    .sort()
    .forEach((key) => {
      // Skipping `_chain` after sorting leaves the other keys in the same
      // relative order as filtering it out first.
      if (key === '_chain') return;
      canonical[key] = record[key];
    });
  return JSON.stringify(canonical);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Compute the hash that links one record into the command-log chain.
 *
 * @param {string} prevHash - hash of the preceding link.
 * @param {object} record - record being linked; canonicalized with
 *   canonicalJsonForChain, so its `_chain` field does not affect the result.
 * @returns {string} hex SHA-256 of `prevHash` followed by the canonical JSON.
 */
function computeChainHash(prevHash, record) {
  const preimage = prevHash + canonicalJsonForChain(record);
  const hasher = crypto.createHash('sha256');
  hasher.update(preimage, 'utf8');
  return hasher.digest('hex');
}
|
|
46
|
+
|
|
47
|
+
/**
 * Heuristic: does a claimed verification command contain an operator whose
 * effect is to hide a real non-zero exit status?
 *
 * Flagged forms:
 *   - any `||`                      (`npm test || exit 0`, `|| echo ok`, ...)
 *   - `| true`                      (pipe into true)
 *   - `;` or newline followed by `true`, `:`, `exit 0` or `/bin/true`
 *
 * @param {string} cmd - the command line as recorded.
 * @returns {boolean} true when any laundering pattern matches.
 */
function hasLaunderingOperator(cmd) {
  const launderingPatterns = [
    /\|\|/,                   // any || masks the left-hand exit code
    /\|\s*true\b/,            // cmd | true
    /[;\n]\s*true\b/,         // cmd ; true
    /[;\n]\s*:\s*(?:$|\s|;)/, // cmd ; :
    /[;\n]\s*exit\s+0\b/,     // cmd ; exit 0
    /[;\n]\s*\/bin\/true\b/,  // cmd ; /bin/true
  ];
  return launderingPatterns.some((pattern) => pattern.test(cmd));
}
|
|
67
|
+
|
|
68
|
+
module.exports = {
|
|
69
|
+
CHAIN_GENESIS,
|
|
70
|
+
canonicalJsonForChain,
|
|
71
|
+
computeChainHash,
|
|
72
|
+
hasLaunderingOperator,
|
|
73
|
+
};
|
|
@@ -26,17 +26,10 @@ const path = require('path');
|
|
|
26
26
|
const crypto = require('crypto');
|
|
27
27
|
|
|
28
28
|
const gate = require(path.join(__dirname, 'hooks', 'stop-goal-fit.js'));
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
const obj = {};
|
|
34
|
-
for (const k of keys) obj[k] = rec[k];
|
|
35
|
-
return JSON.stringify(obj);
|
|
36
|
-
}
|
|
37
|
-
function hashLink(prev, rec) {
|
|
38
|
-
return crypto.createHash('sha256').update(prev + canon(rec), 'utf8').digest('hex');
|
|
39
|
-
}
|
|
29
|
+
// Genesis + canonicalization/hash come from the single shared module, so a repaired
|
|
30
|
+
// chain is re-linked byte-identically to how the writer/verifier compute it.
|
|
31
|
+
const { CHAIN_GENESIS, canonicalJsonForChain, computeChainHash } = require('./lib/command-log-chain.js');
|
|
32
|
+
const GENESIS = CHAIN_GENESIS;
|
|
40
33
|
|
|
41
34
|
function main() {
|
|
42
35
|
const dir = process.argv[2];
|
|
@@ -77,8 +70,8 @@ function main() {
|
|
|
77
70
|
records.sort((a, b) => {
|
|
78
71
|
const ta = String(a.capturedAt || ''), tb = String(b.capturedAt || '');
|
|
79
72
|
if (ta !== tb) return ta < tb ? -1 : 1;
|
|
80
|
-
const ha = crypto.createHash('sha256').update(
|
|
81
|
-
const hb = crypto.createHash('sha256').update(
|
|
73
|
+
const ha = crypto.createHash('sha256').update(canonicalJsonForChain(a)).digest('hex');
|
|
74
|
+
const hb = crypto.createHash('sha256').update(canonicalJsonForChain(b)).digest('hex');
|
|
82
75
|
return ha < hb ? -1 : ha > hb ? 1 : 0;
|
|
83
76
|
});
|
|
84
77
|
|
|
@@ -87,7 +80,7 @@ function main() {
|
|
|
87
80
|
let prev = GENESIS;
|
|
88
81
|
let seq = 0;
|
|
89
82
|
for (const rec of records) {
|
|
90
|
-
const h =
|
|
83
|
+
const h = computeChainHash(prev, rec);
|
|
91
84
|
out.push(JSON.stringify({ ...rec, _chain: { seq, prevHash: prev, hash: h } }));
|
|
92
85
|
prev = h; seq += 1;
|
|
93
86
|
}
|
|
@@ -100,7 +93,7 @@ function main() {
|
|
|
100
93
|
source: 'chain-repair',
|
|
101
94
|
repair: { reason, entries: records.length, forkAt: verdict.forkAt },
|
|
102
95
|
};
|
|
103
|
-
const mh =
|
|
96
|
+
const mh = computeChainHash(prev, marker);
|
|
104
97
|
out.push(JSON.stringify({ ...marker, _chain: { seq, prevHash: prev, hash: mh } }));
|
|
105
98
|
|
|
106
99
|
fs.copyFileSync(file, file + '.prebackup-repair');
|
|
@@ -38,6 +38,11 @@ CONSOLE_TELEMETRY_URL="${CONSOLE_TELEMETRY_URL:-${CONSOLE_URL:-}}"
|
|
|
38
38
|
CONSOLE_TELEMETRY_ENDPOINT_URL="${CONSOLE_TELEMETRY_ENDPOINT_URL:-}"
|
|
39
39
|
CONSOLE_TELEMETRY_TOKEN="${CONSOLE_TELEMETRY_TOKEN:-${CONSOLE_AUTH_TOKEN:-}}"
|
|
40
40
|
CONSOLE_TENANT_ID="${CONSOLE_TENANT_ID:-}"
|
|
41
|
+
# Pricing registry source (consumed by lib/pricing.sh). Explicit file/URL win;
|
|
42
|
+
# otherwise the URL is derived from the console below so all runtimes read one
|
|
43
|
+
# live pricing source. Falls back to the bundled pricing.json offline.
|
|
44
|
+
TELEMETRY_PRICING_FILE="${TELEMETRY_PRICING_FILE:-${FLOW_AGENTS_PRICING_FILE:-}}"
|
|
45
|
+
TELEMETRY_PRICING_URL="${TELEMETRY_PRICING_URL:-${FLOW_AGENTS_PRICING_URL:-}}"
|
|
41
46
|
|
|
42
47
|
# Load config file if it exists
|
|
43
48
|
if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
|
|
@@ -78,6 +83,9 @@ if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
|
|
|
78
83
|
console_telemetry_token) CONSOLE_TELEMETRY_TOKEN="$value" ;;
|
|
79
84
|
console_tenant_id) CONSOLE_TENANT_ID="$value" ;;
|
|
80
85
|
console_telemetry_redact) CONSOLE_TELEMETRY_REDACT="$value" ;;
|
|
86
|
+
console_pricing_url) TELEMETRY_PRICING_URL="$value" ;;
|
|
87
|
+
pricing_url) TELEMETRY_PRICING_URL="$value" ;;
|
|
88
|
+
pricing_file) TELEMETRY_PRICING_FILE="$value" ;;
|
|
81
89
|
esac
|
|
82
90
|
fi
|
|
83
91
|
done < "$TELEMETRY_CONFIG_FILE"
|
|
@@ -85,5 +93,12 @@ fi
|
|
|
85
93
|
|
|
86
94
|
CONSOLE_TELEMETRY_REDACT="${CONSOLE_TELEMETRY_REDACT:-${TELEMETRY_CHANNEL_ANALYTICS_REDACT}}"
|
|
87
95
|
|
|
96
|
+
# Derive the live pricing source from the console when not set explicitly, the
|
|
97
|
+
# same way the transport derives /api/telemetry/records. One live source for
|
|
98
|
+
# bash/Python/TS runtimes; lib/pricing.sh caches it and falls back to bundled.
|
|
99
|
+
if [[ -z "${TELEMETRY_PRICING_URL:-}" && -n "${CONSOLE_TELEMETRY_URL:-}" ]]; then
|
|
100
|
+
TELEMETRY_PRICING_URL="${CONSOLE_TELEMETRY_URL%/}/api/telemetry/pricing"
|
|
101
|
+
fi
|
|
102
|
+
|
|
88
103
|
# Ensure directories exist
|
|
89
104
|
mkdir -p "$TELEMETRY_DATA_DIR" "$TELEMETRY_SESSION_DIR" 2>/dev/null
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# pricing.sh — single-source pricing registry loader.
|
|
3
|
+
#
|
|
4
|
+
# Resolves the versioned pricing registry (pricing.json) from, in priority:
|
|
5
|
+
# 1. explicit local file TELEMETRY_PRICING_FILE / FLOW_AGENTS_PRICING_FILE
|
|
6
|
+
# 2. remote URL (cached) TELEMETRY_PRICING_URL / FLOW_AGENTS_PRICING_URL
|
|
7
|
+
# 3. bundled snapshot <telemetry>/pricing.json
|
|
8
|
+
# This is the one source every runtime + the console read from — local for
|
|
9
|
+
# air-gapped use, remote for a single live registry shared across machines.
|
|
10
|
+
|
|
11
|
+
PRICING_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
12
|
+
|
|
13
|
+
# Echo the raw pricing-registry JSON on stdout.
#
# Resolution order (first hit wins):
#   1. explicit local file - TELEMETRY_PRICING_FILE, else FLOW_AGENTS_PRICING_FILE
#   2. remote URL          - TELEMETRY_PRICING_URL, else FLOW_AGENTS_PRICING_URL
#                            (requires curl; cached in ${TMPDIR:-/tmp} and
#                            reused for TELEMETRY_PRICING_TTL_SEC seconds,
#                            default 3600)
#   3. bundled snapshot    - pricing.json one directory above PRICING_LIB_DIR
#
# Returns 0 after printing a registry, 1 when nothing could be resolved.
pricing_registry() {
  local f="${TELEMETRY_PRICING_FILE:-${FLOW_AGENTS_PRICING_FILE:-}}"
  if [[ -n "$f" && -f "$f" ]]; then cat "$f"; return 0; fi

  local url="${TELEMETRY_PRICING_URL:-${FLOW_AGENTS_PRICING_URL:-}}"
  if [[ -n "$url" ]] && command -v curl >/dev/null 2>&1; then
    # NOTE(review): fixed file name in a possibly shared temp dir, and not keyed
    # by URL - consider a per-user cache location.
    local cache="${TMPDIR:-/tmp}/flow-agents-pricing-cache.json"
    local ttl="${TELEMETRY_PRICING_TTL_SEC:-3600}"
    if [[ -f "$cache" ]]; then
      local mtime now age
      # Try the GNU form first. On GNU coreutils `stat -f` means --file-system:
      # `stat -f %m FILE` prints a filesystem report for FILE on stdout and then
      # fails on the nonexistent "%m" operand, so running it first pollutes the
      # captured value before the fallback appends the real mtime. BSD/macOS
      # stat rejects -c without writing to stdout, so this order is safe on both.
      mtime=$(stat -c %Y "$cache" 2>/dev/null || stat -f %m "$cache" 2>/dev/null || echo 0)
      # Anything that is not a plain integer is treated as "very old" so the
      # arithmetic below cannot fail and the cache is refreshed.
      [[ "$mtime" =~ ^[0-9]+$ ]] || mtime=0
      now=$(date +%s)
      age=$(( now - mtime ))
      if [[ "$age" -lt "$ttl" ]]; then cat "$cache"; return 0; fi
    fi
    # Download to a sibling temp file and rename, so a failed or empty response
    # never replaces a previously good cache.
    if curl -fsS --max-time 5 "$url" -o "${cache}.tmp" 2>/dev/null && [[ -s "${cache}.tmp" ]]; then
      mv "${cache}.tmp" "$cache"
      cat "$cache"
      return 0
    fi
    rm -f "${cache}.tmp" 2>/dev/null
    [[ -f "$cache" ]] && { cat "$cache"; return 0; }  # stale cache beats nothing
  fi

  local bundled
  bundled="$(cd "${PRICING_LIB_DIR}/.." && pwd)/pricing.json"
  [[ -f "$bundled" ]] && { cat "$bundled"; return 0; }
  return 1
}
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# usage.sh — Session usage metric functions
|
|
3
3
|
|
|
4
|
+
# Module directory, resolved once at source time (cwd-independent).
|
|
5
|
+
USAGE_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
6
|
+
|
|
7
|
+
# Single-source pricing registry loader (local / remote / bundled).
|
|
8
|
+
source "${USAGE_LIB_DIR}/pricing.sh"
|
|
9
|
+
|
|
4
10
|
# Resolve model from agent-spec.json
|
|
5
11
|
usage_get_model() {
|
|
6
12
|
local agent_name="$1"
|
|
@@ -27,3 +33,105 @@ usage_count_delegations() {
|
|
|
27
33
|
[[ ! -f "$jsonl_path" ]] && echo 0 && return
|
|
28
34
|
grep -c "\"session_id\":\"${session_id}\".*\"event_type\":\"agent.delegate\"" "$jsonl_path" 2>/dev/null || echo 0
|
|
29
35
|
}
|
|
36
|
+
|
|
37
|
+
# Parse a runtime transcript (JSONL) into real per-model token + cost usage.
|
|
38
|
+
# Ground truth lives in each assistant message's `.message.usage` block:
|
|
39
|
+
# input_tokens (uncached), output_tokens, cache_creation_input_tokens,
|
|
40
|
+
# cache_read_input_tokens — plus `.message.model`.
|
|
41
|
+
# Cost is derived from the versioned pricing registry: cache writes bill at
|
|
42
|
+
# input*write_5m, cache reads at input*read. Cost uses the registry's
|
|
43
|
+
# current_version (override with arg $2) and the result stamps `pricing_version`
|
|
44
|
+
# so the console can reproduce or recompute it. Emits a compact JSON object:
|
|
45
|
+
# { by_model: [ {model, input_tokens, output_tokens,
|
|
46
|
+
# cache_creation_input_tokens, cache_read_input_tokens,
|
|
47
|
+
# estimated_cost_usd} ],
|
|
48
|
+
# input_tokens, output_tokens, cache_creation_input_tokens,
|
|
49
|
+
# cache_read_input_tokens, estimated_cost_usd, pricing_version }
|
|
50
|
+
# Prints nothing and returns non-zero when the transcript is missing/unparseable so the
|
|
51
|
+
# caller can fall back to null usage. Never blocks agent work.
|
|
52
|
+
# Expected transcript usage path (Claude Code / Anthropic usage object). Bumped
|
|
53
|
+
# if the on-disk schema changes so drift is logged rather than silently zeroed.
|
|
54
|
+
USAGE_TRANSCRIPT_SCHEMA="message.usage.input_tokens"
|
|
55
|
+
|
|
56
|
+
# Report that a transcript contained usage data but the expected schema path
# (USAGE_TRANSCRIPT_SCHEMA) produced no tokens.
#   $1 - transcript path, quoted in the warning.
# The one-line warning is appended to TELEMETRY_DRIFT_LOG when that is set; if
# it is unset, or the append fails, the warning goes to stderr instead.
usage_log_drift() {
  local transcript="$1"
  local msg="[telemetry] pricing/usage drift: ${transcript} has usage data but expected path '${USAGE_TRANSCRIPT_SCHEMA}' parsed 0 tokens — transcript schema may have changed"
  if [[ -z "${TELEMETRY_DRIFT_LOG:-}" ]]; then
    echo "$msg" >&2
    return
  fi
  echo "$msg" >> "${TELEMETRY_DRIFT_LOG}" 2>/dev/null || echo "$msg" >&2
}
|
|
67
|
+
|
|
68
|
+
# Parse a runtime transcript (JSONL) into per-model token totals plus an
# estimated cost, printed as a JSON object on stdout.
#   $1 - transcript path. Only entries carrying `.message.usage` contribute;
#        the model comes from `.message.model`, falling back to "unknown".
#   $2 - optional pricing version; defaults to the registry current_version.
# Returns 1 and prints nothing when the transcript is missing, jq is not
# installed, no pricing registry resolves, the jq program fails or yields
# nothing, or zero tokens were parsed. In the zero-token case usage_log_drift is
# called if the file still mentions "input_tokens".
usage_parse_transcript() {
  local transcript="$1" version="${2:-}"
  [[ -z "$transcript" || ! -f "$transcript" ]] && return 1
  command -v jq >/dev/null 2>&1 || return 1
  local registry
  registry="$(pricing_registry)" || return 1
  [[ -z "$registry" ]] && return 1

  local out
  out="$(jq -n --argjson registry "$registry" --arg version "$version" '
    $registry as $reg
    | (if $version == "" then ($reg.current_version) else $version end) as $ver
    | ($reg.versions[$ver]) as $p
    | if $p == null then empty else . end
    | ($p.cache_multipliers) as $cm
    | (reduce inputs as $l ({};
        ($l.message.usage) as $u
        | if $u then
            (($l.message.model) // "unknown") as $m
            | .[$m].input = ((.[$m].input // 0) + (($u.input_tokens) // 0))
            | .[$m].output = ((.[$m].output // 0) + (($u.output_tokens) // 0))
            | .[$m].cache_creation = ((.[$m].cache_creation // 0) + (($u.cache_creation_input_tokens) // 0))
            | .[$m].cache_read = ((.[$m].cache_read // 0) + (($u.cache_read_input_tokens) // 0))
          else . end)) as $agg
    | ($agg | to_entries
        | map(
            .key as $m | .value as $u
            | (($p.models[$m]) // $p.default) as $rate
            # Exact-match membership. inside/contains compares strings by
            # substring, which would also zero-rate a model id such as "synth"
            # merely because it occurs within "synthetic".
            | (if any($p.zero_cost_models[]; . == $m) then 0 else 1 end) as $billable
            | {
                model: $m,
                input_tokens: ($u.input // 0),
                output_tokens: ($u.output // 0),
                cache_creation_input_tokens: ($u.cache_creation // 0),
                cache_read_input_tokens: ($u.cache_read // 0),
                estimated_cost_usd: (
                  $billable * (
                    ($u.input // 0) * $rate.input
                    + ($u.output // 0) * $rate.output
                    + ($u.cache_creation // 0) * $rate.input * $cm.write_5m
                    + ($u.cache_read // 0) * $rate.input * $cm.read
                  ) / 1000000
                )
              })) as $by_model
    | {
        by_model: $by_model,
        input_tokens: ([$by_model[].input_tokens] | add // 0),
        output_tokens: ([$by_model[].output_tokens] | add // 0),
        cache_creation_input_tokens: ([$by_model[].cache_creation_input_tokens] | add // 0),
        cache_read_input_tokens: ([$by_model[].cache_read_input_tokens] | add // 0),
        estimated_cost_usd: (([$by_model[].estimated_cost_usd] | add // 0) * 1000000 | round / 1000000),
        pricing_version: $ver
      }
  ' < "$transcript" 2>/dev/null)"

  [[ -z "$out" ]] && return 1

  # Drift / emptiness check: if we parsed zero tokens but the transcript clearly
  # contains usage data, the schema drifted - warn and fall back to null usage.
  local total
  total="$(printf '%s' "$out" | jq -r '((.input_tokens // 0) + (.output_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0))' 2>/dev/null)"
  if [[ -z "$total" || "$total" == "0" ]]; then
    if grep -q '"input_tokens"' "$transcript" 2>/dev/null; then
      usage_log_drift "$transcript"
    fi
    return 1
  fi

  printf '%s\n' "$out"
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Cross-runtime cost golden vectors. Keep IN SYNC with console-telemetry/test/golden-vectors.json (identical content). Asserted by the flow-agents bash usage tests, the Python sink tests, and the console-telemetry package so every runtime that prices tokens produces the SAME cost. If these drift between repos, a runtime's cost math has diverged.",
|
|
3
|
+
"pricing_version": "2026-06-28",
|
|
4
|
+
"cases": [
|
|
5
|
+
{ "name": "opus cache-read-dominated", "model": "claude-opus-4-8", "tokens": { "input": 1000, "output": 2000, "cache_creation": 0, "cache_read": 500000 }, "expected_cost_usd": 0.305 },
|
|
6
|
+
{ "name": "opus output only", "model": "claude-opus-4-8", "tokens": { "input": 0, "output": 1000, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 0.025 },
|
|
7
|
+
{ "name": "fable output", "model": "claude-fable-5", "tokens": { "input": 0, "output": 100, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 0.005 },
|
|
8
|
+
{ "name": "haiku output", "model": "claude-haiku-4-5", "tokens": { "input": 0, "output": 1000, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 0.005 },
|
|
9
|
+
{ "name": "sonnet input 1M", "model": "claude-sonnet-4-6", "tokens": { "input": 1000000, "output": 0, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 3.0 },
|
|
10
|
+
{ "name": "opus cache-write 5m tier", "model": "claude-opus-4-8", "tokens": { "input": 0, "output": 0, "cache_creation": 1000000, "cache_read": 0 }, "expected_cost_usd": 6.25 },
|
|
11
|
+
{ "name": "opus billion-scale", "model": "claude-opus-4-8", "tokens": { "input": 200000, "output": 1600000, "cache_creation": 9000000, "cache_read": 1000000000 }, "expected_cost_usd": 597.25 },
|
|
12
|
+
{ "name": "synthetic is free", "model": "<synthetic>", "tokens": { "input": 999, "output": 999, "cache_creation": 999, "cache_read": 999 }, "expected_cost_usd": 0 },
|
|
13
|
+
{ "name": "unknown model uses default rate", "model": "some-unlisted-model", "tokens": { "input": 1000000, "output": 0, "cache_creation": 0, "cache_read": 0 }, "expected_cost_usd": 5.0 }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "2.0",
|
|
3
|
+
"current_version": "2026-06-28",
|
|
4
|
+
"source": "Anthropic public list pricing; cache multipliers per prompt-caching docs",
|
|
5
|
+
"versions": {
|
|
6
|
+
"2026-06-28": {
|
|
7
|
+
"effective_date": "2026-06-28",
|
|
8
|
+
"currency": "USD",
|
|
9
|
+
"unit": "per_1m_tokens",
|
|
10
|
+
"cache_multipliers": {
|
|
11
|
+
"write_5m": 1.25,
|
|
12
|
+
"write_1h": 2.0,
|
|
13
|
+
"read": 0.1
|
|
14
|
+
},
|
|
15
|
+
"models": {
|
|
16
|
+
"claude-fable-5": { "input": 10.0, "output": 50.0 },
|
|
17
|
+
"claude-mythos-5": { "input": 10.0, "output": 50.0 },
|
|
18
|
+
"claude-opus-4-8": { "input": 5.0, "output": 25.0 },
|
|
19
|
+
"claude-opus-4-7": { "input": 5.0, "output": 25.0 },
|
|
20
|
+
"claude-opus-4-6": { "input": 5.0, "output": 25.0 },
|
|
21
|
+
"claude-opus-4-5": { "input": 5.0, "output": 25.0 },
|
|
22
|
+
"claude-opus-4-1": { "input": 15.0, "output": 75.0 },
|
|
23
|
+
"claude-sonnet-4-6": { "input": 3.0, "output": 15.0 },
|
|
24
|
+
"claude-sonnet-4-5": { "input": 3.0, "output": 15.0 },
|
|
25
|
+
"claude-haiku-4-5": { "input": 1.0, "output": 5.0 }
|
|
26
|
+
},
|
|
27
|
+
"default": { "input": 5.0, "output": 25.0 },
|
|
28
|
+
"zero_cost_models": ["<synthetic>", "synthetic", "unknown", ""]
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -8,6 +8,10 @@ channel.analytics.redact=tool.input,tool.output,turn.prompt_text,delegation.targ
|
|
|
8
8
|
# The transport derives /api/telemetry/records from console_telemetry_url.
|
|
9
9
|
# console_telemetry_token=
|
|
10
10
|
# console_tenant_id=
|
|
11
|
+
# Live pricing registry source. If unset, derived from console_telemetry_url as
|
|
12
|
+
# <console>/api/telemetry/pricing so bash/Python/TS runtimes read one live
|
|
13
|
+
# source; lib/pricing.sh caches it and falls back to bundled pricing.json.
|
|
14
|
+
# console_pricing_url=https://console.kontourai.io/api/telemetry/pricing
|
|
11
15
|
enrich_system=true
|
|
12
16
|
enrich_workspace=true
|
|
13
17
|
enrich_auth=true
|
|
@@ -309,13 +309,35 @@ add_stop_data_and_emit_usage() {
|
|
|
309
309
|
tool_count=$(usage_count_tool_calls "$session_id" "$full_log")
|
|
310
310
|
delegation_count=$(usage_count_delegations "$session_id" "$full_log")
|
|
311
311
|
|
|
312
|
+
# Ground-truth token + cost usage from the runtime transcript, when the
|
|
313
|
+
# runtime exposes one (Claude Code, Codex, etc. set hook.transcript_path).
|
|
314
|
+
# Tokens are source-of-truth; estimated_cost_usd is derived from pricing.json
|
|
315
|
+
# (recomputed authoritatively console-side, so pricing updates are retroactive).
|
|
316
|
+
local transcript_path transcript_usage
|
|
317
|
+
transcript_path=$(echo "$event" | jq -r '.hook.transcript_path // ""')
|
|
318
|
+
transcript_usage=$(usage_parse_transcript "$transcript_path")
|
|
319
|
+
[[ -z "$transcript_usage" ]] && transcript_usage='null'
|
|
320
|
+
|
|
312
321
|
local usage_event
|
|
313
322
|
usage_event=$(echo "$event" | jq -c \
|
|
314
323
|
--arg m "$model" \
|
|
315
324
|
--argjson tc "$tool_count" \
|
|
316
325
|
--argjson dc "$delegation_count" \
|
|
326
|
+
--argjson tu "$transcript_usage" \
|
|
317
327
|
'.event_type = "session.usage" | .event_id = (.event_id + "-usage") | . + {
|
|
318
|
-
usage: {
|
|
328
|
+
usage: ({
|
|
329
|
+
model: $m,
|
|
330
|
+
duration_s: .session.duration_s,
|
|
331
|
+
tool_invocations: $tc,
|
|
332
|
+
delegations: $dc,
|
|
333
|
+
input_tokens: ($tu.input_tokens // null),
|
|
334
|
+
output_tokens: ($tu.output_tokens // null),
|
|
335
|
+
cache_creation_input_tokens: ($tu.cache_creation_input_tokens // null),
|
|
336
|
+
cache_read_input_tokens: ($tu.cache_read_input_tokens // null),
|
|
337
|
+
estimated_cost_usd: ($tu.estimated_cost_usd // null),
|
|
338
|
+
pricing_version: ($tu.pricing_version // null),
|
|
339
|
+
by_model: ($tu.by_model // null)
|
|
340
|
+
})
|
|
319
341
|
}')
|
|
320
342
|
transport_emit "$usage_event"
|
|
321
343
|
fi
|
|
@@ -19,11 +19,17 @@ export const verdicts = new Set(["pass", "partial", "fail", "not_verified"]);
|
|
|
19
19
|
function now(): string { return new Date().toISOString().replace(/\.\d{3}Z$/, "Z"); }
|
|
20
20
|
function read(file: string): string { return fs.readFileSync(file, "utf8"); }
|
|
21
21
|
export function writeJson(file: string, payload: AnyObj): void { fs.mkdirSync(path.dirname(file), { recursive: true }); fs.writeFileSync(file, `${JSON.stringify(payload, null, 2)}\n`); }
|
|
22
|
-
|
|
22
|
+
/**
 * Render `payload` as a single line of JSON in the spaced `"key": "value"` form.
 *
 * The payload is stringified with a one-space indent, then each newline and
 * the indentation following it is collapsed to one space. JSON.stringify
 * escapes newlines inside string values, so only structural line breaks are
 * rewritten and string contents (including any ":" or "," they hold) are left
 * untouched.
 *
 * @param payload - object to serialize.
 * @param replacer - optional key allow-list, forwarded to JSON.stringify.
 * @returns the one-line JSON text.
 */
function spacedLine(payload: AnyObj, replacer?: (string | number)[]): string {
  const indented = JSON.stringify(payload, replacer as never, 1);
  return indented.replace(/\n\s*/g, " ");
}
|
|
28
|
+
/** Write `payload` to stdout as one spaced JSON line, formatted by spacedLine. */
function printJson(payload: AnyObj): void {
  const line = spacedLine(payload);
  console.log(line);
}
|
|
23
29
|
export function loadJson(file: string, fallback: AnyObj = {}): AnyObj { return fs.existsSync(file) ? JSON.parse(read(file)) : { ...fallback }; }
|
|
24
30
|
/**
 * Return a copy of a JSON-like value with object keys sorted at every level.
 * Arrays keep their element order. Values exposing `toJSON` (e.g. Date) are
 * returned as-is so JSON.stringify still serializes them through that hook.
 */
function sortJsonKeysDeep(value: unknown): unknown {
  if (Array.isArray(value)) return value.map(sortJsonKeysDeep);
  if (value === null || typeof value !== "object") return value;
  const source = value as Record<string, unknown>;
  if (typeof source.toJSON === "function") return value;
  const sorted: Record<string, unknown> = {};
  for (const key of Object.keys(source).sort()) sorted[key] = sortJsonKeysDeep(source[key]);
  return sorted;
}

/**
 * Append `payload` to a JSONL file as one spaced, key-sorted line, creating the
 * parent directory first if needed.
 *
 * Keys are sorted by rebuilding the object rather than by passing a key list
 * as the JSON.stringify replacer: an array replacer is an allow-list applied
 * at every nesting level, so it silently drops nested properties whose names
 * are not also top-level keys.
 *
 * @param file - path of the JSONL file to append to.
 * @param payload - record to write.
 */
export function appendJsonl(file: string, payload: AnyObj): void {
  fs.mkdirSync(path.dirname(file), { recursive: true });
  // The cast is safe: a plain-object input always comes back as a plain object.
  const line = spacedLine(sortJsonKeysDeep(payload) as AnyObj);
  fs.appendFileSync(file, `${line}\n`);
}
|
|
29
35
|
function die(message: string): never { throw new Error(message); }
|