@xerg/cli 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -6
- package/dist/index.js +300 -52
- package/dist/index.js.map +1 -1
- package/package.json +5 -4
- package/skills/xerg/SKILL.md +65 -3
package/README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# xerg
|
|
2
2
|
|
|
3
|
-
Audit OpenClaw and
|
|
3
|
+
Audit OpenClaw, Hermes, and Cursor spend in dollars, surface provenance-aware waste findings, compare fixes, and optionally connect hosted follow-up after the first local result.
|
|
4
4
|
|
|
5
5
|
Xerg runs locally by default. Local audits and `--compare` are free. No account is required for local value, and no data leaves your machine unless you explicitly push results to Xerg Cloud.
|
|
6
6
|
|
|
7
|
+
The `npx @xerg/cli` path fetches and executes the published npm package before running Xerg. If you want to avoid a runtime fetch, install once with `npm install -g @xerg/cli` or run a local build from source.
|
|
8
|
+
|
|
7
9
|
## Fastest first run
|
|
8
10
|
|
|
9
11
|
```bash
|
|
@@ -14,7 +16,7 @@ npx @xerg/cli init
|
|
|
14
16
|
|
|
15
17
|
- detects local OpenClaw or Hermes data
|
|
16
18
|
- runs a first audit and stores the local snapshot
|
|
17
|
-
- prints the
|
|
19
|
+
- prints the provenance-aware terminal summary
|
|
18
20
|
- offers optional hosted follow-up with `connect` and `mcp-setup`
|
|
19
21
|
|
|
20
22
|
Prefer a global install?
|
|
@@ -32,6 +34,7 @@ If you already know what you want, skip `init` and use the direct flows:
|
|
|
32
34
|
npx @xerg/cli doctor
|
|
33
35
|
npx @xerg/cli audit
|
|
34
36
|
npx @xerg/cli audit --compare
|
|
37
|
+
npx @xerg/cli audit --cursor-usage-csv ./cursor-usage.csv
|
|
35
38
|
npx @xerg/cli audit --json
|
|
36
39
|
npx @xerg/cli audit --markdown
|
|
37
40
|
```
|
|
@@ -54,6 +57,8 @@ node_modules/@xerg/cli/skills/xerg/SKILL.md
|
|
|
54
57
|
|
|
55
58
|
For a global install, the same file lives inside the global npm package directory instead. That file is a packaged copy of the canonical repo skill at [`skills/xerg/SKILL.md`](../../skills/xerg/SKILL.md). Use it if your agent platform imports skills from disk; installing the npm package does not automatically register the skill with every agent product.
|
|
56
59
|
|
|
60
|
+
The bundled skill frontmatter declares the CLI/package surface plus optional Xerg Cloud, SSH, rsync, and Railway requirements so registries can distinguish the default local audit workflow from opt-in hosted sync and remote audit workflows.
|
|
61
|
+
|
|
57
62
|
## Supported runtime
|
|
58
63
|
|
|
59
64
|
`@xerg/cli` supports Node `22` and `24`.
|
|
@@ -105,7 +110,7 @@ xerg push
|
|
|
105
110
|
|
|
106
111
|
## Works where your agent data lives
|
|
107
112
|
|
|
108
|
-
- Local machine:
|
|
113
|
+
- Local machine: OpenClaw, Hermes, and explicit Cursor usage CSV exports
|
|
109
114
|
- VPS or remote server: OpenClaw only in this phase
|
|
110
115
|
- If OpenClaw runs remotely, you can audit it from your local machine with `xerg audit --remote user@host`
|
|
111
116
|
- Or point Xerg at exported files directly with flags
|
|
@@ -131,6 +136,7 @@ xerg audit --runtime openclaw --log-file /path/to/openclaw.log
|
|
|
131
136
|
xerg audit --runtime openclaw --sessions-dir /path/to/sessions
|
|
132
137
|
xerg audit --runtime hermes --log-file ~/.hermes/logs/agent.log
|
|
133
138
|
xerg audit --runtime hermes --sessions-dir ~/.hermes/sessions
|
|
139
|
+
xerg audit --cursor-usage-csv ./cursor-usage.csv
|
|
134
140
|
```
|
|
135
141
|
|
|
136
142
|
If only one supported local runtime is present, Xerg auto-selects it. If both OpenClaw and Hermes are present locally, rerun with `--runtime openclaw` or `--runtime hermes`.
|
|
@@ -152,7 +158,7 @@ xerg mcp-setup
|
|
|
152
158
|
```
|
|
153
159
|
|
|
154
160
|
- `connect` resolves auth from `XERG_API_KEY`, `~/.xerg/config.json`, or stored browser credentials, then offers to push the latest audit
|
|
155
|
-
- `mcp-setup` prints or writes hosted MCP config for Cursor, Claude Code, or another client
|
|
161
|
+
- `mcp-setup` prints or writes hosted MCP config for Cursor, Claude Code, Codex, or another client
|
|
156
162
|
|
|
157
163
|
You can skip both and keep using local audits and compare.
|
|
158
164
|
|
|
@@ -187,10 +193,13 @@ Example `~/.xerg/config.json`:
|
|
|
187
193
|
- Total spend by workflow and model, in dollars
|
|
188
194
|
- Daily spend and confirmed waste rollups in UTC
|
|
189
195
|
- Observed vs. estimated cost (always labeled)
|
|
190
|
-
- Confirmed waste: retry
|
|
196
|
+
- Confirmed waste: retry and loop findings when the required source structure is present
|
|
197
|
+
- Provenance-aware waste rollups by observed, inferred, declared, or unknown signal source
|
|
191
198
|
- Savings opportunities: context bloat, downgrade candidates, idle, max mode concentration where applicable
|
|
192
199
|
- Ranked recommendations with where-to-change guidance and compare validation steps
|
|
193
|
-
- Before/after
|
|
200
|
+
- Before/after normalized rates on re-audit, including waste per run and waste per 1k calls
|
|
201
|
+
|
|
202
|
+
Local JSON findings may include `signalSource`, `ruleId`, and evidence references so agents can distinguish observed signals from inferred or legacy unknown provenance. These local provenance fields are not part of the pushed v2 wire payload.
|
|
194
203
|
|
|
195
204
|
## Privacy
|
|
196
205
|
|
package/dist/index.js
CHANGED
|
@@ -1165,6 +1165,36 @@ function buildTaxonomyBuckets(findings, classification) {
|
|
|
1165
1165
|
}
|
|
1166
1166
|
return Array.from(buckets.values()).sort((left, right) => right.spendUsd - left.spendUsd);
|
|
1167
1167
|
}
|
|
1168
|
+
/**
 * Roll up the cost impact of waste findings by the provenance of their signal.
 *
 * Findings whose `classification` is not "waste" are ignored. Each remaining
 * finding adds its `costImpactUsd` to the bucket matching its `signalSource`
 * ("observed", "inferred" or "declared"); any other value lands in `unknownUsd`.
 *
 * @param findings Iterable of finding objects.
 * @returns Rollup with `observedUsd`, `inferredUsd`, `declaredUsd`, `unknownUsd`
 *   and `inferredShare`. `inferredShare` is null whenever any unknown-provenance
 *   waste exists, otherwise inferred / (observed + inferred + declared) rounded
 *   to four decimals (0 when that total is 0).
 */
function buildWasteBySignalSource(findings) {
  // A Map (not a plain object) so odd signalSource values such as
  // "constructor" can never resolve to an inherited property.
  const bucketBySignalSource = new Map([
    ["observed", "observedUsd"],
    ["inferred", "inferredUsd"],
    ["declared", "declaredUsd"]
  ]);
  const totals = {
    observedUsd: 0,
    inferredUsd: 0,
    declaredUsd: 0,
    unknownUsd: 0,
    inferredShare: null
  };
  for (const entry of findings) {
    if (entry.classification === "waste") {
      const bucket = bucketBySignalSource.get(entry.signalSource) ?? "unknownUsd";
      // Re-round after every addition, as each bucket is kept at cent precision.
      totals[bucket] = round2(totals[bucket] + entry.costImpactUsd);
    }
  }
  if (totals.unknownUsd > 0) {
    // The share is not meaningful while some waste has unknown provenance.
    totals.inferredShare = null;
  } else {
    const attributedUsd = totals.observedUsd + totals.inferredUsd + totals.declaredUsd;
    const share = attributedUsd === 0 ? 0 : totals.inferredUsd / attributedUsd;
    totals.inferredShare = Number(share.toFixed(4));
  }
  return totals;
}
|
|
1168
1198
|
/**
 * Index breakdown rows by key.
 *
 * @param rows Array of objects carrying `key` and `spendUsd`.
 * @returns Map from each row's `key` to its `spendUsd`; when a key repeats,
 *   the last row's value is kept.
 */
function toSpendMap(rows) {
  const spendByKey = new Map();
  rows.forEach((entry) => {
    spendByKey.set(entry.key, entry.spendUsd);
  });
  return spendByKey;
}
|
|
@@ -1280,6 +1310,7 @@ function hydrateAuditSummary(summary) {
|
|
|
1280
1310
|
opportunityByKind: summary.opportunityByKind?.length > 0 ? summary.opportunityByKind : buildTaxonomyBuckets(summary.findings, "opportunity"),
|
|
1281
1311
|
spendByDay: summary.spendByDay ?? [],
|
|
1282
1312
|
wasteByDay: summary.wasteByDay ?? [],
|
|
1313
|
+
wasteBySignalSource: summary.wasteBySignalSource ?? buildWasteBySignalSource(summary.findings),
|
|
1283
1314
|
recommendations: summary.recommendations ?? [],
|
|
1284
1315
|
notes: summary.notes ?? [],
|
|
1285
1316
|
pricingCoverage: summary.pricingCoverage ?? null,
|
|
@@ -1300,6 +1331,7 @@ function buildAuditComparison(current, baseline) {
|
|
|
1300
1331
|
baselineWasteSpendUsd: baseline.wasteSpendUsd,
|
|
1301
1332
|
baselineOpportunitySpendUsd: baseline.opportunitySpendUsd,
|
|
1302
1333
|
baselineStructuralWasteRate: baseline.structuralWasteRate,
|
|
1334
|
+
baselineWasteBySignalSource: baseline.wasteBySignalSource ?? buildWasteBySignalSource(baseline.findings),
|
|
1303
1335
|
deltaTotalSpendUsd: round2(current.totalSpendUsd - baseline.totalSpendUsd),
|
|
1304
1336
|
deltaObservedSpendUsd: round2(current.observedSpendUsd - baseline.observedSpendUsd),
|
|
1305
1337
|
deltaEstimatedSpendUsd: round2(current.estimatedSpendUsd - baseline.estimatedSpendUsd),
|
|
@@ -1350,6 +1382,8 @@ function readLatestComparableAuditSummary(input) {
|
|
|
1350
1382
|
}
|
|
1351
1383
|
|
|
1352
1384
|
// ../core/src/findings/cursor.ts
|
|
1385
|
+
var CACHE_CARRYOVER_RULE_ID = "cursor_cache_ratio_v1";
|
|
1386
|
+
var MAX_MODE_CONCENTRATION_RULE_ID = "cursor_max_mode_concentration_v1";
|
|
1353
1387
|
/**
 * Round a number to a fixed number of decimal places.
 *
 * Note: despite the name, this keeps six decimal places, not three.
 *
 * @param value Number to round.
 * @returns The value rounded to six decimals.
 */
function round3(value) {
  const fixedText = value.toFixed(6);
  return Number(fixedText);
}
|
|
@@ -1419,6 +1453,13 @@ function buildCursorUsageFindings(runs) {
|
|
|
1419
1453
|
scopeId: "all",
|
|
1420
1454
|
scopeLabel: "Cursor usage",
|
|
1421
1455
|
costImpactUsd: cacheImpactUsd,
|
|
1456
|
+
signalSource: "observed",
|
|
1457
|
+
ruleId: CACHE_CARRYOVER_RULE_ID,
|
|
1458
|
+
evidence: {
|
|
1459
|
+
callIds: cacheAwareCalls.map((call) => call.id).sort(),
|
|
1460
|
+
runIds: Array.from(new Set(cacheAwareCalls.map((call) => call.runId))).sort(),
|
|
1461
|
+
sourceKinds: ["cursor-usage-csv"]
|
|
1462
|
+
},
|
|
1422
1463
|
details: {
|
|
1423
1464
|
cacheReadShare: round3(cacheReadShare),
|
|
1424
1465
|
cacheCoverageShare: round3(cacheCoverageShare),
|
|
@@ -1447,6 +1488,13 @@ function buildCursorUsageFindings(runs) {
|
|
|
1447
1488
|
scopeId: "all",
|
|
1448
1489
|
scopeLabel: "Cursor usage",
|
|
1449
1490
|
costImpactUsd: round3(maxModeSpendUsd * 0.2),
|
|
1491
|
+
signalSource: "observed",
|
|
1492
|
+
ruleId: MAX_MODE_CONCENTRATION_RULE_ID,
|
|
1493
|
+
evidence: {
|
|
1494
|
+
callIds: maxModeCalls.map((call) => call.id).sort(),
|
|
1495
|
+
runIds: Array.from(new Set(maxModeCalls.map((call) => call.runId))).sort(),
|
|
1496
|
+
sourceKinds: ["cursor-usage-csv"]
|
|
1497
|
+
},
|
|
1450
1498
|
details: {
|
|
1451
1499
|
maxModeSpendUsd: round3(maxModeSpendUsd),
|
|
1452
1500
|
maxModeSpendShare: round3(maxModeSpendShare),
|
|
@@ -1468,55 +1516,178 @@ function buildCursorUsageFindings(runs) {
|
|
|
1468
1516
|
}
|
|
1469
1517
|
|
|
1470
1518
|
// ../core/src/findings/engine.ts
|
|
1519
|
+
var RETRY_OBSERVED_RULE_ID = "retry_explicit_failed_attempt_v1";
|
|
1520
|
+
var RETRY_INFERRED_RULE_ID = "retry_later_attempt_proxy_v1";
|
|
1521
|
+
var LOOP_RULE_ID = "loop_iteration_threshold_v1";
|
|
1522
|
+
var CONTEXT_OUTLIER_RULE_ID = "context_outlier_tokens_v1";
|
|
1523
|
+
var IDLE_SPEND_RULE_ID = "idle_workflow_name_v1";
|
|
1524
|
+
var CANDIDATE_DOWNGRADE_RULE_ID = "candidate_downgrade_task_model_v1";
|
|
1525
|
+
var LOOP_WASTE_START_ITERATION = 6;
|
|
1526
|
+
var LOOP_FINDING_MIN_ITERATION = 7;
|
|
1471
1527
|
function createFinding2(input) {
|
|
1472
1528
|
return {
|
|
1473
1529
|
...input,
|
|
1474
1530
|
id: sha1(
|
|
1475
|
-
`${input.kind}:${input.scope}:${input.scopeId}:${input.title}:${input.costImpactUsd}:${input.summary}`
|
|
1531
|
+
`${input.kind}:${input.scope}:${input.scopeId}:${input.title}:${input.costImpactUsd}:${input.summary}:${input.signalSource ?? "unknown"}:${input.ruleId ?? "none"}`
|
|
1476
1532
|
)
|
|
1477
1533
|
};
|
|
1478
1534
|
}
|
|
1479
1535
|
/**
 * Round a number to a fixed number of decimal places.
 *
 * Note: despite the name, this keeps six decimal places, not four.
 *
 * @param value Number to round.
 * @returns The value rounded to six decimals.
 */
function round4(value) {
  const fixedText = value.toFixed(6);
  return Number(fixedText);
}
|
|
1538
|
+
/**
 * Tell whether a call's status marks it as failed or aborted.
 *
 * The check is a case-insensitive substring match for "error", "fail" or
 * "abort"; a null or missing status counts as not failed.
 *
 * @param call Object with an optional string `status`.
 * @returns true when the status contains one of the failure markers.
 */
function isFailedOrAborted(call) {
  const normalizedStatus = (call.status ?? "").toLowerCase();
  const failureMarkers = ["error", "fail", "abort"];
  return failureMarkers.some((marker) => normalizedStatus.includes(marker));
}
|
|
1542
|
+
/**
 * Tell whether a call explicitly identifies itself as a retry.
 *
 * @param call Object with optional `attempt` and `retries` fields.
 * @returns true when `attempt` is above 1 (a null/undefined attempt is treated
 *   as 1) or when `retries` is greater than 0.
 */
function hasExplicitRetrySignal(call) {
  const attemptNumber = call.attempt ?? 1;
  if (attemptNumber > 1) {
    return true;
  }
  return call.retries > 0;
}
|
|
1545
|
+
/**
 * Convert a call's `timestamp` into epoch milliseconds.
 *
 * @param call Object whose `timestamp` is anything the `Date` constructor accepts.
 * @returns Epoch milliseconds, or `Number.POSITIVE_INFINITY` when the timestamp
 *   cannot be parsed, so such calls compare as later than every dated call.
 */
function toTimestampMs(call) {
  const epochMs = new Date(call.timestamp).getTime();
  // getTime() yields either a finite number or NaN, never an infinity.
  if (Number.isNaN(epochMs)) {
    return Number.POSITIVE_INFINITY;
  }
  return epochMs;
}
|
|
1549
|
+
/**
 * Order calls chronologically while remembering their original positions.
 *
 * @param calls Array of call objects accepted by `toTimestampMs`.
 * @returns A new array of `{ call, index }` pairs sorted by timestamp, where
 *   `index` is the call's position in `calls`. Calls with equal timestamps
 *   (including calls whose timestamps are both unparseable) keep their
 *   original relative order. The input array is not modified.
 */
function sortCallsByTime(calls) {
  // Parse each timestamp once instead of on every comparison.
  const timestamps = calls.map((call) => toTimestampMs(call));
  return calls.map((call, index) => ({ call, index })).sort((left, right) => {
    const leftMs = timestamps[left.index];
    const rightMs = timestamps[right.index];
    // Compare explicitly rather than subtracting: two unparseable timestamps
    // are both +Infinity, and Infinity - Infinity is NaN, which would skip the
    // index tie-break below.
    if (leftMs === rightMs) {
      return left.index - right.index;
    }
    return leftMs < rightMs ? -1 : 1;
  });
}
|
|
1555
|
+
/**
 * Tell whether runs from a given source kind may feed the structural
 * (retry / loop) detectors that filter on this predicate.
 *
 * @param sourceKind Source kind string of a run.
 * @returns true only for the "gateway" source kind.
 */
function canUseStructuralSignals(sourceKind) {
  const structuralSourceKinds = ["gateway"];
  return structuralSourceKinds.includes(sourceKind);
}
|
|
1558
|
+
/**
 * Tell whether any call after `currentIndex` is an explicit retry that can
 * follow the call at `currentIndex`.
 *
 * @param sortedCalls Array of `{ call, index }` pairs in chronological order.
 * @param currentIndex Position in `sortedCalls` of the call being examined.
 * @returns false when there is no call at `currentIndex`; otherwise true when
 *   some later call carries an explicit retry signal and, if both calls have
 *   an attempt number, the later one's attempt number is higher.
 */
function hasLaterExplicitRetryAttempt(sortedCalls, currentIndex) {
  const current = sortedCalls[currentIndex]?.call;
  if (!current) {
    return false;
  }
  return sortedCalls.slice(currentIndex + 1).some(({ call }) => {
    if (!hasExplicitRetrySignal(call)) {
      return false;
    }
    // `!= null` deliberately matches both null and undefined, mirroring the
    // `call.attempt ?? 1` handling in hasExplicitRetrySignal. A strict
    // `!== null` would let an undefined attempt reach the `>` comparison,
    // which is always false and would hide a retry flagged via `retries`.
    if (current.attempt != null && call.attempt != null) {
      return call.attempt > current.attempt;
    }
    // Attempt numbers are not comparable, so the explicit signal is enough.
    return true;
  });
}
|
|
1573
|
+
/**
 * Collect the distinct source kinds of the runs that the given calls belong to.
 *
 * @param calls Array of objects with a `runId`.
 * @param runs Array of objects with `id` and `sourceKind`.
 * @returns Sorted array of unique, truthy source kinds. Calls whose run is not
 *   found, or whose run has no source kind, contribute nothing.
 */
function uniqueSourceKinds(calls, runs) {
  const sourceKindByRunId = new Map();
  runs.forEach((candidate) => {
    sourceKindByRunId.set(candidate.id, candidate.sourceKind);
  });
  const seenKinds = new Set();
  calls.forEach((call) => {
    const kind = sourceKindByRunId.get(call.runId);
    if (kind) {
      seenKinds.add(kind);
    }
  });
  return [...seenKinds].sort();
}
|
|
1581
|
+
/**
 * Build the workspace-wide "retry-waste" finding for a set of calls.
 *
 * @param input Object with:
 *   - `calls`: calls whose `costUsd` is counted as retry overhead,
 *   - `runs`: runs used to resolve the calls' source kinds,
 *   - `signalSource`: "observed" selects the high-confidence wording; any
 *     other value selects the medium-confidence proxy wording,
 *   - `ruleId`: identifier of the rule that produced the finding.
 * @returns The finding produced by `createFinding2`, with the summed call cost
 *   as `costImpactUsd` and sorted call/run ids as evidence.
 */
function buildRetryFinding(input) {
  const { calls, runs, signalSource, ruleId } = input;
  const isObserved = signalSource === "observed";
  const callCount = calls.length;
  const pluralSuffix = callCount === 1 ? "" : "s";

  let totalRetryCostUsd = 0;
  calls.forEach((call) => {
    totalRetryCostUsd += call.costUsd;
  });

  let confidence;
  let title;
  let summary;
  if (isObserved) {
    confidence = "high";
    title = "Retry waste is consuming measurable spend";
    summary = `${callCount} failed or aborted call${pluralSuffix} were followed by explicit retry attempts, making their spend retry overhead.`;
  } else {
    confidence = "medium";
    title = "Retry waste is likely present from later retry attempts";
    summary = `${callCount} later retry attempt${pluralSuffix} were counted as proxy retry overhead because the earlier failed attempt was not separately countable.`;
  }

  const evidence = {
    callIds: calls.map((call) => call.id).sort(),
    runIds: [...new Set(calls.map((call) => call.runId))].sort(),
    sourceKinds: uniqueSourceKinds(calls, runs)
  };

  return createFinding2({
    classification: "waste",
    confidence,
    kind: "retry-waste",
    title,
    summary,
    scope: "global",
    scopeId: "all",
    scopeLabel: "workspace",
    costImpactUsd: round4(totalRetryCostUsd),
    signalSource,
    ruleId,
    evidence,
    details: {
      retryCallCount: callCount
    }
  });
}
|
|
1482
1606
|
function buildFindings(runs) {
|
|
1483
1607
|
const findings = [];
|
|
1484
1608
|
const wasteAttributions = [];
|
|
1485
|
-
const
|
|
1486
|
-
const
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1609
|
+
const observedRetryCalls = [];
|
|
1610
|
+
const inferredRetryCalls = [];
|
|
1611
|
+
const retryCoveredCallIds = /* @__PURE__ */ new Set();
|
|
1612
|
+
for (const run2 of runs.filter((candidate) => canUseStructuralSignals(candidate.sourceKind))) {
|
|
1613
|
+
const sortedCalls = sortCallsByTime(run2.calls);
|
|
1614
|
+
sortedCalls.forEach(({ call }, index) => {
|
|
1615
|
+
if (!isFailedOrAborted(call)) {
|
|
1616
|
+
return;
|
|
1617
|
+
}
|
|
1618
|
+
if (!hasExplicitRetrySignal(call) && !hasLaterExplicitRetryAttempt(sortedCalls, index)) {
|
|
1619
|
+
return;
|
|
1620
|
+
}
|
|
1621
|
+
if (!hasLaterExplicitRetryAttempt(sortedCalls, index)) {
|
|
1622
|
+
return;
|
|
1623
|
+
}
|
|
1624
|
+
observedRetryCalls.push(call);
|
|
1625
|
+
retryCoveredCallIds.add(call.id);
|
|
1626
|
+
const later = sortedCalls.slice(index + 1).find(({ call: laterCall }) => hasExplicitRetrySignal(laterCall));
|
|
1627
|
+
if (later) {
|
|
1628
|
+
retryCoveredCallIds.add(later.call.id);
|
|
1629
|
+
}
|
|
1630
|
+
});
|
|
1631
|
+
for (const { call } of sortedCalls) {
|
|
1632
|
+
if (!hasExplicitRetrySignal(call) || retryCoveredCallIds.has(call.id)) {
|
|
1633
|
+
continue;
|
|
1634
|
+
}
|
|
1635
|
+
const hasEarlierCountableFailure = sortedCalls.some(({ call: earlier }) => {
|
|
1636
|
+
if (earlier.id === call.id) {
|
|
1637
|
+
return false;
|
|
1638
|
+
}
|
|
1639
|
+
return toTimestampMs(earlier) < toTimestampMs(call) && isFailedOrAborted(earlier);
|
|
1640
|
+
});
|
|
1641
|
+
if (!hasEarlierCountableFailure) {
|
|
1642
|
+
inferredRetryCalls.push(call);
|
|
1643
|
+
retryCoveredCallIds.add(call.id);
|
|
1644
|
+
}
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
if (observedRetryCalls.length > 0) {
|
|
1492
1648
|
wasteAttributions.push(
|
|
1493
|
-
...
|
|
1649
|
+
...observedRetryCalls.map((call) => ({
|
|
1494
1650
|
kind: "retry-waste",
|
|
1495
1651
|
timestamp: call.timestamp,
|
|
1496
1652
|
wasteUsd: call.costUsd
|
|
1497
1653
|
}))
|
|
1498
1654
|
);
|
|
1499
1655
|
findings.push(
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1656
|
+
buildRetryFinding({
|
|
1657
|
+
calls: observedRetryCalls,
|
|
1658
|
+
runs,
|
|
1659
|
+
signalSource: "observed",
|
|
1660
|
+
ruleId: RETRY_OBSERVED_RULE_ID
|
|
1661
|
+
})
|
|
1662
|
+
);
|
|
1663
|
+
}
|
|
1664
|
+
if (inferredRetryCalls.length > 0) {
|
|
1665
|
+
wasteAttributions.push(
|
|
1666
|
+
...inferredRetryCalls.map((call) => ({
|
|
1503
1667
|
kind: "retry-waste",
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1668
|
+
timestamp: call.timestamp,
|
|
1669
|
+
wasteUsd: call.costUsd
|
|
1670
|
+
}))
|
|
1671
|
+
);
|
|
1672
|
+
findings.push(
|
|
1673
|
+
buildRetryFinding({
|
|
1674
|
+
calls: inferredRetryCalls,
|
|
1675
|
+
runs,
|
|
1676
|
+
signalSource: "inferred",
|
|
1677
|
+
ruleId: RETRY_INFERRED_RULE_ID
|
|
1513
1678
|
})
|
|
1514
1679
|
);
|
|
1515
1680
|
}
|
|
1516
|
-
for (const run2 of runs) {
|
|
1517
|
-
const
|
|
1518
|
-
if (
|
|
1519
|
-
|
|
1681
|
+
for (const run2 of runs.filter((candidate) => canUseStructuralSignals(candidate.sourceKind))) {
|
|
1682
|
+
const iterations = run2.calls.map((call) => call.iteration).filter((iteration) => iteration !== null);
|
|
1683
|
+
if (iterations.length === 0) {
|
|
1684
|
+
continue;
|
|
1685
|
+
}
|
|
1686
|
+
const maxIteration = Math.max(...iterations);
|
|
1687
|
+
if (maxIteration >= LOOP_FINDING_MIN_ITERATION) {
|
|
1688
|
+
const loopCalls = run2.calls.filter(
|
|
1689
|
+
(call) => (call.iteration ?? 0) >= LOOP_WASTE_START_ITERATION
|
|
1690
|
+
);
|
|
1520
1691
|
const loopCost = loopCalls.reduce((sum, call) => sum + call.costUsd, 0);
|
|
1521
1692
|
wasteAttributions.push(
|
|
1522
1693
|
...loopCalls.map((call) => ({
|
|
@@ -1531,14 +1702,22 @@ function buildFindings(runs) {
|
|
|
1531
1702
|
confidence: "high",
|
|
1532
1703
|
kind: "loop-waste",
|
|
1533
1704
|
title: `Workflow "${run2.workflow}" ran beyond efficient loop bounds`,
|
|
1534
|
-
summary: `This run reached ${maxIteration} iterations. Xerg treats
|
|
1705
|
+
summary: `This run reached ${maxIteration} iterations. Xerg treats spend from iteration ${LOOP_WASTE_START_ITERATION} onward as loop waste.`,
|
|
1535
1706
|
scope: "run",
|
|
1536
1707
|
scopeId: run2.workflow,
|
|
1537
1708
|
scopeLabel: run2.workflow,
|
|
1538
1709
|
costImpactUsd: round4(loopCost),
|
|
1710
|
+
signalSource: "observed",
|
|
1711
|
+
ruleId: LOOP_RULE_ID,
|
|
1712
|
+
evidence: {
|
|
1713
|
+
callIds: loopCalls.map((call) => call.id).sort(),
|
|
1714
|
+
runIds: [run2.id],
|
|
1715
|
+
sourceKinds: [run2.sourceKind]
|
|
1716
|
+
},
|
|
1539
1717
|
details: {
|
|
1540
1718
|
workflow: run2.workflow,
|
|
1541
|
-
maxIteration
|
|
1719
|
+
maxIteration,
|
|
1720
|
+
thresholdIteration: LOOP_WASTE_START_ITERATION
|
|
1542
1721
|
}
|
|
1543
1722
|
})
|
|
1544
1723
|
);
|
|
@@ -1573,6 +1752,12 @@ function buildFindings(runs) {
|
|
|
1573
1752
|
scopeId: workflow,
|
|
1574
1753
|
scopeLabel: workflow,
|
|
1575
1754
|
costImpactUsd: round4(outlierCost),
|
|
1755
|
+
signalSource: "observed",
|
|
1756
|
+
ruleId: CONTEXT_OUTLIER_RULE_ID,
|
|
1757
|
+
evidence: {
|
|
1758
|
+
runIds: outlierRuns.map((run2) => run2.id).sort(),
|
|
1759
|
+
sourceKinds: Array.from(new Set(outlierRuns.map((run2) => run2.sourceKind))).sort()
|
|
1760
|
+
},
|
|
1576
1761
|
details: {
|
|
1577
1762
|
workflow,
|
|
1578
1763
|
averageInputTokens: round4(average),
|
|
@@ -1598,6 +1783,12 @@ function buildFindings(runs) {
|
|
|
1598
1783
|
scopeId: workflow,
|
|
1599
1784
|
scopeLabel: workflow,
|
|
1600
1785
|
costImpactUsd: round4(idleCost),
|
|
1786
|
+
signalSource: "observed",
|
|
1787
|
+
ruleId: IDLE_SPEND_RULE_ID,
|
|
1788
|
+
evidence: {
|
|
1789
|
+
runIds: idleRuns.map((run2) => run2.id).sort(),
|
|
1790
|
+
sourceKinds: Array.from(new Set(idleRuns.map((run2) => run2.sourceKind))).sort()
|
|
1791
|
+
},
|
|
1601
1792
|
details: {
|
|
1602
1793
|
workflow
|
|
1603
1794
|
}
|
|
@@ -1620,6 +1811,13 @@ function buildFindings(runs) {
|
|
|
1620
1811
|
scopeId: workflow,
|
|
1621
1812
|
scopeLabel: workflow,
|
|
1622
1813
|
costImpactUsd: round4(spend * 0.3),
|
|
1814
|
+
signalSource: "observed",
|
|
1815
|
+
ruleId: CANDIDATE_DOWNGRADE_RULE_ID,
|
|
1816
|
+
evidence: {
|
|
1817
|
+
callIds: downgradeCalls.map((call) => call.id).sort(),
|
|
1818
|
+
runIds: Array.from(new Set(downgradeCalls.map((call) => call.runId))).sort(),
|
|
1819
|
+
sourceKinds: uniqueSourceKinds(downgradeCalls, runs)
|
|
1820
|
+
},
|
|
1623
1821
|
details: {
|
|
1624
1822
|
workflow,
|
|
1625
1823
|
expensiveCallCount: downgradeCalls.length,
|
|
@@ -1781,7 +1979,7 @@ var templatesByKind = {
|
|
|
1781
1979
|
severity: "high",
|
|
1782
1980
|
effort: "low",
|
|
1783
1981
|
titleFn: (finding) => `Reduce retry waste in ${formatScopeLabel(finding)}`,
|
|
1784
|
-
summaryFn: (finding) => `${finding.summary} This is confirmed retry overhead, so it is a fix-now issue rather than an experiment.`,
|
|
1982
|
+
summaryFn: (finding) => finding.signalSource === "observed" ? `${finding.summary} This is confirmed retry overhead, so it is a fix-now issue rather than an experiment.` : `${finding.summary} Treat this as likely retry overhead and inspect the retry wrapper before classifying the full amount as proven waste.`,
|
|
1785
1983
|
whereToChangeFn: (finding) => `Reduce retries or add exponential backoff in the retry wrapper for ${formatScopeLabel(finding)}.`,
|
|
1786
1984
|
validationPlanFn: () => "Ship the change, then rerun `xerg audit --compare --push` against the same source. Retry waste should drop materially on the next audit.",
|
|
1787
1985
|
actionsFn: () => [
|
|
@@ -2128,6 +2326,7 @@ function buildAuditSummary(input) {
|
|
|
2128
2326
|
structuralWasteRate: Number(
|
|
2129
2327
|
(totalSpendUsd === 0 ? 0 : wasteSpendUsd / totalSpendUsd).toFixed(4)
|
|
2130
2328
|
),
|
|
2329
|
+
wasteBySignalSource: buildWasteBySignalSource(input.findings),
|
|
2131
2330
|
wasteByKind: buildTaxonomyBuckets(input.findings, "waste"),
|
|
2132
2331
|
opportunityByKind: buildTaxonomyBuckets(input.findings, "opportunity"),
|
|
2133
2332
|
spendByWorkflow: buildBreakdown(
|
|
@@ -3445,9 +3644,18 @@ function formatUsdDelta(value) {
|
|
|
3445
3644
|
const sign = value > 0 ? "+" : "";
|
|
3446
3645
|
return `${sign}${formatUsd(value)}`;
|
|
3447
3646
|
}
|
|
3647
|
+
/**
 * Format a per-unit dollar rate (for example waste per run) for display.
 *
 * Delegates unchanged to `formatUsd`.
 *
 * @param value Rate in US dollars.
 * @returns Whatever `formatUsd` returns for `value`.
 */
function formatUsdRate(value) {
  const rendered = formatUsd(value);
  return rendered;
}
|
|
3448
3650
|
/**
 * Tell whether an audit summary was built from a Cursor usage CSV export.
 *
 * @param summary Audit summary with a `sourceFiles` array.
 * @returns true when at least one source file has kind "cursor-usage-csv".
 */
function isCursorUsageSummary(summary) {
  const { sourceFiles } = summary;
  const isCursorCsv = (sourceFile) => sourceFile.kind === "cursor-usage-csv";
  return sourceFiles.some(isCursorCsv);
}
|
|
3653
|
+
/**
 * Divide two numbers, falling back to 0 whenever the quotient is not a usable
 * finite number.
 *
 * This covers a zero denominator as well as missing or non-numeric operands
 * (for example a count that is `undefined`, which would otherwise produce NaN
 * and be rendered as a dollar amount).
 *
 * @param numerator Dividend.
 * @param denominator Divisor.
 * @returns `numerator / denominator` when that is finite, otherwise 0.
 */
function divideOrZero(numerator, denominator) {
  const quotient = numerator / denominator;
  return Number.isFinite(quotient) ? quotient : 0;
}
|
|
3656
|
+
/**
 * Render the inferred-waste share for display.
 *
 * @param value Share to render, or null/undefined when it is not available.
 * @returns "unavailable" for null/undefined, otherwise `formatPercent(value)`.
 */
function formatInferredShare(value) {
  // `== null` matches exactly null and undefined.
  if (value == null) {
    return "unavailable";
  }
  return formatPercent(value);
}
|
|
3451
3659
|
function topRows(rows, limit = 5) {
|
|
3452
3660
|
return rows.slice(0, limit).map((row) => {
|
|
3453
3661
|
return `- ${row.key}: ${formatUsd(row.spendUsd)} (${formatPercent(row.observedShare)} observed)`;
|
|
@@ -3532,6 +3740,35 @@ function renderFindingChange(change, state) {
|
|
|
3532
3740
|
}
|
|
3533
3741
|
return `- New: ${change.title} (${formatUsd(change.currentCostImpactUsd ?? 0)})`;
|
|
3534
3742
|
}
|
|
3743
|
+
/**
 * Render the shared "Before / after" rows of a comparison report.
 *
 * Normalized rates (waste rate, waste per run, waste per 1k calls, inferred
 * waste share) come first, followed by the rows labelled workload-dependent
 * and the raw run/call counts.
 *
 * @param summary Audit summary; `summary.comparison` holds the baseline values.
 * @returns Array of text lines, or an empty array when the summary has no
 *   comparison.
 */
function renderCompareCoreRows(summary) {
  const { comparison } = summary;
  if (!comparison) {
    return [];
  }

  const wastePerRun = {
    before: divideOrZero(comparison.baselineWasteSpendUsd, comparison.baselineRunCount),
    after: divideOrZero(summary.wasteSpendUsd, summary.runCount)
  };
  const wastePer1kCalls = {
    before: divideOrZero(comparison.baselineWasteSpendUsd, comparison.baselineCallCount / 1e3),
    after: divideOrZero(summary.wasteSpendUsd, summary.callCount / 1e3)
  };
  // Counts are printed raw, with an explicit "+" only for increases.
  const signedCount = (delta) => `${delta > 0 ? "+" : ""}${delta}`;

  const rows = [];
  rows.push("## Before / after");
  rows.push(`Compared against ${comparison.baselineGeneratedAt}`);
  rows.push(
    `- Waste rate: ${formatPercent(comparison.baselineStructuralWasteRate)} -> ${formatPercent(summary.structuralWasteRate)} (${formatPercentDelta(comparison.deltaStructuralWasteRate)})`
  );
  rows.push(
    `- Waste per run: ${formatUsdRate(wastePerRun.before)} -> ${formatUsdRate(wastePerRun.after)} (${formatUsdDelta(wastePerRun.after - wastePerRun.before)})`
  );
  rows.push(
    `- Waste per 1k calls: ${formatUsdRate(wastePer1kCalls.before)} -> ${formatUsdRate(wastePer1kCalls.after)} (${formatUsdDelta(wastePer1kCalls.after - wastePer1kCalls.before)})`
  );
  rows.push(
    `- Inferred waste share: ${formatInferredShare(comparison.baselineWasteBySignalSource?.inferredShare)} -> ${formatInferredShare(summary.wasteBySignalSource?.inferredShare)}`
  );
  rows.push("- CPO: unavailable (no outcome signal)");
  rows.push(
    `- Total spend (workload-dependent): ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`
  );
  rows.push(
    `- Structural waste (workload-dependent): ${formatUsd(comparison.baselineWasteSpendUsd)} -> ${formatUsd(summary.wasteSpendUsd)} (${formatUsdDelta(comparison.deltaWasteSpendUsd)})`
  );
  rows.push(
    `- Runs analyzed: ${comparison.baselineRunCount} -> ${summary.runCount} (${signedCount(comparison.deltaRunCount)})`
  );
  rows.push(
    `- Model calls: ${comparison.baselineCallCount} -> ${summary.callCount} (${signedCount(comparison.deltaCallCount)})`
  );
  return rows;
}
|
|
3535
3772
|
function renderCompareBlock(summary) {
|
|
3536
3773
|
if (!summary.comparison) {
|
|
3537
3774
|
return [];
|
|
@@ -3552,13 +3789,7 @@ function renderCompareBlock(summary) {
|
|
|
3552
3789
|
)
|
|
3553
3790
|
].slice(0, 5);
|
|
3554
3791
|
return [
|
|
3555
|
-
|
|
3556
|
-
`Compared against ${comparison.baselineGeneratedAt}`,
|
|
3557
|
-
`- Total spend: ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`,
|
|
3558
|
-
`- Structural waste: ${formatUsd(comparison.baselineWasteSpendUsd)} -> ${formatUsd(summary.wasteSpendUsd)} (${formatUsdDelta(comparison.deltaWasteSpendUsd)})`,
|
|
3559
|
-
`- Waste rate: ${formatPercent(comparison.baselineStructuralWasteRate)} -> ${formatPercent(summary.structuralWasteRate)} (${formatPercentDelta(comparison.deltaStructuralWasteRate)})`,
|
|
3560
|
-
`- Runs analyzed: ${comparison.baselineRunCount} -> ${summary.runCount} (${comparison.deltaRunCount > 0 ? "+" : ""}${comparison.deltaRunCount})`,
|
|
3561
|
-
`- Model calls: ${comparison.baselineCallCount} -> ${summary.callCount} (${comparison.deltaCallCount > 0 ? "+" : ""}${comparison.deltaCallCount})`,
|
|
3792
|
+
...renderCompareCoreRows(summary),
|
|
3562
3793
|
biggestImprovement ? `- Biggest improvement: ${describeSpendDelta(biggestImprovement)}` : "- Biggest improvement: none detected",
|
|
3563
3794
|
biggestRegression ? `- Biggest regression: ${describeSpendDelta(biggestRegression)}` : "- Biggest regression: none detected",
|
|
3564
3795
|
firstWorkflowToInspect ? `- First workflow to inspect now: ${firstWorkflowToInspect}` : "- First workflow to inspect now: no workflow delta available",
|
|
@@ -3682,10 +3913,7 @@ function renderCursorCompareBlock(summary) {
|
|
|
3682
3913
|
const modeSwing = comparison.workflowDeltas[0];
|
|
3683
3914
|
const modelSwing = comparison.modelDeltas[0];
|
|
3684
3915
|
return [
|
|
3685
|
-
|
|
3686
|
-
`Compared against ${comparison.baselineGeneratedAt}`,
|
|
3687
|
-
`- Total spend: ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`,
|
|
3688
|
-
`- Rows analyzed: ${formatCount(comparison.baselineRunCount)} -> ${formatCount(summary.runCount)} (${comparison.deltaRunCount > 0 ? "+" : ""}${comparison.deltaRunCount})`,
|
|
3916
|
+
...renderCompareCoreRows(summary),
|
|
3689
3917
|
`- Usage rows with pricing: ${formatCount(summary.pricingCoverage?.pricedCallCount ?? 0)}`,
|
|
3690
3918
|
modeSwing ? `- Mode swing to inspect: ${describeSpendDelta(modeSwing)}` : "- Mode swing to inspect: none",
|
|
3691
3919
|
modelSwing ? `- Model swing to inspect: ${describeSpendDelta(modelSwing)}` : "- Model swing to inspect: none"
|
|
@@ -3779,7 +4007,7 @@ function renderCursorMarkdownSummary(summary) {
|
|
|
3779
4007
|
"",
|
|
3780
4008
|
"## Findings",
|
|
3781
4009
|
...summary.findings.slice(0, 10).map((finding) => {
|
|
3782
|
-
return `- **${finding.title}** (${finding.classification}, ${finding.confidence})
|
|
4010
|
+
return `- **${finding.title}** (${finding.classification}, ${finding.confidence}). ${finding.summary} Estimated impact: ${formatUsd(finding.costImpactUsd)}.`;
|
|
3783
4011
|
}),
|
|
3784
4012
|
"",
|
|
3785
4013
|
...renderActionQueue(summary),
|
|
@@ -3862,21 +4090,13 @@ function renderMarkdownSummary(summary) {
|
|
|
3862
4090
|
"",
|
|
3863
4091
|
"## Findings",
|
|
3864
4092
|
...summary.findings.slice(0, 10).map((finding) => {
|
|
3865
|
-
return `- **${finding.title}** (${finding.classification}, ${finding.confidence})
|
|
4093
|
+
return `- **${finding.title}** (${finding.classification}, ${finding.confidence}). ${finding.summary} Estimated impact: ${formatUsd(finding.costImpactUsd)}.`;
|
|
3866
4094
|
}),
|
|
3867
4095
|
"",
|
|
3868
4096
|
...renderActionQueue(summary)
|
|
3869
4097
|
];
|
|
3870
4098
|
if (summary.comparison) {
|
|
3871
|
-
|
|
3872
|
-
lines.push(
|
|
3873
|
-
"",
|
|
3874
|
-
"## Before / after",
|
|
3875
|
-
`- Compared against: ${comparison.baselineGeneratedAt}`,
|
|
3876
|
-
`- Total spend: ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`,
|
|
3877
|
-
`- Structural waste: ${formatUsd(comparison.baselineWasteSpendUsd)} -> ${formatUsd(summary.wasteSpendUsd)} (${formatUsdDelta(comparison.deltaWasteSpendUsd)})`,
|
|
3878
|
-
`- Waste rate: ${formatPercent(comparison.baselineStructuralWasteRate)} -> ${formatPercent(summary.structuralWasteRate)} (${formatPercentDelta(comparison.deltaStructuralWasteRate)})`
|
|
3879
|
-
);
|
|
4099
|
+
lines.push("", ...renderCompareBlock(summary));
|
|
3880
4100
|
}
|
|
3881
4101
|
return lines.join("\n");
|
|
3882
4102
|
}
|
|
@@ -5989,6 +6209,7 @@ function renderRailwayDoctorReport(report) {
|
|
|
5989
6209
|
import { existsSync as existsSync2, mkdirSync as mkdirSync6, readFileSync as readFileSync9, writeFileSync as writeFileSync2 } from "fs";
|
|
5990
6210
|
import { dirname as dirname3, join as join8 } from "path";
|
|
5991
6211
|
var HOSTED_MCP_URL = "https://mcp.xerg.ai/mcp";
|
|
6212
|
+
var MCP_SERVER_NAME = "xerg";
|
|
5992
6213
|
async function runMcpSetupCommand() {
|
|
5993
6214
|
await runMcpSetupFlow();
|
|
5994
6215
|
}
|
|
@@ -6031,6 +6252,11 @@ async function runMcpSetupFlow() {
|
|
|
6031
6252
|
value: "claude-code",
|
|
6032
6253
|
description: "Project-scoped Claude Code MCP config"
|
|
6033
6254
|
},
|
|
6255
|
+
{
|
|
6256
|
+
name: "Codex",
|
|
6257
|
+
value: "codex",
|
|
6258
|
+
description: "Codex config.toml snippet"
|
|
6259
|
+
},
|
|
6034
6260
|
{
|
|
6035
6261
|
name: "Other",
|
|
6036
6262
|
value: "other",
|
|
@@ -6042,6 +6268,14 @@ async function runMcpSetupFlow() {
|
|
|
6042
6268
|
await handleCursorSetup(snippet, config);
|
|
6043
6269
|
return;
|
|
6044
6270
|
}
|
|
6271
|
+
if (client === "codex") {
|
|
6272
|
+
process.stdout.write(`${buildCodexMcpConfig(config)}
|
|
6273
|
+
`);
|
|
6274
|
+
process.stderr.write(
|
|
6275
|
+
"Add this to `~/.codex/config.toml`, then restart Codex so it loads the Xerg MCP tools.\n"
|
|
6276
|
+
);
|
|
6277
|
+
return;
|
|
6278
|
+
}
|
|
6045
6279
|
process.stdout.write(`${snippet}
|
|
6046
6280
|
`);
|
|
6047
6281
|
if (client === "claude-code") {
|
|
@@ -6079,7 +6313,7 @@ async function handleCursorSetup(snippet, config) {
|
|
|
6079
6313
|
function buildHostedMcpConfig(config) {
|
|
6080
6314
|
return {
|
|
6081
6315
|
mcpServers: {
|
|
6082
|
-
|
|
6316
|
+
[MCP_SERVER_NAME]: {
|
|
6083
6317
|
type: "http",
|
|
6084
6318
|
url: HOSTED_MCP_URL,
|
|
6085
6319
|
headers: {
|
|
@@ -6089,6 +6323,19 @@ function buildHostedMcpConfig(config) {
|
|
|
6089
6323
|
}
|
|
6090
6324
|
};
|
|
6091
6325
|
}
|
|
6326
|
+
/**
 * Build the TOML snippet that registers the hosted Xerg MCP server for Codex.
 *
 * The snippet has two tables: the server table (`enabled` and `url`) and its
 * `http_headers` sub-table carrying the bearer `Authorization` header.
 *
 * @param config Object whose `apiKey` is used as the bearer token.
 * @returns The snippet as a single string with "\n" line breaks and no
 *   trailing newline.
 */
function buildCodexMcpConfig(config) {
  const serverTable = `mcp_servers.${MCP_SERVER_NAME}`;
  const bearerValue = `Bearer ${config.apiKey}`;
  const serverSection = [
    `[${serverTable}]`,
    "enabled = true",
    `url = ${tomlString(HOSTED_MCP_URL)}`
  ];
  const headerSection = [
    `[${serverTable}.http_headers]`,
    `Authorization = ${tomlString(bearerValue)}`
  ];
  // A blank line separates the two tables.
  return [...serverSection, "", ...headerSection].join("\n");
}
|
|
6336
|
+
/**
 * Encode a value as a double-quoted TOML basic string.
 *
 * JSON string escaping is reused because the escapes it emits for quotes,
 * backslashes and control characters below U+0020 are also TOML escapes. Two
 * gaps are closed on top of that:
 *   - the value is coerced with `String()` first, so the result is always a
 *     quoted string (plain `JSON.stringify` returns `undefined` for
 *     `undefined` and an unquoted token for numbers);
 *   - U+007F (DEL) is escaped, since JSON leaves it raw but TOML does not
 *     allow it unescaped in a basic string.
 *
 * @param value Value to encode; non-strings are converted with `String()`.
 * @returns The quoted, escaped TOML string literal.
 */
function tomlString(value) {
  return JSON.stringify(String(value)).replace(/\u007f/g, "\\u007f");
}
|
|
6092
6339
|
function writeCursorConfig(filePath, config) {
|
|
6093
6340
|
mkdirSync6(dirname3(filePath), { recursive: true });
|
|
6094
6341
|
let parsed = {};
|
|
@@ -6425,7 +6672,7 @@ Notes:
|
|
|
6425
6672
|
function renderMcpSetupHelp(commandPrefix) {
|
|
6426
6673
|
return `${formatCommand("mcp-setup", commandPrefix)}
|
|
6427
6674
|
|
|
6428
|
-
Generate hosted MCP client configuration for Cursor, Claude Code, or another MCP client.
|
|
6675
|
+
Generate hosted MCP client configuration for Cursor, Claude Code, Codex, or another MCP client.
|
|
6429
6676
|
|
|
6430
6677
|
Usage:
|
|
6431
6678
|
${formatCommand("mcp-setup", commandPrefix)}
|
|
@@ -6434,6 +6681,7 @@ Notes:
|
|
|
6434
6681
|
- Interactive in v1 because client selection is prompt-driven
|
|
6435
6682
|
- Uses the hosted MCP endpoint at https://mcp.xerg.ai/mcp
|
|
6436
6683
|
- Can write a project-scoped Cursor config when .cursor/ already exists
|
|
6684
|
+
- Prints a Codex config.toml snippet when Codex is selected
|
|
6437
6685
|
- Local audits and compare stay available even if you skip hosted MCP setup
|
|
6438
6686
|
|
|
6439
6687
|
-h, --help Show help
|