@xerg/cli 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -5
- package/dist/index.js +270 -50
- package/dist/index.js.map +1 -1
- package/package.json +5 -4
- package/skills/xerg/SKILL.md +13 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# xerg
|
|
2
2
|
|
|
3
|
-
Audit OpenClaw and
|
|
3
|
+
Audit OpenClaw, Hermes, and Cursor spend in dollars, surface provenance-aware waste findings, compare fixes, and optionally connect hosted follow-up after the first local result.
|
|
4
4
|
|
|
5
5
|
Xerg runs locally by default. Local audits and `--compare` are free. No account is required for local value, and no data leaves your machine unless you explicitly push results to Xerg Cloud.
|
|
6
6
|
|
|
@@ -16,7 +16,7 @@ npx @xerg/cli init
|
|
|
16
16
|
|
|
17
17
|
- detects local OpenClaw or Hermes data
|
|
18
18
|
- runs a first audit and stores the local snapshot
|
|
19
|
-
- prints the
|
|
19
|
+
- prints the provenance-aware terminal summary
|
|
20
20
|
- offers optional hosted follow-up with `connect` and `mcp-setup`
|
|
21
21
|
|
|
22
22
|
Prefer a global install?
|
|
@@ -34,6 +34,7 @@ If you already know what you want, skip `init` and use the direct flows:
|
|
|
34
34
|
npx @xerg/cli doctor
|
|
35
35
|
npx @xerg/cli audit
|
|
36
36
|
npx @xerg/cli audit --compare
|
|
37
|
+
npx @xerg/cli audit --cursor-usage-csv ./cursor-usage.csv
|
|
37
38
|
npx @xerg/cli audit --json
|
|
38
39
|
npx @xerg/cli audit --markdown
|
|
39
40
|
```
|
|
@@ -109,7 +110,7 @@ xerg push
|
|
|
109
110
|
|
|
110
111
|
## Works where your agent data lives
|
|
111
112
|
|
|
112
|
-
- Local machine:
|
|
113
|
+
- Local machine: OpenClaw, Hermes, and explicit Cursor usage CSV exports
|
|
113
114
|
- VPS or remote server: OpenClaw only in this phase
|
|
114
115
|
- If OpenClaw runs remotely, you can audit it from your local machine with `xerg audit --remote user@host`
|
|
115
116
|
- Or point Xerg at exported files directly with flags
|
|
@@ -135,6 +136,7 @@ xerg audit --runtime openclaw --log-file /path/to/openclaw.log
|
|
|
135
136
|
xerg audit --runtime openclaw --sessions-dir /path/to/sessions
|
|
136
137
|
xerg audit --runtime hermes --log-file ~/.hermes/logs/agent.log
|
|
137
138
|
xerg audit --runtime hermes --sessions-dir ~/.hermes/sessions
|
|
139
|
+
xerg audit --cursor-usage-csv ./cursor-usage.csv
|
|
138
140
|
```
|
|
139
141
|
|
|
140
142
|
If only one supported local runtime is present, Xerg auto-selects it. If both OpenClaw and Hermes are present locally, rerun with `--runtime openclaw` or `--runtime hermes`.
|
|
@@ -191,10 +193,13 @@ Example `~/.xerg/config.json`:
|
|
|
191
193
|
- Total spend by workflow and model, in dollars
|
|
192
194
|
- Daily spend and confirmed waste rollups in UTC
|
|
193
195
|
- Observed vs. estimated cost (always labeled)
|
|
194
|
-
- Confirmed waste: retry
|
|
196
|
+
- Confirmed waste: retry and loop findings when the required source structure is present
|
|
197
|
+
- Provenance-aware waste rollups by observed, inferred, declared, or unknown signal source
|
|
195
198
|
- Savings opportunities: context bloat, downgrade candidates, idle, max mode concentration where applicable
|
|
196
199
|
- Ranked recommendations with where-to-change guidance and compare validation steps
|
|
197
|
-
- Before/after
|
|
200
|
+
- Before/after normalized rates on re-audit, including waste per run and waste per 1k calls
|
|
201
|
+
|
|
202
|
+
Local JSON findings may include `signalSource`, `ruleId`, and evidence references so agents can distinguish observed signals from inferred or legacy unknown provenance. These local provenance fields are not part of the pushed v2 wire payload.
|
|
198
203
|
|
|
199
204
|
## Privacy
|
|
200
205
|
|
package/dist/index.js
CHANGED
|
@@ -1165,6 +1165,36 @@ function buildTaxonomyBuckets(findings, classification) {
|
|
|
1165
1165
|
}
|
|
1166
1166
|
return Array.from(buckets.values()).sort((left, right) => right.spendUsd - left.spendUsd);
|
|
1167
1167
|
}
|
|
1168
|
+
/**
 * Roll up the dollar impact of waste findings by the provenance of their signal.
 *
 * Only findings whose `classification` is "waste" are counted. A finding's
 * `costImpactUsd` is added to the bucket named by its `signalSource`
 * ("observed", "inferred" or "declared"); any other value lands in `unknownUsd`.
 *
 * @param {Iterable<object>} findings - findings carrying `classification`,
 *   `signalSource` and `costImpactUsd`.
 * @returns {{observedUsd: number, inferredUsd: number, declaredUsd: number,
 *   unknownUsd: number, inferredShare: (number|null)}} the rollup;
 *   `inferredShare` is null whenever any unknown-provenance waste exists.
 */
function buildWasteBySignalSource(findings) {
  const bucketBySource = new Map([
    ["observed", "observedUsd"],
    ["inferred", "inferredUsd"],
    ["declared", "declaredUsd"]
  ]);
  const rollup = {
    observedUsd: 0,
    inferredUsd: 0,
    declaredUsd: 0,
    unknownUsd: 0,
    inferredShare: null
  };
  for (const finding of findings) {
    if (finding.classification === "waste") {
      const bucket = bucketBySource.get(finding.signalSource) ?? "unknownUsd";
      rollup[bucket] = round2(rollup[bucket] + finding.costImpactUsd);
    }
  }
  // A share over the known sources would be misleading while some waste has
  // unknown provenance, so it stays null in that case.
  if (rollup.unknownUsd > 0) {
    rollup.inferredShare = null;
    return rollup;
  }
  const knownTotal = rollup.observedUsd + rollup.inferredUsd + rollup.declaredUsd;
  const share = knownTotal === 0 ? 0 : rollup.inferredUsd / knownTotal;
  rollup.inferredShare = Number(share.toFixed(4));
  return rollup;
}
|
|
1168
1198
|
/**
 * Index breakdown rows by key.
 *
 * @param {Array<{key: string, spendUsd: number}>} rows - breakdown rows.
 * @returns {Map<string, number>} map from each row's `key` to its `spendUsd`;
 *   when a key repeats, the last row wins.
 */
function toSpendMap(rows) {
  const spendByKey = new Map();
  for (const row of rows) {
    spendByKey.set(row.key, row.spendUsd);
  }
  return spendByKey;
}
|
|
@@ -1280,6 +1310,7 @@ function hydrateAuditSummary(summary) {
|
|
|
1280
1310
|
opportunityByKind: summary.opportunityByKind?.length > 0 ? summary.opportunityByKind : buildTaxonomyBuckets(summary.findings, "opportunity"),
|
|
1281
1311
|
spendByDay: summary.spendByDay ?? [],
|
|
1282
1312
|
wasteByDay: summary.wasteByDay ?? [],
|
|
1313
|
+
wasteBySignalSource: summary.wasteBySignalSource ?? buildWasteBySignalSource(summary.findings),
|
|
1283
1314
|
recommendations: summary.recommendations ?? [],
|
|
1284
1315
|
notes: summary.notes ?? [],
|
|
1285
1316
|
pricingCoverage: summary.pricingCoverage ?? null,
|
|
@@ -1300,6 +1331,7 @@ function buildAuditComparison(current, baseline) {
|
|
|
1300
1331
|
baselineWasteSpendUsd: baseline.wasteSpendUsd,
|
|
1301
1332
|
baselineOpportunitySpendUsd: baseline.opportunitySpendUsd,
|
|
1302
1333
|
baselineStructuralWasteRate: baseline.structuralWasteRate,
|
|
1334
|
+
baselineWasteBySignalSource: baseline.wasteBySignalSource ?? buildWasteBySignalSource(baseline.findings),
|
|
1303
1335
|
deltaTotalSpendUsd: round2(current.totalSpendUsd - baseline.totalSpendUsd),
|
|
1304
1336
|
deltaObservedSpendUsd: round2(current.observedSpendUsd - baseline.observedSpendUsd),
|
|
1305
1337
|
deltaEstimatedSpendUsd: round2(current.estimatedSpendUsd - baseline.estimatedSpendUsd),
|
|
@@ -1350,6 +1382,8 @@ function readLatestComparableAuditSummary(input) {
|
|
|
1350
1382
|
}
|
|
1351
1383
|
|
|
1352
1384
|
// ../core/src/findings/cursor.ts
|
|
1385
|
+
// Rule identifiers stamped as `ruleId` on the two findings produced by
// buildCursorUsageFindings (cache carryover and max-mode concentration).
var CACHE_CARRYOVER_RULE_ID = "cursor_cache_ratio_v1";
var MAX_MODE_CONCENTRATION_RULE_ID = "cursor_max_mode_concentration_v1";
|
|
1353
1387
|
/**
 * Round a number to six decimal places.
 *
 * NOTE(review): the numeric suffix does not indicate the precision; it looks
 * like a bundler de-duplication suffix — confirm against the original source.
 *
 * @param {number} value - number to round.
 * @returns {number} `value` rounded via `toFixed(6)`.
 */
function round3(value) {
  return Number(value.toFixed(6));
}
|
|
@@ -1419,6 +1453,13 @@ function buildCursorUsageFindings(runs) {
|
|
|
1419
1453
|
scopeId: "all",
|
|
1420
1454
|
scopeLabel: "Cursor usage",
|
|
1421
1455
|
costImpactUsd: cacheImpactUsd,
|
|
1456
|
+
signalSource: "observed",
|
|
1457
|
+
ruleId: CACHE_CARRYOVER_RULE_ID,
|
|
1458
|
+
evidence: {
|
|
1459
|
+
callIds: cacheAwareCalls.map((call) => call.id).sort(),
|
|
1460
|
+
runIds: Array.from(new Set(cacheAwareCalls.map((call) => call.runId))).sort(),
|
|
1461
|
+
sourceKinds: ["cursor-usage-csv"]
|
|
1462
|
+
},
|
|
1422
1463
|
details: {
|
|
1423
1464
|
cacheReadShare: round3(cacheReadShare),
|
|
1424
1465
|
cacheCoverageShare: round3(cacheCoverageShare),
|
|
@@ -1447,6 +1488,13 @@ function buildCursorUsageFindings(runs) {
|
|
|
1447
1488
|
scopeId: "all",
|
|
1448
1489
|
scopeLabel: "Cursor usage",
|
|
1449
1490
|
costImpactUsd: round3(maxModeSpendUsd * 0.2),
|
|
1491
|
+
signalSource: "observed",
|
|
1492
|
+
ruleId: MAX_MODE_CONCENTRATION_RULE_ID,
|
|
1493
|
+
evidence: {
|
|
1494
|
+
callIds: maxModeCalls.map((call) => call.id).sort(),
|
|
1495
|
+
runIds: Array.from(new Set(maxModeCalls.map((call) => call.runId))).sort(),
|
|
1496
|
+
sourceKinds: ["cursor-usage-csv"]
|
|
1497
|
+
},
|
|
1450
1498
|
details: {
|
|
1451
1499
|
maxModeSpendUsd: round3(maxModeSpendUsd),
|
|
1452
1500
|
maxModeSpendShare: round3(maxModeSpendShare),
|
|
@@ -1468,55 +1516,178 @@ function buildCursorUsageFindings(runs) {
|
|
|
1468
1516
|
}
|
|
1469
1517
|
|
|
1470
1518
|
// ../core/src/findings/engine.ts
|
|
1519
|
+
// Rule identifiers stamped as `ruleId` on the findings built by buildFindings.
var RETRY_OBSERVED_RULE_ID = "retry_explicit_failed_attempt_v1";
var RETRY_INFERRED_RULE_ID = "retry_later_attempt_proxy_v1";
var LOOP_RULE_ID = "loop_iteration_threshold_v1";
var CONTEXT_OUTLIER_RULE_ID = "context_outlier_tokens_v1";
var IDLE_SPEND_RULE_ID = "idle_workflow_name_v1";
var CANDIDATE_DOWNGRADE_RULE_ID = "candidate_downgrade_task_model_v1";
// Calls whose iteration is at or beyond this value are counted as loop waste.
var LOOP_WASTE_START_ITERATION = 6;
// A loop finding is only emitted once a run's highest iteration reaches this value.
var LOOP_FINDING_MIN_ITERATION = 7;
|
|
1471
1527
|
/**
 * Attach a deterministic `id` to a finding.
 *
 * The id is the `sha1` of a colon-joined fingerprint of the finding's kind,
 * scope, scope id, title, cost impact, summary, signal source and rule id.
 * A missing signal source is fingerprinted as "unknown" and a missing rule id
 * as "none".
 *
 * @param {object} input - finding fields without an id.
 * @returns {object} a shallow copy of `input` with `id` set.
 */
function createFinding2(input) {
  const { kind, scope, scopeId, title, costImpactUsd, summary } = input;
  const signalSource = input.signalSource ?? "unknown";
  const ruleId = input.ruleId ?? "none";
  const fingerprint = `${kind}:${scope}:${scopeId}:${title}:${costImpactUsd}:${summary}:${signalSource}:${ruleId}`;
  return {
    ...input,
    id: sha1(fingerprint)
  };
}
|
|
1479
1535
|
/**
 * Round a number to six decimal places.
 *
 * NOTE(review): the numeric suffix does not indicate the precision; it looks
 * like a bundler de-duplication suffix — confirm against the original source.
 *
 * @param {number} value - number to round.
 * @returns {number} `value` rounded via `toFixed(6)`.
 */
function round4(value) {
  return Number(value.toFixed(6));
}
|
|
1538
|
+
/**
 * Tell whether a call's status marks it as failed or aborted.
 *
 * The status is compared case-insensitively and matches when it contains
 * "error", "fail" or "abort". A null or undefined status never matches.
 *
 * @param {{status?: (string|null)}} call - call record.
 * @returns {boolean} true when the status contains one of the failure markers.
 */
function isFailedOrAborted(call) {
  const normalized = (call.status ?? "").toLowerCase();
  return ["error", "fail", "abort"].some((marker) => normalized.includes(marker));
}
|
|
1542
|
+
/**
 * Tell whether a call carries an explicit retry marker.
 *
 * A call counts as a retry when its `attempt` number is above 1 (a missing
 * attempt is treated as 1) or when its `retries` counter is above 0.
 *
 * @param {{attempt?: (number|null), retries?: number}} call - call record.
 * @returns {boolean} true when either retry marker is present.
 */
function hasExplicitRetrySignal(call) {
  const attempt = call.attempt ?? 1;
  if (attempt > 1) {
    return true;
  }
  return call.retries > 0;
}
|
|
1545
|
+
/**
 * Convert a call's timestamp to epoch milliseconds.
 *
 * @param {{timestamp: *}} call - call record; `timestamp` is passed to `new Date`.
 * @returns {number} epoch milliseconds, or `Number.POSITIVE_INFINITY` when the
 *   timestamp cannot be parsed, so that such calls sort last.
 */
function toTimestampMs(call) {
  const timestamp = new Date(call.timestamp).getTime();
  return Number.isFinite(timestamp) ? timestamp : Number.POSITIVE_INFINITY;
}
/**
 * Order calls chronologically without mutating the input array.
 *
 * Calls with equal timestamps, including several unparseable ones, keep their
 * original relative order through the explicit index tiebreak.
 *
 * @param {Array<object>} calls - call records.
 * @returns {Array<{call: object, index: number}>} entries pairing each call
 *   with its original position, sorted by time.
 */
function sortCallsByTime(calls) {
  return calls.map((call, index) => ({ call, index })).sort((left, right) => {
    const leftMs = toTimestampMs(left.call);
    const rightMs = toTimestampMs(right.call);
    // Compare instead of subtracting: two unparseable timestamps are both
    // Infinity, and Infinity - Infinity is NaN, which would bypass the tiebreak.
    if (leftMs === rightMs) {
      return left.index - right.index;
    }
    return leftMs < rightMs ? -1 : 1;
  });
}
|
|
1555
|
+
/**
 * Tell whether runs from a given source kind may be used for retry and loop
 * detection. Only the "gateway" source kind qualifies.
 *
 * @param {string} sourceKind - the run's `sourceKind`.
 * @returns {boolean} true only for "gateway".
 */
function canUseStructuralSignals(sourceKind) {
  return sourceKind === "gateway";
}
|
|
1558
|
+
/**
 * Tell whether any call after `currentIndex` is an explicit retry that can
 * follow the call at `currentIndex`.
 *
 * A later call qualifies when `hasExplicitRetrySignal` accepts it and either
 * its attempt number is higher than the current call's, or one of the two
 * attempt numbers is missing (null or undefined) so they cannot be compared.
 *
 * @param {Array<{call: object, index: number}>} sortedCalls - time-ordered entries.
 * @param {number} currentIndex - position of the call being examined.
 * @returns {boolean} true when a qualifying later retry exists; false when
 *   there is none or `currentIndex` does not address an entry.
 */
function hasLaterExplicitRetryAttempt(sortedCalls, currentIndex) {
  const current = sortedCalls[currentIndex]?.call;
  if (!current) {
    return false;
  }
  for (let position = currentIndex + 1; position < sortedCalls.length; position += 1) {
    const { call } = sortedCalls[position];
    if (!hasExplicitRetrySignal(call)) {
      continue;
    }
    // `== null` covers undefined as well as null, matching the `?? 1` default
    // that hasExplicitRetrySignal applies to a missing attempt number.
    if (current.attempt == null || call.attempt == null) {
      return true;
    }
    if (call.attempt > current.attempt) {
      return true;
    }
  }
  return false;
}
|
|
1573
|
+
/**
 * Collect the distinct source kinds of the runs that the given calls belong to.
 *
 * Calls whose `runId` matches no run, and runs without a truthy `sourceKind`,
 * contribute nothing.
 *
 * @param {Array<{runId: string}>} calls - call records.
 * @param {Array<{id: string, sourceKind?: string}>} runs - run records.
 * @returns {string[]} distinct source kinds in default sort order.
 */
function uniqueSourceKinds(calls, runs) {
  const sourceKindByRunId = new Map();
  for (const run of runs) {
    sourceKindByRunId.set(run.id, run.sourceKind);
  }
  const kinds = new Set();
  for (const call of calls) {
    const kind = sourceKindByRunId.get(call.runId);
    if (kind) {
      kinds.add(kind);
    }
  }
  return [...kinds].sort();
}
|
|
1581
|
+
/**
 * Build the workspace-wide "retry-waste" finding for a set of calls.
 *
 * The cost impact is the summed `costUsd` of the calls. An "observed" signal
 * source yields a high-confidence finding worded around failed calls that
 * were retried; any other signal source yields a medium-confidence finding
 * worded around later retry attempts used as a proxy.
 *
 * @param {{calls: Array<object>, runs: Array<object>, signalSource: string,
 *   ruleId: string}} input - the retry calls, all runs (used to resolve source
 *   kinds), and the provenance to stamp on the finding.
 * @returns {object} the finding as returned by `createFinding2`.
 */
function buildRetryFinding(input) {
  const { calls, runs, signalSource, ruleId } = input;
  const callCount = calls.length;
  const plural = callCount === 1 ? "" : "s";
  const isObserved = signalSource === "observed";
  let retryCostUsd = 0;
  for (const call of calls) {
    retryCostUsd += call.costUsd;
  }
  let confidence;
  let title;
  let summary;
  if (isObserved) {
    confidence = "high";
    title = "Retry waste is consuming measurable spend";
    summary = `${callCount} failed or aborted call${plural} were followed by explicit retry attempts, making their spend retry overhead.`;
  } else {
    confidence = "medium";
    title = "Retry waste is likely present from later retry attempts";
    summary = `${callCount} later retry attempt${plural} were counted as proxy retry overhead because the earlier failed attempt was not separately countable.`;
  }
  const callIds = calls.map((call) => call.id).sort();
  const runIds = [...new Set(calls.map((call) => call.runId))].sort();
  return createFinding2({
    classification: "waste",
    confidence,
    kind: "retry-waste",
    title,
    summary,
    scope: "global",
    scopeId: "all",
    scopeLabel: "workspace",
    costImpactUsd: round4(retryCostUsd),
    signalSource,
    ruleId,
    evidence: {
      callIds,
      runIds,
      sourceKinds: uniqueSourceKinds(calls, runs)
    },
    details: {
      retryCallCount: callCount
    }
  });
}
|
|
1482
1606
|
function buildFindings(runs) {
|
|
1483
1607
|
const findings = [];
|
|
1484
1608
|
const wasteAttributions = [];
|
|
1485
|
-
const
|
|
1486
|
-
const
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1609
|
+
const observedRetryCalls = [];
|
|
1610
|
+
const inferredRetryCalls = [];
|
|
1611
|
+
const retryCoveredCallIds = /* @__PURE__ */ new Set();
|
|
1612
|
+
for (const run2 of runs.filter((candidate) => canUseStructuralSignals(candidate.sourceKind))) {
|
|
1613
|
+
const sortedCalls = sortCallsByTime(run2.calls);
|
|
1614
|
+
sortedCalls.forEach(({ call }, index) => {
|
|
1615
|
+
if (!isFailedOrAborted(call)) {
|
|
1616
|
+
return;
|
|
1617
|
+
}
|
|
1618
|
+
if (!hasExplicitRetrySignal(call) && !hasLaterExplicitRetryAttempt(sortedCalls, index)) {
|
|
1619
|
+
return;
|
|
1620
|
+
}
|
|
1621
|
+
if (!hasLaterExplicitRetryAttempt(sortedCalls, index)) {
|
|
1622
|
+
return;
|
|
1623
|
+
}
|
|
1624
|
+
observedRetryCalls.push(call);
|
|
1625
|
+
retryCoveredCallIds.add(call.id);
|
|
1626
|
+
const later = sortedCalls.slice(index + 1).find(({ call: laterCall }) => hasExplicitRetrySignal(laterCall));
|
|
1627
|
+
if (later) {
|
|
1628
|
+
retryCoveredCallIds.add(later.call.id);
|
|
1629
|
+
}
|
|
1630
|
+
});
|
|
1631
|
+
for (const { call } of sortedCalls) {
|
|
1632
|
+
if (!hasExplicitRetrySignal(call) || retryCoveredCallIds.has(call.id)) {
|
|
1633
|
+
continue;
|
|
1634
|
+
}
|
|
1635
|
+
const hasEarlierCountableFailure = sortedCalls.some(({ call: earlier }) => {
|
|
1636
|
+
if (earlier.id === call.id) {
|
|
1637
|
+
return false;
|
|
1638
|
+
}
|
|
1639
|
+
return toTimestampMs(earlier) < toTimestampMs(call) && isFailedOrAborted(earlier);
|
|
1640
|
+
});
|
|
1641
|
+
if (!hasEarlierCountableFailure) {
|
|
1642
|
+
inferredRetryCalls.push(call);
|
|
1643
|
+
retryCoveredCallIds.add(call.id);
|
|
1644
|
+
}
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
if (observedRetryCalls.length > 0) {
|
|
1492
1648
|
wasteAttributions.push(
|
|
1493
|
-
...
|
|
1649
|
+
...observedRetryCalls.map((call) => ({
|
|
1494
1650
|
kind: "retry-waste",
|
|
1495
1651
|
timestamp: call.timestamp,
|
|
1496
1652
|
wasteUsd: call.costUsd
|
|
1497
1653
|
}))
|
|
1498
1654
|
);
|
|
1499
1655
|
findings.push(
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1656
|
+
buildRetryFinding({
|
|
1657
|
+
calls: observedRetryCalls,
|
|
1658
|
+
runs,
|
|
1659
|
+
signalSource: "observed",
|
|
1660
|
+
ruleId: RETRY_OBSERVED_RULE_ID
|
|
1661
|
+
})
|
|
1662
|
+
);
|
|
1663
|
+
}
|
|
1664
|
+
if (inferredRetryCalls.length > 0) {
|
|
1665
|
+
wasteAttributions.push(
|
|
1666
|
+
...inferredRetryCalls.map((call) => ({
|
|
1503
1667
|
kind: "retry-waste",
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1668
|
+
timestamp: call.timestamp,
|
|
1669
|
+
wasteUsd: call.costUsd
|
|
1670
|
+
}))
|
|
1671
|
+
);
|
|
1672
|
+
findings.push(
|
|
1673
|
+
buildRetryFinding({
|
|
1674
|
+
calls: inferredRetryCalls,
|
|
1675
|
+
runs,
|
|
1676
|
+
signalSource: "inferred",
|
|
1677
|
+
ruleId: RETRY_INFERRED_RULE_ID
|
|
1513
1678
|
})
|
|
1514
1679
|
);
|
|
1515
1680
|
}
|
|
1516
|
-
for (const run2 of runs) {
|
|
1517
|
-
const
|
|
1518
|
-
if (
|
|
1519
|
-
|
|
1681
|
+
for (const run2 of runs.filter((candidate) => canUseStructuralSignals(candidate.sourceKind))) {
|
|
1682
|
+
const iterations = run2.calls.map((call) => call.iteration).filter((iteration) => iteration !== null);
|
|
1683
|
+
if (iterations.length === 0) {
|
|
1684
|
+
continue;
|
|
1685
|
+
}
|
|
1686
|
+
const maxIteration = Math.max(...iterations);
|
|
1687
|
+
if (maxIteration >= LOOP_FINDING_MIN_ITERATION) {
|
|
1688
|
+
const loopCalls = run2.calls.filter(
|
|
1689
|
+
(call) => (call.iteration ?? 0) >= LOOP_WASTE_START_ITERATION
|
|
1690
|
+
);
|
|
1520
1691
|
const loopCost = loopCalls.reduce((sum, call) => sum + call.costUsd, 0);
|
|
1521
1692
|
wasteAttributions.push(
|
|
1522
1693
|
...loopCalls.map((call) => ({
|
|
@@ -1531,14 +1702,22 @@ function buildFindings(runs) {
|
|
|
1531
1702
|
confidence: "high",
|
|
1532
1703
|
kind: "loop-waste",
|
|
1533
1704
|
title: `Workflow "${run2.workflow}" ran beyond efficient loop bounds`,
|
|
1534
|
-
summary: `This run reached ${maxIteration} iterations. Xerg treats
|
|
1705
|
+
summary: `This run reached ${maxIteration} iterations. Xerg treats spend from iteration ${LOOP_WASTE_START_ITERATION} onward as loop waste.`,
|
|
1535
1706
|
scope: "run",
|
|
1536
1707
|
scopeId: run2.workflow,
|
|
1537
1708
|
scopeLabel: run2.workflow,
|
|
1538
1709
|
costImpactUsd: round4(loopCost),
|
|
1710
|
+
signalSource: "observed",
|
|
1711
|
+
ruleId: LOOP_RULE_ID,
|
|
1712
|
+
evidence: {
|
|
1713
|
+
callIds: loopCalls.map((call) => call.id).sort(),
|
|
1714
|
+
runIds: [run2.id],
|
|
1715
|
+
sourceKinds: [run2.sourceKind]
|
|
1716
|
+
},
|
|
1539
1717
|
details: {
|
|
1540
1718
|
workflow: run2.workflow,
|
|
1541
|
-
maxIteration
|
|
1719
|
+
maxIteration,
|
|
1720
|
+
thresholdIteration: LOOP_WASTE_START_ITERATION
|
|
1542
1721
|
}
|
|
1543
1722
|
})
|
|
1544
1723
|
);
|
|
@@ -1573,6 +1752,12 @@ function buildFindings(runs) {
|
|
|
1573
1752
|
scopeId: workflow,
|
|
1574
1753
|
scopeLabel: workflow,
|
|
1575
1754
|
costImpactUsd: round4(outlierCost),
|
|
1755
|
+
signalSource: "observed",
|
|
1756
|
+
ruleId: CONTEXT_OUTLIER_RULE_ID,
|
|
1757
|
+
evidence: {
|
|
1758
|
+
runIds: outlierRuns.map((run2) => run2.id).sort(),
|
|
1759
|
+
sourceKinds: Array.from(new Set(outlierRuns.map((run2) => run2.sourceKind))).sort()
|
|
1760
|
+
},
|
|
1576
1761
|
details: {
|
|
1577
1762
|
workflow,
|
|
1578
1763
|
averageInputTokens: round4(average),
|
|
@@ -1598,6 +1783,12 @@ function buildFindings(runs) {
|
|
|
1598
1783
|
scopeId: workflow,
|
|
1599
1784
|
scopeLabel: workflow,
|
|
1600
1785
|
costImpactUsd: round4(idleCost),
|
|
1786
|
+
signalSource: "observed",
|
|
1787
|
+
ruleId: IDLE_SPEND_RULE_ID,
|
|
1788
|
+
evidence: {
|
|
1789
|
+
runIds: idleRuns.map((run2) => run2.id).sort(),
|
|
1790
|
+
sourceKinds: Array.from(new Set(idleRuns.map((run2) => run2.sourceKind))).sort()
|
|
1791
|
+
},
|
|
1601
1792
|
details: {
|
|
1602
1793
|
workflow
|
|
1603
1794
|
}
|
|
@@ -1620,6 +1811,13 @@ function buildFindings(runs) {
|
|
|
1620
1811
|
scopeId: workflow,
|
|
1621
1812
|
scopeLabel: workflow,
|
|
1622
1813
|
costImpactUsd: round4(spend * 0.3),
|
|
1814
|
+
signalSource: "observed",
|
|
1815
|
+
ruleId: CANDIDATE_DOWNGRADE_RULE_ID,
|
|
1816
|
+
evidence: {
|
|
1817
|
+
callIds: downgradeCalls.map((call) => call.id).sort(),
|
|
1818
|
+
runIds: Array.from(new Set(downgradeCalls.map((call) => call.runId))).sort(),
|
|
1819
|
+
sourceKinds: uniqueSourceKinds(downgradeCalls, runs)
|
|
1820
|
+
},
|
|
1623
1821
|
details: {
|
|
1624
1822
|
workflow,
|
|
1625
1823
|
expensiveCallCount: downgradeCalls.length,
|
|
@@ -1781,7 +1979,7 @@ var templatesByKind = {
|
|
|
1781
1979
|
severity: "high",
|
|
1782
1980
|
effort: "low",
|
|
1783
1981
|
titleFn: (finding) => `Reduce retry waste in ${formatScopeLabel(finding)}`,
|
|
1784
|
-
summaryFn: (finding) => `${finding.summary} This is confirmed retry overhead, so it is a fix-now issue rather than an experiment.`,
|
|
1982
|
+
summaryFn: (finding) => finding.signalSource === "observed" ? `${finding.summary} This is confirmed retry overhead, so it is a fix-now issue rather than an experiment.` : `${finding.summary} Treat this as likely retry overhead and inspect the retry wrapper before classifying the full amount as proven waste.`,
|
|
1785
1983
|
whereToChangeFn: (finding) => `Reduce retries or add exponential backoff in the retry wrapper for ${formatScopeLabel(finding)}.`,
|
|
1786
1984
|
validationPlanFn: () => "Ship the change, then rerun `xerg audit --compare --push` against the same source. Retry waste should drop materially on the next audit.",
|
|
1787
1985
|
actionsFn: () => [
|
|
@@ -2128,6 +2326,7 @@ function buildAuditSummary(input) {
|
|
|
2128
2326
|
structuralWasteRate: Number(
|
|
2129
2327
|
(totalSpendUsd === 0 ? 0 : wasteSpendUsd / totalSpendUsd).toFixed(4)
|
|
2130
2328
|
),
|
|
2329
|
+
wasteBySignalSource: buildWasteBySignalSource(input.findings),
|
|
2131
2330
|
wasteByKind: buildTaxonomyBuckets(input.findings, "waste"),
|
|
2132
2331
|
opportunityByKind: buildTaxonomyBuckets(input.findings, "opportunity"),
|
|
2133
2332
|
spendByWorkflow: buildBreakdown(
|
|
@@ -3445,9 +3644,18 @@ function formatUsdDelta(value) {
|
|
|
3445
3644
|
const sign = value > 0 ? "+" : "";
|
|
3446
3645
|
return `${sign}${formatUsd(value)}`;
|
|
3447
3646
|
}
|
|
3647
|
+
/**
 * Format a per-unit dollar rate (for example waste per run) for display.
 * Currently delegates to `formatUsd` unchanged.
 *
 * @param {number} value - dollar amount per unit.
 * @returns {string} whatever `formatUsd` returns for `value`.
 */
function formatUsdRate(value) {
  return formatUsd(value);
}
|
|
3448
3650
|
/**
 * Tell whether an audit summary was built from a Cursor usage CSV export.
 *
 * @param {{sourceFiles: Array<{kind: string}>}} summary - audit summary.
 * @returns {boolean} true when any source file has kind "cursor-usage-csv".
 */
function isCursorUsageSummary(summary) {
  for (const source of summary.sourceFiles) {
    if (source.kind === "cursor-usage-csv") {
      return true;
    }
  }
  return false;
}
|
|
3653
|
+
/**
 * Divide two numbers, yielding 0 when the denominator cannot be divided by.
 *
 * @param {number} numerator - value to divide.
 * @param {number} denominator - value to divide by.
 * @returns {number} `numerator / denominator`, or 0 when the denominator is
 *   zero, NaN, or not a number at all (for example a count missing from a
 *   stored summary), so that NaN never reaches the rendered report through a
 *   bad denominator.
 */
function divideOrZero(numerator, denominator) {
  if (typeof denominator !== "number" || Number.isNaN(denominator) || denominator === 0) {
    return 0;
  }
  return numerator / denominator;
}
|
|
3656
|
+
/**
 * Format the inferred-waste share for display.
 *
 * @param {(number|null|undefined)} value - share of known waste that is
 *   inferred, or null/undefined when it is not available.
 * @returns {string} "unavailable" for a missing share, otherwise the result
 *   of `formatPercent(value)`; a share of 0 is formatted, not treated as missing.
 */
function formatInferredShare(value) {
  if (value == null) {
    return "unavailable";
  }
  return formatPercent(value);
}
|
|
3451
3659
|
function topRows(rows, limit = 5) {
|
|
3452
3660
|
return rows.slice(0, limit).map((row) => {
|
|
3453
3661
|
return `- ${row.key}: ${formatUsd(row.spendUsd)} (${formatPercent(row.observedShare)} observed)`;
|
|
@@ -3532,6 +3740,35 @@ function renderFindingChange(change, state) {
|
|
|
3532
3740
|
}
|
|
3533
3741
|
return `- New: ${change.title} (${formatUsd(change.currentCostImpactUsd ?? 0)})`;
|
|
3534
3742
|
}
|
|
3743
|
+
/**
 * Render the shared "Before / after" rows of a compare report.
 *
 * Normalized rates (waste rate, waste per run, waste per 1k calls, inferred
 * waste share) are listed first, followed by the workload-dependent totals
 * and the run and call counts.
 *
 * @param {object} summary - current audit summary; `summary.comparison` holds
 *   the baseline values and precomputed deltas.
 * @returns {string[]} report lines, or an empty array when the summary has no
 *   comparison.
 */
function renderCompareCoreRows(summary) {
  const { comparison } = summary;
  if (!comparison) {
    return [];
  }
  const signedCount = (delta) => `${delta > 0 ? "+" : ""}${delta}`;
  const wastePerRun = {
    before: divideOrZero(comparison.baselineWasteSpendUsd, comparison.baselineRunCount),
    after: divideOrZero(summary.wasteSpendUsd, summary.runCount)
  };
  const wastePer1kCalls = {
    before: divideOrZero(comparison.baselineWasteSpendUsd, comparison.baselineCallCount / 1e3),
    after: divideOrZero(summary.wasteSpendUsd, summary.callCount / 1e3)
  };
  const rows = ["## Before / after"];
  rows.push(`Compared against ${comparison.baselineGeneratedAt}`);
  rows.push(
    `- Waste rate: ${formatPercent(comparison.baselineStructuralWasteRate)} -> ${formatPercent(summary.structuralWasteRate)} (${formatPercentDelta(comparison.deltaStructuralWasteRate)})`
  );
  rows.push(
    `- Waste per run: ${formatUsdRate(wastePerRun.before)} -> ${formatUsdRate(wastePerRun.after)} (${formatUsdDelta(wastePerRun.after - wastePerRun.before)})`
  );
  rows.push(
    `- Waste per 1k calls: ${formatUsdRate(wastePer1kCalls.before)} -> ${formatUsdRate(wastePer1kCalls.after)} (${formatUsdDelta(wastePer1kCalls.after - wastePer1kCalls.before)})`
  );
  rows.push(
    `- Inferred waste share: ${formatInferredShare(comparison.baselineWasteBySignalSource?.inferredShare)} -> ${formatInferredShare(summary.wasteBySignalSource?.inferredShare)}`
  );
  rows.push("- CPO: unavailable (no outcome signal)");
  rows.push(
    `- Total spend (workload-dependent): ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`
  );
  rows.push(
    `- Structural waste (workload-dependent): ${formatUsd(comparison.baselineWasteSpendUsd)} -> ${formatUsd(summary.wasteSpendUsd)} (${formatUsdDelta(comparison.deltaWasteSpendUsd)})`
  );
  rows.push(
    `- Runs analyzed: ${comparison.baselineRunCount} -> ${summary.runCount} (${signedCount(comparison.deltaRunCount)})`
  );
  rows.push(
    `- Model calls: ${comparison.baselineCallCount} -> ${summary.callCount} (${signedCount(comparison.deltaCallCount)})`
  );
  return rows;
}
|
|
3535
3772
|
function renderCompareBlock(summary) {
|
|
3536
3773
|
if (!summary.comparison) {
|
|
3537
3774
|
return [];
|
|
@@ -3552,13 +3789,7 @@ function renderCompareBlock(summary) {
|
|
|
3552
3789
|
)
|
|
3553
3790
|
].slice(0, 5);
|
|
3554
3791
|
return [
|
|
3555
|
-
|
|
3556
|
-
`Compared against ${comparison.baselineGeneratedAt}`,
|
|
3557
|
-
`- Total spend: ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`,
|
|
3558
|
-
`- Structural waste: ${formatUsd(comparison.baselineWasteSpendUsd)} -> ${formatUsd(summary.wasteSpendUsd)} (${formatUsdDelta(comparison.deltaWasteSpendUsd)})`,
|
|
3559
|
-
`- Waste rate: ${formatPercent(comparison.baselineStructuralWasteRate)} -> ${formatPercent(summary.structuralWasteRate)} (${formatPercentDelta(comparison.deltaStructuralWasteRate)})`,
|
|
3560
|
-
`- Runs analyzed: ${comparison.baselineRunCount} -> ${summary.runCount} (${comparison.deltaRunCount > 0 ? "+" : ""}${comparison.deltaRunCount})`,
|
|
3561
|
-
`- Model calls: ${comparison.baselineCallCount} -> ${summary.callCount} (${comparison.deltaCallCount > 0 ? "+" : ""}${comparison.deltaCallCount})`,
|
|
3792
|
+
...renderCompareCoreRows(summary),
|
|
3562
3793
|
biggestImprovement ? `- Biggest improvement: ${describeSpendDelta(biggestImprovement)}` : "- Biggest improvement: none detected",
|
|
3563
3794
|
biggestRegression ? `- Biggest regression: ${describeSpendDelta(biggestRegression)}` : "- Biggest regression: none detected",
|
|
3564
3795
|
firstWorkflowToInspect ? `- First workflow to inspect now: ${firstWorkflowToInspect}` : "- First workflow to inspect now: no workflow delta available",
|
|
@@ -3682,10 +3913,7 @@ function renderCursorCompareBlock(summary) {
|
|
|
3682
3913
|
const modeSwing = comparison.workflowDeltas[0];
|
|
3683
3914
|
const modelSwing = comparison.modelDeltas[0];
|
|
3684
3915
|
return [
|
|
3685
|
-
|
|
3686
|
-
`Compared against ${comparison.baselineGeneratedAt}`,
|
|
3687
|
-
`- Total spend: ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`,
|
|
3688
|
-
`- Rows analyzed: ${formatCount(comparison.baselineRunCount)} -> ${formatCount(summary.runCount)} (${comparison.deltaRunCount > 0 ? "+" : ""}${comparison.deltaRunCount})`,
|
|
3916
|
+
...renderCompareCoreRows(summary),
|
|
3689
3917
|
`- Usage rows with pricing: ${formatCount(summary.pricingCoverage?.pricedCallCount ?? 0)}`,
|
|
3690
3918
|
modeSwing ? `- Mode swing to inspect: ${describeSpendDelta(modeSwing)}` : "- Mode swing to inspect: none",
|
|
3691
3919
|
modelSwing ? `- Model swing to inspect: ${describeSpendDelta(modelSwing)}` : "- Model swing to inspect: none"
|
|
@@ -3779,7 +4007,7 @@ function renderCursorMarkdownSummary(summary) {
|
|
|
3779
4007
|
"",
|
|
3780
4008
|
"## Findings",
|
|
3781
4009
|
...summary.findings.slice(0, 10).map((finding) => {
|
|
3782
|
-
return `- **${finding.title}** (${finding.classification}, ${finding.confidence})
|
|
4010
|
+
return `- **${finding.title}** (${finding.classification}, ${finding.confidence}). ${finding.summary} Estimated impact: ${formatUsd(finding.costImpactUsd)}.`;
|
|
3783
4011
|
}),
|
|
3784
4012
|
"",
|
|
3785
4013
|
...renderActionQueue(summary),
|
|
@@ -3862,21 +4090,13 @@ function renderMarkdownSummary(summary) {
|
|
|
3862
4090
|
"",
|
|
3863
4091
|
"## Findings",
|
|
3864
4092
|
...summary.findings.slice(0, 10).map((finding) => {
|
|
3865
|
-
return `- **${finding.title}** (${finding.classification}, ${finding.confidence})
|
|
4093
|
+
return `- **${finding.title}** (${finding.classification}, ${finding.confidence}). ${finding.summary} Estimated impact: ${formatUsd(finding.costImpactUsd)}.`;
|
|
3866
4094
|
}),
|
|
3867
4095
|
"",
|
|
3868
4096
|
...renderActionQueue(summary)
|
|
3869
4097
|
];
|
|
3870
4098
|
if (summary.comparison) {
|
|
3871
|
-
|
|
3872
|
-
lines.push(
|
|
3873
|
-
"",
|
|
3874
|
-
"## Before / after",
|
|
3875
|
-
`- Compared against: ${comparison.baselineGeneratedAt}`,
|
|
3876
|
-
`- Total spend: ${formatUsd(comparison.baselineTotalSpendUsd)} -> ${formatUsd(summary.totalSpendUsd)} (${formatUsdDelta(comparison.deltaTotalSpendUsd)})`,
|
|
3877
|
-
`- Structural waste: ${formatUsd(comparison.baselineWasteSpendUsd)} -> ${formatUsd(summary.wasteSpendUsd)} (${formatUsdDelta(comparison.deltaWasteSpendUsd)})`,
|
|
3878
|
-
`- Waste rate: ${formatPercent(comparison.baselineStructuralWasteRate)} -> ${formatPercent(summary.structuralWasteRate)} (${formatPercentDelta(comparison.deltaStructuralWasteRate)})`
|
|
3879
|
-
);
|
|
4099
|
+
lines.push("", ...renderCompareBlock(summary));
|
|
3880
4100
|
}
|
|
3881
4101
|
return lines.join("\n");
|
|
3882
4102
|
}
|