reasonix 0.0.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -7
- package/dist/cli/index.js +552 -24
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +211 -2
- package/dist/index.js +446 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -1289,7 +1289,13 @@ var CacheFirstLoop = class {
|
|
|
1289
1289
|
name,
|
|
1290
1290
|
content: result
|
|
1291
1291
|
});
|
|
1292
|
-
yield {
|
|
1292
|
+
yield {
|
|
1293
|
+
turn: this._turn,
|
|
1294
|
+
role: "tool",
|
|
1295
|
+
content: result,
|
|
1296
|
+
toolName: name,
|
|
1297
|
+
toolArgs: args
|
|
1298
|
+
};
|
|
1293
1299
|
}
|
|
1294
1300
|
}
|
|
1295
1301
|
yield { turn: this._turn, role: "done", content: "[max_tool_iters reached]" };
|
|
@@ -1342,8 +1348,431 @@ function loadDotenv(path = ".env") {
|
|
|
1342
1348
|
}
|
|
1343
1349
|
}
|
|
1344
1350
|
|
|
1351
|
+
// src/transcript.ts
|
|
1352
|
+
import { createWriteStream, readFileSync as readFileSync3 } from "fs";
|
|
1353
|
+
/**
 * Flatten a loop event into a transcript record suitable for JSONL output.
 *
 * `turn`, `role` and `content` are always copied; `tool`, `args` and `error`
 * are only set when the event carries them. When the event has `stats`, the
 * usage counters are renamed to snake_case and cost/model come from the stats;
 * otherwise an `assistant_final` event falls back to the session model.
 *
 * @param {object} ev - Loop event to record.
 * @param {object} extra - Session-level values (`model`, `prefixHash`).
 * @returns {object} Record stamped with the current time (ISO-8601 string).
 */
function recordFromLoopEvent(ev, extra) {
  const record = {
    ts: new Date().toISOString(),
    turn: ev.turn,
    role: ev.role,
    content: ev.content,
  };

  // Optional fields are assigned in a fixed order so serialized key order stays stable.
  if (ev.toolName !== undefined) record.tool = ev.toolName;
  if (ev.toolArgs !== undefined) record.args = ev.toolArgs;
  if (ev.error !== undefined) record.error = ev.error;

  const { stats } = ev;
  if (stats) {
    const counters = stats.usage;
    record.usage = {
      prompt_tokens: counters.promptTokens,
      completion_tokens: counters.completionTokens,
      total_tokens: counters.totalTokens,
      prompt_cache_hit_tokens: counters.promptCacheHitTokens,
      prompt_cache_miss_tokens: counters.promptCacheMissTokens,
    };
    record.cost = stats.cost;
    record.model = stats.model;
    record.prefixHash = extra.prefixHash;
    return record;
  }

  if (ev.role === "assistant_final") {
    record.model = extra.model;
    record.prefixHash = extra.prefixHash;
  }
  return record;
}
|
|
1380
|
+
/**
 * Append one record to the transcript as a single JSON line.
 *
 * @param {{write: Function}} stream - Destination; only `write` is used.
 * @param {object} record - Record to serialize.
 */
function writeRecord(stream, record) {
  const serialized = JSON.stringify(record);
  stream.write(serialized + "\n");
}
|
|
1384
|
+
/**
 * Write the transcript header: one JSON line with role `_meta` wrapping `meta`.
 *
 * @param {{write: Function}} stream - Destination; only `write` is used.
 * @param {object} meta - Session metadata to embed.
 */
function writeMeta(stream, meta) {
  const header = JSON.stringify({ role: "_meta", meta });
  stream.write(header + "\n");
}
|
|
1389
|
+
/**
 * Open a transcript file in append mode and immediately write its meta line.
 *
 * @param {string} path - Transcript file path.
 * @param {object} meta - Metadata passed through to `writeMeta`.
 * @returns {object} The write stream, ready for further records.
 */
function openTranscriptFile(path, meta) {
  // "a" appends, so an existing file receives an additional meta line.
  const transcript = createWriteStream(path, { flags: "a" });
  writeMeta(transcript, meta);
  return transcript;
}
|
|
1394
|
+
/**
 * Read a transcript file as UTF-8 and parse it.
 *
 * @param {string} path - Transcript file path.
 * @returns {{meta: ?object, records: object[]}} Result of `parseTranscript`.
 */
function readTranscript(path) {
  return parseTranscript(readFileSync3(path, "utf8"));
}
|
|
1398
|
+
/**
 * Parse JSONL transcript text into its meta header and records.
 *
 * Parsing is deliberately lenient: blank lines, lines that are not valid
 * JSON, non-object values and objects missing the required record fields
 * are all skipped without raising.
 *
 * @param {string} raw - Full transcript text (LF or CRLF line endings).
 * @returns {{meta: ?object, records: object[]}} `meta` is the last `_meta`
 *   payload seen, or null when the text has none.
 */
function parseTranscript(raw) {
  const result = { meta: null, records: [] };

  // A record needs a string timestamp, numeric turn, string role and string content.
  const looksLikeRecord = (candidate) =>
    typeof candidate.ts === "string" &&
    typeof candidate.turn === "number" &&
    typeof candidate.role === "string" &&
    typeof candidate.content === "string";

  for (const rawLine of raw.split(/\r?\n/)) {
    const text = rawLine.trim();
    if (text === "") continue;

    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch {
      // Best-effort reader: a corrupt line must not abort the whole transcript.
      continue;
    }
    if (parsed === null || typeof parsed !== "object") continue;

    const isMeta = parsed.role === "_meta" && parsed.meta && typeof parsed.meta === "object";
    if (isMeta) {
      result.meta = parsed.meta;
    } else if (looksLikeRecord(parsed)) {
      result.records.push(parsed);
    }
  }
  return result;
}
|
|
1421
|
+
|
|
1422
|
+
// src/replay.ts
|
|
1423
|
+
/**
 * Load a transcript from disk and compute its replay statistics.
 *
 * @param {string} path - Transcript file path.
 * @returns {{parsed: object, stats: object}} Parsed transcript plus the stats
 *   derived from its records.
 */
function replayFromFile(path) {
  const parsed = readTranscript(path);
  const stats = computeReplayStats(parsed.records);
  return { parsed, stats };
}
|
|
1427
|
+
/**
 * Rebuild session statistics from transcript records.
 *
 * Counts `user` and `tool` records, collects the distinct models and prefix
 * hashes seen on `assistant_final` records, and builds a per-turn entry for
 * every `assistant_final` record that has both `usage` and `model`.
 *
 * @param {object[]} records - Parsed transcript records.
 * @returns {object} `perTurn`, `models`, `prefixHashes`, `userTurns`,
 *   `toolCalls`, plus every field returned by `summarizeTurns`.
 */
function computeReplayStats(records) {
  const perTurn = [];
  const seenModels = new Set();
  const seenPrefixes = new Set();
  let userTurns = 0;
  let toolCalls = 0;

  for (const rec of records) {
    switch (rec.role) {
      case "user":
        userTurns += 1;
        break;
      case "tool":
        toolCalls += 1;
        break;
      case "assistant_final": {
        if (rec.model) seenModels.add(rec.model);
        if (rec.prefixHash) seenPrefixes.add(rec.prefixHash);
        if (!rec.usage || !rec.model) break;

        // Missing counters default to 0 so partially recorded usage still works.
        const recorded = rec.usage;
        const usage = new Usage(
          recorded.prompt_tokens ?? 0,
          recorded.completion_tokens ?? 0,
          recorded.total_tokens ?? 0,
          recorded.prompt_cache_hit_tokens ?? 0,
          recorded.prompt_cache_miss_tokens ?? 0,
        );
        perTurn.push({
          turn: rec.turn,
          model: rec.model,
          usage,
          // A recorded cost wins over recomputation, so a replay reports what
          // the writer computed even if pricing tables have changed since.
          cost: rec.cost ?? costUsd(rec.model, usage),
          cacheHitRatio: usage.cacheHitRatio,
        });
        break;
      }
      default:
        break;
    }
  }

  return {
    perTurn,
    models: Array.from(seenModels),
    prefixHashes: Array.from(seenPrefixes),
    userTurns,
    toolCalls,
    ...summarizeTurns(perTurn),
  };
}
|
|
1469
|
+
/**
 * Aggregate per-turn entries into session totals.
 *
 * @param {object[]} turns - Entries with `cost` and a `usage` object exposing
 *   `promptCacheHitTokens` / `promptCacheMissTokens`.
 * @returns {{turns: number, totalCostUsd: number, claudeEquivalentUsd: number,
 *   savingsVsClaudePct: number, cacheHitRatio: number}} Rounded totals; ratios
 *   are 0 when their denominator is 0.
 */
function summarizeTurns(turns) {
  let totalCost = 0;
  let totalClaude = 0;
  let hitTokens = 0;
  let missTokens = 0;
  for (const entry of turns) {
    totalCost += entry.cost;
    totalClaude += claudeEquivalentCost(entry.usage);
    hitTokens += entry.usage.promptCacheHitTokens;
    missTokens += entry.usage.promptCacheMissTokens;
  }

  const promptTokens = hitTokens + missTokens;
  const cacheHitRatio = promptTokens > 0 ? hitTokens / promptTokens : 0;
  const savingsVsClaude = totalClaude > 0 ? 1 - totalCost / totalClaude : 0;

  return {
    turns: turns.length,
    totalCostUsd: round2(totalCost, 6),
    claudeEquivalentUsd: round2(totalClaude, 6),
    savingsVsClaudePct: round2(savingsVsClaude * 100, 2),
    cacheHitRatio: round2(cacheHitRatio, 4),
  };
}
|
|
1488
|
+
/**
 * Round a number to a given count of decimal digits.
 *
 * @param {number} n - Value to round.
 * @param {number} digits - Number of decimal places to keep.
 * @returns {number} `n` scaled by 10^digits, passed through `Math.round`, scaled back.
 */
function round2(n, digits) {
  const scale = Math.pow(10, digits);
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
|
|
1492
|
+
|
|
1493
|
+
// src/diff.ts
|
|
1494
|
+
/**
 * Compare two parsed transcripts turn by turn.
 *
 * @param {{label: string, parsed: object}} a - First transcript and its label.
 * @param {{label: string, parsed: object}} b - Second transcript and its label.
 * @returns {{a: object, b: object, pairs: object[], firstDivergenceTurn: ?number}}
 *   Per-side summaries, one pair per turn number seen on either side (ascending),
 *   and the first turn whose kind is not "match" (null when every turn matches).
 */
function diffTranscripts(a, b) {
  const buildSide = (input) => ({
    label: input.label,
    meta: input.parsed.meta,
    records: input.parsed.records,
    stats: computeReplayStats(input.parsed.records),
  });
  const aSide = buildSide(a);
  const bSide = buildSide(b);

  const aByTurn = groupByTurn(a.parsed.records);
  const bByTurn = groupByTurn(b.parsed.records);
  const orderedTurns = Array.from(new Set([...aByTurn.keys(), ...bByTurn.keys()]));
  orderedTurns.sort((x, y) => x - y);

  const pairs = [];
  let firstDivergenceTurn = null;
  for (const turn of orderedTurns) {
    const { assistant: aAssistant, tools: aTools } =
      aByTurn.get(turn) ?? { assistant: undefined, tools: [] };
    const { assistant: bAssistant, tools: bTools } =
      bByTurn.get(turn) ?? { assistant: undefined, tools: [] };

    // Default covers the case where neither side has an assistant reply.
    let kind = "diverge";
    let divergenceNote;
    if (aAssistant && bAssistant) {
      divergenceNote = classifyDivergence(aAssistant, bAssistant, aTools, bTools);
      kind = divergenceNote ? "diverge" : "match";
    } else if (bAssistant) {
      kind = "only_in_b";
    } else if (aAssistant) {
      kind = "only_in_a";
    }

    if (firstDivergenceTurn === null && kind !== "match") firstDivergenceTurn = turn;
    pairs.push({ turn, aAssistant, bAssistant, aTools, bTools, kind, divergenceNote });
  }

  return { a: aSide, b: bSide, pairs, firstDivergenceTurn };
}
|
|
1534
|
+
/**
 * Explain how two versions of the same turn diverge, if they do.
 *
 * Checks, in order: the set of tool names called, the args of each tool call,
 * then the similarity of the final assistant text (below 0.75 is a divergence).
 *
 * Tool calls are paired after a stable sort by tool name, so the same tools
 * invoked in a different order are still compared against each other rather
 * than skipped. The inputs are not mutated.
 *
 * @param {{content: string}} a - Assistant record from transcript A.
 * @param {{content: string}} b - Assistant record from transcript B.
 * @param {object[]} aTools - Tool records of the turn in A (`tool`, `args`).
 * @param {object[]} bTools - Tool records of the turn in B (`tool`, `args`).
 * @returns {string|undefined} Human-readable note, or undefined when the
 *   turns are considered a match.
 */
function classifyDivergence(a, b, aTools, bTools) {
  const nameOf = (t) => t.tool ?? "";
  // Code-unit comparison matches the default Array#sort order on the names.
  const byName = (x, y) => {
    const xn = nameOf(x);
    const yn = nameOf(y);
    if (xn < yn) return -1;
    return xn > yn ? 1 : 0;
  };
  // Sort copies: the caller's arrays are reused in the report.
  const aSorted = [...aTools].sort(byName);
  const bSorted = [...bTools].sort(byName);
  const aNames = aSorted.map(nameOf);
  const bNames = bSorted.map(nameOf);

  // The length check guards the pairwise loop below even if a tool name
  // contains a comma and makes the joined strings collide.
  if (aNames.length !== bNames.length || aNames.join(",") !== bNames.join(",")) {
    return `tool calls differ: A=[${aNames.join(",") || "\u2014"}] B=[${bNames.join(",") || "\u2014"}]`;
  }

  for (let i = 0; i < aSorted.length; i++) {
    const at = aSorted[i];
    const bt = bSorted[i];
    if ((at.args ?? "") !== (bt.args ?? "")) {
      return `"${at.tool}" args differ`;
    }
  }

  const simRatio = similarity(a.content, b.content);
  if (simRatio < 0.75) return `text similarity ${(simRatio * 100).toFixed(0)}%`;
  return undefined;
}
|
|
1552
|
+
/**
 * Score how alike two texts are, from 0 (nothing in common) to 1 (identical).
 *
 * Texts whose longer side exceeds 2000 characters are compared by token
 * overlap; shorter texts use edit distance normalized by the longer length.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Similarity ratio; two empty/missing texts count as 1,
 *   exactly one empty/missing text counts as 0.
 */
function similarity(a, b) {
  if (a === b || (!a && !b)) return 1;
  if (!a || !b) return 0;

  const longest = Math.max(a.length, b.length);
  return longest > 2e3
    ? tokenOverlap(a, b)
    : 1 - levenshtein(a, b) / longest;
}
|
|
1561
|
+
/**
 * Similarity of two texts based on their sets of lower-cased,
 * whitespace-separated tokens: twice the shared count over the sum of both
 * set sizes.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Ratio in [0, 1]; 1 when neither text has any token.
 */
function tokenOverlap(a, b) {
  const tokenSet = (text) => new Set(text.toLowerCase().split(/\s+/).filter(Boolean));
  const left = tokenSet(a);
  const right = tokenSet(b);
  if (left.size === 0 && right.size === 0) return 1;

  const shared = [...left].filter((token) => right.has(token)).length;
  return 2 * shared / (left.size + right.size);
}
|
|
1569
|
+
/**
 * Edit distance between two strings (insert, delete and substitute each cost
 * 1), compared per UTF-16 code unit.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of single-unit edits turning `a` into `b`.
 */
function levenshtein(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // One rolling row; `diagonal` carries the previous row's value at column j - 1.
  const distances = Array.from({ length: cols + 1 }, (_, j) => j);
  for (let i = 1; i <= rows; i += 1) {
    let diagonal = distances[0];
    distances[0] = i;
    for (let j = 1; j <= cols; j += 1) {
      const above = distances[j];
      const substitution = diagonal + (a[i - 1] === b[j - 1] ? 0 : 1);
      distances[j] = Math.min(distances[j - 1] + 1, above + 1, substitution);
      diagonal = above;
    }
  }
  return distances[cols];
}
|
|
1587
|
+
/**
 * Bucket non-user records by turn number.
 *
 * @param {Iterable<object>} records - Transcript records.
 * @returns {Map<number, {tools: object[], assistant?: object}>} One group per
 *   turn with at least one non-user record; `assistant` is the last
 *   `assistant_final` record of that turn, `tools` the `tool` records in order.
 */
function groupByTurn(records) {
  const groups = new Map();
  for (const rec of records) {
    if (rec.role === "user") continue;

    // Any non-user record creates the turn's group, even roles not stored in it.
    if (!groups.has(rec.turn)) groups.set(rec.turn, { tools: [] });
    const group = groups.get(rec.turn);

    if (rec.role === "assistant_final") {
      group.assistant = rec;
    } else if (rec.role === "tool") {
      group.tools.push(rec);
    }
  }
  return groups;
}
|
|
1598
|
+
/**
 * Render a diff report as a plain-text summary for the terminal.
 *
 * Output: the two labels, a fixed-width table of aggregate stats (A, B,
 * delta), an optional prefix-stability remark, and either the first
 * divergence (with truncated replies) or a "no material divergence" line.
 *
 * @param {object} report - Result of `diffTranscripts`.
 * @param {object} [_opts] - Accepted but not read by this function.
 * @returns {string} Lines joined with "\n".
 */
function renderSummaryTable(report, _opts = {}) {
  const { a, b } = report;
  const widths = [20, 14, 14, 14];
  const aHashes = a.stats.prefixHashes;
  const bHashes = b.stats.prefixHashes;

  const lines = [
    "Comparing:",
    ` A ${a.label}`,
    ` B ${b.label}`,
    "",
    row(["", "A", "B", "\u0394"], widths),
    row(widths.map((w) => "\u2500".repeat(w)), widths),
    statRow("model calls", a.stats.turns, b.stats.turns),
    statRow("user turns", a.stats.userTurns, b.stats.userTurns),
    statRow("tool calls", a.stats.toolCalls, b.stats.toolCalls),
    row(
      [
        "cache hit",
        `${pct(a.stats.cacheHitRatio)}`,
        `${pct(b.stats.cacheHitRatio)}`,
        signPct(b.stats.cacheHitRatio - a.stats.cacheHitRatio),
      ],
      widths,
    ),
    row(
      [
        "cost (USD)",
        `$${a.stats.totalCostUsd.toFixed(6)}`,
        `$${b.stats.totalCostUsd.toFixed(6)}`,
        costDelta(a.stats.totalCostUsd, b.stats.totalCostUsd),
      ],
      widths,
    ),
    statRow("prefix hashes", aHashes.length, bHashes.length),
    "",
  ];

  // A side counts as "stable" when it recorded at most one distinct prefix hash.
  const aStable = aHashes.length <= 1;
  const bStable = bHashes.length <= 1;
  if (aStable !== bStable) {
    const [stable, churn, churnCount] = aStable
      ? ["A", "B", bHashes.length]
      : ["B", "A", aHashes.length];
    const span = Math.max(a.stats.turns, b.stats.turns);
    lines.push(
      `prefix stability: ${stable} stayed byte-stable across ${span} turns; ${churn} churned ${churnCount} distinct prefixes.`,
      "",
    );
  } else if (aHashes[0] && aHashes[0] === bHashes[0]) {
    lines.push(
      `prefix: A and B share the same prefix hash (${aHashes[0].slice(0, 12)}\u2026) \u2014 cache delta is attributable to log stability, not prompt change.`,
      "",
    );
  }

  const { firstDivergenceTurn } = report;
  if (firstDivergenceTurn === null) {
    lines.push("no material divergence detected (texts within similarity threshold).");
    return lines.join("\n");
  }

  const pair = report.pairs.find((candidate) => candidate.turn === firstDivergenceTurn);
  lines.push(`first divergence: turn ${firstDivergenceTurn} \u2014 ${pair?.divergenceNote ?? "?"}`);
  if (pair?.aAssistant) lines.push(` A \u2192 ${truncate(pair.aAssistant.content, 100)}`);
  if (pair?.bAssistant) lines.push(` B \u2192 ${truncate(pair.bAssistant.content, 100)}`);
  return lines.join("\n");
}
|
|
1668
|
+
/**
 * Render a diff report as a Markdown document.
 *
 * Sections: title, optional "Meta" table (when either side has meta),
 * "Summary" table, "Turn-by-turn" table, and, when a divergence exists,
 * "First divergence" with both assistant replies in fenced blocks.
 * Labels and contents are inserted as-is (no Markdown escaping).
 *
 * @param {object} report - Result of `diffTranscripts`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function renderMarkdown(report) {
  const { a, b } = report;
  const dash = "\u2014";
  const out = [`# Transcript diff: ${a.label} vs ${b.label}`, ""];

  if (a.meta || b.meta) {
    out.push("## Meta", "", `| | ${a.label} | ${b.label} |`, "|---|---|---|");
    // Each row is labelled with the meta field it shows.
    for (const field of ["source", "model", "task", "startedAt"]) {
      out.push(`| ${field} | ${a.meta?.[field] ?? dash} | ${b.meta?.[field] ?? dash} |`);
    }
    out.push("");
  }

  const sa = a.stats;
  const sb = b.stats;
  out.push(
    "## Summary",
    "",
    `| metric | ${a.label} | ${b.label} | delta |`,
    "|---|---:|---:|---:|",
    `| model calls | ${sa.turns} | ${sb.turns} | ${signed(sb.turns - sa.turns)} |`,
    `| user turns | ${sa.userTurns} | ${sb.userTurns} | ${signed(sb.userTurns - sa.userTurns)} |`,
    `| tool calls | ${sa.toolCalls} | ${sb.toolCalls} | ${signed(sb.toolCalls - sa.toolCalls)} |`,
    `| cache hit | ${pct(sa.cacheHitRatio)} | ${pct(sb.cacheHitRatio)} | **${signPct(sb.cacheHitRatio - sa.cacheHitRatio)}** |`,
    `| cost (USD) | $${sa.totalCostUsd.toFixed(6)} | $${sb.totalCostUsd.toFixed(6)} | ${costDelta(sa.totalCostUsd, sb.totalCostUsd)} |`,
    `| prefix hashes | ${sa.prefixHashes.length} | ${sb.prefixHashes.length} | \u2014 |`,
    "",
  );

  out.push(
    "## Turn-by-turn",
    "",
    `| turn | kind | ${a.label} tool calls | ${b.label} tool calls | note |`,
    "|---:|:---:|---|---|---|",
  );
  const toolList = (tools) => tools.map((t) => t.tool).filter(Boolean).join(", ") || dash;
  for (const pair of report.pairs) {
    out.push(
      `| ${pair.turn} | ${pair.kind} | ${toolList(pair.aTools)} | ${toolList(pair.bTools)} | ${pair.divergenceNote ?? ""} |`,
    );
  }
  out.push("");

  if (report.firstDivergenceTurn !== null) {
    const first = report.pairs.find((x) => x.turn === report.firstDivergenceTurn);
    out.push(`## First divergence (turn ${report.firstDivergenceTurn})`, "", first?.divergenceNote ?? "", "");

    // Emit one labelled fenced block per side that actually has a reply.
    const pushReply = (label, assistant) => {
      if (!assistant) return;
      out.push(`**${label}:**`, "", "```", assistant.content, "```", "");
    };
    pushReply(a.label, first?.aAssistant);
    pushReply(b.label, first?.bAssistant);
  }

  return out.join("\n");
}
|
|
1743
|
+
/**
 * Format one table row: each cell is right-padded to its column width and
 * the cells are joined with a single space.
 *
 * @param {string[]} cols - Cell texts.
 * @param {number[]} widths - Column widths; a missing width leaves the cell unpadded.
 * @returns {string} The formatted row.
 */
function row(cols, widths) {
  const cells = [];
  cols.forEach((cell, index) => {
    const width = widths[index] ?? cell.length;
    cells.push(padRight(cell, width));
  });
  return cells.join(" ");
}
|
|
1746
|
+
/**
 * Format a numeric stat as a table row: label, A value, B value, signed delta (B - A).
 *
 * @param {string} label - Row label.
 * @param {number} av - Value for side A.
 * @param {number} bv - Value for side B.
 * @returns {string} Row laid out with the table's 20/14/14/14 column widths.
 */
function statRow(label, av, bv) {
  const cells = [label, String(av), String(bv), signed(bv - av)];
  return row(cells, [20, 14, 14, 14]);
}
|
|
1749
|
+
/**
 * Right-pad a string with spaces up to a minimum width.
 *
 * @param {string} s - Text to pad.
 * @param {number} w - Target width; strings already at least this long are returned unchanged.
 * @returns {string} The padded string.
 */
function padRight(s, w) {
  return s.padEnd(w);
}
|
|
1752
|
+
/**
 * Format a number with an explicit sign.
 *
 * @param {number} n - Value to format.
 * @returns {string} "0" for zero, "+n" for positive values, otherwise the
 *   number's own string form (which already carries "-").
 */
function signed(n) {
  if (n === 0) return "0";
  return n > 0 ? `+${n}` : `${n}`;
}
|
|
1756
|
+
/**
 * Format a ratio difference as signed percentage points with one decimal.
 *
 * @param {number} diff - Difference between two ratios (e.g. 0.05 for +5pp).
 * @returns {string} "0pp" when the difference is zero or rounds to zero at
 *   one decimal, otherwise e.g. "+5.0pp" / "-12.3pp".
 */
function signPct(diff) {
  const points = (diff * 100).toFixed(1);
  // toFixed keeps the sign of values that round to zero ("-0.0"); showing
  // "-0.0pp" or "+0.0pp" would suggest a change the displayed precision
  // cannot express, so collapse those to the plain zero form.
  if (Number(points) === 0) return "0pp";
  return `${diff > 0 ? "+" : ""}${points}pp`;
}
|
|
1761
|
+
/**
 * Format a ratio as a percentage with one decimal place.
 *
 * @param {number} x - Ratio (1 means 100%).
 * @returns {string} E.g. "12.3%".
 */
function pct(x) {
  const percent = x * 100;
  return percent.toFixed(1) + "%";
}
|
|
1764
|
+
/**
 * Describe the relative cost change from `a` to `b`.
 *
 * @param {number} a - Baseline cost.
 * @param {number} b - Compared cost.
 * @returns {string} "\u2014" when both are zero, "new" when only the baseline
 *   is zero, otherwise the percentage change with one decimal and a "+" on
 *   increases (e.g. "+50.0%", "-25.0%"). Changes that round to zero are shown
 *   as an unsigned "0.0%".
 */
function costDelta(a, b) {
  if (a === 0 && b === 0) return "\u2014";
  // No baseline to divide by: the cost is entirely new.
  if (a === 0) return "new";
  const pctChange = (b - a) / a * 100;
  const shown = pctChange.toFixed(1);
  // toFixed keeps the sign of values that round to zero ("-0.0"); report
  // those the same way an exact zero change is reported.
  if (Number(shown) === 0) return "0.0%";
  return `${pctChange > 0 ? "+" : ""}${shown}%`;
}
|
|
1770
|
+
/**
 * Shorten a string to at most `n` UTF-16 units, marking the cut with an ellipsis.
 *
 * @param {string} s - Text to shorten.
 * @param {number} n - Maximum length kept before the ellipsis.
 * @returns {string} `s` unchanged when it fits, otherwise its first `n` units plus "\u2026".
 */
function truncate(s, n) {
  if (s.length <= n) return s;
  return s.slice(0, n) + "\u2026";
}
|
|
1773
|
+
|
|
1345
1774
|
// src/config.ts
|
|
1346
|
-
import { chmodSync as chmodSync2, mkdirSync as mkdirSync2, readFileSync as
|
|
1775
|
+
import { chmodSync as chmodSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync4, writeFileSync } from "fs";
|
|
1347
1776
|
import { homedir as homedir2 } from "os";
|
|
1348
1777
|
import { dirname as dirname2, join as join2 } from "path";
|
|
1349
1778
|
function defaultConfigPath() {
|
|
@@ -1351,7 +1780,7 @@ function defaultConfigPath() {
|
|
|
1351
1780
|
}
|
|
1352
1781
|
function readConfig(path = defaultConfigPath()) {
|
|
1353
1782
|
try {
|
|
1354
|
-
const raw =
|
|
1783
|
+
const raw = readFileSync4(path, "utf8");
|
|
1355
1784
|
const parsed = JSON.parse(raw);
|
|
1356
1785
|
if (parsed && typeof parsed === "object") return parsed;
|
|
1357
1786
|
} catch {
|
|
@@ -1386,14 +1815,13 @@ function redactKey(key) {
|
|
|
1386
1815
|
}
|
|
1387
1816
|
|
|
1388
1817
|
// src/index.ts
|
|
1389
|
-
var VERSION = "0.0
|
|
1818
|
+
var VERSION = "0.2.0";
|
|
1390
1819
|
|
|
1391
1820
|
// src/cli/commands/chat.tsx
|
|
1392
1821
|
import { render } from "ink";
|
|
1393
1822
|
import React7, { useState as useState4 } from "react";
|
|
1394
1823
|
|
|
1395
1824
|
// src/cli/ui/App.tsx
|
|
1396
|
-
import { createWriteStream } from "fs";
|
|
1397
1825
|
import { Box as Box5, Static, Text as Text5, useApp } from "ink";
|
|
1398
1826
|
import React5, { useCallback, useEffect as useEffect2, useMemo, useRef, useState as useState2 } from "react";
|
|
1399
1827
|
|
|
@@ -1601,7 +2029,7 @@ var EventRow = React2.memo(function EventRow2({ event }) {
|
|
|
1601
2029
|
return /* @__PURE__ */ React2.createElement(Box2, { flexDirection: "column", marginTop: 1 }, /* @__PURE__ */ React2.createElement(Box2, null, /* @__PURE__ */ React2.createElement(Text2, { bold: true, color: "green" }, "assistant")), event.branch ? /* @__PURE__ */ React2.createElement(BranchBlock, { branch: event.branch }) : null, event.reasoning ? /* @__PURE__ */ React2.createElement(ReasoningBlock, { reasoning: event.reasoning }) : null, !isPlanStateEmpty(event.planState) ? /* @__PURE__ */ React2.createElement(PlanStateBlock, { planState: event.planState }) : null, event.text ? /* @__PURE__ */ React2.createElement(Markdown, { text: event.text }) : /* @__PURE__ */ React2.createElement(Text2, { dimColor: true }, "(no content)"), event.stats ? /* @__PURE__ */ React2.createElement(StatsLine, { stats: event.stats }) : null, event.repair ? /* @__PURE__ */ React2.createElement(Text2, { color: "magenta" }, event.repair) : null);
|
|
1602
2030
|
}
|
|
1603
2031
|
if (event.role === "tool") {
|
|
1604
|
-
return /* @__PURE__ */ React2.createElement(Box2, { flexDirection: "column", marginTop: 1 }, /* @__PURE__ */ React2.createElement(Text2, { color: "yellow" }, `tool<${event.toolName ?? "?"}> \u2192`), /* @__PURE__ */ React2.createElement(Text2, { dimColor: true }, " ",
|
|
2032
|
+
return /* @__PURE__ */ React2.createElement(Box2, { flexDirection: "column", marginTop: 1 }, /* @__PURE__ */ React2.createElement(Text2, { color: "yellow" }, `tool<${event.toolName ?? "?"}> \u2192`), /* @__PURE__ */ React2.createElement(Text2, { dimColor: true }, " ", truncate2(event.text, 400)));
|
|
1605
2033
|
}
|
|
1606
2034
|
if (event.role === "error") {
|
|
1607
2035
|
return /* @__PURE__ */ React2.createElement(Box2, { marginTop: 1 }, /* @__PURE__ */ React2.createElement(Text2, { color: "red", bold: true }, "error", " "), /* @__PURE__ */ React2.createElement(Text2, { color: "red" }, event.text));
|
|
@@ -1650,8 +2078,8 @@ function StreamingAssistant({ event }) {
|
|
|
1650
2078
|
if (p.completed === 0) {
|
|
1651
2079
|
return /* @__PURE__ */ React2.createElement(Box2, { flexDirection: "column", marginTop: 1 }, /* @__PURE__ */ React2.createElement(Box2, null, /* @__PURE__ */ React2.createElement(Text2, { bold: true, color: "green" }, "assistant", " "), /* @__PURE__ */ React2.createElement(Text2, { color: "blue" }, "\u{1F500} launching ", p.total, " parallel samples (R1 thinking in parallel)\u2026", " "), /* @__PURE__ */ React2.createElement(Elapsed, null)), /* @__PURE__ */ React2.createElement(Text2, { dimColor: true }, " ", "spread across T=0.0/0.5/1.0 \xB7 typical wait 30-90s for reasoner"));
|
|
1652
2080
|
}
|
|
1653
|
-
const
|
|
1654
|
-
return /* @__PURE__ */ React2.createElement(Box2, { flexDirection: "column", marginTop: 1 }, /* @__PURE__ */ React2.createElement(Box2, null, /* @__PURE__ */ React2.createElement(Text2, { bold: true, color: "green" }, "assistant", " "), /* @__PURE__ */ React2.createElement(Text2, { color: "blue" }, "\u{1F500} branching ", p.completed, "/", p.total, " (",
|
|
2081
|
+
const pct2 = Math.round(p.completed / p.total * 100);
|
|
2082
|
+
return /* @__PURE__ */ React2.createElement(Box2, { flexDirection: "column", marginTop: 1 }, /* @__PURE__ */ React2.createElement(Box2, null, /* @__PURE__ */ React2.createElement(Text2, { bold: true, color: "green" }, "assistant", " "), /* @__PURE__ */ React2.createElement(Text2, { color: "blue" }, "\u{1F500} branching ", p.completed, "/", p.total, " (", pct2, "%)", " "), /* @__PURE__ */ React2.createElement(Elapsed, null)), /* @__PURE__ */ React2.createElement(Text2, { dimColor: true }, " latest #", p.latestIndex, " T=", p.latestTemperature.toFixed(1), " u=", p.latestUncertainties, p.completed < p.total ? " \xB7 waiting for other samples\u2026" : " \xB7 selecting winner\u2026"));
|
|
1655
2083
|
}
|
|
1656
2084
|
const tail = lastLine(event.text, 140);
|
|
1657
2085
|
const reasoningTail = event.reasoning ? lastLine(event.reasoning, 120) : "";
|
|
@@ -1666,7 +2094,7 @@ function StatsLine({ stats }) {
|
|
|
1666
2094
|
const hit = (stats.cacheHitRatio * 100).toFixed(1);
|
|
1667
2095
|
return /* @__PURE__ */ React2.createElement(Text2, { dimColor: true }, " \u21B3 cache ", hit, "% \xB7 tokens ", stats.usage.promptTokens, "\u2192", stats.usage.completionTokens, " \xB7 $", stats.cost.toFixed(6));
|
|
1668
2096
|
}
|
|
1669
|
-
function
|
|
2097
|
+
/**
 * Shorten a string to `max` UTF-16 units and report how much was dropped.
 *
 * @param {string} s - Text to shorten.
 * @param {number} max - Maximum length kept.
 * @returns {string} `s` unchanged when it fits, otherwise its first `max`
 *   units followed by an ellipsis and the count of omitted characters.
 */
function truncate2(s, max) {
  if (s.length <= max) {
    return s;
  }
  const omitted = s.length - max;
  return `${s.slice(0, max)}\u2026 (+${omitted} chars)`;
}
|
|
1672
2100
|
|
|
@@ -1855,7 +2283,12 @@ function App({ model, system, transcript, harvest: harvest2, branch, session })
|
|
|
1855
2283
|
});
|
|
1856
2284
|
const transcriptRef = useRef(null);
|
|
1857
2285
|
if (transcript && !transcriptRef.current) {
|
|
1858
|
-
transcriptRef.current =
|
|
2286
|
+
transcriptRef.current = openTranscriptFile(transcript, {
|
|
2287
|
+
version: 1,
|
|
2288
|
+
source: "reasonix chat",
|
|
2289
|
+
model,
|
|
2290
|
+
startedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2291
|
+
});
|
|
1859
2292
|
}
|
|
1860
2293
|
useEffect2(() => {
|
|
1861
2294
|
return () => {
|
|
@@ -1905,18 +2338,14 @@ function App({ model, system, transcript, harvest: harvest2, branch, session })
|
|
|
1905
2338
|
}
|
|
1906
2339
|
}, [session, loop]);
|
|
1907
2340
|
const prefixHash = loop.prefix.fingerprint;
|
|
1908
|
-
const writeTranscript = useCallback(
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
})}
|
|
1917
|
-
`
|
|
1918
|
-
);
|
|
1919
|
-
}, []);
|
|
2341
|
+
const writeTranscript = useCallback(
|
|
2342
|
+
(ev) => {
|
|
2343
|
+
const stream = transcriptRef.current;
|
|
2344
|
+
if (!stream) return;
|
|
2345
|
+
writeRecord(stream, recordFromLoopEvent(ev, { model, prefixHash }));
|
|
2346
|
+
},
|
|
2347
|
+
[model, prefixHash]
|
|
2348
|
+
);
|
|
1920
2349
|
const handleSubmit = useCallback(
|
|
1921
2350
|
async (raw) => {
|
|
1922
2351
|
const text = raw.trim();
|
|
@@ -2134,6 +2563,93 @@ async function chatCommand(opts) {
|
|
|
2134
2563
|
await waitUntilExit();
|
|
2135
2564
|
}
|
|
2136
2565
|
|
|
2566
|
+
// src/cli/commands/diff.ts
|
|
2567
|
+
import { writeFileSync as writeFileSync2 } from "fs";
|
|
2568
|
+
import { basename } from "path";
|
|
2569
|
+
/**
 * CLI entry for `diff`: compare two transcript files, print the summary
 * table, and optionally write a Markdown report.
 *
 * @param {{a: string, b: string, labelA?: string, labelB?: string, mdPath?: string}} opts
 *   Transcript paths, optional display labels (default: file basename) and
 *   optional Markdown output path.
 */
function diffCommand(opts) {
  const aParsed = readTranscript(opts.a);
  const bParsed = readTranscript(opts.b);
  const sideA = { label: opts.labelA ?? basename(opts.a), parsed: aParsed };
  const sideB = { label: opts.labelB ?? basename(opts.b), parsed: bParsed };
  const report = diffTranscripts(sideA, sideB);

  console.log(renderSummaryTable(report));

  if (!opts.mdPath) return;
  writeFileSync2(opts.mdPath, renderMarkdown(report), "utf8");
  console.log(`\nmarkdown report written to ${opts.mdPath}`);
}
|
|
2584
|
+
|
|
2585
|
+
// src/cli/commands/replay.ts
|
|
2586
|
+
/**
 * CLI entry for `replay`: print a transcript's meta line, its records
 * (optionally limited by head/tail), and a summary rebuilt from all records.
 *
 * @param {{path: string, head?: number, tail?: number}} opts - Transcript
 *   path plus optional record-slicing options forwarded to `sliceRecords`.
 */
function replayCommand(opts) {
  const { parsed, stats } = replayFromFile(opts.path);

  const { meta } = parsed;
  if (meta) {
    const bits = [`source=${meta.source}`];
    if (meta.model) bits.push(`model=${meta.model}`);
    if (meta.task) bits.push(`task=${meta.task}`);
    if (meta.mode) bits.push(`mode=${meta.mode}`);
    if (meta.repeat !== undefined) bits.push(`repeat=${meta.repeat}`);
    bits.push(`started=${meta.startedAt}`);
    console.log(`[meta] ${bits.join(" ")}`);
    console.log("");
  }

  // Slicing only limits what is printed; the summary covers the whole transcript.
  sliceRecords(parsed.records, opts).forEach((rec) => {
    renderRecord(rec);
  });

  const hashCount = stats.prefixHashes.length;
  console.log("");
  console.log("\u2500\u2500 summary \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
  console.log(`model calls: ${stats.turns}`);
  console.log(`user turns: ${stats.userTurns}`);
  console.log(`tool calls: ${stats.toolCalls}`);
  console.log(`cache hit: ${(stats.cacheHitRatio * 100).toFixed(1)}%`);
  console.log(`cost: $${stats.totalCostUsd.toFixed(6)}`);
  console.log(`claude equivalent: $${stats.claudeEquivalentUsd.toFixed(6)}`);
  console.log(`savings vs claude: ${stats.savingsVsClaudePct.toFixed(1)}%`);
  console.log(`models: ${stats.models.join(", ") || "\u2014"}`);
  console.log(`prefix hashes: ${hashCount} distinct`);
  if (hashCount === 1) {
    console.log(` (byte-stable prefix: ${stats.prefixHashes[0]?.slice(0, 16)}\u2026)`);
  } else if (hashCount > 1) {
    console.log(" (prefix churned \u2014 cache-hostile session)");
  }
}
|
|
2620
|
+
/**
 * Limit the records to print according to the head/tail options.
 *
 * @param {Array} records - All records.
 * @param {{head?: number, tail?: number}} opts - A positive `head` keeps the
 *   first N records and takes precedence; otherwise a positive `tail` keeps
 *   the last N.
 * @returns {Array} A sliced copy, or the original array when neither option applies.
 */
function sliceRecords(records, opts) {
  const { head, tail } = opts;
  const isPositive = (count) => count !== undefined && count > 0;
  if (isPositive(head)) return records.slice(0, head);
  if (isPositive(tail)) return records.slice(-tail);
  return records;
}
|
|
2625
|
+
/**
 * Print one transcript record as a single console line, prefixed with its turn.
 *
 * `done` records print nothing; roles without a dedicated format fall back
 * to "<role>: <content>".
 *
 * @param {object} rec - Transcript record (`turn`, `role`, `content`, and
 *   depending on the role `cost`, `usage`, `tool`, `args`, `error`).
 */
function renderRecord(rec) {
  const tag = `[t${rec.turn}]`;
  switch (rec.role) {
    case "user":
      console.log(`${tag} USER: ${oneLine(rec.content)}`);
      return;
    case "assistant_final": {
      const costPart = rec.cost === undefined ? "" : ` $${rec.cost.toFixed(6)}`;
      let cachePart = "";
      const usage = rec.usage;
      const hasCacheCounters =
        usage &&
        (usage.prompt_cache_hit_tokens !== undefined || usage.prompt_cache_miss_tokens !== undefined);
      if (hasCacheCounters) {
        const hit = usage.prompt_cache_hit_tokens ?? 0;
        const miss = usage.prompt_cache_miss_tokens ?? 0;
        const total = hit + miss;
        // Skip the ratio when no prompt tokens were counted at all.
        if (total > 0) cachePart = ` cache=${(hit / total * 100).toFixed(1)}%`;
      }
      console.log(`${tag} AGENT:${costPart}${cachePart} ${oneLine(rec.content)}`);
      return;
    }
    case "tool": {
      const argsPart = rec.args ? ` args=${oneLine(rec.args, 80)}` : "";
      console.log(`${tag} TOOL ${rec.tool ?? "?"}:${argsPart} \u2192 ${oneLine(rec.content, 120)}`);
      return;
    }
    case "error":
      console.log(`${tag} ERROR: ${rec.error ?? rec.content}`);
      return;
    case "done":
      // End-of-turn markers carry nothing worth printing.
      return;
    default:
      console.log(`${tag} ${rec.role}: ${oneLine(rec.content)}`);
  }
}
|
|
2648
|
+
/**
 * Collapse text onto a single line and cap its length.
 *
 * @param {string} s - Text to flatten; every whitespace run becomes one space
 *   and the ends are trimmed.
 * @param {number} [max=200] - Maximum length kept before an ellipsis is appended.
 * @returns {string} The flattened, possibly truncated text.
 */
function oneLine(s, max = 200) {
  const flat = s.replace(/\s+/g, " ").trim();
  if (flat.length <= max) return flat;
  return flat.slice(0, max) + "\u2026";
}
|
|
2652
|
+
|
|
2137
2653
|
// src/cli/commands/run.ts
|
|
2138
2654
|
import { stdin, stdout } from "process";
|
|
2139
2655
|
import { createInterface } from "readline/promises";
|
|
@@ -2194,13 +2710,13 @@ async function runCommand(opts) {
|
|
|
2194
2710
|
}
|
|
2195
2711
|
|
|
2196
2712
|
// src/cli/commands/stats.ts
|
|
2197
|
-
import { existsSync as existsSync2, readFileSync as
|
|
2713
|
+
import { existsSync as existsSync2, readFileSync as readFileSync5 } from "fs";
|
|
2198
2714
|
function statsCommand(opts) {
|
|
2199
2715
|
if (!existsSync2(opts.transcript)) {
|
|
2200
2716
|
console.error(`no such transcript: ${opts.transcript}`);
|
|
2201
2717
|
process.exit(1);
|
|
2202
2718
|
}
|
|
2203
|
-
const lines =
|
|
2719
|
+
const lines = readFileSync5(opts.transcript, "utf8").split(/\r?\n/).filter(Boolean);
|
|
2204
2720
|
let assistantTurns = 0;
|
|
2205
2721
|
let toolCalls = 0;
|
|
2206
2722
|
let lastTurn = 0;
|
|
@@ -2262,6 +2778,18 @@ program.command("run <task>").description("Run a single task non-interactively,
|
|
|
2262
2778
|
program.command("stats <transcript>").description("Summarize a JSONL transcript produced by `reasonix chat --transcript`.").action((transcript) => {
|
|
2263
2779
|
statsCommand({ transcript });
|
|
2264
2780
|
});
|
|
2781
|
+
program.command("replay <transcript>").description(
|
|
2782
|
+
"Pretty-print a transcript + rebuild its session summary (cost, cache, prefix stability). No API calls."
|
|
2783
|
+
).option("--head <n>", "Show only the first N records", (v) => Number.parseInt(v, 10)).option("--tail <n>", "Show only the last N records", (v) => Number.parseInt(v, 10)).action((transcript, opts) => {
|
|
2784
|
+
replayCommand({
|
|
2785
|
+
path: transcript,
|
|
2786
|
+
head: Number.isFinite(opts.head) ? opts.head : void 0,
|
|
2787
|
+
tail: Number.isFinite(opts.tail) ? opts.tail : void 0
|
|
2788
|
+
});
|
|
2789
|
+
});
|
|
2790
|
+
program.command("diff <a> <b>").description("Compare two transcripts: aggregate deltas + first divergence.").option("--md <path>", "Also write a markdown report (blog-ready) to this path").option("--label-a <label>", "Display label for transcript A (default: filename)").option("--label-b <label>", "Display label for transcript B (default: filename)").action((a, b, opts) => {
|
|
2791
|
+
diffCommand({ a, b, mdPath: opts.md, labelA: opts.labelA, labelB: opts.labelB });
|
|
2792
|
+
});
|
|
2265
2793
|
program.command("version").description("Print Reasonix version.").action(versionCommand);
|
|
2266
2794
|
program.parseAsync(process.argv).catch((err) => {
|
|
2267
2795
|
console.error(err);
|