npm - reasonix - Versions diffs - 0.0.5 → 0.2.0 - Mend

reasonix 0.0.5 → 0.2.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.ts — CHANGED

@@ -1,3 +1,5 @@
+import { WriteStream } from 'node:fs';
 /**
  * Retry layer for DeepSeek API calls.
  *
@@ -436,6 +438,12 @@ interface LoopEvent {
     content: string;
     reasoningDelta?: string;
     toolName?: string;
+    /**
+     * Raw JSON-string arguments the model sent for a tool call (role === "tool").
+     * Populated so transcripts can persist *why* a tool was called, not just
+     * what it returned. Needed by `reasonix diff` to explain divergences.
+     */
+    toolArgs?: string;
     stats?: TurnStats;
     planState?: TypedPlanState;
     repair?: RepairReport;
@@ -463,6 +471,12 @@ interface CacheFirstLoopOptions {
      * since the default selector scores samples by plan-state uncertainty.
      */
     branch?: number | BranchOptions;
+    /**
+     * Session name. When set, the loop pre-loads the session's prior messages
+     * into its log on construction, and appends every new log entry to
+     * `~/.reasonix/sessions/<name>.jsonl` so the next run can resume.
+     */
+    session?: string;
 }
 /**
  * Pillar 1 — Cache-First Loop.
@@ -494,9 +508,13 @@ declare class CacheFirstLoop {
     harvestOptions: HarvestOptions;
     branchEnabled: boolean;
     branchOptions: BranchOptions;
+    sessionName: string | null;
+    /** Number of messages that were pre-loaded from the session file. */
+    readonly resumedMessageCount: number;
     private _turn;
     private _streamPreference;
     constructor(opts: CacheFirstLoopOptions);
+    private appendAndPersist;
     /**
      * Reconfigure model/harvest/branch/stream mid-session. The loop's log,
      * scratch, and stats are preserved — only the per-turn behavior changes.
@@ -510,6 +528,38 @@ declare class CacheFirstLoop {
     private assistantMessage;
 }
+/**
+ * Session persistence.
+ *
+ * Every turn's log entries (user / assistant / tool messages) are appended to
+ * a JSONL file under `~/.reasonix/sessions/<name>.jsonl`. Next time the user
+ * starts the CLI with the same session name, the loop pre-loads the file
+ * into its AppendOnlyLog so the new turn has full prior context.
+ *
+ * Design notes:
+ *   - JSONL rather than JSON so concurrent writes don't corrupt.
+ *   - 0600 permissions on Unix (chmod no-ops on Windows).
+ *   - Name sanitization keeps paths safe: only [\w-] and CJK letters pass;
+ *     anything else is replaced with underscore, max 64 chars.
+ *   - The loop's stats/session aren't persisted — only the message log.
+ *     Cost accounting resets each run (by design — old costs are sunk).
+ */
+interface SessionInfo {
+    name: string;
+    path: string;
+    size: number;
+    messageCount: number;
+    mtime: Date;
+}
+declare function sessionsDir(): string;
+declare function sessionPath(name: string): string;
+declare function sanitizeName(name: string): string;
+declare function loadSessionMessages(name: string): ChatMessage[];
+declare function appendSessionMessage(name: string, message: ChatMessage): void;
+declare function listSessions(): SessionInfo[];
+declare function deleteSession(name: string): boolean;
 /**
  * Minimal `.env` loader; no dependency on dotenv.
  *
@@ -519,6 +569,207 @@ declare class CacheFirstLoop {
  */
 declare function loadDotenv(path?: string): void;
+/**
+ * Transcript format — the canonical "audit log" of a Reasonix session.
+ *
+ * Design split:
+ *   - Session file (`~/.reasonix/sessions/<name>.jsonl`) stores only the
+ *     `ChatMessage`s the model needs to resume. See session.ts.
+ *   - Transcript file (this module) stores every LoopEvent with usage, cost,
+ *     model, and prefix fingerprint attached where available — enough for
+ *     replay and diff to reconstruct economics.
+ *
+ * The two are different contracts: sessions are the user's *memory*;
+ * transcripts are the *receipts*. Don't conflate them.
+ *
+ * Backward compatibility: all fields beyond {ts, turn, role, content} are
+ * optional on read. A v0.1 transcript (pre-usage) still parses and renders
+ * — it just shows cost/cache as n/a.
+ */
+interface TranscriptRecord {
+    /** ISO-8601 timestamp at emit time. */
+    ts: string;
+    /** 1-based turn number within the session. */
+    turn: number;
+    /** LoopEvent role — "assistant_delta" | "assistant_final" | "tool" | "done" | ... */
+    role: string;
+    /** For assistant events, the final (or delta) text; for tool events, the tool result. */
+    content: string;
+    /** Tool name (role === "tool"). */
+    tool?: string;
+    /** JSON-string args the model sent for a tool call (role === "tool"). Persisted so diff can explain *why* two runs made different calls. */
+    args?: string;
+    /** DeepSeek token-usage snapshot (role === "assistant_final"). */
+    usage?: RawUsage;
+    /** USD cost of this turn (role === "assistant_final"). */
+    cost?: number;
+    /** Model id that produced this turn. */
+    model?: string;
+    /**
+     * The ImmutablePrefix fingerprint at this turn. Lets diff prove two runs
+     * share a prefix — i.e. any cache-hit delta is attributable to log
+     * stability, not to a different system prompt.
+     */
+    prefixHash?: string;
+    /** Optional error message (role === "error"). */
+    error?: string;
+}
+interface TranscriptMeta {
+    /**
+     * Optional metadata written as the first line of a transcript. Lets
+     * downstream tooling know what it's reading without guessing.
+     * Recognized by a special role "_meta".
+     */
+    version: 1;
+    source: string;
+    model?: string;
+    task?: string;
+    mode?: string;
+    repeat?: number;
+    startedAt: string;
+}
+interface ReadTranscriptResult {
+    meta: TranscriptMeta | null;
+    records: TranscriptRecord[];
+}
+/**
+ * Build a TranscriptRecord from a LoopEvent. Extra fields (model,
+ * prefixHash) that the LoopEvent doesn't carry are passed in separately
+ * because they're session-level, not event-level.
+ */
+declare function recordFromLoopEvent(ev: LoopEvent, extra: {
+    model: string;
+    prefixHash: string;
+}): TranscriptRecord;
+/**
+ * Append a record to an open write stream. Caller owns the stream lifecycle.
+ */
+declare function writeRecord(stream: WriteStream, record: TranscriptRecord): void;
+/**
+ * Write a _meta line to an open write stream. Call exactly once, at the top.
+ */
+declare function writeMeta(stream: WriteStream, meta: TranscriptMeta): void;
+/**
+ * Convenience: open a stream, write meta, return stream.
+ */
+declare function openTranscriptFile(path: string, meta: TranscriptMeta): WriteStream;
+/**
+ * Parse a transcript file. Returns meta (if the first line is a _meta record)
+ * and the full record list.
+ *
+ * Robustness contract:
+ *   - Empty lines are skipped.
+ *   - Malformed JSON lines are skipped silently (do not crash on partial
+ *     files — live chats may be mid-write).
+ *   - Records missing optional fields still parse — they're just rendered
+ *     with n/a where the optional value would go.
+ */
+declare function readTranscript(path: string): ReadTranscriptResult;
+declare function parseTranscript(raw: string): ReadTranscriptResult;
+/**
+ * Replay — reconstruct session economics from a transcript file.
+ *
+ * Given a transcript written by App.tsx or the bench runner, rebuild a
+ * SessionSummary-compatible aggregate (turn count, total cost, cache-hit
+ * ratio, vs-Claude estimate) without replaying the LLM calls.
+ *
+ * The whole point is offline auditing: a reader should be able to reproduce
+ * the headline numbers from a transcript alone, without an API key.
+ */
+interface ReplayStats extends SessionSummary {
+    /** Per-turn stats, in turn order. Only assistant_final records contribute. */
+    perTurn: TurnStats[];
+    /** Unique models that appeared in the transcript's assistant_final records. */
+    models: string[];
+    /** Unique prefix hashes that appeared. Length > 1 means the prefix churned (cache-hostile). */
+    prefixHashes: string[];
+    /** Count of user-role records (user turns issued). */
+    userTurns: number;
+    /** Count of tool-role records (tool calls executed). */
+    toolCalls: number;
+}
+/**
+ * Parse a transcript file and compute replay stats. Throws only on I/O
+ * errors; malformed lines inside the file are skipped silently.
+ */
+declare function replayFromFile(path: string): {
+    parsed: ReadTranscriptResult;
+    stats: ReplayStats;
+};
+declare function computeReplayStats(records: TranscriptRecord[]): ReplayStats;
+/**
+ * Diff — compare two transcripts and produce a summary + divergence report.
+ *
+ * Two transcripts are "comparable" when they stem from the same task (or
+ * the same user prompt). Alignment is by turn number: assistant_final #N
+ * in A pairs with assistant_final #N in B. If one side ran more turns, the
+ * extras are labeled "only in A" / "only in B".
+ *
+ * What we compute:
+ *   - Aggregate deltas: turns, tool calls, cache hit, cost, token counts
+ *   - First divergence: the lowest turn where A and B's tool calls or
+ *     assistant text differ meaningfully
+ *   - Prefix-stability story: how many unique prefix hashes each side used
+ *
+ * Non-goals (deliberately):
+ *   - LLM-judge quality comparison
+ *   - Per-token delta rendering — not useful at the fidelity we're at
+ *   - Embedding similarity — Levenshtein ratio is cheap and good enough
+ */
+interface DiffSide {
+    label: string;
+    meta: ReadTranscriptResult["meta"];
+    records: TranscriptRecord[];
+    stats: ReplayStats;
+}
+interface TurnPair {
+    turn: number;
+    aAssistant?: TranscriptRecord;
+    bAssistant?: TranscriptRecord;
+    aTools: TranscriptRecord[];
+    bTools: TranscriptRecord[];
+    /**
+     * Classification of the pair:
+     *   "match"      — both sides present, text & tool calls within threshold
+     *   "diverge"    — both sides present, but text or tool calls differ
+     *   "only_in_a"  — assistant_final in A but not B
+     *   "only_in_b"  — assistant_final in B but not A
+     */
+    kind: "match" | "diverge" | "only_in_a" | "only_in_b";
+    /** When kind === "diverge", a short one-liner pointing at what differs. */
+    divergenceNote?: string;
+}
+interface DiffReport {
+    a: DiffSide;
+    b: DiffSide;
+    pairs: TurnPair[];
+    firstDivergenceTurn: number | null;
+}
+declare function diffTranscripts(a: {
+    label: string;
+    parsed: ReadTranscriptResult;
+}, b: {
+    label: string;
+    parsed: ReadTranscriptResult;
+}): DiffReport;
+/**
+ * Normalized Levenshtein similarity ratio in [0, 1]. 1 = identical.
+ * Early-exits for long strings (> 2000 chars) with a cheap token-overlap
+ * estimate to keep diff fast on chatty transcripts.
+ */
+declare function similarity(a: string, b: string): number;
+interface RenderOptions {
+    /** Monochrome output (for file redirection or piping). Defaults to true. */
+    monochrome?: boolean;
+}
+declare function renderSummaryTable(report: DiffReport, _opts?: RenderOptions): string;
+declare function renderMarkdown(report: DiffReport): string;
 /**
  * User-level config storage for the Reasonix CLI.
  *
@@ -546,6 +797,6 @@ declare function redactKey(key: string): string;
 /** Reasonix — DeepSeek-native agent framework. Library entry point. */
-declare const VERSION = "0.0.1";
+declare const VERSION = "0.2.0";
-export { AppendOnlyLog, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, CacheFirstLoop, type CacheFirstLoopOptions, type ChatMessage, type ChatResponse, DeepSeekClient, type DeepSeekClientOptions, type EventRole, type FlattenDecision, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type JSONSchema, type LoopEvent, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, SessionStats, type SessionSummary, StormBreaker, type StreamChunk, type ToolCall, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TruncationRepairResult, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, claudeEquivalentCost, costUsd, defaultConfigPath, defaultSelector, emptyPlanState, fetchWithRetry, flattenSchema, harvest, isPlanStateEmpty, isPlausibleKey, loadApiKey, loadDotenv, nestArguments, readConfig, redactKey, repairTruncatedJson, runBranches, saveApiKey, scavengeToolCalls, writeConfig };
+export { AppendOnlyLog, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, CacheFirstLoop, type CacheFirstLoopOptions, type ChatMessage, type ChatResponse, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EventRole, type FlattenDecision, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type JSONSchema, type LoopEvent, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SessionInfo, SessionStats, type SessionSummary, StormBreaker, type StreamChunk, type ToolCall, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, type TranscriptRecord, type TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, appendSessionMessage, claudeEquivalentCost, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenSchema, harvest, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, writeConfig, writeMeta, writeRecord };