@twarc_net/groundtruth 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -68,6 +68,23 @@ interface Report {
68
68
  verdicts: Verdict[];
69
69
  summary: ReportSummary;
70
70
  }
71
+ /** Verdict levels that can cause a strict failure. */
72
+ type FailLevel = "unsupported" | "unverifiable";
73
+ /** User configuration, from `.groundtruthrc.json` or a `groundtruth` key in package.json. */
74
+ interface Config {
75
+ /** Default for the hook: block the turn when claims fail. */
76
+ strict?: boolean;
77
+ /** Which verdict levels count as a failure in strict mode (default: ["unsupported"]). */
78
+ failOn?: FailLevel[];
79
+ /** Shadow mode: record to the ledger but never print or block. For gradual rollout. */
80
+ shadow?: boolean;
81
+ /** Claim targets to skip — case-insensitive substring, or a glob with `*`. */
82
+ ignore?: string[];
83
+ /** Whole claim kinds to skip (e.g. ["action", "command"]). */
84
+ ignoreKinds?: ClaimKind[];
85
+ /** Default output format for `verify`. */
86
+ output?: "terminal" | "json" | "markdown";
87
+ }
71
88
 
72
89
  declare function parseTranscriptFile(path: string): Turn;
73
90
  declare function parseTranscript(raw: string): Turn;
@@ -85,13 +102,16 @@ declare function parseTranscript(raw: string): Turn;
85
102
  declare function extractClaims(summary: string): Claim[];
86
103
 
87
104
  /**
88
- * Builds the ground-truth evidence for a turn from two sources:
89
- * 1. The agent's own tool calls (precise, turn-scoped) — the primary signal.
90
- * 2. The git working tree (corroborating, catches non-tool edits) — optional.
105
+ * Loads config for a project, merging (in increasing precedence):
106
+ * 1. a `groundtruth` key in package.json
107
+ * 2. a `.groundtruthrc.json` file
108
+ * Unknown/malformed values are ignored — config never throws.
91
109
  */
92
- declare function buildEvidence(toolUses: ToolUse[], cwd?: string): Evidence;
93
- declare function emptyEvidence(): Evidence;
94
- declare function mergeEvidence(target: Evidence, extra: Evidence): void;
110
+ declare function loadConfig(cwd: string): Config;
111
+ /** Drops claims the config asks to ignore (by kind or by target pattern). */
112
+ declare function applyConfig(claims: Claim[], config: Config): Claim[];
113
+ /** How many verdicts count as a failure under the config's `failOn` policy. */
114
+ declare function failingCount(report: Report, config: Config): number;
95
115
 
96
116
  /**
97
117
  * Collects corroborating evidence from git: the working-tree diff against HEAD
@@ -101,7 +121,22 @@ declare function mergeEvidence(target: Evidence, extra: Evidence): void;
101
121
  * If `cwd` is not a git repository (or git is unavailable) this returns empty
102
122
  * evidence rather than throwing — the pipeline degrades gracefully.
103
123
  */
104
- declare function collectGitEvidence(cwd: string): Evidence;
124
+ interface GitOptions {
125
+ /** Diff against a base ref (PR/branch mode: `base...HEAD`). */
126
+ base?: string;
127
+ /** Use the staged index (`git diff --cached`) — for commit-msg checks. */
128
+ staged?: boolean;
129
+ }
130
+ declare function collectGitEvidence(cwd: string, opts?: GitOptions): Evidence;
131
+
132
+ /**
133
+ * Builds the ground-truth evidence for a turn from two sources:
134
+ * 1. The agent's own tool calls (precise, turn-scoped) — the primary signal.
135
+ * 2. The git working tree (corroborating, catches non-tool edits) — optional.
136
+ */
137
+ declare function buildEvidence(toolUses: ToolUse[], cwd?: string, git?: GitOptions): Evidence;
138
+ declare function emptyEvidence(): Evidence;
139
+ declare function mergeEvidence(target: Evidence, extra: Evidence): void;
105
140
 
106
141
  /**
107
142
  * Checks each claim against the evidence and assigns a verdict.
@@ -127,6 +162,12 @@ interface PipelineInput {
127
162
  turn?: Turn;
128
163
  /** Working directory used to collect corroborating git evidence. */
129
164
  cwd?: string;
165
+ /** Base ref to diff against (PR mode: `base...HEAD`). When omitted, the working tree is used instead. */
166
+ base?: string;
167
+ /** Use the staged index as evidence (commit-msg checks). */
168
+ staged?: boolean;
169
+ /** Config (ignore rules etc.). If omitted, loaded from `cwd` when present. */
170
+ config?: Config;
130
171
  }
131
172
  /**
132
173
  * The full groundtruth pipeline:
@@ -134,4 +175,94 @@ interface PipelineInput {
134
175
  */
135
176
  declare function runPipeline(input: PipelineInput): Report;
136
177
 
137
- export { type Claim, type ClaimKind, type Evidence, type PipelineInput, type Polarity, type Report, type ReportSummary, type ToolUse, type Turn, type Verdict, type VerdictLevel, buildEvidence, buildReport, collectGitEvidence, emptyEvidence, extractClaims, mergeEvidence, parseTranscript, parseTranscriptFile, renderJson, renderMarkdown, renderTerminal, runPipeline, verifyClaims };
178
+ /**
179
+ * OpenAI Codex CLI rollout transcripts: JSONL where each line is
180
+ * `{timestamp, type, payload}`. The `response_item` payloads carry assistant
181
+ * messages, `function_call`/`custom_tool_call` (incl. `apply_patch`), and
182
+ * `local_shell_call`. See `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`.
183
+ */
184
+ declare function parseCodex(raw: string): Turn;
185
+
186
+ /**
187
+ * Gemini CLI chat transcripts. Current versions write JSONL (one MessageRecord
188
+ * per line); older versions write a single `{messages: [...]}` JSON object.
189
+ * `type:"gemini"` messages carry assistant text + a `toolCalls[]` array.
190
+ * See `~/.gemini/tmp/<project_hash>/chats/`.
191
+ */
192
+ declare function parseGemini(raw: string): Turn;
193
+
194
+ /**
195
+ * Cursor agent transcripts (the newer `agent-transcripts/*.jsonl`, matching the
196
+ * `cursor-agent` stream-json format): `assistant` / `tool_call` / `result`
197
+ * lines. Tool inputs (path + content, command) are recorded; we don't need the
198
+ * cached outputs. See `~/.cursor/projects/<project>/agent-transcripts/`.
199
+ */
200
+ declare function parseCursor(raw: string): Turn;
201
+
202
+ /**
203
+ * OpenCode stores a session across many files under `storage/`:
204
+ * message/<sessionID>/<messageID>.json — message info (role, time)
205
+ * part/<messageID>/<partID>.json — text and tool parts
206
+ * `parseOpenCode` takes the storage root, finds the most recently active
207
+ * session, and reassembles it. See `~/.local/share/opencode/storage/`.
208
+ */
209
+ declare function parseOpenCode(input: string): Turn;
210
+
211
+ /**
212
+ * Aider chat history (`.aider.chat.history.md`) — best-effort. User turns are
213
+ * `#### ` lines; assistant turns are raw markdown; tool output is blockquoted.
214
+ * Edits appear inline as SEARCH/REPLACE blocks (default editblock coder); we
215
+ * recover the new content, and take the path from the line preceding the block.
216
+ */
217
+ declare function parseAider(raw: string): Turn;
218
+
219
+ interface Adapter {
220
+ name: string;
221
+ /** Locate the most recent transcript for a project, or null. Best-effort. */
222
+ locate(cwd: string): string | null;
223
+ /** Parse a transcript file into a Turn. */
224
+ parse(path: string): Turn;
225
+ }
226
+ declare const ADAPTERS: Record<string, Adapter>;
227
+ declare const AGENT_NAMES: string[];
228
+ declare function getAdapter(name: string): Adapter | null;
229
+ /** Picks the adapter whose latest transcript is the most recently modified. */
230
+ declare function autoDetect(cwd: string): {
231
+ adapter: Adapter;
232
+ path: string;
233
+ } | null;
234
+
235
+ /**
236
+ * A privacy-safe local tally of verdict counts per turn. It stores ONLY counts,
237
+ * timestamps, and the project path — never code, claims, or prompts. Powers the
238
+ * `statusline` and `stats` commands.
239
+ */
240
+ interface LedgerEntry {
241
+ /** ISO timestamp. */
242
+ t: string;
243
+ /** Project working directory. */
244
+ cwd: string;
245
+ /** Session id, when known. */
246
+ session?: string;
247
+ /** Verdict counts for the turn: verified (`v`), unsupported (`u`), and review (`r`). */
248
+ v: number;
249
+ u: number;
250
+ r: number;
251
+ }
252
+ interface LedgerSummary {
253
+ runs: number;
254
+ verified: number;
255
+ unsupported: number;
256
+ unverifiable: number;
257
+ }
258
+ declare function ledgerPath(): string;
259
+ /** Appends a turn's verdict counts. Best-effort — never throws into the hook. */
260
+ declare function recordRun(report: Report, cwd: string, session?: string): void;
261
+ declare function readLedger(): LedgerEntry[];
262
+ declare function summarize(entries: LedgerEntry[], opts?: {
263
+ cwd?: string;
264
+ sinceDays?: number;
265
+ session?: string;
266
+ }): LedgerSummary;
267
+
268
+ export { ADAPTERS, AGENT_NAMES, type Adapter, type Claim, type ClaimKind, type Config, type Evidence, type FailLevel, type LedgerEntry, type LedgerSummary, type PipelineInput, type Polarity, type Report, type ReportSummary, type ToolUse, type Turn, type Verdict, type VerdictLevel, applyConfig, autoDetect, buildEvidence, buildReport, collectGitEvidence, emptyEvidence, extractClaims, failingCount, getAdapter, ledgerPath, loadConfig, mergeEvidence, parseAider, parseCodex, parseCursor, parseGemini, parseOpenCode, parseTranscript, parseTranscriptFile, readLedger, recordRun, renderJson, renderMarkdown, renderTerminal, runPipeline, summarize, verifyClaims };