@twarc_net/groundtruth 0.1.0 → 0.2.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/dist/index.d.ts CHANGED
@@ -68,6 +68,23 @@ interface Report {
68
68
  verdicts: Verdict[];
69
69
  summary: ReportSummary;
70
70
  }
71
+ /** Verdict levels that can cause a strict failure. */
72
+ type FailLevel = "unsupported" | "unverifiable";
73
+ /** User configuration, from `.groundtruthrc.json` or a `groundtruth` key in package.json. */
74
+ interface Config {
75
+ /** Default for the hook: block the turn when claims fail. */
76
+ strict?: boolean;
77
+ /** Which verdict levels count as a failure in strict mode (default: ["unsupported"]). */
78
+ failOn?: FailLevel[];
79
+ /** Shadow mode: record to the ledger but never print or block. For gradual rollout. */
80
+ shadow?: boolean;
81
+ /** Claim targets to skip — case-insensitive substring, or a glob with `*`. */
82
+ ignore?: string[];
83
+ /** Whole claim kinds to skip (e.g. ["action", "command"]). */
84
+ ignoreKinds?: ClaimKind[];
85
+ /** Default output format for `verify`. */
86
+ output?: "terminal" | "json" | "markdown";
87
+ }
71
88
 
72
89
  declare function parseTranscriptFile(path: string): Turn;
73
90
  declare function parseTranscript(raw: string): Turn;
@@ -84,12 +101,24 @@ declare function parseTranscript(raw: string): Turn;
84
101
  */
85
102
  declare function extractClaims(summary: string): Claim[];
86
103
 
104
+ /**
105
+ * Loads config for a project, merging (in increasing precedence):
106
+ * 1. a `groundtruth` key in package.json
107
+ * 2. a `.groundtruthrc.json` file
108
+ * Unknown/malformed values are ignored — config never throws.
109
+ */
110
+ declare function loadConfig(cwd: string): Config;
111
+ /** Drops claims the config asks to ignore (by kind or by target pattern). */
112
+ declare function applyConfig(claims: Claim[], config: Config): Claim[];
113
+ /** How many verdicts count as a failure under the config's `failOn` policy. */
114
+ declare function failingCount(report: Report, config: Config): number;
115
+
87
116
  /**
88
117
  * Builds the ground-truth evidence for a turn from two sources:
89
118
  * 1. The agent's own tool calls (precise, turn-scoped) — the primary signal.
90
119
  * 2. The git working tree (corroborating, catches non-tool edits) — optional.
91
120
  */
92
- declare function buildEvidence(toolUses: ToolUse[], cwd?: string): Evidence;
121
+ declare function buildEvidence(toolUses: ToolUse[], cwd?: string, base?: string): Evidence;
93
122
  declare function emptyEvidence(): Evidence;
94
123
  declare function mergeEvidence(target: Evidence, extra: Evidence): void;
95
124
 
@@ -101,7 +130,7 @@ declare function mergeEvidence(target: Evidence, extra: Evidence): void;
101
130
  * If `cwd` is not a git repository (or git is unavailable) this returns empty
102
131
  * evidence rather than throwing — the pipeline degrades gracefully.
103
132
  */
104
- declare function collectGitEvidence(cwd: string): Evidence;
133
+ declare function collectGitEvidence(cwd: string, base?: string): Evidence;
105
134
 
106
135
  /**
107
136
  * Checks each claim against the evidence and assigns a verdict.
@@ -127,6 +156,10 @@ interface PipelineInput {
127
156
  turn?: Turn;
128
157
  /** Working directory used to collect corroborating git evidence. */
129
158
  cwd?: string;
159
+ /** Base ref to diff against (PR mode: `base...HEAD`). Defaults to the working tree. */
160
+ base?: string;
161
+ /** Config (ignore rules etc.). If omitted, loaded from `cwd` when present. */
162
+ config?: Config;
130
163
  }
131
164
  /**
132
165
  * The full groundtruth pipeline:
@@ -134,4 +167,77 @@ interface PipelineInput {
134
167
  */
135
168
  declare function runPipeline(input: PipelineInput): Report;
136
169
 
137
- export { type Claim, type ClaimKind, type Evidence, type PipelineInput, type Polarity, type Report, type ReportSummary, type ToolUse, type Turn, type Verdict, type VerdictLevel, buildEvidence, buildReport, collectGitEvidence, emptyEvidence, extractClaims, mergeEvidence, parseTranscript, parseTranscriptFile, renderJson, renderMarkdown, renderTerminal, runPipeline, verifyClaims };
170
+ /**
171
+ * OpenAI Codex CLI rollout transcripts: JSONL where each line is
172
+ * `{timestamp, type, payload}`. The `response_item` payloads carry assistant
173
+ * messages, `function_call`/`custom_tool_call` (incl. `apply_patch`), and
174
+ * `local_shell_call`. See `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`.
175
+ */
176
+ declare function parseCodex(raw: string): Turn;
177
+
178
+ /**
179
+ * Gemini CLI chat transcripts. Current versions write JSONL (one MessageRecord
180
+ * per line); older versions write a single `{messages: [...]}` JSON object.
181
+ * `type:"gemini"` messages carry assistant text + a `toolCalls[]` array.
182
+ * See `~/.gemini/tmp/<project_hash>/chats/`.
183
+ */
184
+ declare function parseGemini(raw: string): Turn;
185
+
186
+ /**
187
+ * Cursor agent transcripts (the newer `agent-transcripts/*.jsonl`, matching the
188
+ * `cursor-agent` stream-json format): `assistant` / `tool_call` / `result`
189
+ * lines. Tool inputs (path + content, command) are recorded; we don't need the
190
+ * cached outputs. See `~/.cursor/projects/<project>/agent-transcripts/`.
191
+ */
192
+ declare function parseCursor(raw: string): Turn;
193
+
194
+ interface Adapter {
195
+ name: string;
196
+ /** Locate the most recent transcript for a project, or null. Best-effort. */
197
+ locate(cwd: string): string | null;
198
+ /** Parse a transcript file into a Turn. */
199
+ parse(path: string): Turn;
200
+ }
201
+ declare const ADAPTERS: Record<string, Adapter>;
202
+ declare const AGENT_NAMES: string[];
203
+ declare function getAdapter(name: string): Adapter | null;
204
+ /** Picks the adapter whose latest transcript is the most recently modified. */
205
+ declare function autoDetect(cwd: string): {
206
+ adapter: Adapter;
207
+ path: string;
208
+ } | null;
209
+
210
+ /**
211
+ * A privacy-safe local tally of verdict counts per turn. It stores ONLY counts,
212
+ * timestamps, and the project path — never code, claims, or prompts. Powers the
213
+ * `statusline` and `stats` commands.
214
+ */
215
+ interface LedgerEntry {
216
+ /** ISO timestamp. */
217
+ t: string;
218
+ /** Project working directory. */
219
+ cwd: string;
220
+ /** Session id, when known. */
221
+ session?: string;
222
+ /** verified / unsupported / review counts. */
223
+ v: number;
224
+ u: number;
225
+ r: number;
226
+ }
227
+ interface LedgerSummary {
228
+ runs: number;
229
+ verified: number;
230
+ unsupported: number;
231
+ unverifiable: number;
232
+ }
233
+ declare function ledgerPath(): string;
234
+ /** Appends a turn's verdict counts. Best-effort — never throws into the hook. */
235
+ declare function recordRun(report: Report, cwd: string, session?: string): void;
236
+ declare function readLedger(): LedgerEntry[];
237
+ declare function summarize(entries: LedgerEntry[], opts?: {
238
+ cwd?: string;
239
+ sinceDays?: number;
240
+ session?: string;
241
+ }): LedgerSummary;
242
+
243
+ export { ADAPTERS, AGENT_NAMES, type Adapter, type Claim, type ClaimKind, type Config, type Evidence, type FailLevel, type LedgerEntry, type LedgerSummary, type PipelineInput, type Polarity, type Report, type ReportSummary, type ToolUse, type Turn, type Verdict, type VerdictLevel, applyConfig, autoDetect, buildEvidence, buildReport, collectGitEvidence, emptyEvidence, extractClaims, failingCount, getAdapter, ledgerPath, loadConfig, mergeEvidence, parseCodex, parseCursor, parseGemini, parseTranscript, parseTranscriptFile, readLedger, recordRun, renderJson, renderMarkdown, renderTerminal, runPipeline, summarize, verifyClaims };