@twarc_net/groundtruth 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -2
- package/dist/cli.js +1120 -178
- package/dist/index.d.ts +139 -8
- package/dist/index.js +849 -24
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -68,6 +68,23 @@ interface Report {
|
|
|
68
68
|
verdicts: Verdict[];
|
|
69
69
|
summary: ReportSummary;
|
|
70
70
|
}
|
|
71
|
+
/** Verdict levels that can cause a strict failure. */
|
|
72
|
+
type FailLevel = "unsupported" | "unverifiable";
|
|
73
|
+
/** User configuration, from `.groundtruthrc.json` or a `groundtruth` key in package.json. */
|
|
74
|
+
interface Config {
|
|
75
|
+
/** Default for the hook: block the turn when claims fail. */
|
|
76
|
+
strict?: boolean;
|
|
77
|
+
/** Which verdict levels count as a failure in strict mode (default: ["unsupported"]). */
|
|
78
|
+
failOn?: FailLevel[];
|
|
79
|
+
/** Shadow mode: record to the ledger but never print or block. For gradual rollout. */
|
|
80
|
+
shadow?: boolean;
|
|
81
|
+
/** Claim targets to skip — case-insensitive substring, or a glob with `*`. */
|
|
82
|
+
ignore?: string[];
|
|
83
|
+
/** Whole claim kinds to skip (e.g. ["action", "command"]). */
|
|
84
|
+
ignoreKinds?: ClaimKind[];
|
|
85
|
+
/** Default output format for `verify`. */
|
|
86
|
+
output?: "terminal" | "json" | "markdown";
|
|
87
|
+
}
|
|
71
88
|
|
|
72
89
|
declare function parseTranscriptFile(path: string): Turn;
|
|
73
90
|
declare function parseTranscript(raw: string): Turn;
|
|
@@ -85,13 +102,16 @@ declare function parseTranscript(raw: string): Turn;
|
|
|
85
102
|
declare function extractClaims(summary: string): Claim[];
|
|
86
103
|
|
|
87
104
|
/**
|
|
88
|
-
*
|
|
89
|
-
* 1.
|
|
90
|
-
* 2.
|
|
105
|
+
* Loads config for a project, merging (in increasing precedence):
|
|
106
|
+
* 1. a `groundtruth` key in package.json
|
|
107
|
+
* 2. a `.groundtruthrc.json` file
|
|
108
|
+
* Unknown/malformed values are ignored — config never throws.
|
|
91
109
|
*/
|
|
92
|
-
declare function
|
|
93
|
-
|
|
94
|
-
declare function
|
|
110
|
+
declare function loadConfig(cwd: string): Config;
|
|
111
|
+
/** Drops claims the config asks to ignore (by kind or by target pattern). */
|
|
112
|
+
declare function applyConfig(claims: Claim[], config: Config): Claim[];
|
|
113
|
+
/** How many verdicts count as a failure under the config's `failOn` policy. */
|
|
114
|
+
declare function failingCount(report: Report, config: Config): number;
|
|
95
115
|
|
|
96
116
|
/**
|
|
97
117
|
* Collects corroborating evidence from git: the working-tree diff against HEAD
|
|
@@ -101,7 +121,22 @@ declare function mergeEvidence(target: Evidence, extra: Evidence): void;
|
|
|
101
121
|
* If `cwd` is not a git repository (or git is unavailable) this returns empty
|
|
102
122
|
* evidence rather than throwing — the pipeline degrades gracefully.
|
|
103
123
|
*/
|
|
104
|
-
|
|
124
|
+
interface GitOptions {
|
|
125
|
+
/** Diff against a base ref (PR/branch mode: `base...HEAD`). */
|
|
126
|
+
base?: string;
|
|
127
|
+
/** Use the staged index (`git diff --cached`) — for commit-msg checks. */
|
|
128
|
+
staged?: boolean;
|
|
129
|
+
}
|
|
130
|
+
declare function collectGitEvidence(cwd: string, opts?: GitOptions): Evidence;
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Builds the ground-truth evidence for a turn from two sources:
|
|
134
|
+
* 1. The agent's own tool calls (precise, turn-scoped) — the primary signal.
|
|
135
|
+
* 2. The git working tree (corroborating, catches non-tool edits) — optional.
|
|
136
|
+
*/
|
|
137
|
+
declare function buildEvidence(toolUses: ToolUse[], cwd?: string, git?: GitOptions): Evidence;
|
|
138
|
+
declare function emptyEvidence(): Evidence;
|
|
139
|
+
declare function mergeEvidence(target: Evidence, extra: Evidence): void;
|
|
105
140
|
|
|
106
141
|
/**
|
|
107
142
|
* Checks each claim against the evidence and assigns a verdict.
|
|
@@ -127,6 +162,12 @@ interface PipelineInput {
|
|
|
127
162
|
turn?: Turn;
|
|
128
163
|
/** Working directory used to collect corroborating git evidence. */
|
|
129
164
|
cwd?: string;
|
|
165
|
+
/** Base ref to diff against (PR mode: `base...HEAD`). When omitted, the working tree is used instead. */
|
|
166
|
+
base?: string;
|
|
167
|
+
/** Use the staged index as evidence (commit-msg checks). */
|
|
168
|
+
staged?: boolean;
|
|
169
|
+
/** Config (ignore rules etc.). If omitted, loaded from `cwd` when present. */
|
|
170
|
+
config?: Config;
|
|
130
171
|
}
|
|
131
172
|
/**
|
|
132
173
|
* The full groundtruth pipeline:
|
|
@@ -134,4 +175,94 @@ interface PipelineInput {
|
|
|
134
175
|
*/
|
|
135
176
|
declare function runPipeline(input: PipelineInput): Report;
|
|
136
177
|
|
|
137
|
-
|
|
178
|
+
/**
|
|
179
|
+
* OpenAI Codex CLI rollout transcripts: JSONL where each line is
|
|
180
|
+
* `{timestamp, type, payload}`. The `response_item` payloads carry assistant
|
|
181
|
+
* messages, `function_call`/`custom_tool_call` (incl. `apply_patch`), and
|
|
182
|
+
* `local_shell_call`. See `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`.
|
|
183
|
+
*/
|
|
184
|
+
declare function parseCodex(raw: string): Turn;
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Gemini CLI chat transcripts. Current versions write JSONL (one MessageRecord
|
|
188
|
+
* per line); older versions write a single `{messages: [...]}` JSON object.
|
|
189
|
+
* `type:"gemini"` messages carry assistant text + a `toolCalls[]` array.
|
|
190
|
+
* See `~/.gemini/tmp/<project_hash>/chats/`.
|
|
191
|
+
*/
|
|
192
|
+
declare function parseGemini(raw: string): Turn;
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Cursor agent transcripts (the newer `agent-transcripts/*.jsonl`, matching the
|
|
196
|
+
* `cursor-agent` stream-json format): `assistant` / `tool_call` / `result`
|
|
197
|
+
* lines. Tool inputs (path + content, command) are recorded; we don't need the
|
|
198
|
+
* cached outputs. See `~/.cursor/projects/<project>/agent-transcripts/`.
|
|
199
|
+
*/
|
|
200
|
+
declare function parseCursor(raw: string): Turn;
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* OpenCode stores a session across many files under `storage/`:
|
|
204
|
+
* message/<sessionID>/<messageID>.json — message info (role, time)
|
|
205
|
+
* part/<messageID>/<partID>.json — text and tool parts
|
|
206
|
+
* `parseOpenCode` takes the storage root, finds the most recently active
|
|
207
|
+
* session, and reassembles it. See `~/.local/share/opencode/storage/`.
|
|
208
|
+
*/
|
|
209
|
+
declare function parseOpenCode(input: string): Turn;
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Aider chat history (`.aider.chat.history.md`) — best-effort. User turns are
|
|
213
|
+
* `#### ` lines; assistant turns are raw markdown; tool output is blockquoted.
|
|
214
|
+
* Edits appear inline as SEARCH/REPLACE blocks (default editblock coder); we
|
|
215
|
+
* recover the new content + the path from the line preceding the block.
|
|
216
|
+
*/
|
|
217
|
+
declare function parseAider(raw: string): Turn;
|
|
218
|
+
|
|
219
|
+
interface Adapter {
|
|
220
|
+
name: string;
|
|
221
|
+
/** Locate the most recent transcript for a project, or null. Best-effort. */
|
|
222
|
+
locate(cwd: string): string | null;
|
|
223
|
+
/** Parse a transcript file into a Turn. */
|
|
224
|
+
parse(path: string): Turn;
|
|
225
|
+
}
|
|
226
|
+
declare const ADAPTERS: Record<string, Adapter>;
|
|
227
|
+
declare const AGENT_NAMES: string[];
|
|
228
|
+
declare function getAdapter(name: string): Adapter | null;
|
|
229
|
+
/** Picks the adapter whose latest transcript is the most recently modified. */
|
|
230
|
+
declare function autoDetect(cwd: string): {
|
|
231
|
+
adapter: Adapter;
|
|
232
|
+
path: string;
|
|
233
|
+
} | null;
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* A privacy-safe local tally of verdict counts per turn. It stores ONLY counts,
|
|
237
|
+
* timestamps, and the project path — never code, claims, or prompts. Powers the
|
|
238
|
+
* `statusline` and `stats` commands.
|
|
239
|
+
*/
|
|
240
|
+
interface LedgerEntry {
|
|
241
|
+
/** ISO timestamp. */
|
|
242
|
+
t: string;
|
|
243
|
+
/** Project working directory. */
|
|
244
|
+
cwd: string;
|
|
245
|
+
/** Session id, when known. */
|
|
246
|
+
session?: string;
|
|
247
|
+
/** Counts of verified (`v`), unsupported (`u`), and needs-review (`r`) verdicts. */
|
|
248
|
+
v: number;
|
|
249
|
+
u: number;
|
|
250
|
+
r: number;
|
|
251
|
+
}
|
|
252
|
+
interface LedgerSummary {
|
|
253
|
+
runs: number;
|
|
254
|
+
verified: number;
|
|
255
|
+
unsupported: number;
|
|
256
|
+
unverifiable: number;
|
|
257
|
+
}
|
|
258
|
+
declare function ledgerPath(): string;
|
|
259
|
+
/** Appends a turn's verdict counts to the ledger. Best-effort — never throws into the hook. */
|
|
260
|
+
declare function recordRun(report: Report, cwd: string, session?: string): void;
|
|
261
|
+
declare function readLedger(): LedgerEntry[];
|
|
262
|
+
declare function summarize(entries: LedgerEntry[], opts?: {
|
|
263
|
+
cwd?: string;
|
|
264
|
+
sinceDays?: number;
|
|
265
|
+
session?: string;
|
|
266
|
+
}): LedgerSummary;
|
|
267
|
+
|
|
268
|
+
export { ADAPTERS, AGENT_NAMES, type Adapter, type Claim, type ClaimKind, type Config, type Evidence, type FailLevel, type LedgerEntry, type LedgerSummary, type PipelineInput, type Polarity, type Report, type ReportSummary, type ToolUse, type Turn, type Verdict, type VerdictLevel, applyConfig, autoDetect, buildEvidence, buildReport, collectGitEvidence, emptyEvidence, extractClaims, failingCount, getAdapter, ledgerPath, loadConfig, mergeEvidence, parseAider, parseCodex, parseCursor, parseGemini, parseOpenCode, parseTranscript, parseTranscriptFile, readLedger, recordRun, renderJson, renderMarkdown, renderTerminal, runPipeline, summarize, verifyClaims };
|