@twarc_net/groundtruth 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -2
- package/dist/cli.js +890 -177
- package/dist/index.d.ts +109 -3
- package/dist/index.js +625 -24
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -68,6 +68,23 @@ interface Report {
|
|
|
68
68
|
verdicts: Verdict[];
|
|
69
69
|
summary: ReportSummary;
|
|
70
70
|
}
|
|
71
|
+
/** Verdict levels that can cause a strict failure. */
|
|
72
|
+
type FailLevel = "unsupported" | "unverifiable";
|
|
73
|
+
/** User configuration, read from a `groundtruth` key in package.json and/or a `.groundtruthrc.json` file. */
|
|
74
|
+
interface Config {
|
|
75
|
+
/** Default for the hook: block the turn when claims fail. */
|
|
76
|
+
strict?: boolean;
|
|
77
|
+
/** Which verdict levels count as a failure in strict mode (default: ["unsupported"]). */
|
|
78
|
+
failOn?: FailLevel[];
|
|
79
|
+
/** Shadow mode: record to the ledger but never print or block. For gradual rollout. */
|
|
80
|
+
shadow?: boolean;
|
|
81
|
+
/** Claim targets to skip — case-insensitive substring, or a glob with `*`. */
|
|
82
|
+
ignore?: string[];
|
|
83
|
+
/** Whole claim kinds to skip (e.g. ["action", "command"]). */
|
|
84
|
+
ignoreKinds?: ClaimKind[];
|
|
85
|
+
/** Default output format for `verify`. */
|
|
86
|
+
output?: "terminal" | "json" | "markdown";
|
|
87
|
+
}
|
|
71
88
|
|
|
72
89
|
declare function parseTranscriptFile(path: string): Turn;
|
|
73
90
|
declare function parseTranscript(raw: string): Turn;
|
|
@@ -84,12 +101,24 @@ declare function parseTranscript(raw: string): Turn;
|
|
|
84
101
|
*/
|
|
85
102
|
declare function extractClaims(summary: string): Claim[];
|
|
86
103
|
|
|
104
|
+
/**
|
|
105
|
+
* Loads config for a project, merging (in increasing precedence):
|
|
106
|
+
* 1. a `groundtruth` key in package.json
|
|
107
|
+
* 2. a `.groundtruthrc.json` file
|
|
108
|
+
* Unknown or malformed values are ignored — loading config never throws.
|
|
109
|
+
*/
|
|
110
|
+
declare function loadConfig(cwd: string): Config;
|
|
111
|
+
/** Drops claims the config asks to ignore (by kind or by target pattern). */
|
|
112
|
+
declare function applyConfig(claims: Claim[], config: Config): Claim[];
|
|
113
|
+
/** How many verdicts count as a failure under the config's `failOn` policy. */
|
|
114
|
+
declare function failingCount(report: Report, config: Config): number;
|
|
115
|
+
|
|
87
116
|
/**
|
|
88
117
|
* Builds the ground-truth evidence for a turn from two sources:
|
|
89
118
|
* 1. The agent's own tool calls (precise, turn-scoped) — the primary signal.
|
|
90
119
|
* 2. The git working tree (corroborating, catches non-tool edits) — optional.
|
|
91
120
|
*/
|
|
92
|
-
declare function buildEvidence(toolUses: ToolUse[], cwd?: string): Evidence;
|
|
121
|
+
declare function buildEvidence(toolUses: ToolUse[], cwd?: string, base?: string): Evidence;
|
|
93
122
|
declare function emptyEvidence(): Evidence;
|
|
94
123
|
declare function mergeEvidence(target: Evidence, extra: Evidence): void;
|
|
95
124
|
|
|
@@ -101,7 +130,7 @@ declare function mergeEvidence(target: Evidence, extra: Evidence): void;
|
|
|
101
130
|
* If `cwd` is not a git repository (or git is unavailable) this returns empty
|
|
102
131
|
* evidence rather than throwing — the pipeline degrades gracefully.
|
|
103
132
|
*/
|
|
104
|
-
declare function collectGitEvidence(cwd: string): Evidence;
|
|
133
|
+
declare function collectGitEvidence(cwd: string, base?: string): Evidence;
|
|
105
134
|
|
|
106
135
|
/**
|
|
107
136
|
* Checks each claim against the evidence and assigns a verdict.
|
|
@@ -127,6 +156,10 @@ interface PipelineInput {
|
|
|
127
156
|
turn?: Turn;
|
|
128
157
|
/** Working directory used to collect corroborating git evidence. */
|
|
129
158
|
cwd?: string;
|
|
159
|
+
/** Base ref to diff against (PR mode: `base...HEAD`). Defaults to the working tree. */
|
|
160
|
+
base?: string;
|
|
161
|
+
/** Config (ignore rules etc.). If omitted, loaded from `cwd` when present. */
|
|
162
|
+
config?: Config;
|
|
130
163
|
}
|
|
131
164
|
/**
|
|
132
165
|
* The full groundtruth pipeline:
|
|
@@ -134,4 +167,77 @@ interface PipelineInput {
|
|
|
134
167
|
*/
|
|
135
168
|
declare function runPipeline(input: PipelineInput): Report;
|
|
136
169
|
|
|
137
|
-
|
|
170
|
+
/**
|
|
171
|
+
* OpenAI Codex CLI rollout transcripts: JSONL where each line is
|
|
172
|
+
* `{timestamp, type, payload}`. The `response_item` payloads carry assistant
|
|
173
|
+
* messages, `function_call`/`custom_tool_call` (incl. `apply_patch`), and
|
|
174
|
+
* `local_shell_call`. See `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`.
|
|
175
|
+
*/
|
|
176
|
+
declare function parseCodex(raw: string): Turn;
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Gemini CLI chat transcripts. Current versions write JSONL (one MessageRecord
|
|
180
|
+
* per line); older versions write a single `{messages: [...]}` JSON object.
|
|
181
|
+
* `type:"gemini"` messages carry assistant text + a `toolCalls[]` array.
|
|
182
|
+
* See `~/.gemini/tmp/<project_hash>/chats/`.
|
|
183
|
+
*/
|
|
184
|
+
declare function parseGemini(raw: string): Turn;
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Cursor agent transcripts (the newer `agent-transcripts/*.jsonl`, matching the
|
|
188
|
+
* `cursor-agent` stream-json format): `assistant` / `tool_call` / `result`
|
|
189
|
+
* lines. Tool inputs (path + content, command) are recorded; we don't need the
|
|
190
|
+
* cached outputs. See `~/.cursor/projects/<project>/agent-transcripts/`.
|
|
191
|
+
*/
|
|
192
|
+
declare function parseCursor(raw: string): Turn;
|
|
193
|
+
|
|
194
|
+
interface Adapter {
|
|
195
|
+
name: string;
|
|
196
|
+
/** Locates the most recent transcript for a project, returning `null` when none is found. Best-effort. */
|
|
197
|
+
locate(cwd: string): string | null;
|
|
198
|
+
/** Parse a transcript file into a Turn. */
|
|
199
|
+
parse(path: string): Turn;
|
|
200
|
+
}
|
|
201
|
+
declare const ADAPTERS: Record<string, Adapter>;
|
|
202
|
+
declare const AGENT_NAMES: string[];
|
|
203
|
+
declare function getAdapter(name: string): Adapter | null;
|
|
204
|
+
/** Picks the adapter whose latest transcript is the most recently modified. */
|
|
205
|
+
declare function autoDetect(cwd: string): {
|
|
206
|
+
adapter: Adapter;
|
|
207
|
+
path: string;
|
|
208
|
+
} | null;
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* A privacy-safe local tally of verdict counts per turn. It stores ONLY counts,
|
|
212
|
+
* timestamps, the project path, and (when known) a session id — never code, claims, or prompts. Powers the
|
|
213
|
+
* `statusline` and `stats` commands.
|
|
214
|
+
*/
|
|
215
|
+
interface LedgerEntry {
|
|
216
|
+
/** ISO timestamp. */
|
|
217
|
+
t: string;
|
|
218
|
+
/** Project working directory. */
|
|
219
|
+
cwd: string;
|
|
220
|
+
/** Session id, when known. */
|
|
221
|
+
session?: string;
|
|
222
|
+
/** Counts of verified (`v`), unsupported (`u`), and review (`r`) verdicts. */
|
|
223
|
+
v: number;
|
|
224
|
+
u: number;
|
|
225
|
+
r: number;
|
|
226
|
+
}
|
|
227
|
+
interface LedgerSummary {
|
|
228
|
+
runs: number;
|
|
229
|
+
verified: number;
|
|
230
|
+
unsupported: number;
|
|
231
|
+
unverifiable: number;
|
|
232
|
+
}
|
|
233
|
+
declare function ledgerPath(): string;
|
|
234
|
+
/** Appends a turn's verdict counts to the ledger. Best-effort — never throws into the hook. */
|
|
235
|
+
declare function recordRun(report: Report, cwd: string, session?: string): void;
|
|
236
|
+
declare function readLedger(): LedgerEntry[];
|
|
237
|
+
declare function summarize(entries: LedgerEntry[], opts?: {
|
|
238
|
+
cwd?: string;
|
|
239
|
+
sinceDays?: number;
|
|
240
|
+
session?: string;
|
|
241
|
+
}): LedgerSummary;
|
|
242
|
+
|
|
243
|
+
export { ADAPTERS, AGENT_NAMES, type Adapter, type Claim, type ClaimKind, type Config, type Evidence, type FailLevel, type LedgerEntry, type LedgerSummary, type PipelineInput, type Polarity, type Report, type ReportSummary, type ToolUse, type Turn, type Verdict, type VerdictLevel, applyConfig, autoDetect, buildEvidence, buildReport, collectGitEvidence, emptyEvidence, extractClaims, failingCount, getAdapter, ledgerPath, loadConfig, mergeEvidence, parseCodex, parseCursor, parseGemini, parseTranscript, parseTranscriptFile, readLedger, recordRun, renderJson, renderMarkdown, renderTerminal, runPipeline, summarize, verifyClaims };
|