@mmnto/totem 1.67.1 → 1.69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capability/falsification.d.ts +18 -0
- package/dist/capability/falsification.d.ts.map +1 -0
- package/dist/capability/falsification.js +56 -0
- package/dist/capability/falsification.js.map +1 -0
- package/dist/capability/falsification.test.d.ts +2 -0
- package/dist/capability/falsification.test.d.ts.map +1 -0
- package/dist/capability/falsification.test.js +112 -0
- package/dist/capability/falsification.test.js.map +1 -0
- package/dist/capability/regenerate.d.ts +20 -0
- package/dist/capability/regenerate.d.ts.map +1 -0
- package/dist/capability/regenerate.js +0 -0
- package/dist/capability/regenerate.js.map +1 -0
- package/dist/capability/regenerate.test.d.ts +2 -0
- package/dist/capability/regenerate.test.d.ts.map +1 -0
- package/dist/capability/regenerate.test.js +136 -0
- package/dist/capability/regenerate.test.js.map +1 -0
- package/dist/capability/review-catch.d.ts +53 -0
- package/dist/capability/review-catch.d.ts.map +1 -0
- package/dist/capability/review-catch.js +92 -0
- package/dist/capability/review-catch.js.map +1 -0
- package/dist/capability/review-catch.test.d.ts +2 -0
- package/dist/capability/review-catch.test.d.ts.map +1 -0
- package/dist/capability/review-catch.test.js +90 -0
- package/dist/capability/review-catch.test.js.map +1 -0
- package/dist/capability/schema.d.ts +244 -0
- package/dist/capability/schema.d.ts.map +1 -0
- package/dist/capability/schema.js +141 -0
- package/dist/capability/schema.js.map +1 -0
- package/dist/capability/schema.test.d.ts +2 -0
- package/dist/capability/schema.test.d.ts.map +1 -0
- package/dist/capability/schema.test.js +93 -0
- package/dist/capability/schema.test.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -1
- package/dist/spine/candidate-rule.d.ts +84 -0
- package/dist/spine/candidate-rule.d.ts.map +1 -0
- package/dist/spine/candidate-rule.js +68 -0
- package/dist/spine/candidate-rule.js.map +1 -0
- package/dist/spine/candidate-rule.test.d.ts +2 -0
- package/dist/spine/candidate-rule.test.d.ts.map +1 -0
- package/dist/spine/candidate-rule.test.js +40 -0
- package/dist/spine/candidate-rule.test.js.map +1 -0
- package/dist/spine/classify.d.ts +93 -0
- package/dist/spine/classify.d.ts.map +1 -0
- package/dist/spine/classify.js +190 -0
- package/dist/spine/classify.js.map +1 -0
- package/dist/spine/classify.test.d.ts +2 -0
- package/dist/spine/classify.test.d.ts.map +1 -0
- package/dist/spine/classify.test.js +302 -0
- package/dist/spine/classify.test.js.map +1 -0
- package/dist/spine/compile.d.ts +62 -0
- package/dist/spine/compile.d.ts.map +1 -0
- package/dist/spine/compile.js +204 -0
- package/dist/spine/compile.js.map +1 -0
- package/dist/spine/compile.test.d.ts +2 -0
- package/dist/spine/compile.test.d.ts.map +1 -0
- package/dist/spine/compile.test.js +327 -0
- package/dist/spine/compile.test.js.map +1 -0
- package/dist/spine/extract.d.ts +146 -0
- package/dist/spine/extract.d.ts.map +1 -0
- package/dist/spine/extract.js +227 -0
- package/dist/spine/extract.js.map +1 -0
- package/dist/spine/extract.test.d.ts +2 -0
- package/dist/spine/extract.test.d.ts.map +1 -0
- package/dist/spine/extract.test.js +382 -0
- package/dist/spine/extract.test.js.map +1 -0
- package/dist/spine/ledgers.d.ts +1102 -0
- package/dist/spine/ledgers.d.ts.map +1 -0
- package/dist/spine/ledgers.js +209 -0
- package/dist/spine/ledgers.js.map +1 -0
- package/dist/spine/miner-harness.d.ts +30 -0
- package/dist/spine/miner-harness.d.ts.map +1 -0
- package/dist/spine/miner-harness.js +214 -0
- package/dist/spine/miner-harness.js.map +1 -0
- package/dist/spine/miner-harness.test.d.ts +2 -0
- package/dist/spine/miner-harness.test.d.ts.map +1 -0
- package/dist/spine/miner-harness.test.js +231 -0
- package/dist/spine/miner-harness.test.js.map +1 -0
- package/dist/spine/split.d.ts +149 -0
- package/dist/spine/split.d.ts.map +1 -0
- package/dist/spine/split.js +235 -0
- package/dist/spine/split.js.map +1 -0
- package/dist/spine/split.test.d.ts +2 -0
- package/dist/spine/split.test.d.ts.map +1 -0
- package/dist/spine/split.test.js +142 -0
- package/dist/spine/split.test.js.map +1 -0
- package/dist/spine/windtunnel-lock.d.ts +8 -8
- package/package.json +1 -1
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
// ─── ADR-111 Stage-1 Extract (slice 2; slice 5a resolution gate): review-thread → draft DSL ──
|
|
2
|
+
//
|
|
3
|
+
// The miner's deterministic Extract stage. It iterates the frozen split's TRAIN
|
|
4
|
+
// slice ONLY, fetches each PR's review thread through an injected port, applies
|
|
5
|
+
// the resolution-eligibility gate (slice 5a: drop resolved/outdated threads,
|
|
6
|
+
// mmnto-ai/totem#2201), runs a completeness check (≥1 HUMAN review comment on
|
|
7
|
+
// the surviving threads), drafts zero-or-more lesson-markdown DSL bodies through
|
|
8
|
+
// an injected `DraftExtractor` port, and either carries a transient
|
|
9
|
+
// `DraftCandidate` or loud-drops to the drop ledger with a reason code. It writes
|
|
10
|
+
// the drop + API-usage ledgers and the in-run seed-blindness fact.
|
|
11
|
+
//
|
|
12
|
+
// ZERO real LLM lives here: the `DraftExtractor` is a port, mocked in tests (the
|
|
13
|
+
// #2188 mock-first discipline); the live LLM adapter rides a later slice. Core
|
|
14
|
+
// stays network-free + LLM-free + deterministic — IO (GitHub fetch, the LLM
|
|
15
|
+
// call) is the CLI layer's, injected as ports (the `Stage4VerifierDeps` DI
|
|
16
|
+
// pattern).
|
|
17
|
+
//
|
|
18
|
+
// ADR-111 boundaries this module enforces:
|
|
19
|
+
// §1 unverified-only — Extract mints nothing; `DraftCandidate` is a
|
|
20
|
+
// transient stage-internal value, never the §3
|
|
21
|
+
// `CandidateRuleRecord` (minted in slice 3).
|
|
22
|
+
// §6 fail-loud, no degrade — every content/provenance/draft failure is a LOUD
|
|
23
|
+
// drop-ledger entry, never a thinner extraction.
|
|
24
|
+
// §6 train-only fetch — held-out / control / excluded PRs are NEVER
|
|
25
|
+
// fetched (FM h); `heldOutFetchCount` is recomputed
|
|
26
|
+
// from the frozen split, not trusted.
|
|
27
|
+
// §7 seed-blindness — the extractor is never handed a seed class (FM f);
|
|
28
|
+
// the fact is carried here, serialized into the
|
|
29
|
+
// emission ledger in slice 3 (single home, Tenet 20).
|
|
30
|
+
// FM(i) (slice-2 half) — every `trainPr` has draftCount + dropCount >= 1;
|
|
31
|
+
// none silently skipped.
|
|
32
|
+
//
|
|
33
|
+
// lesson-markdown is the DSL *syntax* (ADR-058 Pipeline 1/3 target), NOT a
|
|
34
|
+
// Pipeline-1 trust class: every draft body is `unverified` and Stage-4-gated by
|
|
35
|
+
// the slice-4 compiler, never a manual-rule trust bypass.
|
|
36
|
+
import { ProvenanceRecordSchema } from '../compiler-schema.js';
|
|
37
|
+
import { TotemParseError } from '../errors.js';
|
|
38
|
+
import { extractManualPattern } from '../lesson-pattern.js';
|
|
39
|
+
import { isBotIdentity } from './selection-rule.js';
|
|
40
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
41
|
+
/**
 * Tally the review comments that were written by a human and actually say
 * something (fold 5). A comment is skipped when its body is empty or
 * whitespace-only, or when the shared `isBotIdentity` check flags its author.
 * Bot-only or empty threads therefore add nothing toward §6's "≥1 review
 * comment" threshold and take the loud-drop path instead of seeding a draft.
 *
 * @param threads - iterable of review threads, each exposing `comments`
 * @returns the number of non-empty, non-bot comments across all threads
 */
function humanCommentCount(threads) {
    let total = 0;
    for (const { comments } of threads) {
        for (const { body, author } of comments) {
            // Blank bodies are rejected first; the bot check only runs on real text.
            const hasText = body.trim().length > 0;
            if (hasText && !isBotIdentity(author)) {
                total += 1;
            }
        }
    }
    return total;
}
|
|
58
|
+
/**
 * Resolution-eligibility gate (slice 5a, mmnto-ai/totem#2201). A thread whose
 * `isResolved` or `isOutdated` flag is set counts as superseded review
 * discussion and is filtered out. The flags are supplied by the adapter; the
 * decision is made here in core so the caller can ledger the rejection (§8).
 *
 * @param threads - array of review threads carrying `isResolved` / `isOutdated`
 * @returns a new array holding only the threads with neither flag set
 */
function eligibleThreads(threads) {
    const isSuperseded = (thread) => thread.isResolved || thread.isOutdated;
    return threads.filter((thread) => !isSuperseded(thread));
}
|
|
69
|
+
/**
 * Syntactic preflight (fold 4). A draft body counts as usable lesson-markdown
 * DSL only when `extractManualPattern` returns something other than `null`
 * for it. Three cases answer `false`, so the caller drops the draft as
 * `unparseable` rather than carrying it merely for being non-empty:
 *   - the body is empty or whitespace-only (the parser is not called);
 *   - the parser returns `null`;
 *   - the parser throws a `TotemParseError`, the expected authoring-error signal.
 *
 * @param dslSource - draft body text
 * @returns `true` when the parser yields a pattern, `false` otherwise
 * @throws any non-`TotemParseError` raised by the parser (rethrown, Tenet 4)
 */
function isUsableDsl(dslSource) {
    const isBlank = dslSource.trim().length === 0;
    if (isBlank) {
        return false;
    }
    let pattern;
    try {
        pattern = extractManualPattern(dslSource);
    }
    catch (failure) {
        // Only the expected authoring-error type is converted to "not usable";
        // anything else is an unexpected parser bug and must surface.
        if (!(failure instanceof TotemParseError)) {
            throw failure;
        }
        return false;
    }
    return pattern !== null;
}
|
|
92
|
+
/**
 * Assemble the provenance tuple for a train PR and validate it with
 * `ProvenanceRecordSchema.safeParse`. The tuple is `{ mergedPr, reviewThread,
 * commitSha }`: `reviewThread` is synthesized as `pulls/<pr>/comments`, and
 * `commitSha` is taken from `content.mergeCommitSha`. A tuple that fails
 * validation is reported, never returned partially (FM a / Tenet 4).
 *
 * @param pr - the train PR number being processed
 * @param content - fetched PR content; only `mergeCommitSha` is read here
 * @returns `{ ok: true, value }` with the parsed record, or
 *          `{ ok: false, reason }` where `reason` joins every validation
 *          issue message with `'; '`
 */
function buildProvenance(pr, content) {
    const candidate = {
        mergedPr: pr,
        reviewThread: `pulls/${pr}/comments`,
        commitSha: content.mergeCommitSha,
    };
    const outcome = ProvenanceRecordSchema.safeParse(candidate);
    if (outcome.success) {
        return { ok: true, value: outcome.data };
    }
    const messages = outcome.error.issues.map(({ message }) => message);
    return { ok: false, reason: messages.join('; ') };
}
|
|
114
|
+
// ── The Extract stage ─────────────────────────────────────────────────────────
|
|
115
|
+
/**
 * Stage-1 Extract: walk the frozen split's TRAIN slice and turn each PR's
 * review thread into transient draft candidates or ledgered drops.
 *
 * Train PRs are de-duplicated, sorted ascending, and processed one at a time
 * (each fetch and each draft call is awaited before the next PR starts), so
 * the output order follows directly from `split.trainPrs` and the injected
 * ports.
 *
 * For every train PR, in order:
 *   1. log the fetch in the API-usage ledger, then call `deps.source.fetch`;
 *   2. an `unreachable` / `unparseable` fetch result is dropped under that
 *      same reason code (using the result's `detail` when present);
 *   3. content whose `pr` differs from the requested PR is dropped as
 *      `incomplete-provenance`;
 *   4. resolution gate + completeness check: when no human comment survives
 *      on eligible threads, drop `resolved-rejected` if human comments existed
 *      before the gate, otherwise `truncated`;
 *   5. a provenance tuple that fails validation is dropped as
 *      `incomplete-provenance`;
 *   6. `deps.extractor.draft` receives the content with ONLY the surviving
 *      threads; an empty result is dropped `unparseable`;
 *   7. each returned body is kept as a draft if it passes `isUsableDsl`,
 *      otherwise dropped `unparseable`.
 *
 * Nothing thrown by the ports is caught here; it propagates to the caller.
 *
 * @param split - frozen split; only `split.trainPrs` is read here
 * @param deps - `{ source, extractor, seedClassesProvided }`
 * @returns `{ drafts, dropLedger, apiUsageLedger, seedBlindness }`
 */
export async function runExtractStage(split, deps) {
    const trainSet = new Set(split.trainPrs);
    const drafts = [];
    const dropEntries = [];
    const apiEntries = [];
    const recordDrop = (sourcePr, reasonCode, detail) => {
        dropEntries.push({ sourcePr, reasonCode, detail });
    };
    // TRAIN slice only (§6 / FM h): held-out / control / excluded PRs are never
    // requested. Ascending numeric order keeps the run deterministic.
    const orderedTrainPrs = Array.from(trainSet).sort((left, right) => left - right);
    for (const pr of orderedTrainPrs) {
        // The fetch is logged BEFORE it is attempted so failed fetches are audited too.
        apiEntries.push({ targetPr: pr, slice: 'train', fetchKind: 'review-thread' });
        const fetched = await deps.source.fetch(pr);
        if (fetched.kind === 'unreachable') {
            const detail = fetched.detail ?? `review thread unreachable for train PR #${pr}`;
            recordDrop(pr, 'unreachable', detail);
            continue;
        }
        if (fetched.kind === 'unparseable') {
            const detail = fetched.detail ?? `review thread unparseable for train PR #${pr}`;
            recordDrop(pr, 'unparseable', detail);
            continue;
        }
        const content = fetched.content;
        // Content-identity guard: content for a different PR would attribute a
        // draft to the wrong PR, so it is a provenance failure.
        if (content.pr !== pr) {
            recordDrop(pr, 'incomplete-provenance', `fetched content PR #${content.pr} does not match requested train PR #${pr}`);
            continue;
        }
        // Resolution-eligibility gate (slice 5a, mmnto-ai/totem#2201), applied
        // before the completeness check; human comments are recounted on the
        // surviving threads only.
        const preFilterHumanCount = humanCommentCount(content.threads);
        const survivingThreads = eligibleThreads(content.threads);
        const survivorHumanCount = humanCommentCount(survivingThreads);
        if (survivorHumanCount < 1) {
            if (preFilterHumanCount < 1) {
                // No human content even before the gate: thin thread, not a
                // resolution rejection.
                recordDrop(pr, 'truncated', 'no non-empty human review comment after bot filtering');
                continue;
            }
            // Human content existed and the gate removed all of it; the detail
            // carries the concrete counts as evidence.
            const ineligible = content.threads.length - survivingThreads.length;
            recordDrop(pr, 'resolved-rejected', `${ineligible} of ${content.threads.length} threads resolved/outdated; ${survivorHumanCount} eligible human comments remain`);
            continue;
        }
        // Provenance is all-or-nothing (FM a).
        const provenance = buildProvenance(pr, content);
        if (!provenance.ok) {
            recordDrop(pr, 'incomplete-provenance', provenance.reason);
            continue;
        }
        // The extractor sees the surviving threads only; every other content
        // field is passed through unchanged.
        const eligibleContent = { ...content, threads: survivingThreads };
        const draftBodies = await deps.extractor.draft(eligibleContent);
        if (draftBodies.length === 0) {
            // Still a ledgered outcome, so the train PR is accounted for (FM i).
            recordDrop(pr, 'unparseable', 'extractor produced no draft from a complete thread');
            continue;
        }
        for (const body of draftBodies) {
            if (isUsableDsl(body)) {
                drafts.push({ provenance: provenance.value, dslSource: body });
            }
            else {
                recordDrop(pr, 'unparseable', 'draft is empty or carries no usable **Pattern:**/yaml DSL');
            }
        }
    }
    // Recomputed from the frozen split instead of trusting the logged `slice`
    // label (fold 6): any logged target outside the train set counts.
    let heldOutFetchCount = 0;
    for (const { targetPr } of apiEntries) {
        if (!trainSet.has(targetPr)) {
            heldOutFetchCount += 1;
        }
    }
    return {
        drafts,
        dropLedger: { entries: dropEntries },
        apiUsageLedger: { entries: apiEntries, heldOutFetchCount },
        seedBlindness: { seedClassesProvided: deps.seedClassesProvided },
    };
}
|
|
227
|
+
//# sourceMappingURL=extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/spine/extract.ts"],"names":[],"mappings":"AAAA,gGAAgG;AAChG,EAAE;AACF,gFAAgF;AAChF,gFAAgF;AAChF,6EAA6E;AAC7E,8EAA8E;AAC9E,iFAAiF;AACjF,oEAAoE;AACpE,kFAAkF;AAClF,mEAAmE;AACnE,EAAE;AACF,iFAAiF;AACjF,+EAA+E;AAC/E,4EAA4E;AAC5E,2EAA2E;AAC3E,YAAY;AACZ,EAAE;AACF,2CAA2C;AAC3C,4EAA4E;AAC5E,4EAA4E;AAC5E,0EAA0E;AAC1E,iFAAiF;AACjF,8EAA8E;AAC9E,4EAA4E;AAC5E,iFAAiF;AACjF,mEAAmE;AACnE,mFAAmF;AACnF,6EAA6E;AAC7E,mFAAmF;AACnF,iFAAiF;AACjF,sDAAsD;AACtD,EAAE;AACF,2EAA2E;AAC3E,gFAAgF;AAChF,0DAA0D;AAE1D,OAAO,EAAyB,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AACtF,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAQ5D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAuIpD,gFAAgF;AAEhF;;;;;;GAMG;AACH,SAAS,iBAAiB,CAAC,OAAgC;IACzD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,MAAM,CAAC;gBAAE,KAAK,EAAE,CAAC;QAChF,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,OAAgC;IACvD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;AAC/D,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,WAAW,CAAC,SAAiB;IACpC,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAChD,IAAI,CAAC;QACH,OAAO,oBAAoB,CAAC,SAAS,CAAC,KAAK,IAAI,CAAC;IAClD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,8EAA8E;QAC9E,+EAA+E;QAC/E,kEAAkE;QAClE,IAAI,GAAG,YAAY,eAAe;YAAE,OAAO,KAAK,CAAC;QACjD,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CACtB,EAAU,EACV,OAA4B;IAE5B,MAAM,MAAM,GAAG,sBAAsB,CAAC,SAAS,CAAC;QAC9C,QAAQ,EAAE,EAAE;QACZ,YAAY,EAAE,SAAS,EAAE,WAAW;QACpC,SAAS,EAAE,OAAO,CAAC,cAAc;KAClC,CAAC,CAAC;IACH,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO;YACL,EAAE,EAAE,KAAK;YACT,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;SACrE,CAAC;IACJ,CAAC;IACD,OAA
O,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,CAAC;AAC1C,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAoB,EACpB,IAAsB;IAEtB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,MAAM,WAAW,GAAsB,EAAE,CAAC;IAC1C,MAAM,UAAU,GAA0B,EAAE,CAAC;IAE7C,MAAM,IAAI,GAAG,CAAC,QAAgB,EAAE,UAA0B,EAAE,MAAc,EAAQ,EAAE;QAClF,WAAW,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;IACrD,CAAC,CAAC;IAEF,6EAA6E;IAC7E,sDAAsD;IACtD,MAAM,QAAQ,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAErD,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,4EAA4E;QAC5E,wEAAwE;QACxE,oBAAoB;QACpB,UAAU,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC,CAAC;QAE9E,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC3C,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAClC,IAAI,CAAC,EAAE,EAAE,aAAa,EAAE,MAAM,CAAC,MAAM,IAAI,2CAA2C,EAAE,EAAE,CAAC,CAAC;YAC1F,SAAS;QACX,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAClC,IAAI,CAAC,EAAE,EAAE,aAAa,EAAE,MAAM,CAAC,MAAM,IAAI,2CAA2C,EAAE,EAAE,CAAC,CAAC;YAC1F,SAAS;QACX,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QAE/B,8EAA8E;QAC9E,8EAA8E;QAC9E,iFAAiF;QACjF,IAAI,OAAO,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACtB,IAAI,CACF,EAAE,EACF,uBAAuB,EACvB,uBAAuB,OAAO,CAAC,EAAE,uCAAuC,EAAE,EAAE,CAC7E,CAAC;YACF,SAAS;QACX,CAAC;QAED,2EAA2E;QAC3E,gFAAgF;QAChF,0EAA0E;QAC1E,+EAA+E;QAC/E,4DAA4D;QAC5D,MAAM,mBAAmB,GAAG,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC/D,MAAM,gBAAgB,GAAG,eAAe,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC1D,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,gBAAgB,CAAC,CAAC;QAE/D,IAAI,kBAAkB,GAAG,CAAC,EAAE,CAAC;YAC3B,IAAI,mBAAmB,IAAI,CAAC,EAAE,CAAC;gBAC7B,oEAAoE;gBACpE,uEAAuE;gBACvE,kEAAkE;gBAClE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,gBAAgB,CAAC,MAAM,CAAC;gBACpE,IAAI,CACF,EAAE,EACF,mBAAmB,EACnB,GAAG,UAAU,OAAO,OAAO,CAAC,OAAO,CAAC,MAAM,+BAA+B,kBAAkB,iCAAiC,CAC7H,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,yEAAyE;gBACzE,yDAAyD;gBACzD,IAAI,CAAC,
EAAE,EAAE,WAAW,EAAE,uDAAuD,CAAC,CAAC;YACjF,CAAC;YACD,SAAS;QACX,CAAC;QAED,0EAA0E;QAC1E,MAAM,UAAU,GAAG,eAAe,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,CAAC,EAAE,EAAE,CAAC;YACnB,IAAI,CAAC,EAAE,EAAE,uBAAuB,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;YACrD,SAAS;QACX,CAAC;QAED,+EAA+E;QAC/E,yEAAyE;QACzE,6EAA6E;QAC7E,+EAA+E;QAC/E,+EAA+E;QAC/E,8EAA8E;QAC9E,oEAAoE;QACpE,MAAM,eAAe,GAAwB,EAAE,GAAG,OAAO,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC;QACvF,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEhE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,4EAA4E;YAC5E,6CAA6C;YAC7C,IAAI,CAAC,EAAE,EAAE,aAAa,EAAE,oDAAoD,CAAC,CAAC;YAC9E,SAAS;QACX,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvB,IAAI,CAAC,EAAE,EAAE,aAAa,EAAE,2DAA2D,CAAC,CAAC;gBACrF,SAAS;YACX,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED,+EAA+E;IAC/E,4EAA4E;IAC5E,sDAAsD;IACtD,MAAM,iBAAiB,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;IAE7F,OAAO;QACL,MAAM;QACN,UAAU,EAAE,EAAE,OAAO,EAAE,WAAW,EAAE;QACpC,cAAc,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE;QAC1D,aAAa,EAAE,EAAE,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,EAAE;KACjE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract.test.d.ts","sourceRoot":"","sources":["../../src/spine/extract.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { runExtractStage, } from './extract.js';
|
|
3
|
+
import { SplitArtifactSchema } from './split.js';
|
|
4
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────
|
|
5
|
+
/** Fixture SHA: the value rendered as a string and left-padded with '0' to 40 characters. */
const sha = (n) => {
    const digits = String(n);
    return digits.padStart(40, '0');
};
|
|
6
|
+
/**
 * Build a split fixture: a baseline with train PRs 1-2 and held-out PRs 3-4,
 * shallow-merged with `overrides` (override keys win), then passed through
 * `SplitArtifactSchema.parse`.
 *
 * @param overrides - optional fields replacing the baseline values
 * @returns whatever `SplitArtifactSchema.parse` returns for the merged object
 */
function split(overrides) {
    const baseline = {
        asOfCommit: sha(100),
        trainPrs: [1, 2],
        heldOutPrs: [3, 4],
        excludedPrs: [],
        positiveControlPrs: [3],
        negativeControlPrs: [4],
        splitRule: { predicate: 'code-touching non-bot', cutIndex: 2 },
    };
    return SplitArtifactSchema.parse({ ...baseline, ...overrides });
}
|
|
18
|
+
/** Minimal lesson-markdown body with a flat `**Pattern:**` field; meant to pass the preflight. */
const USABLE_DSL = '**Pattern:** foo';
/** Plain prose without any `**Pattern:**` field; meant to fail the preflight and drop as `unparseable`. */
const NO_PATTERN_DSL = 'This is just prose with no pattern field.';
/**
 * A yaml `**Pattern:**` fence combined with a non-`ast-grep` engine. Per the
 * fixture's intent this makes `extractManualPattern` THROW a TotemParseError,
 * which the preflight's catch turns into a drop rather than a propagated throw.
 */
const PARSER_THROW_DSL = '**Pattern:**' + '\n' +
    '```yaml' + '\n' +
    'rule:' + '\n' +
    ' pattern: foo' + '\n' +
    '```' + '\n' +
    '**Engine:** regex';
|
|
35
|
+
/**
 * Build fetched-content fixture data for a PR: the PR number, a merge-commit
 * SHA derived via `sha(pr)`, and one unresolved, non-outdated thread holding a
 * single comment by 'Jane Doe'. `overrides` is shallow-merged last, so any
 * field it names replaces the default.
 *
 * @param pr - PR number recorded on the content
 * @param overrides - optional replacement fields
 */
function content(pr, overrides) {
    const defaultThread = {
        path: 'packages/core/src/x.ts',
        comments: [{ author: 'Jane Doe', body: 'a real review note' }],
        isResolved: false,
        isOutdated: false,
    };
    const defaults = {
        pr,
        mergeCommitSha: sha(pr),
        threads: [defaultThread],
    };
    return { ...defaults, ...overrides };
}
|
|
50
|
+
/**
 * Single-comment review-thread fixture. Either resolution flag falls back to
 * `false` when `flags` is omitted or when that flag is null/undefined, so the
 * default thread is eligible.
 *
 * @param author - comment author
 * @param body - comment body
 * @param flags - optional `{ isResolved, isOutdated }`
 */
function thread(author, body, flags) {
    const resolved = flags?.isResolved ?? false;
    const outdated = flags?.isOutdated ?? false;
    return {
        path: 'packages/core/src/x.ts',
        comments: [{ author, body }],
        isResolved: resolved,
        isOutdated: outdated,
    };
}
|
|
59
|
+
/**
 * Strict-spy fetch source (fold 6). Its async `fetch`:
 *   - rejects when asked for a PR outside `trainPrs`;
 *   - otherwise appends the PR to the exposed `fetched` list;
 *   - resolves to the entry in `results` for that PR, or, when there is none,
 *     to an `ok` result wrapping the default `content(pr)` fixture.
 *
 * @param trainPrs - PR numbers the spy is allowed to serve
 * @param results - optional Map of PR number → fetch result
 * @returns `{ fetched, fetch }`
 */
function spySource(trainPrs, results) {
    const allowed = new Set(trainPrs);
    const fetched = [];
    const fetch = async (pr) => {
        const isTrainPr = allowed.has(pr);
        if (!isTrainPr) {
            throw new Error(`[Totem Error] Extractor violated train boundary: fetched non-train PR ${pr}`);
        }
        fetched.push(pr);
        const scripted = results?.get(pr);
        return scripted ?? { kind: 'ok', content: content(pr) };
    };
    return { fetched, fetch };
}
|
|
78
|
+
/**
 * Fixture extractor. Its async `draft` resolves to the bodies listed for the
 * content's PR in `byPr`, or to a single usable body (`[USABLE_DSL]`) when the
 * map is absent or has no entry for that PR.
 *
 * @param byPr - optional Map of PR number → array of draft bodies
 * @returns `{ draft }`
 */
function fixtureExtractor(byPr) {
    const draft = async (c) => {
        const configured = byPr?.get(c.pr);
        return configured ?? [USABLE_DSL];
    };
    return { draft };
}
|
|
86
|
+
/** Bundle the injected ports into a deps object; `seedClassesProvided` defaults to `false`. */
const deps = (source, extractor, seedClassesProvided = false) => {
    const bundle = { source, extractor, seedClassesProvided };
    return bundle;
};
|
|
91
|
+
/** Set of PR numbers that appear in the result as a draft's `mergedPr` or a drop entry's `sourcePr`. */
const coveredPrs = (r) => {
    const draftPrs = r.drafts.map((draft) => draft.provenance.mergedPr);
    const droppedPrs = r.dropLedger.entries.map((entry) => entry.sourcePr);
    return new Set(draftPrs.concat(droppedPrs));
};
|
|
95
|
+
/** Drop-ledger entries whose `sourcePr` strictly equals `pr`, in ledger order. */
const dropsFor = (r, pr) => {
    const { entries } = r.dropLedger;
    return entries.filter(({ sourcePr }) => sourcePr === pr);
};
|
|
96
|
+
// Split fixture with PR 1 as the only train PR and PRs 2-4 held out; used by
// the per-drop-code fixtures.
const solo = () => {
    const soloOverrides = {
        trainPrs: [1],
        heldOutPrs: [2, 3, 4],
        positiveControlPrs: [3],
        negativeControlPrs: [4],
    };
    return split(soloOverrides);
};
|
|
98
|
+
// ─── Happy path ─────────────────────────────────────────────────────────────
|
|
99
|
+
describe('runExtractStage — happy path', () => {
    // Default fixtures: both train PRs fetch fine and the extractor returns one
    // usable body each, so nothing may land in the drop ledger.
    it('emits one draft per train PR with complete provenance and clean ledgers', async () => {
        const r = await runExtractStage(split(), deps(spySource([1, 2]), fixtureExtractor()));
        expect(r.drafts).toHaveLength(2);
        // Explicit numeric comparator: the default `sort()` compares elements as
        // strings, which would misorder PR numbers once a fixture uses more than
        // one digit (e.g. [10, 2]).
        expect(r.drafts.map((d) => d.provenance.mergedPr).sort((a, b) => a - b)).toEqual([1, 2]);
        expect(r.drafts[0].provenance).toEqual({
            mergedPr: 1,
            reviewThread: 'pulls/1/comments',
            commitSha: sha(1),
        });
        expect(r.drafts[0].dslSource).toBe(USABLE_DSL);
        expect(r.dropLedger.entries).toEqual([]);
        // One logged fetch per train PR, all labelled as train-slice fetches.
        expect(r.apiUsageLedger.entries).toHaveLength(2);
        expect(r.apiUsageLedger.entries.every((e) => e.slice === 'train')).toBe(true);
        expect(r.apiUsageLedger.heldOutFetchCount).toBe(0);
    });
    it('iterates the train slice in deterministic ascending order', async () => {
        const source = spySource([2, 1]); // train listed out of order
        await runExtractStage(split({ trainPrs: [2, 1], heldOutPrs: [3, 4] }), deps(source, fixtureExtractor()));
        // The spy records fetches in call order, so this pins the iteration order.
        expect(source.fetched).toEqual([1, 2]);
    });
});
|
|
121
|
+
// ─── Drop reason codes (one red fixture per code) ────────────────────────────
|
|
122
|
+
describe('runExtractStage — drop reason codes', () => {
|
|
123
|
+
it('unreachable: source reports the thread never fetched', async () => {
|
|
124
|
+
const r = await runExtractStage(solo(), deps(spySource([1], new Map([[1, { kind: 'unreachable' }]])), fixtureExtractor()));
|
|
125
|
+
expect(dropsFor(r, 1)).toEqual([
|
|
126
|
+
expect.objectContaining({ sourcePr: 1, reasonCode: 'unreachable' }),
|
|
127
|
+
]);
|
|
128
|
+
expect(r.drafts).toEqual([]);
|
|
129
|
+
});
|
|
130
|
+
it('unparseable (at source): a fetched-but-unparseable thread', async () => {
|
|
131
|
+
const r = await runExtractStage(solo(), deps(spySource([1], new Map([[1, { kind: 'unparseable' }]])), fixtureExtractor()));
|
|
132
|
+
expect(dropsFor(r, 1)[0].reasonCode).toBe('unparseable');
|
|
133
|
+
});
|
|
134
|
+
it('truncated: an empty thread (no comments)', async () => {
|
|
135
|
+
const r = await runExtractStage(solo(), deps(spySource([1], new Map([[1, { kind: 'ok', content: content(1, { threads: [] }) }]])), fixtureExtractor()));
|
|
136
|
+
expect(dropsFor(r, 1)[0].reasonCode).toBe('truncated');
|
|
137
|
+
});
|
|
138
|
+
it('truncated: a bot-only thread does not satisfy ≥1 human comment (fold 5)', async () => {
|
|
139
|
+
const botThread = content(1, {
|
|
140
|
+
threads: [
|
|
141
|
+
{
|
|
142
|
+
path: 'x.ts',
|
|
143
|
+
comments: [{ author: 'coderabbitai[bot]', body: 'nit: rename this' }],
|
|
144
|
+
isResolved: false,
|
|
145
|
+
isOutdated: false,
|
|
146
|
+
},
|
|
147
|
+
],
|
|
148
|
+
});
|
|
149
|
+
const r = await runExtractStage(solo(), deps(spySource([1], new Map([[1, { kind: 'ok', content: botThread }]])), fixtureExtractor()));
|
|
150
|
+
expect(dropsFor(r, 1)[0].reasonCode).toBe('truncated');
|
|
151
|
+
});
|
|
152
|
+
it('truncated: a whitespace-only human comment counts as no comment', async () => {
|
|
153
|
+
const wsThread = content(1, {
|
|
154
|
+
threads: [
|
|
155
|
+
{
|
|
156
|
+
path: 'x.ts',
|
|
157
|
+
comments: [{ author: 'Jane Doe', body: ' ' }],
|
|
158
|
+
isResolved: false,
|
|
159
|
+
isOutdated: false,
|
|
160
|
+
},
|
|
161
|
+
],
|
|
162
|
+
});
|
|
163
|
+
const r = await runExtractStage(solo(), deps(spySource([1], new Map([[1, { kind: 'ok', content: wsThread }]])), fixtureExtractor()));
|
|
164
|
+
expect(dropsFor(r, 1)[0].reasonCode).toBe('truncated');
|
|
165
|
+
});
|
|
166
|
+
it('incomplete-provenance: a malformed merge-commit SHA', async () => {
|
|
167
|
+
const badSha = content(1, { mergeCommitSha: 'NOTASHA' });
|
|
168
|
+
const r = await runExtractStage(solo(), deps(spySource([1], new Map([[1, { kind: 'ok', content: badSha }]])), fixtureExtractor()));
|
|
169
|
+
expect(dropsFor(r, 1)[0].reasonCode).toBe('incomplete-provenance');
|
|
170
|
+
expect(r.drafts).toEqual([]);
|
|
171
|
+
});
|
|
172
|
+
it('incomplete-provenance: fetched content for the wrong PR is a loud drop (CR-3)', async () => {
    // PR 1 is requested, but the source answers with content built for PR 2.
    const wrongPrContent = content(2);
    const responses = new Map([[1, { kind: 'ok', content: wrongPrContent }]]);
    const result = await runExtractStage(solo(), deps(spySource([1], responses), fixtureExtractor()));
    const firstDrop = dropsFor(result, 1)[0];
    expect(firstDrop.reasonCode).toBe('incomplete-provenance');
    expect(firstDrop.detail).toContain('does not match');
    expect(result.drafts).toEqual([]);
});
|
|
179
|
+
it('unparseable: a non-empty draft with no usable **Pattern:** (fold 4 preflight)', async () => {
    // The extractor hands back a single draft: the NO_PATTERN_DSL fixture.
    const cannedDrafts = new Map([[1, [NO_PATTERN_DSL]]]);
    const result = await runExtractStage(solo(), deps(spySource([1]), fixtureExtractor(cannedDrafts)));
    const drops = dropsFor(result, 1);
    expect(drops[0].reasonCode).toBe('unparseable');
    expect(result.drafts).toEqual([]);
});
|
|
184
|
+
it('unparseable: a draft that makes the parser throw is converted to a drop, not propagated (CR-2)', async () => {
    // The extractor hands back a single draft: the PARSER_THROW_DSL fixture.
    const cannedDrafts = new Map([[1, [PARSER_THROW_DSL]]]);
    const result = await runExtractStage(solo(), deps(spySource([1]), fixtureExtractor(cannedDrafts)));
    // The stage resolves and records a drop instead of rejecting.
    const drops = dropsFor(result, 1);
    expect(drops[0].reasonCode).toBe('unparseable');
    expect(result.drafts).toEqual([]);
});
|
|
189
|
+
it('unparseable: the extractor produced no draft from a complete thread', async () => {
    // The extractor is configured to return an empty draft list for PR 1.
    const noDrafts = new Map([[1, []]]);
    const result = await runExtractStage(solo(), deps(spySource([1]), fixtureExtractor(noDrafts)));
    const drops = dropsFor(result, 1);
    expect(drops[0].reasonCode).toBe('unparseable');
});
|
|
193
|
+
it('a contract-violating extractor throw propagates (fail-loud, not swallowed)', async () => {
    // Port contract: an extractor returns [] on a per-PR failure (the CLI adapter
    // catches its own IO errors). Throwing breaks that contract, and the stage must
    // let the error escape rather than hide it (Tenet 4). Per-PR resilience belongs
    // to the adapter; the []-return path is covered by the "extractor produced no
    // draft" case.
    const throwingExtractor = {
        draft: async (fetched) => {
            if (fetched.pr !== 1) {
                return [USABLE_DSL];
            }
            throw new Error('boom');
        },
    };
    const pending = runExtractStage(split(), deps(spySource([1, 2]), throwingExtractor));
    await expect(pending).rejects.toThrow('boom');
});
|
|
207
|
+
});
|
|
208
|
+
// ─── FM(i) slice-2 half: N-draft / M-drop accounting (fold 1) ─────────────────
|
|
209
|
+
describe('runExtractStage — FM(i) slice-2 accounting (at-least-one, list-shaped)', () => {
    it('a single PR may yield N drafts', async () => {
        // Two usable drafts are canned for PR 1.
        const cannedDrafts = new Map([[1, [USABLE_DSL, USABLE_DSL]]]);
        const result = await runExtractStage(solo(), deps(spySource([1]), fixtureExtractor(cannedDrafts)));
        expect(result.drafts).toHaveLength(2);
        const allFromPr1 = result.drafts.every(({ provenance }) => provenance.mergedPr === 1);
        expect(allFromPr1).toBe(true);
    });

    it('a single PR may yield a draft AND a drop', async () => {
        // One usable draft plus one draft without a usable pattern, both for PR 1.
        const cannedDrafts = new Map([[1, [USABLE_DSL, NO_PATTERN_DSL]]]);
        const result = await runExtractStage(solo(), deps(spySource([1]), fixtureExtractor(cannedDrafts)));
        expect(result.drafts).toHaveLength(1);
        const drops = dropsFor(result, 1);
        expect(drops).toHaveLength(1);
        expect(drops[0].reasonCode).toBe('unparseable');
    });

    it('every train PR is creditable: draftCount + dropCount >= 1 (none silently skipped)', async () => {
        // PR 1 produces a draft; PR 2 is reported unreachable (drop-only). Both count as covered.
        const responses = new Map([[2, { kind: 'unreachable' }]]);
        const result = await runExtractStage(split(), deps(spySource([1, 2], responses), fixtureExtractor()));
        expect(coveredPrs(result)).toEqual(new Set([1, 2]));
    });

    it('the coverage check has teeth — a PR in neither drafts nor drops is detectable', async () => {
        const result = await runExtractStage(split(), deps(spySource([1, 2]), fixtureExtractor()));
        expect(coveredPrs(result)).toEqual(new Set([1, 2]));
        // Strip every trace of PR 2 to imitate a silent skip; the coverage check must notice.
        const withoutPr2 = {
            ...result,
            drafts: result.drafts.filter(({ provenance }) => provenance.mergedPr !== 2),
            dropLedger: {
                entries: result.dropLedger.entries.filter(({ sourcePr }) => sourcePr !== 2),
            },
        };
        const covered = coveredPrs(withoutPr2);
        const missing = [1, 2].filter((pr) => !covered.has(pr));
        expect(missing).toEqual([2]);
    });
});
|
|
239
|
+
// ─── Train-only fetch boundary (FM h, fold 6) ─────────────────────────────────
|
|
240
|
+
describe('runExtractStage — train-only fetch boundary', () => {
    it('the spy source hard-fails if a non-train PR is fetched', async () => {
        // The spy is built for PRs 1 and 2; fetching PR 3 must reject.
        const source = spySource([1, 2]);
        await expect(source.fetch(3)).rejects.toThrow(/non-train PR 3/);
    });

    it('never fetches a held-out / control / excluded PR; heldOutFetchCount stays 0', async () => {
        const source = spySource([1, 2]);
        const r = await runExtractStage(split({ trainPrs: [1, 2], heldOutPrs: [3, 4], excludedPrs: [] }), deps(source, fixtureExtractor()));
        expect(source.fetched).toEqual([1, 2]); // only train PRs touched
        // Sort with a numeric comparator: the default Array#sort compares elements as
        // strings, which would misorder PR numbers once they reach two digits.
        const loggedTargets = r.apiUsageLedger.entries
            .map((e) => e.targetPr)
            .sort((a, b) => a - b);
        expect(loggedTargets).toEqual([1, 2]);
        expect(r.apiUsageLedger.heldOutFetchCount).toBe(0);
    });

    it('heldOutFetchCount is derived from the frozen split, not a trusted label', async () => {
        // Every logged entry targets a train PR → recomputed count is 0 regardless.
        const r = await runExtractStage(split(), deps(spySource([1, 2]), fixtureExtractor()));
        const recomputed = r.apiUsageLedger.entries.filter((e) => ![1, 2].includes(e.targetPr)).length;
        expect(recomputed).toBe(r.apiUsageLedger.heldOutFetchCount);
    });
});
|
|
259
|
+
// ─── Seed-blindness (FM f, carried in-run) ────────────────────────────────────
|
|
260
|
+
describe('runExtractStage — seed-blindness', () => {
    it('carries seedClassesProvided=false through to the result', async () => {
        // `false` is passed as the third argument to deps().
        const stageDeps = deps(spySource([1, 2]), fixtureExtractor(), false);
        const { seedBlindness } = await runExtractStage(split(), stageDeps);
        expect(seedBlindness.seedClassesProvided).toBe(false);
    });

    it('faithfully carries a violated attestation (slice 3 / the harness asserts it)', async () => {
        // With `true` as the third deps() argument, the result must report `true` unchanged.
        const stageDeps = deps(spySource([1, 2]), fixtureExtractor(), true);
        const { seedBlindness } = await runExtractStage(split(), stageDeps);
        expect(seedBlindness.seedClassesProvided).toBe(true);
    });
});
|
|
270
|
+
// ─── Resolution-eligibility gate (slice 5a, mmnto-ai/totem#2201) ──────────────
|
|
271
|
+
/**
 * Builds a stub extractor that remembers every content object passed to `draft`,
 * so a test can assert exactly what it was handed.
 *
 * @param {{ get: Function }} [byPr] Optional lookup (e.g. a Map) from `c.pr` to the
 *   value `draft` should resolve with. When it is omitted, or has no entry for that
 *   PR, `draft` resolves with `[USABLE_DSL]`.
 * @returns {{ seen: Array, draft: Function }} `seen` accumulates the arguments of
 *   each `draft` call in call order; `draft` is the async extractor entry point.
 */
function recordingExtractor(byPr) {
    const seen = [];
    const draft = async (c) => {
        seen.push(c);
        const canned = byPr?.get(c.pr);
        return canned ?? [USABLE_DSL];
    };
    return { seen, draft };
}
|
|
282
|
+
describe('runExtractStage — resolution-eligibility gate (slice 5a)', () => {
    /** Wraps content as the spy source's 'ok' response for PR 1. */
    const okFor = (prContent) => new Map([[1, { kind: 'ok', content: prContent }]]);

    /** PR 1 content with two human-authored threads: one resolved, one outdated. */
    const buildAllResolved = () => content(1, {
        threads: [
            thread('Jane Doe', 'a real review note', { isResolved: true }),
            thread('John Roe', 'another note', { isOutdated: true }),
        ],
    });

    it('all-resolved-but-had-human-content drops resolved-rejected (not truncated)', async () => {
        const allResolved = buildAllResolved();
        const result = await runExtractStage(solo(), deps(spySource([1], okFor(allResolved)), fixtureExtractor()));
        const drops = dropsFor(result, 1);
        expect(drops[0].reasonCode).toBe('resolved-rejected');
        expect(result.drafts).toEqual([]);
    });

    it('the resolved-rejected drop detail carries concrete resolution evidence', async () => {
        const allResolved = buildAllResolved();
        const result = await runExtractStage(solo(), deps(spySource([1], okFor(allResolved)), fixtureExtractor()));
        // A missing detail is treated as an empty string so the assertions below fail cleanly.
        const detail = dropsFor(result, 1)[0].detail ?? '';
        expect(detail).toContain('2 of 2 threads resolved/outdated');
        expect(detail).toContain('0 eligible human comments remain');
    });

    it('thin-to-begin-with (0 human comments before the gate) stays truncated, not resolved-rejected', async () => {
        // The only thread is resolved AND bot-authored: the resolution gate is not what
        // emptied it, because it never had a human comment. The truncated path still applies.
        const botResolved = content(1, {
            threads: [thread('coderabbitai[bot]', 'nit: rename this', { isResolved: true })],
        });
        const result = await runExtractStage(solo(), deps(spySource([1], okFor(botResolved)), fixtureExtractor()));
        const drops = dropsFor(result, 1);
        expect(drops[0].reasonCode).toBe('truncated');
    });

    it('partial resolution: survivors are processed; resolved threads excluded from the draft input', async () => {
        const mixed = content(1, {
            threads: [
                thread('Jane Doe', 'eligible note A'),
                thread('John Roe', 'resolved note B', { isResolved: true }),
                thread('Kate Poe', 'outdated note C', { isOutdated: true }),
            ],
        });
        const extractor = recordingExtractor();
        const result = await runExtractStage(solo(), deps(spySource([1], okFor(mixed)), extractor));
        // PR 1 is processed: one draft and no drop.
        expect(result.drafts).toHaveLength(1);
        expect(dropsFor(result, 1)).toEqual([]);
        // The extractor was called once, and only with the single eligible thread.
        expect(extractor.seen).toHaveLength(1);
        const survivors = extractor.seen[0].threads;
        expect(survivors).toHaveLength(1);
        expect(survivors[0].comments[0].body).toBe('eligible note A');
        const allEligible = survivors.every((t) => !(t.isResolved || t.isOutdated));
        expect(allEligible).toBe(true);
    });

    it('partial resolution: resolved threads do not count toward the human-comment threshold', async () => {
        // The unresolved thread is bot-only; the sole human comment sits in the resolved
        // thread. The survivors therefore hold 0 human comments → resolved-rejected.
        const mixed = content(1, {
            threads: [
                thread('coderabbitai[bot]', 'nit: rename', { isResolved: false }),
                thread('Jane Doe', 'a real human note', { isResolved: true }),
            ],
        });
        const result = await runExtractStage(solo(), deps(spySource([1], okFor(mixed)), fixtureExtractor()));
        const drops = dropsFor(result, 1);
        expect(drops[0].reasonCode).toBe('resolved-rejected');
        expect(result.drafts).toEqual([]);
    });

    it('a fully-eligible thread is unaffected by the gate (no regression)', async () => {
        // Default fixtures only: the PR must still produce one draft and no drop.
        const result = await runExtractStage(solo(), deps(spySource([1]), fixtureExtractor()));
        expect(result.drafts).toHaveLength(1);
        expect(dropsFor(result, 1)).toEqual([]);
    });
});
|
|
354
|
+
// ─── Determinism ──────────────────────────────────────────────────────────────
|
|
355
|
+
describe('runExtractStage — determinism', () => {
    it('identical inputs + fixed deps produce identical output', async () => {
        // Each invocation rebuilds its own source and extractor from the same literals.
        function runOnce() {
            const responses = new Map([[2, { kind: 'unreachable' }]]);
            const cannedDrafts = new Map([[1, [USABLE_DSL, NO_PATTERN_DSL]]]);
            return runExtractStage(split(), deps(spySource([1, 2], responses), fixtureExtractor(cannedDrafts)));
        }
        const first = await runOnce();
        const second = await runOnce();
        expect(first).toEqual(second);
    });

    it('identical inputs are deterministic across the resolution gate (drafts + drops + ledgers)', async () => {
        // PR 1 mixes an unresolved thread with a resolved one (one eligible survivor → a draft).
        const pr1 = content(1, {
            threads: [
                thread('Jane Doe', 'eligible note', { isResolved: false }),
                thread('John Roe', 'resolved note', { isResolved: true }),
            ],
        });
        // PR 2 has human content, but its only thread is outdated → a resolved-rejected drop.
        const pr2 = content(2, {
            threads: [thread('Kate Poe', 'a real note', { isOutdated: true })],
        });
        function runOnce() {
            const responses = new Map([
                [1, { kind: 'ok', content: pr1 }],
                [2, { kind: 'ok', content: pr2 }],
            ]);
            return runExtractStage(split(), deps(spySource([1, 2], responses), fixtureExtractor()));
        }
        const a = await runOnce();
        const b = await runOnce();
        expect(a).toEqual(b);
        const pr2Drops = dropsFor(a, 2);
        expect(pr2Drops[0].reasonCode).toBe('resolved-rejected');
    });
});
|
|
382
|
+
//# sourceMappingURL=extract.test.js.map
|