@delfini/drift-engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"doc-scope.d.ts","sourceRoot":"","sources":["../src/doc-scope.ts"],"names":[],"mappings":"AA+BA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,EAAE,CAgBvF;AAID;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA6BvF;AAID;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAYpE;AAID;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAS3E"}
@@ -0,0 +1,260 @@
1
+ // packages/drift-engine/src/doc-scope.ts
2
+ //
3
+ // Pure doc-scope algebra — the single home for normalization, validation,
4
+ // classification, and the in-scope predicate shared by EVERY surface:
5
+ // the Action (Full + Lite), the Web platform, and the @delfini/cli Skill.
6
+ // Consolidated here under ADR-2026-06-01 so "what smart-skip skips" and
7
+ // "what the expander includes" can never silently diverge — there is one
8
+ // rule set and one glob dialect (picomatch@4).
9
+ //
10
+ // HARD CONSTRAINTS (FR139 / NFR44 / ESLint no-restricted-imports on
11
+ // packages/drift-engine/src/**):
12
+ // - Side-effect-free, pure functions only. No fs, no child_process, no
13
+ // http/https, no network, no process.env, no clock, no randomness.
14
+ // - Platform-independent: the SAME function runs on the developer's
15
+ // Windows CLI, the Action's Linux CI, and the Web edge runtime and MUST
16
+ // return identical results. We therefore do NOT import `node:path`
17
+ // (its `path.sep` is platform-specific and the `node:` specifier can
18
+ // trip edge bundlers) — POSIX normalization is implemented inline below.
19
+ // - Sole matcher: picomatch@4 (the second runtime dep added under
20
+ // ADR-2026-06-01). No other glob engine, no hand-rolled magic-char
21
+ // detection — picomatch owns the dialect.
22
+ //
23
+ // I/O EXPANSION IS NOT HERE. Materialising a scope into a concrete file set
24
+ // (fs walk, git-trees, Octokit) stays per-surface (ports/adapters): the CLI
25
+ // `expandDocScope`, the Action/Web git-trees match. These functions reason
26
+ // over path STRINGS only.
27
+ import picomatch from 'picomatch';
28
+ // -- normalizeDocScope --------------------------------------------------------
29
+ /**
30
+ * Canonicalise a doc-scope value to a deduped POSIX `string[]`.
31
+ *
32
+ * - `null` / `undefined` coerce to `[]` (defensive — JSON config loaders
33
+ * commonly produce these at the boundary).
34
+ * - A single `string` wraps as `[value]`. It is NOT comma/newline-split —
35
+ * delimited-string splitting is a per-surface concern (e.g. Lite's
36
+ * `docs_path` is split in `readPipelineInputs()`), deliberately kept out
37
+ * of the pure algebra.
38
+ * - Each entry is `.trim()`-ed before further processing so `' docs '`
39
+ * and `'docs'` dedupe to one entry (matches `validateDocScopeEntry`'s
40
+ * own trim — keeps validate/normalize aligned).
41
+ * - Backslashes are normalised to forward slashes (the persisted dialect is
42
+ * POSIX). Trailing slashes are stripped; `//` runs collapse; `./` and
43
+ * `..` segments resolve via the inline POSIX normaliser. So `'./docs'`
44
+ * and `'docs'` dedupe, `'docs//api'` becomes `'docs/api'`, and
45
+ * `'docs/sub/../api/*.md'` becomes `'docs/api/*.md'` (which the matcher
46
+ * can actually match).
47
+ * - Entries are deduped, preserving first-occurrence order.
48
+ * - Entries that collapse to nothing (`''`, `'/'`, `'.'`, `'./'`) are
49
+ * dropped — these are tautological or empty, and `validateDocScopeEntry`
50
+ * would otherwise have to special-case them.
51
+ *
52
+ * Non-emptiness of the OUTPUT is NOT enforced here — that is a schema /
53
+ * validation concern at each surface (`docScopeSchema.min(1)`,
54
+ * `writeDocScope`).
55
+ *
56
+ * `normalizeDocScope` is intentionally NOT a security gate: an escape entry
57
+ * like `'../secrets'` survives (validation is `validateDocScopeEntry`'s
58
+ * job). The matcher in `isFileInDocScope` then can't match it against any
59
+ * real in-tree file path, so the worst-case outcome is "silent no-match,"
60
+ * not exfiltration.
61
+ */
62
export function normalizeDocScope(input) {
    // Absent config values canonicalise to an empty scope.
    if (input === null || input === undefined) {
        return [];
    }
    // A lone string is a single entry; it is never split on delimiters here.
    const rawEntries = typeof input === 'string' ? [input] : input;
    // A Set iterates in insertion order, so it both dedupes and preserves
    // first-occurrence ordering.
    const canonicalEntries = new Set();
    for (const candidate of rawEntries) {
        if (typeof candidate !== 'string') {
            continue;
        }
        const text = candidate.trim();
        if (text.length === 0) {
            continue;
        }
        const canonical = stripTrailingSlashes(posixNormalize(toPosix(text)));
        // Entries that collapse to nothing ('', '/', '.', './') are dropped.
        if (canonical.length !== 0 && canonical !== '.') {
            canonicalEntries.add(canonical);
        }
    }
    return [...canonicalEntries];
}
84
+ // -- validateDocScopeEntry ----------------------------------------------------
85
/**
 * Validate a single doc-scope entry. Pure string reasoning only: nothing is
 * resolved against the filesystem.
 *
 * Rejects, in this order:
 *  - non-string, empty, or whitespace-only entries;
 *  - entries containing ASCII control characters in the range
 *    `\x00`-`\x1f` (checked on the untrimmed value, so a trailing newline is
 *    rejected too);
 *  - absolute paths (POSIX `/...` and Windows-drive `C:\...` / `C:/...`);
 *  - entries whose normalised form climbs above the directory they are
 *    relative to (`../x`, `a/../../x`, and traversal hidden behind a glob
 *    segment such as `**` followed by `../../x`).
 *
 * The escape check normalises the entry ON ITS OWN. An entry escapes its root
 * exactly when its own normalisation starts with `..`, whatever that root is.
 * Joining under the root first is only equivalent for root `'.'`: a nested
 * root's segments absorb leading `..`s, and an absolute root drops them, so
 * both would hide an escape.
 *
 * This validator is not invoked by `normalizeDocScope` or `isFileInDocScope`;
 * callers apply it themselves at user-input boundaries.
 *
 * @param {string} entry - The raw doc-scope entry to validate.
 * @param {string} [repoRootRel] - Relative root marker (callers pass `'.'`).
 *   Kept for interface compatibility; the escape check does not depend on it.
 * @returns {string | null} `null` when valid, otherwise a human-readable
 *   error message.
 */
export function validateDocScopeEntry(entry, repoRootRel) {
    if (typeof entry !== 'string' || entry.trim().length === 0) {
        return 'doc-scope entry must be a non-empty string';
    }
    // Control characters can survive a JSON round-trip but can never be part
    // of a path the matcher would hit; reject loudly instead.
    if (/[\x00-\x1f]/.test(entry)) {
        return `doc-scope entry must not contain control characters: ${JSON.stringify(entry)}`;
    }
    const posixEntry = toPosix(entry.trim());
    if (isAbsolutePath(posixEntry)) {
        return `doc-scope entries must be relative to the repo root: ${entry}`;
    }
    // Normalise the WHOLE entry (glob segments are treated as literals). Any
    // net upward traversal surfaces as a leading `..` in the result.
    const resolved = posixNormalize(posixEntry);
    if (resolved === '..' || resolved.startsWith('../')) {
        return `doc-scope entry escapes repo root: ${entry}`;
    }
    return null;
}
139
+ // -- classifyEntry ------------------------------------------------------------
140
+ /**
141
+ * Classify a doc-scope entry by SHAPE — a pure string heuristic, NOT a
142
+ * filesystem check (this module cannot `stat`):
143
+ * - `'glob'` — contains glob magic (decided by picomatch's own scanner, so
144
+ * the classification dialect matches the matching dialect).
145
+ * - `'dir'` — `.` / `''` (repo-root tautology), OR last segment starts
146
+ * with a `.` (hidden directory pattern: `.github`, `.husky`, `.vscode`,
147
+ * `.changeset`, etc.), OR last segment has no `.` at all.
148
+ * - `'file'` — not a glob, not dot-prefix, AND last segment contains a `.`
149
+ * (heuristic: it looks like `name.ext`).
150
+ *
151
+ * KNOWN LIMITATION: versioned directories like `docs/v1.2` are misclassified
152
+ * as files by the dot-in-last-segment heuristic (we'd need a real extension
153
+ * registry to distinguish `v1.2` from `index.md`). Users who scope a
154
+ * versioned doc tree should prefer an explicit glob form (e.g.
155
+ * `docs/v1.2/<globstar>/*.md`). The predicate's dir/file branches degrade
156
+ * silently here — there is no authoritative fs-expander rescue for the
157
+ * smart-skip path-shape use case.
158
+ */
159
export function classifyEntry(entry) {
    const normalisedEntry = toPosix(entry);
    // picomatch's own scanner decides what counts as glob magic.
    const { isGlob } = picomatch.scan(normalisedEntry);
    if (isGlob) {
        return 'glob';
    }
    const bare = stripTrailingSlashes(normalisedEntry);
    if (bare === '' || bare === '.') {
        return 'dir';
    }
    const segments = bare.split('/');
    const tail = segments.pop() ?? '';
    // A dot-prefixed last segment (.github, .husky, ...) is treated as a
    // directory; otherwise a dot anywhere in the segment reads as `name.ext`.
    const looksLikeFile = !tail.startsWith('.') && tail.includes('.');
    return looksLikeFile ? 'file' : 'dir';
}
175
+ // -- isFileInDocScope ---------------------------------------------------------
176
+ /**
177
+ * True iff `filePath` falls within any entry of `scope`. Both `filePath` and
178
+ * the scope entries are repo-relative POSIX paths.
179
+ *
180
+ * Per-entry strategy keys off `classifyEntry`:
181
+ * - `'dir'` -> matches the recursive subtree (`docs` ⇒ `docs/**`).
182
+ * - `'file'` -> exact path match.
183
+ * - `'glob'` -> picomatch semantics.
184
+ *
185
+ * The predicate is PATH-SHAPE-ONLY — it does not filter by `.md` extension.
186
+ * The `.md`-only restriction on directory expansion belongs to the expanders
187
+ * (CLI `expandDocScope`, Action/Web git-trees match), which keeps this
188
+ * predicate usable by smart-skip on arbitrary changed-file paths.
189
+ *
190
+ * Matching is `dot: false, nocase: true`:
191
+ * - case-insensitive matching aligns with the CLI expander's existing
192
+ * `caseSensitiveMatch: false` (fs realism on Windows/macOS), so the same
193
+ * repo cloned across platforms returns identical in-scope decisions —
194
+ * the dialect-parity invariant the ADR exists to enforce. The header's
195
+ * "platform-independent results" promise IS the case-insensitive choice.
196
+ * - `dot: false` matches the CLI expander default. Dot-prefix hidden
197
+ * directories still match via the `classifyEntry` → `'dir'` path
198
+ * (entry `'.github'` becomes pattern `'.github/**'`, which picomatch
199
+ * matches against `.github/workflows/x.yml` even with `dot: false`
200
+ * because the literal `.github` prefix is present in the pattern).
201
+ *
202
+ * The `filePath` is defensively normalised: backslashes converted to
203
+ * forward slashes, leading `/` and `./` runs stripped, `..` segments
204
+ * resolved — so callers feeding webhook payloads (`/docs/a.md` from
205
+ * `URL.pathname`), Windows-style paths (`docs\a.md`), or composed paths
206
+ * (`./docs/sub/../a.md`) all collapse to the same canonical form before
207
+ * matching.
208
+ */
209
export function isFileInDocScope(filePath, scope) {
    // Canonicalise the candidate: forward slashes, no leading `/`, `.`/`..`
    // segments resolved.
    const withoutLeadingSlashes = toPosix(filePath).replace(/^\/+/, '');
    const candidate = posixNormalize(withoutLeadingSlashes);
    if (candidate === '' || candidate === '.') {
        return false;
    }
    return normalizeDocScope(scope).some((entry) => {
        // Directory-shaped entries cover their whole subtree; file and glob
        // entries are handed to picomatch unchanged.
        const glob = classifyEntry(entry) === 'dir' ? `${entry}/**` : entry;
        const matches = picomatch(glob, { dot: false, nocase: true });
        return Boolean(matches(candidate));
    });
}
221
+ // -- Internal helpers (NOT exported via index.ts) -----------------------------
222
/**
 * Convert every backslash in `p` to a forward slash.
 *
 * @param {string} p - Path-like string.
 * @returns {string} The same string with `\` replaced by `/`.
 */
function toPosix(p) {
    return p.replace(/\\/g, '/');
}
225
/**
 * Remove every `/` at the end of `p`.
 *
 * @param {string} p - POSIX-style path string.
 * @returns {string} `p` without its trailing run of slashes (may be empty).
 */
function stripTrailingSlashes(p) {
    let end = p.length;
    while (end > 0 && p[end - 1] === '/') {
        end -= 1;
    }
    return p.slice(0, end);
}
228
/**
 * Report whether an already-POSIX-ified entry is absolute.
 *
 * @param {string} posixEntry - Entry with backslashes already converted.
 * @returns {boolean} True for a leading `/` or a drive prefix such as `C:/`.
 */
function isAbsolutePath(posixEntry) {
    if (posixEntry.startsWith('/')) {
        return true;
    }
    // Windows drive-absolute: one ASCII letter, a colon, then a slash.
    const drivePrefix = /^[A-Za-z]:\//;
    return drivePrefix.test(posixEntry);
}
232
+ /**
233
+ * Pure POSIX path normalisation — resolves `.` and `..` segments without any
234
+ * `node:path` dependency (edge-safe, platform-independent). Glob magic
235
+ * characters (`*`, `**`, `{`, `?`, etc.) are treated as ordinary literal
236
+ * segments, which is exactly what the repo-escape check needs.
237
+ */
238
function posixNormalize(input) {
    const rooted = input.startsWith('/');
    const stack = [];
    for (const part of input.split('/')) {
        switch (part) {
            case '':
            case '.':
                // Empty segments (from `//` or edge slashes) and `.` are no-ops.
                break;
            case '..': {
                const canAscend = stack.length > 0 && stack[stack.length - 1] !== '..';
                if (canAscend) {
                    stack.pop();
                }
                else if (!rooted) {
                    // A relative path keeps the unresolved `..`; a rooted
                    // path cannot climb above `/`, so the segment is dropped.
                    stack.push('..');
                }
                break;
            }
            default:
                stack.push(part);
        }
    }
    const body = stack.join('/');
    if (rooted) {
        return `/${body}`;
    }
    return body.length === 0 ? '.' : body;
}
@@ -0,0 +1,11 @@
1
+ export { buildPrompt, buildPromptWithDrops } from './prompt-builder.js';
2
+ export { validateAndReconcile } from './reconcile.js';
3
+ export { estimatePromptTokens } from './prompt-budget.js';
4
+ export { analysisSchema } from './schema.js';
5
+ export { normalizeDocScope, validateDocScopeEntry, classifyEntry, isFileInDocScope, } from './doc-scope.js';
6
+ export { filterDiff } from './diff-filter.js';
7
+ export { rankedFillSections } from './relevance.js';
8
+ export type { AnalysisInput, AnalysisResult, DocFile, Contradiction, Addition, ClarifyingQuestion, PRMetadata, Severity, BuildPromptOptions, } from './types.js';
9
+ export type { DropReason, DroppedPath, DroppedHunk, FilterDiffResult, } from './diff-filter.js';
10
+ export type { DocSection, DroppedSection, RankedFillCandidate, RankedFillResult, } from './relevance.js';
11
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA0BA,OAAO,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AACrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAI5C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,aAAa,EACb,gBAAgB,GACjB,MAAM,gBAAgB,CAAA;AAOvB,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAS7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAEnD,YAAY,EACV,aAAa,EACb,cAAc,EACd,OAAO,EACP,aAAa,EACb,QAAQ,EACR,kBAAkB,EAClB,UAAU,EACV,QAAQ,EACR,kBAAkB,GACnB,MAAM,YAAY,CAAA;AAEnB,YAAY,EACV,UAAU,EACV,WAAW,EACX,WAAW,EACX,gBAAgB,GACjB,MAAM,kBAAkB,CAAA;AAEzB,YAAY,EACV,UAAU,EACV,cAAc,EACd,mBAAmB,EACnB,gBAAgB,GACjB,MAAM,gBAAgB,CAAA"}
package/dist/index.js ADDED
@@ -0,0 +1,46 @@
1
+ // Public API surface for @delfini/drift-engine.
2
+ //
3
+ // Imported by both apps/action (CI surface) and packages/cli (Skill surface).
4
+ // Algorithm parity between the two surfaces holds by construction: a finding
5
+ // surfaced locally is the same finding the Action will surface on the
6
+ // eventual PR.
7
+ //
8
+ // Hard rules (enforced via ESLint no-restricted-imports on
9
+ // packages/drift-engine/src/**/*.ts):
10
+ // - No fs / child_process / http / https
11
+ // - No @anthropic-ai/sdk / openai / @langchain/*
12
+ // - No process.env reads
13
+ // - Runtime deps: zod + picomatch (both pure CPU — no I/O, no network, no
14
+ // env). picomatch was added under ADR-2026-06-01 as the single glob
15
+ // dialect for the doc-scope algebra below; the no-I/O charter is intact.
16
+ // Adding any other runtime dep, or any of the blocked imports above, is a
17
+ // regression.
18
+ //
19
+ // Per AC2 (architecture.md L1055–L1070): the barrel exposes exactly the
20
+ // documented surface — no internal helpers (`dedupeOverlappingContradictions`,
21
+ // `filterActionableContradictions`, `reconcileLineNumbers`,
22
+ // `reconcileAdditiveAnchors`, `ContradictionSchema`, `AdditionSchema`,
23
+ // `locateQuote`, `locateAnchorHeading`, `WarnFn`, etc.) leak through.
24
+ // Tests reach internal helpers via relative `../src/...` imports because
25
+ // they live inside the same workspace package.
26
+ export { buildPrompt, buildPromptWithDrops } from './prompt-builder.js';
27
+ export { validateAndReconcile } from './reconcile.js';
28
+ export { estimatePromptTokens } from './prompt-budget.js';
29
+ export { analysisSchema } from './schema.js';
30
+ // Doc-scope algebra (ADR-2026-06-01) — shared normalize / validate / classify
31
+ // / in-scope predicate. Pure; picomatch@4 is the single glob dialect.
32
+ export { normalizeDocScope, validateDocScopeEntry, classifyEntry, isFileInDocScope, } from './doc-scope.js';
33
+ // Story P3.7.2 / FR151 — deterministic diff pre-filter. Exported because the
34
+ // gate lives at the consumer (CLI `runLocalPrepare` / Action `buildAnalysisInput`)
35
+ // not inside `buildPrompt`; see story Dev Notes §"Where the gate lives". The
36
+ // default consumer path does not call this — `buildPrompt` output stays
37
+ // byte-identical and the NFR44 snapshot gate stays green.
38
+ export { filterDiff } from './diff-filter.js';
39
+ // Story P3.7.3 / FR152 — ranked-fill prompt budget. The pure cross-doc
40
+ // selector + the cross-doc DroppedSection shape extension live in relevance.ts
41
+ // (sibling to selectRelevantSections). The drops-aware sibling
42
+ // `buildPromptWithDrops` is exported above. These are reachable through the
43
+ // public surface because the CLI consumer (`runLocalPrepare`) needs the
44
+ // drop record to render the "dropped N section(s) — over prompt budget"
45
+ // header and to write `_rankedFillResult` into `.delfini-trace/`.
46
+ export { rankedFillSections } from './relevance.js';
@@ -0,0 +1,2 @@
1
+ export declare function estimatePromptTokens(prompt: string): number;
2
+ //# sourceMappingURL=prompt-budget.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt-budget.d.ts","sourceRoot":"","sources":["../src/prompt-budget.ts"],"names":[],"mappings":"AAaA,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAE3D"}
@@ -0,0 +1,16 @@
1
+ // Cheap, deterministic, dependency-free token estimator.
2
+ //
3
+ // Consumer: `delfini local-prepare` (Story P3.2.2) uses this to decide whether
4
+ // to exit `4` (`prompt_too_large`) before dispatching the subagent. Estimate
5
+ // only — Anthropic's per-request input-token limit is the hard ceiling; the
6
+ // CLI's budget is set well below that, so a 5–10% error is fine.
7
+ //
8
+ // Heuristic: `Math.ceil(length / 3.5)` is empirically close to GPT-style BPE
9
+ // tokenization for English + code mix. Do NOT add a tokenizer dependency
10
+ // (`gpt-tokenizer`, `js-tiktoken`, etc.) here — drift-engine's sole runtime
11
+ // dep is `zod`; adding a tokenizer would violate FR139 + AC8. If byte-
12
+ // accurate counting becomes necessary for cost prediction in a Post-MVP
13
+ // feature, revisit then. Premature optimisation otherwise.
14
export function estimatePromptTokens(prompt) {
    // Heuristic ratio: roughly 3.5 characters per token, rounded up.
    const charsPerToken = 3.5;
    const rawEstimate = prompt.length / charsPerToken;
    return Math.ceil(rawEstimate);
}
@@ -0,0 +1,21 @@
1
+ import type { AnalysisInput, BuildPromptOptions } from './types.js';
2
+ import { type DroppedSection } from './relevance.js';
3
+ export declare function buildPrompt(input: AnalysisInput, template: string, options?: BuildPromptOptions): string;
4
+ /**
5
+ * Drops-aware variant of `buildPrompt` — returns both the rendered prompt
6
+ * and the cross-doc ranked-fill drop record (Story P3.7.3 / FR152). The
7
+ * `droppedSections` array is non-empty ONLY when both
8
+ * `relevanceThreshold > 0` AND `promptTokenBudget > 0` are supplied AND
9
+ * ranked-fill actually dropped at least one retained section. Every other
10
+ * code path (default, retrieval-only, budget-only-without-threshold)
11
+ * returns an empty `droppedSections` array.
12
+ *
13
+ * The single internal rendering path means `buildPrompt`'s output and
14
+ * `buildPromptWithDrops().prompt` are byte-identical for any given input —
15
+ * the NFR44 snapshot test never has to choose between them.
16
+ */
17
+ export declare function buildPromptWithDrops(input: AnalysisInput, template: string, options?: BuildPromptOptions): {
18
+ prompt: string;
19
+ droppedSections: DroppedSection[];
20
+ };
21
+ //# sourceMappingURL=prompt-builder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt-builder.d.ts","sourceRoot":"","sources":["../src/prompt-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,kBAAkB,EAAW,MAAM,YAAY,CAAA;AAC5E,OAAO,EAKL,KAAK,cAAc,EAEpB,MAAM,gBAAgB,CAAA;AAkFvB,wBAAgB,WAAW,CACzB,KAAK,EAAE,aAAa,EACpB,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,kBAAkB,GAC3B,MAAM,CAER;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,aAAa,EACpB,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,kBAAkB,GAC3B;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,cAAc,EAAE,CAAA;CAAE,CA+EvD"}
@@ -0,0 +1,267 @@
1
+ import { rankedFillSections, scoreDocRelevance, selectRelevantSections, } from './relevance.js';
2
+ import { estimatePromptTokens } from './prompt-budget.js';
3
/**
 * Count the `diff --git ` file headers in a unified diff.
 *
 * @param {string} diff - Unified diff text (may be empty).
 * @returns {number} Number of lines that start with `diff --git `.
 */
function countChangedFiles(diff) {
    // An empty or absent diff has no headers to count.
    if (!diff) {
        return 0;
    }
    const headers = diff.match(/^diff --git /gm);
    return headers?.length ?? 0;
}
9
+ // Story 3.9b — prefix every line of `content` with its absolute (original-file)
10
+ // line number. Line `i` (0-indexed) becomes line `lineOffset + i + 1`
11
+ // (1-indexed). The LLM uses these prefixes when emitting `targetLineStart` /
12
+ // `targetLineEnd`, so it doesn't have to count lines. `quotedDocText` (also new
13
+ // in 3.9b) excludes the `N: ` prefix per the prompt instructions, so the
14
+ // reconciler can `indexOf` the quote in the raw body without stripping prefixes.
15
+ //
16
+ // `lineOffset` is the count of original-file lines BEFORE `content`'s first
17
+ // line. For a whole doc that is `frontMatterLineCount`. For a heading-delimited
18
+ // SECTION (FR150) it is `frontMatterLineCount + section.startLineIndex`, so a
19
+ // retained mid-file section keeps its TRUE absolute line numbers even when
20
+ // earlier sections were elided — the numbers are never renumbered from 1 nor
21
+ // shifted by a dropped section's length.
22
function prefixDocLines(content, lineOffset) {
    const numbered = [];
    // Accept both LF and CRLF input; output is always LF-joined.
    for (const [index, line] of content.split(/\r?\n/).entries()) {
        // 0-indexed position -> 1-indexed absolute line number.
        numbered.push(`${lineOffset + index + 1}: ${line}`);
    }
    return numbered.join('\n');
}
26
/**
 * Expand every `{{#each docs}}…{{/each}}` block in `template`, emitting the
 * block's inner text once per doc with `{{this.path}}` and `{{this.content}}`
 * filled in.
 *
 * Both placeholders are filled in ONE pass with a function replacer, for two
 * reasons:
 *  - a string replacement would interpret `$$`, `$&`, `` $` `` and `$'`
 *    inside the doc path or body as replacement patterns and corrupt them;
 *  - a second pass would rescan text that was just inserted, so a path
 *    containing the literal `{{this.content}}` would be expanded.
 *
 * @param {string} template - Prompt template text.
 * @param {Array<{path: string, renderedContent: string}>} docs - Docs to
 *   render, in output order.
 * @returns {string} The template with each-blocks expanded; text outside the
 *   blocks is untouched.
 */
function renderDocsBlock(template, docs) {
    const eachBlock = /\{\{#each docs\}\}([\s\S]*?)\{\{\/each\}\}/g;
    const docPlaceholder = /\{\{this\.(path|content)\}\}/g;
    return template.replace(eachBlock, (_match, inner) => docs
        .map((doc) => inner.replace(docPlaceholder, (_placeholder, field) => field === 'path' ? doc.path : doc.renderedContent))
        .join(''));
}
33
+ // Render a single retained section with its ABSOLUTE original-file line
34
+ // numbers (frontMatter offset + section start within the doc body). Used by
35
+ // both the per-doc section-gated path and the cross-doc ranked-fill path.
36
function renderSection(doc, section) {
    const body = section.lines.join('\n');
    // Lines before the section = front-matter lines + the section's start
    // index within the doc body.
    const linesBefore = doc.frontMatterLineCount + section.startLineIndex;
    return prefixDocLines(body, linesBefore);
}
39
+ // Render a doc's body for the section-gated path: keep only sections scoring
40
+ // at/above the threshold, each prefixed with its absolute line numbers. Returns
41
+ // null when no section survives — the doc is then omitted from the prompt
42
+ // entirely (same outcome as the whole-doc gate dropping an irrelevant doc).
43
function renderGatedDocContent(doc, diff, threshold) {
    const retained = selectRelevantSections(doc, diff, threshold).kept;
    // No surviving section: signal the caller to omit the doc.
    return retained.length === 0
        ? null
        : retained.map((section) => renderSection(doc, section)).join('\n');
}
49
+ // Pure-logic prompt assembly. `template` is the contents of the canonical
50
+ // `prompt.md` (bundled with this package at `./prompt.md`). Callers read the
51
+ // file themselves and pass the string in — drift-engine never touches the
52
+ // filesystem. The Action does this from its bundled `dist/`; the CLI does it
53
+ // from `node_modules/@delfini/drift-engine/src/prompt.md` resolved via
54
+ // `import.meta.url`.
55
+ //
56
+ // Delegates to `buildPromptWithDrops` (Story P3.7.3) and discards the
57
+ // drop record — single source of truth on rendering, no divergence between
58
+ // the string-returning entrypoint and the drops-aware sibling.
59
export function buildPrompt(input, template, options) {
    // Same rendering path as the drops-aware variant; only the prompt string
    // is surfaced here.
    const { prompt } = buildPromptWithDrops(input, template, options);
    return prompt;
}
62
/**
 * Drops-aware variant of `buildPrompt` — returns both the rendered prompt
 * and the cross-doc ranked-fill drop record (Story P3.7.3 / FR152).
 *
 * Rendering modes, selected from `options`:
 *  - no finite positive `relevanceThreshold`: every doc is rendered whole,
 *    line-prefixed from `frontMatterLineCount`;
 *  - finite positive `relevanceThreshold` only: each doc is reduced to the
 *    sections `renderGatedDocContent` keeps; docs with none are omitted;
 *  - finite positive `relevanceThreshold` AND `promptTokenBudget`: the
 *    ranked-fill path runs, and the sections it dropped are reported in
 *    `droppedSections`. In every other mode that array is empty.
 *
 * Placeholder substitution is a SINGLE pass over the template. Text inserted
 * for one placeholder (the diff, PR metadata, doc paths, doc bodies) is never
 * rescanned, so a diff or doc that itself contains `{{prMetadata.title}}` or
 * `{{diff}}` reaches the prompt verbatim. Output is byte-identical to
 * sequential substitution whenever no inserted value contains placeholder
 * syntax.
 *
 * @param {object} input - `{ diff, docs, prMetadata }` analysis input.
 * @param {string} template - Prompt template text supplied by the caller.
 * @param {object} [options] - Optional `relevanceThreshold` and
 *   `promptTokenBudget`.
 * @returns {{prompt: string, droppedSections: Array}} The rendered prompt and
 *   the ranked-fill drop record.
 */
export function buildPromptWithDrops(input, template, options) {
    const { diff, docs, prMetadata } = input;
    // A missing, non-finite, or non-positive threshold selects the whole-doc
    // render; a positive one switches to section-granularity retrieval.
    const threshold = options?.relevanceThreshold;
    const useSections = typeof threshold === 'number' && Number.isFinite(threshold) && threshold > 0;
    // Ranked-fill needs BOTH retrieval and a positive budget; a budget alone
    // is a no-op.
    const budget = options?.promptTokenBudget;
    const useRankedFill = useSections &&
        typeof budget === 'number' &&
        Number.isFinite(budget) &&
        budget > 0;
    const droppedSections = [];
    // One table of scalar placeholders shared by the baseline render and the
    // final render.
    const substitutions = {
        '{{diff}}': diff,
        '{{prMetadata.title}}': prMetadata.title,
        '{{prMetadata.owner}}': prMetadata.owner,
        '{{prMetadata.repo}}': prMetadata.repo,
        '{{prMetadata.prNumber}}': String(prMetadata.prNumber),
        '{{prMetadata.headSha}}': prMetadata.headSha,
        '{{prMetadata.baseSha}}': prMetadata.baseSha,
        '{{changedFileCount}}': String(countChangedFiles(diff)),
    };
    const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const scalarAlternation = Object.keys(substitutions).map(escapeForRegExp).join('|');
    const eachBlockSource = /\{\{#each docs\}\}([\s\S]*?)\{\{\/each\}\}/.source;
    // Top level: either a whole each-block (group 1 = its inner template) or
    // one scalar placeholder.
    const topLevelPattern = new RegExp(`${eachBlockSource}|${scalarAlternation}`, 'g');
    // Inside an each-block: the per-doc placeholders plus the same scalars.
    const perDocPattern = new RegExp(`\\{\\{this\\.(?:path|content)\\}\\}|${scalarAlternation}`, 'g');
    const renderPrompt = (renderedDocs) => template.replace(topLevelPattern, (match, inner) => {
        // Group 1 is undefined when a scalar placeholder matched.
        if (inner === undefined) {
            return substitutions[match];
        }
        return renderedDocs
            .map((doc) => inner.replace(perDocPattern, (placeholder) => {
                if (placeholder === '{{this.path}}')
                    return doc.path;
                if (placeholder === '{{this.content}}')
                    return doc.renderedContent;
                return substitutions[placeholder];
            }))
            .join('');
    });
    let renderedDocs;
    if (useRankedFill) {
        // The section budget is what remains after the non-doc payload (the
        // prompt rendered with no docs at all) has been paid for.
        const baselineCost = estimatePromptTokens(renderPrompt([]));
        const sectionBudget = budget - baselineCost;
        renderedDocs = renderWithRankedFill(docs, diff, threshold, sectionBudget, droppedSections);
    }
    else if (useSections) {
        renderedDocs = [];
        for (const doc of docs) {
            const renderedContent = renderGatedDocContent(doc, diff, threshold);
            // null = no section survived; omit the doc entirely.
            if (renderedContent === null)
                continue;
            renderedDocs.push({ path: doc.path, renderedContent });
        }
    }
    else {
        renderedDocs = docs.map((doc) => ({
            path: doc.path,
            renderedContent: prefixDocLines(doc.content, doc.frontMatterLineCount),
        }));
    }
    return { prompt: renderPrompt(renderedDocs), droppedSections };
}
144
+ // Render the doc set under both retrieval (FR150) AND ranked-fill (FR152).
145
+ // Collects every kept section across every doc as a flat candidate list,
146
+ // runs `rankedFillSections` with a `measure` closure that knows the per-
147
+ // section render cost, then groups included sections back by doc path so
148
+ // each doc renders its surviving sections in original order.
149
+ //
150
+ // Side effect: `droppedSections` is mutated in place with one entry per
151
+ // candidate ranked-fill dropped (each carries the `docPath` so the CLI's
152
+ // trace artefact and stderr header can identify the source unambiguously).
153
function renderWithRankedFill(docs, diff, threshold, budget, droppedSections) {
    // A non-positive `budget` is forwarded unchanged: the decision about what
    // to include in that case is left entirely to `rankedFillSections`.
    // 1. Flatten every doc's retained sections into one scored candidate list.
    const candidates = [];
    for (const doc of docs) {
        for (const section of selectRelevantSections(doc, diff, threshold).kept) {
            // The retained list carries no score, so the ranking key is
            // computed here per section.
            const score = scoreSectionAgainstDiff(doc, section, diff);
            candidates.push({ doc, section, score });
        }
    }
    // 2. Let ranked-fill split the candidates into included and dropped.
    const { included, dropped } = rankedFillSections(candidates, budget, measureSectionCost);
    // 3. Report each drop to the caller-owned array, tagged with its doc path.
    for (const { doc, section, score } of dropped) {
        droppedSections.push({
            docPath: doc.path,
            startLineIndex: section.startLineIndex,
            score,
        });
    }
    // 4. Bucket the included sections by doc path.
    const sectionsByPath = new Map();
    for (const { doc, section } of included) {
        const bucket = sectionsByPath.get(doc.path);
        if (bucket === undefined) {
            sectionsByPath.set(doc.path, [section]);
        }
        else {
            bucket.push(section);
        }
    }
    // 5. Emit docs in their input order; within a doc, sections are sorted by
    // startLineIndex so the output reads in file order, not ranked order.
    return docs.flatMap((doc) => {
        const sections = sectionsByPath.get(doc.path);
        if (!sections) {
            return [];
        }
        const inFileOrder = [...sections].sort((a, b) => a.startLineIndex - b.startLineIndex);
        return [{
                path: doc.path,
                renderedContent: inFileOrder.map((section) => renderSection(doc, section)).join('\n'),
            }];
    });
}
222
// Token cost of one (doc, section) candidate as the ranked fill sees it.
//
// The estimate covers the rendered section body AND the complete per-doc
// `<document path="…">…</document>` wrapper (see `docWrapperFraming`). The
// wrapper is charged for every section, not once per doc. For a doc that
// contributes several sections this over-counts, because the real wrapper is
// emitted once. That is intentional: over-counting is the safe direction.
// An under-count could let the fill admit a section whose true rendered cost
// pushes the assembled prompt past budget, which would break the
// at-or-below-budget contract of `buildPromptWithDrops` (AC4) for every
// caller, not just the CLI with its own post-render gate.
//
// The wrapper text embeds the actual doc path, so a long path is priced at
// its real length instead of a fixed constant. Body and wrapper go through a
// single `estimatePromptTokens` call so any rounding is applied once.
function measureSectionCost(candidate) {
    const { doc, section } = candidate;
    const body = renderSection(doc, section);
    const framing = docWrapperFraming(doc.path);
    return estimatePromptTokens(body + framing);
}
239
// Returns the framing text that surrounds one rendered doc, i.e. the wrapper
// with the doc content itself left out:
//   \n <document path="PATH">\n </document>\n
// It is meant to mirror the `{{#each docs}}` block body in `prompt.md` minus
// `{{this.content}}`, and is the single place the cost measure learns about
// the wrapper. A template wrapper change must be copied here.
// NOTE(review): the exact whitespace must match `prompt.md`; verify against
// the template when either side changes.
function docWrapperFraming(path) {
    const openingLine = ` <document path="${path}">`;
    const closingLine = ' </document>';
    return `\n${openingLine}\n${closingLine}\n`;
}
248
// Produces the score `renderWithRankedFill` attaches to a candidate, since
// `selectRelevantSections` does not hand the score back.
//
// No scoring arithmetic lives here. The section is wrapped in a synthetic
// one-section doc and passed to `scoreDocRelevance`, so the formula stays in
// the relevance module and a change there flows through without an edit here.
// NOTE(review): this assumes `scoreDocRelevance` on a single-section doc
// yields the same value `selectRelevantSections` computes per section
// internally — confirm against relevance.ts.
function scoreSectionAgainstDiff(doc, section, diff) {
    const { path, frontMatterLineCount } = doc;
    const sectionAsDoc = {
        path,
        content: section.lines.join('\n'),
        // Offset by the section's start line on top of the doc's own front
        // matter count — presumably so positions inside the synthetic doc
        // still map to the original file; verify against scoreDocRelevance.
        frontMatterLineCount: frontMatterLineCount + section.startLineIndex,
    };
    const { score } = scoreDocRelevance(sectionAsDoc, diff);
    return score;
}