npm - @delfini/drift-engine - Versions diffs - 0.1.0 - Mend

@delfini/drift-engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +172 -0
package/dist/diff-filter.d.ts +33 -0
package/dist/diff-filter.d.ts.map +1 -0
package/dist/diff-filter.js +579 -0
package/dist/doc-scope.d.ts +119 -0
package/dist/doc-scope.d.ts.map +1 -0
package/dist/doc-scope.js +260 -0
package/dist/index.d.ts +11 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +46 -0
package/dist/prompt-budget.d.ts +2 -0
package/dist/prompt-budget.d.ts.map +1 -0
package/dist/prompt-budget.js +16 -0
package/dist/prompt-builder.d.ts +21 -0
package/dist/prompt-builder.d.ts.map +1 -0
package/dist/prompt-builder.js +267 -0
package/dist/reconcile.d.ts +17 -0
package/dist/reconcile.d.ts.map +1 -0
package/dist/reconcile.js +290 -0
package/dist/relevance.d.ts +73 -0
package/dist/relevance.d.ts.map +1 -0
package/dist/relevance.js +266 -0
package/dist/schema.d.ts +293 -0
package/dist/schema.d.ts.map +1 -0
package/dist/schema.js +50 -0
package/dist/types.d.ts +81 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +6 -0
package/package.json +39 -0
package/src/prompt.md +360 -0

package/dist/doc-scope.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"doc-scope.d.ts","sourceRoot":"","sources":["../src/doc-scope.ts"],"names":[],"mappings":"AA+BA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,EAAE,CAgBvF;AAID;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA6BvF;AAID;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAYpE;AAID;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAS3E"}

package/dist/doc-scope.js ADDED Viewed

@@ -0,0 +1,260 @@
+// packages/drift-engine/src/doc-scope.ts
+//
+// Pure doc-scope algebra — the single home for normalization, validation,
+// classification, and the in-scope predicate shared by EVERY surface:
+// the Action (Full + Lite), the Web platform, and the @delfini/cli Skill.
+// Consolidated here under ADR-2026-06-01 so "what smart-skip skips" and
+// "what the expander includes" can never silently diverge — there is one
+// rule set and one glob dialect (picomatch@4).
+//
+// HARD CONSTRAINTS (FR139 / NFR44 / ESLint no-restricted-imports on
+// packages/drift-engine/src/**):
+//   - Side-effect-free, pure functions only. No fs, no child_process, no
+//     http/https, no network, no process.env, no clock, no randomness.
+//   - Platform-independent: the SAME function runs on the developer's
+//     Windows CLI, the Action's Linux CI, and the Web edge runtime and MUST
+//     return identical results. We therefore do NOT import `node:path`
+//     (its `path.sep` is platform-specific and the `node:` specifier can
+//     trip edge bundlers) — POSIX normalization is implemented inline below.
+//   - Sole matcher: picomatch@4 (the second runtime dep added under
+//     ADR-2026-06-01). No other glob engine, no hand-rolled magic-char
+//     detection — picomatch owns the dialect.
+//
+// I/O EXPANSION IS NOT HERE. Materialising a scope into a concrete file set
+// (fs walk, git-trees, Octokit) stays per-surface (ports/adapters): the CLI
+// `expandDocScope`, the Action/Web git-trees match. These functions reason
+// over path STRINGS only.
+import picomatch from 'picomatch';
+// -- normalizeDocScope --------------------------------------------------------
+/**
+ * Canonicalise a doc-scope value to a deduped POSIX `string[]`.
+ *
+ * - `null` / `undefined` coerce to `[]` (defensive — JSON config loaders
+ *   commonly produce these at the boundary).
+ * - A single `string` wraps as `[value]`. It is NOT comma/newline-split —
+ *   delimited-string splitting is a per-surface concern (e.g. Lite's
+ *   `docs_path` is split in `readPipelineInputs()`), deliberately kept out
+ *   of the pure algebra.
+ * - Each entry is `.trim()`-ed before further processing so `'  docs  '`
+ *   and `'docs'` dedupe to one entry (matches `validateDocScopeEntry`'s
+ *   own trim — keeps validate/normalize aligned).
+ * - Backslashes are normalised to forward slashes (the persisted dialect is
+ *   POSIX). Trailing slashes are stripped; `//` runs collapse; `./` and
+ *   `..` segments resolve via the inline POSIX normaliser. So `'./docs'`
+ *   and `'docs'` dedupe, `'docs//api'` becomes `'docs/api'`, and
+ *   `'docs/sub/../api/*.md'` becomes `'docs/api/*.md'` (which the matcher
+ *   can actually match).
+ * - Entries are deduped, preserving first-occurrence order.
+ * - Entries that collapse to nothing (`''`, `'/'`, `'.'`, `'./'`) are
+ *   dropped — these are tautological or empty, and `validateDocScopeEntry`
+ *   would otherwise have to special-case them.
+ *
+ * Non-emptiness of the OUTPUT is NOT enforced here — that is a schema /
+ * validation concern at each surface (`docScopeSchema.min(1)`,
+ * `writeDocScope`).
+ *
+ * `normalizeDocScope` is intentionally NOT a security gate: an escape entry
+ * like `'../secrets'` survives (validation is `validateDocScopeEntry`'s
+ * job). The matcher in `isFileInDocScope` then can't match it against any
+ * real in-tree file path, so the worst-case outcome is "silent no-match,"
+ * not exfiltration.
+ */
+export function normalizeDocScope(input) {
+    if (input == null)
+        return [];
+    const entries = typeof input === 'string' ? [input] : input;
+    const seen = new Set();
+    const out = [];
+    for (const raw of entries) {
+        if (typeof raw !== 'string')
+            continue;
+        const trimmed = raw.trim();
+        if (trimmed.length === 0)
+            continue;
+        const normalised = stripTrailingSlashes(posixNormalize(toPosix(trimmed)));
+        if (normalised.length === 0 || normalised === '.')
+            continue;
+        if (seen.has(normalised))
+            continue;
+        seen.add(normalised);
+        out.push(normalised);
+    }
+    return out;
+}
+// -- validateDocScopeEntry ----------------------------------------------------
+/**
+ * Validate a single doc-scope entry. Returns `null` on success, or a
+ * human-readable error string on failure.
+ *
+ * Ports the @delfini/cli `validatePath` + `longestStaticPrefix` repo-escape
+ * technique (the richest existing implementation) — reworked to be PURE and
+ * RELATIVE-root based. `repoRootRel` is a relative marker (callers pass
+ * `'.'`); we never resolve against an absolute filesystem path or use
+ * `path.sep`.
+ *
+ * Rejects:
+ *   - absolute paths (POSIX `/...` and Windows-drive `C:\...` / `C:/...`),
+ *   - entries containing ASCII control characters (CR, LF, TAB, NUL, etc.)
+ *     — these survive a JSON round-trip but can never be a real path; the
+ *     matcher silently no-ops them, which is a worse failure mode than a
+ *     loud rejection,
+ *   - entries whose normalisation escapes the repo root (`../`, mid-path
+ *     traversal, AND traversal hidden inside a glob portion such as
+ *     `**\/../../x` — the CLI's static-prefix-only check could not catch the
+ *     last case, so we normalise the FULL entry, which is strictly stronger),
+ *   - empty / whitespace-only entries.
+ *
+ * NOTE: this validator is layered, not auto-invoked by `normalizeDocScope`
+ * or `isFileInDocScope`. Each surface must call it at the persistence
+ * boundary (`writeDocScope`, the Zod refine for the FR88g contract, the
+ * Web settings list-editor). Bypassing it produces silent matcher
+ * no-matches, not insecure behaviour — but callers should treat it as
+ * mandatory at user-input boundaries.
+ */
+export function validateDocScopeEntry(entry, repoRootRel) {
+    if (typeof entry !== 'string' || entry.trim().length === 0) {
+        return 'doc-scope entry must be a non-empty string';
+    }
+    // Reject ASCII control characters (CR, LF, TAB, NUL, etc.). These can
+    // survive a JSON round-trip from a hand-edited `doc-scope.json` but the
+    // matcher will only ever silently no-op against them.
+    if (/[\x00-\x1f]/.test(entry)) {
+        return `doc-scope entry must not contain control characters: ${JSON.stringify(entry)}`;
+    }
+    const posixEntry = toPosix(entry.trim());
+    if (isAbsolutePath(posixEntry)) {
+        return `doc-scope entries must be relative to the repo root: ${entry}`;
+    }
+    // Repo-escape check: join under the (relative) root and normalise the WHOLE
+    // entry — `..` segments anywhere (including inside a glob like `**/../../x`,
+    // whose static prefix is empty) collapse out, so an escape surfaces as a
+    // leading `..` in the result.
+    const root = stripTrailingSlashes(toPosix(repoRootRel)) || '.';
+    const joined = posixNormalize(`${root}/${posixEntry}`);
+    if (joined === '..' || joined.startsWith('../')) {
+        return `doc-scope entry escapes repo root: ${entry}`;
+    }
+    return null;
+}
+// -- classifyEntry ------------------------------------------------------------
+/**
+ * Classify a doc-scope entry by SHAPE — a pure string heuristic, NOT a
+ * filesystem check (this module cannot `stat`):
+ *   - `'glob'` — contains glob magic (decided by picomatch's own scanner, so
+ *     the classification dialect matches the matching dialect).
+ *   - `'dir'`  — `.` / `''` (repo-root tautology), OR last segment starts
+ *     with a `.` (hidden directory pattern: `.github`, `.husky`, `.vscode`,
+ *     `.changeset`, etc.), OR last segment has no `.` at all.
+ *   - `'file'` — not a glob, not dot-prefix, AND last segment contains a `.`
+ *     (heuristic: it looks like `name.ext`).
+ *
+ * KNOWN LIMITATION: versioned directories like `docs/v1.2` are misclassified
+ * as files by the dot-in-last-segment heuristic (we'd need a real extension
+ * registry to distinguish `v1.2` from `index.md`). Users who scope a
+ * versioned doc tree should prefer an explicit glob form (e.g.
+ * `docs/v1.2/<globstar>/*.md`). The predicate's dir/file branches degrade
+ * silently here — there is no authoritative fs-expander rescue for the
+ * smart-skip path-shape use case.
+ */
+export function classifyEntry(entry) {
+    const posixEntry = toPosix(entry);
+    if (picomatch.scan(posixEntry).isGlob)
+        return 'glob';
+    const stripped = stripTrailingSlashes(posixEntry);
+    if (stripped === '' || stripped === '.')
+        return 'dir';
+    const lastSegment = stripped.split('/').pop() ?? '';
+    // Dot-prefix last segment = hidden directory by convention (.github,
+    // .husky, .vscode, .changeset, etc.). Force 'dir' to avoid the otherwise
+    // silent "matches exactly one nonexistent file" failure for an extremely
+    // common real-world scope.
+    if (lastSegment.startsWith('.'))
+        return 'dir';
+    return lastSegment.includes('.') ? 'file' : 'dir';
+}
+// -- isFileInDocScope ---------------------------------------------------------
+/**
+ * True iff `filePath` falls within any entry of `scope`. Both `filePath` and
+ * the scope entries are repo-relative POSIX paths.
+ *
+ * Per-entry strategy keys off `classifyEntry`:
+ *   - `'dir'`  -> matches the recursive subtree (`docs` ⇒ `docs/**`).
+ *   - `'file'` -> exact path match.
+ *   - `'glob'` -> picomatch semantics.
+ *
+ * The predicate is PATH-SHAPE-ONLY — it does not filter by `.md` extension.
+ * The `.md`-only restriction on directory expansion belongs to the expanders
+ * (CLI `expandDocScope`, Action/Web git-trees match), which keeps this
+ * predicate usable by smart-skip on arbitrary changed-file paths.
+ *
+ * Matching is `dot: false, nocase: true`:
+ *   - case-insensitive matching aligns with the CLI expander's existing
+ *     `caseSensitiveMatch: false` (fs realism on Windows/macOS), so the same
+ *     repo cloned across platforms returns identical in-scope decisions —
+ *     the dialect-parity invariant the ADR exists to enforce. The header's
+ *     "platform-independent results" promise IS the case-insensitive choice.
+ *   - `dot: false` matches the CLI expander default. Dot-prefix hidden
+ *     directories still match via the `classifyEntry` → `'dir'` path
+ *     (entry `'.github'` becomes pattern `'.github/**'`, which picomatch
+ *     matches against `.github/workflows/x.yml` even with `dot: false`
+ *     because the literal `.github` prefix is present in the pattern).
+ *
+ * The `filePath` is defensively normalised: backslashes converted to
+ * forward slashes, leading `/` and `./` runs stripped, `..` segments
+ * resolved — so callers feeding webhook payloads (`/docs/a.md` from
+ * `URL.pathname`), Windows-style paths (`docs\a.md`), or composed paths
+ * (`./docs/sub/../a.md`) all collapse to the same canonical form before
+ * matching.
+ */
+export function isFileInDocScope(filePath, scope) {
+    const file = posixNormalize(toPosix(filePath).replace(/^\/+/, ''));
+    if (file === '' || file === '.')
+        return false;
+    const entries = normalizeDocScope(scope);
+    for (const entry of entries) {
+        const pattern = classifyEntry(entry) === 'dir' ? `${entry}/**` : entry;
+        if (picomatch(pattern, { dot: false, nocase: true })(file))
+            return true;
+    }
+    return false;
+}
+// -- Internal helpers (NOT exported via index.ts) -----------------------------
+function toPosix(p) {
+    return p.split('\\').join('/');
+}
+function stripTrailingSlashes(p) {
+    return p.replace(/\/+$/, '');
+}
+function isAbsolutePath(posixEntry) {
+    // POSIX-absolute (`/etc`) or Windows-drive-absolute (`C:\` / `C:/`).
+    return posixEntry.startsWith('/') || /^[A-Za-z]:\//.test(posixEntry);
+}
+/**
+ * Pure POSIX path normalisation — resolves `.` and `..` segments without any
+ * `node:path` dependency (edge-safe, platform-independent). Glob magic
+ * characters (`*`, `**`, `{`, `?`, etc.) are treated as ordinary literal
+ * segments, which is exactly what the repo-escape check needs.
+ */
+function posixNormalize(input) {
+    const isAbsolute = input.startsWith('/');
+    const out = [];
+    for (const segment of input.split('/')) {
+        if (segment === '' || segment === '.')
+            continue;
+        if (segment === '..') {
+            if (out.length > 0 && out[out.length - 1] !== '..') {
+                out.pop();
+            }
+            else if (!isAbsolute) {
+                out.push('..');
+            }
+            // An absolute path cannot ascend above root — drop the `..`.
+            continue;
+        }
+        out.push(segment);
+    }
+    const joined = out.join('/');
+    if (isAbsolute)
+        return `/${joined}`;
+    return joined.length === 0 ? '.' : joined;
+}

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+export { buildPrompt, buildPromptWithDrops } from './prompt-builder.js';
+export { validateAndReconcile } from './reconcile.js';
+export { estimatePromptTokens } from './prompt-budget.js';
+export { analysisSchema } from './schema.js';
+export { normalizeDocScope, validateDocScopeEntry, classifyEntry, isFileInDocScope, } from './doc-scope.js';
+export { filterDiff } from './diff-filter.js';
+export { rankedFillSections } from './relevance.js';
+export type { AnalysisInput, AnalysisResult, DocFile, Contradiction, Addition, ClarifyingQuestion, PRMetadata, Severity, BuildPromptOptions, } from './types.js';
+export type { DropReason, DroppedPath, DroppedHunk, FilterDiffResult, } from './diff-filter.js';
+export type { DocSection, DroppedSection, RankedFillCandidate, RankedFillResult, } from './relevance.js';
+//# sourceMappingURL=index.d.ts.map

package/dist/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AA0BA,OAAO,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AACrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAI5C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,aAAa,EACb,gBAAgB,GACjB,MAAM,gBAAgB,CAAA;AAOvB,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAS7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAEnD,YAAY,EACV,aAAa,EACb,cAAc,EACd,OAAO,EACP,aAAa,EACb,QAAQ,EACR,kBAAkB,EAClB,UAAU,EACV,QAAQ,EACR,kBAAkB,GACnB,MAAM,YAAY,CAAA;AAEnB,YAAY,EACV,UAAU,EACV,WAAW,EACX,WAAW,EACX,gBAAgB,GACjB,MAAM,kBAAkB,CAAA;AAEzB,YAAY,EACV,UAAU,EACV,cAAc,EACd,mBAAmB,EACnB,gBAAgB,GACjB,MAAM,gBAAgB,CAAA"}

package/dist/index.js ADDED Viewed

@@ -0,0 +1,46 @@
+// Public API surface for @delfini/drift-engine.
+//
+// Imported by both apps/action (CI surface) and packages/cli (Skill surface).
+// Algorithm parity between the two surfaces holds by construction: a finding
+// surfaced locally is the same finding the Action will surface on the
+// eventual PR.
+//
+// Hard rules (enforced via ESLint no-restricted-imports on
+// packages/drift-engine/src/**/*.ts):
+//   - No fs / child_process / http / https
+//   - No @anthropic-ai/sdk / openai / @langchain/*
+//   - No process.env reads
+//   - Runtime deps: zod + picomatch (both pure CPU — no I/O, no network, no
+//     env). picomatch was added under ADR-2026-06-01 as the single glob
+//     dialect for the doc-scope algebra below; the no-I/O charter is intact.
+// Adding any other runtime dep, or any of the blocked imports above, is a
+// regression.
+//
+// Per AC2 (architecture.md L1055–L1070): the barrel exposes exactly the
+// documented surface — no internal helpers (`dedupeOverlappingContradictions`,
+// `filterActionableContradictions`, `reconcileLineNumbers`,
+// `reconcileAdditiveAnchors`, `ContradictionSchema`, `AdditionSchema`,
+// `locateQuote`, `locateAnchorHeading`, `WarnFn`, etc.) leak through.
+// Tests reach internal helpers via relative `../src/...` imports because
+// they live inside the same workspace package.
+export { buildPrompt, buildPromptWithDrops } from './prompt-builder.js';
+export { validateAndReconcile } from './reconcile.js';
+export { estimatePromptTokens } from './prompt-budget.js';
+export { analysisSchema } from './schema.js';
+// Doc-scope algebra (ADR-2026-06-01) — shared normalize / validate / classify
+// / in-scope predicate. Pure; picomatch@4 is the single glob dialect.
+export { normalizeDocScope, validateDocScopeEntry, classifyEntry, isFileInDocScope, } from './doc-scope.js';
+// Story P3.7.2 / FR151 — deterministic diff pre-filter. Exported because the
+// gate lives at the consumer (CLI `runLocalPrepare` / Action `buildAnalysisInput`)
+// not inside `buildPrompt`; see story Dev Notes §"Where the gate lives". The
+// default consumer path does not call this — `buildPrompt` output stays
+// byte-identical and the NFR44 snapshot gate stays green.
+export { filterDiff } from './diff-filter.js';
+// Story P3.7.3 / FR152 — ranked-fill prompt budget. The pure cross-doc
+// selector + the cross-doc DroppedSection shape extension live in relevance.ts
+// (sibling to selectRelevantSections). The drops-aware sibling
+// `buildPromptWithDrops` is exported above. These are reachable through the
+// public surface because the CLI consumer (`runLocalPrepare`) needs the
+// drop record to render the "dropped N section(s) — over prompt budget"
+// header and to write `_rankedFillResult` into `.delfini-trace/`.
+export { rankedFillSections } from './relevance.js';

package/dist/prompt-budget.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export declare function estimatePromptTokens(prompt: string): number;
2	+ //# sourceMappingURL=prompt-budget.d.ts.map

package/dist/prompt-budget.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"prompt-budget.d.ts","sourceRoot":"","sources":["../src/prompt-budget.ts"],"names":[],"mappings":"AAaA,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAE3D"}

package/dist/prompt-budget.js ADDED Viewed

@@ -0,0 +1,16 @@
+// Cheap, deterministic, dependency-free token estimator.
+//
+// Consumer: `delfini local-prepare` (Story P3.2.2) uses this to decide whether
+// to exit `4` (`prompt_too_large`) before dispatching the subagent. Estimate
+// only — Anthropic's per-request input-token limit is the hard ceiling; the
+// CLI's budget is set well below that, so a 5–10% error is fine.
+//
+// Heuristic: `Math.ceil(length / 3.5)` is empirically close to GPT-style BPE
+// tokenization for English + code mix. Do NOT add a tokenizer dependency
+// (`gpt-tokenizer`, `js-tiktoken`, etc.) here — drift-engine's only runtime
+// deps are `zod` and `picomatch`; adding a tokenizer would violate FR139 +
+// AC8. If byte-accurate counting becomes necessary for cost prediction in a
+// Post-MVP feature, revisit then. Premature optimisation otherwise.
+export function estimatePromptTokens(prompt) {
+    return Math.ceil(prompt.length / 3.5);
+}

package/dist/prompt-builder.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import type { AnalysisInput, BuildPromptOptions } from './types.js';
+import { type DroppedSection } from './relevance.js';
+export declare function buildPrompt(input: AnalysisInput, template: string, options?: BuildPromptOptions): string;
+/**
+ * Drops-aware variant of `buildPrompt` — returns both the rendered prompt
+ * and the cross-doc ranked-fill drop record (Story P3.7.3 / FR152). The
+ * `droppedSections` array is non-empty ONLY when both
+ * `relevanceThreshold > 0` AND `promptTokenBudget > 0` are supplied AND
+ * ranked-fill actually dropped at least one retained section. Every other
+ * code path (default, retrieval-only, budget-only-without-threshold)
+ * returns an empty `droppedSections` array.
+ *
+ * The single internal rendering path means `buildPrompt`'s output and
+ * `buildPromptWithDrops().prompt` are byte-identical for any given input —
+ * the NFR44 snapshot test never has to choose between them.
+ */
+export declare function buildPromptWithDrops(input: AnalysisInput, template: string, options?: BuildPromptOptions): {
+    prompt: string;
+    droppedSections: DroppedSection[];
+};
+//# sourceMappingURL=prompt-builder.d.ts.map

package/dist/prompt-builder.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"prompt-builder.d.ts","sourceRoot":"","sources":["../src/prompt-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,kBAAkB,EAAW,MAAM,YAAY,CAAA;AAC5E,OAAO,EAKL,KAAK,cAAc,EAEpB,MAAM,gBAAgB,CAAA;AAkFvB,wBAAgB,WAAW,CACzB,KAAK,EAAE,aAAa,EACpB,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,kBAAkB,GAC3B,MAAM,CAER;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,aAAa,EACpB,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,kBAAkB,GAC3B;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,cAAc,EAAE,CAAA;CAAE,CA+EvD"}

package/dist/prompt-builder.js ADDED Viewed

@@ -0,0 +1,267 @@
+import { rankedFillSections, scoreDocRelevance, selectRelevantSections, } from './relevance.js';
+import { estimatePromptTokens } from './prompt-budget.js';
+function countChangedFiles(diff) {
+    if (!diff)
+        return 0;
+    const matches = diff.match(/^diff --git /gm);
+    return matches ? matches.length : 0;
+}
+// Story 3.9b — prefix every line of `content` with its absolute (original-file)
+// line number. Line `i` (0-indexed) becomes line `lineOffset + i + 1`
+// (1-indexed). The LLM uses these prefixes when emitting `targetLineStart` /
+// `targetLineEnd`, so it doesn't have to count lines. `quotedDocText` (also new
+// in 3.9b) excludes the `N: ` prefix per the prompt instructions, so the
+// reconciler can `indexOf` the quote in the raw body without stripping prefixes.
+//
+// `lineOffset` is the count of original-file lines BEFORE `content`'s first
+// line. For a whole doc that is `frontMatterLineCount`. For a heading-delimited
+// SECTION (FR150) it is `frontMatterLineCount + section.startLineIndex`, so a
+// retained mid-file section keeps its TRUE absolute line numbers even when
+// earlier sections were elided — the numbers are never renumbered from 1 nor
+// shifted by a dropped section's length.
+function prefixDocLines(content, lineOffset) {
+    const lines = content.split(/\r?\n/);
+    return lines.map((line, i) => `${lineOffset + i + 1}: ${line}`).join('\n');
+}
+function renderDocsBlock(template, docs) {
+    return template.replace(/\{\{#each docs\}\}([\s\S]*?)\{\{\/each\}\}/g, (_match, inner) => docs
+        .map((doc) => inner
+        .replace(/\{\{this\.path\}\}/g, doc.path)
+        .replace(/\{\{this\.content\}\}/g, doc.renderedContent))
+        .join(''));
+}
+// Render a single retained section with its ABSOLUTE original-file line
+// numbers (frontMatter offset + section start within the doc body). Used by
+// both the per-doc section-gated path and the cross-doc ranked-fill path.
+function renderSection(doc, section) {
+    return prefixDocLines(section.lines.join('\n'), doc.frontMatterLineCount + section.startLineIndex);
+}
+// Render a doc's body for the section-gated path: keep only sections scoring
+// at/above the threshold, each prefixed with its absolute line numbers. Returns
+// null when no section survives — the doc is then omitted from the prompt
+// entirely (same outcome as the whole-doc gate dropping an irrelevant doc).
+function renderGatedDocContent(doc, diff, threshold) {
+    const { kept } = selectRelevantSections(doc, diff, threshold);
+    if (kept.length === 0)
+        return null;
+    return kept.map((section) => renderSection(doc, section)).join('\n');
+}
+// Pure-logic prompt assembly. `template` is the contents of the canonical
+// `prompt.md` (bundled with this package at `./prompt.md`). Callers read the
+// file themselves and pass the string in — drift-engine never touches the
+// filesystem. The Action does this from its bundled `dist/`; the CLI does it
+// from `node_modules/@delfini/drift-engine/src/prompt.md` resolved via
+// `import.meta.url`.
+//
+// Delegates to `buildPromptWithDrops` (Story P3.7.3) and discards the
+// drop record — single source of truth on rendering, no divergence between
+// the string-returning entrypoint and the drops-aware sibling.
+export function buildPrompt(input, template, options) {
+    return buildPromptWithDrops(input, template, options).prompt;
+}
+/**
+ * Drops-aware variant of `buildPrompt` — returns both the rendered prompt
+ * and the cross-doc ranked-fill drop record (Story P3.7.3 / FR152). The
+ * `droppedSections` array is non-empty ONLY when both
+ * `relevanceThreshold > 0` AND `promptTokenBudget > 0` are supplied AND
+ * ranked-fill actually dropped at least one retained section. Every other
+ * code path (default, retrieval-only, budget-only-without-threshold)
+ * returns an empty `droppedSections` array.
+ *
+ * The single internal rendering path means `buildPrompt`'s output and
+ * `buildPromptWithDrops().prompt` are byte-identical for any given input —
+ * the NFR44 snapshot test never has to choose between them.
+ */
+export function buildPromptWithDrops(input, template, options) {
+    const { diff, docs, prMetadata } = input;
+    // Default (no threshold / <= 0 / non-finite) → whole-doc render, byte-
+    // identical to the pre-FR150 baseline (NFR44 snapshot parity). A positive
+    // threshold switches to section-granularity retrieval (FR150): each doc's
+    // body is reduced to its relevant heading-delimited sections, docs with no
+    // surviving section are omitted.
+    const threshold = options?.relevanceThreshold;
+    const useSections = typeof threshold === 'number' && Number.isFinite(threshold) && threshold > 0;
+    // Ranked-fill gate: only active when BOTH the retrieval stage is on AND a
+    // positive budget is supplied. A budget alone (without retrieval) is a no-op
+    // — there are no scored candidates to rank. Story Dev Notes §"AC2".
+    const budget = options?.promptTokenBudget;
+    const useRankedFill = useSections &&
+        typeof budget === 'number' &&
+        Number.isFinite(budget) &&
+        budget > 0;
+    // Filled in place by `renderWithRankedFill`; stays empty on every other path.
+    const droppedSections = [];
+    // Closure-bound substitutions so the rendering helpers (baseline + final)
+    // share one source of truth for placeholder replacement.
+    const substitutions = {
+        '{{diff}}': diff,
+        '{{prMetadata.title}}': prMetadata.title,
+        '{{prMetadata.owner}}': prMetadata.owner,
+        '{{prMetadata.repo}}': prMetadata.repo,
+        '{{prMetadata.prNumber}}': String(prMetadata.prNumber),
+        '{{prMetadata.headSha}}': prMetadata.headSha,
+        '{{prMetadata.baseSha}}': prMetadata.baseSha,
+        '{{changedFileCount}}': String(countChangedFiles(diff)),
+    };
+    // Expands the docs block first, then replaces each placeholder in the
+    // insertion order of `substitutions`. split/join inserts values literally
+    // (no `$` replacement-pattern handling).
+    // NOTE(review): each pass runs over the ACCUMULATED output, so placeholder
+    // text occurring inside an already-inserted value (doc content, the diff,
+    // the PR title) is itself replaced by a later pass. Confirm this is
+    // acceptable for repositories whose docs or diffs contain these tokens.
+    const renderPrompt = (renderedDocs) => {
+        let out = renderDocsBlock(template, renderedDocs);
+        for (const [placeholder, value] of Object.entries(substitutions)) {
+            out = out.split(placeholder).join(value);
+        }
+        return out;
+    };
+    // Build the set of (doc, retained-section) candidates and apply ranked-fill
+    // if active. The set of retained sections per doc comes from
+    // `selectRelevantSections` (FR150 — already filters below-threshold
+    // sections). Ranked-fill then runs over the cross-doc flat list. The
+    // section-budget passed to ranked-fill is `userBudget - nonDocBaseline`
+    // where the baseline is the rendered prompt with EMPTY docs[] — this is
+    // what keeps AC4's "impossible by construction" invariant: ranked-fill
+    // never includes a section whose cumulative section cost would push the
+    // FINAL rendered prompt past budget.
+    let renderedDocs;
+    if (useRankedFill) {
+        const baselineCost = estimatePromptTokens(renderPrompt([]));
+        // Not clamped: this is zero or negative when the non-doc baseline alone
+        // meets or exceeds the budget, and is passed through unchanged.
+        const sectionBudget = budget - baselineCost;
+        renderedDocs = renderWithRankedFill(docs, diff, threshold, sectionBudget, droppedSections);
+    }
+    else if (useSections) {
+        // Retrieval only: keep each doc's relevant sections, skip docs with none.
+        renderedDocs = [];
+        for (const doc of docs) {
+            const renderedContent = renderGatedDocContent(doc, diff, threshold);
+            if (renderedContent === null)
+                continue;
+            renderedDocs.push({ path: doc.path, renderedContent });
+        }
+    }
+    else {
+        // Baseline: every doc rendered whole, numbered from after its front matter.
+        renderedDocs = docs.map((doc) => ({
+            path: doc.path,
+            renderedContent: prefixDocLines(doc.content, doc.frontMatterLineCount),
+        }));
+    }
+    return { prompt: renderPrompt(renderedDocs), droppedSections };
+}
+// Render the doc set under both retrieval (FR150) AND ranked-fill (FR152).
+// Collects every kept section across every doc as a flat candidate list,
+// runs `rankedFillSections` with a `measure` closure that knows the per-
+// section render cost, then groups included sections back by doc path so
+// each doc renders its surviving sections in original order.
+//
+// Side effect: `droppedSections` is mutated in place with one entry per
+// candidate ranked-fill dropped (each carries the `docPath` so the CLI's
+// trace artefact and stderr header can identify the source unambiguously).
+function renderWithRankedFill(docs, diff, threshold, budget, droppedSections) {
+    // When `budget <= 0`, the non-doc baseline (computed in
+    // `buildPromptWithDrops`) already exceeds the user's budget — no section
+    // budget remains. `rankedFillSections` would include everything on its
+    // <=0 no-op fast-path; we let that happen so the final rendered prompt
+    // overflows naturally and the CLI emits the AC4 case 3 exit 4
+    // ("non-doc payload alone exceeds budget — no candidate section fits")
+    // with `droppedSections` empty (AC6 absent-key signal preserved).
+    // 1. Per-doc retrieval — build candidates over every doc's retained sections.
+    const candidates = [];
+    // Capture the per-doc relevance score breakdown so the measure() closure
+    // can re-derive each candidate's cost. We reuse `selectRelevantSections`
+    // to get the scored list (per-doc, threshold-filtered).
+    for (const doc of docs) {
+        const { kept } = selectRelevantSections(doc, diff, threshold);
+        for (const section of kept) {
+            // Score is not surfaced by `selectRelevantSections` — but we don't
+            // strictly need it for the ranked-fill cost; we only need it for the
+            // ranking key. Recompute via the same scoring path the per-doc helper
+            // uses internally so we get the exact tier-summed value.
+            const score = scoreSectionAgainstDiff(doc, section, diff);
+            candidates.push({ doc, section, score });
+        }
+    }
+    // 2. Ranked-fill — measure() simulates the render cost (line-prefix
+    // overhead + a small per-section framing fudge so the first section of an
+    // otherwise-not-yet-included doc still accounts for the wrapper cost).
+    const result = rankedFillSections(candidates, budget, measureSectionCost);
+    // 3. Record drops with docPath populated (cross-doc visibility, AC3).
+    for (const drop of result.dropped) {
+        droppedSections.push({
+            docPath: drop.doc.path,
+            startLineIndex: drop.section.startLineIndex,
+            score: drop.score,
+        });
+    }
+    // 4. Group included sections back by doc, preserving the doc's original
+    // section ordering (NOT the ranked order — the rendered prompt must read
+    // top-to-bottom within each doc, even when retrieval picked sections out
+    // of order).
+    const includedByDoc = new Map();
+    for (const candidate of result.included) {
+        const entry = includedByDoc.get(candidate.doc.path);
+        if (entry) {
+            entry.sections.push(candidate.section);
+        }
+        else {
+            includedByDoc.set(candidate.doc.path, {
+                doc: candidate.doc,
+                sections: [candidate.section],
+            });
+        }
+    }
+    // 5. Render — iterate docs in their original input order to preserve the
+    // top-level doc sequence the user supplied; within a doc, sort surviving
+    // sections by startLineIndex so the rendered output reads in file order.
+    const rendered = [];
+    for (const doc of docs) {
+        const entry = includedByDoc.get(doc.path);
+        if (!entry)
+            continue;
+        const ordered = [...entry.sections].sort((a, b) => a.startLineIndex - b.startLineIndex);
+        rendered.push({
+            path: doc.path,
+            renderedContent: ordered.map((section) => renderSection(doc, section)).join('\n'),
+        });
+    }
+    return rendered;
+}
+// Compute the rendered-token cost of a single (doc, section) candidate.
+// Charges the line-prefixed section body PLUS the full per-doc `<document
+// path="…">…</document>` wrapper the template emits (see `prompt.md` L25-29).
+// The wrapper is charged on EVERY section rather than once per doc: that
+// deliberately OVER-counts the second-and-later sections of a multi-section
+// doc (the real wrapper renders once), which is the safe side — it guarantees
+// the measure NEVER under-counts. Under-counting is the dangerous direction:
+// it would let ranked-fill admit a section whose true rendered cost pushes the
+// final assembled prompt past budget, breaking `buildPromptWithDrops`'s
+// at-or-below-budget contract and AC4's "impossible by construction" invariant
+// for every caller (not just the CLI, which has its own post-render gate).
+// Path length is included in the wrapper bytes so a long doc path is accounted
+// for — a fixed token constant could under-count a long path.
+function measureSectionCost(candidate) {
+    const rendered = renderSection(candidate.doc, candidate.section);
+    return estimatePromptTokens(rendered + docWrapperFraming(candidate.doc.path));
+}
+// The non-content bytes the template wraps each rendered doc in. Mirrors the
+// `{{#each docs}}` block body in `prompt.md` with `{{this.content}}` removed:
+//   \n  <document path="PATH">\n  </document>\n
+// Kept as a single source of truth so a template wrapper change is reflected
+// in the cost measure. Measured together with the section body via one
+// `estimatePromptTokens` call so the ceil rounding is shared, not double-paid.
+function docWrapperFraming(path) {
+    return `\n  <document path="${path}">\n  </document>\n`;
+}
+// Re-derive a section's score independent of `selectRelevantSections` so
+// `renderWithRankedFill` can attach the score to each candidate. The value
+// is the one `relevance.ts` defines — file-overlap + identifier-overlap +
+// heading-overlap (section-scoped) plus the whole-doc `docPathInDiff` bonus
+// applied to every section of that doc. Kept private; the relevance module
+// is the source of truth on what constitutes a "score", and because this
+// helper delegates to `scoreDocRelevance` instead of copying the formula, a
+// future formula change needs no matching edit here.
+function scoreSectionAgainstDiff(doc, section, diff) {
+    // Reuse `scoreDocRelevance` on a synthetic single-section doc — the
+    // resulting tier-summed score equals the per-section arithmetic used
+    // internally by `selectRelevantSections`. Keeps the scoring formula in
+    // exactly one place (relevance.ts) — any future formula change propagates
+    // here automatically.
+    const singleSectionDoc = {
+        path: doc.path,
+        content: section.lines.join('\n'),
+        frontMatterLineCount: doc.frontMatterLineCount + section.startLineIndex,
+    };
+    return scoreDocRelevance(singleSectionDoc, diff).score;
+}