npm - @delfini/drift-engine - Versions diffs - 0.1.0 - Mend

@delfini/drift-engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +172 -0
package/dist/diff-filter.d.ts +33 -0
package/dist/diff-filter.d.ts.map +1 -0
package/dist/diff-filter.js +579 -0
package/dist/doc-scope.d.ts +119 -0
package/dist/doc-scope.d.ts.map +1 -0
package/dist/doc-scope.js +260 -0
package/dist/index.d.ts +11 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +46 -0
package/dist/prompt-budget.d.ts +2 -0
package/dist/prompt-budget.d.ts.map +1 -0
package/dist/prompt-budget.js +16 -0
package/dist/prompt-builder.d.ts +21 -0
package/dist/prompt-builder.d.ts.map +1 -0
package/dist/prompt-builder.js +267 -0
package/dist/reconcile.d.ts +17 -0
package/dist/reconcile.d.ts.map +1 -0
package/dist/reconcile.js +290 -0
package/dist/relevance.d.ts +73 -0
package/dist/relevance.d.ts.map +1 -0
package/dist/relevance.js +266 -0
package/dist/schema.d.ts +293 -0
package/dist/schema.d.ts.map +1 -0
package/dist/schema.js +50 -0
package/dist/types.d.ts +81 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +6 -0
package/package.json +39 -0
package/src/prompt.md +360 -0

package/dist/reconcile.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import type { Addition, AnalysisResult, Contradiction, DocFile } from './types.js';
+/**
+ * Inclusive line range returned by `locateQuote`. Both ends are 1-indexed and
+ * already offset by the doc's front-matter line count.
+ */
+export interface LocatedRange {
+    /** Line on which the located quote begins. */
+    start: number;
+    /** Line on which the located quote ends (equal to `start` for a single-line quote). */
+    end: number;
+}
+/**
+ * Finds the first occurrence of `quote` in `docBody` (after CRLF→LF and
+ * per-line trailing-whitespace normalisation) and returns its line range, or
+ * `null` when the quote is empty or cannot be found.
+ */
+export declare function locateQuote(quote: string, docBody: string, frontMatterLineCount: number): LocatedRange | null;
+/** Callback that receives one human-readable message per finding dropped during reconciliation. */
+export type WarnFn = (message: string) => void;
+/**
+ * Partitions contradictions into apply-eligible (`kept`) and `narrativeOnly`
+ * (null or whitespace-only `proposedReplacement`). Findings whose replacement
+ * equals the quoted text after normalisation are dropped with a warning.
+ */
+export declare function filterActionableContradictions(contradictions: Contradiction[], onWarn?: WarnFn): {
+    kept: Contradiction[];
+    narrativeOnly: Contradiction[];
+};
+/**
+ * Within each doc, keeps the highest-confidence finding of every set of
+ * overlapping line ranges (ties go to the earlier input) and warns once per
+ * finding dropped.
+ */
+export declare function dedupeOverlappingContradictions(contradictions: Contradiction[], onWarn?: WarnFn): Contradiction[];
+/**
+ * Returns the absolute, 1-indexed line of the first markdown heading whose
+ * text equals `anchorSection`, or `null` when no heading matches.
+ */
+export declare function locateAnchorHeading(anchorSection: string, docBody: string, frontMatterLineCount: number): number | null;
+/** Drops additions whose doc or anchor heading cannot be found and sets `anchorLine` on the rest. */
+export declare function reconcileAdditiveAnchors(additions: Addition[], docs: DocFile[], onWarn?: WarnFn): Addition[];
+/**
+ * Drops contradictions whose doc or quote cannot be found and overwrites
+ * `targetLineStart` / `targetLineEnd` on the rest with the located range.
+ */
+export declare function reconcileLineNumbers(contradictions: Contradiction[], docs: DocFile[], onWarn?: WarnFn): Contradiction[];
+/**
+ * Parses `rawJson` with the analysis schema (throwing when it does not
+ * validate) and runs the reconciliation passes over the parsed result.
+ */
+export declare function validateAndReconcile(rawJson: unknown, docs: DocFile[], onWarn?: WarnFn): AnalysisResult;
+//# sourceMappingURL=reconcile.d.ts.map

package/dist/reconcile.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"reconcile.d.ts","sourceRoot":"","sources":["../src/reconcile.ts"],"names":[],"mappings":"AAyBA,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAgBlF,MAAM,WAAW,YAAY;IAI3B,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACZ;AAOD,wBAAgB,WAAW,CACzB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,oBAAoB,EAAE,MAAM,GAC3B,YAAY,GAAG,IAAI,CAsBrB;AAED,MAAM,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAA;AA0B9C,wBAAgB,8BAA8B,CAC5C,cAAc,EAAE,aAAa,EAAE,EAC/B,MAAM,GAAE,MAAiB,GACxB;IAAE,IAAI,EAAE,aAAa,EAAE,CAAC;IAAC,aAAa,EAAE,aAAa,EAAE,CAAA;CAAE,CAqB3D;AAuCD,wBAAgB,+BAA+B,CAC7C,cAAc,EAAE,aAAa,EAAE,EAC/B,MAAM,GAAE,MAAiB,GACxB,aAAa,EAAE,CAmCjB;AAUD,wBAAgB,mBAAmB,CACjC,aAAa,EAAE,MAAM,EACrB,OAAO,EAAE,MAAM,EACf,oBAAoB,EAAE,MAAM,GAC3B,MAAM,GAAG,IAAI,CAmBf;AAMD,wBAAgB,wBAAwB,CACtC,SAAS,EAAE,QAAQ,EAAE,EACrB,IAAI,EAAE,OAAO,EAAE,EACf,MAAM,GAAE,MAAiB,GACxB,QAAQ,EAAE,CAyBZ;AAOD,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,aAAa,EAAE,EAC/B,IAAI,EAAE,OAAO,EAAE,EACf,MAAM,GAAE,MAAiB,GACxB,aAAa,EAAE,CA2BjB;AAeD,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,OAAO,EAChB,IAAI,EAAE,OAAO,EAAE,EACf,MAAM,GAAE,MAAiB,GACxB,cAAc,CAkChB"}

package/dist/reconcile.js ADDED Viewed

@@ -0,0 +1,290 @@
+// Story 3.9b — orchestrator-side line-range reconciliation.
+//
+// architecture.md L268-269 — "Guardrail 2 — Citation Grounding (deterministic):
+// every cited evidence excerpt must appear verbatim (CRLF/whitespace
+// normalized) in the input docs. Findings whose citations don't ground are
+// downgraded from DRIFT to NEEDS CLARIFICATION (no-fabrication principle)."
+//
+// MVP implementation: drop ungrounded findings (instead of synthesising a
+// clarification). The clarification leg is unreachable in v1 (the orchestrator
+// only emits drift) and surfacing a synthesised clarification with no
+// concrete proposed_replacement re-introduces the noise this guardrail exists
+// to suppress. Operators see drops via the `core.warning` callback so the
+// silent-drop rate is observable in the Actions log.
+//
+// Mechanism:
+//   1. The LLM saw doc lines prefixed with absolute line numbers (Story 3.9b
+//      Task 3) and emitted both `targetLineStart` / `targetLineEnd` AND a
+//      verbatim `quotedDocText` excerpt.
+//   2. We `indexOf` the quote in the doc body (after CRLF→LF + per-line
+//      trailing-whitespace normalisation).
+//   3. If the quote is found, derive the line range from the match and
+//      overwrite the LLM's claimed range. The code, not the LLM, is the
+//      source of truth for line numbers.
+//   4. If not found, drop the finding.
+import { analysisSchema } from './schema.js';
+// Whitespace normalisation matched to `apps/web/src/server/reviews/compare-forgiving.ts`
+// (Story 4.10): CRLF→LF and per-line trailing-whitespace trim. Keeping the
+// shape symmetric means a quote that grounds in the orchestrator also matches
+// the slice the FR102 commit-splicer reads — divergence here would create a
+// "grounded in analysis but missing at commit-time" failure mode.
+function normaliseLineEndings(text) {
+    return text
+        .replace(/\r\n/g, '\n')
+        .split('\n')
+        .map((line) => line.replace(/[ \t]+$/, ''))
+        .join('\n');
+}
+// Locate the verbatim quote in the doc body. Returns absolute line numbers
+// (offset by `frontMatterLineCount`), or `null` if the quote isn't found.
+// First-match-wins on duplicate occurrences — there's no obvious better tie-
+// breaker without re-introducing the LLM line range as a hint, which defeats
+// the whole point of this reconciliation pass.
+export function locateQuote(quote, docBody, frontMatterLineCount) {
+    const normBody = normaliseLineEndings(docBody);
+    const normQuote = normaliseLineEndings(quote);
+    if (normQuote.length === 0)
+        return null;
+    const idx = normBody.indexOf(normQuote);
+    if (idx === -1)
+        return null;
+    // Convert character offset → 1-indexed body line number.
+    // `bodyStart` = number of newlines in `normBody[0..idx)` + 1.
+    const before = normBody.slice(0, idx);
+    const bodyStart = (before.match(/\n/g)?.length ?? 0) + 1;
+    // The match spans newlines internal to `normQuote`. Body-end line is body-
+    // start plus the count of newlines inside the quote.
+    const quoteNewlineCount = normQuote.match(/\n/g)?.length ?? 0;
+    const bodyEnd = bodyStart + quoteNewlineCount;
+    return {
+        start: bodyStart + frontMatterLineCount,
+        end: bodyEnd + frontMatterLineCount,
+    };
+}
+// Partition drift findings by actionability:
+//   - null or whitespace-only `proposedReplacement` → returned in `narrativeOnly`.
+//     The LLM correctly detected drift but had no concrete doc patch to
+//     suggest (typically because the doc rule is right and the code is the
+//     violation — resolution is to fix code, not docs). These are NOT
+//     apply-eligible but are real findings the user should see; the Skill's
+//     CLI report surfaces them under "Manual review required". The Action's
+//     hosted-review consumers ignore the `narrativeOnly` arm to preserve
+//     Stream 4a auto-resolve semantics (a null-replacement on the hosted
+//     surface would block auto-resolve forever).
+//   - byte-equal to `quotedDocText` after CRLF/whitespace normalisation
+//     → silently dropped. This is genuine no-op noise — a stale-comment
+//     artefact where the LLM senses contradiction on a code comment but the
+//     doc is already correct. Accepting it would produce no diff. Drop is
+//     logged via `onWarn` for observability but the finding is discarded
+//     (not surfaced as narrative-only) because there is nothing to convey
+//     to the user — the doc and the proposed wording are the same string.
+//
+// Drops in the byte-equal branch are surfaced via `onWarn` (operator-
+// observable in the Actions log) for the same reason `reconcileLineNumbers`
+// warns on ungrounded drops — silent-drop rate is the only signal that the
+// LLM is producing this shape. Null/whitespace drops do NOT warn — they
+// flow through `narrativeOnly` instead, which is the operator-observable
+// surface for that case.
+export function filterActionableContradictions(contradictions, onWarn = () => { }) {
+    const kept = [];
+    const narrativeOnly = [];
+    for (const c of contradictions) {
+        if (c.proposedReplacement === null || c.proposedReplacement.trim() === '') {
+            narrativeOnly.push(c);
+            continue;
+        }
+        if (normaliseLineEndings(c.proposedReplacement) ===
+            normaliseLineEndings(c.quotedDocText)) {
+            const preview = c.quotedDocText.slice(0, 80).replace(/\n/g, ' ');
+            onWarn(`Reconciliation dropped finding for "${c.targetDocPath}" — proposedReplacement is byte-equal to quotedDocText (no-op; preview: "${preview}${c.quotedDocText.length > 80 ? '…' : ''}").`);
+            continue;
+        }
+        kept.push(c);
+    }
+    return { kept, narrativeOnly };
+}
+// Drop overlapping contradictions before they ship to the platform. The
+// downstream splicer (`findOverlappingRanges` in splice-helpers.ts) rejects
+// the entire Approve-and-Commit batch when any two findings on the same doc
+// share an overlapping `[targetLineStart..targetLineEnd]` range — by design,
+// because applying two replacements to the same span clobbers each other.
+//
+// Observed Sonnet-4.6 behaviour: when a section block contains multiple
+// conceptual drifts (e.g. the 3-line TanStack stack block where each line is
+// a separate drift target), the model emits N findings ALL targeting the
+// same line range, each with its own proposed replacement. The reviewer
+// hits "Approve and Commit" → splicer rejects the whole batch. The result
+// is a dead-end for the demo.
+//
+// Strategy: keep the highest-confidence finding per overlap group; drop the
+// rest with a warning so operators see the silent-drop rate in the Actions
+// log. Tie-break by first-seen so behaviour is deterministic for fixtures.
+//
+// This is a SAFETY NET, not a model fix. The prompt instructs the LLM to
+// emit one consolidated finding per overlapping span, but compliance is
+// best-effort. The dedup pass guarantees the platform never receives
+// findings that the splicer will reject.
+//
+// Story 4.26 AC6 (Path B) — additive findings are NOT deduped here. The
+// canonical gate is `findOverlappingRanges` in `apps/web/src/server/reviews/
+// lib/splice-helpers.ts`, which runs server-side after FR88d and before any
+// DB write. Its tests at `apps/web/src/server/reviews/lib/__tests__/
+// splice-helpers.test.ts` (Story 4.25 — "Story 4.25 — additive findings
+// expand the conflict taxonomy" describe block at L396+) cover the three
+// additive overlap classes from the AC2 prompt rewrite:
+//   1. additive anchor inside a drift range — rejected
+//   2. additive anchors at the same line with the SAME insertionMode —
+//      rejected (ambiguous)
+//   3. additive anchors at the same line with DIFFERENT insertionMode
+//      values ('before' vs 'after') — accepted (deterministic before/after)
+// Adding a sibling additive-dedup pass at the orchestrator was considered
+// (AC6 Path A) but rejected for the lower-lift / single-canonical-gate
+// posture — the splicer already catches every overlap the LLM can emit.
+export function dedupeOverlappingContradictions(contradictions, onWarn = () => { }) {
+    // Group by doc path so overlap is only considered within a single file.
+    const byDoc = new Map();
+    for (const c of contradictions) {
+        const list = byDoc.get(c.targetDocPath) ?? [];
+        list.push(c);
+        byDoc.set(c.targetDocPath, list);
+    }
+    const kept = [];
+    for (const [, docFindings] of byDoc) {
+        // Sort by confidence desc, then by original index (stable tie-break).
+        const indexed = docFindings.map((c, i) => ({ c, i }));
+        indexed.sort((a, b) => {
+            if (b.c.confidence !== a.c.confidence)
+                return b.c.confidence - a.c.confidence;
+            return a.i - b.i;
+        });
+        const docKept = [];
+        for (const { c } of indexed) {
+            const overlap = docKept.find((k) => c.targetLineStart <= k.targetLineEnd && c.targetLineEnd >= k.targetLineStart);
+            if (overlap) {
+                onWarn(`Reconciliation dropped overlapping finding on "${c.targetDocPath}" lines ${c.targetLineStart}-${c.targetLineEnd} — already covered by a higher-confidence finding at lines ${overlap.targetLineStart}-${overlap.targetLineEnd}.`);
+                continue;
+            }
+            docKept.push(c);
+        }
+        kept.push(...docKept);
+    }
+    return kept;
+}
+// Story 4.25 — locate an anchor section heading in the doc body and return
+// its absolute line number. Mirrors `locateQuote`'s contract (1-indexed,
+// offset by frontMatterLineCount, first-match-wins) but searches for a
+// markdown heading line whose visible text equals the anchor section. We
+// match any line that strips down to the anchor text after removing leading
+// `#` markers and surrounding whitespace — that lets the LLM emit
+// "Technology Stack & Versions" and ground against `## Technology Stack &
+// Versions` in the doc.
+export function locateAnchorHeading(anchorSection, docBody, frontMatterLineCount) {
+    const normBody = normaliseLineEndings(docBody);
+    const wantedText = anchorSection.trim();
+    if (wantedText.length === 0)
+        return null;
+    const lines = normBody.split('\n');
+    const HEADING_RE = /^\s*#{1,6}\s/;
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        // Story 4.25 code-review fix: require an actual heading prefix. `replace`
+        // returns the line unchanged when the regex doesn't match, so without this
+        // gate any prose line whose trimmed text equals the anchor would match
+        // (e.g. a TOC entry rendered as the bare section name).
+        if (!HEADING_RE.test(line))
+            continue;
+        const stripped = line.replace(/^\s*#{1,6}\s*/, '').trim();
+        if (stripped === wantedText) {
+            return i + 1 + frontMatterLineCount;
+        }
+    }
+    return null;
+}
+// Story 4.25 — drop additive findings whose anchor heading can't be located
+// in the doc body (no-fabrication invariant); overwrite `anchorLine` with
+// the located absolute line number for findings whose anchor IS located.
+// The LLM emits a section heading text; the orchestrator derives the line.
+export function reconcileAdditiveAnchors(additions, docs, onWarn = () => { }) {
+    const kept = [];
+    for (const a of additions) {
+        const doc = docs.find((d) => d.path === a.targetDocPath);
+        if (!doc) {
+            onWarn(`Reconciliation dropped additive finding for unknown doc path "${a.targetDocPath}" (no doc with this path was analysed).`);
+            continue;
+        }
+        const lineNumber = locateAnchorHeading(a.anchorSection, doc.content, doc.frontMatterLineCount);
+        if (lineNumber === null) {
+            const preview = a.anchorSection.slice(0, 80);
+            onWarn(`Reconciliation dropped additive finding for "${a.targetDocPath}" — anchor section heading not found in doc body (preview: "${preview}${a.anchorSection.length > 80 ? '…' : ''}").`);
+            continue;
+        }
+        kept.push({ ...a, anchorLine: lineNumber });
+    }
+    return kept;
+}
+// Reconcile each contradiction's line range against the doc body via
+// `locateQuote(quotedDocText)`. Findings whose quote can't be located are
+// dropped (returned list is shorter than input). Findings whose quote is
+// located have their `targetLineStart` / `targetLineEnd` overwritten — the
+// LLM's emitted numbers become advisory.
+export function reconcileLineNumbers(contradictions, docs, onWarn = () => { }) {
+    const kept = [];
+    for (const c of contradictions) {
+        const doc = docs.find((d) => d.path === c.targetDocPath);
+        if (!doc) {
+            onWarn(`Reconciliation dropped finding for unknown doc path "${c.targetDocPath}" (no doc with this path was analysed).`);
+            continue;
+        }
+        const located = locateQuote(c.quotedDocText, doc.content, doc.frontMatterLineCount);
+        if (located === null) {
+            // First 80 chars of the quote — never log the full quote (could be
+            // arbitrary doc content; in public repos the Actions log is public).
+            const preview = c.quotedDocText.slice(0, 80).replace(/\n/g, ' ');
+            onWarn(`Reconciliation dropped finding for "${c.targetDocPath}" — quotedDocText not found in doc body (preview: "${preview}${c.quotedDocText.length > 80 ? '…' : ''}").`);
+            continue;
+        }
+        kept.push({
+            ...c,
+            targetLineStart: located.start,
+            targetLineEnd: located.end,
+        });
+    }
+    return kept;
+}
+// Public surface — composed reconciliation entry point for the analysis core.
+// Validates the LLM's raw JSON output against `analysisSchema`, then runs the
+// four-stage reconciliation pipeline (line-number grounding, actionability
+// filter, overlap dedup, additive-anchor grounding) in the same order the
+// Action's orchestrator used pre-extraction.
+//
+// Throws on schema-validation failure (`analysisSchema.parse` throws). The
+// `@delfini/cli` `local-finalize` command keys exit-code 3 off this throw via
+// FR145's one-retry contract.
+//
+// `onWarn` defaults to a no-op. The Action wires `core.warning` so dropped
+// findings (ungrounded quotes, no-op replacements, overlap losers, missing
+// additive anchors) show up in the Actions log. Narrative-only findings are
+// not dropped and do not warn; they are returned separately.
+export function validateAndReconcile(rawJson, docs, onWarn = () => { }) {
+    const parsed = analysisSchema.parse(rawJson);
+    const reconciledContradictions = reconcileLineNumbers(parsed.contradictions, docs, onWarn);
+    const { kept: actionable, narrativeOnly } = filterActionableContradictions(reconciledContradictions, onWarn);
+    const dedupedContradictions = dedupeOverlappingContradictions(actionable, onWarn);
+    const reconciledAdditions = reconcileAdditiveAnchors(parsed.additions, docs, onWarn);
+    // narrativeOnly is intentionally NOT deduped: it never enters the splicer,
+    // so overlap doesn't matter, and dropping a narrative-only entry that
+    // overlaps with an apply-eligible one would hide useful context from the
+    // user (the two findings can describe the same drift from different angles).
+    //
+    // `narrativeOnlyContradictions` is conditionally spread (only when non-
+    // empty) to keep the optional-field semantics honest: consumers that
+    // predate this field (notably apps/action's hosted-review path) see no
+    // change in result shape for the common case, and their `toEqual`-style
+    // fixture comparisons stay stable. CLI consumers use `?? []` so the
+    // undefined case is handled identically to `[]`.
+    return {
+        contradictions: dedupedContradictions,
+        additions: reconciledAdditions,
+        rawConfidence: parsed.rawConfidence,
+        ...(narrativeOnly.length > 0 ? { narrativeOnlyContradictions: narrativeOnly } : {}),
+    };
+}

package/dist/relevance.d.ts ADDED Viewed

@@ -0,0 +1,73 @@
+import type { DocFile } from './types.js';
+/** Relevance of one doc to a diff, as computed by `scoreDocRelevance`. */
+export interface DocRelevanceScore {
+    /** The doc's `path`, copied through unchanged. */
+    path: string;
+    /** Sum of the four `breakdown` signals. */
+    score: number;
+    /** Per-signal contributions to `score`. */
+    breakdown: {
+        /** 20 when the doc's own path is named in a `diff --git` header, otherwise 0. */
+        docPathInDiff: number;
+        /** 10 for each changed file path that the doc content mentions. */
+        fileOverlap: number;
+        /** 3 for each diff identifier found in the doc content, capped at 30. */
+        identifierOverlap: number;
+        /** 5 for each heading that contains at least one identifier from the diff. */
+        headingOverlap: number;
+    };
+}
+/** Result of `selectRelevantDocs`: the docs that were kept, and the scores of those dropped. */
+export interface SelectRelevantDocsResult {
+    kept: DocFile[];
+    dropped: DocRelevanceScore[];
+}
+/** Scores `doc` against `diff` on four additive signals; see `DocRelevanceScore`. */
+export declare function scoreDocRelevance(doc: DocFile, diff: string): DocRelevanceScore;
+/** A heading-delimited slice of a doc, as produced by `splitIntoSections`. */
+export interface DocSection {
+    /** The section's lines, obtained by splitting the content on `\n` (a CRLF line keeps its trailing `\r`). */
+    lines: string[];
+    /** Zero-based index of the section's first line within the `\n`-split content. */
+    startLineIndex: number;
+}
+/** Record of a section that was left out of a selection. */
+export interface DroppedSection {
+    /** `startLineIndex` of the section that was left out. */
+    startLineIndex: number;
+    /** The relevance score computed for that section. */
+    score: number;
+    /**
+     * Set by `rankedFillSections` (Story P3.7.3) when the dropped record is
+     * cross-doc (ranked-fill operates over a flat candidate list from any
+     * number of docs). `selectRelevantSections` operates per-doc and leaves
+     * this undefined — its caller already knows which doc the section came
+     * from.
+     */
+    docPath?: string;
+}
+/** Result of `selectRelevantSections`: the sections kept, and a record for each section dropped. */
+export interface SelectRelevantSectionsResult {
+    kept: DocSection[];
+    dropped: DroppedSection[];
+}
+/**
+ * Splits `content` into heading-delimited sections. Text before the first
+ * heading forms a leading section; empty content yields a single section.
+ */
+export declare function splitIntoSections(content: string): DocSection[];
+/**
+ * Select the heading-delimited sections of a single doc whose relevance score
+ * is at/above `threshold`. Mirrors `selectRelevantDocs`:
+ *   - inclusive lower bound (`score >= threshold` keeps the section)
+ *   - `threshold <= 0` / non-finite → keep every section (no-op fast-path)
+ *
+ * Each section reuses the same four signals as `scoreDocRelevance`, applied to
+ * the section content — EXCEPT `docPathInDiff`, which is a whole-document
+ * property (the doc itself was edited in the diff). It is computed once per doc
+ * and added to every section's score, so a doc that appears in the diff header
+ * is retained whole.
+ */
+export declare function selectRelevantSections(doc: DocFile, diff: string, threshold: number): SelectRelevantSectionsResult;
+/** One section offered to `rankedFillSections`, with the doc it came from and its relevance score. */
+export interface RankedFillCandidate {
+    doc: DocFile;
+    section: DocSection;
+    score: number;
+}
+/** Outcome of `rankedFillSections`: the candidates that were included and those that were dropped. */
+export interface RankedFillResult {
+    included: RankedFillCandidate[];
+    dropped: RankedFillCandidate[];
+}
+/**
+ * Chooses which candidates to include under `budgetTokens`. A non-positive or
+ * non-finite budget includes every candidate. `measure` presumably returns a
+ * candidate's token cost — TODO confirm against the implementation.
+ */
+export declare function rankedFillSections(candidates: RankedFillCandidate[], budgetTokens: number, measure: (candidate: RankedFillCandidate) => number): RankedFillResult;
+/**
+ * Filter docs by relevance score against the diff. Docs with
+ * `score >= threshold` are kept; docs below are dropped.
+ *
+ * The threshold is INCLUSIVE on the lower bound — a doc scoring exactly the
+ * threshold value is kept, not dropped.
+ *
+ * Fast-path: `threshold <= 0` or non-finite (NaN, Infinity) returns every
+ * doc in `kept` with `dropped` empty. This makes the function observably
+ * no-op for the default `buildPrompt` call path (NFR44 parity).
+ */
+export declare function selectRelevantDocs(docs: DocFile[], diff: string, threshold: number): SelectRelevantDocsResult;
+//# sourceMappingURL=relevance.d.ts.map

package/dist/relevance.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"relevance.d.ts","sourceRoot":"","sources":["../src/relevance.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAEzC,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE;QACT,aAAa,EAAE,MAAM,CAAA;QACrB,WAAW,EAAE,MAAM,CAAA;QACnB,iBAAiB,EAAE,MAAM,CAAA;QACzB,cAAc,EAAE,MAAM,CAAA;KACvB,CAAA;CACF;AAED,MAAM,WAAW,wBAAwB;IACvC,IAAI,EAAE,OAAO,EAAE,CAAA;IACf,OAAO,EAAE,iBAAiB,EAAE,CAAA;CAC7B;AAED,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,iBAAiB,CAe/E;AAoHD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,cAAc,EAAE,MAAM,CAAA;CACvB;AAED,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,MAAM,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;IACb;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;CACjB;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,UAAU,EAAE,CAAA;IAClB,OAAO,EAAE,cAAc,EAAE,CAAA;CAC1B;AAQD,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,EAAE,CAe/D;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,sBAAsB,CACpC,GAAG,EAAE,OAAO,EACZ,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,MAAM,GAChB,4BAA4B,CAwB9B;AAmBD,MAAM,WAAW,mBAAmB;IAClC,GAAG,EAAE,OAAO,CAAA;IACZ,OAAO,EAAE,UAAU,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,mBAAmB,EAAE,CAAA;IAC/B,OAAO,EAAE,mBAAmB,EAAE,CAAA;CAC/B;AAED,wBAAgB,kBAAkB,CAChC,UAAU,EAAE,mBAAmB,EAAE,EACjC,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,CAAC,SAAS,EAAE,mBAAmB,KAAK,MAAM,GAClD,gBAAgB,CAyDlB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,OAAO,EAAE,EACf,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,MAAM,GAChB,wBAAwB,CAe1B"}

package/dist/relevance.js ADDED Viewed

@@ -0,0 +1,266 @@
+// packages/drift-engine/src/relevance.ts
+//
+// Doc-relevance scoring + selection. Pure-logic — no I/O, no clock, no
+// randomness. Consumed by `buildPrompt` when `BuildPromptOptions.relevanceThreshold`
+// is set; bypassed entirely at default (NFR44 parity preserved).
+//
+// Internal helper — not exposed via `index.ts`. Tests reach it via the
+// relative `../src/relevance.js` import, same convention as `reconcile`
+// internal helpers.
+export function scoreDocRelevance(doc, diff) {
+    const diffFilePaths = extractDiffFilePaths(diff);
+    const diffIdentifiers = extractIdentifiers(diff);
+    const breakdown = {
+        docPathInDiff: scoreDocPathInDiff(doc.path, diff),
+        fileOverlap: scoreFileOverlap(doc.content, diffFilePaths),
+        identifierOverlap: scoreIdentifierOverlap(doc.content, diffIdentifiers),
+        headingOverlap: scoreHeadingOverlap(doc.content, diffIdentifiers),
+    };
+    const score = breakdown.docPathInDiff +
+        breakdown.fileOverlap +
+        breakdown.identifierOverlap +
+        breakdown.headingOverlap;
+    return { path: doc.path, score, breakdown };
+}
+function scoreHeadingOverlap(docContent, diffIdentifiers) {
+    let score = 0;
+    const lines = docContent.split(/\r?\n/);
+    for (const line of lines) {
+        if (!/^#{1,6}\s+/.test(line))
+            continue;
+        const headingIdents = extractIdentifiers(line);
+        for (const ident of headingIdents) {
+            if (diffIdentifiers.has(ident)) {
+                score += 5;
+                break; // count each heading at most once
+            }
+        }
+    }
+    return score;
+}
+function scoreDocPathInDiff(docPath, diff) {
+    // `diff --git a/<path> b/<path>` is the canonical header. Match either side.
+    const escaped = docPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    const pattern = new RegExp(`diff --git [ab]/${escaped} `, 'm');
+    return pattern.test(diff) ? 20 : 0;
+}
+// Extracts file paths from `diff --git a/<path> b/<path>` headers where both
+// sides match. Known limitations (deliberate scope for Tier 2):
+//   - Renames are NOT extracted (`a/old.ts b/new.ts` — sides differ)
+//   - Binary-file headers ARE matched on the diff line (harmless — binary
+//     paths rarely appear verbatim in .md docs)
+//   - Paths containing whitespace (quoted in git's diff format) are truncated
+//     at the first space; out of scope for Tier 2
+function extractDiffFilePaths(diff) {
+    const paths = new Set();
+    const pattern = /^diff --git a\/(\S+) b\/\1/gm;
+    let match;
+    while ((match = pattern.exec(diff)) !== null) {
+        paths.add(match[1]);
+    }
+    return paths;
+}
+// Common keywords we filter out to avoid false positives. Not an exhaustive
+// list — just the highest-frequency offenders. The 3-char minimum already
+// drops `if`, `do`, `or`, `is`, etc.
+const COMMON_KEYWORDS = new Set([
+    'the', 'and', 'for', 'with', 'this', 'that', 'from', 'into', 'have',
+    'has', 'are', 'was', 'were', 'will', 'can', 'not', 'but', 'all', 'any',
+    'one', 'two', 'use', 'add', 'get', 'set', 'put', 'new', 'old', 'now',
+    'let', 'var', 'const', 'function', 'return', 'import', 'export',
+    'true', 'false', 'null', 'undefined', 'void', 'string', 'number',
+    'boolean', 'object', 'array', 'type', 'interface',
+]);
+function extractIdentifiers(text) {
+    // Match camelCase / PascalCase / snake_case / kebab-case tokens of length 3+.
+    // The character class includes `-`, so kebab tokens like `some-token` are
+    // captured whole (one identifier, not two). The token must START with a
+    // letter or underscore — a leading `-` is rejected (so `-flag` is not an
+    // identifier; the `-` is a boundary on the front but not in the middle).
+    const pattern = /[a-zA-Z_][a-zA-Z0-9_-]{2,}/g;
+    const out = new Set();
+    let match;
+    while ((match = pattern.exec(text)) !== null) {
+        const token = match[0];
+        if (!COMMON_KEYWORDS.has(token.toLowerCase())) {
+            out.add(token);
+        }
+    }
+    return out;
+}
+function scoreIdentifierOverlap(docContent, diffIdentifiers) {
+    let count = 0;
+    // Substring match is intentional here (unlike `scoreFileOverlap` which is
+    // boundary-aware). A doc mentioning `processPayment` is plausibly relevant
+    // to a change touching `process`. The +30 cap bounds noise from this.
+    for (const ident of diffIdentifiers) {
+        if (docContent.includes(ident)) {
+            count += 1;
+        }
+    }
+    return Math.min(count * 3, 30);
+}
+function scoreFileOverlap(docContent, diffFilePaths) {
+    let score = 0;
+    for (const filePath of diffFilePaths) {
+        const escaped = filePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+        // Boundary check: filePath must not be immediately followed by a word
+        // character, `/`, `-`, or a `.` that itself continues into another word
+        // character (extension continuation like `.map`). This prevents
+        // `src/pay.ts` from spuriously matching `src/pay.ts.map`,
+        // `src/payments`, or `src/pay.ts-old`, while still matching
+        // `src/auth.ts.` at sentence end (period is not followed by `\w`).
+        const pattern = new RegExp(`${escaped}(?![\\w/-])(?!\\.\\w)`);
+        if (pattern.test(docContent)) {
+            score += 10;
+        }
+    }
+    return score;
+}
+// Split a doc body into heading-delimited sections. Splitting on the single
+// char `\n` (not `/\r?\n/`) is lossless — `sections.flatMap(s => s.lines)
+// .join('\n')` reconstructs `content` byte-for-byte (a CRLF line keeps its
+// trailing `\r` as the last char of its line element). Content before the
+// first heading is a leading section; an all-body doc is one leading section;
+// empty content is one empty section. Nothing is dropped on the floor.
+export function splitIntoSections(content) {
+    const lines = content.split('\n');
+    const sections = [];
+    let current = null;
+    lines.forEach((line, i) => {
+        if (/^#{1,6}\s+/.test(line)) {
+            if (current)
+                sections.push(current);
+            current = { lines: [line], startLineIndex: i };
+        }
+        else {
+            if (!current)
+                current = { lines: [], startLineIndex: i };
+            current.lines.push(line);
+        }
+    });
+    if (current)
+        sections.push(current);
+    return sections;
+}
+/**
+ * Select the heading-delimited sections of a single doc whose relevance score
+ * is at/above `threshold`. Mirrors `selectRelevantDocs`:
+ *   - inclusive lower bound (`score >= threshold` keeps the section)
+ *   - `threshold <= 0` / non-finite → keep every section (no-op fast-path)
+ *
+ * Each section reuses the same four signals as `scoreDocRelevance`, applied to
+ * the section content — EXCEPT `docPathInDiff`, which is a whole-document
+ * property (the doc itself was edited in the diff). It is computed once per doc
+ * and added to every section's score, so a doc that appears in the diff header
+ * is retained whole.
+ */
+export function selectRelevantSections(doc, diff, threshold) {
+    const sections = splitIntoSections(doc.content);
+    if (!Number.isFinite(threshold) || threshold <= 0) {
+        return { kept: sections, dropped: [] };
+    }
+    const diffFilePaths = extractDiffFilePaths(diff);
+    const diffIdentifiers = extractIdentifiers(diff);
+    const docPathScore = scoreDocPathInDiff(doc.path, diff);
+    const kept = [];
+    const dropped = [];
+    for (const section of sections) {
+        const sectionContent = section.lines.join('\n');
+        const score = docPathScore +
+            scoreFileOverlap(sectionContent, diffFilePaths) +
+            scoreIdentifierOverlap(sectionContent, diffIdentifiers) +
+            scoreHeadingOverlap(sectionContent, diffIdentifiers);
+        if (score >= threshold) {
+            kept.push(section);
+        }
+        else {
+            dropped.push({ startLineIndex: section.startLineIndex, score });
+        }
+    }
+    return { kept, dropped };
+}
+export function rankedFillSections(candidates, budgetTokens, measure) {
+    // Non-positive / non-finite budget → no constraint expressed; include
+    // everything (matches the threshold fast-path semantics elsewhere in this
+    // module). A budget of 0 means "include nothing fits at non-zero cost" but
+    // we still apply the inclusion rule per-candidate, so a measure() returning
+    // 0 for every candidate would still keep them all — deliberate.
+    if (!Number.isFinite(budgetTokens) || budgetTokens <= 0) {
+        return { included: [...candidates], dropped: [] };
+    }
+    // Tag with original index so we can restore input order on the dropped side.
+    const indexed = candidates.map((candidate, originalIndex) => ({
+        candidate,
+        originalIndex,
+    }));
+    // Stable, deterministic ranking. Score DESC, then docPath ASC, then
+    // startLineIndex ASC. Equal-score sections from the same doc sort by line.
+    //
+    // Path comparison is CODEPOINT-based (`<` / `>`), NOT `localeCompare`:
+    // `localeCompare` with no locale argument uses the host's default ICU
+    // collation, so identical inputs could rank differently across machines
+    // (different `LANG` / ICU build) — a determinism violation in a pure-logic
+    // engine whose charter (NFR44) is byte-for-byte reproducibility.
+    const ranked = [...indexed].sort((a, b) => {
+        if (b.candidate.score !== a.candidate.score) {
+            return b.candidate.score - a.candidate.score;
+        }
+        const pa = a.candidate.doc.path;
+        const pb = b.candidate.doc.path;
+        if (pa !== pb)
+            return pa < pb ? -1 : 1;
+        return a.candidate.section.startLineIndex - b.candidate.section.startLineIndex;
+    });
+    const includedFlags = new Array(candidates.length).fill(false);
+    let runningTokens = 0;
+    for (const entry of ranked) {
+        const cost = measure(entry.candidate);
+        if (runningTokens + cost <= budgetTokens) {
+            runningTokens += cost;
+            includedFlags[entry.originalIndex] = true;
+        }
+        // Else: skip this candidate. Do NOT break — a smaller later candidate
+        // may still fit ("first-fit decreasing"-style packing). Deterministic
+        // and a strict improvement over "stop at first overflow."
+    }
+    const included = [];
+    const dropped = [];
+    for (let i = 0; i < candidates.length; i++) {
+        if (includedFlags[i]) {
+            included.push(candidates[i]);
+        }
+        else {
+            dropped.push(candidates[i]);
+        }
+    }
+    return { included, dropped };
+}
+/**
+ * Filter docs by relevance score against the diff. Docs with
+ * `score >= threshold` are kept; docs below are dropped.
+ *
+ * The threshold is INCLUSIVE on the lower bound — a doc scoring exactly the
+ * threshold value is kept, not dropped.
+ *
+ * Fast-path: `threshold <= 0` or non-finite (NaN, Infinity) returns every
+ * doc in `kept` with `dropped` empty. This makes the function observably
+ * no-op for the default `buildPrompt` call path (NFR44 parity).
+ */
+export function selectRelevantDocs(docs, diff, threshold) {
+    if (!Number.isFinite(threshold) || threshold <= 0) {
+        return { kept: [...docs], dropped: [] };
+    }
+    const kept = [];
+    const dropped = [];
+    for (const doc of docs) {
+        const scored = scoreDocRelevance(doc, diff);
+        if (scored.score >= threshold) {
+            kept.push(doc);
+        }
+        else {
+            dropped.push(scored);
+        }
+    }
+    return { kept, dropped };
+}