@phren/cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +590 -0
  3. package/mcp/dist/capabilities/cli.js +61 -0
  4. package/mcp/dist/capabilities/index.js +15 -0
  5. package/mcp/dist/capabilities/mcp.js +61 -0
  6. package/mcp/dist/capabilities/types.js +57 -0
  7. package/mcp/dist/capabilities/vscode.js +61 -0
  8. package/mcp/dist/capabilities/web-ui.js +61 -0
  9. package/mcp/dist/cli-actions.js +302 -0
  10. package/mcp/dist/cli-config.js +580 -0
  11. package/mcp/dist/cli-extract.js +305 -0
  12. package/mcp/dist/cli-govern.js +371 -0
  13. package/mcp/dist/cli-graph.js +169 -0
  14. package/mcp/dist/cli-hooks-citations.js +44 -0
  15. package/mcp/dist/cli-hooks-context.js +56 -0
  16. package/mcp/dist/cli-hooks-globs.js +83 -0
  17. package/mcp/dist/cli-hooks-output.js +130 -0
  18. package/mcp/dist/cli-hooks-retrieval.js +2 -0
  19. package/mcp/dist/cli-hooks-session.js +1402 -0
  20. package/mcp/dist/cli-hooks.js +350 -0
  21. package/mcp/dist/cli-namespaces.js +989 -0
  22. package/mcp/dist/cli-ops.js +253 -0
  23. package/mcp/dist/cli-search.js +407 -0
  24. package/mcp/dist/cli.js +108 -0
  25. package/mcp/dist/content-archive.js +278 -0
  26. package/mcp/dist/content-citation.js +391 -0
  27. package/mcp/dist/content-dedup.js +622 -0
  28. package/mcp/dist/content-learning.js +472 -0
  29. package/mcp/dist/content-metadata.js +186 -0
  30. package/mcp/dist/content-validate.js +462 -0
  31. package/mcp/dist/core-finding.js +54 -0
  32. package/mcp/dist/core-project.js +36 -0
  33. package/mcp/dist/core-search.js +50 -0
  34. package/mcp/dist/data-access.js +400 -0
  35. package/mcp/dist/data-tasks.js +821 -0
  36. package/mcp/dist/embedding.js +344 -0
  37. package/mcp/dist/entrypoint.js +387 -0
  38. package/mcp/dist/finding-context.js +172 -0
  39. package/mcp/dist/finding-impact.js +181 -0
  40. package/mcp/dist/finding-journal.js +122 -0
  41. package/mcp/dist/finding-lifecycle.js +259 -0
  42. package/mcp/dist/governance-audit.js +22 -0
  43. package/mcp/dist/governance-locks.js +96 -0
  44. package/mcp/dist/governance-policy.js +648 -0
  45. package/mcp/dist/governance-scores.js +355 -0
  46. package/mcp/dist/hooks.js +449 -0
  47. package/mcp/dist/impact-scoring.js +22 -0
  48. package/mcp/dist/index-query.js +168 -0
  49. package/mcp/dist/index.js +205 -0
  50. package/mcp/dist/init-config.js +336 -0
  51. package/mcp/dist/init-preferences.js +62 -0
  52. package/mcp/dist/init-setup.js +1305 -0
  53. package/mcp/dist/init-shared.js +29 -0
  54. package/mcp/dist/init.js +1730 -0
  55. package/mcp/dist/link-checksums.js +62 -0
  56. package/mcp/dist/link-context.js +257 -0
  57. package/mcp/dist/link-doctor.js +591 -0
  58. package/mcp/dist/link-skills.js +212 -0
  59. package/mcp/dist/link.js +596 -0
  60. package/mcp/dist/logger.js +15 -0
  61. package/mcp/dist/machine-identity.js +38 -0
  62. package/mcp/dist/mcp-config.js +254 -0
  63. package/mcp/dist/mcp-data.js +315 -0
  64. package/mcp/dist/mcp-extract-facts.js +78 -0
  65. package/mcp/dist/mcp-extract.js +133 -0
  66. package/mcp/dist/mcp-finding.js +557 -0
  67. package/mcp/dist/mcp-graph.js +339 -0
  68. package/mcp/dist/mcp-hooks.js +256 -0
  69. package/mcp/dist/mcp-memory.js +58 -0
  70. package/mcp/dist/mcp-ops.js +328 -0
  71. package/mcp/dist/mcp-search.js +628 -0
  72. package/mcp/dist/mcp-session.js +651 -0
  73. package/mcp/dist/mcp-skills.js +189 -0
  74. package/mcp/dist/mcp-tasks.js +551 -0
  75. package/mcp/dist/mcp-types.js +7 -0
  76. package/mcp/dist/memory-ui-assets.js +6 -0
  77. package/mcp/dist/memory-ui-data.js +513 -0
  78. package/mcp/dist/memory-ui-graph.js +1910 -0
  79. package/mcp/dist/memory-ui-page.js +353 -0
  80. package/mcp/dist/memory-ui-scripts.js +1387 -0
  81. package/mcp/dist/memory-ui-server.js +1218 -0
  82. package/mcp/dist/memory-ui-styles.js +555 -0
  83. package/mcp/dist/memory-ui.js +9 -0
  84. package/mcp/dist/package-metadata.js +13 -0
  85. package/mcp/dist/phren-art.js +52 -0
  86. package/mcp/dist/phren-core.js +108 -0
  87. package/mcp/dist/phren-dotenv.js +67 -0
  88. package/mcp/dist/phren-paths.js +476 -0
  89. package/mcp/dist/proactivity.js +172 -0
  90. package/mcp/dist/profile-store.js +228 -0
  91. package/mcp/dist/project-config.js +85 -0
  92. package/mcp/dist/project-locator.js +25 -0
  93. package/mcp/dist/project-topics.js +1134 -0
  94. package/mcp/dist/provider-adapters.js +176 -0
  95. package/mcp/dist/runtime-profile.js +18 -0
  96. package/mcp/dist/session-checkpoints.js +131 -0
  97. package/mcp/dist/session-utils.js +68 -0
  98. package/mcp/dist/shared-content.js +8 -0
  99. package/mcp/dist/shared-embedding-cache.js +143 -0
  100. package/mcp/dist/shared-fragment-graph.js +456 -0
  101. package/mcp/dist/shared-governance.js +4 -0
  102. package/mcp/dist/shared-index.js +1334 -0
  103. package/mcp/dist/shared-ollama.js +192 -0
  104. package/mcp/dist/shared-paths.js +1 -0
  105. package/mcp/dist/shared-retrieval.js +796 -0
  106. package/mcp/dist/shared-search-fallback.js +375 -0
  107. package/mcp/dist/shared-sqljs.js +42 -0
  108. package/mcp/dist/shared-stemmer.js +171 -0
  109. package/mcp/dist/shared-vector-index.js +199 -0
  110. package/mcp/dist/shared.js +114 -0
  111. package/mcp/dist/shell-entry.js +209 -0
  112. package/mcp/dist/shell-input.js +943 -0
  113. package/mcp/dist/shell-palette.js +119 -0
  114. package/mcp/dist/shell-render.js +252 -0
  115. package/mcp/dist/shell-state-store.js +81 -0
  116. package/mcp/dist/shell-types.js +13 -0
  117. package/mcp/dist/shell-view-list.js +14 -0
  118. package/mcp/dist/shell-view.js +707 -0
  119. package/mcp/dist/shell.js +352 -0
  120. package/mcp/dist/skill-files.js +117 -0
  121. package/mcp/dist/skill-registry.js +279 -0
  122. package/mcp/dist/skill-state.js +28 -0
  123. package/mcp/dist/startup-embedding.js +57 -0
  124. package/mcp/dist/status.js +323 -0
  125. package/mcp/dist/synonyms.json +670 -0
  126. package/mcp/dist/task-hygiene.js +251 -0
  127. package/mcp/dist/task-lifecycle.js +347 -0
  128. package/mcp/dist/tasks-github.js +76 -0
  129. package/mcp/dist/telemetry.js +165 -0
  130. package/mcp/dist/test-global-setup.js +37 -0
  131. package/mcp/dist/tool-registry.js +104 -0
  132. package/mcp/dist/update.js +97 -0
  133. package/mcp/dist/utils.js +543 -0
  134. package/package.json +67 -0
  135. package/skills/README.md +7 -0
  136. package/skills/consolidate/SKILL.md +152 -0
  137. package/skills/discover/SKILL.md +175 -0
  138. package/skills/init/SKILL.md +216 -0
  139. package/skills/profiles/SKILL.md +121 -0
  140. package/skills/sync/SKILL.md +261 -0
  141. package/starter/README.md +74 -0
  142. package/starter/global/CLAUDE.md +89 -0
  143. package/starter/global/skills/humanize.md +30 -0
  144. package/starter/global/skills/pipeline.md +35 -0
  145. package/starter/global/skills/release.md +35 -0
  146. package/starter/machines.yaml +8 -0
  147. package/starter/my-api/.claude/skills/README.md +7 -0
  148. package/starter/my-api/CLAUDE.md +33 -0
  149. package/starter/my-api/FINDINGS.md +9 -0
  150. package/starter/my-api/summary.md +7 -0
  151. package/starter/my-api/tasks.md +7 -0
  152. package/starter/my-first-project/.claude/skills/README.md +7 -0
  153. package/starter/my-first-project/CLAUDE.md +49 -0
  154. package/starter/my-first-project/FINDINGS.md +24 -0
  155. package/starter/my-first-project/summary.md +11 -0
  156. package/starter/my-first-project/tasks.md +25 -0
  157. package/starter/my-frontend/.claude/skills/README.md +7 -0
  158. package/starter/my-frontend/CLAUDE.md +33 -0
  159. package/starter/my-frontend/FINDINGS.md +9 -0
  160. package/starter/my-frontend/summary.md +7 -0
  161. package/starter/my-frontend/tasks.md +7 -0
  162. package/starter/profiles/default.yaml +4 -0
  163. package/starter/profiles/personal.yaml +4 -0
  164. package/starter/profiles/work.yaml +4 -0
  165. package/starter/templates/README.md +7 -0
  166. package/starter/templates/frontend/CLAUDE.md +23 -0
  167. package/starter/templates/frontend/FINDINGS.md +7 -0
  168. package/starter/templates/frontend/reference/README.md +4 -0
  169. package/starter/templates/frontend/summary.md +7 -0
  170. package/starter/templates/frontend/tasks.md +11 -0
  171. package/starter/templates/library/CLAUDE.md +22 -0
  172. package/starter/templates/library/FINDINGS.md +7 -0
  173. package/starter/templates/library/reference/README.md +4 -0
  174. package/starter/templates/library/summary.md +7 -0
  175. package/starter/templates/library/tasks.md +11 -0
  176. package/starter/templates/monorepo/CLAUDE.md +21 -0
  177. package/starter/templates/monorepo/FINDINGS.md +7 -0
  178. package/starter/templates/monorepo/reference/README.md +4 -0
  179. package/starter/templates/monorepo/summary.md +7 -0
  180. package/starter/templates/monorepo/tasks.md +11 -0
  181. package/starter/templates/python-project/CLAUDE.md +21 -0
  182. package/starter/templates/python-project/FINDINGS.md +7 -0
  183. package/starter/templates/python-project/reference/README.md +4 -0
  184. package/starter/templates/python-project/summary.md +7 -0
  185. package/starter/templates/python-project/tasks.md +10 -0
@@ -0,0 +1,796 @@
1
+ // shared-retrieval.ts — shared retrieval core used by hooks and MCP search.
2
+ import { getQualityMultiplier, entryScoreKey, } from "./shared-governance.js";
3
+ import { queryDocRows, queryRows, cosineFallback, extractSnippet, getDocSourceKey, getEntityBoostDocs, decodeFiniteNumber, rowToDocWithRowid, } from "./shared-index.js";
4
+ import { filterTrustedFindingsDetailed, } from "./shared-content.js";
5
+ import { parseCitationComment } from "./content-citation.js";
6
+ import { getHighImpactFindings } from "./finding-impact.js";
7
+ import { buildFtsQueryVariants, buildRelaxedFtsQuery, isFeatureEnabled, STOP_WORDS } from "./utils.js";
8
+ import * as fs from "fs";
9
+ import * as path from "path";
10
+ import { getProjectGlobBoost } from "./cli-hooks-globs.js";
11
+ import { vectorFallback } from "./shared-search-fallback.js";
12
+ import { getOllamaUrl, getCloudEmbeddingUrl } from "./shared-ollama.js";
13
+ import { keywordFallbackSearch } from "./core-search.js";
14
+ import { debugLog } from "./shared.js";
15
+ // ── Scoring constants ─────────────────────────────────────────────────────────
16
/** Number of docs sampled for token-overlap semantic fallback search. */
const SEMANTIC_FALLBACK_SAMPLE_LIMIT = 100;
/** Upper bound on how many rowid windows the semantic fallback samples from. */
const SEMANTIC_FALLBACK_WINDOW_COUNT = 4;
/** Minimum overlap score for a doc to be included in semantic fallback results. */
const SEMANTIC_OVERLAP_MIN_SCORE = 0.25;
/** Result count at which vector expansion is skipped (see shouldRunVectorExpansion). */
const VECTOR_FALLBACK_SKIP_COUNT = 3;
/** Overlap score at or above which an existing hit suppresses vector expansion. */
const VECTOR_FALLBACK_STRONG_MATCH_SCORE = 0.2;

// Query-overlap weights and penalties.
// NOTE(review): the consumers of the constants below are not visible in this
// part of the module; the names suggest ranking/snippet tuning — confirm there.
const LOCAL_QUERY_OVERLAP_WEIGHT = 3.5;
const CROSS_PROJECT_QUERY_OVERLAP_WEIGHT = 1.35;
const WEAK_CROSS_PROJECT_OVERLAP_MAX = 0.18;
const WEAK_CROSS_PROJECT_OVERLAP_PENALTY = 0.75;

// Snippet focus thresholds and caps.
const LOW_FOCUS_SNIPPET_SCORE = 0.3;
const VERY_LOW_FOCUS_SNIPPET_SCORE = 0.14;
const LOW_FOCUS_SNIPPET_LINE_CAP = 3;
const LOW_FOCUS_SNIPPET_CHAR_FRACTION = 0.55;

// Task rescue margins.
const TASK_RESCUE_MIN_OVERLAP = 0.3;
const TASK_RESCUE_OVERLAP_MARGIN = 0.12;
const TASK_RESCUE_SCORE_MARGIN = 0.6;

/** Fraction of bullets that must be low-value before applying the low-value penalty. */
const LOW_VALUE_BULLET_FRACTION = 0.5;
36
+ // ── Intent and scoring helpers ───────────────────────────────────────────────
37
/**
 * Classify a prompt into a coarse task intent.
 *
 * The checks run in priority order (debug, review, build, docs) and the first
 * match wins; anything else is "general".
 *
 * @param {string} prompt - Raw user prompt.
 * @returns {"debug"|"review"|"build"|"docs"|"general"} Detected intent.
 */
export function detectTaskIntent(prompt) {
    const text = prompt.toLowerCase();
    if (/(bug|error|fix|broken|regression|fail|stack trace)/.test(text)) {
        return "debug";
    }
    // "pr" and "nit" are anchored to word boundaries: as bare substrings they
    // matched inside everyday words ("project", "prompt", "unit", "init") and
    // hijacked the classification.
    if (/(review|audit|\bprs?\b|pull request|\bnit(?:s|pick\w*)?\b|refactor)/.test(text)) {
        return "review";
    }
    // Same reasoning for "ci", which otherwise matches "decision", "specific", ...
    if (/(build|deploy|release|\bci\b|workflow|pipeline|test)/.test(text)) {
        return "build";
    }
    if (/\b(doc|docs|readme|explain|guide|instructions?)\b/.test(text)) {
        return "docs";
    }
    return "general";
}
49
/** Doc types favoured by each intent, with the boost they earn. */
const INTENT_DOC_BOOSTS = new Map([
    ["debug", { docTypes: ["findings", "reference"], boost: 3 }],
    ["review", { docTypes: ["canonical", "changelog"], boost: 3 }],
    ["build", { docTypes: ["task", "reference"], boost: 2 }],
    ["docs", { docTypes: ["summary", "claude"], boost: 2 }],
]);
/**
 * Score boost for a document type given the detected task intent.
 *
 * @param {string} intent - Intent label (e.g. "debug", "review", "build", "docs").
 * @param {string} docType - Document type of the candidate.
 * @returns {number} 3 or 2 when the type is favoured by the intent, 2 for any
 *   other "canonical" doc, otherwise 0.
 */
function intentBoost(intent, docType) {
    const rule = INTENT_DOC_BOOSTS.get(intent);
    if (rule && rule.docTypes.includes(docType)) {
        return rule.boost;
    }
    // Canonical docs get a baseline boost regardless of intent.
    return docType === "canonical" ? 2 : 0;
}
62
/**
 * Boost a document whose path corresponds to one of the changed files.
 *
 * A changed file counts as a hit when its basename equals the document's
 * basename, or when its full (slash-normalised) path is a suffix of the
 * document path.
 *
 * @param {string} filePath - Path of the candidate document.
 * @param {Set<string>} changedFiles - Paths of changed files.
 * @returns {number} 3 on a hit, otherwise 0.
 */
export function fileRelevanceBoost(filePath, changedFiles) {
    if (changedFiles.size === 0) {
        return 0;
    }
    const toForwardSlashes = (p) => p.replace(/\\/g, "/");
    const docPath = toForwardSlashes(filePath);
    const docName = path.basename(docPath);
    const isHit = [...changedFiles].some((changed) => {
        const changedPath = toForwardSlashes(changed);
        // Whole-basename comparison so 'index.ts' does not match 'shared-index.ts'.
        return path.basename(changedPath) === docName || docPath.endsWith(`/${changedPath}`);
    });
    return isHit ? 3 : 0;
}
78
/**
 * Split a branch name into lower-cased tokens worth matching against content.
 *
 * Tokens are separated by `/`, `.`, `_` or `-`; tokens of two characters or
 * fewer and generic branch words (main, master, feature, fix, ...) are dropped.
 *
 * @param {string} branch - Branch name.
 * @returns {string[]} Tokens in their original order (duplicates preserved).
 */
function branchTokens(branch) {
    const genericWords = ["main", "master", "feature", "fix", "bugfix", "hotfix"];
    const tokens = [];
    for (const piece of branch.split(/[\/._-]/g)) {
        const token = piece.trim().toLowerCase();
        if (token.length > 2 && !genericWords.includes(token)) {
            tokens.push(token);
        }
    }
    return tokens;
}
84
/**
 * Boost content that mentions tokens taken from the current branch name.
 *
 * @param {string} content - Document content.
 * @param {string} [branch] - Current branch name; falsy disables the boost.
 * @returns {number} One point per branch token found in the content
 *   (case-insensitive substring match), capped at 3.
 */
export function branchMatchBoost(content, branch) {
    if (!branch) {
        return 0;
    }
    const haystack = content.toLowerCase();
    const hits = branchTokens(branch).filter((token) => haystack.includes(token)).length;
    return Math.min(3, hits);
}
96
// Cached regex plus the env value it was built from, so the pattern is only
// recompiled when PHREN_LOW_VALUE_PATTERNS changes.
let _lowValueRegex = null;
let _lowValuePatternKey = "";
/**
 * Return the case-insensitive regex that recognises low-value text.
 *
 * Fragments come from the comma-separated PHREN_LOW_VALUE_PATTERNS environment
 * variable when it yields at least one non-empty entry, otherwise from a
 * built-in list. Fragments are matched literally (regex metacharacters are
 * escaped).
 *
 * @returns {RegExp} Alternation of all active fragments.
 */
function getLowValuePattern() {
    const envValue = process.env.PHREN_LOW_VALUE_PATTERNS || "";
    const cacheIsFresh = _lowValueRegex !== null && _lowValuePatternKey === envValue;
    if (!cacheIsFresh) {
        const builtIn = ["fixed stuff", "updated things", "misc", "temp", "wip", "todo", "placeholder", "cleanup"];
        const fromEnv = envValue.split(",").map((s) => s.trim()).filter(Boolean);
        const escapeForRegex = (fragment) => fragment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const alternation = (fromEnv.length ? fromEnv : builtIn).map(escapeForRegex).join("|");
        _lowValueRegex = new RegExp(`(${alternation})`, "i");
        _lowValuePatternKey = envValue;
    }
    return _lowValueRegex;
}
109
/**
 * Penalty for findings documents made up mostly of low-value bullets.
 *
 * A bullet (a line starting with "- ") is low-value when it matches the
 * low-value pattern or is shorter than 16 characters.
 *
 * @param {string} content - Document content.
 * @param {string} docType - Document type; only "findings" is penalised.
 * @returns {number} 2 when the low-value share reaches LOW_VALUE_BULLET_FRACTION
 *   of all bullets, otherwise 0.
 */
function lowValuePenalty(content, docType) {
    if (docType !== "findings") {
        return 0;
    }
    const bulletLines = content.split("\n").filter((line) => line.startsWith("- "));
    if (bulletLines.length === 0) {
        return 0;
    }
    const lowValueRe = getLowValuePattern();
    let lowCount = 0;
    for (const bullet of bulletLines) {
        if (lowValueRe.test(bullet) || bullet.length < 16) {
            lowCount += 1;
        }
    }
    const threshold = Math.ceil(bulletLines.length * LOW_VALUE_BULLET_FRACTION);
    return lowCount >= threshold ? 2 : 0;
}
119
+ // ── Token and snippet helpers ────────────────────────────────────────────────
120
/**
 * Canonicalise a single token for overlap comparison.
 *
 * Lower-cases the token, strips everything except `a-z`, `0-9`, `_` and `-`,
 * and drops one trailing "s" from tokens longer than four characters unless
 * they end in "ss".
 *
 * @param {string} token - Raw token.
 * @returns {string} Normalised token (possibly empty).
 */
function normalizeToken(token) {
    const cleaned = token.toLowerCase().replace(/[^a-z0-9_-]/g, "");
    const hasPluralSuffix = cleaned.length > 4 && cleaned.endsWith("s") && !cleaned.endsWith("ss");
    return hasPluralSuffix ? cleaned.slice(0, -1) : cleaned;
}
126
/**
 * Break text into unique, normalised tokens for overlap scoring.
 *
 * Tokens of one character or less and STOP_WORDS entries are discarded;
 * first-occurrence order is preserved.
 *
 * @param {string} text - Text to tokenise.
 * @param {number} [maxTokens=24] - Maximum tokens returned; a non-finite value
 *   or a value below 1 disables the cap.
 * @returns {string[]} Unique tokens.
 */
function tokenizeForOverlap(text, maxTokens = 24) {
    const words = text
        .toLowerCase()
        .replace(/[^a-z0-9_\-\s]/g, " ")
        .split(/\s+/);
    const seen = new Set();
    for (const word of words) {
        const token = normalizeToken(word);
        if (token.length > 1 && !STOP_WORDS.has(token)) {
            seen.add(token);
        }
    }
    const unique = [...seen];
    const uncapped = !Number.isFinite(maxTokens) || maxTokens < 1;
    return uncapped ? unique : unique.slice(0, maxTokens);
}
138
/**
 * Fraction of query tokens that occur in the content.
 *
 * The denominator is the query token count clamped to the range 2..10, so a
 * single-token query cannot score above 0.5 and queries longer than ten tokens
 * are not penalised further.
 *
 * @param {string[]} queryTokens - Normalised query tokens.
 * @param {string} content - Text to compare against.
 * @returns {number} Overlap score; 0 when either side has no tokens.
 */
function overlapScore(queryTokens, content) {
    if (!queryTokens.length) {
        return 0;
    }
    const vocabulary = new Set(tokenizeForOverlap(content, Number.POSITIVE_INFINITY));
    if (!vocabulary.size) {
        return 0;
    }
    const hits = queryTokens.filter((token) => vocabulary.has(token)).length;
    return hits / Math.max(2, Math.min(queryTokens.length, 10));
}
152
/**
 * Overlap score of query tokens against a document's metadata and content.
 *
 * @param {string[]} queryTokens - Normalised query tokens.
 * @param {{project: string, filename: string, type: string, path: string, content: string}} doc
 *   Document row; only the first 5000 characters of `content` are considered.
 * @returns {number} Result of overlapScore over the combined text.
 */
function docOverlapScore(queryTokens, doc) {
    const metadata = `${doc.project} ${doc.filename} ${doc.type} ${doc.path}`;
    const contentHead = doc.content.slice(0, 5000);
    return overlapScore(queryTokens, `${metadata}\n${contentHead}`);
}
156
/**
 * Deterministic 32-bit seed derived from text.
 *
 * Uses the FNV-1a recurrence (offset basis 2166136261, prime 16777619) over
 * the string's UTF-16 code units.
 *
 * @param {string} text - Input text.
 * @returns {number} Unsigned 32-bit hash.
 */
function semanticFallbackSeed(text) {
    const OFFSET_BASIS = 2166136261;
    const PRIME = 16777619;
    let acc = OFFSET_BASIS;
    let index = 0;
    while (index < text.length) {
        acc = Math.imul(acc ^ text.charCodeAt(index), PRIME);
        index += 1;
    }
    // Coerce the signed int32 produced by imul/xor back to unsigned.
    return acc >>> 0;
}
164
/**
 * Load one rowid-ordered window of docs for semantic fallback sampling.
 *
 * @param {object} db - Database handle passed through to queryRows.
 * @param {number} startRowid - Lower bound (inclusive) when `wrapBefore` is undefined.
 * @param {number} limit - Maximum rows to load.
 * @param {string} [project] - Restrict the window to this project when truthy.
 * @param {number} [wrapBefore] - When defined, load rows with rowid strictly
 *   below this value instead (ascending from the lowest rowid).
 * @returns {Array} Rows converted with rowToDocWithRowid; empty when the query
 *   yields nothing.
 */
function loadSemanticFallbackWindow(db, startRowid, limit, project, wrapBefore) {
    const conditions = [];
    const params = [];
    if (project) {
        conditions.push("project = ?");
        params.push(project);
    }
    conditions.push(wrapBefore === undefined ? "rowid >= ?" : "rowid < ?");
    params.push(wrapBefore ?? startRowid, limit);
    const where = conditions.join(" AND ");
    const sql = `SELECT rowid, project, filename, type, content, path FROM docs WHERE ${where} ORDER BY rowid LIMIT ?`;
    const rows = queryRows(db, sql, params) || [];
    return rows.map((row) => rowToDocWithRowid(row));
}
177
// k=60 is the standard RRF constant from Cormack et al. (2009). Larger values
// flatten the advantage of top-ranked results; smaller values amplify it.
const RRF_K = 60;
/**
 * Reciprocal Rank Fusion: merge ranked result lists from several search tiers.
 *
 * score(d) = Σ 1 / (k + rank_i) over every tier i that contains d, with ranks
 * starting at 1. Documents are identified by `path`, falling back to
 * `project/filename`; the first instance seen for a key is the one returned.
 *
 * @param {Array<Array<object>>} tiers - Ranked lists, best result first.
 * @param {number} [k=RRF_K] - RRF smoothing constant.
 * @returns {Array<object>} Unique documents ordered by descending fused score.
 */
export function rrfMerge(tiers, k = RRF_K) {
    const fused = new Map();
    for (const tier of tiers) {
        for (const [index, doc] of tier.entries()) {
            const key = doc.path || `${doc.project}/${doc.filename}`;
            let entry = fused.get(key);
            if (entry === undefined) {
                entry = { doc, score: 0 };
                fused.set(key, entry);
            }
            entry.score += 1 / (k + index + 1);
        }
    }
    return [...fused.values()]
        .sort((left, right) => right.score - left.score)
        .map((entry) => entry.doc);
}
201
/**
 * Token-overlap fallback search over a bounded sample of docs.
 *
 * At most SEMANTIC_FALLBACK_SAMPLE_LIMIT docs are loaded. When the table (or
 * project slice) is larger than the sample, rows are drawn from several rowid
 * windows whose start offsets derive from a deterministic hash of the project
 * and query terms. Sampled docs are scored with docOverlapScore; those at or
 * above SEMANTIC_OVERLAP_MIN_SCORE are returned, best first, capped at 8.
 *
 * @param {object} db - Database handle.
 * @param {string} prompt - Query text.
 * @param {string} [project] - Restrict sampling to this project when truthy.
 * @returns {Array<object>} Up to 8 matching docs; empty when nothing qualifies
 *   or the row statistics cannot be read.
 */
function semanticFallbackDocs(db, prompt, project) {
    const queryTerms = tokenizeForOverlap(prompt);
    if (!queryTerms.length) {
        return [];
    }
    const statsSql = project
        ? "SELECT MIN(rowid), MAX(rowid), COUNT(*) FROM docs WHERE project = ?"
        : "SELECT MIN(rowid), MAX(rowid), COUNT(*) FROM docs";
    const statsRows = queryRows(db, statsSql, project ? [project] : []);
    if (!statsRows?.length) {
        return [];
    }
    let firstRowid = 0;
    let lastRowid = 0;
    let totalRows = 0;
    try {
        firstRowid = decodeFiniteNumber(statsRows[0][0], "semanticFallbackDocs.minRowid");
        lastRowid = decodeFiniteNumber(statsRows[0][1], "semanticFallbackDocs.maxRowid");
        totalRows = decodeFiniteNumber(statsRows[0][2], "semanticFallbackDocs.rowCount");
    }
    catch {
        // Undecodable statistics: treat as "nothing to sample".
        return [];
    }
    if (totalRows <= 0 || lastRowid < firstRowid) {
        return [];
    }
    const sampleCap = Math.min(SEMANTIC_FALLBACK_SAMPLE_LIMIT, totalRows);
    const sampled = [];
    const seenRowids = new Set();
    // Append unseen rows to the sample, stopping once the cap is reached.
    const collect = (windowRows) => {
        for (const row of windowRows) {
            if (seenRowids.has(row.rowid)) {
                continue;
            }
            seenRowids.add(row.rowid);
            sampled.push(row.doc);
            if (sampled.length >= sampleCap) {
                break;
            }
        }
    };
    const loadWindow = (start, limit, wrapBefore) => {
        collect(loadSemanticFallbackWindow(db, start, limit, project, wrapBefore));
    };
    if (totalRows <= sampleCap) {
        // Everything fits in the sample: read it in one pass.
        loadWindow(firstRowid, sampleCap);
    }
    else {
        const span = Math.max(1, lastRowid - firstRowid + 1);
        const windowCount = Math.min(SEMANTIC_FALLBACK_WINDOW_COUNT, sampleCap);
        const perWindow = Math.max(1, Math.ceil(sampleCap / windowCount));
        const stride = Math.max(1, Math.floor(span / windowCount));
        const seed = semanticFallbackSeed(`${project ?? "*"}\n${queryTerms.join(" ")}`);
        for (let w = 0; w < windowCount && sampled.length < sampleCap; w++) {
            const startRowid = firstRowid + ((seed + w * stride) % span);
            loadWindow(startRowid, perWindow);
            if (sampled.length >= sampleCap) {
                break;
            }
            // Also pull rows that sit below the window start.
            loadWindow(startRowid, perWindow, startRowid);
        }
    }
    if (sampled.length < sampleCap) {
        // Top up from the lowest rowid when the windows came up short.
        loadWindow(firstRowid, sampleCap - sampled.length);
    }
    return sampled
        .map((doc) => ({ doc, score: docOverlapScore(queryTerms, doc) }))
        .filter((candidate) => candidate.score >= SEMANTIC_OVERLAP_MIN_SCORE)
        .sort((left, right) => right.score - left.score)
        .slice(0, 8)
        .map((candidate) => candidate.doc);
}
269
/**
 * Decide whether vector search should be used to expand the current results.
 *
 * @param {Array<object>|null|undefined} rows - Results found so far.
 * @param {string} prompt - Query text.
 * @param {number} [desiredResults=VECTOR_FALLBACK_SKIP_COUNT] - Result count
 *   the caller wants; clamped to the range 2..VECTOR_FALLBACK_SKIP_COUNT.
 * @returns {boolean} true when there are no rows, or when there are too few
 *   rows and neither of the top two overlaps the query strongly; false when
 *   there are enough rows or the prompt yields no tokens.
 */
export function shouldRunVectorExpansion(rows, prompt, desiredResults = VECTOR_FALLBACK_SKIP_COUNT) {
    if (!rows || rows.length === 0) {
        return true;
    }
    const enoughRows = Math.max(2, Math.min(VECTOR_FALLBACK_SKIP_COUNT, desiredResults));
    if (rows.length >= enoughRows) {
        return false;
    }
    const queryTokens = tokenizeForOverlap(prompt);
    if (queryTokens.length === 0) {
        return false;
    }
    let strongestOverlap = 0;
    for (const doc of rows.slice(0, 2)) {
        strongestOverlap = Math.max(strongestOverlap, docOverlapScore(queryTokens, doc));
    }
    return strongestOverlap < VECTOR_FALLBACK_STRONG_MATCH_SCORE;
}
283
/**
 * Rough token-count estimate for a piece of text.
 *
 * @param {string} text - Text to measure.
 * @returns {number} ceil(length / 3.5 + 0.1 per whitespace run).
 */
function approximateTokens(text) {
    const whitespaceRuns = text.match(/\s+/g)?.length ?? 0;
    return Math.ceil(text.length / 3.5 + whitespaceRuns * 0.1);
}
286
/**
 * Shrink a snippet to a line and character budget.
 *
 * Trailing whitespace is trimmed, blank lines are dropped, and at least one
 * line is always kept. Over-long output is cut and suffixed with an ellipsis;
 * the cut never keeps fewer than 24 characters, so very small `maxChars`
 * values can be exceeded.
 *
 * @param {string} snippet - Source snippet.
 * @param {number} maxLines - Maximum number of lines to keep.
 * @param {number} maxChars - Character budget for the joined result.
 * @returns {string} Compacted snippet.
 */
function compactSnippet(snippet, maxLines, maxChars) {
    const nonBlankLines = snippet
        .split("\n")
        .map((line) => line.trimEnd())
        .filter((line) => line.trim().length > 0);
    const joined = nonBlankLines.slice(0, Math.max(1, maxLines)).join("\n");
    return joined.length > maxChars
        ? joined.slice(0, Math.max(24, maxChars - 1)).trimEnd() + "\u2026"
        : joined;
}
297
+ // ── Task priority filtering ───────────────────────────────────────────────
298
const PRIORITY_TAG_RE = /\[(high|medium|low)\]/i;
/** Priorities used when neither the caller nor the environment specifies any. */
const DEFAULT_TASK_PRIORITIES = ["high", "medium"];
/** Trim, lower-case and drop empty entries from a list of priority names. */
function normalizePriorityList(values) {
    return Array.from(values, (value) => String(value).trim().toLowerCase()).filter(Boolean);
}
/**
 * Keep only task items whose priority tag is allowed.
 *
 * The allowed set comes from `allowedPriorities` when given, else from the
 * comma-separated PHREN_TASK_PRIORITY environment variable, else defaults to
 * high + medium. Both sources are trimmed and lower-cased so they compare
 * correctly against tags, which are matched case-insensitively. Items without
 * a `[high]`/`[medium]`/`[low]` tag are kept whenever "high" or "medium" is
 * allowed.
 *
 * @param {string[]} items - Task lines.
 * @param {Iterable<string>} [allowedPriorities] - Explicit allow-list; an
 *   empty list filters everything out.
 * @returns {string[]} Items that pass the filter, in their original order.
 */
export function filterTaskByPriority(items, allowedPriorities) {
    let allowedList;
    if (allowedPriorities) {
        allowedList = normalizePriorityList(allowedPriorities);
    }
    else {
        const fromEnv = normalizePriorityList((process.env.PHREN_TASK_PRIORITY ?? "").split(","));
        // An env value made only of separators/whitespace means "not configured".
        allowedList = fromEnv.length > 0 ? fromEnv : DEFAULT_TASK_PRIORITIES;
    }
    const allowed = new Set(allowedList);
    const allowsUntagged = allowed.has("high") || allowed.has("medium");
    return items.filter((item) => {
        const match = item.match(PRIORITY_TAG_RE);
        return match ? allowed.has(match[1].toLowerCase()) : allowsUntagged;
    });
}
310
+ // ── Search ───────────────────────────────────────────────────────────────────
311
/** Projects that are always searched alongside the detected project. */
const SHARED_PROJECTS = ["shared", "org"];
/**
 * Synchronous document search combining FTS (tier 1) and token-overlap
 * semantic fallback (tier 2), merged with Reciprocal Rank Fusion.
 *
 * @param {object} db - Database handle.
 * @param {string} safeQuery - Sanitised FTS query.
 * @param {string} prompt - Raw user prompt.
 * @param {string} [keywords] - Extracted keywords; may be empty or missing.
 * @param {string} [detectedProject] - Project to scope the search to.
 * @param {boolean} [searchAllProjects=false] - Search every project even when
 *   one was detected.
 * @param {string} [phrenPath] - Store root, forwarded to the relaxed-query builder.
 * @returns {Array<object>|null} Up to 12 merged docs, or null when nothing matched.
 */
export function searchDocuments(db, safeQuery, prompt, keywords, detectedProject, searchAllProjects = false, phrenPath) {
    // Tier 1: FTS5 — run project-scoped and broader queries, de-duplicating by doc key.
    const ftsDocs = [];
    const ftsSeenKeys = new Set();
    const relaxedQuery = buildRelaxedFtsQuery(keywords || prompt, detectedProject, phrenPath);
    const addFtsRows = (rows) => {
        if (!rows) {
            return;
        }
        for (const doc of rows) {
            const key = doc.path || `${doc.project}/${doc.filename}`;
            if (!ftsSeenKeys.has(key)) {
                ftsSeenKeys.add(key);
                ftsDocs.push(doc);
            }
        }
    };
    const runScopedFtsQuery = (query) => {
        if (!query) {
            return;
        }
        if (detectedProject) {
            addFtsRows(queryDocRows(db, "SELECT project, filename, type, content, path FROM docs WHERE docs MATCH ? AND project = ? ORDER BY rank LIMIT 7", [query, detectedProject]));
        }
        if (searchAllProjects || !detectedProject) {
            addFtsRows(queryDocRows(db, "SELECT project, filename, type, content, path FROM docs WHERE docs MATCH ? ORDER BY rank LIMIT 10", [query]));
            return;
        }
        // Project detected and not searching everything: widen only to the shared projects.
        const scopeProjects = [detectedProject, ...SHARED_PROJECTS];
        const placeholders = scopeProjects.map(() => "?").join(", ");
        addFtsRows(queryDocRows(db, `SELECT project, filename, type, content, path FROM docs WHERE docs MATCH ? AND project IN (${placeholders}) ORDER BY rank LIMIT 10`, [query, ...scopeProjects]));
    };
    runScopedFtsQuery(safeQuery);
    if (ftsDocs.length === 0 && relaxedQuery && relaxedQuery !== safeQuery) {
        runScopedFtsQuery(relaxedQuery);
    }
    // Tier 2: token-overlap semantic — always run, scored independently.
    // `keywords` is optional (see `keywords || prompt` above); interpolating it
    // raw would add the literal token "undefined" to the query and dilute scores.
    const semanticQuery = `${prompt}\n${keywords ?? ""}`;
    const semanticDocs = semanticFallbackDocs(db, semanticQuery, detectedProject);
    // Merge with Reciprocal Rank Fusion so documents found by both tiers rank highest.
    const merged = rrfMerge([ftsDocs, semanticDocs]);
    if (merged.length === 0) {
        return null;
    }
    return merged.slice(0, 12);
}
354
/**
 * Async variant of searchDocuments that also runs real vector search (Tier 3)
 * when cloud embeddings (PHREN_EMBEDDING_API_URL) or Ollama are available.
 * Falls back to the sync result if vector search is unavailable or fails.
 *
 * @param {object} db - Database handle.
 * @param {string} safeQuery - Sanitised FTS query.
 * @param {string} prompt - Raw user prompt.
 * @param {string} [keywords] - Extracted keywords; may be empty or missing.
 * @param {string} [detectedProject] - Project to scope the search to.
 * @param {boolean} [searchAllProjects=false] - Search every project.
 * @param {string} [phrenPath] - Store root; vector search is skipped without it.
 * @returns {Promise<Array<object>|null>} Up to 12 merged docs when vector
 *   results were added; otherwise the sync/keyword result (possibly null).
 */
export async function searchDocumentsAsync(db, safeQuery, prompt, keywords, detectedProject, searchAllProjects = false, phrenPath) {
    // Sync result (Tier 1 + Tier 2), with a keyword fallback when it is empty.
    let syncResult = searchDocuments(db, safeQuery, prompt, keywords, detectedProject, searchAllProjects, phrenPath);
    if (!syncResult || syncResult.length === 0) {
        const keywordRows = keywordFallbackSearch(db, prompt, { project: detectedProject ?? undefined, limit: 8 });
        if (keywordRows?.length) {
            syncResult = keywordRows;
        }
    }
    // A missing `keywords` must not leak the literal word "undefined" into the query text.
    const expandedQuery = `${prompt}\n${keywords ?? ""}`;
    // Tier 3: real vector search — only if embeddings are available and phrenPath is provided.
    const hasVectorBackend = Boolean(getCloudEmbeddingUrl() || getOllamaUrl());
    if (!phrenPath || !hasVectorBackend || !shouldRunVectorExpansion(syncResult, expandedQuery)) {
        return syncResult;
    }
    try {
        const existingPaths = new Set((syncResult ?? []).map((d) => d.path || `${d.project}/${d.filename}`));
        const vectorDocs = await vectorFallback(phrenPath, expandedQuery, existingPaths, 8, detectedProject);
        if (vectorDocs.length === 0) {
            return syncResult;
        }
        // RRF-merge the already-fused sync list with the vector results.
        const merged = rrfMerge([syncResult ?? [], vectorDocs]);
        if (merged.length === 0) {
            return syncResult;
        }
        return merged.slice(0, 12);
    }
    catch (err) {
        // Vector search failure is non-fatal — report in debug mode and return the sync result.
        if (process.env.PHREN_DEBUG) {
            process.stderr.write(`[phren] hybridSearch vectorFallback: ${err instanceof Error ? err.message : String(err)}\n`);
        }
        return syncResult;
    }
}
391
/**
 * Build the FTS statement shared by the primary query and the rowid lookup.
 * Returns the SQL text and its positional parameters; the MATCH query is
 * always the first parameter.
 */
function buildKnowledgeFtsStatement(columns, ftsQuery, filterProject, filterType, limit) {
    let sql = `SELECT ${columns} FROM docs WHERE docs MATCH ?`;
    const params = [ftsQuery];
    if (filterProject) {
        sql += " AND project = ?";
        params.push(filterProject);
    }
    if (filterType) {
        sql += " AND type = ?";
        params.push(filterType);
    }
    sql += " ORDER BY rank LIMIT ?";
    params.push(limit);
    return { sql, params };
}
/**
 * Search knowledge rows with FTS first, then progressively weaker fallbacks.
 *
 * Order of attempts: the primary FTS query, relaxed FTS variants, a cosine
 * top-up when FTS found fewer than three rows, cosine alone when FTS found
 * nothing, keyword fallback, and finally vector expansion when
 * shouldRunVectorExpansion asks for it.
 *
 * @param {object} db - Database handle.
 * @param {object} options
 * @param {string} options.query - User query.
 * @param {number} options.maxResults - Number of results the caller wants.
 * @param {number} [options.fetchLimit=maxResults] - LIMIT for the primary FTS query.
 * @param {string} [options.filterProject] - Restrict to this project.
 * @param {string} [options.filterType] - Restrict to this doc type.
 * @param {string} [options.phrenPath] - Store root, forwarded to the query
 *   builder and vector fallback.
 * @returns {Promise<{safeQuery: string, rows: Array<object>|null, usedFallback: boolean}>}
 *   `usedFallback` is true when any non-FTS tier contributed rows.
 */
export async function searchKnowledgeRows(db, options) {
    const { query, maxResults, fetchLimit = maxResults, filterProject, filterType, phrenPath, } = options;
    const queryVariants = buildFtsQueryVariants(query, filterProject, phrenPath);
    const safeQuery = queryVariants[0] ?? "";
    if (!safeQuery) {
        return { safeQuery, rows: null, usedFallback: false };
    }
    const matchesFilters = (doc) => (!filterProject || doc.project === filterProject) && (!filterType || doc.type === filterType);
    // Same identity rule as the other search entry points in this module.
    const docKey = (doc) => doc.path || `${doc.project}/${doc.filename}`;
    const primary = buildKnowledgeFtsStatement("project, filename, type, content, path", safeQuery, filterProject, filterType, fetchLimit);
    let activeFtsQuery = safeQuery;
    let rows = queryDocRows(db, primary.sql, primary.params);
    if ((!rows || rows.length === 0) && queryVariants.length > 1) {
        // Retry with progressively relaxed variants; only the MATCH parameter changes.
        for (const variant of queryVariants.slice(1)) {
            rows = queryDocRows(db, primary.sql, [variant, ...primary.params.slice(1)]);
            if (rows?.length) {
                activeFtsQuery = variant;
                break;
            }
        }
    }
    let usedFallback = false;
    if (rows && rows.length < 3) {
        // Collect the rowids FTS already returned so the cosine fallback can skip them.
        const ftsRowids = new Set();
        try {
            const lookup = buildKnowledgeFtsStatement("rowid, project, filename, type, content, path", activeFtsQuery, filterProject, filterType, maxResults);
            const rowidResult = db.exec(lookup.sql, lookup.params);
            if (rowidResult?.length && rowidResult[0]?.values?.length) {
                for (const row of rowidResult[0].values) {
                    ftsRowids.add(rowToDocWithRowid(row).rowid);
                }
            }
        }
        catch (err) {
            debugLog(`rowid dedup query failed: ${err instanceof Error ? err.message : String(err)}`);
        }
        // Clamp so a small maxResults can never produce a negative limit.
        const remaining = Math.max(0, maxResults - rows.length);
        const cosineResults = cosineFallback(db, query, ftsRowids, remaining).filter(matchesFilters);
        if (cosineResults.length > 0) {
            rows = [...rows, ...cosineResults];
            usedFallback = true;
        }
    }
    if (!rows) {
        const cosineResults = cosineFallback(db, query, new Set(), maxResults).filter(matchesFilters);
        if (cosineResults.length > 0) {
            rows = cosineResults;
            usedFallback = true;
        }
    }
    if (!rows) {
        const fallbackRows = keywordFallbackSearch(db, query, {
            project: filterProject ?? undefined,
            type: filterType ?? undefined,
            limit: maxResults,
        });
        if (fallbackRows) {
            rows = fallbackRows;
            usedFallback = true;
        }
    }
    if (shouldRunVectorExpansion(rows, query, maxResults)) {
        try {
            const existingRows = rows ?? [];
            const alreadyFoundPaths = new Set(existingRows.map(docKey));
            const vecRows = await vectorFallback(phrenPath, query, alreadyFoundPaths, Math.max(0, maxResults - existingRows.length), filterProject ?? undefined);
            const filteredVecRows = filterType ? vecRows.filter((row) => row.type === filterType) : vecRows;
            if (filteredVecRows.length > 0) {
                rows = [...existingRows, ...filteredVecRows];
                usedFallback = true;
            }
        }
        catch (err) {
            // Vector expansion is best-effort; report only in debug mode.
            if (process.env.PHREN_DEBUG) {
                process.stderr.write(`[phren] vectorFallback: ${err instanceof Error ? err.message : String(err)}\n`);
            }
        }
    }
    return { safeQuery, rows, usedFallback };
}
493
+ // ── Trust filter ─────────────────────────────────────────────────────────────
494
/** Doc types whose content is passed through the trust filter. */
const TRUST_FILTERED_TYPES = new Set(["findings", "reference", "knowledge"]);
/**
 * Apply the trust filter to rows.
 *
 * Rows of other types pass through untouched. Trust-filtered rows get their
 * content replaced by the filtered content and are dropped when nothing is
 * left. Queue and audit items are returned for the caller to write, so
 * retrieval itself performs no writes.
 *
 * @param {Array<object>} rows - Candidate docs.
 * @param {number} ttlDays - Forwarded to the trust filter.
 * @param {number} minConfidence - Forwarded to the trust filter.
 * @param {*} decay - Forwarded to the trust filter.
 * @param {string} [phrenPath] - When set, high-impact finding ids are looked
 *   up and forwarded to the trust filter.
 * @returns {{rows: Array<object>, queueItems: Array<object>, auditEntries: string[]}}
 */
export function applyTrustFilter(rows, ttlDays, minConfidence, decay, phrenPath) {
    const queueItems = [];
    const auditEntries = [];
    const highImpactFindingIds = phrenPath ? getHighImpactFindings(phrenPath, 3) : undefined;
    const bulletsWithReason = (issues, reason) => issues.filter((issue) => issue.reason === reason).map((issue) => issue.bullet);
    const filtered = [];
    for (const doc of rows) {
        if (!TRUST_FILTERED_TYPES.has(doc.type)) {
            filtered.push(doc);
            continue;
        }
        const trust = filterTrustedFindingsDetailed(doc.content, {
            ttlDays,
            minConfidence,
            decay,
            project: doc.project,
            highImpactFindingIds,
        });
        if (trust.issues.length > 0) {
            const stale = bulletsWithReason(trust.issues, "stale");
            const conflicts = bulletsWithReason(trust.issues, "invalid_citation");
            if (stale.length) {
                queueItems.push({ project: doc.project, section: "Stale", items: stale });
            }
            if (conflicts.length) {
                queueItems.push({ project: doc.project, section: "Conflicts", items: conflicts });
            }
            auditEntries.push(`project=${doc.project} type=${doc.type} stale=${stale.length} invalid_citation=${conflicts.length}`);
        }
        // Drop trust-filtered docs whose remaining content is blank.
        if (trust.content.trim()) {
            filtered.push({ ...doc, content: trust.content });
        }
    }
    return { rows: filtered, queueItems, auditEntries };
}
528
+ // ── Ranking ──────────────────────────────────────────────────────────────────
529
/**
 * Find the latest date among headings of the form `## YYYY-MM-DD` that start a
 * line in `content`. Returns "0000-00-00" when there is no such heading.
 * Dates are compared as strings, which orders zero-padded ISO dates correctly.
 */
function mostRecentDate(content) {
    const headings = content.match(/^## (\d{4}-\d{2}-\d{2})/gm) ?? [];
    let latest = null;
    for (const heading of headings) {
        // With the g flag each match is the whole "## YYYY-MM-DD" text; drop the "## " prefix.
        const date = heading.slice(3);
        if (latest === null || date > latest) {
            latest = date;
        }
    }
    return latest ?? "0000-00-00";
}
535
/**
 * Shared helper: compute the age in whole days of a `YYYY-MM-DD` date string,
 * measured between UTC midnights.
 *
 * @param dateStr Date in `YYYY-MM-DD` form.
 * @returns Number of days (never negative; dates in the future yield 0), or
 *   Infinity when the string is not `YYYY-MM-DD`, is the "0000-00-00"
 *   placeholder, or cannot be parsed as a date.
 */
function ageInDaysFromDate(dateStr) {
    if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr) || dateStr === "0000-00-00")
        return Infinity;
    // Read the clock once: taking year, month and day from separate `new Date()`
    // calls can mix two different days if the calls straddle UTC midnight.
    const now = new Date();
    const todayUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
    const entryUtc = Date.parse(`${dateStr}T00:00:00Z`);
    if (Number.isNaN(entryUtc))
        return Infinity;
    return Math.max(0, Math.floor((todayUtc - entryUtc) / 86_400_000));
}
545
/**
 * Item 3: recency boost so that recent findings rank higher.
 *
 * @param docType Doc type; only "findings" ever receives a boost.
 * @param latestDate Pre-computed `YYYY-MM-DD` date of the doc's newest entry.
 * @returns 0.3 when the date is at most 7 days old, 0.15 when at most 30 days
 *   old, otherwise 0 (including every non-findings doc).
 */
export function recencyBoost(docType, latestDate) {
    if (docType === "findings") {
        const ageDays = ageInDaysFromDate(latestDate);
        // [maximum age in days, boost], checked from freshest to oldest.
        const tiers = [
            [7, 0.3],
            [30, 0.15],
        ];
        for (const [maxAgeDays, boost] of tiers) {
            if (ageDays <= maxAgeDays) {
                return boost;
            }
        }
    }
    return 0;
}
556
/**
 * Score multiplier that fades findings belonging to a project other than the
 * detected one as they age.
 *
 * Returns 1 for anything that is not a findings doc, when no project was
 * detected, or when the doc belongs to the detected project. Otherwise the
 * multiplier falls linearly from 1 (age 0) and is floored at 0.1. The decay
 * window in days comes from PHREN_CROSS_PROJECT_DECAY_DAYS (default 30; values
 * that are not a positive integer fall back to 30). A doc without a usable date
 * is treated as 90 days old.
 */
function crossProjectAgeMultiplier(doc, detectedProject, latestDate) {
    const isForeignFinding = doc.type === "findings"
        && Boolean(detectedProject)
        && doc.project !== detectedProject;
    if (!isForeignFinding) {
        return 1;
    }
    const configuredDays = Number.parseInt((process.env.PHREN_CROSS_PROJECT_DECAY_DAYS) ?? "30", 10);
    let decayDays = 30;
    if (Number.isFinite(configuredDays) && configuredDays > 0) {
        decayDays = configuredDays;
    }
    const measuredAge = ageInDaysFromDate(latestDate);
    const ageInDays = Number.isFinite(measuredAge) ? measuredAge : 90;
    const linearFade = 1 - (ageInDays / decayDays);
    return Math.max(0.1, linearFade);
}
565
/**
 * Rank candidate docs and return at most 8 of them, best first.
 *
 * Steps, in order:
 *  1. When `detectedProject` is set: keep every doc of that project, keep every
 *     doc whose type has no local doc, and keep at most 2 docs from other
 *     projects per type that does exist locally. Then prepend the project's
 *     `canonical` doc (looked up in `db`) unless it is already a candidate.
 *  2. Compute the ranking signals of each doc exactly once.
 *  3. Sort with the composite comparator documented at the sort call.
 *  4. Keep the top 8, then (unless disabled) drop `task` docs other than the
 *     single best task when it was "rescued" by a strong query match.
 *
 * @param rows Candidate docs; fields read: project, filename, type, content, path.
 * @param intent Passed to intentBoost; the value "build" disables the task filter.
 * @param gitCtx Optional git context; fields read: changedFiles (optional, `.size`
 *   is consulted) and branch.
 * @param detectedProject Optional name of the project the caller is working in.
 * @param phrenPathLocal Forwarded to the glob-boost, quality and source-key helpers.
 * @param db Handle forwarded to queryDocRows and getEntityBoostDocs.
 * @param cwd Forwarded to getProjectGlobBoost.
 * @param query Optional query text; when falsy, overlap and entity signals are off.
 * @param opts Optional; `skipTaskFilter` or `filterType === "task"` disable the task filter.
 * @returns The ranked docs (the same doc objects that were passed in or loaded).
 */
export function rankResults(rows, intent, gitCtx, detectedProject, phrenPathLocal, db, cwd, query, opts) {
    // Identity of a doc for caches and tie-breaking: its path, else project/filename.
    const docKeyOf = (doc) => doc.path || `${doc.project}/${doc.filename}`;
    let ranked = [...rows];
    const queryTokens = query ? tokenizeForOverlap(query) : [];
    if (detectedProject) {
        const localByType = new Set(ranked.filter((r) => r.project === detectedProject).map((r) => r.type));
        // Keep all local docs, and allow up to 2 shared/org docs per type even if
        // that type exists locally — avoids suppressing cross-project knowledge.
        const sharedCountByType = new Map();
        const MAX_SHARED_PER_TYPE = 2;
        ranked = ranked.filter((r) => {
            if (r.project === detectedProject)
                return true;
            if (!localByType.has(r.type))
                return true;
            const count = sharedCountByType.get(r.type) ?? 0;
            if (count < MAX_SHARED_PER_TYPE) {
                sharedCountByType.set(r.type, count + 1);
                return true;
            }
            return false;
        });
        const canonicalRows = queryDocRows(db, "SELECT project, filename, type, content, path FROM docs WHERE project = ? AND type = 'canonical' LIMIT 1", [detectedProject]);
        if (canonicalRows) {
            // Do not add the canonical doc a second time if it is already a
            // candidate; a duplicate would be ranked (and possibly returned) twice.
            const presentKeys = new Set(ranked.map(docKeyOf));
            const missingCanonical = canonicalRows.filter((row) => !presentKeys.has(docKeyOf(row)));
            ranked = [...missingCanonical, ...ranked];
        }
    }
    const entityBoost = query ? getEntityBoostDocs(db, query) : new Set();
    const entityBoostPaths = new Set();
    for (const doc of ranked) {
        // Use getDocSourceKey to build the full project/relFile key, matching what
        // entity_links stores (e.g. project/reference/arch.md, not project/arch.md).
        const docKey = getDocSourceKey(doc, phrenPathLocal);
        if (entityBoost.has(docKey))
            entityBoostPaths.add(doc.path);
    }
    // Pre-compute mostRecentDate once per findings doc to avoid O(n log n) regex rescans in sort.
    const recentDateCache = new Map();
    for (const doc of ranked) {
        if (doc.type === "findings") {
            recentDateCache.set(docKeyOf(doc), mostRecentDate(doc.content));
        }
    }
    // Non-findings docs are never cached and therefore get the placeholder date.
    const getRecentDate = (doc) => recentDateCache.get(docKeyOf(doc)) ?? "0000-00-00";
    // Precompute per-doc ranking metadata once — avoids recomputing inside sort comparator.
    const changedFiles = gitCtx?.changedFiles || new Set();
    const FILE_MATCH_BOOST = 1.5;
    const scored = ranked.map((doc) => {
        const globBoost = getProjectGlobBoost(phrenPathLocal, doc.project, cwd, gitCtx?.changedFiles);
        const key = entryScoreKey(doc.project, doc.filename, doc.content);
        const entity = entityBoostPaths.has(doc.path) ? 1.3 : 1;
        const date = getRecentDate(doc);
        const fileRel = fileRelevanceBoost(doc.path, changedFiles);
        const branchMat = branchMatchBoost(doc.content, gitCtx?.branch);
        const qualityMult = getQualityMultiplier(phrenPathLocal, key);
        const queryOverlap = queryTokens.length > 0 ? docOverlapScore(queryTokens, doc) : 0;
        const queryOverlapWeight = detectedProject && doc.project === detectedProject
            ? LOCAL_QUERY_OVERLAP_WEIGHT
            : CROSS_PROJECT_QUERY_OVERLAP_WEIGHT;
        // Docs from other projects that barely match the query are pushed down.
        const weakCrossProjectPenalty = detectedProject
            && doc.project !== detectedProject
            && queryTokens.length > 0
            && queryOverlap < WEAK_CROSS_PROJECT_OVERLAP_MAX
            ? WEAK_CROSS_PROJECT_OVERLAP_PENALTY
            : 0;
        // Sum of the signals minus the penalties, scaled by the cross-project
        // age multiplier and rounded to 4 decimal places.
        const score = Math.round((intentBoost(intent, doc.type) +
            fileRel +
            branchMat +
            globBoost +
            qualityMult +
            entity +
            queryOverlap * queryOverlapWeight +
            recencyBoost(doc.type, date) -
            weakCrossProjectPenalty -
            lowValuePenalty(doc.content, doc.type)) * crossProjectAgeMultiplier(doc, detectedProject, date) * 10000) / 10000;
        const fileMatch = fileRel > 0 || branchMat > 0;
        return { doc, score, fileMatch, globBoost, qualityMult, entity, date, queryOverlap };
    });
    // Decide once whether file/branch matches take precedence. Using the guarded
    // `changedFiles` (instead of gitCtx.changedFiles) also keeps the comparator
    // from throwing when gitCtx has no changedFiles. The flag is only consulted
    // when there are changed files, which is the only case where it matters.
    const preferFileMatches = changedFiles.size > 0
        && isFeatureEnabled("PHREN_FEATURE_GIT_CONTEXT_FILTER", false);
    // Single composite sort on cached values. Precedence:
    //   file/branch match (only when preferFileMatches) → findings before other
    //   types → newer findings date → score → query overlap → glob boost →
    //   quality → entity boost → doc key (ascending, for a deterministic order).
    // Score, overlap and glob differences of at most 0.01 count as ties.
    scored.sort((a, b) => {
        if (preferFileMatches) {
            const scoreDiff = (b.fileMatch ? FILE_MATCH_BOOST : 1) - (a.fileMatch ? FILE_MATCH_BOOST : 1);
            if (scoreDiff !== 0)
                return scoreDiff;
        }
        const isFindingsA = a.doc.type === "findings";
        const isFindingsB = b.doc.type === "findings";
        if (isFindingsA !== isFindingsB)
            return isFindingsA ? -1 : 1;
        if (isFindingsA && isFindingsB) {
            const byDate = b.date.localeCompare(a.date);
            if (byDate !== 0)
                return byDate;
        }
        const scoreDelta = b.score - a.score;
        if (Math.abs(scoreDelta) > 0.01)
            return scoreDelta;
        const overlapDelta = b.queryOverlap - a.queryOverlap;
        if (Math.abs(overlapDelta) > 0.01)
            return overlapDelta;
        const globDelta = b.globBoost - a.globBoost;
        if (Math.abs(globDelta) > 0.01)
            return globDelta;
        const qualityDelta = b.qualityMult - a.qualityMult;
        if (qualityDelta !== 0)
            return qualityDelta;
        if (b.entity !== a.entity)
            return b.entity - a.entity;
        return docKeyOf(a.doc).localeCompare(docKeyOf(b.doc));
    });
    const shouldFilterTask = intent !== "build" && !opts?.skipTaskFilter && opts?.filterType !== "task";
    const rescuedTaskPaths = new Set();
    if (shouldFilterTask && queryTokens.length > 0) {
        // Only the best-ranked task can be rescued, and only when it matches the
        // query well: there is no non-task doc at all, or the task beats the best
        // non-task doc by the overlap margin or by the score margin.
        const bestTask = scored.find((entry) => entry.doc.type === "task");
        if (bestTask && bestTask.queryOverlap >= TASK_RESCUE_MIN_OVERLAP) {
            const bestNonTask = scored.find((entry) => entry.doc.type !== "task");
            if (!bestNonTask
                || bestTask.queryOverlap >= bestNonTask.queryOverlap + TASK_RESCUE_OVERLAP_MARGIN
                || bestTask.score >= bestNonTask.score + TASK_RESCUE_SCORE_MARGIN) {
                rescuedTaskPaths.add(docKeyOf(bestTask.doc));
            }
        }
    }
    // The cut to 8 happens before the task filter, so the result can be shorter than 8.
    ranked = scored.map((s) => s.doc).slice(0, 8);
    if (shouldFilterTask) {
        ranked = ranked.filter((r) => {
            if (r.type !== "task")
                return true;
            return rescuedTaskPaths.has(docKeyOf(r));
        });
    }
    return ranked;
}
701
/**
 * Mark snippet lines with stale citations (cited file missing or line content changed).
 *
 * Each line is checked against the line that follows it: when the following
 * line parses as a citation comment (via parseCitationComment) and the citation
 * is stale, the line gets " [stale citation]" appended and the citation comment
 * line is dropped from the output. All other lines are copied unchanged.
 *
 * A citation can only be checked when its file resolves to a concrete path:
 * either `repo` is set (file resolved against it) or `file` is absolute. It is
 * stale when that file does not exist, or when a line number >= 1 is given and
 * the file cannot be read, the line is past the end of the file, or the line is
 * blank.
 *
 * @param snippet Text with "\n"-separated lines.
 * @returns The snippet with stale citations marked.
 */
export function markStaleCitations(snippet) {
    const debug = Boolean(process.env.PHREN_DEBUG);
    // Lines of files already read during this call, so that several citations
    // into the same file read and split it only once.
    const fileLinesCache = new Map();
    const readFileLines = (file) => {
        let fileLines = fileLinesCache.get(file);
        if (fileLines === undefined) {
            fileLines = fs.readFileSync(file, "utf8").split("\n");
            fileLinesCache.set(file, fileLines);
        }
        return fileLines;
    };
    // True when the parsed citation points at something that is no longer there.
    const isStale = (citation) => {
        if (!citation || !citation.file)
            return false;
        const resolvedFile = citation.repo
            ? path.resolve(citation.repo, citation.file)
            : (path.isAbsolute(citation.file) ? citation.file : null);
        if (!resolvedFile)
            return false; // relative path without a repo: nothing to check against
        if (!fs.existsSync(resolvedFile))
            return true;
        if (citation.line === undefined || !(citation.line >= 1))
            return false;
        // Verify the cited line still has content (not beyond EOF)
        try {
            const fileLines = readFileLines(resolvedFile);
            if (citation.line > fileLines.length)
                return true;
            // Line exists but is now empty — content has drifted
            return fileLines[citation.line - 1].trim() === "";
        }
        catch (err) {
            if (debug)
                process.stderr.write(`[phren] markStaleCitations fileRead: ${err instanceof Error ? err.message : String(err)}\n`);
            // An unreadable file cannot back the citation any more.
            return true;
        }
    };
    const lines = snippet.split("\n");
    const result = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Check if the next line is a citation comment
        const nextLine = lines[i + 1];
        if (nextLine && isStale(parseCitationComment(nextLine))) {
            result.push(`${line} [stale citation]`);
            i++; // skip the citation comment line
            continue;
        }
        result.push(line);
    }
    return result.join("\n");
}
750
/**
 * Pick up to 3 snippets from `rows` (taken in the given order) that fit the
 * token budget.
 *
 * For each doc a snippet is extracted around `keywords` and compacted to the
 * line/char budgets; docs with a blank snippet are skipped. Snippets of
 * trust-filtered doc types get their stale citations marked. A snippet that
 * matches the query poorly is compacted further. Once at least one snippet has
 * been selected, a very poorly matching snippet is skipped when it would push
 * usage past 80% of the budget, and selection stops at the first snippet that
 * would exceed the budget. The very first snippet is always accepted; when it
 * alone exceeds the budget it is compacted harder (3 lines, 55% of the char
 * budget) first.
 *
 * @param rows Docs in ranked order; fields read: content, type, filename, project.
 * @param keywords Query text used for extraction and focus scoring.
 * @param tokenBudget Maximum estimated tokens for all snippets together.
 * @param lineBudget Maximum lines per snippet.
 * @param charBudget Maximum characters per snippet.
 * @returns {{selected: Array<{doc, snippet, key}>, usedTokens: number}}
 *   `usedTokens` starts from a fixed 36 and adds 14 per snippet on top of the
 *   snippet's own estimate (presumably framing overhead — confirm with the
 *   output module).
 */
export function selectSnippets(rows, keywords, tokenBudget, lineBudget, charBudget) {
    const selected = [];
    let usedTokens = 36;
    const queryTokens = tokenizeForOverlap(keywords);
    const hasQuery = queryTokens.length > 0;
    // Query overlap of filename + snippet text, or `whenNoQuery` if there is no query.
    const focusOf = (doc, text, whenNoQuery) => (hasQuery
        ? overlapScore(queryTokens, `${doc.filename}\n${text}`)
        : whenNoQuery);
    // Estimated token cost of one snippet including its fixed per-snippet overhead.
    const costOf = (text) => approximateTokens(text) + 14;
    const softBudget = Math.floor(tokenBudget * 0.8);
    for (const doc of rows) {
        const extracted = extractSnippet(doc.content, keywords, 8);
        let text = compactSnippet(extracted, lineBudget, charBudget);
        if (!text.trim()) {
            continue;
        }
        // Mark findings with stale citations before injection
        if (TRUST_FILTERED_TYPES.has(doc.type)) {
            text = markStaleCitations(text);
        }
        let focus = focusOf(doc, text, 1);
        if (focus < LOW_FOCUS_SNIPPET_SCORE) {
            const cappedLines = Math.min(lineBudget, LOW_FOCUS_SNIPPET_LINE_CAP);
            const cappedChars = Math.max(120, Math.floor(charBudget * LOW_FOCUS_SNIPPET_CHAR_FRACTION));
            text = compactSnippet(text, cappedLines, cappedChars);
            focus = focusOf(doc, text, focus);
        }
        let cost = costOf(text);
        const wouldOverflow = usedTokens + cost > tokenBudget;
        if (selected.length > 0) {
            if (focus < VERY_LOW_FOCUS_SNIPPET_SCORE && usedTokens + cost > softBudget) {
                continue;
            }
            if (wouldOverflow) {
                break;
            }
        }
        else if (wouldOverflow) {
            // Never return nothing: shrink the first snippet instead of rejecting it.
            text = compactSnippet(text, 3, Math.floor(charBudget * 0.55));
            cost = costOf(text);
        }
        const key = entryScoreKey(doc.project, doc.filename, doc.content);
        selected.push({ doc, snippet: text, key });
        usedTokens += cost;
        if (selected.length >= 3) {
            break;
        }
    }
    // Final pass: trim from the end if token budget is exceeded (guards against
    // rounding / compaction producing more tokens than estimated during selection)
    while (selected.length > 1 && usedTokens > tokenBudget) {
        const dropped = selected.pop();
        usedTokens -= costOf(dropped.snippet);
    }
    return { selected, usedTokens };
}
795
+ // Re-export approximateTokens for use in output module
796
+ export { approximateTokens };