@phren/cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +590 -0
  3. package/mcp/dist/capabilities/cli.js +61 -0
  4. package/mcp/dist/capabilities/index.js +15 -0
  5. package/mcp/dist/capabilities/mcp.js +61 -0
  6. package/mcp/dist/capabilities/types.js +57 -0
  7. package/mcp/dist/capabilities/vscode.js +61 -0
  8. package/mcp/dist/capabilities/web-ui.js +61 -0
  9. package/mcp/dist/cli-actions.js +302 -0
  10. package/mcp/dist/cli-config.js +580 -0
  11. package/mcp/dist/cli-extract.js +305 -0
  12. package/mcp/dist/cli-govern.js +371 -0
  13. package/mcp/dist/cli-graph.js +169 -0
  14. package/mcp/dist/cli-hooks-citations.js +44 -0
  15. package/mcp/dist/cli-hooks-context.js +56 -0
  16. package/mcp/dist/cli-hooks-globs.js +83 -0
  17. package/mcp/dist/cli-hooks-output.js +130 -0
  18. package/mcp/dist/cli-hooks-retrieval.js +2 -0
  19. package/mcp/dist/cli-hooks-session.js +1402 -0
  20. package/mcp/dist/cli-hooks.js +350 -0
  21. package/mcp/dist/cli-namespaces.js +989 -0
  22. package/mcp/dist/cli-ops.js +253 -0
  23. package/mcp/dist/cli-search.js +407 -0
  24. package/mcp/dist/cli.js +108 -0
  25. package/mcp/dist/content-archive.js +278 -0
  26. package/mcp/dist/content-citation.js +391 -0
  27. package/mcp/dist/content-dedup.js +622 -0
  28. package/mcp/dist/content-learning.js +472 -0
  29. package/mcp/dist/content-metadata.js +186 -0
  30. package/mcp/dist/content-validate.js +462 -0
  31. package/mcp/dist/core-finding.js +54 -0
  32. package/mcp/dist/core-project.js +36 -0
  33. package/mcp/dist/core-search.js +50 -0
  34. package/mcp/dist/data-access.js +400 -0
  35. package/mcp/dist/data-tasks.js +821 -0
  36. package/mcp/dist/embedding.js +344 -0
  37. package/mcp/dist/entrypoint.js +387 -0
  38. package/mcp/dist/finding-context.js +172 -0
  39. package/mcp/dist/finding-impact.js +181 -0
  40. package/mcp/dist/finding-journal.js +122 -0
  41. package/mcp/dist/finding-lifecycle.js +259 -0
  42. package/mcp/dist/governance-audit.js +22 -0
  43. package/mcp/dist/governance-locks.js +96 -0
  44. package/mcp/dist/governance-policy.js +648 -0
  45. package/mcp/dist/governance-scores.js +355 -0
  46. package/mcp/dist/hooks.js +449 -0
  47. package/mcp/dist/impact-scoring.js +22 -0
  48. package/mcp/dist/index-query.js +168 -0
  49. package/mcp/dist/index.js +205 -0
  50. package/mcp/dist/init-config.js +336 -0
  51. package/mcp/dist/init-preferences.js +62 -0
  52. package/mcp/dist/init-setup.js +1305 -0
  53. package/mcp/dist/init-shared.js +29 -0
  54. package/mcp/dist/init.js +1730 -0
  55. package/mcp/dist/link-checksums.js +62 -0
  56. package/mcp/dist/link-context.js +257 -0
  57. package/mcp/dist/link-doctor.js +591 -0
  58. package/mcp/dist/link-skills.js +212 -0
  59. package/mcp/dist/link.js +596 -0
  60. package/mcp/dist/logger.js +15 -0
  61. package/mcp/dist/machine-identity.js +38 -0
  62. package/mcp/dist/mcp-config.js +254 -0
  63. package/mcp/dist/mcp-data.js +315 -0
  64. package/mcp/dist/mcp-extract-facts.js +78 -0
  65. package/mcp/dist/mcp-extract.js +133 -0
  66. package/mcp/dist/mcp-finding.js +557 -0
  67. package/mcp/dist/mcp-graph.js +339 -0
  68. package/mcp/dist/mcp-hooks.js +256 -0
  69. package/mcp/dist/mcp-memory.js +58 -0
  70. package/mcp/dist/mcp-ops.js +328 -0
  71. package/mcp/dist/mcp-search.js +628 -0
  72. package/mcp/dist/mcp-session.js +651 -0
  73. package/mcp/dist/mcp-skills.js +189 -0
  74. package/mcp/dist/mcp-tasks.js +551 -0
  75. package/mcp/dist/mcp-types.js +7 -0
  76. package/mcp/dist/memory-ui-assets.js +6 -0
  77. package/mcp/dist/memory-ui-data.js +513 -0
  78. package/mcp/dist/memory-ui-graph.js +1910 -0
  79. package/mcp/dist/memory-ui-page.js +353 -0
  80. package/mcp/dist/memory-ui-scripts.js +1387 -0
  81. package/mcp/dist/memory-ui-server.js +1218 -0
  82. package/mcp/dist/memory-ui-styles.js +555 -0
  83. package/mcp/dist/memory-ui.js +9 -0
  84. package/mcp/dist/package-metadata.js +13 -0
  85. package/mcp/dist/phren-art.js +52 -0
  86. package/mcp/dist/phren-core.js +108 -0
  87. package/mcp/dist/phren-dotenv.js +67 -0
  88. package/mcp/dist/phren-paths.js +476 -0
  89. package/mcp/dist/proactivity.js +172 -0
  90. package/mcp/dist/profile-store.js +228 -0
  91. package/mcp/dist/project-config.js +85 -0
  92. package/mcp/dist/project-locator.js +25 -0
  93. package/mcp/dist/project-topics.js +1134 -0
  94. package/mcp/dist/provider-adapters.js +176 -0
  95. package/mcp/dist/runtime-profile.js +18 -0
  96. package/mcp/dist/session-checkpoints.js +131 -0
  97. package/mcp/dist/session-utils.js +68 -0
  98. package/mcp/dist/shared-content.js +8 -0
  99. package/mcp/dist/shared-embedding-cache.js +143 -0
  100. package/mcp/dist/shared-fragment-graph.js +456 -0
  101. package/mcp/dist/shared-governance.js +4 -0
  102. package/mcp/dist/shared-index.js +1334 -0
  103. package/mcp/dist/shared-ollama.js +192 -0
  104. package/mcp/dist/shared-paths.js +1 -0
  105. package/mcp/dist/shared-retrieval.js +796 -0
  106. package/mcp/dist/shared-search-fallback.js +375 -0
  107. package/mcp/dist/shared-sqljs.js +42 -0
  108. package/mcp/dist/shared-stemmer.js +171 -0
  109. package/mcp/dist/shared-vector-index.js +199 -0
  110. package/mcp/dist/shared.js +114 -0
  111. package/mcp/dist/shell-entry.js +209 -0
  112. package/mcp/dist/shell-input.js +943 -0
  113. package/mcp/dist/shell-palette.js +119 -0
  114. package/mcp/dist/shell-render.js +252 -0
  115. package/mcp/dist/shell-state-store.js +81 -0
  116. package/mcp/dist/shell-types.js +13 -0
  117. package/mcp/dist/shell-view-list.js +14 -0
  118. package/mcp/dist/shell-view.js +707 -0
  119. package/mcp/dist/shell.js +352 -0
  120. package/mcp/dist/skill-files.js +117 -0
  121. package/mcp/dist/skill-registry.js +279 -0
  122. package/mcp/dist/skill-state.js +28 -0
  123. package/mcp/dist/startup-embedding.js +57 -0
  124. package/mcp/dist/status.js +323 -0
  125. package/mcp/dist/synonyms.json +670 -0
  126. package/mcp/dist/task-hygiene.js +251 -0
  127. package/mcp/dist/task-lifecycle.js +347 -0
  128. package/mcp/dist/tasks-github.js +76 -0
  129. package/mcp/dist/telemetry.js +165 -0
  130. package/mcp/dist/test-global-setup.js +37 -0
  131. package/mcp/dist/tool-registry.js +104 -0
  132. package/mcp/dist/update.js +97 -0
  133. package/mcp/dist/utils.js +543 -0
  134. package/package.json +67 -0
  135. package/skills/README.md +7 -0
  136. package/skills/consolidate/SKILL.md +152 -0
  137. package/skills/discover/SKILL.md +175 -0
  138. package/skills/init/SKILL.md +216 -0
  139. package/skills/profiles/SKILL.md +121 -0
  140. package/skills/sync/SKILL.md +261 -0
  141. package/starter/README.md +74 -0
  142. package/starter/global/CLAUDE.md +89 -0
  143. package/starter/global/skills/humanize.md +30 -0
  144. package/starter/global/skills/pipeline.md +35 -0
  145. package/starter/global/skills/release.md +35 -0
  146. package/starter/machines.yaml +8 -0
  147. package/starter/my-api/.claude/skills/README.md +7 -0
  148. package/starter/my-api/CLAUDE.md +33 -0
  149. package/starter/my-api/FINDINGS.md +9 -0
  150. package/starter/my-api/summary.md +7 -0
  151. package/starter/my-api/tasks.md +7 -0
  152. package/starter/my-first-project/.claude/skills/README.md +7 -0
  153. package/starter/my-first-project/CLAUDE.md +49 -0
  154. package/starter/my-first-project/FINDINGS.md +24 -0
  155. package/starter/my-first-project/summary.md +11 -0
  156. package/starter/my-first-project/tasks.md +25 -0
  157. package/starter/my-frontend/.claude/skills/README.md +7 -0
  158. package/starter/my-frontend/CLAUDE.md +33 -0
  159. package/starter/my-frontend/FINDINGS.md +9 -0
  160. package/starter/my-frontend/summary.md +7 -0
  161. package/starter/my-frontend/tasks.md +7 -0
  162. package/starter/profiles/default.yaml +4 -0
  163. package/starter/profiles/personal.yaml +4 -0
  164. package/starter/profiles/work.yaml +4 -0
  165. package/starter/templates/README.md +7 -0
  166. package/starter/templates/frontend/CLAUDE.md +23 -0
  167. package/starter/templates/frontend/FINDINGS.md +7 -0
  168. package/starter/templates/frontend/reference/README.md +4 -0
  169. package/starter/templates/frontend/summary.md +7 -0
  170. package/starter/templates/frontend/tasks.md +11 -0
  171. package/starter/templates/library/CLAUDE.md +22 -0
  172. package/starter/templates/library/FINDINGS.md +7 -0
  173. package/starter/templates/library/reference/README.md +4 -0
  174. package/starter/templates/library/summary.md +7 -0
  175. package/starter/templates/library/tasks.md +11 -0
  176. package/starter/templates/monorepo/CLAUDE.md +21 -0
  177. package/starter/templates/monorepo/FINDINGS.md +7 -0
  178. package/starter/templates/monorepo/reference/README.md +4 -0
  179. package/starter/templates/monorepo/summary.md +7 -0
  180. package/starter/templates/monorepo/tasks.md +11 -0
  181. package/starter/templates/python-project/CLAUDE.md +21 -0
  182. package/starter/templates/python-project/FINDINGS.md +7 -0
  183. package/starter/templates/python-project/reference/README.md +4 -0
  184. package/starter/templates/python-project/summary.md +7 -0
  185. package/starter/templates/python-project/tasks.md +10 -0
@@ -0,0 +1,375 @@
1
+ import { createHash } from "crypto";
2
+ import { debugLog } from "./shared.js";
3
+ import { STOP_WORDS } from "./utils.js";
4
+ import { porterStem } from "./shared-stemmer.js";
5
+ import { classifyFile, normalizeIndexedContent, rowToDocWithRowid } from "./shared-index.js";
6
+ import { embedText, cosineSimilarity, getEmbeddingModel, getOllamaUrl, getCloudEmbeddingUrl } from "./shared-ollama.js";
7
+ import { getEmbeddingCache } from "./shared-embedding-cache.js";
8
+ import { getPersistentVectorIndex } from "./shared-vector-index.js";
9
+ import * as fs from "fs";
10
+ import * as path from "path";
11
// Environment variable name for the hybrid-search feature flag
// (default ON; values "0"/"false"/"off"/"no" disable it — see cosineFallback).
const HYBRID_SEARCH_FLAG = "PHREN_FEATURE_HYBRID_SEARCH";
// Minimum TF-IDF cosine score for a document to be included in fallback results.
const COSINE_SIMILARITY_MIN = 0.15;
// Cosine fallback is skipped entirely when the corpus exceeds this many docs.
const COSINE_MAX_CORPUS = 10000;
const COSINE_CANDIDATE_CAP = 500; // max docs loaded into memory for cosine scoring
// Number of deterministic rowid windows used to top up candidates when the FTS pre-filter falls short.
const COSINE_WINDOW_COUNT = 4;
16
/** Split a path on forward/back slashes, dropping empty segments. */
function splitPathSegments(filePath) {
    const segments = [];
    for (const part of filePath.split(/[\\/]+/)) {
        if (part)
            segments.push(part);
    }
    return segments;
}
19
/**
 * Derive the project name, bare filename, and project-relative file path for a
 * doc stored under the phren root. Tries a normalized string-prefix match
 * first, falls back to path.relative for mismatched separators, and keeps the
 * raw fullPath when the file lives outside the phren root entirely.
 */
export function deriveVectorDocIdentity(phrenPath, fullPath) {
    const normRoot = phrenPath.replace(/[\\/]+/g, "/").replace(/\/+$/, "");
    const normFull = fullPath.replace(/[\\/]+/g, "/");
    let rel;
    if (normFull === normRoot) {
        rel = "";
    }
    else if (normFull.startsWith(`${normRoot}/`)) {
        rel = normFull.slice(normRoot.length + 1);
    }
    else {
        // Separator-agnostic fallback; an escaping ("../") or absolute result
        // means the doc is outside the root, so keep the original full path.
        const relative = path.relative(phrenPath, fullPath);
        const inside = !relative.startsWith("..") && !path.isAbsolute(relative);
        rel = inside ? relative : fullPath;
    }
    const relParts = splitPathSegments(rel);
    const [projectSegment = ""] = relParts;
    return {
        project: projectSegment,
        filename: splitPathSegments(fullPath).at(-1) ?? "",
        relFile: relParts.slice(1).join("/"),
    };
}
42
// Module-level cache for TF-IDF document frequencies.
// Keyed by a fingerprint of the candidate doc IDs so that different candidate subsets and
// incremental index mutations produce distinct cache entries rather than reusing stale counts.
// Intentionally not locked: single-threaded JS event loop, cache is eventually consistent,
// worst case is a redundant recompute. No data loss is possible since this is a pure computation cache.
// Max 100 entries to bound memory (LRU-style: oldest key evicted on overflow).
const MAX_DF_CACHE_SIZE = 100;
// Map<fingerprint string, Map<term, doc-frequency count>> — populated/read by tfidfCosine().
const dfCache = new Map();
50
/** Invalidate the DF cache. Call after a full index rebuild. */
export function invalidateDfCache() {
    // The DF and token caches share the same lifecycle: drop them together.
    for (const cache of [dfCache, tokenCache])
        cache.clear();
}
55
// Module-level cache for tokenized document content.
// Keyed by a short content hash so the same document content is only tokenized once per server lifetime.
// Cleared on full rebuild (same lifecycle as dfCache). Max 2000 entries to bound memory.
// Intentionally not locked: single-threaded JS event loop, cache is eventually consistent,
// worst case is a redundant recompute. No data loss is possible since this is a pure computation cache.
const MAX_TOKEN_CACHE = 2000;
// Map<16-hex-char sha256 prefix, stemmed token array> — populated/read by cachedTokenize().
const tokenCache = new Map();
62
/**
 * Tokenize `text`, memoizing by a 16-hex-char SHA-256 prefix of the content so
 * identical document content is only tokenized once per process lifetime.
 */
function cachedTokenize(text) {
    const key = createHash("sha256").update(text).digest("hex").slice(0, 16);
    const cached = tokenCache.get(key);
    if (cached)
        return cached;
    const tokens = tokenize(text);
    // Bound the cache: evict the oldest (first-inserted) key on overflow.
    if (tokenCache.size >= MAX_TOKEN_CACHE) {
        const oldestKey = tokenCache.keys().next().value;
        tokenCache.delete(oldestKey ?? "");
    }
    tokenCache.set(key, tokens);
    return tokens;
}
75
/**
 * 32-bit FNV-1a hash of `text` (over UTF-16 code units), used as a stable,
 * query-dependent seed for deterministic candidate sampling.
 */
function deterministicSeed(text) {
    // FNV-1a: offset basis 0x811c9dc5, prime 16777619; xor then multiply per unit.
    let hash = 0x811c9dc5;
    for (let idx = 0; idx < text.length; idx += 1) {
        hash = Math.imul(hash ^ text.charCodeAt(idx), 16777619);
    }
    // Coerce to an unsigned 32-bit integer.
    return hash >>> 0;
}
83
/**
 * Load up to `limit` doc rows ordered by rowid. Without `wrapBefore`, reads
 * forward from `startRowid`; with `wrapBefore`, reads rows strictly below it
 * (the wrap-around half of a window scan).
 */
function loadCosineFallbackWindow(db, startRowid, limit, wrapBefore) {
    const where = wrapBefore === undefined ? "rowid >= ?" : "rowid < ?";
    const anchor = wrapBefore ?? startRowid;
    const result = db.exec(`SELECT rowid, project, filename, type, content, path FROM docs WHERE ${where} ORDER BY rowid LIMIT ?`, [anchor, limit]);
    return result?.[0]?.values ?? [];
}
89
/**
 * Tokenize text into non-stop-word tokens for TF-IDF computation, with stemming.
 */
function tokenize(text) {
    const stems = [];
    const words = text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/);
    for (const word of words) {
        if (word.length > 1 && !STOP_WORDS.has(word))
            stems.push(porterStem(word));
    }
    return stems;
}
100
/**
 * Compute TF-IDF cosine similarity scores for a query against a corpus of documents.
 * Returns an array of similarity scores in the same order as docs.
 * @param docs - Array of document content strings to score.
 * @param query - The search query text.
 * @param corpusN - Total number of documents in the full corpus (for IDF denominator).
 * Defaults to docs.length, which is correct when docs IS the full corpus.
 * Pass the real total when docs is a pre-filtered subset so IDF scores are not inflated.
 * @returns number[] — one score per doc; all zeros when the query has no usable tokens.
 */
function tfidfCosine(docs, query, corpusN) {
    const queryTokens = tokenize(query);
    if (queryTokens.length === 0)
        return docs.map(() => 0);
    // Collect all unique terms from query + all docs (use cached tokenization for repeated content)
    const allTokens = new Set(queryTokens);
    const docTokenLists = docs.map(d => {
        const tokens = cachedTokenize(d);
        for (const t of tokens)
            allTokens.add(t);
        return tokens;
    });
    // Build a Set per document for O(1) term lookups
    const docTokenSets = docTokenLists.map(tokens => new Set(tokens));
    const terms = [...allTokens];
    // Use the full corpus N for IDF so scores are comparable even when docs is a subset.
    const N = corpusN ?? docs.length;
    // Compute document frequency for each term, keyed by a fingerprint of the candidate doc set
    // so that different subsets and incremental index mutations get distinct cache entries.
    // The fingerprint is the first 4 tokens of each doc, joined, truncated to 128 chars —
    // a heuristic identity, so distinct candidate sets sharing a long prefix can collide.
    const candidateFingerprint = docTokenLists.map(tl => tl.slice(0, 4).join(",")).join("|").slice(0, 128);
    const cacheKey = `fp:${candidateFingerprint}`;
    const cachedDf = dfCache.get(cacheKey);
    const df = cachedDf ?? new Map();
    // Compute DF for any terms not yet in cache.
    // NOTE: on a cache hit this mutates the cached Map in place, adding counts for
    // query terms not seen before under this fingerprint (eventually consistent by design).
    for (const term of terms) {
        if (!df.has(term)) {
            let count = 0;
            for (const docSet of docTokenSets) {
                if (docSet.has(term))
                    count++;
            }
            df.set(term, count);
        }
    }
    if (!cachedDf) {
        // Insert the freshly built DF map, evicting the oldest key on overflow.
        if (dfCache.size >= MAX_DF_CACHE_SIZE)
            dfCache.delete(dfCache.keys().next().value ?? "");
        dfCache.set(cacheKey, df);
    }
    // Dense TF-IDF vector over `terms`; TF is length-normalized (|| 1 guards empty docs),
    // IDF uses the smoothed form log((N+1)/(df+1)) + 1 so unseen terms still get weight.
    function buildVector(tokens) {
        const tf = new Map();
        for (const t of tokens)
            tf.set(t, (tf.get(t) ?? 0) + 1);
        return terms.map(term => {
            const termTf = (tf.get(term) ?? 0) / (tokens.length || 1);
            const idf = Math.log((N + 1) / ((df.get(term) ?? 0) + 1)) + 1;
            return termTf * idf;
        });
    }
    // Standard cosine similarity; returns 0 (not NaN) when either vector is all-zero.
    function cosine(a, b) {
        let dot = 0, normA = 0, normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        return denom === 0 ? 0 : dot / denom;
    }
    const queryVec = buildVector(queryTokens);
    return docTokenLists.map(docTokens => cosine(queryVec, buildVector(docTokens)));
}
169
/**
 * Cosine fallback search: when FTS5 returns fewer than COSINE_FALLBACK_THRESHOLD results,
 * load all docs and rank by TF-IDF cosine similarity.
 * Only activated when PHREN_FEATURE_HYBRID_SEARCH=1 and corpus size <= COSINE_MAX_CORPUS.
 * Returns DocRow[] ranked by similarity (threshold > COSINE_SIMILARITY_MIN), excluding already-found rowids.
 * @param db - sql.js database handle for the docs index.
 * @param query - The search query text.
 * @param excludeRowids - Set of rowids already returned by FTS, skipped here.
 * @param limit - Maximum number of docs to return.
 */
export function cosineFallback(db, query, excludeRowids, limit) {
    // FIX: the debug guards previously read `(process.env.PHREN_DEBUG || process.env.PHREN_DEBUG)`
    // — a duplicated operand. Consolidated into one helper with a single check.
    const debugErr = (stage, err) => {
        if (process.env.PHREN_DEBUG)
            process.stderr.write(`[phren] cosineFallback ${stage}: ${err instanceof Error ? err.message : String(err)}\n`);
    };
    // Feature flag guard — default ON; set PHREN_FEATURE_HYBRID_SEARCH=0 to disable
    const flagVal = process.env[HYBRID_SEARCH_FLAG];
    if (flagVal !== undefined && ["0", "false", "off", "no"].includes(flagVal.trim().toLowerCase())) {
        return [];
    }
    // Corpus stats: count for the size guard, min/max rowid for deterministic windowing.
    let totalDocs = 0;
    let minRowid = 0;
    let maxRowid = 0;
    try {
        const statsResult = db.exec("SELECT MIN(rowid), MAX(rowid), COUNT(*) FROM docs");
        if (statsResult?.length && statsResult[0]?.values?.length) {
            minRowid = Number(statsResult[0].values[0][0] ?? 0);
            maxRowid = Number(statsResult[0].values[0][1] ?? 0);
            totalDocs = Number(statsResult[0].values[0][2] ?? 0);
        }
    }
    catch (err) {
        debugErr("count", err);
        return [];
    }
    if (totalDocs > COSINE_MAX_CORPUS) {
        debugLog(`cosineFallback: corpus size ${totalDocs} exceeds ${COSINE_MAX_CORPUS}, skipping`);
        return [];
    }
    // Load docs with candidate capping to bound memory usage.
    // If corpus fits in cap, load all; otherwise use FTS5 keyword pre-filter to get relevant candidates.
    let allRows = null;
    try {
        if (totalDocs <= COSINE_CANDIDATE_CAP) {
            const results = db.exec("SELECT rowid, project, filename, type, content, path FROM docs");
            if (!Array.isArray(results) || !results.length || !results[0]?.values?.length)
                return [];
            allRows = results[0].values;
        }
        else {
            // Pre-filter: use FTS5 to get top candidates, then fill to cap with deterministic rowid windows.
            // Strip punctuation so the MATCH expression stays syntactically safe for FTS5.
            const safeQ = query.replace(/[^\w\s]/g, " ").trim().split(/\s+/).filter(w => w.length > 2).slice(0, 5).join(" OR ");
            const ftsRows = [];
            if (safeQ) {
                try {
                    const ftsRes = db.exec(`SELECT rowid, project, filename, type, content, path FROM docs WHERE docs MATCH ? ORDER BY rank LIMIT ${COSINE_CANDIDATE_CAP}`, [safeQ]);
                    if (ftsRes?.length && ftsRes[0]?.values?.length)
                        ftsRows.push(...ftsRes[0].values);
                }
                catch (err) {
                    debugErr("FTS pre-filter", err);
                }
            }
            // If FTS gave fewer than cap, supplement with deterministic rowid windows
            // seeded by the query so repeated searches sample the same rows.
            if (ftsRows.length < COSINE_CANDIDATE_CAP && totalDocs > 0 && maxRowid >= minRowid) {
                const ftsRowIds = new Set(ftsRows.map(r => Number(r[0])));
                const remaining = COSINE_CANDIDATE_CAP - ftsRows.length;
                const span = Math.max(1, maxRowid - minRowid + 1);
                const windowCount = Math.min(COSINE_WINDOW_COUNT, remaining);
                const perWindow = Math.max(1, Math.ceil(remaining / Math.max(1, windowCount)));
                const stride = Math.max(1, Math.floor(span / Math.max(1, windowCount)));
                const seed = deterministicSeed(query);
                // Append rows not already collected, stopping at the cap.
                const pushRows = (rows) => {
                    for (const row of rows) {
                        const rowid = Number(row[0]);
                        if (ftsRowIds.has(rowid))
                            continue;
                        ftsRowIds.add(rowid);
                        ftsRows.push(row);
                        if (ftsRows.length >= COSINE_CANDIDATE_CAP)
                            break;
                    }
                };
                try {
                    for (let i = 0; i < windowCount && ftsRows.length < COSINE_CANDIDATE_CAP; i++) {
                        const offset = (seed + i * stride) % span;
                        const startRowid = minRowid + offset;
                        // Forward read from the window start, then wrap below it if still short.
                        pushRows(loadCosineFallbackWindow(db, startRowid, perWindow));
                        if (ftsRows.length >= COSINE_CANDIDATE_CAP)
                            break;
                        pushRows(loadCosineFallbackWindow(db, startRowid, perWindow, startRowid));
                    }
                    // Final top-up from the lowest rowids if the windows still fell short.
                    if (ftsRows.length < COSINE_CANDIDATE_CAP) {
                        pushRows(loadCosineFallbackWindow(db, minRowid, COSINE_CANDIDATE_CAP - ftsRows.length));
                    }
                }
                catch (err) {
                    debugErr("deterministicSample", err);
                }
            }
            if (ftsRows.length === 0)
                return [];
            allRows = ftsRows;
            debugLog(`cosineFallback: pre-filtered ${totalDocs} docs to ${allRows.length} candidates`);
        }
    }
    catch (err) {
        debugErr("loadDocs", err);
        return [];
    }
    // Separate rowids, DocRows, and content strings for scoring
    const docContents = [];
    const docMeta = [];
    for (const row of allRows ?? []) {
        const { rowid, doc } = rowToDocWithRowid(row);
        if (excludeRowids.has(rowid))
            continue;
        docContents.push(doc.content);
        docMeta.push(doc);
    }
    if (docContents.length === 0)
        return [];
    // Pass totalDocs so IDF denominators reflect the full corpus, not just the candidate subset.
    const scores = tfidfCosine(docContents, query, totalDocs);
    // Collect scored results above threshold
    const scored = [];
    for (let i = 0; i < scores.length; i++) {
        if (scores[i] > COSINE_SIMILARITY_MIN) {
            scored.push({ score: scores[i], doc: docMeta[i] });
        }
    }
    // Sort descending by score and return top-limit
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, limit).map(s => s.doc);
}
301
/**
 * Vector-based semantic search fallback using pre-computed Ollama embeddings.
 * Only runs when Ollama is configured (PHREN_OLLAMA_URL is set or defaults).
 * Returns DocRow[] sorted by cosine similarity, above 0.5 threshold.
 * @param phrenPath - Root path of the phren data directory.
 * @param query - The search query text (embedded via embedText).
 * @param excludePaths - Set of file paths already returned elsewhere, skipped here.
 * @param limit - Maximum number of docs to return.
 * @param project - Optional active project name; when set, results are scoped
 *   to that project plus "global" docs to prevent cross-project memory injection.
 */
export async function vectorFallback(phrenPath, query, excludePaths, limit, project) {
    // FIX: the debug guards previously read `(process.env.PHREN_DEBUG || process.env.PHREN_DEBUG)`
    // — a duplicated operand. Consolidated into one helper with a single check.
    const debugErr = (stage, err) => {
        if (process.env.PHREN_DEBUG)
            process.stderr.write(`[phren] vectorFallback ${stage}: ${err instanceof Error ? err.message : String(err)}\n`);
    };
    // Run when either Ollama or a cloud embedding endpoint is available
    if (!getOllamaUrl() && !getCloudEmbeddingUrl())
        return [];
    const cache = getEmbeddingCache(phrenPath);
    // Ensure the cache is loaded from disk — in hook subprocesses the singleton
    // starts empty because load() is only called in the MCP server / CLI entry.
    if (cache.size() === 0) {
        try {
            await cache.load();
        }
        catch (err) {
            debugErr("cacheLoad", err);
        }
    }
    if (cache.size() === 0)
        return [];
    const queryVec = await embedText(query);
    if (!queryVec || queryVec.length === 0)
        return [];
    const model = getEmbeddingModel();
    // Apply project scoping: when a project is detected, restrict vector results to that
    // project and the global project to prevent cross-project memory injection.
    const entries = cache.getAllEntries().filter(e => {
        if (e.model !== model)
            return false;
        if (excludePaths.has(e.path))
            return false;
        if (project) {
            // Allow global docs and docs from the active project
            const entryProject = deriveVectorDocIdentity(phrenPath, e.path).project;
            if (entryProject !== project && entryProject !== "global")
                return false;
        }
        return true;
    });
    if (entries.length === 0)
        return [];
    // Narrow candidates via the persistent vector index when it returns anything;
    // otherwise fall back to scoring every eligible entry.
    const eligiblePaths = new Set(entries.map((entry) => entry.path));
    const vectorIndex = getPersistentVectorIndex(phrenPath);
    vectorIndex.ensure(cache.getAllEntries());
    const indexedPaths = vectorIndex.query(model, queryVec, limit, eligiblePaths);
    const candidatePaths = indexedPaths.length > 0 ? new Set(indexedPaths) : eligiblePaths;
    const scored = entries
        .filter((entry) => candidatePaths.has(entry.path))
        .map(e => ({ path: e.path, score: cosineSimilarity(queryVec, e.vec) }))
        .filter(e => e.score > 0.50)
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);
    return scored.map(e => {
        const { project: entryProject, filename, relFile } = deriveVectorDocIdentity(phrenPath, e.path);
        // Use the same path-aware classifyFile logic as the indexer so reference/skills/etc.
        // get their correct type instead of always falling back to "other".
        const type = classifyFile(filename, relFile);
        // Hydrate and normalize content from disk with the same pipeline as the indexer.
        let content = "";
        try {
            if (e.path && fs.existsSync(e.path)) {
                const raw = fs.readFileSync(e.path, "utf-8");
                content = normalizeIndexedContent(raw, type, phrenPath, 10000);
            }
        }
        catch (err) {
            debugErr("fileRead", err);
        }
        return { project: entryProject, filename, type, content, path: e.path };
    });
}
@@ -0,0 +1,42 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import { fileURLToPath } from "url";
4
+ import { createRequire } from "module";
5
+ const require = createRequire(import.meta.url);
6
+ /**
7
+ * Locate the sql.js-fts5 WASM binary by require.resolve with path-probe fallback.
8
+ * Shared between shared-index.ts and embedding.ts to avoid duplication.
9
+ */
10
+ function findWasmBinary() {
11
+ try {
12
+ const resolved = require.resolve("sql.js-fts5/dist/sql-wasm.wasm");
13
+ if (fs.existsSync(resolved))
14
+ return fs.readFileSync(resolved);
15
+ }
16
+ catch (err) {
17
+ if ((process.env.PHREN_DEBUG || process.env.PHREN_DEBUG))
18
+ process.stderr.write(`[phren] findWasmBinary requireResolve: ${err instanceof Error ? err.message : String(err)}\n`);
19
+ // fall through to path probing
20
+ }
21
+ const __filename = fileURLToPath(import.meta.url);
22
+ let dir = path.dirname(__filename);
23
+ for (let i = 0; i < 5; i++) {
24
+ const candidateA = path.join(dir, "node_modules", "sql.js-fts5", "dist", "sql-wasm.wasm");
25
+ if (fs.existsSync(candidateA))
26
+ return fs.readFileSync(candidateA);
27
+ const candidateB = path.join(dir, "sql.js-fts5", "dist", "sql-wasm.wasm");
28
+ if (fs.existsSync(candidateB))
29
+ return fs.readFileSync(candidateB);
30
+ dir = path.dirname(dir);
31
+ }
32
+ return undefined;
33
+ }
34
// sql.js-fts5 ships as CommonJS; load its init function via the createRequire shim.
const _initSqlJs = require("sql.js-fts5");
/**
 * Bootstrap sql.js-fts5: find the WASM binary and initialise the library.
 * Shared across shared-index.ts and embedding.ts to avoid duplication.
 */
export async function bootstrapSqlJs() {
    const wasmBinary = findWasmBinary();
    // When the binary can't be located, let sql.js fall back to its own loading strategy.
    const config = wasmBinary ? { wasmBinary } : {};
    return _initSqlJs(config);
}
+ }
@@ -0,0 +1,171 @@
1
/**
 * Porter stemmer implementation for English words.
 * Based on the Porter (1980) algorithm.
 * Reduces an English word to its stem (e.g. "relational" -> "relat").
 * Assumes lowercase input; words of length <= 2 are returned unchanged.
 * NOTE(review): minor deviations from the canonical paper — step 1a's plural
 * rule and step 1c carry extra length guards; confirm against the reference
 * implementation if exact Porter compatibility matters.
 */
export function porterStem(word) {
    if (word.length <= 2)
        return word;
    // Porter consonant test: a/e/i/o/u are vowels; 'y' is a consonant at the
    // start of the word or after a vowel, a vowel after a consonant.
    function isConsonant(w, i) {
        const c = w[i];
        if (c === 'a' || c === 'e' || c === 'i' || c === 'o' || c === 'u')
            return false;
        if (c === 'y')
            return i === 0 ? true : !isConsonant(w, i - 1);
        return true;
    }
    // Porter's measure m: the number of VC (vowel-run then consonant-run)
    // sequences in the stem, per the [C](VC)^m[V] decomposition.
    function measure(stem) {
        if (stem.length === 0)
            return 0;
        let m = 0;
        let i = 0;
        // skip initial consonants
        while (i < stem.length && isConsonant(stem, i))
            i++;
        while (i < stem.length) {
            // count vowel sequence
            while (i < stem.length && !isConsonant(stem, i))
                i++;
            if (i >= stem.length)
                break;
            m++;
            // count consonant sequence
            while (i < stem.length && isConsonant(stem, i))
                i++;
        }
        return m;
    }
    // True when the stem contains at least one vowel (condition *v* in the paper).
    function hasVowel(stem) {
        for (let i = 0; i < stem.length; i++) {
            if (!isConsonant(stem, i))
                return true;
        }
        return false;
    }
    // True when the word ends in a double consonant (condition *d).
    function endsDoubleConsonant(w) {
        if (w.length < 2)
            return false;
        return w[w.length - 1] === w[w.length - 2] && isConsonant(w, w.length - 1);
    }
    // True when the word ends consonant-vowel-consonant where the final
    // consonant is not w, x, or y (condition *o).
    function endsCVC(w) {
        if (w.length < 3)
            return false;
        const l = w.length;
        if (!isConsonant(w, l - 1) || isConsonant(w, l - 2) || !isConsonant(w, l - 3))
            return false;
        const last = w[l - 1];
        return last !== 'w' && last !== 'x' && last !== 'y';
    }
    // Returns the stem with `suffix` removed, or null when w does not end in suffix.
    function endsWith(w, suffix) {
        if (w.length < suffix.length)
            return null;
        if (w.endsWith(suffix))
            return w.slice(0, -suffix.length);
        return null;
    }
    let w = word;
    // Step 1a: plurals — sses -> ss, ies -> i, trailing s dropped (but not ss).
    if (w.endsWith("sses")) {
        w = w.slice(0, -2);
    }
    else if (w.endsWith("ies")) {
        w = w.slice(0, -2);
    }
    else if (!w.endsWith("ss") && w.endsWith("s") && w.length > 2) {
        w = w.slice(0, -1);
    }
    // Step 1b: past tense / gerund — eed -> ee (if m>0), else strip ed/ing
    // when the remaining stem contains a vowel.
    let step1bExtra = false;
    if (w.endsWith("eed")) {
        const stem = w.slice(0, -3);
        if (measure(stem) > 0)
            w = w.slice(0, -1); // eed -> ee
    }
    else {
        let stemFound = null;
        if (w.endsWith("ed")) {
            stemFound = w.slice(0, -2);
        }
        else if (w.endsWith("ing")) {
            stemFound = w.slice(0, -3);
        }
        if (stemFound !== null && hasVowel(stemFound)) {
            w = stemFound;
            step1bExtra = true;
        }
    }
    // Step 1b repair: after stripping ed/ing, restore an 'e' (conflat/troubl/siz),
    // undouble a final consonant (hopp -> hop, but not ll/ss/zz), or add 'e'
    // to a short CVC stem (fil -> file).
    if (step1bExtra) {
        if (w.endsWith("at") || w.endsWith("bl") || w.endsWith("iz")) {
            w += "e";
        }
        else if (endsDoubleConsonant(w) && !w.endsWith("l") && !w.endsWith("s") && !w.endsWith("z")) {
            w = w.slice(0, -1);
        }
        else if (measure(w) === 1 && endsCVC(w)) {
            w += "e";
        }
    }
    // Step 1c: terminal y -> i when the rest of the word contains a vowel.
    if (w.endsWith("y") && w.length > 2 && hasVowel(w.slice(0, -1))) {
        w = w.slice(0, -1) + "i";
    }
    // Step 2: map double suffixes to single ones when the stem has m > 0.
    // Object key insertion order is preserved, so longer suffixes listed first
    // (e.g. "ational" before "ation") win — only the first match applies.
    const step2Map = {
        ational: "ate", tional: "tion", enci: "ence", anci: "ance",
        izer: "ize", abli: "able", alli: "al", entli: "ent", eli: "e",
        ousli: "ous", ization: "ize", ation: "ate", ator: "ate",
        alism: "al", iveness: "ive", fulness: "ful", ousness: "ous",
        aliti: "al", iviti: "ive", biliti: "ble",
    };
    for (const [suffix, replacement] of Object.entries(step2Map)) {
        const stem = endsWith(w, suffix);
        if (stem !== null && measure(stem) > 0) {
            w = stem + replacement;
            break;
        }
    }
    // Step 3: -icate/-ative/-ful/-ness etc. reduced when the stem has m > 0.
    const step3Map = {
        icate: "ic", ative: "", iciti: "ic",
        ical: "ic", ful: "", ness: "",
    };
    for (const [suffix, replacement] of Object.entries(step3Map)) {
        const stem = endsWith(w, suffix);
        if (stem !== null && measure(stem) > 0) {
            w = stem + replacement;
            break;
        }
    }
    // Step 4: strip remaining suffixes when m > 1; "ion" additionally requires
    // the stem to end in s or t (e.g. adoption -> adopt).
    const step4Suffixes = [
        "al", "ance", "ence", "er", "ic", "able", "ible", "ant",
        "ement", "ment", "ent", "ion", "ou", "ism", "ate", "iti",
        "ous", "ive", "ize",
    ];
    for (const suffix of step4Suffixes) {
        const stem = endsWith(w, suffix);
        if (stem !== null && measure(stem) > 1) {
            if (suffix === "ion") {
                if (stem.endsWith("s") || stem.endsWith("t")) {
                    w = stem;
                }
            }
            else {
                w = stem;
            }
            break;
        }
    }
    // Step 5a: drop a final 'e' when m > 1, or m == 1 and the stem is not CVC.
    if (w.endsWith("e")) {
        const stem = w.slice(0, -1);
        const m = measure(stem);
        if (m > 1 || (m === 1 && !endsCVC(stem))) {
            w = stem;
        }
    }
    // Step 5b: undouble a final 'll' when m > 1 (controll -> control).
    if (measure(w) > 1 && endsDoubleConsonant(w) && w.endsWith("l")) {
        w = w.slice(0, -1);
    }
    return w;
}
+ }