@phren/cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/LICENSE +21 -0
  2. package/README.md +590 -0
  3. package/mcp/dist/capabilities/cli.js +61 -0
  4. package/mcp/dist/capabilities/index.js +15 -0
  5. package/mcp/dist/capabilities/mcp.js +61 -0
  6. package/mcp/dist/capabilities/types.js +57 -0
  7. package/mcp/dist/capabilities/vscode.js +61 -0
  8. package/mcp/dist/capabilities/web-ui.js +61 -0
  9. package/mcp/dist/cli-actions.js +302 -0
  10. package/mcp/dist/cli-config.js +580 -0
  11. package/mcp/dist/cli-extract.js +305 -0
  12. package/mcp/dist/cli-govern.js +371 -0
  13. package/mcp/dist/cli-graph.js +169 -0
  14. package/mcp/dist/cli-hooks-citations.js +44 -0
  15. package/mcp/dist/cli-hooks-context.js +56 -0
  16. package/mcp/dist/cli-hooks-globs.js +83 -0
  17. package/mcp/dist/cli-hooks-output.js +130 -0
  18. package/mcp/dist/cli-hooks-retrieval.js +2 -0
  19. package/mcp/dist/cli-hooks-session.js +1402 -0
  20. package/mcp/dist/cli-hooks.js +350 -0
  21. package/mcp/dist/cli-namespaces.js +989 -0
  22. package/mcp/dist/cli-ops.js +253 -0
  23. package/mcp/dist/cli-search.js +407 -0
  24. package/mcp/dist/cli.js +108 -0
  25. package/mcp/dist/content-archive.js +278 -0
  26. package/mcp/dist/content-citation.js +391 -0
  27. package/mcp/dist/content-dedup.js +622 -0
  28. package/mcp/dist/content-learning.js +472 -0
  29. package/mcp/dist/content-metadata.js +186 -0
  30. package/mcp/dist/content-validate.js +462 -0
  31. package/mcp/dist/core-finding.js +54 -0
  32. package/mcp/dist/core-project.js +36 -0
  33. package/mcp/dist/core-search.js +50 -0
  34. package/mcp/dist/data-access.js +400 -0
  35. package/mcp/dist/data-tasks.js +821 -0
  36. package/mcp/dist/embedding.js +344 -0
  37. package/mcp/dist/entrypoint.js +387 -0
  38. package/mcp/dist/finding-context.js +172 -0
  39. package/mcp/dist/finding-impact.js +181 -0
  40. package/mcp/dist/finding-journal.js +122 -0
  41. package/mcp/dist/finding-lifecycle.js +259 -0
  42. package/mcp/dist/governance-audit.js +22 -0
  43. package/mcp/dist/governance-locks.js +96 -0
  44. package/mcp/dist/governance-policy.js +648 -0
  45. package/mcp/dist/governance-scores.js +355 -0
  46. package/mcp/dist/hooks.js +449 -0
  47. package/mcp/dist/impact-scoring.js +22 -0
  48. package/mcp/dist/index-query.js +168 -0
  49. package/mcp/dist/index.js +205 -0
  50. package/mcp/dist/init-config.js +336 -0
  51. package/mcp/dist/init-preferences.js +62 -0
  52. package/mcp/dist/init-setup.js +1305 -0
  53. package/mcp/dist/init-shared.js +29 -0
  54. package/mcp/dist/init.js +1730 -0
  55. package/mcp/dist/link-checksums.js +62 -0
  56. package/mcp/dist/link-context.js +257 -0
  57. package/mcp/dist/link-doctor.js +591 -0
  58. package/mcp/dist/link-skills.js +212 -0
  59. package/mcp/dist/link.js +596 -0
  60. package/mcp/dist/logger.js +15 -0
  61. package/mcp/dist/machine-identity.js +38 -0
  62. package/mcp/dist/mcp-config.js +254 -0
  63. package/mcp/dist/mcp-data.js +315 -0
  64. package/mcp/dist/mcp-extract-facts.js +78 -0
  65. package/mcp/dist/mcp-extract.js +133 -0
  66. package/mcp/dist/mcp-finding.js +557 -0
  67. package/mcp/dist/mcp-graph.js +339 -0
  68. package/mcp/dist/mcp-hooks.js +256 -0
  69. package/mcp/dist/mcp-memory.js +58 -0
  70. package/mcp/dist/mcp-ops.js +328 -0
  71. package/mcp/dist/mcp-search.js +628 -0
  72. package/mcp/dist/mcp-session.js +651 -0
  73. package/mcp/dist/mcp-skills.js +189 -0
  74. package/mcp/dist/mcp-tasks.js +551 -0
  75. package/mcp/dist/mcp-types.js +7 -0
  76. package/mcp/dist/memory-ui-assets.js +6 -0
  77. package/mcp/dist/memory-ui-data.js +513 -0
  78. package/mcp/dist/memory-ui-graph.js +1910 -0
  79. package/mcp/dist/memory-ui-page.js +353 -0
  80. package/mcp/dist/memory-ui-scripts.js +1387 -0
  81. package/mcp/dist/memory-ui-server.js +1218 -0
  82. package/mcp/dist/memory-ui-styles.js +555 -0
  83. package/mcp/dist/memory-ui.js +9 -0
  84. package/mcp/dist/package-metadata.js +13 -0
  85. package/mcp/dist/phren-art.js +52 -0
  86. package/mcp/dist/phren-core.js +108 -0
  87. package/mcp/dist/phren-dotenv.js +67 -0
  88. package/mcp/dist/phren-paths.js +476 -0
  89. package/mcp/dist/proactivity.js +172 -0
  90. package/mcp/dist/profile-store.js +228 -0
  91. package/mcp/dist/project-config.js +85 -0
  92. package/mcp/dist/project-locator.js +25 -0
  93. package/mcp/dist/project-topics.js +1134 -0
  94. package/mcp/dist/provider-adapters.js +176 -0
  95. package/mcp/dist/runtime-profile.js +18 -0
  96. package/mcp/dist/session-checkpoints.js +131 -0
  97. package/mcp/dist/session-utils.js +68 -0
  98. package/mcp/dist/shared-content.js +8 -0
  99. package/mcp/dist/shared-embedding-cache.js +143 -0
  100. package/mcp/dist/shared-fragment-graph.js +456 -0
  101. package/mcp/dist/shared-governance.js +4 -0
  102. package/mcp/dist/shared-index.js +1334 -0
  103. package/mcp/dist/shared-ollama.js +192 -0
  104. package/mcp/dist/shared-paths.js +1 -0
  105. package/mcp/dist/shared-retrieval.js +796 -0
  106. package/mcp/dist/shared-search-fallback.js +375 -0
  107. package/mcp/dist/shared-sqljs.js +42 -0
  108. package/mcp/dist/shared-stemmer.js +171 -0
  109. package/mcp/dist/shared-vector-index.js +199 -0
  110. package/mcp/dist/shared.js +114 -0
  111. package/mcp/dist/shell-entry.js +209 -0
  112. package/mcp/dist/shell-input.js +943 -0
  113. package/mcp/dist/shell-palette.js +119 -0
  114. package/mcp/dist/shell-render.js +252 -0
  115. package/mcp/dist/shell-state-store.js +81 -0
  116. package/mcp/dist/shell-types.js +13 -0
  117. package/mcp/dist/shell-view-list.js +14 -0
  118. package/mcp/dist/shell-view.js +707 -0
  119. package/mcp/dist/shell.js +352 -0
  120. package/mcp/dist/skill-files.js +117 -0
  121. package/mcp/dist/skill-registry.js +279 -0
  122. package/mcp/dist/skill-state.js +28 -0
  123. package/mcp/dist/startup-embedding.js +57 -0
  124. package/mcp/dist/status.js +323 -0
  125. package/mcp/dist/synonyms.json +670 -0
  126. package/mcp/dist/task-hygiene.js +251 -0
  127. package/mcp/dist/task-lifecycle.js +347 -0
  128. package/mcp/dist/tasks-github.js +76 -0
  129. package/mcp/dist/telemetry.js +165 -0
  130. package/mcp/dist/test-global-setup.js +37 -0
  131. package/mcp/dist/tool-registry.js +104 -0
  132. package/mcp/dist/update.js +97 -0
  133. package/mcp/dist/utils.js +543 -0
  134. package/package.json +67 -0
  135. package/skills/README.md +7 -0
  136. package/skills/consolidate/SKILL.md +152 -0
  137. package/skills/discover/SKILL.md +175 -0
  138. package/skills/init/SKILL.md +216 -0
  139. package/skills/profiles/SKILL.md +121 -0
  140. package/skills/sync/SKILL.md +261 -0
  141. package/starter/README.md +74 -0
  142. package/starter/global/CLAUDE.md +89 -0
  143. package/starter/global/skills/humanize.md +30 -0
  144. package/starter/global/skills/pipeline.md +35 -0
  145. package/starter/global/skills/release.md +35 -0
  146. package/starter/machines.yaml +8 -0
  147. package/starter/my-api/.claude/skills/README.md +7 -0
  148. package/starter/my-api/CLAUDE.md +33 -0
  149. package/starter/my-api/FINDINGS.md +9 -0
  150. package/starter/my-api/summary.md +7 -0
  151. package/starter/my-api/tasks.md +7 -0
  152. package/starter/my-first-project/.claude/skills/README.md +7 -0
  153. package/starter/my-first-project/CLAUDE.md +49 -0
  154. package/starter/my-first-project/FINDINGS.md +24 -0
  155. package/starter/my-first-project/summary.md +11 -0
  156. package/starter/my-first-project/tasks.md +25 -0
  157. package/starter/my-frontend/.claude/skills/README.md +7 -0
  158. package/starter/my-frontend/CLAUDE.md +33 -0
  159. package/starter/my-frontend/FINDINGS.md +9 -0
  160. package/starter/my-frontend/summary.md +7 -0
  161. package/starter/my-frontend/tasks.md +7 -0
  162. package/starter/profiles/default.yaml +4 -0
  163. package/starter/profiles/personal.yaml +4 -0
  164. package/starter/profiles/work.yaml +4 -0
  165. package/starter/templates/README.md +7 -0
  166. package/starter/templates/frontend/CLAUDE.md +23 -0
  167. package/starter/templates/frontend/FINDINGS.md +7 -0
  168. package/starter/templates/frontend/reference/README.md +4 -0
  169. package/starter/templates/frontend/summary.md +7 -0
  170. package/starter/templates/frontend/tasks.md +11 -0
  171. package/starter/templates/library/CLAUDE.md +22 -0
  172. package/starter/templates/library/FINDINGS.md +7 -0
  173. package/starter/templates/library/reference/README.md +4 -0
  174. package/starter/templates/library/summary.md +7 -0
  175. package/starter/templates/library/tasks.md +11 -0
  176. package/starter/templates/monorepo/CLAUDE.md +21 -0
  177. package/starter/templates/monorepo/FINDINGS.md +7 -0
  178. package/starter/templates/monorepo/reference/README.md +4 -0
  179. package/starter/templates/monorepo/summary.md +7 -0
  180. package/starter/templates/monorepo/tasks.md +11 -0
  181. package/starter/templates/python-project/CLAUDE.md +21 -0
  182. package/starter/templates/python-project/FINDINGS.md +7 -0
  183. package/starter/templates/python-project/reference/README.md +4 -0
  184. package/starter/templates/python-project/summary.md +7 -0
  185. package/starter/templates/python-project/tasks.md +10 -0
@@ -0,0 +1,622 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import * as crypto from "crypto";
4
+ import { debugLog, runtimeFile, KNOWN_OBSERVATION_TAGS } from "./shared.js";
5
+ import { isFeatureEnabled, safeProjectPath } from "./utils.js";
6
+ import { UNIVERSAL_TECH_TERMS_RE, EXTRA_ENTITY_PATTERNS } from "./phren-core.js";
7
+ import { isInactiveFindingLine } from "./finding-lifecycle.js";
8
// ── LLM provider abstraction ────────────────────────────────────────────────
const MAX_CACHE_ENTRIES = 500;
/**
 * Read a JSON cache file from disk and normalize every entry to the shape
 * `{ result, ts }`, backfilling a missing or non-numeric timestamp with "now".
 * Returns an empty object when the file does not exist. Malformed JSON throws
 * (callers wrap this in try/catch).
 */
function loadCache(cachePath) {
    if (!fs.existsSync(cachePath))
        return {};
    const parsed = JSON.parse(fs.readFileSync(cachePath, "utf8"));
    const fallbackTs = Date.now();
    const normalized = {};
    for (const [cacheKey, entry] of Object.entries(parsed)) {
        normalized[cacheKey] = {
            result: entry.result,
            ts: typeof entry.ts === "number" ? entry.ts : fallbackTs,
        };
    }
    return normalized;
}
22
/**
 * Evict the oldest entries (smallest `ts`) in place so `cache` holds at most
 * MAX_CACHE_ENTRIES items. No-op when already within the limit.
 */
function trimCache(cache) {
    const allEntries = Object.entries(cache);
    const excess = allEntries.length - MAX_CACHE_ENTRIES;
    if (excess <= 0)
        return;
    allEntries.sort(([, x], [, y]) => x.ts - y.ts);
    for (const [staleKey] of allEntries.slice(0, excess)) {
        delete cache[staleKey];
    }
}
33
/** Enforce the cache size cap, then write the cache to disk as compact JSON. */
function persistCache(targetPath, entries) {
    trimCache(entries);
    fs.writeFileSync(targetPath, JSON.stringify(entries));
}
37
/**
 * Generic cache-through helper: load cache → check TTL → touch timestamp → persist → return.
 * If the key is cached and within TTL, returns the cached result.
 * Otherwise, calls `compute()` to produce a fresh result, caches it, and returns it.
 * Cache I/O failures are non-fatal: they are logged to stderr (only when
 * PHREN_DEBUG is set) and the computation proceeds as if the cache were empty.
 */
async function withCache(cachePath, key, ttlMs, compute) {
    // BUG FIX: the guard previously read `PHREN_DEBUG || PHREN_DEBUG` (duplicated
    // operand) in two places; the check is now a single, shared helper.
    const warn = (stage, err) => {
        if (process.env.PHREN_DEBUG)
            process.stderr.write(`[phren] withCache ${stage} (${path.basename(cachePath)}): ${err instanceof Error ? err.message : String(err)}\n`);
    };
    // Fast path: serve a fresh-enough cached result and touch its timestamp
    // so frequently-used entries survive trimming (LRU-ish behavior).
    try {
        const cache = loadCache(cachePath);
        const hit = cache[key];
        if (hit && Date.now() - hit.ts < ttlMs) {
            hit.ts = Date.now();
            persistCache(cachePath, cache);
            return hit.result;
        }
    }
    catch (err) {
        warn("load", err);
    }
    const result = await compute();
    // Persist the fresh result; reload first so concurrent writers lose less data.
    try {
        const cache = loadCache(cachePath);
        cache[key] = { result, ts: Date.now() };
        persistCache(cachePath, cache);
    }
    catch (err) {
        warn("persist", err);
    }
    return result;
}
69
/** True when `error` is an Error whose name marks it as an abort ("AbortError"). */
function isAbortError(error) {
    if (!(error instanceof Error))
        return false;
    return error.name === "AbortError";
}
72
const LLM_TIMEOUT_MS = 10_000;
/**
 * Extract the assistant message text from an OpenAI-style chat-completions
 * response body. Returns "" when any part of the expected shape is absent.
 */
function parseOpenAiResponse(data) {
    const content = data.choices?.[0]?.message?.content;
    return content?.trim() ?? "";
}
77
/** POST to an LLM endpoint with a combined per-call timeout + parent abort relay. */
async function fetchLlm(url, init, signal, parseResponse) {
    // One controller per call: fires on the 10s timeout OR when the parent
    // signal aborts, whichever comes first.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), LLM_TIMEOUT_MS);
    signal?.addEventListener("abort", () => controller.abort(), { once: true });
    let response;
    try {
        response = await fetch(url, { ...init, signal: controller.signal });
    }
    finally {
        // Always release the timer, even on abort or network failure.
        clearTimeout(timer);
    }
    if (!response.ok)
        throw new Error(`LLM API error: ${response.status}`);
    return parseResponse(await response.json());
}
94
/**
 * Call the configured LLM with a single user prompt and return its raw text reply.
 *
 * Provider resolution order:
 *   1. PHREN_LLM_ENDPOINT — custom OpenAI-compatible endpoint (auth: PHREN_LLM_KEY,
 *      falling back to OPENAI_API_KEY / ANTHROPIC_API_KEY if set)
 *   2. ANTHROPIC_API_KEY — Anthropic REST API (no SDK required)
 *   3. OPENAI_API_KEY — OpenAI REST API
 *   4. none configured — resolves to "" so callers treat it as "no signal"
 *      (i.e. "not duplicate" / "no conflict").
 *
 * Default maxTokens is 10 — callers that only need YES/NO or CONFLICT/OK responses
 * need just 3-5 tokens. Callers expecting longer output pass an explicit override (e.g. 60).
 *
 * @param prompt user-role message content
 * @param signal optional AbortSignal; checked up-front and relayed into fetchLlm
 * @param maxTokens completion token cap (default 10)
 * @returns trimmed reply text, or "" when no provider is configured
 * @throws DOMException("AbortError") when already aborted; Error on non-2xx responses
 */
export async function callLlm(prompt, signal, maxTokens = 10) {
    // Check abort before starting any work to avoid unnecessary API calls
    if (signal?.aborted)
        throw new DOMException("Aborted", "AbortError");
    const endpoint = (process.env.PHREN_LLM_ENDPOINT);
    const customKey = (process.env.PHREN_LLM_KEY);
    const anthropicKey = process.env.ANTHROPIC_API_KEY;
    const openaiKey = process.env.OPENAI_API_KEY;
    const model = (process.env.PHREN_LLM_MODEL);
    if (endpoint) {
        // Custom endpoint: use PHREN_LLM_KEY, fall back to any available key.
        // Auth header is omitted entirely when no key is available.
        const key = customKey || openaiKey || anthropicKey || "";
        return fetchLlm(`${endpoint.replace(/\/$/, "")}/chat/completions`, {
            method: "POST",
            headers: { "Content-Type": "application/json", ...(key ? { Authorization: `Bearer ${key}` } : {}) },
            // temperature 0: deterministic answers for YES/NO-style checks
            body: JSON.stringify({ model: model || "gpt-4o-mini", messages: [{ role: "user", content: prompt }], max_tokens: maxTokens, temperature: 0 }),
        }, signal, parseOpenAiResponse);
    }
    else if (anthropicKey) {
        // Anthropic REST API fallback (no SDK required)
        return fetchLlm("https://api.anthropic.com/v1/messages", {
            method: "POST",
            headers: { "content-type": "application/json", "x-api-key": anthropicKey, "anthropic-version": "2023-06-01" },
            body: JSON.stringify({ model: model || "claude-haiku-4-5-20251001", max_tokens: maxTokens, messages: [{ role: "user", content: prompt }] }),
        }, signal, (data) => {
            // Anthropic returns an array of content blocks; only a leading text
            // block is used — anything else yields "".
            const d = data;
            const block = d.content?.[0];
            return (block?.type === "text" ? block.text ?? "" : "").trim();
        });
    }
    else if (openaiKey) {
        // OpenAI REST API fallback
        return fetchLlm("https://api.openai.com/v1/chat/completions", {
            method: "POST",
            headers: { "Content-Type": "application/json", Authorization: `Bearer ${openaiKey}` },
            body: JSON.stringify({ model: model || "gpt-4o-mini", messages: [{ role: "user", content: prompt }], max_tokens: maxTokens, temperature: 0 }),
        }, signal, parseOpenAiResponse);
    }
    else {
        // No LLM configured — return empty to signal "not duplicate" / "no conflict"
        return "";
    }
}
139
// ── Cache TTL constants ───────────────────────────────────────────────────────
const DEDUP_CACHE_TTL_MS = 86_400_000; // 1 day
const CONFLICT_CACHE_TTL_MS = 7 * 86_400_000; // 7 days
// ── Metadata stripping helpers ────────────────────────────────────────────────
/**
 * Remove every HTML comment (`<!-- ... -->`) from `s`, including comments that
 * span multiple lines (the `s` flag lets `.` cross newlines). Use this when
 * only timestamp/citation markers need to go.
 */
function stripHtmlComments(s) {
    const HTML_COMMENT_RE = /<!--.*?-->/gs;
    return s.replace(HTML_COMMENT_RE, "");
}
150
/**
 * Strip all common finding metadata from a bullet line:
 * - HTML comments: <!-- ... -->
 * - "migrated from" annotations: (migrated from ...)
 * - the leading bullet dash ("- ") at the start of the string
 */
export function stripMetadata(s) {
    const withoutComments = s.replace(/<!--.*?-->/gs, "");
    const withoutMigration = withoutComments.replace(/\(migrated from [^)]+\)/gi, "");
    return withoutMigration.replace(/^-\s+/, "");
}
162
// Stop words for lightweight semantic overlap checks
const DEDUP_STOP_WORDS = new Set([
    "the", "a", "an", "is", "are", "was", "were", "in", "on", "at", "to", "for",
    "of", "and", "or", "but", "not", "with", "from", "by", "as", "it", "its",
    "this", "that", "be", "has", "have", "had", "will", "would", "can", "could", "should",
]);
/**
 * Split `text` into a Set of lowercase word tokens, dropping whitespace,
 * punctuation, empty fragments, and common English stop words.
 */
export function jaccardTokenize(text) {
    const fragments = text.toLowerCase().split(/[\s\W]+/);
    const kept = fragments.filter((word) => word.length > 0 && !DEDUP_STOP_WORDS.has(word));
    return new Set(kept);
}
173
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two token sets, in [0, 1].
 * Two empty sets are defined as identical (returns 1).
 */
export function jaccardSimilarity(a, b) {
    if (a.size === 0 && b.size === 0)
        return 1;
    let shared = 0;
    for (const token of a) {
        if (b.has(token))
            shared++;
    }
    const unionSize = a.size + b.size - shared;
    return unionSize === 0 ? 0 : shared / unionSize;
}
184
// ── Contradiction detection ───────────────────────────────────────────────────
// Use the shared universal starter set. Framework/tool specifics are learned
// dynamically per project via extractDynamicEntities().
const PROSE_ENTITY_RE = UNIVERSAL_TECH_TERMS_RE;
// Polarity cues for learningPolarity(): words that recommend something…
const POSITIVE_RE = /\b(always|prefer|should|must|works|recommend|enable)\b/i;
// …versus words that warn against it.
const NEGATIVE_RE = /\b(never|avoid|don't|do not|shouldn't|must not|broken|deprecated|disable)\b/i;
// ── Dynamic entity extraction ─────────────────────────────────────────────────
const ENTITY_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
// Patterns that suggest a token is a proper noun / tool name:
// - CamelCase word (at least one interior uppercase): PhotonMappingEngine, GameKit
// - All-caps acronym of 2–8 letters: AWS, GPU, API
// - Known suffix patterns: *.js, *Engine, *API, *SDK, *DB, *UI, *ML
// Note the /g flag: users must copy the regex (or reset lastIndex) before exec loops.
const DYNAMIC_ENTITY_RE = /\b(?:[A-Z][a-z]+(?:[A-Z][a-z]*)+|[A-Z]{2,8}|[A-Z][a-z]+(?:Engine|API|SDK|DB|UI|ML|IO|OS|JS|TS|CLI|MCP|GL|VR|AR|AI|NN|GAN))\b/g;
197
/**
 * Scan existing findings for proper nouns / tool names that appear in 2+ bullets.
 * Results are cached in .runtime/project-entities-{project}.json (1h TTL or
 * invalidated when FINDINGS.md changes).
 *
 * @param phrenPath root of the phren data directory
 * @param project project directory name under phrenPath
 * @returns Set of lowercase entity names; empty Set when FINDINGS.md is absent
 *          or on any I/O/parse error (the whole body is best-effort).
 */
export function extractDynamicEntities(phrenPath, project) {
    try {
        const findingsPath = path.join(phrenPath, project, "FINDINGS.md");
        if (!fs.existsSync(findingsPath))
            return new Set();
        // mtime doubles as the cache-invalidation key alongside the 1h TTL.
        const findingsStat = fs.statSync(findingsPath);
        const findingsMtime = findingsStat.mtimeMs;
        const cachePath = runtimeFile(phrenPath, `project-entities-${project}.json`);
        // Try reading existing cache; any corruption falls through to a rebuild.
        if (fs.existsSync(cachePath)) {
            try {
                const cached = JSON.parse(fs.readFileSync(cachePath, "utf8"));
                const age = Date.now() - (cached.builtAt ?? 0);
                if (age < ENTITY_CACHE_TTL_MS && cached.findingsMtimeMs === findingsMtime) {
                    return new Set(cached.entities);
                }
            }
            catch {
                // fall through to rebuild
            }
        }
        // Rebuild: scan active "- " bullets for candidate tokens.
        const content = fs.readFileSync(findingsPath, "utf8");
        const bullets = content.split("\n").filter(l => l.startsWith("- ") && !isInactiveFindingLine(l));
        // Count in how many DISTINCT bullets each candidate appears
        // (the per-bullet `seen` set prevents double-counting within one line).
        const counts = new Map();
        for (const bullet of bullets) {
            const stripped = bullet.replace(/<!--.*?-->/g, "").replace(/^-\s+/, "");
            const seen = new Set();
            let m;
            // Fresh copy of the /g regex so the shared constant's lastIndex is never mutated.
            const re = new RegExp(DYNAMIC_ENTITY_RE.source, DYNAMIC_ENTITY_RE.flags);
            while ((m = re.exec(stripped)) !== null) {
                const token = m[0];
                if (!seen.has(token)) {
                    seen.add(token);
                    counts.set(token, (counts.get(token) ?? 0) + 1);
                }
            }
        }
        // Keep tokens that appear in 2+ distinct bullets (stored lowercase).
        const entities = [...counts.entries()]
            .filter(([, n]) => n >= 2)
            .map(([token]) => token.toLowerCase());
        // Write cache (best-effort; failures are caught by the outer try).
        const cacheEntry = { entities, builtAt: Date.now(), findingsMtimeMs: findingsMtime };
        fs.writeFileSync(cachePath, JSON.stringify(cacheEntry));
        return new Set(entities);
    }
    catch {
        // Entity extraction is an enhancement, never a hard failure.
        return new Set();
    }
}
254
/**
 * Collect lowercase entity mentions from `text`: universal tech terms,
 * extra entity patterns (versions, env keys, file paths, error codes, dates),
 * and — when provided — project-learned dynamic entity names.
 * Returns a deduplicated array.
 */
function extractProseEntities(text, dynamicEntities) {
    const found = new Set();
    // Run an exec loop over a FRESH copy of each regex so shared /g constants
    // never accumulate lastIndex state between calls.
    const harvest = (regex) => {
        const re = new RegExp(regex.source, regex.flags);
        let match;
        while ((match = re.exec(text)) !== null)
            found.add(match[0].toLowerCase());
    };
    harvest(PROSE_ENTITY_RE);
    // Additional entity patterns (versions, env keys, file paths, error codes, dates)
    for (const { re: pattern } of EXTRA_ENTITY_PATTERNS)
        harvest(pattern);
    if (dynamicEntities) {
        // Dynamic entities are plain strings — escape them and test each as a
        // whole-word, case-insensitive match.
        for (const entity of dynamicEntities) {
            const escaped = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            if (new RegExp(`\\b${escaped}\\b`, "i").test(text))
                found.add(entity);
        }
    }
    return [...found];
}
278
/**
 * Classify a learning line as "positive" (recommends), "negative" (warns
 * against), or "neutral" (no cue, or contradictory cues in the same line).
 */
function learningPolarity(text) {
    const positive = POSITIVE_RE.test(text);
    const negative = NEGATIVE_RE.test(text);
    if (positive === negative)
        return "neutral"; // both cues or neither — no clear polarity
    return positive ? "positive" : "negative";
}
287
/**
 * Returns existing learning lines that appear to conflict with `newFinding`:
 * bullets that mention at least one shared entity but carry the opposite
 * polarity (e.g. "always use X" vs "avoid X").
 *
 * @param newFinding new learning text to check
 * @param existingLines lines from FINDINGS.md (only "- " bullets are considered)
 * @param dynamicEntities optional Set of project-learned entity names
 * @returns array of conflicting bullet lines (possibly empty)
 */
export function detectConflicts(newFinding, existingLines, dynamicEntities) {
    const newEntities = extractProseEntities(newFinding, dynamicEntities);
    if (newEntities.length === 0)
        return [];
    const newPol = learningPolarity(newFinding);
    if (newPol === "neutral")
        return [];
    // PERF FIX: membership test via Set — the previous Array.includes inside the
    // per-line filter made this O(lines × entities²).
    const newEntitySet = new Set(newEntities);
    const conflicts = [];
    for (const line of existingLines) {
        if (!line.startsWith("- "))
            continue;
        const lineEntities = extractProseEntities(line, dynamicEntities);
        if (!lineEntities.some((e) => newEntitySet.has(e)))
            continue; // no shared entity — cannot conflict
        const linePol = learningPolarity(line);
        if (linePol !== "neutral" && linePol !== newPol) {
            conflicts.push(line);
        }
    }
    return conflicts;
}
310
/**
 * Heuristic duplicate check for a new learning against existing FINDINGS.md
 * content. For every active "- " bullet, two passes run:
 *   1. word-overlap ratio against the smaller word set (> threshold ⇒ duplicate)
 *   2. Jaccard similarity on metadata-stripped tokens (> 0.55 ⇒ duplicate)
 *
 * @param existingContent full FINDINGS.md text
 * @param newLearning candidate learning line
 * @param threshold overlap-ratio cutoff for pass 1 (default 0.6)
 * @returns true when the learning should be skipped as a duplicate
 */
export function isDuplicateFinding(existingContent, newLearning, threshold = 0.6) {
    const normalize = (text) => {
        return stripHtmlComments(text).trim()
            .toLowerCase()
            .replace(/[^a-z0-9\s]/g, " ")
            .split(/\s+/)
            .filter(w => w.length > 2 && !DEDUP_STOP_WORDS.has(w));
    };
    const newWords = normalize(newLearning);
    if (newWords.length === 0)
        return false;
    const newSet = new Set(newWords);
    // PERF FIX: the new learning's Jaccard tokens are loop-invariant — compute
    // them once instead of once per existing bullet as before.
    const newTokens = jaccardTokenize(stripMetadata(newLearning));
    const bullets = existingContent.split("\n").filter(l => l.startsWith("- "));
    for (const bullet of bullets) {
        if (isInactiveFindingLine(bullet))
            continue;
        const existingWords = normalize(bullet);
        if (existingWords.length === 0)
            continue;
        const existingSet = new Set(existingWords);
        // Fast path: exact word overlap against the smaller of the two sets
        let overlap = 0;
        for (const w of newSet) {
            if (existingSet.has(w))
                overlap++;
        }
        const smaller = Math.min(newSet.size, existingSet.size);
        if (smaller > 0 && overlap / smaller > threshold) {
            debugLog(`duplicate-detection: skipping learning, ${Math.round((overlap / smaller) * 100)}% overlap with existing: "${bullet.slice(0, 80)}"`);
            return true;
        }
        // Second pass: Jaccard similarity (strip metadata before comparing)
        const existingTokens = jaccardTokenize(stripMetadata(bullet));
        if (newTokens.size < 3 || existingTokens.size < 3)
            continue; // too few tokens for reliable Jaccard
        const jaccard = jaccardSimilarity(newTokens, existingTokens);
        if (jaccard > 0.55) {
            debugLog(`duplicate-detection: Jaccard ${Math.round(jaccard * 100)}% with existing: "${bullet.slice(0, 80)}"`);
            return true;
        }
    }
    return false;
}
354
// ── Typed observation tags ────────────────────────────────────────────────────
/**
 * Normalize known observation tags in learning text to lowercase.
 * Unknown bracket tags are left untouched but reported in the returned
 * warning so the caller can surface them.
 *
 * @returns {{ text: string, warning: string | undefined }}
 */
export function normalizeObservationTags(text) {
    const TAG_RE = /\[([a-zA-Z_-]+)\]/g;
    // Pass 1: lowercase the tags we recognize; keep everything else verbatim.
    const normalized = text.replace(TAG_RE, (whole, tag) => {
        const lower = tag.toLowerCase();
        return KNOWN_OBSERVATION_TAGS.has(lower) ? `[${lower}]` : whole;
    });
    // Pass 2: collect any bracket tags that are still unknown, for the warning.
    const unknownTags = [];
    for (const match of normalized.matchAll(TAG_RE)) {
        if (!KNOWN_OBSERVATION_TAGS.has(match[1].toLowerCase())) {
            unknownTags.push(match[0]);
        }
    }
    const warning = unknownTags.length > 0
        ? `Unknown tag(s) ${unknownTags.join(", ")} — known tags: ${[...KNOWN_OBSERVATION_TAGS].map(t => `[${t}]`).join(", ")}`
        : undefined;
    return { text: normalized, warning };
}
382
/**
 * Scan text for secrets and PII patterns. Rules are evaluated in order and the
 * first match wins; returns a human-readable label for the secret type found,
 * or null if the text looks clean.
 */
export function scanForSecrets(text) {
    // Long base64-encoded secret-like blob: requires at least one '+' or '/',
    // a run of 40+ base64 chars, and explicitly exempts plain 40-char lowercase
    // hex digests (e.g. git commit SHAs) both standalone and embedded.
    const looksLikeBase64Blob = (t) => !/^[0-9a-f]{40}$/.test(t) &&
        /(?=[A-Za-z0-9+/]*[+/][A-Za-z0-9+/]*)[A-Za-z0-9+/]{40,}={0,2}/.test(t.replace(/[0-9a-f]{40}/g, ""));
    // Ordered rule table mirroring the original if/return chain exactly.
    const rules = [
        [(t) => /AKIA[0-9A-Z]{16}/.test(t), 'AWS access key'],
        [(t) => /(?:aws[_-]?secret|AWS_SECRET)[_-]?(?:access[_-]?)?key[_-]?(?:id)?['":\s]+[A-Za-z0-9/+=]{40}/i.test(t), 'AWS secret access key'],
        [(t) => /eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/.test(t), 'JWT token'],
        [looksLikeBase64Blob, 'long base64 secret'],
        [(t) => /(mongodb|postgres|mysql|redis):\/\/[^@\s]+:[^@\s]+@/i.test(t), 'connection string with credentials'],
        [(t) => /-----BEGIN (RSA|EC|OPENSSH) PRIVATE KEY-----/.test(t), 'SSH private key'],
        [(t) => /sk-ant-api\d{2}-[A-Za-z0-9_\-]{10,}/.test(t), 'Anthropic API key'],
        [(t) => /sk-proj-[A-Za-z0-9_\-]{30,}/.test(t), 'OpenAI API key'],
        [(t) => /ghp_[A-Za-z0-9]{36}/.test(t), 'GitHub personal access token'],
        [(t) => /gho_[A-Za-z0-9]{36}/.test(t), 'GitHub OAuth token'],
        // Catch-all for remaining GitHub token prefixes (user/org/server).
        [(t) => /gh[pousr]_[A-Za-z0-9]{36}/.test(t), 'GitHub token'],
        [(t) => /xoxb-[0-9]+-[A-Za-z0-9-]+/.test(t), 'Slack bot token'],
        [(t) => /xoxp-[0-9]+-[A-Za-z0-9-]+/.test(t), 'Slack user token'],
        [(t) => /sk_live_[A-Za-z0-9]{24,}/.test(t), 'Stripe secret key'],
        [(t) => /pk_live_[A-Za-z0-9]{24,}/.test(t), 'Stripe publishable key'],
        [(t) => /npm_[A-Za-z0-9]{36}/.test(t), 'npm access token'],
        [(t) => /"private_key_id"\s*:\s*"[^"]{20,}"/.test(t), 'GCP service account key'],
        // Generic fallback: only fires when a variable name suggests a secret.
        [(t) => /['"]?(api_?key|secret|token|password)['"]?\s*[=:]\s*['"]?[a-zA-Z0-9_\-\.]{20,}/i.test(t), 'API key or secret'],
    ];
    for (const [matches, label] of rules) {
        if (matches(text))
            return label;
    }
    return null;
}
443
/**
 * Resolve coreferences in learning text by replacing vague pronouns with
 * concrete names taken from `context` ({ project?, file? }).
 * Returns the text unchanged when no context is available.
 */
export function resolveCoref(text, context) {
    const { project, file } = context;
    if (!project && !file)
        return text;
    let result = text;
    if (project) {
        // Sentence-starting "It "/"This "/"That " followed by a lowercase word
        // gets the project name prefixed in brackets.
        result = result.replace(/^(It|This|That)\s+(?=[a-z])/i, (lead) => `[${project}] ${lead}`);
        // " the project" → the concrete project name
        result = result.replace(/\bthe project\b/gi, project);
    }
    if (file) {
        result = result.replace(/\b(this file|the file)\b/gi, path.basename(file));
    }
    // Last resort: if the text is still untouched, pronoun-heavy, and contains
    // no concrete noun (CamelCase, dotted name, @handle, URL), prepend a marker.
    if (project && /\b(it|this|that|they|them)\b/i.test(result)) {
        const hasConcreteNoun = /[A-Z][a-z]+[A-Z]|[a-z]+\.[a-z]+|@[a-z]|https?:\/\//.test(result);
        if (!hasConcreteNoun && result === text) {
            result = `[context: ${project}] ${result}`;
        }
    }
    return result;
}
469
/**
 * LLM-based semantic dedup check. Only called when PHREN_FEATURE_SEMANTIC_DEDUP=1.
 * Must be called before addFindingToFile() since that function is sync.
 * Returns true if the new learning is a semantic duplicate of any existing bullet.
 *
 * Only bullets in the "ambiguous" similarity band [0.3, 0.55) are sent to the
 * LLM — higher similarity is already caught by the sync isDuplicateFinding pass.
 */
export async function checkSemanticDedup(phrenPath, project, newLearning, signal) {
    if (!isFeatureEnabled("PHREN_FEATURE_SEMANTIC_DEDUP", false))
        return false;
    const resolvedDir = safeProjectPath(phrenPath, project);
    if (!resolvedDir)
        return false;
    const findingsPath = path.join(resolvedDir, "FINDINGS.md");
    if (!fs.existsSync(findingsPath))
        return false;
    // PERF FIX: the new learning's stripped text and token set are loop-invariant —
    // previously they were recomputed for every existing bullet.
    const a = stripMetadata(newLearning).trim();
    const tokA = jaccardTokenize(a);
    if (tokA.size < 3)
        return false; // too few tokens for any reliable comparison
    const existingContent = fs.readFileSync(findingsPath, "utf8");
    const bullets = existingContent.split("\n").filter((l) => l.startsWith("- ") && !isInactiveFindingLine(l));
    for (const bullet of bullets) {
        const b = stripMetadata(bullet).trim();
        const tokB = jaccardTokenize(b);
        if (tokB.size < 3)
            continue;
        const jaccard = jaccardSimilarity(tokA, tokB);
        if (jaccard >= 0.55)
            continue; // already caught by sync isDuplicateFinding
        if (jaccard >= 0.3) {
            const isDup = await semanticDedup(a, b, phrenPath, signal);
            if (isDup)
                return true;
        }
    }
    return false;
}
503
/**
 * Ask the LLM whether findings `a` and `b` are semantically equivalent.
 * Verdicts are cached by content hash for DEDUP_CACHE_TTL_MS (1 day).
 * Any failure — abort included — degrades to false ("not a duplicate").
 */
async function semanticDedup(a, b, phrenPath, signal) {
    const key = crypto.createHash("sha256").update(a + "|||" + b).digest("hex");
    const cachePath = runtimeFile(phrenPath, "dedup-cache.json");
    try {
        return await withCache(cachePath, key, DEDUP_CACHE_TTL_MS, async () => {
            const answer = await callLlm(`Are these two findings semantically equivalent? Reply YES or NO only.\nA: ${a}\nB: ${b}`, signal);
            return answer.trim().toUpperCase().startsWith("YES");
        });
    }
    catch {
        // FIX: the old catch special-cased AbortError only to return the same
        // `false` as the generic path — both aborts and real failures fall back
        // to "not a duplicate".
        return false;
    }
}
518
+ const CONFLICT_CHECK_TOTAL_TIMEOUT_MS = 30_000;
519
/**
 * LLM-based conflict check. Only called when PHREN_FEATURE_SEMANTIC_CONFLICT=1.
 * Call after detectConflicts() in addFindingToFile flow.
 * Returns conflict annotations to append to the bullet.
 * Also scans global findings and other projects for cross-project contradictions.
 * Has a 30-second total timeout; returns partial results if the deadline is hit.
 *
 * @param {string} phrenPath - Root of the phren data directory.
 * @param {string} project - Current project name (validated via safeProjectPath).
 * @param {string} newFinding - Candidate bullet text to check for contradictions.
 * @param {AbortSignal} [signal] - Forwarded to the LLM calls for cancellation.
 * @returns {Promise<{annotations: string[], checked: boolean}>} checked=false when the
 *   feature is disabled or the project path is invalid; annotations are HTML comments
 *   of the form `<!-- conflicts_with: "..." -->`.
 */
export async function checkSemanticConflicts(phrenPath, project, newFinding, signal) {
    if (!isFeatureEnabled("PHREN_FEATURE_SEMANTIC_CONFLICT", false))
        return { annotations: [], checked: false };
    const resolvedDir = safeProjectPath(phrenPath, project);
    if (!resolvedDir)
        return { annotations: [], checked: false };
    const newEntities = extractProseEntities(newFinding);
    if (newEntities.length === 0)
        return { annotations: [], checked: true };
    // Set membership is O(1); the original ran newEntities.includes() inside a
    // per-bullet filter (accidental O(n*m) per line).
    const newEntitySet = new Set(newEntities);
    // Collect bullet sources: { bullets, sourceProject } pairs
    const sources = [];
    // Current project
    const findingsPath = path.join(resolvedDir, "FINDINGS.md");
    if (fs.existsSync(findingsPath)) {
        const content = fs.readFileSync(findingsPath, "utf8");
        sources.push({ bullets: content.split("\n").filter((l) => l.startsWith("- ")), sourceProject: null });
    }
    // Global project findings
    const globalFindingsPath = path.join(phrenPath, "global", "FINDINGS.md");
    if (fs.existsSync(globalFindingsPath)) {
        const content = fs.readFileSync(globalFindingsPath, "utf8");
        const bullets = content.split("\n").filter((l) => l.startsWith("- "));
        if (bullets.length > 0)
            sources.push({ bullets, sourceProject: "global" });
    }
    // Scan other projects by FINDINGS.md recency so we still check the hottest projects first,
    // but do not truncate the search set and miss older contradictions.
    try {
        const entries = fs.readdirSync(phrenPath, { withFileTypes: true });
        const otherProjects = entries
            .filter((e) => e.isDirectory() && e.name !== project && e.name !== "global" && !e.name.startsWith("."))
            .map((e) => {
            const fp = path.join(phrenPath, e.name, "FINDINGS.md");
            if (!fs.existsSync(fp))
                return null;
            try {
                return { name: e.name, mtime: fs.statSync(fp).mtimeMs, fp };
            }
            catch (err) {
                // Fixed: original guard read (PHREN_DEBUG || PHREN_DEBUG) — a duplicated
                // operand, equivalent to the single check. NOTE(review): the second operand
                // may have been intended as another env var (e.g. a verbose flag) — confirm
                // against the rest of the codebase.
                if (process.env.PHREN_DEBUG)
                    process.stderr.write(`[phren] crossProjectScan stat: ${err instanceof Error ? err.message : String(err)}\n`);
                return null;
            }
        })
            .filter((x) => x !== null)
            .sort((a, b) => b.mtime - a.mtime);
        for (const proj of otherProjects) {
            const content = fs.readFileSync(proj.fp, "utf8");
            const bullets = content.split("\n").filter((l) => l.startsWith("- "));
            if (bullets.length > 0)
                sources.push({ bullets, sourceProject: proj.name });
        }
    }
    catch (err) {
        // Directory scan is best-effort; same duplicated-operand fix as above.
        if (process.env.PHREN_DEBUG)
            process.stderr.write(`[phren] crossProjectScan: ${err instanceof Error ? err.message : String(err)}\n`);
    }
    const annotations = [];
    const deadline = Date.now() + CONFLICT_CHECK_TOTAL_TIMEOUT_MS;
    outer: for (const { bullets, sourceProject } of sources) {
        for (const line of bullets) {
            // Respect the aggregate deadline — return partial results rather than hanging
            if (Date.now() >= deadline) {
                debugLog("checkSemanticConflicts: total timeout reached, returning partial results");
                break outer;
            }
            const lineEntities = extractProseEntities(line);
            const shared = lineEntities.filter((e) => newEntitySet.has(e));
            if (shared.length === 0)
                continue;
            // Only the first shared entity is named in the LLM prompt.
            const result = await llmConflictCheck(line, newFinding, shared[0], phrenPath, signal);
            if (result === "CONFLICT") {
                const snippet = stripMetadata(line).trim().slice(0, 80);
                const sourceLabel = sourceProject ? ` (from project: ${sourceProject})` : "";
                annotations.push(`<!-- conflicts_with: "${snippet}"${sourceLabel} -->`);
            }
        }
    }
    return { annotations, checked: true };
}
606
/**
 * Ask the LLM whether an existing finding contradicts the new one with respect to
 * a shared entity; verdicts are cached (keyed by sha256 of both texts) in the
 * runtime conflict cache.
 *
 * Best-effort: any failure — abort, LLM error, cache I/O — falls back to "OK"
 * so conflict checking never blocks a write.
 *
 * @param {string} existing - An existing bullet line.
 * @param {string} newFinding - The candidate finding being added.
 * @param {string} entity - Shared entity named in the prompt.
 * @param {string} phrenPath - Root of the phren data directory (for the cache file).
 * @param {AbortSignal} [signal] - Forwarded to the LLM call for cancellation.
 * @returns {Promise<"CONFLICT" | "OK">}
 */
async function llmConflictCheck(existing, newFinding, entity, phrenPath, signal) {
    const key = crypto.createHash("sha256").update(existing + "|||" + newFinding).digest("hex");
    const cachePath = runtimeFile(phrenPath, "conflict-cache.json");
    try {
        return await withCache(cachePath, key, CONFLICT_CACHE_TTL_MS, async () => {
            const answer = await callLlm(`Finding A: ${existing}. Finding B: ${newFinding}. Do these contradict each other about how to use ${entity}? Reply CONFLICT or OK only.`, signal);
            return answer.trim().toUpperCase().startsWith("CONFLICT")
                ? "CONFLICT"
                : "OK";
        });
    }
    catch {
        // The original branched on isAbortError(error) but both paths returned
        // "OK" — dead code collapsed into a single fallback.
        return "OK";
    }
}