@phren/cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +590 -0
- package/mcp/dist/capabilities/cli.js +61 -0
- package/mcp/dist/capabilities/index.js +15 -0
- package/mcp/dist/capabilities/mcp.js +61 -0
- package/mcp/dist/capabilities/types.js +57 -0
- package/mcp/dist/capabilities/vscode.js +61 -0
- package/mcp/dist/capabilities/web-ui.js +61 -0
- package/mcp/dist/cli-actions.js +302 -0
- package/mcp/dist/cli-config.js +580 -0
- package/mcp/dist/cli-extract.js +305 -0
- package/mcp/dist/cli-govern.js +371 -0
- package/mcp/dist/cli-graph.js +169 -0
- package/mcp/dist/cli-hooks-citations.js +44 -0
- package/mcp/dist/cli-hooks-context.js +56 -0
- package/mcp/dist/cli-hooks-globs.js +83 -0
- package/mcp/dist/cli-hooks-output.js +130 -0
- package/mcp/dist/cli-hooks-retrieval.js +2 -0
- package/mcp/dist/cli-hooks-session.js +1402 -0
- package/mcp/dist/cli-hooks.js +350 -0
- package/mcp/dist/cli-namespaces.js +989 -0
- package/mcp/dist/cli-ops.js +253 -0
- package/mcp/dist/cli-search.js +407 -0
- package/mcp/dist/cli.js +108 -0
- package/mcp/dist/content-archive.js +278 -0
- package/mcp/dist/content-citation.js +391 -0
- package/mcp/dist/content-dedup.js +622 -0
- package/mcp/dist/content-learning.js +472 -0
- package/mcp/dist/content-metadata.js +186 -0
- package/mcp/dist/content-validate.js +462 -0
- package/mcp/dist/core-finding.js +54 -0
- package/mcp/dist/core-project.js +36 -0
- package/mcp/dist/core-search.js +50 -0
- package/mcp/dist/data-access.js +400 -0
- package/mcp/dist/data-tasks.js +821 -0
- package/mcp/dist/embedding.js +344 -0
- package/mcp/dist/entrypoint.js +387 -0
- package/mcp/dist/finding-context.js +172 -0
- package/mcp/dist/finding-impact.js +181 -0
- package/mcp/dist/finding-journal.js +122 -0
- package/mcp/dist/finding-lifecycle.js +259 -0
- package/mcp/dist/governance-audit.js +22 -0
- package/mcp/dist/governance-locks.js +96 -0
- package/mcp/dist/governance-policy.js +648 -0
- package/mcp/dist/governance-scores.js +355 -0
- package/mcp/dist/hooks.js +449 -0
- package/mcp/dist/impact-scoring.js +22 -0
- package/mcp/dist/index-query.js +168 -0
- package/mcp/dist/index.js +205 -0
- package/mcp/dist/init-config.js +336 -0
- package/mcp/dist/init-preferences.js +62 -0
- package/mcp/dist/init-setup.js +1305 -0
- package/mcp/dist/init-shared.js +29 -0
- package/mcp/dist/init.js +1730 -0
- package/mcp/dist/link-checksums.js +62 -0
- package/mcp/dist/link-context.js +257 -0
- package/mcp/dist/link-doctor.js +591 -0
- package/mcp/dist/link-skills.js +212 -0
- package/mcp/dist/link.js +596 -0
- package/mcp/dist/logger.js +15 -0
- package/mcp/dist/machine-identity.js +38 -0
- package/mcp/dist/mcp-config.js +254 -0
- package/mcp/dist/mcp-data.js +315 -0
- package/mcp/dist/mcp-extract-facts.js +78 -0
- package/mcp/dist/mcp-extract.js +133 -0
- package/mcp/dist/mcp-finding.js +557 -0
- package/mcp/dist/mcp-graph.js +339 -0
- package/mcp/dist/mcp-hooks.js +256 -0
- package/mcp/dist/mcp-memory.js +58 -0
- package/mcp/dist/mcp-ops.js +328 -0
- package/mcp/dist/mcp-search.js +628 -0
- package/mcp/dist/mcp-session.js +651 -0
- package/mcp/dist/mcp-skills.js +189 -0
- package/mcp/dist/mcp-tasks.js +551 -0
- package/mcp/dist/mcp-types.js +7 -0
- package/mcp/dist/memory-ui-assets.js +6 -0
- package/mcp/dist/memory-ui-data.js +513 -0
- package/mcp/dist/memory-ui-graph.js +1910 -0
- package/mcp/dist/memory-ui-page.js +353 -0
- package/mcp/dist/memory-ui-scripts.js +1387 -0
- package/mcp/dist/memory-ui-server.js +1218 -0
- package/mcp/dist/memory-ui-styles.js +555 -0
- package/mcp/dist/memory-ui.js +9 -0
- package/mcp/dist/package-metadata.js +13 -0
- package/mcp/dist/phren-art.js +52 -0
- package/mcp/dist/phren-core.js +108 -0
- package/mcp/dist/phren-dotenv.js +67 -0
- package/mcp/dist/phren-paths.js +476 -0
- package/mcp/dist/proactivity.js +172 -0
- package/mcp/dist/profile-store.js +228 -0
- package/mcp/dist/project-config.js +85 -0
- package/mcp/dist/project-locator.js +25 -0
- package/mcp/dist/project-topics.js +1134 -0
- package/mcp/dist/provider-adapters.js +176 -0
- package/mcp/dist/runtime-profile.js +18 -0
- package/mcp/dist/session-checkpoints.js +131 -0
- package/mcp/dist/session-utils.js +68 -0
- package/mcp/dist/shared-content.js +8 -0
- package/mcp/dist/shared-embedding-cache.js +143 -0
- package/mcp/dist/shared-fragment-graph.js +456 -0
- package/mcp/dist/shared-governance.js +4 -0
- package/mcp/dist/shared-index.js +1334 -0
- package/mcp/dist/shared-ollama.js +192 -0
- package/mcp/dist/shared-paths.js +1 -0
- package/mcp/dist/shared-retrieval.js +796 -0
- package/mcp/dist/shared-search-fallback.js +375 -0
- package/mcp/dist/shared-sqljs.js +42 -0
- package/mcp/dist/shared-stemmer.js +171 -0
- package/mcp/dist/shared-vector-index.js +199 -0
- package/mcp/dist/shared.js +114 -0
- package/mcp/dist/shell-entry.js +209 -0
- package/mcp/dist/shell-input.js +943 -0
- package/mcp/dist/shell-palette.js +119 -0
- package/mcp/dist/shell-render.js +252 -0
- package/mcp/dist/shell-state-store.js +81 -0
- package/mcp/dist/shell-types.js +13 -0
- package/mcp/dist/shell-view-list.js +14 -0
- package/mcp/dist/shell-view.js +707 -0
- package/mcp/dist/shell.js +352 -0
- package/mcp/dist/skill-files.js +117 -0
- package/mcp/dist/skill-registry.js +279 -0
- package/mcp/dist/skill-state.js +28 -0
- package/mcp/dist/startup-embedding.js +57 -0
- package/mcp/dist/status.js +323 -0
- package/mcp/dist/synonyms.json +670 -0
- package/mcp/dist/task-hygiene.js +251 -0
- package/mcp/dist/task-lifecycle.js +347 -0
- package/mcp/dist/tasks-github.js +76 -0
- package/mcp/dist/telemetry.js +165 -0
- package/mcp/dist/test-global-setup.js +37 -0
- package/mcp/dist/tool-registry.js +104 -0
- package/mcp/dist/update.js +97 -0
- package/mcp/dist/utils.js +543 -0
- package/package.json +67 -0
- package/skills/README.md +7 -0
- package/skills/consolidate/SKILL.md +152 -0
- package/skills/discover/SKILL.md +175 -0
- package/skills/init/SKILL.md +216 -0
- package/skills/profiles/SKILL.md +121 -0
- package/skills/sync/SKILL.md +261 -0
- package/starter/README.md +74 -0
- package/starter/global/CLAUDE.md +89 -0
- package/starter/global/skills/humanize.md +30 -0
- package/starter/global/skills/pipeline.md +35 -0
- package/starter/global/skills/release.md +35 -0
- package/starter/machines.yaml +8 -0
- package/starter/my-api/.claude/skills/README.md +7 -0
- package/starter/my-api/CLAUDE.md +33 -0
- package/starter/my-api/FINDINGS.md +9 -0
- package/starter/my-api/summary.md +7 -0
- package/starter/my-api/tasks.md +7 -0
- package/starter/my-first-project/.claude/skills/README.md +7 -0
- package/starter/my-first-project/CLAUDE.md +49 -0
- package/starter/my-first-project/FINDINGS.md +24 -0
- package/starter/my-first-project/summary.md +11 -0
- package/starter/my-first-project/tasks.md +25 -0
- package/starter/my-frontend/.claude/skills/README.md +7 -0
- package/starter/my-frontend/CLAUDE.md +33 -0
- package/starter/my-frontend/FINDINGS.md +9 -0
- package/starter/my-frontend/summary.md +7 -0
- package/starter/my-frontend/tasks.md +7 -0
- package/starter/profiles/default.yaml +4 -0
- package/starter/profiles/personal.yaml +4 -0
- package/starter/profiles/work.yaml +4 -0
- package/starter/templates/README.md +7 -0
- package/starter/templates/frontend/CLAUDE.md +23 -0
- package/starter/templates/frontend/FINDINGS.md +7 -0
- package/starter/templates/frontend/reference/README.md +4 -0
- package/starter/templates/frontend/summary.md +7 -0
- package/starter/templates/frontend/tasks.md +11 -0
- package/starter/templates/library/CLAUDE.md +22 -0
- package/starter/templates/library/FINDINGS.md +7 -0
- package/starter/templates/library/reference/README.md +4 -0
- package/starter/templates/library/summary.md +7 -0
- package/starter/templates/library/tasks.md +11 -0
- package/starter/templates/monorepo/CLAUDE.md +21 -0
- package/starter/templates/monorepo/FINDINGS.md +7 -0
- package/starter/templates/monorepo/reference/README.md +4 -0
- package/starter/templates/monorepo/summary.md +7 -0
- package/starter/templates/monorepo/tasks.md +11 -0
- package/starter/templates/python-project/CLAUDE.md +21 -0
- package/starter/templates/python-project/FINDINGS.md +7 -0
- package/starter/templates/python-project/reference/README.md +4 -0
- package/starter/templates/python-project/summary.md +7 -0
- package/starter/templates/python-project/tasks.md +10 -0
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
// shared-retrieval.ts — shared retrieval core used by hooks and MCP search.
|
|
2
|
+
import { getQualityMultiplier, entryScoreKey, } from "./shared-governance.js";
|
|
3
|
+
import { queryDocRows, queryRows, cosineFallback, extractSnippet, getDocSourceKey, getEntityBoostDocs, decodeFiniteNumber, rowToDocWithRowid, } from "./shared-index.js";
|
|
4
|
+
import { filterTrustedFindingsDetailed, } from "./shared-content.js";
|
|
5
|
+
import { parseCitationComment } from "./content-citation.js";
|
|
6
|
+
import { getHighImpactFindings } from "./finding-impact.js";
|
|
7
|
+
import { buildFtsQueryVariants, buildRelaxedFtsQuery, isFeatureEnabled, STOP_WORDS } from "./utils.js";
|
|
8
|
+
import * as fs from "fs";
|
|
9
|
+
import * as path from "path";
|
|
10
|
+
import { getProjectGlobBoost } from "./cli-hooks-globs.js";
|
|
11
|
+
import { vectorFallback } from "./shared-search-fallback.js";
|
|
12
|
+
import { getOllamaUrl, getCloudEmbeddingUrl } from "./shared-ollama.js";
|
|
13
|
+
import { keywordFallbackSearch } from "./core-search.js";
|
|
14
|
+
import { debugLog } from "./shared.js";
|
|
15
|
+
// ── Scoring constants ─────────────────────────────────────────────────────────

// Token-overlap ("semantic") fallback sampling.
/** Number of docs sampled for token-overlap semantic fallback search. */
const SEMANTIC_FALLBACK_SAMPLE_LIMIT = 100;
/** Upper bound on how many rowid windows the fallback sample is spread across. */
const SEMANTIC_FALLBACK_WINDOW_COUNT = 4;
/** Minimum overlap score for a doc to be included in semantic fallback results. */
const SEMANTIC_OVERLAP_MIN_SCORE = 0.25;

// Vector expansion gating (see shouldRunVectorExpansion).
/** Default/maximum result count at which vector expansion is skipped. */
const VECTOR_FALLBACK_SKIP_COUNT = 3;
/** Vector expansion runs only when the best overlap of the top rows is below this score. */
const VECTOR_FALLBACK_STRONG_MATCH_SCORE = 0.2;

// The constants below are not referenced in the part of the module reviewed here;
// NOTE(review): names suggest ranking weights and snippet/task thresholds — confirm at usage sites.
const LOCAL_QUERY_OVERLAP_WEIGHT = 3.5;
const CROSS_PROJECT_QUERY_OVERLAP_WEIGHT = 1.35;
const WEAK_CROSS_PROJECT_OVERLAP_MAX = 0.18;
const WEAK_CROSS_PROJECT_OVERLAP_PENALTY = 0.75;
const LOW_FOCUS_SNIPPET_SCORE = 0.3;
const VERY_LOW_FOCUS_SNIPPET_SCORE = 0.14;
const LOW_FOCUS_SNIPPET_LINE_CAP = 3;
const LOW_FOCUS_SNIPPET_CHAR_FRACTION = 0.55;
const TASK_RESCUE_MIN_OVERLAP = 0.3;
const TASK_RESCUE_OVERLAP_MARGIN = 0.12;
const TASK_RESCUE_SCORE_MARGIN = 0.6;

/** Fraction of bullets that must be low-value before applying the low-value penalty. */
const LOW_VALUE_BULLET_FRACTION = 0.5;
|
|
36
|
+
// ── Intent and scoring helpers ───────────────────────────────────────────────
|
|
37
|
+
/**
 * Ordered intent rules: the first pattern that matches the lower-cased prompt wins.
 *
 * Short keywords are anchored so they stop matching inside unrelated words:
 * "pr", "ci" and "nit" must be whole words (previously hit "project",
 * "specific", "unit"), while "fix" and "test" must start a word (previously hit
 * "prefix", "latest"). Longer keywords stay substring matches so inflected
 * forms ("debugging", "rebuild", "failing") keep working.
 */
const TASK_INTENT_RULES = [
    ["debug", /bug|error|\bfix|hotfix|broken|regression|fail|stack trace/],
    ["review", /review|audit|\bprs?\b|pull request|\bnits?\b|refactor/],
    ["build", /build|deploy|release|\bci\b|workflow|pipeline|\btest/],
    ["docs", /\b(doc|docs|readme|explain|guide|instructions?)\b/],
];
/**
 * Classify a free-text prompt into a coarse task intent.
 *
 * @param {string} prompt - Raw prompt text; matching is case-insensitive.
 * @returns {"debug"|"review"|"build"|"docs"|"general"} The first matching intent
 *   in priority order debug → review → build → docs, or "general" when nothing matches.
 */
export function detectTaskIntent(prompt) {
    const p = prompt.toLowerCase();
    for (const [intent, pattern] of TASK_INTENT_RULES) {
        if (pattern.test(p))
            return intent;
    }
    return "general";
}
|
|
49
|
+
/**
 * Score bonus for a document type given the detected task intent.
 *
 * @param {string} intent - Intent label as produced by detectTaskIntent.
 * @param {string} docType - Document type of the candidate row.
 * @returns {number} 3 or 2 when the type is favoured for the intent, 2 for any
 *   other "canonical" doc, otherwise 0.
 */
function intentBoost(intent, docType) {
    const isOneOf = (a, b) => docType === a || docType === b;
    switch (intent) {
        case "debug":
            if (isOneOf("findings", "reference"))
                return 3;
            break;
        case "review":
            if (isOneOf("canonical", "changelog"))
                return 3;
            break;
        case "build":
            if (isOneOf("task", "reference"))
                return 2;
            break;
        case "docs":
            if (isOneOf("summary", "claude"))
                return 2;
            break;
        default:
            break;
    }
    // Canonical docs get a baseline bonus regardless of intent.
    return docType === "canonical" ? 2 : 0;
}
|
|
62
|
+
/**
 * Boost a document whose path corresponds to one of the changed files.
 *
 * @param {string} filePath - Path of the candidate document.
 * @param {Set<string>} changedFiles - Paths of changed files.
 * @returns {number} 3 when a changed file has the same basename as the document
 *   or its full path is a suffix of the document path; otherwise 0.
 */
export function fileRelevanceBoost(filePath, changedFiles) {
    if (changedFiles.size === 0)
        return 0;
    // Compare with forward slashes so Windows-style paths line up.
    const toForwardSlashes = (p) => p.replace(/\\/g, "/");
    const docPath = toForwardSlashes(filePath);
    const docName = path.basename(docPath);
    for (const changed of changedFiles) {
        const changedPath = toForwardSlashes(changed);
        // Exact basename equality keeps 'index.ts' from matching 'shared-index.ts'.
        const sameBasename = path.basename(changedPath) === docName;
        if (sameBasename || docPath.endsWith(`/${changedPath}`))
            return 3;
    }
    return 0;
}
|
|
78
|
+
/**
 * Split a branch name into lower-cased tokens worth matching against content.
 *
 * @param {string} branch - Branch name, e.g. "feature/login-form".
 * @returns {string[]} Tokens longer than two characters, excluding generic
 *   branch words (main, master, feature, fix, bugfix, hotfix).
 */
function branchTokens(branch) {
    const genericWords = ["main", "master", "feature", "fix", "bugfix", "hotfix"];
    const tokens = [];
    for (const piece of branch.split(/[\/._-]/g)) {
        const token = piece.trim().toLowerCase();
        if (token.length > 2 && !genericWords.includes(token))
            tokens.push(token);
    }
    return tokens;
}
|
|
84
|
+
/**
 * Boost content that mentions tokens taken from the current branch name.
 *
 * @param {string} content - Document text to inspect.
 * @param {string} branch - Branch name; a falsy value yields no boost.
 * @returns {number} One point per branch token found in the content
 *   (case-insensitive substring match), capped at 3.
 */
export function branchMatchBoost(content, branch) {
    if (!branch)
        return 0;
    const haystack = content.toLowerCase();
    const hits = branchTokens(branch).filter((token) => haystack.includes(token)).length;
    return Math.min(3, hits);
}
|
|
96
|
+
// Cached regex plus the env value it was built from; rebuilt only when the env value changes.
let _lowValueRegex = null;
let _lowValuePatternKey = "";
/**
 * Build (or reuse) the case-insensitive regex that flags low-value bullets.
 *
 * Fragments come from the comma-separated PHREN_LOW_VALUE_PATTERNS environment
 * variable; when it is unset or yields no fragments a built-in default list is
 * used. Fragments are escaped, so they match literally.
 *
 * @returns {RegExp} Alternation of all fragments with the "i" flag.
 */
function getLowValuePattern() {
    const envValue = (process.env.PHREN_LOW_VALUE_PATTERNS) || "";
    const cacheIsCurrent = Boolean(_lowValueRegex) && _lowValuePatternKey === envValue;
    if (cacheIsCurrent)
        return _lowValueRegex;
    const fromEnv = [];
    for (const piece of envValue.split(",")) {
        const fragment = piece.trim();
        if (fragment)
            fromEnv.push(fragment);
    }
    const fragments = fromEnv.length > 0
        ? fromEnv
        : ["fixed stuff", "updated things", "misc", "temp", "wip", "todo", "placeholder", "cleanup"];
    const alternation = fragments
        .map((f) => f.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
        .join("|");
    _lowValueRegex = new RegExp(`(${alternation})`, "i");
    _lowValuePatternKey = envValue;
    return _lowValueRegex;
}
|
|
109
|
+
/**
 * Penalty for findings documents that consist mostly of low-value bullets.
 *
 * A bullet is a line starting with "- "; it counts as low-value when it matches
 * the low-value pattern or is shorter than 16 characters.
 *
 * @param {string} content - Document text.
 * @param {string} docType - Document type; only "findings" is ever penalised.
 * @returns {number} 2 when the low-value share reaches LOW_VALUE_BULLET_FRACTION, else 0.
 */
function lowValuePenalty(content, docType) {
    if (docType !== "findings")
        return 0;
    const bulletLines = content.split("\n").filter((line) => line.startsWith("- "));
    if (bulletLines.length === 0)
        return 0;
    const lowValue = getLowValuePattern();
    let lowCount = 0;
    for (const bullet of bulletLines) {
        if (lowValue.test(bullet) || bullet.length < 16)
            lowCount += 1;
    }
    const threshold = Math.ceil(bulletLines.length * LOW_VALUE_BULLET_FRACTION);
    return lowCount >= threshold ? 2 : 0;
}
|
|
119
|
+
// ── Token and snippet helpers ────────────────────────────────────────────────
|
|
120
|
+
/**
 * Normalise one token for overlap comparison.
 *
 * @param {string} token - Raw token.
 * @returns {string} Lower-cased token restricted to [a-z0-9_-], with one
 *   trailing "s" removed when the result is longer than four characters and
 *   does not end in "ss".
 */
function normalizeToken(token) {
    const cleaned = token.toLowerCase().replace(/[^a-z0-9_-]/g, "");
    const hasPluralS = cleaned.length > 4 && cleaned.endsWith("s") && !cleaned.endsWith("ss");
    return hasPluralS ? cleaned.slice(0, -1) : cleaned;
}
|
|
126
|
+
/**
 * Tokenise text into distinct, normalised, non-stop-word tokens.
 *
 * @param {string} text - Text to tokenise.
 * @param {number} [maxTokens=24] - Maximum number of tokens returned; a
 *   non-finite value or a value below 1 disables the cap.
 * @returns {string[]} Unique tokens in first-seen order.
 */
function tokenizeForOverlap(text, maxTokens = 24) {
    const words = text
        .toLowerCase()
        .replace(/[^a-z0-9_\-\s]/g, " ")
        .split(/\s+/);
    // A Set keeps first-seen order while dropping repeats.
    const distinct = new Set();
    for (const word of words) {
        const token = normalizeToken(word);
        if (token.length > 1 && !STOP_WORDS.has(token))
            distinct.add(token);
    }
    const tokens = [...distinct];
    const capApplies = Number.isFinite(maxTokens) && maxTokens >= 1;
    return capApplies ? tokens.slice(0, maxTokens) : tokens;
}
|
|
138
|
+
/**
 * Fraction of query tokens that occur in the content.
 *
 * @param {string[]} queryTokens - Normalised query tokens.
 * @param {string} content - Text to compare against; tokenised without a cap.
 * @returns {number} matched / clamp(queryTokens.length, 2, 10); 0 when either
 *   side has no tokens. The denominator is capped at 10, so queries with more
 *   than ten tokens can score above 1.
 */
function overlapScore(queryTokens, content) {
    if (!queryTokens.length)
        return 0;
    const vocabulary = new Set(tokenizeForOverlap(content, Number.POSITIVE_INFINITY));
    if (vocabulary.size === 0)
        return 0;
    let hits = 0;
    for (const queryToken of queryTokens) {
        hits += vocabulary.has(queryToken) ? 1 : 0;
    }
    const cappedLength = Math.min(queryTokens.length, 10);
    return hits / Math.max(2, cappedLength);
}
|
|
152
|
+
/**
 * Overlap score of query tokens against a document's metadata and leading content.
 *
 * @param {string[]} queryTokens - Normalised query tokens.
 * @param {{project: string, filename: string, type: string, path: string, content: string}} doc
 *   Document row; only the first 5000 characters of its content are considered.
 * @returns {number} Result of overlapScore over the combined text.
 */
function docOverlapScore(queryTokens, doc) {
    const { project, filename, type, path: docPath, content } = doc;
    const header = `${project} ${filename} ${type} ${docPath}`;
    const leadingContent = content.slice(0, 5000);
    return overlapScore(queryTokens, `${header}\n${leadingContent}`);
}
|
|
156
|
+
/**
 * Deterministic 32-bit hash of a string (FNV-1a style, over UTF-16 code units).
 *
 * @param {string} text - Text to hash.
 * @returns {number} Unsigned 32-bit integer; the same text always yields the same value.
 */
function semanticFallbackSeed(text) {
    const OFFSET_BASIS = 2166136261;
    const PRIME = 16777619;
    let acc = OFFSET_BASIS;
    let index = 0;
    while (index < text.length) {
        // XOR in the code unit, then multiply with 32-bit wraparound.
        acc = Math.imul(acc ^ text.charCodeAt(index), PRIME);
        index += 1;
    }
    // Reinterpret the signed 32-bit result as unsigned.
    return acc >>> 0;
}
|
|
164
|
+
/**
 * Load one rowid-ordered window of docs for the semantic fallback sample.
 *
 * @param {object} db - Database handle passed through to queryRows.
 * @param {number} startRowid - Lower bound (inclusive) used when wrapBefore is undefined.
 * @param {number} limit - Maximum number of rows to load.
 * @param {string} [project] - When truthy, restricts rows to this project.
 * @param {number} [wrapBefore] - When provided, loads rows with rowid below
 *   this value instead (the wrap-around part of a window).
 * @returns {Array} Rows converted with rowToDocWithRowid; empty when the query yields nothing.
 */
function loadSemanticFallbackWindow(db, startRowid, limit, project, wrapBefore) {
    const conditions = [];
    const bindings = [];
    if (project) {
        conditions.push("project = ?");
        bindings.push(project);
    }
    conditions.push(wrapBefore === undefined ? "rowid >= ?" : "rowid < ?");
    bindings.push(wrapBefore ?? startRowid, limit);
    const sql = `SELECT rowid, project, filename, type, content, path FROM docs WHERE ${conditions.join(" AND ")} ORDER BY rowid LIMIT ?`;
    const rawRows = queryRows(db, sql, bindings) || [];
    return rawRows.map((row) => rowToDocWithRowid(row));
}
|
|
177
|
+
// k=60 is the standard RRF constant from Cormack et al. (2009); higher values reduce
// the impact of top-ranked results, lower values amplify them. 60 is the community default.
const RRF_K = 60;
/**
 * Reciprocal Rank Fusion — merges ranked result lists from multiple search tiers.
 * A document appearing in several tiers accumulates a higher combined score.
 * Formula: score(d) = Σ 1/(k + rank_i) over each tier i containing d, with 1-based ranks.
 *
 * Documents are identified by `path`, falling back to `project/filename`; the
 * first instance seen for a key is the one returned.
 *
 * @param {Array<Array<object>>} tiers - Ranked lists, best result first.
 * @param {number} [k=RRF_K] - RRF smoothing constant.
 * @returns {object[]} Unique documents ordered by descending fused score.
 */
export function rrfMerge(tiers, k = RRF_K) {
    // key -> { doc, score }; Map insertion order preserves first-seen order for ties.
    const fused = new Map();
    for (const tier of tiers) {
        for (const [position, doc] of tier.entries()) {
            const key = doc.path || `${doc.project}/${doc.filename}`;
            const contribution = 1 / (k + position + 1);
            const entry = fused.get(key);
            if (entry) {
                entry.score += contribution;
            }
            else {
                fused.set(key, { doc, score: contribution });
            }
        }
    }
    return [...fused.values()]
        .sort((left, right) => right.score - left.score)
        .map((entry) => entry.doc);
}
|
|
201
|
+
/**
 * Token-overlap fallback search over a deterministic sample of the docs table.
 *
 * Samples up to SEMANTIC_FALLBACK_SAMPLE_LIMIT rows. Small tables are read in
 * full; larger ones are sampled in several rowid windows whose start positions
 * derive from a hash of the project and query terms. Sampled docs are scored by
 * token overlap with the prompt.
 *
 * @param {object} db - Database handle passed to queryRows.
 * @param {string} prompt - Query text to tokenise.
 * @param {string} [project] - When truthy, restricts sampling to this project.
 * @returns {object[]} Up to 8 docs scoring at least SEMANTIC_OVERLAP_MIN_SCORE,
 *   best first; empty when there are no terms, no rows, or stats cannot be decoded.
 */
function semanticFallbackDocs(db, prompt, project) {
    const terms = tokenizeForOverlap(prompt);
    if (terms.length === 0)
        return [];
    const statsSql = project
        ? "SELECT MIN(rowid), MAX(rowid), COUNT(*) FROM docs WHERE project = ?"
        : "SELECT MIN(rowid), MAX(rowid), COUNT(*) FROM docs";
    const statsRows = queryRows(db, statsSql, project ? [project] : []);
    if (!statsRows?.length)
        return [];
    let minRowid = 0;
    let maxRowid = 0;
    let rowCount = 0;
    try {
        minRowid = decodeFiniteNumber(statsRows[0][0], "semanticFallbackDocs.minRowid");
        maxRowid = decodeFiniteNumber(statsRows[0][1], "semanticFallbackDocs.maxRowid");
        rowCount = decodeFiniteNumber(statsRows[0][2], "semanticFallbackDocs.rowCount");
    }
    catch {
        // Undecodable stats (e.g. an empty table yields NULLs) mean nothing to sample.
        return [];
    }
    if (rowCount <= 0 || maxRowid < minRowid)
        return [];
    const cappedLimit = Math.min(SEMANTIC_FALLBACK_SAMPLE_LIMIT, rowCount);
    const sampled = [];
    const seenRowids = new Set();
    // Append unseen rows until the sample is full.
    const collect = (windowRows) => {
        for (const { rowid, doc } of windowRows) {
            if (seenRowids.has(rowid))
                continue;
            seenRowids.add(rowid);
            sampled.push(doc);
            if (sampled.length >= cappedLimit)
                break;
        }
    };
    const loadWindow = (startRowid, limit, wrapBefore) => loadSemanticFallbackWindow(db, startRowid, limit, project, wrapBefore);
    if (rowCount <= cappedLimit) {
        // The whole table fits in the sample.
        collect(loadWindow(minRowid, cappedLimit));
    }
    else {
        const span = Math.max(1, maxRowid - minRowid + 1);
        const windowCount = Math.min(SEMANTIC_FALLBACK_WINDOW_COUNT, cappedLimit);
        const perWindow = Math.max(1, Math.ceil(cappedLimit / windowCount));
        const stride = Math.max(1, Math.floor(span / windowCount));
        const seed = semanticFallbackSeed(`${project ?? "*"}\n${terms.join(" ")}`);
        for (let windowIndex = 0; windowIndex < windowCount && sampled.length < cappedLimit; windowIndex++) {
            const startRowid = minRowid + ((seed + windowIndex * stride) % span);
            collect(loadWindow(startRowid, perWindow));
            if (sampled.length >= cappedLimit)
                break;
            // Wrap around: also take rows that sit before the window start.
            collect(loadWindow(startRowid, perWindow, startRowid));
        }
    }
    if (sampled.length < cappedLimit) {
        // Top up from the beginning of the rowid range.
        collect(loadWindow(minRowid, cappedLimit - sampled.length));
    }
    const ranked = [];
    for (const doc of sampled) {
        const score = docOverlapScore(terms, doc);
        if (score >= SEMANTIC_OVERLAP_MIN_SCORE)
            ranked.push({ doc, score });
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, 8).map((entry) => entry.doc);
}
|
|
269
|
+
/**
 * Decide whether the existing rows are weak enough to justify vector search.
 *
 * @param {object[]|null|undefined} rows - Results gathered so far.
 * @param {string} prompt - Query text used to judge how well the rows match.
 * @param {number} [desiredResults=VECTOR_FALLBACK_SKIP_COUNT] - Requested
 *   result count; the effective target is clamped to [2, VECTOR_FALLBACK_SKIP_COUNT].
 * @returns {boolean} true when there are no rows, or when there are fewer rows
 *   than the target and the best overlap among the first two rows is below
 *   VECTOR_FALLBACK_STRONG_MATCH_SCORE; false otherwise, including when the
 *   prompt yields no tokens.
 */
export function shouldRunVectorExpansion(rows, prompt, desiredResults = VECTOR_FALLBACK_SKIP_COUNT) {
    const hasRows = Boolean(rows) && rows.length !== 0;
    if (!hasRows)
        return true;
    const enoughRows = Math.max(2, Math.min(VECTOR_FALLBACK_SKIP_COUNT, desiredResults));
    if (rows.length >= enoughRows)
        return false;
    const queryTokens = tokenizeForOverlap(prompt);
    if (queryTokens.length === 0)
        return false;
    let bestOverlap = 0;
    for (const doc of rows.slice(0, 2)) {
        bestOverlap = Math.max(bestOverlap, docOverlapScore(queryTokens, doc));
    }
    return bestOverlap < VECTOR_FALLBACK_STRONG_MATCH_SCORE;
}
|
|
283
|
+
/**
 * Rough token-count estimate for a piece of text.
 *
 * @param {string} text - Text to measure.
 * @returns {number} ceil(length / 3.5 + 0.1 × number of whitespace runs).
 */
function approximateTokens(text) {
    const whitespaceRuns = text.match(/\s+/g) || [];
    const estimate = text.length / 3.5 + whitespaceRuns.length * 0.1;
    return Math.ceil(estimate);
}
|
|
286
|
+
/**
 * Shrink a snippet to a line and character budget.
 *
 * @param {string} snippet - Text to compact.
 * @param {number} maxLines - Maximum number of non-blank lines kept (at least one line is kept).
 * @param {number} maxChars - Character budget; longer output is cut and ends in "…".
 *   At least 24 characters are kept before the ellipsis even when the budget is smaller.
 * @returns {string} Compacted snippet with trailing whitespace removed per line.
 */
function compactSnippet(snippet, maxLines, maxChars) {
    const nonBlank = [];
    for (const rawLine of snippet.split("\n")) {
        const line = rawLine.trimEnd();
        if (line.trim().length > 0)
            nonBlank.push(line);
    }
    const joined = nonBlank.slice(0, Math.max(1, maxLines)).join("\n");
    if (joined.length > maxChars) {
        const keep = Math.max(24, maxChars - 1);
        return joined.slice(0, keep).trimEnd() + "\u2026";
    }
    return joined;
}
|
|
297
|
+
// ── Task priority filtering ───────────────────────────────────────────────
|
|
298
|
+
/** Matches a `[high]`, `[medium]` or `[low]` priority tag, case-insensitively. */
const PRIORITY_TAG_RE = /\[(high|medium|low)\]/i;
/**
 * Keep only task items whose priority tag is in the allowed set.
 *
 * The allowed set comes from `allowedPriorities` when given, otherwise from the
 * comma-separated PHREN_TASK_PRIORITY environment variable, otherwise it
 * defaults to high and medium. Entries from either source are trimmed and
 * lower-cased, and blank entries are dropped, so `["High"]` behaves like
 * `["high"]`. Previously only the environment path was normalised, so
 * mixed-case caller input never matched any tag.
 *
 * Items without a priority tag are kept when either "high" or "medium" is allowed.
 *
 * @param {string[]} items - Task lines to filter.
 * @param {Iterable<string>} [allowedPriorities] - Priorities to keep.
 * @returns {string[]} Items that pass the filter, in their original order.
 */
export function filterTaskByPriority(items, allowedPriorities) {
    const envPriorities = process.env.PHREN_TASK_PRIORITY;
    const requested = allowedPriorities || (envPriorities ? envPriorities.split(",") : ["high", "medium"]);
    const allowed = new Set([...requested]
        .map((priority) => String(priority).trim().toLowerCase())
        .filter(Boolean));
    const untaggedAllowed = allowed.has("high") || allowed.has("medium");
    return items.filter((item) => {
        const match = item.match(PRIORITY_TAG_RE);
        return match ? allowed.has(match[1].toLowerCase()) : untaggedAllowed;
    });
}
|
|
310
|
+
// ── Search ───────────────────────────────────────────────────────────────────
|
|
311
|
+
/** Projects that are searched alongside the detected project in scoped queries. */
const SHARED_PROJECTS = ["shared", "org"];
/**
 * Synchronous two-tier document search.
 *
 * Tier 1 runs FTS queries (project-scoped and/or global, de-duplicated by path),
 * retrying once with a relaxed query when the strict query finds nothing.
 * Tier 2 runs the token-overlap semantic fallback. Both result lists are merged
 * with Reciprocal Rank Fusion.
 *
 * @param {object} db - Database handle passed to the query helpers.
 * @param {string} safeQuery - FTS MATCH expression for the strict query.
 * @param {string} prompt - Original prompt text.
 * @param {string} [keywords] - Extracted keywords; the prompt is used for the
 *   relaxed query when this is falsy.
 * @param {string} [detectedProject] - Project to scope the search to, if any.
 * @param {boolean} [searchAllProjects=false] - When true, also search globally
 *   even though a project was detected.
 * @param {string} [phrenPath] - Passed through to buildRelaxedFtsQuery.
 * @returns {object[]|null} Up to 12 merged docs, or null when nothing was found.
 */
export function searchDocuments(db, safeQuery, prompt, keywords, detectedProject, searchAllProjects = false, phrenPath) {
    // Tier 1: FTS5 — run project-scoped and global in one pass, dedup
    const ftsDocs = [];
    const ftsSeenKeys = new Set();
    const relaxedQuery = buildRelaxedFtsQuery(keywords || prompt, detectedProject, phrenPath);
    const addFtsRows = (rows) => {
        if (!rows)
            return;
        for (const doc of rows) {
            const key = doc.path || `${doc.project}/${doc.filename}`;
            if (!ftsSeenKeys.has(key)) {
                ftsSeenKeys.add(key);
                ftsDocs.push(doc);
            }
        }
    };
    const runScopedFtsQuery = (query) => {
        if (!query)
            return;
        if (detectedProject) {
            addFtsRows(queryDocRows(db, "SELECT project, filename, type, content, path FROM docs WHERE docs MATCH ? AND project = ? ORDER BY rank LIMIT 7", [query, detectedProject]));
        }
        if (searchAllProjects || !detectedProject) {
            addFtsRows(queryDocRows(db, "SELECT project, filename, type, content, path FROM docs WHERE docs MATCH ? ORDER BY rank LIMIT 10", [query]));
            return;
        }
        // Project detected and no global search requested: widen only to the shared projects.
        const scopeProjects = [detectedProject, ...SHARED_PROJECTS];
        const placeholders = scopeProjects.map(() => "?").join(", ");
        addFtsRows(queryDocRows(db, `SELECT project, filename, type, content, path FROM docs WHERE docs MATCH ? AND project IN (${placeholders}) ORDER BY rank LIMIT 10`, [query, ...scopeProjects]));
    };
    runScopedFtsQuery(safeQuery);
    if (ftsDocs.length === 0 && relaxedQuery && relaxedQuery !== safeQuery) {
        runScopedFtsQuery(relaxedQuery);
    }
    // Tier 2: Token-overlap semantic — always run, scored independently.
    // Join only the parts that are present: interpolating a missing `keywords`
    // would add the literal word "undefined"/"null" to the query terms.
    const semanticPrompt = [prompt, keywords].filter(Boolean).join("\n");
    const semanticDocs = semanticFallbackDocs(db, semanticPrompt, detectedProject);
    // Merge with Reciprocal Rank Fusion so documents found by both tiers rank highest
    const merged = rrfMerge([ftsDocs, semanticDocs]);
    if (merged.length === 0)
        return null;
    return merged.slice(0, 12);
}
|
|
354
|
+
/**
 * Async variant of searchDocuments that also runs real vector search (Tier 3)
 * when cloud embeddings (PHREN_EMBEDDING_API_URL) or Ollama are available.
 * Falls back to the sync result if vector search is unavailable or fails.
 *
 * When the sync search finds nothing, a keyword fallback search is tried first.
 * Vector search runs only when `phrenPath` is provided, a vector backend is
 * configured, and shouldRunVectorExpansion judges the current rows too weak.
 *
 * @param {object} db - Database handle passed to the query helpers.
 * @param {string} safeQuery - FTS MATCH expression for the strict query.
 * @param {string} prompt - Original prompt text.
 * @param {string} [keywords] - Extracted keywords, if any.
 * @param {string} [detectedProject] - Project to scope the search to, if any.
 * @param {boolean} [searchAllProjects=false] - Forwarded to searchDocuments.
 * @param {string} [phrenPath] - Required for vector search; forwarded to searchDocuments.
 * @returns {Promise<object[]|null>} Up to 12 merged docs, the sync/keyword
 *   result when vector search adds nothing, or null when nothing was found.
 */
export async function searchDocumentsAsync(db, safeQuery, prompt, keywords, detectedProject, searchAllProjects = false, phrenPath) {
    // Sync result (Tier 1 + Tier 2)
    let syncResult = searchDocuments(db, safeQuery, prompt, keywords, detectedProject, searchAllProjects, phrenPath);
    if (!syncResult || syncResult.length === 0) {
        const keywordRows = keywordFallbackSearch(db, prompt, { project: detectedProject ?? undefined, limit: 8 });
        if (keywordRows?.length)
            syncResult = keywordRows;
    }
    // Join only the parts that are present: interpolating a missing `keywords`
    // would add the literal word "undefined"/"null" to the vector query.
    const vectorPrompt = [prompt, keywords].filter(Boolean).join("\n");
    // Tier 3: Real vector search — only if embeddings are available and phrenPath provided
    const hasVectorBackend = Boolean(getCloudEmbeddingUrl() || getOllamaUrl());
    if (!phrenPath || !hasVectorBackend || !shouldRunVectorExpansion(syncResult, vectorPrompt)) {
        return syncResult;
    }
    try {
        const existingPaths = new Set((syncResult ?? []).map((d) => d.path || `${d.project}/${d.filename}`));
        const vectorDocs = await vectorFallback(phrenPath, vectorPrompt, existingPaths, 8, detectedProject);
        if (vectorDocs.length === 0)
            return syncResult;
        // RRF-merge the already-fused sync result with the vector results
        const merged = rrfMerge([syncResult ?? [], vectorDocs]);
        if (merged.length === 0)
            return syncResult;
        return merged.slice(0, 12);
    }
    catch (err) {
        // Vector search failure is non-fatal — return sync result
        if (process.env.PHREN_DEBUG)
            process.stderr.write(`[phren] hybridSearch vectorFallback: ${err instanceof Error ? err.message : String(err)}\n`);
        return syncResult;
    }
}
|
|
391
|
+
/**
 * Search the docs index for a query, widening through fallbacks when FTS is weak.
 *
 * Order of attempts:
 *  1. FTS with each query variant until one returns rows.
 *  2. Cosine fallback when FTS returned fewer than three rows (or none).
 *  3. Keyword fallback when there are still no rows.
 *  4. Vector fallback when shouldRunVectorExpansion asks for it.
 *
 * @param {object} db - Database handle (must expose `exec` for the rowid query).
 * @param {object} options
 * @param {string} options.query - Raw user query.
 * @param {number} options.maxResults - Desired number of results.
 * @param {number} [options.fetchLimit=maxResults] - LIMIT applied to the FTS query.
 * @param {string} [options.filterProject] - Restrict results to this project.
 * @param {string} [options.filterType] - Restrict results to this doc type.
 * @param {string} [options.phrenPath] - Forwarded to query building and vector fallback.
 * @returns {Promise<{safeQuery: string, rows: object[]|null, usedFallback: boolean}>}
 *   The first FTS variant, the collected rows (null when nothing ran or matched),
 *   and whether any fallback contributed rows.
 */
export async function searchKnowledgeRows(db, options) {
    const { query, maxResults, fetchLimit = maxResults, filterProject, filterType, phrenPath, } = options;
    const queryVariants = buildFtsQueryVariants(query, filterProject, phrenPath);
    const safeQuery = queryVariants[0] ?? "";
    if (!safeQuery)
        return { safeQuery, rows: null, usedFallback: false };
    const DOC_COLUMNS = "project, filename, type, content, path";
    // Single place that builds the filtered MATCH query, shared by the primary and rowid lookups.
    const buildMatchQuery = (columns, ftsQuery, limit) => {
        let sql = `SELECT ${columns} FROM docs WHERE docs MATCH ?`;
        const params = [ftsQuery];
        if (filterProject) {
            sql += " AND project = ?";
            params.push(filterProject);
        }
        if (filterType) {
            sql += " AND type = ?";
            params.push(filterType);
        }
        sql += " ORDER BY rank LIMIT ?";
        params.push(limit);
        return { sql, params };
    };
    const matchesFilters = (doc) => (!filterProject || doc.project === filterProject) && (!filterType || doc.type === filterType);
    // Same document key used by rrfMerge and searchDocuments in this module.
    const docKey = (doc) => doc.path || `${doc.project}/${doc.filename}`;
    const debugEnabled = Boolean(process.env.PHREN_DEBUG);
    // Try the strict variant first, then progressively relaxed ones, stopping at the first hit.
    let activeFtsQuery = safeQuery;
    let rows = null;
    for (const variant of queryVariants) {
        const { sql, params } = buildMatchQuery(DOC_COLUMNS, variant, fetchLimit);
        rows = queryDocRows(db, sql, params);
        if (rows?.length) {
            activeFtsQuery = variant;
            break;
        }
    }
    let usedFallback = false;
    if (rows && rows.length < 3) {
        // Collect rowids of the FTS hits so the cosine fallback can skip them.
        const ftsRowids = new Set();
        try {
            // Mirror the primary query (same filters and LIMIT) so every row already
            // in `rows` is excluded, even when fetchLimit differs from maxResults.
            const { sql, params } = buildMatchQuery(`rowid, ${DOC_COLUMNS}`, activeFtsQuery, fetchLimit);
            const rowidResult = db.exec(sql, params);
            if (rowidResult?.length && rowidResult[0]?.values?.length) {
                for (const row of rowidResult[0].values) {
                    ftsRowids.add(rowToDocWithRowid(row).rowid);
                }
            }
        }
        catch (err) {
            debugLog(`rowid dedup query failed: ${err instanceof Error ? err.message : String(err)}`);
        }
        // Never request a negative number of rows.
        const remaining = Math.max(0, maxResults - rows.length);
        const cosineResults = cosineFallback(db, query, ftsRowids, remaining).filter(matchesFilters);
        if (cosineResults.length > 0) {
            rows = [...rows, ...cosineResults];
            usedFallback = true;
        }
    }
    if (!rows) {
        const cosineResults = cosineFallback(db, query, new Set(), maxResults).filter(matchesFilters);
        if (cosineResults.length > 0) {
            rows = cosineResults;
            usedFallback = true;
        }
    }
    if (!rows) {
        const fallbackRows = keywordFallbackSearch(db, query, {
            project: filterProject ?? undefined,
            type: filterType ?? undefined,
            limit: maxResults,
        });
        if (fallbackRows) {
            rows = fallbackRows;
            usedFallback = true;
        }
    }
    if (shouldRunVectorExpansion(rows, query, maxResults)) {
        try {
            const existingRows = rows ?? [];
            const alreadyFoundPaths = new Set(existingRows.map(docKey));
            const vecRows = await vectorFallback(phrenPath, query, alreadyFoundPaths, Math.max(0, maxResults - existingRows.length), filterProject ?? undefined);
            const filteredVecRows = filterType ? vecRows.filter((row) => row.type === filterType) : vecRows;
            if (filteredVecRows.length > 0) {
                rows = [...existingRows, ...filteredVecRows];
                usedFallback = true;
            }
        }
        catch (err) {
            // Vector search is best-effort; keep whatever was found so far.
            if (debugEnabled) {
                process.stderr.write(`[phren] vectorFallback: ${err instanceof Error ? err.message : String(err)}\n`);
            }
        }
    }
    return { safeQuery, rows, usedFallback };
}
|
|
493
|
+
// ── Trust filter ─────────────────────────────────────────────────────────────
|
|
494
|
+
const TRUST_FILTERED_TYPES = new Set(["findings", "reference", "knowledge"]);
/**
 * Run the trust filter over retrieved rows without performing any writes.
 *
 * Rows whose `type` is not in TRUST_FILTERED_TYPES are passed through untouched.
 * For every other row the content is replaced by the output of
 * `filterTrustedFindingsDetailed`; rows whose filtered content is blank are dropped.
 * Issues reported by the filter are collected so the caller can persist them.
 *
 * @param rows          Retrieved docs (each with `type`, `project`, `content`).
 * @param ttlDays       Forwarded to the trust filter as `ttlDays`.
 * @param minConfidence Forwarded to the trust filter as `minConfidence`.
 * @param decay         Forwarded to the trust filter as `decay`.
 * @param phrenPath     When truthy, high-impact finding ids are looked up via
 *                      `getHighImpactFindings(phrenPath, 3)` and forwarded to the filter.
 * @returns `{ rows, queueItems, auditEntries }` — surviving rows plus the queue/audit
 *          records the caller is expected to write.
 */
export function applyTrustFilter(rows, ttlDays, minConfidence, decay, phrenPath) {
    const queueItems = [];
    const auditEntries = [];
    const highImpactFindingIds = phrenPath ? getHighImpactFindings(phrenPath, 3) : undefined;
    const survivors = [];
    for (const doc of rows) {
        if (!TRUST_FILTERED_TYPES.has(doc.type)) {
            survivors.push(doc);
            continue;
        }
        const { content, issues } = filterTrustedFindingsDetailed(doc.content, {
            ttlDays,
            minConfidence,
            decay,
            project: doc.project,
            highImpactFindingIds,
        });
        if (issues.length > 0) {
            // Split the reported issues by reason; any other reason is ignored here.
            const stale = [];
            const conflicts = [];
            for (const issue of issues) {
                if (issue.reason === "stale") {
                    stale.push(issue.bullet);
                }
                else if (issue.reason === "invalid_citation") {
                    conflicts.push(issue.bullet);
                }
            }
            if (stale.length) {
                queueItems.push({ project: doc.project, section: "Stale", items: stale });
            }
            if (conflicts.length) {
                queueItems.push({ project: doc.project, section: "Conflicts", items: conflicts });
            }
            auditEntries.push(`project=${doc.project} type=${doc.type} stale=${stale.length} invalid_citation=${conflicts.length}`);
        }
        // A trust-filtered doc with nothing left after filtering is not worth returning.
        if (content.trim()) {
            survivors.push({ ...doc, content });
        }
    }
    return { rows: survivors, queueItems, auditEntries };
}
|
|
528
|
+
// ── Ranking ──────────────────────────────────────────────────────────────────
|
|
529
|
+
/**
 * Find the newest `## YYYY-MM-DD` heading in a document.
 * Only headings that start a line are considered. Dates are compared as strings,
 * which orders zero-padded YYYY-MM-DD values chronologically.
 * Returns the sentinel "0000-00-00" when no dated heading exists.
 */
function mostRecentDate(content) {
    let newest = null;
    for (const [, day] of content.matchAll(/^## (\d{4}-\d{2}-\d{2})/gm)) {
        if (newest === null || day > newest) {
            newest = day;
        }
    }
    return newest ?? "0000-00-00";
}
|
|
535
|
+
/**
 * Shared helper: compute age in whole days from a YYYY-MM-DD date string.
 *
 * The date is interpreted as UTC midnight and compared against the current UTC day.
 * Dates in the future are clamped to an age of 0.
 *
 * @param dateStr Date in strict YYYY-MM-DD form.
 * @returns Non-negative integer number of days, or Infinity when the input is
 *          missing, malformed, unparseable, or the "0000-00-00" sentinel.
 */
function ageInDaysFromDate(dateStr) {
    if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr) || dateStr === "0000-00-00")
        return Infinity;
    const entryUtc = Date.parse(`${dateStr}T00:00:00Z`);
    if (Number.isNaN(entryUtc))
        return Infinity;
    // Read the clock exactly once: taking year, month and day from separate
    // Date instances can mix fields from two different days if the calls
    // straddle a UTC midnight (worst case a year rollover).
    const now = new Date();
    const todayUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
    return Math.max(0, Math.floor((todayUtc - entryUtc) / 86_400_000));
}
|
|
545
|
+
/**
 * Recency boost for findings: newer findings rank higher.
 *
 * @param docType    Document type; anything other than "findings" gets no boost.
 * @param latestDate Pre-computed YYYY-MM-DD date of the newest entry in the doc.
 * @returns 0.3 when the date is at most 7 days old, 0.15 when at most 30 days old, otherwise 0.
 */
export function recencyBoost(docType, latestDate) {
    if (docType === "findings") {
        const ageDays = ageInDaysFromDate(latestDate);
        // Tiers are checked youngest-first; the first matching tier wins.
        const tiers = [
            [7, 0.3],
            [30, 0.15],
        ];
        for (const [maxAgeDays, boost] of tiers) {
            if (ageDays <= maxAgeDays) {
                return boost;
            }
        }
    }
    return 0;
}
|
|
556
|
+
/**
 * Score multiplier that fades findings belonging to a project other than the detected one.
 *
 * Returns 1 (no effect) unless the doc is a "findings" doc, a project was detected,
 * and the doc belongs to a different project. Otherwise the multiplier falls linearly
 * with age over the decay window and never drops below 0.1.
 *
 * The window is read from PHREN_CROSS_PROJECT_DECAY_DAYS (positive integer, default 30).
 * A doc whose age cannot be determined is treated as 90 days old.
 */
function crossProjectAgeMultiplier(doc, detectedProject, latestDate) {
    const isForeignFinding = doc.type === "findings"
        && Boolean(detectedProject)
        && doc.project !== detectedProject;
    if (!isForeignFinding) {
        return 1;
    }
    const DEFAULT_DECAY_DAYS = 30;
    const UNKNOWN_AGE_DAYS = 90;
    const MIN_MULTIPLIER = 0.1;
    const configuredDays = Number.parseInt((process.env.PHREN_CROSS_PROJECT_DECAY_DAYS) ?? "30", 10);
    const windowDays = Number.isFinite(configuredDays) && configuredDays > 0
        ? configuredDays
        : DEFAULT_DECAY_DAYS;
    const measuredAge = ageInDaysFromDate(latestDate);
    const effectiveAge = Number.isFinite(measuredAge) ? measuredAge : UNKNOWN_AGE_DAYS;
    return Math.max(MIN_MULTIPLIER, 1 - (effectiveAge / windowDays));
}
|
|
565
|
+
/**
 * Rank retrieved docs and return at most eight of them.
 *
 * Steps, in order:
 *  1. When a project was detected: keep every doc of that project, keep docs of types
 *     the project has no local doc for, and keep at most two other-project docs per
 *     type that does exist locally. The project's canonical doc is then added unless
 *     it is already among the rows.
 *  2. Compute per-doc metadata once (glob boost, entity boost, file/branch relevance,
 *     quality, query overlap, recency) and combine it into a score rounded to four decimals.
 *  3. Sort: optionally file/branch matches first (feature flag + changed files present),
 *     then findings before other types, findings by newest date, then score, query overlap,
 *     glob boost, quality, entity boost, and finally the doc key for determinism.
 *  4. Truncate to eight docs and, unless the intent is "build", `opts.skipTaskFilter` is set,
 *     or `opts.filterType` is "task", drop task docs except a single "rescued" task whose
 *     query overlap or score clearly beats the best non-task doc.
 *
 * @param rows            Candidate docs.
 * @param intent          Intent label passed to `intentBoost`; "build" disables task filtering.
 * @param gitCtx          Optional git context; `changedFiles` and `branch` are read when present.
 * @param detectedProject Project the caller is working in, if known.
 * @param phrenPathLocal  Forwarded to the boost/quality helpers.
 * @param db              Database handle used for the canonical-doc and entity lookups.
 * @param cwd             Forwarded to `getProjectGlobBoost`.
 * @param query           Raw query text; when empty, overlap-based signals are disabled.
 * @param opts            Optional `{ skipTaskFilter, filterType }`.
 * @returns Ranked docs, best first.
 */
export function rankResults(rows, intent, gitCtx, detectedProject, phrenPathLocal, db, cwd, query, opts) {
    const MAX_RANKED_RESULTS = 8;
    // Stable identity for a doc: its path when present, otherwise project/filename.
    const identityOf = (doc) => doc.path || `${doc.project}/${doc.filename}`;
    let ranked = [...rows];
    const queryTokens = query ? tokenizeForOverlap(query) : [];
    if (detectedProject) {
        const localByType = new Set(ranked.filter((r) => r.project === detectedProject).map((r) => r.type));
        // Keep all local docs, and allow up to 2 shared/org docs per type even if
        // that type exists locally — avoids suppressing cross-project knowledge.
        const sharedCountByType = new Map();
        const MAX_SHARED_PER_TYPE = 2;
        ranked = ranked.filter((r) => {
            if (r.project === detectedProject)
                return true;
            if (!localByType.has(r.type))
                return true;
            const count = sharedCountByType.get(r.type) ?? 0;
            if (count < MAX_SHARED_PER_TYPE) {
                sharedCountByType.set(r.type, count + 1);
                return true;
            }
            return false;
        });
        const canonicalRows = queryDocRows(db, "SELECT project, filename, type, content, path FROM docs WHERE project = ? AND type = 'canonical' LIMIT 1", [detectedProject]);
        if (canonicalRows) {
            // The canonical doc may already have been retrieved by the search itself;
            // adding it unconditionally would put the same doc in the list twice.
            const alreadyRanked = new Set(ranked.map(identityOf));
            const missingCanonical = canonicalRows.filter((row) => !alreadyRanked.has(identityOf(row)));
            ranked = [...missingCanonical, ...ranked];
        }
    }
    const entityBoost = query ? getEntityBoostDocs(db, query) : new Set();
    const entityBoostPaths = new Set();
    for (const doc of ranked) {
        // Use getDocSourceKey to build the full project/relFile key, matching what
        // entity_links stores (e.g. project/reference/arch.md, not project/arch.md).
        const docKey = getDocSourceKey(doc, phrenPathLocal);
        if (entityBoost.has(docKey))
            entityBoostPaths.add(doc.path);
    }
    // Pre-compute mostRecentDate once per findings doc to avoid repeated regex rescans in sort.
    const recentDateCache = new Map();
    for (const doc of ranked) {
        if (doc.type === "findings") {
            recentDateCache.set(identityOf(doc), mostRecentDate(doc.content));
        }
    }
    const getRecentDate = (doc) => recentDateCache.get(identityOf(doc)) ?? "0000-00-00";
    // Precompute per-doc ranking metadata once — avoids recomputing inside sort comparator.
    const changedFiles = gitCtx?.changedFiles || new Set();
    const FILE_MATCH_BOOST = 1.5;
    const scored = ranked.map((doc) => {
        const globBoost = getProjectGlobBoost(phrenPathLocal, doc.project, cwd, gitCtx?.changedFiles);
        const key = entryScoreKey(doc.project, doc.filename, doc.content);
        const entity = entityBoostPaths.has(doc.path) ? 1.3 : 1;
        const date = getRecentDate(doc);
        const fileRel = fileRelevanceBoost(doc.path, changedFiles);
        const branchMat = branchMatchBoost(doc.content, gitCtx?.branch);
        const qualityMult = getQualityMultiplier(phrenPathLocal, key);
        const queryOverlap = queryTokens.length > 0 ? docOverlapScore(queryTokens, doc) : 0;
        const queryOverlapWeight = detectedProject && doc.project === detectedProject
            ? LOCAL_QUERY_OVERLAP_WEIGHT
            : CROSS_PROJECT_QUERY_OVERLAP_WEIGHT;
        const weakCrossProjectPenalty = detectedProject
            && doc.project !== detectedProject
            && queryTokens.length > 0
            && queryOverlap < WEAK_CROSS_PROJECT_OVERLAP_MAX
            ? WEAK_CROSS_PROJECT_OVERLAP_PENALTY
            : 0;
        const score = Math.round((intentBoost(intent, doc.type) +
            fileRel +
            branchMat +
            globBoost +
            qualityMult +
            entity +
            queryOverlap * queryOverlapWeight +
            recencyBoost(doc.type, date) -
            weakCrossProjectPenalty -
            lowValuePenalty(doc.content, doc.type)) * crossProjectAgeMultiplier(doc, detectedProject, date) * 10000) / 10000;
        const fileMatch = fileRel > 0 || branchMat > 0;
        return { doc, score, fileMatch, globBoost, qualityMult, entity, date, queryOverlap };
    });
    // Decide once whether file/branch matches take precedence. Using the normalised
    // `changedFiles` set avoids dereferencing `gitCtx.changedFiles` when the git context
    // carries no changed-file set, and keeps the feature-flag lookup out of the comparator.
    const preferFileMatches = changedFiles.size > 0
        && isFeatureEnabled("PHREN_FEATURE_GIT_CONTEXT_FILTER", false);
    // Single composite sort on cached values.
    scored.sort((a, b) => {
        if (preferFileMatches) {
            const scoreDiff = (b.fileMatch ? FILE_MATCH_BOOST : 1) - (a.fileMatch ? FILE_MATCH_BOOST : 1);
            if (scoreDiff !== 0)
                return scoreDiff;
        }
        const isFindingsA = a.doc.type === "findings";
        const isFindingsB = b.doc.type === "findings";
        if (isFindingsA !== isFindingsB)
            return isFindingsA ? -1 : 1;
        if (isFindingsA && isFindingsB) {
            const byDate = b.date.localeCompare(a.date);
            if (byDate !== 0)
                return byDate;
        }
        const scoreDelta = b.score - a.score;
        if (Math.abs(scoreDelta) > 0.01)
            return scoreDelta;
        const overlapDelta = b.queryOverlap - a.queryOverlap;
        if (Math.abs(overlapDelta) > 0.01)
            return overlapDelta;
        const globDelta = b.globBoost - a.globBoost;
        if (Math.abs(globDelta) > 0.01)
            return globDelta;
        const qualityDelta = b.qualityMult - a.qualityMult;
        if (qualityDelta !== 0)
            return qualityDelta;
        if (b.entity !== a.entity)
            return b.entity - a.entity;
        // Final tie-break on the doc key keeps the ordering deterministic.
        return identityOf(a.doc).localeCompare(identityOf(b.doc));
    });
    const shouldFilterTask = intent !== "build" && !opts?.skipTaskFilter && opts?.filterType !== "task";
    const rescuedTaskPaths = new Set();
    if (shouldFilterTask && queryTokens.length > 0) {
        // Only the highest-ranked task doc is eligible for rescue.
        const bestTask = scored.find((entry) => entry.doc.type === "task");
        if (bestTask && bestTask.queryOverlap >= TASK_RESCUE_MIN_OVERLAP) {
            const bestNonTask = scored.find((entry) => entry.doc.type !== "task");
            if (!bestNonTask
                || bestTask.queryOverlap >= bestNonTask.queryOverlap + TASK_RESCUE_OVERLAP_MARGIN
                || bestTask.score >= bestNonTask.score + TASK_RESCUE_SCORE_MARGIN) {
                rescuedTaskPaths.add(identityOf(bestTask.doc));
            }
        }
    }
    ranked = scored.map((s) => s.doc).slice(0, MAX_RANKED_RESULTS);
    if (shouldFilterTask) {
        ranked = ranked.filter((r) => r.type !== "task" || rescuedTaskPaths.has(identityOf(r)));
    }
    return ranked;
}
|
|
701
|
+
/**
 * Mark snippet lines whose citation no longer resolves.
 *
 * Each line is paired with the line that follows it. When that following line parses as a
 * citation comment naming a file that can be resolved (relative to `citation.repo`, or
 * already absolute), the citation is considered stale if the file does not exist, the cited
 * line number is past the end of the file, the cited line is blank, or the file cannot be read.
 * A stale pair is emitted as the content line suffixed with " [stale citation]" and the
 * citation comment line itself is dropped. Everything else is passed through unchanged.
 *
 * @param snippet Newline-separated snippet text.
 * @returns The snippet with stale citations annotated.
 */
export function markStaleCitations(snippet) {
    // True when the cited target is missing, truncated, blank at the cited line, or unreadable.
    const targetIsStale = (resolvedFile, citedLine) => {
        if (!fs.existsSync(resolvedFile)) {
            return true;
        }
        if (citedLine !== undefined && citedLine >= 1) {
            // Verify the cited line still has content (not beyond EOF, not blank).
            try {
                const fileLines = fs.readFileSync(resolvedFile, "utf8").split("\n");
                return citedLine > fileLines.length || fileLines[citedLine - 1].trim() === "";
            }
            catch (err) {
                if (process.env.PHREN_DEBUG) {
                    process.stderr.write(`[phren] applyCitationAnnotations fileRead: ${err instanceof Error ? err.message : String(err)}\n`);
                }
                return true;
            }
        }
        return false;
    };
    const lines = snippet.split("\n");
    const annotated = [];
    let idx = 0;
    while (idx < lines.length) {
        const current = lines[idx];
        const following = lines[idx + 1];
        // Only a non-empty following line can be a citation comment.
        const citation = following ? parseCitationComment(following) : null;
        let resolvedFile = null;
        if (citation && citation.file) {
            if (citation.repo) {
                resolvedFile = path.resolve(citation.repo, citation.file);
            }
            else if (path.isAbsolute(citation.file)) {
                resolvedFile = citation.file;
            }
        }
        if (resolvedFile && targetIsStale(resolvedFile, citation.line)) {
            annotated.push(current + " [stale citation]");
            idx += 2; // also consume the citation comment line
        }
        else {
            annotated.push(current);
            idx += 1;
        }
    }
    return annotated.join("\n");
}
|
|
750
|
+
/**
 * Pick up to three snippets from the ranked docs while staying inside a token budget.
 *
 * For each doc, in order: extract and compact a snippet, annotate stale citations for
 * trust-filtered doc types, shrink snippets with low query focus, then apply the budget:
 *  - once something is selected, a very-low-focus snippet is skipped if it would push usage
 *    past 80% of the budget, and any snippet that would exceed the budget ends selection;
 *  - the first snippet is always kept, but is compacted further if it alone exceeds the budget.
 *
 * @param rows        Ranked docs, best first.
 * @param keywords    Query text used for extraction and focus scoring.
 * @param tokenBudget Maximum estimated tokens to spend.
 * @param lineBudget  Maximum lines per snippet.
 * @param charBudget  Maximum characters per snippet.
 * @returns `{ selected, usedTokens }` where each selected entry is `{ doc, snippet, key }`.
 */
export function selectSnippets(rows, keywords, tokenBudget, lineBudget, charBudget) {
    // Estimated cost of one snippet: its approximate tokens plus a fixed 14-token
    // allowance (presumably for per-snippet framing — confirm against the output module).
    const costOf = (text) => approximateTokens(text) + 14;
    const queryTokens = tokenizeForOverlap(keywords);
    const hasQuery = queryTokens.length > 0;
    const focusOf = (doc, text) => overlapScore(queryTokens, `${doc.filename}\n${text}`);
    const selected = [];
    // Usage starts at 36 tokens (presumably a fixed allowance for surrounding output — confirm).
    let usedTokens = 36;
    for (const doc of rows) {
        let snippet = compactSnippet(extractSnippet(doc.content, keywords, 8), lineBudget, charBudget);
        if (!snippet.trim()) {
            continue;
        }
        // Mark findings with stale citations before injection
        if (TRUST_FILTERED_TYPES.has(doc.type)) {
            snippet = markStaleCitations(snippet);
        }
        let focusScore = hasQuery ? focusOf(doc, snippet) : 1;
        if (focusScore < LOW_FOCUS_SNIPPET_SCORE) {
            // Weakly related snippets get a tighter line/char allowance, then are re-scored.
            const lineCap = Math.min(lineBudget, LOW_FOCUS_SNIPPET_LINE_CAP);
            const charCap = Math.max(120, Math.floor(charBudget * LOW_FOCUS_SNIPPET_CHAR_FRACTION));
            snippet = compactSnippet(snippet, lineCap, charCap);
            focusScore = hasQuery ? focusOf(doc, snippet) : focusScore;
        }
        let est = costOf(snippet);
        const exceedsBudget = usedTokens + est > tokenBudget;
        if (selected.length > 0) {
            if (focusScore < VERY_LOW_FOCUS_SNIPPET_SCORE && usedTokens + est > Math.floor(tokenBudget * 0.8)) {
                continue;
            }
            if (exceedsBudget) {
                break;
            }
        }
        else if (exceedsBudget) {
            // Never return nothing: squeeze the first snippet instead of dropping it.
            snippet = compactSnippet(snippet, 3, Math.floor(charBudget * 0.55));
            est = costOf(snippet);
        }
        selected.push({ doc, snippet, key: entryScoreKey(doc.project, doc.filename, doc.content) });
        usedTokens += est;
        if (selected.length >= 3) {
            break;
        }
    }
    // Final pass: trim from the end if token budget is exceeded (guards against
    // rounding / compaction producing more tokens than estimated during selection)
    while (selected.length > 1 && usedTokens > tokenBudget) {
        const dropped = selected.pop();
        usedTokens -= costOf(dropped.snippet);
    }
    return { selected, usedTokens };
}
|
|
795
|
+
// Re-export approximateTokens for use in output module
|
|
796
|
+
export { approximateTokens };
|