@poolzin/pool-bot 2026.2.10 → 2026.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/agents/auth-profiles/usage.js +22 -0
- package/dist/agents/auth-profiles.js +1 -1
- package/dist/agents/bash-tools.exec.js +4 -6
- package/dist/agents/glob-pattern.js +42 -0
- package/dist/agents/memory-search.js +33 -0
- package/dist/agents/model-fallback.js +59 -8
- package/dist/agents/pi-tools.before-tool-call.js +145 -4
- package/dist/agents/pi-tools.js +27 -9
- package/dist/agents/pi-tools.policy.js +85 -92
- package/dist/agents/pi-tools.schema.js +54 -27
- package/dist/agents/sandbox/validate-sandbox-security.js +157 -0
- package/dist/agents/sandbox-tool-policy.js +26 -0
- package/dist/agents/sanitize-for-prompt.js +18 -0
- package/dist/agents/session-write-lock.js +203 -39
- package/dist/agents/system-prompt.js +52 -10
- package/dist/agents/tool-loop-detection.js +466 -0
- package/dist/agents/tool-policy.js +6 -0
- package/dist/auto-reply/reply/post-compaction-audit.js +96 -0
- package/dist/auto-reply/reply/post-compaction-context.js +98 -0
- package/dist/build-info.json +3 -3
- package/dist/config/zod-schema.agent-defaults.js +14 -0
- package/dist/config/zod-schema.agent-runtime.js +14 -0
- package/dist/infra/path-safety.js +16 -0
- package/dist/logging/diagnostic-session-state.js +73 -0
- package/dist/logging/diagnostic.js +22 -0
- package/dist/memory/embeddings.js +36 -9
- package/dist/memory/hybrid.js +24 -5
- package/dist/memory/manager.js +76 -28
- package/dist/memory/mmr.js +164 -0
- package/dist/memory/query-expansion.js +331 -0
- package/dist/memory/temporal-decay.js +119 -0
- package/dist/process/kill-tree.js +98 -0
- package/dist/shared/pid-alive.js +12 -0
- package/dist/shared/process-scoped-map.js +10 -0
- package/extensions/bluebubbles/package.json +1 -1
- package/extensions/copilot-proxy/package.json +1 -1
- package/extensions/diagnostics-otel/package.json +1 -1
- package/extensions/discord/package.json +1 -1
- package/extensions/google-antigravity-auth/package.json +1 -1
- package/extensions/google-gemini-cli-auth/package.json +1 -1
- package/extensions/googlechat/package.json +1 -1
- package/extensions/imessage/package.json +1 -1
- package/extensions/line/package.json +1 -1
- package/extensions/llm-task/package.json +1 -1
- package/extensions/lobster/package.json +1 -1
- package/extensions/matrix/CHANGELOG.md +5 -0
- package/extensions/matrix/package.json +1 -1
- package/extensions/mattermost/package.json +1 -1
- package/extensions/memory-core/package.json +1 -1
- package/extensions/memory-lancedb/package.json +1 -1
- package/extensions/msteams/CHANGELOG.md +5 -0
- package/extensions/msteams/package.json +1 -1
- package/extensions/nextcloud-talk/package.json +1 -1
- package/extensions/nostr/CHANGELOG.md +5 -0
- package/extensions/nostr/package.json +1 -1
- package/extensions/open-prose/package.json +1 -1
- package/extensions/signal/package.json +1 -1
- package/extensions/slack/package.json +1 -1
- package/extensions/telegram/package.json +1 -1
- package/extensions/tlon/package.json +1 -1
- package/extensions/twitch/CHANGELOG.md +5 -0
- package/extensions/twitch/package.json +1 -1
- package/extensions/voice-call/CHANGELOG.md +5 -0
- package/extensions/voice-call/package.json +1 -1
- package/extensions/whatsapp/package.json +1 -1
- package/extensions/zalo/CHANGELOG.md +5 -0
- package/extensions/zalo/package.json +1 -1
- package/extensions/zalouser/CHANGELOG.md +5 -0
- package/extensions/zalouser/package.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
 * Maximal Marginal Relevance (MMR) re-ranking algorithm.
 *
 * MMR balances relevance with diversity by iteratively selecting results
 * that maximize: λ * relevance - (1-λ) * max_similarity_to_selected
 *
 * @see Carbonell & Goldstein, "The Use of MMR, Diversity-Based Reranking" (1998)
 */
export const DEFAULT_MMR_CONFIG = {
    // Re-ranking is opt-in; callers must enable it explicitly.
    enabled: false,
    // Trade-off weight: 1 = pure relevance, 0 = pure diversity.
    lambda: 0.7,
};
|
|
13
|
+
/**
 * Tokenize text for Jaccard similarity computation.
 * Lowercases the input and collects runs of [a-z0-9_] into a token set.
 */
export function tokenize(text) {
    const matches = text.toLowerCase().match(/[a-z0-9_]+/g);
    if (matches === null) {
        return new Set();
    }
    return new Set(matches);
}
|
|
21
|
+
/**
 * Compute Jaccard similarity between two token sets.
 * Returns |A ∩ B| / |A ∪ B| in [0, 1]; two empty sets count as identical (1).
 */
export function jaccardSimilarity(setA, setB) {
    if (setA.size === 0) {
        return setB.size === 0 ? 1 : 0;
    }
    if (setB.size === 0) {
        return 0;
    }
    // Iterate the smaller set so the loop is O(min(|A|, |B|)).
    const [small, big] = setA.size <= setB.size ? [setA, setB] : [setB, setA];
    let shared = 0;
    for (const tok of small) {
        if (big.has(tok)) {
            shared += 1;
        }
    }
    const union = setA.size + setB.size - shared;
    return union === 0 ? 0 : shared / union;
}
|
|
43
|
+
/**
 * Compute text similarity between two content strings as the Jaccard
 * similarity of their token sets.
 */
export function textSimilarity(contentA, contentB) {
    const tokensA = tokenize(contentA);
    const tokensB = tokenize(contentB);
    return jaccardSimilarity(tokensA, tokensB);
}
|
|
49
|
+
/**
 * Compute the maximum pairwise similarity between `item` and every entry of
 * `selectedItems`, using pre-tokenized sets from `tokenCache` when present.
 * Returns 0 when nothing has been selected yet.
 */
function maxSimilarityToSelected(item, selectedItems, tokenCache) {
    if (selectedItems.length === 0) {
        return 0;
    }
    const itemTokens = tokenCache.get(item.id) ?? tokenize(item.content);
    let best = 0;
    for (const chosen of selectedItems) {
        const chosenTokens = tokenCache.get(chosen.id) ?? tokenize(chosen.content);
        best = Math.max(best, jaccardSimilarity(itemTokens, chosenTokens));
    }
    return best;
}
|
|
67
|
+
/**
 * Compute the MMR score for a candidate:
 * MMR = λ * relevance - (1 - λ) * max_similarity_to_selected
 */
export function computeMMRScore(relevance, maxSimilarity, lambda) {
    const diversityPenalty = (1 - lambda) * maxSimilarity;
    return lambda * relevance - diversityPenalty;
}
|
|
74
|
+
/**
 * Re-rank items using Maximal Marginal Relevance (MMR).
 *
 * Iteratively picks the item maximizing
 *   λ * normalizedRelevance - (1 - λ) * maxSimilarityToAlreadySelected
 * so the output balances relevance against redundancy with what was
 * already chosen. Ties are broken by the higher original score.
 *
 * @param items - Items to re-rank; each needs id, score, and content.
 * @param config - Optional { enabled, lambda } overrides.
 * @returns A new array with the items in MMR order (input is not mutated).
 */
export function mmrRerank(items, config = {}) {
    const { enabled = DEFAULT_MMR_CONFIG.enabled, lambda = DEFAULT_MMR_CONFIG.lambda } = config;
    if (!enabled || items.length <= 1) {
        return [...items];
    }
    const clampedLambda = Math.max(0, Math.min(1, lambda));
    if (clampedLambda === 1) {
        // Pure relevance: no diversity penalty, a plain sort suffices.
        return [...items].toSorted((a, b) => b.score - a.score);
    }
    // Tokenize every item once up front.
    const tokenCache = new Map(items.map((item) => [item.id, tokenize(item.content)]));
    // Min-max normalize relevance into [0, 1] so it is comparable with
    // Jaccard similarities; a zero range means all scores are equal.
    const scores = items.map((item) => item.score);
    const maxScore = Math.max(...scores);
    const minScore = Math.min(...scores);
    const scoreRange = maxScore - minScore;
    const normalizeScore = (score) => (scoreRange === 0 ? 1 : (score - minScore) / scoreRange);
    const picked = [];
    const pool = new Set(items);
    while (pool.size > 0) {
        let winner = null;
        let winnerScore = -Infinity;
        for (const candidate of pool) {
            const relevance = normalizeScore(candidate.score);
            const redundancy = maxSimilarityToSelected(candidate, picked, tokenCache);
            const mmr = computeMMRScore(relevance, redundancy, clampedLambda);
            const tiedButMoreRelevant = mmr === winnerScore && candidate.score > (winner?.score ?? -Infinity);
            if (mmr > winnerScore || tiedButMoreRelevant) {
                winnerScore = mmr;
                winner = candidate;
            }
        }
        if (!winner) {
            // Defensive: unreachable with finite scores, but guarantees exit.
            break;
        }
        picked.push(winner);
        pool.delete(winner);
    }
    return picked;
}
|
|
141
|
+
/**
 * Apply MMR re-ranking to hybrid search results.
 * Wraps each result in the generic MMR item shape (id/score/content), runs
 * mmrRerank, then maps the re-ordered items back to the original results.
 */
export function applyMMRToHybridResults(results, config = {}) {
    if (results.length === 0) {
        return results;
    }
    // ID → original result, for type-safe retrieval after re-ranking.
    const originalById = new Map();
    const candidates = results.map((result, index) => {
        // The index suffix keeps ids unique even when path/startLine repeat.
        const id = `${result.path}:${result.startLine}:${index}`;
        originalById.set(id, result);
        return { id, score: result.score, content: result.snippet };
    });
    return mmrRerank(candidates, config).map(({ id }) => originalById.get(id));
}
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
/**
 * Query expansion for FTS-only search mode.
 *
 * When no embedding provider is available, we fall back to FTS (full-text search).
 * FTS works best with specific keywords, but users often ask conversational queries
 * like "that thing we discussed yesterday" or "之前讨论的那个方案".
 *
 * This module extracts meaningful keywords from such queries to improve FTS results.
 */
// Common English stop words that don't add search value.
// NOTE(review): "before" appears in both the prepositions and the time
// sections below; the duplicate is harmless in a Set.
const STOP_WORDS_EN = new Set([
    // Articles and determiners
    "a",
    "an",
    "the",
    "this",
    "that",
    "these",
    "those",
    // Pronouns
    "i",
    "me",
    "my",
    "we",
    "our",
    "you",
    "your",
    "he",
    "she",
    "it",
    "they",
    "them",
    // Common verbs
    "is",
    "are",
    "was",
    "were",
    "be",
    "been",
    "being",
    "have",
    "has",
    "had",
    "do",
    "does",
    "did",
    "will",
    "would",
    "could",
    "should",
    "can",
    "may",
    "might",
    // Prepositions
    "in",
    "on",
    "at",
    "to",
    "for",
    "of",
    "with",
    "by",
    "from",
    "about",
    "into",
    "through",
    "during",
    "before",
    "after",
    "above",
    "below",
    "between",
    "under",
    "over",
    // Conjunctions
    "and",
    "or",
    "but",
    "if",
    "then",
    "because",
    "as",
    "while",
    "when",
    "where",
    "what",
    "which",
    "who",
    "how",
    "why",
    // Time references (vague, not useful for FTS)
    "yesterday",
    "today",
    "tomorrow",
    "earlier",
    "later",
    "recently",
    "before",
    "ago",
    "just",
    "now",
    // Vague references
    "thing",
    "things",
    "stuff",
    "something",
    "anything",
    "everything",
    "nothing",
    // Politeness and generic request verbs
    "please",
    "help",
    "find",
    "show",
    "get",
    "tell",
    "give",
]);
// Common Chinese stop words (pronouns, particles, light verbs, vague
// time/object references) filtered out before FTS keyword matching.
const STOP_WORDS_ZH = new Set([
    // Pronouns
    "我",
    "我们",
    "你",
    "你们",
    "他",
    "她",
    "它",
    "他们",
    "这",
    "那",
    "这个",
    "那个",
    "这些",
    "那些",
    // Auxiliary words (structural/aspect/modal particles)
    "的",
    "了",
    "着",
    "过",
    "得",
    "地",
    "吗",
    "呢",
    "吧",
    "啊",
    "呀",
    "嘛",
    "啦",
    // Verbs (common, vague)
    "是",
    "有",
    "在",
    "被",
    "把",
    "给",
    "让",
    "用",
    "到",
    "去",
    "来",
    "做",
    "说",
    "看",
    "找",
    "想",
    "要",
    "能",
    "会",
    "可以",
    // Prepositions and conjunctions
    "和",
    "与",
    "或",
    "但",
    "但是",
    "因为",
    "所以",
    "如果",
    "虽然",
    "而",
    "也",
    "都",
    "就",
    "还",
    "又",
    "再",
    "才",
    "只",
    // Time (vague)
    "之前",
    "以前",
    "之后",
    "以后",
    "刚才",
    "现在",
    "昨天",
    "今天",
    "明天",
    "最近",
    // Vague references
    "东西",
    "事情",
    "事",
    "什么",
    "哪个",
    "哪些",
    "怎么",
    "为什么",
    "多少",
    // Question/request words
    "请",
    "帮",
    "帮忙",
    "告诉",
]);
|
|
216
|
+
/**
 * Check whether a token is worth keeping as an FTS keyword.
 * Rejects empty tokens, very short pure-ASCII words, pure numbers, and
 * punctuation/symbol-only tokens.
 */
function isValidKeyword(token) {
    if (!token || token.length === 0) {
        return false;
    }
    // Very short all-letter tokens are usually stop words or fragments.
    const isShortAsciiWord = /^[a-zA-Z]+$/.test(token) && token.length < 3;
    if (isShortAsciiWord) {
        return false;
    }
    // Pure numbers carry no semantic value for search.
    if (/^\d+$/.test(token)) {
        return false;
    }
    // Reject tokens made entirely of punctuation/symbols (Unicode-aware).
    return !/^[\p{P}\p{S}]+$/u.test(token);
}
|
|
238
|
+
/**
 * Tokenize mixed English/Chinese text.
 * Non-CJK segments are kept whole (lowercased); segments containing CJK
 * characters are expanded into single characters plus character bigrams,
 * since no proper word segmenter is available.
 */
function tokenize(text) {
    const segments = text
        .toLowerCase()
        .trim()
        .split(/[\s\p{P}]+/u)
        .filter(Boolean);
    const out = [];
    for (const segment of segments) {
        if (!/[\u4e00-\u9fff]/.test(segment)) {
            // Non-CJK: keep the segment as a single token.
            out.push(segment);
            continue;
        }
        // Keep only the CJK code points, then emit unigrams + bigrams.
        const cjk = [...segment].filter((ch) => /[\u4e00-\u9fff]/.test(ch));
        for (let i = 0; i < cjk.length; i++) {
            out.push(cjk[i]);
        }
        for (let i = 0; i + 1 < cjk.length; i++) {
            out.push(cjk[i] + cjk[i + 1]);
        }
    }
    return out;
}
|
|
267
|
+
/**
 * Extract deduplicated search keywords from a conversational query.
 *
 * Drops English/Chinese stop words and tokens that fail isValidKeyword,
 * preserving first-occurrence order of the survivors.
 *
 * Examples:
 * - "that thing we discussed about the API" → ["discussed", "api"]
 * - "之前讨论的那个方案" → character unigrams/bigrams of "讨论" and "方案"
 */
export function extractKeywords(query) {
    const seen = new Set();
    const keywords = [];
    for (const token of tokenize(query)) {
        if (STOP_WORDS_EN.has(token) || STOP_WORDS_ZH.has(token)) {
            continue;
        }
        if (!isValidKeyword(token) || seen.has(token)) {
            continue;
        }
        seen.add(token);
        keywords.push(token);
    }
    return keywords;
}
|
|
297
|
+
/**
 * Expand a query for FTS search.
 *
 * @param query - User's original query.
 * @returns { original, keywords, expanded } where `expanded` OR-joins the
 *          trimmed query with every extracted keyword, so both exact and
 *          keyword matches are found.
 */
export function expandQueryForFts(query) {
    const original = query.trim();
    const keywords = extractKeywords(original);
    if (keywords.length === 0) {
        return { original, keywords, expanded: original };
    }
    const expanded = [original, ...keywords].join(" OR ");
    return { original, keywords, expanded };
}
|
|
312
|
+
/**
 * Expand a query with optional LLM assistance.
 * Uses the LLM expander when provided and it returns a non-empty keyword
 * list; otherwise falls back to local keyword extraction.
 */
export async function expandQueryWithLlm(query, llmExpander) {
    if (!llmExpander) {
        return extractKeywords(query);
    }
    try {
        const fromLlm = await llmExpander(query);
        if (fromLlm.length > 0) {
            return fromLlm;
        }
    }
    catch {
        // LLM expansion failed; fall through to local extraction.
    }
    return extractKeywords(query);
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
// Temporal decay is opt-in; when enabled, scores halve every 30 days by default.
export const DEFAULT_TEMPORAL_DECAY_CONFIG = {
    enabled: false,
    halfLifeDays: 30,
};
// One day in milliseconds.
const DAY_MS = 24 * 60 * 60 * 1000;
// Matches dated memory files like "memory/2026-02-10.md", capturing Y/M/D.
const DATED_MEMORY_PATH_RE = /(?:^|\/)memory\/(\d{4})-(\d{2})-(\d{2})\.md$/;
|
|
9
|
+
/**
 * Convert a half-life (in days) to an exponential decay rate λ such that
 * exp(-λ * halfLifeDays) === 0.5. Returns 0 (meaning "no decay") for
 * non-positive or non-finite half-lives.
 */
export function toDecayLambda(halfLifeDays) {
    const isUsable = Number.isFinite(halfLifeDays) && halfLifeDays > 0;
    return isUsable ? Math.LN2 / halfLifeDays : 0;
}
/**
 * Multiplier in (0, 1] for an item of the given age (negative ages clamp
 * to 0). Returns 1 (no decay) when the half-life disables decay or the
 * age is not a finite number.
 */
export function calculateTemporalDecayMultiplier(params) {
    const lambda = toDecayLambda(params.halfLifeDays);
    const age = Math.max(0, params.ageInDays);
    if (lambda <= 0 || !Number.isFinite(age)) {
        return 1;
    }
    return Math.exp(-lambda * age);
}
/**
 * Scale a relevance score by the temporal decay multiplier.
 */
export function applyTemporalDecayToScore(params) {
    const multiplier = calculateTemporalDecayMultiplier(params);
    return params.score * multiplier;
}
|
|
26
|
+
/**
 * Parse the calendar date encoded in a dated memory path like
 * "memory/2026-02-10.md". Returns a UTC-midnight Date, or null when the
 * path does not match or encodes an impossible date (e.g. 2026-02-31).
 */
function parseMemoryDateFromPath(filePath) {
    const normalized = filePath.replaceAll("\\", "/").replace(/^\.\//, "");
    const match = DATED_MEMORY_PATH_RE.exec(normalized);
    if (!match) {
        return null;
    }
    const [year, month, day] = [match[1], match[2], match[3]].map(Number);
    if (![year, month, day].every(Number.isInteger)) {
        return null;
    }
    const candidate = new Date(Date.UTC(year, month - 1, day));
    // Date.UTC silently rolls invalid dates forward (Feb 31 → Mar 3);
    // reject anything that did not round-trip exactly.
    const roundTrips = candidate.getUTCFullYear() === year &&
        candidate.getUTCMonth() === month - 1 &&
        candidate.getUTCDate() === day;
    return roundTrips ? candidate : null;
}
|
|
47
|
+
/**
 * Decide whether a memory path holds evergreen (non-decaying) knowledge:
 * the MEMORY.md root file, or any memory/ file that is not date-stamped.
 */
function isEvergreenMemoryPath(filePath) {
    const normalized = filePath.replaceAll("\\", "/").replace(/^\.\//, "");
    switch (normalized) {
        case "MEMORY.md":
        case "memory.md":
            return true;
        default:
            return normalized.startsWith("memory/") && !DATED_MEMORY_PATH_RE.test(normalized);
    }
}
|
|
57
|
+
/**
 * Resolve the timestamp used for decaying a search result.
 *
 * Resolution order:
 * 1. Date embedded in a dated memory filename.
 * 2. null for evergreen memory files (root/topic files never decay).
 * 3. Filesystem mtime, when a workspace directory is available.
 * Returns null when no usable timestamp can be determined.
 */
async function extractTimestamp(params) {
    const datedFromName = parseMemoryDateFromPath(params.filePath);
    if (datedFromName) {
        return datedFromName;
    }
    // Memory root/topic files are evergreen knowledge and should not decay.
    if (params.source === "memory" && isEvergreenMemoryPath(params.filePath)) {
        return null;
    }
    if (!params.workspaceDir) {
        return null;
    }
    const absolutePath = path.isAbsolute(params.filePath)
        ? params.filePath
        : path.resolve(params.workspaceDir, params.filePath);
    try {
        const { mtimeMs } = await fs.stat(absolutePath);
        return Number.isFinite(mtimeMs) ? new Date(mtimeMs) : null;
    }
    catch {
        // Missing or unreadable file: treat as undatable.
        return null;
    }
}
|
|
83
|
+
/**
 * Age of a timestamp in (fractional) days relative to `nowMs`;
 * future timestamps clamp to age 0.
 */
function ageInDaysFromTimestamp(timestamp, nowMs) {
    const elapsedMs = nowMs - timestamp.getTime();
    return Math.max(0, elapsedMs) / DAY_MS;
}
|
|
87
|
+
/**
 * Apply exponential temporal decay to hybrid search result scores.
 *
 * Timestamps are resolved once per (source, path) pair via a promise cache,
 * so each file is stat'ed at most once. Entries with no resolvable
 * timestamp keep their score unchanged. Returns a new array; the input and
 * its entries are not mutated.
 */
export async function applyTemporalDecayToHybridResults(params) {
    const config = { ...DEFAULT_TEMPORAL_DECAY_CONFIG, ...params.temporalDecay };
    if (!config.enabled) {
        return [...params.results];
    }
    const nowMs = params.nowMs ?? Date.now();
    const timestampLookups = new Map();
    const resolveTimestamp = (entry) => {
        const key = `${entry.source}:${entry.path}`;
        let pending = timestampLookups.get(key);
        if (!pending) {
            pending = extractTimestamp({
                filePath: entry.path,
                source: entry.source,
                workspaceDir: params.workspaceDir,
            });
            timestampLookups.set(key, pending);
        }
        return pending;
    };
    const decayed = params.results.map(async (entry) => {
        const timestamp = await resolveTimestamp(entry);
        if (!timestamp) {
            return entry;
        }
        const score = applyTemporalDecayToScore({
            score: entry.score,
            ageInDays: ageInDaysFromTimestamp(timestamp, nowMs),
            halfLifeDays: config.halfLifeDays,
        });
        return { ...entry, score };
    });
    return Promise.all(decayed);
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
// Default wait between the graceful signal and the force-kill.
const DEFAULT_GRACE_MS = 3000;
// Upper bound on the configurable grace period.
const MAX_GRACE_MS = 60_000;
|
|
4
|
+
/**
 * Best-effort process-tree termination with graceful shutdown.
 * - Windows: use taskkill /T to include descendants. Sends a graceful
 *   termination first (without /F), then force-kills if the process survives.
 * - Unix: send SIGTERM to the process group first, wait the grace period,
 *   then SIGKILL.
 *
 * This gives child processes a chance to clean up (close connections, remove
 * temp files, terminate their own children) before being hard-killed.
 */
export function killProcessTree(pid, opts) {
    const isValidPid = Number.isFinite(pid) && pid > 0;
    if (!isValidPid) {
        return;
    }
    const graceMs = normalizeGraceMs(opts?.graceMs);
    if (process.platform === "win32") {
        killProcessTreeWindows(pid, graceMs);
    }
    else {
        killProcessTreeUnix(pid, graceMs);
    }
}
|
|
24
|
+
/**
 * Coerce an optional grace period to an integer in [0, MAX_GRACE_MS];
 * non-numeric or non-finite input falls back to DEFAULT_GRACE_MS.
 */
function normalizeGraceMs(value) {
    const isNumeric = typeof value === "number" && Number.isFinite(value);
    if (!isNumeric) {
        return DEFAULT_GRACE_MS;
    }
    const floored = Math.floor(value);
    return Math.min(MAX_GRACE_MS, Math.max(0, floored));
}
|
|
30
|
+
/**
 * Probe whether a pid (or, for negative values, a process group) exists by
 * sending signal 0. Any process.kill failure (e.g. ESRCH, EPERM) reports
 * the target as not alive.
 */
function isProcessAlive(pid) {
    try {
        process.kill(pid, 0);
    }
    catch {
        return false;
    }
    return true;
}
|
|
39
|
+
/**
 * Unix tree kill: SIGTERM the process group (falling back to the single
 * pid), then after the grace period SIGKILL whatever is still alive.
 */
function killProcessTreeUnix(pid, graceMs) {
    // Step 1: graceful SIGTERM, preferring the whole process group (-pid).
    try {
        process.kill(-pid, "SIGTERM");
    }
    catch {
        // No such group or no permission — try the process directly.
        try {
            process.kill(pid, "SIGTERM");
        }
        catch {
            // Neither group nor process exists anymore; nothing to do.
            return;
        }
    }
    // Step 2: after the grace period, force-kill survivors.
    const forceKill = () => {
        if (isProcessAlive(-pid)) {
            try {
                process.kill(-pid, "SIGKILL");
                return;
            }
            catch {
                // Group kill failed; fall through to the single pid.
            }
        }
        if (isProcessAlive(pid)) {
            try {
                process.kill(pid, "SIGKILL");
            }
            catch {
                // Process exited between the liveness check and the kill.
            }
        }
    };
    setTimeout(forceKill, graceMs).unref(); // don't block event-loop exit
}
|
|
76
|
+
/**
 * Fire-and-forget invocation of Windows `taskkill` with the given args.
 * Never throws: synchronous spawn failures are swallowed, and asynchronous
 * spawn failures (which surface as an "error" event on the child, NOT as a
 * throw) are also swallowed — an unhandled "error" event would otherwise
 * crash the whole process (e.g. ENOENT when taskkill is missing).
 */
function runTaskkill(args) {
    try {
        const child = spawn("taskkill", args, {
            stdio: "ignore",
            detached: true,
        });
        // spawn() reports failures like ENOENT asynchronously via the
        // "error" event; without a listener they become uncaught exceptions.
        child.on("error", () => {});
        // Don't let the fire-and-forget child keep the event loop alive.
        child.unref();
    }
    catch {
        // Ignore synchronous taskkill spawn failures
    }
}
|
|
87
|
+
/**
 * Windows tree kill: attempt a graceful taskkill /T first, then after the
 * grace period force-kill (/F) only if the root pid is still alive — this
 * avoids an unconditional delayed /F after a clean shutdown.
 */
function killProcessTreeWindows(pid, graceMs) {
    const pidArg = String(pid);
    // Step 1: graceful termination (taskkill without /F).
    runTaskkill(["/T", "/PID", pidArg]);
    // Step 2: force-kill survivors after the grace period.
    const forceKill = () => {
        if (isProcessAlive(pid)) {
            runTaskkill(["/F", "/T", "/PID", pidArg]);
        }
    };
    setTimeout(forceKill, graceMs).unref(); // don't block event-loop exit
}
|