@softerist/heuristic-mcp 3.0.15 → 3.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +104 -104
  2. package/config.jsonc +173 -173
  3. package/features/ann-config.js +131 -0
  4. package/features/clear-cache.js +84 -0
  5. package/features/find-similar-code.js +291 -0
  6. package/features/hybrid-search.js +544 -0
  7. package/features/index-codebase.js +3268 -0
  8. package/features/lifecycle.js +1189 -0
  9. package/features/package-version.js +302 -0
  10. package/features/register.js +408 -0
  11. package/features/resources.js +156 -0
  12. package/features/set-workspace.js +265 -0
  13. package/index.js +96 -96
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +565 -565
  16. package/lib/cache.js +1870 -1870
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +1 -1
  19. package/lib/config.js +517 -517
  20. package/lib/constants.js +39 -39
  21. package/lib/embed-query-process.js +7 -7
  22. package/lib/embedding-process.js +7 -7
  23. package/lib/embedding-worker.js +299 -299
  24. package/lib/ignore-patterns.js +316 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +337 -337
  27. package/lib/logging.js +164 -164
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +193 -193
  30. package/lib/project-detector.js +84 -84
  31. package/lib/server-lifecycle.js +165 -165
  32. package/lib/settings-editor.js +754 -754
  33. package/lib/tokenizer.js +256 -256
  34. package/lib/utils.js +428 -428
  35. package/lib/vector-store-binary.js +627 -627
  36. package/lib/vector-store-sqlite.js +95 -95
  37. package/lib/workspace-env.js +28 -28
  38. package/mcp_config.json +9 -9
  39. package/package.json +86 -75
  40. package/scripts/clear-cache.js +20 -0
  41. package/scripts/download-model.js +43 -0
  42. package/scripts/mcp-launcher.js +49 -0
  43. package/scripts/postinstall.js +12 -0
  44. package/search-configs.js +36 -36
  45. package/.prettierrc +0 -7
  46. package/debug-pids.js +0 -30
  47. package/eslint.config.js +0 -36
  48. package/specs/plan.md +0 -23
  49. package/vitest.config.js +0 -39
package/lib/tokenizer.js CHANGED
@@ -1,256 +1,256 @@
1
- /**
2
- * Token estimation and limits for embedding models
3
- *
4
- * Performance:
5
- * - O(1) model lookups with precomputed maps
6
- * - Zero regex / Zero allocations in hot loop
7
- * - Proper LRU cache eviction
8
- * - Optimized Unicode whitespace detection (ordered by probability)
9
- * - Eliminated double toLowerCase() calls
10
- * - Type-safe guard rails on all public APIs
11
- * - Branchless special character counting
12
- */
13
-
14
- const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
15
-
16
- const MODEL_TOKEN_LIMITS_RAW = {
17
- // NOTE: While jina-embeddings-v2-base-code supports 8192 tokens, ONNX runtime
18
- // allocates O(n²) memory for attention. Using 512 tokens for optimal speed
19
- // with 4 ONNX threads (~1.5GB RAM, fastest inference).
20
- 'jinaai/jina-embeddings-v2-base-code': 512,
21
- default: 512, // Safe default for BERT-like models
22
- };
23
-
24
- export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
25
- ? { ...MODEL_TOKEN_LIMITS_RAW }
26
- : Object.freeze({ ...MODEL_TOKEN_LIMITS_RAW });
27
-
28
- const DEFAULT_LIMIT = MODEL_TOKEN_LIMITS.default ?? 512;
29
-
30
- /**
31
- * Precomputed case-insensitive lookup
32
- */
33
- const MODEL_LIMITS_LC = new Map();
34
- for (const [k, v] of Object.entries(MODEL_TOKEN_LIMITS)) {
35
- MODEL_LIMITS_LC.set(k.toLowerCase(), v);
36
- }
37
-
38
- /**
39
- * Internal helper: get model limit from pre-normalized key
40
- * Avoids double toLowerCase() when called from cache flow
41
- * @param {string} lowerName - Pre-normalized lowercase model name
42
- * @param {*} originalName - Original model name (may not be a string)
43
- * @returns {number} Token limit
44
- */
45
- function getModelTokenLimitFromLower(lowerName, originalName) {
46
- // Fast path: try exact match first (only if original is a string)
47
- if (typeof originalName === 'string') {
48
- const direct = MODEL_TOKEN_LIMITS[originalName];
49
- if (direct !== undefined) return direct;
50
- }
51
-
52
- // Slow path: use pre-normalized key
53
- const exact = MODEL_LIMITS_LC.get(lowerName);
54
- if (exact !== undefined) return exact;
55
-
56
- // Heuristics for common models (use conservative limits for ONNX speed)
57
- // 512 tokens = fastest, 1024 = 4x more compute due to O(n²) attention
58
- if (
59
- lowerName.includes('jina') ||
60
- lowerName.includes('nomic') ||
61
- lowerName.includes('gte-large')
62
- ) {
63
- return 512;
64
- }
65
- if (lowerName.includes('gte-base') || lowerName.includes('gte-small')) {
66
- return 512;
67
- }
68
- if (lowerName.includes('minilm')) {
69
- return 512;
70
- }
71
-
72
- return DEFAULT_LIMIT;
73
- }
74
-
75
- /**
76
- * Get the maximum token limit for a given model
77
- * @param {string} modelName - The model name
78
- * @returns {number} Maximum tokens supported by the model
79
- */
80
- export function getModelTokenLimit(modelName) {
81
- // Guard clause for non-string or empty inputs
82
- if (typeof modelName !== 'string' || modelName.length === 0) return DEFAULT_LIMIT;
83
-
84
- const direct = MODEL_TOKEN_LIMITS[modelName];
85
- if (direct !== undefined) return direct;
86
-
87
- const lower = modelName.toLowerCase();
88
- return getModelTokenLimitFromLower(lower, modelName);
89
- }
90
- /**
91
- * LRU cache for chunking parameters
92
- * @type {Map<string, {maxTokens: number, targetTokens: number, overlapTokens: number}>}
93
- */
94
- import { CHUNKING_PARAMS_CACHE_SIZE as MAX_CACHE_SIZE } from './constants.js';
95
- const chunkingParamsCache = new Map();
96
-
97
- /**
98
- * Get chunking parameters for a model
99
- * @param {string} modelName - The model name
100
- * @returns {{maxTokens: number, targetTokens: number, overlapTokens: number}}
101
- */
102
- export function getChunkingParams(modelName) {
103
- const key = typeof modelName === 'string' && modelName.length ? modelName.toLowerCase() : '';
104
-
105
- // Fast path for invalid inputs: don't consume cache slots
106
- if (key === '') {
107
- const maxTokens = DEFAULT_LIMIT;
108
- const targetTokens = Math.trunc(maxTokens * 0.85);
109
- const overlapTokens = Math.trunc(targetTokens * 0.18);
110
- return { maxTokens, targetTokens, overlapTokens };
111
- }
112
-
113
- // LRU pattern: delete-and-reinsert to mark as most recently used.
114
- // Note: This creates minor GC pressure due to Map key reallocation, but is
115
- // acceptable for MAX_CACHE_SIZE=100. For larger caches (1000+), consider
116
- // a doubly-linked-list LRU implementation for O(1) access without reallocation.
117
- const cached = chunkingParamsCache.get(key);
118
- if (cached) {
119
- chunkingParamsCache.delete(key);
120
- chunkingParamsCache.set(key, cached);
121
- return cached;
122
- }
123
-
124
- // Cache miss: compute new params (avoid double toLowerCase)
125
- const maxTokens = getModelTokenLimitFromLower(key, modelName);
126
- const targetTokens = Math.trunc(maxTokens * 0.85);
127
- const overlapTokens = Math.trunc(targetTokens * 0.18);
128
-
129
- const params = { maxTokens, targetTokens, overlapTokens };
130
-
131
- // LRU eviction: remove oldest entry if at capacity
132
- if (chunkingParamsCache.size >= MAX_CACHE_SIZE) {
133
- const oldestKey = chunkingParamsCache.keys().next().value;
134
- chunkingParamsCache.delete(oldestKey);
135
- }
136
-
137
- chunkingParamsCache.set(key, params);
138
- return params;
139
- }
140
-
141
- /**
142
- * ASCII whitespace lookup table
143
- */
144
- const WS = new Uint8Array(128);
145
- WS[9] = 1; // \t (horizontal tab)
146
- WS[10] = 1; // \n (line feed)
147
- WS[11] = 1; // \v (vertical tab)
148
- WS[12] = 1; // \f (form feed)
149
- WS[13] = 1; // \r (carriage return)
150
- WS[32] = 1; // space
151
-
152
- /**
153
- * ASCII special character lookup table
154
- */
155
- const SPECIAL = new Uint8Array(128);
156
- const SPECIAL_CHARS = '{}()[];:,.<>!=+-*/%&|^~@#$"\'`\\';
157
- for (let i = 0; i < SPECIAL_CHARS.length; i++) {
158
- SPECIAL[SPECIAL_CHARS.charCodeAt(i)] = 1;
159
- }
160
-
161
- /**
162
- * Calculate token count for a word of given length
163
- * This function will be inlined by V8
164
- * @param {number} len - Word length in characters
165
- * @returns {number} Estimated token count
166
- */
167
- function calcWordTokens(len) {
168
- if (len <= 4) return 1;
169
- if (len <= 10) return 2;
170
- return (len + 3) >> 2; // ceil(len / 4)
171
- }
172
-
173
- /**
174
- * Estimate token count for text (conservative estimate for code)
175
- *
176
- * Performance optimizations:
177
- * - No regex (pure integer comparisons)
178
- * - No string allocations (charCodeAt only)
179
- * - Inlined word token calculation
180
- * - Unicode checks ordered by frequency
181
- * - Branchless special character counting
182
- *
183
- * @param {string} text - The text to estimate tokens for
184
- * @param {object} [options]
185
- * @param {boolean} [options.includeSpecialTokens=true] - Whether to include [CLS]/[SEP]
186
- * @returns {number} Estimated token count
187
- */
188
- export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
189
- // Type-safe guard: prevents crashes from non-string inputs
190
- if (typeof text !== 'string' || text.length === 0) return 0;
191
-
192
- const len = text.length;
193
- let tokenCount = includeSpecialTokens ? 2 : 0; // [CLS] + [SEP]
194
- let specialCount = 0;
195
- let wordStart = -1;
196
-
197
- for (let i = 0; i < len; i++) {
198
- const code = text.charCodeAt(i);
199
-
200
- // ASCII fast path (most common for code)
201
- if (code < 128) {
202
- if (WS[code]) {
203
- if (wordStart !== -1) {
204
- tokenCount += calcWordTokens(i - wordStart);
205
- wordStart = -1;
206
- }
207
- } else {
208
- // Branchless: add 0 or 1 based on SPECIAL[code]
209
- specialCount += SPECIAL[code];
210
- if (wordStart === -1) wordStart = i;
211
- }
212
- continue;
213
- }
214
-
215
- // Unicode whitespace: ordered by frequency for real-world text
216
- // Note: Includes legacy 0x180E for tokenization compatibility even though
217
- // modern JS \s doesn't consider it whitespace (ES2016+)
218
- const isUnicodeWS =
219
- code === 0x00a0 || // NBSP (most common)
220
- code === 0x202f || // NARROW NO-BREAK SPACE
221
- (code >= 0x2000 && code <= 0x200a) || // EN QUAD..HAIR SPACE
222
- code === 0x3000 || // IDEOGRAPHIC SPACE (CJK)
223
- code === 0x2028 || // LINE SEPARATOR
224
- code === 0x2029 || // PARAGRAPH SEPARATOR
225
- code === 0x205f || // MEDIUM MATHEMATICAL SPACE
226
- code === 0x1680 || // OGHAM SPACE MARK
227
- code === 0x180e || // MONGOLIAN VOWEL SEPARATOR (legacy)
228
- code === 0x0085 || // NEXT LINE (NEL)
229
- code === 0xfeff; // ZERO WIDTH NO-BREAK SPACE / BOM
230
-
231
- if (isUnicodeWS) {
232
- if (wordStart !== -1) {
233
- tokenCount += calcWordTokens(i - wordStart);
234
- wordStart = -1;
235
- }
236
- } else {
237
- // Non-ASCII, non-whitespace (e.g., CJK, emojis, accented chars)
238
- // Conservative estimate: treat each as 1 token
239
- if (wordStart !== -1) {
240
- tokenCount += calcWordTokens(i - wordStart);
241
- wordStart = -1;
242
- }
243
- tokenCount++;
244
- }
245
- }
246
-
247
- // Flush final word
248
- if (wordStart !== -1) {
249
- tokenCount += calcWordTokens(len - wordStart);
250
- }
251
-
252
- // Add ~50% of special chars as tokens
253
- tokenCount += specialCount >> 1;
254
-
255
- return tokenCount;
256
- }
1
/**
 * Token estimation and limits for embedding models
 *
 * Performance:
 * - O(1) model lookups with precomputed maps
 * - Zero regex / Zero allocations in hot loop
 * - Proper LRU cache eviction
 * - Optimized Unicode whitespace detection (ordered by probability)
 * - Eliminated double toLowerCase() calls
 * - Type-safe guard rails on all public APIs
 * - Branchless special character counting
 */

const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';

const MODEL_TOKEN_LIMITS_RAW = {
  // NOTE: While jina-embeddings-v2-base-code supports 8192 tokens, ONNX runtime
  // allocates O(n²) memory for attention. Using 512 tokens for optimal speed
  // with 4 ONNX threads (~1.5GB RAM, fastest inference).
  'jinaai/jina-embeddings-v2-base-code': 512,
  default: 512, // Safe default for BERT-like models
};

// Tests may mutate the table; production code gets a frozen copy.
export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
  ? { ...MODEL_TOKEN_LIMITS_RAW }
  : Object.freeze({ ...MODEL_TOKEN_LIMITS_RAW });

const DEFAULT_LIMIT = MODEL_TOKEN_LIMITS.default ?? 512;

/**
 * Precomputed case-insensitive lookup (Map lookups are prototype-safe)
 */
const MODEL_LIMITS_LC = new Map();
for (const [k, v] of Object.entries(MODEL_TOKEN_LIMITS)) {
  MODEL_LIMITS_LC.set(k.toLowerCase(), v);
}

/**
 * Internal helper: get model limit from pre-normalized key.
 * Avoids double toLowerCase() when called from the cache flow.
 * @param {string} lowerName - Pre-normalized lowercase model name
 * @param {*} originalName - Original model name (may not be a string)
 * @returns {number} Token limit
 */
function getModelTokenLimitFromLower(lowerName, originalName) {
  // Fast path: try exact match first (only if original is a string).
  // FIX: check the value type, not `!== undefined` — a plain-object lookup
  // with a key like 'constructor' or 'toString' hits inherited
  // Object.prototype members and previously returned a Function here.
  if (typeof originalName === 'string') {
    const direct = MODEL_TOKEN_LIMITS[originalName];
    if (typeof direct === 'number') return direct;
  }

  // Slow path: use pre-normalized key
  const exact = MODEL_LIMITS_LC.get(lowerName);
  if (exact !== undefined) return exact;

  // Heuristics for common models (use conservative limits for ONNX speed)
  // 512 tokens = fastest, 1024 = 4x more compute due to O(n²) attention
  if (
    lowerName.includes('jina') ||
    lowerName.includes('nomic') ||
    lowerName.includes('gte-large') ||
    lowerName.includes('gte-base') ||
    lowerName.includes('gte-small') ||
    lowerName.includes('minilm')
  ) {
    return 512;
  }

  return DEFAULT_LIMIT;
}

/**
 * Get the maximum token limit for a given model
 * @param {string} modelName - The model name
 * @returns {number} Maximum tokens supported by the model
 */
export function getModelTokenLimit(modelName) {
  // Guard clause for non-string or empty inputs
  if (typeof modelName !== 'string' || modelName.length === 0) return DEFAULT_LIMIT;

  // Same prototype-safety fix as the helper: only accept own numeric entries.
  const direct = MODEL_TOKEN_LIMITS[modelName];
  if (typeof direct === 'number') return direct;

  return getModelTokenLimitFromLower(modelName.toLowerCase(), modelName);
}
90
/**
 * LRU cache for chunking parameters
 * @type {Map<string, {maxTokens: number, targetTokens: number, overlapTokens: number}>}
 */
import { CHUNKING_PARAMS_CACHE_SIZE as MAX_CACHE_SIZE } from './constants.js';
const chunkingParamsCache = new Map();

/**
 * Derive chunking parameters from a token budget:
 * target = 85% of max, overlap = 18% of target (both truncated toward zero).
 * @param {number} maxTokens - Model token limit
 * @returns {{maxTokens: number, targetTokens: number, overlapTokens: number}}
 */
function buildChunkingParams(maxTokens) {
  const targetTokens = Math.trunc(maxTokens * 0.85);
  const overlapTokens = Math.trunc(targetTokens * 0.18);
  return { maxTokens, targetTokens, overlapTokens };
}

/**
 * Get chunking parameters for a model
 * @param {string} modelName - The model name
 * @returns {{maxTokens: number, targetTokens: number, overlapTokens: number}}
 */
export function getChunkingParams(modelName) {
  // Fast path for invalid inputs: compute defaults without consuming a cache slot.
  if (typeof modelName !== 'string' || modelName.length === 0) {
    return buildChunkingParams(DEFAULT_LIMIT);
  }

  const cacheKey = modelName.toLowerCase();

  // LRU pattern: delete-and-reinsert to mark as most recently used.
  // Note: This creates minor GC pressure due to Map key reallocation, but is
  // acceptable for MAX_CACHE_SIZE=100. For larger caches (1000+), consider
  // a doubly-linked-list LRU implementation for O(1) access without reallocation.
  const hit = chunkingParamsCache.get(cacheKey);
  if (hit) {
    chunkingParamsCache.delete(cacheKey);
    chunkingParamsCache.set(cacheKey, hit);
    return hit;
  }

  // Cache miss: compute new params (pass the pre-lowered key to avoid
  // a second toLowerCase inside the limit lookup).
  const params = buildChunkingParams(getModelTokenLimitFromLower(cacheKey, modelName));

  // LRU eviction: drop the oldest (first-inserted) entry when at capacity.
  if (chunkingParamsCache.size >= MAX_CACHE_SIZE) {
    chunkingParamsCache.delete(chunkingParamsCache.keys().next().value);
  }

  chunkingParamsCache.set(cacheKey, params);
  return params;
}
140
+
141
/**
 * ASCII whitespace lookup table: \t \n \v \f \r and space.
 */
const WS = new Uint8Array(128);
for (const code of [9, 10, 11, 12, 13, 32]) {
  WS[code] = 1;
}

/**
 * ASCII special character lookup table (punctuation common in source code).
 */
const SPECIAL = new Uint8Array(128);
const SPECIAL_CHARS = '{}()[];:,.<>!=+-*/%&|^~@#$"\'`\\';
for (const ch of SPECIAL_CHARS) {
  SPECIAL[ch.charCodeAt(0)] = 1;
}

/**
 * Calculate token count for a word of given length.
 * Small enough that V8 will inline it.
 * @param {number} len - Word length in characters
 * @returns {number} Estimated token count
 */
function calcWordTokens(len) {
  if (len <= 4) return 1;
  return len <= 10 ? 2 : (len + 3) >> 2; // ceil(len / 4) for long identifiers
}

/**
 * Non-ASCII whitespace test, ordered by expected real-world frequency.
 * Note: includes legacy 0x180E (Mongolian vowel separator) for tokenization
 * compatibility even though modern JS \s no longer matches it (ES2016+).
 * @param {number} code - UTF-16 code unit
 * @returns {boolean}
 */
function isUnicodeWhitespace(code) {
  return (
    code === 0x00a0 || // NBSP (most common)
    code === 0x202f || // NARROW NO-BREAK SPACE
    (code >= 0x2000 && code <= 0x200a) || // EN QUAD..HAIR SPACE
    code === 0x3000 || // IDEOGRAPHIC SPACE (CJK)
    code === 0x2028 || // LINE SEPARATOR
    code === 0x2029 || // PARAGRAPH SEPARATOR
    code === 0x205f || // MEDIUM MATHEMATICAL SPACE
    code === 0x1680 || // OGHAM SPACE MARK
    code === 0x180e || // MONGOLIAN VOWEL SEPARATOR (legacy)
    code === 0x0085 || // NEXT LINE (NEL)
    code === 0xfeff // ZERO WIDTH NO-BREAK SPACE / BOM
  );
}

/**
 * Estimate token count for text (conservative estimate for code).
 *
 * Performance: no regex, no string allocations (charCodeAt only), lookup
 * tables for the ASCII fast path, branchless special-character counting.
 *
 * @param {string} text - The text to estimate tokens for
 * @param {object} [options]
 * @param {boolean} [options.includeSpecialTokens=true] - Whether to include [CLS]/[SEP]
 * @returns {number} Estimated token count
 */
export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
  // Type-safe guard: prevents crashes from non-string inputs
  if (typeof text !== 'string' || text.length === 0) return 0;

  const length = text.length;
  let tokens = includeSpecialTokens ? 2 : 0; // [CLS] + [SEP]
  let specials = 0;
  let runStart = -1; // index where the current word run began, or -1

  for (let i = 0; i < length; i++) {
    const code = text.charCodeAt(i);

    if (code < 128) {
      // ASCII fast path (most common for code)
      if (WS[code]) {
        if (runStart !== -1) {
          tokens += calcWordTokens(i - runStart);
          runStart = -1;
        }
      } else {
        specials += SPECIAL[code]; // branchless: adds 0 or 1
        if (runStart === -1) runStart = i;
      }
    } else if (isUnicodeWhitespace(code)) {
      if (runStart !== -1) {
        tokens += calcWordTokens(i - runStart);
        runStart = -1;
      }
    } else {
      // Non-ASCII, non-whitespace (CJK, emoji surrogate halves, accents):
      // flush any pending word run and count this code unit as one token.
      if (runStart !== -1) {
        tokens += calcWordTokens(i - runStart);
        runStart = -1;
      }
      tokens++;
    }
  }

  // Flush the final word run
  if (runStart !== -1) {
    tokens += calcWordTokens(length - runStart);
  }

  // Roughly half of the special characters become their own tokens
  return tokens + (specials >> 1);
}