@softerist/heuristic-mcp 3.2.3 → 3.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +387 -376
  2. package/config.jsonc +800 -800
  3. package/features/ann-config.js +102 -110
  4. package/features/clear-cache.js +81 -84
  5. package/features/find-similar-code.js +265 -286
  6. package/features/hybrid-search.js +487 -536
  7. package/features/index-codebase.js +3146 -3271
  8. package/features/lifecycle.js +1011 -1063
  9. package/features/package-version.js +277 -291
  10. package/features/register.js +351 -370
  11. package/features/resources.js +115 -130
  12. package/features/set-workspace.js +214 -240
  13. package/index.js +788 -781
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +465 -519
  16. package/lib/cache.js +1749 -1849
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +232 -226
  19. package/lib/config.js +1483 -1495
  20. package/lib/constants.js +511 -493
  21. package/lib/embed-query-process.js +206 -212
  22. package/lib/embedding-process.js +434 -451
  23. package/lib/embedding-worker.js +862 -934
  24. package/lib/ignore-patterns.js +276 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +302 -310
  27. package/lib/logging.js +133 -127
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +188 -193
  30. package/lib/path-utils.js +18 -23
  31. package/lib/project-detector.js +82 -84
  32. package/lib/server-lifecycle.js +164 -147
  33. package/lib/settings-editor.js +738 -739
  34. package/lib/slice-normalize.js +25 -31
  35. package/lib/tokenizer.js +168 -203
  36. package/lib/utils.js +364 -409
  37. package/lib/vector-store-binary.js +973 -991
  38. package/lib/vector-store-sqlite.js +377 -414
  39. package/lib/workspace-env.js +32 -34
  40. package/mcp_config.json +9 -9
  41. package/package.json +86 -86
  42. package/scripts/clear-cache.js +20 -20
  43. package/scripts/download-model.js +43 -43
  44. package/scripts/mcp-launcher.js +49 -49
  45. package/scripts/postinstall.js +12 -12
  46. package/search-configs.js +36 -36
@@ -1,31 +1,25 @@
1
-
2
- export function sliceAndNormalize(vector, targetDim) {
3
- if (!targetDim || targetDim >= vector.length) {
4
- return vector;
5
- }
6
-
7
-
8
- const sliced = vector.slice(0, targetDim);
9
-
10
-
11
- let sumSquares = 0;
12
- for (let i = 0; i < targetDim; i++) {
13
- sumSquares += sliced[i] * sliced[i];
14
- }
15
- const norm = Math.sqrt(sumSquares);
16
-
17
- if (norm > 0) {
18
- for (let i = 0; i < targetDim; i++) {
19
- sliced[i] /= norm;
20
- }
21
- }
22
-
23
- return sliced;
24
- }
25
-
26
-
27
- export function toFloat32Array(vector) {
28
-
29
-
30
- return new Float32Array(vector);
31
- }
1
/**
 * Truncate a vector to `targetDim` dimensions and re-normalize the prefix to
 * unit (L2) length. Returns the input unchanged when no truncation is needed.
 *
 * @param {number[]|Float32Array} vector - Source embedding.
 * @param {number} targetDim - Desired dimensionality; falsy or >= length is a no-op.
 * @returns {number[]|Float32Array} The original vector, or a truncated unit-norm copy.
 */
export function sliceAndNormalize(vector, targetDim) {
  // Nothing to do when no target is given or the vector already fits.
  if (!targetDim || targetDim >= vector.length) return vector;

  const truncated = vector.slice(0, targetDim);

  // Euclidean norm of the truncated prefix.
  let squared = 0;
  for (let idx = 0; idx < targetDim; idx++) {
    squared += truncated[idx] * truncated[idx];
  }
  const magnitude = Math.sqrt(squared);

  // Skip normalization for the all-zero vector to avoid dividing by zero.
  if (magnitude > 0) {
    for (let idx = 0; idx < targetDim; idx++) {
      truncated[idx] /= magnitude;
    }
  }

  return truncated;
}
22
+
23
/**
 * Copy a numeric vector (plain array or typed array) into a new Float32Array,
 * the storage layout the vector stores expect.
 *
 * @param {ArrayLike<number>} vector - Source values.
 * @returns {Float32Array} Freshly allocated Float32Array copy.
 */
export function toFloat32Array(vector) {
  const typed = new Float32Array(vector);
  return typed;
}
package/lib/tokenizer.js CHANGED
@@ -1,203 +1,168 @@
1
-
2
-
3
- const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
4
-
5
- const MODEL_TOKEN_LIMITS_RAW = {
6
-
7
-
8
-
9
- 'jinaai/jina-embeddings-v2-base-code': 512,
10
- default: 512,
11
- };
12
-
13
- export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
14
- ? { ...MODEL_TOKEN_LIMITS_RAW }
15
- : Object.freeze({ ...MODEL_TOKEN_LIMITS_RAW });
16
-
17
- const DEFAULT_LIMIT = MODEL_TOKEN_LIMITS.default ?? 512;
18
-
19
-
20
- const MODEL_LIMITS_LC = new Map();
21
- for (const [k, v] of Object.entries(MODEL_TOKEN_LIMITS)) {
22
- MODEL_LIMITS_LC.set(k.toLowerCase(), v);
23
- }
24
-
25
-
26
- function getModelTokenLimitFromLower(lowerName, originalName) {
27
-
28
- if (typeof originalName === 'string') {
29
- const direct = MODEL_TOKEN_LIMITS[originalName];
30
- if (direct !== undefined) return direct;
31
- }
32
-
33
-
34
- const exact = MODEL_LIMITS_LC.get(lowerName);
35
- if (exact !== undefined) return exact;
36
-
37
-
38
-
39
- if (
40
- lowerName.includes('jina') ||
41
- lowerName.includes('nomic') ||
42
- lowerName.includes('gte-large')
43
- ) {
44
- return 512;
45
- }
46
- if (lowerName.includes('gte-base') || lowerName.includes('gte-small')) {
47
- return 512;
48
- }
49
- if (lowerName.includes('minilm')) {
50
- return 512;
51
- }
52
-
53
- return DEFAULT_LIMIT;
54
- }
55
-
56
-
57
- export function getModelTokenLimit(modelName) {
58
-
59
- if (typeof modelName !== 'string' || modelName.length === 0) return DEFAULT_LIMIT;
60
-
61
- const direct = MODEL_TOKEN_LIMITS[modelName];
62
- if (direct !== undefined) return direct;
63
-
64
- const lower = modelName.toLowerCase();
65
- return getModelTokenLimitFromLower(lower, modelName);
66
- }
67
-
68
- import { CHUNKING_PARAMS_CACHE_SIZE as MAX_CACHE_SIZE } from './constants.js';
69
- const chunkingParamsCache = new Map();
70
-
71
-
72
- export function getChunkingParams(modelName) {
73
- const key = typeof modelName === 'string' && modelName.length ? modelName.toLowerCase() : '';
74
-
75
-
76
- if (key === '') {
77
- const maxTokens = DEFAULT_LIMIT;
78
- const targetTokens = Math.trunc(maxTokens * 0.85);
79
- const overlapTokens = Math.trunc(targetTokens * 0.18);
80
- return { maxTokens, targetTokens, overlapTokens };
81
- }
82
-
83
-
84
-
85
-
86
-
87
- const cached = chunkingParamsCache.get(key);
88
- if (cached) {
89
- chunkingParamsCache.delete(key);
90
- chunkingParamsCache.set(key, cached);
91
- return cached;
92
- }
93
-
94
-
95
- const maxTokens = getModelTokenLimitFromLower(key, modelName);
96
- const targetTokens = Math.trunc(maxTokens * 0.85);
97
- const overlapTokens = Math.trunc(targetTokens * 0.18);
98
-
99
- const params = { maxTokens, targetTokens, overlapTokens };
100
-
101
-
102
- if (chunkingParamsCache.size >= MAX_CACHE_SIZE) {
103
- const oldestKey = chunkingParamsCache.keys().next().value;
104
- chunkingParamsCache.delete(oldestKey);
105
- }
106
-
107
- chunkingParamsCache.set(key, params);
108
- return params;
109
- }
110
-
111
-
112
- const WS = new Uint8Array(128);
113
- WS[9] = 1;
114
- WS[10] = 1;
115
- WS[11] = 1;
116
- WS[12] = 1;
117
- WS[13] = 1;
118
- WS[32] = 1;
119
-
120
-
121
- const SPECIAL = new Uint8Array(128);
122
- const SPECIAL_CHARS = '{}()[];:,.<>!=+-*/%&|^~@#$"\'`\\';
123
- for (let i = 0; i < SPECIAL_CHARS.length; i++) {
124
- SPECIAL[SPECIAL_CHARS.charCodeAt(i)] = 1;
125
- }
126
-
127
-
128
- function calcWordTokens(len) {
129
- if (len <= 4) return 1;
130
- if (len <= 10) return 2;
131
- return (len + 3) >> 2;
132
- }
133
-
134
-
135
- export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
136
-
137
- if (typeof text !== 'string' || text.length === 0) return 0;
138
-
139
- const len = text.length;
140
- let tokenCount = includeSpecialTokens ? 2 : 0;
141
- let specialCount = 0;
142
- let wordStart = -1;
143
-
144
- for (let i = 0; i < len; i++) {
145
- const code = text.charCodeAt(i);
146
-
147
-
148
- if (code < 128) {
149
- if (WS[code]) {
150
- if (wordStart !== -1) {
151
- tokenCount += calcWordTokens(i - wordStart);
152
- wordStart = -1;
153
- }
154
- } else {
155
-
156
- specialCount += SPECIAL[code];
157
- if (wordStart === -1) wordStart = i;
158
- }
159
- continue;
160
- }
161
-
162
-
163
-
164
-
165
- const isUnicodeWS =
166
- code === 0x00a0 ||
167
- code === 0x202f ||
168
- (code >= 0x2000 && code <= 0x200a) ||
169
- code === 0x3000 ||
170
- code === 0x2028 ||
171
- code === 0x2029 ||
172
- code === 0x205f ||
173
- code === 0x1680 ||
174
- code === 0x180e ||
175
- code === 0x0085 ||
176
- code === 0xfeff;
177
-
178
- if (isUnicodeWS) {
179
- if (wordStart !== -1) {
180
- tokenCount += calcWordTokens(i - wordStart);
181
- wordStart = -1;
182
- }
183
- } else {
184
-
185
-
186
- if (wordStart !== -1) {
187
- tokenCount += calcWordTokens(i - wordStart);
188
- wordStart = -1;
189
- }
190
- tokenCount++;
191
- }
192
- }
193
-
194
-
195
- if (wordStart !== -1) {
196
- tokenCount += calcWordTokens(len - wordStart);
197
- }
198
-
199
-
200
- tokenCount += specialCount >> 1;
201
-
202
- return tokenCount;
203
- }
1
// Freeze the exported table only outside test runs — presumably so test
// suites can patch limits; confirm against the package's test setup.
const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';

// Known per-model token-window sizes.
const MODEL_TOKEN_LIMITS_RAW = {
  'jinaai/jina-embeddings-v2-base-code': 512,
  default: 512,
};

export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
  ? { ...MODEL_TOKEN_LIMITS_RAW }
  : Object.freeze({ ...MODEL_TOKEN_LIMITS_RAW });

const DEFAULT_LIMIT = MODEL_TOKEN_LIMITS.default ?? 512;

// Case-insensitive lookup derived from the table above.
const MODEL_LIMITS_LC = new Map(
  Object.entries(MODEL_TOKEN_LIMITS).map(([name, limit]) => [name.toLowerCase(), limit])
);
18
+
19
/**
 * Resolve a token limit from a pre-lowercased model name.
 * Lookup order: exact (case-sensitive) table hit, case-insensitive table hit,
 * then substring heuristics for recognized model families, then the default.
 *
 * @param {string} lowerName - Lowercased model name.
 * @param {string} [originalName] - Original-case name for the exact lookup.
 * @returns {number} Token limit.
 */
function getModelTokenLimitFromLower(lowerName, originalName) {
  // Exact, case-sensitive table hit first.
  if (typeof originalName === 'string') {
    const exact = MODEL_TOKEN_LIMITS[originalName];
    if (exact !== undefined) return exact;
  }

  // Case-insensitive table hit.
  const lowered = MODEL_LIMITS_LC.get(lowerName);
  if (lowered !== undefined) return lowered;

  // Family heuristics — every currently recognized family uses a
  // 512-token window, so the original's three branches collapse to one.
  const families = ['jina', 'nomic', 'gte-large', 'gte-base', 'gte-small', 'minilm'];
  if (families.some((family) => lowerName.includes(family))) {
    return 512;
  }

  return DEFAULT_LIMIT;
}
44
+
45
/**
 * Public lookup: maximum token count supported by a model.
 * Non-string or empty names fall back to the default limit.
 *
 * @param {string} modelName - Model identifier.
 * @returns {number} Token limit for the model.
 */
export function getModelTokenLimit(modelName) {
  if (typeof modelName !== 'string' || modelName.length === 0) {
    return DEFAULT_LIMIT;
  }

  // Fast path: exact, case-sensitive table hit.
  const exact = MODEL_TOKEN_LIMITS[modelName];
  return exact !== undefined
    ? exact
    : getModelTokenLimitFromLower(modelName.toLowerCase(), modelName);
}
54
+
55
import { CHUNKING_PARAMS_CACHE_SIZE as MAX_CACHE_SIZE } from './constants.js';

// LRU cache of derived chunking parameters, keyed by lowercased model name.
const chunkingParamsCache = new Map();

// Derive chunk sizes from a model's token window:
// target is 85% of the window, overlap is 18% of the target.
function buildChunkingParams(maxTokens) {
  const targetTokens = Math.trunc(maxTokens * 0.85);
  const overlapTokens = Math.trunc(targetTokens * 0.18);
  return { maxTokens, targetTokens, overlapTokens };
}

/**
 * Chunking parameters (max/target/overlap token counts) for a model,
 * memoized in a small LRU cache.
 *
 * @param {string} modelName - Model identifier; empty/non-string uses defaults.
 * @returns {{ maxTokens: number, targetTokens: number, overlapTokens: number }}
 */
export function getChunkingParams(modelName) {
  const key = typeof modelName === 'string' && modelName.length ? modelName.toLowerCase() : '';

  // Unknown model: derive from the default limit without touching the cache.
  if (key === '') {
    return buildChunkingParams(DEFAULT_LIMIT);
  }

  const hit = chunkingParamsCache.get(key);
  if (hit) {
    // Refresh recency: Map preserves insertion order, so delete + re-insert
    // moves this entry to the most-recently-used end.
    chunkingParamsCache.delete(key);
    chunkingParamsCache.set(key, hit);
    return hit;
  }

  const params = buildChunkingParams(getModelTokenLimitFromLower(key, modelName));

  // Evict the least-recently-used entry (first key) when the cache is full.
  if (chunkingParamsCache.size >= MAX_CACHE_SIZE) {
    chunkingParamsCache.delete(chunkingParamsCache.keys().next().value);
  }

  chunkingParamsCache.set(key, params);
  return params;
}
89
+
90
// ASCII whitespace lookup table: tab, LF, VT, FF, CR, space.
const WS = new Uint8Array(128);
for (const code of [9, 10, 11, 12, 13, 32]) {
  WS[code] = 1;
}

// ASCII punctuation common in source code; estimateTokens discounts
// every second such character from its estimate.
const SPECIAL = new Uint8Array(128);
const SPECIAL_CHARS = '{}()[];:,.<>!=+-*/%&|^~@#$"\'`\\';
for (const ch of SPECIAL_CHARS) {
  SPECIAL[ch.charCodeAt(0)] = 1;
}

// Approximate token cost of a word from its character length:
// <=4 chars -> 1 token, <=10 -> 2, otherwise ceil(len / 4).
function calcWordTokens(len) {
  if (len <= 4) return 1;
  if (len <= 10) return 2;
  return (len + 3) >> 2;
}
109
+
110
/**
 * Fast heuristic token-count estimate for a string, without running a real
 * tokenizer. Single pass over UTF-16 code units: ASCII words are costed by
 * length via calcWordTokens, each non-ASCII non-whitespace code unit counts
 * as one token, and half the code-punctuation characters are added at the
 * end (pairs of punctuation often merge into one token — heuristic).
 *
 * @param {string} text - Input text; non-strings and '' return 0.
 * @param {{ includeSpecialTokens?: boolean }} [opts] - When true (default),
 *   adds 2 for the model's begin/end special tokens.
 * @returns {number} Estimated token count.
 */
export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
  if (typeof text !== 'string' || text.length === 0) return 0;

  const len = text.length;
  // Start at 2 to account for [CLS]/[SEP]-style wrapper tokens when requested.
  let tokenCount = includeSpecialTokens ? 2 : 0;
  let specialCount = 0;
  // Index of the first character of the word currently being scanned;
  // -1 means we are not inside a word.
  let wordStart = -1;

  for (let i = 0; i < len; i++) {
    const code = text.charCodeAt(i);

    // ASCII fast path: table lookups instead of comparisons.
    if (code < 128) {
      if (WS[code]) {
        // Whitespace terminates any in-progress word.
        if (wordStart !== -1) {
          tokenCount += calcWordTokens(i - wordStart);
          wordStart = -1;
        }
      } else {
        // Punctuation stays part of the word here; it is tallied separately
        // and discounted after the loop.
        specialCount += SPECIAL[code];
        if (wordStart === -1) wordStart = i;
      }
      continue;
    }

    // Non-ASCII: check the common Unicode whitespace code points explicitly.
    const isUnicodeWS =
      code === 0x00a0 ||
      code === 0x202f ||
      (code >= 0x2000 && code <= 0x200a) ||
      code === 0x3000 ||
      code === 0x2028 ||
      code === 0x2029 ||
      code === 0x205f ||
      code === 0x1680 ||
      code === 0x180e ||
      code === 0x0085 ||
      code === 0xfeff;

    if (isUnicodeWS) {
      // Unicode whitespace also ends the current word.
      if (wordStart !== -1) {
        tokenCount += calcWordTokens(i - wordStart);
        wordStart = -1;
      }
    } else {
      // Non-ASCII, non-whitespace: flush the pending ASCII word and count
      // this code unit as one token on its own (typical for CJK and most
      // multilingual BPE vocabularies — heuristic, not exact).
      if (wordStart !== -1) {
        tokenCount += calcWordTokens(i - wordStart);
        wordStart = -1;
      }
      tokenCount++;
    }
  }

  // Flush a word that runs to the end of the string.
  if (wordStart !== -1) {
    tokenCount += calcWordTokens(len - wordStart);
  }

  // Credit half of the punctuation characters: adjacent punctuation tends to
  // merge into shared tokens.
  tokenCount += specialCount >> 1;

  return tokenCount;
}