@softerist/heuristic-mcp 3.2.3 → 3.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +387 -376
- package/config.jsonc +800 -800
- package/features/ann-config.js +102 -110
- package/features/clear-cache.js +81 -84
- package/features/find-similar-code.js +265 -286
- package/features/hybrid-search.js +487 -536
- package/features/index-codebase.js +3146 -3271
- package/features/lifecycle.js +1011 -1063
- package/features/package-version.js +277 -291
- package/features/register.js +351 -370
- package/features/resources.js +115 -130
- package/features/set-workspace.js +214 -240
- package/index.js +788 -781
- package/lib/cache-ops.js +22 -22
- package/lib/cache-utils.js +465 -519
- package/lib/cache.js +1749 -1849
- package/lib/call-graph.js +396 -396
- package/lib/cli.js +232 -226
- package/lib/config.js +1483 -1495
- package/lib/constants.js +511 -493
- package/lib/embed-query-process.js +206 -212
- package/lib/embedding-process.js +434 -451
- package/lib/embedding-worker.js +862 -934
- package/lib/ignore-patterns.js +276 -316
- package/lib/json-worker.js +14 -14
- package/lib/json-writer.js +302 -310
- package/lib/logging.js +133 -127
- package/lib/memory-logger.js +13 -13
- package/lib/onnx-backend.js +188 -193
- package/lib/path-utils.js +18 -23
- package/lib/project-detector.js +82 -84
- package/lib/server-lifecycle.js +164 -147
- package/lib/settings-editor.js +738 -739
- package/lib/slice-normalize.js +25 -31
- package/lib/tokenizer.js +168 -203
- package/lib/utils.js +364 -409
- package/lib/vector-store-binary.js +973 -991
- package/lib/vector-store-sqlite.js +377 -414
- package/lib/workspace-env.js +32 -34
- package/mcp_config.json +9 -9
- package/package.json +86 -86
- package/scripts/clear-cache.js +20 -20
- package/scripts/download-model.js +43 -43
- package/scripts/mcp-launcher.js +49 -49
- package/scripts/postinstall.js +12 -12
- package/search-configs.js +36 -36
package/lib/slice-normalize.js
CHANGED
|
@@ -1,31 +1,25 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
export function toFloat32Array(vector) {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
return new Float32Array(vector);
|
|
31
|
-
}
|
|
1
|
+
/**
 * Truncates an embedding to its first `targetDim` components and rescales the
 * truncated copy to unit L2 length.
 *
 * The input is returned as-is (same reference, not re-normalized) when there
 * is nothing to truncate: `targetDim` is missing, zero, negative, NaN, floors
 * to zero, or is at least `vector.length`.
 *
 * @param {number[]|Float32Array} vector - Source embedding; never mutated.
 * @param {number} [targetDim] - Desired number of leading components.
 *   Fractional values are floored.
 * @returns {number[]|Float32Array} `vector` itself, or a new normalized slice
 *   of the same array type.
 */
export function sliceAndNormalize(vector, targetDim) {
  // `!(x > 0)` rejects undefined, null, 0, NaN and negative values in one test.
  if (!(targetDim > 0)) {
    return vector;
  }

  // A fractional dimension would otherwise make the loops below read one slot
  // past the end of the slice and poison the norm with NaN.
  const dim = Math.floor(targetDim);
  if (dim < 1 || dim >= vector.length) {
    return vector;
  }

  const sliced = vector.slice(0, dim);

  // Iterate over the slice's real length rather than the requested dimension.
  let sumSquares = 0;
  for (let i = 0; i < sliced.length; i++) {
    sumSquares += sliced[i] * sliced[i];
  }
  const norm = Math.sqrt(sumSquares);

  // An all-zero slice has no direction; leave it untouched instead of dividing by 0.
  if (norm > 0) {
    for (let i = 0; i < sliced.length; i++) {
      sliced[i] /= norm;
    }
  }

  return sliced;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Builds a `Float32Array` from `vector` by passing it straight to the
 * `Float32Array` constructor.
 *
 * @param {*} vector - Anything the `Float32Array` constructor accepts
 *   (presumably an array or typed array of numbers — confirm against callers).
 * @returns {Float32Array} The newly constructed typed array.
 */
export function toFloat32Array(vector) {
  const typed = new Float32Array(vector);
  return typed;
}
|
package/lib/tokenizer.js
CHANGED
|
@@ -1,203 +1,168 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
if (
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
if (
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
if (
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
code === 0x3000 ||
|
|
170
|
-
code === 0x2028 ||
|
|
171
|
-
code === 0x2029 ||
|
|
172
|
-
code === 0x205f ||
|
|
173
|
-
code === 0x1680 ||
|
|
174
|
-
code === 0x180e ||
|
|
175
|
-
code === 0x0085 ||
|
|
176
|
-
code === 0xfeff;
|
|
177
|
-
|
|
178
|
-
if (isUnicodeWS) {
|
|
179
|
-
if (wordStart !== -1) {
|
|
180
|
-
tokenCount += calcWordTokens(i - wordStart);
|
|
181
|
-
wordStart = -1;
|
|
182
|
-
}
|
|
183
|
-
} else {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
if (wordStart !== -1) {
|
|
187
|
-
tokenCount += calcWordTokens(i - wordStart);
|
|
188
|
-
wordStart = -1;
|
|
189
|
-
}
|
|
190
|
-
tokenCount++;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
if (wordStart !== -1) {
|
|
196
|
-
tokenCount += calcWordTokens(len - wordStart);
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
tokenCount += specialCount >> 1;
|
|
201
|
-
|
|
202
|
-
return tokenCount;
|
|
203
|
-
}
|
|
1
|
+
// True when running under Vitest or with NODE_ENV=test.
const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';

const MODEL_TOKEN_LIMITS_RAW = {
  'jinaai/jina-embeddings-v2-base-code': 512,
  default: 512,
};

// Exported limits table: a plain mutable copy in test environments, a frozen
// copy otherwise.
export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
  ? { ...MODEL_TOKEN_LIMITS_RAW }
  : Object.freeze({ ...MODEL_TOKEN_LIMITS_RAW });

const DEFAULT_LIMIT = MODEL_TOKEN_LIMITS.default ?? 512;

// Case-insensitive view of the table, snapshotted once at module load.
const MODEL_LIMITS_LC = new Map();
for (const [k, v] of Object.entries(MODEL_TOKEN_LIMITS)) {
  MODEL_LIMITS_LC.set(k.toLowerCase(), v);
}

// Name fragments of model families that are assigned a 512-token limit.
const FAMILIES_512 = ['jina', 'nomic', 'gte-large', 'gte-base', 'gte-small', 'minilm'];

/**
 * Reads a limit from MODEL_TOKEN_LIMITS, considering own properties only.
 *
 * A bare `MODEL_TOKEN_LIMITS[name]` also resolves inherited members such as
 * `constructor`, `toString` or `__proto__`, which would hand callers a
 * function/object instead of a number.
 *
 * @param {string} name - Exact (case-sensitive) model name.
 * @returns {number|undefined} The configured limit, or undefined if absent.
 */
function lookupOwnLimit(name) {
  return Object.prototype.hasOwnProperty.call(MODEL_TOKEN_LIMITS, name)
    ? MODEL_TOKEN_LIMITS[name]
    : undefined;
}

/**
 * Resolves a token limit when the lower-cased model name is already known.
 *
 * Lookup order: exact own key in the live table, lower-cased snapshot,
 * family-name heuristics, then the default.
 *
 * @param {string} lowerName - Lower-cased model name.
 * @param {*} originalName - Model name as supplied by the caller; only used
 *   for the exact-key lookup when it is a string.
 * @returns {number} Token limit for the model.
 */
function getModelTokenLimitFromLower(lowerName, originalName) {
  if (typeof originalName === 'string') {
    // Live table first, so entries added after module load are still seen.
    const direct = lookupOwnLimit(originalName);
    if (direct !== undefined) return direct;
  }

  const exact = MODEL_LIMITS_LC.get(lowerName);
  if (exact !== undefined) return exact;

  if (FAMILIES_512.some((family) => lowerName.includes(family))) {
    return 512;
  }

  return DEFAULT_LIMIT;
}

/**
 * Returns the token limit for an embedding model.
 *
 * @param {*} modelName - Model identifier; anything that is not a non-empty
 *   string yields the default limit.
 * @returns {number} Token limit for the model.
 */
export function getModelTokenLimit(modelName) {
  if (typeof modelName !== 'string' || modelName.length === 0) return DEFAULT_LIMIT;

  const direct = lookupOwnLimit(modelName);
  if (direct !== undefined) return direct;

  return getModelTokenLimitFromLower(modelName.toLowerCase(), modelName);
}
|
|
54
|
+
|
|
55
|
+
import { CHUNKING_PARAMS_CACHE_SIZE as MAX_CACHE_SIZE } from './constants.js';
|
|
56
|
+
// Chunking parameters already computed, keyed by lower-cased model name.
// Map iteration follows insertion order, which the eviction below relies on.
const chunkingParamsCache = new Map();

/**
 * Derives the chunking parameters that follow from a model's token limit.
 *
 * @param {number} maxTokens - Token limit of the model.
 * @returns {{maxTokens: number, targetTokens: number, overlapTokens: number}}
 *   `targetTokens` is 85% of `maxTokens` and `overlapTokens` is 18% of
 *   `targetTokens`, both truncated toward zero.
 */
function buildChunkingParams(maxTokens) {
  const targetTokens = Math.trunc(maxTokens * 0.85);
  const overlapTokens = Math.trunc(targetTokens * 0.18);
  return { maxTokens, targetTokens, overlapTokens };
}

/**
 * Returns the chunking parameters for a model, memoized per lower-cased name.
 *
 * Anything that is not a non-empty string gets parameters derived from the
 * default limit; that result is built fresh on every call and never cached.
 * Cached entries are returned by reference.
 *
 * @param {*} modelName - Model identifier.
 * @returns {{maxTokens: number, targetTokens: number, overlapTokens: number}}
 */
export function getChunkingParams(modelName) {
  const isNamed = typeof modelName === 'string' && modelName.length > 0;
  if (!isNamed) {
    return buildChunkingParams(DEFAULT_LIMIT);
  }

  const cacheKey = modelName.toLowerCase();
  const hit = chunkingParamsCache.get(cacheKey);
  if (hit) {
    // Re-insert so this key becomes the most recently used entry.
    chunkingParamsCache.delete(cacheKey);
    chunkingParamsCache.set(cacheKey, hit);
    return hit;
  }

  const fresh = buildChunkingParams(getModelTokenLimitFromLower(cacheKey, modelName));

  if (chunkingParamsCache.size >= MAX_CACHE_SIZE) {
    // The first key in iteration order is the least recently used one.
    const [stalest] = chunkingParamsCache.keys();
    chunkingParamsCache.delete(stalest);
  }

  chunkingParamsCache.set(cacheKey, fresh);
  return fresh;
}
|
|
89
|
+
|
|
90
|
+
// ASCII lookup table: 1 at code units 9-13 (tab, LF, VT, FF, CR) and 32 (space).
const WS = new Uint8Array(128);
for (const code of [9, 10, 11, 12, 13, 32]) {
  WS[code] = 1;
}

// ASCII lookup table: 1 at the code unit of every character in SPECIAL_CHARS.
const SPECIAL = new Uint8Array(128);
const SPECIAL_CHARS = '{}()[];:,.<>!=+-*/%&|^~@#$"\'`\\';
for (const ch of SPECIAL_CHARS) {
  SPECIAL[ch.charCodeAt(0)] = 1;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Estimates how many tokens a word of `len` characters costs.
 *
 * @param {number} len - Word length in UTF-16 code units.
 * @returns {number} 1 for up to 4 characters, 2 for up to 10, otherwise
 *   `(len + 3) >> 2` (one token per started group of four characters).
 */
function calcWordTokens(len) {
  if (len > 10) {
    return (len + 3) >> 2;
  }
  return len > 4 ? 2 : 1;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Tells whether a non-ASCII UTF-16 code unit is one of the code points this
 * module treats as whitespace.
 *
 * @param {number} unit - Code unit, expected to be >= 128.
 * @returns {boolean} True for the listed whitespace-like code points.
 */
function isNonAsciiWhitespace(unit) {
  if (unit >= 0x2000 && unit <= 0x200a) return true;

  switch (unit) {
    case 0x00a0:
    case 0x202f:
    case 0x3000:
    case 0x2028:
    case 0x2029:
    case 0x205f:
    case 0x1680:
    case 0x180e:
    case 0x0085:
    case 0xfeff:
      return true;
    default:
      return false;
  }
}

/**
 * Heuristically estimates the token count of `text` without running a tokenizer.
 *
 * Each run of consecutive ASCII non-whitespace characters is costed with
 * `calcWordTokens`, every non-ASCII code unit that is not whitespace costs one
 * token and ends the current run, and half the number of ASCII characters
 * flagged in `SPECIAL` (rounded down) is added on top.
 *
 * @param {*} text - Text to measure; non-strings and '' yield 0.
 * @param {{includeSpecialTokens?: boolean}} [options] - When
 *   `includeSpecialTokens` is true (the default) the count starts at 2
 *   (presumably for sequence start/end markers — confirm against the model).
 * @returns {number} Estimated token count.
 */
export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
  if (typeof text !== 'string' || text.length === 0) return 0;

  let total = includeSpecialTokens ? 2 : 0;
  let flaggedChars = 0;
  // Length of the ASCII word currently being scanned; 0 means no open word.
  let runLength = 0;

  for (let pos = 0; pos < text.length; pos += 1) {
    const unit = text.charCodeAt(pos);

    if (unit < 128 && !WS[unit]) {
      // Flagged characters extend the word and are also tallied separately.
      flaggedChars += SPECIAL[unit];
      runLength += 1;
      continue;
    }

    // Any whitespace or non-ASCII unit closes the open word.
    if (runLength > 0) {
      total += calcWordTokens(runLength);
      runLength = 0;
    }

    if (unit >= 128 && !isNonAsciiWhitespace(unit)) {
      total += 1;
    }
  }

  if (runLength > 0) {
    total += calcWordTokens(runLength);
  }

  return total + (flaggedChars >> 1);
}
|