@softerist/heuristic-mcp 3.0.17 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -734,21 +734,28 @@ export function upsertMcpServerEntryInToml(text, serverName, serverConfig) {
734
734
  return `${withTrailingNewline}${newline}${section}${newline}`;
735
735
  }
736
736
 
737
- export function setMcpServerDisabledInToml(text, serverName, disabled) {
738
- const source = String(text || '');
739
- const sectionName = `mcp_servers.${serverName}`;
740
- const range = findTomlSectionRange(source, sectionName);
741
-
742
- if (!range) {
743
- return source;
744
- }
745
-
746
- const sectionBlock = source.slice(range.start, range.end);
747
- const newline = detectNewline(sectionBlock || '\n');
748
- const disabledLine = `disabled = ${disabled ? 'true' : 'false'}`;
749
- const updatedSection = /^\s*disabled\s*=.*$/m.test(sectionBlock)
750
- ? sectionBlock.replace(/^\s*disabled\s*=.*$/m, disabledLine)
751
- : `${sectionBlock.trimEnd()}${newline}${disabledLine}${newline}`;
752
-
753
- return `${source.slice(0, range.start)}${updatedSection}${source.slice(range.end)}`;
754
- }
737
/**
 * Set or clear the `disabled` flag inside one `[mcp_servers.<serverName>]`
 * table of a TOML document.
 *
 * - `disabled` truthy: the first `disabled = …` line of the section is
 *   rewritten to `disabled = true`; when the section has no such line, one is
 *   appended after the section's last non-blank content.
 * - `disabled` falsy: the first `disabled = …` line of the section is removed
 *   together with its line terminator. A section without that key is returned
 *   untouched.
 *
 * The input is returned unchanged when the section cannot be located.
 *
 * @param {string} text - TOML source; falsy values are treated as ''.
 * @param {string} serverName - Name that follows `mcp_servers.` in the header.
 * @param {boolean} disabled - Whether the server should be marked disabled.
 * @returns {string} The updated TOML text.
 */
export function setMcpServerDisabledInToml(text, serverName, disabled) {
  const source = String(text || '');
  const sectionName = `mcp_servers.${serverName}`;
  const range = findTomlSectionRange(source, sectionName);

  if (!range) {
    return source;
  }

  const sectionBlock = source.slice(range.start, range.end);
  const before = source.slice(0, range.start);
  const after = source.slice(range.end);

  if (disabled) {
    const newline = detectNewline(sectionBlock || '\n');
    const disabledLine = 'disabled = true';
    // `[ \t]` instead of `\s`: with the `m` flag `\s` also matches newlines, so
    // the match could begin on a preceding blank line and swallow it.
    const existingLine = /^[ \t]*disabled[ \t]*=.*$/m;
    const updatedSection = existingLine.test(sectionBlock)
      ? sectionBlock.replace(existingLine, disabledLine)
      : `${sectionBlock.trimEnd()}${newline}${disabledLine}${newline}`;

    return `${before}${updatedSection}${after}`;
  }

  // Drop the whole line including its terminator (LF or CRLF) so no empty
  // line is left behind and neighbouring blank lines stay as they were.
  const cleanedSection = sectionBlock.replace(/^[ \t]*disabled[ \t]*=.*(?:\r?\n|$)/m, '');
  return `${before}${cleanedSection}${after}`;
}
@@ -1,19 +1,13 @@
1
- /**
2
- * Slice and L2-normalize a vector for MRL (Matryoshka Representation Learning).
3
- * If targetDim is null/undefined or >= vector length, returns the original vector unchanged.
4
- * @param {Float32Array} vector - The full embedding vector
5
- * @param {number|null} targetDim - Target dimension (64/128/256/512/768 or null)
6
- * @returns {Float32Array} - Sliced and normalized vector, or original if no slicing
7
- */
1
+
8
2
  export function sliceAndNormalize(vector, targetDim) {
9
3
  if (!targetDim || targetDim >= vector.length) {
10
4
  return vector;
11
5
  }
12
6
 
13
- // Slice to target dimension
7
+
14
8
  const sliced = vector.slice(0, targetDim);
15
9
 
16
- // L2 normalize the sliced vector
10
+
17
11
  let sumSquares = 0;
18
12
  for (let i = 0; i < targetDim; i++) {
19
13
  sumSquares += sliced[i] * sliced[i];
@@ -29,13 +23,9 @@ export function sliceAndNormalize(vector, targetDim) {
29
23
  return sliced;
30
24
  }
31
25
 
32
- /**
33
- * Convert any array-like to Float32Array (always creates a copy).
34
- * @param {ArrayLike<number>} vector - Input vector
35
- * @returns {Float32Array} - Copy as Float32Array
36
- */
26
+
37
27
  export function toFloat32Array(vector) {
38
- // Always create a copy to ensure we have a unique buffer
39
- // and avoid issues with reusable WASM memory views
28
+
29
+
40
30
  return new Float32Array(vector);
41
31
  }
package/lib/tokenizer.js CHANGED
@@ -1,24 +1,13 @@
1
- /**
2
- * Token estimation and limits for embedding models
3
- *
4
- * Performance:
5
- * - O(1) model lookups with precomputed maps
6
- * - Zero regex / Zero allocations in hot loop
7
- * - Proper LRU cache eviction
8
- * - Optimized Unicode whitespace detection (ordered by probability)
9
- * - Eliminated double toLowerCase() calls
10
- * - Type-safe guard rails on all public APIs
11
- * - Branchless special character counting
12
- */
1
+
13
2
 
14
3
  const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
15
4
 
16
5
  const MODEL_TOKEN_LIMITS_RAW = {
17
- // NOTE: While jina-embeddings-v2-base-code supports 8192 tokens, ONNX runtime
18
- // allocates O(n²) memory for attention. Using 512 tokens for optimal speed
19
- // with 4 ONNX threads (~1.5GB RAM, fastest inference).
6
+
7
+
8
+
20
9
  'jinaai/jina-embeddings-v2-base-code': 512,
21
- default: 512, // Safe default for BERT-like models
10
+ default: 512,
22
11
  };
23
12
 
24
13
  export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
@@ -27,34 +16,26 @@ export const MODEL_TOKEN_LIMITS = IS_TEST_ENV
27
16
 
28
17
  const DEFAULT_LIMIT = MODEL_TOKEN_LIMITS.default ?? 512;
29
18
 
30
- /**
31
- * Precomputed case-insensitive lookup
32
- */
19
+
33
20
  const MODEL_LIMITS_LC = new Map();
34
21
  for (const [k, v] of Object.entries(MODEL_TOKEN_LIMITS)) {
35
22
  MODEL_LIMITS_LC.set(k.toLowerCase(), v);
36
23
  }
37
24
 
38
- /**
39
- * Internal helper: get model limit from pre-normalized key
40
- * Avoids double toLowerCase() when called from cache flow
41
- * @param {string} lowerName - Pre-normalized lowercase model name
42
- * @param {*} originalName - Original model name (may not be a string)
43
- * @returns {number} Token limit
44
- */
25
+
45
26
  function getModelTokenLimitFromLower(lowerName, originalName) {
46
- // Fast path: try exact match first (only if original is a string)
27
+
47
28
  if (typeof originalName === 'string') {
48
29
  const direct = MODEL_TOKEN_LIMITS[originalName];
49
30
  if (direct !== undefined) return direct;
50
31
  }
51
32
 
52
- // Slow path: use pre-normalized key
33
+
53
34
  const exact = MODEL_LIMITS_LC.get(lowerName);
54
35
  if (exact !== undefined) return exact;
55
36
 
56
- // Heuristics for common models (use conservative limits for ONNX speed)
57
- // 512 tokens = fastest, 1024 = 4x more compute due to O(n²) attention
37
+
38
+
58
39
  if (
59
40
  lowerName.includes('jina') ||
60
41
  lowerName.includes('nomic') ||
@@ -72,13 +53,9 @@ function getModelTokenLimitFromLower(lowerName, originalName) {
72
53
  return DEFAULT_LIMIT;
73
54
  }
74
55
 
75
- /**
76
- * Get the maximum token limit for a given model
77
- * @param {string} modelName - The model name
78
- * @returns {number} Maximum tokens supported by the model
79
- */
56
+
80
57
  export function getModelTokenLimit(modelName) {
81
- // Guard clause for non-string or empty inputs
58
+
82
59
  if (typeof modelName !== 'string' || modelName.length === 0) return DEFAULT_LIMIT;
83
60
 
84
61
  const direct = MODEL_TOKEN_LIMITS[modelName];
@@ -87,22 +64,15 @@ export function getModelTokenLimit(modelName) {
87
64
  const lower = modelName.toLowerCase();
88
65
  return getModelTokenLimitFromLower(lower, modelName);
89
66
  }
90
- /**
91
- * LRU cache for chunking parameters
92
- * @type {Map<string, {maxTokens: number, targetTokens: number, overlapTokens: number}>}
93
- */
67
+
94
68
  import { CHUNKING_PARAMS_CACHE_SIZE as MAX_CACHE_SIZE } from './constants.js';
95
69
  const chunkingParamsCache = new Map();
96
70
 
97
- /**
98
- * Get chunking parameters for a model
99
- * @param {string} modelName - The model name
100
- * @returns {{maxTokens: number, targetTokens: number, overlapTokens: number}}
101
- */
71
+
102
72
  export function getChunkingParams(modelName) {
103
73
  const key = typeof modelName === 'string' && modelName.length ? modelName.toLowerCase() : '';
104
74
 
105
- // Fast path for invalid inputs: don't consume cache slots
75
+
106
76
  if (key === '') {
107
77
  const maxTokens = DEFAULT_LIMIT;
108
78
  const targetTokens = Math.trunc(maxTokens * 0.85);
@@ -110,10 +80,10 @@ export function getChunkingParams(modelName) {
110
80
  return { maxTokens, targetTokens, overlapTokens };
111
81
  }
112
82
 
113
- // LRU pattern: delete-and-reinsert to mark as most recently used.
114
- // Note: This creates minor GC pressure due to Map key reallocation, but is
115
- // acceptable for MAX_CACHE_SIZE=100. For larger caches (1000+), consider
116
- // a doubly-linked-list LRU implementation for O(1) access without reallocation.
83
+
84
+
85
+
86
+
117
87
  const cached = chunkingParamsCache.get(key);
118
88
  if (cached) {
119
89
  chunkingParamsCache.delete(key);
@@ -121,14 +91,14 @@ export function getChunkingParams(modelName) {
121
91
  return cached;
122
92
  }
123
93
 
124
- // Cache miss: compute new params (avoid double toLowerCase)
94
+
125
95
  const maxTokens = getModelTokenLimitFromLower(key, modelName);
126
96
  const targetTokens = Math.trunc(maxTokens * 0.85);
127
97
  const overlapTokens = Math.trunc(targetTokens * 0.18);
128
98
 
129
99
  const params = { maxTokens, targetTokens, overlapTokens };
130
100
 
131
- // LRU eviction: remove oldest entry if at capacity
101
+
132
102
  if (chunkingParamsCache.size >= MAX_CACHE_SIZE) {
133
103
  const oldestKey = chunkingParamsCache.keys().next().value;
134
104
  chunkingParamsCache.delete(oldestKey);
@@ -138,66 +108,43 @@ export function getChunkingParams(modelName) {
138
108
  return params;
139
109
  }
140
110
 
141
- /**
142
- * ASCII whitespace lookup table
143
- */
111
+
144
112
  const WS = new Uint8Array(128);
145
- WS[9] = 1; // \t (horizontal tab)
146
- WS[10] = 1; // \n (line feed)
147
- WS[11] = 1; // \v (vertical tab)
148
- WS[12] = 1; // \f (form feed)
149
- WS[13] = 1; // \r (carriage return)
150
- WS[32] = 1; // space
151
-
152
- /**
153
- * ASCII special character lookup table
154
- */
113
+ WS[9] = 1;
114
+ WS[10] = 1;
115
+ WS[11] = 1;
116
+ WS[12] = 1;
117
+ WS[13] = 1;
118
+ WS[32] = 1;
119
+
120
+
155
121
  const SPECIAL = new Uint8Array(128);
156
122
  const SPECIAL_CHARS = '{}()[];:,.<>!=+-*/%&|^~@#$"\'`\\';
157
123
  for (let i = 0; i < SPECIAL_CHARS.length; i++) {
158
124
  SPECIAL[SPECIAL_CHARS.charCodeAt(i)] = 1;
159
125
  }
160
126
 
161
- /**
162
- * Calculate token count for a word of given length
163
- * This function will be inlined by V8
164
- * @param {number} len - Word length in characters
165
- * @returns {number} Estimated token count
166
- */
127
+
167
128
  function calcWordTokens(len) {
168
129
  if (len <= 4) return 1;
169
130
  if (len <= 10) return 2;
170
- return (len + 3) >> 2; // ceil(len / 4)
131
+ return (len + 3) >> 2;
171
132
  }
172
133
 
173
- /**
174
- * Estimate token count for text (conservative estimate for code)
175
- *
176
- * Performance optimizations:
177
- * - No regex (pure integer comparisons)
178
- * - No string allocations (charCodeAt only)
179
- * - Inlined word token calculation
180
- * - Unicode checks ordered by frequency
181
- * - Branchless special character counting
182
- *
183
- * @param {string} text - The text to estimate tokens for
184
- * @param {object} [options]
185
- * @param {boolean} [options.includeSpecialTokens=true] - Whether to include [CLS]/[SEP]
186
- * @returns {number} Estimated token count
187
- */
134
+
188
135
  export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
189
- // Type-safe guard: prevents crashes from non-string inputs
136
+
190
137
  if (typeof text !== 'string' || text.length === 0) return 0;
191
138
 
192
139
  const len = text.length;
193
- let tokenCount = includeSpecialTokens ? 2 : 0; // [CLS] + [SEP]
140
+ let tokenCount = includeSpecialTokens ? 2 : 0;
194
141
  let specialCount = 0;
195
142
  let wordStart = -1;
196
143
 
197
144
  for (let i = 0; i < len; i++) {
198
145
  const code = text.charCodeAt(i);
199
146
 
200
- // ASCII fast path (most common for code)
147
+
201
148
  if (code < 128) {
202
149
  if (WS[code]) {
203
150
  if (wordStart !== -1) {
@@ -205,28 +152,28 @@ export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
205
152
  wordStart = -1;
206
153
  }
207
154
  } else {
208
- // Branchless: add 0 or 1 based on SPECIAL[code]
155
+
209
156
  specialCount += SPECIAL[code];
210
157
  if (wordStart === -1) wordStart = i;
211
158
  }
212
159
  continue;
213
160
  }
214
161
 
215
- // Unicode whitespace: ordered by frequency for real-world text
216
- // Note: Includes legacy 0x180E for tokenization compatibility even though
217
- // modern JS \s doesn't consider it whitespace (ES2016+)
162
+
163
+
164
+
218
165
  const isUnicodeWS =
219
- code === 0x00a0 || // NBSP (most common)
220
- code === 0x202f || // NARROW NO-BREAK SPACE
221
- (code >= 0x2000 && code <= 0x200a) || // EN QUAD..HAIR SPACE
222
- code === 0x3000 || // IDEOGRAPHIC SPACE (CJK)
223
- code === 0x2028 || // LINE SEPARATOR
224
- code === 0x2029 || // PARAGRAPH SEPARATOR
225
- code === 0x205f || // MEDIUM MATHEMATICAL SPACE
226
- code === 0x1680 || // OGHAM SPACE MARK
227
- code === 0x180e || // MONGOLIAN VOWEL SEPARATOR (legacy)
228
- code === 0x0085 || // NEXT LINE (NEL)
229
- code === 0xfeff; // ZERO WIDTH NO-BREAK SPACE / BOM
166
+ code === 0x00a0 ||
167
+ code === 0x202f ||
168
+ (code >= 0x2000 && code <= 0x200a) ||
169
+ code === 0x3000 ||
170
+ code === 0x2028 ||
171
+ code === 0x2029 ||
172
+ code === 0x205f ||
173
+ code === 0x1680 ||
174
+ code === 0x180e ||
175
+ code === 0x0085 ||
176
+ code === 0xfeff;
230
177
 
231
178
  if (isUnicodeWS) {
232
179
  if (wordStart !== -1) {
@@ -234,8 +181,8 @@ export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
234
181
  wordStart = -1;
235
182
  }
236
183
  } else {
237
- // Non-ASCII, non-whitespace (e.g., CJK, emojis, accented chars)
238
- // Conservative estimate: treat each as 1 token
184
+
185
+
239
186
  if (wordStart !== -1) {
240
187
  tokenCount += calcWordTokens(i - wordStart);
241
188
  wordStart = -1;
@@ -244,12 +191,12 @@ export function estimateTokens(text, { includeSpecialTokens = true } = {}) {
244
191
  }
245
192
  }
246
193
 
247
- // Flush final word
194
+
248
195
  if (wordStart !== -1) {
249
196
  tokenCount += calcWordTokens(len - wordStart);
250
197
  }
251
198
 
252
- // Add ~50% of special chars as tokens
199
+
253
200
  tokenCount += specialCount >> 1;
254
201
 
255
202
  return tokenCount;