@crashbytes/semantic-text-toolkit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DEPLOYMENT.md +203 -0
- package/README.md +300 -0
- package/dist/SemanticEngine-3EGZZHKU.mjs +7 -0
- package/dist/SemanticSearch-CQZQEKEG.mjs +7 -0
- package/dist/chunk-ENOBULOJ.mjs +93 -0
- package/dist/chunk-TPAL6DKL.mjs +149 -0
- package/dist/chunk-XJ4PTDH6.mjs +176 -0
- package/dist/index.d.mts +148 -0
- package/dist/index.d.ts +148 -0
- package/dist/index.js +506 -0
- package/dist/index.mjs +55 -0
- package/package.json +56 -0
- package/src/engine/SemanticEngine.ts +225 -0
- package/src/index.ts +31 -0
- package/src/search/SemanticSearch.ts +154 -0
- package/src/types.ts +73 -0
- package/src/utils/vector.ts +158 -0
- package/tsconfig.json +25 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler runtime: short aliases for the reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Turns a single-entry object (`{ "path"() { ... } }`) into an initializer
 * that runs the entry at most once and returns its cached result afterwards.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initializer = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before running so a re-entrant call does not run it again.
    fn = 0;
    res = initializer(0);
  }
  return res;
};

/**
 * Defines every key of `all` on `target` as an enumerable getter, so the
 * exported value is read lazily at access time.
 */
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copies own properties of `from` onto `to` as live getters, skipping the key
 * `except` and any key `to` already owns. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isObject = from && typeof from === "object";
  if (!isObject && typeof from !== "function") {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Mirror the source's enumerability; default to enumerable when unknown.
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/** Builds a CommonJS exports object flagged `__esModule` from an ESM-style namespace. */
var __toCommonJS = (mod) => {
  const target = __defProp({}, "__esModule", { value: true });
  return __copyProps(target, mod);
};
|
|
22
|
+
|
|
23
|
+
// src/types.ts
|
|
24
|
+
// Bindings for src/types.ts; both stay undefined until init_types() has run.
var SemanticErrorCode, SemanticError;
var init_types = __esm({
  "src/types.ts"() {
    "use strict";
    // String enum: every member's value is its own name.
    SemanticErrorCode = /* @__PURE__ */ ((codes) => {
      const members = [
        "MODEL_NOT_LOADED",
        "INVALID_INPUT",
        "EMBEDDING_FAILED",
        "COMPUTATION_FAILED",
        "DIMENSION_MISMATCH"
      ];
      for (const member of members) {
        codes[member] = member;
      }
      return codes;
    })(SemanticErrorCode || {});
    /**
     * Error type thrown by the toolkit.
     * `code` is one of the SemanticErrorCode values; `details` is an optional
     * bag of diagnostic data supplied by the throw site.
     */
    SemanticError = class extends Error {
      constructor(code, message, details) {
        super(message);
        Object.assign(this, { code, details, name: "SemanticError" });
      }
    };
  }
});
|
|
46
|
+
|
|
47
|
+
// src/utils/vector.ts
|
|
48
|
+
/**
 * Ensures two embeddings have the same number of components.
 *
 * @param {ArrayLike<number>} a - first embedding
 * @param {ArrayLike<number>} b - second embedding
 * @throws {SemanticError} DIMENSION_MISMATCH when the lengths differ; the
 *   details carry both lengths.
 */
function validateDimensions(a, b) {
  const lengths = [a.length, b.length];
  if (lengths[0] === lengths[1]) {
    return;
  }
  throw new SemanticError(
    "DIMENSION_MISMATCH" /* DIMENSION_MISMATCH */,
    `Embedding dimensions must match. Got ${lengths[0]} and ${lengths[1]}`,
    { dimensions: lengths }
  );
}
|
|
57
|
+
/**
 * Ensures a value is usable as an embedding: a non-empty array or typed array.
 *
 * The previous check accepted any truthy value with a non-zero `length`
 * (for example a string or `{ length: 3 }`), which then flowed into the
 * vector math and silently produced NaN.
 *
 * @param {unknown} embedding - value to check
 * @param {string} [name="embedding"] - label used in the error message
 * @throws {SemanticError} INVALID_INPUT when the value is not a non-empty
 *   array / typed array
 */
function validateEmbedding(embedding, name = "embedding") {
  // DataView is an ArrayBuffer view but has no indexed elements or length.
  const isNumericSequence = Array.isArray(embedding) || ArrayBuffer.isView(embedding) && !(embedding instanceof DataView);
  if (!isNumericSequence || embedding.length === 0) {
    throw new SemanticError(
      "INVALID_INPUT" /* INVALID_INPUT */,
      `${name} must be a non-empty array`,
      { length: embedding?.length }
    );
  }
}
|
|
66
|
+
/**
 * Dot product of two embeddings of equal length.
 *
 * @param {ArrayLike<number>} a - first embedding
 * @param {ArrayLike<number>} b - second embedding
 * @returns {number} sum of the component-wise products
 * @throws {SemanticError} via validateEmbedding / validateDimensions when the
 *   inputs are rejected
 */
function dotProduct(a, b) {
  validateEmbedding(a, "first embedding");
  validateEmbedding(b, "second embedding");
  validateDimensions(a, b);
  let total = 0;
  let position = 0;
  // Accumulate front to back so floating-point rounding matches index order.
  while (position < a.length) {
    total += a[position] * b[position];
    position += 1;
  }
  return total;
}
|
|
76
|
+
/**
 * Euclidean length (L2 norm) of a vector.
 *
 * @param {ArrayLike<number>} vector - embedding to measure
 * @returns {number} square root of the sum of squared components
 * @throws {SemanticError} via validateEmbedding when the input is rejected
 */
function magnitude(vector) {
  validateEmbedding(vector);
  let sumOfSquares = 0;
  let position = 0;
  while (position < vector.length) {
    const component = vector[position];
    sumOfSquares += component * component;
    position += 1;
  }
  return Math.sqrt(sumOfSquares);
}
|
|
84
|
+
/**
 * Cosine similarity of two embeddings of equal length.
 *
 * Computes the dot product and both squared norms in a single pass, so the
 * inputs are validated once instead of three times. The quotient is clamped
 * to [-1, 1] because floating-point rounding can push it slightly outside the
 * mathematically valid range (NaN inputs still yield NaN).
 *
 * @param {ArrayLike<number>} a - first embedding
 * @param {ArrayLike<number>} b - second embedding
 * @returns {number} similarity in [-1, 1]
 * @throws {SemanticError} COMPUTATION_FAILED when either vector has zero
 *   magnitude; validation errors from validateEmbedding / validateDimensions
 */
function cosineSimilarity(a, b) {
  validateEmbedding(a, "first embedding");
  validateEmbedding(b, "second embedding");
  validateDimensions(a, b);
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    sumSquaresA += a[i] * a[i];
    sumSquaresB += b[i] * b[i];
  }
  const magA = Math.sqrt(sumSquaresA);
  const magB = Math.sqrt(sumSquaresB);
  if (magA === 0 || magB === 0) {
    throw new SemanticError(
      "COMPUTATION_FAILED" /* COMPUTATION_FAILED */,
      "Cannot compute cosine similarity with zero-magnitude vector",
      { magnitudes: [magA, magB] }
    );
  }
  const cosine = dot / (magA * magB);
  return Math.max(-1, Math.min(1, cosine));
}
|
|
100
|
+
/**
 * Euclidean (straight-line) distance between two embeddings of equal length.
 *
 * @param {ArrayLike<number>} a - first embedding
 * @param {ArrayLike<number>} b - second embedding
 * @returns {number} square root of the summed squared component differences
 * @throws {SemanticError} via validateEmbedding / validateDimensions when the
 *   inputs are rejected
 */
function euclideanDistance(a, b) {
  validateEmbedding(a, "first embedding");
  validateEmbedding(b, "second embedding");
  validateDimensions(a, b);
  let squaredDistance = 0;
  let position = 0;
  while (position < a.length) {
    const delta = a[position] - b[position];
    squaredDistance += delta * delta;
    position += 1;
  }
  return Math.sqrt(squaredDistance);
}
|
|
111
|
+
/**
 * Scales a vector to unit length.
 *
 * @param {number[]} vector - embedding to scale
 * @returns {number[]} new vector with every component divided by the
 *   original magnitude
 * @throws {SemanticError} COMPUTATION_FAILED when the magnitude is zero;
 *   validation errors from validateEmbedding
 */
function normalize(vector) {
  validateEmbedding(vector);
  const length = magnitude(vector);
  if (length !== 0) {
    return vector.map((component) => component / length);
  }
  throw new SemanticError(
    "COMPUTATION_FAILED" /* COMPUTATION_FAILED */,
    "Cannot normalize zero-magnitude vector"
  );
}
|
|
122
|
+
/**
 * Component-wise mean of a set of embeddings.
 *
 * Every entry is now checked with validateEmbedding, matching the other
 * vector helpers: previously a null/undefined entry surfaced as a raw
 * TypeError and an empty entry produced an empty centroid. The dimension
 * mismatch error also reports which entry was wrong.
 *
 * @param {Iterable<ArrayLike<number>> & { length: number }} embeddings -
 *   embeddings to average; all must share one length
 * @returns {number[]} the mean vector
 * @throws {SemanticError} INVALID_INPUT when the collection is empty or an
 *   entry is rejected by validateEmbedding; DIMENSION_MISMATCH when an entry's
 *   length differs from the first entry's
 */
function centroid(embeddings) {
  if (!embeddings || embeddings.length === 0) {
    throw new SemanticError(
      "INVALID_INPUT" /* INVALID_INPUT */,
      "Cannot compute centroid of empty array"
    );
  }
  let sums = null;
  let index = 0;
  for (const embedding of embeddings) {
    validateEmbedding(embedding, `embedding at index ${index}`);
    if (sums === null) {
      // The first entry fixes the expected dimension.
      sums = new Array(embedding.length).fill(0);
    } else if (embedding.length !== sums.length) {
      throw new SemanticError(
        "DIMENSION_MISMATCH" /* DIMENSION_MISMATCH */,
        "All embeddings must have same dimensions",
        { expected: sums.length, actual: embedding.length, index }
      );
    }
    for (let i = 0; i < sums.length; i++) {
      sums[i] += embedding[i];
    }
    index += 1;
  }
  return sums.map((v) => v / embeddings.length);
}
|
|
144
|
+
/**
 * Ranks candidate embeddings by cosine similarity to a query.
 *
 * Candidates whose similarity cannot be computed are kept (best effort) with
 * a score of -Infinity so they sort last. A NaN score is treated the same
 * way: left as NaN it would make the comparator return NaN for every
 * comparison involving it, and the resulting order would be
 * implementation-defined.
 *
 * @param {ArrayLike<number>} query - query embedding
 * @param {Array<ArrayLike<number>>} candidates - embeddings to rank
 * @param {number} [k=10] - maximum number of results
 * @returns {Array<[number, number]>} `[candidateIndex, score]` pairs, best
 *   first; empty when there are no candidates
 * @throws {SemanticError} INVALID_INPUT when `k` is not a positive number
 *   (NaN included) or the query is rejected by validateEmbedding
 */
function topKSimilar(query, candidates, k = 10) {
  validateEmbedding(query, "query");
  if (!candidates || candidates.length === 0) {
    return [];
  }
  // `!(k > 0)` also rejects NaN, which `k <= 0` let through.
  if (!(k > 0)) {
    throw new SemanticError(
      "INVALID_INPUT" /* INVALID_INPUT */,
      "k must be positive",
      { k }
    );
  }
  const similarities = candidates.map((candidate, idx) => {
    let score;
    try {
      score = cosineSimilarity(query, candidate);
    } catch {
      // Deliberate best effort: an unusable candidate ranks last instead of
      // failing the whole query.
      score = -Infinity;
    }
    return [idx, Number.isNaN(score) ? -Infinity : score];
  });
  // Descending by score; ties (including -Infinity pairs, whose difference is
  // NaN) fall back to ascending candidate index.
  similarities.sort((a, b) => b[1] - a[1] || a[0] - b[0]);
  return similarities.slice(0, Math.min(k, similarities.length));
}
|
|
166
|
+
// Lazy initializer for src/utils/vector.ts. The vector helpers are hoisted
// function declarations, so the only setup step is making sure src/types.ts
// (which provides SemanticError) has been initialized.
var init_vector = __esm({
  "src/utils/vector.ts": function() {
    "use strict";
    init_types();
  }
});
|
|
172
|
+
|
|
173
|
+
// src/engine/SemanticEngine.ts
|
|
174
|
+
var SemanticEngine_exports = {};
__export(SemanticEngine_exports, {
  SemanticEngine: () => SemanticEngine
});
var import_transformers, DEFAULT_CONFIG, SemanticEngine;
var init_SemanticEngine = __esm({
  "src/engine/SemanticEngine.ts"() {
    "use strict";
    import_transformers = require("@xenova/transformers");
    init_types();
    init_vector();
    // Defaults; caller-supplied config keys override these in the constructor.
    DEFAULT_CONFIG = {
      modelName: "Xenova/all-MiniLM-L6-v2",
      maxLength: 512,
      quantized: true,
      onProgress: () => {
      }
    };
    /**
     * Wraps a feature-extraction pipeline to turn text into embeddings and to
     * score the similarity of two texts.
     */
    SemanticEngine = class {
      /**
       * @param {object} [config] - overrides for DEFAULT_CONFIG
       *   (`modelName`, `maxLength`, `quantized`, `onProgress`)
       */
      constructor(config = {}) {
        this.model = null;
        this.initializationPromise = null;
        this.config = { ...DEFAULT_CONFIG, ...config };
      }
      /**
       * Loads the model once. Calls made while a load is in flight share the
       * same promise; calls made after a successful load resolve immediately.
       *
       * @returns {Promise<void>}
       * @throws {SemanticError} MODEL_NOT_LOADED when loading fails
       */
      async initialize() {
        if (this.initializationPromise) {
          return this.initializationPromise;
        }
        if (this.model) {
          return Promise.resolve();
        }
        this.initializationPromise = this._performInitialization();
        try {
          await this.initializationPromise;
        } finally {
          // Cleared on success and failure so a failed load can be retried.
          this.initializationPromise = null;
        }
      }
      /**
       * Creates the pipeline and reports progress through `config.onProgress`.
       * Any failure is rethrown as MODEL_NOT_LOADED with the original error in
       * `details.error`.
       */
      async _performInitialization() {
        try {
          // Optional call: an explicit `onProgress: undefined` override must
          // not abort model loading.
          this.config.onProgress?.({
            status: "downloading",
            progress: 0
          });
          this.model = await (0, import_transformers.pipeline)(
            "feature-extraction",
            this.config.modelName,
            {
              quantized: this.config.quantized
            }
          );
          this.config.onProgress?.({
            status: "ready",
            progress: 100
          });
        } catch (error) {
          throw new SemanticError(
            "MODEL_NOT_LOADED" /* MODEL_NOT_LOADED */,
            `Failed to initialize model: ${error instanceof Error ? error.message : "Unknown error"}`,
            { modelName: this.config.modelName, error }
          );
        }
      }
      /** @throws {SemanticError} MODEL_NOT_LOADED when no model is loaded */
      assertInitialized() {
        if (!this.model) {
          throw new SemanticError(
            "MODEL_NOT_LOADED" /* MODEL_NOT_LOADED */,
            "Model not initialized. Call initialize() first."
          );
        }
      }
      /**
       * Embeds one text with mean pooling and normalization requested from
       * the pipeline.
       *
       * @param {string} text - non-empty text to embed
       * @returns {Promise<{embedding: number[], text: string, metadata: object}>}
       *   `metadata` holds `dimensions`, `modelName` and `processingTime` (ms)
       * @throws {SemanticError} MODEL_NOT_LOADED, INVALID_INPUT, or
       *   EMBEDDING_FAILED when the pipeline call throws
       */
      async embed(text) {
        this.assertInitialized();
        if (!text || typeof text !== "string") {
          throw new SemanticError(
            "INVALID_INPUT" /* INVALID_INPUT */,
            "Text must be a non-empty string",
            { text }
          );
        }
        const startTime = performance.now();
        try {
          const output = await this.model(text, {
            pooling: "mean",
            normalize: true
          });
          const embedding = Array.from(output.data);
          const processingTime = performance.now() - startTime;
          return {
            embedding,
            text,
            metadata: {
              dimensions: embedding.length,
              modelName: this.config.modelName,
              processingTime
            }
          };
        } catch (error) {
          throw new SemanticError(
            "EMBEDDING_FAILED" /* EMBEDDING_FAILED */,
            `Failed to generate embedding: ${error instanceof Error ? error.message : "Unknown error"}`,
            // Only a prefix of the text is kept in the error details.
            { text: text.substring(0, 100), error }
          );
        }
      }
      /**
       * Embeds many texts, `batchSize` at a time; texts inside one batch are
       * embedded concurrently.
       *
       * @param {string[]} texts - non-empty list of texts
       * @param {object} [options]
       * @param {number} [options.batchSize=32] - positive integer batch size
       * @param {(done: number, total: number) => void} [options.onProgress] -
       *   called after each batch
       * @returns {Promise<object[]>} embed() results in input order
       * @throws {SemanticError} INVALID_INPUT for an empty/non-array `texts`
       *   or a `batchSize` that is not a positive integer
       */
      async embedBatch(texts, options = {}) {
        this.assertInitialized();
        const { batchSize = 32, onProgress } = options;
        if (!Array.isArray(texts) || texts.length === 0) {
          throw new SemanticError(
            "INVALID_INPUT" /* INVALID_INPUT */,
            "Texts must be a non-empty array"
          );
        }
        // A batch size of 0 made the batch count Infinity (an endless loop of
        // empty batches); negative, NaN or infinite sizes silently returned
        // no results.
        if (!Number.isInteger(batchSize) || batchSize <= 0) {
          throw new SemanticError(
            "INVALID_INPUT" /* INVALID_INPUT */,
            "batchSize must be a positive integer",
            { batchSize }
          );
        }
        const results = [];
        for (let start = 0; start < texts.length; start += batchSize) {
          const end = Math.min(start + batchSize, texts.length);
          const batch = texts.slice(start, end);
          const batchResults = await Promise.all(
            batch.map((text) => this.embed(text))
          );
          results.push(...batchResults);
          if (onProgress) {
            onProgress(end, texts.length);
          }
        }
        return results;
      }
      /**
       * Scores how similar two texts are.
       *
       * @param {string} textA
       * @param {string} textB
       * @param {"cosine"|"euclidean"|"dot"} [method="cosine"] - "euclidean"
       *   returns the negated distance so that larger always means closer
       * @returns {Promise<{score: number, texts: string[], metadata: object}>}
       * @throws {SemanticError} MODEL_NOT_LOADED, INVALID_INPUT for an unknown
       *   method or invalid text, or any error raised by embed()
       */
      async similarity(textA, textB, method = "cosine") {
        this.assertInitialized();
        // Resolve the scorer before embedding so an unknown method fails fast
        // instead of after two model invocations.
        let computeScore;
        switch (method) {
          case "cosine":
            computeScore = cosineSimilarity;
            break;
          case "euclidean":
            computeScore = (a, b) => -euclideanDistance(a, b);
            break;
          case "dot":
            computeScore = dotProduct;
            break;
          default:
            throw new SemanticError(
              "INVALID_INPUT" /* INVALID_INPUT */,
              `Unknown similarity method: ${method}`
            );
        }
        const startTime = performance.now();
        const [resultA, resultB] = await Promise.all([
          this.embed(textA),
          this.embed(textB)
        ]);
        const score = computeScore(resultA.embedding, resultB.embedding);
        const processingTime = performance.now() - startTime;
        return {
          score,
          texts: [textA, textB],
          metadata: {
            method,
            processingTime
          }
        };
      }
      /** Drops the model reference; initialize() must be called again before use. */
      dispose() {
        this.model = null;
        this.initializationPromise = null;
      }
      /** @returns {boolean} true once a model is loaded */
      isReady() {
        return this.model !== null;
      }
      /** @returns {object} shallow copy of the effective configuration */
      getConfig() {
        return { ...this.config };
      }
    };
  }
});
|
|
350
|
+
|
|
351
|
+
// src/search/SemanticSearch.ts
|
|
352
|
+
var SemanticSearch_exports = {};
__export(SemanticSearch_exports, {
  SemanticSearch: () => SemanticSearch
});
var SemanticSearch;
var init_SemanticSearch = __esm({
  "src/search/SemanticSearch.ts"() {
    "use strict";
    init_types();
    init_vector();
    /**
     * In-memory semantic index: stores items together with their embeddings
     * and ranks them against a text query by cosine similarity.
     */
    SemanticSearch = class {
      /**
       * @param {object} engine - object providing `embed(text)` and
       *   `embedBatch(texts, options)`
       * @param {object} [config]
       * @param {number} [config.topK=10] - maximum results per search
       * @param {number} [config.threshold=0] - minimum score to report
       * @param {(item: any) => string} [config.textExtractor] - defaults to String(item)
       * @param {(item: any) => object} [config.metadataExtractor] - defaults to `{}`
       */
      constructor(engine, config = {}) {
        this.indexedItems = [];
        this.engine = engine;
        this.config = {
          topK: config.topK ?? 10,
          threshold: config.threshold ?? 0,
          textExtractor: config.textExtractor ?? ((item) => String(item)),
          metadataExtractor: config.metadataExtractor ?? (() => ({}))
        };
      }
      /**
       * Embeds `items` and adds them to the index.
       *
       * The index is only touched after embedding succeeds, so a failed
       * `replace` call no longer wipes the existing index. Entries are merged
       * with concat rather than `push(...entries)`, which can exceed the
       * engine's argument limit on large inputs.
       *
       * @param {any[]} items - non-empty list of items
       * @param {boolean} [replace=false] - discard existing entries first
       * @throws {SemanticError} INVALID_INPUT for an empty/non-array `items`;
       *   any error raised by the engine
       */
      async index(items, replace = false) {
        if (!Array.isArray(items) || items.length === 0) {
          throw new SemanticError(
            "INVALID_INPUT" /* INVALID_INPUT */,
            "Items must be a non-empty array"
          );
        }
        const texts = items.map(this.config.textExtractor);
        const results = await this.engine.embedBatch(texts, { batchSize: 32 });
        const newIndexItems = items.map((item, idx) => ({
          item,
          embedding: results[idx].embedding,
          metadata: this.config.metadataExtractor(item)
        }));
        this.indexedItems = replace ? newIndexItems : this.indexedItems.concat(newIndexItems);
      }
      /**
       * Ranks all indexed items against `query`.
       *
       * @param {string} query - text to search for
       * @param {object} [overrideConfig] - per-call overrides of `topK` / `threshold`
       * @returns {Promise<Array<{item: any, score: number, rank: number}>>}
       *   best match first; `rank` starts at 1
       * @throws {SemanticError} INVALID_INPUT when the index is empty; any
       *   error raised while embedding the query
       */
      async search(query, overrideConfig) {
        this._assertNotEmpty();
        return this._searchEntries(this.indexedItems, query, overrideConfig);
      }
      /**
       * Like search(), but only considers entries whose metadata passes
       * `filter`.
       *
       * Works on a filtered copy instead of temporarily swapping
       * `this.indexedItems`: the swap was visible to any search()/index()
       * call running while the query was being embedded, and entries indexed
       * in that window were discarded when the original array was restored.
       * A filter that matches nothing now yields an empty result rather than
       * the misleading "Index is empty" error.
       *
       * @param {string} query
       * @param {(metadata: object) => boolean} filter
       * @param {object} [config] - per-call overrides of `topK` / `threshold`
       * @returns {Promise<Array<{item: any, score: number, rank: number}>>}
       * @throws {SemanticError} INVALID_INPUT when the index itself is empty
       */
      async searchWithFilter(query, filter, config) {
        this._assertNotEmpty();
        const matching = this.indexedItems.filter((entry) => filter(entry.metadata ?? {}));
        if (matching.length === 0) {
          return [];
        }
        return this._searchEntries(matching, query, config);
      }
      /** Throws INVALID_INPUT when nothing has been indexed yet. */
      _assertNotEmpty() {
        if (this.indexedItems.length === 0) {
          throw new SemanticError(
            "INVALID_INPUT" /* INVALID_INPUT */,
            "Index is empty. Call index() before searching."
          );
        }
      }
      /** Embeds `query` and ranks the given entries; shared by search() and searchWithFilter(). */
      async _searchEntries(entries, query, overrideConfig) {
        const config = { ...this.config, ...overrideConfig };
        const queryResult = await this.engine.embed(query);
        const candidateEmbeddings = entries.map((entry) => entry.embedding);
        const topK = topKSimilar(queryResult.embedding, candidateEmbeddings, config.topK);
        const results = [];
        for (const [idx, score] of topK) {
          // The threshold is applied after the top-K cut, so fewer than topK
          // results may be returned.
          if (score < config.threshold) continue;
          results.push({
            item: entries[idx].item,
            score,
            rank: results.length + 1
          });
        }
        return results;
      }
      /**
       * Searches using the text extracted from `item` as the query.
       *
       * @param {any} item - item passed to `config.textExtractor`
       * @param {object} [config] - per-call overrides of `topK` / `threshold`
       */
      async findSimilar(item, config) {
        const text = this.config.textExtractor(item);
        return this.search(text, config);
      }
      /**
       * @returns {{itemCount: number, dimensions: number, memoryEstimate: string}}
       *   `memoryEstimate` counts 8 bytes per embedding component only.
       */
      getStats() {
        const itemCount = this.indexedItems.length;
        const dimensions = this.indexedItems[0]?.embedding.length ?? 0;
        const totalBytes = itemCount * dimensions * 8;
        const memoryEstimate = totalBytes < 1024 * 1024 ? `${(totalBytes / 1024).toFixed(2)} KB` : `${(totalBytes / (1024 * 1024)).toFixed(2)} MB`;
        return { itemCount, dimensions, memoryEstimate };
      }
      /** Removes every indexed entry. */
      clear() {
        this.indexedItems = [];
      }
      /** @returns {object[]} shallow copy of the index entries */
      exportIndex() {
        return [...this.indexedItems];
      }
      /** Replaces the index with a shallow copy of `index`. */
      importIndex(index) {
        this.indexedItems = [...index];
      }
    };
  }
});
|
|
447
|
+
|
|
448
|
+
// src/index.ts
|
|
449
|
+
var index_exports = {};
|
|
450
|
+
__export(index_exports, {
|
|
451
|
+
SemanticEngine: () => SemanticEngine,
|
|
452
|
+
SemanticError: () => SemanticError,
|
|
453
|
+
SemanticErrorCode: () => SemanticErrorCode,
|
|
454
|
+
SemanticSearch: () => SemanticSearch,
|
|
455
|
+
centroid: () => centroid,
|
|
456
|
+
cosineSimilarity: () => cosineSimilarity,
|
|
457
|
+
createSemanticEngine: () => createSemanticEngine,
|
|
458
|
+
createSemanticSearch: () => createSemanticSearch,
|
|
459
|
+
dotProduct: () => dotProduct,
|
|
460
|
+
euclideanDistance: () => euclideanDistance,
|
|
461
|
+
magnitude: () => magnitude,
|
|
462
|
+
normalize: () => normalize,
|
|
463
|
+
topKSimilar: () => topKSimilar
|
|
464
|
+
});
|
|
465
|
+
module.exports = __toCommonJS(index_exports);
|
|
466
|
+
init_SemanticEngine();
|
|
467
|
+
init_SemanticSearch();
|
|
468
|
+
init_vector();
|
|
469
|
+
init_types();
|
|
470
|
+
/**
 * Creates a SemanticEngine and waits for its model to load.
 *
 * @param {object} [config] - passed through to the SemanticEngine constructor
 * @returns {Promise<object>} the initialized engine
 */
async function createSemanticEngine(config) {
  // Bundled form of a dynamic import: resolve asynchronously, run the module
  // initializer, then read the class from the module's export object.
  const engineModule = await Promise.resolve().then(() => {
    init_SemanticEngine();
    return SemanticEngine_exports;
  });
  const engine = new engineModule.SemanticEngine(config);
  await engine.initialize();
  return engine;
}
|
|
476
|
+
/**
 * Creates an engine, builds a SemanticSearch on top of it and indexes `items`.
 *
 * The engine used to be created with default settings only, so callers of
 * this helper could not choose the model; the optional third argument is
 * forwarded to createSemanticEngine. Omitting it keeps the old behavior.
 *
 * @param {any[]} items - items to index
 * @param {object} [config] - SemanticSearch configuration
 * @param {object} [engineConfig] - SemanticEngine configuration
 * @returns {Promise<object>} the populated search instance
 */
async function createSemanticSearch(items, config, engineConfig) {
  const engine = await createSemanticEngine(engineConfig);
  const { SemanticSearch: SemanticSearch2 } = await Promise.resolve().then(() => (init_SemanticSearch(), SemanticSearch_exports));
  const search = new SemanticSearch2(engine, config);
  await search.index(items);
  return search;
}
|
|
483
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
484
|
+
0 && (module.exports = {
|
|
485
|
+
SemanticEngine,
|
|
486
|
+
SemanticError,
|
|
487
|
+
SemanticErrorCode,
|
|
488
|
+
SemanticSearch,
|
|
489
|
+
centroid,
|
|
490
|
+
cosineSimilarity,
|
|
491
|
+
createSemanticEngine,
|
|
492
|
+
createSemanticSearch,
|
|
493
|
+
dotProduct,
|
|
494
|
+
euclideanDistance,
|
|
495
|
+
magnitude,
|
|
496
|
+
normalize,
|
|
497
|
+
topKSimilar
|
|
498
|
+
});
|
|
499
|
+
/**
|
|
500
|
+
* Semantic Text Toolkit
|
|
501
|
+
* Production-grade semantic text analysis
|
|
502
|
+
*
|
|
503
|
+
* @module @crashbytes/semantic-text-toolkit
|
|
504
|
+
* @author Blackhole Software, LLC
|
|
505
|
+
* @license MIT
|
|
506
|
+
*/
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import {
|
|
2
|
+
SemanticEngine
|
|
3
|
+
} from "./chunk-XJ4PTDH6.mjs";
|
|
4
|
+
import {
|
|
5
|
+
SemanticSearch
|
|
6
|
+
} from "./chunk-ENOBULOJ.mjs";
|
|
7
|
+
import {
|
|
8
|
+
SemanticError,
|
|
9
|
+
SemanticErrorCode,
|
|
10
|
+
centroid,
|
|
11
|
+
cosineSimilarity,
|
|
12
|
+
dotProduct,
|
|
13
|
+
euclideanDistance,
|
|
14
|
+
magnitude,
|
|
15
|
+
normalize,
|
|
16
|
+
topKSimilar
|
|
17
|
+
} from "./chunk-TPAL6DKL.mjs";
|
|
18
|
+
|
|
19
|
+
// src/index.ts
|
|
20
|
+
/**
 * Creates a SemanticEngine and waits for its model to load.
 * The engine chunk is imported on demand.
 *
 * @param {object} [config] - passed through to the SemanticEngine constructor
 * @returns {Promise<object>} the initialized engine
 */
async function createSemanticEngine(config) {
  const engineChunk = await import("./SemanticEngine-3EGZZHKU.mjs");
  const engine = new engineChunk.SemanticEngine(config);
  await engine.initialize();
  return engine;
}
|
|
26
|
+
/**
 * Creates an engine, builds a SemanticSearch on top of it and indexes `items`.
 *
 * The engine used to be created with default settings only, so callers of
 * this helper could not choose the model; the optional third argument is
 * forwarded to createSemanticEngine. Omitting it keeps the old behavior.
 *
 * @param {any[]} items - items to index
 * @param {object} [config] - SemanticSearch configuration
 * @param {object} [engineConfig] - SemanticEngine configuration
 * @returns {Promise<object>} the populated search instance
 */
async function createSemanticSearch(items, config, engineConfig) {
  const engine = await createSemanticEngine(engineConfig);
  const { SemanticSearch: SemanticSearch2 } = await import("./SemanticSearch-CQZQEKEG.mjs");
  const search = new SemanticSearch2(engine, config);
  await search.index(items);
  return search;
}
|
|
33
|
+
export {
|
|
34
|
+
SemanticEngine,
|
|
35
|
+
SemanticError,
|
|
36
|
+
SemanticErrorCode,
|
|
37
|
+
SemanticSearch,
|
|
38
|
+
centroid,
|
|
39
|
+
cosineSimilarity,
|
|
40
|
+
createSemanticEngine,
|
|
41
|
+
createSemanticSearch,
|
|
42
|
+
dotProduct,
|
|
43
|
+
euclideanDistance,
|
|
44
|
+
magnitude,
|
|
45
|
+
normalize,
|
|
46
|
+
topKSimilar
|
|
47
|
+
};
|
|
48
|
+
/**
|
|
49
|
+
* Semantic Text Toolkit
|
|
50
|
+
* Production-grade semantic text analysis
|
|
51
|
+
*
|
|
52
|
+
* @module @crashbytes/semantic-text-toolkit
|
|
53
|
+
* @author Blackhole Software, LLC
|
|
54
|
+
* @license MIT
|
|
55
|
+
*/
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@crashbytes/semantic-text-toolkit",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Production-grade semantic text analysis with embeddings, similarity, and vector operations",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"import": "./dist/index.mjs",
|
|
11
|
+
"require": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsup src/index.ts --format cjs,esm --dts --clean",
|
|
16
|
+
"dev": "tsup src/index.ts --format cjs,esm --dts --watch",
|
|
17
|
+
"test": "jest --passWithNoTests",
|
|
18
|
+
"test:watch": "jest --watch",
|
|
19
|
+
"lint": "eslint src --ext .ts",
|
|
20
|
+
"prepublishOnly": "npm run build && npm test"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"semantic",
|
|
24
|
+
"embeddings",
|
|
25
|
+
"text-analysis",
|
|
26
|
+
"ml",
|
|
27
|
+
"ai",
|
|
28
|
+
"vector-search",
|
|
29
|
+
"similarity",
|
|
30
|
+
"nlp"
|
|
31
|
+
],
|
|
32
|
+
"author": "Blackhole Software, LLC",
|
|
33
|
+
"license": "MIT",
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "https://github.com/crashbytes/crashbytes-npmjs.git",
|
|
37
|
+
"directory": "semantic-text-toolkit"
|
|
38
|
+
},
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"@xenova/transformers": "^2.17.1"
|
|
41
|
+
},
|
|
42
|
+
"devDependencies": {
|
|
43
|
+
"@types/jest": "^29.5.12",
|
|
44
|
+
"@types/node": "^20.11.19",
|
|
45
|
+
"@typescript-eslint/eslint-plugin": "^6.21.0",
|
|
46
|
+
"@typescript-eslint/parser": "^6.21.0",
|
|
47
|
+
"eslint": "^8.56.0",
|
|
48
|
+
"jest": "^29.7.0",
|
|
49
|
+
"ts-jest": "^29.1.2",
|
|
50
|
+
"tsup": "^8.0.2",
|
|
51
|
+
"typescript": "^5.3.3"
|
|
52
|
+
},
|
|
53
|
+
"engines": {
|
|
54
|
+
"node": ">=16.0.0"
|
|
55
|
+
}
|
|
56
|
+
}
|