@lov3kaizen/agentsea-embeddings 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DJAURHAS.mjs → chunk-U6EYWYUD.mjs} +31 -1
- package/dist/chunking/index.js +33 -4
- package/dist/chunking/index.mjs +1 -1
- package/dist/index.js +36 -9
- package/dist/index.mjs +19 -20
- package/package.json +5 -5
|
@@ -2,8 +2,37 @@ import {
|
|
|
2
2
|
EmbeddingModel
|
|
3
3
|
} from "./chunk-QAITLJ2E.mjs";
|
|
4
4
|
|
|
5
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/index.js
|
|
6
|
+
import { webcrypto as crypto } from "crypto";
|
|
7
|
+
|
|
8
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/url-alphabet/index.js
|
|
9
|
+
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
10
|
+
|
|
11
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/index.js
|
|
12
|
+
var POOL_SIZE_MULTIPLIER = 128;
|
|
13
|
+
var pool;
|
|
14
|
+
var poolOffset;
|
|
15
|
+
function fillPool(bytes) {
|
|
16
|
+
if (!pool || pool.length < bytes) {
|
|
17
|
+
pool = Buffer.allocUnsafe(bytes * POOL_SIZE_MULTIPLIER);
|
|
18
|
+
crypto.getRandomValues(pool);
|
|
19
|
+
poolOffset = 0;
|
|
20
|
+
} else if (poolOffset + bytes > pool.length) {
|
|
21
|
+
crypto.getRandomValues(pool);
|
|
22
|
+
poolOffset = 0;
|
|
23
|
+
}
|
|
24
|
+
poolOffset += bytes;
|
|
25
|
+
}
|
|
26
|
+
function nanoid(size = 21) {
|
|
27
|
+
fillPool(size |= 0);
|
|
28
|
+
let id = "";
|
|
29
|
+
for (let i = poolOffset - size; i < poolOffset; i++) {
|
|
30
|
+
id += urlAlphabet[pool[i] & 63];
|
|
31
|
+
}
|
|
32
|
+
return id;
|
|
33
|
+
}
|
|
34
|
+
|
|
5
35
|
// src/chunking/BaseChunker.ts
|
|
6
|
-
import { nanoid } from "nanoid";
|
|
7
36
|
var defaultTokenCounter = (text) => {
|
|
8
37
|
return Math.ceil(text.length / 4);
|
|
9
38
|
};
|
|
@@ -1098,6 +1127,7 @@ async function chunk(text, strategy = "recursive", options) {
|
|
|
1098
1127
|
}
|
|
1099
1128
|
|
|
1100
1129
|
export {
|
|
1130
|
+
nanoid,
|
|
1101
1131
|
defaultTokenCounter,
|
|
1102
1132
|
BaseChunker,
|
|
1103
1133
|
mergeSmallChunks,
|
package/dist/chunking/index.js
CHANGED
|
@@ -39,8 +39,37 @@ __export(chunking_exports, {
|
|
|
39
39
|
});
|
|
40
40
|
module.exports = __toCommonJS(chunking_exports);
|
|
41
41
|
|
|
42
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/index.js
|
|
43
|
+
var import_node_crypto = require("crypto");
|
|
44
|
+
|
|
45
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/url-alphabet/index.js
|
|
46
|
+
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
47
|
+
|
|
48
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/index.js
|
|
49
|
+
var POOL_SIZE_MULTIPLIER = 128;
|
|
50
|
+
var pool;
|
|
51
|
+
var poolOffset;
|
|
52
|
+
function fillPool(bytes) {
|
|
53
|
+
if (!pool || pool.length < bytes) {
|
|
54
|
+
pool = Buffer.allocUnsafe(bytes * POOL_SIZE_MULTIPLIER);
|
|
55
|
+
import_node_crypto.webcrypto.getRandomValues(pool);
|
|
56
|
+
poolOffset = 0;
|
|
57
|
+
} else if (poolOffset + bytes > pool.length) {
|
|
58
|
+
import_node_crypto.webcrypto.getRandomValues(pool);
|
|
59
|
+
poolOffset = 0;
|
|
60
|
+
}
|
|
61
|
+
poolOffset += bytes;
|
|
62
|
+
}
|
|
63
|
+
function nanoid(size = 21) {
|
|
64
|
+
fillPool(size |= 0);
|
|
65
|
+
let id = "";
|
|
66
|
+
for (let i = poolOffset - size; i < poolOffset; i++) {
|
|
67
|
+
id += urlAlphabet[pool[i] & 63];
|
|
68
|
+
}
|
|
69
|
+
return id;
|
|
70
|
+
}
|
|
71
|
+
|
|
42
72
|
// src/chunking/BaseChunker.ts
|
|
43
|
-
var import_nanoid = require("nanoid");
|
|
44
73
|
var defaultTokenCounter = (text) => {
|
|
45
74
|
return Math.ceil(text.length / 4);
|
|
46
75
|
};
|
|
@@ -82,7 +111,7 @@ var BaseChunker = class {
|
|
|
82
111
|
if (options.source) metadata.source = options.source;
|
|
83
112
|
if (options.type) metadata.type = options.type;
|
|
84
113
|
return {
|
|
85
|
-
id:
|
|
114
|
+
id: nanoid(),
|
|
86
115
|
text,
|
|
87
116
|
index,
|
|
88
117
|
startPosition,
|
|
@@ -190,7 +219,7 @@ function splitLargeChunks(chunks, maxTokens, tokenCounter) {
|
|
|
190
219
|
if (testTokens > maxTokens && currentText) {
|
|
191
220
|
result.push({
|
|
192
221
|
...chunk2,
|
|
193
|
-
id:
|
|
222
|
+
id: nanoid(),
|
|
194
223
|
text: currentText,
|
|
195
224
|
startPosition: currentStart,
|
|
196
225
|
endPosition: currentStart + currentText.length,
|
|
@@ -206,7 +235,7 @@ function splitLargeChunks(chunks, maxTokens, tokenCounter) {
|
|
|
206
235
|
if (currentText) {
|
|
207
236
|
result.push({
|
|
208
237
|
...chunk2,
|
|
209
|
-
id:
|
|
238
|
+
id: nanoid(),
|
|
210
239
|
text: currentText,
|
|
211
240
|
startPosition: currentStart,
|
|
212
241
|
endPosition: currentStart + currentText.length,
|
package/dist/chunking/index.mjs
CHANGED
package/dist/index.js
CHANGED
|
@@ -1814,8 +1814,37 @@ function createHuggingFaceProvider(config) {
|
|
|
1814
1814
|
return new HuggingFaceProvider(config);
|
|
1815
1815
|
}
|
|
1816
1816
|
|
|
1817
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/index.js
|
|
1818
|
+
var import_node_crypto = require("crypto");
|
|
1819
|
+
|
|
1820
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/url-alphabet/index.js
|
|
1821
|
+
var urlAlphabet = "useandom-26T198340PX75pxJACKVERYMINDBUSHWOLF_GQZbfghjklqvwyzrict";
|
|
1822
|
+
|
|
1823
|
+
// ../../node_modules/.pnpm/nanoid@5.1.6/node_modules/nanoid/index.js
|
|
1824
|
+
var POOL_SIZE_MULTIPLIER = 128;
|
|
1825
|
+
var pool;
|
|
1826
|
+
var poolOffset;
|
|
1827
|
+
function fillPool(bytes) {
|
|
1828
|
+
if (!pool || pool.length < bytes) {
|
|
1829
|
+
pool = Buffer.allocUnsafe(bytes * POOL_SIZE_MULTIPLIER);
|
|
1830
|
+
import_node_crypto.webcrypto.getRandomValues(pool);
|
|
1831
|
+
poolOffset = 0;
|
|
1832
|
+
} else if (poolOffset + bytes > pool.length) {
|
|
1833
|
+
import_node_crypto.webcrypto.getRandomValues(pool);
|
|
1834
|
+
poolOffset = 0;
|
|
1835
|
+
}
|
|
1836
|
+
poolOffset += bytes;
|
|
1837
|
+
}
|
|
1838
|
+
function nanoid(size = 21) {
|
|
1839
|
+
fillPool(size |= 0);
|
|
1840
|
+
let id = "";
|
|
1841
|
+
for (let i = poolOffset - size; i < poolOffset; i++) {
|
|
1842
|
+
id += urlAlphabet[pool[i] & 63];
|
|
1843
|
+
}
|
|
1844
|
+
return id;
|
|
1845
|
+
}
|
|
1846
|
+
|
|
1817
1847
|
// src/chunking/BaseChunker.ts
|
|
1818
|
-
var import_nanoid = require("nanoid");
|
|
1819
1848
|
var defaultTokenCounter = (text) => {
|
|
1820
1849
|
return Math.ceil(text.length / 4);
|
|
1821
1850
|
};
|
|
@@ -1857,7 +1886,7 @@ var BaseChunker = class {
|
|
|
1857
1886
|
if (options.source) metadata.source = options.source;
|
|
1858
1887
|
if (options.type) metadata.type = options.type;
|
|
1859
1888
|
return {
|
|
1860
|
-
id:
|
|
1889
|
+
id: nanoid(),
|
|
1861
1890
|
text,
|
|
1862
1891
|
index,
|
|
1863
1892
|
startPosition,
|
|
@@ -1965,7 +1994,7 @@ function splitLargeChunks(chunks, maxTokens, tokenCounter) {
|
|
|
1965
1994
|
if (testTokens > maxTokens && currentText) {
|
|
1966
1995
|
result.push({
|
|
1967
1996
|
...chunk2,
|
|
1968
|
-
id:
|
|
1997
|
+
id: nanoid(),
|
|
1969
1998
|
text: currentText,
|
|
1970
1999
|
startPosition: currentStart,
|
|
1971
2000
|
endPosition: currentStart + currentText.length,
|
|
@@ -1981,7 +2010,7 @@ function splitLargeChunks(chunks, maxTokens, tokenCounter) {
|
|
|
1981
2010
|
if (currentText) {
|
|
1982
2011
|
result.push({
|
|
1983
2012
|
...chunk2,
|
|
1984
|
-
id:
|
|
2013
|
+
id: nanoid(),
|
|
1985
2014
|
text: currentText,
|
|
1986
2015
|
startPosition: currentStart,
|
|
1987
2016
|
endPosition: currentStart + currentText.length,
|
|
@@ -5467,7 +5496,6 @@ function createStore(type, config) {
|
|
|
5467
5496
|
}
|
|
5468
5497
|
|
|
5469
5498
|
// src/versioning/VersionRegistry.ts
|
|
5470
|
-
var import_nanoid2 = require("nanoid");
|
|
5471
5499
|
var import_eventemitter32 = __toESM(require("eventemitter3"));
|
|
5472
5500
|
var VersionRegistry = class extends import_eventemitter32.default {
|
|
5473
5501
|
versions = /* @__PURE__ */ new Map();
|
|
@@ -5488,7 +5516,7 @@ var VersionRegistry = class extends import_eventemitter32.default {
|
|
|
5488
5516
|
register(version) {
|
|
5489
5517
|
const newVersion = {
|
|
5490
5518
|
...version,
|
|
5491
|
-
id:
|
|
5519
|
+
id: nanoid(),
|
|
5492
5520
|
createdAt: Date.now(),
|
|
5493
5521
|
active: false,
|
|
5494
5522
|
deprecated: false
|
|
@@ -5719,7 +5747,6 @@ function createVersionRegistry(options) {
|
|
|
5719
5747
|
|
|
5720
5748
|
// src/quality/DriftDetector.ts
|
|
5721
5749
|
var import_eventemitter33 = __toESM(require("eventemitter3"));
|
|
5722
|
-
var import_nanoid3 = require("nanoid");
|
|
5723
5750
|
var DriftDetector = class extends import_eventemitter33.default {
|
|
5724
5751
|
reference = null;
|
|
5725
5752
|
config;
|
|
@@ -5754,7 +5781,7 @@ var DriftDetector = class extends import_eventemitter33.default {
|
|
|
5754
5781
|
varianceVector.push(variance(values));
|
|
5755
5782
|
}
|
|
5756
5783
|
this.reference = {
|
|
5757
|
-
id:
|
|
5784
|
+
id: nanoid(),
|
|
5758
5785
|
model,
|
|
5759
5786
|
version,
|
|
5760
5787
|
sampleCount: embeddings.length,
|
|
@@ -5929,7 +5956,7 @@ var DriftDetector = class extends import_eventemitter33.default {
|
|
|
5929
5956
|
*/
|
|
5930
5957
|
emitAlert(result) {
|
|
5931
5958
|
const alert = {
|
|
5932
|
-
id:
|
|
5959
|
+
id: nanoid(),
|
|
5933
5960
|
type: "drift_detected",
|
|
5934
5961
|
severity: result.severity,
|
|
5935
5962
|
message: `Embedding drift detected with score ${result.driftScore.toFixed(3)}`,
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BaseProvider,
|
|
3
|
+
CohereProvider,
|
|
4
|
+
HuggingFaceProvider,
|
|
5
|
+
LocalProvider,
|
|
6
|
+
OpenAIProvider,
|
|
7
|
+
VoyageProvider,
|
|
8
|
+
createCohereProvider,
|
|
9
|
+
createHuggingFaceProvider,
|
|
10
|
+
createLocalProvider,
|
|
11
|
+
createMockProvider,
|
|
12
|
+
createOpenAIProvider,
|
|
13
|
+
createRandomProvider,
|
|
14
|
+
createVoyageProvider
|
|
15
|
+
} from "./chunk-5GTQFVEI.mjs";
|
|
1
16
|
import {
|
|
2
17
|
BaseCache,
|
|
3
18
|
MemoryCache,
|
|
@@ -27,23 +42,9 @@ import {
|
|
|
27
42
|
createSemanticChunker,
|
|
28
43
|
defaultTokenCounter,
|
|
29
44
|
mergeSmallChunks,
|
|
45
|
+
nanoid,
|
|
30
46
|
splitLargeChunks
|
|
31
|
-
} from "./chunk-DJAURHAS.mjs";
|
|
32
|
-
import {
|
|
33
|
-
BaseProvider,
|
|
34
|
-
CohereProvider,
|
|
35
|
-
HuggingFaceProvider,
|
|
36
|
-
LocalProvider,
|
|
37
|
-
OpenAIProvider,
|
|
38
|
-
VoyageProvider,
|
|
39
|
-
createCohereProvider,
|
|
40
|
-
createHuggingFaceProvider,
|
|
41
|
-
createLocalProvider,
|
|
42
|
-
createMockProvider,
|
|
43
|
-
createOpenAIProvider,
|
|
44
|
-
createRandomProvider,
|
|
45
|
-
createVoyageProvider
|
|
46
|
-
} from "./chunk-5GTQFVEI.mjs";
|
|
47
|
+
} from "./chunk-U6EYWYUD.mjs";
|
|
47
48
|
import {
|
|
48
49
|
BaseStore,
|
|
49
50
|
ChromaStore,
|
|
@@ -433,7 +434,6 @@ function createEmbeddingManager(config) {
|
|
|
433
434
|
}
|
|
434
435
|
|
|
435
436
|
// src/versioning/VersionRegistry.ts
|
|
436
|
-
import { nanoid } from "nanoid";
|
|
437
437
|
import EventEmitter2 from "eventemitter3";
|
|
438
438
|
var VersionRegistry = class extends EventEmitter2 {
|
|
439
439
|
versions = /* @__PURE__ */ new Map();
|
|
@@ -685,7 +685,6 @@ function createVersionRegistry(options) {
|
|
|
685
685
|
|
|
686
686
|
// src/quality/DriftDetector.ts
|
|
687
687
|
import EventEmitter3 from "eventemitter3";
|
|
688
|
-
import { nanoid as nanoid2 } from "nanoid";
|
|
689
688
|
var DriftDetector = class extends EventEmitter3 {
|
|
690
689
|
reference = null;
|
|
691
690
|
config;
|
|
@@ -720,7 +719,7 @@ var DriftDetector = class extends EventEmitter3 {
|
|
|
720
719
|
varianceVector.push(variance(values));
|
|
721
720
|
}
|
|
722
721
|
this.reference = {
|
|
723
|
-
id: nanoid2(),
|
|
722
|
+
id: nanoid(),
|
|
724
723
|
model,
|
|
725
724
|
version,
|
|
726
725
|
sampleCount: embeddings.length,
|
|
@@ -895,7 +894,7 @@ var DriftDetector = class extends EventEmitter3 {
|
|
|
895
894
|
*/
|
|
896
895
|
emitAlert(result) {
|
|
897
896
|
const alert = {
|
|
898
|
-
id: nanoid2(),
|
|
897
|
+
id: nanoid(),
|
|
899
898
|
type: "drift_detected",
|
|
900
899
|
severity: result.severity,
|
|
901
900
|
message: `Embedding drift detected with score ${result.driftScore.toFixed(3)}`,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lov3kaizen/agentsea-embeddings",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Vector embedding lifecycle management toolkit for Node.js - versioning, caching, chunking, drift detection, and migration",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -61,12 +61,12 @@
|
|
|
61
61
|
},
|
|
62
62
|
"dependencies": {
|
|
63
63
|
"eventemitter3": "^5.0.0",
|
|
64
|
-
"lru-cache": "^10.0.0",
|
|
65
|
-
"nanoid": "^5.0.0"
|
|
64
|
+
"lru-cache": "^10.0.0"
|
|
66
65
|
},
|
|
67
66
|
"devDependencies": {
|
|
68
67
|
"@types/better-sqlite3": "^7.6.0",
|
|
69
68
|
"@types/node": "^20.0.0",
|
|
69
|
+
"nanoid": "^5.0.0",
|
|
70
70
|
"tsup": "^8.0.0",
|
|
71
71
|
"typescript": "^5.3.0",
|
|
72
72
|
"vitest": "^3.2.6"
|
|
@@ -94,8 +94,8 @@
|
|
|
94
94
|
"node": ">=20.0.0"
|
|
95
95
|
},
|
|
96
96
|
"scripts": {
|
|
97
|
-
"build": "tsup
|
|
98
|
-
"dev": "tsup
|
|
97
|
+
"build": "tsup",
|
|
98
|
+
"dev": "tsup --watch",
|
|
99
99
|
"test": "vitest run",
|
|
100
100
|
"test:watch": "vitest",
|
|
101
101
|
"test:coverage": "vitest run --coverage",
|