@sparkleideas/embeddings 3.0.0-alpha.12-patch.26 → 3.0.0-alpha.12-patch.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/__tests__/embedding-service.test.d.ts +2 -0
  2. package/dist/__tests__/embedding-service.test.d.ts.map +1 -0
  3. package/dist/__tests__/embedding-service.test.js +98 -0
  4. package/dist/__tests__/embedding-service.test.js.map +1 -0
  5. package/dist/chunking.d.ts +68 -0
  6. package/dist/chunking.d.ts.map +1 -0
  7. package/dist/chunking.js +251 -0
  8. package/dist/chunking.js.map +1 -0
  9. package/dist/embedding-service.d.ts +207 -0
  10. package/dist/embedding-service.d.ts.map +1 -0
  11. package/dist/embedding-service.js +965 -0
  12. package/dist/embedding-service.js.map +1 -0
  13. package/dist/hyperbolic.d.ts +103 -0
  14. package/dist/hyperbolic.d.ts.map +1 -0
  15. package/dist/hyperbolic.js +343 -0
  16. package/dist/hyperbolic.js.map +1 -0
  17. package/dist/index.d.ts +31 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +37 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/neural-integration.d.ts +203 -0
  22. package/dist/neural-integration.d.ts.map +1 -0
  23. package/dist/neural-integration.js +213 -0
  24. package/dist/neural-integration.js.map +1 -0
  25. package/dist/normalization.d.ts +73 -0
  26. package/dist/normalization.d.ts.map +1 -0
  27. package/dist/normalization.js +192 -0
  28. package/dist/normalization.js.map +1 -0
  29. package/dist/persistent-cache.d.ts +119 -0
  30. package/dist/persistent-cache.d.ts.map +1 -0
  31. package/dist/persistent-cache.js +337 -0
  32. package/dist/persistent-cache.js.map +1 -0
  33. package/dist/rvf-embedding-cache.d.ts +118 -0
  34. package/dist/rvf-embedding-cache.d.ts.map +1 -0
  35. package/dist/rvf-embedding-cache.js +458 -0
  36. package/dist/rvf-embedding-cache.js.map +1 -0
  37. package/dist/rvf-embedding-service.d.ts +79 -0
  38. package/dist/rvf-embedding-service.d.ts.map +1 -0
  39. package/dist/rvf-embedding-service.js +318 -0
  40. package/dist/rvf-embedding-service.js.map +1 -0
  41. package/dist/types.d.ts +237 -0
  42. package/dist/types.d.ts.map +1 -0
  43. package/dist/types.js +15 -0
  44. package/dist/types.js.map +1 -0
  45. package/package.json +3 -3
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=embedding-service.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-service.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/embedding-service.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Tests for EmbeddingService
3
+ */
4
+ import { describe, it, expect, beforeEach } from 'vitest';
5
+ import { createEmbeddingService, MockEmbeddingService, cosineSimilarity, euclideanDistance, dotProduct, computeSimilarity, } from '../index.js';
6
+ describe('EmbeddingService', () => {
7
+ describe('MockEmbeddingService', () => {
8
+ let service;
9
+ beforeEach(() => {
10
+ service = new MockEmbeddingService({ provider: 'mock', dimensions: 128 });
11
+ });
12
+ it('should generate embeddings with correct dimensions', async () => {
13
+ const result = await service.embed('Hello, world!');
14
+ expect(result.embedding).toHaveLength(128);
15
+ });
16
+ it('should generate deterministic embeddings for same text', async () => {
17
+ const result1 = await service.embed('test text');
18
+ const result2 = await service.embed('test text');
19
+ // Mock service should be deterministic
20
+ expect(Array.from(result1.embedding)).toEqual(Array.from(result2.embedding));
21
+ });
22
+ it('should handle batch embeddings', async () => {
23
+ const texts = ['text1', 'text2', 'text3'];
24
+ const results = await service.embedBatch(texts);
25
+ expect(results.embeddings).toHaveLength(3);
26
+ // Each embedding should have correct dimensions
27
+ results.embeddings.forEach((emb) => {
28
+ expect(emb.length).toBe(128);
29
+ });
30
+ });
31
+ });
32
+ describe('createEmbeddingService', () => {
33
+ it('should create mock service', () => {
34
+ const service = createEmbeddingService({
35
+ provider: 'mock',
36
+ dimensions: 64,
37
+ });
38
+ expect(service).toBeInstanceOf(MockEmbeddingService);
39
+ });
40
+ });
41
+ });
42
+ describe('Similarity Functions', () => {
43
+ const vec1 = new Float32Array([1, 0, 0]);
44
+ const vec2 = new Float32Array([1, 0, 0]);
45
+ const vec3 = new Float32Array([0, 1, 0]);
46
+ const vec4 = new Float32Array([-1, 0, 0]);
47
+ describe('cosineSimilarity', () => {
48
+ it('should return 1 for identical vectors', () => {
49
+ expect(cosineSimilarity(vec1, vec2)).toBeCloseTo(1);
50
+ });
51
+ it('should return 0 for orthogonal vectors', () => {
52
+ expect(cosineSimilarity(vec1, vec3)).toBeCloseTo(0);
53
+ });
54
+ it('should return -1 for opposite vectors', () => {
55
+ expect(cosineSimilarity(vec1, vec4)).toBeCloseTo(-1);
56
+ });
57
+ });
58
+ describe('euclideanDistance', () => {
59
+ it('should return 0 for identical vectors', () => {
60
+ expect(euclideanDistance(vec1, vec2)).toBeCloseTo(0);
61
+ });
62
+ it('should return sqrt(2) for unit orthogonal vectors', () => {
63
+ expect(euclideanDistance(vec1, vec3)).toBeCloseTo(Math.sqrt(2));
64
+ });
65
+ it('should return 2 for opposite unit vectors', () => {
66
+ expect(euclideanDistance(vec1, vec4)).toBeCloseTo(2);
67
+ });
68
+ });
69
+ describe('dotProduct', () => {
70
+ it('should return 1 for identical unit vectors', () => {
71
+ expect(dotProduct(vec1, vec2)).toBeCloseTo(1);
72
+ });
73
+ it('should return 0 for orthogonal vectors', () => {
74
+ expect(dotProduct(vec1, vec3)).toBeCloseTo(0);
75
+ });
76
+ it('should return -1 for opposite unit vectors', () => {
77
+ expect(dotProduct(vec1, vec4)).toBeCloseTo(-1);
78
+ });
79
+ });
80
+ describe('computeSimilarity', () => {
81
+ it('should use cosine metric by default', () => {
82
+ const result = computeSimilarity(vec1, vec2);
83
+ expect(result.metric).toBe('cosine');
84
+ expect(result.score).toBeCloseTo(1);
85
+ });
86
+ it('should support euclidean metric', () => {
87
+ const result = computeSimilarity(vec1, vec3, 'euclidean');
88
+ expect(result.metric).toBe('euclidean');
89
+ expect(result.score).toBeCloseTo(Math.sqrt(2));
90
+ });
91
+ it('should support dot product metric', () => {
92
+ const result = computeSimilarity(vec1, vec4, 'dot');
93
+ expect(result.metric).toBe('dot');
94
+ expect(result.score).toBeCloseTo(-1);
95
+ });
96
+ });
97
+ });
98
+ //# sourceMappingURL=embedding-service.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-service.test.js","sourceRoot":"","sources":["../../src/__tests__/embedding-service.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC1D,OAAO,EACL,sBAAsB,EACtB,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,EACjB,UAAU,EACV,iBAAiB,GAClB,MAAM,aAAa,CAAC;AAErB,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;QACpC,IAAI,OAA6B,CAAC;QAElC,UAAU,CAAC,GAAG,EAAE;YACd,OAAO,GAAG,IAAI,oBAAoB,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC;QAC5E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACjD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAEjD,uCAAuC;YACvC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;QAC/E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YAC1C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEhD,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC3C,gDAAgD;YAChD,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;gBACjC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/B,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACpC,MAAM,OAAO,GAAG,sBAAsB,CAAC;gBACrC,QAAQ,EAAE,MAAM;gBAChB,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,MAAM,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,oBAAoB,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,M
AAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE1C,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;YAC3D,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,IAA
I,EAAE,WAAW,CAAC,CAAC;YAC1D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACxC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Document Chunking Utilities
3
+ *
4
+ * Features:
5
+ * - Configurable chunk size and overlap
6
+ * - Sentence-aware splitting
7
+ * - Paragraph-aware splitting
8
+ * - Token-based chunking (approximate)
9
+ * - Metadata tracking for reconstruction
10
+ */
11
+ /**
12
+ * Chunking configuration
13
+ */
14
+ export interface ChunkingConfig {
15
+ /** Maximum chunk size in characters, or in approximate tokens (1 token ≈ 4 characters) when strategy is 'token' (default: 512) */
16
+ maxChunkSize?: number;
17
+ /** Overlap between chunks in characters, or in approximate tokens (1 token ≈ 4 characters) when strategy is 'token' (default: 50) */
18
+ overlap?: number;
19
+ /** Strategy for splitting (default: 'sentence') */
20
+ strategy?: 'character' | 'sentence' | 'paragraph' | 'token';
21
+ /** Minimum chunk size in characters, or in approximate tokens (1 token ≈ 4 characters) when strategy is 'token' (default: 100) */
22
+ minChunkSize?: number;
23
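+ /** Include metadata with chunks (default: true); chunking.js records this flag in the returned config but its chunkers never read it */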
24
+ includeMetadata?: boolean;
25
+ }
26
+ /**
27
+ * Chunk result with metadata
28
+ */
29
+ export interface Chunk {
30
+ /** Chunk text content */
31
+ text: string;
32
+ /** Zero-based position of this chunk in the sequence of chunks produced for the document */
33
+ index: number;
34
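+ /** Approximate start offset; measured in the whitespace-normalized text for every strategy except 'paragraph', which uses the original text */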
35
+ startPos: number;
36
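+ /** Approximate end offset; measured in the whitespace-normalized text for every strategy except 'paragraph', which uses the original text */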
37
+ endPos: number;
38
+ /** Character count */
39
+ length: number;
40
+ /** Approximate token count (chars / 4) */
41
+ tokenCount: number;
42
+ }
43
+ /**
44
+ * Chunked document result
45
+ */
46
+ export interface ChunkedDocument {
47
+ /** Array of chunks */
48
+ chunks: Chunk[];
49
+ /** Original text length */
50
+ originalLength: number;
51
+ /** Total chunks created */
52
+ totalChunks: number;
53
+ /** Configuration used */
54
+ config: Required<ChunkingConfig>;
55
+ }
56
+ /**
57
+ * Split text into chunks with overlap
58
+ */
59
+ export declare function chunkText(text: string, config?: ChunkingConfig): ChunkedDocument;
60
+ /**
61
+ * Estimate token count for text
62
+ */
63
+ export declare function estimateTokens(text: string): number;
64
+ /**
65
+ * Reconstruct original text from chunks (approximate)
66
+ */
67
+ export declare function reconstructFromChunks(chunks: Chunk[]): string;
68
+ //# sourceMappingURL=chunking.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.d.ts","sourceRoot":"","sources":["../src/chunking.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,sDAAsD;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,WAAW,GAAG,UAAU,GAAG,WAAW,GAAG,OAAO,CAAC;IAC5D,wCAAwC;IACxC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mCAAmC;IACnC,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,yBAAyB;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,sCAAsC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,MAAM,EAAE,MAAM,CAAC;IACf,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,sBAAsB;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,2BAA2B;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,yBAAyB;IACzB,MAAM,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;CAClC;AAMD;;GAEG;AACH,wBAAgB,SAAS,CACvB,IAAI,EAAE,MAAM,EACZ,MAAM,GAAE,cAAmB,GAC1B,eAAe,CAqCjB;AAyMD;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAgC7D"}
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Document Chunking Utilities
3
+ *
4
+ * Features:
5
+ * - Configurable chunk size and overlap
6
+ * - Sentence-aware splitting
7
+ * - Paragraph-aware splitting
8
+ * - Token-based chunking (approximate)
9
+ * - Metadata tracking for reconstruction
10
+ */
11
+ // Sentence boundary patterns
12
+ const SENTENCE_ENDINGS = /(?<=[.!?])\s+(?=[A-Z])/g;
13
+ const PARAGRAPH_BREAKS = /\n\n+/g;
/**
 * Split text into chunks with overlap.
 *
 * Fills in defaults for every missing option, collapses whitespace runs to a
 * single space for all strategies except 'paragraph' (which needs the blank
 * lines of the original text), and delegates to the matching chunker. Any
 * unrecognized strategy is handled like 'sentence'.
 *
 * @param {string} text - Document to split.
 * @param {object} [config] - Partial chunking options; nullish fields fall back to defaults.
 * @returns {{chunks: object[], originalLength: number, totalChunks: number, config: object}}
 *   The chunks, the length of the untouched input, the chunk count and the resolved options.
 */
export function chunkText(text, config = {}) {
    const resolved = {
        maxChunkSize: config.maxChunkSize ?? 512,
        overlap: config.overlap ?? 50,
        strategy: config.strategy ?? 'sentence',
        minChunkSize: config.minChunkSize ?? 100,
        includeMetadata: config.includeMetadata ?? true,
    };
    // Collapse every whitespace run so offsets are computed on single-spaced text.
    const collapsed = text.replace(/\s+/g, ' ').trim();
    const { strategy } = resolved;
    let pieces;
    if (strategy === 'character') {
        pieces = chunkByCharacter(collapsed, resolved);
    }
    else if (strategy === 'paragraph') {
        // Paragraph detection relies on blank lines, so it receives the original text.
        pieces = chunkByParagraph(text, resolved);
    }
    else if (strategy === 'token') {
        pieces = chunkByToken(collapsed, resolved);
    }
    else {
        // 'sentence' and anything unrecognized.
        pieces = chunkBySentence(collapsed, resolved);
    }
    return {
        chunks: pieces,
        originalLength: text.length,
        totalChunks: pieces.length,
        config: resolved,
    };
}
/**
 * Simple character-based chunking with overlap.
 *
 * Slices `text` into windows of at most `maxChunkSize` characters. Every
 * window after the first starts `overlap` characters before the end of the
 * previous one.
 *
 * @param {string} text - Text to slice.
 * @param {{maxChunkSize: number, overlap: number}} config - Only these two fields are read.
 * @returns {Array<{text: string, index: number, startPos: number, endPos: number, length: number, tokenCount: number}>}
 *   Windows in document order; empty when `text` is empty.
 * @throws {RangeError} When `text` is non-empty and `maxChunkSize` is not a number >= 1.
 */
function chunkByCharacter(text, config) {
    const { maxChunkSize, overlap } = config;
    if (text.length === 0) {
        return [];
    }
    if (!(maxChunkSize >= 1)) {
        // A window that cannot hold a single character would never reach the end of the text.
        throw new RangeError(`maxChunkSize must be >= 1, received ${maxChunkSize}`);
    }
    // An overlap >= maxChunkSize would keep the window in place (or move it
    // backwards) and loop forever; cap it so each step advances by at least one character.
    const effectiveOverlap = Math.min(overlap, maxChunkSize - 1);
    const chunks = [];
    let pos = 0;
    let index = 0;
    while (pos < text.length) {
        const endPos = Math.min(pos + maxChunkSize, text.length);
        const piece = text.slice(pos, endPos);
        chunks.push({
            text: piece,
            index,
            startPos: pos,
            endPos,
            length: piece.length,
            tokenCount: Math.ceil(piece.length / 4),
        });
        // The window that touches the end of the text is the last one.
        if (endPos >= text.length) {
            break;
        }
        // Step back by the overlap so neighbouring windows share context.
        pos = endPos - effectiveOverlap;
        index++;
    }
    return chunks;
}
/**
 * Sentence-aware chunking - keeps sentences intact.
 *
 * Splits `text` on the module-level SENTENCE_ENDINGS pattern and packs whole
 * sentences into a buffer. When the next sentence would push the buffer past
 * `maxChunkSize` and the buffer already holds at least `minChunkSize`
 * characters, the buffer is emitted and the next chunk is seeded with the last
 * `overlap` characters of the emitted one. A single sentence longer than
 * `maxChunkSize` is never cut, so chunks can exceed the limit.
 *
 * Offsets are estimated by assuming one separator character between
 * sentences, which holds for the single-spaced text chunkText passes in.
 *
 * @param {string} text - Text to split.
 * @param {{maxChunkSize: number, overlap: number, minChunkSize: number}} config - Size limits in characters.
 * @returns {Array<{text: string, index: number, startPos: number, endPos: number, length: number, tokenCount: number}>}
 *   Chunks in document order; `length` and `tokenCount` describe the trimmed `text`.
 */
function chunkBySentence(text, config) {
    const { maxChunkSize, overlap, minChunkSize } = config;
    // Split into sentences
    const sentences = text.split(SENTENCE_ENDINGS).filter(s => s.trim().length > 0);
    const chunks = [];
    let currentChunk = '';
    let currentStart = 0;
    let index = 0;
    let textPos = 0;
    // Emit the buffered sentences as one chunk ending at `endPos`.
    const emit = (endPos) => {
        // Measure the trimmed text so `length`/`tokenCount` agree with `text`
        // (the overlap seed may start with a space).
        const body = currentChunk.trim();
        chunks.push({
            text: body,
            index,
            startPos: currentStart,
            endPos,
            length: body.length,
            tokenCount: Math.ceil(body.length / 4),
        });
        index++;
    };
    for (const sentence of sentences) {
        const trimmedSentence = sentence.trim();
        const wouldOverflow = currentChunk.length + trimmedSentence.length > maxChunkSize;
        // `length > 0` keeps an empty buffer from being emitted when minChunkSize is 0.
        if (wouldOverflow && currentChunk.length > 0 && currentChunk.length >= minChunkSize) {
            emit(textPos);
            // slice(-0) is slice(0) and would copy the whole previous chunk,
            // so a zero (or negative) overlap carries nothing over.
            const carry = overlap > 0 ? currentChunk.slice(-overlap) : '';
            currentChunk = carry.length > 0 ? `${carry} ${trimmedSentence}` : trimmedSentence;
            // The carried text can be shorter than `overlap`; never report a negative offset.
            currentStart = Math.max(0, textPos - carry.length);
        }
        else {
            currentChunk += (currentChunk.length > 0 ? ' ' : '') + trimmedSentence;
        }
        textPos += trimmedSentence.length + 1;
    }
    // Add final chunk
    if (currentChunk.trim().length > 0) {
        emit(text.length);
    }
    return chunks;
}
/**
 * Paragraph-aware chunking.
 *
 * Splits `text` on the module-level PARAGRAPH_BREAKS pattern and packs whole
 * paragraphs (joined by a blank line) into a buffer. The buffer is emitted
 * when the next paragraph would push it past `maxChunkSize` and it already
 * holds at least `minChunkSize` characters. A paragraph that is itself longer
 * than `maxChunkSize` is handed to chunkBySentence and its sub-chunks are
 * re-indexed and shifted to this paragraph's offset.
 *
 * Offsets are estimated by assuming trimmed paragraphs separated by exactly
 * two characters.
 *
 * @param {string} text - Original (non-normalized) text.
 * @param {{maxChunkSize: number, minChunkSize: number}} config - Size limits; passed on unchanged to chunkBySentence.
 * @returns {Array<{text: string, index: number, startPos: number, endPos: number, length: number, tokenCount: number}>}
 *   Chunks in document order.
 */
function chunkByParagraph(text, config) {
    const { maxChunkSize, minChunkSize } = config;
    // Split by paragraph breaks
    const paragraphs = text.split(PARAGRAPH_BREAKS).filter(p => p.trim().length > 0);
    const chunks = [];
    let currentChunk = '';
    let currentStart = 0;
    let index = 0;
    let textPos = 0;
    // Emit the buffered paragraphs as one chunk ending at `endPos`.
    const flush = (endPos) => {
        chunks.push({
            text: currentChunk.trim(),
            index,
            startPos: currentStart,
            endPos,
            length: currentChunk.length,
            tokenCount: Math.ceil(currentChunk.length / 4),
        });
        index++;
    };
    for (const paragraph of paragraphs) {
        const trimmedPara = paragraph.trim();
        // Estimated offset of the paragraph that follows this one (+2 for the paragraph break).
        const nextPos = textPos + trimmedPara.length + 2;
        if (trimmedPara.length > maxChunkSize) {
            // If single paragraph exceeds max, fall back to sentence chunking
            if (currentChunk.length > 0) {
                flush(textPos);
            }
            for (const subChunk of chunkBySentence(trimmedPara, config)) {
                chunks.push({
                    ...subChunk,
                    index,
                    startPos: textPos + subChunk.startPos,
                    endPos: textPos + subChunk.endPos,
                });
                index++;
            }
            currentChunk = '';
            // The next buffer begins with the following paragraph, i.e. after the break.
            currentStart = nextPos;
        }
        else if (currentChunk.length + trimmedPara.length > maxChunkSize && currentChunk.length >= minChunkSize) {
            flush(textPos);
            currentChunk = trimmedPara;
            currentStart = textPos;
        }
        else {
            currentChunk += (currentChunk.length > 0 ? '\n\n' : '') + trimmedPara;
        }
        textPos = nextPos;
    }
    // Add final chunk
    if (currentChunk.trim().length > 0) {
        flush(text.length);
    }
    return chunks;
}
/**
 * Token-based chunking (approximate).
 *
 * Interprets `maxChunkSize`, `overlap` and `minChunkSize` as token counts,
 * scales each by 4 (the chars-per-token estimate used throughout this module)
 * and delegates to sentence-aware chunking with the resulting character limits.
 *
 * @param {string} text - Text to split.
 * @param {{maxChunkSize: number, overlap: number, minChunkSize: number}} config - Limits expressed in tokens.
 * @returns {object[]} Whatever chunkBySentence returns for the scaled limits.
 */
function chunkByToken(text, config) {
    const CHARS_PER_TOKEN = 4;
    const { maxChunkSize, overlap, minChunkSize } = config;
    return chunkBySentence(text, {
        ...config,
        maxChunkSize: maxChunkSize * CHARS_PER_TOKEN,
        overlap: overlap * CHARS_PER_TOKEN,
        minChunkSize: minChunkSize * CHARS_PER_TOKEN,
    });
}
/**
 * Estimate token count for text.
 *
 * Uses the rough rule of four characters per token, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated number of tokens (0 for an empty string).
 */
export function estimateTokens(text) {
    const charsPerToken = 4;
    const { length: charCount } = text;
    return Math.ceil(charCount / charsPerToken);
}
/**
 * Reconstruct original text from chunks (approximate).
 *
 * Orders the chunks by `index` and stitches them together. For each pair of
 * neighbours, the longest text (up to 100 characters) that is both a suffix of
 * the earlier chunk and a prefix of the later one is treated as overlap and
 * dropped from the later chunk. When an overlap is found the remainder is
 * appended directly, so a word that straddles the overlap boundary stays in
 * one piece; when none is found the chunks are joined with a single space.
 * Whitespace runs in the multi-chunk result are collapsed to one space.
 *
 * @param {Array<{text: string, index: number}>} chunks - Chunks in any order; the array is not mutated.
 * @returns {string} The stitched text; '' for no chunks, the chunk text unchanged for a single chunk.
 */
export function reconstructFromChunks(chunks) {
    if (chunks.length === 0)
        return '';
    if (chunks.length === 1)
        return chunks[0].text;
    // Sort a copy by index so the caller's array keeps its order.
    const sorted = [...chunks].sort((a, b) => a.index - b.index);
    let result = sorted[0].text;
    for (let i = 1; i < sorted.length; i++) {
        const prevText = sorted[i - 1].text;
        const currText = sorted[i].text;
        // Only the last/first 100 characters are searched for a shared run.
        const searchWindow = Math.min(100, prevText.length, currText.length);
        // Find longest common overlap
        let overlap = 0;
        for (let len = searchWindow; len > 0; len--) {
            if (currText.startsWith(prevText.slice(-len))) {
                overlap = len;
                break;
            }
        }
        // A detected overlap means the chunks are contiguous: inserting a
        // separator here would split words cut by the chunk boundary.
        result += overlap > 0 ? currText.slice(overlap) : ` ${currText}`;
    }
    return result.replace(/\s+/g, ' ').trim();
}
251
+ //# sourceMappingURL=chunking.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.js","sourceRoot":"","sources":["../src/chunking.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAkDH,6BAA6B;AAC7B,MAAM,gBAAgB,GAAG,yBAAyB,CAAC;AACnD,MAAM,gBAAgB,GAAG,QAAQ,CAAC;AAElC;;GAEG;AACH,MAAM,UAAU,SAAS,CACvB,IAAY,EACZ,SAAyB,EAAE;IAE3B,MAAM,WAAW,GAA6B;QAC5C,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,GAAG;QACxC,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;QAC7B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU;QACvC,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,GAAG;QACxC,eAAe,EAAE,MAAM,CAAC,eAAe,IAAI,IAAI;KAChD,CAAC;IAEF,uBAAuB;IACvB,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAExD,IAAI,MAAe,CAAC;IAEpB,QAAQ,WAAW,CAAC,QAAQ,EAAE,CAAC;QAC7B,KAAK,WAAW;YACd,MAAM,GAAG,gBAAgB,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC;YACvD,MAAM;QACR,KAAK,UAAU;YACb,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC;YACtD,MAAM;QACR,KAAK,WAAW;YACd,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,+BAA+B;YAC7E,MAAM;QACR,KAAK,OAAO;YACV,MAAM,GAAG,YAAY,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC;YACnD,MAAM;QACR;YACE,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC;IAC1D,CAAC;IAED,OAAO;QACL,MAAM;QACN,cAAc,EAAE,IAAI,CAAC,MAAM;QAC3B,WAAW,EAAE,MAAM,CAAC,MAAM;QAC1B,MAAM,EAAE,WAAW;KACpB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CACvB,IAAY,EACZ,MAAgC;IAEhC,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,GAAG,MAAM,CAAC;IAEzC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,OAAO,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACzD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QAE1C,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,SAAS;YACf,KAAK;YACL,QAAQ,EAAE,GAAG;YACb,MAAM;YACN,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC;SAC5C,CAAC,CAAC;QAEH,6BAA6B;QAC7B,GAAG,GAAG,MAAM,GAAG,OAAO,CAAC;QACvB,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,OAAO,EAAE,CAAC;YACjC,MAAM;QACR,CAAC;QACD,KAAK,EAAE,CAAC;IACV,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CACtB,IAAY,EACZ,MAAgC;IAEhC,MAAM,EAA
E,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC;IAEvD,uBAAuB;IACvB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEhF,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;QAExC,+DAA+D;QAC/D,IAAI,YAAY,CAAC,MAAM,GAAG,eAAe,CAAC,MAAM,GAAG,YAAY,IAAI,YAAY,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;YACvG,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE;gBACzB,KAAK;gBACL,QAAQ,EAAE,YAAY;gBACtB,MAAM,EAAE,OAAO;gBACf,MAAM,EAAE,YAAY,CAAC,MAAM;gBAC3B,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;aAC/C,CAAC,CAAC;YAEH,6DAA6D;YAC7D,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC;YACjD,YAAY,GAAG,WAAW,GAAG,GAAG,GAAG,eAAe,CAAC;YACnD,YAAY,GAAG,OAAO,GAAG,OAAO,CAAC;YACjC,KAAK,EAAE,CAAC;QACV,CAAC;aAAM,CAAC;YACN,YAAY,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC;QACzE,CAAC;QAED,OAAO,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;IACxC,CAAC;IAED,kBAAkB;IAClB,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE;YACzB,KAAK;YACL,QAAQ,EAAE,YAAY;YACtB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,YAAY,CAAC,MAAM;YAC3B,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;SAC/C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CACvB,IAAY,EACZ,MAAgC;IAEhC,MAAM,EAAE,YAAY,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC;IAE9C,4BAA4B;IAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEjF,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;QAErC,kEAA
kE;QAClE,IAAI,WAAW,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;YACtC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE;oBACzB,KAAK;oBACL,QAAQ,EAAE,YAAY;oBACtB,MAAM,EAAE,OAAO;oBACf,MAAM,EAAE,YAAY,CAAC,MAAM;oBAC3B,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;iBAC/C,CAAC,CAAC;gBACH,KAAK,EAAE,CAAC;YACV,CAAC;YAED,wCAAwC;YACxC,MAAM,SAAS,GAAG,eAAe,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC;oBACV,GAAG,QAAQ;oBACX,KAAK;oBACL,QAAQ,EAAE,OAAO,GAAG,QAAQ,CAAC,QAAQ;oBACrC,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC,MAAM;iBAClC,CAAC,CAAC;gBACH,KAAK,EAAE,CAAC;YACV,CAAC;YAED,YAAY,GAAG,EAAE,CAAC;YAClB,YAAY,GAAG,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC;QAC9C,CAAC;aAAM,IAAI,YAAY,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,GAAG,YAAY,IAAI,YAAY,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;YAC1G,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE;gBACzB,KAAK;gBACL,QAAQ,EAAE,YAAY;gBACtB,MAAM,EAAE,OAAO;gBACf,MAAM,EAAE,YAAY,CAAC,MAAM;gBAC3B,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;aAC/C,CAAC,CAAC;YAEH,YAAY,GAAG,WAAW,CAAC;YAC3B,YAAY,GAAG,OAAO,CAAC;YACvB,KAAK,EAAE,CAAC;QACV,CAAC;aAAM,CAAC;YACN,YAAY,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC;QACxE,CAAC;QAED,OAAO,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,yBAAyB;IAC9D,CAAC;IAED,kBAAkB;IAClB,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE;YACzB,KAAK;YACL,QAAQ,EAAE,YAAY;YACtB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,YAAY,CAAC,MAAM;YAC3B,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;SAC/C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CACnB,IAAY,EACZ,MAAgC;IAEhC,+EAA+E;IAC/E,MAAM,UAAU,GAA6B;QAC3C,GAAG,MAAM;QACT,YAAY,EAAE,MAAM,CAAC,YAAY,GAAG,CAAC;QACrC,OAAO,EAAE,MAAM,CAAC,OAAO,GAAG,CAAC;QAC3B,YAAY,EAAE,MAAM,CAAC,YAAY,GAAG,CAAC;KACtC,CAAC;IAEF,oDAAoD;IACpD,OAAO,eAAe,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;A
ACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,wDAAwD;IACxD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAAe;IACnD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE/C,gBAAgB;IAChB,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAE7D,wDAAwD;IACxD,IAAI,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAEhC,mDAAmD;QACnD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC5E,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC;QACtD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;QAEpD,8BAA8B;QAC9B,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,IAAI,GAAG,GAAG,WAAW,EAAE,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC;YAC3C,IAAI,UAAU,CAAC,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;gBAClD,OAAO,GAAG,GAAG,CAAC;gBACd,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAED,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC5C,CAAC"}
@@ -0,0 +1,207 @@
1
+ /**
2
+ * V3 Embedding Service Implementation
3
+ *
4
+ * Production embedding service aligned with agentic-flow@alpha:
5
+ * - OpenAI provider (text-embedding-3-small/large)
6
+ * - Transformers.js provider (local ONNX models)
7
+ * - Mock provider (development/testing)
+ * - Agentic-flow provider (ONNX-based OptimizedEmbedder; see AgenticFlowEmbeddingService)
8
+ *
9
+ * Performance Targets:
10
+ * - Single embedding: <100ms (API), <50ms (local)
11
+ * - Batch embedding: <500ms for 10 items
12
+ * - Cache hit: <1ms
13
+ */
14
+ import { EventEmitter } from 'events';
15
+ import type { EmbeddingProvider, EmbeddingConfig, OpenAIEmbeddingConfig, TransformersEmbeddingConfig, MockEmbeddingConfig, AgenticFlowEmbeddingConfig, EmbeddingResult, BatchEmbeddingResult, IEmbeddingService, EmbeddingEvent, EmbeddingEventListener, SimilarityMetric, SimilarityResult, NormalizationType } from './types.js';
16
+ import { PersistentEmbeddingCache } from './persistent-cache.js';
17
+ /** Generic in-memory cache keyed by `K` with values `V`, constructed with a numeric `maxSize` and exposing hit/miss statistics. NOTE(review): the eviction policy is presumably least-recently-used, per the class name; the implementation is not part of this declaration file. */ declare class LRUCache<K, V> {
18
+ /** NOTE(review): presumably the limit passed to the constructor; confirm in the implementation. */ private readonly maxSize;
19
+ /** Backing store; its concrete type is not exposed in this declaration. */ private cache;
20
+ /** Hit counter; also reported by getStats(). */ private hits;
21
+ /** Miss counter; also reported by getStats(). */ private misses;
22
+ /** @param maxSize Numeric size limit for the cache. */ constructor(maxSize: number);
23
+ /** Look up `key`. The return type allows `undefined` (presumably when the key is absent). */ get(key: K): V | undefined;
24
+ /** Store `value` under `key`. */ set(key: K, value: V): void;
25
+ /** Clear the cache. NOTE(review): whether the hit/miss counters are reset too is not visible here. */ clear(): void;
26
+ /** Current size as a number (presumably the entry count). */ get size(): number;
27
+ /** Hit rate as a number. NOTE(review): presumably hits / (hits + misses); confirm the value when no lookups have happened. */ get hitRate(): number;
28
+ /** Snapshot object with `size`, `maxSize`, `hits`, `misses` and `hitRate`. */ getStats(): {
29
+ size: number;
30
+ maxSize: number;
31
+ hits: number;
32
+ misses: number;
33
+ hitRate: number;
34
+ };
35
+ }
36
+ /** Abstract base for the embedding services declared in this file. Extends `EventEmitter` (imported from 'events') and implements `IEmbeddingService`. Subclasses must provide `provider`, `embed` and `embedBatch`. */ declare abstract class BaseEmbeddingService extends EventEmitter implements IEmbeddingService {
37
+ /** Configuration passed to the constructor. */ protected readonly config: EmbeddingConfig;
38
+ /** Provider identifier; each concrete subclass declares its own value. */ abstract readonly provider: EmbeddingProvider;
39
+ /** In-memory cache from string keys to `Float32Array` values. NOTE(review): keys are presumably the input text; confirm. */ protected cache: LRUCache<string, Float32Array>;
40
+ /** Optional persistent cache; `null` is allowed by the type (presumably when none is configured). */ protected persistentCache: PersistentEmbeddingCache | null;
41
+ /** Set of registered `EmbeddingEventListener`s. */ protected embeddingListeners: Set<EmbeddingEventListener>;
42
+ /** Normalization mode consulted by applyNormalization. NOTE(review): presumably derived from `config`; confirm. */ protected normalizationType: NormalizationType;
43
+ /** @param config Service configuration. */ constructor(config: EmbeddingConfig);
44
+ /** Embed a single text. @returns Promise of an `EmbeddingResult`. */ abstract embed(text: string): Promise<EmbeddingResult>;
45
+ /** Embed several texts in one call. @returns Promise of a `BatchEmbeddingResult`. */ abstract embedBatch(texts: string[]): Promise<BatchEmbeddingResult>;
46
+ /**
47
+ * Apply normalization to embedding if configured
+ *
+ * @param embedding Embedding vector to normalize.
+ * @returns A `Float32Array`. NOTE(review): whether the input is modified in place or copied is not visible in this declaration.
48
+ */
49
+ protected applyNormalization(embedding: Float32Array): Float32Array;
50
+ /**
51
+ * Check persistent cache for embedding
+ *
+ * @param text Text whose embedding is looked up.
+ * @returns Promise of the cached `Float32Array`, or `null` (presumably on a miss or when no persistent cache exists).
52
+ */
53
+ protected checkPersistentCache(text: string): Promise<Float32Array | null>;
54
+ /**
55
+ * Store embedding in persistent cache
+ *
+ * @param text Text the embedding belongs to.
+ * @param embedding Embedding vector to store.
56
+ */
57
+ protected storePersistentCache(text: string, embedding: Float32Array): Promise<void>;
58
+ /** Emit an `EmbeddingEvent`. NOTE(review): presumably delivered to `embeddingListeners`; confirm. */ protected emitEvent(event: EmbeddingEvent): void;
59
+ /** Register an embedding event listener. */ addEventListener(listener: EmbeddingEventListener): void;
60
+ /** Unregister an embedding event listener. */ removeEventListener(listener: EmbeddingEventListener): void;
61
+ /** Clear cached embeddings. NOTE(review): whether the persistent cache is cleared too is not visible here. */ clearCache(): void;
62
+ /** Cache statistics: `size`, `maxSize` and `hitRate` (no raw hit/miss counts, unlike LRUCache.getStats). */ getCacheStats(): {
63
+ size: number;
64
+ maxSize: number;
65
+ hitRate: number;
66
+ };
67
+ /** Asynchronous shutdown hook; resolves with no value. */ shutdown(): Promise<void>;
68
+ }
69
+ /** Concrete `BaseEmbeddingService` configured by an `OpenAIEmbeddingConfig`. The private fields below are declared without types; their defaults and units are not visible in this declaration file. */ export declare class OpenAIEmbeddingService extends BaseEmbeddingService {
70
+ /** Provider identifier for this service. */ readonly provider: EmbeddingProvider;
71
+ /** API key (private). */ private readonly apiKey;
72
+ /** Model setting (private). */ private readonly model;
73
+ /** Base URL setting (private). */ private readonly baseURL;
74
+ /** Timeout setting (private). NOTE(review): units are not visible here, presumably milliseconds; confirm. */ private readonly timeout;
75
+ /** Retry limit (private). */ private readonly maxRetries;
76
+ /** @param config OpenAI-specific embedding configuration. */ constructor(config: OpenAIEmbeddingConfig);
77
+ /** Embed a single text. @returns Promise of an `EmbeddingResult`. */ embed(text: string): Promise<EmbeddingResult>;
78
+ /** Embed several texts. @returns Promise of a `BatchEmbeddingResult`. */ embedBatch(texts: string[]): Promise<BatchEmbeddingResult>;
79
+ /** Private helper; its signature is not exposed. NOTE(review): presumably performs the HTTP request to the API. */ private callOpenAI;
80
+ }
81
+ /** Concrete `BaseEmbeddingService` configured by a `TransformersEmbeddingConfig` (the file header describes this provider as Transformers.js with local ONNX models). */ export declare class TransformersEmbeddingService extends BaseEmbeddingService {
82
+ /** Provider identifier for this service. */ readonly provider: EmbeddingProvider;
83
+ /** Private pipeline handle; its type is not exposed. */ private pipeline;
84
+ /** Private model name. */ private readonly modelName;
85
+ /** Private initialization flag. NOTE(review): presumably set by initialize(); confirm. */ private initialized;
86
+ /** @param config Transformers-specific embedding configuration. */ constructor(config: TransformersEmbeddingConfig);
87
+ /** Private initializer; its signature is not exposed. */ private initialize;
88
+ /** Embed a single text. @returns Promise of an `EmbeddingResult`. */ embed(text: string): Promise<EmbeddingResult>;
89
+ /** Embed several texts. @returns Promise of a `BatchEmbeddingResult`. */ embedBatch(texts: string[]): Promise<BatchEmbeddingResult>;
90
+ }
91
+ /** Concrete `BaseEmbeddingService` for development/testing (per the file header). The constructor config is optional and partial. */ export declare class MockEmbeddingService extends BaseEmbeddingService {
92
+ /** Provider identifier for this service. */ readonly provider: EmbeddingProvider;
93
+ /** Private dimensions setting. NOTE(review): presumably the length of generated vectors; confirm. */ private readonly dimensions;
94
+ /** Private simulated-latency setting. NOTE(review): units are not visible here. */ private readonly simulatedLatency;
95
+ /** @param config Optional partial mock configuration; defaults are not visible in this declaration. */ constructor(config?: Partial<MockEmbeddingConfig>);
96
+ /** Embed a single text. @returns Promise of an `EmbeddingResult`. */ embed(text: string): Promise<EmbeddingResult>;
97
+ /** Embed several texts. @returns Promise of a `BatchEmbeddingResult`. */ embedBatch(texts: string[]): Promise<BatchEmbeddingResult>;
98
+ /**
99
+ * Generate deterministic hash-based embedding
+ *
+ * Private helper; its signature is not exposed in this declaration file.
100
+ */
101
+ private hashEmbedding;
102
+ }
103
+ /**
104
+ * Agentic-Flow embedding service using OptimizedEmbedder
105
+ *
106
+ * Features:
107
+ * - ONNX-based embeddings with SIMD acceleration
108
+ * - 256-entry LRU cache with FNV-1a hash
109
+ * - 8x loop unrolling for cosine similarity
110
+ * - Pre-allocated buffers (no GC pressure)
111
+ * - 3-4x faster batch processing
+ *
+ * NOTE(review): the feature list describes the underlying embedder and cannot be
+ * verified from this declaration file. The class overrides `getCacheStats` and
+ * `shutdown` from BaseEmbeddingService.
112
+ */
113
+ export declare class AgenticFlowEmbeddingService extends BaseEmbeddingService {
114
+ /** Provider identifier for this service. */ readonly provider: EmbeddingProvider;
115
+ /** Private embedder handle; its type is not exposed. */ private embedder;
116
+ /** Private initialization flag. NOTE(review): presumably set by initialize(); confirm. */ private initialized;
117
+ /** Private model identifier. */ private readonly modelId;
118
+ /** Private dimensions setting. */ private readonly dimensions;
119
+ /** Private size setting for the embedder's own cache. */ private readonly embedderCacheSize;
120
+ /** Private model directory setting. */ private readonly modelDir;
121
+ /** Private auto-download flag. */ private readonly autoDownload;
122
+ /** @param config Agentic-flow specific embedding configuration. */ constructor(config: AgenticFlowEmbeddingConfig);
123
+ /** Private initializer; its signature is not exposed. */ private initialize;
124
+ /** Embed a single text. @returns Promise of an `EmbeddingResult`. */ embed(text: string): Promise<EmbeddingResult>;
125
+ /** Embed several texts. @returns Promise of a `BatchEmbeddingResult`. */ embedBatch(texts: string[]): Promise<BatchEmbeddingResult>;
126
+ /**
127
+ * Get combined cache statistics from both our LRU cache and embedder's internal cache
+ *
+ * @returns Either the base shape (`size`, `maxSize`, `hitRate`, all numbers) or an
+ * extended shape where `size` and `maxSize` are typed `any` and an `embedderCache`
+ * field (typed `any`) is added. NOTE(review): which shape is returned when is not
+ * visible in this declaration file.
128
+ */
129
+ getCacheStats(): {
130
+ size: number;
131
+ maxSize: number;
132
+ hitRate: number;
133
+ } | {
134
+ size: any;
135
+ maxSize: any;
136
+ hitRate: number;
137
+ embedderCache: any;
138
+ };
139
+ /** Asynchronous shutdown; resolves with no value. */ shutdown(): Promise<void>;
140
+ }
141
+ /**
142
+ * Create embedding service based on configuration (sync version)
143
+ * Note: For 'auto' provider or smart fallback, use createEmbeddingServiceAsync
+ *
+ * @param config Embedding configuration.
+ * @returns An `IEmbeddingService`, returned synchronously. How `config` maps to a
+ * concrete service class is not visible in this declaration file.
144
+ */
145
+ export declare function createEmbeddingService(config: EmbeddingConfig): IEmbeddingService;
146
+ /**
147
+ * Extended config with auto provider option
+ *
+ * Accepted by createEmbeddingServiceAsync. Only `provider` is required;
+ * every other field is optional.
148
+ */
149
+ export interface AutoEmbeddingConfig {
150
+ /** Provider: 'auto' will pick best available (agentic-flow > transformers > mock) */
151
+ provider: EmbeddingProvider | 'auto';
152
+ /** Fallback provider if primary fails */
153
+ fallback?: EmbeddingProvider;
154
+ /** Auto-install agentic-flow if not available (default: true for 'auto' provider) */
155
+ autoInstall?: boolean;
156
+ /** Model ID for agentic-flow */
157
+ modelId?: string;
158
+ /** Model name for transformers */
159
+ model?: string;
160
+ /** Dimensions (NOTE(review): presumably the embedding vector length; the default is not visible here) */
161
+ dimensions?: number;
162
+ /** Cache size (NOTE(review): presumably the maximum number of in-memory cache entries; confirm) */
163
+ cacheSize?: number;
164
+ /** OpenAI API key (required for openai provider) */
165
+ apiKey?: string;
166
+ }
167
+ /**
168
+ * Create embedding service with automatic provider detection and fallback
169
+ *
170
+ * Features:
171
+ * - 'auto' provider picks best available: agentic-flow > transformers > mock
172
+ * - Automatic fallback if primary provider fails to initialize
173
+ * - Pre-validates provider availability before returning
174
+ *
+ * @param config Provider selection and options; only `provider` is required.
+ * @returns Promise of an `IEmbeddingService`. NOTE(review): rejection behavior
+ * when neither the primary nor the fallback provider is usable is not visible
+ * in this declaration file.
+ *
175
+ * @example
176
+ * // Auto-select best provider
177
+ * const service = await createEmbeddingServiceAsync({ provider: 'auto' });
178
+ *
179
+ * // Try agentic-flow, fallback to transformers
180
+ * const service = await createEmbeddingServiceAsync({
181
+ * provider: 'agentic-flow',
182
+ * fallback: 'transformers'
183
+ * });
184
+ */
185
+ export declare function createEmbeddingServiceAsync(config: AutoEmbeddingConfig): Promise<IEmbeddingService>;
186
+ /**
187
+ * Convenience function for quick embeddings
+ *
+ * @param text Text to embed.
+ * @param config Optional partial embedding configuration. NOTE(review): the
+ * provider used when it is omitted is not visible in this declaration file.
+ * @returns Promise of the embedding as a `Float32Array` or `number[]`.
188
+ */
189
+ export declare function getEmbedding(text: string, config?: Partial<EmbeddingConfig>): Promise<Float32Array | number[]>;
190
+ /**
191
+ * Compute cosine similarity between two embeddings
+ *
+ * @param a First embedding (`Float32Array` or `number[]`).
+ * @param b Second embedding (`Float32Array` or `number[]`).
+ * @returns The similarity as a number. NOTE(review): handling of mismatched
+ * lengths or zero-magnitude vectors is not visible in this declaration file.
192
+ */
193
+ export declare function cosineSimilarity(a: Float32Array | number[], b: Float32Array | number[]): number;
194
+ /**
195
+ * Compute Euclidean distance between two embeddings
+ *
+ * @param a First embedding (`Float32Array` or `number[]`).
+ * @param b Second embedding (`Float32Array` or `number[]`).
+ * @returns The distance as a number. NOTE(review): handling of mismatched
+ * lengths is not visible in this declaration file.
196
+ */
197
+ export declare function euclideanDistance(a: Float32Array | number[], b: Float32Array | number[]): number;
198
+ /**
199
+ * Compute dot product between two embeddings
+ *
+ * @param a First embedding (`Float32Array` or `number[]`).
+ * @param b Second embedding (`Float32Array` or `number[]`).
+ * @returns The dot product as a number. NOTE(review): handling of mismatched
+ * lengths is not visible in this declaration file.
200
+ */
201
+ export declare function dotProduct(a: Float32Array | number[], b: Float32Array | number[]): number;
202
+ /**
203
+ * Compute similarity using specified metric
+ *
+ * @param a First embedding (`Float32Array` or `number[]`).
+ * @param b Second embedding (`Float32Array` or `number[]`).
+ * @param metric Optional `SimilarityMetric`. NOTE(review): the default used
+ * when it is omitted is not visible in this declaration file.
+ * @returns A `SimilarityResult` (type declared in ./types.js).
204
+ */
205
+ export declare function computeSimilarity(a: Float32Array | number[], b: Float32Array | number[], metric?: SimilarityMetric): SimilarityResult;
206
+ export {};
207
+ //# sourceMappingURL=embedding-service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-service.d.ts","sourceRoot":"","sources":["../src/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,2BAA2B,EAC3B,mBAAmB,EACnB,0BAA0B,EAE1B,eAAe,EACf,oBAAoB,EACpB,iBAAiB,EACjB,cAAc,EACd,sBAAsB,EACtB,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,EAElB,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,wBAAwB,EAAE,MAAM,uBAAuB,CAAC;AAOjE,cAAM,QAAQ,CAAC,CAAC,EAAE,CAAC;IAKL,OAAO,CAAC,QAAQ,CAAC,OAAO;IAJpC,OAAO,CAAC,KAAK,CAAwB;IACrC,OAAO,CAAC,IAAI,CAAK;IACjB,OAAO,CAAC,MAAM,CAAK;gBAEU,OAAO,EAAE,MAAM;IAE5C,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,SAAS;IAa1B,GAAG,CAAC,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI;IAa3B,KAAK,IAAI,IAAI;IAMb,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,OAAO,IAAI,MAAM,CAGpB;IAED,QAAQ;;;;;;;CAST;AAMD,uBAAe,oBAAqB,SAAQ,YAAa,YAAW,iBAAiB;IAOvE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,eAAe;IANtD,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAC;IAC9C,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAChD,SAAS,CAAC,eAAe,EAAE,wBAAwB,GAAG,IAAI,CAAQ;IAClE,SAAS,CAAC,kBAAkB,EAAE,GAAG,CAAC,sBAAsB,CAAC,CAAa;IACtE,SAAS,CAAC,iBAAiB,EAAE,iBAAiB,CAAC;gBAEhB,MAAM,EAAE,eAAe;IAgBtD,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IACtD,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAEnE;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,SAAS,EAAE,YAAY,GAAG,YAAY;IAOnE;;OAEG;cACa,oBAAoB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;IAKhF;;OAEG;cACa,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1F,SAAS,CAAC,SAAS,CAAC,KAAK,EAAE,cAAc,GAAG,IAAI;IAWhD,gBAAgB,CAAC,QAAQ,EAAE,sBAAsB,GAAG,IAAI;IAIxD,mBAAmB,CAAC,QAAQ,EAAE,sBAAsB,GAAG,IAAI;IAI3D,UAAU,IAAI,IAAI;IAMlB,aAAa;;;;;IASP,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;CAIhC;AAMD,qBAAa,sBAAuB,SAAQ,oBAAoB;IAC9D,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAY;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QA
AQ,CAAC,UAAU,CAAS;gBAExB,MAAM,EAAE,qBAAqB;IASnC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAwC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;YA6DlD,UAAU;CA+CzB;AAMD,qBAAa,4BAA6B,SAAQ,oBAAoB;IACpE,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAkB;IACtD,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,EAAE,2BAA2B;YAKjC,UAAU;IAYlB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAsC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;CAoCjE;AAMD,qBAAa,oBAAqB,SAAQ,oBAAoB;IAC5D,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAU;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;gBAE9B,MAAM,GAAE,OAAO,CAAC,mBAAmB,CAAM;IAa/C,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAgC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAiChE;;OAEG;IACH,OAAO,CAAC,aAAa;CAyBtB;AAMD;;;;;;;;;GASG;AACH,qBAAa,2BAA4B,SAAQ,oBAAoB;IACnE,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAkB;IACtD,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAS;IAC3C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAqB;IAC9C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAU;gBAE3B,MAAM,EAAE,0BAA0B;YAShC,UAAU;IA+ElB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAsC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAuDhE;;OAEG;IACM,aAAa;;;;;;;;;;IAgBP,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;CAMzC;AA+CD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAgBjF;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,qFAAqF;IACrF,QAAQ,EAAE,iBAAiB,GAAG,MAAM,CAAC;IACrC,yCAAyC;IACzC,QAAQ,CAAC,EAAE,iBAAiB,CAAC;IAC7B,qFAAqF;IACrF,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,gCAAgC;IAChC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kCAAkC;IAClC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,2BAA2B,CAC/C,MAAM,EAAE,mBAAmB,GAC1B,OAAO,CAAC,iBAAiB,CAAC,CAsH5B;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC
,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,GAChC,OAAO,CAAC,YAAY,GAAG,MAAM,EAAE,CAAC,CAalC;AAMD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GACzB,MAAM,CAiBR;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GACzB,MAAM,CAYR;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GACzB,MAAM,CAWR;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,MAAM,GAAE,gBAA2B,GAClC,gBAAgB,CAYlB"}