@matperez/coderag 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/ast-chunking.d.ts +40 -0
- package/dist/ast-chunking.d.ts.map +1 -0
- package/dist/ast-chunking.js +88 -0
- package/dist/ast-chunking.js.map +1 -0
- package/dist/ast-chunking.test.d.ts +5 -0
- package/dist/ast-chunking.test.d.ts.map +1 -0
- package/dist/ast-chunking.test.js +173 -0
- package/dist/ast-chunking.test.js.map +1 -0
- package/dist/code-tokenizer.d.ts +62 -0
- package/dist/code-tokenizer.d.ts.map +1 -0
- package/dist/code-tokenizer.js +129 -0
- package/dist/code-tokenizer.js.map +1 -0
- package/dist/code-tokenizer.test.d.ts +5 -0
- package/dist/code-tokenizer.test.d.ts.map +1 -0
- package/dist/code-tokenizer.test.js +96 -0
- package/dist/code-tokenizer.test.js.map +1 -0
- package/dist/db/client-pg.d.ts +16 -0
- package/dist/db/client-pg.d.ts.map +1 -0
- package/dist/db/client-pg.js +38 -0
- package/dist/db/client-pg.js.map +1 -0
- package/dist/db/client.d.ts +36 -0
- package/dist/db/client.d.ts.map +1 -0
- package/dist/db/client.js +81 -0
- package/dist/db/client.js.map +1 -0
- package/dist/db/migrations-pg.d.ts +6 -0
- package/dist/db/migrations-pg.d.ts.map +1 -0
- package/dist/db/migrations-pg.js +88 -0
- package/dist/db/migrations-pg.js.map +1 -0
- package/dist/db/migrations.d.ts +9 -0
- package/dist/db/migrations.d.ts.map +1 -0
- package/dist/db/migrations.js +164 -0
- package/dist/db/migrations.js.map +1 -0
- package/dist/db/schema-pg.d.ts +611 -0
- package/dist/db/schema-pg.d.ts.map +1 -0
- package/dist/db/schema-pg.js +66 -0
- package/dist/db/schema-pg.js.map +1 -0
- package/dist/db/schema.d.ts +630 -0
- package/dist/db/schema.d.ts.map +1 -0
- package/dist/db/schema.js +85 -0
- package/dist/db/schema.js.map +1 -0
- package/dist/embeddings.d.ts +92 -0
- package/dist/embeddings.d.ts.map +1 -0
- package/dist/embeddings.js +275 -0
- package/dist/embeddings.js.map +1 -0
- package/dist/embeddings.test.d.ts +5 -0
- package/dist/embeddings.test.d.ts.map +1 -0
- package/dist/embeddings.test.js +255 -0
- package/dist/embeddings.test.js.map +1 -0
- package/dist/hybrid-search.d.ts +47 -0
- package/dist/hybrid-search.d.ts.map +1 -0
- package/dist/hybrid-search.js +215 -0
- package/dist/hybrid-search.js.map +1 -0
- package/dist/hybrid-search.test.d.ts +5 -0
- package/dist/hybrid-search.test.d.ts.map +1 -0
- package/dist/hybrid-search.test.js +252 -0
- package/dist/hybrid-search.test.js.map +1 -0
- package/dist/incremental-tfidf.d.ts +77 -0
- package/dist/incremental-tfidf.d.ts.map +1 -0
- package/dist/incremental-tfidf.js +248 -0
- package/dist/incremental-tfidf.js.map +1 -0
- package/dist/incremental-tfidf.test.d.ts +5 -0
- package/dist/incremental-tfidf.test.d.ts.map +1 -0
- package/dist/incremental-tfidf.test.js +276 -0
- package/dist/incremental-tfidf.test.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +205 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +1331 -0
- package/dist/indexer.js.map +1 -0
- package/dist/indexer.test.d.ts +12 -0
- package/dist/indexer.test.d.ts.map +1 -0
- package/dist/indexer.test.js +471 -0
- package/dist/indexer.test.js.map +1 -0
- package/dist/language-config.d.ts +54 -0
- package/dist/language-config.d.ts.map +1 -0
- package/dist/language-config.js +75 -0
- package/dist/language-config.js.map +1 -0
- package/dist/search-cache.d.ts +63 -0
- package/dist/search-cache.d.ts.map +1 -0
- package/dist/search-cache.js +118 -0
- package/dist/search-cache.js.map +1 -0
- package/dist/search-cache.test.d.ts +5 -0
- package/dist/search-cache.test.d.ts.map +1 -0
- package/dist/search-cache.test.js +194 -0
- package/dist/search-cache.test.js.map +1 -0
- package/dist/storage-factory.d.ts +11 -0
- package/dist/storage-factory.d.ts.map +1 -0
- package/dist/storage-factory.js +17 -0
- package/dist/storage-factory.js.map +1 -0
- package/dist/storage-persistent-pg.d.ts +75 -0
- package/dist/storage-persistent-pg.d.ts.map +1 -0
- package/dist/storage-persistent-pg.js +579 -0
- package/dist/storage-persistent-pg.js.map +1 -0
- package/dist/storage-persistent-pg.test.d.ts +7 -0
- package/dist/storage-persistent-pg.test.d.ts.map +1 -0
- package/dist/storage-persistent-pg.test.js +90 -0
- package/dist/storage-persistent-pg.test.js.map +1 -0
- package/dist/storage-persistent-types.d.ts +110 -0
- package/dist/storage-persistent-types.d.ts.map +1 -0
- package/dist/storage-persistent-types.js +5 -0
- package/dist/storage-persistent-types.js.map +1 -0
- package/dist/storage-persistent.d.ts +231 -0
- package/dist/storage-persistent.d.ts.map +1 -0
- package/dist/storage-persistent.js +897 -0
- package/dist/storage-persistent.js.map +1 -0
- package/dist/storage-persistent.test.d.ts +5 -0
- package/dist/storage-persistent.test.d.ts.map +1 -0
- package/dist/storage-persistent.test.js +325 -0
- package/dist/storage-persistent.test.js.map +1 -0
- package/dist/storage.d.ts +63 -0
- package/dist/storage.d.ts.map +1 -0
- package/dist/storage.js +67 -0
- package/dist/storage.js.map +1 -0
- package/dist/storage.test.d.ts +5 -0
- package/dist/storage.test.d.ts.map +1 -0
- package/dist/storage.test.js +157 -0
- package/dist/storage.test.js.map +1 -0
- package/dist/tfidf.d.ts +97 -0
- package/dist/tfidf.d.ts.map +1 -0
- package/dist/tfidf.js +308 -0
- package/dist/tfidf.js.map +1 -0
- package/dist/tfidf.test.d.ts +5 -0
- package/dist/tfidf.test.d.ts.map +1 -0
- package/dist/tfidf.test.js +181 -0
- package/dist/tfidf.test.js.map +1 -0
- package/dist/utils.d.ts +61 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +264 -0
- package/dist/utils.js.map +1 -0
- package/dist/utils.test.d.ts +5 -0
- package/dist/utils.test.d.ts.map +1 -0
- package/dist/utils.test.js +94 -0
- package/dist/utils.test.js.map +1 -0
- package/dist/vector-storage.d.ts +120 -0
- package/dist/vector-storage.d.ts.map +1 -0
- package/dist/vector-storage.js +264 -0
- package/dist/vector-storage.js.map +1 -0
- package/dist/vector-storage.test.d.ts +5 -0
- package/dist/vector-storage.test.d.ts.map +1 -0
- package/dist/vector-storage.test.js +345 -0
- package/dist/vector-storage.test.js.map +1 -0
- package/package.json +85 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for in-memory storage
|
|
3
|
+
*/
|
|
4
|
+
import { beforeEach, describe, expect, it } from 'vitest';
|
|
5
|
+
import { MemoryStorage } from './storage.js';
|
|
6
|
+
describe('MemoryStorage', () => {
|
|
7
|
+
let storage;
|
|
8
|
+
beforeEach(() => {
|
|
9
|
+
storage = new MemoryStorage();
|
|
10
|
+
});
|
|
11
|
+
describe('storeFile', () => {
    // A stored record must come back unchanged through getFile.
    it('should store a file', async () => {
        const path = 'test.ts';
        const record = {
            path,
            content: 'console.log("test");',
            size: 100,
            mtime: Date.now(),
            language: 'TypeScript',
            hash: 'abc123',
        };

        await storage.storeFile(record);

        expect(await storage.getFile(path)).toEqual(record);
    });

    // A second store under the same path replaces the first record.
    it('should overwrite existing file', async () => {
        const path = 'test.ts';
        const stale = { path, content: 'old content', size: 100, mtime: Date.now(), hash: 'hash1' };
        const fresh = { path, content: 'new content', size: 150, mtime: Date.now(), hash: 'hash2' };

        await storage.storeFile(stale);
        await storage.storeFile(fresh);

        const current = await storage.getFile(path);
        expect(current?.content).toBe('new content');
        expect(current?.hash).toBe('hash2');
    });
});
|
|
47
|
+
describe('getFile', () => {
    // Round trip: what was stored is what is returned.
    it('should retrieve stored file', async () => {
        const path = 'test.ts';
        const record = { path, content: 'test', size: 100, mtime: Date.now(), hash: 'abc' };

        await storage.storeFile(record);

        expect(await storage.getFile(path)).toEqual(record);
    });

    // Unknown paths are reported as null.
    it('should return null for non-existent file', async () => {
        const missing = await storage.getFile('nonexistent.ts');

        expect(missing).toBeNull();
    });
});
|
|
65
|
+
describe('getAllFiles', () => {
    // Every stored record must be present in the listing (order is not asserted).
    it('should return all stored files', async () => {
        const first = { path: 'file1.ts', content: 'a', size: 1, mtime: Date.now(), hash: 'h1' };
        const second = { path: 'file2.ts', content: 'b', size: 2, mtime: Date.now(), hash: 'h2' };
        const third = { path: 'file3.ts', content: 'c', size: 3, mtime: Date.now(), hash: 'h3' };
        const files = [first, second, third];

        // Store one at a time, in order.
        for (const entry of files) {
            await storage.storeFile(entry);
        }

        const listing = await storage.getAllFiles();
        expect(listing).toHaveLength(3);
        expect(listing).toEqual(expect.arrayContaining(files));
    });

    // A fresh storage lists nothing.
    it('should return empty array when no files stored', async () => {
        expect(await storage.getAllFiles()).toEqual([]);
    });
});
|
|
84
|
+
describe('deleteFile', () => {
    // After deletion the path must no longer resolve to a record.
    it('should delete existing file', async () => {
        const file = {
            path: 'test.ts',
            content: 'test',
            size: 100,
            mtime: Date.now(),
            hash: 'abc',
        };
        await storage.storeFile(file);
        await storage.deleteFile('test.ts');
        const retrieved = await storage.getFile('test.ts');
        expect(retrieved).toBeNull();
    });

    it('should not throw when deleting non-existent file', async () => {
        // `expect(asyncFn).not.toThrow()` only detects a synchronous throw. An
        // async function reports failure by rejecting its promise, so that form
        // can never fail and leaves the promise unawaited. Asserting through
        // `.resolves` makes a rejection fail the test.
        await expect(storage.deleteFile('nonexistent.ts')).resolves.not.toThrow();
    });
});
|
|
104
|
+
describe('clear', () => {
    // Populate the storage, clear it, and confirm the listing is empty.
    it('should remove all files', async () => {
        const seed = [
            { path: 'file1.ts', content: 'a', size: 1, mtime: Date.now(), hash: 'h1' },
            { path: 'file2.ts', content: 'b', size: 2, mtime: Date.now(), hash: 'h2' },
        ];
        for (const entry of seed) {
            await storage.storeFile(entry);
        }

        await storage.clear();

        const remaining = await storage.getAllFiles();
        expect(remaining).toHaveLength(0);
    });
});
|
|
118
|
+
describe('count', () => {
    // The count must follow each store and delete: 0 -> 1 -> 2 -> 1.
    it('should return correct file count', async () => {
        const expectCount = async (expected) => {
            expect(await storage.count()).toBe(expected);
        };

        await expectCount(0);

        const first = { path: 'file1.ts', content: 'a', size: 1, mtime: Date.now(), hash: 'h1' };
        await storage.storeFile(first);
        await expectCount(1);

        const second = { path: 'file2.ts', content: 'b', size: 2, mtime: Date.now(), hash: 'h2' };
        await storage.storeFile(second);
        await expectCount(2);

        await storage.deleteFile('file1.ts');
        await expectCount(1);
    });
});
|
|
141
|
+
describe('exists', () => {
    // A path that has been stored is reported as present.
    it('should return true for existing file', async () => {
        const record = { path: 'test.ts', content: 'test', size: 100, mtime: Date.now(), hash: 'abc' };
        await storage.storeFile(record);

        const present = await storage.exists('test.ts');
        expect(present).toBe(true);
    });

    // A path that was never stored is reported as absent.
    it('should return false for non-existent file', async () => {
        const present = await storage.exists('nonexistent.ts');
        expect(present).toBe(false);
    });
});
|
|
156
|
+
});
|
|
157
|
+
//# sourceMappingURL=storage.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"storage.test.js","sourceRoot":"","sources":["../src/storage.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAA;AACzD,OAAO,EAAqB,aAAa,EAAE,MAAM,cAAc,CAAA;AAE/D,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,IAAI,OAAsB,CAAA;IAE1B,UAAU,CAAC,GAAG,EAAE;QACf,OAAO,GAAG,IAAI,aAAa,EAAE,CAAA;IAC9B,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;YACpC,MAAM,IAAI,GAAiB;gBAC1B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,sBAAsB;gBAC/B,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,QAAQ,EAAE,YAAY;gBACtB,IAAI,EAAE,QAAQ;aACd,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAElD,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,KAAK,GAAiB;gBAC3B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,aAAa;gBACtB,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,OAAO;aACb,CAAA;YAED,MAAM,KAAK,GAAiB;gBAC3B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,aAAa;gBACtB,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,OAAO;aACb,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;YAC9B,MAAM,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;YAE9B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAClD,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;YAC9C,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACxB,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,IAAI,GAAiB;gBAC1B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,KAAK;aACX,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAElD,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,gBAA
gB,CAAC,CAAA;YACzD,MAAM,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,CAAA;QAC7B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC5B,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,KAAK,GAAmB;gBAC7B,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;gBAC1E,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;gBAC1E,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aAC1E,CAAA;YAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBAC1B,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC9B,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAA;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAChC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAA;QACxD,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC/D,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAA;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC3B,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,IAAI,GAAiB;gBAC1B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,KAAK;aACX,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAA;YAEnC,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAClD,MAAM,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,CAAA;QAC7B,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YACjE,MAAM,MAAM,CAAC,KAAK,IAAI,EAAE;gBACvB,MAAM,OAAO,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAA;YAC3C,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAA;QACjB,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,yBAAyB,EAAE,KAAK,IAAI,EAAE;YACxC,MAAM,KAAK,GAAmB;gBAC7B,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;gBAC1E,EAAE,IAAI,EAA
E,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aAC1E,CAAA;YAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBAC1B,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC9B,CAAC;YAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAA;YAErB,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAA;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACjC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAErC,MAAM,OAAO,CAAC,SAAS,CAAC;gBACvB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,GAAG;gBACZ,IAAI,EAAE,CAAC;gBACP,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,IAAI;aACV,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAErC,MAAM,OAAO,CAAC,SAAS,CAAC;gBACvB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,GAAG;gBACZ,IAAI,EAAE,CAAC;gBACP,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,IAAI;aACV,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAErC,MAAM,OAAO,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;YACpC,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;QACvB,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,OAAO,CAAC,SAAS,CAAC;gBACvB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,KAAK;aACX,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnD,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YAC1D,MAAM,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC3D,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;AACH,CAAC,CAAC,CAAA"}
|
package/dist/tfidf.d.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 (Best Matching 25) implementation
|
|
3
|
+
* Using StarCoder2 tokenizer for code-aware tokenization
|
|
4
|
+
*
|
|
5
|
+
* BM25 improves on TF-IDF with:
|
|
6
|
+
* 1. Term frequency saturation (k1 parameter) - diminishing returns for repeated terms
|
|
7
|
+
* 2. Document length normalization (b parameter) - adjusts for document length
|
|
8
|
+
*/
|
|
9
|
+
import { initializeTokenizer } from './code-tokenizer.js';
|
|
10
|
+
export { initializeTokenizer };
|
|
11
|
+
/**
 * One indexed document as produced by buildSearchIndex.
 *
 * - uri: the identifier copied from the input document.
 * - terms: term -> TF-IDF weight (relative term frequency times smoothed IDF).
 * - rawTerms: term -> raw occurrence count from tokenizing the content.
 * - magnitude: Euclidean norm of the `terms` weights, stored so cosine
 *   similarity does not have to recompute it per query.
 */
export interface DocumentVector {
|
|
12
|
+
uri: string;
|
|
13
|
+
terms: Map<string, number>;
|
|
14
|
+
rawTerms: Map<string, number>;
|
|
15
|
+
magnitude: number;
|
|
16
|
+
}
|
|
17
|
+
/**
 * In-memory search index returned by buildSearchIndex.
 *
 * - documents: one DocumentVector per input document, in input order.
 * - idf: term -> smoothed inverse document frequency over the whole corpus.
 * - totalDocuments: number of documents the index was built from.
 * - metadata.generatedAt: ISO-8601 timestamp taken when the index was built.
 * - metadata.version: index format version string ('1.0.0' in the builder
 *   that ships alongside this declaration).
 */
export interface SearchIndex {
|
|
18
|
+
documents: DocumentVector[];
|
|
19
|
+
idf: Map<string, number>;
|
|
20
|
+
totalDocuments: number;
|
|
21
|
+
metadata: {
|
|
22
|
+
generatedAt: string;
|
|
23
|
+
version: string;
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Tokenize code using StarCoder2 (async)
|
|
28
|
+
*/
|
|
29
|
+
export declare function tokenize(text: string): Promise<string[]>;
|
|
30
|
+
/**
|
|
31
|
+
* Build TF-IDF search index from documents (async - uses StarCoder2)
|
|
32
|
+
*/
|
|
33
|
+
export declare function buildSearchIndex(documents: Array<{
|
|
34
|
+
uri: string;
|
|
35
|
+
content: string;
|
|
36
|
+
}>): Promise<SearchIndex>;
|
|
37
|
+
/**
|
|
38
|
+
* Calculate cosine similarity between query and document
|
|
39
|
+
*/
|
|
40
|
+
export declare function calculateCosineSimilarity(queryVector: Map<string, number>, docVector: DocumentVector): number;
|
|
41
|
+
/**
|
|
42
|
+
* Process query into TF-IDF vector (async - uses StarCoder2)
|
|
43
|
+
*/
|
|
44
|
+
export declare function processQuery(query: string, idf: Map<string, number>): Promise<Map<string, number>>;
|
|
45
|
+
/**
 * SQL-based search result from storage: one candidate document handed to
 * searchDocumentsFromStorage.
 *
 * - path: identifier of the candidate document.
 * - matchedTerms: term -> stored statistics for that term in this document.
 *   BM25 scoring reads `rawFreq` as the term frequency f(qi, D).
 *   NOTE(review): `tfidf` looks like the pre-computed TF-IDF weight; its
 *   consumer is not visible from this declaration - confirm.
 * - magnitude: pre-computed vector magnitude for the document.
 * - tokenCount: document length in tokens (|D| in BM25); the scorer falls back
 *   to 1 when this is 0 or missing.
 */
|
|
49
|
+
export interface StorageSearchResult {
|
|
50
|
+
path: string;
|
|
51
|
+
matchedTerms: Map<string, {
|
|
52
|
+
tfidf: number;
|
|
53
|
+
rawFreq: number;
|
|
54
|
+
}>;
|
|
55
|
+
magnitude: number;
|
|
56
|
+
tokenCount: number;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Search documents using BM25 scoring (SQL-based storage)
|
|
60
|
+
*
|
|
61
|
+
* BM25 formula: score(D,Q) = Σ IDF(qi) * (f(qi,D) * (k1+1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl))
|
|
62
|
+
*
|
|
63
|
+
* Where:
|
|
64
|
+
* - f(qi,D) = raw frequency of term qi in document D
|
|
65
|
+
* - |D| = document length (token count)
|
|
66
|
+
* - avgdl = average document length
|
|
67
|
+
* - k1 = term frequency saturation (default: 1.2)
|
|
68
|
+
* - b = length normalization (default: 0.75)
|
|
69
|
+
*/
|
|
70
|
+
export declare function searchDocumentsFromStorage(query: string, candidates: StorageSearchResult[], idf: Map<string, number>, options?: {
|
|
71
|
+
limit?: number;
|
|
72
|
+
minScore?: number;
|
|
73
|
+
avgDocLength?: number;
|
|
74
|
+
}): Promise<Array<{
|
|
75
|
+
uri: string;
|
|
76
|
+
score: number;
|
|
77
|
+
matchedTerms: string[];
|
|
78
|
+
}>>;
|
|
79
|
+
/**
|
|
80
|
+
* Get query tokens (exported for SQL-based search) - async
|
|
81
|
+
*/
|
|
82
|
+
export declare function getQueryTokens(query: string): Promise<string[]>;
|
|
83
|
+
/**
|
|
84
|
+
* Search documents using BM25 scoring (in-memory index)
|
|
85
|
+
*
|
|
86
|
+
* For in-memory search, document length is calculated from rawTerms.
|
|
87
|
+
* Average document length is calculated from all documents in the index.
|
|
88
|
+
*/
|
|
89
|
+
export declare function searchDocuments(query: string, index: SearchIndex, options?: {
|
|
90
|
+
limit?: number;
|
|
91
|
+
minScore?: number;
|
|
92
|
+
}): Promise<Array<{
|
|
93
|
+
uri: string;
|
|
94
|
+
score: number;
|
|
95
|
+
matchedTerms: string[];
|
|
96
|
+
}>>;
|
|
97
|
+
//# sourceMappingURL=tfidf.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.d.ts","sourceRoot":"","sources":["../src/tfidf.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,mBAAmB,EAAiC,MAAM,qBAAqB,CAAA;AAGxF,OAAO,EAAE,mBAAmB,EAAE,CAAA;AA2B9B,MAAM,WAAW,cAAc;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC1B,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC7B,SAAS,EAAE,MAAM,CAAA;CACjB;AAED,MAAM,WAAW,WAAW;IAC3B,SAAS,EAAE,cAAc,EAAE,CAAA;IAC3B,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACxB,cAAc,EAAE,MAAM,CAAA;IACtB,QAAQ,EAAE;QACT,WAAW,EAAE,MAAM,CAAA;QACnB,OAAO,EAAE,MAAM,CAAA;KACf,CAAA;CACD;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAE9D;AAmFD;;GAEG;AACH,wBAAsB,gBAAgB,CACrC,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,GAChD,OAAO,CAAC,WAAW,CAAC,CAsCtB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACxC,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,SAAS,EAAE,cAAc,GACvB,MAAM,CAiBR;AAED;;GAEG;AACH,wBAAsB,YAAY,CACjC,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACtB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAG9B;AAkBD;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC7D,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,0BAA0B,CAC/C,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,mBAAmB,EAAE,EACjC,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACxB,OAAO,GAAE;IACR,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;CAChB,GACJ,OAAO,CAAC,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CAAC,CAiExE;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAErE;AAED;;;;;GAKG;AACH,wBAAsB,eAAe,CACpC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,WAAW,EAClB,OAAO,GAAE;IACR,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,MAAM,CAAA;CACZ,GACJ,OAAO,CAAC,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAAY,
EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CAAC,CAkExE"}
|
package/dist/tfidf.js
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 (Best Matching 25) implementation
|
|
3
|
+
* Using StarCoder2 tokenizer for code-aware tokenization
|
|
4
|
+
*
|
|
5
|
+
* BM25 improves on TF-IDF with:
|
|
6
|
+
* 1. Term frequency saturation (k1 parameter) - diminishing returns for repeated terms
|
|
7
|
+
* 2. Document length normalization (b parameter) - adjusts for document length
|
|
8
|
+
*/
|
|
9
|
+
import { initializeTokenizer, tokenize as starcoderTokenize } from './code-tokenizer.js';
|
|
10
|
+
// Re-export initializeTokenizer for external use (tokenize is exported below as a wrapper)
|
|
11
|
+
export { initializeTokenizer };
|
|
12
|
+
// BM25 parameters (Elasticsearch/Lucene defaults)
|
|
13
|
+
const BM25_K1 = 1.2; // Term frequency saturation (1.2-2.0 typical)
|
|
14
|
+
const BM25_B = 0.75; // Length normalization (0.75 typical, 0 = no normalization, 1 = full normalization)
|
|
15
|
+
// Query token cache - avoids re-tokenizing the same query (CPU optimization).
// Keyed by the raw query string; each value is that query's deduplicated token array.
const queryTokenCache = new Map();
const QUERY_CACHE_MAX_SIZE = 100;
/**
 * Return the deduplicated tokens for a query, tokenizing each distinct query
 * at most once while it stays cached.
 *
 * The cache holds at most QUERY_CACHE_MAX_SIZE entries. When it is full, the
 * entry that was inserted first is evicted (Map iteration follows insertion
 * order). Cache hits do not refresh an entry's position, so eviction is
 * first-in-first-out rather than true LRU.
 *
 * @param {string} query - Raw query text; also used as the cache key.
 * @returns {Promise<string[]>} Unique tokens in first-occurrence order. The
 *   array is the same object held by the cache, so callers must treat it as
 *   read-only.
 */
async function getCachedQueryTokens(query) {
    const cached = queryTokenCache.get(query);
    if (cached !== undefined) {
        return cached;
    }
    // Tokenize and dedupe
    const tokens = [...new Set(await tokenize(query))];
    // Evict the oldest insertion when the cache is full.
    if (queryTokenCache.size >= QUERY_CACHE_MAX_SIZE) {
        const oldestKey = queryTokenCache.keys().next().value;
        // Compare against undefined instead of testing truthiness: an empty
        // string is a valid cached query, and skipping it would pin it as the
        // oldest entry forever and let the cache grow without bound.
        if (oldestKey !== undefined) {
            queryTokenCache.delete(oldestKey);
        }
    }
    queryTokenCache.set(query, tokens);
    return tokens;
}
|
|
33
|
+
/**
 * Tokenize text with the StarCoder2-based code tokenizer (async).
 *
 * Thin wrapper that forwards to the tokenizer imported from code-tokenizer.js.
 *
 * @param {string} text - Source text or query to split into tokens.
 * @returns {Promise<string[]>} Tokens as produced by the underlying tokenizer.
 */
export async function tokenize(text) {
    const tokens = await starcoderTokenize(text);
    return tokens;
}
|
|
39
|
+
/**
 * Convert raw term counts into relative term frequencies (TF).
 *
 * @param {Map<string, number>} termFrequency - Raw occurrence count per term.
 * @returns {Map<string, number>} Each term mapped to its count divided by the
 *   sum of all counts, in the same key order as the input.
 */
function calculateTF(termFrequency) {
    let grandTotal = 0;
    for (const count of termFrequency.values()) {
        grandTotal += count;
    }
    return new Map(
        Array.from(termFrequency, ([token, count]) => [token, count / grandTotal]),
    );
}
|
|
50
|
+
/**
 * Compute smoothed Inverse Document Frequency (IDF) for every term in the corpus.
 *
 * Plain log(N / df) is 0 for a term that appears in all documents. The
 * smoothed form log((N + 1) / (df + 1)) + 1 keeps every weight positive.
 *
 * @param {Array<Map<string, number>>} documents - One term map per document;
 *   only the keys are read.
 * @param {number} totalDocuments - Corpus size N.
 * @returns {Map<string, number>} Term -> smoothed IDF.
 */
function calculateIDF(documents, totalDocuments) {
    // df: in how many documents each term occurs
    const df = new Map();
    documents.forEach((termMap) => {
        for (const token of new Set(termMap.keys())) {
            const seenIn = df.get(token) || 0;
            df.set(token, seenIn + 1);
        }
    });
    const weights = new Map();
    df.forEach((containing, token) => {
        weights.set(token, Math.log((totalDocuments + 1) / (containing + 1)) + 1);
    });
    return weights;
}
|
|
71
|
+
/**
 * Combine term frequencies with IDF weights into TF-IDF scores for one document.
 *
 * @param {Map<string, number>} tf - Term -> relative frequency in the document.
 * @param {Map<string, number>} idf - Term -> inverse document frequency.
 * @returns {Map<string, number>} Term -> tf * idf, with one entry per term in `tf`.
 */
function calculateTFIDF(tf, idf) {
    const weighted = new Map();
    tf.forEach((frequency, token) => {
        // A term missing from the IDF table is weighted as 0.
        const inverse = idf.get(token) || 0;
        weighted.set(token, frequency * inverse);
    });
    return weighted;
}
|
|
82
|
+
/**
 * Euclidean length of a sparse vector, used to normalize cosine similarity.
 *
 * @param {Map<string, number>} vector - Term -> weight.
 * @returns {number} Square root of the sum of squared weights (0 for an empty map).
 */
function calculateMagnitude(vector) {
    const sumOfSquares = Array.from(vector.values()).reduce(
        (acc, component) => acc + component * component,
        0,
    );
    return Math.sqrt(sumOfSquares);
}
|
|
92
|
+
/**
 * Tokenize content and count how often each token occurs (async - uses StarCoder2).
 *
 * @param {string} content - Document text.
 * @returns {Promise<Map<string, number>>} Token -> occurrence count, keyed in
 *   first-occurrence order.
 */
async function extractTermFrequencies(content) {
    const tokenStream = await tokenize(content);
    const counts = new Map();
    for (const tok of tokenStream) {
        const previous = counts.get(tok) || 0;
        counts.set(tok, previous + 1);
    }
    return counts;
}
|
|
103
|
+
/**
 * Build a TF-IDF search index from documents (async - uses StarCoder2).
 *
 * Steps: tokenize every document and count its terms, derive corpus-wide IDF,
 * then compute a TF-IDF vector and its magnitude for each document. The raw
 * counts are kept on each document vector as `rawTerms`.
 *
 * @param {Array<{uri: string, content: string}>} documents - Documents to index.
 * @returns {Promise<object>} Index with `documents`, `idf`, `totalDocuments`
 *   and `metadata` ({ generatedAt, version }).
 */
export async function buildSearchIndex(documents) {
    // Term counting is started for all documents at once and awaited together.
    const pendingCounts = documents.map(async (doc) => {
        const uri = doc.uri;
        const terms = await extractTermFrequencies(doc.content);
        return { uri, terms };
    });
    const perDocument = await Promise.all(pendingCounts);

    const corpusSize = documents.length;
    const termMaps = perDocument.map(({ terms }) => terms);
    const idf = calculateIDF(termMaps, corpusSize);

    // Weight each document's relative frequencies by IDF and cache the norm.
    const vectors = [];
    for (const { uri, terms: rawTerms } of perDocument) {
        const weighted = calculateTFIDF(calculateTF(rawTerms), idf);
        const magnitude = calculateMagnitude(weighted);
        vectors.push({ uri, terms: weighted, rawTerms, magnitude });
    }

    const generatedAt = new Date().toISOString();
    return {
        documents: vectors,
        idf,
        totalDocuments: corpusSize,
        metadata: { generatedAt, version: '1.0.0' },
    };
}
|
|
136
|
+
/**
 * Cosine similarity between a query vector and an indexed document vector.
 *
 * @param {Map<string, number>} queryVector - Term -> query weight.
 * @param {{terms: Map<string, number>, magnitude: number}} docVector - Document
 *   vector with its pre-computed magnitude.
 * @returns {number} Dot product divided by the product of both magnitudes, or
 *   0 when either magnitude is 0.
 */
export function calculateCosineSimilarity(queryVector, docVector) {
    // Accumulate the dot product over the query's terms only; terms missing
    // from the document contribute 0.
    let dot = 0;
    queryVector.forEach((queryWeight, token) => {
        const docWeight = docVector.terms.get(token) || 0;
        dot += queryWeight * docWeight;
    });

    const queryNorm = calculateMagnitude(queryVector);
    const isDegenerate = queryNorm === 0 || docVector.magnitude === 0;
    // Guard against dividing by zero for empty/zero vectors.
    return isDegenerate ? 0 : dot / (queryNorm * docVector.magnitude);
}
|
|
153
|
+
/**
 * Turn a raw query string into a query vector (async - uses StarCoder2).
 *
 * Tokenizes the query, then hands the tokens and the IDF table to
 * processQueryWithTokens to build the vector.
 *
 * @param {string} query - Raw query text.
 * @param {Map<string, number>} idf - Term -> inverse document frequency.
 * @returns {Promise<Map<string, number>>} The vector built by processQueryWithTokens.
 */
export async function processQuery(query, idf) {
    return processQueryWithTokens(await tokenize(query), idf);
}
|
|
160
|
+
/**
 * Build a query vector from already-tokenized terms (CPU optimization - the
 * caller tokenizes once and reuses the tokens).
 *
 * @param {Iterable<string>} tokens - Query tokens; duplicates are harmless.
 * @param {Map<string, number>} idf - Term -> inverse document frequency.
 * @returns {Map<string, number>} Term -> IDF weight for every token whose IDF
 *   is greater than 0; all other tokens are left out.
 */
function processQueryWithTokens(tokens, idf) {
    const weights = new Map();
    for (const token of tokens) {
        const weight = idf.get(token) || 0;
        // Skip terms with no positive IDF.
        if (!(weight > 0)) {
            continue;
        }
        weights.set(token, weight);
    }
    return weights;
}
|
|
173
|
+
/**
 * Search documents using BM25 scoring (SQL-based storage)
 *
 * BM25 formula: score(D,Q) = Σ IDF(qi) * (f(qi,D) * (k1+1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl))
 *
 * Where:
 * - f(qi,D) = raw frequency of term qi in document D
 * - |D| = document length (token count)
 * - avgdl = average document length
 * - k1 = term frequency saturation (module constant BM25_K1)
 * - b = length normalization (module constant BM25_B)
 *
 * @param {string} query - Raw query text; tokenized via getCachedQueryTokens.
 * @param {Array<{path: string, tokenCount: number, matchedTerms: Map<string, {rawFreq: number}>}>} candidates
 *   Pre-filtered documents together with the per-term data found for them.
 * @param {Map<string, number>} idf - Term -> IDF weight lookup.
 * @param {{limit?: number, minScore?: number, avgDocLength?: number}} [options]
 *   `limit` (default 10) caps the number of results; a limit that is not a
 *   positive number yields an empty result. `minScore` (default 0) drops
 *   lower-scoring documents. `avgDocLength` overrides the average document
 *   length; when absent it is estimated from `candidates`.
 * @returns {Promise<Array<{uri: string, score: number, matchedTerms: string[]}>>}
 *   Matches sorted by descending score, at most `limit` entries.
 */
export async function searchDocumentsFromStorage(query, candidates, idf, options = {}) {
    const { limit = 10, minScore = 0 } = options;
    // Get query tokens (cached)
    const queryTokens = await getCachedQueryTokens(query);
    if (queryTokens.length === 0) {
        return [];
    }
    // Normalize the limit before it is used as an array length: a zero,
    // negative, fractional or NaN limit would otherwise throw while pruning
    // (reading the last element of an emptied array / invalid array length).
    const maxResults = Math.floor(limit);
    if (!(maxResults > 0)) {
        return [];
    }
    // Fall back to the average over the candidates when no global average is
    // supplied (less accurate but works without global stats).
    let avgDocLength = options.avgDocLength;
    if (!avgDocLength) {
        const totalTokens = candidates.reduce((sum, c) => sum + (c.tokenCount || 0), 0);
        avgDocLength = candidates.length > 0 ? totalTokens / candidates.length : 1;
    }
    // Ensure avgDocLength is at least 1 to avoid division by zero
    avgDocLength = Math.max(avgDocLength, 1);
    const byScoreDesc = (a, b) => b.score - a.score;
    const results = [];
    let minThreshold = minScore;
    for (const candidate of candidates) {
        const docLen = candidate.tokenCount || 1;
        // Length-normalization part of the BM25 denominator; it is the same
        // for every term of this document.
        const lengthNorm = BM25_K1 * (1 - BM25_B + (BM25_B * docLen) / avgDocLength);
        const matchedTerms = [];
        let score = 0;
        for (const term of queryTokens) {
            if (!candidate.matchedTerms.has(term)) {
                continue;
            }
            matchedTerms.push(term);
            const docData = candidate.matchedTerms.get(term);
            if (!docData) {
                continue;
            }
            // Coerce and validate the frequency: a missing or non-numeric
            // value would turn the whole score into NaN (which also slips
            // past the threshold check and corrupts sorting), and a zero
            // frequency contributes nothing anyway.
            const termFreq = Number(docData.rawFreq);
            if (!(termFreq > 0)) {
                continue;
            }
            const termIdf = idf.get(term) || 0;
            // BM25 term score: IDF * (tf * (k1+1)) / (tf + k1 * (1 - b + b * docLen/avgdl))
            score += termIdf * ((termFreq * (BM25_K1 + 1)) / (termFreq + lengthNorm));
        }
        if (matchedTerms.length === 0 || score < minThreshold) {
            continue;
        }
        results.push({ uri: `file://${candidate.path}`, score, matchedTerms });
        // Bounded results (optimization for large candidate sets): once the
        // buffer holds twice the limit, keep only the best `maxResults` and
        // raise the threshold so weaker documents are rejected early.
        if (results.length >= maxResults * 2) {
            results.sort(byScoreDesc);
            results.length = maxResults;
            minThreshold = results[results.length - 1].score;
        }
    }
    return results.sort(byScoreDesc).slice(0, maxResults);
}
|
|
240
|
+
/**
 * Exported entry point for obtaining the tokens of a query (used by the
 * SQL-based search path). Simply forwards to getCachedQueryTokens.
 *
 * @param {string} query - Raw query text.
 * @returns {Promise<string[]>} Whatever getCachedQueryTokens resolves to for
 *   this query.
 */
export async function getQueryTokens(query) {
    const tokens = await getCachedQueryTokens(query);
    return tokens;
}
|
|
246
|
+
/**
 * Search documents using BM25 scoring (in-memory index)
 *
 * For in-memory search, document length is calculated from rawTerms (the sum
 * of all raw term frequencies of a document). Average document length is
 * calculated from all documents in the index.
 *
 * @param {string} query - Raw query text; tokenized via getCachedQueryTokens.
 * @param {{documents: Array<{uri: string, rawTerms: Map<string, number>}>, idf: Map<string, number>}} index
 *   In-memory index: the documents with their raw term counts, and the
 *   term -> IDF weight lookup.
 * @param {{limit?: number, minScore?: number}} [options]
 *   `limit` (default 10) caps the number of results; a limit that is not a
 *   positive number yields an empty result. `minScore` (default 0) drops
 *   lower-scoring documents.
 * @returns {Promise<Array<{uri: string, score: number, matchedTerms: string[]}>>}
 *   Matches sorted by descending score, at most `limit` entries.
 */
export async function searchDocuments(query, index, options = {}) {
    const { limit = 10, minScore = 0 } = options;
    // Process query with cached tokens (CPU optimization)
    const queryTokens = await getCachedQueryTokens(query);
    if (queryTokens.length === 0) {
        return [];
    }
    // Normalize the limit before it is used as an array length: a zero,
    // negative, fractional or NaN limit would otherwise throw while pruning
    // (reading the last element of an emptied array / invalid array length).
    const maxResults = Math.floor(limit);
    if (!(maxResults > 0)) {
        return [];
    }
    // Compute every document's length exactly once; the values feed both the
    // index-wide average and the per-document normalization below.
    const docLengths = [];
    let totalTokens = 0;
    for (const doc of index.documents) {
        let length = 0;
        for (const freq of doc.rawTerms.values()) {
            length += freq;
        }
        docLengths.push(length);
        totalTokens += length;
    }
    const avgDocLength = index.documents.length > 0 ? totalTokens / index.documents.length : 1;
    const byScoreDesc = (a, b) => b.score - a.score;
    const results = [];
    let minThreshold = minScore;
    let position = -1;
    for (const doc of index.documents) {
        position += 1;
        // Get matched terms
        const matchedTerms = queryTokens.filter((token) => doc.rawTerms.has(token));
        if (matchedTerms.length === 0) {
            continue;
        }
        const docLen = Math.max(docLengths[position], 1); // Avoid division by zero
        // Length-normalization part of the BM25 denominator; it is the same
        // for every term of this document.
        const lengthNorm = BM25_K1 * (1 - BM25_B + (BM25_B * docLen) / avgDocLength);
        let score = 0;
        for (const term of matchedTerms) {
            const termFreq = doc.rawTerms.get(term) || 0;
            const termIdf = index.idf.get(term) || 0;
            // BM25 term score: IDF * (tf * (k1+1)) / (tf + k1 * (1 - b + b * docLen/avgdl))
            score += termIdf * ((termFreq * (BM25_K1 + 1)) / (termFreq + lengthNorm));
        }
        if (score < minThreshold) {
            continue;
        }
        results.push({ uri: doc.uri, score, matchedTerms });
        // Bounded results (optimization): once the buffer holds twice the
        // limit, keep only the best `maxResults` and raise the threshold so
        // weaker documents are rejected early.
        if (results.length >= maxResults * 2) {
            results.sort(byScoreDesc);
            results.length = maxResults;
            minThreshold = results[results.length - 1].score;
        }
    }
    return results.sort(byScoreDesc).slice(0, maxResults);
}
|
|
308
|
+
//# sourceMappingURL=tfidf.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.js","sourceRoot":"","sources":["../src/tfidf.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,mBAAmB,EAAE,QAAQ,IAAI,iBAAiB,EAAE,MAAM,qBAAqB,CAAA;AAExF,sCAAsC;AACtC,OAAO,EAAE,mBAAmB,EAAE,CAAA;AAE9B,kDAAkD;AAClD,MAAM,OAAO,GAAG,GAAG,CAAA,CAAC,8CAA8C;AAClE,MAAM,MAAM,GAAG,IAAI,CAAA,CAAC,oFAAoF;AAExG,6EAA6E;AAC7E,MAAM,eAAe,GAAG,IAAI,GAAG,EAAoB,CAAA;AACnD,MAAM,oBAAoB,GAAG,GAAG,CAAA;AAEhC,KAAK,UAAU,oBAAoB,CAAC,KAAa;IAChD,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IACzC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAA;IAEzB,sBAAsB;IACtB,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;IAElD,4CAA4C;IAC5C,IAAI,eAAe,CAAC,IAAI,IAAI,oBAAoB,EAAE,CAAC;QAClD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAA;QACpD,IAAI,QAAQ;YAAE,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC/C,CAAC;IAED,eAAe,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;IAClC,OAAO,MAAM,CAAA;AACd,CAAC;AAmBD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAY;IAC1C,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAA;AAC/B,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,aAAkC;IACtD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,EAAE,CAAC,CAAC,CAAA;IAC1F,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAA;IAEpC,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC;QACpD,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,UAAU,CAAC,CAAA;IAChC,CAAC;IAED,OAAO,EAAE,CAAA;AACV,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CACpB,SAAgC,EAChC,cAAsB;IAEtB,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAA;IAEnD,6CAA6C;IAC7C,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAA;QACvC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAChC,iBAAiB,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACpE,CAAC;IACF,CAAC;IAED,qDAAqD;IACrD,qEAAqE;IACrE,qEAAqE;IACrE,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAA;IACrC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,iBAAiB,CAAC,OAAO,EAAE,EAAE,CAAC;QAC3D,GAAG,CAAC,GAAG,CAAC,IA
AI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,cAAc,GAAG,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAClE,CAAC;IAED,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,EAAuB,EAAE,GAAwB;IACxE,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAA;IAEvC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC;QAC5C,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,GAAG,QAAQ,CAAC,CAAA;IACpC,CAAC;IAED,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,MAA2B;IACtD,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QACrC,GAAG,IAAI,KAAK,GAAG,KAAK,CAAA;IACrB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACtB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,sBAAsB,CAAC,OAAe;IACpD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAA;IACtC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAA;IAE7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,WAAW,CAAA;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACrC,SAAkD;IAElD,6CAA6C;IAC7C,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;QAC7B,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,KAAK,EAAE,MAAM,sBAAsB,CAAC,GAAG,CAAC,OAAO,CAAC;KAChD,CAAC,CAAC,CACH,CAAA;IAED,uBAAuB;IACvB,MAAM,GAAG,GAAG,YAAY,CACvB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,EACjC,SAAS,CAAC,MAAM,CAChB,CAAA;IAED,qCAAqC;IACrC,MAAM,eAAe,GAAqB,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACnE,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACjC,MAAM,KAAK,GAAG,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAA;QACrC,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAA;QAE3C,OAAO;YACN,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,KAAK,EAAE,KAAK;YACZ,QAAQ,EAAE,GAAG,CAAC,KAAK;YACnB,SAAS;SACT,CAAA;IACF,CAAC,CAAC,CAAA;IAEF,OAAO;QACN,SAAS,EAAE,eAAe;QAC1B,GAAG;QACH,cAAc,EAAE,SAAS,CAAC,MAAM;QAChC,QAAQ,EAAE;YACT,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACrC,OAAO,EAAE,OAAO;SAChB;KACD,CAAA;AACF,CAAC
;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB,CACxC,WAAgC,EAChC,SAAyB;IAEzB,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,wBAAwB;IACxB,KAAK,MAAM,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/C,UAAU,IAAI,UAAU,GAAG,QAAQ,CAAA;IACpC,CAAC;IAED,4BAA4B;IAC5B,MAAM,cAAc,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAA;IAEtD,IAAI,cAAc,KAAK,CAAC,IAAI,SAAS,CAAC,SAAS,KAAK,CAAC,EAAE,CAAC;QACvD,OAAO,CAAC,CAAA;IACT,CAAC;IAED,OAAO,UAAU,GAAG,CAAC,cAAc,GAAG,SAAS,CAAC,SAAS,CAAC,CAAA;AAC3D,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,KAAa,EACb,GAAwB;IAExB,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAA;IACnC,OAAO,sBAAsB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AAC1C,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,MAAgB,EAAE,GAAwB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAA;IAE7C,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAClB,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;QAChC,CAAC;IACF,CAAC;IAED,OAAO,WAAW,CAAA;AACnB,CAAC;AAaD;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC/C,KAAa,EACb,UAAiC,EACjC,GAAwB,EACxB,UAII,EAAE;IAEN,MAAM,EAAE,KAAK,GAAG,EAAE,EAAE,QAAQ,GAAG,CAAC,EAAE,GAAG,OAAO,CAAA;IAE5C,4BAA4B;IAC5B,MAAM,WAAW,GAAG,MAAM,oBAAoB,CAAC,KAAK,CAAC,CAAA;IAErD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAA;IACV,CAAC;IAED,oDAAoD;IACpD,mFAAmF;IACnF,IAAI,YAAY,GAAG,OAAO,CAAC,YAAY,CAAA;IACvC,IAAI,CAAC,YAAY,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;QACzC,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;QAC/E,YAAY,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC3E,CAAC;IACD,8DAA8D;IAC9D,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,CAAC,CAAC,CAAA;IAExC,kCAAkC;IAClC,MAAM,OAAO,GAAkE,EAAE,CAAA;IACjF,IAAI,YAAY,GAAG,QAAQ,CAAA;IAE3B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,oBAAoB;QACpB,MAAM,YAAY,GAAa,EAAE,CAAA;QACjC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,
CAAC;YAChC,IAAI,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACxB,CAAC;QACF,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAEvC,eAAe;QACf,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,IAAI,CAAC,CAAA;QACxC,IAAI,KAAK,GAAG,CAAC,CAAA;QAEb,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YAChD,IAAI,CAAC,OAAO;gBAAE,SAAQ;YAEtB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAA;YAChC,MAAM,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAElC,gFAAgF;YAChF,MAAM,SAAS,GAAG,QAAQ,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAA;YAC1C,MAAM,WAAW,GAAG,QAAQ,GAAG,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,YAAY,CAAC,CAAA;YACxF,KAAK,IAAI,OAAO,GAAG,CAAC,SAAS,GAAG,WAAW,CAAC,CAAA;QAC7C,CAAC;QAED,IAAI,KAAK,GAAG,YAAY;YAAE,SAAQ;QAElC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,SAAS,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAA;QAEtE,0DAA0D;QAC1D,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;YACzC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAA;YACtB,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA;QACjD,CAAC;IACF,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;AACjE,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAAa;IACjD,OAAO,oBAAoB,CAAC,KAAK,CAAC,CAAA;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACpC,KAAa,EACb,KAAkB,EAClB,UAGI,EAAE;IAEN,MAAM,EAAE,KAAK,GAAG,EAAE,EAAE,QAAQ,GAAG,CAAC,EAAE,GAAG,OAAO,CAAA;IAE5C,sDAAsD;IACtD,MAAM,WAAW,GAAG,MAAM,oBAAoB,CAAC,KAAK,CAAC,CAAA;IAErD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAA;IACV,CAAC;IAED,+CAA+C;IAC/C,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACnC,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1C,WAAW,IAAI,IAAI,CAAA;QACpB,CAAC;IACF,CAAC;IACD,MAAM,YAAY,GAAG,KAAK,CAAC,SAAS,CAAC,
MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAE1F,6BAA6B;IAC7B,MAAM,OAAO,GAAkE,EAAE,CAAA;IACjF,IAAI,YAAY,GAAG,QAAQ,CAAA;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACnC,oBAAoB;QACpB,MAAM,YAAY,GAAa,EAAE,CAAA;QACjC,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YACjC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7B,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACzB,CAAC;QACF,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAEvC,0DAA0D;QAC1D,IAAI,MAAM,GAAG,CAAC,CAAA;QACd,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1C,MAAM,IAAI,IAAI,CAAA;QACf,CAAC;QACD,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA,CAAC,yBAAyB;QAEtD,eAAe;QACf,IAAI,KAAK,GAAG,CAAC,CAAA;QACb,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAExC,kBAAkB;YAClB,MAAM,SAAS,GAAG,QAAQ,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAA;YAC1C,MAAM,WAAW,GAAG,QAAQ,GAAG,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,YAAY,CAAC,CAAA;YACxF,KAAK,IAAI,OAAO,GAAG,CAAC,SAAS,GAAG,WAAW,CAAC,CAAA;QAC7C,CAAC;QAED,IAAI,KAAK,GAAG,YAAY;YAAE,SAAQ;QAElC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAA;QAEnD,iCAAiC;QACjC,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;YACzC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAA;YACtB,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA;QACjD,CAAC;IACF,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;AACjE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf.test.d.ts","sourceRoot":"","sources":["../src/tfidf.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|