@matperez/coderag 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/README.md +154 -0
  2. package/dist/.tsbuildinfo +1 -0
  3. package/dist/ast-chunking.d.ts +40 -0
  4. package/dist/ast-chunking.d.ts.map +1 -0
  5. package/dist/ast-chunking.js +88 -0
  6. package/dist/ast-chunking.js.map +1 -0
  7. package/dist/ast-chunking.test.d.ts +5 -0
  8. package/dist/ast-chunking.test.d.ts.map +1 -0
  9. package/dist/ast-chunking.test.js +173 -0
  10. package/dist/ast-chunking.test.js.map +1 -0
  11. package/dist/code-tokenizer.d.ts +62 -0
  12. package/dist/code-tokenizer.d.ts.map +1 -0
  13. package/dist/code-tokenizer.js +129 -0
  14. package/dist/code-tokenizer.js.map +1 -0
  15. package/dist/code-tokenizer.test.d.ts +5 -0
  16. package/dist/code-tokenizer.test.d.ts.map +1 -0
  17. package/dist/code-tokenizer.test.js +96 -0
  18. package/dist/code-tokenizer.test.js.map +1 -0
  19. package/dist/db/client-pg.d.ts +16 -0
  20. package/dist/db/client-pg.d.ts.map +1 -0
  21. package/dist/db/client-pg.js +38 -0
  22. package/dist/db/client-pg.js.map +1 -0
  23. package/dist/db/client.d.ts +36 -0
  24. package/dist/db/client.d.ts.map +1 -0
  25. package/dist/db/client.js +81 -0
  26. package/dist/db/client.js.map +1 -0
  27. package/dist/db/migrations-pg.d.ts +6 -0
  28. package/dist/db/migrations-pg.d.ts.map +1 -0
  29. package/dist/db/migrations-pg.js +88 -0
  30. package/dist/db/migrations-pg.js.map +1 -0
  31. package/dist/db/migrations.d.ts +9 -0
  32. package/dist/db/migrations.d.ts.map +1 -0
  33. package/dist/db/migrations.js +164 -0
  34. package/dist/db/migrations.js.map +1 -0
  35. package/dist/db/schema-pg.d.ts +611 -0
  36. package/dist/db/schema-pg.d.ts.map +1 -0
  37. package/dist/db/schema-pg.js +66 -0
  38. package/dist/db/schema-pg.js.map +1 -0
  39. package/dist/db/schema.d.ts +630 -0
  40. package/dist/db/schema.d.ts.map +1 -0
  41. package/dist/db/schema.js +85 -0
  42. package/dist/db/schema.js.map +1 -0
  43. package/dist/embeddings.d.ts +92 -0
  44. package/dist/embeddings.d.ts.map +1 -0
  45. package/dist/embeddings.js +275 -0
  46. package/dist/embeddings.js.map +1 -0
  47. package/dist/embeddings.test.d.ts +5 -0
  48. package/dist/embeddings.test.d.ts.map +1 -0
  49. package/dist/embeddings.test.js +255 -0
  50. package/dist/embeddings.test.js.map +1 -0
  51. package/dist/hybrid-search.d.ts +47 -0
  52. package/dist/hybrid-search.d.ts.map +1 -0
  53. package/dist/hybrid-search.js +215 -0
  54. package/dist/hybrid-search.js.map +1 -0
  55. package/dist/hybrid-search.test.d.ts +5 -0
  56. package/dist/hybrid-search.test.d.ts.map +1 -0
  57. package/dist/hybrid-search.test.js +252 -0
  58. package/dist/hybrid-search.test.js.map +1 -0
  59. package/dist/incremental-tfidf.d.ts +77 -0
  60. package/dist/incremental-tfidf.d.ts.map +1 -0
  61. package/dist/incremental-tfidf.js +248 -0
  62. package/dist/incremental-tfidf.js.map +1 -0
  63. package/dist/incremental-tfidf.test.d.ts +5 -0
  64. package/dist/incremental-tfidf.test.d.ts.map +1 -0
  65. package/dist/incremental-tfidf.test.js +276 -0
  66. package/dist/incremental-tfidf.test.js.map +1 -0
  67. package/dist/index.d.ts +18 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +19 -0
  70. package/dist/index.js.map +1 -0
  71. package/dist/indexer.d.ts +205 -0
  72. package/dist/indexer.d.ts.map +1 -0
  73. package/dist/indexer.js +1331 -0
  74. package/dist/indexer.js.map +1 -0
  75. package/dist/indexer.test.d.ts +12 -0
  76. package/dist/indexer.test.d.ts.map +1 -0
  77. package/dist/indexer.test.js +471 -0
  78. package/dist/indexer.test.js.map +1 -0
  79. package/dist/language-config.d.ts +54 -0
  80. package/dist/language-config.d.ts.map +1 -0
  81. package/dist/language-config.js +75 -0
  82. package/dist/language-config.js.map +1 -0
  83. package/dist/search-cache.d.ts +63 -0
  84. package/dist/search-cache.d.ts.map +1 -0
  85. package/dist/search-cache.js +118 -0
  86. package/dist/search-cache.js.map +1 -0
  87. package/dist/search-cache.test.d.ts +5 -0
  88. package/dist/search-cache.test.d.ts.map +1 -0
  89. package/dist/search-cache.test.js +194 -0
  90. package/dist/search-cache.test.js.map +1 -0
  91. package/dist/storage-factory.d.ts +11 -0
  92. package/dist/storage-factory.d.ts.map +1 -0
  93. package/dist/storage-factory.js +17 -0
  94. package/dist/storage-factory.js.map +1 -0
  95. package/dist/storage-persistent-pg.d.ts +75 -0
  96. package/dist/storage-persistent-pg.d.ts.map +1 -0
  97. package/dist/storage-persistent-pg.js +579 -0
  98. package/dist/storage-persistent-pg.js.map +1 -0
  99. package/dist/storage-persistent-pg.test.d.ts +7 -0
  100. package/dist/storage-persistent-pg.test.d.ts.map +1 -0
  101. package/dist/storage-persistent-pg.test.js +90 -0
  102. package/dist/storage-persistent-pg.test.js.map +1 -0
  103. package/dist/storage-persistent-types.d.ts +110 -0
  104. package/dist/storage-persistent-types.d.ts.map +1 -0
  105. package/dist/storage-persistent-types.js +5 -0
  106. package/dist/storage-persistent-types.js.map +1 -0
  107. package/dist/storage-persistent.d.ts +231 -0
  108. package/dist/storage-persistent.d.ts.map +1 -0
  109. package/dist/storage-persistent.js +897 -0
  110. package/dist/storage-persistent.js.map +1 -0
  111. package/dist/storage-persistent.test.d.ts +5 -0
  112. package/dist/storage-persistent.test.d.ts.map +1 -0
  113. package/dist/storage-persistent.test.js +325 -0
  114. package/dist/storage-persistent.test.js.map +1 -0
  115. package/dist/storage.d.ts +63 -0
  116. package/dist/storage.d.ts.map +1 -0
  117. package/dist/storage.js +67 -0
  118. package/dist/storage.js.map +1 -0
  119. package/dist/storage.test.d.ts +5 -0
  120. package/dist/storage.test.d.ts.map +1 -0
  121. package/dist/storage.test.js +157 -0
  122. package/dist/storage.test.js.map +1 -0
  123. package/dist/tfidf.d.ts +97 -0
  124. package/dist/tfidf.d.ts.map +1 -0
  125. package/dist/tfidf.js +308 -0
  126. package/dist/tfidf.js.map +1 -0
  127. package/dist/tfidf.test.d.ts +5 -0
  128. package/dist/tfidf.test.d.ts.map +1 -0
  129. package/dist/tfidf.test.js +181 -0
  130. package/dist/tfidf.test.js.map +1 -0
  131. package/dist/utils.d.ts +61 -0
  132. package/dist/utils.d.ts.map +1 -0
  133. package/dist/utils.js +264 -0
  134. package/dist/utils.js.map +1 -0
  135. package/dist/utils.test.d.ts +5 -0
  136. package/dist/utils.test.d.ts.map +1 -0
  137. package/dist/utils.test.js +94 -0
  138. package/dist/utils.test.js.map +1 -0
  139. package/dist/vector-storage.d.ts +120 -0
  140. package/dist/vector-storage.d.ts.map +1 -0
  141. package/dist/vector-storage.js +264 -0
  142. package/dist/vector-storage.js.map +1 -0
  143. package/dist/vector-storage.test.d.ts +5 -0
  144. package/dist/vector-storage.test.d.ts.map +1 -0
  145. package/dist/vector-storage.test.js +345 -0
  146. package/dist/vector-storage.test.js.map +1 -0
  147. package/package.json +85 -0
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Tests for in-memory storage
3
+ */
4
+ import { beforeEach, describe, expect, it } from 'vitest';
5
+ import { MemoryStorage } from './storage.js';
6
describe('MemoryStorage', () => {
    // Re-created before every test so no state leaks between cases.
    let storage;

    /**
     * Build a file record with default fields; tests override only the
     * fields they assert on.
     */
    const makeFile = (overrides = {}) => ({
        path: 'test.ts',
        content: 'test',
        size: 100,
        mtime: Date.now(),
        hash: 'abc',
        ...overrides,
    });

    /** Store every record sequentially, mirroring how the tests seed data. */
    const storeAll = async (files) => {
        for (const file of files) {
            await storage.storeFile(file);
        }
    };

    beforeEach(() => {
        storage = new MemoryStorage();
    });

    describe('storeFile', () => {
        it('should store a file', async () => {
            const file = makeFile({
                content: 'console.log("test");',
                language: 'TypeScript',
                hash: 'abc123',
            });
            await storage.storeFile(file);
            const retrieved = await storage.getFile('test.ts');
            expect(retrieved).toEqual(file);
        });

        it('should overwrite existing file', async () => {
            const file1 = makeFile({ content: 'old content', hash: 'hash1' });
            const file2 = makeFile({ content: 'new content', size: 150, hash: 'hash2' });
            await storage.storeFile(file1);
            await storage.storeFile(file2);
            const retrieved = await storage.getFile('test.ts');
            expect(retrieved?.content).toBe('new content');
            expect(retrieved?.hash).toBe('hash2');
        });
    });

    describe('getFile', () => {
        it('should retrieve stored file', async () => {
            const file = makeFile();
            await storage.storeFile(file);
            const retrieved = await storage.getFile('test.ts');
            expect(retrieved).toEqual(file);
        });

        it('should return null for non-existent file', async () => {
            const retrieved = await storage.getFile('nonexistent.ts');
            expect(retrieved).toBeNull();
        });
    });

    describe('getAllFiles', () => {
        it('should return all stored files', async () => {
            const files = [
                makeFile({ path: 'file1.ts', content: 'a', size: 1, hash: 'h1' }),
                makeFile({ path: 'file2.ts', content: 'b', size: 2, hash: 'h2' }),
                makeFile({ path: 'file3.ts', content: 'c', size: 3, hash: 'h3' }),
            ];
            await storeAll(files);
            const allFiles = await storage.getAllFiles();
            expect(allFiles).toHaveLength(3);
            expect(allFiles).toEqual(expect.arrayContaining(files));
        });

        it('should return empty array when no files stored', async () => {
            const allFiles = await storage.getAllFiles();
            expect(allFiles).toEqual([]);
        });
    });

    describe('deleteFile', () => {
        it('should delete existing file', async () => {
            await storage.storeFile(makeFile());
            await storage.deleteFile('test.ts');
            const retrieved = await storage.getFile('test.ts');
            expect(retrieved).toBeNull();
        });

        it('should not throw when deleting non-existent file', async () => {
            // Assert on the promise itself: wrapping the call in an async
            // function and using `.not.toThrow()` always passes, because an
            // async function reports failure by rejecting, never by throwing.
            await expect(storage.deleteFile('nonexistent.ts')).resolves.not.toThrow();
        });
    });

    describe('clear', () => {
        it('should remove all files', async () => {
            await storeAll([
                makeFile({ path: 'file1.ts', content: 'a', size: 1, hash: 'h1' }),
                makeFile({ path: 'file2.ts', content: 'b', size: 2, hash: 'h2' }),
            ]);
            await storage.clear();
            const allFiles = await storage.getAllFiles();
            expect(allFiles).toHaveLength(0);
        });
    });

    describe('count', () => {
        it('should return correct file count', async () => {
            expect(await storage.count()).toBe(0);
            await storage.storeFile(makeFile({ path: 'file1.ts', content: 'a', size: 1, hash: 'h1' }));
            expect(await storage.count()).toBe(1);
            await storage.storeFile(makeFile({ path: 'file2.ts', content: 'b', size: 2, hash: 'h2' }));
            expect(await storage.count()).toBe(2);
            await storage.deleteFile('file1.ts');
            expect(await storage.count()).toBe(1);
        });
    });

    describe('exists', () => {
        it('should return true for existing file', async () => {
            await storage.storeFile(makeFile());
            expect(await storage.exists('test.ts')).toBe(true);
        });

        it('should return false for non-existent file', async () => {
            expect(await storage.exists('nonexistent.ts')).toBe(false);
        });
    });
});
157
+ //# sourceMappingURL=storage.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"storage.test.js","sourceRoot":"","sources":["../src/storage.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAA;AACzD,OAAO,EAAqB,aAAa,EAAE,MAAM,cAAc,CAAA;AAE/D,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,IAAI,OAAsB,CAAA;IAE1B,UAAU,CAAC,GAAG,EAAE;QACf,OAAO,GAAG,IAAI,aAAa,EAAE,CAAA;IAC9B,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;YACpC,MAAM,IAAI,GAAiB;gBAC1B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,sBAAsB;gBAC/B,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,QAAQ,EAAE,YAAY;gBACtB,IAAI,EAAE,QAAQ;aACd,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAElD,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,KAAK,GAAiB;gBAC3B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,aAAa;gBACtB,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,OAAO;aACb,CAAA;YAED,MAAM,KAAK,GAAiB;gBAC3B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,aAAa;gBACtB,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,OAAO;aACb,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;YAC9B,MAAM,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;YAE9B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAClD,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;YAC9C,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACxB,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,IAAI,GAAiB;gBAC1B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,KAAK;aACX,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAElD,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,gB
AAgB,CAAC,CAAA;YACzD,MAAM,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,CAAA;QAC7B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC5B,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,KAAK,GAAmB;gBAC7B,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;gBAC1E,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;gBAC1E,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aAC1E,CAAA;YAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBAC1B,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC9B,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAA;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAChC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAA;QACxD,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC/D,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAA;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC3B,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,IAAI,GAAiB;gBAC1B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,KAAK;aACX,CAAA;YAED,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAA;YAEnC,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;YAClD,MAAM,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,CAAA;QAC7B,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YACjE,MAAM,MAAM,CAAC,KAAK,IAAI,EAAE;gBACvB,MAAM,OAAO,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAA;YAC3C,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAA;QACjB,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,yBAAyB,EAAE,KAAK,IAAI,EAAE;YACxC,MAAM,KAAK,GAAmB;gBAC7B,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;gBAC1E,EAAE,IAAI,E
AAE,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aAC1E,CAAA;YAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBAC1B,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC9B,CAAC;YAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAA;YAErB,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAA;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACjC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAErC,MAAM,OAAO,CAAC,SAAS,CAAC;gBACvB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,GAAG;gBACZ,IAAI,EAAE,CAAC;gBACP,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,IAAI;aACV,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAErC,MAAM,OAAO,CAAC,SAAS,CAAC;gBACvB,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,GAAG;gBACZ,IAAI,EAAE,CAAC;gBACP,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,IAAI;aACV,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAErC,MAAM,OAAO,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;YACpC,MAAM,CAAC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;QACvB,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,OAAO,CAAC,SAAS,CAAC;gBACvB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE;gBACjB,IAAI,EAAE,KAAK;aACX,CAAC,CAAA;YAEF,MAAM,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnD,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YAC1D,MAAM,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC3D,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;AACH,CAAC,CAAC,CAAA"}
@@ -0,0 +1,97 @@
1
+ /**
2
+ * BM25 (Best Matching 25) implementation
3
+ * Using StarCoder2 tokenizer for code-aware tokenization
4
+ *
5
+ * BM25 improves on TF-IDF with:
6
+ * 1. Term frequency saturation (k1 parameter) - diminishing returns for repeated terms
7
+ * 2. Document length normalization (b parameter) - adjusts for document length
8
+ */
9
+ import { initializeTokenizer } from './code-tokenizer.js';
10
+ export { initializeTokenizer };
11
+ export interface DocumentVector { // One indexed document as produced by buildSearchIndex
12
+ uri: string; // Document identifier, copied from the input document's `uri`
13
+ terms: Map<string, number>; // TF-IDF weight per term
14
+ rawTerms: Map<string, number>; // Raw occurrence count per term (used for BM25 term frequency and document length)
15
+ magnitude: number; // Euclidean norm of `terms`, used as the cosine-similarity denominator
16
+ }
17
+ export interface SearchIndex { // In-memory index returned by buildSearchIndex
18
+ documents: DocumentVector[]; // One vector per input document, in input order
19
+ idf: Map<string, number>; // Smoothed IDF per term: log((N + 1) / (df + 1)) + 1
20
+ totalDocuments: number; // Number of documents passed to buildSearchIndex
21
+ metadata: {
22
+ generatedAt: string; // ISO 8601 timestamp taken when the index was built
23
+ version: string; // Index format version string; buildSearchIndex writes '1.0.0'
24
+ };
25
+ }
26
+ /**
27
+ * Tokenize code using StarCoder2 (async).
+ * Resolves to the tokens returned by `tokenize` from ./code-tokenizer.js for `text`.
28
+ */
29
+ export declare function tokenize(text: string): Promise<string[]>;
30
+ /**
31
+ * Build TF-IDF search index from documents (async - uses StarCoder2).
+ * Each document's `content` is tokenized; the result holds one DocumentVector per
+ * input document plus the IDF table computed across all of them.
32
+ */
33
+ export declare function buildSearchIndex(documents: Array<{
34
+ uri: string;
35
+ content: string;
36
+ }>): Promise<SearchIndex>;
37
+ /**
38
+ * Calculate cosine similarity between query and document.
+ * Returns 0 when either the query vector or the document vector has zero magnitude.
39
+ */
40
+ export declare function calculateCosineSimilarity(queryVector: Map<string, number>, docVector: DocumentVector): number;
41
+ /**
42
+ * Process query into TF-IDF vector (async - uses StarCoder2).
+ * Each query token with a positive IDF maps to that IDF value; tokens that are
+ * missing from `idf` (or have a non-positive IDF) are omitted.
43
+ */
44
+ export declare function processQuery(query: string, idf: Map<string, number>): Promise<Map<string, number>>;
45
+ /**
46
+ * SQL-based search result from storage
47
+ * Uses pre-computed magnitude and token count for BM25 scoring
+ * NOTE(review): searchDocumentsFromStorage in tfidf.js reads only `rawFreq` and
+ * `tokenCount`; `magnitude` and `tfidf` are not read there - confirm other consumers.
48
+ */
49
+ export interface StorageSearchResult {
50
+ path: string; // File path; search results expose it as `file://${path}`
51
+ matchedTerms: Map<string, { // Query terms found in this document
52
+ tfidf: number; // Pre-computed TF-IDF weight of the term
53
+ rawFreq: number; // Raw term frequency, f(qi,D) in the BM25 formula
54
+ }>;
55
+ magnitude: number; // Pre-computed vector magnitude of the document
56
+ tokenCount: number; // Document length |D| in tokens; a falsy value is treated as 1 when scoring
57
+ }
58
+ /**
59
+ * Search documents using BM25 scoring (SQL-based storage)
60
+ *
61
+ * BM25 formula: score(D,Q) = Σ IDF(qi) * (f(qi,D) * (k1+1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl))
62
+ *
63
+ * Where:
64
+ * - f(qi,D) = raw frequency of term qi in document D
65
+ * - |D| = document length (token count)
66
+ * - avgdl = average document length
67
+ * - k1 = term frequency saturation (default: 1.2)
68
+ * - b = length normalization (default: 0.75)
+ *
+ * Options:
+ * - limit = maximum number of results (default: 10)
+ * - minScore = results scoring below this are dropped (default: 0)
+ * - avgDocLength = avgdl; when omitted or 0, the mean tokenCount of `candidates` is used (never less than 1)
+ *
+ * Resolves to results sorted by descending score; each `uri` is `file://` + the candidate path.
69
+ */
70
+ export declare function searchDocumentsFromStorage(query: string, candidates: StorageSearchResult[], idf: Map<string, number>, options?: {
71
+ limit?: number;
72
+ minScore?: number;
73
+ avgDocLength?: number;
74
+ }): Promise<Array<{
75
+ uri: string;
76
+ score: number;
77
+ matchedTerms: string[];
78
+ }>>;
79
+ /**
80
+ * Get query tokens (exported for SQL-based search) - async.
+ * Tokens are de-duplicated and cached per query string.
81
+ */
82
+ export declare function getQueryTokens(query: string): Promise<string[]>;
83
+ /**
84
+ * Search documents using BM25 scoring (in-memory index)
85
+ *
86
+ * For in-memory search, document length is calculated from rawTerms.
87
+ * Average document length is calculated from all documents in the index.
+ *
+ * Options:
+ * - limit = maximum number of results (default: 10)
+ * - minScore = results scoring below this are dropped (default: 0)
+ *
+ * Resolves to results sorted by descending score; `uri` is the document's own `uri`.
88
+ */
89
+ export declare function searchDocuments(query: string, index: SearchIndex, options?: {
90
+ limit?: number;
91
+ minScore?: number;
92
+ }): Promise<Array<{
93
+ uri: string;
94
+ score: number;
95
+ matchedTerms: string[];
96
+ }>>;
97
+ //# sourceMappingURL=tfidf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.d.ts","sourceRoot":"","sources":["../src/tfidf.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,mBAAmB,EAAiC,MAAM,qBAAqB,CAAA;AAGxF,OAAO,EAAE,mBAAmB,EAAE,CAAA;AA2B9B,MAAM,WAAW,cAAc;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC1B,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC7B,SAAS,EAAE,MAAM,CAAA;CACjB;AAED,MAAM,WAAW,WAAW;IAC3B,SAAS,EAAE,cAAc,EAAE,CAAA;IAC3B,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACxB,cAAc,EAAE,MAAM,CAAA;IACtB,QAAQ,EAAE;QACT,WAAW,EAAE,MAAM,CAAA;QACnB,OAAO,EAAE,MAAM,CAAA;KACf,CAAA;CACD;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAE9D;AAmFD;;GAEG;AACH,wBAAsB,gBAAgB,CACrC,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,GAChD,OAAO,CAAC,WAAW,CAAC,CAsCtB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACxC,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,SAAS,EAAE,cAAc,GACvB,MAAM,CAiBR;AAED;;GAEG;AACH,wBAAsB,YAAY,CACjC,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACtB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAG9B;AAkBD;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC7D,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,0BAA0B,CAC/C,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,mBAAmB,EAAE,EACjC,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACxB,OAAO,GAAE;IACR,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;CAChB,GACJ,OAAO,CAAC,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CAAC,CAiExE;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAErE;AAED;;;;;GAKG;AACH,wBAAsB,eAAe,CACpC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,WAAW,EAClB,OAAO,GAAE;IACR,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,MAAM,CAAA;CACZ,GACJ,OAAO,CAAC,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAA
Y,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CAAC,CAkExE"}
package/dist/tfidf.js ADDED
@@ -0,0 +1,308 @@
1
+ /**
2
+ * BM25 (Best Matching 25) implementation
3
+ * Using StarCoder2 tokenizer for code-aware tokenization
4
+ *
5
+ * BM25 improves on TF-IDF with:
6
+ * 1. Term frequency saturation (k1 parameter) - diminishing returns for repeated terms
7
+ * 2. Document length normalization (b parameter) - adjusts for document length
8
+ */
9
+ import { initializeTokenizer, tokenize as starcoderTokenize } from './code-tokenizer.js';
10
+ // Re-export initializeTokenizer for external use
11
+ export { initializeTokenizer };
12
+ // BM25 parameters (Elasticsearch/Lucene defaults)
13
+ const BM25_K1 = 1.2; // Term frequency saturation (1.2-2.0 typical)
14
+ const BM25_B = 0.75; // Length normalization (0.75 typical, 0 = no normalization, 1 = full normalization)
15
// Query token cache - avoids re-tokenizing the same query (CPU optimization).
// A Map iterates in insertion order, so its first key is always the least
// recently used entry as long as hits are re-inserted (see below).
const queryTokenCache = new Map();
const QUERY_CACHE_MAX_SIZE = 100;

/**
 * Return the de-duplicated tokens for `query`, tokenizing at most once per
 * distinct query string while it stays in the cache.
 *
 * The returned array is the cached instance itself; callers must treat it
 * as read-only.
 *
 * @param {string} query - Raw query text.
 * @returns {Promise<string[]>} Unique tokens in first-occurrence order.
 */
async function getCachedQueryTokens(query) {
    const cached = queryTokenCache.get(query);
    if (cached !== undefined) {
        // Move the entry to the end so eviction is least-recently-used
        // rather than first-inserted.
        queryTokenCache.delete(query);
        queryTokenCache.set(query, cached);
        return cached;
    }
    // Tokenize and dedupe
    const tokens = [...new Set(await tokenize(query))];
    // Evict only when this insert would grow the cache. A concurrent call may
    // already have stored the same key while we were awaiting the tokenizer.
    if (!queryTokenCache.has(query) && queryTokenCache.size >= QUERY_CACHE_MAX_SIZE) {
        // The cache is non-empty here, so the first key exists. It is deleted
        // unconditionally because a falsy key such as '' is still a valid key
        // and must be evictable.
        const oldestKey = queryTokenCache.keys().next().value;
        queryTokenCache.delete(oldestKey);
    }
    queryTokenCache.set(query, tokens);
    return tokens;
}
33
/**
 * Split source text into tokens by delegating to the `tokenize` export of
 * ./code-tokenizer.js (imported here as `starcoderTokenize`).
 *
 * @param {string} text - Text to tokenize.
 * @returns {Promise<string[]>} Tokens as returned by the underlying tokenizer.
 */
export async function tokenize(text) {
    const tokens = await starcoderTokenize(text);
    return tokens;
}
39
/**
 * Calculate Term Frequency (TF): each term's raw count divided by the total
 * count of all terms in the document.
 *
 * @param {Map<string, number>} termFrequency - Raw occurrence count per term.
 * @returns {Map<string, number>} Normalized frequency per term. When the
 *   total count is 0, every term maps to 0 instead of NaN.
 */
function calculateTF(termFrequency) {
    let totalTerms = 0;
    for (const freq of termFrequency.values()) {
        totalTerms += freq;
    }
    const tf = new Map();
    for (const [term, freq] of termFrequency) {
        // Guard the 0/0 case so NaN never reaches downstream scores.
        tf.set(term, totalTerms === 0 ? 0 : freq / totalTerms);
    }
    return tf;
}
50
/**
 * Calculate Inverse Document Frequency (IDF) for every term that occurs in
 * at least one document.
 *
 * Uses the smoothed formula log((N + 1) / (df + 1)) + 1. The plain log(N / df)
 * is 0 for a term present in all documents; the smoothed form never is.
 *
 * @param {Array<Map<string, number>>} documents - Per-document term maps; only the keys are read.
 * @param {number} totalDocuments - N, the number of documents in the corpus.
 * @returns {Map<string, number>} IDF score per term.
 */
function calculateIDF(documents, totalDocuments) {
    const documentFrequency = new Map();
    // Count how many documents contain each term. Map keys are already
    // unique, so each document contributes at most 1 per term.
    for (const doc of documents) {
        for (const term of doc.keys()) {
            documentFrequency.set(term, (documentFrequency.get(term) ?? 0) + 1);
        }
    }
    const idf = new Map();
    for (const [term, docFreq] of documentFrequency) {
        idf.set(term, Math.log((totalDocuments + 1) / (docFreq + 1)) + 1);
    }
    return idf;
}
71
/**
 * Combine a document's term frequencies with corpus IDF scores.
 *
 * @param {Map<string, number>} tf - Normalized term frequency per term.
 * @param {Map<string, number>} idf - IDF score per term.
 * @returns {Map<string, number>} tf * idf per term; terms without an IDF
 *   entry get a weight of 0.
 */
function calculateTFIDF(tf, idf) {
    const weigh = ([term, frequency]) => [term, frequency * (idf.get(term) || 0)];
    return new Map(Array.from(tf, weigh));
}
82
/**
 * Euclidean length of a sparse vector, used as the cosine-similarity
 * denominator.
 *
 * @param {Map<string, number>} vector - Weight per term.
 * @returns {number} Square root of the sum of squared weights (0 for an empty vector).
 */
function calculateMagnitude(vector) {
    const sumOfSquares = Array.from(vector.values()).reduce((acc, weight) => acc + weight * weight, 0);
    return Math.sqrt(sumOfSquares);
}
92
/**
 * Tokenize `content` and count how often each token occurs (async - the
 * tokenizer is asynchronous).
 *
 * @param {string} content - Document text.
 * @returns {Promise<Map<string, number>>} Occurrence count per token.
 */
async function extractTermFrequencies(content) {
    const counts = new Map();
    for (const token of await tokenize(content)) {
        const seen = counts.get(token);
        counts.set(token, seen ? seen + 1 : 1);
    }
    return counts;
}
103
/**
 * Build the in-memory search index for a set of documents (async - every
 * document is tokenized).
 *
 * @param {Array<{uri: string, content: string}>} documents - Documents to index.
 * @returns {Promise<object>} Index with one vector per document (TF-IDF
 *   weights, raw term counts, magnitude), the corpus IDF table, the document
 *   count, and generation metadata.
 */
export async function buildSearchIndex(documents) {
    // Tokenize all documents concurrently and keep each one's raw counts.
    const countTerms = async ({ uri, content }) => ({
        uri,
        terms: await extractTermFrequencies(content),
    });
    const perDocument = await Promise.all(documents.map((doc) => countTerms(doc)));

    const totalDocuments = documents.length;
    const idf = calculateIDF(perDocument.map(({ terms }) => terms), totalDocuments);

    // Turn the raw counts into weighted vectors.
    const vectors = [];
    for (const { uri, terms: rawTerms } of perDocument) {
        const weights = calculateTFIDF(calculateTF(rawTerms), idf);
        vectors.push({
            uri,
            terms: weights,
            rawTerms,
            magnitude: calculateMagnitude(weights),
        });
    }

    const metadata = {
        generatedAt: new Date().toISOString(),
        version: '1.0.0',
    };
    return { documents: vectors, idf, totalDocuments, metadata };
}
136
/**
 * Cosine similarity between a query vector and an indexed document.
 *
 * @param {Map<string, number>} queryVector - Weight per query term.
 * @param {{terms: Map<string, number>, magnitude: number}} docVector - Document
 *   weights and their pre-computed magnitude.
 * @returns {number} dot(query, doc) / (|query| * |doc|), or 0 when either
 *   magnitude is 0.
 */
export function calculateCosineSimilarity(queryVector, docVector) {
    // Only terms present in the query can contribute to the dot product.
    const dotProduct = Array.from(queryVector).reduce(
        (acc, [term, queryWeight]) => acc + queryWeight * (docVector.terms.get(term) || 0),
        0,
    );
    const queryMagnitude = calculateMagnitude(queryVector);
    const isDegenerate = queryMagnitude === 0 || docVector.magnitude === 0;
    return isDegenerate ? 0 : dotProduct / (queryMagnitude * docVector.magnitude);
}
153
/**
 * Tokenize a query and convert it into a weighted query vector (async - the
 * tokenizer is asynchronous).
 *
 * @param {string} query - Raw query text.
 * @param {Map<string, number>} idf - IDF score per term.
 * @returns {Promise<Map<string, number>>} IDF weight for each query token that has a positive IDF.
 */
export async function processQuery(query, idf) {
    return processQueryWithTokens(await tokenize(query), idf);
}
160
/**
 * Build a query vector from already-tokenized terms, so callers that hold
 * tokens do not tokenize again.
 *
 * @param {Iterable<string>} tokens - Query tokens.
 * @param {Map<string, number>} idf - IDF score per term.
 * @returns {Map<string, number>} Token -> IDF for every token whose IDF is
 *   greater than 0; all other tokens are left out.
 */
function processQueryWithTokens(tokens, idf) {
    const weighted = new Map();
    for (const token of tokens) {
        const weight = idf.get(token) || 0;
        if (!(weight > 0)) {
            continue;
        }
        weighted.set(token, weight);
    }
    return weighted;
}
173
/**
 * Search documents using BM25 scoring (SQL-based storage)
 *
 * BM25 formula: score(D,Q) = Σ IDF(qi) * (f(qi,D) * (k1+1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl))
 *
 * Where:
 * - f(qi,D) = raw frequency of term qi in document D
 * - |D| = document length (token count)
 * - avgdl = average document length
 * - k1 = term frequency saturation (default: 1.2)
 * - b = length normalization (default: 0.75)
 *
 * @param {string} query - Raw query text; tokenized (with caching) before scoring.
 * @param {Array<{path: string, matchedTerms: Map<string, {tfidf: number, rawFreq: number}>, magnitude: number, tokenCount: number}>} candidates
 *   Pre-filtered documents from storage.
 * @param {Map<string, number>} idf - IDF score per term; missing terms score 0.
 * @param {{limit?: number, minScore?: number, avgDocLength?: number}} [options]
 *   `limit` (default 10) is floored to an integer, and a limit that is not
 *   positive yields an empty result. `minScore` (default 0) drops
 *   lower-scoring documents. `avgDocLength` falls back to the mean tokenCount
 *   of `candidates` and is never less than 1.
 * @returns {Promise<Array<{uri: string, score: number, matchedTerms: string[]}>>}
 *   Up to `limit` results, highest score first; `uri` is `file://` + path.
 */
export async function searchDocumentsFromStorage(query, candidates, idf, options = {}) {
    const { limit = 10, minScore = 0 } = options;
    // Normalize the limit up front. The pruning step below assigns it to
    // `results.length`, which throws for fractional or negative values, and a
    // limit of 0 would read `results[-1]`.
    const maxResults = Math.floor(limit);
    if (!(maxResults > 0)) {
        return [];
    }
    // Get query tokens (cached)
    const queryTokens = await getCachedQueryTokens(query);
    if (queryTokens.length === 0) {
        return [];
    }
    // Calculate average document length if not provided.
    // Fallback to average of candidates (less accurate but works without global stats).
    let avgDocLength = options.avgDocLength;
    if (!avgDocLength) {
        const totalTokens = candidates.reduce((sum, c) => sum + (c.tokenCount || 0), 0);
        avgDocLength = candidates.length > 0 ? totalTokens / candidates.length : 1;
    }
    // Ensure avgDocLength is at least 1 to avoid division by zero
    avgDocLength = Math.max(avgDocLength, 1);

    const byScoreDesc = (a, b) => b.score - a.score;
    const results = [];
    let minThreshold = minScore;
    for (const candidate of candidates) {
        const docLen = candidate.tokenCount || 1;
        // Length-normalization part of the BM25 denominator; identical for
        // every term of this candidate, so compute it once.
        const lengthNorm = BM25_K1 * (1 - BM25_B + (BM25_B * docLen) / avgDocLength);
        const matchedTerms = [];
        let score = 0;
        for (const term of queryTokens) {
            if (!candidate.matchedTerms.has(term)) {
                continue;
            }
            matchedTerms.push(term);
            const docData = candidate.matchedTerms.get(term);
            if (!docData) {
                continue;
            }
            const termFreq = docData.rawFreq;
            const termIdf = idf.get(term) || 0;
            // BM25 term score: IDF * (tf * (k1+1)) / (tf + k1 * (1 - b + b * docLen/avgdl))
            const numerator = termFreq * (BM25_K1 + 1);
            const denominator = termFreq + lengthNorm;
            score += termIdf * (numerator / denominator);
        }
        if (matchedTerms.length === 0 || score < minThreshold) {
            continue;
        }
        results.push({ uri: `file://${candidate.path}`, score, matchedTerms });
        // Bounded results (optimization for large candidate sets): once twice
        // the limit has accumulated, keep the best `maxResults` and raise the
        // admission threshold to the weakest kept score.
        if (results.length >= maxResults * 2) {
            results.sort(byScoreDesc);
            results.length = maxResults;
            minThreshold = results[results.length - 1].score;
        }
    }
    return results.sort(byScoreDesc).slice(0, maxResults);
}
240
/**
 * Get query tokens (exported for SQL-based search) - async.
 *
 * Returns a fresh copy of the cached token list, so callers may sort or
 * mutate the result without corrupting the shared query-token cache.
 *
 * @param {string} query - Raw query text.
 * @returns {Promise<string[]>} De-duplicated tokens for `query`.
 */
export async function getQueryTokens(query) {
    const tokens = await getCachedQueryTokens(query);
    return [...tokens];
}
246
/**
 * Search documents using BM25 scoring (in-memory index)
 *
 * For in-memory search, document length is calculated from rawTerms.
 * Average document length is calculated from all documents in the index.
 *
 * @param {string} query - Raw query text; tokenized (with caching) before scoring.
 * @param {{documents: Array<{uri: string, rawTerms: Map<string, number>}>, idf: Map<string, number>}} index
 *   Index built by buildSearchIndex; only `documents[].uri`,
 *   `documents[].rawTerms` and `idf` are read here.
 * @param {{limit?: number, minScore?: number}} [options]
 *   `limit` (default 10) is floored to an integer, and a limit that is not
 *   positive yields an empty result. `minScore` (default 0) drops
 *   lower-scoring documents.
 * @returns {Promise<Array<{uri: string, score: number, matchedTerms: string[]}>>}
 *   Up to `limit` results, highest score first.
 */
export async function searchDocuments(query, index, options = {}) {
    const { limit = 10, minScore = 0 } = options;
    // Normalize the limit up front. The pruning step below assigns it to
    // `results.length`, which throws for fractional or negative values, and a
    // limit of 0 would read `results[-1]`.
    const maxResults = Math.floor(limit);
    if (!(maxResults > 0)) {
        return [];
    }
    // Process query with cached tokens (CPU optimization)
    const queryTokens = await getCachedQueryTokens(query);
    if (queryTokens.length === 0) {
        return [];
    }
    // Sum each document's term frequencies once; the totals feed both the
    // corpus average and the per-document length normalization.
    const docLengths = index.documents.map((doc) => {
        let length = 0;
        for (const freq of doc.rawTerms.values()) {
            length += freq;
        }
        return length;
    });
    const totalTokens = docLengths.reduce((sum, length) => sum + length, 0);
    // Fall back to 1 for an empty corpus so the division below is defined.
    const avgDocLength = totalTokens > 0 ? totalTokens / index.documents.length : 1;

    const byScoreDesc = (a, b) => b.score - a.score;
    const results = [];
    let minThreshold = minScore;
    index.documents.forEach((doc, i) => {
        const matchedTerms = queryTokens.filter((token) => doc.rawTerms.has(token));
        if (matchedTerms.length === 0) {
            return;
        }
        const docLen = Math.max(docLengths[i], 1); // Avoid division by zero
        const lengthNorm = BM25_K1 * (1 - BM25_B + (BM25_B * docLen) / avgDocLength);
        // BM25 scoring
        let score = 0;
        for (const term of matchedTerms) {
            const termFreq = doc.rawTerms.get(term) || 0;
            const termIdf = index.idf.get(term) || 0;
            const numerator = termFreq * (BM25_K1 + 1);
            const denominator = termFreq + lengthNorm;
            score += termIdf * (numerator / denominator);
        }
        if (score < minThreshold) {
            return;
        }
        results.push({ uri: doc.uri, score, matchedTerms });
        // Bounded results (optimization): once twice the limit has
        // accumulated, keep the best `maxResults` and raise the admission
        // threshold to the weakest kept score.
        if (results.length >= maxResults * 2) {
            results.sort(byScoreDesc);
            results.length = maxResults;
            minThreshold = results[results.length - 1].score;
        }
    });
    return results.sort(byScoreDesc).slice(0, maxResults);
}
308
+ //# sourceMappingURL=tfidf.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.js","sourceRoot":"","sources":["../src/tfidf.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,mBAAmB,EAAE,QAAQ,IAAI,iBAAiB,EAAE,MAAM,qBAAqB,CAAA;AAExF,sCAAsC;AACtC,OAAO,EAAE,mBAAmB,EAAE,CAAA;AAE9B,kDAAkD;AAClD,MAAM,OAAO,GAAG,GAAG,CAAA,CAAC,8CAA8C;AAClE,MAAM,MAAM,GAAG,IAAI,CAAA,CAAC,oFAAoF;AAExG,6EAA6E;AAC7E,MAAM,eAAe,GAAG,IAAI,GAAG,EAAoB,CAAA;AACnD,MAAM,oBAAoB,GAAG,GAAG,CAAA;AAEhC,KAAK,UAAU,oBAAoB,CAAC,KAAa;IAChD,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IACzC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAA;IAEzB,sBAAsB;IACtB,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;IAElD,4CAA4C;IAC5C,IAAI,eAAe,CAAC,IAAI,IAAI,oBAAoB,EAAE,CAAC;QAClD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAA;QACpD,IAAI,QAAQ;YAAE,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC/C,CAAC;IAED,eAAe,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;IAClC,OAAO,MAAM,CAAA;AACd,CAAC;AAmBD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAY;IAC1C,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAA;AAC/B,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,aAAkC;IACtD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,EAAE,CAAC,CAAC,CAAA;IAC1F,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAA;IAEpC,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC;QACpD,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,UAAU,CAAC,CAAA;IAChC,CAAC;IAED,OAAO,EAAE,CAAA;AACV,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CACpB,SAAgC,EAChC,cAAsB;IAEtB,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAA;IAEnD,6CAA6C;IAC7C,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAA;QACvC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAChC,iBAAiB,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACpE,CAAC;IACF,CAAC;IAED,qDAAqD;IACrD,qEAAqE;IACrE,qEAAqE;IACrE,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAA;IACrC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,iBAAiB,CAAC,OAAO,EAAE,EAAE,CAAC;QAC3D,GAAG,CAAC,GAAG,CAAC,
IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,cAAc,GAAG,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAClE,CAAC;IAED,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,EAAuB,EAAE,GAAwB;IACxE,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAA;IAEvC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC;QAC5C,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,GAAG,QAAQ,CAAC,CAAA;IACpC,CAAC;IAED,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,MAA2B;IACtD,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QACrC,GAAG,IAAI,KAAK,GAAG,KAAK,CAAA;IACrB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACtB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,sBAAsB,CAAC,OAAe;IACpD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAA;IACtC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAA;IAE7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC5B,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,WAAW,CAAA;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACrC,SAAkD;IAElD,6CAA6C;IAC7C,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;QAC7B,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,KAAK,EAAE,MAAM,sBAAsB,CAAC,GAAG,CAAC,OAAO,CAAC;KAChD,CAAC,CAAC,CACH,CAAA;IAED,uBAAuB;IACvB,MAAM,GAAG,GAAG,YAAY,CACvB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,EACjC,SAAS,CAAC,MAAM,CAChB,CAAA;IAED,qCAAqC;IACrC,MAAM,eAAe,GAAqB,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACnE,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACjC,MAAM,KAAK,GAAG,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAA;QACrC,MAAM,SAAS,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAA;QAE3C,OAAO;YACN,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,KAAK,EAAE,KAAK;YACZ,QAAQ,EAAE,GAAG,CAAC,KAAK;YACnB,SAAS;SACT,CAAA;IACF,CAAC,CAAC,CAAA;IAEF,OAAO;QACN,SAAS,EAAE,eAAe;QAC1B,GAAG;QACH,cAAc,EAAE,SAAS,CAAC,MAAM;QAChC,QAAQ,EAAE;YACT,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACrC,OAAO,EAAE,OAAO;SAChB;KACD,CAAA;AACF,CA
AC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB,CACxC,WAAgC,EAChC,SAAyB;IAEzB,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,wBAAwB;IACxB,KAAK,MAAM,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/C,UAAU,IAAI,UAAU,GAAG,QAAQ,CAAA;IACpC,CAAC;IAED,4BAA4B;IAC5B,MAAM,cAAc,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAA;IAEtD,IAAI,cAAc,KAAK,CAAC,IAAI,SAAS,CAAC,SAAS,KAAK,CAAC,EAAE,CAAC;QACvD,OAAO,CAAC,CAAA;IACT,CAAC;IAED,OAAO,UAAU,GAAG,CAAC,cAAc,GAAG,SAAS,CAAC,SAAS,CAAC,CAAA;AAC3D,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,KAAa,EACb,GAAwB;IAExB,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAA;IACnC,OAAO,sBAAsB,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AAC1C,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,MAAgB,EAAE,GAAwB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAA;IAE7C,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAClB,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;QAChC,CAAC;IACF,CAAC;IAED,OAAO,WAAW,CAAA;AACnB,CAAC;AAaD;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC/C,KAAa,EACb,UAAiC,EACjC,GAAwB,EACxB,UAII,EAAE;IAEN,MAAM,EAAE,KAAK,GAAG,EAAE,EAAE,QAAQ,GAAG,CAAC,EAAE,GAAG,OAAO,CAAA;IAE5C,4BAA4B;IAC5B,MAAM,WAAW,GAAG,MAAM,oBAAoB,CAAC,KAAK,CAAC,CAAA;IAErD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAA;IACV,CAAC;IAED,oDAAoD;IACpD,mFAAmF;IACnF,IAAI,YAAY,GAAG,OAAO,CAAC,YAAY,CAAA;IACvC,IAAI,CAAC,YAAY,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;QACzC,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;QAC/E,YAAY,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC3E,CAAC;IACD,8DAA8D;IAC9D,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,CAAC,CAAC,CAAA;IAExC,kCAAkC;IAClC,MAAM,OAAO,GAAkE,EAAE,CAAA;IACjF,IAAI,YAAY,GAAG,QAAQ,CAAA;IAE3B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,oBAAoB;QACpB,MAAM,YAAY,GAAa,EAAE,CAAA;QACjC,KAAK,MAAM,IAAI,IAAI,WAAW,EAA
E,CAAC;YAChC,IAAI,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACxB,CAAC;QACF,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAEvC,eAAe;QACf,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,IAAI,CAAC,CAAA;QACxC,IAAI,KAAK,GAAG,CAAC,CAAA;QAEb,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YAChD,IAAI,CAAC,OAAO;gBAAE,SAAQ;YAEtB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAA;YAChC,MAAM,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAElC,gFAAgF;YAChF,MAAM,SAAS,GAAG,QAAQ,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAA;YAC1C,MAAM,WAAW,GAAG,QAAQ,GAAG,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,YAAY,CAAC,CAAA;YACxF,KAAK,IAAI,OAAO,GAAG,CAAC,SAAS,GAAG,WAAW,CAAC,CAAA;QAC7C,CAAC;QAED,IAAI,KAAK,GAAG,YAAY;YAAE,SAAQ;QAElC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,SAAS,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAA;QAEtE,0DAA0D;QAC1D,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;YACzC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAA;YACtB,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA;QACjD,CAAC;IACF,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;AACjE,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAAa;IACjD,OAAO,oBAAoB,CAAC,KAAK,CAAC,CAAA;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACpC,KAAa,EACb,KAAkB,EAClB,UAGI,EAAE;IAEN,MAAM,EAAE,KAAK,GAAG,EAAE,EAAE,QAAQ,GAAG,CAAC,EAAE,GAAG,OAAO,CAAA;IAE5C,sDAAsD;IACtD,MAAM,WAAW,GAAG,MAAM,oBAAoB,CAAC,KAAK,CAAC,CAAA;IAErD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAA;IACV,CAAC;IAED,+CAA+C;IAC/C,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACnC,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1C,WAAW,IAAI,IAAI,CAAA;QACpB,CAAC;IACF,CAAC;IACD,MAAM,YAAY,GAAG,KAAK,CAAC,SAAS,CAA
C,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAE1F,6BAA6B;IAC7B,MAAM,OAAO,GAAkE,EAAE,CAAA;IACjF,IAAI,YAAY,GAAG,QAAQ,CAAA;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACnC,oBAAoB;QACpB,MAAM,YAAY,GAAa,EAAE,CAAA;QACjC,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YACjC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7B,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACzB,CAAC;QACF,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAEvC,0DAA0D;QAC1D,IAAI,MAAM,GAAG,CAAC,CAAA;QACd,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1C,MAAM,IAAI,IAAI,CAAA;QACf,CAAC;QACD,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA,CAAC,yBAAyB;QAEtD,eAAe;QACf,IAAI,KAAK,GAAG,CAAC,CAAA;QACb,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAExC,kBAAkB;YAClB,MAAM,SAAS,GAAG,QAAQ,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAA;YAC1C,MAAM,WAAW,GAAG,QAAQ,GAAG,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,YAAY,CAAC,CAAA;YACxF,KAAK,IAAI,OAAO,GAAG,CAAC,SAAS,GAAG,WAAW,CAAC,CAAA;QAC7C,CAAC;QAED,IAAI,KAAK,GAAG,YAAY;YAAE,SAAQ;QAElC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAA;QAEnD,iCAAiC;QACjC,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;YACzC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAA;YACtB,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA;QACjD,CAAC;IACF,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;AACjE,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Tests for TF-IDF search functionality (StarCoder2 tokenizer)
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=tfidf.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.test.d.ts","sourceRoot":"","sources":["../src/tfidf.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}