@matperez/coderag 0.1.24

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (147)
  1. package/README.md +154 -0
  2. package/dist/.tsbuildinfo +1 -0
  3. package/dist/ast-chunking.d.ts +40 -0
  4. package/dist/ast-chunking.d.ts.map +1 -0
  5. package/dist/ast-chunking.js +88 -0
  6. package/dist/ast-chunking.js.map +1 -0
  7. package/dist/ast-chunking.test.d.ts +5 -0
  8. package/dist/ast-chunking.test.d.ts.map +1 -0
  9. package/dist/ast-chunking.test.js +173 -0
  10. package/dist/ast-chunking.test.js.map +1 -0
  11. package/dist/code-tokenizer.d.ts +62 -0
  12. package/dist/code-tokenizer.d.ts.map +1 -0
  13. package/dist/code-tokenizer.js +129 -0
  14. package/dist/code-tokenizer.js.map +1 -0
  15. package/dist/code-tokenizer.test.d.ts +5 -0
  16. package/dist/code-tokenizer.test.d.ts.map +1 -0
  17. package/dist/code-tokenizer.test.js +96 -0
  18. package/dist/code-tokenizer.test.js.map +1 -0
  19. package/dist/db/client-pg.d.ts +16 -0
  20. package/dist/db/client-pg.d.ts.map +1 -0
  21. package/dist/db/client-pg.js +38 -0
  22. package/dist/db/client-pg.js.map +1 -0
  23. package/dist/db/client.d.ts +36 -0
  24. package/dist/db/client.d.ts.map +1 -0
  25. package/dist/db/client.js +81 -0
  26. package/dist/db/client.js.map +1 -0
  27. package/dist/db/migrations-pg.d.ts +6 -0
  28. package/dist/db/migrations-pg.d.ts.map +1 -0
  29. package/dist/db/migrations-pg.js +88 -0
  30. package/dist/db/migrations-pg.js.map +1 -0
  31. package/dist/db/migrations.d.ts +9 -0
  32. package/dist/db/migrations.d.ts.map +1 -0
  33. package/dist/db/migrations.js +164 -0
  34. package/dist/db/migrations.js.map +1 -0
  35. package/dist/db/schema-pg.d.ts +611 -0
  36. package/dist/db/schema-pg.d.ts.map +1 -0
  37. package/dist/db/schema-pg.js +66 -0
  38. package/dist/db/schema-pg.js.map +1 -0
  39. package/dist/db/schema.d.ts +630 -0
  40. package/dist/db/schema.d.ts.map +1 -0
  41. package/dist/db/schema.js +85 -0
  42. package/dist/db/schema.js.map +1 -0
  43. package/dist/embeddings.d.ts +92 -0
  44. package/dist/embeddings.d.ts.map +1 -0
  45. package/dist/embeddings.js +275 -0
  46. package/dist/embeddings.js.map +1 -0
  47. package/dist/embeddings.test.d.ts +5 -0
  48. package/dist/embeddings.test.d.ts.map +1 -0
  49. package/dist/embeddings.test.js +255 -0
  50. package/dist/embeddings.test.js.map +1 -0
  51. package/dist/hybrid-search.d.ts +47 -0
  52. package/dist/hybrid-search.d.ts.map +1 -0
  53. package/dist/hybrid-search.js +215 -0
  54. package/dist/hybrid-search.js.map +1 -0
  55. package/dist/hybrid-search.test.d.ts +5 -0
  56. package/dist/hybrid-search.test.d.ts.map +1 -0
  57. package/dist/hybrid-search.test.js +252 -0
  58. package/dist/hybrid-search.test.js.map +1 -0
  59. package/dist/incremental-tfidf.d.ts +77 -0
  60. package/dist/incremental-tfidf.d.ts.map +1 -0
  61. package/dist/incremental-tfidf.js +248 -0
  62. package/dist/incremental-tfidf.js.map +1 -0
  63. package/dist/incremental-tfidf.test.d.ts +5 -0
  64. package/dist/incremental-tfidf.test.d.ts.map +1 -0
  65. package/dist/incremental-tfidf.test.js +276 -0
  66. package/dist/incremental-tfidf.test.js.map +1 -0
  67. package/dist/index.d.ts +18 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +19 -0
  70. package/dist/index.js.map +1 -0
  71. package/dist/indexer.d.ts +205 -0
  72. package/dist/indexer.d.ts.map +1 -0
  73. package/dist/indexer.js +1331 -0
  74. package/dist/indexer.js.map +1 -0
  75. package/dist/indexer.test.d.ts +12 -0
  76. package/dist/indexer.test.d.ts.map +1 -0
  77. package/dist/indexer.test.js +471 -0
  78. package/dist/indexer.test.js.map +1 -0
  79. package/dist/language-config.d.ts +54 -0
  80. package/dist/language-config.d.ts.map +1 -0
  81. package/dist/language-config.js +75 -0
  82. package/dist/language-config.js.map +1 -0
  83. package/dist/search-cache.d.ts +63 -0
  84. package/dist/search-cache.d.ts.map +1 -0
  85. package/dist/search-cache.js +118 -0
  86. package/dist/search-cache.js.map +1 -0
  87. package/dist/search-cache.test.d.ts +5 -0
  88. package/dist/search-cache.test.d.ts.map +1 -0
  89. package/dist/search-cache.test.js +194 -0
  90. package/dist/search-cache.test.js.map +1 -0
  91. package/dist/storage-factory.d.ts +11 -0
  92. package/dist/storage-factory.d.ts.map +1 -0
  93. package/dist/storage-factory.js +17 -0
  94. package/dist/storage-factory.js.map +1 -0
  95. package/dist/storage-persistent-pg.d.ts +75 -0
  96. package/dist/storage-persistent-pg.d.ts.map +1 -0
  97. package/dist/storage-persistent-pg.js +579 -0
  98. package/dist/storage-persistent-pg.js.map +1 -0
  99. package/dist/storage-persistent-pg.test.d.ts +7 -0
  100. package/dist/storage-persistent-pg.test.d.ts.map +1 -0
  101. package/dist/storage-persistent-pg.test.js +90 -0
  102. package/dist/storage-persistent-pg.test.js.map +1 -0
  103. package/dist/storage-persistent-types.d.ts +110 -0
  104. package/dist/storage-persistent-types.d.ts.map +1 -0
  105. package/dist/storage-persistent-types.js +5 -0
  106. package/dist/storage-persistent-types.js.map +1 -0
  107. package/dist/storage-persistent.d.ts +231 -0
  108. package/dist/storage-persistent.d.ts.map +1 -0
  109. package/dist/storage-persistent.js +897 -0
  110. package/dist/storage-persistent.js.map +1 -0
  111. package/dist/storage-persistent.test.d.ts +5 -0
  112. package/dist/storage-persistent.test.d.ts.map +1 -0
  113. package/dist/storage-persistent.test.js +325 -0
  114. package/dist/storage-persistent.test.js.map +1 -0
  115. package/dist/storage.d.ts +63 -0
  116. package/dist/storage.d.ts.map +1 -0
  117. package/dist/storage.js +67 -0
  118. package/dist/storage.js.map +1 -0
  119. package/dist/storage.test.d.ts +5 -0
  120. package/dist/storage.test.d.ts.map +1 -0
  121. package/dist/storage.test.js +157 -0
  122. package/dist/storage.test.js.map +1 -0
  123. package/dist/tfidf.d.ts +97 -0
  124. package/dist/tfidf.d.ts.map +1 -0
  125. package/dist/tfidf.js +308 -0
  126. package/dist/tfidf.js.map +1 -0
  127. package/dist/tfidf.test.d.ts +5 -0
  128. package/dist/tfidf.test.d.ts.map +1 -0
  129. package/dist/tfidf.test.js +181 -0
  130. package/dist/tfidf.test.js.map +1 -0
  131. package/dist/utils.d.ts +61 -0
  132. package/dist/utils.d.ts.map +1 -0
  133. package/dist/utils.js +264 -0
  134. package/dist/utils.js.map +1 -0
  135. package/dist/utils.test.d.ts +5 -0
  136. package/dist/utils.test.d.ts.map +1 -0
  137. package/dist/utils.test.js +94 -0
  138. package/dist/utils.test.js.map +1 -0
  139. package/dist/vector-storage.d.ts +120 -0
  140. package/dist/vector-storage.d.ts.map +1 -0
  141. package/dist/vector-storage.js +264 -0
  142. package/dist/vector-storage.js.map +1 -0
  143. package/dist/vector-storage.test.d.ts +5 -0
  144. package/dist/vector-storage.test.d.ts.map +1 -0
  145. package/dist/vector-storage.test.js +345 -0
  146. package/dist/vector-storage.test.js.map +1 -0
  147. package/package.json +85 -0
@@ -0,0 +1,75 @@
+ /**
+  * Persistent storage implementation using PostgreSQL + Drizzle ORM (pg Pool)
+  */
+ import type { CodebaseFile } from './storage.js';
+ import type { ChunkData, PersistentChunkStorage, PostgresDbConfig, SearchByTermsResultItem, StoredChunk } from './storage-persistent-types.js';
+ export declare class PostgresPersistentStorage implements PersistentChunkStorage {
+     private dbInstance;
+     private initPromise;
+     constructor(config: PostgresDbConfig);
+     getBackend(): 'postgres';
+     private initialize;
+     private ensureInit;
+     private getOne;
+     storeFile(file: CodebaseFile): Promise<void>;
+     storeFiles(files: CodebaseFile[]): Promise<void>;
+     getFile(path: string): Promise<CodebaseFile | null>;
+     getAllFiles(): Promise<CodebaseFile[]>;
+     deleteFile(path: string): Promise<void>;
+     clear(): Promise<void>;
+     storeChunks(filePath: string, chunks: ChunkData[]): Promise<number[]>;
+     storeManyChunks(fileChunks: Array<{
+         filePath: string;
+         chunks: ChunkData[];
+     }>): Promise<Map<string, number[]>>;
+     getChunksForFile(filePath: string): Promise<StoredChunk[]>;
+     getChunkCount(): Promise<number>;
+     count(): Promise<number>;
+     exists(path: string): Promise<boolean>;
+     storeChunkVectors(chunkId: number, terms: Map<string, {
+         tf: number;
+         tfidf: number;
+         rawFreq: number;
+     }>, tokenCount?: number): Promise<void>;
+     storeManyChunkVectors(chunkVectors: Array<{
+         chunkId: number;
+         terms: Map<string, {
+             tf: number;
+             tfidf: number;
+             rawFreq: number;
+         }>;
+         tokenCount?: number;
+     }>): Promise<void>;
+     storeIdfScores(idf: Map<string, number>, docFreq: Map<string, number>): Promise<void>;
+     getIdfScores(): Promise<Map<string, number>>;
+     getChunkVectors(chunkId: number): Promise<Map<string, {
+         tf: number;
+         tfidf: number;
+         rawFreq: number;
+     }> | null>;
+     getIdfScoresForTerms(terms: string[]): Promise<Map<string, number>>;
+     getTotalDocuments(): Promise<number>;
+     getAllFileMetadata(): Promise<Map<string, {
+         mtime: number;
+         hash: string;
+     }>>;
+     deleteFiles(paths: string[]): Promise<void>;
+     setMetadata(key: string, value: string): Promise<void>;
+     getMetadata(key: string): Promise<string | null>;
+     getAverageDocLength(): Promise<number>;
+     updateAverageDocLength(): Promise<number>;
+     rebuildIdfScoresFromVectors(): Promise<void>;
+     recalculateTfidfScores(): Promise<void>;
+     updateChunkMagnitudes(): Promise<void>;
+     getTermsForFiles(paths: string[]): Promise<Set<string>>;
+     searchByTerms(queryTerms: string[], options?: {
+         limit?: number;
+     }): Promise<SearchByTermsResultItem[]>;
+     getAllChunks(): Promise<StoredChunk[]>;
+     close(): void;
+ }
+ /**
+  * Create Postgres-backed persistent storage (used by createPersistentStorage when backend === 'postgres').
+  */
+ export declare function createPostgresPersistentStorage(config: PostgresDbConfig): PersistentChunkStorage;
+ //# sourceMappingURL=storage-persistent-pg.d.ts.map
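For orientation, this declaration file is the public surface of the Postgres storage backend. Below is a minimal usage sketch against these declarations. It is a sketch only: the fields of PostgresDbConfig live in storage-persistent-types.d.ts (not expanded in this view), so the connectionString field and the deep-import path are assumptions, not confirmed package API.

import { createPostgresPersistentStorage } from '@matperez/coderag/dist/storage-persistent-pg.js';

// Assumed config shape; check storage-persistent-types.d.ts for the real PostgresDbConfig fields.
const storage = createPostgresPersistentStorage({ connectionString: 'postgres://localhost:5432/coderag' } as any);

// Files must be stored before chunks: storeChunks() looks the file up by path (see the .js below).
await storage.storeFile({ path: 'src/a.ts', content: 'export const a = 1;', hash: 'deadbeef', size: 19, mtime: Date.now() });
const [chunkId] = await storage.storeChunks('src/a.ts', [{ content: 'export const a = 1;', type: 'code', startLine: 1, endLine: 1 }]);
storage.close();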
@@ -0,0 +1 @@
+ {"version":3,"file":"storage-persistent-pg.d.ts","sourceRoot":"","sources":["../src/storage-persistent-pg.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,KAAK,EAAE,YAAY,EAAW,MAAM,cAAc,CAAA;AACzD,OAAO,KAAK,EACX,SAAS,EACT,sBAAsB,EACtB,gBAAgB,EAChB,uBAAuB,EACvB,WAAW,EACX,MAAM,+BAA+B,CAAA;AAEtC,qBAAa,yBAA0B,YAAW,sBAAsB;IACvE,OAAO,CAAC,UAAU,CAAe;IACjC,OAAO,CAAC,WAAW,CAAe;gBAEtB,MAAM,EAAE,gBAAgB;IAIpC,UAAU,IAAI,UAAU;YAIV,UAAU;YAIV,UAAU;YAIV,MAAM;IAKd,SAAS,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IA6B5C,UAAU,CAAC,KAAK,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IA+BhD,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;IAiBnD,WAAW,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;IActC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAMvC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAUtB,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA2BrE,eAAe,CACpB,UAAU,EAAE,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,SAAS,EAAE,CAAA;KAAE,CAAC,GAC1D,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IAwC3B,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IA6B1D,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC;IAOhC,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IAOxB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAStC,iBAAiB,CACtB,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,EAClE,UAAU,CAAC,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC;IAwBV,qBAAqB,CAC1B,YAAY,EAAE,KAAK,CAAC;QACnB,OAAO,EAAE,MAAM,CAAA;QACf,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE;YAAE,EAAE,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;QAClE,UAAU,CAAC,EAAE,MAAM,CAAA;KACnB,CAAC,GACA,OAAO,CAAC,IAAI,CAAC;IAmCV,cAAc,CAAC,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBrF,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAS5C,eAAe,CACpB,OAAO,EAAE,MAAM,GACb,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,IAAI,CAAC;IAexE,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAUnE,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;IAIpC,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAa3E,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAW3C,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAYtD,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAShD,mBAAmB,IAAI,OAAO,CAAC,MAAM,CAAC;IAetC,sBAAsB,IAAI,OAAO,CAAC,MAAM,CAAC;IAazC,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC;IA4B5C,sBAAsB,IAAI,OAAO,CAAC,IAAI,CAAC;IAWvC,qBAAqB,IAAI,OAAO,CAAC,IAAI,CAAC;IAWtC,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAgBvD,aAAa,CAClB,UAAU,EAAE,MAAM,EAAE,EACpB,OAAO,GAAE;QAAE,KAAK,CAAC,EAAE,MAAM,CAAA;KAAO,GAC9B,OAAO,CAAC,uBAAuB,EAAE,CAAC;IAgF/B,YAAY,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IA4B5C,KAAK,IAAI,IAAI;CAGb;AAED;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,MAAM,EAAE,gBAAgB,GAAG,sBAAsB,CAEhG"}
@@ -0,0 +1,579 @@
+ /**
+  * Persistent storage implementation using PostgreSQL + Drizzle ORM (pg Pool)
+  */
+ import { desc, eq, inArray, sql } from 'drizzle-orm';
+ import { createPostgresDb } from './db/client-pg.js';
+ import * as schema from './db/schema-pg.js';
+ export class PostgresPersistentStorage {
+     dbInstance;
+     initPromise;
+     constructor(config) {
+         this.initPromise = this.initialize(config);
+     }
+     getBackend() {
+         return 'postgres';
+     }
+     async initialize(config) {
+         this.dbInstance = await createPostgresDb(config);
+     }
+     async ensureInit() {
+         await this.initPromise;
+     }
+     async getOne(promise) {
+         const rows = await promise;
+         return rows[0];
+     }
+     async storeFile(file) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const mtime = typeof file.mtime === 'number' ? file.mtime : file.mtime.getTime();
+         const values = {
+             path: file.path,
+             content: file.content,
+             hash: file.hash,
+             size: file.size,
+             mtime,
+             ...(file.language ? { language: file.language } : {}),
+             indexedAt: Date.now(),
+         };
+         await db
+             .insert(schema.files)
+             .values(values)
+             .onConflictDoUpdate({
+                 target: schema.files.path,
+                 set: {
+                     content: values.content,
+                     hash: values.hash,
+                     size: values.size,
+                     mtime: values.mtime,
+                     indexedAt: values.indexedAt,
+                     ...(values.language != null ? { language: values.language } : {}),
+                 },
+             });
+     }
+     async storeFiles(files) {
+         if (files.length === 0)
+             return;
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         for (const file of files) {
+             const mtime = typeof file.mtime === 'number' ? file.mtime : file.mtime.getTime();
+             await db
+                 .insert(schema.files)
+                 .values({
+                     path: file.path,
+                     content: file.content,
+                     hash: file.hash,
+                     size: file.size,
+                     mtime,
+                     indexedAt: Date.now(),
+                     ...(file.language != null ? { language: file.language } : {}),
+                 })
+                 .onConflictDoUpdate({
+                     target: schema.files.path,
+                     set: {
+                         content: file.content,
+                         hash: file.hash,
+                         size: file.size,
+                         mtime,
+                         indexedAt: Date.now(),
+                         ...(file.language != null ? { language: file.language } : {}),
+                     },
+                 });
+         }
+     }
+     async getFile(path) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const result = await this.getOne(db.select().from(schema.files).where(eq(schema.files.path, path)).limit(1));
+         if (!result)
+             return null;
+         return {
+             path: result.path,
+             content: result.content,
+             hash: result.hash,
+             size: result.size,
+             mtime: result.mtime,
+             language: result.language ?? undefined,
+         };
+     }
+     async getAllFiles() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const results = await db.select().from(schema.files);
+         return results.map((file) => ({
+             path: file.path,
+             content: file.content,
+             hash: file.hash,
+             size: file.size,
+             mtime: file.mtime,
+             language: file.language ?? undefined,
+         }));
+     }
+     async deleteFile(path) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         await db.delete(schema.files).where(eq(schema.files.path, path));
+     }
+     async clear() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         await db.delete(schema.chunks);
+         await db.delete(schema.documentVectors);
+         await db.delete(schema.files);
+         await db.delete(schema.idfScores);
+         await db.delete(schema.indexMetadata);
+     }
+     async storeChunks(filePath, chunks) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const file = await this.getOne(db.select().from(schema.files).where(eq(schema.files.path, filePath)).limit(1));
+         if (!file)
+             throw new Error(`File not found: ${filePath}`);
+         await db.delete(schema.chunks).where(eq(schema.chunks.fileId, file.id));
+         const chunkIds = [];
+         for (const chunk of chunks) {
+             const insertValues = {
+                 fileId: file.id,
+                 content: chunk.content,
+                 type: chunk.type,
+                 startLine: chunk.startLine,
+                 endLine: chunk.endLine,
+             };
+             if (chunk.metadata)
+                 insertValues.metadata = JSON.stringify(chunk.metadata);
+             const insertResult = await db
+                 .insert(schema.chunks)
+                 .values(insertValues)
+                 .returning({ id: schema.chunks.id });
+             if (insertResult[0])
+                 chunkIds.push(insertResult[0].id);
+         }
+         return chunkIds;
+     }
+     async storeManyChunks(fileChunks) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const result = new Map();
+         if (fileChunks.length === 0)
+             return result;
+         const filePaths = fileChunks.map((fc) => fc.filePath);
+         const files = await db
+             .select({ id: schema.files.id, path: schema.files.path })
+             .from(schema.files)
+             .where(inArray(schema.files.path, filePaths));
+         const fileIdMap = new Map();
+         for (const file of files)
+             fileIdMap.set(file.path, file.id);
+         const fileIds = Array.from(fileIdMap.values());
+         if (fileIds.length > 0) {
+             await db.delete(schema.chunks).where(inArray(schema.chunks.fileId, fileIds));
+         }
+         for (const fc of fileChunks) {
+             const fileId = fileIdMap.get(fc.filePath);
+             if (!fileId)
+                 continue;
+             const chunkIds = [];
+             for (const chunk of fc.chunks) {
+                 const insertValues = {
+                     fileId,
+                     content: chunk.content,
+                     type: chunk.type,
+                     startLine: chunk.startLine,
+                     endLine: chunk.endLine,
+                 };
+                 if (chunk.metadata)
+                     insertValues.metadata = JSON.stringify(chunk.metadata);
+                 const insertResult = await db
+                     .insert(schema.chunks)
+                     .values(insertValues)
+                     .returning({ id: schema.chunks.id });
+                 if (insertResult[0])
+                     chunkIds.push(insertResult[0].id);
+             }
+             result.set(fc.filePath, chunkIds);
+         }
+         return result;
+     }
+     async getChunksForFile(filePath) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const results = await db
+             .select({
+                 id: schema.chunks.id,
+                 fileId: schema.chunks.fileId,
+                 content: schema.chunks.content,
+                 type: schema.chunks.type,
+                 startLine: schema.chunks.startLine,
+                 endLine: schema.chunks.endLine,
+                 metadata: schema.chunks.metadata,
+                 filePath: schema.files.path,
+             })
+             .from(schema.chunks)
+             .innerJoin(schema.files, eq(schema.chunks.fileId, schema.files.id))
+             .where(eq(schema.files.path, filePath));
+         return results.map((r) => ({
+             id: r.id,
+             fileId: r.fileId,
+             filePath: r.filePath,
+             content: r.content,
+             type: r.type,
+             startLine: r.startLine,
+             endLine: r.endLine,
+             metadata: r.metadata ? JSON.parse(r.metadata) : undefined,
+         }));
+     }
+     async getChunkCount() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const rows = await db.select({ count: sql`count(*)::int` }).from(schema.chunks);
+         return Number(rows[0]?.count ?? 0);
+     }
+     async count() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const rows = await db.select({ count: sql`count(*)::int` }).from(schema.files);
+         return Number(rows[0]?.count ?? 0);
+     }
+     async exists(path) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const result = await this.getOne(db.select().from(schema.files).where(eq(schema.files.path, path)).limit(1));
+         return result !== undefined;
+     }
+     async storeChunkVectors(chunkId, terms, tokenCount) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         await db.delete(schema.documentVectors).where(eq(schema.documentVectors.chunkId, chunkId));
+         if (tokenCount !== undefined) {
+             await this.dbInstance.pool.query('UPDATE chunks SET token_count = $1 WHERE id = $2', [
+                 tokenCount,
+                 chunkId,
+             ]);
+         }
+         const BATCH_SIZE = 500;
+         const vectors = Array.from(terms.entries()).map(([term, scores]) => ({
+             chunkId,
+             term,
+             tf: scores.tf,
+             tfidf: scores.tfidf,
+             rawFreq: scores.rawFreq,
+         }));
+         for (let i = 0; i < vectors.length; i += BATCH_SIZE) {
+             const batch = vectors.slice(i, i + BATCH_SIZE);
+             await db.insert(schema.documentVectors).values(batch);
+         }
+     }
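Note the BATCH_SIZE = 500 loop above. PostgreSQL caps a single prepared statement at 65,535 bind parameters, and each vector row binds several values, so multi-row inserts have to be chunked; the same loop recurs throughout this file. A generic helper capturing the pattern would look like the sketch below (illustrative only, not part of the package, which inlines the loop):

// Hypothetical helper for chunked bulk writes.
async function insertInBatches<T>(rows: T[], write: (batch: T[]) => Promise<void>, batchSize = 500): Promise<void> {
    for (let i = 0; i < rows.length; i += batchSize) {
        await write(rows.slice(i, i + batchSize));
    }
}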
+     async storeManyChunkVectors(chunkVectors) {
+         if (chunkVectors.length === 0)
+             return;
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const chunkIds = chunkVectors.map((cv) => cv.chunkId);
+         if (chunkIds.length > 0) {
+             await db.delete(schema.documentVectors).where(inArray(schema.documentVectors.chunkId, chunkIds));
+         }
+         for (const cv of chunkVectors) {
+             if (cv.tokenCount !== undefined) {
+                 await this.dbInstance.pool.query('UPDATE chunks SET token_count = $1 WHERE id = $2', [cv.tokenCount, cv.chunkId]);
+             }
+         }
+         const allVectors = [];
+         for (const cv of chunkVectors) {
+             for (const [term, scores] of cv.terms.entries()) {
+                 allVectors.push({
+                     chunkId: cv.chunkId,
+                     term,
+                     tf: scores.tf,
+                     tfidf: scores.tfidf,
+                     rawFreq: scores.rawFreq,
+                 });
+             }
+         }
+         const batchSize = 500;
+         for (let i = 0; i < allVectors.length; i += batchSize) {
+             const batch = allVectors.slice(i, i + batchSize);
+             if (batch.length > 0)
+                 await db.insert(schema.documentVectors).values(batch);
+         }
+     }
+     async storeIdfScores(idf, docFreq) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         await db.delete(schema.idfScores);
+         const BATCH_SIZE = 300;
+         const scores = Array.from(idf.entries()).map(([term, idfScore]) => ({
+             term,
+             idf: idfScore,
+             documentFrequency: docFreq.get(term) ?? 0,
+         }));
+         for (let i = 0; i < scores.length; i += BATCH_SIZE) {
+             const batch = scores.slice(i, i + BATCH_SIZE);
+             await db.insert(schema.idfScores).values(batch);
+         }
+     }
+     async getIdfScores() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const scores = await db.select().from(schema.idfScores);
+         const idf = new Map();
+         for (const score of scores)
+             idf.set(score.term, score.idf);
+         return idf;
+     }
+     async getChunkVectors(chunkId) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const vectors = await db
+             .select()
+             .from(schema.documentVectors)
+             .where(eq(schema.documentVectors.chunkId, chunkId));
+         if (vectors.length === 0)
+             return null;
+         const map = new Map();
+         for (const v of vectors) {
+             map.set(v.term, { tf: v.tf, tfidf: v.tfidf, rawFreq: v.rawFreq });
+         }
+         return map;
+     }
+     async getIdfScoresForTerms(terms) {
+         if (terms.length === 0)
+             return new Map();
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const scores = await db.select().from(schema.idfScores).where(inArray(schema.idfScores.term, terms));
+         const idf = new Map();
+         for (const score of scores)
+             idf.set(score.term, score.idf);
+         return idf;
+     }
+     async getTotalDocuments() {
+         return this.getChunkCount();
+     }
+     async getAllFileMetadata() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const results = await db.select({
+             path: schema.files.path,
+             mtime: schema.files.mtime,
+             hash: schema.files.hash,
+         }).from(schema.files);
+         const metadata = new Map();
+         for (const row of results)
+             metadata.set(row.path, { mtime: row.mtime, hash: row.hash });
+         return metadata;
+     }
+     async deleteFiles(paths) {
+         if (paths.length === 0)
+             return;
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const chunkSize = 500;
+         for (let i = 0; i < paths.length; i += chunkSize) {
+             const chunk = paths.slice(i, i + chunkSize);
+             await db.delete(schema.files).where(inArray(schema.files.path, chunk));
+         }
+     }
+     async setMetadata(key, value) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         await db
+             .insert(schema.indexMetadata)
+             .values({ key, value, updatedAt: Date.now() })
+             .onConflictDoUpdate({
+                 target: schema.indexMetadata.key,
+                 set: { value, updatedAt: Date.now() },
+             });
+     }
+     async getMetadata(key) {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const result = await this.getOne(db.select().from(schema.indexMetadata).where(eq(schema.indexMetadata.key, key)).limit(1));
+         return result?.value ?? null;
+     }
+     async getAverageDocLength() {
+         await this.ensureInit();
+         const cached = await this.getMetadata('avgDocLength');
+         if (cached)
+             return parseFloat(cached);
+         const { db } = this.dbInstance;
+         const result = await this.getOne(db.select({
+             avgLen: sql`AVG(COALESCE(${schema.chunks.tokenCount}, 0))::double precision`,
+         }).from(schema.chunks));
+         const avgLen = result?.avgLen ?? 0;
+         await this.setMetadata('avgDocLength', String(avgLen));
+         return avgLen;
+     }
+     async updateAverageDocLength() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const result = await this.getOne(db.select({
+             avgLen: sql`AVG(COALESCE(${schema.chunks.tokenCount}, 0))::double precision`,
+         }).from(schema.chunks));
+         const avgLen = result?.avgLen ?? 0;
+         await this.setMetadata('avgDocLength', String(avgLen));
+         return avgLen;
+     }
+     async rebuildIdfScoresFromVectors() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const totalChunks = await this.getChunkCount();
+         if (totalChunks === 0) {
+             await db.delete(schema.idfScores);
+             return;
+         }
+         const dfResults = await db
+             .select({
+                 term: schema.documentVectors.term,
+                 df: sql`COUNT(DISTINCT ${schema.documentVectors.chunkId})::int`,
+             })
+             .from(schema.documentVectors)
+             .groupBy(schema.documentVectors.term);
+         await db.delete(schema.idfScores);
+         const BATCH_SIZE = 300;
+         const scores = dfResults.map((row) => ({
+             term: row.term,
+             idf: Math.log((totalChunks + 1) / (row.df + 1)) + 1,
+             documentFrequency: row.df,
+         }));
+         for (let i = 0; i < scores.length; i += BATCH_SIZE) {
+             const batch = scores.slice(i, i + BATCH_SIZE);
+             if (batch.length > 0)
+                 await db.insert(schema.idfScores).values(batch);
+         }
+     }
+     async recalculateTfidfScores() {
+         await this.ensureInit();
+         await this.dbInstance.pool.query(`
+             UPDATE document_vectors
+             SET tfidf = tf * COALESCE(
+                 (SELECT idf FROM idf_scores WHERE idf_scores.term = document_vectors.term),
+                 0
+             )
+         `);
+     }
+     async updateChunkMagnitudes() {
+         await this.ensureInit();
+         await this.dbInstance.pool.query(`
+             UPDATE chunks
+             SET magnitude = COALESCE(
+                 (SELECT SQRT(SUM(tfidf * tfidf))::real FROM document_vectors WHERE document_vectors.chunk_id = chunks.id),
+                 0
+             )
+         `);
+     }
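updateChunkMagnitudes precomputes each chunk's Euclidean norm, magnitude = sqrt(sum(tfidf^2)), so cosine similarity can later be computed from just the matched terms plus this one stored scalar. A sketch of how such a precomputed norm is typically consumed (the actual scoring lives elsewhere in the package, e.g. hybrid-search.js, so treat this as an illustration of the math, not package code):

// Cosine similarity against a precomputed document magnitude (illustrative).
function cosine(query: Map<string, number>, matched: Map<string, { tfidf: number }>, docMagnitude: number): number {
    let dot = 0;
    let qNormSq = 0;
    for (const [term, qw] of query) {
        qNormSq += qw * qw;
        const d = matched.get(term);
        if (d) dot += qw * d.tfidf;
    }
    const denom = Math.sqrt(qNormSq) * docMagnitude;
    return denom > 0 ? dot / denom : 0;
}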
+     async getTermsForFiles(paths) {
+         if (paths.length === 0)
+             return new Set();
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const terms = new Set();
+         const files = await db.select({ id: schema.files.id }).from(schema.files).where(inArray(schema.files.path, paths));
+         if (files.length === 0)
+             return terms;
+         const fileIds = files.map((f) => f.id);
+         const chunks = await db.select({ id: schema.chunks.id }).from(schema.chunks).where(inArray(schema.chunks.fileId, fileIds));
+         if (chunks.length === 0)
+             return terms;
+         const chunkIds = chunks.map((c) => c.id);
+         const results = await db.select({ term: schema.documentVectors.term }).from(schema.documentVectors).where(inArray(schema.documentVectors.chunkId, chunkIds));
+         for (const row of results)
+             terms.add(row.term);
+         return terms;
+     }
+     async searchByTerms(queryTerms, options = {}) {
+         if (queryTerms.length === 0)
+             return [];
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const limit = options.limit ?? 100;
+         const matchingChunks = await db
+             .select({
+                 chunkId: schema.documentVectors.chunkId,
+                 filePath: schema.files.path,
+                 content: schema.chunks.content,
+                 type: schema.chunks.type,
+                 startLine: schema.chunks.startLine,
+                 endLine: schema.chunks.endLine,
+                 magnitude: schema.chunks.magnitude,
+                 tokenCount: schema.chunks.tokenCount,
+                 matchCount: sql`COUNT(DISTINCT ${schema.documentVectors.term})::int`,
+             })
+             .from(schema.documentVectors)
+             .innerJoin(schema.chunks, eq(schema.documentVectors.chunkId, schema.chunks.id))
+             .innerJoin(schema.files, eq(schema.chunks.fileId, schema.files.id))
+             .where(inArray(schema.documentVectors.term, queryTerms))
+             .groupBy(schema.documentVectors.chunkId, schema.files.path, schema.chunks.content, schema.chunks.type, schema.chunks.startLine, schema.chunks.endLine, schema.chunks.magnitude, schema.chunks.tokenCount)
+             .orderBy(desc(sql`COUNT(DISTINCT ${schema.documentVectors.term})`))
+             .limit(limit * 2);
+         if (matchingChunks.length === 0)
+             return [];
+         const chunkIds = matchingChunks.map((c) => c.chunkId);
+         const matchedVectors = await db
+             .select({
+                 chunkId: schema.documentVectors.chunkId,
+                 term: schema.documentVectors.term,
+                 tfidf: schema.documentVectors.tfidf,
+                 rawFreq: schema.documentVectors.rawFreq,
+             })
+             .from(schema.documentVectors)
+             .where(sql`${schema.documentVectors.chunkId} IN (${sql.join(chunkIds.map((id) => sql`${id}`), sql`, `)}) AND ${schema.documentVectors.term} IN (${sql.join(queryTerms.map((t) => sql`${t}`), sql`, `)})`);
+         const resultMap = new Map();
+         for (const c of matchingChunks) {
+             resultMap.set(c.chunkId, {
+                 chunkId: c.chunkId,
+                 filePath: c.filePath,
+                 content: c.content,
+                 type: c.type,
+                 startLine: c.startLine,
+                 endLine: c.endLine,
+                 matchedTerms: new Map(),
+                 magnitude: c.magnitude ?? 0,
+                 tokenCount: c.tokenCount ?? 0,
+             });
+         }
+         for (const v of matchedVectors) {
+             const entry = resultMap.get(v.chunkId);
+             if (entry)
+                 entry.matchedTerms.set(v.term, { tfidf: v.tfidf, rawFreq: v.rawFreq });
+         }
+         return Array.from(resultMap.values());
+     }
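searchByTerms is a deliberately two-phase lookup: the first query ranks candidate chunks by COUNT(DISTINCT matched term), over-fetching limit * 2 rows to leave headroom for re-scoring, and the second query loads the matched per-term vectors only for those candidates, leaving final relevance scoring to the caller. A sketch of that caller-side step, reusing the storage instance from the earlier sketch (illustrative; the package's own ranking lives in its search modules):

// Re-rank candidates by summed tf-idf of matched terms (illustrative consumer code).
const candidates = await storage.searchByTerms(['tokenize', 'parser'], { limit: 20 });
const ranked = candidates
    .map((c) => ({ c, score: [...c.matchedTerms.values()].reduce((s, t) => s + t.tfidf, 0) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, 20);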
+     async getAllChunks() {
+         await this.ensureInit();
+         const { db } = this.dbInstance;
+         const results = await db
+             .select({
+                 id: schema.chunks.id,
+                 fileId: schema.chunks.fileId,
+                 content: schema.chunks.content,
+                 type: schema.chunks.type,
+                 startLine: schema.chunks.startLine,
+                 endLine: schema.chunks.endLine,
+                 metadata: schema.chunks.metadata,
+                 filePath: schema.files.path,
+             })
+             .from(schema.chunks)
+             .innerJoin(schema.files, eq(schema.chunks.fileId, schema.files.id));
+         return results.map((r) => ({
+             id: r.id,
+             fileId: r.fileId,
+             filePath: r.filePath,
+             content: r.content,
+             type: r.type,
+             startLine: r.startLine,
+             endLine: r.endLine,
+             metadata: r.metadata ? JSON.parse(r.metadata) : undefined,
+         }));
+     }
+     close() {
+         this.dbInstance.pool.end();
+     }
+ }
+ /**
+  * Create Postgres-backed persistent storage (used by createPersistentStorage when backend === 'postgres').
+  */
+ export function createPostgresPersistentStorage(config) {
+     return new PostgresPersistentStorage(config);
+ }
+ //# sourceMappingURL=storage-persistent-pg.js.map
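A closing note on the statistics. rebuildIdfScoresFromVectors uses the smoothed IDF variant idf(t) = ln((N + 1) / (df(t) + 1)) + 1, the same smoothing scikit-learn's TfidfVectorizer applies, which keeps every score positive even for a term present in all N chunks. Worked example with N = 1000: a term in 10 chunks scores ln(1001/11) + 1 ≈ 5.51, while a term in every chunk scores ln(1001/1001) + 1 = 1. recalculateTfidfScores then refreshes tfidf = tf * idf in bulk SQL, and updateChunkMagnitudes refreshes the stored norms; presumably the three are meant to run together as a post-indexing maintenance pass.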