@vivantel/rag-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/config/release-please.json +38 -0
- package/.github/dependabot.yaml +28 -0
- package/.github/workflows/ci.yaml +119 -0
- package/.github/workflows/publish.yaml +151 -0
- package/.github/workflows/release.yaml +150 -0
- package/.versionrc.json +19 -0
- package/CHANGELOG.md +21 -0
- package/README.md +62 -0
- package/bin/rag-update.ts +49 -0
- package/dist/config-loader.d.ts +3 -0
- package/dist/config-loader.d.ts.map +1 -0
- package/dist/config-loader.js +13 -0
- package/dist/config-loader.js.map +1 -0
- package/dist/core/chunk-processor.d.ts +12 -0
- package/dist/core/chunk-processor.d.ts.map +1 -0
- package/dist/core/chunk-processor.js +65 -0
- package/dist/core/chunk-processor.js.map +1 -0
- package/dist/core/embedder.d.ts +19 -0
- package/dist/core/embedder.d.ts.map +1 -0
- package/dist/core/embedder.js +139 -0
- package/dist/core/embedder.js.map +1 -0
- package/dist/core/git-tracker.d.ts +25 -0
- package/dist/core/git-tracker.d.ts.map +1 -0
- package/dist/core/git-tracker.js +164 -0
- package/dist/core/git-tracker.js.map +1 -0
- package/dist/core/orchestrator.d.ts +22 -0
- package/dist/core/orchestrator.d.ts.map +1 -0
- package/dist/core/orchestrator.js +57 -0
- package/dist/core/orchestrator.js.map +1 -0
- package/dist/core/uploader.d.ts +15 -0
- package/dist/core/uploader.d.ts.map +1 -0
- package/dist/core/uploader.js +79 -0
- package/dist/core/uploader.js.map +1 -0
- package/dist/core/utils.d.ts +6 -0
- package/dist/core/utils.d.ts.map +1 -0
- package/dist/core/utils.js +23 -0
- package/dist/core/utils.js.map +1 -0
- package/dist/helpers/create-chunker.d.ts +9 -0
- package/dist/helpers/create-chunker.d.ts.map +1 -0
- package/dist/helpers/create-chunker.js +24 -0
- package/dist/helpers/create-chunker.js.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/interfaces/chunker.d.ts +46 -0
- package/dist/interfaces/chunker.d.ts.map +1 -0
- package/dist/interfaces/chunker.js +5 -0
- package/dist/interfaces/chunker.js.map +1 -0
- package/dist/interfaces/embedder.d.ts +28 -0
- package/dist/interfaces/embedder.d.ts.map +1 -0
- package/dist/interfaces/embedder.js +5 -0
- package/dist/interfaces/embedder.js.map +1 -0
- package/dist/interfaces/index.d.ts +4 -0
- package/dist/interfaces/index.d.ts.map +1 -0
- package/dist/interfaces/index.js +4 -0
- package/dist/interfaces/index.js.map +1 -0
- package/dist/interfaces/vector-store.d.ts +53 -0
- package/dist/interfaces/vector-store.d.ts.map +1 -0
- package/dist/interfaces/vector-store.js +5 -0
- package/dist/interfaces/vector-store.js.map +1 -0
- package/dist/strategies/chunk/index.d.ts +5 -0
- package/dist/strategies/chunk/index.d.ts.map +1 -0
- package/dist/strategies/chunk/index.js +5 -0
- package/dist/strategies/chunk/index.js.map +1 -0
- package/dist/strategies/chunk/markdown-headers.d.ts +7 -0
- package/dist/strategies/chunk/markdown-headers.d.ts.map +1 -0
- package/dist/strategies/chunk/markdown-headers.js +89 -0
- package/dist/strategies/chunk/markdown-headers.js.map +1 -0
- package/dist/strategies/chunk/semantic.d.ts +7 -0
- package/dist/strategies/chunk/semantic.d.ts.map +1 -0
- package/dist/strategies/chunk/semantic.js +62 -0
- package/dist/strategies/chunk/semantic.js.map +1 -0
- package/dist/strategies/chunk/token.d.ts +12 -0
- package/dist/strategies/chunk/token.d.ts.map +1 -0
- package/dist/strategies/chunk/token.js +56 -0
- package/dist/strategies/chunk/token.js.map +1 -0
- package/dist/strategies/chunk/whole-file.d.ts +3 -0
- package/dist/strategies/chunk/whole-file.d.ts.map +1 -0
- package/dist/strategies/chunk/whole-file.js +31 -0
- package/dist/strategies/chunk/whole-file.js.map +1 -0
- package/eslint.config.js +25 -0
- package/package.json +102 -0
- package/src/config-loader.ts +21 -0
- package/src/core/chunk-processor.test.ts +36 -0
- package/src/core/chunk-processor.ts +92 -0
- package/src/core/embedder.ts +189 -0
- package/src/core/git-tracker.test.ts +64 -0
- package/src/core/git-tracker.ts +202 -0
- package/src/core/orchestrator.test.ts +53 -0
- package/src/core/orchestrator.ts +97 -0
- package/src/core/uploader.ts +123 -0
- package/src/core/utils.ts +27 -0
- package/src/helpers/create-chunker.test.ts +31 -0
- package/src/helpers/create-chunker.ts +40 -0
- package/src/index.test.ts +33 -0
- package/src/index.ts +30 -0
- package/src/interfaces/chunker.ts +59 -0
- package/src/interfaces/embedder.ts +36 -0
- package/src/interfaces/index.test.ts +9 -0
- package/src/interfaces/index.ts +3 -0
- package/src/interfaces/vector-store.ts +71 -0
- package/src/strategies/chunk/index.ts +4 -0
- package/src/strategies/chunk/markdown-headers.test.ts +37 -0
- package/src/strategies/chunk/markdown-headers.ts +106 -0
- package/src/strategies/chunk/semantic.test.ts +21 -0
- package/src/strategies/chunk/semantic.ts +80 -0
- package/src/strategies/chunk/token.test.ts +41 -0
- package/src/strategies/chunk/token.ts +72 -0
- package/src/strategies/chunk/whole-file.test.ts +24 -0
- package/src/strategies/chunk/whole-file.ts +35 -0
- package/tsconfig.json +21 -0
- package/typedoc.json +11 -0
- package/vitest.config.ts +19 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { createHash } from "crypto";
|
|
2
|
+
/**
 * Fingerprints chunk content.
 *
 * @param {string} content Text to hash.
 * @returns {string} The first 16 hex characters of the SHA-256 digest of `content`.
 */
function computeContentHash(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    const hexDigest = hasher.digest("hex");
    return hexDigest.substring(0, 16);
}
|
|
5
|
+
/**
 * Runs chunkers over source files and persists the resulting chunks as JSON.
 */
export class ChunkProcessor {
    /** Registered chunkers, keyed by their `name`. */
    chunkers;

    /**
     * @param {Array<{name: string}>} chunkers Chunkers to register.
     */
    constructor(chunkers) {
        this.chunkers = new Map(chunkers.map((c) => [c.name, c]));
    }

    /**
     * Chunks a single file and stamps every chunk with its content hash,
     * source file and commit hash.
     *
     * @param {string} filePath File to chunk.
     * @param {string} commitHash Commit hash recorded on each chunk.
     * @param {{chunk: Function}} chunker Chunker whose `chunk(filePath, commitHash)` is awaited.
     * @returns {Promise<Array>} The stamped chunks, or `[]` when chunking throws
     *   (the error is logged, not propagated).
     */
    async processFile(filePath, commitHash, chunker) {
        try {
            const chunks = await chunker.chunk(filePath, commitHash);
            for (const chunk of chunks) {
                chunk.contentHash = computeContentHash(chunk.content);
                chunk.sourceFile = filePath;
                chunk.commitHash = commitHash;
            }
            return chunks;
        }
        catch (error) {
            console.error(` â Error processing ${filePath}: ${error}`);
            return [];
        }
    }

    /**
     * Chunks every listed file that has an entry in `fileState`, in order.
     *
     * @param {string[]} files Paths to process.
     * @param {Map<string, {commitHash: string, chunker: object}>} fileState
     *   Per-file commit hash and chunker; files without an entry are skipped.
     * @returns {Promise<Array>} All chunks produced, in file order.
     */
    async processFiles(files, fileState) {
        const allChunks = [];
        for (let i = 0; i < files.length; i++) {
            const filePath = files[i];
            const info = fileState.get(filePath);
            if (!info) {
                console.log(` â ď¸ No chunker for: ${filePath}`);
                continue;
            }
            console.log(` [${i + 1}/${files.length}] ${filePath}`);
            const chunks = await this.processFile(filePath, info.commitHash, info.chunker);
            if (chunks.length > 0) {
                allChunks.push(...chunks);
                console.log(` â\nGenerated ${chunks.length} chunk(s)`);
            }
            else {
                console.log(` â ď¸ No chunks generated (skipped)`);
            }
        }
        return allChunks;
    }

    /**
     * Merges `chunks` into the JSON array stored at `outputFile`: stored chunks
     * whose `sourceFile` was re-processed are replaced, all others are kept.
     *
     * @param {Array<{sourceFile: string}>} chunks Freshly generated chunks.
     * @param {string} outputFile Destination JSON file; parent directories are created.
     * @returns {Promise<void>}
     * @throws {Error} When an existing `outputFile` cannot be read, is not valid
     *   JSON, or does not contain an array.
     */
    async saveChunksLocal(chunks, outputFile) {
        const { dirname } = await import("path");
        const { mkdir, writeFile, readFile } = await import("fs/promises");
        await mkdir(dirname(outputFile), { recursive: true });
        let existing = [];
        try {
            existing = JSON.parse(await readFile(outputFile, "utf-8"));
        }
        catch (error) {
            // Only a missing file means "start from scratch". Any other failure
            // (corrupt JSON, permissions) must not be papered over, because the
            // write below would discard the chunks of every unprocessed file.
            if (error?.code !== "ENOENT") {
                throw new Error(`Cannot read existing chunks from ${outputFile}`, { cause: error });
            }
        }
        if (!Array.isArray(existing)) {
            throw new Error(`Existing chunks file does not contain an array: ${outputFile}`);
        }
        const processedFiles = new Set(chunks.map((c) => c.sourceFile));
        const retained = existing.filter((c) => !processedFiles.has(c.sourceFile));
        const allChunks = [...retained, ...chunks];
        await writeFile(outputFile, JSON.stringify(allChunks, null, 2));
        console.log(`\nđž Saved ${allChunks.length} chunks to ${outputFile}`);
    }
}
|
|
65
|
+
//# sourceMappingURL=chunk-processor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-processor.js","sourceRoot":"","sources":["../../src/core/chunk-processor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,SAAS,kBAAkB,CAAC,OAAe;IACzC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,MAAM,OAAO,cAAc;IACjB,QAAQ,CAA2B;IAE3C,YAAY,QAAuB;QACjC,IAAI,CAAC,QAAQ,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,WAAW,CACf,QAAgB,EAChB,UAAkB,EAClB,OAAoB;QAEpB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YAEzD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,KAAK,CAAC,WAAW,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBACtD,KAAK,CAAC,UAAU,GAAG,QAAQ,CAAC;gBAC5B,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC;YAChC,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,wBAAwB,QAAQ,KAAK,KAAK,EAAE,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,KAAe,EACf,SAAoE;QAEpE,MAAM,SAAS,GAAY,EAAE,CAAC;QAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAErC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,OAAO,CAAC,GAAG,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC,CAAC;YAExD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CACnC,QAAQ,EACR,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,OAAO,CACb,CAAC;YAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,mBAAmB,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC;YAC3D,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;YACtD,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,MAAe,EAAE,UAAkB;QACvD,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,G
AAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QAEnE,MAAM,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAY,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACpD,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,qBAAqB;QACvB,CAAC;QAED,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;QAE3E,MAAM,SAAS,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;QAE3C,MAAM,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,cAAc,UAAU,EAAE,CAAC,CAAC;IACxE,CAAC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { EmbeddingProvider, EmbeddedChunk, Chunk } from "../interfaces/index.js";
|
|
2
|
+
/**
 * Embeds chunks through an `EmbeddingProvider` and keeps the results in an
 * embeddings JSON file whose path is derived from the chunks file path.
 */
export declare class EmbedderProcessor {
    private provider;
    private rateLimitMs;
    private batchSize;
    /**
     * @param provider Provider used to compute embeddings.
     * @param options `rateLimitMs` is the pause between consecutive single-chunk
     *   requests (default 500); `batchSize` is the chunk count from which the
     *   provider's batch API is used when available (default 10).
     */
    constructor(
        provider: EmbeddingProvider,
        options?: { rateLimitMs?: number; batchSize?: number }
    );
    private sleep;
    /** Embeds one chunk; the result is the chunk plus `embedding` and `embeddedAt` (epoch seconds). */
    embedChunk(chunk: Chunk): Promise<EmbeddedChunk>;
    /** Embeds every given chunk and returns the results in input order. */
    embedBatch(chunks: Chunk[]): Promise<EmbeddedChunk[]>;
    /**
     * Loads the chunks file and selects the chunks whose content hash has no
     * stored embedding; with `force`, selects every chunk.
     */
    getChunksToEmbed(
        chunksFile: string,
        force?: boolean
    ): Promise<{ chunksToEmbed: Chunk[] }>;
    /**
     * Writes `newEmbeddings` to the embeddings file, keeping stored embeddings
     * with other content hashes unless `force` is set.
     */
    saveEmbeddings(
        newEmbeddings: EmbeddedChunk[],
        chunksFile: string,
        force?: boolean
    ): Promise<void>;
    /** Selects, embeds and saves in one step; resolves with the newly created embeddings. */
    run(chunksFile: string, force?: boolean): Promise<EmbeddedChunk[]>;
}
|
|
19
|
+
//# sourceMappingURL=embedder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/core/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,aAAa,EACb,KAAK,EACN,MAAM,wBAAwB,CAAC;AAUhC,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAoB;IACpC,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,SAAS,CAAS;gBAGxB,QAAQ,EAAE,iBAAiB,EAC3B,OAAO,GAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAO;YAO9C,KAAK;IAIb,UAAU,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,aAAa,CAAC;IAUhD,UAAU,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IAqCrD,gBAAgB,CACpB,UAAU,EAAE,MAAM,EAClB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC;QACT,aAAa,EAAE,KAAK,EAAE,CAAC;KACxB,CAAC;IA4CI,cAAc,CAClB,aAAa,EAAE,aAAa,EAAE,EAC9B,UAAU,EAAE,MAAM,EAClB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC,IAAI,CAAC;IAoCV,GAAG,CACP,UAAU,EAAE,MAAM,EAClB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC,aAAa,EAAE,CAAC;CAiB5B"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
2
|
+
import { dirname } from "path";
|
|
3
|
+
import { createHash } from "crypto";
|
|
4
|
+
/**
 * Resolves the content hash of a chunk.
 *
 * @param {{contentHash?: string, content: string}} chunk Chunk to fingerprint.
 * @returns {string} `chunk.contentHash` when it is truthy; otherwise the first
 *   16 hex characters of the SHA-256 digest of `chunk.content`.
 */
function chunkContentHash(chunk) {
    const { contentHash: storedHash } = chunk;
    return storedHash
        ? storedHash
        : createHash("sha256").update(chunk.content).digest("hex").slice(0, 16);
}
|
|
9
|
+
/**
 * Derives the embeddings file path that pairs with a chunks file by replacing
 * the first "chunks" in the path with "embeddings".
 *
 * When the path contains no "chunks" the replacement would be a no-op and the
 * embeddings would be read from, and written over, the chunks file itself, so
 * a distinct sibling path is used instead.
 */
function embeddingsFileFor(chunksFile) {
    const derived = chunksFile.replace("chunks", "embeddings");
    return derived === chunksFile ? `${chunksFile}.embeddings.json` : derived;
}

/**
 * Embeds chunks through an embedding provider and maintains the embeddings
 * JSON file that accompanies a chunks file.
 */
export class EmbedderProcessor {
    provider;
    rateLimitMs;
    batchSize;

    /**
     * @param {object} provider Object with `embed(text)` and, optionally,
     *   `embedBatch(texts)`.
     * @param {{rateLimitMs?: number, batchSize?: number}} [options]
     *   `rateLimitMs` (default 500) is the pause between provider requests;
     *   `batchSize` (default 10) is both the chunk count from which the batch
     *   API is used and the maximum number of texts sent per batch request.
     */
    constructor(provider, options = {}) {
        this.provider = provider;
        this.rateLimitMs = options.rateLimitMs ?? 500;
        this.batchSize = options.batchSize ?? 10;
    }

    /** Resolves after `ms` milliseconds. */
    async sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }

    /**
     * Embeds a single chunk.
     *
     * @returns {Promise<object>} A copy of `chunk` with `embedding` and
     *   `embeddedAt` (epoch seconds) added.
     */
    async embedChunk(chunk) {
        const embedding = await this.provider.embed(chunk.content);
        return {
            ...chunk,
            embedding,
            embeddedAt: Date.now() / 1000,
        };
    }

    /**
     * Embeds all chunks, preserving input order.
     *
     * Uses the provider's batch API when it exists and at least `batchSize`
     * chunks are given, sending at most `batchSize` texts per request;
     * otherwise embeds one chunk at a time. Requests are spaced by
     * `rateLimitMs`.
     *
     * @param {Array<object>} chunks Chunks to embed.
     * @returns {Promise<Array<object>>} Embedded chunks.
     * @throws {Error} When a batch response does not contain one embedding per text.
     */
    async embedBatch(chunks) {
        const results = [];
        if (this.provider.embedBatch && chunks.length >= this.batchSize) {
            // A non-positive batchSize keeps the single-request behaviour.
            const sliceSize = this.batchSize >= 1 ? Math.floor(this.batchSize) : chunks.length;
            for (let start = 0; start < chunks.length; start += sliceSize) {
                const slice = chunks.slice(start, start + sliceSize);
                const embeddings = await this.provider.embedBatch(slice.map((c) => c.content));
                if (embeddings?.length !== slice.length) {
                    // Pairing by index below would otherwise store `undefined` embeddings.
                    throw new Error(`Embedding provider returned ${embeddings?.length ?? 0} embeddings for ${slice.length} chunks`);
                }
                const embeddedAt = Date.now() / 1000;
                slice.forEach((chunk, i) => {
                    results.push({ ...chunk, embedding: embeddings[i], embeddedAt });
                });
                if (this.rateLimitMs > 0 && start + sliceSize < chunks.length) {
                    await this.sleep(this.rateLimitMs);
                }
            }
        }
        else {
            for (let i = 0; i < chunks.length; i++) {
                const chunk = chunks[i];
                // Progress label only; tolerate chunks without metadata or source file.
                const eventType = chunk.metadata?.event_type ||
                    chunk.metadata?.title ||
                    chunk.sourceFile?.split("/").pop() ||
                    "unknown";
                console.log(` [${i + 1}/${chunks.length}] ${eventType}`);
                const embedded = await this.embedChunk(chunk);
                results.push(embedded);
                if (this.rateLimitMs > 0 && i < chunks.length - 1) {
                    await this.sleep(this.rateLimitMs);
                }
            }
        }
        return results;
    }

    /**
     * Loads the chunks file and selects the chunks that still need embedding.
     *
     * @param {string} chunksFile Path of the chunks JSON file.
     * @param {boolean} [force=false] When true, every chunk is selected.
     * @returns {Promise<{chunksToEmbed: Array<object>}>} Chunks whose content
     *   hash is absent from the existing embeddings file.
     * @throws {Error} When the chunks file cannot be read or parsed; the
     *   underlying error is attached as `cause`.
     */
    async getChunksToEmbed(chunksFile, force = false) {
        let chunks;
        try {
            chunks = JSON.parse(await readFile(chunksFile, "utf-8"));
        }
        catch (error) {
            throw new Error(`Chunks file not found: ${chunksFile}`, { cause: error });
        }
        console.log(`đ Loaded ${chunks.length} chunks from ${chunksFile}`);
        if (force) {
            console.log(" â ď¸ Force mode: embedding all chunks");
            return { chunksToEmbed: chunks };
        }
        let existingEmbeddings = [];
        try {
            existingEmbeddings = JSON.parse(await readFile(embeddingsFileFor(chunksFile), "utf-8"));
        }
        catch {
            // Best effort: without readable embeddings every chunk is (re-)embedded.
        }
        const existingHashes = new Set(existingEmbeddings.map((emb) => chunkContentHash(emb)));
        console.log(`đ Existing embeddings: ${existingHashes.size} chunks`);
        const chunksToEmbed = chunks.filter((chunk) => !existingHashes.has(chunkContentHash(chunk)));
        return { chunksToEmbed };
    }

    /**
     * Writes embeddings to the embeddings file paired with `chunksFile`.
     *
     * @param {Array<object>} newEmbeddings Embeddings to store.
     * @param {string} chunksFile Path of the chunks JSON file.
     * @param {boolean} [force=false] When true, previously stored embeddings
     *   are dropped; otherwise those with a different content hash are kept.
     * @returns {Promise<void>}
     */
    async saveEmbeddings(newEmbeddings, chunksFile, force = false) {
        const embeddingsFile = embeddingsFileFor(chunksFile);
        await mkdir(dirname(embeddingsFile), { recursive: true });
        const replacedHashes = new Set(newEmbeddings.map((emb) => chunkContentHash(emb)));
        let retained = [];
        if (!force) {
            try {
                const existing = JSON.parse(await readFile(embeddingsFile, "utf-8"));
                retained = existing.filter((emb) => !replacedHashes.has(chunkContentHash(emb)));
            }
            catch {
                // No usable existing embeddings; store only the new ones.
            }
        }
        const final = [...retained, ...newEmbeddings];
        await writeFile(embeddingsFile, JSON.stringify(final, null, 2));
        console.log(`\nđž Saved ${final.length} embeddings to ${embeddingsFile}`);
        console.log(` New: ${newEmbeddings.length}, Existing: ${final.length - newEmbeddings.length}`);
    }

    /**
     * Selects, embeds and saves in one step.
     *
     * @param {string} chunksFile Path of the chunks JSON file.
     * @param {boolean} [force=false] Re-embed everything and discard stored embeddings.
     * @returns {Promise<Array<object>>} The newly created embeddings (`[]` when
     *   nothing needed embedding).
     */
    async run(chunksFile, force = false) {
        console.log("đ˘ Starting incremental embedding generation...");
        const { chunksToEmbed } = await this.getChunksToEmbed(chunksFile, force);
        if (chunksToEmbed.length === 0) {
            console.log("\n⨠No chunks need embedding.");
            return [];
        }
        console.log(`\nđ Need to embed ${chunksToEmbed.length} chunks`);
        const newEmbeddings = await this.embedBatch(chunksToEmbed);
        await this.saveEmbeddings(newEmbeddings, chunksFile, force);
        return newEmbeddings;
    }
}
|
|
139
|
+
//# sourceMappingURL=embedder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/core/embedder.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,SAAS,gBAAgB,CAAC,KAAY;IACpC,IAAI,KAAK,CAAC,WAAW;QAAE,OAAO,KAAK,CAAC,WAAW,CAAC;IAChD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC/E,CAAC;AAED,MAAM,OAAO,iBAAiB;IACpB,QAAQ,CAAoB;IAC5B,WAAW,CAAS;IACpB,SAAS,CAAS;IAE1B,YACE,QAA2B,EAC3B,UAAwD,EAAE;QAE1D,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;QAC9C,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,EAAE,CAAC;IAC3C,CAAC;IAEO,KAAK,CAAC,KAAK,CAAC,EAAU;QAC5B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAY;QAC3B,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAE3D,OAAO;YACL,GAAG,KAAK;YACR,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI;SAC9B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAAe;QAC9B,MAAM,OAAO,GAAoB,EAAE,CAAC;QAEpC,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YAChE,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YAC3C,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG,MAAM,CAAC,CAAC,CAAC;oBACZ,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC;oBACxB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI;iBAC9B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,CAAC;YACN,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,SAAS,GACZ,KAAK,CAAC,QAAQ,CAAC,UAAqB;oBACpC,KAAK,CAAC,QAAQ,CAAC,KAAgB;oBAChC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE;oBACjC,SAAS,CAAC;gBAEZ,OAAO,CAAC,GAAG,CAAC,MAAM,CA
AC,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC,CAAC;gBAE1D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;gBAC9C,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAEvB,IAAI,IAAI,CAAC,WAAW,GAAG,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClD,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,gBAAgB,CACpB,UAAkB,EAClB,QAAiB,KAAK;QAItB,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACpD,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,0BAA0B,UAAU,EAAE,CAAC,CAAC;QAC1D,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,MAAM,gBAAgB,UAAU,EAAE,CAAC,CAAC;QAEpE,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;YACrD,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,CAAC;QACnC,CAAC;QAED,IAAI,kBAAkB,GAAoB,EAAE,CAAC;QAC7C,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAClE,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;YACxD,kBAAkB,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyB,CAAC;QACvD,KAAK,MAAM,GAAG,IAAI,kBAAkB,EAAE,CAAC;YACrC,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,IAAI,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,2BAA2B,aAAa,CAAC,IAAI,SAAS,CAAC,CAAC;QAEpE,MAAM,aAAa,GAAY,EAAE,CAAC;QAClC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;YAC1C,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAClC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,aAAa,EAAE,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,cAAc,CAClB,aAA8B,EAC9B,UAAkB,EAClB,QAAiB,KAAK;QAEtB,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAClE,MAAM,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE1D,MAAM,SAAS,GAAG,IAAI,GAAG,EAAyB,CAAC;QACnD,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,GAAG,C
AAC,WAAW,IAAI,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC3B,CAAC;QAED,IAAI,QAAQ,GAAoB,EAAE,CAAC;QACnC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;gBACxD,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACjC,CAAC;YAAC,MAAM,CAAC;gBACP,yBAAyB;YAC3B,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,KAAK;YACjB,CAAC,CAAC,EAAE;YACJ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBACpB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,gBAAgB,CAAC,CAAC,CAAC,CAAC;gBAClD,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC,CAAC,CAAC;QAEP,KAAK,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;QAE7B,MAAM,SAAS,CAAC,cAAc,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,CAAC,MAAM,kBAAkB,cAAc,EAAE,CAAC,CAAC;QAC1E,OAAO,CAAC,GAAG,CACT,WAAW,aAAa,CAAC,MAAM,eAAe,KAAK,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM,EAAE,CACpF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,GAAG,CACP,UAAkB,EAClB,QAAiB,KAAK;QAEtB,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QAE/D,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAEzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;YAC7C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,sBAAsB,aAAa,CAAC,MAAM,SAAS,CAAC,CAAC;QAEjE,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QAC3D,MAAM,IAAI,CAAC,cAAc,CAAC,aAAa,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC;QAE5D,OAAO,aAAa,CAAC;IACvB,CAAC;CACF"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { FileChunker } from "../interfaces/index.js";
|
|
2
|
+
/**
 * Tracks the files matched by the chunkers' glob patterns together with a git
 * commit hash per file, and reports what changed relative to a previous state.
 */
export declare class GitTracker {
    private git;
    private chunkers;
    private allPatterns;
    private currentHeadCache;
    private uncommittedCache;
    /** @param chunkers Chunkers whose `patterns` select the tracked files. */
    constructor(chunkers: FileChunker[]);
    private getCurrentHead;
    private hasUncommittedChanges;
    private getChunkerForFile;
    private matchesPattern;
    /** Resolves with the de-duplicated, sorted files matching any chunker pattern. */
    getAllTrackedFiles(): Promise<string[]>;
    /** Resolves with a map from file path to commit hash; files without one get the current HEAD. */
    getCommitHashes(files: string[]): Promise<Map<string, string>>;
    /**
     * Resolves with the commit hash and responsible chunker of every tracked
     * file that has a matching chunker. Hashes carry a `-dirty` suffix when
     * git status reports any changed file.
     */
    getCurrentState(): Promise<
        Map<string, { commitHash: string; chunker: FileChunker }>
    >;
    /**
     * Compares the current state with `previousState` (file path to commit hash).
     * `toProcess` holds new or changed files, `toDelete` holds files that are no
     * longer tracked, and `unchanged` holds the rest.
     */
    getChangedFiles(previousState: Map<string, string>): Promise<{
        toProcess: string[];
        toDelete: string[];
        unchanged: string[];
    }>;
}
|
|
25
|
+
//# sourceMappingURL=git-tracker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"git-tracker.d.ts","sourceRoot":"","sources":["../../src/core/git-tracker.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAsCrD,qBAAa,UAAU;IACrB,OAAO,CAAC,GAAG,CAAY;IACvB,OAAO,CAAC,QAAQ,CAAgB;IAChC,OAAO,CAAC,WAAW,CAAW;IAC9B,OAAO,CAAC,gBAAgB,CAAuB;IAC/C,OAAO,CAAC,gBAAgB,CAAwB;gBAEpC,QAAQ,EAAE,WAAW,EAAE;YAMrB,cAAc;YAWd,qBAAqB;IAYnC,OAAO,CAAC,iBAAiB;IAWzB,OAAO,CAAC,cAAc;IAOhB,kBAAkB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IAKvC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAuC9D,eAAe,IAAI,OAAO,CAC9B,GAAG,CAAC,MAAM,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,WAAW,CAAA;KAAE,CAAC,CAC1D;IA0BK,eAAe,CAAC,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC;QACjE,SAAS,EAAE,MAAM,EAAE,CAAC;QACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;QACnB,SAAS,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;CA+BH"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { simpleGit } from "simple-git";
|
|
2
|
+
import { glob } from "glob";
|
|
3
|
+
import { minimatch } from "minimatch";
|
|
4
|
+
import path from "path";
|
|
5
|
+
const MAX_FILES_PER_BATCH = 100;
const MAX_CMD_LEN = 32000;

/**
 * Splits file paths into batches that each hold at most MAX_FILES_PER_BATCH
 * paths and whose estimated command length (command prefix plus each path and
 * one separator) stays within MAX_CMD_LEN.
 *
 * A single path that alone exceeds the length budget still gets a batch of
 * its own, so no input is ever dropped.
 *
 * @param {string[]} files Paths to split, in order.
 * @returns {string[][]} Batches preserving the input order; `[]` for no files.
 */
function batchFiles(files) {
    const baseCmdLen = "git log -1 --format=%H --all -- ".length;
    const batches = [];
    let currentBatch = [];
    // Every batch, including the first, starts with the command prefix already spent.
    let currentLen = baseCmdLen;
    for (const file of files) {
        const fileLen = file.length + 1;
        const batchIsFull = currentBatch.length >= MAX_FILES_PER_BATCH ||
            currentLen + fileLen > MAX_CMD_LEN;
        if (batchIsFull && currentBatch.length > 0) {
            batches.push(currentBatch);
            currentBatch = [];
            currentLen = baseCmdLen;
        }
        currentBatch.push(file);
        currentLen += fileLen;
    }
    if (currentBatch.length > 0) {
        batches.push(currentBatch);
    }
    return batches;
}
|
|
30
|
+
/**
 * Tracks the files selected by the chunkers' glob patterns and the git commit
 * that last touched each of them, so callers can detect new, changed and
 * deleted files between runs.
 */
export class GitTracker {
    git;
    chunkers;
    allPatterns;
    currentHeadCache = null;
    uncommittedCache = null;

    /**
     * @param {Array<{patterns: string[]}>} chunkers Chunkers whose `patterns`
     *   define which files are tracked.
     */
    constructor(chunkers) {
        this.git = simpleGit();
        this.chunkers = chunkers;
        this.allPatterns = chunkers.flatMap((c) => c.patterns);
    }

    /**
     * Resolves with the HEAD commit hash (cached after the first call), or a
     * `dev_000…` placeholder when `git rev-parse HEAD` fails.
     */
    async getCurrentHead() {
        if (!this.currentHeadCache) {
            try {
                this.currentHeadCache = await this.git.revparse(["HEAD"]);
            }
            catch {
                this.currentHeadCache = "dev_0000000000000000000000000000000000000000";
            }
        }
        return this.currentHeadCache;
    }

    /**
     * Resolves with whether git status lists any file (cached after the first
     * call); `false` when the status call fails.
     */
    async hasUncommittedChanges() {
        if (this.uncommittedCache === null) {
            try {
                const status = await this.git.status();
                this.uncommittedCache = status.files.length > 0;
            }
            catch {
                this.uncommittedCache = false;
            }
        }
        return this.uncommittedCache;
    }

    /** Returns the first chunker with a pattern matching `filePath`, or `null`. */
    getChunkerForFile(filePath) {
        for (const chunker of this.chunkers) {
            for (const pattern of chunker.patterns) {
                if (this.matchesPattern(filePath, pattern)) {
                    return chunker;
                }
            }
        }
        return null;
    }

    /** Glob-matches `filePath` against `pattern` after converting both to `/` separators. */
    matchesPattern(filePath, pattern) {
        const normalizedPath = filePath.split(path.sep).join("/");
        const normalizedPattern = pattern.split(path.sep).join("/");
        return minimatch(normalizedPath, normalizedPattern);
    }

    /** Resolves with the de-duplicated, sorted files matching any chunker pattern. */
    async getAllTrackedFiles() {
        const files = await glob(this.allPatterns, { nodir: true });
        return [...new Set(files)].sort();
    }

    /**
     * Looks up, for every file, the hash of the most recent commit that touched it.
     *
     * `git log -1 -- <paths>` prints a single commit for the whole path list, so
     * each file is queried on its own; batches only bound how many lookups are
     * started together.
     *
     * @param {string[]} files Paths to look up.
     * @returns {Promise<Map<string, string>>} File path to commit hash, in input
     *   order. Files with no commit, or whose lookup fails, map to the current HEAD.
     */
    async getCommitHashes(files) {
        const commitMap = new Map();
        const currentHead = await this.getCurrentHead();
        const lastCommitOf = async (file) => {
            try {
                const output = await this.git.raw([
                    "log",
                    "-1",
                    "--format=%H",
                    "--all",
                    "--",
                    file,
                ]);
                return output.trim().split("\n")[0].trim();
            }
            catch {
                // Fall back to HEAD below rather than failing the whole run.
                return "";
            }
        };
        for (const batch of batchFiles(files)) {
            const hashes = await Promise.all(batch.map(lastCommitOf));
            batch.forEach((file, i) => {
                commitMap.set(file, hashes[i] || currentHead);
            });
        }
        return commitMap;
    }

    /**
     * Builds the current state of all tracked files.
     *
     * @returns {Promise<Map<string, {commitHash: string, chunker: object}>>}
     *   One entry per tracked file that has a matching chunker. When git status
     *   reports any changed file, every hash is suffixed with `-dirty`.
     */
    async getCurrentState() {
        const allFiles = await this.getAllTrackedFiles();
        const commitMap = await this.getCommitHashes(allFiles);
        const hasDirty = await this.hasUncommittedChanges();
        const currentHead = await this.getCurrentHead();
        const state = new Map();
        for (const file of allFiles) {
            let commitHash = commitMap.get(file) || currentHead;
            if (hasDirty) {
                commitHash = `${commitHash}-dirty`;
            }
            const chunker = this.getChunkerForFile(file);
            if (chunker) {
                state.set(file, { commitHash, chunker });
            }
        }
        return state;
    }

    /**
     * Compares the current state with a previous one.
     *
     * @param {Map<string, string>} previousState File path to commit hash from
     *   the previous run.
     * @returns {Promise<{toProcess: string[], toDelete: string[], unchanged: string[]}>}
     *   `toProcess` holds files that are new or whose hash differs, `toDelete`
     *   holds files present before but no longer tracked, and `unchanged` holds
     *   the rest.
     */
    async getChangedFiles(previousState) {
        const current = await this.getCurrentState();
        const toProcess = [];
        const toDelete = [];
        const unchanged = [];
        for (const [filePath, info] of current) {
            const prevHash = previousState.get(filePath);
            if (!prevHash) {
                console.log(` đ New: ${filePath}`);
                toProcess.push(filePath);
            }
            else if (prevHash !== info.commitHash) {
                console.log(` đ Changed: ${filePath} (${prevHash.slice(0, 8)} â ${info.commitHash.slice(0, 8)})`);
                toProcess.push(filePath);
            }
            else {
                unchanged.push(filePath);
            }
        }
        for (const [filePath] of previousState) {
            if (!current.has(filePath)) {
                console.log(` đď¸ Deleted: ${filePath}`);
                toDelete.push(filePath);
            }
        }
        return { toProcess, toDelete, unchanged };
    }
}
|
|
164
|
+
//# sourceMappingURL=git-tracker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"git-tracker.js","sourceRoot":"","sources":["../../src/core/git-tracker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAa,MAAM,YAAY,CAAC;AAClD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,mBAAmB,GAAG,GAAG,CAAC;AAChC,MAAM,WAAW,GAAG,KAAK,CAAC;AAE1B,SAAS,UAAU,CAAC,KAAe;IACjC,MAAM,OAAO,GAAe,EAAE,CAAC;IAC/B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,MAAM,UAAU,GAAG,kCAAkC,CAAC,MAAM,CAAC;IAE7D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAEhC,IACE,YAAY,CAAC,MAAM,IAAI,mBAAmB;YAC1C,UAAU,GAAG,OAAO,GAAG,WAAW,EAClC,CAAC;YACD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAC3B,YAAY,GAAG,EAAE,CAAC;gBAClB,UAAU,GAAG,UAAU,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,UAAU,IAAI,OAAO,CAAC;IACxB,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,OAAO,UAAU;IACb,GAAG,CAAY;IACf,QAAQ,CAAgB;IACxB,WAAW,CAAW;IACtB,gBAAgB,GAAkB,IAAI,CAAC;IACvC,gBAAgB,GAAmB,IAAI,CAAC;IAEhD,YAAY,QAAuB;QACjC,IAAI,CAAC,GAAG,GAAG,SAAS,EAAE,CAAC;QACvB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACzD,CAAC;IAEO,KAAK,CAAC,cAAc;QAC1B,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3B,IAAI,CAAC;gBACH,IAAI,CAAC,gBAAgB,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;YAC5D,CAAC;YAAC,MAAM,CAAC;gBACP,IAAI,CAAC,gBAAgB,GAAG,8CAA8C,CAAC;YACzE,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,qBAAqB;QACjC,IAAI,IAAI,CAAC,gBAAgB,KAAK,IAAI,EAAE,CAAC;YACnC,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;gBACvC,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAClD,CAAC;YAAC,MAAM,CAAC;gBACP,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAChC,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,
CAAC;IAEO,iBAAiB,CAAC,QAAgB;QACxC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAC;oBAC3C,OAAO,OAAO,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,cAAc,CAAC,QAAgB,EAAE,OAAe;QACtD,MAAM,cAAc,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1D,MAAM,iBAAiB,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO,SAAS,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,kBAAkB;QACtB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACpC,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAe;QACnC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC5C,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAEhD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAChC,KAAK;oBACL,IAAI;oBACJ,aAAa;oBACb,OAAO;oBACP,IAAI;oBACJ,GAAG,KAAK;iBACT,CAAC,CAAC;gBACH,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAExC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC7B,IAAI,IAAI,EAAE,CAAC;wBACT,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;oBAChC,CAAC;gBACH,CAAC;gBAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBACzB,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;oBACnC,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;gBACnC,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,eAAe;QAGnB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,kBAAkB,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,MAAM,IAAI,
CAAC,qBAAqB,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAEhD,MAAM,KAAK,GAAG,IAAI,GAAG,EAGlB,CAAC;QAEJ,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,IAAI,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC;YACpD,IAAI,QAAQ,EAAE,CAAC;gBACb,UAAU,GAAG,GAAG,UAAU,QAAQ,CAAC;YACrC,CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAC7C,IAAI,OAAO,EAAE,CAAC;gBACZ,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,aAAkC;QAKtD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,eAAe,EAAE,CAAC;QAC7C,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,OAAO,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAE7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;gBACrC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,QAAQ,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;gBACxC,OAAO,CAAC,GAAG,CACT,iBAAiB,QAAQ,KAAK,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CACvF,CAAC;gBACF,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,aAAa,EAAE,CAAC;YACvC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,CAAC,kBAAkB,QAAQ,EAAE,CAAC,CAAC;gBAC1C,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IAC5C,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { FileChunker, EmbeddingProvider, VectorStore } from "../interfaces/index.js";
|
|
2
|
+
/** Configuration object accepted by the Orchestrator constructor. */
export interface RAGPipelineConfig {
|
|
3
|
+
chunkers: FileChunker[]; // handed to both GitTracker and ChunkProcessor by Orchestrator.run()
|
|
4
|
+
embedder: EmbeddingProvider; // wrapped in an EmbedderProcessor for the embedding step
|
|
5
|
+
vectorStore: VectorStore; // wrapped in an Uploader for the upload step
|
|
6
|
+
options?: { // every option is optional; the whole object may be omitted
|
|
7
|
+
chunksFile?: string; // falls back to "./docs/rag/chunks.json" when missing or empty
|
|
8
|
+
embeddingsFile?: string; // falls back to "./docs/rag/embeddings.json" when missing or empty
|
|
9
|
+
force?: boolean; // truthy: run even when no changes are detected; also forwarded to the embedding and upload steps
|
|
10
|
+
skipUpload?: boolean; // truthy: the vector store upload step is skipped
|
|
11
|
+
rateLimitMs?: number; // forwarded unchanged to EmbedderProcessor
|
|
12
|
+
batchSize?: number; // forwarded unchanged to EmbedderProcessor
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
/** Runs the pipeline steps in order: change scan, chunking, embedding, optional upload. */
export declare class Orchestrator {
|
|
16
|
+
private config; // the RAGPipelineConfig passed to the constructor
|
|
17
|
+
private chunksFile; // options.chunksFile, or "./docs/rag/chunks.json" when not provided
|
|
18
|
+
private embeddingsFile; // options.embeddingsFile, or "./docs/rag/embeddings.json" when not provided
|
|
19
|
+
constructor(config: RAGPipelineConfig); // stores the config and resolves the two file paths
|
|
20
|
+
run(): Promise<void>; // resolves once the pipeline finishes or exits early (no changes / no chunks)
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=orchestrator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,WAAW,EACX,iBAAiB,EACjB,WAAW,EACZ,MAAM,wBAAwB,CAAC;AAEhC,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,WAAW,EAAE,WAAW,CAAC;IACzB,OAAO,CAAC,EAAE;QACR,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAoB;IAClC,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,cAAc,CAAS;gBAEnB,MAAM,EAAE,iBAAiB;IAO/B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;CA4D3B"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { GitTracker } from "./git-tracker.js";
|
|
2
|
+
import { ChunkProcessor } from "./chunk-processor.js";
|
|
3
|
+
import { EmbedderProcessor } from "./embedder.js";
|
|
4
|
+
import { Uploader } from "./uploader.js";
|
|
5
|
+
/**
 * Drives the RAG pipeline end to end: scan tracked files for changes,
 * chunk them, generate embeddings, and (unless disabled) upload the result
 * to the configured vector store.
 */
export class Orchestrator {
    config;
    chunksFile;
    embeddingsFile;

    /**
     * @param config Pipeline configuration. `options.chunksFile` and
     *   `options.embeddingsFile` fall back to files under `./docs/rag/`
     *   when they are missing or empty.
     */
    constructor(config) {
        this.config = config;
        const chunksPath = config.options?.chunksFile;
        const embeddingsPath = config.options?.embeddingsFile;
        this.chunksFile = chunksPath || "./docs/rag/chunks.json";
        this.embeddingsFile = embeddingsPath || "./docs/rag/embeddings.json";
    }

    /**
     * Executes the pipeline steps in order.
     *
     * Returns early when nothing changed (and `options.force` is not set)
     * or when chunking produced no chunks.
     *
     * @returns Promise that resolves when the run finishes.
     */
    async run() {
        console.log("đ Starting RAG pipeline...\n");
        console.log("đ Step 1: Scanning for changes...");
        const tracker = new GitTracker(this.config.chunkers);
        const snapshot = await tracker.getCurrentState();

        // The previous state handed to the tracker is always an empty Map here.
        // NOTE(review): that presumably makes every tracked file count as new
        // on each run - confirm against GitTracker.getChangedFiles.
        const emptyPreviousState = new Map();
        const changes = await tracker.getChangedFiles(emptyPreviousState);
        const pendingFiles = changes.toProcess;
        const removedFiles = changes.toDelete;

        const nothingToDo = pendingFiles.length === 0 && removedFiles.length === 0;
        if (nothingToDo && !this.config.options?.force) {
            console.log("\n⨠No changes detected.");
            return;
        }
        console.log(`\nđ Changes: ${pendingFiles.length} to process, ${removedFiles.length} to delete\n`);

        console.log("đŞ Step 2: Generating chunks...");
        const chunker = new ChunkProcessor(this.config.chunkers);
        // Carry over state only for files that are present in the snapshot.
        const knownFileState = new Map(
            pendingFiles
                .map((file) => [file, snapshot.get(file)])
                .filter(([, info]) => info),
        );
        const chunks = await chunker.processFiles(pendingFiles, knownFileState);
        await chunker.saveChunksLocal(chunks, this.chunksFile);
        if (chunks.length === 0) {
            console.log("\nâ ď¸ No chunks generated. Exiting.");
            return;
        }

        console.log("\nđ˘ Step 3: Generating embeddings...");
        const embeddingStep = new EmbedderProcessor(this.config.embedder, {
            rateLimitMs: this.config.options?.rateLimitMs,
            batchSize: this.config.options?.batchSize,
        });
        // NOTE(review): only chunksFile is passed to the embedding step, while
        // embeddingsFile is used by the upload step below - confirm both
        // steps agree on the embeddings path when it is customised.
        await embeddingStep.run(this.chunksFile, this.config.options?.force || false);

        const uploadEnabled = !this.config.options?.skipUpload;
        if (uploadEnabled) {
            console.log("\nđ¤ Step 4: Uploading to vector store...");
            const uploadStep = new Uploader(this.config.vectorStore);
            await uploadStep.sync(this.embeddingsFile, this.config.options?.force || false);
        }

        console.log("\n⨠RAG pipeline complete!");
    }
}
|
|
57
|
+
//# sourceMappingURL=orchestrator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAqBzC,MAAM,OAAO,YAAY;IACf,MAAM,CAAoB;IAC1B,UAAU,CAAS;IACnB,cAAc,CAAS;IAE/B,YAAY,MAAyB;QACnC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,OAAO,EAAE,UAAU,IAAI,wBAAwB,CAAC;QACzE,IAAI,CAAC,cAAc;YACjB,MAAM,CAAC,OAAO,EAAE,cAAc,IAAI,4BAA4B,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,GAAG;QACP,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;QAE7C,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;QAClD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACxD,MAAM,YAAY,GAAG,MAAM,UAAU,CAAC,eAAe,EAAE,CAAC;QAExD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;QAChD,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAC3B,MAAM,UAAU,CAAC,eAAe,CAAC,aAAa,CAAC,CAAC;QAElD,IACE,SAAS,CAAC,MAAM,KAAK,CAAC;YACtB,QAAQ,CAAC,MAAM,KAAK,CAAC;YACrB,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,EAC3B,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACxC,OAAO;QACT,CAAC;QAED,OAAO,CAAC,GAAG,CACT,iBAAiB,SAAS,CAAC,MAAM,gBAAgB,QAAQ,CAAC,MAAM,cAAc,CAC/E,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,iCAAiC,CAAC,CAAC;QAC/C,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEhE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAE,CAAC;QAC5B,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACpC,IAAI,IAAI;gBAAE,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,YAAY,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QACvE,MAAM,cAAc,CAAC,eAAe,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAE9D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;YAClD,OAAO;QACT,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE;YAC3D,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,WAAW;YAC7C,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,SAAS;SAC1C,CAAC,CAAC;QAEH,MAAM,QA
AQ,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,IAAI,KAAK,CAAC,CAAC;QAEzE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,UAAU,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,IAAI,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YACvD,MAAM,QAAQ,CAAC,IAAI,CACjB,IAAI,CAAC,cAAc,EACnB,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,IAAI,KAAK,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC5C,CAAC;CACF"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { VectorStore, EmbeddedChunk } from "../interfaces/index.js";
|
|
2
|
+
/** Uploads embedded chunks from a local embeddings file into a VectorStore. */
export declare class Uploader {
|
|
3
|
+
private vectorStore; // the store passed to the constructor
|
|
4
|
+
constructor(vectorStore: VectorStore); // only stores the reference; no store calls happen here
|
|
5
|
+
private chunkToDocument; // copies chunk fields into the document shape given to upsert
|
|
6
|
+
getItemsToUpload(embeddingsFile: string, force?: boolean): Promise<{ // rejects if the file cannot be read or parsed; with force, the store state is not consulted
|
|
7
|
+
toUpload: EmbeddedChunk[]; // chunks whose source file is unknown to the store or has a different commit hash (all chunks when forced)
|
|
8
|
+
toDelete: string[]; // de-duplicated source files whose stored entries should be removed first
|
|
9
|
+
}>;
|
|
10
|
+
sync(embeddingsFile: string, force?: boolean): Promise<{ // initializes the store, deletes stale source files, then upserts in batches
|
|
11
|
+
uploaded: number; // number of chunks selected for upload
|
|
12
|
+
deleted: number; // number of source files selected for deletion
|
|
13
|
+
}>;
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=uploader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"uploader.d.ts","sourceRoot":"","sources":["../../src/core/uploader.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,WAAW,EAEX,aAAa,EACd,MAAM,wBAAwB,CAAC;AAGhC,qBAAa,QAAQ;IACnB,OAAO,CAAC,WAAW,CAAc;gBAErB,WAAW,EAAE,WAAW;IAIpC,OAAO,CAAC,eAAe;IAejB,gBAAgB,CACpB,cAAc,EAAE,MAAM,EACtB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC;QACT,QAAQ,EAAE,aAAa,EAAE,CAAC;QAC1B,QAAQ,EAAE,MAAM,EAAE,CAAC;KACpB,CAAC;IAuCI,IAAI,CACR,cAAc,EAAE,MAAM,EACtB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC;QACT,QAAQ,EAAE,MAAM,CAAC;QACjB,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CA0CH"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { readFile } from "fs/promises";
|
|
2
|
+
/**
 * Uploads embedded chunks from a local embeddings file into a vector store.
 *
 * The store passed to the constructor must provide `initialize()`,
 * `getCurrentState()`, `deleteBySourceFile(files)` and `upsert(documents)`;
 * those are the only members this class calls.
 */
export class Uploader {
    vectorStore;

    /**
     * @param vectorStore Store that receives the documents.
     */
    constructor(vectorStore) {
        this.vectorStore = vectorStore;
    }

    /**
     * Copies the fields of an embedded chunk into the document shape that is
     * handed to `vectorStore.upsert`.
     *
     * @param chunk Embedded chunk as read from the embeddings file.
     * @param collection Optional collection value copied onto the document;
     *   `undefined` when the caller does not supply one.
     * @returns A new document object.
     */
    chunkToDocument(chunk, collection) {
        return {
            content: chunk.content,
            metadata: chunk.metadata,
            embedding: chunk.embedding,
            sourceFile: chunk.sourceFile,
            commitHash: chunk.commitHash,
            contentHash: chunk.contentHash,
            collection,
        };
    }

    /**
     * Reads the embeddings file and decides which chunks must be uploaded
     * and which source files must be removed from the store beforehand.
     *
     * @param embeddingsFile Path to a JSON file containing an array of embedded chunks.
     * @param force When true, every chunk is uploaded and every source file
     *   found in the embeddings file is scheduled for deletion; the store's
     *   current state is not consulted.
     * @returns `{ toUpload, toDelete }`.
     * @throws {Error} If the file does not exist, cannot be read, is not valid
     *   JSON, or does not contain an array. The underlying error, when there
     *   is one, is available as `cause`.
     */
    async getItemsToUpload(embeddingsFile, force = false) {
        let raw;
        try {
            raw = await readFile(embeddingsFile, "utf-8");
        }
        catch (err) {
            // Only a missing file is reported as "not found"; other read
            // failures (permissions, a directory path, ...) get their own message.
            if (err?.code === "ENOENT") {
                throw new Error(`Embeddings file not found: ${embeddingsFile}`, { cause: err });
            }
            throw new Error(`Failed to read embeddings file: ${embeddingsFile}`, { cause: err });
        }

        let embeddings;
        try {
            embeddings = JSON.parse(raw);
        }
        catch (err) {
            throw new Error(`Embeddings file is not valid JSON: ${embeddingsFile}`, { cause: err });
        }
        if (!Array.isArray(embeddings)) {
            // Fail here with a clear message instead of crashing later on
            // `.map` or iteration over a non-array value.
            throw new Error(`Embeddings file must contain a JSON array: ${embeddingsFile}`);
        }

        console.log(`đ Loaded ${embeddings.length} embeddings from ${embeddingsFile}`);

        if (force) {
            const allSourceFiles = [...new Set(embeddings.map((e) => e.sourceFile))];
            return { toUpload: embeddings, toDelete: allSourceFiles };
        }

        const existingState = await this.vectorStore.getCurrentState();
        const toUploadList = [];
        const toDeleteSet = new Set();
        for (const emb of embeddings) {
            const existingHash = existingState.get(emb.sourceFile);
            if (!existingHash) {
                // Source file unknown to the store: upload only.
                toUploadList.push(emb);
            }
            else if (existingHash !== emb.commitHash) {
                // Stored under a different commit: remove the stale entries
                // for this file, then upload the new chunk.
                toDeleteSet.add(emb.sourceFile);
                toUploadList.push(emb);
            }
            // Same commit hash: already up to date, nothing to do.
        }
        return {
            toUpload: toUploadList,
            toDelete: [...toDeleteSet],
        };
    }

    /**
     * Brings the vector store in line with the embeddings file: initializes
     * the store, deletes stale source files, then upserts the selected
     * chunks in batches of 50.
     *
     * @param embeddingsFile Path to the embeddings JSON file.
     * @param force Passed through to `getItemsToUpload`.
     * @returns `{ uploaded, deleted }` - the number of chunks uploaded and
     *   the number of source files deleted.
     * @throws {Error} Whatever `getItemsToUpload` or the vector store throws.
     */
    async sync(embeddingsFile, force = false) {
        console.log("đ¤ Starting incremental upload...");
        await this.vectorStore.initialize();

        const { toUpload, toDelete } = await this.getItemsToUpload(embeddingsFile, force);
        console.log(`\nđ Need to upload: ${toUpload.length} documents`);
        console.log(`   Need to delete: ${toDelete.length} files`);

        if (toUpload.length === 0 && toDelete.length === 0) {
            console.log("\n⨠No changes detected.");
            return { uploaded: 0, deleted: 0 };
        }

        // Deletions run first so re-uploaded files do not keep stale entries.
        if (toDelete.length > 0) {
            await this.vectorStore.deleteBySourceFile(toDelete);
            console.log(`   đď¸ Deleted ${toDelete.length} obsolete documents`);
        }

        if (toUpload.length > 0) {
            const documents = toUpload.map((e) => this.chunkToDocument(e));
            const batchSize = 50;
            const totalBatches = Math.ceil(documents.length / batchSize);
            for (let i = 0; i < documents.length; i += batchSize) {
                const batch = documents.slice(i, i + batchSize);
                await this.vectorStore.upsert(batch);
                console.log(`   ✅ Uploaded batch ${Math.floor(i / batchSize) + 1}/${totalBatches}`);
            }
        }

        console.log(`\n⨠Upload complete!`);
        console.log(`   Uploaded: ${toUpload.length}`);
        console.log(`   Deleted: ${toDelete.length}`);
        return { uploaded: toUpload.length, deleted: toDelete.length };
    }
}
|
|
79
|
+
//# sourceMappingURL=uploader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"uploader.js","sourceRoot":"","sources":["../../src/core/uploader.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,MAAM,OAAO,QAAQ;IACX,WAAW,CAAc;IAEjC,YAAY,WAAwB;QAClC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAEO,eAAe,CACrB,KAAoB,EACpB,UAAmB;QAEnB,OAAO;YACL,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,WAAW,EAAE,KAAK,CAAC,WAAY;YAC/B,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,gBAAgB,CACpB,cAAsB,EACtB,QAAiB,KAAK;QAKtB,IAAI,UAA2B,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;YACxD,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,8BAA8B,cAAc,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,CAAC,GAAG,CACT,aAAa,UAAU,CAAC,MAAM,oBAAoB,cAAc,EAAE,CACnE,CAAC;QAEF,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,cAAc,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACzE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;QAC5D,CAAC;QAED,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,eAAe,EAAE,CAAC;QAC/D,MAAM,YAAY,GAAoB,EAAE,CAAC;QACzC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;QAEtC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,MAAM,YAAY,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAEvD,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;iBAAM,IAAI,YAAY,KAAK,GAAG,CAAC,UAAU,EAAE,CAAC;gBAC3C,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;gBAChC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAED,OAAO;YACL,QAAQ,EAAE,YAAY;YACtB,QAAQ,EAAE,CAAC,GAAG,WAAW,CAAC;SAC3B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI,CACR,cAAsB,EACtB,QAAiB,KAAK;QAKtB,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QAEjD,MAAM,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,CAAC;QAEpC,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CACxD,cAAc,EACd,KAAK,CACN,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,wBAAwB,QAAQ,CAAC,MAAM,YAAY,CAAC,CAAC;QACjE,OAAO,CAAC,GAAG,CAAC,sBAAsB,QAA
Q,CAAC,MAAM,QAAQ,CAAC,CAAC;QAE3D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACxC,OAAO,EAAE,QAAQ,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;QACrC,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YACpD,OAAO,CAAC,GAAG,CAAC,iBAAiB,QAAQ,CAAC,MAAM,qBAAqB,CAAC,CAAC;QACrE,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;YAE/D,MAAM,SAAS,GAAG,EAAE,CAAC;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBAChD,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACrC,OAAO,CAAC,GAAG,CACT,sBAAsB,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CACjG,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,gBAAgB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAE9C,OAAO,EAAE,QAAQ,EAAE,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;IACjE,CAAC;CACF"}
|