@vivantel/rag-core 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/rag-update.d.ts +3 -0
- package/dist/bin/rag-update.d.ts.map +1 -0
- package/dist/bin/rag-update.js +116 -0
- package/dist/bin/rag-update.js.map +1 -0
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +262 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/validate.d.ts +2 -0
- package/dist/cli/validate.d.ts.map +1 -0
- package/dist/cli/validate.js +54 -0
- package/dist/cli/validate.js.map +1 -0
- package/dist/config-loader.d.ts.map +1 -1
- package/dist/config-loader.js +73 -7
- package/dist/config-loader.js.map +1 -1
- package/dist/core/chunk-processor.d.ts +1 -1
- package/dist/core/chunk-processor.d.ts.map +1 -1
- package/dist/core/chunk-processor.js +50 -21
- package/dist/core/chunk-processor.js.map +1 -1
- package/dist/core/embedder.d.ts +5 -1
- package/dist/core/embedder.d.ts.map +1 -1
- package/dist/core/embedder.js +40 -29
- package/dist/core/embedder.js.map +1 -1
- package/dist/core/errors.d.ts +16 -0
- package/dist/core/errors.d.ts.map +1 -0
- package/dist/core/errors.js +17 -0
- package/dist/core/errors.js.map +1 -0
- package/dist/core/git-tracker.d.ts.map +1 -1
- package/dist/core/git-tracker.js +9 -59
- package/dist/core/git-tracker.js.map +1 -1
- package/dist/core/orchestrator.d.ts +8 -0
- package/dist/core/orchestrator.d.ts.map +1 -1
- package/dist/core/orchestrator.js +153 -37
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/plugin-discovery.d.ts +19 -0
- package/dist/core/plugin-discovery.d.ts.map +1 -0
- package/dist/core/plugin-discovery.js +47 -0
- package/dist/core/plugin-discovery.js.map +1 -0
- package/dist/core/telemetry.d.ts +61 -0
- package/dist/core/telemetry.d.ts.map +1 -0
- package/dist/core/telemetry.js +50 -0
- package/dist/core/telemetry.js.map +1 -0
- package/dist/core/uploader.d.ts +5 -1
- package/dist/core/uploader.d.ts.map +1 -1
- package/dist/core/uploader.js +23 -7
- package/dist/core/uploader.js.map +1 -1
- package/dist/core/utils.d.ts +7 -0
- package/dist/core/utils.d.ts.map +1 -1
- package/dist/core/utils.js +35 -0
- package/dist/core/utils.js.map +1 -1
- package/dist/index.d.ts +14 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -2
- package/dist/index.js.map +1 -1
- package/dist/interfaces/embedder.d.ts +2 -0
- package/dist/interfaces/embedder.d.ts.map +1 -1
- package/dist/interfaces/vector-store.d.ts +2 -0
- package/dist/interfaces/vector-store.d.ts.map +1 -1
- package/dist/strategies/chunk/token.js +1 -1
- package/dist/strategies/chunk/token.js.map +1 -1
- package/package.json +12 -2
- package/.github/config/release-please.json +0 -38
- package/.github/dependabot.yaml +0 -28
- package/.github/workflows/ci.yaml +0 -119
- package/.github/workflows/publish.yaml +0 -155
- package/.github/workflows/release.yaml +0 -54
- package/.release-please-manifest.json +0 -3
- package/.versionrc.json +0 -19
- package/CHANGELOG.md +0 -51
- package/bin/rag-update.ts +0 -49
- package/eslint.config.js +0 -25
- package/src/config-loader.ts +0 -21
- package/src/core/chunk-processor.test.ts +0 -36
- package/src/core/chunk-processor.ts +0 -92
- package/src/core/embedder.ts +0 -189
- package/src/core/git-tracker.test.ts +0 -64
- package/src/core/git-tracker.ts +0 -202
- package/src/core/orchestrator.test.ts +0 -53
- package/src/core/orchestrator.ts +0 -97
- package/src/core/uploader.ts +0 -123
- package/src/core/utils.ts +0 -27
- package/src/helpers/create-chunker.test.ts +0 -31
- package/src/helpers/create-chunker.ts +0 -40
- package/src/index.test.ts +0 -33
- package/src/index.ts +0 -30
- package/src/interfaces/chunker.ts +0 -59
- package/src/interfaces/embedder.ts +0 -36
- package/src/interfaces/index.test.ts +0 -9
- package/src/interfaces/index.ts +0 -3
- package/src/interfaces/vector-store.ts +0 -71
- package/src/strategies/chunk/index.ts +0 -4
- package/src/strategies/chunk/markdown-headers.test.ts +0 -37
- package/src/strategies/chunk/markdown-headers.ts +0 -106
- package/src/strategies/chunk/semantic.test.ts +0 -21
- package/src/strategies/chunk/semantic.ts +0 -80
- package/src/strategies/chunk/token.test.ts +0 -41
- package/src/strategies/chunk/token.ts +0 -72
- package/src/strategies/chunk/whole-file.test.ts +0 -24
- package/src/strategies/chunk/whole-file.ts +0 -35
- package/tsconfig.json +0 -21
- package/typedoc.json +0 -11
- package/vitest.config.ts +0 -19
|
@@ -8,22 +8,31 @@ export class ChunkProcessor {
|
|
|
8
8
|
this.chunkers = new Map(chunkers.map((c) => [c.name, c]));
|
|
9
9
|
}
|
|
10
10
|
async processFile(filePath, commitHash, chunker) {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
chunk.commitHash = commitHash;
|
|
17
|
-
}
|
|
18
|
-
return chunks;
|
|
19
|
-
}
|
|
20
|
-
catch (error) {
|
|
21
|
-
console.error(` ❌ Error processing ${filePath}: ${error}`);
|
|
22
|
-
return [];
|
|
11
|
+
const chunks = await chunker.chunk(filePath, commitHash);
|
|
12
|
+
for (const chunk of chunks) {
|
|
13
|
+
chunk.contentHash = computeContentHash(chunk.content);
|
|
14
|
+
chunk.sourceFile = filePath;
|
|
15
|
+
chunk.commitHash = commitHash;
|
|
23
16
|
}
|
|
17
|
+
return chunks;
|
|
24
18
|
}
|
|
25
|
-
async processFiles(files, fileState) {
|
|
19
|
+
async processFiles(files, fileState, existingChunks = []) {
|
|
26
20
|
const allChunks = [];
|
|
21
|
+
let errorCount = 0;
|
|
22
|
+
// Build resume cache: sourceFile → { commitHash, chunks }
|
|
23
|
+
const resumeCache = new Map();
|
|
24
|
+
for (const chunk of existingChunks) {
|
|
25
|
+
const entry = resumeCache.get(chunk.sourceFile);
|
|
26
|
+
if (!entry) {
|
|
27
|
+
resumeCache.set(chunk.sourceFile, {
|
|
28
|
+
commitHash: chunk.commitHash,
|
|
29
|
+
chunks: [chunk],
|
|
30
|
+
});
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
entry.chunks.push(chunk);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
27
36
|
for (let i = 0; i < files.length; i++) {
|
|
28
37
|
const filePath = files[i];
|
|
29
38
|
const info = fileState.get(filePath);
|
|
@@ -31,16 +40,33 @@ export class ChunkProcessor {
|
|
|
31
40
|
console.log(` ⚠️ No chunker for: ${filePath}`);
|
|
32
41
|
continue;
|
|
33
42
|
}
|
|
43
|
+
// Resume: reuse cached chunks when commitHash matches
|
|
44
|
+
const cached = resumeCache.get(filePath);
|
|
45
|
+
if (cached && cached.commitHash === info.commitHash) {
|
|
46
|
+
console.log(` [${i + 1}/${files.length}] ${filePath}`);
|
|
47
|
+
console.log(` ⏭️ Cached (${cached.chunks.length} chunk(s))`);
|
|
48
|
+
allChunks.push(...cached.chunks);
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
34
51
|
console.log(` [${i + 1}/${files.length}] ${filePath}`);
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
52
|
+
try {
|
|
53
|
+
const chunks = await this.processFile(filePath, info.commitHash, info.chunker);
|
|
54
|
+
if (chunks.length > 0) {
|
|
55
|
+
allChunks.push(...chunks);
|
|
56
|
+
console.log(` ✅ Generated ${chunks.length} chunk(s)`);
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
console.log(` ⚠️ No chunks generated (skipped)`);
|
|
60
|
+
}
|
|
39
61
|
}
|
|
40
|
-
|
|
41
|
-
|
|
62
|
+
catch (error) {
|
|
63
|
+
errorCount++;
|
|
64
|
+
console.error(` ❌ Error processing ${filePath}: ${error}`);
|
|
42
65
|
}
|
|
43
66
|
}
|
|
67
|
+
if (errorCount > 0) {
|
|
68
|
+
console.warn(`\n⚠️ ${errorCount} file(s) failed during chunking.`);
|
|
69
|
+
}
|
|
44
70
|
return allChunks;
|
|
45
71
|
}
|
|
46
72
|
async saveChunksLocal(chunks, outputFile) {
|
|
@@ -50,10 +76,13 @@ export class ChunkProcessor {
|
|
|
50
76
|
let existing = [];
|
|
51
77
|
try {
|
|
52
78
|
const content = await readFile(outputFile, "utf-8");
|
|
53
|
-
|
|
79
|
+
const parsed = JSON.parse(content);
|
|
80
|
+
if (Array.isArray(parsed)) {
|
|
81
|
+
existing = parsed;
|
|
82
|
+
}
|
|
54
83
|
}
|
|
55
84
|
catch {
|
|
56
|
-
// File doesn't exist
|
|
85
|
+
// File doesn't exist or is not valid JSON — start fresh
|
|
57
86
|
}
|
|
58
87
|
const processedFiles = new Set(chunks.map((c) => c.sourceFile));
|
|
59
88
|
const filtered = existing.filter((c) => !processedFiles.has(c.sourceFile));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunk-processor.js","sourceRoot":"","sources":["../../src/core/chunk-processor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,SAAS,kBAAkB,CAAC,OAAe;IACzC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,MAAM,OAAO,cAAc;IACjB,QAAQ,CAA2B;IAE3C,YAAY,QAAuB;QACjC,IAAI,CAAC,QAAQ,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,WAAW,CACf,QAAgB,EAChB,UAAkB,EAClB,OAAoB;QAEpB,
|
|
1
|
+
{"version":3,"file":"chunk-processor.js","sourceRoot":"","sources":["../../src/core/chunk-processor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,SAAS,kBAAkB,CAAC,OAAe;IACzC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,MAAM,OAAO,cAAc;IACjB,QAAQ,CAA2B;IAE3C,YAAY,QAAuB;QACjC,IAAI,CAAC,QAAQ,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,WAAW,CACf,QAAgB,EAChB,UAAkB,EAClB,OAAoB;QAEpB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEzD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,CAAC,WAAW,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACtD,KAAK,CAAC,UAAU,GAAG,QAAQ,CAAC;YAC5B,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC;QAChC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,KAAe,EACf,SAAoE,EACpE,iBAA0B,EAAE;QAE5B,MAAM,SAAS,GAAY,EAAE,CAAC;QAC9B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,0DAA0D;QAC1D,MAAM,WAAW,GAAG,IAAI,GAAG,EAGxB,CAAC;QACJ,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAChD,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,EAAE;oBAChC,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,MAAM,EAAE,CAAC,KAAK,CAAC;iBAChB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAErC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,OAAO,CAAC,GAAG,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,sDAAsD;YACtD,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACzC,IAAI,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;gBACpD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC,CAAC;gBACxD,OAAO,CAAC,GAAG,CAAC,mBAAmB,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAC,CAAC;gBACjE,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC;gBACjC,SAAS;YACX,CAAC;YAED,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC,CAAC;YAExD,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CACnC,QAAQ,EACR,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,OAAO,CACb,CAAC;gBAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,mBAAmB,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC;gBAC3D,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;gBACtD,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,0BAA0B,QAAQ,KAAK,KAAK,EAAE,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAED,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,QAAQ,UAAU,kCAAkC,CAAC,CAAC;QACrE,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,MAAe,EAAE,UAAkB;QACvD,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QAEnE,MAAM,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAEtD,IAAI,QAAQ,GAAY,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACpD,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC5C,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1B,QAAQ,GAAG,MAAiB,CAAC;YAC/B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,wDAAwD;QAC1D,CAAC;QAED,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;QAE3E,MAAM,SAAS,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;QAE3C,MAAM,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,cAAc,UAAU,EAAE,CAAC,CAAC;IACxE,CAAC;CACF"}
|
package/dist/core/embedder.d.ts
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
import { EmbeddingProvider, EmbeddedChunk, Chunk } from "../interfaces/index.js";
|
|
2
|
+
import { RetryOptions } from "./utils.js";
|
|
2
3
|
export declare class EmbedderProcessor {
|
|
3
4
|
private provider;
|
|
4
5
|
private rateLimitMs;
|
|
5
6
|
private batchSize;
|
|
7
|
+
private retryOptions;
|
|
8
|
+
private concurrency;
|
|
6
9
|
constructor(provider: EmbeddingProvider, options?: {
|
|
7
10
|
rateLimitMs?: number;
|
|
8
11
|
batchSize?: number;
|
|
12
|
+
retry?: RetryOptions;
|
|
13
|
+
concurrency?: number;
|
|
9
14
|
});
|
|
10
|
-
private sleep;
|
|
11
15
|
embedChunk(chunk: Chunk): Promise<EmbeddedChunk>;
|
|
12
16
|
embedBatch(chunks: Chunk[]): Promise<EmbeddedChunk[]>;
|
|
13
17
|
getChunksToEmbed(chunksFile: string, force?: boolean): Promise<{
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/core/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,aAAa,EACb,KAAK,EACN,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/core/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,aAAa,EACb,KAAK,EACN,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAqC,YAAY,EAAE,MAAM,YAAY,CAAC;AAO7E,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAoB;IACpC,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,WAAW,CAAS;gBAG1B,QAAQ,EAAE,iBAAiB,EAC3B,OAAO,GAAE;QACP,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,KAAK,CAAC,EAAE,YAAY,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;KACjB;IASF,UAAU,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,aAAa,CAAC;IAahD,UAAU,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IA+CrD,gBAAgB,CACpB,UAAU,EAAE,MAAM,EAClB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC;QACT,aAAa,EAAE,KAAK,EAAE,CAAC;KACxB,CAAC;IAuDI,cAAc,CAClB,aAAa,EAAE,aAAa,EAAE,EAC9B,UAAU,EAAE,MAAM,EAClB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC,IAAI,CAAC;IAoCV,GAAG,CACP,UAAU,EAAE,MAAM,EAClB,KAAK,GAAE,OAAe,GACrB,OAAO,CAAC,aAAa,EAAE,CAAC;CAiB5B"}
|
package/dist/core/embedder.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
2
2
|
import { dirname } from "path";
|
|
3
3
|
import { createHash } from "crypto";
|
|
4
|
+
import { EmbedError } from "./errors.js";
|
|
5
|
+
import { sleep, withRetry, withConcurrency } from "./utils.js";
|
|
4
6
|
function chunkContentHash(chunk) {
|
|
5
7
|
if (chunk.contentHash)
|
|
6
8
|
return chunk.contentHash;
|
|
@@ -10,16 +12,17 @@ export class EmbedderProcessor {
|
|
|
10
12
|
provider;
|
|
11
13
|
rateLimitMs;
|
|
12
14
|
batchSize;
|
|
15
|
+
retryOptions;
|
|
16
|
+
concurrency;
|
|
13
17
|
constructor(provider, options = {}) {
|
|
14
18
|
this.provider = provider;
|
|
15
19
|
this.rateLimitMs = options.rateLimitMs ?? 500;
|
|
16
20
|
this.batchSize = options.batchSize ?? 10;
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
21
|
+
this.retryOptions = options.retry ?? {};
|
|
22
|
+
this.concurrency = options.concurrency ?? 1;
|
|
20
23
|
}
|
|
21
24
|
async embedChunk(chunk) {
|
|
22
|
-
const embedding = await this.provider.embed(chunk.content);
|
|
25
|
+
const embedding = await withRetry(() => this.provider.embed(chunk.content), this.retryOptions);
|
|
23
26
|
return {
|
|
24
27
|
...chunk,
|
|
25
28
|
embedding,
|
|
@@ -27,43 +30,51 @@ export class EmbedderProcessor {
|
|
|
27
30
|
};
|
|
28
31
|
}
|
|
29
32
|
async embedBatch(chunks) {
|
|
30
|
-
const results = [];
|
|
31
33
|
if (this.provider.embedBatch && chunks.length >= this.batchSize) {
|
|
32
34
|
const texts = chunks.map((c) => c.content);
|
|
33
|
-
const embeddings = await this.provider.embedBatch(texts);
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
embedding: embeddings[i],
|
|
38
|
-
embeddedAt: Date.now() / 1000,
|
|
35
|
+
const embeddings = await withRetry(() => this.provider.embedBatch(texts), this.retryOptions);
|
|
36
|
+
if (embeddings.length !== chunks.length) {
|
|
37
|
+
throw new EmbedError(`embedBatch returned ${embeddings.length} embeddings for ${chunks.length} chunks`, {
|
|
38
|
+
suggestion: "Check that your EmbeddingProvider.embedBatch() returns one vector per input text.",
|
|
39
39
|
});
|
|
40
40
|
}
|
|
41
|
+
return chunks.map((chunk, i) => ({
|
|
42
|
+
...chunk,
|
|
43
|
+
embedding: embeddings[i],
|
|
44
|
+
embeddedAt: Date.now() / 1000,
|
|
45
|
+
}));
|
|
41
46
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
await this.sleep(this.rateLimitMs);
|
|
54
|
-
}
|
|
47
|
+
let completed = 0;
|
|
48
|
+
const tasks = chunks.map((chunk) => async () => {
|
|
49
|
+
const label = chunk.metadata.event_type ||
|
|
50
|
+
chunk.metadata.title ||
|
|
51
|
+
chunk.sourceFile.split("/").pop() ||
|
|
52
|
+
"unknown";
|
|
53
|
+
const embedded = await this.embedChunk(chunk);
|
|
54
|
+
completed++;
|
|
55
|
+
console.log(` [${completed}/${chunks.length}] ${label}`);
|
|
56
|
+
if (this.rateLimitMs > 0) {
|
|
57
|
+
await sleep(this.rateLimitMs);
|
|
55
58
|
}
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
return embedded;
|
|
60
|
+
});
|
|
61
|
+
return withConcurrency(tasks, this.concurrency);
|
|
58
62
|
}
|
|
59
63
|
async getChunksToEmbed(chunksFile, force = false) {
|
|
60
64
|
let chunks;
|
|
61
65
|
try {
|
|
62
66
|
const content = await readFile(chunksFile, "utf-8");
|
|
63
|
-
|
|
67
|
+
const parsed = JSON.parse(content);
|
|
68
|
+
if (!Array.isArray(parsed)) {
|
|
69
|
+
throw new Error("chunks file does not contain a JSON array");
|
|
70
|
+
}
|
|
71
|
+
chunks = parsed;
|
|
64
72
|
}
|
|
65
|
-
catch {
|
|
66
|
-
throw new
|
|
73
|
+
catch (err) {
|
|
74
|
+
throw new EmbedError(`Failed to load chunks from ${chunksFile}: ${err instanceof Error ? err.message : String(err)}`, {
|
|
75
|
+
suggestion: "Run the pipeline without --skip-upload to regenerate chunks first.",
|
|
76
|
+
cause: err,
|
|
77
|
+
});
|
|
67
78
|
}
|
|
68
79
|
console.log(`📖 Loaded ${chunks.length} chunks from ${chunksFile}`);
|
|
69
80
|
if (force) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/core/embedder.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/core/embedder.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,eAAe,EAAgB,MAAM,YAAY,CAAC;AAE7E,SAAS,gBAAgB,CAAC,KAAY;IACpC,IAAI,KAAK,CAAC,WAAW;QAAE,OAAO,KAAK,CAAC,WAAW,CAAC;IAChD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC/E,CAAC;AAED,MAAM,OAAO,iBAAiB;IACpB,QAAQ,CAAoB;IAC5B,WAAW,CAAS;IACpB,SAAS,CAAS;IAClB,YAAY,CAAe;IAC3B,WAAW,CAAS;IAE5B,YACE,QAA2B,EAC3B,UAKI,EAAE;QAEN,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;QAC9C,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,EAAE,CAAC;QACzC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QACxC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAY;QAC3B,MAAM,SAAS,GAAG,MAAM,SAAS,CAC/B,GAAG,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,EACxC,IAAI,CAAC,YAAY,CAClB,CAAC;QAEF,OAAO;YACL,GAAG,KAAK;YACR,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI;SAC9B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAAe;QAC9B,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YAChE,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YAC3C,MAAM,UAAU,GAAG,MAAM,SAAS,CAChC,GAAG,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAW,CAAC,KAAK,CAAC,EACtC,IAAI,CAAC,YAAY,CAClB,CAAC;YAEF,IAAI,UAAU,CAAC,MAAM,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC;gBACxC,MAAM,IAAI,UAAU,CAClB,uBAAuB,UAAU,CAAC,MAAM,mBAAmB,MAAM,CAAC,MAAM,SAAS,EACjF;oBACE,UAAU,EACR,mFAAmF;iBACtF,CACF,CAAC;YACJ,CAAC;YAED,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC/B,GAAG,KAAK;gBACR,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC;gBACxB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI;aAC9B,CAAC,CAAC,CAAC;QACN,CAAC;QAED,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,IAA4B,EAAE;YACrE,MAAM,KAAK,GACR,KAAK,CAAC,QAAQ,CAAC,UAAqB;gBACpC,KAAK,CAAC,QAAQ,CAAC,KAAgB;gBAChC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE;gBACjC,SAAS,CAAC;YAEZ,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAC9C,SAAS,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC,MAAM,SAAS,IAAI,MAAM,CAAC,MAAM,KAAK,KAAK,EAAE,CAAC,CAAC;YAE1D,IAAI,IAAI,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;gBACzB,MAAM,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAChC,CAAC;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,OAAO,eAAe,CAAC,KAAK,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAClD,CAAC;IAED,KAAK,CAAC,gBAAgB,CACpB,UAAkB,EAClB,QAAiB,KAAK;QAItB,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACpD,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC5C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC3B,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,GAAG,MAAiB,CAAC;QAC7B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,UAAU,CAClB,8BAA8B,UAAU,KAAK,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAC/F;gBACE,UAAU,EACR,oEAAoE;gBACtE,KAAK,EAAE,GAAG;aACX,CACF,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,MAAM,gBAAgB,UAAU,EAAE,CAAC,CAAC;QAEpE,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;YACrD,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,CAAC;QACnC,CAAC;QAED,IAAI,kBAAkB,GAAoB,EAAE,CAAC;QAC7C,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAClE,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;YACxD,kBAAkB,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyB,CAAC;QACvD,KAAK,MAAM,GAAG,IAAI,kBAAkB,EAAE,CAAC;YACrC,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,IAAI,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,2BAA2B,aAAa,CAAC,IAAI,SAAS,CAAC,CAAC;QAEpE,MAAM,aAAa,GAAY,EAAE,CAAC;QAClC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;YAC1C,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAClC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,aAAa,EAAE,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,cAAc,CAClB,aAA8B,EAC9B,UAAkB,EAClB,QAAiB,KAAK;QAEtB,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QAClE,MAAM,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE1D,MAAM,SAAS,GAAG,IAAI,GAAG,EAAyB,CAAC;QACnD,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,IAAI,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC3B,CAAC;QAED,IAAI,QAAQ,GAAoB,EAAE,CAAC;QACnC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;gBACxD,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACjC,CAAC;YAAC,MAAM,CAAC;gBACP,yBAAyB;YAC3B,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,KAAK;YACjB,CAAC,CAAC,EAAE;YACJ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBACpB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,gBAAgB,CAAC,CAAC,CAAC,CAAC;gBAClD,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC,CAAC,CAAC;QAEP,KAAK,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;QAE7B,MAAM,SAAS,CAAC,cAAc,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,CAAC,MAAM,kBAAkB,cAAc,EAAE,CAAC,CAAC;QAC1E,OAAO,CAAC,GAAG,CACT,WAAW,aAAa,CAAC,MAAM,eAAe,KAAK,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM,EAAE,CACpF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,GAAG,CACP,UAAkB,EAClB,QAAiB,KAAK;QAEtB,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QAE/D,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAEzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;YAC7C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,sBAAsB,aAAa,CAAC,MAAM,SAAS,CAAC,CAAC;QAEjE,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QAC3D,MAAM,IAAI,CAAC,cAAc,CAAC,aAAa,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC;QAE5D,OAAO,aAAa,CAAC;IACvB,CAAC;CACF"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export declare class RagError extends Error {
|
|
2
|
+
suggestion?: string;
|
|
3
|
+
constructor(message: string, options?: {
|
|
4
|
+
suggestion?: string;
|
|
5
|
+
cause?: unknown;
|
|
6
|
+
});
|
|
7
|
+
}
|
|
8
|
+
export declare class ConfigError extends RagError {
|
|
9
|
+
}
|
|
10
|
+
export declare class ChunkError extends RagError {
|
|
11
|
+
}
|
|
12
|
+
export declare class EmbedError extends RagError {
|
|
13
|
+
}
|
|
14
|
+
export declare class UploadError extends RagError {
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../../src/core/errors.ts"],"names":[],"mappings":"AAAA,qBAAa,QAAS,SAAQ,KAAK;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;gBAGlB,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CASrD;AAED,qBAAa,WAAY,SAAQ,QAAQ;CAAG;AAC5C,qBAAa,UAAW,SAAQ,QAAQ;CAAG;AAC3C,qBAAa,UAAW,SAAQ,QAAQ;CAAG;AAC3C,qBAAa,WAAY,SAAQ,QAAQ;CAAG"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export class RagError extends Error {
|
|
2
|
+
suggestion;
|
|
3
|
+
constructor(message, options) {
|
|
4
|
+
super(message, options?.cause !== undefined ? { cause: options.cause } : undefined);
|
|
5
|
+
this.name = this.constructor.name;
|
|
6
|
+
this.suggestion = options?.suggestion;
|
|
7
|
+
}
|
|
8
|
+
}
|
|
9
|
+
export class ConfigError extends RagError {
|
|
10
|
+
}
|
|
11
|
+
export class ChunkError extends RagError {
|
|
12
|
+
}
|
|
13
|
+
export class EmbedError extends RagError {
|
|
14
|
+
}
|
|
15
|
+
export class UploadError extends RagError {
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=errors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../../src/core/errors.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,QAAS,SAAQ,KAAK;IACjC,UAAU,CAAU;IAEpB,YACE,OAAe,EACf,OAAkD;QAElD,KAAK,CACH,OAAO,EACP,OAAO,EAAE,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,SAAS,CACpE,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC;QAClC,IAAI,CAAC,UAAU,GAAG,OAAO,EAAE,UAAU,CAAC;IACxC,CAAC;CACF;AAED,MAAM,OAAO,WAAY,SAAQ,QAAQ;CAAG;AAC5C,MAAM,OAAO,UAAW,SAAQ,QAAQ;CAAG;AAC3C,MAAM,OAAO,UAAW,SAAQ,QAAQ;CAAG;AAC3C,MAAM,OAAO,WAAY,SAAQ,QAAQ;CAAG"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"git-tracker.d.ts","sourceRoot":"","sources":["../../src/core/git-tracker.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"git-tracker.d.ts","sourceRoot":"","sources":["../../src/core/git-tracker.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAIrD,qBAAa,UAAU;IACrB,OAAO,CAAC,GAAG,CAAY;IACvB,OAAO,CAAC,QAAQ,CAAgB;IAChC,OAAO,CAAC,WAAW,CAAW;IAC9B,OAAO,CAAC,gBAAgB,CAAuB;IAC/C,OAAO,CAAC,gBAAgB,CAAwB;gBAEpC,QAAQ,EAAE,WAAW,EAAE;YAMrB,cAAc;YAOd,qBAAqB;IAQnC,OAAO,CAAC,iBAAiB;IAWzB,OAAO,CAAC,cAAc;IAOhB,kBAAkB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IAKvC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAwB9D,eAAe,IAAI,OAAO,CAC9B,GAAG,CAAC,MAAM,EAAE;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,WAAW,CAAA;KAAE,CAAC,CAC1D;IA0BK,eAAe,CAAC,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC;QACjE,SAAS,EAAE,MAAM,EAAE,CAAC;QACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;QACnB,SAAS,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;CA+BH"}
|
package/dist/core/git-tracker.js
CHANGED
|
@@ -2,31 +2,6 @@ import { simpleGit } from "simple-git";
|
|
|
2
2
|
import { glob } from "glob";
|
|
3
3
|
import { minimatch } from "minimatch";
|
|
4
4
|
import path from "path";
|
|
5
|
-
const MAX_FILES_PER_BATCH = 100;
|
|
6
|
-
const MAX_CMD_LEN = 32000;
|
|
7
|
-
function batchFiles(files) {
|
|
8
|
-
const batches = [];
|
|
9
|
-
let currentBatch = [];
|
|
10
|
-
let currentLen = 0;
|
|
11
|
-
const baseCmdLen = "git log -1 --format=%H --all -- ".length;
|
|
12
|
-
for (const file of files) {
|
|
13
|
-
const fileLen = file.length + 1;
|
|
14
|
-
if (currentBatch.length >= MAX_FILES_PER_BATCH ||
|
|
15
|
-
currentLen + fileLen > MAX_CMD_LEN) {
|
|
16
|
-
if (currentBatch.length > 0) {
|
|
17
|
-
batches.push(currentBatch);
|
|
18
|
-
currentBatch = [];
|
|
19
|
-
currentLen = baseCmdLen;
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
currentBatch.push(file);
|
|
23
|
-
currentLen += fileLen;
|
|
24
|
-
}
|
|
25
|
-
if (currentBatch.length > 0) {
|
|
26
|
-
batches.push(currentBatch);
|
|
27
|
-
}
|
|
28
|
-
return batches;
|
|
29
|
-
}
|
|
30
5
|
export class GitTracker {
|
|
31
6
|
git;
|
|
32
7
|
chunkers;
|
|
@@ -40,24 +15,14 @@ export class GitTracker {
|
|
|
40
15
|
}
|
|
41
16
|
async getCurrentHead() {
|
|
42
17
|
if (!this.currentHeadCache) {
|
|
43
|
-
|
|
44
|
-
this.currentHeadCache = await this.git.revparse(["HEAD"]);
|
|
45
|
-
}
|
|
46
|
-
catch {
|
|
47
|
-
this.currentHeadCache = "dev_0000000000000000000000000000000000000000";
|
|
48
|
-
}
|
|
18
|
+
this.currentHeadCache = await this.git.revparse(["HEAD"]);
|
|
49
19
|
}
|
|
50
20
|
return this.currentHeadCache;
|
|
51
21
|
}
|
|
52
22
|
async hasUncommittedChanges() {
|
|
53
23
|
if (this.uncommittedCache === null) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
this.uncommittedCache = status.files.length > 0;
|
|
57
|
-
}
|
|
58
|
-
catch {
|
|
59
|
-
this.uncommittedCache = false;
|
|
60
|
-
}
|
|
24
|
+
const status = await this.git.status();
|
|
25
|
+
this.uncommittedCache = status.files.length > 0;
|
|
61
26
|
}
|
|
62
27
|
return this.uncommittedCache;
|
|
63
28
|
}
|
|
@@ -82,37 +47,22 @@ export class GitTracker {
|
|
|
82
47
|
}
|
|
83
48
|
async getCommitHashes(files) {
|
|
84
49
|
const commitMap = new Map();
|
|
85
|
-
|
|
86
|
-
const currentHead = await this.getCurrentHead();
|
|
87
|
-
for (const batch of batches) {
|
|
50
|
+
await Promise.all(files.map(async (file) => {
|
|
88
51
|
try {
|
|
89
52
|
const output = await this.git.raw([
|
|
90
53
|
"log",
|
|
91
54
|
"-1",
|
|
92
55
|
"--format=%H",
|
|
93
|
-
"--all",
|
|
94
56
|
"--",
|
|
95
|
-
|
|
57
|
+
file,
|
|
96
58
|
]);
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
const hash = lines[i].trim();
|
|
100
|
-
if (hash) {
|
|
101
|
-
commitMap.set(batch[i], hash);
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
for (const file of batch) {
|
|
105
|
-
if (!commitMap.has(file)) {
|
|
106
|
-
commitMap.set(file, currentHead);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
59
|
+
const hash = output.trim();
|
|
60
|
+
commitMap.set(file, hash || (await this.getCurrentHead()));
|
|
109
61
|
}
|
|
110
62
|
catch {
|
|
111
|
-
|
|
112
|
-
commitMap.set(file, currentHead);
|
|
113
|
-
}
|
|
63
|
+
commitMap.set(file, await this.getCurrentHead());
|
|
114
64
|
}
|
|
115
|
-
}
|
|
65
|
+
}));
|
|
116
66
|
return commitMap;
|
|
117
67
|
}
|
|
118
68
|
async getCurrentState() {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"git-tracker.js","sourceRoot":"","sources":["../../src/core/git-tracker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAa,MAAM,YAAY,CAAC;AAClD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,
|
|
1
|
+
{"version":3,"file":"git-tracker.js","sourceRoot":"","sources":["../../src/core/git-tracker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAa,MAAM,YAAY,CAAC;AAClD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,UAAU;IACb,GAAG,CAAY;IACf,QAAQ,CAAgB;IACxB,WAAW,CAAW;IACtB,gBAAgB,GAAkB,IAAI,CAAC;IACvC,gBAAgB,GAAmB,IAAI,CAAC;IAEhD,YAAY,QAAuB;QACjC,IAAI,CAAC,GAAG,GAAG,SAAS,EAAE,CAAC;QACvB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACzD,CAAC;IAEO,KAAK,CAAC,cAAc;QAC1B,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3B,IAAI,CAAC,gBAAgB,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5D,CAAC;QACD,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,qBAAqB;QACjC,IAAI,IAAI,CAAC,gBAAgB,KAAK,IAAI,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC;YACvC,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;QAClD,CAAC;QACD,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IAEO,iBAAiB,CAAC,QAAgB;QACxC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAC;oBAC3C,OAAO,OAAO,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,cAAc,CAAC,QAAgB,EAAE,OAAe;QACtD,MAAM,cAAc,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1D,MAAM,iBAAiB,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO,SAAS,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,kBAAkB;QACtB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACpC,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAe;QACnC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;QAE5C,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YACvB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAChC,KAAK;oBACL,IAAI;oBACJ,aAAa;oBACb,IAAI;oBACJ,IAAI;iBACL,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3B,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;YAC7D,CAAC;YAAC,MAAM,CAAC;gBACP,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC;YACnD,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QAEF,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,eAAe;QAGnB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,kBAAkB,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,qBAAqB,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAEhD,MAAM,KAAK,GAAG,IAAI,GAAG,EAGlB,CAAC;QAEJ,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,IAAI,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC;YACpD,IAAI,QAAQ,EAAE,CAAC;gBACb,UAAU,GAAG,GAAG,UAAU,QAAQ,CAAC;YACrC,CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAC7C,IAAI,OAAO,EAAE,CAAC;gBACZ,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,aAAkC;QAKtD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,eAAe,EAAE,CAAC;QAC7C,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,OAAO,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAE7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;gBACrC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,QAAQ,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;gBACxC,OAAO,CAAC,GAAG,CACT,iBAAiB,QAAQ,KAAK,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CACvF,CAAC;gBACF,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,aAAa,EAAE,CAAC;YACvC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,CAAC,kBAAkB,QAAQ,EAAE,CAAC,CAAC;gBAC1C,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IAC5C,CAAC;CACF"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { FileChunker, EmbeddingProvider, VectorStore } from "../interfaces/index.js";
|
|
2
|
+
import { RetryOptions } from "./utils.js";
|
|
2
3
|
export interface RAGPipelineConfig {
|
|
3
4
|
chunkers: FileChunker[];
|
|
4
5
|
embedder: EmbeddingProvider;
|
|
@@ -8,8 +9,15 @@ export interface RAGPipelineConfig {
|
|
|
8
9
|
embeddingsFile?: string;
|
|
9
10
|
force?: boolean;
|
|
10
11
|
skipUpload?: boolean;
|
|
12
|
+
dryRun?: boolean;
|
|
11
13
|
rateLimitMs?: number;
|
|
12
14
|
batchSize?: number;
|
|
15
|
+
retry?: RetryOptions;
|
|
16
|
+
concurrency?: number;
|
|
17
|
+
telemetry?: boolean;
|
|
18
|
+
notifications?: {
|
|
19
|
+
webhookUrl?: string;
|
|
20
|
+
};
|
|
13
21
|
};
|
|
14
22
|
}
|
|
15
23
|
export declare class Orchestrator {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAKA,OAAO,EACL,WAAW,EACX,iBAAiB,EACjB,WAAW,EAEZ,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,WAAW,EAAE,WAAW,CAAC;IACzB,OAAO,CAAC,EAAE;QACR,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,MAAM,CAAC,EAAE,OAAO,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,KAAK,CAAC,EAAE,YAAY,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,aAAa,CAAC,EAAE;YAAE,UAAU,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KACzC,CAAC;CACH;AA+CD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAoB;IAClC,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,cAAc,CAAS;gBAEnB,MAAM,EAAE,iBAAiB;IAO/B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;CA+J3B"}
|