@vivantel/rag-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/.github/config/release-please.json +38 -0
  2. package/.github/dependabot.yaml +28 -0
  3. package/.github/workflows/ci.yaml +119 -0
  4. package/.github/workflows/publish.yaml +151 -0
  5. package/.github/workflows/release.yaml +150 -0
  6. package/.versionrc.json +19 -0
  7. package/CHANGELOG.md +21 -0
  8. package/README.md +62 -0
  9. package/bin/rag-update.ts +49 -0
  10. package/dist/config-loader.d.ts +3 -0
  11. package/dist/config-loader.d.ts.map +1 -0
  12. package/dist/config-loader.js +13 -0
  13. package/dist/config-loader.js.map +1 -0
  14. package/dist/core/chunk-processor.d.ts +12 -0
  15. package/dist/core/chunk-processor.d.ts.map +1 -0
  16. package/dist/core/chunk-processor.js +65 -0
  17. package/dist/core/chunk-processor.js.map +1 -0
  18. package/dist/core/embedder.d.ts +19 -0
  19. package/dist/core/embedder.d.ts.map +1 -0
  20. package/dist/core/embedder.js +139 -0
  21. package/dist/core/embedder.js.map +1 -0
  22. package/dist/core/git-tracker.d.ts +25 -0
  23. package/dist/core/git-tracker.d.ts.map +1 -0
  24. package/dist/core/git-tracker.js +164 -0
  25. package/dist/core/git-tracker.js.map +1 -0
  26. package/dist/core/orchestrator.d.ts +22 -0
  27. package/dist/core/orchestrator.d.ts.map +1 -0
  28. package/dist/core/orchestrator.js +57 -0
  29. package/dist/core/orchestrator.js.map +1 -0
  30. package/dist/core/uploader.d.ts +15 -0
  31. package/dist/core/uploader.d.ts.map +1 -0
  32. package/dist/core/uploader.js +79 -0
  33. package/dist/core/uploader.js.map +1 -0
  34. package/dist/core/utils.d.ts +6 -0
  35. package/dist/core/utils.d.ts.map +1 -0
  36. package/dist/core/utils.js +23 -0
  37. package/dist/core/utils.js.map +1 -0
  38. package/dist/helpers/create-chunker.d.ts +9 -0
  39. package/dist/helpers/create-chunker.d.ts.map +1 -0
  40. package/dist/helpers/create-chunker.js +24 -0
  41. package/dist/helpers/create-chunker.js.map +1 -0
  42. package/dist/index.d.ts +11 -0
  43. package/dist/index.d.ts.map +1 -0
  44. package/dist/index.js +16 -0
  45. package/dist/index.js.map +1 -0
  46. package/dist/interfaces/chunker.d.ts +46 -0
  47. package/dist/interfaces/chunker.d.ts.map +1 -0
  48. package/dist/interfaces/chunker.js +5 -0
  49. package/dist/interfaces/chunker.js.map +1 -0
  50. package/dist/interfaces/embedder.d.ts +28 -0
  51. package/dist/interfaces/embedder.d.ts.map +1 -0
  52. package/dist/interfaces/embedder.js +5 -0
  53. package/dist/interfaces/embedder.js.map +1 -0
  54. package/dist/interfaces/index.d.ts +4 -0
  55. package/dist/interfaces/index.d.ts.map +1 -0
  56. package/dist/interfaces/index.js +4 -0
  57. package/dist/interfaces/index.js.map +1 -0
  58. package/dist/interfaces/vector-store.d.ts +53 -0
  59. package/dist/interfaces/vector-store.d.ts.map +1 -0
  60. package/dist/interfaces/vector-store.js +5 -0
  61. package/dist/interfaces/vector-store.js.map +1 -0
  62. package/dist/strategies/chunk/index.d.ts +5 -0
  63. package/dist/strategies/chunk/index.d.ts.map +1 -0
  64. package/dist/strategies/chunk/index.js +5 -0
  65. package/dist/strategies/chunk/index.js.map +1 -0
  66. package/dist/strategies/chunk/markdown-headers.d.ts +7 -0
  67. package/dist/strategies/chunk/markdown-headers.d.ts.map +1 -0
  68. package/dist/strategies/chunk/markdown-headers.js +89 -0
  69. package/dist/strategies/chunk/markdown-headers.js.map +1 -0
  70. package/dist/strategies/chunk/semantic.d.ts +7 -0
  71. package/dist/strategies/chunk/semantic.d.ts.map +1 -0
  72. package/dist/strategies/chunk/semantic.js +62 -0
  73. package/dist/strategies/chunk/semantic.js.map +1 -0
  74. package/dist/strategies/chunk/token.d.ts +12 -0
  75. package/dist/strategies/chunk/token.d.ts.map +1 -0
  76. package/dist/strategies/chunk/token.js +56 -0
  77. package/dist/strategies/chunk/token.js.map +1 -0
  78. package/dist/strategies/chunk/whole-file.d.ts +3 -0
  79. package/dist/strategies/chunk/whole-file.d.ts.map +1 -0
  80. package/dist/strategies/chunk/whole-file.js +31 -0
  81. package/dist/strategies/chunk/whole-file.js.map +1 -0
  82. package/eslint.config.js +25 -0
  83. package/package.json +102 -0
  84. package/src/config-loader.ts +21 -0
  85. package/src/core/chunk-processor.test.ts +36 -0
  86. package/src/core/chunk-processor.ts +92 -0
  87. package/src/core/embedder.ts +189 -0
  88. package/src/core/git-tracker.test.ts +64 -0
  89. package/src/core/git-tracker.ts +202 -0
  90. package/src/core/orchestrator.test.ts +53 -0
  91. package/src/core/orchestrator.ts +97 -0
  92. package/src/core/uploader.ts +123 -0
  93. package/src/core/utils.ts +27 -0
  94. package/src/helpers/create-chunker.test.ts +31 -0
  95. package/src/helpers/create-chunker.ts +40 -0
  96. package/src/index.test.ts +33 -0
  97. package/src/index.ts +30 -0
  98. package/src/interfaces/chunker.ts +59 -0
  99. package/src/interfaces/embedder.ts +36 -0
  100. package/src/interfaces/index.test.ts +9 -0
  101. package/src/interfaces/index.ts +3 -0
  102. package/src/interfaces/vector-store.ts +71 -0
  103. package/src/strategies/chunk/index.ts +4 -0
  104. package/src/strategies/chunk/markdown-headers.test.ts +37 -0
  105. package/src/strategies/chunk/markdown-headers.ts +106 -0
  106. package/src/strategies/chunk/semantic.test.ts +21 -0
  107. package/src/strategies/chunk/semantic.ts +80 -0
  108. package/src/strategies/chunk/token.test.ts +41 -0
  109. package/src/strategies/chunk/token.ts +72 -0
  110. package/src/strategies/chunk/whole-file.test.ts +24 -0
  111. package/src/strategies/chunk/whole-file.ts +35 -0
  112. package/tsconfig.json +21 -0
  113. package/typedoc.json +11 -0
  114. package/vitest.config.ts +19 -0
@@ -0,0 +1,6 @@
1
/** Returns the first 16 hexadecimal characters of the SHA-256 digest of `content`. */
+ export declare function computeContentHash(content: string): string;
2
/** Returns a promise that resolves (with no value) once a `setTimeout` of `ms` fires. */
+ export declare function sleep(ms: number): Promise<void>;
3
/** Splits `array` into consecutive slices of at most `batchSize` elements, preserving order. */
+ export declare function batchArray<T>(array: T[], batchSize: number): T[][];
4
/** Returns the text after the last "/" in `filePath`; when that text is empty, returns `filePath` unchanged. */
+ export declare function extractFileName(filePath: string): string;
5
/** Returns everything before the last "/" in `filePath`, or "" when the path contains no "/". */
+ export declare function extractDirectory(filePath: string): string;
6
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/core/utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAE1D;AAED,wBAAgB,KAAK,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAE/C;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,CAAC,EAAE,EAAE,CAMlE;AAED,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAExD;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAIzD"}
@@ -0,0 +1,23 @@
1
+ import { createHash } from "crypto";
2
/**
 * Produce a short, deterministic fingerprint of a string.
 *
 * @param {string} content - Text to fingerprint.
 * @returns {string} The first 16 hexadecimal characters of the SHA-256 digest.
 */
export function computeContentHash(content) {
  const hasher = createHash("sha256");
  hasher.update(content);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
5
/**
 * Pause for a given duration.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves with no value when the timer fires.
 */
export function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
8
/**
 * Split an array into consecutive batches, preserving element order.
 *
 * @template T
 * @param {T[]} array - Items to split; not mutated.
 * @param {number} batchSize - Maximum number of items per batch; must be greater than 0.
 * @returns {T[][]} Batches in order; the last one may be shorter. Empty input yields `[]`.
 * @throws {RangeError} If `batchSize` is not greater than 0 (including NaN).
 */
export function batchArray(array, batchSize) {
  // A zero or negative step never advances the loop index (infinite loop),
  // and NaN silently produced [[]]; reject both up front.
  if (!(batchSize > 0)) {
    throw new RangeError(
      `batchSize must be a positive number, received ${String(batchSize)}`,
    );
  }
  const batches = [];
  for (let start = 0; start < array.length; start += batchSize) {
    batches.push(array.slice(start, start + batchSize));
  }
  return batches;
}
15
/**
 * Take the last "/"-separated segment of a path.
 *
 * @param {string} filePath - Path using "/" as separator.
 * @returns {string} The text after the final "/"; when that text is empty
 *   (for example a path ending in "/"), the input is returned unchanged.
 */
export function extractFileName(filePath) {
  const tail = filePath.slice(filePath.lastIndexOf("/") + 1);
  if (tail) {
    return tail;
  }
  return filePath;
}
18
/**
 * Take everything before the last "/" of a path.
 *
 * @param {string} filePath - Path using "/" as separator.
 * @returns {string} The directory portion, or "" when the path contains no "/".
 */
export function extractDirectory(filePath) {
  const lastSlash = filePath.lastIndexOf("/");
  return lastSlash === -1 ? "" : filePath.slice(0, lastSlash);
}
23
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/core/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,UAAU,kBAAkB,CAAC,OAAe;IAChD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACzE,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,EAAU;IAC9B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,UAAU,CAAI,KAAU,EAAE,SAAiB;IACzD,MAAM,OAAO,GAAU,EAAE,CAAC;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACjD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC9C,OAAO,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,QAAQ,CAAC;AAC/C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAClC,KAAK,CAAC,GAAG,EAAE,CAAC;IACZ,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC"}
@@ -0,0 +1,9 @@
1
+ import { FileChunker, Chunk } from "../interfaces/index.js";
2
/** Callbacks and identity used by `createChunker` to assemble a `FileChunker`. */
+ export interface CreateChunkerOptions {
3
// Copied verbatim onto the returned chunker's `name`.
+ name: string;
4
// Copied verbatim onto the returned chunker's `patterns`.
+ patterns: string[];
5
// Receives the file's UTF-8 text, its path and the commit hash; its result is what chunk() returns.
+ process: (content: string, filePath: string, commitHash: string) => Promise<Chunk[]>;
6
// Optional gate: chunk() calls it with the file text and returns [] when it resolves to a falsy value.
+ canProcess?: (filePath: string, content?: string) => Promise<boolean>;
7
+ }
8
/**
 * Wraps the callbacks in `options` as a `FileChunker`. The returned chunk()
 * reads the file as UTF-8, consults `options.canProcess` when present, and
 * otherwise delegates to `options.process(content, filePath, commitHash)`.
 */
+ export declare function createChunker(options: CreateChunkerOptions): FileChunker;
9
+ //# sourceMappingURL=create-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"create-chunker.d.ts","sourceRoot":"","sources":["../../src/helpers/create-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,wBAAwB,CAAC;AAE5D,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,CACP,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,KACf,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IACtB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;CACvE;AAED,wBAAgB,aAAa,CAAC,OAAO,EAAE,oBAAoB,GAAG,WAAW,CA0BxE"}
@@ -0,0 +1,24 @@
1
/**
 * Assemble a file chunker from plain callbacks.
 *
 * @param {object} options
 * @param {string} options.name - Exposed as the chunker's `name`.
 * @param {string[]} options.patterns - Exposed as the chunker's `patterns`.
 * @param {Function} options.process - `(content, filePath, commitHash)` callback
 *   that produces the chunks.
 * @param {Function} [options.canProcess] - Optional `(filePath, content)` gate.
 * @returns {object} Object with `name`, `patterns`, `chunk` and `canProcess`.
 */
export function createChunker(options) {
  return {
    name: options.name,
    patterns: options.patterns,

    /** Read the file as UTF-8, apply the optional gate, then run `process`. */
    async chunk(filePath, commitHash) {
      const fsPromises = await import("fs/promises");
      const text = await fsPromises.readFile(filePath, "utf-8");
      // Without a user-supplied gate every file is accepted.
      const accepted = options.canProcess
        ? await options.canProcess(filePath, text)
        : true;
      if (!accepted) {
        return [];
      }
      return options.process(text, filePath, commitHash);
    },

    /** Forward to the user-supplied gate; resolves to true when none was given. */
    async canProcess(filePath, content) {
      return options.canProcess ? options.canProcess(filePath, content) : true;
    },
  };
}
24
+ //# sourceMappingURL=create-chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"create-chunker.js","sourceRoot":"","sources":["../../src/helpers/create-chunker.ts"],"names":[],"mappings":"AAaA,MAAM,UAAU,aAAa,CAAC,OAA6B;IACzD,OAAO;QACL,IAAI,EAAE,OAAO,CAAC,IAAI;QAClB,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAE1B,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,UAAkB;YAC9C,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;YACjD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAElD,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC/D,IAAI,CAAC,UAAU,EAAE,CAAC;oBAChB,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QACxD,CAAC;QAED,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,OAAgB;YACjD,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,OAAO,OAAO,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC/C,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,11 @@
1
+ export * from "./interfaces/index.js";
2
+ export { GitTracker } from "./core/git-tracker.js";
3
+ export { ChunkProcessor } from "./core/chunk-processor.js";
4
+ export { EmbedderProcessor } from "./core/embedder.js";
5
+ export { Uploader } from "./core/uploader.js";
6
+ export { Orchestrator, RAGPipelineConfig } from "./core/orchestrator.js";
7
+ export { computeContentHash, sleep, batchArray, extractFileName, extractDirectory, } from "./core/utils.js";
8
+ export { tokenStrategy, markdownHeadersStrategy, semanticStrategy, wholeFileStrategy, } from "./strategies/chunk/index.js";
9
+ export { createChunker } from "./helpers/create-chunker.js";
10
+ export { loadConfig } from "./config-loader.js";
11
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,cAAc,uBAAuB,CAAC;AAGtC,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EACL,kBAAkB,EAClB,KAAK,EACL,UAAU,EACV,eAAe,EACf,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EACL,aAAa,EACb,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,6BAA6B,CAAC;AAGrC,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAG5D,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,16 @@
1
+ // Interfaces
2
+ export * from "./interfaces/index.js";
3
+ // Core
4
+ export { GitTracker } from "./core/git-tracker.js";
5
+ export { ChunkProcessor } from "./core/chunk-processor.js";
6
+ export { EmbedderProcessor } from "./core/embedder.js";
7
+ export { Uploader } from "./core/uploader.js";
8
+ export { Orchestrator } from "./core/orchestrator.js";
9
+ export { computeContentHash, sleep, batchArray, extractFileName, extractDirectory, } from "./core/utils.js";
10
+ // Strategies
11
+ export { tokenStrategy, markdownHeadersStrategy, semanticStrategy, wholeFileStrategy, } from "./strategies/chunk/index.js";
12
+ // Helpers
13
+ export { createChunker } from "./helpers/create-chunker.js";
14
+ // Config loader
15
+ export { loadConfig } from "./config-loader.js";
16
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,aAAa;AACb,cAAc,uBAAuB,CAAC;AAEtC,OAAO;AACP,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAqB,MAAM,wBAAwB,CAAC;AACzE,OAAO,EACL,kBAAkB,EAClB,KAAK,EACL,UAAU,EACV,eAAe,EACf,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AAEzB,aAAa;AACb,OAAO,EACL,aAAa,EACb,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,6BAA6B,CAAC;AAErC,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAE5D,gBAAgB;AAChB,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Chunk interfaces - core building blocks for document processing
3
+ */
4
/** One piece of text taken from a source file, together with its metadata and the file/commit it came from. */
+ export interface Chunk {
5
+ /** The actual text content of the chunk */
6
+ content: string;
7
+ /** Metadata about this chunk (source file, type, etc.) */
8
+ metadata: Record<string, unknown>;
9
+ /** Original source file path */
10
+ sourceFile: string;
11
+ /** Git commit hash when this chunk was generated */
12
+ commitHash: string;
13
+ /** Optional unique hash of the content (for change detection) */
14
+ contentHash?: string;
15
+ }
16
/** File-level chunker: identified by `name`, selected by glob `patterns`, and turning a file path into chunks. */
+ export interface FileChunker {
17
+ /** Unique name of this chunker */
18
+ name: string;
19
+ /** Glob patterns this chunker handles */
20
+ patterns: string[];
21
+ /**
22
+ * Process a file and return chunks.
23
+ * Returns empty array if file should be skipped.
24
+ */
25
+ chunk(filePath: string, commitHash: string): Promise<Chunk[]>;
26
+ /**
27
+ * Optional: validate if this chunker can process the file
28
+ * (called before chunk() to filter early)
29
+ */
30
+ canProcess?(filePath: string, content?: string): Promise<boolean>;
31
+ }
32
/**
 * Text-level splitting strategy: unlike FileChunker it receives the text
 * itself, with the file path as an optional hint.
 */
+ export interface ChunkStrategy {
33
+ /** Strategy name */
34
+ name: string;
35
+ /** Split text into chunks according to strategy */
36
+ chunk(text: string, filePath?: string): Promise<Chunk[]>;
37
+ /** Optional: extract metadata without full chunking */
38
+ extractMetadata?(text: string, filePath?: string): Record<string, unknown>;
39
+ }
40
/** Named post-processing step applied to one chunk at a time; resolving to null drops the chunk. */
+ export interface ChunkTransformer {
41
+ /** Transformer name */
42
+ name: string;
43
+ /** Transform a chunk (return null to skip) */
44
+ transform(chunk: Chunk): Promise<Chunk | null>;
45
+ }
46
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/interfaces/chunker.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,KAAK;IACpB,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAEhB,0DAA0D;IAC1D,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAElC,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IAEnB,oDAAoD;IACpD,UAAU,EAAE,MAAM,CAAC;IAEnB,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,kCAAkC;IAClC,IAAI,EAAE,MAAM,CAAC;IAEb,yCAAyC;IACzC,QAAQ,EAAE,MAAM,EAAE,CAAC;IAEnB;;;OAGG;IACH,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IAE9D;;;OAGG;IACH,UAAU,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CACnE;AAED,MAAM,WAAW,aAAa;IAC5B,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;IAEb,mDAAmD;IACnD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IAEzD,uDAAuD;IACvD,eAAe,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC5E;AAED,MAAM,WAAW,gBAAgB;IAC/B,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IAEb,8CAA8C;IAC9C,SAAS,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;CAChD"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Chunk interfaces - core building blocks for document processing
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/interfaces/chunker.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Embedding provider interfaces
3
+ */
4
+ import { Chunk } from "./chunker.js";
5
/** Contract for a backend that converts text into embedding vectors; only `embed` is mandatory among the methods. */
+ export interface EmbeddingProvider {
6
+ /** Provider name (e.g., 'github-models', 'openai') */
7
+ readonly name: string;
8
+ /** Embedding vector dimensions */
9
+ readonly dimensions: number;
10
+ /** Maximum tokens per request (optional) */
11
+ readonly maxTokens?: number;
12
+ /** Convert text to embedding vector */
13
+ embed(text: string): Promise<number[]>;
14
+ /** Batch convert (optional, for performance) */
15
+ embedBatch?(texts: string[]): Promise<number[][]>;
16
+ /** Check if provider is available (e.g., valid API key) */
17
+ healthCheck?(): Promise<boolean>;
18
+ }
19
/** Settings bundle pairing an embedding provider with two optional numeric knobs. */
+ export interface EmbeddingConfig {
20
// Provider used to produce the embeddings.
+ provider: EmbeddingProvider;
21
// NOTE(review): presumably the number of texts per batch — the consumer is not visible here; confirm.
+ batchSize?: number;
22
// NOTE(review): the name suggests a delay in milliseconds between requests — confirm against the consumer.
+ rateLimitMs?: number;
23
+ }
24
/** A Chunk extended with its embedding vector and an `embeddedAt` number. */
+ export interface EmbeddedChunk extends Chunk {
25
// Embedding vector computed for this chunk.
+ embedding: number[];
26
// NOTE(review): presumably a timestamp of when the embedding was computed; unit not shown here — confirm.
+ embeddedAt: number;
27
+ }
28
+ //# sourceMappingURL=embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/interfaces/embedder.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC,sDAAsD;IACtD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,kCAAkC;IAClC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAE5B,4CAA4C;IAC5C,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAE5B,uCAAuC;IACvC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAEvC,gDAAgD;IAChD,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAElD,2DAA2D;IAC3D,WAAW,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,aAAc,SAAQ,KAAK;IAC1C,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Embedding provider interfaces
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=embedder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/interfaces/embedder.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,4 @@
1
+ export * from "./chunker.js";
2
+ export * from "./embedder.js";
3
+ export * from "./vector-store.js";
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/interfaces/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAC;AAC7B,cAAc,eAAe,CAAC;AAC9B,cAAc,mBAAmB,CAAC"}
@@ -0,0 +1,4 @@
1
+ export * from "./chunker.js";
2
+ export * from "./embedder.js";
3
+ export * from "./vector-store.js";
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/interfaces/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAC;AAC7B,cAAc,eAAe,CAAC;AAC9B,cAAc,mBAAmB,CAAC"}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Vector store interfaces
3
+ */
4
/** Record handed to a VectorStore: text, its embedding, and the provenance fields used for change detection. */
+ export interface VectorDocument {
5
+ /** Unique ID (optional, auto-generated if not provided) */
6
+ id?: string;
7
+ /** Original text content */
8
+ content: string;
9
+ /** Metadata for filtering */
10
+ metadata: Record<string, unknown>;
11
+ /** Embedding vector */
12
+ embedding: number[];
13
+ /** Source file path (for tracking updates) */
14
+ sourceFile: string;
15
+ /** Git commit hash (for change detection) */
16
+ commitHash: string;
17
+ /** Content hash (for change detection) */
18
+ contentHash: string;
19
+ /** Collection name (for multi-collection stores) */
20
+ collection?: string;
21
+ }
22
/** One hit returned by VectorStore.search(). */
+ export interface VectorSearchResult {
23
// Identifier of the matched document.
+ id: string;
24
// Text content of the matched document.
+ content: string;
25
// Metadata of the matched document.
+ metadata: Record<string, unknown>;
26
// NOTE(review): similarity score; its scale and direction are not specified in this declaration — confirm per store.
+ similarity: number;
27
+ }
28
/**
 * Contract for a vector database backend: initialization, upsert, deletion by
 * source file, state lookup for change detection, and similarity search.
 * `deleteCollection` and `getStats` are optional.
 */
+ export interface VectorStore {
29
+ /** Store name */
30
+ readonly name: string;
31
+ /** Initialize store (create tables, indexes, etc.) */
32
+ initialize(): Promise<void>;
33
+ /** Insert or update documents */
34
+ upsert(documents: VectorDocument[]): Promise<void>;
35
+ /** Delete documents by source file */
36
+ deleteBySourceFile(sourceFiles: string[]): Promise<void>;
37
+ /** Get current state (sourceFile → commitHash) for change detection */
38
+ getCurrentState(collection?: string): Promise<Map<string, string>>;
39
+ /** Search by embedding vector */
40
+ search(queryEmbedding: number[], topK: number, collection?: string): Promise<VectorSearchResult[]>;
41
+ /** Optional: delete entire collection */
42
+ deleteCollection?(collection: string): Promise<void>;
43
+ /** Optional: get store statistics */
44
+ getStats?(): Promise<{
45
+ documentCount: number;
46
+ collections: string[];
47
+ }>;
48
+ }
49
/** Settings bundle pairing a vector store with an optional collection name. */
+ export interface VectorStoreConfig {
50
// Store implementation to use.
+ provider: VectorStore;
51
// NOTE(review): presumably the collection passed to the store's collection-aware methods — confirm against the consumer.
+ collection?: string;
52
+ }
53
+ //# sourceMappingURL=vector-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-store.d.ts","sourceRoot":"","sources":["../../src/interfaces/vector-store.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,cAAc;IAC7B,2DAA2D;IAC3D,EAAE,CAAC,EAAE,MAAM,CAAC;IAEZ,4BAA4B;IAC5B,OAAO,EAAE,MAAM,CAAC;IAEhB,6BAA6B;IAC7B,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAElC,uBAAuB;IACvB,SAAS,EAAE,MAAM,EAAE,CAAC;IAEpB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IAEnB,6CAA6C;IAC7C,UAAU,EAAE,MAAM,CAAC;IAEnB,0CAA0C;IAC1C,WAAW,EAAE,MAAM,CAAC;IAEpB,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,iBAAiB;IACjB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB,sDAAsD;IACtD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAE5B,iCAAiC;IACjC,MAAM,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnD,sCAAsC;IACtC,kBAAkB,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEzD,uEAAuE;IACvE,eAAe,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAEnE,iCAAiC;IACjC,MAAM,CACJ,cAAc,EAAE,MAAM,EAAE,EACxB,IAAI,EAAE,MAAM,EACZ,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAAC;IAEjC,yCAAyC;IACzC,gBAAgB,CAAC,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErD,qCAAqC;IACrC,QAAQ,CAAC,IAAI,OAAO,CAAC;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC,CAAC;CACxE;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Vector store interfaces
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=vector-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-store.js","sourceRoot":"","sources":["../../src/interfaces/vector-store.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,5 @@
1
+ export { tokenStrategy } from "./token.js";
2
+ export { markdownHeadersStrategy } from "./markdown-headers.js";
3
+ export { semanticStrategy } from "./semantic.js";
4
+ export { wholeFileStrategy } from "./whole-file.js";
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAC3C,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,5 @@
1
+ export { tokenStrategy } from "./token.js";
2
+ export { markdownHeadersStrategy } from "./markdown-headers.js";
3
+ export { semanticStrategy } from "./semantic.js";
4
+ export { wholeFileStrategy } from "./whole-file.js";
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/strategies/chunk/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAC3C,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,7 @@
1
+ import { ChunkStrategy } from "../../interfaces/index.js";
2
/** Tuning knobs for markdownHeadersStrategy; sizes are string lengths (characters). */
+ export interface MarkdownHeadersOptions {
3
// Sections whose trimmed text is shorter than this are not emitted. Defaults to 100.
+ minChunkSize?: number;
4
// Once the accumulated section exceeds this length and holds more than 10 lines it is flushed early. Defaults to 8000.
+ maxChunkSize?: number;
5
+ }
6
/**
 * Creates the "markdown-headers" ChunkStrategy, which starts a new chunk at
 * each line matching a `#`…`######` header. `options` defaults to `{}`.
 */
+ export declare function markdownHeadersStrategy(options?: MarkdownHeadersOptions): ChunkStrategy;
7
+ //# sourceMappingURL=markdown-headers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown-headers.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAEjE,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,GAAE,sBAA2B,GACnC,aAAa,CAgGf"}
@@ -0,0 +1,89 @@
1
/**
 * Build a chunk strategy that splits Markdown text at ATX headers
 * (`#` through `######`).
 *
 * Each header line starts a new chunk; the lines that follow are appended to
 * it until the next header. Chunks whose trimmed text is shorter than
 * `minChunkSize` are dropped. A section that grows beyond `maxChunkSize`
 * characters (and more than 10 lines) is flushed early and marked
 * `truncated: true`; the remaining lines continue under the same header.
 *
 * @param {object} [options]
 * @param {number} [options.minChunkSize=100] - Minimum trimmed length for a chunk to be kept.
 * @param {number} [options.maxChunkSize=8000] - Length past which a section is flushed early.
 * @returns {{name: string, chunk: Function, extractMetadata: Function}} The strategy object.
 */
export function markdownHeadersStrategy(options = {}) {
  const minChunkSize = options.minChunkSize ?? 100;
  const maxChunkSize = options.maxChunkSize ?? 8000;
  // An oversized section is only flushed once it holds more than this many lines.
  const minLinesBeforeSplit = 10;

  return {
    name: "markdown-headers",

    /**
     * Split `text` into header-delimited chunks.
     *
     * @param {string} text - Markdown source; LF and CRLF line endings are both accepted.
     * @param {string} [filePath] - Recorded as `source_file` / `sourceFile` ("unknown" when omitted).
     * @returns {Promise<object[]>} Chunks in document order, each with an empty `commitHash`.
     */
    async chunk(text, filePath) {
      const chunks = [];
      let currentLines = [];
      // Always equals currentLines.join("\n").length; tracked incrementally so
      // the size check does not re-join the whole buffer on every line.
      let currentSize = 0;
      let currentHeader = "";
      let currentHeaderLevel = 0;

      // Emit the buffered lines as one chunk. All three emission sites share
      // the same metadata shape, so `source_file` is present on every chunk.
      const emit = (extraMetadata, enforceMinSize) => {
        const content = currentLines.join("\n").trim();
        if (enforceMinSize && content.length < minChunkSize) {
          return;
        }
        chunks.push({
          content,
          metadata: {
            strategy: this.name,
            header: currentHeader,
            header_level: currentHeaderLevel,
            source_file: filePath,
            ...extraMetadata,
          },
          sourceFile: filePath || "unknown",
          commitHash: "",
        });
      };

      // Split on CRLF as well as LF: a trailing "\r" would otherwise stop the
      // header pattern from matching, so CRLF files were never split.
      for (const line of text.split(/\r?\n/)) {
        const headerMatch = line.match(/^(#{1,6})\s+(.+)$/);

        if (headerMatch) {
          if (currentLines.length > 0) {
            emit({}, true);
          }
          currentHeaderLevel = headerMatch[1].length;
          currentHeader = headerMatch[2];
          currentLines = [line];
          currentSize = line.length;
        } else {
          // +1 accounts for the "\n" that join() inserts between lines.
          currentSize += line.length + (currentLines.length > 0 ? 1 : 0);
          currentLines.push(line);
        }

        // Prevent a single section from growing without bound.
        if (currentSize > maxChunkSize && currentLines.length > minLinesBeforeSplit) {
          emit({ truncated: true }, false);
          currentLines = [];
          currentSize = 0;
        }
      }

      if (currentLines.length > 0) {
        emit({ is_last: true }, true);
      }
      return chunks;
    },

    /**
     * Cheap summary of `text` without chunking it.
     *
     * @param {string} text - Markdown source.
     * @param {string} [_filePath] - Unused.
     * @returns {object} `strategy`, `has_headers`, `first_header` and `line_count`.
     */
    extractMetadata(text, _filePath) {
      const headerMatch = text.match(/^(#{1,6})\s+(.+)$/m);
      return {
        strategy: this.name,
        has_headers: !!headerMatch,
        first_header: headerMatch?.[2],
        line_count: text.split("\n").length,
      };
    },
  };
}
89
+ //# sourceMappingURL=markdown-headers.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown-headers.js","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,uBAAuB,CACrC,UAAkC,EAAE;IAEpC,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;IAElD,OAAO;QACL,IAAI,EAAE,kBAAkB;QAExB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAE/B,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,aAAa,GAAG,EAAE,CAAC;YACvB,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;gBAEpD,IAAI,WAAW,EAAE,CAAC;oBAChB,mCAAmC;oBACnC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;wBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;4BACnC,MAAM,CAAC,IAAI,CAAC;gCACV,OAAO;gCACP,QAAQ,EAAE;oCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;oCACnB,MAAM,EAAE,aAAa;oCACrB,YAAY,EAAE,kBAAkB;oCAChC,WAAW,EAAE,QAAQ;iCACtB;gCACD,UAAU,EAAE,QAAQ,IAAI,SAAS;gCACjC,UAAU,EAAE,EAAE;6BACf,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;oBAED,kBAAkB;oBAClB,kBAAkB,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;oBAC3C,aAAa,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;oBAC/B,YAAY,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;qBAAM,CAAC;oBACN,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC1B,CAAC;gBAED,wCAAwC;gBACxC,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;gBACnD,IAAI,WAAW,GAAG,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBAC3D,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC/C,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,SAAS,EAAE,IAAI;yBAChB;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;oBACH,YAAY,GAAG,EAAE,CAAC;gBACpB,CAAC;YACH,CAAC;YAED,aAAa;YACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;oBACn
C,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;YACrD,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;gBAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;aACpC,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,7 @@
1
+ import { ChunkStrategy } from "../../interfaces/index.js";
2
/** Tuning knobs for semanticStrategy; sizes are string lengths (characters). */
+ export interface SemanticStrategyOptions {
3
// Target upper bound on the accumulated sentence length of one chunk. Defaults to 2000.
+ maxChars?: number;
4
// Chunks whose trimmed text is shorter than this are not emitted. Defaults to 100.
+ minChars?: number;
5
+ }
6
/**
 * Creates the "semantic" ChunkStrategy, which splits text into sentences
 * (after `.`, `!` or `?` followed by whitespace) and groups consecutive
 * sentences into chunks. `options` defaults to `{}`.
 */
+ export declare function semanticStrategy(options?: SemanticStrategyOptions): ChunkStrategy;
7
+ //# sourceMappingURL=semantic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAEjE,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,GAAE,uBAA4B,GACpC,aAAa,CAsEf"}
@@ -0,0 +1,62 @@
1
/**
 * Create a chunk strategy that groups whole sentences into chunks.
 *
 * Text is split after `.`, `!` or `?` followed by whitespace. Sentences are
 * accumulated until adding the next one would push the running character
 * total past `maxChars`; the accumulated sentences are then emitted as one
 * chunk. Any chunk whose trimmed content is shorter than `minChars` is
 * discarded rather than emitted.
 *
 * @param {object} [options]
 * @param {number} [options.maxChars=2000] Character budget per chunk (the
 *   joining spaces are not counted towards it).
 * @param {number} [options.minChars=100] Minimum trimmed length a chunk must
 *   have to be emitted.
 * @returns {{name: string, chunk: Function, extractMetadata: Function}}
 *   Strategy object named "semantic".
 */
export function semanticStrategy(options = {}) {
  const maxChars = options.maxChars ?? 2000;
  const minChars = options.minChars ?? 100;

  // Build one chunk record from buffered sentences, or return null when the
  // joined text is below the configured minimum length.
  const buildChunk = (strategyName, sentences, filePath, isLast) => {
    const content = sentences.join(" ").trim();
    if (content.length < minChars) {
      return null;
    }
    const metadata = {
      strategy: strategyName,
      sentence_count: sentences.length,
      source_file: filePath,
    };
    if (isLast) {
      metadata.is_last = true;
    }
    return {
      content,
      metadata,
      sourceFile: filePath || "unknown",
      commitHash: "",
    };
  };

  return {
    name: "semantic",

    /**
     * Split `text` into sentence-aligned chunks.
     *
     * @param {string} text Text to split.
     * @param {string} [filePath] Source path recorded on every chunk;
     *   `sourceFile` falls back to "unknown" when it is falsy.
     * @returns {Promise<object[]>} Emitted chunks, in document order.
     */
    async chunk(text, filePath) {
      const chunks = [];
      // Split by sentences (simple approach): break on whitespace that
      // follows a sentence terminator, keeping the terminator attached.
      const sentences = text.split(/(?<=[.!?])\s+/);
      let pending = [];
      let pendingSize = 0;

      for (const sentence of sentences) {
        // Flush before adding a sentence that would overflow the budget.
        if (pending.length > 0 && pendingSize + sentence.length > maxChars) {
          const flushed = buildChunk(this.name, pending, filePath, false);
          if (flushed) {
            chunks.push(flushed);
          }
          pending = [];
          pendingSize = 0;
        }
        pending.push(sentence);
        pendingSize += sentence.length;
      }

      // Whatever is still buffered becomes the final chunk (flagged is_last).
      if (pending.length > 0) {
        const tail = buildChunk(this.name, pending, filePath, true);
        if (tail) {
          chunks.push(tail);
        }
      }
      return chunks;
    },

    /**
     * Summarise `text` without chunking it.
     *
     * @param {string} text Text to inspect.
     * @param {string} [_filePath] Unused.
     * @returns {{strategy: string, sentence_count: number, char_count: number}}
     */
    extractMetadata(text, _filePath) {
      // Splitting on terminators leaves an empty piece after a trailing
      // ".", "!" or "?" (and a single empty piece for empty text); count
      // only pieces that actually contain text.
      const sentenceCount = text
        .split(/[.!?]+/)
        .filter((piece) => piece.trim() !== "").length;
      return {
        strategy: this.name,
        sentence_count: sentenceCount,
        char_count: text.length,
      };
    },
  };
}
62
+ //# sourceMappingURL=semantic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,gBAAgB,CAC9B,UAAmC,EAAE;IAErC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IAEzC,OAAO;QACL,IAAI,EAAE,UAAU;QAEhB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAE3B,uCAAuC;YACvC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YAE9C,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,WAAW,GAAG,CAAC,CAAC;YAEpB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAErC,IAAI,WAAW,GAAG,YAAY,GAAG,QAAQ,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;wBAC/B,MAAM,CAAC,IAAI,CAAC;4BACV,OAAO;4BACP,QAAQ,EAAE;gCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;gCACnB,cAAc,EAAE,YAAY,CAAC,MAAM;gCACnC,WAAW,EAAE,QAAQ;6BACtB;4BACD,UAAU,EAAE,QAAQ,IAAI,SAAS;4BACjC,UAAU,EAAE,EAAE;yBACf,CAAC,CAAC;oBACL,CAAC;oBACD,YAAY,GAAG,EAAE,CAAC;oBAClB,WAAW,GAAG,CAAC,CAAC;gBAClB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,WAAW,IAAI,YAAY,CAAC;YAC9B,CAAC;YAED,aAAa;YACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;oBAC/B,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,cAAc,EAAE,YAAY,CAAC,MAAM;4BACnC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM;gBAC3C,UAAU,EAAE,IAAI,CAAC,MAAM;aACxB,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,12 @@
1
+ import { ChunkStrategy } from "../../interfaces/index.js";
2
+ export interface TokenStrategyOptions {
3
+ maxTokens?: number;
4
+ overlap?: number;
5
+ }
6
+ /**
7
+ * Split text by approximate token count.
8
+ * Simple implementation: ~4 chars per token for English.
9
+ * For production, use a proper tokenizer (tiktoken, etc.)
10
+ */
11
+ export declare function tokenStrategy(options?: TokenStrategyOptions): ChunkStrategy;
12
+ //# sourceMappingURL=token.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAEjE,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,OAAO,GAAE,oBAAyB,GACjC,aAAa,CAyDf"}
@@ -0,0 +1,56 @@
1
/**
 * Split text by approximate token count.
 * Simple implementation: ~4 chars per token for English.
 * For production, use a proper tokenizer (tiktoken, etc.)
 *
 * Each chunk is a window of at most `maxTokens * 4` characters. When the
 * window does not reach the end of the text it is pulled back to end just
 * after the last "." or newline found in it, if there is one after the
 * window start. Consecutive windows overlap by `overlap * 4` characters.
 *
 * @param {object} [options]
 * @param {number} [options.maxTokens=500] Approximate tokens per chunk.
 * @param {number} [options.overlap=50] Approximate tokens shared between
 *   consecutive chunks.
 * @returns {{name: string, chunk: Function, extractMetadata: Function}}
 *   Strategy object named `token-<maxTokens>`.
 */
export function tokenStrategy(options = {}) {
  const maxTokens = options.maxTokens ?? 500;
  const overlap = options.overlap ?? 50;
  const charsPerToken = 4;
  const maxChars = maxTokens * charsPerToken;
  const overlapChars = overlap * charsPerToken;

  return {
    name: `token-${maxTokens}`,

    /**
     * Cut `text` into overlapping windows.
     *
     * @param {string} text Text to split.
     * @param {string} [filePath] Source path recorded on every chunk;
     *   `sourceFile` falls back to "unknown" when it is falsy.
     * @returns {Promise<object[]>} Chunks in document order; `start_char`
     *   and `end_char` are offsets into `text` (end exclusive).
     */
    async chunk(text, filePath) {
      const chunks = [];
      let start = 0;

      while (start < text.length) {
        let end = Math.min(start + maxChars, text.length);

        // Try to break at sentence boundary
        if (end < text.length) {
          const lastPeriod = text.lastIndexOf(".", end);
          const lastNewline = text.lastIndexOf("\n", end);
          const breakPoint = Math.max(lastPeriod, lastNewline);
          if (breakPoint > start) {
            end = breakPoint + 1;
          }
        }

        const content = text.slice(start, end).trim();
        if (content) {
          chunks.push({
            content,
            metadata: {
              strategy: this.name,
              chunk_index: chunks.length,
              source_file: filePath,
              start_char: start,
              end_char: end,
            },
            sourceFile: filePath || "unknown",
            commitHash: "", // Will be filled by caller
          });
        }

        // The window reached the end of the text: stop. Stepping back by
        // the overlap here would re-enter the loop with the same window
        // forever.
        if (end >= text.length) {
          break;
        }

        // Step back by the overlap, but always move forward. If the overlap
        // would land at or before the current start (a break point close to
        // the start, or an overlap as large as the window), continue from
        // the end of this chunk without overlap.
        const overlappedStart = end - overlapChars;
        start = overlappedStart > start
          ? overlappedStart
          : Math.max(end, start + 1);
      }
      return chunks;
    },

    /**
     * Summarise `text` without chunking it.
     *
     * @param {string} text Text to inspect.
     * @param {string} [_filePath] Unused.
     * @returns {{strategy: string, char_count: number, estimated_tokens: number}}
     */
    extractMetadata(text, _filePath) {
      return {
        strategy: this.name,
        char_count: text.length,
        estimated_tokens: Math.ceil(text.length / charsPerToken),
      };
    },
  };
}
56
+ //# sourceMappingURL=token.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token.js","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAOA;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAC3B,UAAgC,EAAE;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,SAAS,GAAG,aAAa,CAAC;IAC3C,MAAM,YAAY,GAAG,OAAO,GAAG,aAAa,CAAC;IAE7C,OAAO;QACL,IAAI,EAAE,SAAS,SAAS,EAAE;QAE1B,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;YAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBAElD,oCAAoC;gBACpC,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;oBACtB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;oBAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;oBAChD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;oBACrD,IAAI,UAAU,GAAG,KAAK,EAAE,CAAC;wBACvB,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;oBACvB,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,WAAW,EAAE,MAAM,CAAC,MAAM;4BAC1B,WAAW,EAAE,QAAQ;4BACrB,UAAU,EAAE,KAAK;4BACjB,QAAQ,EAAE,GAAG;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE,EAAE,2BAA2B;qBAC5C,CAAC,CAAC;gBACL,CAAC;gBAED,KAAK,GAAG,GAAG,GAAG,YAAY,CAAC;YAC7B,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,UAAU,EAAE,IAAI,CAAC,MAAM;gBACvB,gBAAgB,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,CAAC;aACzD,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,3 @@
1
+ import { ChunkStrategy } from "../../interfaces/index.js";
2
+ export declare function wholeFileStrategy(): ChunkStrategy;
3
+ //# sourceMappingURL=whole-file.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"whole-file.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/whole-file.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAEjE,wBAAgB,iBAAiB,IAAI,aAAa,CAgCjD"}