@vivantel/rag-core 1.1.2 → 2.0.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/dist/bin/rag-update.d.ts +3 -0
  2. package/dist/bin/rag-update.d.ts.map +1 -0
  3. package/dist/bin/rag-update.js +116 -0
  4. package/dist/bin/rag-update.js.map +1 -0
  5. package/dist/cli/init.d.ts +2 -0
  6. package/dist/cli/init.d.ts.map +1 -0
  7. package/dist/cli/init.js +262 -0
  8. package/dist/cli/init.js.map +1 -0
  9. package/dist/cli/validate.d.ts +2 -0
  10. package/dist/cli/validate.d.ts.map +1 -0
  11. package/dist/cli/validate.js +54 -0
  12. package/dist/cli/validate.js.map +1 -0
  13. package/dist/config-loader.d.ts.map +1 -1
  14. package/dist/config-loader.js +73 -7
  15. package/dist/config-loader.js.map +1 -1
  16. package/dist/core/chunk-processor.d.ts +1 -1
  17. package/dist/core/chunk-processor.d.ts.map +1 -1
  18. package/dist/core/chunk-processor.js +50 -21
  19. package/dist/core/chunk-processor.js.map +1 -1
  20. package/dist/core/embedder.d.ts +5 -1
  21. package/dist/core/embedder.d.ts.map +1 -1
  22. package/dist/core/embedder.js +40 -29
  23. package/dist/core/embedder.js.map +1 -1
  24. package/dist/core/errors.d.ts +16 -0
  25. package/dist/core/errors.d.ts.map +1 -0
  26. package/dist/core/errors.js +17 -0
  27. package/dist/core/errors.js.map +1 -0
  28. package/dist/core/git-tracker.d.ts.map +1 -1
  29. package/dist/core/git-tracker.js +9 -59
  30. package/dist/core/git-tracker.js.map +1 -1
  31. package/dist/core/orchestrator.d.ts +8 -0
  32. package/dist/core/orchestrator.d.ts.map +1 -1
  33. package/dist/core/orchestrator.js +153 -37
  34. package/dist/core/orchestrator.js.map +1 -1
  35. package/dist/core/plugin-discovery.d.ts +19 -0
  36. package/dist/core/plugin-discovery.d.ts.map +1 -0
  37. package/dist/core/plugin-discovery.js +47 -0
  38. package/dist/core/plugin-discovery.js.map +1 -0
  39. package/dist/core/telemetry.d.ts +61 -0
  40. package/dist/core/telemetry.d.ts.map +1 -0
  41. package/dist/core/telemetry.js +50 -0
  42. package/dist/core/telemetry.js.map +1 -0
  43. package/dist/core/uploader.d.ts +5 -1
  44. package/dist/core/uploader.d.ts.map +1 -1
  45. package/dist/core/uploader.js +23 -7
  46. package/dist/core/uploader.js.map +1 -1
  47. package/dist/core/utils.d.ts +7 -0
  48. package/dist/core/utils.d.ts.map +1 -1
  49. package/dist/core/utils.js +35 -0
  50. package/dist/core/utils.js.map +1 -1
  51. package/dist/index.d.ts +14 -1
  52. package/dist/index.d.ts.map +1 -1
  53. package/dist/index.js +14 -2
  54. package/dist/index.js.map +1 -1
  55. package/dist/interfaces/embedder.d.ts +2 -0
  56. package/dist/interfaces/embedder.d.ts.map +1 -1
  57. package/dist/interfaces/vector-store.d.ts +2 -0
  58. package/dist/interfaces/vector-store.d.ts.map +1 -1
  59. package/dist/strategies/chunk/token.js +1 -1
  60. package/dist/strategies/chunk/token.js.map +1 -1
  61. package/package.json +12 -2
  62. package/.github/config/release-please.json +0 -38
  63. package/.github/dependabot.yaml +0 -28
  64. package/.github/workflows/ci.yaml +0 -119
  65. package/.github/workflows/publish.yaml +0 -155
  66. package/.github/workflows/release.yaml +0 -54
  67. package/.release-please-manifest.json +0 -3
  68. package/.versionrc.json +0 -19
  69. package/CHANGELOG.md +0 -51
  70. package/bin/rag-update.ts +0 -49
  71. package/eslint.config.js +0 -25
  72. package/src/config-loader.ts +0 -21
  73. package/src/core/chunk-processor.test.ts +0 -36
  74. package/src/core/chunk-processor.ts +0 -92
  75. package/src/core/embedder.ts +0 -189
  76. package/src/core/git-tracker.test.ts +0 -64
  77. package/src/core/git-tracker.ts +0 -202
  78. package/src/core/orchestrator.test.ts +0 -53
  79. package/src/core/orchestrator.ts +0 -97
  80. package/src/core/uploader.ts +0 -123
  81. package/src/core/utils.ts +0 -27
  82. package/src/helpers/create-chunker.test.ts +0 -31
  83. package/src/helpers/create-chunker.ts +0 -40
  84. package/src/index.test.ts +0 -33
  85. package/src/index.ts +0 -30
  86. package/src/interfaces/chunker.ts +0 -59
  87. package/src/interfaces/embedder.ts +0 -36
  88. package/src/interfaces/index.test.ts +0 -9
  89. package/src/interfaces/index.ts +0 -3
  90. package/src/interfaces/vector-store.ts +0 -71
  91. package/src/strategies/chunk/index.ts +0 -4
  92. package/src/strategies/chunk/markdown-headers.test.ts +0 -37
  93. package/src/strategies/chunk/markdown-headers.ts +0 -106
  94. package/src/strategies/chunk/semantic.test.ts +0 -21
  95. package/src/strategies/chunk/semantic.ts +0 -80
  96. package/src/strategies/chunk/token.test.ts +0 -41
  97. package/src/strategies/chunk/token.ts +0 -72
  98. package/src/strategies/chunk/whole-file.test.ts +0 -24
  99. package/src/strategies/chunk/whole-file.ts +0 -35
  100. package/tsconfig.json +0 -21
  101. package/typedoc.json +0 -11
  102. package/vitest.config.ts +0 -19
package/src/core/orchestrator.ts DELETED
@@ -1,97 +0,0 @@
1
- import { GitTracker } from "./git-tracker.js";
2
- import { ChunkProcessor } from "./chunk-processor.js";
3
- import { EmbedderProcessor } from "./embedder.js";
4
- import { Uploader } from "./uploader.js";
5
- import {
6
- FileChunker,
7
- EmbeddingProvider,
8
- VectorStore,
9
- } from "../interfaces/index.js";
10
-
11
- export interface RAGPipelineConfig {
12
- chunkers: FileChunker[];
13
- embedder: EmbeddingProvider;
14
- vectorStore: VectorStore;
15
- options?: {
16
- chunksFile?: string;
17
- embeddingsFile?: string;
18
- force?: boolean;
19
- skipUpload?: boolean;
20
- rateLimitMs?: number;
21
- batchSize?: number;
22
- };
23
- }
24
-
25
- export class Orchestrator {
26
- private config: RAGPipelineConfig;
27
- private chunksFile: string;
28
- private embeddingsFile: string;
29
-
30
- constructor(config: RAGPipelineConfig) {
31
- this.config = config;
32
- this.chunksFile = config.options?.chunksFile || "./docs/rag/chunks.json";
33
- this.embeddingsFile =
34
- config.options?.embeddingsFile || "./docs/rag/embeddings.json";
35
- }
36
-
37
- async run(): Promise<void> {
38
- console.log("🚀 Starting RAG pipeline...\n");
39
-
40
- console.log("📂 Step 1: Scanning for changes...");
41
- const gitTracker = new GitTracker(this.config.chunkers);
42
- const currentState = await gitTracker.getCurrentState();
43
-
44
- const previousState = new Map<string, string>();
45
- const { toProcess, toDelete } =
46
- await gitTracker.getChangedFiles(previousState);
47
-
48
- if (
49
- toProcess.length === 0 &&
50
- toDelete.length === 0 &&
51
- !this.config.options?.force
52
- ) {
53
- console.log("\n✨ No changes detected.");
54
- return;
55
- }
56
-
57
- console.log(
58
- `\n📊 Changes: ${toProcess.length} to process, ${toDelete.length} to delete\n`,
59
- );
60
-
61
- console.log("🔪 Step 2: Generating chunks...");
62
- const chunkProcessor = new ChunkProcessor(this.config.chunkers);
63
-
64
- const fileState = new Map();
65
- for (const file of toProcess) {
66
- const info = currentState.get(file);
67
- if (info) fileState.set(file, info);
68
- }
69
-
70
- const chunks = await chunkProcessor.processFiles(toProcess, fileState);
71
- await chunkProcessor.saveChunksLocal(chunks, this.chunksFile);
72
-
73
- if (chunks.length === 0) {
74
- console.log("\n⚠️ No chunks generated. Exiting.");
75
- return;
76
- }
77
-
78
- console.log("\n🔢 Step 3: Generating embeddings...");
79
- const embedder = new EmbedderProcessor(this.config.embedder, {
80
- rateLimitMs: this.config.options?.rateLimitMs,
81
- batchSize: this.config.options?.batchSize,
82
- });
83
-
84
- await embedder.run(this.chunksFile, this.config.options?.force || false);
85
-
86
- if (!this.config.options?.skipUpload) {
87
- console.log("\n📤 Step 4: Uploading to vector store...");
88
- const uploader = new Uploader(this.config.vectorStore);
89
- await uploader.sync(
90
- this.embeddingsFile,
91
- this.config.options?.force || false,
92
- );
93
- }
94
-
95
- console.log("\n✨ RAG pipeline complete!");
96
- }
97
- }
package/src/core/uploader.ts DELETED
@@ -1,123 +0,0 @@
1
- import {
2
- VectorStore,
3
- VectorDocument,
4
- EmbeddedChunk,
5
- } from "../interfaces/index.js";
6
- import { readFile } from "fs/promises";
7
-
8
- export class Uploader {
9
- private vectorStore: VectorStore;
10
-
11
- constructor(vectorStore: VectorStore) {
12
- this.vectorStore = vectorStore;
13
- }
14
-
15
- private chunkToDocument(
16
- chunk: EmbeddedChunk,
17
- collection?: string,
18
- ): VectorDocument {
19
- return {
20
- content: chunk.content,
21
- metadata: chunk.metadata,
22
- embedding: chunk.embedding,
23
- sourceFile: chunk.sourceFile,
24
- commitHash: chunk.commitHash,
25
- contentHash: chunk.contentHash!,
26
- collection,
27
- };
28
- }
29
-
30
- async getItemsToUpload(
31
- embeddingsFile: string,
32
- force: boolean = false,
33
- ): Promise<{
34
- toUpload: EmbeddedChunk[];
35
- toDelete: string[];
36
- }> {
37
- let embeddings: EmbeddedChunk[];
38
- try {
39
- const content = await readFile(embeddingsFile, "utf-8");
40
- embeddings = JSON.parse(content);
41
- } catch {
42
- throw new Error(`Embeddings file not found: ${embeddingsFile}`);
43
- }
44
-
45
- console.log(
46
- `📖 Loaded ${embeddings.length} embeddings from ${embeddingsFile}`,
47
- );
48
-
49
- if (force) {
50
- const allSourceFiles = [...new Set(embeddings.map((e) => e.sourceFile))];
51
- return { toUpload: embeddings, toDelete: allSourceFiles };
52
- }
53
-
54
- const existingState = await this.vectorStore.getCurrentState();
55
- const toUploadList: EmbeddedChunk[] = [];
56
- const toDeleteSet = new Set<string>();
57
-
58
- for (const emb of embeddings) {
59
- const existingHash = existingState.get(emb.sourceFile);
60
-
61
- if (!existingHash) {
62
- toUploadList.push(emb);
63
- } else if (existingHash !== emb.commitHash) {
64
- toDeleteSet.add(emb.sourceFile);
65
- toUploadList.push(emb);
66
- }
67
- }
68
-
69
- return {
70
- toUpload: toUploadList,
71
- toDelete: [...toDeleteSet],
72
- };
73
- }
74
-
75
- async sync(
76
- embeddingsFile: string,
77
- force: boolean = false,
78
- ): Promise<{
79
- uploaded: number;
80
- deleted: number;
81
- }> {
82
- console.log("📤 Starting incremental upload...");
83
-
84
- await this.vectorStore.initialize();
85
-
86
- const { toUpload, toDelete } = await this.getItemsToUpload(
87
- embeddingsFile,
88
- force,
89
- );
90
-
91
- console.log(`\n📊 Need to upload: ${toUpload.length} documents`);
92
- console.log(` Need to delete: ${toDelete.length} files`);
93
-
94
- if (toUpload.length === 0 && toDelete.length === 0) {
95
- console.log("\n✨ No changes detected.");
96
- return { uploaded: 0, deleted: 0 };
97
- }
98
-
99
- if (toDelete.length > 0) {
100
- await this.vectorStore.deleteBySourceFile(toDelete);
101
- console.log(` 🗑️ Deleted ${toDelete.length} obsolete documents`);
102
- }
103
-
104
- if (toUpload.length > 0) {
105
- const documents = toUpload.map((e) => this.chunkToDocument(e));
106
-
107
- const batchSize = 50;
108
- for (let i = 0; i < documents.length; i += batchSize) {
109
- const batch = documents.slice(i, i + batchSize);
110
- await this.vectorStore.upsert(batch);
111
- console.log(
112
- ` ✅ Uploaded batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(documents.length / batchSize)}`,
113
- );
114
- }
115
- }
116
-
117
- console.log(`\n✨ Upload complete!`);
118
- console.log(` Uploaded: ${toUpload.length}`);
119
- console.log(` Deleted: ${toDelete.length}`);
120
-
121
- return { uploaded: toUpload.length, deleted: toDelete.length };
122
- }
123
- }
package/src/core/utils.ts DELETED
@@ -1,27 +0,0 @@
1
- import { createHash } from "crypto";
2
-
3
- export function computeContentHash(content: string): string {
4
- return createHash("sha256").update(content).digest("hex").slice(0, 16);
5
- }
6
-
7
- export function sleep(ms: number): Promise<void> {
8
- return new Promise((resolve) => setTimeout(resolve, ms));
9
- }
10
-
11
- export function batchArray<T>(array: T[], batchSize: number): T[][] {
12
- const batches: T[][] = [];
13
- for (let i = 0; i < array.length; i += batchSize) {
14
- batches.push(array.slice(i, i + batchSize));
15
- }
16
- return batches;
17
- }
18
-
19
- export function extractFileName(filePath: string): string {
20
- return filePath.split("/").pop() || filePath;
21
- }
22
-
23
- export function extractDirectory(filePath: string): string {
24
- const parts = filePath.split("/");
25
- parts.pop();
26
- return parts.join("/");
27
- }
package/src/helpers/create-chunker.test.ts DELETED
@@ -1,31 +0,0 @@
1
- import { describe, it, expect, vi } from "vitest";
2
- import { createChunker } from "./create-chunker.js";
3
-
4
- describe("createChunker", () => {
5
- it("should create a chunker with given options", () => {
6
- const mockProcess = vi.fn().mockResolvedValue([]);
7
-
8
- const chunker = createChunker({
9
- name: "test-chunker",
10
- patterns: ["**/*.txt"],
11
- process: mockProcess,
12
- });
13
-
14
- expect(chunker.name).toBe("test-chunker");
15
- expect(chunker.patterns).toEqual(["**/*.txt"]);
16
- expect(chunker.chunk).toBeDefined();
17
- });
18
-
19
- it("should have canProcess method when provided", () => {
20
- const mockCanProcess = vi.fn().mockResolvedValue(true);
21
-
22
- const chunker = createChunker({
23
- name: "test",
24
- patterns: ["**/*.txt"],
25
- process: vi.fn().mockResolvedValue([]),
26
- canProcess: mockCanProcess,
27
- });
28
-
29
- expect(chunker.canProcess).toBeDefined();
30
- });
31
- });
package/src/helpers/create-chunker.ts DELETED
@@ -1,40 +0,0 @@
1
- import { FileChunker, Chunk } from "../interfaces/index.js";
2
-
3
- export interface CreateChunkerOptions {
4
- name: string;
5
- patterns: string[];
6
- process: (
7
- content: string,
8
- filePath: string,
9
- commitHash: string,
10
- ) => Promise<Chunk[]>;
11
- canProcess?: (filePath: string, content?: string) => Promise<boolean>;
12
- }
13
-
14
- export function createChunker(options: CreateChunkerOptions): FileChunker {
15
- return {
16
- name: options.name,
17
- patterns: options.patterns,
18
-
19
- async chunk(filePath: string, commitHash: string): Promise<Chunk[]> {
20
- const { readFile } = await import("fs/promises");
21
- const content = await readFile(filePath, "utf-8");
22
-
23
- if (options.canProcess) {
24
- const canProcess = await options.canProcess(filePath, content);
25
- if (!canProcess) {
26
- return [];
27
- }
28
- }
29
-
30
- return options.process(content, filePath, commitHash);
31
- },
32
-
33
- async canProcess(filePath: string, content?: string): Promise<boolean> {
34
- if (options.canProcess) {
35
- return options.canProcess(filePath, content);
36
- }
37
- return true;
38
- },
39
- };
40
- }
package/src/index.test.ts DELETED
@@ -1,33 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
-
3
- describe("@vivantel/rag-core", () => {
4
- it("should export all public interfaces", async () => {
5
- const module = await import("./index.js");
6
-
7
- // Core
8
- expect(module.GitTracker).toBeDefined();
9
- expect(module.ChunkProcessor).toBeDefined();
10
- expect(module.EmbedderProcessor).toBeDefined();
11
- expect(module.Uploader).toBeDefined();
12
- expect(module.Orchestrator).toBeDefined();
13
-
14
- // Utils
15
- expect(module.computeContentHash).toBeDefined();
16
- expect(module.sleep).toBeDefined();
17
- expect(module.batchArray).toBeDefined();
18
- expect(module.extractFileName).toBeDefined();
19
- expect(module.extractDirectory).toBeDefined();
20
-
21
- // Strategies
22
- expect(module.tokenStrategy).toBeDefined();
23
- expect(module.markdownHeadersStrategy).toBeDefined();
24
- expect(module.semanticStrategy).toBeDefined();
25
- expect(module.wholeFileStrategy).toBeDefined();
26
-
27
- // Helpers
28
- expect(module.createChunker).toBeDefined();
29
-
30
- // Config loader
31
- expect(module.loadConfig).toBeDefined();
32
- });
33
- });
package/src/index.ts DELETED
@@ -1,30 +0,0 @@
1
- // Interfaces
2
- export * from "./interfaces/index.js";
3
-
4
- // Core
5
- export { GitTracker } from "./core/git-tracker.js";
6
- export { ChunkProcessor } from "./core/chunk-processor.js";
7
- export { EmbedderProcessor } from "./core/embedder.js";
8
- export { Uploader } from "./core/uploader.js";
9
- export { Orchestrator, RAGPipelineConfig } from "./core/orchestrator.js";
10
- export {
11
- computeContentHash,
12
- sleep,
13
- batchArray,
14
- extractFileName,
15
- extractDirectory,
16
- } from "./core/utils.js";
17
-
18
- // Strategies
19
- export {
20
- tokenStrategy,
21
- markdownHeadersStrategy,
22
- semanticStrategy,
23
- wholeFileStrategy,
24
- } from "./strategies/chunk/index.js";
25
-
26
- // Helpers
27
- export { createChunker } from "./helpers/create-chunker.js";
28
-
29
- // Config loader
30
- export { loadConfig } from "./config-loader.js";
package/src/interfaces/chunker.ts DELETED
@@ -1,59 +0,0 @@
1
- /**
2
- * Chunk interfaces - core building blocks for document processing
3
- */
4
-
5
- export interface Chunk {
6
- /** The actual text content of the chunk */
7
- content: string;
8
-
9
- /** Metadata about this chunk (source file, type, etc.) */
10
- metadata: Record<string, unknown>;
11
-
12
- /** Original source file path */
13
- sourceFile: string;
14
-
15
- /** Git commit hash when this chunk was generated */
16
- commitHash: string;
17
-
18
- /** Optional unique hash of the content (for change detection) */
19
- contentHash?: string;
20
- }
21
-
22
- export interface FileChunker {
23
- /** Unique name of this chunker */
24
- name: string;
25
-
26
- /** Glob patterns this chunker handles */
27
- patterns: string[];
28
-
29
- /**
30
- * Process a file and return chunks.
31
- * Returns empty array if file should be skipped.
32
- */
33
- chunk(filePath: string, commitHash: string): Promise<Chunk[]>;
34
-
35
- /**
36
- * Optional: validate if this chunker can process the file
37
- * (called before chunk() to filter early)
38
- */
39
- canProcess?(filePath: string, content?: string): Promise<boolean>;
40
- }
41
-
42
- export interface ChunkStrategy {
43
- /** Strategy name */
44
- name: string;
45
-
46
- /** Split text into chunks according to strategy */
47
- chunk(text: string, filePath?: string): Promise<Chunk[]>;
48
-
49
- /** Optional: extract metadata without full chunking */
50
- extractMetadata?(text: string, filePath?: string): Record<string, unknown>;
51
- }
52
-
53
- export interface ChunkTransformer {
54
- /** Transformer name */
55
- name: string;
56
-
57
- /** Transform a chunk (return null to skip) */
58
- transform(chunk: Chunk): Promise<Chunk | null>;
59
- }
package/src/interfaces/embedder.ts DELETED
@@ -1,36 +0,0 @@
1
- /**
2
- * Embedding provider interfaces
3
- */
4
-
5
- import { Chunk } from "./chunker.js";
6
-
7
- export interface EmbeddingProvider {
8
- /** Provider name (e.g., 'github-models', 'openai') */
9
- readonly name: string;
10
-
11
- /** Embedding vector dimensions */
12
- readonly dimensions: number;
13
-
14
- /** Maximum tokens per request (optional) */
15
- readonly maxTokens?: number;
16
-
17
- /** Convert text to embedding vector */
18
- embed(text: string): Promise<number[]>;
19
-
20
- /** Batch convert (optional, for performance) */
21
- embedBatch?(texts: string[]): Promise<number[][]>;
22
-
23
- /** Check if provider is available (e.g., valid API key) */
24
- healthCheck?(): Promise<boolean>;
25
- }
26
-
27
- export interface EmbeddingConfig {
28
- provider: EmbeddingProvider;
29
- batchSize?: number;
30
- rateLimitMs?: number;
31
- }
32
-
33
- export interface EmbeddedChunk extends Chunk {
34
- embedding: number[];
35
- embeddedAt: number;
36
- }
package/src/interfaces/index.test.ts DELETED
@@ -1,9 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
-
3
- describe("Interfaces", () => {
4
- it("should export types correctly", async () => {
5
- const module = await import("./index.js");
6
-
7
- expect(module).toBeDefined();
8
- });
9
- });
package/src/interfaces/index.ts DELETED
@@ -1,3 +0,0 @@
1
- export * from "./chunker.js";
2
- export * from "./embedder.js";
3
- export * from "./vector-store.js";
package/src/interfaces/vector-store.ts DELETED
@@ -1,71 +0,0 @@
1
- /**
2
- * Vector store interfaces
3
- */
4
-
5
- export interface VectorDocument {
6
- /** Unique ID (optional, auto-generated if not provided) */
7
- id?: string;
8
-
9
- /** Original text content */
10
- content: string;
11
-
12
- /** Metadata for filtering */
13
- metadata: Record<string, unknown>;
14
-
15
- /** Embedding vector */
16
- embedding: number[];
17
-
18
- /** Source file path (for tracking updates) */
19
- sourceFile: string;
20
-
21
- /** Git commit hash (for change detection) */
22
- commitHash: string;
23
-
24
- /** Content hash (for change detection) */
25
- contentHash: string;
26
-
27
- /** Collection name (for multi-collection stores) */
28
- collection?: string;
29
- }
30
-
31
- export interface VectorSearchResult {
32
- id: string;
33
- content: string;
34
- metadata: Record<string, unknown>;
35
- similarity: number;
36
- }
37
-
38
- export interface VectorStore {
39
- /** Store name */
40
- readonly name: string;
41
-
42
- /** Initialize store (create tables, indexes, etc.) */
43
- initialize(): Promise<void>;
44
-
45
- /** Insert or update documents */
46
- upsert(documents: VectorDocument[]): Promise<void>;
47
-
48
- /** Delete documents by source file */
49
- deleteBySourceFile(sourceFiles: string[]): Promise<void>;
50
-
51
- /** Get current state (sourceFile → commitHash) for change detection */
52
- getCurrentState(collection?: string): Promise<Map<string, string>>;
53
-
54
- /** Search by embedding vector */
55
- search(
56
- queryEmbedding: number[],
57
- topK: number,
58
- collection?: string,
59
- ): Promise<VectorSearchResult[]>;
60
-
61
- /** Optional: delete entire collection */
62
- deleteCollection?(collection: string): Promise<void>;
63
-
64
- /** Optional: get store statistics */
65
- getStats?(): Promise<{ documentCount: number; collections: string[] }>;
66
- }
67
-
68
- export interface VectorStoreConfig {
69
- provider: VectorStore;
70
- collection?: string;
71
- }
package/src/strategies/chunk/index.ts DELETED
@@ -1,4 +0,0 @@
1
- export { tokenStrategy } from "./token.js";
2
- export { markdownHeadersStrategy } from "./markdown-headers.js";
3
- export { semanticStrategy } from "./semantic.js";
4
- export { wholeFileStrategy } from "./whole-file.js";
package/src/strategies/chunk/markdown-headers.test.ts DELETED
@@ -1,37 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { markdownHeadersStrategy } from "./markdown-headers.js";
3
-
4
- describe("markdownHeadersStrategy", () => {
5
- const strategy = markdownHeadersStrategy({ minChunkSize: 10 });
6
-
7
- it("should have correct name", () => {
8
- expect(strategy.name).toBe("markdown-headers");
9
- });
10
-
11
- it("should split by headers", async () => {
12
- const text = `# Header 1
13
- Content for header 1.
14
-
15
- ## Header 2
16
- Content for header 2.
17
-
18
- ### Header 3
19
- Content for header 3.`;
20
-
21
- const chunks = await strategy.chunk(text);
22
-
23
- expect(chunks.length).toBeGreaterThan(0);
24
-
25
- for (const chunk of chunks) {
26
- expect(chunk.metadata.header).toBeDefined();
27
- expect(chunk.metadata.header_level).toBeDefined();
28
- }
29
- });
30
-
31
- it("should handle text without headers", async () => {
32
- const text = "Plain text without any markdown headers.";
33
- const chunks = await strategy.chunk(text);
34
-
35
- expect(Array.isArray(chunks)).toBe(true);
36
- });
37
- });