@vivantel/rag-core 0.1.0 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +62 -62
  2. package/dist/config-loader.d.ts.map +1 -1
  3. package/dist/config-loader.js +0 -2
  4. package/dist/config-loader.js.map +1 -1
  5. package/dist/core/chunk-processor.d.ts.map +1 -1
  6. package/dist/core/chunk-processor.js +27 -20
  7. package/dist/core/chunk-processor.js.map +1 -1
  8. package/dist/core/embedder.d.ts.map +1 -1
  9. package/dist/core/embedder.js +10 -3
  10. package/dist/core/embedder.js.map +1 -1
  11. package/dist/core/git-tracker.d.ts.map +1 -1
  12. package/dist/core/git-tracker.js +9 -59
  13. package/dist/core/git-tracker.js.map +1 -1
  14. package/dist/core/orchestrator.d.ts.map +1 -1
  15. package/dist/core/orchestrator.js +22 -1
  16. package/dist/core/orchestrator.js.map +1 -1
  17. package/dist/core/uploader.d.ts.map +1 -1
  18. package/dist/core/uploader.js +13 -4
  19. package/dist/core/uploader.js.map +1 -1
  20. package/dist/strategies/chunk/token.js +1 -1
  21. package/dist/strategies/chunk/token.js.map +1 -1
  22. package/package.json +110 -102
  23. package/.github/config/release-please.json +0 -38
  24. package/.github/dependabot.yaml +0 -28
  25. package/.github/workflows/ci.yaml +0 -119
  26. package/.github/workflows/publish.yaml +0 -151
  27. package/.github/workflows/release.yaml +0 -150
  28. package/.versionrc.json +0 -19
  29. package/CHANGELOG.md +0 -21
  30. package/bin/rag-update.ts +0 -49
  31. package/eslint.config.js +0 -25
  32. package/src/config-loader.ts +0 -21
  33. package/src/core/chunk-processor.test.ts +0 -36
  34. package/src/core/chunk-processor.ts +0 -92
  35. package/src/core/embedder.ts +0 -189
  36. package/src/core/git-tracker.test.ts +0 -64
  37. package/src/core/git-tracker.ts +0 -202
  38. package/src/core/orchestrator.test.ts +0 -53
  39. package/src/core/orchestrator.ts +0 -97
  40. package/src/core/uploader.ts +0 -123
  41. package/src/core/utils.ts +0 -27
  42. package/src/helpers/create-chunker.test.ts +0 -31
  43. package/src/helpers/create-chunker.ts +0 -40
  44. package/src/index.test.ts +0 -33
  45. package/src/index.ts +0 -30
  46. package/src/interfaces/chunker.ts +0 -59
  47. package/src/interfaces/embedder.ts +0 -36
  48. package/src/interfaces/index.test.ts +0 -9
  49. package/src/interfaces/index.ts +0 -3
  50. package/src/interfaces/vector-store.ts +0 -71
  51. package/src/strategies/chunk/index.ts +0 -4
  52. package/src/strategies/chunk/markdown-headers.test.ts +0 -37
  53. package/src/strategies/chunk/markdown-headers.ts +0 -106
  54. package/src/strategies/chunk/semantic.test.ts +0 -21
  55. package/src/strategies/chunk/semantic.ts +0 -80
  56. package/src/strategies/chunk/token.test.ts +0 -41
  57. package/src/strategies/chunk/token.ts +0 -72
  58. package/src/strategies/chunk/whole-file.test.ts +0 -24
  59. package/src/strategies/chunk/whole-file.ts +0 -35
  60. package/tsconfig.json +0 -21
  61. package/typedoc.json +0 -11
  62. package/vitest.config.ts +0 -19
@@ -1,36 +0,0 @@
1
- /**
2
- * Embedding provider interfaces
3
- */
4
-
5
- import { Chunk } from "./chunker.js";
6
-
7
/**
 * Contract implemented by a backend that turns text into embedding vectors.
 */
export interface EmbeddingProvider {
  /** Identifier of the provider (e.g., 'github-models', 'openai'). */
  readonly name: string;

  /** Number of dimensions of the embedding vectors. */
  readonly dimensions: number;

  /** Optional upper bound on tokens per request. */
  readonly maxTokens?: number;

  /** Turns a single piece of text into its embedding vector. */
  embed(text: string): Promise<number[]>;

  /** Optional bulk conversion, offered for performance. */
  embedBatch?(texts: string[]): Promise<number[][]>;

  /** Optional availability probe (e.g., whether the API key is valid). */
  healthCheck?(): Promise<boolean>;
}
26
-
27
/**
 * Settings bundle pairing an embedding provider with optional tuning knobs.
 */
export interface EmbeddingConfig {
  /** Provider that performs the embedding. */
  provider: EmbeddingProvider;
  /** Optional batch size (presumably texts per provider call — confirm against the embedder). */
  batchSize?: number;
  /** Optional rate-limit value in milliseconds, going by the name — confirm against the embedder. */
  rateLimitMs?: number;
}
32
-
33
/**
 * A chunk that additionally carries its embedding.
 */
export interface EmbeddedChunk extends Chunk {
  /** Embedding vector computed for the chunk. */
  embedding: number[];
  /** When the embedding was produced (numeric; unit not visible here — TODO confirm). */
  embeddedAt: number;
}
@@ -1,9 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
-
3
// Smoke test: the interfaces barrel module can be imported at runtime.
describe("Interfaces", () => {
  it("should export types correctly", async () => {
    const loaded = await import("./index.js");

    expect(loaded).toBeDefined();
  });
});
@@ -1,3 +0,0 @@
1
- export * from "./chunker.js";
2
- export * from "./embedder.js";
3
- export * from "./vector-store.js";
@@ -1,71 +0,0 @@
1
- /**
2
- * Vector store interfaces
3
- */
4
-
5
/**
 * A single record handed to a vector store.
 */
export interface VectorDocument {
  /** Unique ID; optional, auto-generated when not provided. */
  id?: string;

  /** Original text content. */
  content: string;

  /** Arbitrary metadata, used for filtering. */
  metadata: Record<string, unknown>;

  /** Embedding vector of the content. */
  embedding: number[];

  /** Path of the source file, used to track updates. */
  sourceFile: string;

  /** Git commit hash, used for change detection. */
  commitHash: string;

  /** Hash of the content, used for change detection. */
  contentHash: string;

  /** Collection name, for stores that hold several collections. */
  collection?: string;
}
30
-
31
/**
 * One hit returned from a vector search.
 */
export interface VectorSearchResult {
  /** ID of the matching document. */
  id: string;
  /** Text content of the matching document. */
  content: string;
  /** Metadata attached to the matching document. */
  metadata: Record<string, unknown>;
  /** Similarity score (scale and direction are store-specific — not defined here). */
  similarity: number;
}
37
-
38
/**
 * Contract implemented by a vector store backend.
 */
export interface VectorStore {
  /** Name of the store. */
  readonly name: string;

  /** Prepares the store for use (create tables, indexes, etc.). */
  initialize(): Promise<void>;

  /** Inserts new documents or updates existing ones. */
  upsert(documents: VectorDocument[]): Promise<void>;

  /** Removes the documents that belong to the given source files. */
  deleteBySourceFile(sourceFiles: string[]): Promise<void>;

  /** Returns the current state as a sourceFile → commitHash map, for change detection. */
  getCurrentState(collection?: string): Promise<Map<string, string>>;

  /** Searches by embedding vector, taking the query vector, `topK`, and an optional collection. */
  search(
    queryEmbedding: number[],
    topK: number,
    collection?: string,
  ): Promise<VectorSearchResult[]>;

  /** Optional: deletes an entire collection. */
  deleteCollection?(collection: string): Promise<void>;

  /** Optional: reports store statistics. */
  getStats?(): Promise<{ documentCount: number; collections: string[] }>;
}
67
-
68
/**
 * Settings bundle pairing a vector store with an optional collection name.
 */
export interface VectorStoreConfig {
  /** Store implementation to use. */
  provider: VectorStore;
  /** Optional collection name. */
  collection?: string;
}
@@ -1,4 +0,0 @@
1
- export { tokenStrategy } from "./token.js";
2
- export { markdownHeadersStrategy } from "./markdown-headers.js";
3
- export { semanticStrategy } from "./semantic.js";
4
- export { wholeFileStrategy } from "./whole-file.js";
@@ -1,37 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { markdownHeadersStrategy } from "./markdown-headers.js";
3
-
4
// Exercises the markdown-headers chunker with a small minimum chunk size.
describe("markdownHeadersStrategy", () => {
  const strategy = markdownHeadersStrategy({ minChunkSize: 10 });

  it("should have correct name", () => {
    expect(strategy.name).toBe("markdown-headers");
  });

  it("should split by headers", async () => {
    const markdown = `# Header 1
Content for header 1.

## Header 2
Content for header 2.

### Header 3
Content for header 3.`;

    const result = await strategy.chunk(markdown);

    expect(result.length).toBeGreaterThan(0);

    // Every produced chunk must carry its header text and level.
    result.forEach((piece) => {
      expect(piece.metadata.header).toBeDefined();
      expect(piece.metadata.header_level).toBeDefined();
    });
  });

  it("should handle text without headers", async () => {
    const result = await strategy.chunk(
      "Plain text without any markdown headers.",
    );

    expect(Array.isArray(result)).toBe(true);
  });
});
@@ -1,106 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/**
 * Options accepted by `markdownHeadersStrategy`.
 */
export interface MarkdownHeadersOptions {
  /** Minimum trimmed chunk length in characters; the strategy falls back to 100. */
  minChunkSize?: number;
  /** Chunk length in characters above which a chunk is cut early; the strategy falls back to 8000. */
  maxChunkSize?: number;
}
7
-
8
/**
 * Builds a chunking strategy that splits Markdown text at header lines
 * (`#` through `######` followed by whitespace and a title).
 *
 * Each header starts a new chunk. The chunk in progress is emitted when the
 * next header arrives or the text ends, provided its trimmed length is at
 * least `minChunkSize`. A chunk that grows past `maxChunkSize` characters and
 * spans more than 10 lines is emitted early and flagged `truncated: true`.
 * Chunks whose trimmed content is empty are never emitted.
 *
 * @param options - Size thresholds; `minChunkSize` defaults to 100 and
 *   `maxChunkSize` to 8000.
 * @returns A strategy object named "markdown-headers".
 */
export function markdownHeadersStrategy(
  options: MarkdownHeadersOptions = {},
): ChunkStrategy {
  const minChunkSize = options.minChunkSize ?? 100;
  const maxChunkSize = options.maxChunkSize ?? 8000;
  // Held in a closure so the methods keep working when called detached
  // (e.g. passed as a callback), where `this` would be undefined.
  const strategyName = "markdown-headers";
  const headerPattern = /^(#{1,6})\s+(.+)$/;

  // Tests ONE line. The trailing "\r" of CRLF input is dropped first, because
  // "." never matches "\r" and the header would otherwise go undetected.
  const matchHeader = (line: string): RegExpMatchArray | null =>
    line.replace(/\r$/, "").match(headerPattern);

  return {
    name: strategyName,

    /**
     * Splits `text` into header-delimited chunks.
     *
     * @param text - Markdown source.
     * @param filePath - Optional origin; recorded as `source_file` metadata and
     *   as `sourceFile` ("unknown" when absent).
     * @returns The chunks in document order; `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      const sourceFile = filePath || "unknown";

      let currentLines: string[] = [];
      // Length of currentLines.join("\n"), maintained incrementally so the
      // size check does not re-join the whole buffer for every line.
      let currentSize = 0;
      let currentHeader = "";
      let currentHeaderLevel = 0;

      // Emits the buffered lines as one chunk (if eligible) and clears the buffer.
      const flush = (
        extraMetadata: Record<string, unknown>,
        enforceMinimum: boolean,
      ): void => {
        const content = currentLines.join("\n").trim();
        currentLines = [];
        currentSize = 0;

        if (content.length === 0) {
          return;
        }
        if (enforceMinimum && content.length < minChunkSize) {
          return;
        }

        chunks.push({
          content,
          metadata: {
            strategy: strategyName,
            header: currentHeader,
            header_level: currentHeaderLevel,
            source_file: filePath,
            ...extraMetadata,
          },
          sourceFile,
          commitHash: "",
        });
      };

      for (const line of text.split("\n")) {
        const headerMatch = matchHeader(line);

        if (headerMatch) {
          // Close the previous section before the new header takes over.
          if (currentLines.length > 0) {
            flush({}, true);
          }

          currentHeaderLevel = headerMatch[1].length;
          currentHeader = headerMatch[2];
          currentLines = [line];
          currentSize = line.length;
        } else {
          currentSize += (currentLines.length > 0 ? 1 : 0) + line.length;
          currentLines.push(line);
        }

        // Prevent a single section from growing without bound.
        if (currentSize > maxChunkSize && currentLines.length > 10) {
          flush({ truncated: true }, false);
        }
      }

      // Whatever is left belongs to the final section.
      if (currentLines.length > 0) {
        flush({ is_last: true }, true);
      }

      return chunks;
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Markdown source.
     * @param _filePath - Unused.
     * @returns Strategy name, whether any header line exists, the first
     *   header's title (or `undefined`), and the line count.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      const lines = text.split("\n");

      // Scan line by line so a bare "#" cannot pair with a later line's text.
      let firstHeader: string | undefined;
      for (const line of lines) {
        const headerMatch = matchHeader(line);
        if (headerMatch) {
          firstHeader = headerMatch[2];
          break;
        }
      }

      return {
        strategy: strategyName,
        has_headers: firstHeader !== undefined,
        first_header: firstHeader,
        line_count: lines.length,
      };
    },
  };
}
@@ -1,21 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { semanticStrategy } from "./semantic.js";
3
-
4
// Exercises the sentence-based chunker with small size limits.
describe("semanticStrategy", () => {
  const strategy = semanticStrategy({ maxChars: 100, minChars: 10 });

  it("should have correct name", () => {
    expect(strategy.name).toBe("semantic");
  });

  it("should split by sentences", async () => {
    const result = await strategy.chunk(
      "First sentence. Second sentence! Third sentence? Fourth.",
    );

    expect(Array.isArray(result)).toBe(true);

    // Every chunk must be tagged with the strategy that produced it.
    result.forEach((piece) => {
      expect(piece.metadata.strategy).toBe("semantic");
    });
  });
});
@@ -1,80 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/**
 * Options accepted by `semanticStrategy`.
 */
export interface SemanticStrategyOptions {
  /** Character budget per chunk; the strategy falls back to 2000. */
  maxChars?: number;
  /** Minimum trimmed chunk length in characters; the strategy falls back to 100. */
  minChars?: number;
}
7
-
8
/**
 * Builds a chunking strategy that groups whole sentences into chunks.
 *
 * Text is split after `.`, `!` or `?` followed by whitespace (a simple
 * heuristic). Sentences are packed into a chunk until adding the next one,
 * including its joining space, would exceed `maxChars`. A group whose trimmed
 * text is shorter than `minChars`, or empty, is discarded rather than emitted.
 * A single sentence longer than `maxChars` is still emitted as its own chunk.
 *
 * @param options - Size thresholds; `maxChars` defaults to 2000 and
 *   `minChars` to 100.
 * @returns A strategy object named "semantic".
 */
export function semanticStrategy(
  options: SemanticStrategyOptions = {},
): ChunkStrategy {
  const maxChars = options.maxChars ?? 2000;
  const minChars = options.minChars ?? 100;
  // Held in a closure so the methods keep working when called detached
  // (e.g. passed as a callback), where `this` would be undefined.
  const strategyName = "semantic";

  return {
    name: strategyName,

    /**
     * Splits `text` into sentence-aligned chunks.
     *
     * @param text - Source text.
     * @param filePath - Optional origin; recorded as `source_file` metadata and
     *   as `sourceFile` ("unknown" when absent).
     * @returns The chunks in document order; `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      const sourceFile = filePath || "unknown";

      // Split by sentences (simple approach).
      const sentences = text.split(/(?<=[.!?])\s+/);

      let pending: string[] = [];
      // Length of pending.join(" "): counts the separators too, so the
      // emitted content really stays within maxChars.
      let pendingSize = 0;

      // Emits the pending sentences as one chunk (if eligible) and resets.
      const flush = (isLast: boolean): void => {
        const content = pending.join(" ").trim();
        const sentenceCount = pending.length;
        pending = [];
        pendingSize = 0;

        if (content.length === 0 || content.length < minChars) {
          return;
        }

        chunks.push({
          content,
          metadata: {
            strategy: strategyName,
            sentence_count: sentenceCount,
            source_file: filePath,
            ...(isLast ? { is_last: true } : {}),
          },
          sourceFile,
          commitHash: "",
        });
      };

      for (const sentence of sentences) {
        if (
          pending.length > 0 &&
          pendingSize + 1 + sentence.length > maxChars
        ) {
          flush(false);
        }

        pendingSize += (pending.length > 0 ? 1 : 0) + sentence.length;
        pending.push(sentence);
      }

      // Whatever is left forms the final chunk.
      if (pending.length > 0) {
        flush(true);
      }

      return chunks;
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Source text.
     * @param _filePath - Unused.
     * @returns Strategy name, number of non-blank segments between sentence
     *   terminators, and the character count.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      // Blank segments (such as the empty tail after a final ".") are not sentences.
      const sentenceCount = text
        .split(/[.!?]+/)
        .filter((segment) => segment.trim().length > 0).length;

      return {
        strategy: strategyName,
        sentence_count: sentenceCount,
        char_count: text.length,
      };
    },
  };
}
@@ -1,41 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { tokenStrategy } from "./token.js";
3
-
4
// Suite for the token-based chunker. It is registered with describe.skip,
// so none of the cases below currently run.
describe.skip("tokenStrategy", () => {
  const strategy = tokenStrategy({ maxTokens: 50, overlap: 10 });

  it("should have correct name", () => {
    const { name } = strategy;

    expect(name).toContain("token");
    expect(typeof name).toBe("string");
  });

  it("should chunk text", async () => {
    const input = "This is a test sentence. ".repeat(100);
    const result = await strategy.chunk(input, "test.txt");

    expect(Array.isArray(result)).toBe(true);
    expect(result.length).toBeGreaterThan(0);

    if (result.length > 0) {
      const first = result[0];

      expect(first.content).toBeDefined();
      expect(typeof first.content).toBe("string");
      expect(first.metadata).toBeDefined();
      expect(first.metadata.strategy).toBeDefined();
    }
  });

  it("should extract metadata", () => {
    const summary = strategy.extractMetadata?.("Test content");

    // extractMetadata is optional on a strategy, so the result may be undefined.
    if (!summary) {
      // Nothing to verify when the method is not implemented.
      expect(true).toBe(true);
      return;
    }

    expect(summary.strategy).toBe(strategy.name);
    expect(summary.char_count).toBeDefined();
    expect(summary.estimated_tokens).toBeDefined();
  });
});
@@ -1,72 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/**
 * Options accepted by `tokenStrategy`.
 */
export interface TokenStrategyOptions {
  /** Approximate token budget per chunk; the strategy falls back to 500. */
  maxTokens?: number;
  /** Approximate number of tokens shared between neighbouring chunks; the strategy falls back to 50. */
  overlap?: number;
}
7
-
8
/**
 * Split text by approximate token count.
 * Simple implementation: ~4 chars per token for English.
 * For production, use a proper tokenizer (tiktoken, etc.)
 *
 * The text is walked with a sliding window of `maxTokens * 4` characters.
 * When a window stops short of the end of the text it is pulled back to the
 * last "." or newline inside it, if there is one. Consecutive windows share
 * up to `overlap * 4` characters.
 *
 * The walk always terminates: it stops once a window reaches the end of the
 * text, and every new window starts strictly after the previous start. The
 * window is clamped to at least 1 character and the overlap to less than the
 * window. If the overlap would still reach back to or before the current
 * start, the next window begins where this one ended.
 *
 * @param options - `maxTokens` defaults to 500 and `overlap` to 50.
 * @returns A strategy object named `token-<maxTokens>`.
 */
export function tokenStrategy(
  options: TokenStrategyOptions = {},
): ChunkStrategy {
  const maxTokens = options.maxTokens ?? 500;
  const overlap = options.overlap ?? 50;
  const charsPerToken = 4;
  // `|| 0` turns NaN into 0 before clamping.
  const maxChars = Math.max(1, Math.floor(maxTokens * charsPerToken) || 0);
  const overlapChars = Math.min(
    Math.max(0, Math.floor(overlap * charsPerToken) || 0),
    maxChars - 1,
  );
  // Held in a closure so the methods keep working when called detached
  // (e.g. passed as a callback), where `this` would be undefined.
  const strategyName = `token-${maxTokens}`;

  return {
    name: strategyName,

    /**
     * Splits `text` into overlapping, size-bounded chunks.
     *
     * @param text - Source text.
     * @param filePath - Optional origin; recorded as `source_file` metadata and
     *   as `sourceFile` ("unknown" when absent).
     * @returns The chunks in document order, each with its index and its
     *   start/end character offsets; `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      const sourceFile = filePath || "unknown";
      let start = 0;

      while (start < text.length) {
        let end = Math.min(start + maxChars, text.length);

        // Try to break at sentence boundary. The search starts at end - 1
        // because lastIndexOf includes its start index; starting at `end`
        // could produce a chunk one character over the limit.
        if (end < text.length) {
          const lastPeriod = text.lastIndexOf(".", end - 1);
          const lastNewline = text.lastIndexOf("\n", end - 1);
          const breakPoint = Math.max(lastPeriod, lastNewline);
          if (breakPoint > start) {
            end = breakPoint + 1;
          }
        }

        const content = text.slice(start, end).trim();
        if (content) {
          chunks.push({
            content,
            metadata: {
              strategy: strategyName,
              chunk_index: chunks.length,
              source_file: filePath,
              start_char: start,
              end_char: end,
            },
            sourceFile,
            commitHash: "", // Will be filled by caller
          });
        }

        // The window reached the end of the text. Stepping back by the
        // overlap here would re-emit the tail forever.
        if (end >= text.length) {
          break;
        }

        // Step back by the overlap only if that still moves forward.
        const overlappedStart = end - overlapChars;
        start = overlappedStart > start ? overlappedStart : end;
      }

      return chunks;
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Source text.
     * @param _filePath - Unused.
     * @returns Strategy name, character count, and the token estimate
     *   (characters divided by 4, rounded up).
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      return {
        strategy: strategyName,
        char_count: text.length,
        estimated_tokens: Math.ceil(text.length / charsPerToken),
      };
    },
  };
}
@@ -1,24 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { wholeFileStrategy } from "./whole-file.js";
3
-
4
// Exercises the strategy that keeps an entire file as one chunk.
describe("wholeFileStrategy", () => {
  const strategy = wholeFileStrategy();

  it("should have correct name", () => {
    expect(strategy.name).toBe("whole-file");
  });

  it("should return single chunk", async () => {
    const input = "Complete file content.";
    const result = await strategy.chunk(input);

    expect(result).toHaveLength(1);

    const first = result[0];
    expect(first.content).toBe(input);
    expect(first.metadata.strategy).toBe("whole-file");
  });

  it("should return empty array for empty text", async () => {
    const result = await strategy.chunk("");

    expect(result).toHaveLength(0);
  });
});
@@ -1,35 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/**
 * Builds a chunking strategy that keeps the entire text as a single chunk.
 *
 * @returns A strategy object named "whole-file".
 */
export function wholeFileStrategy(): ChunkStrategy {
  // Held in a closure so the methods keep working when called detached
  // (e.g. `const { chunk } = strategy` or `files.map(strategy.chunk)`),
  // where `this` would be undefined and `this.name` would throw.
  const strategyName = "whole-file";

  return {
    name: strategyName,

    /**
     * Wraps `text` in one chunk.
     *
     * @param text - Source text, kept verbatim (not trimmed).
     * @param filePath - Optional origin; recorded as `source_file` metadata and
     *   as `sourceFile` ("unknown" when absent).
     * @returns A one-element array, or an empty array when `text` is empty or
     *   whitespace-only; `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      if (!text || text.trim().length === 0) {
        return [];
      }

      return [
        {
          content: text,
          metadata: {
            strategy: strategyName,
            source_file: filePath,
            char_count: text.length,
            line_count: text.split("\n").length,
          },
          sourceFile: filePath || "unknown",
          commitHash: "",
        },
      ];
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Source text.
     * @param _filePath - Unused.
     * @returns Strategy name, character count, and line count.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      return {
        strategy: strategyName,
        char_count: text.length,
        line_count: text.split("\n").length,
      };
    },
  };
}
package/tsconfig.json DELETED
@@ -1,21 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "lib": ["ES2022"],
7
- "outDir": "./dist",
8
- "rootDir": "./src",
9
- "declaration": true,
10
- "declarationMap": true,
11
- "sourceMap": true,
12
- "strict": true,
13
- "esModuleInterop": true,
14
- "skipLibCheck": true,
15
- "forceConsistentCasingInFileNames": true,
16
- "resolveJsonModule": true,
17
- "types": ["node"]
18
- },
19
- "include": ["src/**/*"],
20
- "exclude": ["node_modules", "dist", "**/*.test.ts"]
21
- }
package/typedoc.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "entryPoints": ["src/index.ts"],
3
- "out": "docs/api",
4
- "excludePrivate": true,
5
- "excludeProtected": true,
6
- "skipErrorChecking": true,
7
- "validation": {
8
- "invalidLink": false,
9
- "notExported": false
10
- }
11
- }
package/vitest.config.ts DELETED
@@ -1,19 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
// Vitest configuration: Node environment, global test APIs, V8 coverage.
export default defineConfig({
  test: {
    // Runtime
    environment: 'node',
    globals: true,
    testTimeout: 10000,

    // Scheduling
    maxWorkers: 4,
    isolate: false,

    // Paths left out of test discovery
    exclude: ['node_modules', '.git'],

    // Coverage over the sources, leaving out the test files themselves
    coverage: {
      provider: 'v8',
      reporter: ['text', 'json', 'html'],
      include: ['src/**/*.ts'],
      exclude: ['src/**/*.test.ts'],
    },
  },
});