@vivantel/rag-core 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/dist/bin/rag-update.d.ts +3 -0
  2. package/dist/bin/rag-update.d.ts.map +1 -0
  3. package/dist/bin/rag-update.js +116 -0
  4. package/dist/bin/rag-update.js.map +1 -0
  5. package/dist/cli/init.d.ts +2 -0
  6. package/dist/cli/init.d.ts.map +1 -0
  7. package/dist/cli/init.js +262 -0
  8. package/dist/cli/init.js.map +1 -0
  9. package/dist/cli/validate.d.ts +2 -0
  10. package/dist/cli/validate.d.ts.map +1 -0
  11. package/dist/cli/validate.js +54 -0
  12. package/dist/cli/validate.js.map +1 -0
  13. package/dist/config-loader.d.ts.map +1 -1
  14. package/dist/config-loader.js +73 -7
  15. package/dist/config-loader.js.map +1 -1
  16. package/dist/core/chunk-processor.d.ts +1 -1
  17. package/dist/core/chunk-processor.d.ts.map +1 -1
  18. package/dist/core/chunk-processor.js +50 -21
  19. package/dist/core/chunk-processor.js.map +1 -1
  20. package/dist/core/embedder.d.ts +5 -1
  21. package/dist/core/embedder.d.ts.map +1 -1
  22. package/dist/core/embedder.js +40 -29
  23. package/dist/core/embedder.js.map +1 -1
  24. package/dist/core/errors.d.ts +16 -0
  25. package/dist/core/errors.d.ts.map +1 -0
  26. package/dist/core/errors.js +17 -0
  27. package/dist/core/errors.js.map +1 -0
  28. package/dist/core/git-tracker.d.ts.map +1 -1
  29. package/dist/core/git-tracker.js +9 -59
  30. package/dist/core/git-tracker.js.map +1 -1
  31. package/dist/core/orchestrator.d.ts +8 -0
  32. package/dist/core/orchestrator.d.ts.map +1 -1
  33. package/dist/core/orchestrator.js +153 -37
  34. package/dist/core/orchestrator.js.map +1 -1
  35. package/dist/core/plugin-discovery.d.ts +19 -0
  36. package/dist/core/plugin-discovery.d.ts.map +1 -0
  37. package/dist/core/plugin-discovery.js +47 -0
  38. package/dist/core/plugin-discovery.js.map +1 -0
  39. package/dist/core/telemetry.d.ts +61 -0
  40. package/dist/core/telemetry.d.ts.map +1 -0
  41. package/dist/core/telemetry.js +50 -0
  42. package/dist/core/telemetry.js.map +1 -0
  43. package/dist/core/uploader.d.ts +5 -1
  44. package/dist/core/uploader.d.ts.map +1 -1
  45. package/dist/core/uploader.js +23 -7
  46. package/dist/core/uploader.js.map +1 -1
  47. package/dist/core/utils.d.ts +7 -0
  48. package/dist/core/utils.d.ts.map +1 -1
  49. package/dist/core/utils.js +35 -0
  50. package/dist/core/utils.js.map +1 -1
  51. package/dist/index.d.ts +14 -1
  52. package/dist/index.d.ts.map +1 -1
  53. package/dist/index.js +14 -2
  54. package/dist/index.js.map +1 -1
  55. package/dist/interfaces/embedder.d.ts +2 -0
  56. package/dist/interfaces/embedder.d.ts.map +1 -1
  57. package/dist/interfaces/vector-store.d.ts +2 -0
  58. package/dist/interfaces/vector-store.d.ts.map +1 -1
  59. package/dist/strategies/chunk/token.js +1 -1
  60. package/dist/strategies/chunk/token.js.map +1 -1
  61. package/package.json +12 -2
  62. package/.github/config/release-please.json +0 -38
  63. package/.github/dependabot.yaml +0 -28
  64. package/.github/workflows/ci.yaml +0 -119
  65. package/.github/workflows/publish.yaml +0 -155
  66. package/.github/workflows/release.yaml +0 -54
  67. package/.release-please-manifest.json +0 -3
  68. package/.versionrc.json +0 -19
  69. package/CHANGELOG.md +0 -51
  70. package/bin/rag-update.ts +0 -49
  71. package/eslint.config.js +0 -25
  72. package/src/config-loader.ts +0 -21
  73. package/src/core/chunk-processor.test.ts +0 -36
  74. package/src/core/chunk-processor.ts +0 -92
  75. package/src/core/embedder.ts +0 -189
  76. package/src/core/git-tracker.test.ts +0 -64
  77. package/src/core/git-tracker.ts +0 -202
  78. package/src/core/orchestrator.test.ts +0 -53
  79. package/src/core/orchestrator.ts +0 -97
  80. package/src/core/uploader.ts +0 -123
  81. package/src/core/utils.ts +0 -27
  82. package/src/helpers/create-chunker.test.ts +0 -31
  83. package/src/helpers/create-chunker.ts +0 -40
  84. package/src/index.test.ts +0 -33
  85. package/src/index.ts +0 -30
  86. package/src/interfaces/chunker.ts +0 -59
  87. package/src/interfaces/embedder.ts +0 -36
  88. package/src/interfaces/index.test.ts +0 -9
  89. package/src/interfaces/index.ts +0 -3
  90. package/src/interfaces/vector-store.ts +0 -71
  91. package/src/strategies/chunk/index.ts +0 -4
  92. package/src/strategies/chunk/markdown-headers.test.ts +0 -37
  93. package/src/strategies/chunk/markdown-headers.ts +0 -106
  94. package/src/strategies/chunk/semantic.test.ts +0 -21
  95. package/src/strategies/chunk/semantic.ts +0 -80
  96. package/src/strategies/chunk/token.test.ts +0 -41
  97. package/src/strategies/chunk/token.ts +0 -72
  98. package/src/strategies/chunk/whole-file.test.ts +0 -24
  99. package/src/strategies/chunk/whole-file.ts +0 -35
  100. package/tsconfig.json +0 -21
  101. package/typedoc.json +0 -11
  102. package/vitest.config.ts +0 -19
@@ -1,106 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/** Tuning knobs for {@link markdownHeadersStrategy}. */
export interface MarkdownHeadersOptions {
  /**
   * Sections whose trimmed text is shorter than this many characters are
   * dropped instead of being emitted as a chunk. Defaults to 100.
   */
  minChunkSize?: number;
  /**
   * Soft upper bound (in characters) for a section. Once a section exceeds it
   * and spans more than 10 lines, it is flushed as a `truncated` chunk.
   * Defaults to 8000.
   */
  maxChunkSize?: number;
}

/**
 * Builds a chunk strategy that splits Markdown at ATX headers (`#` … `######`).
 *
 * Every header starts a new section; the section text (header line included)
 * becomes one chunk tagged with the header title and level. Text that precedes
 * the first header is emitted with an empty header and level 0.
 *
 * Notes on behavior:
 * - Both LF and CRLF line endings are accepted; chunk content is joined with `\n`.
 * - `#` lines inside fenced code blocks (``` or ~~~) are not treated as headers.
 * - Sections shorter than `minChunkSize` are dropped.
 *
 * @param options Optional size limits, see {@link MarkdownHeadersOptions}.
 * @returns A strategy named `"markdown-headers"`.
 */
export function markdownHeadersStrategy(
  options: MarkdownHeadersOptions = {},
): ChunkStrategy {
  const minChunkSize = options.minChunkSize ?? 100;
  const maxChunkSize = options.maxChunkSize ?? 8000;
  // Captured in a constant so the methods keep working when detached from the
  // returned object (e.g. destructured or passed as callbacks).
  const strategyName = "markdown-headers";
  const headerPattern = /^(#{1,6})\s+(.+)$/;
  const fencePattern = /^\s{0,3}(`{3,}|~{3,})/;

  return {
    name: strategyName,

    /**
     * Splits `text` into header-delimited chunks.
     *
     * @param text Markdown source.
     * @param filePath Optional origin path, copied into `metadata.source_file`
     *   and `sourceFile` (`"unknown"` when omitted).
     * @returns Chunks in document order; `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      let sectionLines: string[] = [];
      // Length of sectionLines.join("\n"), maintained incrementally so the
      // size check does not re-join the whole section on every line.
      let sectionSize = 0;
      let header = "";
      let headerLevel = 0;
      // Fence character ("`" or "~") of the code block we are inside, if any.
      let openFence: string | null = null;

      const buildChunk = (
        content: string,
        extra: Record<string, unknown>,
      ): Chunk => ({
        content,
        metadata: {
          strategy: strategyName,
          header,
          header_level: headerLevel,
          source_file: filePath,
          ...extra,
        },
        sourceFile: filePath || "unknown",
        commitHash: "",
      });

      // Emits the pending section if it is large enough to be worth keeping.
      const flushSection = (extra: Record<string, unknown>): void => {
        if (sectionLines.length === 0) {
          return;
        }
        const content = sectionLines.join("\n").trim();
        if (content.length >= minChunkSize) {
          chunks.push(buildChunk(content, extra));
        }
      };

      // Split on CRLF as well: a trailing "\r" would otherwise stop the header
      // pattern from matching, because "." never matches "\r".
      for (const line of text.split(/\r?\n/)) {
        const fence = fencePattern.exec(line);
        if (fence) {
          const marker = fence[1].charAt(0);
          if (openFence === null) {
            openFence = marker;
          } else if (openFence === marker) {
            openFence = null;
          }
        }

        const headerMatch = openFence === null ? headerPattern.exec(line) : null;

        if (headerMatch) {
          flushSection({});
          headerLevel = headerMatch[1].length;
          header = headerMatch[2];
          sectionLines = [line];
          sectionSize = line.length;
        } else {
          sectionSize += line.length + (sectionLines.length > 0 ? 1 : 0);
          sectionLines.push(line);
        }

        // Prevent a single section from growing without bound. The remainder
        // of the section continues under the same header.
        if (sectionSize > maxChunkSize && sectionLines.length > 10) {
          const content = sectionLines.join("\n").trim();
          if (content.length > 0) {
            chunks.push(buildChunk(content, { truncated: true }));
          }
          sectionLines = [];
          sectionSize = 0;
        }
      }

      flushSection({ is_last: true });
      return chunks;
    },

    /**
     * Cheap document-level summary: whether any ATX header exists, the title
     * of the first one, and the number of lines. Fenced code blocks are not
     * taken into account here.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      // "[^\S\r\n]+" = horizontal whitespace only, so "# " followed by a line
      // break cannot borrow the next line as its title.
      const headerMatch = text.match(/^(#{1,6})[^\S\r\n]+(.+)$/m);
      return {
        strategy: strategyName,
        has_headers: headerMatch !== null,
        first_header: headerMatch?.[2],
        line_count: text.split("\n").length,
      };
    },
  };
}
@@ -1,21 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { semanticStrategy } from "./semantic.js";
3
-
4
// Unit tests for the sentence-based ("semantic") chunk strategy.
describe("semanticStrategy", () => {
  const strategy = semanticStrategy({ maxChars: 100, minChars: 10 });

  it("should have correct name", () => {
    expect(strategy.name).toBe("semantic");
  });

  it("should split by sentences", async () => {
    const text = "First sentence. Second sentence! Third sentence? Fourth.";
    const chunks = await strategy.chunk(text);

    // All four sentences fit inside maxChars, so they end up in one chunk.
    expect(chunks).toHaveLength(1);
    expect(chunks[0].content).toBe(text);
    expect(chunks[0].metadata.strategy).toBe("semantic");
    expect(chunks[0].metadata.sentence_count).toBe(4);
    expect(chunks[0].metadata.is_last).toBe(true);
  });

  it("should start a new chunk when maxChars would be exceeded", async () => {
    const tight = semanticStrategy({ maxChars: 20, minChars: 1 });
    const chunks = await tight.chunk(
      "Aaaa bbbb cccc. Dddd eeee ffff. Gggg hhhh iiii.",
    );

    // Each sentence is 15 chars; no two of them fit into 20 chars together.
    expect(chunks.map((chunk) => chunk.content)).toEqual([
      "Aaaa bbbb cccc.",
      "Dddd eeee ffff.",
      "Gggg hhhh iiii.",
    ]);
    for (const chunk of chunks) {
      expect(chunk.metadata.strategy).toBe("semantic");
      expect(chunk.metadata.sentence_count).toBe(1);
    }
  });
});
@@ -1,80 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/** Size limits for {@link semanticStrategy}. */
export interface SemanticStrategyOptions {
  /** Target maximum chunk length in characters. Defaults to 2000. */
  maxChars?: number;
  /** Chunks whose trimmed text is shorter than this are dropped. Defaults to 100. */
  minChars?: number;
}

/**
 * Builds a chunk strategy that groups whole sentences into chunks.
 *
 * Text is split after `.`, `!` or `?` followed by whitespace, and sentences are
 * packed greedily until adding the next one (plus its joining space) would
 * exceed `maxChars`. A single sentence longer than `maxChars` is kept intact
 * as its own chunk. Chunks shorter than `minChars` are dropped.
 *
 * @param options Optional size limits, see {@link SemanticStrategyOptions}.
 * @returns A strategy named `"semantic"`.
 */
export function semanticStrategy(
  options: SemanticStrategyOptions = {},
): ChunkStrategy {
  const maxChars = options.maxChars ?? 2000;
  const minChars = options.minChars ?? 100;
  // Captured in a constant so the methods keep working when detached from the
  // returned object.
  const strategyName = "semantic";

  return {
    name: strategyName,

    /**
     * Packs the sentences of `text` into chunks.
     *
     * @param text Input text.
     * @param filePath Optional origin path, copied into `metadata.source_file`
     *   and `sourceFile` (`"unknown"` when omitted).
     * @returns Chunks in document order; `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      let pending: string[] = [];
      // Length of pending.join(" "), i.e. including the joining spaces.
      let pendingSize = 0;

      // Emits the pending sentences (if long enough) and starts a new group.
      const flush = (extra: Record<string, unknown>): void => {
        if (pending.length === 0) {
          return;
        }
        const content = pending.join(" ").trim();
        if (content.length >= minChars) {
          chunks.push({
            content,
            metadata: {
              strategy: strategyName,
              sentence_count: pending.length,
              source_file: filePath,
              ...extra,
            },
            sourceFile: filePath || "unknown",
            commitHash: "",
          });
        }
        pending = [];
        pendingSize = 0;
      };

      // Split by sentences (simple approach): break on whitespace that
      // follows sentence-ending punctuation.
      for (const sentence of text.split(/(?<=[.!?])\s+/)) {
        const separator = pending.length > 0 ? 1 : 0;
        if (
          pending.length > 0 &&
          pendingSize + separator + sentence.length > maxChars
        ) {
          flush({});
        }
        pendingSize += (pending.length > 0 ? 1 : 0) + sentence.length;
        pending.push(sentence);
      }

      flush({ is_last: true });
      return chunks;
    },

    /**
     * Cheap document-level summary: approximate sentence count (runs of text
     * separated by `.`, `!` or `?`, ignoring blank runs) and character count.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      const sentenceCount = text
        .split(/[.!?]+/)
        .filter((part) => part.trim().length > 0).length;
      return {
        strategy: strategyName,
        sentence_count: sentenceCount,
        char_count: text.length,
      };
    },
  };
}
@@ -1,41 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { tokenStrategy } from "./token.js";
3
-
4
// Unit tests for the approximate-token chunk strategy.
// NOTE(review): the suite is skipped. Re-enable it once chunk() has been
// confirmed to terminate when `overlap` is greater than zero.
describe.skip("tokenStrategy", () => {
  const strategy = tokenStrategy({ maxTokens: 50, overlap: 10 });

  it("should have correct name", () => {
    // The name embeds the configured token budget.
    expect(strategy.name).toBe("token-50");
  });

  it("should chunk text", async () => {
    const text = "This is a test sentence. ".repeat(100);
    const chunks = await strategy.chunk(text, "test.txt");

    expect(Array.isArray(chunks)).toBe(true);
    expect(chunks.length).toBeGreaterThan(0);

    for (const chunk of chunks) {
      expect(typeof chunk.content).toBe("string");
      expect(chunk.content.length).toBeGreaterThan(0);
      expect(chunk.metadata.strategy).toBe(strategy.name);
      expect(chunk.sourceFile).toBe("test.txt");
    }
  });

  it("should extract metadata", () => {
    const text = "Test content";
    const metadata = strategy.extractMetadata?.(text);

    // tokenStrategy implements the optional extractMetadata hook, so a missing
    // result is a failure rather than something to silently accept.
    expect(metadata).toBeDefined();
    expect(metadata?.strategy).toBe(strategy.name);
    expect(metadata?.char_count).toBe(text.length);
    expect(metadata?.estimated_tokens).toBe(3);
  });
});
@@ -1,72 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/** Size limits for {@link tokenStrategy}, expressed in approximate tokens. */
export interface TokenStrategyOptions {
  /** Approximate token budget per chunk. Must be positive. Defaults to 500. */
  maxTokens?: number;
  /**
   * Approximate number of tokens repeated at the start of the next chunk.
   * Values that would leave no room for new text are capped. Defaults to 50.
   */
  overlap?: number;
}

/**
 * Split text by approximate token count.
 * Simple implementation: ~4 chars per token for English.
 * For production, use a proper tokenizer (tiktoken, etc.)
 *
 * Each window prefers to end just after the last `.` or newline it contains,
 * and consecutive windows share roughly `overlap` tokens of text.
 *
 * @param options Optional limits, see {@link TokenStrategyOptions}.
 * @returns A strategy named `token-<maxTokens>`.
 * @throws RangeError if `maxTokens` does not allow at least one character per chunk.
 */
export function tokenStrategy(
  options: TokenStrategyOptions = {},
): ChunkStrategy {
  const maxTokens = options.maxTokens ?? 500;
  const overlap = options.overlap ?? 50;
  const charsPerToken = 4;
  const maxChars = Math.floor(maxTokens * charsPerToken);
  // Written as a negated ">=" so that NaN is rejected as well.
  if (!(maxChars >= 1)) {
    throw new RangeError(
      `tokenStrategy: maxTokens must be a positive number, got ${maxTokens}`,
    );
  }
  // Keep the overlap strictly smaller than the window, otherwise the window
  // could never move forward.
  const requestedOverlap = Math.floor(overlap * charsPerToken);
  const overlapChars =
    requestedOverlap > 0 ? Math.min(requestedOverlap, maxChars - 1) : 0;
  // Captured in a constant so the methods keep working when detached from the
  // returned object.
  const strategyName = `token-${maxTokens}`;

  return {
    name: strategyName,

    /**
     * Cuts `text` into overlapping windows.
     *
     * @param text Input text.
     * @param filePath Optional origin path, copied into `metadata.source_file`
     *   and `sourceFile` (`"unknown"` when omitted).
     * @returns Chunks in document order. `metadata.start_char` / `end_char`
     *   are offsets into `text` (before trimming); `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      let start = 0;

      while (start < text.length) {
        let end = Math.min(start + maxChars, text.length);

        // Try to break at sentence boundary
        if (end < text.length) {
          const lastPeriod = text.lastIndexOf(".", end);
          const lastNewline = text.lastIndexOf("\n", end);
          const breakPoint = Math.max(lastPeriod, lastNewline);
          // Only accept a boundary that lies beyond the overlap region;
          // an earlier one would make the next window start at or before
          // the current one.
          if (breakPoint > start + overlapChars) {
            end = breakPoint + 1;
          }
        }

        const content = text.slice(start, end).trim();
        if (content) {
          chunks.push({
            content,
            metadata: {
              strategy: strategyName,
              chunk_index: chunks.length,
              source_file: filePath,
              start_char: start,
              end_char: end,
            },
            sourceFile: filePath || "unknown",
            commitHash: "", // Will be filled by caller
          });
        }

        // The window reached the end of the text: stop here. Stepping back
        // by the overlap would re-emit the tail forever.
        if (end >= text.length) {
          break;
        }

        start = end - overlapChars;
      }

      return chunks;
    },

    /** Cheap document-level summary: character count and estimated tokens. */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      return {
        strategy: strategyName,
        char_count: text.length,
        estimated_tokens: Math.ceil(text.length / charsPerToken),
      };
    },
  };
}
@@ -1,24 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { wholeFileStrategy } from "./whole-file.js";
3
-
4
// Unit tests for the strategy that emits an entire file as a single chunk.
describe("wholeFileStrategy", () => {
  const strategy = wholeFileStrategy();

  it("should have correct name", () => {
    expect(strategy.name).toBe("whole-file");
  });

  it("should return single chunk", async () => {
    const text = "Complete file content.";
    const chunks = await strategy.chunk(text);

    expect(chunks).toHaveLength(1);
    expect(chunks[0].content).toBe(text);
    expect(chunks[0].metadata.strategy).toBe("whole-file");
    expect(chunks[0].metadata.char_count).toBe(text.length);
    expect(chunks[0].metadata.line_count).toBe(1);
    // Without a file path the chunk falls back to the "unknown" source.
    expect(chunks[0].sourceFile).toBe("unknown");
  });

  it("should record the file path when provided", async () => {
    const chunks = await strategy.chunk("line one\nline two", "notes.md");

    expect(chunks).toHaveLength(1);
    expect(chunks[0].sourceFile).toBe("notes.md");
    expect(chunks[0].metadata.source_file).toBe("notes.md");
    expect(chunks[0].metadata.line_count).toBe(2);
  });

  it("should return empty array for empty text", async () => {
    const chunks = await strategy.chunk("");
    expect(chunks).toHaveLength(0);
  });

  it("should return empty array for whitespace-only text", async () => {
    const chunks = await strategy.chunk(" \n\t\n ");
    expect(chunks).toHaveLength(0);
  });

  it("should extract metadata", () => {
    const metadata = strategy.extractMetadata?.("a\nb\nc");

    expect(metadata).toBeDefined();
    expect(metadata?.strategy).toBe("whole-file");
    expect(metadata?.char_count).toBe(5);
    expect(metadata?.line_count).toBe(3);
  });
});
@@ -1,35 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/**
 * Builds a chunk strategy that emits the entire input as one chunk.
 *
 * Empty or whitespace-only input produces no chunks. The chunk content is the
 * text exactly as given (not trimmed).
 *
 * @returns A strategy named `"whole-file"`.
 */
export function wholeFileStrategy(): ChunkStrategy {
  // Captured in a constant so the methods keep working when detached from the
  // returned object (e.g. destructured or passed as callbacks).
  const strategyName = "whole-file";

  // Size figures shared by chunk metadata and extractMetadata.
  const measure = (text: string): { char_count: number; line_count: number } => ({
    char_count: text.length,
    line_count: text.split("\n").length,
  });

  return {
    name: strategyName,

    /**
     * Wraps `text` in a single chunk.
     *
     * @param text File content.
     * @param filePath Optional origin path, copied into `metadata.source_file`
     *   and `sourceFile` (`"unknown"` when omitted).
     * @returns A one-element array, or an empty array for blank input.
     *   `commitHash` is left empty.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      if (!text || text.trim().length === 0) {
        return [];
      }

      return [
        {
          content: text,
          metadata: {
            strategy: strategyName,
            source_file: filePath,
            ...measure(text),
          },
          sourceFile: filePath || "unknown",
          commitHash: "",
        },
      ];
    },

    /** Cheap document-level summary: character and line counts. */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      return {
        strategy: strategyName,
        ...measure(text),
      };
    },
  };
}
package/tsconfig.json DELETED
@@ -1,21 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "lib": ["ES2022"],
7
- "outDir": "./dist",
8
- "rootDir": "./src",
9
- "declaration": true,
10
- "declarationMap": true,
11
- "sourceMap": true,
12
- "strict": true,
13
- "esModuleInterop": true,
14
- "skipLibCheck": true,
15
- "forceConsistentCasingInFileNames": true,
16
- "resolveJsonModule": true,
17
- "types": ["node"]
18
- },
19
- "include": ["src/**/*"],
20
- "exclude": ["node_modules", "dist", "**/*.test.ts"]
21
- }
package/typedoc.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "entryPoints": ["src/index.ts"],
3
- "out": "docs/api",
4
- "excludePrivate": true,
5
- "excludeProtected": true,
6
- "skipErrorChecking": true,
7
- "validation": {
8
- "invalidLink": false,
9
- "notExported": false
10
- }
11
- }
package/vitest.config.ts DELETED
@@ -1,19 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
// Vitest configuration for the package's unit tests.
export default defineConfig({
  test: {
    // Runtime: Node environment with the describe/it/expect globals exposed.
    environment: 'node',
    globals: true,
    testTimeout: 10000,

    // Scheduling: up to four workers, with per-file isolation switched off.
    maxWorkers: 4,
    isolate: false,

    // Test discovery exclusions.
    exclude: ['node_modules', '.git'],

    // Coverage is collected with V8 over the sources, test files excluded.
    coverage: {
      provider: 'v8',
      reporter: ['text', 'json', 'html'],
      include: ['src/**/*.ts'],
      exclude: ['src/**/*.test.ts'],
    },
  },
});