@vivantel/rag-core 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/config-loader.d.ts.map +1 -1
  2. package/dist/config-loader.js +0 -2
  3. package/dist/config-loader.js.map +1 -1
  4. package/dist/core/chunk-processor.d.ts.map +1 -1
  5. package/dist/core/chunk-processor.js +27 -20
  6. package/dist/core/chunk-processor.js.map +1 -1
  7. package/dist/core/embedder.d.ts.map +1 -1
  8. package/dist/core/embedder.js +10 -3
  9. package/dist/core/embedder.js.map +1 -1
  10. package/dist/core/git-tracker.d.ts.map +1 -1
  11. package/dist/core/git-tracker.js +9 -59
  12. package/dist/core/git-tracker.js.map +1 -1
  13. package/dist/core/orchestrator.d.ts.map +1 -1
  14. package/dist/core/orchestrator.js +22 -1
  15. package/dist/core/orchestrator.js.map +1 -1
  16. package/dist/core/uploader.d.ts.map +1 -1
  17. package/dist/core/uploader.js +13 -4
  18. package/dist/core/uploader.js.map +1 -1
  19. package/dist/strategies/chunk/token.js +1 -1
  20. package/dist/strategies/chunk/token.js.map +1 -1
  21. package/package.json +5 -1
  22. package/.github/config/release-please.json +0 -38
  23. package/.github/dependabot.yaml +0 -28
  24. package/.github/workflows/ci.yaml +0 -119
  25. package/.github/workflows/publish.yaml +0 -155
  26. package/.github/workflows/release.yaml +0 -54
  27. package/.release-please-manifest.json +0 -3
  28. package/.versionrc.json +0 -19
  29. package/CHANGELOG.md +0 -51
  30. package/bin/rag-update.ts +0 -49
  31. package/eslint.config.js +0 -25
  32. package/src/config-loader.ts +0 -21
  33. package/src/core/chunk-processor.test.ts +0 -36
  34. package/src/core/chunk-processor.ts +0 -92
  35. package/src/core/embedder.ts +0 -189
  36. package/src/core/git-tracker.test.ts +0 -64
  37. package/src/core/git-tracker.ts +0 -202
  38. package/src/core/orchestrator.test.ts +0 -53
  39. package/src/core/orchestrator.ts +0 -97
  40. package/src/core/uploader.ts +0 -123
  41. package/src/core/utils.ts +0 -27
  42. package/src/helpers/create-chunker.test.ts +0 -31
  43. package/src/helpers/create-chunker.ts +0 -40
  44. package/src/index.test.ts +0 -33
  45. package/src/index.ts +0 -30
  46. package/src/interfaces/chunker.ts +0 -59
  47. package/src/interfaces/embedder.ts +0 -36
  48. package/src/interfaces/index.test.ts +0 -9
  49. package/src/interfaces/index.ts +0 -3
  50. package/src/interfaces/vector-store.ts +0 -71
  51. package/src/strategies/chunk/index.ts +0 -4
  52. package/src/strategies/chunk/markdown-headers.test.ts +0 -37
  53. package/src/strategies/chunk/markdown-headers.ts +0 -106
  54. package/src/strategies/chunk/semantic.test.ts +0 -21
  55. package/src/strategies/chunk/semantic.ts +0 -80
  56. package/src/strategies/chunk/token.test.ts +0 -41
  57. package/src/strategies/chunk/token.ts +0 -72
  58. package/src/strategies/chunk/whole-file.test.ts +0 -24
  59. package/src/strategies/chunk/whole-file.ts +0 -35
  60. package/tsconfig.json +0 -21
  61. package/typedoc.json +0 -11
  62. package/vitest.config.ts +0 -19
@@ -1,80 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/** Size limits accepted by {@link semanticStrategy}. */
export interface SemanticStrategyOptions {
  /** Maximum length, in characters, of a chunk's joined content. Defaults to 2000. */
  maxChars?: number;
  /** Chunks whose trimmed content is shorter than this are discarded. Defaults to 100. */
  minChars?: number;
}

/**
 * Builds a chunking strategy that packs whole sentences into chunks.
 *
 * The text is split after `.`, `!` or `?` followed by whitespace. Consecutive
 * sentences are joined with a single space until adding the next one would
 * push the joined content past `maxChars`.
 *
 * Notes:
 * - A chunk whose trimmed content is shorter than `minChars` is dropped, so
 *   short inputs can legitimately produce no chunks.
 * - A single sentence longer than `maxChars` is never split; it is emitted as
 *   one oversized chunk.
 * - `commitHash` is left empty on every chunk.
 *
 * @param options - Size limits; see {@link SemanticStrategyOptions}.
 * @returns A strategy named `"semantic"`.
 */
export function semanticStrategy(
  options: SemanticStrategyOptions = {},
): ChunkStrategy {
  const maxChars = options.maxChars ?? 2000;
  const minChars = options.minChars ?? 100;

  return {
    name: "semantic",

    /**
     * Splits `text` into sentence-aligned chunks.
     *
     * @param text - Raw text to split.
     * @param filePath - Optional origin of the text; recorded on each chunk
     *   (`sourceFile` falls back to `"unknown"`).
     * @returns The chunks in document order; the final one carries
     *   `metadata.is_last = true`.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const strategyName = this.name;
      const chunks: Chunk[] = [];

      // Joins the pending sentences and records them as one chunk, unless the
      // result is empty or below the configured minimum size.
      const emit = (sentences: string[], isLast: boolean): void => {
        const content = sentences.join(" ").trim();
        if (content.length === 0 || content.length < minChars) {
          return;
        }
        chunks.push({
          content,
          metadata: {
            strategy: strategyName,
            sentence_count: sentences.length,
            source_file: filePath,
            ...(isLast ? { is_last: true } : {}),
          },
          sourceFile: filePath || "unknown",
          commitHash: "",
        });
      };

      let pending: string[] = [];
      let pendingSize = 0;

      // Split by sentences (simple approach): break on whitespace that
      // follows terminal punctuation.
      for (const sentence of text.split(/(?<=[.!?])\s+/)) {
        // Trailing whitespace (e.g. a final newline) yields an empty piece
        // that must not be counted as a sentence.
        if (sentence.trim().length === 0) {
          continue;
        }

        // Budget for the single space that join(" ") inserts between
        // sentences, so the joined content really stays within maxChars.
        const separator = pending.length > 0 ? 1 : 0;
        if (
          pending.length > 0 &&
          pendingSize + separator + sentence.length > maxChars
        ) {
          emit(pending, false);
          pending = [];
          pendingSize = 0;
        }

        pendingSize += (pending.length > 0 ? 1 : 0) + sentence.length;
        pending.push(sentence);
      }

      // Last chunk
      if (pending.length > 0) {
        emit(pending, true);
      }

      return chunks;
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Text to inspect.
     * @param _filePath - Unused.
     * @returns The strategy name, the number of non-blank segments delimited
     *   by `.`, `!` or `?`, and the character count.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      const sentenceCount = text
        .split(/[.!?]+/)
        .filter((part) => part.trim().length > 0).length;

      return {
        strategy: this.name,
        sentence_count: sentenceCount,
        char_count: text.length,
      };
    },
  };
}
@@ -1,41 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { tokenStrategy } from "./token.js";
3
-
4
// NOTE(review): this suite is skipped. Before enabling it, confirm that
// strategy.chunk() terminates for a non-zero overlap.
describe.skip("tokenStrategy", () => {
  const strategy = tokenStrategy({ maxTokens: 50, overlap: 10 });

  it("should have correct name", () => {
    const { name } = strategy;

    expect(name).toContain("token");
    expect(typeof name).toBe("string");
  });

  it("should chunk text", async () => {
    const input = "This is a test sentence. ".repeat(100);
    const chunks = await strategy.chunk(input, "test.txt");

    expect(Array.isArray(chunks)).toBe(true);
    expect(chunks.length).toBeGreaterThan(0);

    // Nothing further to inspect when no chunk was produced.
    if (chunks.length === 0) {
      return;
    }

    const first = chunks[0];
    expect(first.content).toBeDefined();
    expect(typeof first.content).toBe("string");
    expect(first.metadata).toBeDefined();
    expect(first.metadata.strategy).toBeDefined();
  });

  it("should extract metadata", () => {
    const metadata = strategy.extractMetadata?.("Test content");

    // extractMetadata is optional; when it is absent the test passes trivially.
    if (!metadata) {
      expect(true).toBe(true);
      return;
    }

    expect(metadata.strategy).toBe(strategy.name);
    expect(metadata.char_count).toBeDefined();
    expect(metadata.estimated_tokens).toBeDefined();
  });
});
@@ -1,72 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/** Options accepted by {@link tokenStrategy}. */
export interface TokenStrategyOptions {
  /** Approximate maximum number of tokens per chunk. Defaults to 500. */
  maxTokens?: number;
  /** Approximate number of tokens shared between consecutive chunks. Defaults to 50. */
  overlap?: number;
}

/**
 * Split text by approximate token count.
 * Simple implementation: ~4 chars per token for English.
 * For production, use a proper tokenizer (tiktoken, etc.)
 *
 * Each chunk spans at most `maxTokens * 4` characters and, where possible,
 * ends just after the last `.` or newline inside that window. Consecutive
 * chunks overlap by `overlap * 4` characters, except where stepping back by
 * the overlap would not move past the previous chunk's start; in that case
 * the next chunk begins where the previous one ended, so the scan always
 * advances and always terminates.
 *
 * @param options - Size and overlap settings; see {@link TokenStrategyOptions}.
 * @returns A strategy named `token-<maxTokens>`.
 * @throws RangeError if `maxTokens` is not a positive number.
 */
export function tokenStrategy(
  options: TokenStrategyOptions = {},
): ChunkStrategy {
  const maxTokens = options.maxTokens ?? 500;
  const overlap = options.overlap ?? 50;

  // A non-positive (or NaN) window can never consume any text.
  if (!(maxTokens > 0)) {
    throw new RangeError(
      `maxTokens must be a positive number, got ${maxTokens}`,
    );
  }

  const charsPerToken = 4;
  // Whole character counts keep the slice offsets and metadata integral.
  const maxChars = Math.max(1, Math.floor(maxTokens * charsPerToken));
  const overlapChars = Math.floor(overlap * charsPerToken);

  return {
    name: `token-${maxTokens}`,

    /**
     * Splits `text` into overlapping, size-bounded chunks.
     *
     * @param text - Raw text to split.
     * @param filePath - Optional origin of the text; recorded on each chunk
     *   (`sourceFile` falls back to `"unknown"`).
     * @returns The chunks in document order. `metadata.start_char` and
     *   `metadata.end_char` are the untrimmed slice bounds within `text`.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const chunks: Chunk[] = [];
      let start = 0;

      while (start < text.length) {
        let end = Math.min(start + maxChars, text.length);

        // Try to break at sentence boundary
        if (end < text.length) {
          // lastIndexOf's position argument is inclusive, so search up to
          // end - 1 to keep the break inside the [start, end) window.
          const lastPeriod = text.lastIndexOf(".", end - 1);
          const lastNewline = text.lastIndexOf("\n", end - 1);
          const breakPoint = Math.max(lastPeriod, lastNewline);
          if (breakPoint > start) {
            end = breakPoint + 1;
          }
        }

        const content = text.slice(start, end).trim();
        if (content) {
          chunks.push({
            content,
            metadata: {
              strategy: this.name,
              chunk_index: chunks.length,
              source_file: filePath,
              start_char: start,
              end_char: end,
            },
            sourceFile: filePath || "unknown",
            commitHash: "", // Will be filled by caller
          });
        }

        // The tail has been consumed; stepping back by the overlap here
        // would re-emit it forever.
        if (end >= text.length) {
          break;
        }

        // Apply the overlap only when it still moves the window forward;
        // otherwise continue from the end of this chunk.
        const overlappedStart = end - overlapChars;
        start = overlappedStart > start ? overlappedStart : end;
      }

      return chunks;
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Text to inspect.
     * @param _filePath - Unused.
     * @returns The strategy name, character count and an estimated token
     *   count (characters / 4, rounded up).
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      return {
        strategy: this.name,
        char_count: text.length,
        estimated_tokens: Math.ceil(text.length / charsPerToken),
      };
    },
  };
}
@@ -1,24 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { wholeFileStrategy } from "./whole-file.js";
3
-
4
// Behavioural checks for the whole-file chunking strategy.
describe("wholeFileStrategy", () => {
  const strategy = wholeFileStrategy();

  it("should have correct name", () => {
    expect(strategy.name).toBe("whole-file");
  });

  it("should return single chunk", async () => {
    const input = "Complete file content.";
    const result = await strategy.chunk(input);

    expect(result).toHaveLength(1);

    const only = result[0];
    expect(only.content).toBe(input);
    expect(only.metadata.strategy).toBe("whole-file");
  });

  it("should return empty array for empty text", async () => {
    const result = await strategy.chunk("");

    expect(result).toHaveLength(0);
  });
});
@@ -1,35 +0,0 @@
1
- import { ChunkStrategy, Chunk } from "../../interfaces/index.js";
2
-
3
/**
 * Creates a chunking strategy that treats an entire file as one chunk.
 *
 * Empty or whitespace-only text produces no chunk. Otherwise the text is
 * returned untouched as a single chunk whose metadata records its character
 * and line counts. `commitHash` is left empty.
 *
 * @returns A strategy named `"whole-file"`.
 */
export function wholeFileStrategy(): ChunkStrategy {
  // Size figures shared by the chunk metadata and extractMetadata.
  const measure = (body: string) => ({
    char_count: body.length,
    line_count: body.split("\n").length,
  });

  return {
    name: "whole-file",

    /**
     * Wraps `text` in a single chunk.
     *
     * @param text - Full file content.
     * @param filePath - Optional origin of the text; `sourceFile` falls back
     *   to `"unknown"` when it is missing or empty.
     * @returns A one-element array, or an empty array for blank input.
     */
    async chunk(text: string, filePath?: string): Promise<Chunk[]> {
      const isBlank = !text || !text.trim();
      if (isBlank) {
        return [];
      }

      const whole: Chunk = {
        content: text,
        metadata: {
          strategy: this.name,
          source_file: filePath,
          ...measure(text),
        },
        sourceFile: filePath || "unknown",
        commitHash: "",
      };

      return [whole];
    },

    /**
     * Summarises `text` without chunking it.
     *
     * @param text - Text to inspect.
     * @param _filePath - Unused.
     * @returns The strategy name plus character and line counts.
     */
    extractMetadata(text: string, _filePath?: string): Record<string, unknown> {
      return { strategy: this.name, ...measure(text) };
    },
  };
}
package/tsconfig.json DELETED
@@ -1,21 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "lib": ["ES2022"],
7
- "outDir": "./dist",
8
- "rootDir": "./src",
9
- "declaration": true,
10
- "declarationMap": true,
11
- "sourceMap": true,
12
- "strict": true,
13
- "esModuleInterop": true,
14
- "skipLibCheck": true,
15
- "forceConsistentCasingInFileNames": true,
16
- "resolveJsonModule": true,
17
- "types": ["node"]
18
- },
19
- "include": ["src/**/*"],
20
- "exclude": ["node_modules", "dist", "**/*.test.ts"]
21
- }
package/typedoc.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "entryPoints": ["src/index.ts"],
3
- "out": "docs/api",
4
- "excludePrivate": true,
5
- "excludeProtected": true,
6
- "skipErrorChecking": true,
7
- "validation": {
8
- "invalidLink": false,
9
- "notExported": false
10
- }
11
- }
package/vitest.config.ts DELETED
@@ -1,19 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
// Vitest configuration for the package's unit tests.
export default defineConfig({
  test: {
    // Runtime: Node environment with the test API exposed as globals.
    environment: 'node',
    globals: true,
    testTimeout: 10000,

    // Execution: at most four workers, with per-file isolation switched off.
    maxWorkers: 4,
    isolate: false,

    // Discovery: paths excluded from the test run.
    exclude: ['node_modules', '.git'],

    // Coverage: V8 provider over the sources, leaving out the test files.
    coverage: {
      provider: 'v8',
      reporter: ['text', 'json', 'html'],
      include: ['src/**/*.ts'],
      exclude: ['src/**/*.test.ts'],
    },
  },
});