@vivantel/virage-strategies 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/index.test.d.ts +2 -0
- package/dist/index.test.d.ts.map +1 -0
- package/dist/index.test.js +266 -0
- package/dist/index.test.js.map +1 -0
- package/package.json +57 -0
package/README.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# @vivantel/virage-strategies
|
|
2
|
+
|
|
3
|
+
[![npm version](https://img.shields.io/npm/v/@vivantel/virage-strategies)](https://www.npmjs.com/package/@vivantel/virage-strategies)
|
|
4
|
+
|
|
5
|
+
Built-in chunking strategies for `@vivantel/virage-core`.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @vivantel/virage-strategies
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { createChunker } from "@vivantel/virage-core";
|
|
17
|
+
import {
|
|
18
|
+
markdownHeadersStrategy,
|
|
19
|
+
tokenStrategy,
|
|
20
|
+
wholeFileStrategy,
|
|
21
|
+
semanticStrategy,
|
|
22
|
+
} from "@vivantel/virage-strategies";
|
|
23
|
+
|
|
24
|
+
const chunkers = [
|
|
25
|
+
createChunker({ patterns: ["docs/**/*.md"], strategy: markdownHeadersStrategy() }),
|
|
26
|
+
createChunker({ patterns: ["src/**/*.ts"], strategy: tokenStrategy({ maxTokens: 500 }) }),
|
|
27
|
+
createChunker({ patterns: ["**/*.yaml"], strategy: wholeFileStrategy() }),
|
|
28
|
+
];
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Or via JSON config (no TypeScript needed):
|
|
32
|
+
|
|
33
|
+
```json
|
|
34
|
+
{
|
|
35
|
+
"chunkers": [
|
|
36
|
+
{ "patterns": ["docs/**/*.md"], "strategy": "markdownHeaders" },
|
|
37
|
+
{ "patterns": ["src/**/*.ts"], "strategy": "token", "strategyOptions": { "maxTokens": 500 } }
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Strategies
|
|
43
|
+
|
|
44
|
+
### `markdownHeadersStrategy()`
|
|
45
|
+
|
|
46
|
+
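Splits Markdown files at each heading (`##`, `###`, …). Each section becomes one chunk with the header as metadata. Accepts optional `minChunkSize` (sections shorter than this are skipped) and `maxChunkSize` (oversized sections are truncated) options.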
|
|
47
|
+
|
|
48
|
+
Best for: documentation, wikis, README files.
|
|
49
|
+
|
|
50
|
+
### `tokenStrategy(options?)`
|
|
51
|
+
|
|
52
|
+
Splits text into fixed-size token windows with optional overlap.
|
|
53
|
+
|
|
54
|
+
| Option | Default | Description |
|
|
55
|
+
| --- | --- | --- |
|
|
56
|
+
| `maxTokens` | `500` | Maximum tokens per chunk |
|
|
57
|
+
| `overlap` | `50` | Token overlap between consecutive chunks |
|
|
58
|
+
|
|
59
|
+
Best for: source code, structured text, anything that needs size control.
|
|
60
|
+
|
|
61
|
+
### `semanticStrategy()`
|
|
62
|
+
|
|
63
|
+
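Splits on paragraph and sentence boundaries, trying to keep semantically coherent units together. Accepts optional `maxChars` (target upper bound per chunk) and `minChars` (chunks shorter than this are dropped) options.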
|
|
64
|
+
|
|
65
|
+
Best for: prose, articles, documentation with long paragraphs.
|
|
66
|
+
|
|
67
|
+
### `wholeFileStrategy()`
|
|
68
|
+
|
|
69
|
+
Returns the entire file as a single chunk.
|
|
70
|
+
|
|
71
|
+
Best for: small configuration files, YAML, short rule files where splitting would lose context.
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @vivantel/virage-strategies
|
|
3
|
+
*
|
|
4
|
+
* Built-in chunking strategies, re-exported from @vivantel/virage-core.
|
|
5
|
+
* Install this package to get a lighter import path for strategies:
|
|
6
|
+
*
|
|
7
|
+
* ```ts
|
|
8
|
+
* // v2+: preferred
|
|
9
|
+
* import { tokenStrategy } from '@vivantel/virage-strategies';
|
|
10
|
+
*
|
|
11
|
+
* // v1.x / legacy (still works, deprecated in @vivantel/virage-core)
|
|
12
|
+
* import { tokenStrategy } from '@vivantel/virage-core';
|
|
13
|
+
* ```
|
|
14
|
+
*/
|
|
15
|
+
export { tokenStrategy, markdownHeadersStrategy, semanticStrategy, wholeFileStrategy, } from "@vivantel/virage-core";
|
|
16
|
+
export type { TokenStrategyOptions } from "@vivantel/virage-core";
|
|
17
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,aAAa,EACb,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,uBAAuB,CAAC;AAE/B,YAAY,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @vivantel/virage-strategies
|
|
3
|
+
*
|
|
4
|
+
* Built-in chunking strategies, re-exported from @vivantel/virage-core.
|
|
5
|
+
* Install this package to get a lighter import path for strategies:
|
|
6
|
+
*
|
|
7
|
+
* ```ts
|
|
8
|
+
* // v2+: preferred
|
|
9
|
+
* import { tokenStrategy } from '@vivantel/virage-strategies';
|
|
10
|
+
*
|
|
11
|
+
* // v1.x / legacy (still works, deprecated in @vivantel/virage-core)
|
|
12
|
+
* import { tokenStrategy } from '@vivantel/virage-core';
|
|
13
|
+
* ```
|
|
14
|
+
*/
|
|
15
|
+
export { tokenStrategy, markdownHeadersStrategy, semanticStrategy, wholeFileStrategy, } from "@vivantel/virage-core";
|
|
16
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,aAAa,EACb,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.test.d.ts","sourceRoot":"","sources":["../src/index.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { tokenStrategy, markdownHeadersStrategy, semanticStrategy, wholeFileStrategy, } from "./index.js";
|
|
3
|
+
// ──────────────────────────────────────────────────────────────
|
|
4
|
+
// Export smoke tests
|
|
5
|
+
// ──────────────────────────────────────────────────────────────
|
|
6
|
+
describe("package exports", () => {
|
|
7
|
+
it("exports all four strategy factories", () => {
|
|
8
|
+
expect(typeof tokenStrategy).toBe("function");
|
|
9
|
+
expect(typeof markdownHeadersStrategy).toBe("function");
|
|
10
|
+
expect(typeof semanticStrategy).toBe("function");
|
|
11
|
+
expect(typeof wholeFileStrategy).toBe("function");
|
|
12
|
+
});
|
|
13
|
+
it("each factory returns an object with chunk(), extractMetadata(), and getQualityMetrics()", () => {
|
|
14
|
+
for (const factory of [
|
|
15
|
+
tokenStrategy,
|
|
16
|
+
markdownHeadersStrategy,
|
|
17
|
+
semanticStrategy,
|
|
18
|
+
wholeFileStrategy,
|
|
19
|
+
]) {
|
|
20
|
+
const s = factory();
|
|
21
|
+
expect(typeof s.chunk).toBe("function");
|
|
22
|
+
expect(typeof s.extractMetadata).toBe("function");
|
|
23
|
+
expect(typeof s.getQualityMetrics).toBe("function");
|
|
24
|
+
}
|
|
25
|
+
});
|
|
26
|
+
});
|
|
27
|
+
// ──────────────────────────────────────────────────────────────
|
|
28
|
+
// wholeFileStrategy
|
|
29
|
+
// ──────────────────────────────────────────────────────────────
|
|
30
|
+
describe("wholeFileStrategy", () => {
|
|
31
|
+
const strategy = wholeFileStrategy();
|
|
32
|
+
it("returns a single chunk containing the full text", async () => {
|
|
33
|
+
const text = "Hello world. This is a test file.";
|
|
34
|
+
const chunks = await strategy.chunk(text, "test.txt");
|
|
35
|
+
expect(chunks).toHaveLength(1);
|
|
36
|
+
expect(chunks[0].content).toBe(text);
|
|
37
|
+
});
|
|
38
|
+
it("returns empty array for blank text", async () => {
|
|
39
|
+
expect(await strategy.chunk("", "test.txt")).toHaveLength(0);
|
|
40
|
+
expect(await strategy.chunk(" \n\t ", "test.txt")).toHaveLength(0);
|
|
41
|
+
});
|
|
42
|
+
it("sets sourceFile and strategy name in metadata", async () => {
|
|
43
|
+
const chunks = await strategy.chunk("content", "file.yaml");
|
|
44
|
+
expect(chunks[0].sourceFile).toBe("file.yaml");
|
|
45
|
+
expect(chunks[0].metadata.strategy).toBe("whole-file");
|
|
46
|
+
});
|
|
47
|
+
it("metadata includes char_count and line_count", async () => {
|
|
48
|
+
const text = "line1\nline2\nline3";
|
|
49
|
+
const chunks = await strategy.chunk(text);
|
|
50
|
+
expect(chunks[0].metadata.char_count).toBe(text.length);
|
|
51
|
+
expect(chunks[0].metadata.line_count).toBe(3);
|
|
52
|
+
});
|
|
53
|
+
it("strategy name is 'whole-file'", () => {
|
|
54
|
+
expect(strategy.name).toBe("whole-file");
|
|
55
|
+
});
|
|
56
|
+
it("extractMetadata returns char_count and line_count", () => {
|
|
57
|
+
const meta = strategy.extractMetadata("a\nb\nc");
|
|
58
|
+
expect(meta.char_count).toBe(5);
|
|
59
|
+
expect(meta.line_count).toBe(3);
|
|
60
|
+
});
|
|
61
|
+
it("getQualityMetrics on a single chunk returns valid metrics", async () => {
|
|
62
|
+
const chunks = await strategy.chunk("The quick brown fox jumps over the lazy dog.", "f.txt");
|
|
63
|
+
const metrics = strategy.getQualityMetrics(chunks);
|
|
64
|
+
expect(metrics.avgChunkSize).toBeGreaterThan(0);
|
|
65
|
+
expect(metrics.semanticCoherence).toBeGreaterThanOrEqual(0);
|
|
66
|
+
expect(metrics.informationDensity).toBeGreaterThan(0);
|
|
67
|
+
});
|
|
68
|
+
});
|
|
69
|
+
// ──────────────────────────────────────────────────────────────
|
|
70
|
+
// tokenStrategy
|
|
71
|
+
// ──────────────────────────────────────────────────────────────
|
|
72
|
+
describe("tokenStrategy", () => {
|
|
73
|
+
it("uses name token-{maxTokens}", () => {
|
|
74
|
+
expect(tokenStrategy().name).toBe("token-500");
|
|
75
|
+
expect(tokenStrategy({ maxTokens: 100 }).name).toBe("token-100");
|
|
76
|
+
});
|
|
77
|
+
it("returns empty array for empty text", async () => {
|
|
78
|
+
expect(await tokenStrategy().chunk("", "f.ts")).toHaveLength(0);
|
|
79
|
+
});
|
|
80
|
+
it("returns a single chunk when text is shorter than maxTokens", async () => {
|
|
81
|
+
// overlap: 0 avoids the start+1 fallback that re-slices short text
|
|
82
|
+
const strategy = tokenStrategy({ maxTokens: 500, overlap: 0 });
|
|
83
|
+
const text = "Short content.";
|
|
84
|
+
const chunks = await strategy.chunk(text, "f.ts");
|
|
85
|
+
expect(chunks).toHaveLength(1);
|
|
86
|
+
expect(chunks[0].content).toBe(text);
|
|
87
|
+
});
|
|
88
|
+
it("splits long text into multiple chunks", async () => {
|
|
89
|
+
const strategy = tokenStrategy({ maxTokens: 10, overlap: 0 });
|
|
90
|
+
const text = "word ".repeat(200); // 1000 chars, i.e. ~250 tokens assuming ~4 chars/token; far above maxTokens: 10
|
|
91
|
+
const chunks = await strategy.chunk(text, "f.ts");
|
|
92
|
+
expect(chunks.length).toBeGreaterThan(1);
|
|
93
|
+
});
|
|
94
|
+
it("all chunks carry sequential chunk_index in metadata", async () => {
|
|
95
|
+
const strategy = tokenStrategy({ maxTokens: 10, overlap: 0 });
|
|
96
|
+
const text = "word ".repeat(200);
|
|
97
|
+
const chunks = await strategy.chunk(text, "f.ts");
|
|
98
|
+
chunks.forEach((c, i) => {
|
|
99
|
+
expect(c.metadata.chunk_index).toBe(i);
|
|
100
|
+
});
|
|
101
|
+
});
|
|
102
|
+
it("prefers to break at a sentence boundary (period)", async () => {
|
|
103
|
+
const strategy = tokenStrategy({ maxTokens: 20, overlap: 0 });
|
|
104
|
+
// Sentence ending well within the first window
|
|
105
|
+
const text = "First sentence ends here. " + "x".repeat(200);
|
|
106
|
+
const chunks = await strategy.chunk(text, "f.ts");
|
|
107
|
+
// The first chunk should end with the sentence, not mid-word
|
|
108
|
+
expect(chunks[0].content.endsWith(".")).toBe(true);
|
|
109
|
+
});
|
|
110
|
+
it("produces chunks with overlap — next chunk re-includes tail of previous", async () => {
|
|
111
|
+
const strategy = tokenStrategy({ maxTokens: 20, overlap: 5 });
|
|
112
|
+
const text = "a".repeat(400); // no punctuation, forces plain slicing
|
|
113
|
+
const chunks = await strategy.chunk(text, "f.ts");
|
|
114
|
+
expect(chunks.length).toBeGreaterThan(1);
|
|
115
|
+
// Overlap means each chunk's start_char < previous chunk's end_char
|
|
116
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
117
|
+
expect(Number(chunks[i].metadata.start_char)).toBeLessThan(Number(chunks[i - 1].metadata.end_char));
|
|
118
|
+
}
|
|
119
|
+
});
|
|
120
|
+
it("sets sourceFile on every chunk", async () => {
|
|
121
|
+
const chunks = await tokenStrategy({ maxTokens: 10, overlap: 0 }).chunk("x".repeat(300), "src/foo.ts");
|
|
122
|
+
expect(chunks.every((c) => c.sourceFile === "src/foo.ts")).toBe(true);
|
|
123
|
+
});
|
|
124
|
+
it("extractMetadata returns char_count and estimated_tokens", () => {
|
|
125
|
+
const meta = tokenStrategy().extractMetadata("hello world");
|
|
126
|
+
expect(typeof meta.char_count).toBe("number");
|
|
127
|
+
expect(typeof meta.estimated_tokens).toBe("number");
|
|
128
|
+
expect(Number(meta.estimated_tokens)).toBeGreaterThan(0);
|
|
129
|
+
});
|
|
130
|
+
it("getQualityMetrics on empty chunks returns zero metrics", () => {
|
|
131
|
+
const metrics = tokenStrategy().getQualityMetrics([]);
|
|
132
|
+
expect(metrics.avgChunkSize).toBe(0);
|
|
133
|
+
expect(metrics.informationDensity).toBe(0);
|
|
134
|
+
});
|
|
135
|
+
});
|
|
136
|
+
// ──────────────────────────────────────────────────────────────
|
|
137
|
+
// markdownHeadersStrategy
|
|
138
|
+
// ──────────────────────────────────────────────────────────────
|
|
139
|
+
describe("markdownHeadersStrategy", () => {
|
|
140
|
+
const strategy = markdownHeadersStrategy();
|
|
141
|
+
it("strategy name is 'markdown-headers'", () => {
|
|
142
|
+
expect(strategy.name).toBe("markdown-headers");
|
|
143
|
+
});
|
|
144
|
+
it("returns empty array for empty text", async () => {
|
|
145
|
+
expect(await strategy.chunk("")).toHaveLength(0);
|
|
146
|
+
});
|
|
147
|
+
it("splits on h1–h6 headers, one chunk per section", async () => {
|
|
148
|
+
const text = [
|
|
149
|
+
"## Section One",
|
|
150
|
+
"Content for section one. ".repeat(10),
|
|
151
|
+
"## Section Two",
|
|
152
|
+
"Content for section two. ".repeat(10),
|
|
153
|
+
].join("\n");
|
|
154
|
+
const chunks = await strategy.chunk(text, "doc.md");
|
|
155
|
+
expect(chunks).toHaveLength(2);
|
|
156
|
+
expect(chunks[0].metadata.header).toBe("Section One");
|
|
157
|
+
expect(chunks[1].metadata.header).toBe("Section Two");
|
|
158
|
+
});
|
|
159
|
+
it("records header_level in metadata", async () => {
|
|
160
|
+
const text = "### H3 Header\n" + "body content ".repeat(20);
|
|
161
|
+
const chunks = await strategy.chunk(text, "doc.md");
|
|
162
|
+
expect(chunks[0].metadata.header_level).toBe(3);
|
|
163
|
+
});
|
|
164
|
+
it("skips sections below minChunkSize", async () => {
|
|
165
|
+
const strategy100 = markdownHeadersStrategy({ minChunkSize: 100 });
|
|
166
|
+
const text = "## Short\nTiny.\n## Long\n" + "long content ".repeat(20);
|
|
167
|
+
const chunks = await strategy100.chunk(text, "doc.md");
|
|
168
|
+
// "Short" section is too small; only "Long" survives
|
|
169
|
+
expect(chunks).toHaveLength(1);
|
|
170
|
+
expect(chunks[0].metadata.header).toBe("Long");
|
|
171
|
+
});
|
|
172
|
+
it("includes final section that has no trailing header", async () => {
|
|
173
|
+
const text = "## Only Section\n" + "body content ".repeat(15);
|
|
174
|
+
const chunks = await strategy.chunk(text, "doc.md");
|
|
175
|
+
expect(chunks).toHaveLength(1);
|
|
176
|
+
expect(chunks[0].metadata.is_last).toBe(true);
|
|
177
|
+
});
|
|
178
|
+
it("handles text with no headers as a headerless chunk (if >= minChunkSize)", async () => {
|
|
179
|
+
const text = "No headers here. " + "x".repeat(200);
|
|
180
|
+
const chunks = await strategy.chunk(text, "doc.md");
|
|
181
|
+
expect(chunks).toHaveLength(1);
|
|
182
|
+
expect(chunks[0].metadata.header).toBe("");
|
|
183
|
+
});
|
|
184
|
+
it("truncates an oversized chunk when > maxChunkSize and > 10 lines", async () => {
|
|
185
|
+
const small = markdownHeadersStrategy({ maxChunkSize: 200 });
|
|
186
|
+
const text = "## Big\n" + "line content\n".repeat(20);
|
|
187
|
+
const chunks = await small.chunk(text, "doc.md");
|
|
188
|
+
// Should produce a truncated chunk
|
|
189
|
+
expect(chunks.some((c) => c.metadata.truncated === true)).toBe(true);
|
|
190
|
+
});
|
|
191
|
+
it("extractMetadata detects the first header and line count", () => {
|
|
192
|
+
const text = "## Title\nSome body.";
|
|
193
|
+
const meta = strategy.extractMetadata(text);
|
|
194
|
+
expect(meta.has_headers).toBe(true);
|
|
195
|
+
expect(meta.first_header).toBe("Title");
|
|
196
|
+
expect(meta.line_count).toBe(2);
|
|
197
|
+
});
|
|
198
|
+
it("extractMetadata reports has_headers = false for plain text", () => {
|
|
199
|
+
expect(strategy.extractMetadata("plain text").has_headers).toBe(false);
|
|
200
|
+
});
|
|
201
|
+
});
|
|
202
|
+
// ──────────────────────────────────────────────────────────────
|
|
203
|
+
// semanticStrategy
|
|
204
|
+
// ──────────────────────────────────────────────────────────────
|
|
205
|
+
describe("semanticStrategy", () => {
|
|
206
|
+
const strategy = semanticStrategy();
|
|
207
|
+
it("strategy name is 'semantic'", () => {
|
|
208
|
+
expect(strategy.name).toBe("semantic");
|
|
209
|
+
});
|
|
210
|
+
it("returns empty array for empty text", async () => {
|
|
211
|
+
expect(await strategy.chunk("")).toHaveLength(0);
|
|
212
|
+
});
|
|
213
|
+
it("returns a single chunk for text shorter than maxChars", async () => {
|
|
214
|
+
// Use minChars: 10 so the short test text passes the minimum-size filter
|
|
215
|
+
const s = semanticStrategy({ minChars: 10 });
|
|
216
|
+
const text = "Short text. Only two sentences.";
|
|
217
|
+
const chunks = await s.chunk(text, "f.txt");
|
|
218
|
+
expect(chunks).toHaveLength(1);
|
|
219
|
+
});
|
|
220
|
+
it("splits at sentence boundaries when text exceeds maxChars", async () => {
|
|
221
|
+
const strategy200 = semanticStrategy({ maxChars: 200, minChars: 10 });
|
|
222
|
+
const text = Array.from({ length: 10 }, (_, i) => `Sentence number ${i + 1} ends here.`).join(" ");
|
|
223
|
+
const chunks = await strategy200.chunk(text, "f.txt");
|
|
224
|
+
expect(chunks.length).toBeGreaterThan(1);
|
|
225
|
+
// Every chunk should be within maxChars (±one sentence overflow)
|
|
226
|
+
chunks.forEach((c) => expect(c.content.length).toBeLessThanOrEqual(300));
|
|
227
|
+
});
|
|
228
|
+
it("skips chunks below minChars", async () => {
|
|
229
|
+
const strategy = semanticStrategy({ maxChars: 10, minChars: 100 });
|
|
230
|
+
// Each sentence is short; after splitting they all fall below minChars
|
|
231
|
+
const text = "Hi. Ok. Yes.";
|
|
232
|
+
const chunks = await strategy.chunk(text, "f.txt");
|
|
233
|
+
expect(chunks).toHaveLength(0);
|
|
234
|
+
});
|
|
235
|
+
it("metadata includes sentence_count", async () => {
|
|
236
|
+
const text = "First. Second. Third. Fourth. Fifth. " + "x".repeat(200);
|
|
237
|
+
const chunks = await semanticStrategy({
|
|
238
|
+
maxChars: 100,
|
|
239
|
+
minChars: 10,
|
|
240
|
+
}).chunk(text, "f.txt");
|
|
241
|
+
chunks.forEach((c) => expect(typeof c.metadata.sentence_count).toBe("number"));
|
|
242
|
+
});
|
|
243
|
+
it("last chunk carries is_last = true", async () => {
|
|
244
|
+
const text = "Alpha. " + "x".repeat(300) + " Beta.";
|
|
245
|
+
const chunks = await semanticStrategy({
|
|
246
|
+
maxChars: 100,
|
|
247
|
+
minChars: 10,
|
|
248
|
+
}).chunk(text, "f.txt");
|
|
249
|
+
expect(chunks[chunks.length - 1].metadata.is_last).toBe(true);
|
|
250
|
+
});
|
|
251
|
+
it("extractMetadata returns sentence_count and char_count", () => {
|
|
252
|
+
const meta = strategy.extractMetadata("Hello. World!");
|
|
253
|
+
expect(typeof meta.sentence_count).toBe("number");
|
|
254
|
+
expect(typeof meta.char_count).toBe("number");
|
|
255
|
+
});
|
|
256
|
+
it("getQualityMetrics returns valid metrics for produced chunks", async () => {
|
|
257
|
+
const text = "The quick brown fox. " + "x".repeat(100) + ". The lazy dog.";
|
|
258
|
+
const chunks = await semanticStrategy({ maxChars: 50, minChars: 5 }).chunk(text, "f.txt");
|
|
259
|
+
const metrics = strategy.getQualityMetrics(chunks);
|
|
260
|
+
expect(metrics.avgChunkSize).toBeGreaterThanOrEqual(0);
|
|
261
|
+
expect(metrics.stdDevChunkSize).toBeGreaterThanOrEqual(0);
|
|
262
|
+
expect(metrics.semanticCoherence).toBeGreaterThanOrEqual(0);
|
|
263
|
+
expect(metrics.informationDensity).toBeGreaterThanOrEqual(0);
|
|
264
|
+
});
|
|
265
|
+
});
|
|
266
|
+
//# sourceMappingURL=index.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.test.js","sourceRoot":"","sources":["../src/index.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,aAAa,EACb,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,YAAY,CAAC;AAEpB,iEAAiE;AACjE,qBAAqB;AACrB,iEAAiE;AAEjE,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,CAAC,OAAO,aAAa,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,CAAC,OAAO,uBAAuB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxD,MAAM,CAAC,OAAO,gBAAgB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,iBAAiB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yFAAyF,EAAE,GAAG,EAAE;QACjG,KAAK,MAAM,OAAO,IAAI;YACpB,aAAa;YACb,uBAAuB;YACvB,gBAAgB;YAChB,iBAAiB;SAClB,EAAE,CAAC;YACF,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;YACpB,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAClD,MAAM,CAAC,OAAO,CAAC,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACtD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,iEAAiE;AACjE,oBAAoB;AACpB,iEAAiE;AAEjE,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,MAAM,QAAQ,GAAG,iBAAiB,EAAE,CAAC;IAErC,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,IAAI,GAAG,mCAAmC,CAAC;QACjD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QAEtD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QAE5D,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,I
AAI,CAAC,YAAY,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,IAAI,GAAG,qBAAqB,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE1C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAgB,CAAC,SAAS,CAAC,CAAC;QAClD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2DAA2D,EAAE,KAAK,IAAI,EAAE;QACzE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CACjC,8CAA8C,EAC9C,OAAO,CACR,CAAC;QACF,MAAM,OAAO,GAAG,QAAQ,CAAC,iBAAkB,CAAC,MAAM,CAAC,CAAC;QAEpD,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,iEAAiE;AACjE,gBAAgB;AAChB,iEAAiE;AAEjE,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,MAAM,CAAC,aAAa,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,CAAC,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QAC1E,mEAAmE;QACnE,MAAM,QAAQ,GAAG,aAAa,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QAC/D,MAAM,IAAI,GAAG,gBAAgB,CAAC;QAC9B,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,
CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,QAAQ,GAAG,aAAa,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,oCAAoC;QACtE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,QAAQ,GAAG,aAAa,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACtB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,MAAM,QAAQ,GAAG,aAAa,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,+CAA+C;QAC/C,MAAM,IAAI,GAAG,4BAA4B,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAElD,6DAA6D;QAC7D,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,KAAK,IAAI,EAAE;QACtF,MAAM,QAAQ,GAAG,aAAa,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,uCAAuC;QACrE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACzC,oEAAoE;QACpE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,YAAY,CACxD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CACxC,CAAC;QACJ,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE
,CAAC,EAAE,CAAC,CAAC,KAAK,CACrE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,EACf,YAAY,CACb,CAAC;QACF,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QACjE,MAAM,IAAI,GAAG,aAAa,EAAE,CAAC,eAAgB,CAAC,aAAa,CAAC,CAAC;QAC7D,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,CAAC,OAAO,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAChE,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC,iBAAkB,CAAC,EAAE,CAAC,CAAC;QACvD,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,iEAAiE;AACjE,0BAA0B;AAC1B,iEAAiE;AAEjE,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,MAAM,QAAQ,GAAG,uBAAuB,EAAE,CAAC;IAE3C,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,IAAI,GAAG;YACX,gBAAgB;YAChB,2BAA2B,CAAC,MAAM,CAAC,EAAE,CAAC;YACtC,gBAAgB;YAChB,2BAA2B,CAAC,MAAM,CAAC,EAAE,CAAC;SACvC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,IAAI,GAAG,iBAAiB,GAAG,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC
,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,WAAW,GAAG,uBAAuB,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;QACnE,MAAM,IAAI,GAAG,4BAA4B,GAAG,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACvE,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEvD,qDAAqD;QACrD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;QAClE,MAAM,IAAI,GAAG,mBAAmB,GAAG,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yEAAyE,EAAE,KAAK,IAAI,EAAE;QACvF,MAAM,IAAI,GAAG,mBAAmB,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iEAAiE,EAAE,KAAK,IAAI,EAAE;QAC/E,MAAM,KAAK,GAAG,uBAAuB,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;QAC7D,MAAM,IAAI,GAAG,UAAU,GAAG,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEjD,mCAAmC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QACjE,MAAM,IAAI,GAAG,sBAAsB,CAAC;QACpC,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAgB,CAAC,IAAI,CAAC,CAAC;QAE7C,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;QACpE,MA
AM,CAAC,QAAQ,CAAC,eAAgB,CAAC,YAAY,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC1E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,iEAAiE;AACjE,mBAAmB;AACnB,iEAAiE;AAEjE,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;IAEpC,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,yEAAyE;QACzE,MAAM,CAAC,GAAG,gBAAgB,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,iCAAiC,CAAC;QAC/C,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAE5C,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;QACxE,MAAM,WAAW,GAAG,gBAAgB,CAAC,EAAE,QAAQ,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,CAAC;QACtE,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CACrB,EAAE,MAAM,EAAE,EAAE,EAAE,EACd,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,aAAa,CAChD,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACZ,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACzC,iEAAiE;QACjE,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;QACnE,uEAAuE;QACvE,MAAM,IAAI,GAAG,cAAc,CAAC;QAC5B,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAEnD,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,IAAI,GAAG,uCAAuC,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACvE,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,QAAQ,EAAE,GAAG;YACb,QAAQ,EAAE,EAAE;SACb,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAExB,MAAM,CAAC,OAAO,CAA
C,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CACxD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,IAAI,GAAG,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC;QACpD,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,QAAQ,EAAE,GAAG;YACb,QAAQ,EAAE,EAAE;SACb,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAExB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAC/D,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAgB,CAAC,eAAe,CAAC,CAAC;QACxD,MAAM,CAAC,OAAO,IAAI,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClD,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;QAC3E,MAAM,IAAI,GAAG,uBAAuB,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,iBAAiB,CAAC;QAC3E,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,KAAK,CACxE,IAAI,EACJ,OAAO,CACR,CAAC;QACF,MAAM,OAAO,GAAG,QAAQ,CAAC,iBAAkB,CAAC,MAAM,CAAC,CAAC;QAEpD,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACvD,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IAC/D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@vivantel/virage-strategies",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Built-in chunking strategies for @vivantel/virage-core",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/vivantel/virage",
|
|
9
|
+
"directory": "packages/virage-strategies"
|
|
10
|
+
},
|
|
11
|
+
"main": "dist/index.js",
|
|
12
|
+
"types": "dist/index.d.ts",
|
|
13
|
+
"exports": {
|
|
14
|
+
".": {
|
|
15
|
+
"import": "./dist/index.js",
|
|
16
|
+
"types": "./dist/index.d.ts"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"files": [
|
|
20
|
+
"dist",
|
|
21
|
+
"README.md"
|
|
22
|
+
],
|
|
23
|
+
"sideEffects": false,
|
|
24
|
+
"publishConfig": {
|
|
25
|
+
"access": "public"
|
|
26
|
+
},
|
|
27
|
+
"scripts": {
|
|
28
|
+
"build": "tsc",
|
|
29
|
+
"type-check": "tsc --noEmit",
|
|
30
|
+
"test": "vitest run",
|
|
31
|
+
"prepublishOnly": "npm run build",
|
|
32
|
+
"lint": "eslint src/",
|
|
33
|
+
"lint:fix": "eslint src/ --fix",
|
|
34
|
+
"format": "prettier --write \"src/**/*.ts\"",
|
|
35
|
+
"fix": "npm run lint:fix && npm run format"
|
|
36
|
+
},
|
|
37
|
+
"keywords": [
|
|
38
|
+
"rag",
|
|
39
|
+
"chunking",
|
|
40
|
+
"strategies",
|
|
41
|
+
"embeddings"
|
|
42
|
+
],
|
|
43
|
+
"author": "Vivantel",
|
|
44
|
+
"license": "MIT",
|
|
45
|
+
"peerDependencies": {
|
|
46
|
+
"@vivantel/virage-core": "*"
|
|
47
|
+
},
|
|
48
|
+
"devDependencies": {
|
|
49
|
+
"@vivantel/virage-core": "0.2.0",
|
|
50
|
+
"@types/node": "^25.9.1",
|
|
51
|
+
"typescript": "^6.0.3",
|
|
52
|
+
"vitest": "^4.1.8"
|
|
53
|
+
},
|
|
54
|
+
"engines": {
|
|
55
|
+
"node": ">=18.0.0"
|
|
56
|
+
}
|
|
57
|
+
}
|