@vivantel/virage-code-chunk-chunker 0.1.41 → 0.1.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +52 -35
- package/dist/index.js.map +1 -1
- package/dist/index.test.js +77 -146
- package/dist/index.test.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export interface
|
|
1
|
+
import type { FileChunker } from "@vivantel/virage-core";
|
|
2
|
+
export interface CodeChunkOptions {
|
|
3
3
|
/** Maximum size of each chunk in bytes (default: 1500) */
|
|
4
4
|
maxChunkSize?: number;
|
|
5
5
|
/** How much context to include (default: "full") */
|
|
@@ -11,15 +11,11 @@ export interface CodeChunkStrategyOptions {
|
|
|
11
11
|
/** Number of lines to overlap from the previous chunk (default: 0) */
|
|
12
12
|
overlapLines?: number;
|
|
13
13
|
/**
|
|
14
|
-
*
|
|
15
|
-
* instead of raw
|
|
14
|
+
* When true, `denseText` uses the scope-contextualized form (prepends
|
|
15
|
+
* scope chain + sibling signatures) instead of raw body. Produces richer
|
|
16
|
+
* embeddings at the cost of slightly longer texts. (default: false)
|
|
16
17
|
*/
|
|
17
18
|
useContextualizedText?: boolean;
|
|
18
19
|
}
|
|
19
|
-
export declare function
|
|
20
|
-
export declare const ragPlugin: {
|
|
21
|
-
name: string;
|
|
22
|
-
type: "chunker";
|
|
23
|
-
factory: () => ChunkStrategy;
|
|
24
|
-
};
|
|
20
|
+
export declare function createChunker(options?: CodeChunkOptions): FileChunker;
|
|
25
21
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,WAAW,EAAS,MAAM,uBAAuB,CAAC;AAchE,MAAM,WAAW,gBAAgB;IAC/B,0DAA0D;IAC1D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IAC1C,iEAAiE;IACjE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,YAAY,CAAC;IAChD,4DAA4D;IAC5D,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,sEAAsE;IACtE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,OAAO,CAAC;CACjC;AAWD,wBAAgB,aAAa,CAAC,OAAO,GAAE,gBAAqB,GAAG,WAAW,CAuDzE"}
|
package/dist/index.js
CHANGED
|
@@ -1,13 +1,38 @@
|
|
|
1
|
+
import { readFile } from "fs/promises";
|
|
1
2
|
import { chunk as codeChunk, detectLanguage, UnsupportedLanguageError, } from "code-chunk";
|
|
2
|
-
import {
|
|
3
|
-
|
|
3
|
+
import { computeDenseTextHash, makeDenseText } from "@vivantel/virage-core";
|
|
4
|
+
import { createHash } from "crypto";
|
|
5
|
+
const VERSION = "0.1.42";
|
|
6
|
+
const SUPPORTED_EXTENSIONS = [
|
|
7
|
+
"**/*.{js,mjs,cjs,ts,tsx,jsx}",
|
|
8
|
+
"**/*.{py,rb,go,rs,java,kt,scala}",
|
|
9
|
+
"**/*.{c,cpp,cc,cxx,h,hpp}",
|
|
10
|
+
"**/*.{cs,swift,m,mm}",
|
|
11
|
+
"**/*.{php,lua,sh,bash,zsh}",
|
|
12
|
+
];
|
|
13
|
+
function optionsFingerprint(opts) {
|
|
14
|
+
const { useContextualizedText: _unused, ...sparseOpts } = opts;
|
|
15
|
+
void _unused;
|
|
16
|
+
return createHash("sha256")
|
|
17
|
+
.update(JSON.stringify(sparseOpts))
|
|
18
|
+
.digest("hex")
|
|
19
|
+
.slice(0, 16);
|
|
20
|
+
}
|
|
21
|
+
export function createChunker(options = {}) {
|
|
4
22
|
const { useContextualizedText = false, ...chunkOptions } = options;
|
|
23
|
+
const sparseId = `code-chunk-ast@${VERSION}:${optionsFingerprint(options)}`;
|
|
24
|
+
const ctxHash = `code-chunk-ast@${VERSION}:ctx:${optionsFingerprint(options)}`;
|
|
5
25
|
return {
|
|
6
26
|
name: "code-chunk-ast",
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
27
|
+
version: VERSION,
|
|
28
|
+
patterns: SUPPORTED_EXTENSIONS,
|
|
29
|
+
sparseTextId: sparseId,
|
|
30
|
+
contextTextHash: ctxHash,
|
|
31
|
+
async canProcess(filePath) {
|
|
32
|
+
return detectLanguage(filePath) !== null;
|
|
33
|
+
},
|
|
34
|
+
async chunk(filePath, commitHash) {
|
|
35
|
+
const text = await readFile(filePath, "utf-8");
|
|
11
36
|
let results;
|
|
12
37
|
try {
|
|
13
38
|
results = await codeChunk(filePath, text, chunkOptions);
|
|
@@ -18,36 +43,28 @@ export function codeChunkStrategy(options = {}) {
|
|
|
18
43
|
}
|
|
19
44
|
throw err;
|
|
20
45
|
}
|
|
21
|
-
return results.map((c, i) =>
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
};
|
|
42
|
-
},
|
|
43
|
-
getQualityMetrics(chunks) {
|
|
44
|
-
return computeChunkQualityMetrics(chunks);
|
|
46
|
+
return results.map((c, i) => {
|
|
47
|
+
const sparseText = c.text;
|
|
48
|
+
const contextText = c.contextualizedText;
|
|
49
|
+
const breadcrumb = c.context.scope.map((s) => s.name);
|
|
50
|
+
const denseText = useContextualizedText
|
|
51
|
+
? contextText
|
|
52
|
+
: makeDenseText(breadcrumb, sparseText);
|
|
53
|
+
return {
|
|
54
|
+
denseText,
|
|
55
|
+
sparseText,
|
|
56
|
+
contextText,
|
|
57
|
+
denseTextHash: computeDenseTextHash(denseText),
|
|
58
|
+
metadata: {
|
|
59
|
+
strategy: "code-chunk-ast",
|
|
60
|
+
chunkIndex: i,
|
|
61
|
+
totalChunks: c.totalChunks,
|
|
62
|
+
},
|
|
63
|
+
sourceFile: filePath,
|
|
64
|
+
commitHash,
|
|
65
|
+
};
|
|
66
|
+
});
|
|
45
67
|
},
|
|
46
68
|
};
|
|
47
69
|
}
|
|
48
|
-
export const ragPlugin = {
|
|
49
|
-
name: "code-chunk-ast",
|
|
50
|
-
type: "chunker",
|
|
51
|
-
factory: () => codeChunkStrategy(),
|
|
52
|
-
};
|
|
53
70
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,IAAI,SAAS,EAClB,cAAc,EACd,wBAAwB,GACzB,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,KAAK,IAAI,SAAS,EAClB,cAAc,EACd,wBAAwB,GACzB,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,oBAAoB,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC5E,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,OAAO,GAAG,QAAQ,CAAC;AAEzB,MAAM,oBAAoB,GAAG;IAC3B,8BAA8B;IAC9B,kCAAkC;IAClC,2BAA2B;IAC3B,sBAAsB;IACtB,4BAA4B;CAC7B,CAAC;AAqBF,SAAS,kBAAkB,CAAC,IAAsB;IAChD,MAAM,EAAE,qBAAqB,EAAE,OAAO,EAAE,GAAG,UAAU,EAAE,GAAG,IAAI,CAAC;IAC/D,KAAK,OAAO,CAAC;IACb,OAAO,UAAU,CAAC,QAAQ,CAAC;SACxB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;SAClC,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,UAA4B,EAAE;IAC1D,MAAM,EAAE,qBAAqB,GAAG,KAAK,EAAE,GAAG,YAAY,EAAE,GACtD,OAA0C,CAAC;IAE7C,MAAM,QAAQ,GAAG,kBAAkB,OAAO,IAAI,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC5E,MAAM,OAAO,GAAG,kBAAkB,OAAO,QAAQ,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC;IAE/E,OAAO;QACL,IAAI,EAAE,gBAAgB;QACtB,OAAO,EAAE,OAAO;QAChB,QAAQ,EAAE,oBAAoB;QAC9B,YAAY,EAAE,QAAQ;QACtB,eAAe,EAAE,OAAO;QAExB,KAAK,CAAC,UAAU,CAAC,QAAgB;YAC/B,OAAO,cAAc,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC;QAC3C,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,UAAkB;YAC9C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAE/C,IAAI,OAAoB,CAAC;YACzB,IAAI,CAAC;gBACH,OAAO,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,YAAY,CAAC,CAAC;YAC1D,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,IAAI,GAAG,YAAY,wBAAwB,EAAE,CAAC;oBAC5C,OAAO,EAAE,CAAC;gBACZ,CAAC;gBACD,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBAC1B,MAAM,UAAU,GAAG,CAAC,CAAC,IAAI,CAAC;gBAC1B,MAAM,WAAW,GAAG,CAAC,CAAC,kBAAkB,CAAC;gBACzC,MAAM,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACtD,MAAM,SAAS,GAAG,qBAAqB;oBACrC,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,aAAa,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;gBAE1C,OAAO;oBACL,SAAS;oBACT,UAAU;oBACV,WAAW;oBACX,aAAa,EAAE,oBAAoB,CAAC,SAAS,CAAC;oBAC9C,QAAQ,EAAE;wBACR,QAAQ,EAAE,gBAAgB;wBAC1B,UAAU,EAAE,CAAC;wBACb,WAAW,EAAE,CAAC,CAAC,WAAW;qBACkB;oBAC9C,UAAU,EAAE,QAAQ;oBACpB,UAAU;iBACX,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/index.test.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
-
import {
|
|
2
|
+
import { createChunker } from "./index.js";
|
|
3
|
+
vi.mock("fs/promises", () => ({
|
|
4
|
+
readFile: vi.fn().mockResolvedValue("function foo() {}"),
|
|
5
|
+
}));
|
|
3
6
|
vi.mock("code-chunk", () => ({
|
|
4
7
|
chunk: vi.fn(),
|
|
5
8
|
detectLanguage: vi.fn(),
|
|
@@ -28,175 +31,103 @@ const makeMockChunk = (overrides = {}) => ({
|
|
|
28
31
|
imports: [],
|
|
29
32
|
},
|
|
30
33
|
});
|
|
31
|
-
// ───
|
|
32
|
-
describe("
|
|
33
|
-
it("
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
expect(
|
|
38
|
-
expect(
|
|
39
|
-
expect(typeof
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
-
expect(
|
|
47
|
-
expect(typeof s.extractMetadata).toBe("function");
|
|
48
|
-
expect(typeof s.getQualityMetrics).toBe("function");
|
|
49
|
-
});
|
|
50
|
-
it("strategy name is 'code-chunk-ast'", () => {
|
|
51
|
-
expect(codeChunkStrategy().name).toBe("code-chunk-ast");
|
|
34
|
+
// ─── Factory ─────────────────────────────────────────────────
|
|
35
|
+
describe("createChunker() factory", () => {
|
|
36
|
+
it("returns a FileChunker with required fields", () => {
|
|
37
|
+
const chunker = createChunker();
|
|
38
|
+
expect(chunker.name).toBe("code-chunk-ast");
|
|
39
|
+
expect(typeof chunker.version).toBe("string");
|
|
40
|
+
expect(Array.isArray(chunker.patterns)).toBe(true);
|
|
41
|
+
expect(typeof chunker.sparseTextId).toBe("string");
|
|
42
|
+
expect(typeof chunker.contextTextHash).toBe("string");
|
|
43
|
+
expect(typeof chunker.chunk).toBe("function");
|
|
44
|
+
expect(typeof chunker.canProcess).toBe("function");
|
|
45
|
+
});
|
|
46
|
+
it("different option sets produce different sparseTextId", () => {
|
|
47
|
+
const a = createChunker({ maxChunkSize: 500 });
|
|
48
|
+
const b = createChunker({ maxChunkSize: 1000 });
|
|
49
|
+
expect(a.sparseTextId).not.toBe(b.sparseTextId);
|
|
52
50
|
});
|
|
53
51
|
});
|
|
54
|
-
// ───
|
|
55
|
-
describe("
|
|
56
|
-
beforeEach(() =>
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
const result = await codeChunkStrategy().chunk("const x = 1;");
|
|
61
|
-
expect(result).toHaveLength(0);
|
|
62
|
-
expect(mockCodeChunk).not.toHaveBeenCalled();
|
|
63
|
-
});
|
|
64
|
-
it("returns [] when code-chunk throws UnsupportedLanguageError", async () => {
|
|
65
|
-
vi.mocked(mockCodeChunk).mockRejectedValueOnce(new UnsupportedLanguageError("file.rb"));
|
|
66
|
-
const result = await codeChunkStrategy().chunk("puts 'hello'", "file.rb");
|
|
67
|
-
expect(result).toHaveLength(0);
|
|
52
|
+
// ─── canProcess() ────────────────────────────────────────────
|
|
53
|
+
describe("canProcess()", () => {
|
|
54
|
+
beforeEach(() => vi.clearAllMocks());
|
|
55
|
+
it("returns true when detectLanguage returns a language", async () => {
|
|
56
|
+
vi.mocked(mockDetectLanguage).mockReturnValueOnce("typescript");
|
|
57
|
+
expect(await createChunker().canProcess("src/main.ts")).toBe(true);
|
|
68
58
|
});
|
|
69
|
-
it("
|
|
70
|
-
vi.mocked(
|
|
71
|
-
await
|
|
59
|
+
it("returns false when detectLanguage returns null", async () => {
|
|
60
|
+
vi.mocked(mockDetectLanguage).mockReturnValueOnce(null);
|
|
61
|
+
expect(await createChunker().canProcess("file.xyz")).toBe(false);
|
|
72
62
|
});
|
|
73
63
|
});
|
|
74
64
|
// ─── chunk() — happy path ────────────────────────────────────
|
|
75
|
-
describe("chunk() — mapping", () => {
|
|
76
|
-
beforeEach(() =>
|
|
77
|
-
|
|
78
|
-
});
|
|
79
|
-
it("maps c.text to Chunk.content by default", async () => {
|
|
65
|
+
describe("chunk() — four-artifact mapping", () => {
|
|
66
|
+
beforeEach(() => vi.clearAllMocks());
|
|
67
|
+
it("maps c.text to sparseText and contextText to contextualizedText", async () => {
|
|
80
68
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
81
|
-
const
|
|
82
|
-
expect(
|
|
69
|
+
const [c] = await createChunker().chunk("f.ts", "abc123");
|
|
70
|
+
expect(c.sparseText).toBe("function foo() {}");
|
|
71
|
+
expect(c.contextText).toBe("// scope: module\nfunction foo() {}");
|
|
72
|
+
});
|
|
73
|
+
it("denseText uses scope breadcrumb + sparseText by default", async () => {
|
|
74
|
+
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
75
|
+
makeMockChunk({ scope: [{ name: "MyClass", type: "class" }] }),
|
|
76
|
+
]);
|
|
77
|
+
const [c] = await createChunker().chunk("f.ts", "abc123");
|
|
78
|
+
expect(c.denseText).toContain("MyClass");
|
|
79
|
+
expect(c.denseText).toContain("function foo() {}");
|
|
83
80
|
});
|
|
84
|
-
it("
|
|
81
|
+
it("denseText equals contextualizedText when useContextualizedText is true", async () => {
|
|
85
82
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
}).chunk("code", "f.ts");
|
|
89
|
-
expect(chunks[0].content).toBe("// scope: module\nfunction foo() {}");
|
|
83
|
+
const [c] = await createChunker({ useContextualizedText: true }).chunk("f.ts", "abc123");
|
|
84
|
+
expect(c.denseText).toBe("// scope: module\nfunction foo() {}");
|
|
90
85
|
});
|
|
91
|
-
it("
|
|
86
|
+
it("denseTextHash is a 16-char hex string", async () => {
|
|
92
87
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
93
|
-
const
|
|
94
|
-
expect(
|
|
95
|
-
expect(chunks[0].commitHash).toBe("");
|
|
96
|
-
expect(chunks[0].metadata.strategy).toBe("code-chunk-ast");
|
|
97
|
-
expect(chunks[0].metadata.source_file).toBe("src/main.ts");
|
|
88
|
+
const [c] = await createChunker().chunk("f.ts", "abc123");
|
|
89
|
+
expect(c.denseTextHash).toMatch(/^[0-9a-f]{16}$/);
|
|
98
90
|
});
|
|
99
|
-
it("sets
|
|
100
|
-
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const chunks = await codeChunkStrategy().chunk("ab", "f.ts");
|
|
105
|
-
expect(chunks[0].metadata.chunk_index).toBe(0);
|
|
106
|
-
expect(chunks[1].metadata.chunk_index).toBe(1);
|
|
91
|
+
it("sets sourceFile and commitHash from arguments", async () => {
|
|
92
|
+
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
93
|
+
const [c] = await createChunker().chunk("src/main.ts", "deadbeef");
|
|
94
|
+
expect(c.sourceFile).toBe("src/main.ts");
|
|
95
|
+
expect(c.commitHash).toBe("deadbeef");
|
|
107
96
|
});
|
|
108
|
-
it("
|
|
109
|
-
const scope = [{ name: "MyClass", type: "class" }];
|
|
110
|
-
const entities = [{ name: "myMethod", type: "method" }];
|
|
97
|
+
it("sets metadata.strategy and index fields", async () => {
|
|
111
98
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
112
|
-
makeMockChunk({
|
|
99
|
+
makeMockChunk({ totalChunks: 3 }),
|
|
113
100
|
]);
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
expect(
|
|
101
|
+
const [c] = await createChunker().chunk("f.ts", "");
|
|
102
|
+
const meta = c.metadata;
|
|
103
|
+
expect(meta.strategy).toBe("code-chunk-ast");
|
|
104
|
+
expect(meta.chunkIndex).toBe(0);
|
|
105
|
+
expect(meta.totalChunks).toBe(3);
|
|
117
106
|
});
|
|
118
107
|
it("passes ChunkOptions to code-chunk (not useContextualizedText)", async () => {
|
|
119
108
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([]);
|
|
120
|
-
await
|
|
109
|
+
await createChunker({
|
|
121
110
|
maxChunkSize: 500,
|
|
122
111
|
filterImports: true,
|
|
123
112
|
useContextualizedText: true,
|
|
124
|
-
}).chunk("
|
|
125
|
-
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts",
|
|
126
|
-
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts",
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
130
|
-
makeMockChunk({ totalChunks: 3 }),
|
|
131
|
-
]);
|
|
132
|
-
const chunks = await codeChunkStrategy().chunk("code", "f.ts");
|
|
133
|
-
expect(chunks[0].metadata.total_chunks).toBe(3);
|
|
113
|
+
}).chunk("f.ts", "");
|
|
114
|
+
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts", expect.any(String), expect.objectContaining({ maxChunkSize: 500, filterImports: true }));
|
|
115
|
+
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts", expect.any(String), expect.not.objectContaining({
|
|
116
|
+
useContextualizedText: expect.anything(),
|
|
117
|
+
}));
|
|
134
118
|
});
|
|
135
119
|
});
|
|
136
|
-
// ───
|
|
137
|
-
describe("
|
|
138
|
-
beforeEach(() =>
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
const meta = codeChunkStrategy().extractMetadata("code", "src/main.ts");
|
|
144
|
-
expect(meta.supported).toBe(true);
|
|
145
|
-
expect(meta.language).toBe("typescript");
|
|
146
|
-
expect(meta.strategy).toBe("code-chunk-ast");
|
|
147
|
-
});
|
|
148
|
-
it("returns supported: false and language: 'unknown' for unsupported extensions", () => {
|
|
149
|
-
vi.mocked(mockDetectLanguage).mockReturnValueOnce(null);
|
|
150
|
-
const meta = codeChunkStrategy().extractMetadata("code", "file.xyz");
|
|
151
|
-
expect(meta.supported).toBe(false);
|
|
152
|
-
expect(meta.language).toBe("unknown");
|
|
153
|
-
});
|
|
154
|
-
it("returns supported: false when filePath is undefined", () => {
|
|
155
|
-
const meta = codeChunkStrategy().extractMetadata("code");
|
|
156
|
-
expect(meta.supported).toBe(false);
|
|
157
|
-
expect(mockDetectLanguage).not.toHaveBeenCalled();
|
|
158
|
-
});
|
|
159
|
-
});
|
|
160
|
-
// ─── getQualityMetrics() ─────────────────────────────────────
|
|
161
|
-
describe("getQualityMetrics()", () => {
|
|
162
|
-
it("returns zero metrics for empty chunk array", () => {
|
|
163
|
-
const metrics = codeChunkStrategy().getQualityMetrics([]);
|
|
164
|
-
expect(metrics.avgChunkSize).toBe(0);
|
|
165
|
-
expect(metrics.stdDevChunkSize).toBe(0);
|
|
166
|
-
expect(metrics.semanticCoherence).toBe(0);
|
|
167
|
-
expect(metrics.informationDensity).toBe(0);
|
|
168
|
-
});
|
|
169
|
-
it("computes non-zero metrics from real chunk content", () => {
|
|
170
|
-
const chunks = [
|
|
171
|
-
{
|
|
172
|
-
content: "function foo() { return 42; }",
|
|
173
|
-
metadata: {},
|
|
174
|
-
sourceFile: "f.ts",
|
|
175
|
-
commitHash: "",
|
|
176
|
-
},
|
|
177
|
-
{
|
|
178
|
-
content: "function bar(x: number) { return x * 2; }",
|
|
179
|
-
metadata: {},
|
|
180
|
-
sourceFile: "f.ts",
|
|
181
|
-
commitHash: "",
|
|
182
|
-
},
|
|
183
|
-
];
|
|
184
|
-
const metrics = codeChunkStrategy().getQualityMetrics(chunks);
|
|
185
|
-
expect(metrics.avgChunkSize).toBeGreaterThan(0);
|
|
186
|
-
expect(metrics.informationDensity).toBeGreaterThan(0);
|
|
187
|
-
expect(metrics.stdDevChunkSize).toBeGreaterThanOrEqual(0);
|
|
188
|
-
expect(metrics.semanticCoherence).toBeGreaterThanOrEqual(0);
|
|
189
|
-
});
|
|
190
|
-
});
|
|
191
|
-
// ─── ragPlugin ───────────────────────────────────────────────
|
|
192
|
-
describe("ragPlugin", () => {
|
|
193
|
-
it("factory() returns a strategy with correct name and chunk function", () => {
|
|
194
|
-
const strategy = ragPlugin.factory();
|
|
195
|
-
expect(strategy.name).toBe("code-chunk-ast");
|
|
196
|
-
expect(typeof strategy.chunk).toBe("function");
|
|
120
|
+
// ─── chunk() — error handling ────────────────────────────────
|
|
121
|
+
describe("chunk() — edge cases", () => {
|
|
122
|
+
beforeEach(() => vi.clearAllMocks());
|
|
123
|
+
it("returns [] when code-chunk throws UnsupportedLanguageError", async () => {
|
|
124
|
+
vi.mocked(mockCodeChunk).mockRejectedValueOnce(new UnsupportedLanguageError("file.rb"));
|
|
125
|
+
const result = await createChunker().chunk("file.rb", "");
|
|
126
|
+
expect(result).toHaveLength(0);
|
|
197
127
|
});
|
|
198
|
-
it("
|
|
199
|
-
|
|
128
|
+
it("propagates non-UnsupportedLanguageError errors", async () => {
|
|
129
|
+
vi.mocked(mockCodeChunk).mockRejectedValueOnce(new Error("parse failure"));
|
|
130
|
+
await expect(createChunker().chunk("file.ts", "")).rejects.toThrow("parse failure");
|
|
200
131
|
});
|
|
201
132
|
});
|
|
202
133
|
//# sourceMappingURL=index.test.js.map
|
package/dist/index.test.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.test.js","sourceRoot":"","sources":["../src/index.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.test.js","sourceRoot":"","sources":["../src/index.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,EAAE,CAAC,IAAI,CAAC,aAAa,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5B,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,mBAAmB,CAAC;CACzD,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;IAC3B,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE;IACd,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE;IACvB,wBAAwB,EAAE,MAAM,wBAAyB,SAAQ,KAAK;QAC3D,IAAI,GAAG,0BAA0B,CAAC;QAC3C,YAAY,QAAgB;YAC1B,KAAK,CAAC,kCAAkC,QAAQ,EAAE,CAAC,CAAC;YACpD,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAC;QACzC,CAAC;KACF;CACF,CAAC,CAAC,CAAC;AAEJ,OAAO,EACL,KAAK,IAAI,aAAa,EACtB,cAAc,IAAI,kBAAkB,EACpC,wBAAwB,GACzB,MAAM,YAAY,CAAC;AAEpB,MAAM,aAAa,GAAG,CACpB,YAMK,EAAE,EACP,EAAE,CAAC,CAAC;IACJ,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,mBAAmB;IAC3C,kBAAkB,EAChB,SAAS,CAAC,kBAAkB,IAAI,qCAAqC;IACvE,SAAS,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;IAChC,SAAS,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;IAC/B,KAAK,EAAE,CAAC;IACR,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,CAAC;IACvC,OAAO,EAAE;QACP,KAAK,EAAE,SAAS,CAAC,KAAK,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAiB,EAAE,CAAC;QACvE,QAAQ,EAAE,SAAS,CAAC,QAAQ,IAAI;YAC9B,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,UAAmB,EAAE;SAC3C;QACD,QAAQ,EAAE,EAAE;QACZ,OAAO,EAAE,EAAE;KACZ;CACF,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC5C,MAAM,CAAC,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,MAAM,CAAC,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnD,MAAM,CAAC,OAAO,OAAO,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtD,MAAM,CAAC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,CAAC,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,CAAC,GAAG,aAAa,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;QAC/C,MAAM,CAAC,GAAG,aAAa,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,UAAU,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAErC,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,EAAE,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,aAAa,EAAE,CAAC,UAAW,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,EAAE,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,aAAa,EAAE,CAAC,UAAW,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,iCAAiC,EAAE,GAAG,EAAE;IAC/C,UAAU,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAErC,EAAE,CAAC,iEAAiE,EAAE,KAAK,IAAI,EAAE;QAC/E,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAC/C,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,KAAK,IAAI,EAAE;QACvE,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC;YAC7C,aAAa,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;SAC/D,CAAC,CAAC;QACH,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QACzC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,KAAK,IAAI,EAAE;QACtF,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,CAAC,EAAE,qBAAqB,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CACpE,MAAM,EACN,QAAQ,CACT,CAAC;QACF,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACzC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC;YAC7C,aAAa,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;SAClC,CAAC,CAAC;QACH,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACpD,MAAM,IAAI,GAAG,CAAC,CAAC,QAA8C,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC7C,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+DAA+D,EAAE,KAAK,IAAI,EAAE;QAC7E,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC;QACnD,MAAM,aAAa,CAAC;YAClB,YAAY,EAAE,GAAG;YACjB,aAAa,EAAE,IAAI;YACnB,qBAAqB,EAAE,IAAI;SAC5B,CAAC,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACrB,MAAM,CAAC,aAAa,CAAC,CAAC,oBAAoB,CACxC,MAAM,EACN,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,EAClB,MAAM,CAAC,gBAAgB,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CACpE,CAAC;QACF,MAAM,CAAC,aAAa,CAAC,CAAC,oBAAoB,CACxC,MAAM,EACN,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,EAClB,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC;YAC1B,qBAAqB,EAAE,MAAM,CAAC,QAAQ,EAAE;SACzC,CAAC,CACH,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,UAAU,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAErC,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QAC1E,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAC5C,IAAI,wBAAwB,CAAC,SAAS,CAAC,CACxC,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC1D,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;QAC3E,MAAM,MAAM,CAAC,aAAa,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vivantel/virage-code-chunk-chunker",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.43",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "AST-aware code chunking strategy for @vivantel/virage-core, powered by code-chunk",
|
|
6
6
|
"repository": {
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
"@vivantel/virage-core": "*"
|
|
53
53
|
},
|
|
54
54
|
"devDependencies": {
|
|
55
|
-
"@vivantel/virage-core": "0.2.
|
|
55
|
+
"@vivantel/virage-core": "0.2.58",
|
|
56
56
|
"@types/node": "^25.9.3",
|
|
57
57
|
"typescript": "^6.0.3",
|
|
58
58
|
"vitest": "^4.1.9"
|