@vivantel/virage-code-chunk-chunker 0.1.41 → 0.1.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +52 -35
- package/dist/index.js.map +1 -1
- package/dist/index.test.js +76 -146
- package/dist/index.test.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export interface CodeChunkStrategyOptions {
|
|
1
|
+
import type { FileChunker } from "@vivantel/virage-core";
|
|
2
|
+
export interface CodeChunkOptions {
|
|
3
3
|
/** Maximum size of each chunk in bytes (default: 1500) */
|
|
4
4
|
maxChunkSize?: number;
|
|
5
5
|
/** How much context to include (default: "full") */
|
|
@@ -11,15 +11,11 @@ export interface CodeChunkStrategyOptions {
|
|
|
11
11
|
/** Number of lines to overlap from the previous chunk (default: 0) */
|
|
12
12
|
overlapLines?: number;
|
|
13
13
|
/**
|
|
14
|
-
*
|
|
15
|
-
* instead of raw
|
|
14
|
+
* When true, `denseText` uses the scope-contextualized form (prepends
|
|
15
|
+
* scope chain + sibling signatures) instead of raw body. Produces richer
|
|
16
|
+
* embeddings at the cost of slightly longer texts. (default: false)
|
|
16
17
|
*/
|
|
17
18
|
useContextualizedText?: boolean;
|
|
18
19
|
}
|
|
19
|
-
export declare function codeChunkStrategy(options?: CodeChunkStrategyOptions): ChunkStrategy;
|
|
20
|
-
export declare const ragPlugin: {
|
|
21
|
-
name: string;
|
|
22
|
-
type: "chunker";
|
|
23
|
-
factory: () => ChunkStrategy;
|
|
24
|
-
};
|
|
20
|
+
export declare function createChunker(options?: CodeChunkOptions): FileChunker;
|
|
25
21
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,WAAW,EAAS,MAAM,uBAAuB,CAAC;AAchE,MAAM,WAAW,gBAAgB;IAC/B,0DAA0D;IAC1D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IAC1C,iEAAiE;IACjE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,YAAY,CAAC;IAChD,4DAA4D;IAC5D,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,sEAAsE;IACtE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,OAAO,CAAC;CACjC;AAWD,wBAAgB,aAAa,CAAC,OAAO,GAAE,gBAAqB,GAAG,WAAW,CAuDzE"}
|
package/dist/index.js
CHANGED
|
@@ -1,13 +1,38 @@
|
|
|
1
|
+
import { readFile } from "fs/promises";
|
|
1
2
|
import { chunk as codeChunk, detectLanguage, UnsupportedLanguageError, } from "code-chunk";
|
|
2
|
-
import {
|
|
3
|
-
|
|
3
|
+
import { computeDenseTextHash, makeDenseText } from "@vivantel/virage-core";
|
|
4
|
+
import { createHash } from "crypto";
|
|
5
|
+
const VERSION = "0.1.42";
|
|
6
|
+
const SUPPORTED_EXTENSIONS = [
|
|
7
|
+
"**/*.{js,mjs,cjs,ts,tsx,jsx}",
|
|
8
|
+
"**/*.{py,rb,go,rs,java,kt,scala}",
|
|
9
|
+
"**/*.{c,cpp,cc,cxx,h,hpp}",
|
|
10
|
+
"**/*.{cs,swift,m,mm}",
|
|
11
|
+
"**/*.{php,lua,sh,bash,zsh}",
|
|
12
|
+
];
|
|
13
|
+
function optionsFingerprint(opts) {
|
|
14
|
+
const { useContextualizedText: _unused, ...sparseOpts } = opts;
|
|
15
|
+
void _unused;
|
|
16
|
+
return createHash("sha256")
|
|
17
|
+
.update(JSON.stringify(sparseOpts))
|
|
18
|
+
.digest("hex")
|
|
19
|
+
.slice(0, 16);
|
|
20
|
+
}
|
|
21
|
+
export function createChunker(options = {}) {
|
|
4
22
|
const { useContextualizedText = false, ...chunkOptions } = options;
|
|
23
|
+
const sparseId = `code-chunk-ast@${VERSION}:${optionsFingerprint(options)}`;
|
|
24
|
+
const ctxHash = `code-chunk-ast@${VERSION}:ctx:${optionsFingerprint(options)}`;
|
|
5
25
|
return {
|
|
6
26
|
name: "code-chunk-ast",
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
27
|
+
version: VERSION,
|
|
28
|
+
patterns: SUPPORTED_EXTENSIONS,
|
|
29
|
+
sparseTextGeneratorId: sparseId,
|
|
30
|
+
metadataGeneratorId: ctxHash,
|
|
31
|
+
async canProcess(filePath) {
|
|
32
|
+
return detectLanguage(filePath) !== null;
|
|
33
|
+
},
|
|
34
|
+
async chunk(filePath, commitHash) {
|
|
35
|
+
const text = await readFile(filePath, "utf-8");
|
|
11
36
|
let results;
|
|
12
37
|
try {
|
|
13
38
|
results = await codeChunk(filePath, text, chunkOptions);
|
|
@@ -18,36 +43,28 @@ export function codeChunkStrategy(options = {}) {
|
|
|
18
43
|
}
|
|
19
44
|
throw err;
|
|
20
45
|
}
|
|
21
|
-
return results.map((c, i) =>
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
};
|
|
42
|
-
},
|
|
43
|
-
getQualityMetrics(chunks) {
|
|
44
|
-
return computeChunkQualityMetrics(chunks);
|
|
46
|
+
return results.map((c, i) => {
|
|
47
|
+
const sparseText = c.text;
|
|
48
|
+
const breadcrumb = c.context.scope.map((s) => s.name);
|
|
49
|
+
const denseText = useContextualizedText
|
|
50
|
+
? c.contextualizedText
|
|
51
|
+
: makeDenseText(breadcrumb, sparseText);
|
|
52
|
+
return {
|
|
53
|
+
denseText,
|
|
54
|
+
sparseText,
|
|
55
|
+
denseTextHash: computeDenseTextHash(denseText),
|
|
56
|
+
sparseTextGeneratorId: sparseId,
|
|
57
|
+
metadataGeneratorId: ctxHash,
|
|
58
|
+
metadata: {
|
|
59
|
+
strategy: "code-chunk-ast",
|
|
60
|
+
chunkIndex: i,
|
|
61
|
+
totalChunks: c.totalChunks,
|
|
62
|
+
},
|
|
63
|
+
sourceFile: filePath,
|
|
64
|
+
commitHash,
|
|
65
|
+
};
|
|
66
|
+
});
|
|
45
67
|
},
|
|
46
68
|
};
|
|
47
69
|
}
|
|
48
|
-
export const ragPlugin = {
|
|
49
|
-
name: "code-chunk-ast",
|
|
50
|
-
type: "chunker",
|
|
51
|
-
factory: () => codeChunkStrategy(),
|
|
52
|
-
};
|
|
53
70
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,IAAI,SAAS,EAClB,cAAc,EACd,wBAAwB,GACzB,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,KAAK,IAAI,SAAS,EAClB,cAAc,EACd,wBAAwB,GACzB,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,oBAAoB,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC5E,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,MAAM,OAAO,GAAG,QAAQ,CAAC;AAEzB,MAAM,oBAAoB,GAAG;IAC3B,8BAA8B;IAC9B,kCAAkC;IAClC,2BAA2B;IAC3B,sBAAsB;IACtB,4BAA4B;CAC7B,CAAC;AAqBF,SAAS,kBAAkB,CAAC,IAAsB;IAChD,MAAM,EAAE,qBAAqB,EAAE,OAAO,EAAE,GAAG,UAAU,EAAE,GAAG,IAAI,CAAC;IAC/D,KAAK,OAAO,CAAC;IACb,OAAO,UAAU,CAAC,QAAQ,CAAC;SACxB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;SAClC,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,UAA4B,EAAE;IAC1D,MAAM,EAAE,qBAAqB,GAAG,KAAK,EAAE,GAAG,YAAY,EAAE,GACtD,OAA0C,CAAC;IAE7C,MAAM,QAAQ,GAAG,kBAAkB,OAAO,IAAI,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC5E,MAAM,OAAO,GAAG,kBAAkB,OAAO,QAAQ,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC;IAE/E,OAAO;QACL,IAAI,EAAE,gBAAgB;QACtB,OAAO,EAAE,OAAO;QAChB,QAAQ,EAAE,oBAAoB;QAC9B,qBAAqB,EAAE,QAAQ;QAC/B,mBAAmB,EAAE,OAAO;QAE5B,KAAK,CAAC,UAAU,CAAC,QAAgB;YAC/B,OAAO,cAAc,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC;QAC3C,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,UAAkB;YAC9C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAE/C,IAAI,OAAoB,CAAC;YACzB,IAAI,CAAC;gBACH,OAAO,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,YAAY,CAAC,CAAC;YAC1D,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,IAAI,GAAG,YAAY,wBAAwB,EAAE,CAAC;oBAC5C,OAAO,EAAE,CAAC;gBACZ,CAAC;gBACD,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBAC1B,MAAM,UAAU,GAAG,CAAC,CAAC,IAAI,CAAC;gBAC1B,MAAM,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACtD,MAAM,SAAS,GAAG,qBAAqB;oBACrC,CAAC,CAAC,CAAC,CAAC,kBAAkB;oBACtB,CAAC,CAAC,aAAa,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;gBAE1C,OAAO;oBACL,SAAS;oBACT,UAAU;oBACV,aAAa,EAAE,oBAAoB,CAAC,SAAS,CAAC;oBAC9C,qBAAqB,EAAE,QAAQ;oBAC/B,mBAAmB,EAAE,OAAO;oBAC5B,QAAQ,EAAE;wBACR,QAAQ,EA
AE,gBAAgB;wBAC1B,UAAU,EAAE,CAAC;wBACb,WAAW,EAAE,CAAC,CAAC,WAAW;qBACkB;oBAC9C,UAAU,EAAE,QAAQ;oBACpB,UAAU;iBACX,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/index.test.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
-
import {
|
|
2
|
+
import { createChunker } from "./index.js";
|
|
3
|
+
vi.mock("fs/promises", () => ({
|
|
4
|
+
readFile: vi.fn().mockResolvedValue("function foo() {}"),
|
|
5
|
+
}));
|
|
3
6
|
vi.mock("code-chunk", () => ({
|
|
4
7
|
chunk: vi.fn(),
|
|
5
8
|
detectLanguage: vi.fn(),
|
|
@@ -28,175 +31,102 @@ const makeMockChunk = (overrides = {}) => ({
|
|
|
28
31
|
imports: [],
|
|
29
32
|
},
|
|
30
33
|
});
|
|
31
|
-
// ───
|
|
32
|
-
describe("
|
|
33
|
-
it("
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
expect(
|
|
38
|
-
expect(
|
|
39
|
-
expect(typeof
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
-
expect(
|
|
47
|
-
expect(typeof s.extractMetadata).toBe("function");
|
|
48
|
-
expect(typeof s.getQualityMetrics).toBe("function");
|
|
49
|
-
});
|
|
50
|
-
it("strategy name is 'code-chunk-ast'", () => {
|
|
51
|
-
expect(codeChunkStrategy().name).toBe("code-chunk-ast");
|
|
34
|
+
// ─── Factory ─────────────────────────────────────────────────
|
|
35
|
+
describe("createChunker() factory", () => {
|
|
36
|
+
it("returns a FileChunker with required fields", () => {
|
|
37
|
+
const chunker = createChunker();
|
|
38
|
+
expect(chunker.name).toBe("code-chunk-ast");
|
|
39
|
+
expect(typeof chunker.version).toBe("string");
|
|
40
|
+
expect(Array.isArray(chunker.patterns)).toBe(true);
|
|
41
|
+
expect(typeof chunker.sparseTextGeneratorId).toBe("string");
|
|
42
|
+
expect(typeof chunker.metadataGeneratorId).toBe("string");
|
|
43
|
+
expect(typeof chunker.chunk).toBe("function");
|
|
44
|
+
expect(typeof chunker.canProcess).toBe("function");
|
|
45
|
+
});
|
|
46
|
+
it("different option sets produce different sparseTextGeneratorId", () => {
|
|
47
|
+
const a = createChunker({ maxChunkSize: 500 });
|
|
48
|
+
const b = createChunker({ maxChunkSize: 1000 });
|
|
49
|
+
expect(a.sparseTextGeneratorId).not.toBe(b.sparseTextGeneratorId);
|
|
52
50
|
});
|
|
53
51
|
});
|
|
54
|
-
// ───
|
|
55
|
-
describe("
|
|
56
|
-
beforeEach(() =>
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
const result = await codeChunkStrategy().chunk("const x = 1;");
|
|
61
|
-
expect(result).toHaveLength(0);
|
|
62
|
-
expect(mockCodeChunk).not.toHaveBeenCalled();
|
|
63
|
-
});
|
|
64
|
-
it("returns [] when code-chunk throws UnsupportedLanguageError", async () => {
|
|
65
|
-
vi.mocked(mockCodeChunk).mockRejectedValueOnce(new UnsupportedLanguageError("file.rb"));
|
|
66
|
-
const result = await codeChunkStrategy().chunk("puts 'hello'", "file.rb");
|
|
67
|
-
expect(result).toHaveLength(0);
|
|
52
|
+
// ─── canProcess() ────────────────────────────────────────────
|
|
53
|
+
describe("canProcess()", () => {
|
|
54
|
+
beforeEach(() => vi.clearAllMocks());
|
|
55
|
+
it("returns true when detectLanguage returns a language", async () => {
|
|
56
|
+
vi.mocked(mockDetectLanguage).mockReturnValueOnce("typescript");
|
|
57
|
+
expect(await createChunker().canProcess("src/main.ts")).toBe(true);
|
|
68
58
|
});
|
|
69
|
-
it("
|
|
70
|
-
vi.mocked(
|
|
71
|
-
await
|
|
59
|
+
it("returns false when detectLanguage returns null", async () => {
|
|
60
|
+
vi.mocked(mockDetectLanguage).mockReturnValueOnce(null);
|
|
61
|
+
expect(await createChunker().canProcess("file.xyz")).toBe(false);
|
|
72
62
|
});
|
|
73
63
|
});
|
|
74
64
|
// ─── chunk() — happy path ────────────────────────────────────
|
|
75
|
-
describe("chunk() — mapping", () => {
|
|
76
|
-
beforeEach(() =>
|
|
77
|
-
|
|
78
|
-
});
|
|
79
|
-
it("maps c.text to Chunk.content by default", async () => {
|
|
65
|
+
describe("chunk() — four-artifact mapping", () => {
|
|
66
|
+
beforeEach(() => vi.clearAllMocks());
|
|
67
|
+
it("maps c.text to sparseText", async () => {
|
|
80
68
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
81
|
-
const
|
|
82
|
-
expect(
|
|
69
|
+
const [c] = await createChunker().chunk("f.ts", "abc123");
|
|
70
|
+
expect(c.sparseText).toBe("function foo() {}");
|
|
71
|
+
});
|
|
72
|
+
it("denseText uses scope breadcrumb + sparseText by default", async () => {
|
|
73
|
+
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
74
|
+
makeMockChunk({ scope: [{ name: "MyClass", type: "class" }] }),
|
|
75
|
+
]);
|
|
76
|
+
const [c] = await createChunker().chunk("f.ts", "abc123");
|
|
77
|
+
expect(c.denseText).toContain("MyClass");
|
|
78
|
+
expect(c.denseText).toContain("function foo() {}");
|
|
83
79
|
});
|
|
84
|
-
it("
|
|
80
|
+
it("denseText equals contextualizedText when useContextualizedText is true", async () => {
|
|
85
81
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
}).chunk("code", "f.ts");
|
|
89
|
-
expect(chunks[0].content).toBe("// scope: module\nfunction foo() {}");
|
|
82
|
+
const [c] = await createChunker({ useContextualizedText: true }).chunk("f.ts", "abc123");
|
|
83
|
+
expect(c.denseText).toBe("// scope: module\nfunction foo() {}");
|
|
90
84
|
});
|
|
91
|
-
it("
|
|
85
|
+
it("denseTextHash is a 16-char hex string", async () => {
|
|
92
86
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
93
|
-
const
|
|
94
|
-
expect(
|
|
95
|
-
expect(chunks[0].commitHash).toBe("");
|
|
96
|
-
expect(chunks[0].metadata.strategy).toBe("code-chunk-ast");
|
|
97
|
-
expect(chunks[0].metadata.source_file).toBe("src/main.ts");
|
|
87
|
+
const [c] = await createChunker().chunk("f.ts", "abc123");
|
|
88
|
+
expect(c.denseTextHash).toMatch(/^[0-9a-f]{16}$/);
|
|
98
89
|
});
|
|
99
|
-
it("sets
|
|
100
|
-
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const chunks = await codeChunkStrategy().chunk("ab", "f.ts");
|
|
105
|
-
expect(chunks[0].metadata.chunk_index).toBe(0);
|
|
106
|
-
expect(chunks[1].metadata.chunk_index).toBe(1);
|
|
90
|
+
it("sets sourceFile and commitHash from arguments", async () => {
|
|
91
|
+
vi.mocked(mockCodeChunk).mockResolvedValueOnce([makeMockChunk()]);
|
|
92
|
+
const [c] = await createChunker().chunk("src/main.ts", "deadbeef");
|
|
93
|
+
expect(c.sourceFile).toBe("src/main.ts");
|
|
94
|
+
expect(c.commitHash).toBe("deadbeef");
|
|
107
95
|
});
|
|
108
|
-
it("
|
|
109
|
-
const scope = [{ name: "MyClass", type: "class" }];
|
|
110
|
-
const entities = [{ name: "myMethod", type: "method" }];
|
|
96
|
+
it("sets metadata.strategy and index fields", async () => {
|
|
111
97
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
112
|
-
makeMockChunk({
|
|
98
|
+
makeMockChunk({ totalChunks: 3 }),
|
|
113
99
|
]);
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
expect(
|
|
100
|
+
const [c] = await createChunker().chunk("f.ts", "");
|
|
101
|
+
const meta = c.metadata;
|
|
102
|
+
expect(meta.strategy).toBe("code-chunk-ast");
|
|
103
|
+
expect(meta.chunkIndex).toBe(0);
|
|
104
|
+
expect(meta.totalChunks).toBe(3);
|
|
117
105
|
});
|
|
118
106
|
it("passes ChunkOptions to code-chunk (not useContextualizedText)", async () => {
|
|
119
107
|
vi.mocked(mockCodeChunk).mockResolvedValueOnce([]);
|
|
120
|
-
await
|
|
108
|
+
await createChunker({
|
|
121
109
|
maxChunkSize: 500,
|
|
122
110
|
filterImports: true,
|
|
123
111
|
useContextualizedText: true,
|
|
124
|
-
}).chunk("
|
|
125
|
-
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts",
|
|
126
|
-
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts",
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
vi.mocked(mockCodeChunk).mockResolvedValueOnce([
|
|
130
|
-
makeMockChunk({ totalChunks: 3 }),
|
|
131
|
-
]);
|
|
132
|
-
const chunks = await codeChunkStrategy().chunk("code", "f.ts");
|
|
133
|
-
expect(chunks[0].metadata.total_chunks).toBe(3);
|
|
112
|
+
}).chunk("f.ts", "");
|
|
113
|
+
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts", expect.any(String), expect.objectContaining({ maxChunkSize: 500, filterImports: true }));
|
|
114
|
+
expect(mockCodeChunk).toHaveBeenCalledWith("f.ts", expect.any(String), expect.not.objectContaining({
|
|
115
|
+
useContextualizedText: expect.anything(),
|
|
116
|
+
}));
|
|
134
117
|
});
|
|
135
118
|
});
|
|
136
|
-
// ───
|
|
137
|
-
describe("
|
|
138
|
-
beforeEach(() =>
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
const meta = codeChunkStrategy().extractMetadata("code", "src/main.ts");
|
|
144
|
-
expect(meta.supported).toBe(true);
|
|
145
|
-
expect(meta.language).toBe("typescript");
|
|
146
|
-
expect(meta.strategy).toBe("code-chunk-ast");
|
|
147
|
-
});
|
|
148
|
-
it("returns supported: false and language: 'unknown' for unsupported extensions", () => {
|
|
149
|
-
vi.mocked(mockDetectLanguage).mockReturnValueOnce(null);
|
|
150
|
-
const meta = codeChunkStrategy().extractMetadata("code", "file.xyz");
|
|
151
|
-
expect(meta.supported).toBe(false);
|
|
152
|
-
expect(meta.language).toBe("unknown");
|
|
153
|
-
});
|
|
154
|
-
it("returns supported: false when filePath is undefined", () => {
|
|
155
|
-
const meta = codeChunkStrategy().extractMetadata("code");
|
|
156
|
-
expect(meta.supported).toBe(false);
|
|
157
|
-
expect(mockDetectLanguage).not.toHaveBeenCalled();
|
|
158
|
-
});
|
|
159
|
-
});
|
|
160
|
-
// ─── getQualityMetrics() ─────────────────────────────────────
|
|
161
|
-
describe("getQualityMetrics()", () => {
|
|
162
|
-
it("returns zero metrics for empty chunk array", () => {
|
|
163
|
-
const metrics = codeChunkStrategy().getQualityMetrics([]);
|
|
164
|
-
expect(metrics.avgChunkSize).toBe(0);
|
|
165
|
-
expect(metrics.stdDevChunkSize).toBe(0);
|
|
166
|
-
expect(metrics.semanticCoherence).toBe(0);
|
|
167
|
-
expect(metrics.informationDensity).toBe(0);
|
|
168
|
-
});
|
|
169
|
-
it("computes non-zero metrics from real chunk content", () => {
|
|
170
|
-
const chunks = [
|
|
171
|
-
{
|
|
172
|
-
content: "function foo() { return 42; }",
|
|
173
|
-
metadata: {},
|
|
174
|
-
sourceFile: "f.ts",
|
|
175
|
-
commitHash: "",
|
|
176
|
-
},
|
|
177
|
-
{
|
|
178
|
-
content: "function bar(x: number) { return x * 2; }",
|
|
179
|
-
metadata: {},
|
|
180
|
-
sourceFile: "f.ts",
|
|
181
|
-
commitHash: "",
|
|
182
|
-
},
|
|
183
|
-
];
|
|
184
|
-
const metrics = codeChunkStrategy().getQualityMetrics(chunks);
|
|
185
|
-
expect(metrics.avgChunkSize).toBeGreaterThan(0);
|
|
186
|
-
expect(metrics.informationDensity).toBeGreaterThan(0);
|
|
187
|
-
expect(metrics.stdDevChunkSize).toBeGreaterThanOrEqual(0);
|
|
188
|
-
expect(metrics.semanticCoherence).toBeGreaterThanOrEqual(0);
|
|
189
|
-
});
|
|
190
|
-
});
|
|
191
|
-
// ─── ragPlugin ───────────────────────────────────────────────
|
|
192
|
-
describe("ragPlugin", () => {
|
|
193
|
-
it("factory() returns a strategy with correct name and chunk function", () => {
|
|
194
|
-
const strategy = ragPlugin.factory();
|
|
195
|
-
expect(strategy.name).toBe("code-chunk-ast");
|
|
196
|
-
expect(typeof strategy.chunk).toBe("function");
|
|
119
|
+
// ─── chunk() — error handling ────────────────────────────────
|
|
120
|
+
describe("chunk() — edge cases", () => {
|
|
121
|
+
beforeEach(() => vi.clearAllMocks());
|
|
122
|
+
it("returns [] when code-chunk throws UnsupportedLanguageError", async () => {
|
|
123
|
+
vi.mocked(mockCodeChunk).mockRejectedValueOnce(new UnsupportedLanguageError("file.rb"));
|
|
124
|
+
const result = await createChunker().chunk("file.rb", "");
|
|
125
|
+
expect(result).toHaveLength(0);
|
|
197
126
|
});
|
|
198
|
-
it("
|
|
199
|
-
|
|
127
|
+
it("propagates non-UnsupportedLanguageError errors", async () => {
|
|
128
|
+
vi.mocked(mockCodeChunk).mockRejectedValueOnce(new Error("parse failure"));
|
|
129
|
+
await expect(createChunker().chunk("file.ts", "")).rejects.toThrow("parse failure");
|
|
200
130
|
});
|
|
201
131
|
});
|
|
202
132
|
//# sourceMappingURL=index.test.js.map
|
package/dist/index.test.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.test.js","sourceRoot":"","sources":["../src/index.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.test.js","sourceRoot":"","sources":["../src/index.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,EAAE,CAAC,IAAI,CAAC,aAAa,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5B,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,mBAAmB,CAAC;CACzD,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;IAC3B,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE;IACd,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE;IACvB,wBAAwB,EAAE,MAAM,wBAAyB,SAAQ,KAAK;QAC3D,IAAI,GAAG,0BAA0B,CAAC;QAC3C,YAAY,QAAgB;YAC1B,KAAK,CAAC,kCAAkC,QAAQ,EAAE,CAAC,CAAC;YACpD,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAC;QACzC,CAAC;KACF;CACF,CAAC,CAAC,CAAC;AAEJ,OAAO,EACL,KAAK,IAAI,aAAa,EACtB,cAAc,IAAI,kBAAkB,EACpC,wBAAwB,GACzB,MAAM,YAAY,CAAC;AAEpB,MAAM,aAAa,GAAG,CACpB,YAMK,EAAE,EACP,EAAE,CAAC,CAAC;IACJ,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,mBAAmB;IAC3C,kBAAkB,EAChB,SAAS,CAAC,kBAAkB,IAAI,qCAAqC;IACvE,SAAS,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;IAChC,SAAS,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;IAC/B,KAAK,EAAE,CAAC;IACR,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,CAAC;IACvC,OAAO,EAAE;QACP,KAAK,EAAE,SAAS,CAAC,KAAK,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAiB,EAAE,CAAC;QACvE,QAAQ,EAAE,SAAS,CAAC,QAAQ,IAAI;YAC9B,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,UAAmB,EAAE;SAC3C;QACD,QAAQ,EAAE,EAAE;QACZ,OAAO,EAAE,EAAE;KACZ;CACF,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,OAAO,GAAG,aAAa,EAAE,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC5C,MAAM,CAAC,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,MAAM,CAAC,OAAO,OAAO,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC5D,MAAM,CAAC,OAAO,OAAO,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,CAAC,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC,IAAI
,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+DAA+D,EAAE,GAAG,EAAE;QACvE,MAAM,CAAC,GAAG,aAAa,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;QAC/C,MAAM,CAAC,GAAG,aAAa,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,MAAM,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,UAAU,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAErC,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,EAAE,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,aAAa,EAAE,CAAC,UAAW,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,EAAE,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,aAAa,EAAE,CAAC,UAAW,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,iCAAiC,EAAE,GAAG,EAAE;IAC/C,UAAU,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAErC,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;QACzC,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,KAAK,IAAI,EAAE;QACvE,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC;YAC7C,aAAa,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;SAC/D,CAAC,CAAC;QACH,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QACzC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wEAAwE,EAAE,KAAK,IAAI,EAAE;QACtF,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,CAAC,EAAE,qBAAqB,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CACpE,MA
AM,EACN,QAAQ,CACT,CAAC;QACF,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACzC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC;YAC7C,aAAa,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;SAClC,CAAC,CAAC;QACH,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACpD,MAAM,IAAI,GAAG,CAAC,CAAC,QAA8C,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC7C,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+DAA+D,EAAE,KAAK,IAAI,EAAE;QAC7E,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC;QACnD,MAAM,aAAa,CAAC;YAClB,YAAY,EAAE,GAAG;YACjB,aAAa,EAAE,IAAI;YACnB,qBAAqB,EAAE,IAAI;SAC5B,CAAC,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACrB,MAAM,CAAC,aAAa,CAAC,CAAC,oBAAoB,CACxC,MAAM,EACN,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,EAClB,MAAM,CAAC,gBAAgB,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CACpE,CAAC;QACF,MAAM,CAAC,aAAa,CAAC,CAAC,oBAAoB,CACxC,MAAM,EACN,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,EAClB,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC;YAC1B,qBAAqB,EAAE,MAAM,CAAC,QAAQ,EAAE;SACzC,CAAC,CACH,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,gEAAgE;AAEhE,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,UAAU,CAAC,GAAG,EAAE,CAAC
,EAAE,CAAC,aAAa,EAAE,CAAC,CAAC;IAErC,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QAC1E,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAC5C,IAAI,wBAAwB,CAAC,SAAS,CAAC,CACxC,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,aAAa,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC1D,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;QAC3E,MAAM,MAAM,CAAC,aAAa,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChE,eAAe,CAChB,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vivantel/virage-code-chunk-chunker",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.45",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "AST-aware code chunking strategy for @vivantel/virage-core, powered by code-chunk",
|
|
6
6
|
"repository": {
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
"@vivantel/virage-core": "*"
|
|
53
53
|
},
|
|
54
54
|
"devDependencies": {
|
|
55
|
-
"@vivantel/virage-core": "0.2.
|
|
55
|
+
"@vivantel/virage-core": "0.2.60",
|
|
56
56
|
"@types/node": "^25.9.3",
|
|
57
57
|
"typescript": "^6.0.3",
|
|
58
58
|
"vitest": "^4.1.9"
|