@matperez/coderag 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/ast-chunking.d.ts +40 -0
- package/dist/ast-chunking.d.ts.map +1 -0
- package/dist/ast-chunking.js +88 -0
- package/dist/ast-chunking.js.map +1 -0
- package/dist/ast-chunking.test.d.ts +5 -0
- package/dist/ast-chunking.test.d.ts.map +1 -0
- package/dist/ast-chunking.test.js +173 -0
- package/dist/ast-chunking.test.js.map +1 -0
- package/dist/code-tokenizer.d.ts +62 -0
- package/dist/code-tokenizer.d.ts.map +1 -0
- package/dist/code-tokenizer.js +129 -0
- package/dist/code-tokenizer.js.map +1 -0
- package/dist/code-tokenizer.test.d.ts +5 -0
- package/dist/code-tokenizer.test.d.ts.map +1 -0
- package/dist/code-tokenizer.test.js +96 -0
- package/dist/code-tokenizer.test.js.map +1 -0
- package/dist/db/client-pg.d.ts +16 -0
- package/dist/db/client-pg.d.ts.map +1 -0
- package/dist/db/client-pg.js +38 -0
- package/dist/db/client-pg.js.map +1 -0
- package/dist/db/client.d.ts +36 -0
- package/dist/db/client.d.ts.map +1 -0
- package/dist/db/client.js +81 -0
- package/dist/db/client.js.map +1 -0
- package/dist/db/migrations-pg.d.ts +6 -0
- package/dist/db/migrations-pg.d.ts.map +1 -0
- package/dist/db/migrations-pg.js +88 -0
- package/dist/db/migrations-pg.js.map +1 -0
- package/dist/db/migrations.d.ts +9 -0
- package/dist/db/migrations.d.ts.map +1 -0
- package/dist/db/migrations.js +164 -0
- package/dist/db/migrations.js.map +1 -0
- package/dist/db/schema-pg.d.ts +611 -0
- package/dist/db/schema-pg.d.ts.map +1 -0
- package/dist/db/schema-pg.js +66 -0
- package/dist/db/schema-pg.js.map +1 -0
- package/dist/db/schema.d.ts +630 -0
- package/dist/db/schema.d.ts.map +1 -0
- package/dist/db/schema.js +85 -0
- package/dist/db/schema.js.map +1 -0
- package/dist/embeddings.d.ts +92 -0
- package/dist/embeddings.d.ts.map +1 -0
- package/dist/embeddings.js +275 -0
- package/dist/embeddings.js.map +1 -0
- package/dist/embeddings.test.d.ts +5 -0
- package/dist/embeddings.test.d.ts.map +1 -0
- package/dist/embeddings.test.js +255 -0
- package/dist/embeddings.test.js.map +1 -0
- package/dist/hybrid-search.d.ts +47 -0
- package/dist/hybrid-search.d.ts.map +1 -0
- package/dist/hybrid-search.js +215 -0
- package/dist/hybrid-search.js.map +1 -0
- package/dist/hybrid-search.test.d.ts +5 -0
- package/dist/hybrid-search.test.d.ts.map +1 -0
- package/dist/hybrid-search.test.js +252 -0
- package/dist/hybrid-search.test.js.map +1 -0
- package/dist/incremental-tfidf.d.ts +77 -0
- package/dist/incremental-tfidf.d.ts.map +1 -0
- package/dist/incremental-tfidf.js +248 -0
- package/dist/incremental-tfidf.js.map +1 -0
- package/dist/incremental-tfidf.test.d.ts +5 -0
- package/dist/incremental-tfidf.test.d.ts.map +1 -0
- package/dist/incremental-tfidf.test.js +276 -0
- package/dist/incremental-tfidf.test.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +205 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +1331 -0
- package/dist/indexer.js.map +1 -0
- package/dist/indexer.test.d.ts +12 -0
- package/dist/indexer.test.d.ts.map +1 -0
- package/dist/indexer.test.js +471 -0
- package/dist/indexer.test.js.map +1 -0
- package/dist/language-config.d.ts +54 -0
- package/dist/language-config.d.ts.map +1 -0
- package/dist/language-config.js +75 -0
- package/dist/language-config.js.map +1 -0
- package/dist/search-cache.d.ts +63 -0
- package/dist/search-cache.d.ts.map +1 -0
- package/dist/search-cache.js +118 -0
- package/dist/search-cache.js.map +1 -0
- package/dist/search-cache.test.d.ts +5 -0
- package/dist/search-cache.test.d.ts.map +1 -0
- package/dist/search-cache.test.js +194 -0
- package/dist/search-cache.test.js.map +1 -0
- package/dist/storage-factory.d.ts +11 -0
- package/dist/storage-factory.d.ts.map +1 -0
- package/dist/storage-factory.js +17 -0
- package/dist/storage-factory.js.map +1 -0
- package/dist/storage-persistent-pg.d.ts +75 -0
- package/dist/storage-persistent-pg.d.ts.map +1 -0
- package/dist/storage-persistent-pg.js +579 -0
- package/dist/storage-persistent-pg.js.map +1 -0
- package/dist/storage-persistent-pg.test.d.ts +7 -0
- package/dist/storage-persistent-pg.test.d.ts.map +1 -0
- package/dist/storage-persistent-pg.test.js +90 -0
- package/dist/storage-persistent-pg.test.js.map +1 -0
- package/dist/storage-persistent-types.d.ts +110 -0
- package/dist/storage-persistent-types.d.ts.map +1 -0
- package/dist/storage-persistent-types.js +5 -0
- package/dist/storage-persistent-types.js.map +1 -0
- package/dist/storage-persistent.d.ts +231 -0
- package/dist/storage-persistent.d.ts.map +1 -0
- package/dist/storage-persistent.js +897 -0
- package/dist/storage-persistent.js.map +1 -0
- package/dist/storage-persistent.test.d.ts +5 -0
- package/dist/storage-persistent.test.d.ts.map +1 -0
- package/dist/storage-persistent.test.js +325 -0
- package/dist/storage-persistent.test.js.map +1 -0
- package/dist/storage.d.ts +63 -0
- package/dist/storage.d.ts.map +1 -0
- package/dist/storage.js +67 -0
- package/dist/storage.js.map +1 -0
- package/dist/storage.test.d.ts +5 -0
- package/dist/storage.test.d.ts.map +1 -0
- package/dist/storage.test.js +157 -0
- package/dist/storage.test.js.map +1 -0
- package/dist/tfidf.d.ts +97 -0
- package/dist/tfidf.d.ts.map +1 -0
- package/dist/tfidf.js +308 -0
- package/dist/tfidf.js.map +1 -0
- package/dist/tfidf.test.d.ts +5 -0
- package/dist/tfidf.test.d.ts.map +1 -0
- package/dist/tfidf.test.js +181 -0
- package/dist/tfidf.test.js.map +1 -0
- package/dist/utils.d.ts +61 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +264 -0
- package/dist/utils.js.map +1 -0
- package/dist/utils.test.d.ts +5 -0
- package/dist/utils.test.d.ts.map +1 -0
- package/dist/utils.test.js +94 -0
- package/dist/utils.test.js.map +1 -0
- package/dist/vector-storage.d.ts +120 -0
- package/dist/vector-storage.d.ts.map +1 -0
- package/dist/vector-storage.js +264 -0
- package/dist/vector-storage.js.map +1 -0
- package/dist/vector-storage.test.d.ts +5 -0
- package/dist/vector-storage.test.d.ts.map +1 -0
- package/dist/vector-storage.test.js +345 -0
- package/dist/vector-storage.test.js.map +1 -0
- package/package.json +85 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * AST-Based Code Chunking using code-chunk (tree-sitter)
 *
 * Splits code at semantic boundaries (functions, classes, etc.)
 * Supports TypeScript, JavaScript, Python, Rust, Go, Java.
 * Other file types use character-based fallback.
 */
/**
 * AST-based chunking options
 */
export interface ASTChunkOptions {
    /** Maximum chunk size (presumably characters; implementation defaults to 1000 — confirm) */
    readonly maxChunkSize?: number;
    /** NOTE(review): declared but not visibly consumed by the implementation — confirm */
    readonly minChunkSize?: number;
}
/**
 * Chunk result with metadata
 */
export interface ChunkResult {
    /** Raw text of the chunk */
    readonly content: string;
    /** Type of the chunk's first entity (e.g. a function), 'chunk' when none, 'text' for fallback chunks */
    readonly type: string;
    /** 1-based start line for AST chunks; 0 for character-based fallback chunks */
    readonly startLine: number;
    /** 1-based end line for AST chunks; 0 for character-based fallback chunks */
    readonly endLine: number;
    /** Always includes `fallback`; AST chunks also carry `scope` and `entities` arrays */
    readonly metadata: Record<string, unknown>;
}
/**
 * Chunk code using AST analysis (via code-chunk / tree-sitter)
 *
 * Supported extensions: .ts, .tsx, .js, .jsx, .mjs, .cjs, .py, .pyi, .rs, .go, .java.
 * Other files are chunked with character-based fallback.
 */
export declare function chunkCodeByAST(code: string, filePath: string, options?: ASTChunkOptions): Promise<readonly ChunkResult[]>;
/**
 * Simple wrapper for backward compatibility
 */
export declare function chunkCodeByASTSimple(code: string, filePath: string, options?: ASTChunkOptions): Promise<readonly string[]>;
/**
 * Get list of languages supported for AST chunking (code-chunk)
 */
export declare function getSupportedLanguages(): string[];
//# sourceMappingURL=ast-chunking.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ast-chunking.d.ts","sourceRoot":"","sources":["../src/ast-chunking.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAUH;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAA;IAC9B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAA;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAA;IAC1B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAC1C;AA8CD;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,eAAoB,GAC3B,OAAO,CAAC,SAAS,WAAW,EAAE,CAAC,CAiBjC;AAED;;GAEG;AACH,wBAAsB,oBAAoB,CACzC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,eAAoB,GAC3B,OAAO,CAAC,SAAS,MAAM,EAAE,CAAC,CAG5B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,IAAI,MAAM,EAAE,CAEhD"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST-Based Code Chunking using code-chunk (tree-sitter)
|
|
3
|
+
*
|
|
4
|
+
* Splits code at semantic boundaries (functions, classes, etc.)
|
|
5
|
+
* Supports TypeScript, JavaScript, Python, Rust, Go, Java.
|
|
6
|
+
* Other file types use character-based fallback.
|
|
7
|
+
*/
|
|
8
|
+
import { chunk as codeChunk } from 'code-chunk';
|
|
9
|
+
import { chunkText } from './embeddings.js';
|
|
10
|
+
/** Languages supported by code-chunk for AST chunking */
|
|
11
|
+
const CODE_CHUNK_SUPPORTED_LANGUAGES = [
    'typescript', // .ts, .tsx
    'javascript', // .js, .jsx, .mjs, .cjs
    'python', // .py, .pyi
    'rust', // .rs
    'go', // .go
    'java', // .java
];
|
|
19
|
+
/**
|
|
20
|
+
* Create fallback chunks using character-based splitting
|
|
21
|
+
*/
|
|
22
|
+
/**
 * Build fallback chunk results via character-based splitting (chunkText).
 *
 * Fallback chunks carry no line information (startLine/endLine are 0) and are
 * tagged with metadata.fallback = true plus their ordinal index.
 */
function createFallbackChunks(code, maxChunkSize) {
    const pieces = chunkText(code, { maxChunkSize });
    const results = [];
    for (let index = 0; index < pieces.length; index += 1) {
        results.push({
            content: pieces[index],
            type: 'text',
            startLine: 0,
            endLine: 0,
            metadata: { fallback: true, index },
        });
    }
    return results;
}
|
|
32
|
+
/**
 * Convert a code-chunk result into this module's ChunkResult shape.
 *
 * Line numbers are converted from code-chunk's 0-based ranges to 1-based.
 * The chunk "type" is taken from its first entity, defaulting to 'chunk'
 * when the chunk has no entities. Scope and entity lists are copied down
 * to plain objects so the result is decoupled from code-chunk's types.
 */
function mapCodeChunkToResult(c) {
    const [primaryEntity] = c.context.entities;
    const scope = c.context.scope.map(({ name, type }) => ({ name, type }));
    const entities = c.context.entities.map(({ name, type, signature }) => ({
        name,
        type,
        signature,
    }));
    return {
        content: c.text,
        type: primaryEntity?.type ?? 'chunk',
        startLine: c.lineRange.start + 1,
        endLine: c.lineRange.end + 1,
        metadata: { fallback: false, scope, entities },
    };
}
|
|
51
|
+
/**
|
|
52
|
+
* Chunk code using AST analysis (via code-chunk / tree-sitter)
|
|
53
|
+
*
|
|
54
|
+
* Supported extensions: .ts, .tsx, .js, .jsx, .mjs, .cjs, .py, .pyi, .rs, .go, .java.
|
|
55
|
+
* Other files are chunked with character-based fallback.
|
|
56
|
+
*/
|
|
57
|
+
/**
 * Chunk code using AST analysis (via code-chunk / tree-sitter).
 *
 * Attempts code-chunk first. If it throws for any reason, or returns no
 * chunks for non-empty input, character-based fallback chunking is used.
 * Empty or whitespace-only input produces an empty result.
 */
export async function chunkCodeByAST(code, filePath, options = {}) {
    const maxChunkSize = options.maxChunkSize ?? 1000;
    try {
        const astChunks = await codeChunk(filePath, code, {
            maxChunkSize,
            contextMode: 'full',
        });
        const hasContent = code.trim().length > 0;
        return astChunks.length === 0 && hasContent
            ? createFallbackChunks(code, maxChunkSize)
            : astChunks.map((astChunk) => mapCodeChunkToResult(astChunk));
    }
    catch (error) {
        // UnsupportedLanguageError, ChunkingError, or any other: fall back to character chunking
        const reason = error instanceof Error ? error.message : String(error);
        console.error('[WARN] AST chunking failed, falling back to character chunking:', reason);
        return createFallbackChunks(code, maxChunkSize);
    }
}
|
|
75
|
+
/**
|
|
76
|
+
* Simple wrapper for backward compatibility
|
|
77
|
+
*/
|
|
78
|
+
/**
 * Simple wrapper for backward compatibility: returns only the chunk text,
 * discarding the type/line/metadata of each ChunkResult.
 */
export async function chunkCodeByASTSimple(code, filePath, options = {}) {
    const results = await chunkCodeByAST(code, filePath, options);
    return results.map(({ content }) => content);
}
|
|
82
|
+
/**
|
|
83
|
+
* Get list of languages supported for AST chunking (code-chunk)
|
|
84
|
+
*/
|
|
85
|
+
/**
 * Get the list of languages supported for AST chunking (code-chunk).
 * Returns a fresh copy so callers cannot mutate the module-level list.
 */
export function getSupportedLanguages() {
    return CODE_CHUNK_SUPPORTED_LANGUAGES.slice();
}
|
|
88
|
+
//# sourceMappingURL=ast-chunking.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ast-chunking.js","sourceRoot":"","sources":["../src/ast-chunking.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,YAAY,CAAA;AAE/C,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAyB3C,yDAAyD;AACzD,MAAM,8BAA8B,GAAG;IACtC,YAAY;IACZ,YAAY;IACZ,QAAQ;IACR,MAAM;IACN,IAAI;IACJ,MAAM;CACG,CAAA;AAEV;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,YAAoB;IAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,EAAE,YAAY,EAAE,CAAC,CAAA;IAChD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAClC,OAAO;QACP,IAAI,EAAE,MAAM;QACZ,SAAS,EAAE,CAAC;QACZ,OAAO,EAAE,CAAC;QACV,QAAQ,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE;KACtC,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAiB;IAC9C,MAAM,WAAW,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;IACzC,MAAM,IAAI,GAAG,WAAW,EAAE,IAAI,IAAI,OAAO,CAAA;IACzC,OAAO;QACN,OAAO,EAAE,CAAC,CAAC,IAAI;QACf,IAAI;QACJ,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC,KAAK,GAAG,CAAC;QAChC,OAAO,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,GAAG,CAAC;QAC5B,QAAQ,EAAE;YACT,QAAQ,EAAE,KAAK;YACf,KAAK,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACnE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACxC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,SAAS,EAAE,CAAC,CAAC,SAAS;aACtB,CAAC,CAAC;SACH;KACD,CAAA;AACF,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CACnC,IAAY,EACZ,QAAgB,EAChB,UAA2B,EAAE;IAE7B,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAA;IAEjD,IAAI,CAAC;QACJ,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE;YAC9C,YAAY;YACZ,WAAW,EAAE,MAAM;SACnB,CAAC,CAAA;QACF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnD,OAAO,oBAAoB,CAAC,IAAI,EAAE,YAAY,CAAC,CAAA;QAChD,CAAC;QACD,OAAO,MAAM,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAA;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,yFAAyF;QACzF,OAAO,CAAC,KAAK,CAAC,iEAAiE,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAA
K,CAAC,CAAC,CAAA;QACxI,OAAO,oBAAoB,CAAC,IAAI,EAAE,YAAY,CAAC,CAAA;IAChD,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACzC,IAAY,EACZ,QAAgB,EAChB,UAA2B,EAAE;IAE7B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAA;IAC5D,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC5C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB;IACpC,OAAO,CAAC,GAAG,8BAA8B,CAAC,CAAA;AAC3C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ast-chunking.test.d.ts","sourceRoot":"","sources":["../src/ast-chunking.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
 * Tests for AST-based code chunking (code-chunk for supported languages, fallback for others)
 *
 * Covers: the supported-language list, character-based fallback for
 * non-AST file types (.md, .html, unknown extensions), AST chunking of
 * JavaScript (functions, classes, line ranges), size constraints,
 * resilience to syntax errors / empty input, and a coarse performance bound.
 */
import { describe, expect, it } from 'vitest';
import { chunkCodeByAST, chunkCodeByASTSimple, getSupportedLanguages, } from './ast-chunking.js';
describe('AST-based chunking', () => {
    describe('getSupportedLanguages', () => {
        it('returns code-chunk supported languages', () => {
            const langs = getSupportedLanguages();
            expect(langs).toContain('typescript');
            expect(langs).toContain('javascript');
            expect(langs).toContain('python');
            expect(langs).toContain('rust');
            expect(langs).toContain('go');
            expect(langs).toContain('java');
            expect(langs.length).toBe(6);
        });
    });
    describe('Markdown (fallback)', () => {
        it('uses character fallback for .md (no AST)', async () => {
            const markdown = `# Introduction

This is the introduction paragraph.

## Section 1

Some content here.
`;
            const chunks = await chunkCodeByAST(markdown, 'test.md');
            expect(chunks.length).toBeGreaterThan(0);
            // Fallback: type 'text', metadata.fallback true
            chunks.forEach((c) => {
                expect(c.type).toBe('text');
                expect(c.metadata.fallback).toBe(true);
            });
        });
        it('fallback preserves content and gives line 0 for fallback chunks', async () => {
            const markdown = `# Title

Paragraph 1

Paragraph 2
`;
            const chunks = await chunkCodeByAST(markdown, 'test.md');
            chunks.forEach((chunk) => {
                expect(chunk.content).toBeTruthy();
                expect(chunk.startLine).toBeGreaterThanOrEqual(0);
                expect(chunk.endLine).toBeGreaterThanOrEqual(0);
            });
            expect(chunks.some((c) => c.content.includes('Title'))).toBe(true);
        });
    });
    describe('JavaScript (code-chunk)', () => {
        it('should split JavaScript by functions', async () => {
            const code = `function foo() {
    return 1;
}

function bar() {
    return 2;
}

function baz() {
    return 3;
}
`;
            const chunks = await chunkCodeByASTSimple(code, 'test.js');
            expect(chunks.length).toBeGreaterThanOrEqual(1);
            expect(chunks.some((c) => c.includes('function foo'))).toBe(true);
            expect(chunks.some((c) => c.includes('function bar'))).toBe(true);
            expect(chunks.some((c) => c.includes('function baz'))).toBe(true);
        });
        it('should handle classes', async () => {
            const code = `class MyClass {
    constructor() {
        this.value = 0;
    }

    increment() {
        this.value++;
    }
}
`;
            const chunks = await chunkCodeByASTSimple(code, 'test.js');
            expect(chunks.length).toBeGreaterThan(0);
            expect(chunks.some((c) => c.includes('class MyClass'))).toBe(true);
        });
        it('should have meaningful line ranges for JS chunks', async () => {
            const code = `function first() {
    return 1;
}

function second() {
    return 2;
}
`;
            const chunks = await chunkCodeByAST(code, 'test.js');
            expect(chunks.length).toBeGreaterThan(0);
            // AST chunks (unlike fallback) carry real, 1-based line ranges.
            chunks.forEach((chunk) => {
                expect(chunk.startLine).toBeGreaterThanOrEqual(1);
                expect(chunk.endLine).toBeGreaterThanOrEqual(chunk.startLine);
                expect(chunk.content).toBeTruthy();
                expect(chunk.metadata.fallback).toBe(false);
            });
        });
    });
    describe('Size constraints', () => {
        it('should respect maxChunkSize', async () => {
            const largeCode = `function veryLargeFunction() {
${'return 1;\n'.repeat(100)}
}
`;
            const chunks = await chunkCodeByASTSimple(largeCode, 'test.js', {
                maxChunkSize: 500,
            });
            // code-chunk may exceed slightly; ensure we got multiple chunks and none is huge
            expect(chunks.length).toBeGreaterThan(1);
            chunks.forEach((chunk) => {
                expect(chunk.length).toBeLessThanOrEqual(800);
            });
        });
    });
    describe('Fallback behavior', () => {
        it('should fallback to character chunking for unknown languages', async () => {
            const code = 'a'.repeat(2000);
            const chunks = await chunkCodeByAST(code, 'test.unknown');
            expect(chunks.length).toBeGreaterThan(1);
            expect(chunks[0].metadata.fallback).toBe(true);
        });
        it('should return chunks when AST parsing fails (fallback or resilient parse)', async () => {
            const invalidCode = 'function { syntax error }';
            const chunks = await chunkCodeByAST(invalidCode, 'test.js');
            // code-chunk may either throw (then we fallback) or return partial chunks
            expect(chunks.length).toBeGreaterThan(0);
            expect(chunks[0].content).toBeTruthy();
        });
        it('should handle empty input', async () => {
            const chunks = await chunkCodeByAST('', 'test.js');
            expect(chunks.length).toBe(0);
        });
    });
    describe('Edge cases', () => {
        it('should use fallback for HTML (unsupported by code-chunk)', async () => {
            const html = `<div>
    <p>Paragraph 1</p>
</div>
`;
            const chunks = await chunkCodeByAST(html, 'test.html');
            expect(chunks.length).toBeGreaterThan(0);
            chunks.forEach((c) => expect(c.metadata.fallback).toBe(true));
        });
        it('should handle single-line content', async () => {
            const code = 'const x = 42;';
            const chunks = await chunkCodeByAST(code, 'test.js');
            expect(chunks.length).toBeGreaterThan(0);
            expect(chunks[0].content).toBeTruthy();
        });
    });
    describe('Performance', () => {
        // Coarse sanity bound only — not a rigorous benchmark.
        it('should handle large files efficiently', async () => {
            const sections = Array.from({ length: 100 }, (_, i) => {
                return `## Section ${i + 1}\n\nContent for section ${i + 1}.\n`;
            }).join('\n');
            const markdown = `# Large Document\n\n${sections}`;
            const start = Date.now();
            const chunks = await chunkCodeByAST(markdown, 'test.md');
            const duration = Date.now() - start;
            expect(chunks.length).toBeGreaterThan(1);
            expect(duration).toBeLessThan(5000);
        });
    });
});
//# sourceMappingURL=ast-chunking.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ast-chunking.test.js","sourceRoot":"","sources":["../src/ast-chunking.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAA;AAC7C,OAAO,EACN,cAAc,EACd,oBAAoB,EACpB,qBAAqB,GACrB,MAAM,mBAAmB,CAAA;AAE1B,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IACnC,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YACjD,MAAM,KAAK,GAAG,qBAAqB,EAAE,CAAA;YACrC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAA;YACrC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAA;YACrC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;YACjC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACpC,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,QAAQ,GAAG;;;;;;;CAOnB,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;YAExD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,gDAAgD;YAChD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;gBACpB,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;gBAC3B,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACvC,CAAC,CAAC,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,iEAAiE,EAAE,KAAK,IAAI,EAAE;YAChF,MAAM,QAAQ,GAAG;;;;;CAKnB,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;YAExD,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACxB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;gBAClC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;gBACjD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;YAChD,CAAC,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnE,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACxC,EAAE,CAAC,sCAAsC,EAAE,KAAK,IA
AI,EAAE;YACrD,MAAM,IAAI,GAAG;;;;;;;;;;;CAWf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAE1D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;YAC/C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAClE,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;YACtC,MAAM,IAAI,GAAG;;;;;;;;;CASf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAE1D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnE,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YACjE,MAAM,IAAI,GAAG;;;;;;;CAOf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAEpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACxB,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;gBACjD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,sBAAsB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBAC7D,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;gBAClC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC5C,CAAC,CAAC,CAAA;QACH,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,SAAS,GAAG;IACjB,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC;;CAE5B,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,SAAS,EAAE;gBAC/D,YAAY,EAAE,GAAG;aACjB,CAAC,CAAA;YAEF,iFAAiF;YACjF,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACxB,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAA;YAC9C,CAAC,CAAC
,CAAA;QACH,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;YAC5E,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YAE7B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,CAAA;YAEzD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,2EAA2E,EAAE,KAAK,IAAI,EAAE;YAC1F,MAAM,WAAW,GAAG,2BAA2B,CAAA;YAE/C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,WAAW,EAAE,SAAS,CAAC,CAAA;YAE3D,0EAA0E;YAC1E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;YAC1C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,EAAE,EAAE,SAAS,CAAC,CAAA;YAElD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC9B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC3B,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACzE,MAAM,IAAI,GAAG;;;CAGf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAA;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC9D,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,IAAI,GAAG,eAAe,CAAA;YAE5B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAEpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;QACvC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC5B,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACtD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACrD,OAAO,cAAc,CAAC,GAAG,CAAC,2BAA2B,CAAC,GAAG,CAAC,KAAK,CAAA;YAChE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAEb,MAAM,QAAQ,GAAG,uBAAuB,QAAQ,EAAE,CAAA;YAElD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG
,EAAE,CAAA;YACxB,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;YACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAA;YAEnC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;QACpC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;AACH,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
 * Code-Aware Tokenizer using StarCoder2
 *
 * StarCoder2 tokenizer is lightweight (only 4.7MB) and provides
 * world-class code tokenization quality without requiring the full model.
 */
/** A single token: its surface text and numeric vocabulary id */
export interface CodeToken {
    readonly text: string;
    readonly id: number;
}
export interface TokenizerOptions {
    /** HuggingFace model id to fetch tokenizer files from (default 'bigcode/starcoder2-15b') */
    readonly modelPath?: string;
    /** NOTE(review): declared but not visibly consumed by the implementation — confirm */
    readonly cacheDir?: string;
}
/**
 * StarCoder2 Code Tokenizer
 *
 * Uses StarCoder2's tokenizer (4.7MB) for accurate code tokenization.
 * Does NOT require downloading the full 15B parameter model.
 */
export declare class CodeTokenizer {
    private tokenizer;
    private initialized;
    private initPromise;
    private modelPath;
    constructor(options?: TokenizerOptions);
    /**
     * Initialize tokenizer (downloads ~4.7MB on first use)
     */
    initialize(): Promise<void>;
    private doInitialize;
    /**
     * Tokenize code into terms for TF-IDF indexing
     */
    tokenize(code: string): Promise<string[]>;
    /**
     * Extract unique terms with frequency counts
     */
    extractTerms(code: string): Promise<Map<string, number>>;
    /**
     * Check if tokenizer is ready
     */
    isReady(): boolean;
}
/**
 * Get or create the global tokenizer instance
 */
export declare function getTokenizer(): CodeTokenizer;
/**
 * Tokenize code using StarCoder2 (async)
 * This is the main entry point for tokenization
 */
export declare function tokenize(code: string): Promise<string[]>;
/**
 * Extract terms with frequency counts using StarCoder2 (async)
 */
export declare function extractTerms(code: string): Promise<Map<string, number>>;
/**
 * Initialize the global tokenizer (call early to avoid delay on first tokenize)
 */
export declare function initializeTokenizer(): Promise<void>;
//# sourceMappingURL=code-tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-tokenizer.d.ts","sourceRoot":"","sources":["../src/code-tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,MAAM,WAAW,SAAS;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,gBAAgB;IAChC,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAA;IAC3B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAC1B;AAED;;;;;GAKG;AACH,qBAAa,aAAa;IACzB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,SAAS,CAAQ;gBAEb,OAAO,GAAE,gBAAqB;IAK1C;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAcnB,YAAY;IAiB1B;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA8B/C;;OAEG;IACG,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAW9D;;OAEG;IACH,OAAO,IAAI,OAAO;CAGlB;AAKD;;GAEG;AACH,wBAAgB,YAAY,IAAI,aAAa,CAK5C;AAED;;;GAGG;AACH,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAG9D;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAG7E;AAED;;GAEG;AACH,wBAAsB,mBAAmB,IAAI,OAAO,CAAC,IAAI,CAAC,CAGzD"}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code-Aware Tokenizer using StarCoder2
|
|
3
|
+
*
|
|
4
|
+
* StarCoder2 tokenizer is lightweight (only 4.7MB) and provides
|
|
5
|
+
* world-class code tokenization quality without requiring the full model.
|
|
6
|
+
*/
|
|
7
|
+
import { AutoTokenizer } from '@huggingface/transformers';
|
|
8
|
+
/**
|
|
9
|
+
* StarCoder2 Code Tokenizer
|
|
10
|
+
*
|
|
11
|
+
* Uses StarCoder2's tokenizer (4.7MB) for accurate code tokenization.
|
|
12
|
+
* Does NOT require downloading the full 15B parameter model.
|
|
13
|
+
*/
|
|
14
|
+
export class CodeTokenizer {
|
|
15
|
+
tokenizer;
|
|
16
|
+
initialized = false;
|
|
17
|
+
initPromise = null;
|
|
18
|
+
modelPath;
|
|
19
|
+
constructor(options = {}) {
|
|
20
|
+
// Default to StarCoder2 tokenizer (only downloads tokenizer files, not model)
|
|
21
|
+
this.modelPath = options.modelPath || 'bigcode/starcoder2-15b';
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Initialize tokenizer (downloads ~4.7MB on first use)
|
|
25
|
+
*/
|
|
26
|
+
async initialize() {
|
|
27
|
+
if (this.initialized) {
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
// Prevent multiple concurrent initializations
|
|
31
|
+
if (this.initPromise) {
|
|
32
|
+
return this.initPromise;
|
|
33
|
+
}
|
|
34
|
+
this.initPromise = this.doInitialize();
|
|
35
|
+
return this.initPromise;
|
|
36
|
+
}
|
|
37
|
+
async doInitialize() {
|
|
38
|
+
try {
|
|
39
|
+
console.error('[INFO] Loading StarCoder2 tokenizer (4.7MB, one-time download)...');
|
|
40
|
+
const startTime = Date.now();
|
|
41
|
+
this.tokenizer = await AutoTokenizer.from_pretrained(this.modelPath);
|
|
42
|
+
const loadTime = Date.now() - startTime;
|
|
43
|
+
console.error(`[SUCCESS] Tokenizer loaded in ${loadTime}ms`);
|
|
44
|
+
this.initialized = true;
|
|
45
|
+
}
|
|
46
|
+
catch (error) {
|
|
47
|
+
this.initPromise = null;
|
|
48
|
+
throw new Error(`Failed to load tokenizer: ${error.message}`);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Tokenize code into terms for TF-IDF indexing
|
|
53
|
+
*/
|
|
54
|
+
async tokenize(code) {
|
|
55
|
+
if (!this.initialized) {
|
|
56
|
+
await this.initialize();
|
|
57
|
+
}
|
|
58
|
+
if (!code || code.trim().length === 0) {
|
|
59
|
+
return [];
|
|
60
|
+
}
|
|
61
|
+
// Encode with StarCoder2
|
|
62
|
+
const encoded = await this.tokenizer(code);
|
|
63
|
+
const inputIds = encoded.input_ids.tolist()[0];
|
|
64
|
+
// Decode each token ID to get the actual tokens
|
|
65
|
+
const tokens = [];
|
|
66
|
+
for (const id of inputIds) {
|
|
67
|
+
const token = await this.tokenizer.decode([id], {
|
|
68
|
+
skip_special_tokens: true,
|
|
69
|
+
});
|
|
70
|
+
const cleaned = token.trim().toLowerCase();
|
|
71
|
+
// Filter: keep tokens with length > 1 (skip single chars and empty)
|
|
72
|
+
if (cleaned.length > 1) {
|
|
73
|
+
tokens.push(cleaned);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
return tokens;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Extract unique terms with frequency counts
|
|
80
|
+
*/
|
|
81
|
+
async extractTerms(code) {
|
|
82
|
+
const tokens = await this.tokenize(code);
|
|
83
|
+
const termFreq = new Map();
|
|
84
|
+
for (const token of tokens) {
|
|
85
|
+
termFreq.set(token, (termFreq.get(token) || 0) + 1);
|
|
86
|
+
}
|
|
87
|
+
return termFreq;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Check if tokenizer is ready
|
|
91
|
+
*/
|
|
92
|
+
isReady() {
|
|
93
|
+
return this.initialized;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
// Singleton instance for global use
|
|
97
|
+
let globalTokenizer = null;
|
|
98
|
+
/**
|
|
99
|
+
* Get or create the global tokenizer instance
|
|
100
|
+
*/
|
|
101
|
+
export function getTokenizer() {
|
|
102
|
+
if (!globalTokenizer) {
|
|
103
|
+
globalTokenizer = new CodeTokenizer();
|
|
104
|
+
}
|
|
105
|
+
return globalTokenizer;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Tokenize code using StarCoder2 (async)
|
|
109
|
+
* This is the main entry point for tokenization
|
|
110
|
+
*/
|
|
111
|
+
export async function tokenize(code) {
|
|
112
|
+
const tokenizer = getTokenizer();
|
|
113
|
+
return tokenizer.tokenize(code);
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Extract terms with frequency counts using StarCoder2 (async)
|
|
117
|
+
*/
|
|
118
|
+
export async function extractTerms(code) {
|
|
119
|
+
const tokenizer = getTokenizer();
|
|
120
|
+
return tokenizer.extractTerms(code);
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Initialize the global tokenizer (call early to avoid delay on first tokenize)
|
|
124
|
+
*/
|
|
125
|
+
export async function initializeTokenizer() {
|
|
126
|
+
const tokenizer = getTokenizer();
|
|
127
|
+
await tokenizer.initialize();
|
|
128
|
+
}
|
|
129
|
+
//# sourceMappingURL=code-tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-tokenizer.js","sourceRoot":"","sources":["../src/code-tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAA;AAYzD;;;;;GAKG;AACH,MAAM,OAAO,aAAa;IACjB,SAAS,CAAK;IACd,WAAW,GAAG,KAAK,CAAA;IACnB,WAAW,GAAyB,IAAI,CAAA;IACxC,SAAS,CAAQ;IAEzB,YAAY,UAA4B,EAAE;QACzC,8EAA8E;QAC9E,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,wBAAwB,CAAA;IAC/D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACf,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAM;QACP,CAAC;QAED,8CAA8C;QAC9C,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC,WAAW,CAAA;QACxB,CAAC;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,YAAY,EAAE,CAAA;QACtC,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAEO,KAAK,CAAC,YAAY;QACzB,IAAI,CAAC;YACJ,OAAO,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAA;YAClF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;YAE5B,IAAI,CAAC,SAAS,GAAG,MAAM,aAAa,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YAEpE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;YACvC,OAAO,CAAC,KAAK,CAAC,iCAAiC,QAAQ,IAAI,CAAC,CAAA;YAE5D,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;QACxB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YACvB,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;QAC9D,CAAC;IACF,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,IAAY;QAC1B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAA;QACxB,CAAC;QAED,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,EAAE,CAAA;QACV,CAAC;QAED,yBAAyB;QACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAA;QAE9C,gDAAgD;QAChD,MAAM,MAAM,GAAa,EAAE,CAAA;QAC3B,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE;gBAC/C,mBAAmB,EAAE,IAAI;aACzB,CAAC,CAAA;YAEF,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YAC1C,oEAAoE;YACpE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACrB,CAAC;QACF,CAAC;QAED,OAAO,MAAM,CAAA;IACd,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,IAAY;
QAC9B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;QACxC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;QAE1C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACpD,CAAC;QAED,OAAO,QAAQ,CAAA;IAChB,CAAC;IAED;;OAEG;IACH,OAAO;QACN,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;CACD;AAED,oCAAoC;AACpC,IAAI,eAAe,GAAyB,IAAI,CAAA;AAEhD;;GAEG;AACH,MAAM,UAAU,YAAY;IAC3B,IAAI,CAAC,eAAe,EAAE,CAAC;QACtB,eAAe,GAAG,IAAI,aAAa,EAAE,CAAA;IACtC,CAAC;IACD,OAAO,eAAe,CAAA;AACvB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAY;IAC1C,MAAM,SAAS,GAAG,YAAY,EAAE,CAAA;IAChC,OAAO,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC9C,MAAM,SAAS,GAAG,YAAY,EAAE,CAAA;IAChC,OAAO,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;AACpC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB;IACxC,MAAM,SAAS,GAAG,YAAY,EAAE,CAAA;IAChC,MAAM,SAAS,CAAC,UAAU,EAAE,CAAA;AAC7B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-tokenizer.test.d.ts","sourceRoot":"","sources":["../src/code-tokenizer.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Tokenizer Tests (StarCoder2)
|
|
3
|
+
*/
|
|
4
|
+
import { beforeAll, describe, expect, it } from 'vitest';
|
|
5
|
+
import { CodeTokenizer, extractTerms, initializeTokenizer, tokenize } from './code-tokenizer.js';
|
|
6
|
+
// Skip if running in CI without model cache
|
|
7
|
+
const shouldSkip = process.env.CI === 'true' && !process.env.HF_HOME;
|
|
8
|
+
describe('StarCoder2 Tokenizer', () => {
|
|
9
|
+
beforeAll(async () => {
|
|
10
|
+
if (shouldSkip)
|
|
11
|
+
return;
|
|
12
|
+
// Initialize tokenizer once for all tests
|
|
13
|
+
await initializeTokenizer();
|
|
14
|
+
}, 60000); // 60s timeout for download
|
|
15
|
+
describe('tokenize function', () => {
|
|
16
|
+
it.skipIf(shouldSkip)('should tokenize simple code', async () => {
|
|
17
|
+
const tokens = await tokenize('getUserData(userId)');
|
|
18
|
+
expect(tokens.length).toBeGreaterThan(0);
|
|
19
|
+
// StarCoder2 should understand code structure
|
|
20
|
+
expect(tokens.some((t) => t.includes('get') || t.includes('user'))).toBe(true);
|
|
21
|
+
});
|
|
22
|
+
it.skipIf(shouldSkip)('should tokenize complex code', async () => {
|
|
23
|
+
const code = `
|
|
24
|
+
async function authenticateUser(username: string, password: string) {
|
|
25
|
+
const user = await findUserByUsername(username);
|
|
26
|
+
if (!user) throw new Error('User not found');
|
|
27
|
+
return await verifyPassword(user, password);
|
|
28
|
+
}
|
|
29
|
+
`;
|
|
30
|
+
const tokens = await tokenize(code);
|
|
31
|
+
expect(tokens.length).toBeGreaterThan(10);
|
|
32
|
+
// Should extract meaningful tokens
|
|
33
|
+
expect(tokens.some((t) => t.includes('authenticate'))).toBe(true);
|
|
34
|
+
expect(tokens.some((t) => t.includes('user'))).toBe(true);
|
|
35
|
+
expect(tokens.some((t) => t.includes('password'))).toBe(true);
|
|
36
|
+
});
|
|
37
|
+
it.skipIf(shouldSkip)('should handle empty input', async () => {
|
|
38
|
+
const tokens = await tokenize('');
|
|
39
|
+
expect(tokens).toHaveLength(0);
|
|
40
|
+
});
|
|
41
|
+
it.skipIf(shouldSkip)('should handle whitespace', async () => {
|
|
42
|
+
const tokens = await tokenize(' \n\t ');
|
|
43
|
+
expect(tokens).toHaveLength(0);
|
|
44
|
+
});
|
|
45
|
+
it.skipIf(shouldSkip)('should handle camelCase', async () => {
|
|
46
|
+
const tokens = await tokenize('getUserData');
|
|
47
|
+
expect(tokens.length).toBeGreaterThan(0);
|
|
48
|
+
});
|
|
49
|
+
it.skipIf(shouldSkip)('should handle snake_case', async () => {
|
|
50
|
+
const tokens = await tokenize('is_authenticated');
|
|
51
|
+
expect(tokens.length).toBeGreaterThan(0);
|
|
52
|
+
});
|
|
53
|
+
});
|
|
54
|
+
describe('extractTerms function', () => {
|
|
55
|
+
it.skipIf(shouldSkip)('should extract terms with frequencies', async () => {
|
|
56
|
+
const code = 'user user authenticate user';
|
|
57
|
+
const terms = await extractTerms(code);
|
|
58
|
+
expect(terms.size).toBeGreaterThan(0);
|
|
59
|
+
// Should count frequencies
|
|
60
|
+
const userFreq = terms.get('user');
|
|
61
|
+
expect(userFreq).toBeGreaterThanOrEqual(1);
|
|
62
|
+
});
|
|
63
|
+
it.skipIf(shouldSkip)('should handle code with duplicates', async () => {
|
|
64
|
+
const code = `
|
|
65
|
+
function getUserData(userId) {
|
|
66
|
+
const user = findUser(userId);
|
|
67
|
+
return user;
|
|
68
|
+
}
|
|
69
|
+
`;
|
|
70
|
+
const terms = await extractTerms(code);
|
|
71
|
+
expect(terms.size).toBeGreaterThan(0);
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
describe('CodeTokenizer class', () => {
|
|
75
|
+
let tokenizer;
|
|
76
|
+
beforeAll(async () => {
|
|
77
|
+
if (shouldSkip)
|
|
78
|
+
return;
|
|
79
|
+
tokenizer = new CodeTokenizer();
|
|
80
|
+
await tokenizer.initialize();
|
|
81
|
+
}, 30000);
|
|
82
|
+
it.skipIf(shouldSkip)('should initialize successfully', () => {
|
|
83
|
+
expect(tokenizer.isReady()).toBe(true);
|
|
84
|
+
});
|
|
85
|
+
it.skipIf(shouldSkip)('should tokenize code', async () => {
|
|
86
|
+
const tokens = await tokenizer.tokenize('function test() {}');
|
|
87
|
+
expect(tokens.length).toBeGreaterThan(0);
|
|
88
|
+
expect(tokens.some((t) => t.includes('function') || t.includes('test'))).toBe(true);
|
|
89
|
+
});
|
|
90
|
+
it.skipIf(shouldSkip)('should extract terms', async () => {
|
|
91
|
+
const terms = await tokenizer.extractTerms('const x = 1; const y = 2;');
|
|
92
|
+
expect(terms.size).toBeGreaterThan(0);
|
|
93
|
+
});
|
|
94
|
+
});
|
|
95
|
+
});
|
|
96
|
+
//# sourceMappingURL=code-tokenizer.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-tokenizer.test.js","sourceRoot":"","sources":["../src/code-tokenizer.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAA;AACxD,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,mBAAmB,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAEhG,4CAA4C;AAC5C,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,KAAK,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAA;AAEpE,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACrC,SAAS,CAAC,KAAK,IAAI,EAAE;QACpB,IAAI,UAAU;YAAE,OAAM;QACtB,0CAA0C;QAC1C,MAAM,mBAAmB,EAAE,CAAA;IAC5B,CAAC,EAAE,KAAK,CAAC,CAAA,CAAC,2BAA2B;IAErC,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC/D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,qBAAqB,CAAC,CAAA;YAEpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,8CAA8C;YAC9C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/E,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAChE,MAAM,IAAI,GAAG;;;;;;IAMZ,CAAA;YAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAA;YAEnC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC,CAAA;YACzC,mCAAmC;YACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACzD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC9D,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;YAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,EAAE,CAAC,CAAA;YACjC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,0BAA0B,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,CAAA;YAC1C,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAA
C,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,yBAAyB,EAAE,KAAK,IAAI,EAAE;YAC3D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,aAAa,CAAC,CAAA;YAE5C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,0BAA0B,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,CAAC,CAAA;YAEjD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACzE,MAAM,IAAI,GAAG,6BAA6B,CAAA;YAC1C,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAA;YAEtC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACrC,2BAA2B;YAC3B,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;YAClC,MAAM,CAAC,QAAQ,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;QAC3C,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,IAAI,GAAG;;;;;IAKZ,CAAA;YAED,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAA;YAEtC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACpC,IAAI,SAAwB,CAAA;QAE5B,SAAS,CAAC,KAAK,IAAI,EAAE;YACpB,IAAI,UAAU;gBAAE,OAAM;YACtB,SAAS,GAAG,IAAI,aAAa,EAAE,CAAA;YAC/B,MAAM,SAAS,CAAC,UAAU,EAAE,CAAA;QAC7B,CAAC,EAAE,KAAK,CAAC,CAAA;QAET,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC5D,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAA;YAE7D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpF,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,YAAY,CAAC,2BA
A2B,CAAC,CAAA;YAEvE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;AACH,CAAC,CAAC,CAAA"}
|