@matperez/coderag 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/README.md +154 -0
  2. package/dist/.tsbuildinfo +1 -0
  3. package/dist/ast-chunking.d.ts +40 -0
  4. package/dist/ast-chunking.d.ts.map +1 -0
  5. package/dist/ast-chunking.js +88 -0
  6. package/dist/ast-chunking.js.map +1 -0
  7. package/dist/ast-chunking.test.d.ts +5 -0
  8. package/dist/ast-chunking.test.d.ts.map +1 -0
  9. package/dist/ast-chunking.test.js +173 -0
  10. package/dist/ast-chunking.test.js.map +1 -0
  11. package/dist/code-tokenizer.d.ts +62 -0
  12. package/dist/code-tokenizer.d.ts.map +1 -0
  13. package/dist/code-tokenizer.js +129 -0
  14. package/dist/code-tokenizer.js.map +1 -0
  15. package/dist/code-tokenizer.test.d.ts +5 -0
  16. package/dist/code-tokenizer.test.d.ts.map +1 -0
  17. package/dist/code-tokenizer.test.js +96 -0
  18. package/dist/code-tokenizer.test.js.map +1 -0
  19. package/dist/db/client-pg.d.ts +16 -0
  20. package/dist/db/client-pg.d.ts.map +1 -0
  21. package/dist/db/client-pg.js +38 -0
  22. package/dist/db/client-pg.js.map +1 -0
  23. package/dist/db/client.d.ts +36 -0
  24. package/dist/db/client.d.ts.map +1 -0
  25. package/dist/db/client.js +81 -0
  26. package/dist/db/client.js.map +1 -0
  27. package/dist/db/migrations-pg.d.ts +6 -0
  28. package/dist/db/migrations-pg.d.ts.map +1 -0
  29. package/dist/db/migrations-pg.js +88 -0
  30. package/dist/db/migrations-pg.js.map +1 -0
  31. package/dist/db/migrations.d.ts +9 -0
  32. package/dist/db/migrations.d.ts.map +1 -0
  33. package/dist/db/migrations.js +164 -0
  34. package/dist/db/migrations.js.map +1 -0
  35. package/dist/db/schema-pg.d.ts +611 -0
  36. package/dist/db/schema-pg.d.ts.map +1 -0
  37. package/dist/db/schema-pg.js +66 -0
  38. package/dist/db/schema-pg.js.map +1 -0
  39. package/dist/db/schema.d.ts +630 -0
  40. package/dist/db/schema.d.ts.map +1 -0
  41. package/dist/db/schema.js +85 -0
  42. package/dist/db/schema.js.map +1 -0
  43. package/dist/embeddings.d.ts +92 -0
  44. package/dist/embeddings.d.ts.map +1 -0
  45. package/dist/embeddings.js +275 -0
  46. package/dist/embeddings.js.map +1 -0
  47. package/dist/embeddings.test.d.ts +5 -0
  48. package/dist/embeddings.test.d.ts.map +1 -0
  49. package/dist/embeddings.test.js +255 -0
  50. package/dist/embeddings.test.js.map +1 -0
  51. package/dist/hybrid-search.d.ts +47 -0
  52. package/dist/hybrid-search.d.ts.map +1 -0
  53. package/dist/hybrid-search.js +215 -0
  54. package/dist/hybrid-search.js.map +1 -0
  55. package/dist/hybrid-search.test.d.ts +5 -0
  56. package/dist/hybrid-search.test.d.ts.map +1 -0
  57. package/dist/hybrid-search.test.js +252 -0
  58. package/dist/hybrid-search.test.js.map +1 -0
  59. package/dist/incremental-tfidf.d.ts +77 -0
  60. package/dist/incremental-tfidf.d.ts.map +1 -0
  61. package/dist/incremental-tfidf.js +248 -0
  62. package/dist/incremental-tfidf.js.map +1 -0
  63. package/dist/incremental-tfidf.test.d.ts +5 -0
  64. package/dist/incremental-tfidf.test.d.ts.map +1 -0
  65. package/dist/incremental-tfidf.test.js +276 -0
  66. package/dist/incremental-tfidf.test.js.map +1 -0
  67. package/dist/index.d.ts +18 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +19 -0
  70. package/dist/index.js.map +1 -0
  71. package/dist/indexer.d.ts +205 -0
  72. package/dist/indexer.d.ts.map +1 -0
  73. package/dist/indexer.js +1331 -0
  74. package/dist/indexer.js.map +1 -0
  75. package/dist/indexer.test.d.ts +12 -0
  76. package/dist/indexer.test.d.ts.map +1 -0
  77. package/dist/indexer.test.js +471 -0
  78. package/dist/indexer.test.js.map +1 -0
  79. package/dist/language-config.d.ts +54 -0
  80. package/dist/language-config.d.ts.map +1 -0
  81. package/dist/language-config.js +75 -0
  82. package/dist/language-config.js.map +1 -0
  83. package/dist/search-cache.d.ts +63 -0
  84. package/dist/search-cache.d.ts.map +1 -0
  85. package/dist/search-cache.js +118 -0
  86. package/dist/search-cache.js.map +1 -0
  87. package/dist/search-cache.test.d.ts +5 -0
  88. package/dist/search-cache.test.d.ts.map +1 -0
  89. package/dist/search-cache.test.js +194 -0
  90. package/dist/search-cache.test.js.map +1 -0
  91. package/dist/storage-factory.d.ts +11 -0
  92. package/dist/storage-factory.d.ts.map +1 -0
  93. package/dist/storage-factory.js +17 -0
  94. package/dist/storage-factory.js.map +1 -0
  95. package/dist/storage-persistent-pg.d.ts +75 -0
  96. package/dist/storage-persistent-pg.d.ts.map +1 -0
  97. package/dist/storage-persistent-pg.js +579 -0
  98. package/dist/storage-persistent-pg.js.map +1 -0
  99. package/dist/storage-persistent-pg.test.d.ts +7 -0
  100. package/dist/storage-persistent-pg.test.d.ts.map +1 -0
  101. package/dist/storage-persistent-pg.test.js +90 -0
  102. package/dist/storage-persistent-pg.test.js.map +1 -0
  103. package/dist/storage-persistent-types.d.ts +110 -0
  104. package/dist/storage-persistent-types.d.ts.map +1 -0
  105. package/dist/storage-persistent-types.js +5 -0
  106. package/dist/storage-persistent-types.js.map +1 -0
  107. package/dist/storage-persistent.d.ts +231 -0
  108. package/dist/storage-persistent.d.ts.map +1 -0
  109. package/dist/storage-persistent.js +897 -0
  110. package/dist/storage-persistent.js.map +1 -0
  111. package/dist/storage-persistent.test.d.ts +5 -0
  112. package/dist/storage-persistent.test.d.ts.map +1 -0
  113. package/dist/storage-persistent.test.js +325 -0
  114. package/dist/storage-persistent.test.js.map +1 -0
  115. package/dist/storage.d.ts +63 -0
  116. package/dist/storage.d.ts.map +1 -0
  117. package/dist/storage.js +67 -0
  118. package/dist/storage.js.map +1 -0
  119. package/dist/storage.test.d.ts +5 -0
  120. package/dist/storage.test.d.ts.map +1 -0
  121. package/dist/storage.test.js +157 -0
  122. package/dist/storage.test.js.map +1 -0
  123. package/dist/tfidf.d.ts +97 -0
  124. package/dist/tfidf.d.ts.map +1 -0
  125. package/dist/tfidf.js +308 -0
  126. package/dist/tfidf.js.map +1 -0
  127. package/dist/tfidf.test.d.ts +5 -0
  128. package/dist/tfidf.test.d.ts.map +1 -0
  129. package/dist/tfidf.test.js +181 -0
  130. package/dist/tfidf.test.js.map +1 -0
  131. package/dist/utils.d.ts +61 -0
  132. package/dist/utils.d.ts.map +1 -0
  133. package/dist/utils.js +264 -0
  134. package/dist/utils.js.map +1 -0
  135. package/dist/utils.test.d.ts +5 -0
  136. package/dist/utils.test.d.ts.map +1 -0
  137. package/dist/utils.test.js +94 -0
  138. package/dist/utils.test.js.map +1 -0
  139. package/dist/vector-storage.d.ts +120 -0
  140. package/dist/vector-storage.d.ts.map +1 -0
  141. package/dist/vector-storage.js +264 -0
  142. package/dist/vector-storage.js.map +1 -0
  143. package/dist/vector-storage.test.d.ts +5 -0
  144. package/dist/vector-storage.test.d.ts.map +1 -0
  145. package/dist/vector-storage.test.js +345 -0
  146. package/dist/vector-storage.test.js.map +1 -0
  147. package/package.json +85 -0
@@ -0,0 +1,40 @@
1
/**
 * AST-Based Code Chunking using code-chunk (tree-sitter)
 *
 * Splits code at semantic boundaries (functions, classes, etc.)
 * Supports TypeScript, JavaScript, Python, Rust, Go, Java.
 * Other file types use character-based fallback.
 */
/**
 * AST-based chunking options
 */
export interface ASTChunkOptions {
    /** Upper bound (in characters) for a single chunk; the implementation defaults to 1000. */
    readonly maxChunkSize?: number;
    /** Lower bound for a chunk. NOTE(review): declared here but not read by chunkCodeByAST in dist/ast-chunking.js — confirm it is honored. */
    readonly minChunkSize?: number;
}
/**
 * Chunk result with metadata
 */
export interface ChunkResult {
    /** The chunk text itself. */
    readonly content: string;
    /** Type of the chunk's first entity (e.g. a function or class), 'chunk' when none, or 'text' for fallback chunks. */
    readonly type: string;
    /** 1-based first line of the chunk; 0 for character-based fallback chunks. */
    readonly startLine: number;
    /** 1-based last line of the chunk; 0 for character-based fallback chunks. */
    readonly endLine: number;
    /** Always contains `fallback: boolean`; AST-derived chunks additionally carry `scope` and `entities` arrays. */
    readonly metadata: Record<string, unknown>;
}
/**
 * Chunk code using AST analysis (via code-chunk / tree-sitter)
 *
 * Supported extensions: .ts, .tsx, .js, .jsx, .mjs, .cjs, .py, .pyi, .rs, .go, .java.
 * Other files are chunked with character-based fallback.
 */
export declare function chunkCodeByAST(code: string, filePath: string, options?: ASTChunkOptions): Promise<readonly ChunkResult[]>;
/**
 * Simple wrapper for backward compatibility
 */
export declare function chunkCodeByASTSimple(code: string, filePath: string, options?: ASTChunkOptions): Promise<readonly string[]>;
/**
 * Get list of languages supported for AST chunking (code-chunk)
 */
export declare function getSupportedLanguages(): string[];
//# sourceMappingURL=ast-chunking.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast-chunking.d.ts","sourceRoot":"","sources":["../src/ast-chunking.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAUH;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAA;IAC9B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAA;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAA;IAC1B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAC1C;AA8CD;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,eAAoB,GAC3B,OAAO,CAAC,SAAS,WAAW,EAAE,CAAC,CAiBjC;AAED;;GAEG;AACH,wBAAsB,oBAAoB,CACzC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,eAAoB,GAC3B,OAAO,CAAC,SAAS,MAAM,EAAE,CAAC,CAG5B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,IAAI,MAAM,EAAE,CAEhD"}
@@ -0,0 +1,88 @@
1
+ /**
2
+ * AST-Based Code Chunking using code-chunk (tree-sitter)
3
+ *
4
+ * Splits code at semantic boundaries (functions, classes, etc.)
5
+ * Supports TypeScript, JavaScript, Python, Rust, Go, Java.
6
+ * Other file types use character-based fallback.
7
+ */
8
+ import { chunk as codeChunk } from 'code-chunk';
9
+ import { chunkText } from './embeddings.js';
10
/** Languages supported by code-chunk for AST chunking */
// Anything outside this set takes the character-based fallback path in chunkCodeByAST.
const CODE_CHUNK_SUPPORTED_LANGUAGES = [
    'typescript',
    'javascript',
    'python',
    'rust',
    'go',
    'java',
];
19
/**
 * Build character-based fallback chunks for content that cannot be AST-parsed.
 *
 * Line numbers are not tracked on this path, so startLine/endLine are 0;
 * metadata records `fallback: true` plus the chunk's position in the sequence.
 */
function createFallbackChunks(code, maxChunkSize) {
    const pieces = chunkText(code, { maxChunkSize });
    const results = [];
    for (let index = 0; index < pieces.length; index += 1) {
        results.push({
            content: pieces[index],
            type: 'text',
            startLine: 0,
            endLine: 0,
            metadata: { fallback: true, index },
        });
    }
    return results;
}
32
/**
 * Convert one raw code-chunk result into this package's ChunkResult shape.
 *
 * code-chunk reports 0-based line ranges; ChunkResult uses 1-based lines.
 * Scope and entity objects are narrowed to the fields this package exposes.
 */
function mapCodeChunkToResult(c) {
    const { scope, entities } = c.context;
    const chunkType = entities[0]?.type ?? 'chunk';
    const scopeSummary = scope.map(({ name, type }) => ({ name, type }));
    const entitySummary = entities.map(({ name, type, signature }) => ({ name, type, signature }));
    return {
        content: c.text,
        type: chunkType,
        startLine: 1 + c.lineRange.start,
        endLine: 1 + c.lineRange.end,
        metadata: {
            fallback: false,
            scope: scopeSummary,
            entities: entitySummary,
        },
    };
}
51
/**
 * Chunk code using AST analysis (via code-chunk / tree-sitter)
 *
 * Supported extensions: .ts, .tsx, .js, .jsx, .mjs, .cjs, .py, .pyi, .rs, .go, .java.
 * Other files are chunked with character-based fallback.
 * Never throws: any chunking error falls back to character-based chunks.
 * NOTE(review): options.minChunkSize is declared in ASTChunkOptions but not forwarded here — confirm intended.
 */
export async function chunkCodeByAST(code, filePath, options = {}) {
    const maxChunkSize = options.maxChunkSize ?? 1000;
    try {
        const astChunks = await codeChunk(filePath, code, {
            maxChunkSize,
            contextMode: 'full',
        });
        // An empty result for non-empty input means the parser produced nothing
        // useful — treat it like a failure and fall back.
        const hasContent = code.trim().length > 0;
        if (hasContent && astChunks.length === 0) {
            return createFallbackChunks(code, maxChunkSize);
        }
        return astChunks.map(mapCodeChunkToResult);
    }
    catch (error) {
        // UnsupportedLanguageError, ChunkingError, or any other: fall back to character chunking
        const reason = error instanceof Error ? error.message : String(error);
        console.error('[WARN] AST chunking failed, falling back to character chunking:', reason);
        return createFallbackChunks(code, maxChunkSize);
    }
}
75
/**
 * Simple wrapper for backward compatibility
 *
 * Runs chunkCodeByAST and returns only the chunk text, discarding
 * line ranges and metadata.
 */
export async function chunkCodeByASTSimple(code, filePath, options = {}) {
    const detailed = await chunkCodeByAST(code, filePath, options);
    const texts = [];
    for (const item of detailed) {
        texts.push(item.content);
    }
    return texts;
}
82
/**
 * Get list of languages supported for AST chunking (code-chunk)
 *
 * Returns a fresh copy so callers cannot mutate the internal list.
 */
export function getSupportedLanguages() {
    return CODE_CHUNK_SUPPORTED_LANGUAGES.slice();
}
88
+ //# sourceMappingURL=ast-chunking.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast-chunking.js","sourceRoot":"","sources":["../src/ast-chunking.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,YAAY,CAAA;AAE/C,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAyB3C,yDAAyD;AACzD,MAAM,8BAA8B,GAAG;IACtC,YAAY;IACZ,YAAY;IACZ,QAAQ;IACR,MAAM;IACN,IAAI;IACJ,MAAM;CACG,CAAA;AAEV;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,YAAoB;IAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,EAAE,YAAY,EAAE,CAAC,CAAA;IAChD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAClC,OAAO;QACP,IAAI,EAAE,MAAM;QACZ,SAAS,EAAE,CAAC;QACZ,OAAO,EAAE,CAAC;QACV,QAAQ,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE;KACtC,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAiB;IAC9C,MAAM,WAAW,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;IACzC,MAAM,IAAI,GAAG,WAAW,EAAE,IAAI,IAAI,OAAO,CAAA;IACzC,OAAO;QACN,OAAO,EAAE,CAAC,CAAC,IAAI;QACf,IAAI;QACJ,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC,KAAK,GAAG,CAAC;QAChC,OAAO,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,GAAG,CAAC;QAC5B,QAAQ,EAAE;YACT,QAAQ,EAAE,KAAK;YACf,KAAK,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACnE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACxC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,SAAS,EAAE,CAAC,CAAC,SAAS;aACtB,CAAC,CAAC;SACH;KACD,CAAA;AACF,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CACnC,IAAY,EACZ,QAAgB,EAChB,UAA2B,EAAE;IAE7B,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAA;IAEjD,IAAI,CAAC;QACJ,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE;YAC9C,YAAY;YACZ,WAAW,EAAE,MAAM;SACnB,CAAC,CAAA;QACF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnD,OAAO,oBAAoB,CAAC,IAAI,EAAE,YAAY,CAAC,CAAA;QAChD,CAAC;QACD,OAAO,MAAM,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAA;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,yFAAyF;QACzF,OAAO,CAAC,KAAK,CAAC,iEAAiE,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,K
AAK,CAAC,CAAC,CAAA;QACxI,OAAO,oBAAoB,CAAC,IAAI,EAAE,YAAY,CAAC,CAAA;IAChD,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACzC,IAAY,EACZ,QAAgB,EAChB,UAA2B,EAAE;IAE7B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAA;IAC5D,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;AAC5C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB;IACpC,OAAO,CAAC,GAAG,8BAA8B,CAAC,CAAA;AAC3C,CAAC"}
@@ -0,0 +1,5 @@
1
/**
 * Tests for AST-based code chunking (code-chunk for supported languages, fallback for others)
 */
// The test module has no runtime exports; this empty export marks it as an ES module.
export {};
//# sourceMappingURL=ast-chunking.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast-chunking.test.d.ts","sourceRoot":"","sources":["../src/ast-chunking.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Tests for AST-based code chunking (code-chunk for supported languages, fallback for others)
3
+ */
4
+ import { describe, expect, it } from 'vitest';
5
+ import { chunkCodeByAST, chunkCodeByASTSimple, getSupportedLanguages, } from './ast-chunking.js';
6
// Top-level suite covering chunkCodeByAST, chunkCodeByASTSimple, and getSupportedLanguages.
describe('AST-based chunking', () => {
    describe('getSupportedLanguages', () => {
        it('returns code-chunk supported languages', () => {
            const langs = getSupportedLanguages();
            expect(langs).toContain('typescript');
            expect(langs).toContain('javascript');
            expect(langs).toContain('python');
            expect(langs).toContain('rust');
            expect(langs).toContain('go');
            expect(langs).toContain('java');
            // Exactly the six languages asserted above — no extras.
            expect(langs.length).toBe(6);
        });
    });
    // Markdown has no AST support in code-chunk, so the character fallback path is exercised.
    describe('Markdown (fallback)', () => {
        it('uses character fallback for .md (no AST)', async () => {
            const markdown = `# Introduction

This is the introduction paragraph.

## Section 1

Some content here.
`;
            const chunks = await chunkCodeByAST(markdown, 'test.md');
            expect(chunks.length).toBeGreaterThan(0);
            // Fallback: type 'text', metadata.fallback true
            chunks.forEach((c) => {
                expect(c.type).toBe('text');
                expect(c.metadata.fallback).toBe(true);
            });
        });
        it('fallback preserves content and gives line 0 for fallback chunks', async () => {
            const markdown = `# Title

Paragraph 1

Paragraph 2
`;
            const chunks = await chunkCodeByAST(markdown, 'test.md');
            // Fallback chunks carry 0 for both line fields, hence >= 0 rather than >= 1.
            chunks.forEach((chunk) => {
                expect(chunk.content).toBeTruthy();
                expect(chunk.startLine).toBeGreaterThanOrEqual(0);
                expect(chunk.endLine).toBeGreaterThanOrEqual(0);
            });
            expect(chunks.some((c) => c.content.includes('Title'))).toBe(true);
        });
    });
    // JavaScript goes through the tree-sitter path, so chunks align to semantic units.
    describe('JavaScript (code-chunk)', () => {
        it('should split JavaScript by functions', async () => {
            const code = `function foo() {
	return 1;
}

function bar() {
	return 2;
}

function baz() {
	return 3;
}
`;
            const chunks = await chunkCodeByASTSimple(code, 'test.js');
            expect(chunks.length).toBeGreaterThanOrEqual(1);
            expect(chunks.some((c) => c.includes('function foo'))).toBe(true);
            expect(chunks.some((c) => c.includes('function bar'))).toBe(true);
            expect(chunks.some((c) => c.includes('function baz'))).toBe(true);
        });
        it('should handle classes', async () => {
            const code = `class MyClass {
	constructor() {
		this.value = 0;
	}

	increment() {
		this.value++;
	}
}
`;
            const chunks = await chunkCodeByASTSimple(code, 'test.js');
            expect(chunks.length).toBeGreaterThan(0);
            expect(chunks.some((c) => c.includes('class MyClass'))).toBe(true);
        });
        it('should have meaningful line ranges for JS chunks', async () => {
            const code = `function first() {
	return 1;
}

function second() {
	return 2;
}
`;
            const chunks = await chunkCodeByAST(code, 'test.js');
            expect(chunks.length).toBeGreaterThan(0);
            // AST chunks (unlike fallback) report 1-based line ranges and fallback=false.
            chunks.forEach((chunk) => {
                expect(chunk.startLine).toBeGreaterThanOrEqual(1);
                expect(chunk.endLine).toBeGreaterThanOrEqual(chunk.startLine);
                expect(chunk.content).toBeTruthy();
                expect(chunk.metadata.fallback).toBe(false);
            });
        });
    });
    describe('Size constraints', () => {
        it('should respect maxChunkSize', async () => {
            const largeCode = `function veryLargeFunction() {
	${'return 1;\n'.repeat(100)}
}
`;
            const chunks = await chunkCodeByASTSimple(largeCode, 'test.js', {
                maxChunkSize: 500,
            });
            // code-chunk may exceed slightly; ensure we got multiple chunks and none is huge
            expect(chunks.length).toBeGreaterThan(1);
            chunks.forEach((chunk) => {
                expect(chunk.length).toBeLessThanOrEqual(800);
            });
        });
    });
    describe('Fallback behavior', () => {
        it('should fallback to character chunking for unknown languages', async () => {
            const code = 'a'.repeat(2000);
            const chunks = await chunkCodeByAST(code, 'test.unknown');
            expect(chunks.length).toBeGreaterThan(1);
            expect(chunks[0].metadata.fallback).toBe(true);
        });
        it('should return chunks when AST parsing fails (fallback or resilient parse)', async () => {
            const invalidCode = 'function { syntax error }';
            const chunks = await chunkCodeByAST(invalidCode, 'test.js');
            // code-chunk may either throw (then we fallback) or return partial chunks
            expect(chunks.length).toBeGreaterThan(0);
            expect(chunks[0].content).toBeTruthy();
        });
        it('should handle empty input', async () => {
            const chunks = await chunkCodeByAST('', 'test.js');
            expect(chunks.length).toBe(0);
        });
    });
    describe('Edge cases', () => {
        it('should use fallback for HTML (unsupported by code-chunk)', async () => {
            const html = `<div>
	<p>Paragraph 1</p>
</div>
`;
            const chunks = await chunkCodeByAST(html, 'test.html');
            expect(chunks.length).toBeGreaterThan(0);
            chunks.forEach((c) => expect(c.metadata.fallback).toBe(true));
        });
        it('should handle single-line content', async () => {
            const code = 'const x = 42;';
            const chunks = await chunkCodeByAST(code, 'test.js');
            expect(chunks.length).toBeGreaterThan(0);
            expect(chunks[0].content).toBeTruthy();
        });
    });
    describe('Performance', () => {
        it('should handle large files efficiently', async () => {
            // Synthesize a ~100-section markdown document as the large-input fixture.
            const sections = Array.from({ length: 100 }, (_, i) => {
                return `## Section ${i + 1}\n\nContent for section ${i + 1}.\n`;
            }).join('\n');
            const markdown = `# Large Document\n\n${sections}`;
            const start = Date.now();
            const chunks = await chunkCodeByAST(markdown, 'test.md');
            const duration = Date.now() - start;
            expect(chunks.length).toBeGreaterThan(1);
            // Generous wall-clock bound to avoid flakiness on slow CI machines.
            expect(duration).toBeLessThan(5000);
        });
    });
});
173
+ //# sourceMappingURL=ast-chunking.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast-chunking.test.js","sourceRoot":"","sources":["../src/ast-chunking.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAA;AAC7C,OAAO,EACN,cAAc,EACd,oBAAoB,EACpB,qBAAqB,GACrB,MAAM,mBAAmB,CAAA;AAE1B,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IACnC,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YACjD,MAAM,KAAK,GAAG,qBAAqB,EAAE,CAAA;YACrC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAA;YACrC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAA;YACrC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;YACjC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;YAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACpC,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,QAAQ,GAAG;;;;;;;CAOnB,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;YAExD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,gDAAgD;YAChD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;gBACpB,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;gBAC3B,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACvC,CAAC,CAAC,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,iEAAiE,EAAE,KAAK,IAAI,EAAE;YAChF,MAAM,QAAQ,GAAG;;;;;CAKnB,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;YAExD,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACxB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;gBAClC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;gBACjD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;YAChD,CAAC,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnE,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACxC,EAAE,CAAC,sCAAsC,EAAE,KAAK,
IAAI,EAAE;YACrD,MAAM,IAAI,GAAG;;;;;;;;;;;CAWf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAE1D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;YAC/C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAClE,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;YACtC,MAAM,IAAI,GAAG;;;;;;;;;CASf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAE1D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACnE,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YACjE,MAAM,IAAI,GAAG;;;;;;;CAOf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAEpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACxB,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;gBACjD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,sBAAsB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBAC7D,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;gBAClC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC5C,CAAC,CAAC,CAAA;QACH,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,SAAS,GAAG;IACjB,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC;;CAE5B,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,SAAS,EAAE;gBAC/D,YAAY,EAAE,GAAG;aACjB,CAAC,CAAA;YAEF,iFAAiF;YACjF,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACxB,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAA;YAC9C,CAAC,CA
AC,CAAA;QACH,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;YAC5E,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YAE7B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,CAAA;YAEzD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,2EAA2E,EAAE,KAAK,IAAI,EAAE;YAC1F,MAAM,WAAW,GAAG,2BAA2B,CAAA;YAE/C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,WAAW,EAAE,SAAS,CAAC,CAAA;YAE3D,0EAA0E;YAC1E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;YAC1C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,EAAE,EAAE,SAAS,CAAC,CAAA;YAElD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC9B,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC3B,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACzE,MAAM,IAAI,GAAG;;;CAGf,CAAA;YAEE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAA;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC9D,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,IAAI,GAAG,eAAe,CAAA;YAE5B,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,SAAS,CAAC,CAAA;YAEpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,CAAA;QACvC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC5B,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACtD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACrD,OAAO,cAAc,CAAC,GAAG,CAAC,2BAA2B,CAAC,GAAG,CAAC,KAAK,CAAA;YAChE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAEb,MAAM,QAAQ,GAAG,uBAAuB,QAAQ,EAAE,CAAA;YAElD,MAAM,KAAK,GAAG,IAAI,CAAC,GA
AG,EAAE,CAAA;YACxB,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAA;YACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAA;YAEnC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;QACpC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;AACH,CAAC,CAAC,CAAA"}
@@ -0,0 +1,62 @@
1
/**
 * Code-Aware Tokenizer using StarCoder2
 *
 * StarCoder2 tokenizer is lightweight (only 4.7MB) and provides
 * world-class code tokenization quality without requiring the full model.
 */
export interface CodeToken {
    /** Surface text of the token. */
    readonly text: string;
    /** Numeric id of the token in the tokenizer vocabulary. */
    readonly id: number;
}
export interface TokenizerOptions {
    /** Hugging Face model id or local path; defaults to 'bigcode/starcoder2-15b' (tokenizer files only). */
    readonly modelPath?: string;
    /** NOTE(review): declared but not read by the CodeTokenizer constructor in dist/code-tokenizer.js — confirm it is honored. */
    readonly cacheDir?: string;
}
/**
 * StarCoder2 Code Tokenizer
 *
 * Uses StarCoder2's tokenizer (4.7MB) for accurate code tokenization.
 * Does NOT require downloading the full 15B parameter model.
 */
export declare class CodeTokenizer {
    private tokenizer;
    private initialized;
    private initPromise;
    private modelPath;
    constructor(options?: TokenizerOptions);
    /**
     * Initialize tokenizer (downloads ~4.7MB on first use)
     */
    initialize(): Promise<void>;
    private doInitialize;
    /**
     * Tokenize code into terms for TF-IDF indexing
     */
    tokenize(code: string): Promise<string[]>;
    /**
     * Extract unique terms with frequency counts
     */
    extractTerms(code: string): Promise<Map<string, number>>;
    /**
     * Check if tokenizer is ready
     */
    isReady(): boolean;
}
/**
 * Get or create the global tokenizer instance
 */
export declare function getTokenizer(): CodeTokenizer;
/**
 * Tokenize code using StarCoder2 (async)
 * This is the main entry point for tokenization
 */
export declare function tokenize(code: string): Promise<string[]>;
/**
 * Extract terms with frequency counts using StarCoder2 (async)
 */
export declare function extractTerms(code: string): Promise<Map<string, number>>;
/**
 * Initialize the global tokenizer (call early to avoid delay on first tokenize)
 */
export declare function initializeTokenizer(): Promise<void>;
//# sourceMappingURL=code-tokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code-tokenizer.d.ts","sourceRoot":"","sources":["../src/code-tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,MAAM,WAAW,SAAS;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,gBAAgB;IAChC,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAA;IAC3B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAC1B;AAED;;;;;GAKG;AACH,qBAAa,aAAa;IACzB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,SAAS,CAAQ;gBAEb,OAAO,GAAE,gBAAqB;IAK1C;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAcnB,YAAY;IAiB1B;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA8B/C;;OAEG;IACG,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAW9D;;OAEG;IACH,OAAO,IAAI,OAAO;CAGlB;AAKD;;GAEG;AACH,wBAAgB,YAAY,IAAI,aAAa,CAK5C;AAED;;;GAGG;AACH,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAG9D;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAG7E;AAED;;GAEG;AACH,wBAAsB,mBAAmB,IAAI,OAAO,CAAC,IAAI,CAAC,CAGzD"}
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Code-Aware Tokenizer using StarCoder2
3
+ *
4
+ * StarCoder2 tokenizer is lightweight (only 4.7MB) and provides
5
+ * world-class code tokenization quality without requiring the full model.
6
+ */
7
+ import { AutoTokenizer } from '@huggingface/transformers';
8
+ /**
9
+ * StarCoder2 Code Tokenizer
10
+ *
11
+ * Uses StarCoder2's tokenizer (4.7MB) for accurate code tokenization.
12
+ * Does NOT require downloading the full 15B parameter model.
13
+ */
14
+ export class CodeTokenizer {
15
+ tokenizer;
16
+ initialized = false;
17
+ initPromise = null;
18
+ modelPath;
19
+ constructor(options = {}) {
20
+ // Default to StarCoder2 tokenizer (only downloads tokenizer files, not model)
21
+ this.modelPath = options.modelPath || 'bigcode/starcoder2-15b';
22
+ }
23
+ /**
24
+ * Initialize tokenizer (downloads ~4.7MB on first use)
25
+ */
26
+ async initialize() {
27
+ if (this.initialized) {
28
+ return;
29
+ }
30
+ // Prevent multiple concurrent initializations
31
+ if (this.initPromise) {
32
+ return this.initPromise;
33
+ }
34
+ this.initPromise = this.doInitialize();
35
+ return this.initPromise;
36
+ }
37
+ async doInitialize() {
38
+ try {
39
+ console.error('[INFO] Loading StarCoder2 tokenizer (4.7MB, one-time download)...');
40
+ const startTime = Date.now();
41
+ this.tokenizer = await AutoTokenizer.from_pretrained(this.modelPath);
42
+ const loadTime = Date.now() - startTime;
43
+ console.error(`[SUCCESS] Tokenizer loaded in ${loadTime}ms`);
44
+ this.initialized = true;
45
+ }
46
+ catch (error) {
47
+ this.initPromise = null;
48
+ throw new Error(`Failed to load tokenizer: ${error.message}`);
49
+ }
50
+ }
51
+ /**
52
+ * Tokenize code into terms for TF-IDF indexing
53
+ */
54
+ async tokenize(code) {
55
+ if (!this.initialized) {
56
+ await this.initialize();
57
+ }
58
+ if (!code || code.trim().length === 0) {
59
+ return [];
60
+ }
61
+ // Encode with StarCoder2
62
+ const encoded = await this.tokenizer(code);
63
+ const inputIds = encoded.input_ids.tolist()[0];
64
+ // Decode each token ID to get the actual tokens
65
+ const tokens = [];
66
+ for (const id of inputIds) {
67
+ const token = await this.tokenizer.decode([id], {
68
+ skip_special_tokens: true,
69
+ });
70
+ const cleaned = token.trim().toLowerCase();
71
+ // Filter: keep tokens with length > 1 (skip single chars and empty)
72
+ if (cleaned.length > 1) {
73
+ tokens.push(cleaned);
74
+ }
75
+ }
76
+ return tokens;
77
+ }
78
+ /**
79
+ * Extract unique terms with frequency counts
80
+ */
81
+ async extractTerms(code) {
82
+ const tokens = await this.tokenize(code);
83
+ const termFreq = new Map();
84
+ for (const token of tokens) {
85
+ termFreq.set(token, (termFreq.get(token) || 0) + 1);
86
+ }
87
+ return termFreq;
88
+ }
89
+ /**
90
+ * Check if tokenizer is ready
91
+ */
92
+ isReady() {
93
+ return this.initialized;
94
+ }
95
+ }
96
+ // Singleton instance for global use
97
+ let globalTokenizer = null;
98
+ /**
99
+ * Get or create the global tokenizer instance
100
+ */
101
+ export function getTokenizer() {
102
+ if (!globalTokenizer) {
103
+ globalTokenizer = new CodeTokenizer();
104
+ }
105
+ return globalTokenizer;
106
+ }
107
+ /**
108
+ * Tokenize code using StarCoder2 (async)
109
+ * This is the main entry point for tokenization
110
+ */
111
+ export async function tokenize(code) {
112
+ const tokenizer = getTokenizer();
113
+ return tokenizer.tokenize(code);
114
+ }
115
+ /**
116
+ * Extract terms with frequency counts using StarCoder2 (async)
117
+ */
118
+ export async function extractTerms(code) {
119
+ const tokenizer = getTokenizer();
120
+ return tokenizer.extractTerms(code);
121
+ }
122
+ /**
123
+ * Initialize the global tokenizer (call early to avoid delay on first tokenize)
124
+ */
125
+ export async function initializeTokenizer() {
126
+ const tokenizer = getTokenizer();
127
+ await tokenizer.initialize();
128
+ }
129
+ //# sourceMappingURL=code-tokenizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code-tokenizer.js","sourceRoot":"","sources":["../src/code-tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAA;AAYzD;;;;;GAKG;AACH,MAAM,OAAO,aAAa;IACjB,SAAS,CAAK;IACd,WAAW,GAAG,KAAK,CAAA;IACnB,WAAW,GAAyB,IAAI,CAAA;IACxC,SAAS,CAAQ;IAEzB,YAAY,UAA4B,EAAE;QACzC,8EAA8E;QAC9E,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,wBAAwB,CAAA;IAC/D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACf,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAM;QACP,CAAC;QAED,8CAA8C;QAC9C,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC,WAAW,CAAA;QACxB,CAAC;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,YAAY,EAAE,CAAA;QACtC,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAEO,KAAK,CAAC,YAAY;QACzB,IAAI,CAAC;YACJ,OAAO,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAA;YAClF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;YAE5B,IAAI,CAAC,SAAS,GAAG,MAAM,aAAa,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YAEpE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;YACvC,OAAO,CAAC,KAAK,CAAC,iCAAiC,QAAQ,IAAI,CAAC,CAAA;YAE5D,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;QACxB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YACvB,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;QAC9D,CAAC;IACF,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,IAAY;QAC1B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAA;QACxB,CAAC;QAED,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,EAAE,CAAA;QACV,CAAC;QAED,yBAAyB;QACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAA;QAE9C,gDAAgD;QAChD,MAAM,MAAM,GAAa,EAAE,CAAA;QAC3B,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE;gBAC/C,mBAAmB,EAAE,IAAI;aACzB,CAAC,CAAA;YAEF,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YAC1C,oEAAoE;YACpE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACrB,CAAC;QACF,CAAC;QAED,OAAO,MAAM,CAAA;IACd,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,IAA
Y;QAC9B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;QACxC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;QAE1C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACpD,CAAC;QAED,OAAO,QAAQ,CAAA;IAChB,CAAC;IAED;;OAEG;IACH,OAAO;QACN,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;CACD;AAED,oCAAoC;AACpC,IAAI,eAAe,GAAyB,IAAI,CAAA;AAEhD;;GAEG;AACH,MAAM,UAAU,YAAY;IAC3B,IAAI,CAAC,eAAe,EAAE,CAAC;QACtB,eAAe,GAAG,IAAI,aAAa,EAAE,CAAA;IACtC,CAAC;IACD,OAAO,eAAe,CAAA;AACvB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAY;IAC1C,MAAM,SAAS,GAAG,YAAY,EAAE,CAAA;IAChC,OAAO,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC9C,MAAM,SAAS,GAAG,YAAY,EAAE,CAAA;IAChC,OAAO,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;AACpC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB;IACxC,MAAM,SAAS,GAAG,YAAY,EAAE,CAAA;IAChC,MAAM,SAAS,CAAC,UAAU,EAAE,CAAA;AAC7B,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Code Tokenizer Tests (StarCoder2)
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=code-tokenizer.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code-tokenizer.test.d.ts","sourceRoot":"","sources":["../src/code-tokenizer.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Code Tokenizer Tests (StarCoder2)
3
+ */
4
+ import { beforeAll, describe, expect, it } from 'vitest';
5
+ import { CodeTokenizer, extractTerms, initializeTokenizer, tokenize } from './code-tokenizer.js';
6
+ // Skip if running in CI without model cache
7
+ const shouldSkip = process.env.CI === 'true' && !process.env.HF_HOME;
8
+ describe('StarCoder2 Tokenizer', () => {
9
+ beforeAll(async () => {
10
+ if (shouldSkip)
11
+ return;
12
+ // Initialize tokenizer once for all tests
13
+ await initializeTokenizer();
14
+ }, 60000); // 60s timeout for download
15
+ describe('tokenize function', () => {
16
+ it.skipIf(shouldSkip)('should tokenize simple code', async () => {
17
+ const tokens = await tokenize('getUserData(userId)');
18
+ expect(tokens.length).toBeGreaterThan(0);
19
+ // StarCoder2 should understand code structure
20
+ expect(tokens.some((t) => t.includes('get') || t.includes('user'))).toBe(true);
21
+ });
22
+ it.skipIf(shouldSkip)('should tokenize complex code', async () => {
23
+ const code = `
24
+ async function authenticateUser(username: string, password: string) {
25
+ const user = await findUserByUsername(username);
26
+ if (!user) throw new Error('User not found');
27
+ return await verifyPassword(user, password);
28
+ }
29
+ `;
30
+ const tokens = await tokenize(code);
31
+ expect(tokens.length).toBeGreaterThan(10);
32
+ // Should extract meaningful tokens
33
+ expect(tokens.some((t) => t.includes('authenticate'))).toBe(true);
34
+ expect(tokens.some((t) => t.includes('user'))).toBe(true);
35
+ expect(tokens.some((t) => t.includes('password'))).toBe(true);
36
+ });
37
+ it.skipIf(shouldSkip)('should handle empty input', async () => {
38
+ const tokens = await tokenize('');
39
+ expect(tokens).toHaveLength(0);
40
+ });
41
+ it.skipIf(shouldSkip)('should handle whitespace', async () => {
42
+ const tokens = await tokenize(' \n\t ');
43
+ expect(tokens).toHaveLength(0);
44
+ });
45
+ it.skipIf(shouldSkip)('should handle camelCase', async () => {
46
+ const tokens = await tokenize('getUserData');
47
+ expect(tokens.length).toBeGreaterThan(0);
48
+ });
49
+ it.skipIf(shouldSkip)('should handle snake_case', async () => {
50
+ const tokens = await tokenize('is_authenticated');
51
+ expect(tokens.length).toBeGreaterThan(0);
52
+ });
53
+ });
54
+ describe('extractTerms function', () => {
55
+ it.skipIf(shouldSkip)('should extract terms with frequencies', async () => {
56
+ const code = 'user user authenticate user';
57
+ const terms = await extractTerms(code);
58
+ expect(terms.size).toBeGreaterThan(0);
59
+ // Should count frequencies
60
+ const userFreq = terms.get('user');
61
+ expect(userFreq).toBeGreaterThanOrEqual(1);
62
+ });
63
+ it.skipIf(shouldSkip)('should handle code with duplicates', async () => {
64
+ const code = `
65
+ function getUserData(userId) {
66
+ const user = findUser(userId);
67
+ return user;
68
+ }
69
+ `;
70
+ const terms = await extractTerms(code);
71
+ expect(terms.size).toBeGreaterThan(0);
72
+ });
73
+ });
74
+ describe('CodeTokenizer class', () => {
75
+ let tokenizer;
76
+ beforeAll(async () => {
77
+ if (shouldSkip)
78
+ return;
79
+ tokenizer = new CodeTokenizer();
80
+ await tokenizer.initialize();
81
+ }, 30000);
82
+ it.skipIf(shouldSkip)('should initialize successfully', () => {
83
+ expect(tokenizer.isReady()).toBe(true);
84
+ });
85
+ it.skipIf(shouldSkip)('should tokenize code', async () => {
86
+ const tokens = await tokenizer.tokenize('function test() {}');
87
+ expect(tokens.length).toBeGreaterThan(0);
88
+ expect(tokens.some((t) => t.includes('function') || t.includes('test'))).toBe(true);
89
+ });
90
+ it.skipIf(shouldSkip)('should extract terms', async () => {
91
+ const terms = await tokenizer.extractTerms('const x = 1; const y = 2;');
92
+ expect(terms.size).toBeGreaterThan(0);
93
+ });
94
+ });
95
+ });
96
+ //# sourceMappingURL=code-tokenizer.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code-tokenizer.test.js","sourceRoot":"","sources":["../src/code-tokenizer.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAA;AACxD,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,mBAAmB,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAEhG,4CAA4C;AAC5C,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,KAAK,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAA;AAEpE,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACrC,SAAS,CAAC,KAAK,IAAI,EAAE;QACpB,IAAI,UAAU;YAAE,OAAM;QACtB,0CAA0C;QAC1C,MAAM,mBAAmB,EAAE,CAAA;IAC5B,CAAC,EAAE,KAAK,CAAC,CAAA,CAAC,2BAA2B;IAErC,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC/D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,qBAAqB,CAAC,CAAA;YAEpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,8CAA8C;YAC9C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/E,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAChE,MAAM,IAAI,GAAG;;;;;;IAMZ,CAAA;YAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAA;YAEnC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC,CAAA;YACzC,mCAAmC;YACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACjE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACzD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC9D,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;YAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,EAAE,CAAC,CAAA;YACjC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,0BAA0B,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,CAAA;YAC1C,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,C
AAC,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,yBAAyB,EAAE,KAAK,IAAI,EAAE;YAC3D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,aAAa,CAAC,CAAA;YAE5C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,0BAA0B,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,CAAC,CAAA;YAEjD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACzE,MAAM,IAAI,GAAG,6BAA6B,CAAA;YAC1C,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAA;YAEtC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACrC,2BAA2B;YAC3B,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;YAClC,MAAM,CAAC,QAAQ,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;QAC3C,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,IAAI,GAAG;;;;;IAKZ,CAAA;YAED,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAA;YAEtC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACpC,IAAI,SAAwB,CAAA;QAE5B,SAAS,CAAC,KAAK,IAAI,EAAE;YACpB,IAAI,UAAU;gBAAE,OAAM;YACtB,SAAS,GAAG,IAAI,aAAa,EAAE,CAAA;YAC/B,MAAM,SAAS,CAAC,UAAU,EAAE,CAAA;QAC7B,CAAC,EAAE,KAAK,CAAC,CAAA;QAET,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC5D,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAA;YAE7D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;YACxC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpF,CAAC,CAAC,CAAA;QAEF,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,YAAY,CAAC,2
BAA2B,CAAC,CAAA;YAEvE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;IACH,CAAC,CAAC,CAAA;AACH,CAAC,CAAC,CAAA"}