@liendev/parser 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/dist/ast/chunker.d.ts +30 -0
  2. package/dist/ast/chunker.d.ts.map +1 -0
  3. package/dist/ast/chunker.js +310 -0
  4. package/dist/ast/chunker.js.map +1 -0
  5. package/dist/ast/complexity/cognitive.d.ts +16 -0
  6. package/dist/ast/complexity/cognitive.d.ts.map +1 -0
  7. package/dist/ast/complexity/cognitive.js +137 -0
  8. package/dist/ast/complexity/cognitive.js.map +1 -0
  9. package/dist/ast/complexity/cyclomatic.d.ts +12 -0
  10. package/dist/ast/complexity/cyclomatic.d.ts.map +1 -0
  11. package/dist/ast/complexity/cyclomatic.js +54 -0
  12. package/dist/ast/complexity/cyclomatic.js.map +1 -0
  13. package/dist/ast/complexity/halstead.d.ts +56 -0
  14. package/dist/ast/complexity/halstead.d.ts.map +1 -0
  15. package/dist/ast/complexity/halstead.js +196 -0
  16. package/dist/ast/complexity/halstead.js.map +1 -0
  17. package/dist/ast/complexity/index.d.ts +13 -0
  18. package/dist/ast/complexity/index.d.ts.map +1 -0
  19. package/dist/ast/complexity/index.js +12 -0
  20. package/dist/ast/complexity/index.js.map +1 -0
  21. package/dist/ast/extractors/index.d.ts +35 -0
  22. package/dist/ast/extractors/index.d.ts.map +1 -0
  23. package/dist/ast/extractors/index.js +41 -0
  24. package/dist/ast/extractors/index.js.map +1 -0
  25. package/dist/ast/extractors/symbol-helpers.d.ts +20 -0
  26. package/dist/ast/extractors/symbol-helpers.d.ts.map +1 -0
  27. package/dist/ast/extractors/symbol-helpers.js +58 -0
  28. package/dist/ast/extractors/symbol-helpers.js.map +1 -0
  29. package/dist/ast/extractors/types.d.ts +108 -0
  30. package/dist/ast/extractors/types.d.ts.map +1 -0
  31. package/dist/ast/extractors/types.js +2 -0
  32. package/dist/ast/extractors/types.js.map +1 -0
  33. package/dist/ast/languages/javascript.d.ts +134 -0
  34. package/dist/ast/languages/javascript.d.ts.map +1 -0
  35. package/dist/ast/languages/javascript.js +787 -0
  36. package/dist/ast/languages/javascript.js.map +1 -0
  37. package/dist/ast/languages/php.d.ts +84 -0
  38. package/dist/ast/languages/php.d.ts.map +1 -0
  39. package/dist/ast/languages/php.js +452 -0
  40. package/dist/ast/languages/php.js.map +1 -0
  41. package/dist/ast/languages/python.d.ts +96 -0
  42. package/dist/ast/languages/python.d.ts.map +1 -0
  43. package/dist/ast/languages/python.js +448 -0
  44. package/dist/ast/languages/python.js.map +1 -0
  45. package/dist/ast/languages/registry.d.ts +30 -0
  46. package/dist/ast/languages/registry.d.ts.map +1 -0
  47. package/dist/ast/languages/registry.js +95 -0
  48. package/dist/ast/languages/registry.js.map +1 -0
  49. package/dist/ast/languages/rust.d.ts +113 -0
  50. package/dist/ast/languages/rust.d.ts.map +1 -0
  51. package/dist/ast/languages/rust.js +614 -0
  52. package/dist/ast/languages/rust.js.map +1 -0
  53. package/dist/ast/languages/types.d.ts +52 -0
  54. package/dist/ast/languages/types.d.ts.map +1 -0
  55. package/dist/ast/languages/types.js +2 -0
  56. package/dist/ast/languages/types.js.map +1 -0
  57. package/dist/ast/languages/typescript.d.ts +3 -0
  58. package/dist/ast/languages/typescript.d.ts.map +1 -0
  59. package/dist/ast/languages/typescript.js +134 -0
  60. package/dist/ast/languages/typescript.js.map +1 -0
  61. package/dist/ast/parser.d.ts +29 -0
  62. package/dist/ast/parser.d.ts.map +1 -0
  63. package/dist/ast/parser.js +67 -0
  64. package/dist/ast/parser.js.map +1 -0
  65. package/dist/ast/symbols.d.ts +74 -0
  66. package/dist/ast/symbols.d.ts.map +1 -0
  67. package/dist/ast/symbols.js +171 -0
  68. package/dist/ast/symbols.js.map +1 -0
  69. package/dist/ast/traversers/index.d.ts +19 -0
  70. package/dist/ast/traversers/index.d.ts.map +1 -0
  71. package/dist/ast/traversers/index.js +21 -0
  72. package/dist/ast/traversers/index.js.map +1 -0
  73. package/dist/ast/traversers/types.d.ts +98 -0
  74. package/dist/ast/traversers/types.d.ts.map +1 -0
  75. package/dist/ast/traversers/types.js +2 -0
  76. package/dist/ast/traversers/types.js.map +1 -0
  77. package/dist/ast/types.d.ts +54 -0
  78. package/dist/ast/types.d.ts.map +1 -0
  79. package/dist/ast/types.js +2 -0
  80. package/dist/ast/types.js.map +1 -0
  81. package/dist/chunk-only-index.d.ts +25 -0
  82. package/dist/chunk-only-index.d.ts.map +1 -0
  83. package/dist/chunk-only-index.js +107 -0
  84. package/dist/chunk-only-index.js.map +1 -0
  85. package/dist/chunker.d.ts +12 -0
  86. package/dist/chunker.d.ts.map +1 -0
  87. package/dist/chunker.js +98 -0
  88. package/dist/chunker.js.map +1 -0
  89. package/dist/constants.d.ts +8 -0
  90. package/dist/constants.d.ts.map +1 -0
  91. package/dist/constants.js +11 -0
  92. package/dist/constants.js.map +1 -0
  93. package/dist/content-hash.d.ts +20 -0
  94. package/dist/content-hash.d.ts.map +1 -0
  95. package/dist/content-hash.js +91 -0
  96. package/dist/content-hash.js.map +1 -0
  97. package/dist/dependency-analyzer.d.ts +79 -0
  98. package/dist/dependency-analyzer.d.ts.map +1 -0
  99. package/dist/dependency-analyzer.js +408 -0
  100. package/dist/dependency-analyzer.js.map +1 -0
  101. package/dist/ecosystem-presets.d.ts +32 -0
  102. package/dist/ecosystem-presets.d.ts.map +1 -0
  103. package/dist/ecosystem-presets.js +325 -0
  104. package/dist/ecosystem-presets.js.map +1 -0
  105. package/dist/gitignore.d.ts +22 -0
  106. package/dist/gitignore.d.ts.map +1 -0
  107. package/dist/gitignore.js +128 -0
  108. package/dist/gitignore.js.map +1 -0
  109. package/dist/index.d.ts +32 -0
  110. package/dist/index.d.ts.map +1 -0
  111. package/dist/index.js +68 -0
  112. package/dist/index.js.map +1 -0
  113. package/dist/insights/chunk-complexity.d.ts +89 -0
  114. package/dist/insights/chunk-complexity.d.ts.map +1 -0
  115. package/dist/insights/chunk-complexity.js +332 -0
  116. package/dist/insights/chunk-complexity.js.map +1 -0
  117. package/dist/insights/types.d.ts +73 -0
  118. package/dist/insights/types.d.ts.map +1 -0
  119. package/dist/insights/types.js +9 -0
  120. package/dist/insights/types.js.map +1 -0
  121. package/dist/json-template-chunker.d.ts +12 -0
  122. package/dist/json-template-chunker.d.ts.map +1 -0
  123. package/dist/json-template-chunker.js +87 -0
  124. package/dist/json-template-chunker.js.map +1 -0
  125. package/dist/liquid-chunker.d.ts +16 -0
  126. package/dist/liquid-chunker.d.ts.map +1 -0
  127. package/dist/liquid-chunker.js +274 -0
  128. package/dist/liquid-chunker.js.map +1 -0
  129. package/dist/scanner.d.ts +16 -0
  130. package/dist/scanner.d.ts.map +1 -0
  131. package/dist/scanner.js +95 -0
  132. package/dist/scanner.js.map +1 -0
  133. package/dist/symbol-extractor.d.ts +18 -0
  134. package/dist/symbol-extractor.d.ts.map +1 -0
  135. package/dist/symbol-extractor.js +343 -0
  136. package/dist/symbol-extractor.js.map +1 -0
  137. package/dist/test-associations.d.ts +16 -0
  138. package/dist/test-associations.d.ts.map +1 -0
  139. package/dist/test-associations.js +43 -0
  140. package/dist/test-associations.js.map +1 -0
  141. package/dist/types.d.ts +75 -0
  142. package/dist/types.d.ts.map +1 -0
  143. package/dist/types.js +2 -0
  144. package/dist/types.js.map +1 -0
  145. package/dist/utils/path-matching.d.ts +71 -0
  146. package/dist/utils/path-matching.d.ts.map +1 -0
  147. package/dist/utils/path-matching.js +258 -0
  148. package/dist/utils/path-matching.js.map +1 -0
  149. package/dist/utils/repo-id.d.ts +6 -0
  150. package/dist/utils/repo-id.d.ts.map +1 -0
  151. package/dist/utils/repo-id.js +12 -0
  152. package/dist/utils/repo-id.js.map +1 -0
  153. package/package.json +66 -0
@@ -0,0 +1,98 @@
1
+ import type Parser from 'tree-sitter';
2
+ /**
3
+ * Language-specific node traversal configuration
4
+ *
5
+ * Each language has different AST node types and structures. This interface
6
+ * allows us to implement language-specific traversal strategies while keeping
7
+ * the core chunking logic language-agnostic.
8
+ *
9
+ * @example TypeScript/JavaScript
10
+ * ```typescript
11
+ * targetNodeTypes: ['function_declaration', 'method_definition', 'interface_declaration']
12
+ * containerTypes: ['class_declaration']
13
+ * ```
14
+ *
15
+ * @example Python
16
+ * ```typescript
17
+ * targetNodeTypes: ['function_definition', 'async_function_definition']
18
+ * containerTypes: ['class_definition']
19
+ * ```
20
+ */
21
+ export interface LanguageTraverser {
22
+ /**
23
+ * AST node types that should be extracted as chunks
24
+ * (e.g., 'function_declaration', 'method_definition' for TypeScript)
25
+ */
26
+ targetNodeTypes: string[];
27
+ /**
28
+ * AST node types for containers whose children should be extracted
29
+ * (e.g., 'class_declaration' for TypeScript - we extract methods, not the class itself)
30
+ */
31
+ containerTypes: string[];
32
+ /**
33
+ * AST node types that represent variable declarations that might contain functions
34
+ * (e.g., 'lexical_declaration' for TypeScript const/let with arrow functions)
35
+ */
36
+ declarationTypes: string[];
37
+ /**
38
+ * AST node types that represent function implementations
39
+ * (used to detect functions inside variable declarations)
40
+ */
41
+ functionTypes: string[];
42
+ /**
43
+ * Check if a node should have its children extracted instead of being chunked itself
44
+ *
45
+ * @param node - AST node to check
46
+ * @returns True if we should extract children (e.g., class methods), false otherwise
47
+ */
48
+ shouldExtractChildren(node: Parser.SyntaxNode): boolean;
49
+ /**
50
+ * Check if a node is a declaration that might contain a function
51
+ *
52
+ * @param node - AST node to check
53
+ * @returns True if this is a variable declaration that might contain a function
54
+ */
55
+ isDeclarationWithFunction(node: Parser.SyntaxNode): boolean;
56
+ /**
57
+ * Extract the container body node (e.g., class body) for child traversal
58
+ *
59
+ * @param node - Container node (e.g., class_declaration)
60
+ * @returns The body node containing children, or null if not found
61
+ */
62
+ getContainerBody(node: Parser.SyntaxNode): Parser.SyntaxNode | null;
63
+ /**
64
+ * Check if traversal should continue into this node's children
65
+ *
66
+ * @param node - AST node to check
67
+ * @returns True if we should traverse children (e.g., for 'program', 'export_statement')
68
+ */
69
+ shouldTraverseChildren(node: Parser.SyntaxNode): boolean;
70
+ /**
71
+ * Find the parent container name for a node (e.g., class name for a method)
72
+ *
73
+ * @param node - AST node (e.g., method)
74
+ * @returns Container name (e.g., class name), or undefined if not in a container
75
+ */
76
+ findParentContainerName(node: Parser.SyntaxNode): string | undefined;
77
+ /**
78
+ * Find a function inside a declaration node (e.g., arrow function in const declaration)
79
+ *
80
+ * @param node - Declaration node to search
81
+ * @returns Information about whether a function was found and the function node itself
82
+ */
83
+ findFunctionInDeclaration(node: Parser.SyntaxNode): DeclarationFunctionInfo;
84
+ }
85
+ /**
86
+ * Result of finding a function inside a declaration node
87
+ */
88
+ export interface DeclarationFunctionInfo {
89
+ /**
90
+ * Whether a function was found inside the declaration
91
+ */
92
+ hasFunction: boolean;
93
+ /**
94
+ * The actual function node if found
95
+ */
96
+ functionNode: Parser.SyntaxNode | null;
97
+ }
98
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/ast/traversers/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AAEtC;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,eAAe,EAAE,MAAM,EAAE,CAAC;IAE1B;;;OAGG;IACH,cAAc,EAAE,MAAM,EAAE,CAAC;IAEzB;;;OAGG;IACH,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAE3B;;;OAGG;IACH,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB;;;;;OAKG;IACH,qBAAqB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC;IAExD;;;;;OAKG;IACH,yBAAyB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC;IAE5D;;;;;OAKG;IACH,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC;IAEpE;;;;;OAKG;IACH,sBAAsB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC;IAEzD;;;;;OAKG;IACH,uBAAuB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,MAAM,GAAG,SAAS,CAAC;IAErE;;;;;OAKG;IACH,yBAAyB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,uBAAuB,CAAC;CAC7E;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC;;OAEG;IACH,WAAW,EAAE,OAAO,CAAC;IAErB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC;CACxC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/ast/traversers/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,54 @@
1
+ import type Parser from 'tree-sitter';
2
+ import type { CodeChunk } from '../types.js';
3
+ /**
4
+ * AST parse result containing the tree and any errors
5
+ */
6
+ export interface ASTParseResult {
7
+ tree: Parser.Tree | null;
8
+ error?: string;
9
+ }
10
+ /**
11
+ * Symbol information extracted from AST nodes
12
+ */
13
+ export interface SymbolInfo {
14
+ name: string;
15
+ type: 'function' | 'method' | 'class' | 'interface';
16
+ startLine: number;
17
+ endLine: number;
18
+ parentClass?: string;
19
+ signature?: string;
20
+ parameters?: string[];
21
+ returnType?: string;
22
+ complexity?: number;
23
+ cognitiveComplexity?: number;
24
+ }
25
+ /**
26
+ * Semantic metadata for AST-aware chunks
27
+ */
28
+ export interface SemanticMetadata {
29
+ symbolName?: string;
30
+ symbolType?: 'function' | 'method' | 'class' | 'interface';
31
+ parentClass?: string;
32
+ complexity?: number;
33
+ cognitiveComplexity?: number;
34
+ parameters?: string[];
35
+ signature?: string;
36
+ returnType?: string;
37
+ imports?: string[];
38
+ halsteadVolume?: number;
39
+ halsteadDifficulty?: number;
40
+ halsteadEffort?: number;
41
+ halsteadBugs?: number;
42
+ }
43
+ /**
44
+ * AST-aware chunk with enhanced semantic metadata
45
+ */
46
+ export interface ASTChunk extends CodeChunk {
47
+ metadata: CodeChunk['metadata'] & SemanticMetadata;
48
+ }
49
+ /**
50
+ * Supported languages for AST parsing.
51
+ * Canonical definition lives in languages/registry.ts; re-exported here for convenience.
52
+ */
53
+ export type { SupportedLanguage } from './languages/registry.js';
54
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/ast/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,CAAC;IACpD,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,UAAU,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,CAAC;IAC3D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IAGnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,QAAS,SAAQ,SAAS;IACzC,QAAQ,EAAE,SAAS,CAAC,UAAU,CAAC,GAAG,gBAAgB,CAAC;CACpD;AAED;;;GAGG;AACH,YAAY,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/ast/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,25 @@
1
+ import type { CodeChunk } from './types.js';
2
+ export interface ChunkOnlyOptions {
3
+ /** Explicit list of files to index (skips full repo scan when provided) */
4
+ filesToIndex?: string[];
5
+ /** Concurrency for file processing */
6
+ concurrency?: number;
7
+ /** Chunk size in lines */
8
+ chunkSize?: number;
9
+ /** Chunk overlap in lines */
10
+ chunkOverlap?: number;
11
+ }
12
+ export interface ChunkOnlyResult {
13
+ success: boolean;
14
+ filesIndexed: number;
15
+ chunksCreated: number;
16
+ durationMs: number;
17
+ chunks: CodeChunk[];
18
+ error?: string;
19
+ }
20
+ /**
21
+ * Perform chunk-only indexing (no embeddings or VectorDB).
22
+ * Returns raw chunks in-memory for direct analysis.
23
+ */
24
+ export declare function performChunkOnlyIndex(rootDir: string, options?: ChunkOnlyOptions): Promise<ChunkOnlyResult>;
25
+ //# sourceMappingURL=chunk-only-index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunk-only-index.d.ts","sourceRoot":"","sources":["../src/chunk-only-index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAS5C,MAAM,WAAW,gBAAgB;IAC/B,2EAA2E;IAC3E,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,sCAAsC;IACtC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA8DD;;;GAGG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
@@ -0,0 +1,107 @@
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+ import pLimit from 'p-limit';
4
+ import { chunkFile } from './chunker.js';
5
+ import { scanCodebase } from './scanner.js';
6
+ import { detectEcosystems, getEcosystemExcludePatterns } from './ecosystem-presets.js';
7
+ import { extractRepoId } from './utils/repo-id.js';
8
+ import { DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP } from './constants.js';
9
+ const DEFAULT_CONCURRENCY = 4;
10
/**
 * Discover the files to index beneath `rootDir`.
 *
 * The ecosystems detected for the directory are turned into exclude patterns,
 * and the scan is limited to the source-code and Markdown extensions listed
 * in `includePatterns`.
 *
 * @param rootDir - Directory to scan
 * @returns The result of `scanCodebase` (the caller treats it as an array of file paths)
 */
async function scanFilesToIndex(rootDir) {
    const detected = await detectEcosystems(rootDir);
    const excludePatterns = getEcosystemExcludePatterns(detected);
    const includePatterns = [
        '**/*.{ts,tsx,js,jsx,mjs,cjs,vue,py,php,go,rs,java,kt,swift,rb,cs,liquid,scala,c,cpp,cc,cxx,h,hpp}',
        '**/*.md',
        '**/*.mdx',
        '**/*.markdown',
    ];
    return scanCodebase({ rootDir, includePatterns, excludePatterns });
}
25
/**
 * Express `file` relative to `rootDir`.
 *
 * @param file - Absolute or already-relative file path
 * @param rootDir - Directory that absolute paths are made relative to
 * @returns `file` relative to `rootDir` when it is absolute; otherwise `file` unchanged
 */
function normalizeToRelativePath(file, rootDir) {
    // Relative inputs are returned verbatim (no normalization of "./" or "..").
    return path.isAbsolute(file) ? path.relative(rootDir, file) : file;
}
32
/**
 * Read one file, chunk it, and append the resulting chunks to `output`.
 *
 * Failures (path resolution, reading, chunking) are logged with
 * `console.error` and reported as `false`; nothing is rethrown.
 *
 * @param file - Absolute path, or a path relative to `rootDir`
 * @param rootDir - Root used to resolve relative paths and relativize absolute ones
 * @param config - Chunking settings; `chunkSize`, `chunkOverlap` and `repoId` are read
 * @param output - Array that receives the chunks (mutated in place)
 * @returns True when at least one chunk was appended, false otherwise
 */
async function chunkFileForCollection(file, rootDir, config, output) {
    try {
        const fileIsAbsolute = path.isAbsolute(file);
        // The file is read via its absolute path but chunked under its relative path.
        const absolutePath = fileIsAbsolute ? file : path.join(rootDir, file);
        const relativePath = fileIsAbsolute ? path.relative(rootDir, file) : file;
        const content = await fs.readFile(absolutePath, 'utf-8');
        const { chunkSize, chunkOverlap, repoId } = config;
        const chunks = chunkFile(relativePath, content, {
            chunkSize,
            chunkOverlap,
            useAST: true,
            astFallback: 'line-based',
            repoId,
        });
        if (chunks.length === 0) {
            return false;
        }
        output.push(...chunks);
        return true;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`[parser] Failed to process ${file}: ${reason}`);
        return false;
    }
}
58
/**
 * Perform chunk-only indexing (no embeddings or VectorDB) and return the raw
 * chunks in memory.
 *
 * @param rootDir - Repository root; used for scanning, path resolution and repo id
 * @param options - Optional `filesToIndex`, `concurrency`, `chunkSize`, `chunkOverlap`
 * @returns A result object; on failure `success` is false and `error` holds the message.
 *          This function does not reject: errors thrown inside are converted to a result.
 */
export async function performChunkOnlyIndex(rootDir, options = {}) {
    const startTime = Date.now();
    // Shared shape for both failure paths (no files / thrown error).
    const failure = (message) => ({
        success: false,
        filesIndexed: 0,
        chunksCreated: 0,
        durationMs: Date.now() - startTime,
        chunks: [],
        error: message,
    });
    try {
        let files = options.filesToIndex;
        if (files === null || files === undefined) {
            // No explicit list supplied: fall back to a full scan.
            files = await scanFilesToIndex(rootDir);
        }
        if (files.length === 0) {
            return failure('No files found to index');
        }
        const config = {
            chunkSize: options.chunkSize ?? DEFAULT_CHUNK_SIZE,
            chunkOverlap: options.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
            repoId: extractRepoId(rootDir),
        };
        const collected = [];
        let attempted = 0;
        const limit = pLimit(options.concurrency ?? DEFAULT_CONCURRENCY);
        const tasks = files.map((file) => limit(async () => {
            await chunkFileForCollection(file, rootDir, config, collected);
            // Counted for every file, whatever chunkFileForCollection returned.
            attempted++;
        }));
        await Promise.all(tasks);
        return {
            success: true,
            filesIndexed: attempted,
            chunksCreated: collected.length,
            durationMs: Date.now() - startTime,
            chunks: collected,
        };
    }
    catch (error) {
        return failure(error instanceof Error ? error.message : String(error));
    }
}
107
+ //# sourceMappingURL=chunk-only-index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunk-only-index.js","sourceRoot":"","sources":["../src/chunk-only-index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,MAAM,MAAM,SAAS,CAAC;AAE7B,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,wBAAwB,CAAC;AACvF,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAE3E,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAsB9B,qDAAqD;AACrD,KAAK,UAAU,gBAAgB,CAAC,OAAe;IAC7C,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACnD,MAAM,iBAAiB,GAAG,2BAA2B,CAAC,UAAU,CAAC,CAAC;IAElE,OAAO,YAAY,CAAC;QAClB,OAAO;QACP,eAAe,EAAE;YACf,mGAAmG;YACnG,SAAS;YACT,UAAU;YACV,eAAe;SAChB;QACD,eAAe,EAAE,iBAAiB;KACnC,CAAC,CAAC;AACL,CAAC;AAED,6CAA6C;AAC7C,SAAS,uBAAuB,CAAC,IAAY,EAAE,OAAe;IAC5D,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,sBAAsB,CACnC,IAAY,EACZ,OAAe,EACf,MAAoE,EACpE,MAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAC7E,MAAM,YAAY,GAAG,uBAAuB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC5D,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QAEzD,MAAM,MAAM,GAAG,SAAS,CAAC,YAAY,EAAE,OAAO,EAAE;YAC9C,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,MAAM,EAAE,IAAI;YACZ,WAAW,EAAE,YAAY;YACzB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,8BAA8B,IAAI,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAChG,CAAC;QACF,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAe,EACf,UAA4B,EAAE;IAE9B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,
CAAC;IAE7B,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,IAAI,CAAC,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC;QAExE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,YAAY,EAAE,CAAC;gBACf,aAAa,EAAE,CAAC;gBAChB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,yBAAyB;aACjC,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG;YACb,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,kBAAkB;YAClD,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,qBAAqB;YAC3D,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC;SAC/B,CAAC;QAEF,MAAM,SAAS,GAAgB,EAAE,CAAC;QAClC,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,IAAI,mBAAmB,CAAC,CAAC;QACjE,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CACf,KAAK,CAAC,KAAK,IAAI,EAAE;YACf,MAAM,sBAAsB,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;YAC/D,cAAc,EAAE,CAAC;QACnB,CAAC,CAAC,CACH,CACF,CAAC;QAEF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,YAAY,EAAE,cAAc;YAC5B,aAAa,EAAE,SAAS,CAAC,MAAM;YAC/B,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,MAAM,EAAE,SAAS;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,YAAY,EAAE,CAAC;YACf,aAAa,EAAE,CAAC;YAChB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,MAAM,EAAE,EAAE;YACV,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { CodeChunk } from './types.js';
2
+ export interface ChunkOptions {
3
+ chunkSize?: number;
4
+ chunkOverlap?: number;
5
+ useAST?: boolean;
6
+ astFallback?: 'line-based' | 'error';
7
+ repoId?: string;
8
+ orgId?: string;
9
+ }
10
+ export declare function chunkFile(filepath: string, content: string, options?: ChunkOptions): CodeChunk[];
11
+ export declare function chunkText(text: string, options?: ChunkOptions): string[];
12
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAO5C,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC;IAErC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,SAAS,CACvB,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,YAAiB,GACzB,SAAS,EAAE,CA8Cb;AA6DD,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,MAAM,EAAE,CAqB5E"}
@@ -0,0 +1,98 @@
1
+ import { detectFileType } from './scanner.js';
2
+ import { extractSymbols } from './symbol-extractor.js';
3
+ import { shouldUseAST, chunkByAST } from './ast/chunker.js';
4
+ import { chunkLiquidFile } from './liquid-chunker.js';
5
+ import { chunkJSONTemplate } from './json-template-chunker.js';
6
/**
 * Split a file's content into chunks, picking the strategy from its path.
 *
 * Order of precedence:
 *   1. `.liquid` files go to the Liquid chunker.
 *   2. `.json` files under a `templates/` path segment go to the JSON template chunker.
 *   3. Files accepted by `shouldUseAST` go to AST chunking (when `useAST` is on).
 *   4. Everything else, and AST failures in fallback mode, use line-based chunking.
 *
 * @param filepath - Path of the file being chunked
 * @param content - Full text of the file
 * @param options - `chunkSize` (default 75), `chunkOverlap` (default 10), `useAST`
 *                  (default true), `astFallback` (default 'line-based'), `repoId`, `orgId`
 * @returns The chunks produced by the selected strategy
 * @throws Error when AST chunking fails and `astFallback` is 'error'
 */
export function chunkFile(filepath, content, options = {}) {
    const { chunkSize = 75, chunkOverlap = 10, useAST = true, astFallback = 'line-based' } = options;
    const tenantContext = { repoId: options.repoId, orgId: options.orgId };

    if (filepath.endsWith('.liquid')) {
        return chunkLiquidFile(filepath, content, chunkSize, chunkOverlap, tenantContext);
    }

    // Shopify JSON templates: 'templates/' must be a whole path segment.
    //   matches: templates/product.json, some-path/templates/customers/account.json
    //   no match: my-templates/config.json
    // Paths such as node_modules/pkg/templates/file.json are expected to be
    // filtered out earlier by the scanner, not by this pattern.
    const isTemplateJson = filepath.endsWith('.json') && /(?:^|\/)templates\//.test(filepath);
    if (isTemplateJson) {
        return chunkJSONTemplate(filepath, content, tenantContext);
    }

    if (useAST && shouldUseAST(filepath)) {
        try {
            return chunkByAST(filepath, content, {
                minChunkSize: Math.floor(chunkSize / 10),
                repoId: tenantContext.repoId,
                orgId: tenantContext.orgId,
            });
        }
        catch (error) {
            if (astFallback === 'error') {
                // Strict mode: surface the failure instead of degrading.
                const reason = error instanceof Error ? error.message : String(error);
                throw new Error(`AST chunking failed for ${filepath}: ${reason}`);
            }
            console.warn(`AST chunking failed for ${filepath}, falling back to line-based:`, error);
        }
    }

    return chunkByLines(filepath, content, chunkSize, chunkOverlap, tenantContext);
}
41
/**
 * Assemble one line-based chunk together with its metadata.
 *
 * @param chunkContent - Text of the chunk
 * @param filepath - File the chunk came from
 * @param startLine - First line of the chunk as supplied by the caller
 * @param endLine - Last line of the chunk as supplied by the caller
 * @param fileType - Language/file type stored as `metadata.language`
 * @param tenantContext - Optional `{ repoId, orgId }`; each is copied only when truthy
 * @returns `{ content, metadata }` with `metadata.type` fixed to 'block'
 */
function buildLineChunk(chunkContent, filepath, startLine, endLine, fileType, tenantContext) {
    const metadata = {
        file: filepath,
        startLine,
        endLine,
        type: 'block',
        language: fileType,
        symbols: extractSymbols(chunkContent, fileType),
    };
    const repoId = tenantContext?.repoId;
    if (repoId) {
        metadata.repoId = repoId;
    }
    const orgId = tenantContext?.orgId;
    if (orgId) {
        metadata.orgId = orgId;
    }
    return { content: chunkContent, metadata };
}
59
/**
 * Line-based chunking: slide a window of `chunkSize` lines over the file,
 * advancing by `chunkSize - chunkOverlap` lines each time.
 *
 * @param filepath - File the content came from (used for file-type detection and metadata)
 * @param content - Full text of the file
 * @param chunkSize - Maximum number of lines per chunk
 * @param chunkOverlap - Number of lines shared between consecutive chunks
 * @param tenantContext - Optional `{ repoId, orgId }` forwarded to each chunk's metadata
 * @returns Chunks in file order; windows containing only whitespace are skipped
 */
function chunkByLines(filepath, content, chunkSize, chunkOverlap, tenantContext) {
    const lines = content.split('\n');
    // split() always yields at least one element, so only the blank-file case needs a guard.
    if (lines.length === 1 && lines[0].trim() === '') {
        return [];
    }
    const chunks = [];
    const fileType = detectFileType(filepath);
    // An overlap >= chunk size would give a zero or negative step and the loop
    // would never reach the end of the file; always advance by at least one line.
    const step = Math.max(1, chunkSize - chunkOverlap);
    for (let start = 0; start < lines.length; start += step) {
        const end = Math.min(start + chunkSize, lines.length);
        const chunkContent = lines.slice(start, end).join('\n');
        if (chunkContent.trim().length > 0) {
            // Line numbers in metadata are 1-based and inclusive.
            chunks.push(buildLineChunk(chunkContent, filepath, start + 1, end, fileType, tenantContext));
        }
        if (end >= lines.length) {
            // The window already reached the last line; further windows would only repeat the tail.
            break;
        }
    }
    return chunks;
}
81
+ export function chunkText(text, options = {}) {
82
+ const { chunkSize = 75, chunkOverlap = 10 } = options;
83
+ const lines = text.split('\n');
84
+ const chunks = [];
85
+ for (let i = 0; i < lines.length; i += chunkSize - chunkOverlap) {
86
+ const endLine = Math.min(i + chunkSize, lines.length);
87
+ const chunkLines = lines.slice(i, endLine);
88
+ const chunkContent = chunkLines.join('\n');
89
+ if (chunkContent.trim().length > 0) {
90
+ chunks.push(chunkContent);
91
+ }
92
+ if (endLine >= lines.length) {
93
+ break;
94
+ }
95
+ }
96
+ return chunks;
97
+ }
98
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAY/D,MAAM,UAAU,SAAS,CACvB,QAAgB,EAChB,OAAe,EACf,UAAwB,EAAE;IAE1B,MAAM,EACJ,SAAS,GAAG,EAAE,EACd,YAAY,GAAG,EAAE,EACjB,MAAM,GAAG,IAAI,EACb,WAAW,GAAG,YAAY,EAC1B,MAAM,EACN,KAAK,GACN,GAAG,OAAO,CAAC;IAEZ,oCAAoC;IACpC,IAAI,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,OAAO,eAAe,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,yEAAyE;IACzE,+EAA+E;IAC/E,gFAAgF;IAChF,kGAAkG;IAClG,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,qBAAqB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvE,OAAO,iBAAiB,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,iDAAiD;IACjD,IAAI,MAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,OAAO,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE;gBACnC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,EAAE,CAAC;gBACxC,MAAM;gBACN,KAAK;aACN,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,2CAA2C;YAC3C,IAAI,WAAW,KAAK,OAAO,EAAE,CAAC;gBAC5B,qDAAqD;gBACrD,MAAM,IAAI,KAAK,CACb,2BAA2B,QAAQ,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CACjG,CAAC;YACJ,CAAC;YACD,4CAA4C;YAC5C,OAAO,CAAC,IAAI,CAAC,2BAA2B,QAAQ,+BAA+B,EAAE,KAAK,CAAC,CAAC;QAC1F,CAAC;IACH,CAAC;IAED,gDAAgD;IAChD,OAAO,YAAY,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AACrF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,YAAoB,EACpB,QAAgB,EAChB,SAAiB,EACjB,OAAe,EACf,QAAgB,EAChB,aAAmD;IAEnD,OAAO;QACL,OAAO,EAAE,YAAY;QACrB,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;YACd,SAAS;YACT,OAAO;YACP,IAAI,EAAE,OAAO;YACb,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE,cAAc,CAAC,YAAY,EAAE,QAAQ,CAAC;YAC/C,GAAG,CAAC,aAAa,EAAE,MAAM,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAAC;YAC9D,GAAG,CAAC,aAAa,EAAE,KAAK,IAAI,EAAE,KAAK,EAAE,aAAa,
CAAC,KAAK,EAAE,CAAC;SAC5D;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CACnB,QAAgB,EAChB,OAAe,EACf,SAAiB,EACjB,YAAoB,EACpB,aAAmD;IAEnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QACzE,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,SAAS,GAAG,YAAY,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAExD,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC;QAC/F,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM;YAAE,MAAM;IACrC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,UAAwB,EAAE;IAChE,MAAM,EAAE,SAAS,GAAG,EAAE,EAAE,YAAY,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC;IAEtD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,YAAY,EAAE,CAAC;QAChE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAC3C,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3C,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5B,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Constants used by the parser/chunking layer.
3
+ * These will move to @liendev/parser during extraction.
4
+ */
5
+ export declare const DEFAULT_CHUNK_SIZE = 75;
6
+ export declare const DEFAULT_CHUNK_OVERLAP = 10;
7
+ export declare const MAX_CHUNKS_PER_FILE = 100;
8
+ //# sourceMappingURL=constants.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,eAAO,MAAM,kBAAkB,KAAK,CAAC;AACrC,eAAO,MAAM,qBAAqB,KAAK,CAAC;AAIxC,eAAO,MAAM,mBAAmB,MAAM,CAAC"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Constants used by the parser/chunking layer.
3
+ * These will move to @liendev/parser during extraction.
4
+ */
5
+ // Chunking settings: default chunk size and overlap (unit is not stated in this file; presumably lines, TODO confirm)
6
+ export const DEFAULT_CHUNK_SIZE = 75;
7
+ export const DEFAULT_CHUNK_OVERLAP = 10;
8
+ // File query estimation
9
+ // Maximum chunks expected per file when sizing scan queries.
10
+ export const MAX_CHUNKS_PER_FILE = 100;
11
+ //# sourceMappingURL=constants.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,oBAAoB;AACpB,MAAM,CAAC,MAAM,kBAAkB,GAAG,EAAE,CAAC;AACrC,MAAM,CAAC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAExC,wBAAwB;AACxB,6DAA6D;AAC7D,MAAM,CAAC,MAAM,mBAAmB,GAAG,GAAG,CAAC"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Compute a content hash for change detection.
3
+ *
4
+ * For small files (<=1MB), computes hash of entire content.
5
+ * For large files (>1MB), uses fingerprint approach (first 8KB + last 8KB + size).
6
+ *
7
+ * Returns 16-character hash (or 'L' prefix + 15 chars for large files).
8
+ *
9
+ * @param filepath - Absolute path to the file
10
+ * @returns Content hash string, or empty string if file cannot be read
11
+ */
12
+ export declare function computeContentHash(filepath: string): Promise<string>;
13
+ /**
14
+ * Check if hash algorithm is compatible with current implementation.
15
+ *
16
+ * @param algorithm - Hash algorithm version from manifest
17
+ * @returns true if compatible, false otherwise
18
+ */
19
+ export declare function isHashAlgorithmCompatible(algorithm?: string): boolean;
20
+ //# sourceMappingURL=content-hash.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-hash.d.ts","sourceRoot":"","sources":["../src/content-hash.ts"],"names":[],"mappings":"AAaA;;;;;;;;;;GAUG;AACH,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAiB1E;AA8CD;;;;;GAKG;AACH,wBAAgB,yBAAyB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAMrE"}
@@ -0,0 +1,91 @@
1
+ import crypto from 'crypto';
2
+ import fs from 'fs/promises';
3
+ /**
4
+ * Size threshold (1MB); only files strictly larger than this use the fingerprint approach
5
+ */
6
+ const LARGE_FILE_THRESHOLD = 1024 * 1024;
7
+ /**
8
+ * Sample size for large file fingerprinting (8KB)
9
+ */
10
+ const SAMPLE_SIZE = 8192;
11
+ /**
12
+ * Compute a content hash for change detection.
13
+ *
14
+ * For small files (<=1MB), computes hash of entire content.
15
+ * For large files (>1MB), uses fingerprint approach (first 8KB + last 8KB + size).
16
+ *
17
+ * Returns 16-character hash (or 'L' prefix + 15 chars for large files).
18
+ *
19
+ * @param filepath - Absolute path to the file
20
+ * @returns Content hash string, or empty string if file cannot be read
21
+ */
22
+ export async function computeContentHash(filepath) {
23
+ try {
24
+ const stats = await fs.stat(filepath);
25
+ // Files strictly larger than LARGE_FILE_THRESHOLD get a fingerprint (first 8KB + last 8KB + file size); a file of exactly the threshold size is hashed in full
26
+ if (stats.size > LARGE_FILE_THRESHOLD) {
27
+ return await computeLargeFileFingerprint(filepath, stats.size);
28
+ }
29
+ // Otherwise hash the entire content (read without an encoding, i.e. as binary, to support all file types) and keep the first 16 hex characters of the SHA-256 digest
30
+ const content = await fs.readFile(filepath);
31
+ return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
32
+ }
33
+ catch {
34
+ // Any error from stat, readFile or the fingerprint helper is swallowed and reported as an empty hash (intended to trigger a reindex)
35
+ // Common cases: file deleted, permission denied, file handle issues
36
+ return '';
37
+ }
38
+ }
39
+ /**
40
+ * Compute fingerprint for large files to avoid reading entire content.
41
+ * Uses first 8KB + last 8KB + file size.
42
+ *
43
+ * Note: This function is only used for files larger than 1MB (LARGE_FILE_THRESHOLD),
44
+ * so the sampled head (first 8KB) and tail (last 8KB) regions never overlap
45
+ * (even for files just over 1MB, 1MB >> 16KB ensures distinct regions).
46
+ *
47
+ * **Known Limitation**: Changes made exclusively to the middle of large files
48
+ * (i.e., modifications that don't affect the first or last 8KB) will NOT be detected.
49
+ * This is an acceptable trade-off for performance, as the primary use case is detecting
50
+ * `touch` operations and header/footer changes. Files with substantive code changes
51
+ * typically have modifications near the beginning or end.
52
+ *
53
+ * @param filepath - Absolute path to the file
54
+ * @param size - File size in bytes
55
+ * @returns Fingerprint hash with 'L' prefix
56
+ */
57
+ async function computeLargeFileFingerprint(filepath, size) {
58
+ const handle = await fs.open(filepath, 'r');
59
+ try {
60
+ const headBuffer = Buffer.alloc(SAMPLE_SIZE);
61
+ const tailBuffer = Buffer.alloc(SAMPLE_SIZE);
62
+ // Read up to SAMPLE_SIZE bytes from offset 0; the bytes-read count is not checked, so a short read leaves the zero fill from Buffer.alloc in place
63
+ await handle.read(headBuffer, 0, SAMPLE_SIZE, 0);
64
+ // Read the last SAMPLE_SIZE bytes; Math.max clamps the offset to 0 if size is smaller than SAMPLE_SIZE
65
+ const tailOffset = Math.max(0, size - SAMPLE_SIZE);
66
+ await handle.read(tailBuffer, 0, SAMPLE_SIZE, tailOffset);
67
+ // Feed head, then tail, then the decimal size string into a single SHA-256 hash
68
+ const hash = crypto.createHash('sha256');
69
+ hash.update(headBuffer);
70
+ hash.update(tailBuffer);
71
+ hash.update(size.toString());
72
+ return 'L' + hash.digest('hex').slice(0, 15); // 'L' prefix + 15 hex chars = 16-character large-file fingerprint
73
+ }
74
+ finally { // always release the file handle, even if a read or the hashing throws
75
+ await handle.close();
76
+ }
77
+ }
78
+ /**
79
+ * Check if hash algorithm is compatible with current implementation.
80
+ *
81
+ * @param algorithm - Hash algorithm version from manifest
82
+ * @returns true if compatible, false otherwise
83
+ */
84
+ export function isHashAlgorithmCompatible(algorithm) {
85
+ // Any falsy value (undefined, empty string, ...) is treated as the old format and accepted as compatible
86
+ if (!algorithm)
87
+ return true;
88
+ // Otherwise only these two exact identifiers are accepted
89
+ return algorithm === 'sha256-16' || algorithm === 'sha256-16-large';
90
+ }
91
+ //# sourceMappingURL=content-hash.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-hash.js","sourceRoot":"","sources":["../src/content-hash.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,MAAM,aAAa,CAAC;AAE7B;;GAEG;AACH,MAAM,oBAAoB,GAAG,IAAI,GAAG,IAAI,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,GAAG,IAAI,CAAC;AAEzB;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEtC,qEAAqE;QACrE,IAAI,KAAK,CAAC,IAAI,GAAG,oBAAoB,EAAE,CAAC;YACtC,OAAO,MAAM,2BAA2B,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACjE,CAAC;QAED,mFAAmF;QACnF,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAChF,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;QAClE,oEAAoE;QACpE,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,KAAK,UAAU,2BAA2B,CAAC,QAAgB,EAAE,IAAY;IACvE,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAE7C,iBAAiB;QACjB,MAAM,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;QAEjD,gBAAgB;QAChB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;QACnD,MAAM,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC;QAE1D,8BAA8B;QAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACxB,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACxB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAE7B,OAAO,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,sCAAsC;IACtF,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,yBAAyB,CAAC,SAAkB;IAC1D,kEAAkE;IAClE,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,+BAA+B;IAC/B,OAAO,SAAS,KAAK,WAAW,IAAI,SAAS,KAAK,iBAAiB,CAAC;AACtE,CAAC"}