@liendev/parser 0.39.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ast/chunker.d.ts +30 -0
- package/dist/ast/chunker.d.ts.map +1 -0
- package/dist/ast/chunker.js +310 -0
- package/dist/ast/chunker.js.map +1 -0
- package/dist/ast/complexity/cognitive.d.ts +16 -0
- package/dist/ast/complexity/cognitive.d.ts.map +1 -0
- package/dist/ast/complexity/cognitive.js +137 -0
- package/dist/ast/complexity/cognitive.js.map +1 -0
- package/dist/ast/complexity/cyclomatic.d.ts +12 -0
- package/dist/ast/complexity/cyclomatic.d.ts.map +1 -0
- package/dist/ast/complexity/cyclomatic.js +54 -0
- package/dist/ast/complexity/cyclomatic.js.map +1 -0
- package/dist/ast/complexity/halstead.d.ts +56 -0
- package/dist/ast/complexity/halstead.d.ts.map +1 -0
- package/dist/ast/complexity/halstead.js +196 -0
- package/dist/ast/complexity/halstead.js.map +1 -0
- package/dist/ast/complexity/index.d.ts +13 -0
- package/dist/ast/complexity/index.d.ts.map +1 -0
- package/dist/ast/complexity/index.js +12 -0
- package/dist/ast/complexity/index.js.map +1 -0
- package/dist/ast/extractors/index.d.ts +35 -0
- package/dist/ast/extractors/index.d.ts.map +1 -0
- package/dist/ast/extractors/index.js +41 -0
- package/dist/ast/extractors/index.js.map +1 -0
- package/dist/ast/extractors/symbol-helpers.d.ts +20 -0
- package/dist/ast/extractors/symbol-helpers.d.ts.map +1 -0
- package/dist/ast/extractors/symbol-helpers.js +58 -0
- package/dist/ast/extractors/symbol-helpers.js.map +1 -0
- package/dist/ast/extractors/types.d.ts +108 -0
- package/dist/ast/extractors/types.d.ts.map +1 -0
- package/dist/ast/extractors/types.js +2 -0
- package/dist/ast/extractors/types.js.map +1 -0
- package/dist/ast/languages/javascript.d.ts +134 -0
- package/dist/ast/languages/javascript.d.ts.map +1 -0
- package/dist/ast/languages/javascript.js +787 -0
- package/dist/ast/languages/javascript.js.map +1 -0
- package/dist/ast/languages/php.d.ts +84 -0
- package/dist/ast/languages/php.d.ts.map +1 -0
- package/dist/ast/languages/php.js +452 -0
- package/dist/ast/languages/php.js.map +1 -0
- package/dist/ast/languages/python.d.ts +96 -0
- package/dist/ast/languages/python.d.ts.map +1 -0
- package/dist/ast/languages/python.js +448 -0
- package/dist/ast/languages/python.js.map +1 -0
- package/dist/ast/languages/registry.d.ts +30 -0
- package/dist/ast/languages/registry.d.ts.map +1 -0
- package/dist/ast/languages/registry.js +95 -0
- package/dist/ast/languages/registry.js.map +1 -0
- package/dist/ast/languages/rust.d.ts +113 -0
- package/dist/ast/languages/rust.d.ts.map +1 -0
- package/dist/ast/languages/rust.js +614 -0
- package/dist/ast/languages/rust.js.map +1 -0
- package/dist/ast/languages/types.d.ts +52 -0
- package/dist/ast/languages/types.d.ts.map +1 -0
- package/dist/ast/languages/types.js +2 -0
- package/dist/ast/languages/types.js.map +1 -0
- package/dist/ast/languages/typescript.d.ts +3 -0
- package/dist/ast/languages/typescript.d.ts.map +1 -0
- package/dist/ast/languages/typescript.js +134 -0
- package/dist/ast/languages/typescript.js.map +1 -0
- package/dist/ast/parser.d.ts +29 -0
- package/dist/ast/parser.d.ts.map +1 -0
- package/dist/ast/parser.js +67 -0
- package/dist/ast/parser.js.map +1 -0
- package/dist/ast/symbols.d.ts +74 -0
- package/dist/ast/symbols.d.ts.map +1 -0
- package/dist/ast/symbols.js +171 -0
- package/dist/ast/symbols.js.map +1 -0
- package/dist/ast/traversers/index.d.ts +19 -0
- package/dist/ast/traversers/index.d.ts.map +1 -0
- package/dist/ast/traversers/index.js +21 -0
- package/dist/ast/traversers/index.js.map +1 -0
- package/dist/ast/traversers/types.d.ts +98 -0
- package/dist/ast/traversers/types.d.ts.map +1 -0
- package/dist/ast/traversers/types.js +2 -0
- package/dist/ast/traversers/types.js.map +1 -0
- package/dist/ast/types.d.ts +54 -0
- package/dist/ast/types.d.ts.map +1 -0
- package/dist/ast/types.js +2 -0
- package/dist/ast/types.js.map +1 -0
- package/dist/chunk-only-index.d.ts +25 -0
- package/dist/chunk-only-index.d.ts.map +1 -0
- package/dist/chunk-only-index.js +107 -0
- package/dist/chunk-only-index.js.map +1 -0
- package/dist/chunker.d.ts +12 -0
- package/dist/chunker.d.ts.map +1 -0
- package/dist/chunker.js +98 -0
- package/dist/chunker.js.map +1 -0
- package/dist/constants.d.ts +8 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +11 -0
- package/dist/constants.js.map +1 -0
- package/dist/content-hash.d.ts +20 -0
- package/dist/content-hash.d.ts.map +1 -0
- package/dist/content-hash.js +91 -0
- package/dist/content-hash.js.map +1 -0
- package/dist/dependency-analyzer.d.ts +79 -0
- package/dist/dependency-analyzer.d.ts.map +1 -0
- package/dist/dependency-analyzer.js +408 -0
- package/dist/dependency-analyzer.js.map +1 -0
- package/dist/ecosystem-presets.d.ts +32 -0
- package/dist/ecosystem-presets.d.ts.map +1 -0
- package/dist/ecosystem-presets.js +325 -0
- package/dist/ecosystem-presets.js.map +1 -0
- package/dist/gitignore.d.ts +22 -0
- package/dist/gitignore.d.ts.map +1 -0
- package/dist/gitignore.js +128 -0
- package/dist/gitignore.js.map +1 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +68 -0
- package/dist/index.js.map +1 -0
- package/dist/insights/chunk-complexity.d.ts +89 -0
- package/dist/insights/chunk-complexity.d.ts.map +1 -0
- package/dist/insights/chunk-complexity.js +332 -0
- package/dist/insights/chunk-complexity.js.map +1 -0
- package/dist/insights/types.d.ts +73 -0
- package/dist/insights/types.d.ts.map +1 -0
- package/dist/insights/types.js +9 -0
- package/dist/insights/types.js.map +1 -0
- package/dist/json-template-chunker.d.ts +12 -0
- package/dist/json-template-chunker.d.ts.map +1 -0
- package/dist/json-template-chunker.js +87 -0
- package/dist/json-template-chunker.js.map +1 -0
- package/dist/liquid-chunker.d.ts +16 -0
- package/dist/liquid-chunker.d.ts.map +1 -0
- package/dist/liquid-chunker.js +274 -0
- package/dist/liquid-chunker.js.map +1 -0
- package/dist/scanner.d.ts +16 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +95 -0
- package/dist/scanner.js.map +1 -0
- package/dist/symbol-extractor.d.ts +18 -0
- package/dist/symbol-extractor.d.ts.map +1 -0
- package/dist/symbol-extractor.js +343 -0
- package/dist/symbol-extractor.js.map +1 -0
- package/dist/test-associations.d.ts +16 -0
- package/dist/test-associations.d.ts.map +1 -0
- package/dist/test-associations.js +43 -0
- package/dist/test-associations.js.map +1 -0
- package/dist/types.d.ts +75 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/path-matching.d.ts +71 -0
- package/dist/utils/path-matching.d.ts.map +1 -0
- package/dist/utils/path-matching.js +258 -0
- package/dist/utils/path-matching.js.map +1 -0
- package/dist/utils/repo-id.d.ts +6 -0
- package/dist/utils/repo-id.d.ts.map +1 -0
- package/dist/utils/repo-id.js +12 -0
- package/dist/utils/repo-id.js.map +1 -0
- package/package.json +66 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import type Parser from 'tree-sitter';
|
|
2
|
+
/**
|
|
3
|
+
* Language-specific node traversal configuration
|
|
4
|
+
*
|
|
5
|
+
* Each language has different AST node types and structures. This interface
|
|
6
|
+
* allows us to implement language-specific traversal strategies while keeping
|
|
7
|
+
* the core chunking logic language-agnostic.
|
|
8
|
+
*
|
|
9
|
+
* @example TypeScript/JavaScript
|
|
10
|
+
* ```typescript
|
|
11
|
+
* targetNodeTypes: ['function_declaration', 'method_definition', 'interface_declaration']
|
|
12
|
+
* containerTypes: ['class_declaration']
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* @example Python
|
|
16
|
+
* ```typescript
|
|
17
|
+
* targetNodeTypes: ['function_definition', 'async_function_definition']
|
|
18
|
+
* containerTypes: ['class_definition']
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
export interface LanguageTraverser {
|
|
22
|
+
/**
|
|
23
|
+
* AST node types that should be extracted as chunks
|
|
24
|
+
* (e.g., 'function_declaration', 'method_definition' for TypeScript)
|
|
25
|
+
*/
|
|
26
|
+
targetNodeTypes: string[];
|
|
27
|
+
/**
|
|
28
|
+
* AST node types for containers whose children should be extracted
|
|
29
|
+
* (e.g., 'class_declaration' for TypeScript - we extract methods, not the class itself)
|
|
30
|
+
*/
|
|
31
|
+
containerTypes: string[];
|
|
32
|
+
/**
|
|
33
|
+
* AST node types that represent variable declarations that might contain functions
|
|
34
|
+
* (e.g., 'lexical_declaration' for TypeScript const/let with arrow functions)
|
|
35
|
+
*/
|
|
36
|
+
declarationTypes: string[];
|
|
37
|
+
/**
|
|
38
|
+
* AST node types that represent function implementations
|
|
39
|
+
* (used to detect functions inside variable declarations)
|
|
40
|
+
*/
|
|
41
|
+
functionTypes: string[];
|
|
42
|
+
/**
|
|
43
|
+
* Check if a node should have its children extracted instead of being chunked itself
|
|
44
|
+
*
|
|
45
|
+
* @param node - AST node to check
|
|
46
|
+
* @returns True if we should extract children (e.g., class methods), false otherwise
|
|
47
|
+
*/
|
|
48
|
+
shouldExtractChildren(node: Parser.SyntaxNode): boolean;
|
|
49
|
+
/**
|
|
50
|
+
* Check if a node is a declaration that might contain a function
|
|
51
|
+
*
|
|
52
|
+
* @param node - AST node to check
|
|
53
|
+
* @returns True if this is a variable declaration that might contain a function
|
|
54
|
+
*/
|
|
55
|
+
isDeclarationWithFunction(node: Parser.SyntaxNode): boolean;
|
|
56
|
+
/**
|
|
57
|
+
* Extract the container body node (e.g., class body) for child traversal
|
|
58
|
+
*
|
|
59
|
+
* @param node - Container node (e.g., class_declaration)
|
|
60
|
+
* @returns The body node containing children, or null if not found
|
|
61
|
+
*/
|
|
62
|
+
getContainerBody(node: Parser.SyntaxNode): Parser.SyntaxNode | null;
|
|
63
|
+
/**
|
|
64
|
+
* Check if traversal should continue into this node's children
|
|
65
|
+
*
|
|
66
|
+
* @param node - AST node to check
|
|
67
|
+
* @returns True if we should traverse children (e.g., for 'program', 'export_statement')
|
|
68
|
+
*/
|
|
69
|
+
shouldTraverseChildren(node: Parser.SyntaxNode): boolean;
|
|
70
|
+
/**
|
|
71
|
+
* Find the parent container name for a node (e.g., class name for a method)
|
|
72
|
+
*
|
|
73
|
+
* @param node - AST node (e.g., method)
|
|
74
|
+
* @returns Container name (e.g., class name), or undefined if not in a container
|
|
75
|
+
*/
|
|
76
|
+
findParentContainerName(node: Parser.SyntaxNode): string | undefined;
|
|
77
|
+
/**
|
|
78
|
+
* Find a function inside a declaration node (e.g., arrow function in const declaration)
|
|
79
|
+
*
|
|
80
|
+
* @param node - Declaration node to search
|
|
81
|
+
* @returns Information about whether a function was found and the function node itself
|
|
82
|
+
*/
|
|
83
|
+
findFunctionInDeclaration(node: Parser.SyntaxNode): DeclarationFunctionInfo;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Result of finding a function inside a declaration node
|
|
87
|
+
*/
|
|
88
|
+
export interface DeclarationFunctionInfo {
|
|
89
|
+
/**
|
|
90
|
+
* Whether a function was found inside the declaration
|
|
91
|
+
*/
|
|
92
|
+
hasFunction: boolean;
|
|
93
|
+
/**
|
|
94
|
+
* The actual function node if found
|
|
95
|
+
*/
|
|
96
|
+
functionNode: Parser.SyntaxNode | null;
|
|
97
|
+
}
|
|
98
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/ast/traversers/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AAEtC;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,eAAe,EAAE,MAAM,EAAE,CAAC;IAE1B;;;OAGG;IACH,cAAc,EAAE,MAAM,EAAE,CAAC;IAEzB;;;OAGG;IACH,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAE3B;;;OAGG;IACH,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB;;;;;OAKG;IACH,qBAAqB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC;IAExD;;;;;OAKG;IACH,yBAAyB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC;IAE5D;;;;;OAKG;IACH,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC;IAEpE;;;;;OAKG;IACH,sBAAsB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,OAAO,CAAC;IAEzD;;;;;OAKG;IACH,uBAAuB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,MAAM,GAAG,SAAS,CAAC;IAErE;;;;;OAKG;IACH,yBAAyB,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,GAAG,uBAAuB,CAAC;CAC7E;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC;;OAEG;IACH,WAAW,EAAE,OAAO,CAAC;IAErB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC;CACxC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/ast/traversers/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type Parser from 'tree-sitter';
|
|
2
|
+
import type { CodeChunk } from '../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* AST parse result containing the tree and any errors
|
|
5
|
+
*/
|
|
6
|
+
export interface ASTParseResult {
|
|
7
|
+
tree: Parser.Tree | null;
|
|
8
|
+
error?: string;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Symbol information extracted from AST nodes
|
|
12
|
+
*/
|
|
13
|
+
export interface SymbolInfo {
|
|
14
|
+
name: string;
|
|
15
|
+
type: 'function' | 'method' | 'class' | 'interface';
|
|
16
|
+
startLine: number;
|
|
17
|
+
endLine: number;
|
|
18
|
+
parentClass?: string;
|
|
19
|
+
signature?: string;
|
|
20
|
+
parameters?: string[];
|
|
21
|
+
returnType?: string;
|
|
22
|
+
complexity?: number;
|
|
23
|
+
cognitiveComplexity?: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Semantic metadata for AST-aware chunks
|
|
27
|
+
*/
|
|
28
|
+
export interface SemanticMetadata {
|
|
29
|
+
symbolName?: string;
|
|
30
|
+
symbolType?: 'function' | 'method' | 'class' | 'interface';
|
|
31
|
+
parentClass?: string;
|
|
32
|
+
complexity?: number;
|
|
33
|
+
cognitiveComplexity?: number;
|
|
34
|
+
parameters?: string[];
|
|
35
|
+
signature?: string;
|
|
36
|
+
returnType?: string;
|
|
37
|
+
imports?: string[];
|
|
38
|
+
halsteadVolume?: number;
|
|
39
|
+
halsteadDifficulty?: number;
|
|
40
|
+
halsteadEffort?: number;
|
|
41
|
+
halsteadBugs?: number;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* AST-aware chunk with enhanced semantic metadata
|
|
45
|
+
*/
|
|
46
|
+
export interface ASTChunk extends CodeChunk {
|
|
47
|
+
metadata: CodeChunk['metadata'] & SemanticMetadata;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Supported languages for AST parsing.
|
|
51
|
+
* Canonical definition lives in languages/registry.ts; re-exported here for convenience.
|
|
52
|
+
*/
|
|
53
|
+
export type { SupportedLanguage } from './languages/registry.js';
|
|
54
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/ast/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,CAAC;IACpD,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,UAAU,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,CAAC;IAC3D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IAGnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,QAAS,SAAQ,SAAS;IACzC,QAAQ,EAAE,SAAS,CAAC,UAAU,CAAC,GAAG,gBAAgB,CAAC;CACpD;AAED;;;GAGG;AACH,YAAY,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/ast/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { CodeChunk } from './types.js';
|
|
2
|
+
export interface ChunkOnlyOptions {
|
|
3
|
+
/** Explicit list of files to index (skips full repo scan when provided) */
|
|
4
|
+
filesToIndex?: string[];
|
|
5
|
+
/** Concurrency for file processing */
|
|
6
|
+
concurrency?: number;
|
|
7
|
+
/** Chunk size in lines */
|
|
8
|
+
chunkSize?: number;
|
|
9
|
+
/** Chunk overlap in lines */
|
|
10
|
+
chunkOverlap?: number;
|
|
11
|
+
}
|
|
12
|
+
export interface ChunkOnlyResult {
|
|
13
|
+
success: boolean;
|
|
14
|
+
filesIndexed: number;
|
|
15
|
+
chunksCreated: number;
|
|
16
|
+
durationMs: number;
|
|
17
|
+
chunks: CodeChunk[];
|
|
18
|
+
error?: string;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Perform chunk-only indexing (no embeddings or VectorDB).
|
|
22
|
+
* Returns raw chunks in-memory for direct analysis.
|
|
23
|
+
*/
|
|
24
|
+
export declare function performChunkOnlyIndex(rootDir: string, options?: ChunkOnlyOptions): Promise<ChunkOnlyResult>;
|
|
25
|
+
//# sourceMappingURL=chunk-only-index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-only-index.d.ts","sourceRoot":"","sources":["../src/chunk-only-index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAS5C,MAAM,WAAW,gBAAgB;IAC/B,2EAA2E;IAC3E,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,sCAAsC;IACtC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA8DD;;;GAGG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import pLimit from 'p-limit';
|
|
4
|
+
import { chunkFile } from './chunker.js';
|
|
5
|
+
import { scanCodebase } from './scanner.js';
|
|
6
|
+
import { detectEcosystems, getEcosystemExcludePatterns } from './ecosystem-presets.js';
|
|
7
|
+
import { extractRepoId } from './utils/repo-id.js';
|
|
8
|
+
import { DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP } from './constants.js';
|
|
9
|
+
const DEFAULT_CONCURRENCY = 4;
|
|
10
|
+
/**
 * Collect the files to index under `rootDir`.
 *
 * Exclude patterns come from `getEcosystemExcludePatterns`, fed with whatever
 * `detectEcosystems` reports for the root directory.
 *
 * @param rootDir - Directory handed to the ecosystem detector and the scanner
 * @returns Whatever `scanCodebase` returns for the patterns below
 */
async function scanFilesToIndex(rootDir) {
    // Source-code extensions first, then the Markdown flavours.
    const includePatterns = [
        '**/*.{ts,tsx,js,jsx,mjs,cjs,vue,py,php,go,rs,java,kt,swift,rb,cs,liquid,scala,c,cpp,cc,cxx,h,hpp}',
        '**/*.md',
        '**/*.mdx',
        '**/*.markdown',
    ];
    const detected = await detectEcosystems(rootDir);
    const excludePatterns = getEcosystemExcludePatterns(detected);
    return scanCodebase({ rootDir, includePatterns, excludePatterns });
}
|
|
25
|
+
/**
 * Express `file` relative to `rootDir`.
 *
 * @param file - Absolute or relative file path
 * @param rootDir - Directory an absolute path is made relative to
 * @returns `path.relative(rootDir, file)` for absolute input; any other input is returned untouched
 */
function normalizeToRelativePath(file, rootDir) {
    return path.isAbsolute(file) ? path.relative(rootDir, file) : file;
}
|
|
32
|
+
/**
 * Read one file, chunk it, and append the chunks to `output`.
 *
 * Failures are logged with `console.error` and reported through the return
 * value rather than thrown.
 *
 * @param file - Absolute path, or a path relative to `rootDir`
 * @param rootDir - Directory used to resolve `file` on disk and to derive its relative path
 * @param config - Supplies `chunkSize`, `chunkOverlap` and `repoId` for `chunkFile`
 * @param output - Array that receives the chunks produced for this file
 * @returns True when at least one chunk was appended; false when none were produced or an error occurred
 */
async function chunkFileForCollection(file, rootDir, config, output) {
    try {
        // Read from the absolute location, but hand the relative path to the chunker.
        const readFrom = path.isAbsolute(file) ? file : path.join(rootDir, file);
        const reportedAs = normalizeToRelativePath(file, rootDir);
        const source = await fs.readFile(readFrom, 'utf-8');
        const produced = chunkFile(reportedAs, source, {
            chunkSize: config.chunkSize,
            chunkOverlap: config.chunkOverlap,
            useAST: true,
            astFallback: 'line-based',
            repoId: config.repoId,
        });
        if (!(produced.length > 0)) {
            return false;
        }
        output.push(...produced);
        return true;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`[parser] Failed to process ${file}: ${reason}`);
        return false;
    }
}
|
|
58
|
+
/**
 * Perform chunk-only indexing (no embeddings or VectorDB).
 * Returns raw chunks in-memory for direct analysis.
 *
 * Files come from `options.filesToIndex` when provided, otherwise from
 * `scanFilesToIndex(rootDir)`. Each file is chunked through a `p-limit`
 * limiter sized by `options.concurrency` (default `DEFAULT_CONCURRENCY`).
 *
 * @param rootDir - Repository root; relative file paths are resolved against it
 * @param options - Optional file list, concurrency, chunk size and chunk overlap
 * @returns Result summary. Errors thrown while scanning or chunking are caught
 *   and reported as `success: false` with an `error` message.
 */
export async function performChunkOnlyIndex(rootDir, options = {}) {
    const startTime = Date.now();
    try {
        const files = options.filesToIndex ?? (await scanFilesToIndex(rootDir));
        if (files.length === 0) {
            return {
                success: false,
                filesIndexed: 0,
                chunksCreated: 0,
                durationMs: Date.now() - startTime,
                chunks: [],
                error: 'No files found to index',
            };
        }
        const config = {
            chunkSize: options.chunkSize ?? DEFAULT_CHUNK_SIZE,
            chunkOverlap: options.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
            repoId: extractRepoId(rootDir),
        };
        const limit = pLimit(options.concurrency ?? DEFAULT_CONCURRENCY);
        // Every task collects into its own array. Promise.all keeps the input
        // order, so the merged result does not depend on which file finishes first.
        const perFile = await Promise.all(files.map(file => limit(async () => {
            const fileChunks = [];
            const indexed = await chunkFileForCollection(file, rootDir, config, fileChunks);
            // A false result means the file failed or produced no chunks; it must
            // not be counted as indexed.
            return indexed ? fileChunks : null;
        })));
        const indexedFiles = perFile.filter(fileChunks => fileChunks !== null);
        const allChunks = indexedFiles.flat();
        return {
            success: true,
            filesIndexed: indexedFiles.length,
            chunksCreated: allChunks.length,
            durationMs: Date.now() - startTime,
            chunks: allChunks,
        };
    }
    catch (error) {
        return {
            success: false,
            filesIndexed: 0,
            chunksCreated: 0,
            durationMs: Date.now() - startTime,
            chunks: [],
            error: error instanceof Error ? error.message : String(error),
        };
    }
}
|
|
107
|
+
//# sourceMappingURL=chunk-only-index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-only-index.js","sourceRoot":"","sources":["../src/chunk-only-index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,MAAM,MAAM,SAAS,CAAC;AAE7B,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,wBAAwB,CAAC;AACvF,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAE3E,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAsB9B,qDAAqD;AACrD,KAAK,UAAU,gBAAgB,CAAC,OAAe;IAC7C,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACnD,MAAM,iBAAiB,GAAG,2BAA2B,CAAC,UAAU,CAAC,CAAC;IAElE,OAAO,YAAY,CAAC;QAClB,OAAO;QACP,eAAe,EAAE;YACf,mGAAmG;YACnG,SAAS;YACT,UAAU;YACV,eAAe;SAChB;QACD,eAAe,EAAE,iBAAiB;KACnC,CAAC,CAAC;AACL,CAAC;AAED,6CAA6C;AAC7C,SAAS,uBAAuB,CAAC,IAAY,EAAE,OAAe;IAC5D,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,sBAAsB,CACnC,IAAY,EACZ,OAAe,EACf,MAAoE,EACpE,MAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAC7E,MAAM,YAAY,GAAG,uBAAuB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC5D,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QAEzD,MAAM,MAAM,GAAG,SAAS,CAAC,YAAY,EAAE,OAAO,EAAE;YAC9C,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,MAAM,EAAE,IAAI;YACZ,WAAW,EAAE,YAAY;YACzB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,8BAA8B,IAAI,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAChG,CAAC;QACF,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAe,EACf,UAA4B,EAAE;IAE9B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CA
AC;IAE7B,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,IAAI,CAAC,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC;QAExE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,YAAY,EAAE,CAAC;gBACf,aAAa,EAAE,CAAC;gBAChB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,yBAAyB;aACjC,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG;YACb,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,kBAAkB;YAClD,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,qBAAqB;YAC3D,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC;SAC/B,CAAC;QAEF,MAAM,SAAS,GAAgB,EAAE,CAAC;QAClC,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,IAAI,mBAAmB,CAAC,CAAC;QACjE,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CACf,KAAK,CAAC,KAAK,IAAI,EAAE;YACf,MAAM,sBAAsB,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;YAC/D,cAAc,EAAE,CAAC;QACnB,CAAC,CAAC,CACH,CACF,CAAC;QAEF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,YAAY,EAAE,cAAc;YAC5B,aAAa,EAAE,SAAS,CAAC,MAAM;YAC/B,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,MAAM,EAAE,SAAS;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,OAAO,EAAE,KAAK;YACd,YAAY,EAAE,CAAC;YACf,aAAa,EAAE,CAAC;YAChB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,MAAM,EAAE,EAAE;YACV,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { CodeChunk } from './types.js';
|
|
2
|
+
export interface ChunkOptions {
|
|
3
|
+
chunkSize?: number;
|
|
4
|
+
chunkOverlap?: number;
|
|
5
|
+
useAST?: boolean;
|
|
6
|
+
astFallback?: 'line-based' | 'error';
|
|
7
|
+
repoId?: string;
|
|
8
|
+
orgId?: string;
|
|
9
|
+
}
|
|
10
|
+
export declare function chunkFile(filepath: string, content: string, options?: ChunkOptions): CodeChunk[];
|
|
11
|
+
export declare function chunkText(text: string, options?: ChunkOptions): string[];
|
|
12
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAO5C,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC;IAErC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,SAAS,CACvB,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,YAAiB,GACzB,SAAS,EAAE,CA8Cb;AA6DD,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,MAAM,EAAE,CAqB5E"}
|
package/dist/chunker.js
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { detectFileType } from './scanner.js';
|
|
2
|
+
import { extractSymbols } from './symbol-extractor.js';
|
|
3
|
+
import { shouldUseAST, chunkByAST } from './ast/chunker.js';
|
|
4
|
+
import { chunkLiquidFile } from './liquid-chunker.js';
|
|
5
|
+
import { chunkJSONTemplate } from './json-template-chunker.js';
|
|
6
|
+
/**
 * Split one file into chunks, picking a strategy from its path.
 *
 * Strategies are tried in this order: Liquid files, JSON files under a
 * `templates/` path segment, AST chunking (when enabled and `shouldUseAST`
 * accepts the path), then line-based chunking.
 *
 * @param filepath - Path used for strategy selection and passed on to the chunkers
 * @param content - File contents
 * @param options - `chunkSize` (default 75), `chunkOverlap` (default 10), `useAST` (default true),
 *   `astFallback` (default 'line-based'), plus optional `repoId` / `orgId`
 * @returns Chunks produced by the selected chunker
 * @throws Error when AST chunking fails and `astFallback` is 'error'
 */
export function chunkFile(filepath, content, options = {}) {
    const { repoId, orgId } = options;
    const { chunkSize = 75, chunkOverlap = 10 } = options;
    const { useAST = true, astFallback = 'line-based' } = options;
    const tenant = { repoId, orgId };
    // Liquid files have their own chunker.
    if (filepath.endsWith('.liquid')) {
        return chunkLiquidFile(filepath, content, chunkSize, chunkOverlap, tenant);
    }
    // JSON template files: 'templates/' must be a whole path segment, so
    // 'templates/product.json' and 'x/templates/customers/account.json' match
    // while 'my-templates/config.json' does not.
    const isTemplateJson = filepath.endsWith('.json') && /(?:^|\/)templates\//.test(filepath);
    if (isTemplateJson) {
        return chunkJSONTemplate(filepath, content, tenant);
    }
    // AST chunking disabled or not applicable: go straight to line-based chunking.
    if (!useAST || !shouldUseAST(filepath)) {
        return chunkByLines(filepath, content, chunkSize, chunkOverlap, tenant);
    }
    try {
        return chunkByAST(filepath, content, {
            minChunkSize: Math.floor(chunkSize / 10),
            repoId,
            orgId,
        });
    }
    catch (error) {
        // Strict mode: surface the AST failure instead of falling back.
        if (astFallback === 'error') {
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`AST chunking failed for ${filepath}: ${reason}`);
        }
        console.warn(`AST chunking failed for ${filepath}, falling back to line-based:`, error);
        return chunkByLines(filepath, content, chunkSize, chunkOverlap, tenant);
    }
}
|
|
41
|
+
/**
 * Assemble one line-based chunk together with its metadata.
 *
 * @param chunkContent - Text of the chunk
 * @param filepath - Stored as `metadata.file`
 * @param startLine - Stored as `metadata.startLine`
 * @param endLine - Stored as `metadata.endLine`
 * @param fileType - Stored as `metadata.language` and passed to `extractSymbols`
 * @param tenantContext - Optional `{ repoId, orgId }`; only truthy values are copied into the metadata
 * @returns Chunk object of the form `{ content, metadata }`
 */
function buildLineChunk(chunkContent, filepath, startLine, endLine, fileType, tenantContext) {
    const metadata = {
        file: filepath,
        startLine,
        endLine,
        type: 'block',
        language: fileType,
        symbols: extractSymbols(chunkContent, fileType),
    };
    // Tenant identifiers are added only when present, so absent ones leave no key behind.
    if (tenantContext?.repoId) {
        metadata.repoId = tenantContext.repoId;
    }
    if (tenantContext?.orgId) {
        metadata.orgId = tenantContext.orgId;
    }
    return { content: chunkContent, metadata };
}
|
|
59
|
+
/**
 * Line-based chunking: slide a window of `chunkSize` lines over the file,
 * advancing by `chunkSize - chunkOverlap` lines each step.
 *
 * @param filepath - File path, recorded in each chunk and used to detect the file type
 * @param content - File contents
 * @param chunkSize - Maximum number of lines per chunk
 * @param chunkOverlap - Number of lines shared between consecutive chunks
 * @param tenantContext - Optional `{ repoId, orgId }` forwarded to `buildLineChunk`
 * @returns Chunks with 1-based `startLine` and inclusive `endLine`; whitespace-only windows are skipped
 */
function chunkByLines(filepath, content, chunkSize, chunkOverlap, tenantContext) {
    const lines = content.split('\n');
    // split() always yields at least one element, so this is the only empty case.
    if (lines.length === 1 && lines[0].trim() === '') {
        return [];
    }
    const chunks = [];
    const fileType = detectFileType(filepath);
    // An overlap >= chunkSize would give a zero or negative stride and the loop
    // below would never terminate; always advance by at least one line.
    const step = Math.max(1, chunkSize - chunkOverlap);
    for (let i = 0; i < lines.length; i += step) {
        const endLine = Math.min(i + chunkSize, lines.length);
        const chunkContent = lines.slice(i, endLine).join('\n');
        if (chunkContent.trim().length > 0) {
            chunks.push(buildLineChunk(chunkContent, filepath, i + 1, endLine, fileType, tenantContext));
        }
        // The window reached the end of the file; a further step would only repeat its tail.
        if (endLine >= lines.length) {
            break;
        }
    }
    return chunks;
}
|
|
81
|
+
/**
 * Split plain text into overlapping line-based chunks.
 *
 * @param text - Text to split on '\n'
 * @param options - `chunkSize` (lines per chunk, default 75) and `chunkOverlap`
 *   (lines shared between consecutive chunks, default 10)
 * @returns Chunk strings in document order; whitespace-only windows are skipped
 */
export function chunkText(text, options = {}) {
    const { chunkSize = 75, chunkOverlap = 10 } = options;
    const lines = text.split('\n');
    const chunks = [];
    // An overlap >= chunkSize would give a zero or negative stride and the loop
    // below would never terminate; always advance by at least one line.
    const step = Math.max(1, chunkSize - chunkOverlap);
    for (let i = 0; i < lines.length; i += step) {
        const endLine = Math.min(i + chunkSize, lines.length);
        const chunkContent = lines.slice(i, endLine).join('\n');
        if (chunkContent.trim().length > 0) {
            chunks.push(chunkContent);
        }
        // The window reached the end of the text; a further step would only repeat its tail.
        if (endLine >= lines.length) {
            break;
        }
    }
    return chunks;
}
|
|
98
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAY/D,MAAM,UAAU,SAAS,CACvB,QAAgB,EAChB,OAAe,EACf,UAAwB,EAAE;IAE1B,MAAM,EACJ,SAAS,GAAG,EAAE,EACd,YAAY,GAAG,EAAE,EACjB,MAAM,GAAG,IAAI,EACb,WAAW,GAAG,YAAY,EAC1B,MAAM,EACN,KAAK,GACN,GAAG,OAAO,CAAC;IAEZ,oCAAoC;IACpC,IAAI,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,OAAO,eAAe,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,yEAAyE;IACzE,+EAA+E;IAC/E,gFAAgF;IAChF,kGAAkG;IAClG,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,qBAAqB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvE,OAAO,iBAAiB,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,iDAAiD;IACjD,IAAI,MAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,OAAO,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE;gBACnC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,EAAE,CAAC;gBACxC,MAAM;gBACN,KAAK;aACN,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,2CAA2C;YAC3C,IAAI,WAAW,KAAK,OAAO,EAAE,CAAC;gBAC5B,qDAAqD;gBACrD,MAAM,IAAI,KAAK,CACb,2BAA2B,QAAQ,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CACjG,CAAC;YACJ,CAAC;YACD,4CAA4C;YAC5C,OAAO,CAAC,IAAI,CAAC,2BAA2B,QAAQ,+BAA+B,EAAE,KAAK,CAAC,CAAC;QAC1F,CAAC;IACH,CAAC;IAED,gDAAgD;IAChD,OAAO,YAAY,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;AACrF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,YAAoB,EACpB,QAAgB,EAChB,SAAiB,EACjB,OAAe,EACf,QAAgB,EAChB,aAAmD;IAEnD,OAAO;QACL,OAAO,EAAE,YAAY;QACrB,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;YACd,SAAS;YACT,OAAO;YACP,IAAI,EAAE,OAAO;YACb,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE,cAAc,CAAC,YAAY,EAAE,QAAQ,CAAC;YAC/C,GAAG,CAAC,aAAa,EAAE,MAAM,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAAC;YAC9D,GAAG,CAAC,aAAa,EAAE,KAAK,IAAI,EAAE,KAAK,EAAE,aAAa,CA
AC,KAAK,EAAE,CAAC;SAC5D;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CACnB,QAAgB,EAChB,OAAe,EACf,SAAiB,EACjB,YAAoB,EACpB,aAAmD;IAEnD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QACzE,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,SAAS,GAAG,YAAY,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAExD,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC;QAC/F,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM;YAAE,MAAM;IACrC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,UAAwB,EAAE;IAChE,MAAM,EAAE,SAAS,GAAG,EAAE,EAAE,YAAY,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC;IAEtD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,YAAY,EAAE,CAAC;QAChE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QACtD,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAC3C,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3C,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5B,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Constants used by the parser/chunking layer.
|
|
3
|
+
 * These now live in @liendev/parser.
|
|
4
|
+
*/
|
|
5
|
+
export declare const DEFAULT_CHUNK_SIZE = 75;
|
|
6
|
+
export declare const DEFAULT_CHUNK_OVERLAP = 10;
|
|
7
|
+
export declare const MAX_CHUNKS_PER_FILE = 100;
|
|
8
|
+
//# sourceMappingURL=constants.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,eAAO,MAAM,kBAAkB,KAAK,CAAC;AACrC,eAAO,MAAM,qBAAqB,KAAK,CAAC;AAIxC,eAAO,MAAM,mBAAmB,MAAM,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Shared constants for the parser/chunking layer of @liendev/parser.
 */

// --- Chunking settings ---
export const DEFAULT_CHUNK_SIZE = 75;
export const DEFAULT_CHUNK_OVERLAP = 10;

// --- File query estimation ---
// Upper bound on the number of chunks expected per file when sizing scan queries.
export const MAX_CHUNKS_PER_FILE = 100;
|
|
11
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,oBAAoB;AACpB,MAAM,CAAC,MAAM,kBAAkB,GAAG,EAAE,CAAC;AACrC,MAAM,CAAC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAExC,wBAAwB;AACxB,6DAA6D;AAC7D,MAAM,CAAC,MAAM,mBAAmB,GAAG,GAAG,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compute a content hash for change detection.
|
|
3
|
+
*
|
|
4
|
+
* For small files (<1MB), computes hash of entire content.
|
|
5
|
+
* For large files (>=1MB), uses fingerprint approach (first 8KB + last 8KB + size).
|
|
6
|
+
*
|
|
7
|
+
* Returns 16-character hash (or 'L' prefix + 15 chars for large files).
|
|
8
|
+
*
|
|
9
|
+
* @param filepath - Absolute path to the file
|
|
10
|
+
* @returns Content hash string, or empty string if file cannot be read
|
|
11
|
+
*/
|
|
12
|
+
export declare function computeContentHash(filepath: string): Promise<string>;
|
|
13
|
+
/**
|
|
14
|
+
* Check if hash algorithm is compatible with current implementation.
|
|
15
|
+
*
|
|
16
|
+
* @param algorithm - Hash algorithm version from manifest
|
|
17
|
+
* @returns true if compatible, false otherwise
|
|
18
|
+
*/
|
|
19
|
+
export declare function isHashAlgorithmCompatible(algorithm?: string): boolean;
|
|
20
|
+
//# sourceMappingURL=content-hash.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-hash.d.ts","sourceRoot":"","sources":["../src/content-hash.ts"],"names":[],"mappings":"AAaA;;;;;;;;;;GAUG;AACH,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAiB1E;AA8CD;;;;;GAKG;AACH,wBAAgB,yBAAyB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAMrE"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import crypto from 'crypto';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
/**
 * Size threshold, in bytes, for switching to the fingerprint approach (1MB).
 */
const LARGE_FILE_THRESHOLD = 2 ** 20;

/**
 * Number of bytes sampled from each end of a large file (8KB).
 */
const SAMPLE_SIZE = 8 * 1024;
|
|
11
|
+
/**
 * Compute a content hash for change detection.
 *
 * For small files (<1MB), computes a SHA-256 hash of the entire content.
 * For large files (>=1MB), uses the fingerprint approach
 * (first 8KB + last 8KB + size) via computeLargeFileFingerprint.
 *
 * Returns a 16-character hash: 16 hex characters for small files, or an
 * 'L' prefix + 15 hex characters for large files.
 *
 * @param filepath - Absolute path to the file
 * @returns Content hash string, or empty string if file cannot be read
 */
export async function computeContentHash(filepath) {
  try {
    const { size } = await fs.stat(filepath);

    // The documented contract is "<1MB hashes everything, >=1MB fingerprints",
    // so a file of exactly LARGE_FILE_THRESHOLD bytes must take the
    // fingerprint path as well (hence >=, not >).
    if (size >= LARGE_FILE_THRESHOLD) {
      return await computeLargeFileFingerprint(filepath, size);
    }

    // For normal files, hash entire content (read as binary to support all file types)
    const content = await fs.readFile(filepath);
    return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
  } catch {
    // Deliberate best-effort: if the file can't be read, return an empty hash
    // (will trigger reindex).
    // Common cases: file deleted, permission denied, file handle issues
    return '';
  }
}
|
|
39
|
+
/**
 * Compute fingerprint for large files to avoid reading entire content.
 * Uses first 8KB + last 8KB + file size.
 *
 * Note: This function is intended for large files (on the order of
 * LARGE_FILE_THRESHOLD, 1MB, and up), so the sampled head (first 8KB) and
 * tail (last 8KB) regions do not overlap (1MB >> 16KB).
 *
 * **Known Limitation**: Changes made exclusively to the middle of large files
 * (i.e., modifications that don't affect the first or last 8KB, and don't
 * change the size) will NOT be detected. This is an accepted trade-off for
 * performance.
 *
 * Each sample is read with a loop, because a single read may return fewer
 * bytes than requested even before end-of-file. Sample buffers are
 * zero-filled up front, so if the file is shorter than expected the bytes
 * past end-of-file are hashed as zeros.
 *
 * @param filepath - Absolute path to the file
 * @param size - File size in bytes
 * @returns Fingerprint hash with 'L' prefix ('L' + 15 hex characters)
 */
async function computeLargeFileFingerprint(filepath, size) {
  const handle = await fs.open(filepath, 'r');
  try {
    const headBuffer = Buffer.alloc(SAMPLE_SIZE);
    const tailBuffer = Buffer.alloc(SAMPLE_SIZE);

    // Read first 8KB
    await fillBufferFromFile(handle, headBuffer, 0);

    // Read last 8KB (clamped to the start of the file for tiny inputs)
    const tailOffset = Math.max(0, size - SAMPLE_SIZE);
    await fillBufferFromFile(handle, tailBuffer, tailOffset);

    // Combine: head + tail + size
    const hash = crypto.createHash('sha256');
    hash.update(headBuffer);
    hash.update(tailBuffer);
    hash.update(size.toString());

    // 'L' prefix = large file fingerprint
    return `L${hash.digest('hex').slice(0, 15)}`;
  } finally {
    await handle.close();
  }
}

/**
 * Fill `buffer` from `handle` starting at file offset `position`, repeating
 * reads until the buffer is full or end-of-file is reached.
 *
 * @param handle - Open file handle to read from
 * @param buffer - Destination buffer; bytes not reached are left untouched
 * @param position - Byte offset in the file at which to start reading
 * @returns Number of bytes actually read into the buffer
 */
async function fillBufferFromFile(handle, buffer, position) {
  let filled = 0;
  while (filled < buffer.length) {
    const { bytesRead } = await handle.read(
      buffer,
      filled,
      buffer.length - filled,
      position + filled,
    );
    if (bytesRead === 0) {
      break; // end of file
    }
    filled += bytesRead;
  }
  return filled;
}
|
|
78
|
+
/**
 * Report whether a manifest's hash algorithm identifier can be used with the
 * current hashing implementation.
 *
 * @param algorithm - Hash algorithm version from manifest
 * @returns true if compatible, false otherwise
 */
export function isHashAlgorithmCompatible(algorithm) {
  // Algorithms the current implementation understands.
  const supportedAlgorithms = ['sha256-16', 'sha256-16-large'];

  // A missing algorithm means an old-format manifest, which is still compatible.
  return !algorithm || supportedAlgorithms.includes(algorithm);
}
|
|
91
|
+
//# sourceMappingURL=content-hash.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-hash.js","sourceRoot":"","sources":["../src/content-hash.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,MAAM,aAAa,CAAC;AAE7B;;GAEG;AACH,MAAM,oBAAoB,GAAG,IAAI,GAAG,IAAI,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,GAAG,IAAI,CAAC;AAEzB;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEtC,qEAAqE;QACrE,IAAI,KAAK,CAAC,IAAI,GAAG,oBAAoB,EAAE,CAAC;YACtC,OAAO,MAAM,2BAA2B,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACjE,CAAC;QAED,mFAAmF;QACnF,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAChF,CAAC;IAAC,MAAM,CAAC;QACP,kEAAkE;QAClE,oEAAoE;QACpE,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,KAAK,UAAU,2BAA2B,CAAC,QAAgB,EAAE,IAAY;IACvE,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAE7C,iBAAiB;QACjB,MAAM,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;QAEjD,gBAAgB;QAChB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;QACnD,MAAM,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC;QAE1D,8BAA8B;QAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACxB,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACxB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAE7B,OAAO,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,sCAAsC;IACtF,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,yBAAyB,CAAC,SAAkB;IAC1D,kEAAkE;IAClE,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,+BAA+B;IAC/B,OAAO,SAAS,KAAK,WAAW,IAAI,SAAS,KAAK,iBAAiB,CAAC;AACtE,CAAC"}
|