spec-gen-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1078 -0
- package/dist/api/analyze.d.ts +17 -0
- package/dist/api/analyze.d.ts.map +1 -0
- package/dist/api/analyze.js +109 -0
- package/dist/api/analyze.js.map +1 -0
- package/dist/api/drift.d.ts +21 -0
- package/dist/api/drift.d.ts.map +1 -0
- package/dist/api/drift.js +145 -0
- package/dist/api/drift.js.map +1 -0
- package/dist/api/generate.d.ts +18 -0
- package/dist/api/generate.d.ts.map +1 -0
- package/dist/api/generate.js +251 -0
- package/dist/api/generate.js.map +1 -0
- package/dist/api/index.d.ts +39 -0
- package/dist/api/index.d.ts.map +1 -0
- package/dist/api/index.js +32 -0
- package/dist/api/index.js.map +1 -0
- package/dist/api/init.d.ts +18 -0
- package/dist/api/init.d.ts.map +1 -0
- package/dist/api/init.js +82 -0
- package/dist/api/init.js.map +1 -0
- package/dist/api/run.d.ts +19 -0
- package/dist/api/run.d.ts.map +1 -0
- package/dist/api/run.js +291 -0
- package/dist/api/run.js.map +1 -0
- package/dist/api/specs.d.ts +49 -0
- package/dist/api/specs.d.ts.map +1 -0
- package/dist/api/specs.js +136 -0
- package/dist/api/specs.js.map +1 -0
- package/dist/api/types.d.ts +176 -0
- package/dist/api/types.d.ts.map +1 -0
- package/dist/api/types.js +9 -0
- package/dist/api/types.js.map +1 -0
- package/dist/api/verify.d.ts +20 -0
- package/dist/api/verify.d.ts.map +1 -0
- package/dist/api/verify.js +117 -0
- package/dist/api/verify.js.map +1 -0
- package/dist/cli/commands/analyze.d.ts +27 -0
- package/dist/cli/commands/analyze.d.ts.map +1 -0
- package/dist/cli/commands/analyze.js +485 -0
- package/dist/cli/commands/analyze.js.map +1 -0
- package/dist/cli/commands/drift.d.ts +9 -0
- package/dist/cli/commands/drift.d.ts.map +1 -0
- package/dist/cli/commands/drift.js +540 -0
- package/dist/cli/commands/drift.js.map +1 -0
- package/dist/cli/commands/generate.d.ts +9 -0
- package/dist/cli/commands/generate.d.ts.map +1 -0
- package/dist/cli/commands/generate.js +633 -0
- package/dist/cli/commands/generate.js.map +1 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +171 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/mcp.d.ts +638 -0
- package/dist/cli/commands/mcp.d.ts.map +1 -0
- package/dist/cli/commands/mcp.js +574 -0
- package/dist/cli/commands/mcp.js.map +1 -0
- package/dist/cli/commands/run.d.ts +24 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/run.js +546 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/verify.d.ts +9 -0
- package/dist/cli/commands/verify.d.ts.map +1 -0
- package/dist/cli/commands/verify.js +417 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/commands/view.d.ts +9 -0
- package/dist/cli/commands/view.d.ts.map +1 -0
- package/dist/cli/commands/view.js +511 -0
- package/dist/cli/commands/view.js.map +1 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +83 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/analyzer/architecture-writer.d.ts +67 -0
- package/dist/core/analyzer/architecture-writer.d.ts.map +1 -0
- package/dist/core/analyzer/architecture-writer.js +209 -0
- package/dist/core/analyzer/architecture-writer.js.map +1 -0
- package/dist/core/analyzer/artifact-generator.d.ts +222 -0
- package/dist/core/analyzer/artifact-generator.d.ts.map +1 -0
- package/dist/core/analyzer/artifact-generator.js +726 -0
- package/dist/core/analyzer/artifact-generator.js.map +1 -0
- package/dist/core/analyzer/call-graph.d.ts +83 -0
- package/dist/core/analyzer/call-graph.d.ts.map +1 -0
- package/dist/core/analyzer/call-graph.js +827 -0
- package/dist/core/analyzer/call-graph.js.map +1 -0
- package/dist/core/analyzer/code-shaper.d.ts +33 -0
- package/dist/core/analyzer/code-shaper.d.ts.map +1 -0
- package/dist/core/analyzer/code-shaper.js +149 -0
- package/dist/core/analyzer/code-shaper.js.map +1 -0
- package/dist/core/analyzer/dependency-graph.d.ts +179 -0
- package/dist/core/analyzer/dependency-graph.d.ts.map +1 -0
- package/dist/core/analyzer/dependency-graph.js +574 -0
- package/dist/core/analyzer/dependency-graph.js.map +1 -0
- package/dist/core/analyzer/duplicate-detector.d.ts +52 -0
- package/dist/core/analyzer/duplicate-detector.d.ts.map +1 -0
- package/dist/core/analyzer/duplicate-detector.js +279 -0
- package/dist/core/analyzer/duplicate-detector.js.map +1 -0
- package/dist/core/analyzer/embedding-service.d.ts +50 -0
- package/dist/core/analyzer/embedding-service.d.ts.map +1 -0
- package/dist/core/analyzer/embedding-service.js +104 -0
- package/dist/core/analyzer/embedding-service.js.map +1 -0
- package/dist/core/analyzer/file-walker.d.ts +78 -0
- package/dist/core/analyzer/file-walker.d.ts.map +1 -0
- package/dist/core/analyzer/file-walker.js +531 -0
- package/dist/core/analyzer/file-walker.js.map +1 -0
- package/dist/core/analyzer/import-parser.d.ts +91 -0
- package/dist/core/analyzer/import-parser.d.ts.map +1 -0
- package/dist/core/analyzer/import-parser.js +720 -0
- package/dist/core/analyzer/import-parser.js.map +1 -0
- package/dist/core/analyzer/index.d.ts +10 -0
- package/dist/core/analyzer/index.d.ts.map +1 -0
- package/dist/core/analyzer/index.js +10 -0
- package/dist/core/analyzer/index.js.map +1 -0
- package/dist/core/analyzer/refactor-analyzer.d.ts +80 -0
- package/dist/core/analyzer/refactor-analyzer.d.ts.map +1 -0
- package/dist/core/analyzer/refactor-analyzer.js +339 -0
- package/dist/core/analyzer/refactor-analyzer.js.map +1 -0
- package/dist/core/analyzer/repository-mapper.d.ts +150 -0
- package/dist/core/analyzer/repository-mapper.d.ts.map +1 -0
- package/dist/core/analyzer/repository-mapper.js +731 -0
- package/dist/core/analyzer/repository-mapper.js.map +1 -0
- package/dist/core/analyzer/signature-extractor.d.ts +31 -0
- package/dist/core/analyzer/signature-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/signature-extractor.js +387 -0
- package/dist/core/analyzer/signature-extractor.js.map +1 -0
- package/dist/core/analyzer/significance-scorer.d.ts +79 -0
- package/dist/core/analyzer/significance-scorer.d.ts.map +1 -0
- package/dist/core/analyzer/significance-scorer.js +407 -0
- package/dist/core/analyzer/significance-scorer.js.map +1 -0
- package/dist/core/analyzer/subgraph-extractor.d.ts +43 -0
- package/dist/core/analyzer/subgraph-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/subgraph-extractor.js +129 -0
- package/dist/core/analyzer/subgraph-extractor.js.map +1 -0
- package/dist/core/analyzer/vector-index.d.ts +63 -0
- package/dist/core/analyzer/vector-index.d.ts.map +1 -0
- package/dist/core/analyzer/vector-index.js +169 -0
- package/dist/core/analyzer/vector-index.js.map +1 -0
- package/dist/core/drift/drift-detector.d.ts +102 -0
- package/dist/core/drift/drift-detector.d.ts.map +1 -0
- package/dist/core/drift/drift-detector.js +597 -0
- package/dist/core/drift/drift-detector.js.map +1 -0
- package/dist/core/drift/git-diff.d.ts +55 -0
- package/dist/core/drift/git-diff.d.ts.map +1 -0
- package/dist/core/drift/git-diff.js +356 -0
- package/dist/core/drift/git-diff.js.map +1 -0
- package/dist/core/drift/index.d.ts +12 -0
- package/dist/core/drift/index.d.ts.map +1 -0
- package/dist/core/drift/index.js +9 -0
- package/dist/core/drift/index.js.map +1 -0
- package/dist/core/drift/spec-mapper.d.ts +73 -0
- package/dist/core/drift/spec-mapper.d.ts.map +1 -0
- package/dist/core/drift/spec-mapper.js +353 -0
- package/dist/core/drift/spec-mapper.js.map +1 -0
- package/dist/core/generator/adr-generator.d.ts +32 -0
- package/dist/core/generator/adr-generator.d.ts.map +1 -0
- package/dist/core/generator/adr-generator.js +192 -0
- package/dist/core/generator/adr-generator.js.map +1 -0
- package/dist/core/generator/index.d.ts +9 -0
- package/dist/core/generator/index.d.ts.map +1 -0
- package/dist/core/generator/index.js +12 -0
- package/dist/core/generator/index.js.map +1 -0
- package/dist/core/generator/mapping-generator.d.ts +54 -0
- package/dist/core/generator/mapping-generator.d.ts.map +1 -0
- package/dist/core/generator/mapping-generator.js +239 -0
- package/dist/core/generator/mapping-generator.js.map +1 -0
- package/dist/core/generator/openspec-compat.d.ts +160 -0
- package/dist/core/generator/openspec-compat.d.ts.map +1 -0
- package/dist/core/generator/openspec-compat.js +523 -0
- package/dist/core/generator/openspec-compat.js.map +1 -0
- package/dist/core/generator/openspec-format-generator.d.ts +111 -0
- package/dist/core/generator/openspec-format-generator.d.ts.map +1 -0
- package/dist/core/generator/openspec-format-generator.js +817 -0
- package/dist/core/generator/openspec-format-generator.js.map +1 -0
- package/dist/core/generator/openspec-writer.d.ts +131 -0
- package/dist/core/generator/openspec-writer.d.ts.map +1 -0
- package/dist/core/generator/openspec-writer.js +379 -0
- package/dist/core/generator/openspec-writer.js.map +1 -0
- package/dist/core/generator/prompts.d.ts +35 -0
- package/dist/core/generator/prompts.d.ts.map +1 -0
- package/dist/core/generator/prompts.js +212 -0
- package/dist/core/generator/prompts.js.map +1 -0
- package/dist/core/generator/spec-pipeline.d.ts +94 -0
- package/dist/core/generator/spec-pipeline.d.ts.map +1 -0
- package/dist/core/generator/spec-pipeline.js +474 -0
- package/dist/core/generator/spec-pipeline.js.map +1 -0
- package/dist/core/generator/stages/stage1-survey.d.ts +19 -0
- package/dist/core/generator/stages/stage1-survey.d.ts.map +1 -0
- package/dist/core/generator/stages/stage1-survey.js +105 -0
- package/dist/core/generator/stages/stage1-survey.js.map +1 -0
- package/dist/core/generator/stages/stage2-entities.d.ts +11 -0
- package/dist/core/generator/stages/stage2-entities.d.ts.map +1 -0
- package/dist/core/generator/stages/stage2-entities.js +67 -0
- package/dist/core/generator/stages/stage2-entities.js.map +1 -0
- package/dist/core/generator/stages/stage3-services.d.ts +11 -0
- package/dist/core/generator/stages/stage3-services.d.ts.map +1 -0
- package/dist/core/generator/stages/stage3-services.js +75 -0
- package/dist/core/generator/stages/stage3-services.js.map +1 -0
- package/dist/core/generator/stages/stage4-api.d.ts +11 -0
- package/dist/core/generator/stages/stage4-api.d.ts.map +1 -0
- package/dist/core/generator/stages/stage4-api.js +65 -0
- package/dist/core/generator/stages/stage4-api.js.map +1 -0
- package/dist/core/generator/stages/stage5-architecture.d.ts +10 -0
- package/dist/core/generator/stages/stage5-architecture.d.ts.map +1 -0
- package/dist/core/generator/stages/stage5-architecture.js +62 -0
- package/dist/core/generator/stages/stage5-architecture.js.map +1 -0
- package/dist/core/generator/stages/stage6-adr.d.ts +8 -0
- package/dist/core/generator/stages/stage6-adr.d.ts.map +1 -0
- package/dist/core/generator/stages/stage6-adr.js +41 -0
- package/dist/core/generator/stages/stage6-adr.js.map +1 -0
- package/dist/core/services/chat-agent.d.ts +45 -0
- package/dist/core/services/chat-agent.d.ts.map +1 -0
- package/dist/core/services/chat-agent.js +310 -0
- package/dist/core/services/chat-agent.js.map +1 -0
- package/dist/core/services/chat-tools.d.ts +32 -0
- package/dist/core/services/chat-tools.d.ts.map +1 -0
- package/dist/core/services/chat-tools.js +270 -0
- package/dist/core/services/chat-tools.js.map +1 -0
- package/dist/core/services/config-manager.d.ts +61 -0
- package/dist/core/services/config-manager.d.ts.map +1 -0
- package/dist/core/services/config-manager.js +143 -0
- package/dist/core/services/config-manager.js.map +1 -0
- package/dist/core/services/gitignore-manager.d.ts +29 -0
- package/dist/core/services/gitignore-manager.d.ts.map +1 -0
- package/dist/core/services/gitignore-manager.js +106 -0
- package/dist/core/services/gitignore-manager.js.map +1 -0
- package/dist/core/services/index.d.ts +8 -0
- package/dist/core/services/index.d.ts.map +1 -0
- package/dist/core/services/index.js +8 -0
- package/dist/core/services/index.js.map +1 -0
- package/dist/core/services/llm-service.d.ts +336 -0
- package/dist/core/services/llm-service.d.ts.map +1 -0
- package/dist/core/services/llm-service.js +1155 -0
- package/dist/core/services/llm-service.js.map +1 -0
- package/dist/core/services/mcp-handlers/analysis.d.ts +42 -0
- package/dist/core/services/mcp-handlers/analysis.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/analysis.js +300 -0
- package/dist/core/services/mcp-handlers/analysis.js.map +1 -0
- package/dist/core/services/mcp-handlers/graph.d.ts +65 -0
- package/dist/core/services/mcp-handlers/graph.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/graph.js +509 -0
- package/dist/core/services/mcp-handlers/graph.js.map +1 -0
- package/dist/core/services/mcp-handlers/semantic.d.ts +38 -0
- package/dist/core/services/mcp-handlers/semantic.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/semantic.js +172 -0
- package/dist/core/services/mcp-handlers/semantic.js.map +1 -0
- package/dist/core/services/mcp-handlers/utils.d.ts +21 -0
- package/dist/core/services/mcp-handlers/utils.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/utils.js +62 -0
- package/dist/core/services/mcp-handlers/utils.js.map +1 -0
- package/dist/core/services/project-detector.d.ts +32 -0
- package/dist/core/services/project-detector.d.ts.map +1 -0
- package/dist/core/services/project-detector.js +111 -0
- package/dist/core/services/project-detector.js.map +1 -0
- package/dist/core/verifier/index.d.ts +5 -0
- package/dist/core/verifier/index.d.ts.map +1 -0
- package/dist/core/verifier/index.js +5 -0
- package/dist/core/verifier/index.js.map +1 -0
- package/dist/core/verifier/verification-engine.d.ts +226 -0
- package/dist/core/verifier/verification-engine.d.ts.map +1 -0
- package/dist/core/verifier/verification-engine.js +681 -0
- package/dist/core/verifier/verification-engine.js.map +1 -0
- package/dist/types/index.d.ts +252 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pipeline.d.ts +148 -0
- package/dist/types/pipeline.d.ts.map +1 -0
- package/dist/types/pipeline.js +5 -0
- package/dist/types/pipeline.js.map +1 -0
- package/dist/utils/errors.d.ts +51 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +128 -0
- package/dist/utils/errors.js.map +1 -0
- package/dist/utils/logger.d.ts +149 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +331 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/progress.d.ts +142 -0
- package/dist/utils/progress.d.ts.map +1 -0
- package/dist/utils/progress.js +280 -0
- package/dist/utils/progress.js.map +1 -0
- package/dist/utils/prompts.d.ts +53 -0
- package/dist/utils/prompts.d.ts.map +1 -0
- package/dist/utils/prompts.js +199 -0
- package/dist/utils/prompts.js.map +1 -0
- package/dist/utils/shutdown.d.ts +89 -0
- package/dist/utils/shutdown.d.ts.map +1 -0
- package/dist/utils/shutdown.js +237 -0
- package/dist/utils/shutdown.js.map +1 -0
- package/package.json +114 -0
- package/src/viewer/InteractiveGraphViewer.jsx +1486 -0
- package/src/viewer/app/index.html +17 -0
- package/src/viewer/app/main.jsx +13 -0
- package/src/viewer/components/ArchitectureView.jsx +177 -0
- package/src/viewer/components/ChatPanel.jsx +448 -0
- package/src/viewer/components/ClusterGraph.jsx +441 -0
- package/src/viewer/components/FilterBar.jsx +179 -0
- package/src/viewer/components/FlatGraph.jsx +275 -0
- package/src/viewer/components/MicroComponents.jsx +83 -0
- package/src/viewer/hooks/usePanZoom.js +79 -0
- package/src/viewer/utils/constants.js +47 -0
- package/src/viewer/utils/graph-helpers.js +291 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate Code Detector
|
|
3
|
+
*
|
|
4
|
+
* Detects code clones using pure static analysis — no LLM calls:
|
|
5
|
+
* - Type 1 (exact): identical code after whitespace/comment normalization
|
|
6
|
+
* - Type 2 (structural): same AST structure with renamed variables
|
|
7
|
+
* - Type 3 (near): high Jaccard similarity on token n-grams (≥ 0.7)
|
|
8
|
+
*
|
|
9
|
+
* Requires a CallGraphResult for precise function boundaries (byte ranges).
|
|
10
|
+
* Complexity: O(n) for Types 1-2, O(n²) for Type 3 (bounded by MAX_NEAR_FUNCTIONS).
|
|
11
|
+
*/
|
|
12
|
+
import { createHash } from 'node:crypto';
|
|
13
|
+
// ============================================================================
|
|
14
|
+
// CONSTANTS
|
|
15
|
+
// ============================================================================
|
|
16
|
+
/** Functions spanning fewer source lines than this are skipped entirely. */
const MIN_LINES = 5;

/** Functions with fewer whitespace-separated normalized tokens than this are skipped. */
const MIN_TOKENS = 10;

/** Two functions count as near-clones when their shingle Jaccard score is at least this. */
const NEAR_THRESHOLD = 0.7;

/** Number of consecutive tokens joined into one shingle (n-gram). */
const SHINGLE_SIZE = 5;

/** Upper bound on candidate count for the pairwise near-clone pass; above it the pass is skipped. */
const MAX_NEAR_FUNCTIONS = 400;
|
|
26
|
+
// ============================================================================
|
|
27
|
+
// KEYWORD SET (Type 2 normalization — preserve keywords, replace identifiers)
|
|
28
|
+
// Covers TypeScript, JavaScript, Python, Go, Rust, Ruby, Java.
|
|
29
|
+
// ============================================================================
|
|
30
|
+
// Words kept verbatim during Type 2 normalization; every other identifier is
// replaced by a placeholder. Each string below is one space-separated group.
const KEYWORDS = new Set([
    // Control flow
    'if else elif for while do break continue return',
    'switch case default goto fallthrough pass',
    // Error handling
    'try catch finally throw raise rescue ensure',
    // Declarations
    'function func fn def class struct enum interface',
    'module type impl trait',
    // Variable declaration
    'const let var val mut ref move',
    // Modifiers
    'public private protected static abstract final readonly',
    'async await yield override virtual synchronized',
    'pub unsafe extern transient volatile native',
    // OOP
    'new delete this self Self super extends implements',
    // Import/export
    'import export from use require include package mod',
    // Logic
    'in is as not and or typeof instanceof void',
    // Context
    'with match when where select defer go chan',
    // Literals
    'true false null nil None True False undefined',
    // Python extras
    'lambda del global nonlocal assert',
    // Ruby extras ('do' is already listed under control flow)
    'unless until begin end then defined',
    // Java extras ('instanceof' is already listed under logic)
    'throws',
    // Common builtins (high frequency, preserve to avoid false matches)
    'len make append cap copy map range',
].flatMap((group) => group.split(' ')));
|
|
63
|
+
// ============================================================================
|
|
64
|
+
// NORMALIZATION
|
|
65
|
+
// ============================================================================
|
|
66
|
+
/**
 * Remove comments from a source snippet while leaving string literals intact.
 *
 * Recognized comment forms:
 *   - `// ...` to end of line (JS/TS/Go/Rust/Java)
 *   - `# ...` to end of line (Python/Ruby)
 *   - C-style block comments, possibly spanning lines
 *   - Python triple-quoted blocks (`"""` and `'''`), treated as docstrings
 *
 * The text is scanned once, left to right, and whichever token starts first
 * wins. A comment marker inside a quoted string (e.g. "http://host" or
 * '#fff') is therefore kept, and a `//` inside a block comment no longer
 * truncates the block and swallows the code that follows it.
 *
 * This is a language-agnostic heuristic, not a parser: a `#` outside a
 * string is always taken as a comment start, and a quote with no closing
 * partner on the same line is simply left in place.
 *
 * @param text Raw source text of a function.
 * @returns The text with comments removed; whitespace is not collapsed.
 */
function stripComments(text) {
    // Alternation order matters: triple quotes must be tried before single
    // quotes, and comments before the string forms that share no prefix.
    const commentOrString = /"""[\s\S]*?"""|'''[\s\S]*?'''|\/\*[\s\S]*?\*\/|\/\/[^\n]*|#[^\n]*|"(?:\\[\s\S]|[^"\\\n])*"|'(?:\\[\s\S]|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`/g;
    return text.replace(commentOrString, (token) => {
        const lead = token[0];
        // Only comment alternatives can start with '/' or '#'.
        if (lead === '/' || lead === '#')
            return '';
        // Triple-quoted blocks are dropped, as before.
        if (token.startsWith('"""') || token.startsWith("'''"))
            return '';
        // Ordinary string literal: keep verbatim.
        return token;
    });
}
|
|
78
|
+
/**
 * Type 1 normalization: drop comments, squeeze every whitespace run down to a
 * single space, and trim both ends.
 */
function normalizeType1(text) {
    const uncommented = stripComments(text);
    const singleSpaced = uncommented.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
82
|
+
/**
 * Type 2 normalization: apply Type 1, then rewrite each identifier that is not
 * in KEYWORDS to a placeholder (`_v0`, `_v1`, ...) numbered by first
 * appearance. Repeated uses of one name share one placeholder, so two
 * functions that differ only by consistent renaming normalize identically.
 */
function normalizeType2(text) {
    const placeholders = new Map();
    const rename = (identifier) => {
        if (KEYWORDS.has(identifier))
            return identifier;
        let alias = placeholders.get(identifier);
        if (alias === undefined) {
            // The map size equals the number of placeholders handed out so far.
            alias = `_v${placeholders.size}`;
            placeholders.set(identifier, alias);
        }
        return alias;
    };
    return normalizeType1(text).replace(/\b([a-zA-Z_][a-zA-Z0-9_]*)\b/g, (match) => rename(match));
}
|
|
98
|
+
/** Return the first 16 hex characters of the SHA-256 digest of `text`. */
function sha16(text) {
    const hasher = createHash('sha256');
    hasher.update(text);
    const hexDigest = hasher.digest('hex');
    return hexDigest.substring(0, 16);
}
|
|
101
|
+
// ============================================================================
|
|
102
|
+
// NEAR-CLONE (TYPE 3) — Jaccard on token n-grams
|
|
103
|
+
// ============================================================================
|
|
104
|
+
/** Split text into its maximal runs of non-whitespace characters. */
function tokenize(normalizedText) {
    // Splitting on whitespace runs leaves empty strings at the edges (or a
    // single empty string for blank input); drop those.
    return normalizedText.split(/\s+/).filter((piece) => piece.length > 0);
}
|
|
107
|
+
/**
 * Build the set of k-token shingles (sliding windows) over `tokens`.
 * Each window is joined with a NUL character to form one set member.
 * When there are fewer than `k` tokens the set is empty.
 */
function getShingles(tokens, k = SHINGLE_SIZE) {
    const shingles = new Set();
    const lastStart = tokens.length - k;
    let start = 0;
    while (start <= lastStart) {
        const window = tokens.slice(start, start + k);
        shingles.add(window.join('\x00'));
        start += 1;
    }
    return shingles;
}
|
|
114
|
+
/**
 * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 * Two empty sets are defined to have similarity 1.
 */
function jaccard(a, b) {
    const sizeA = a.size;
    const sizeB = b.size;
    if (sizeA === 0 && sizeB === 0) {
        return 1;
    }
    let shared = 0;
    a.forEach((member) => {
        if (b.has(member)) {
            shared += 1;
        }
    });
    // |a ∪ b| = |a| + |b| - |a ∩ b|
    const unionSize = sizeA + sizeB - shared;
    return shared / unionSize;
}
|
|
123
|
+
// ============================================================================
|
|
124
|
+
// LINE NUMBER HELPERS
|
|
125
|
+
// ============================================================================
|
|
126
|
+
/**
 * Return the 1-based line number at which `byteOffset` falls in `content`.
 *
 * The result is one plus the number of '\n' characters located strictly
 * before the offset. The offset is compared against string indices and is
 * capped at `content.length`.
 */
function byteOffsetToLine(content, byteOffset) {
    const limit = Math.min(byteOffset, content.length);
    let lineNumber = 1;
    // Hop from newline to newline instead of visiting every character.
    let newlineAt = content.indexOf('\n');
    while (newlineAt !== -1 && newlineAt < limit) {
        lineNumber += 1;
        newlineAt = content.indexOf('\n', newlineAt + 1);
    }
    return lineNumber;
}
|
|
137
|
+
// ============================================================================
|
|
138
|
+
// MAIN FUNCTION
|
|
139
|
+
// ============================================================================
|
|
140
|
+
/**
 * Find groups of duplicated functions across a set of files.
 *
 * Every node of `callGraph.nodes` is sliced out of its file's content using
 * the node's `startIndex`/`endIndex`, fingerprinted, and grouped in three
 * passes:
 *   1. exact      — identical Type 1 hash
 *   2. structural — identical Type 2 hash, excluding members of exact groups
 *   3. near       — shingle Jaccard score >= NEAR_THRESHOLD against the first
 *                   member of the group (greedy; only run on functions left
 *                   ungrouped, and only when there are at most
 *                   MAX_NEAR_FUNCTIONS of them)
 *
 * @param files Array of `{ path, content }` records.
 * @param callGraph Object whose `nodes` map yields nodes carrying `filePath`,
 *   `name`, `className`, `startIndex` and `endIndex`.
 * @returns `{ cloneGroups, stats }`: groups sorted by
 *   (instances × lineCount) descending, plus summary counts.
 */
export function detectDuplicates(files, callGraph) {
    const lineSpan = (instance) => instance.endLine - instance.startLine + 1;
    const addToBucket = (buckets, key, entryIdx) => {
        const bucket = buckets.get(key);
        if (bucket === undefined) {
            buckets.set(key, [entryIdx]);
        }
        else {
            bucket.push(entryIdx);
        }
    };

    // ---- Step 1: fingerprint every sufficiently large function ----
    const contentByPath = new Map();
    files.forEach((file) => contentByPath.set(file.path, file.content));

    const entries = [];
    for (const fnNode of callGraph.nodes.values()) {
        const source = contentByPath.get(fnNode.filePath);
        if (!source) {
            continue;
        }
        const startLine = byteOffsetToLine(source, fnNode.startIndex);
        const endLine = byteOffsetToLine(source, fnNode.endIndex);
        if (endLine - startLine + 1 < MIN_LINES) {
            continue;
        }
        const body = source.slice(fnNode.startIndex, fnNode.endIndex);
        const exactForm = normalizeType1(body);
        const structuralForm = normalizeType2(body);
        const tokens = tokenize(structuralForm);
        if (tokens.length < MIN_TOKENS) {
            continue;
        }
        entries.push({
            instance: {
                file: fnNode.filePath,
                functionName: fnNode.name,
                className: fnNode.className,
                startLine,
                endLine,
            },
            t1Hash: sha16(exactForm),
            t2Hash: sha16(structuralForm),
            shingles: getShingles(tokens),
        });
    }

    const cloneGroups = [];
    const grouped = new Set(); // indices into `entries`

    // ---- Step 2: exact and structural groups via hash buckets ----
    const byExactHash = new Map();
    const byStructuralHash = new Map();
    entries.forEach((entry, idx) => {
        addToBucket(byExactHash, entry.t1Hash, idx);
        addToBucket(byStructuralHash, entry.t2Hash, idx);
    });

    // Exact clones (Type 1)
    for (const members of byExactHash.values()) {
        if (members.length < 2) {
            continue;
        }
        members.forEach((idx) => grouped.add(idx));
        cloneGroups.push({
            type: 'exact',
            similarity: 1.0,
            instances: members.map((idx) => entries[idx].instance),
            lineCount: lineSpan(entries[members[0]].instance),
        });
    }

    // Structural clones (Type 2). An entry sits in an exact group precisely
    // when its Type 1 bucket holds two or more entries, i.e. when it was
    // marked as grouped by the pass above.
    const inExactGroup = new Set(grouped);
    for (const members of byStructuralHash.values()) {
        if (members.length < 2) {
            continue;
        }
        const novel = members.filter((idx) => !inExactGroup.has(idx));
        if (novel.length < 2) {
            continue;
        }
        novel.forEach((idx) => grouped.add(idx));
        cloneGroups.push({
            type: 'structural',
            similarity: 1.0,
            instances: novel.map((idx) => entries[idx].instance),
            lineCount: lineSpan(entries[novel[0]].instance),
        });
    }

    // ---- Step 3: near-clones (Type 3), greedy pairwise Jaccard ----
    const leftovers = entries.filter((_entry, idx) => !grouped.has(idx));
    if (leftovers.length >= 2 && leftovers.length <= MAX_NEAR_FUNCTIONS) {
        const claimed = new Set(); // indices into `leftovers`
        for (let seedIdx = 0; seedIdx < leftovers.length; seedIdx++) {
            if (claimed.has(seedIdx)) {
                continue;
            }
            const seed = leftovers[seedIdx];
            const members = [seedIdx];
            // Lowest seed-to-member score; reported as the group's similarity.
            let lowest = 1.0;
            for (let otherIdx = seedIdx + 1; otherIdx < leftovers.length; otherIdx++) {
                if (claimed.has(otherIdx)) {
                    continue;
                }
                const score = jaccard(seed.shingles, leftovers[otherIdx].shingles);
                if (score >= NEAR_THRESHOLD) {
                    members.push(otherIdx);
                    claimed.add(otherIdx);
                    lowest = Math.min(lowest, score);
                }
            }
            if (members.length < 2) {
                continue;
            }
            claimed.add(seedIdx);
            cloneGroups.push({
                type: 'near',
                similarity: Math.round(lowest * 100) / 100,
                instances: members.map((idx) => leftovers[idx].instance),
                lineCount: lineSpan(seed.instance),
            });
        }
    }

    // Largest impact first: copies × lines per copy.
    const impact = (group) => group.instances.length * group.lineCount;
    cloneGroups.sort((a, b) => impact(b) - impact(a));

    // ---- Stats ----
    const duplicatedKeys = new Set();
    for (const group of cloneGroups) {
        for (const inst of group.instances) {
            duplicatedKeys.add(`${inst.file}:${inst.functionName}:${inst.startLine}`);
        }
    }
    const totalFunctions = entries.length;
    const duplicatedFunctions = duplicatedKeys.size;
    let duplicationRatio = 0;
    if (totalFunctions > 0) {
        duplicationRatio = Math.round((duplicatedFunctions / totalFunctions) * 1000) / 1000;
    }
    return {
        cloneGroups,
        stats: {
            totalFunctions,
            duplicatedFunctions,
            duplicationRatio,
            cloneGroupCount: cloneGroups.length,
        },
    };
}
|
|
279
|
+
//# sourceMappingURL=duplicate-detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"duplicate-detector.js","sourceRoot":"","sources":["../../../src/core/analyzer/duplicate-detector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAyCzC,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,2EAA2E;AAC3E,MAAM,SAAS,GAAG,CAAC,CAAC;AAEpB,sDAAsD;AACtD,MAAM,UAAU,GAAG,EAAE,CAAC;AAEtB,mDAAmD;AACnD,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B,0CAA0C;AAC1C,MAAM,YAAY,GAAG,CAAC,CAAC;AAEvB,8EAA8E;AAC9E,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,+EAA+E;AAC/E,8EAA8E;AAC9E,+DAA+D;AAC/D,+EAA+E;AAE/E,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC;IACvB,eAAe;IACf,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ;IACzE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM;IAC1D,iBAAiB;IACjB,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ;IAC/D,eAAe;IACf,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,WAAW;IACvE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACjC,uBAAuB;IACvB,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM;IAClD,YAAY;IACZ,QAAQ,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,UAAU;IAC3E,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,cAAc;IAChE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,UAAU,EAAE,QAAQ;IAC5D,MAAM;IACN,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY;IACzE,gBAAgB;IAChB,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK;IACzE,QAAQ;IACR,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM;IACpE,UAAU;IACV,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM;IACjE,WAAW;IACX,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW;IACpE,gBAAgB;IAChB,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO;IAC3E,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS;IAC9B,cAAc;IACd,QAAQ,EAAE,YAAY;IACtB,oEAAoE;IACpE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO;CACvD,CAAC,CAAC;AAEH,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,SAAS,
aAAa,CAAC,IAAY;IACjC,sCAAsC;IACtC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IACvC,8BAA8B;IAC9B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IACpC,mBAAmB;IACnB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;IAC7C,gCAAgC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IAC3C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,mDAAmD;AACnD,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AACzD,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,OAAO,IAAI,CAAC,OAAO,CAAC,+BAA+B,EAAE,CAAC,KAAK,EAAE,EAAE;QAC7D,IAAI,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;QACtC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;YAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,OAAO,EAAE,EAAE,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CAAE,CAAC;IAC1B,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,KAAK,CAAC,IAAY;IACzB,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACtE,CAAC;AAED,+EAA+E;AAC/E,iDAAiD;AACjD,+EAA+E;AAE/E,SAAS,QAAQ,CAAC,cAAsB;IACtC,OAAO,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;AAC5C,CAAC;AAED,SAAS,WAAW,CAAC,MAAgB,EAAE,CAAC,GAAG,YAAY;IACrD,MAAM,CAAC,GAAG,IAAI,GAAG,EAAU,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,OAAO,CAAC,CAAc,EAAE,CAAc;IAC7C,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,CAAC;QAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,KAAK,EAAE,CAAC;IACzC,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED,+EAA+E;AAC/E,sBAAsB;AACtB
,+EAA+E;AAE/E,kEAAkE;AAClE,SAAS,gBAAgB,CAAC,OAAe,EAAE,UAAkB;IAC3D,mCAAmC;IACnC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI;YAAE,IAAI,EAAE,CAAC;IAClC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAC9B,KAA+C,EAC/C,SAA0B;IAE1B,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAUpE,MAAM,OAAO,GAAY,EAAE,CAAC;IAE5B,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;QAC5C,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClD,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,yCAAyC;QACzC,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACzD,MAAM,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,CAAC,CAAC;QAE1C,IAAI,SAAS,GAAG,SAAS;YAAE,SAAS;QAEpC,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3D,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,EAAE,CAAC,CAAC;QAE5B,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU;YAAE,SAAS;QAEzC,OAAO,CAAC,IAAI,CAAC;YACX,QAAQ,EAAE;gBACR,IAAI,EAAE,IAAI,CAAC,QAAQ;gBACnB,YAAY,EAAE,IAAI,CAAC,IAAI;gBACvB,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,SAAS;gBACT,OAAO;aACR;YACD,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;YACjB,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;YACjB,QAAQ,EAAE,WAAW,CAAC,MAAM,CAAC;SAC9B,CAAC,CAAC;IACL,CAAC;IAED,MAAM,WAAW,GAAiB,EAAE,CAAC;IACrC,MAAM,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC,CAAC,gBAAgB;IAE1D,mEAAmE;IACnE,MAAM,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC1C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAA
E,CAAC,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,wBAAwB;IACxB,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;QACrC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QACjC,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC/C,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,WAAW,CAAC,IAAI,CAAC;YACf,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,GAAG;YACf,SAAS,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;YAChD,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,SAAS,GAAG,CAAC;SACrF,CAAC,CAAC;IACL,CAAC;IAED,uEAAuE;IACvE,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;QACrC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QACjC,kDAAkD;QAClD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;YAC/B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC;YACzD,OAAO,MAAM,GAAG,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;QACH,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAC/B,KAAK,MAAM,CAAC,IAAI,KAAK;YAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACxB,WAAW,CAAC,IAAI,CAAC;YACf,IAAI,EAAE,YAAY;YAClB,UAAU,EAAE,GAAG;YACf,SAAS,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;YAC9C,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,SAAS,GAAG,CAAC;SACrF,CAAC,CAAC;IACL,CAAC;IAED,4EAA4E;IAC5E,MAAM,SAAS,GAAG,OAAO;SACtB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;SACrC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAE/C,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,IAAI,SAAS,CAAC,MAAM,IAAI,kBAAkB,EAAE,CAAC;QACpE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC,CAAC,2BAA2B;QAElE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IA
AI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,SAAS;YACjC,MAAM,KAAK,GAAa,CAAC,CAAC,CAAC,CAAC;YAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,SAAS;gBACjC,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;gBAClE,IAAI,GAAG,IAAI,cAAc,EAAE,CAAC;oBAC1B,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBACd,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBACrB,CAAC;YACH,CAAC;YAED,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBACtB,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBACnB,sEAAsE;gBACtE,IAAI,MAAM,GAAG,GAAG,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAC1F,CAAC;gBACD,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACxB,WAAW,CAAC,IAAI,CAAC;oBACf,IAAI,EAAE,MAAM;oBACZ,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,GAAG;oBAC1C,SAAS,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;oBAChD,SAAS,EACP,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,SAAS,GAAG,CAAC;iBAChF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,yDAAyD;IACzD,WAAW,CAAC,IAAI,CACd,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,SAAS,CAC9E,CAAC;IAEF,kBAAkB;IAClB,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;IACxC,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC;YAC/B,aAAa,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,OAAO;QACL,WAAW;QACX,KAAK,EAAE;YACL,cAAc,EAAE,OAAO,CAAC,MAAM;YAC9B,mBAAmB,EAAE,aAAa,CAAC,IAAI;YACvC,gBAAgB,EACd,OAAO,CAAC,MAAM,GAAG,CAAC;gBAChB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,aAAa,CAAC,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI;gBACjE,CAAC,CAAC,CAAC;YACP,e
AAe,EAAE,WAAW,CAAC,MAAM;SACpC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EmbeddingService
|
|
3
|
+
*
|
|
4
|
+
* Computes text embeddings via any OpenAI-compatible `/embeddings` endpoint
|
|
5
|
+
* (OpenAI, Ollama, LocalAI, vLLM, LM Studio, …).
|
|
6
|
+
*
|
|
7
|
+
* Configuration (in priority order):
|
|
8
|
+
* 1. Constructor argument `EmbeddingConfig`
|
|
9
|
+
* 2. Environment variables: EMBED_BASE_URL, EMBED_MODEL, EMBED_API_KEY, EMBED_SKIP_SSL_VERIFY
|
|
10
|
+
*
|
|
11
|
+
* The service batches texts in groups of `batchSize` (default 64) and
|
|
12
|
+
* resolves all batches sequentially to avoid overloading the server.
|
|
13
|
+
*/
|
|
14
|
+
import type { SpecGenConfig } from '../../types/index.js';
|
|
15
|
+
/**
 * Connection settings accepted by the `EmbeddingService` constructor.
 */
export interface EmbeddingConfig {
    /**
     * Root URL of the OpenAI-compatible API, for example
     * "http://localhost:11434/v1". The service appends `/embeddings` to it.
     */
    baseUrl: string;
    /**
     * Name of the embedding model to request, for example
     * "nomic-embed-text" or "text-embedding-3-small".
     */
    model: string;
    /**
     * Optional API key. When present it is sent as a bearer token;
     * local servers usually do not need one.
     */
    apiKey?: string;
    /**
     * Upper bound on the number of texts sent in a single API call.
     * Defaults to 64 when omitted.
     */
    batchSize?: number;
    /**
     * Turn off SSL certificate verification, e.g. for a local server that
     * uses a self-signed certificate.
     */
    skipSslVerify?: boolean;
}
|
|
27
|
+
/**
 * Client that turns texts into embedding vectors by calling an
 * OpenAI-compatible `/embeddings` endpoint.
 */
export declare class EmbeddingService {
    private baseUrl;
    private model;
    private apiKey;
    private batchSize;
    /**
     * Create a service from explicit settings.
     *
     * @param config Endpoint, model and optional key / batch size / SSL switch.
     */
    constructor(config: EmbeddingConfig);
    /**
     * Create a service from the process environment.
     *
     * Reads EMBED_BASE_URL and EMBED_MODEL (both required) plus the optional
     * EMBED_API_KEY and EMBED_SKIP_SSL_VERIFY ("1" or "true" enables it).
     *
     * @throws Error when EMBED_BASE_URL or EMBED_MODEL is not set.
     */
    static fromEnv(): EmbeddingService;
    /**
     * Create a service from a SpecGenConfig.
     *
     * @param cfg Project configuration to read the embedding settings from.
     * @returns A configured service, or `null` when the configuration carries
     *   no embedding base URL or model.
     */
    static fromConfig(cfg: SpecGenConfig): EmbeddingService | null;
    /**
     * Embed a list of texts.
     *
     * @param texts Texts to embed; an empty list resolves to an empty result.
     * @returns One vector per input text, in input order.
     */
    embed(texts: string[]): Promise<number[][]>;
    private callEmbeddingsApi;
}
|
|
50
|
+
//# sourceMappingURL=embedding-service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-service.d.ts","sourceRoot":"","sources":["../../../src/core/analyzer/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAM1D,MAAM,WAAW,eAAe;IAC9B,8EAA8E;IAC9E,OAAO,EAAE,MAAM,CAAC;IAChB,gFAAgF;IAChF,KAAK,EAAE,MAAM,CAAC;IACd,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,yDAAyD;IACzD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qFAAqF;IACrF,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAMD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;gBAEd,MAAM,EAAE,eAAe;IAUnC;;;OAGG;IACH,MAAM,CAAC,OAAO,IAAI,gBAAgB;IAalC;;;OAGG;IACH,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,aAAa,GAAG,gBAAgB,GAAG,IAAI;IAU9D;;;OAGG;IACG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAcnC,iBAAiB;CAmChC"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EmbeddingService
|
|
3
|
+
*
|
|
4
|
+
* Computes text embeddings via any OpenAI-compatible `/embeddings` endpoint
|
|
5
|
+
* (OpenAI, Ollama, LocalAI, vLLM, LM Studio, …).
|
|
6
|
+
*
|
|
7
|
+
* Configuration (in priority order):
|
|
8
|
+
* 1. Constructor argument `EmbeddingConfig`
|
|
9
|
+
* 2. Environment variables: EMBED_BASE_URL, EMBED_MODEL, EMBED_API_KEY, EMBED_SKIP_SSL_VERIFY
|
|
10
|
+
*
|
|
11
|
+
* The service batches texts in groups of `batchSize` (default 64) and
|
|
12
|
+
* resolves all batches sequentially to avoid overloading the server.
|
|
13
|
+
*/
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// EMBEDDING SERVICE
|
|
16
|
+
// ============================================================================
|
|
17
|
+
/**
 * Client for an OpenAI-compatible `/embeddings` endpoint.
 *
 * Texts are sent in consecutive batches of at most `batchSize` items, one
 * request at a time, and the resulting vectors are returned in input order.
 */
export class EmbeddingService {
    baseUrl;
    model;
    apiKey;
    batchSize;
    /**
     * @param config `baseUrl` and `model` are required; `apiKey`, `batchSize`
     *   (default 64) and `skipSslVerify` are optional.
     */
    constructor(config) {
        // Strip every trailing slash so `${baseUrl}/embeddings` never contains "//".
        this.baseUrl = config.baseUrl.replace(/\/+$/, '');
        this.model = config.model;
        this.apiKey = config.apiKey ?? '';
        // A batch size below 1 (or NaN) would keep the loop in embed() from ever
        // advancing, so anything unusable falls back to the default of 64.
        // Fractional sizes are rounded down to a whole number of texts.
        const requestedBatchSize = config.batchSize ?? 64;
        this.batchSize = requestedBatchSize >= 1 ? Math.floor(requestedBatchSize) : 64;
        // SECURITY NOTE: NODE_TLS_REJECT_UNAUTHORIZED is a process-wide switch.
        // Setting it to '0' turns off certificate verification for every TLS
        // connection this Node.js process makes, not only for embedding calls.
        if (config.skipSslVerify && process.env.NODE_TLS_REJECT_UNAUTHORIZED !== '0') {
            process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
        }
    }
    /**
     * Build an EmbeddingService from environment variables.
     *
     * Reads EMBED_BASE_URL and EMBED_MODEL (required), EMBED_API_KEY (optional)
     * and EMBED_SKIP_SSL_VERIFY (optional; "1" or "true" enables it).
     *
     * @throws Error if EMBED_BASE_URL or EMBED_MODEL is not set.
     */
    static fromEnv() {
        const baseUrl = process.env.EMBED_BASE_URL;
        const model = process.env.EMBED_MODEL;
        if (!baseUrl)
            throw new Error('EMBED_BASE_URL environment variable is required');
        if (!model)
            throw new Error('EMBED_MODEL environment variable is required');
        const skipFlag = process.env.EMBED_SKIP_SSL_VERIFY;
        return new EmbeddingService({
            baseUrl,
            model,
            apiKey: process.env.EMBED_API_KEY,
            skipSslVerify: skipFlag === '1' || skipFlag === 'true',
        });
    }
    /**
     * Build an EmbeddingService from a SpecGenConfig.
     *
     * @returns The service, or null if `cfg.embedding` lacks a base URL or model.
     */
    static fromConfig(cfg) {
        const embedding = cfg.embedding;
        if (!embedding?.baseUrl || !embedding?.model)
            return null;
        return new EmbeddingService({
            baseUrl: embedding.baseUrl,
            model: embedding.model,
            apiKey: embedding.apiKey,
            skipSslVerify: embedding.skipSslVerify,
        });
    }
    /**
     * Compute embeddings for a list of texts.
     *
     * Batches are requested one after another rather than in parallel.
     *
     * @param texts Texts to embed; an empty list resolves to [] without any request.
     * @returns One embedding vector per input text, in the same order.
     * @throws Error if a request fails or the response is malformed.
     */
    async embed(texts) {
        if (texts.length === 0)
            return [];
        const results = [];
        for (let i = 0; i < texts.length; i += this.batchSize) {
            const batch = texts.slice(i, i + this.batchSize);
            const vectors = await this.callEmbeddingsApi(batch);
            // Append one by one: spreading a very large batch into push() can
            // exceed the engine's argument limit.
            for (const vector of vectors) {
                results.push(vector);
            }
        }
        return results;
    }
    /**
     * POST one batch to `${baseUrl}/embeddings` and return its vectors.
     *
     * @param texts The batch to embed.
     * @returns The embeddings ordered by the `index` field of the response.
     * @throws Error on a non-2xx status, a missing `data` array, or a vector
     *   count that differs from the number of texts sent.
     */
    async callEmbeddingsApi(texts) {
        const url = `${this.baseUrl}/embeddings`;
        const headers = {
            'Content-Type': 'application/json',
        };
        if (this.apiKey) {
            headers['Authorization'] = `Bearer ${this.apiKey}`;
        }
        const response = await fetch(url, {
            method: 'POST',
            headers,
            body: JSON.stringify({ input: texts, model: this.model }),
        });
        if (!response.ok) {
            // Reading the error body is best-effort; an unreadable body becomes ''.
            const body = await response.text().catch(() => '');
            throw new Error(`Embedding API error ${response.status} from ${url}: ${body.slice(0, 200)}`);
        }
        const json = (await response.json());
        // `json` itself may be null, so guard the property access as well.
        if (!Array.isArray(json?.data)) {
            throw new Error(`Unexpected embedding response format: missing "data" array`);
        }
        // embed() promises one vector per input; a short or long response would
        // silently misalign every following vector, so reject it here.
        if (json.data.length !== texts.length) {
            throw new Error(`Unexpected embedding response format: expected ${texts.length} embeddings, got ${json.data.length}`);
        }
        // Sort by index to guarantee order matches input
        const sorted = [...json.data].sort((a, b) => a.index - b.index);
        return sorted.map(d => d.embedding);
    }
}
|
|
104
|
+
//# sourceMappingURL=embedding-service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-service.js","sourceRoot":"","sources":["../../../src/core/analyzer/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAqBH,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,MAAM,OAAO,gBAAgB;IACnB,OAAO,CAAS;IAChB,KAAK,CAAS;IACd,MAAM,CAAS;IACf,SAAS,CAAS;IAE1B,YAAY,MAAuB;QACjC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACjD,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC;QAClC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;QACxC,IAAI,MAAM,CAAC,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,4BAA4B,KAAK,GAAG,EAAE,CAAC;YAC7E,OAAO,CAAC,GAAG,CAAC,4BAA4B,GAAG,GAAG,CAAC;QACjD,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,OAAO;QACZ,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;QACtC,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACjF,IAAI,CAAC,KAAK;YAAE,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAC5E,OAAO,IAAI,gBAAgB,CAAC;YAC1B,OAAO;YACP,KAAK;YACL,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,aAAa;YACjC,aAAa,EAAE,OAAO,CAAC,GAAG,CAAC,qBAAqB,KAAK,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,KAAK,MAAM;SACzG,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,UAAU,CAAC,GAAkB;QAClC,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK;YAAE,OAAO,IAAI,CAAC;QAClE,OAAO,IAAI,gBAAgB,CAAC;YAC1B,OAAO,EAAE,GAAG,CAAC,SAAS,CAAC,OAAO;YAC9B,KAAK,EAAE,GAAG,CAAC,SAAS,CAAC,KAAK;YAC1B,MAAM,EAAE,GAAG,CAAC,SAAS,CAAC,MAAM;YAC5B,aAAa,EAAE,GAAG,CAAC,SAAS,CAAC,aAAa;SAC3C,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAElC,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACtD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;YACjD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;YACpD,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC;QAC3B,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,KAAK,CAAC,
iBAAiB,CAAC,KAAe;QAC7C,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,aAAa,CAAC;QAEzC,MAAM,OAAO,GAA2B;YACtC,cAAc,EAAE,kBAAkB;SACnC,CAAC;QACF,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,IAAI,CAAC,MAAM,EAAE,CAAC;QACrD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,MAAM;YACd,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;SAC1D,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CACb,uBAAuB,QAAQ,CAAC,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC5E,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAElC,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;QAChF,CAAC;QAED,iDAAiD;QACjD,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAChE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IACtC,CAAC;CACF"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FileWalker Service
|
|
3
|
+
*
|
|
4
|
+
* Traverses the codebase intelligently, filtering noise and respecting ignore patterns.
|
|
5
|
+
* Collects metadata about each file for significance scoring and analysis.
|
|
6
|
+
*/
|
|
7
|
+
import type { FileWalkerResult } from '../../types/index.js';
|
|
8
|
+
/**
 * Settings that tune a FileWalker run. Every field is optional.
 */
export interface FileWalkerOptions {
    /**
     * Cap on the number of files to process.
     */
    maxFiles?: number;
    /**
     * Extra glob patterns to include.
     */
    includePatterns?: string[];
    /**
     * Extra glob patterns to exclude.
     */
    excludePatterns?: string[];
    /**
     * Callback that receives progress snapshots, intended for UI updates.
     */
    onProgress?: (progress: FileWalkerProgress) => void;
    /**
     * Signal from an AbortController, used for cancellation.
     */
    signal?: AbortSignal;
    /**
     * Cap on the number of concurrent file reads.
     */
    concurrency?: number;
}
|
|
25
|
+
/**
 * Snapshot of progress reported while files are being walked.
 */
export interface FileWalkerProgress {
    /** Count of files found (presumably so far in the current walk; confirm against the implementation). */
    filesFound: number;
    /** Count of directories scanned (presumably so far in the current walk; confirm against the implementation). */
    directoriesScanned: number;
    /** Path associated with this progress report. */
    currentPath: string;
}
|
|
33
|
+
/**
 * Traverses a codebase starting at a root path and collects file metadata.
 */
export declare class FileWalker {
    private rootPath;
    private options;
    private ig;
    /**
     * A second ignore instance, kept apart from `ig`, that answers whether a
     * file matches includePatterns.
     */
    private igInclude;
    private files;
    private skippedCount;
    private skippedReasons;
    private directoriesScanned;
    private extensionCounts;
    private directoryCounts;
    /**
     * @param rootPath Directory the walk starts from.
     * @param options Optional tuning; see FileWalkerOptions.
     */
    constructor(rootPath: string, options?: FileWalkerOptions);
    /**
     * Note a skipped file together with the reason it was skipped.
     */
    private recordSkip;
    /**
     * Decide whether a directory should be skipped.
     */
    private shouldSkipDirectory;
    /**
     * Decide whether a file should be skipped.
     */
    private shouldSkipFile;
    /**
     * Recursively walk one directory.
     */
    private walkDirectory;
    /**
     * Handle one file and gather its metadata.
     */
    private processFile;
    /**
     * Run the walk over the codebase.
     *
     * @returns The collected file metadata as a FileWalkerResult.
     */
    walk(): Promise<FileWalkerResult>;
}
|
|
74
|
+
/**
 * Shorthand for walking a directory without constructing a FileWalker by hand.
 *
 * @param rootPath Directory to walk.
 * @param options Optional FileWalkerOptions.
 * @returns The FileWalkerResult of the walk.
 */
export declare function walkDirectory(
    rootPath: string,
    options?: FileWalkerOptions,
): Promise<FileWalkerResult>;
|
|
78
|
+
//# sourceMappingURL=file-walker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-walker.d.ts","sourceRoot":"","sources":["../../../src/core/analyzer/file-walker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH,OAAO,KAAK,EAAgB,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,yCAAyC;IACzC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,0CAA0C;IAC1C,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,uCAAuC;IACvC,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,kBAAkB,KAAK,IAAI,CAAC;IACpD,8CAA8C;IAC9C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,oCAAoC;IACpC,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;CACrB;AA8TD;;GAEG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,OAAO,CAA8B;IAC7C,OAAO,CAAC,EAAE,CAAuB;IACjC,gFAAgF;IAChF,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,cAAc,CAA8B;IACpD,OAAO,CAAC,kBAAkB,CAAK;IAC/B,OAAO,CAAC,eAAe,CAA8B;IACrD,OAAO,CAAC,eAAe,CAA8B;gBAEzC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB;IAY7D;;OAEG;IACH,OAAO,CAAC,UAAU;IAKlB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAwB3B;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB;;OAEG;YACW,aAAa;IAkF3B;;OAEG;YACW,WAAW;IAwCzB;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,gBAAgB,CAAC;CAqCxC;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,gBAAgB,CAAC,CAG3B"}
|