@vpxa/kb 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1140 -0
- package/bin/kb.mjs +10 -0
- package/package.json +67 -0
- package/packages/analyzers/dist/blast-radius-analyzer.d.ts +23 -0
- package/packages/analyzers/dist/blast-radius-analyzer.js +114 -0
- package/packages/analyzers/dist/dependency-analyzer.d.ts +29 -0
- package/packages/analyzers/dist/dependency-analyzer.js +425 -0
- package/packages/analyzers/dist/diagram-generator.d.ts +13 -0
- package/packages/analyzers/dist/diagram-generator.js +86 -0
- package/packages/analyzers/dist/entry-point-analyzer.d.ts +19 -0
- package/packages/analyzers/dist/entry-point-analyzer.js +239 -0
- package/packages/analyzers/dist/index.d.ts +14 -0
- package/packages/analyzers/dist/index.js +23 -0
- package/packages/analyzers/dist/knowledge-producer.d.ts +32 -0
- package/packages/analyzers/dist/knowledge-producer.js +113 -0
- package/packages/analyzers/dist/pattern-analyzer.d.ts +12 -0
- package/packages/analyzers/dist/pattern-analyzer.js +359 -0
- package/packages/analyzers/dist/regex-call-graph.d.ts +17 -0
- package/packages/analyzers/dist/regex-call-graph.js +428 -0
- package/packages/analyzers/dist/structure-analyzer.d.ts +11 -0
- package/packages/analyzers/dist/structure-analyzer.js +258 -0
- package/packages/analyzers/dist/symbol-analyzer.d.ts +10 -0
- package/packages/analyzers/dist/symbol-analyzer.js +442 -0
- package/packages/analyzers/dist/ts-call-graph.d.ts +27 -0
- package/packages/analyzers/dist/ts-call-graph.js +160 -0
- package/packages/analyzers/dist/types.d.ts +98 -0
- package/packages/analyzers/dist/types.js +1 -0
- package/packages/chunker/dist/call-graph-extractor.d.ts +22 -0
- package/packages/chunker/dist/call-graph-extractor.js +90 -0
- package/packages/chunker/dist/chunker-factory.d.ts +7 -0
- package/packages/chunker/dist/chunker-factory.js +36 -0
- package/packages/chunker/dist/chunker.interface.d.ts +10 -0
- package/packages/chunker/dist/chunker.interface.js +1 -0
- package/packages/chunker/dist/code-chunker.d.ts +14 -0
- package/packages/chunker/dist/code-chunker.js +134 -0
- package/packages/chunker/dist/generic-chunker.d.ts +12 -0
- package/packages/chunker/dist/generic-chunker.js +72 -0
- package/packages/chunker/dist/index.d.ts +8 -0
- package/packages/chunker/dist/index.js +21 -0
- package/packages/chunker/dist/markdown-chunker.d.ts +14 -0
- package/packages/chunker/dist/markdown-chunker.js +122 -0
- package/packages/chunker/dist/treesitter-chunker.d.ts +47 -0
- package/packages/chunker/dist/treesitter-chunker.js +234 -0
- package/packages/cli/dist/commands/analyze.d.ts +3 -0
- package/packages/cli/dist/commands/analyze.js +112 -0
- package/packages/cli/dist/commands/context-cmds.d.ts +3 -0
- package/packages/cli/dist/commands/context-cmds.js +155 -0
- package/packages/cli/dist/commands/environment.d.ts +3 -0
- package/packages/cli/dist/commands/environment.js +204 -0
- package/packages/cli/dist/commands/execution.d.ts +3 -0
- package/packages/cli/dist/commands/execution.js +137 -0
- package/packages/cli/dist/commands/graph.d.ts +3 -0
- package/packages/cli/dist/commands/graph.js +81 -0
- package/packages/cli/dist/commands/init.d.ts +8 -0
- package/packages/cli/dist/commands/init.js +87 -0
- package/packages/cli/dist/commands/knowledge.d.ts +3 -0
- package/packages/cli/dist/commands/knowledge.js +139 -0
- package/packages/cli/dist/commands/search.d.ts +3 -0
- package/packages/cli/dist/commands/search.js +267 -0
- package/packages/cli/dist/commands/system.d.ts +3 -0
- package/packages/cli/dist/commands/system.js +241 -0
- package/packages/cli/dist/commands/workspace.d.ts +3 -0
- package/packages/cli/dist/commands/workspace.js +388 -0
- package/packages/cli/dist/context.d.ts +5 -0
- package/packages/cli/dist/context.js +14 -0
- package/packages/cli/dist/helpers.d.ts +52 -0
- package/packages/cli/dist/helpers.js +458 -0
- package/packages/cli/dist/index.d.ts +8 -0
- package/packages/cli/dist/index.js +69 -0
- package/packages/cli/dist/kb-init.d.ts +57 -0
- package/packages/cli/dist/kb-init.js +82 -0
- package/packages/cli/dist/types.d.ts +7 -0
- package/packages/cli/dist/types.js +1 -0
- package/packages/core/dist/constants.d.ts +49 -0
- package/packages/core/dist/constants.js +43 -0
- package/packages/core/dist/content-detector.d.ts +9 -0
- package/packages/core/dist/content-detector.js +79 -0
- package/packages/core/dist/errors.d.ts +18 -0
- package/packages/core/dist/errors.js +40 -0
- package/packages/core/dist/index.d.ts +6 -0
- package/packages/core/dist/index.js +9 -0
- package/packages/core/dist/logger.d.ts +9 -0
- package/packages/core/dist/logger.js +34 -0
- package/packages/core/dist/types.d.ts +108 -0
- package/packages/core/dist/types.js +1 -0
- package/packages/embeddings/dist/embedder.interface.d.ts +24 -0
- package/packages/embeddings/dist/embedder.interface.js +1 -0
- package/packages/embeddings/dist/index.d.ts +3 -0
- package/packages/embeddings/dist/index.js +5 -0
- package/packages/embeddings/dist/onnx-embedder.d.ts +24 -0
- package/packages/embeddings/dist/onnx-embedder.js +82 -0
- package/packages/indexer/dist/file-hasher.d.ts +11 -0
- package/packages/indexer/dist/file-hasher.js +13 -0
- package/packages/indexer/dist/filesystem-crawler.d.ts +27 -0
- package/packages/indexer/dist/filesystem-crawler.js +125 -0
- package/packages/indexer/dist/graph-extractor.d.ts +22 -0
- package/packages/indexer/dist/graph-extractor.js +111 -0
- package/packages/indexer/dist/incremental-indexer.d.ts +47 -0
- package/packages/indexer/dist/incremental-indexer.js +278 -0
- package/packages/indexer/dist/index.d.ts +5 -0
- package/packages/indexer/dist/index.js +14 -0
- package/packages/server/dist/api.d.ts +8 -0
- package/packages/server/dist/api.js +9 -0
- package/packages/server/dist/config.d.ts +3 -0
- package/packages/server/dist/config.js +75 -0
- package/packages/server/dist/curated-manager.d.ts +86 -0
- package/packages/server/dist/curated-manager.js +357 -0
- package/packages/server/dist/index.d.ts +2 -0
- package/packages/server/dist/index.js +134 -0
- package/packages/server/dist/replay-interceptor.d.ts +11 -0
- package/packages/server/dist/replay-interceptor.js +38 -0
- package/packages/server/dist/resources/resources.d.ts +4 -0
- package/packages/server/dist/resources/resources.js +40 -0
- package/packages/server/dist/server.d.ts +21 -0
- package/packages/server/dist/server.js +247 -0
- package/packages/server/dist/tools/analyze.tools.d.ts +11 -0
- package/packages/server/dist/tools/analyze.tools.js +288 -0
- package/packages/server/dist/tools/forge.tools.d.ts +12 -0
- package/packages/server/dist/tools/forge.tools.js +501 -0
- package/packages/server/dist/tools/forget.tool.d.ts +4 -0
- package/packages/server/dist/tools/forget.tool.js +43 -0
- package/packages/server/dist/tools/graph.tool.d.ts +4 -0
- package/packages/server/dist/tools/graph.tool.js +110 -0
- package/packages/server/dist/tools/list.tool.d.ts +4 -0
- package/packages/server/dist/tools/list.tool.js +56 -0
- package/packages/server/dist/tools/lookup.tool.d.ts +4 -0
- package/packages/server/dist/tools/lookup.tool.js +53 -0
- package/packages/server/dist/tools/onboard.tool.d.ts +5 -0
- package/packages/server/dist/tools/onboard.tool.js +112 -0
- package/packages/server/dist/tools/produce.tool.d.ts +3 -0
- package/packages/server/dist/tools/produce.tool.js +74 -0
- package/packages/server/dist/tools/read.tool.d.ts +4 -0
- package/packages/server/dist/tools/read.tool.js +49 -0
- package/packages/server/dist/tools/reindex.tool.d.ts +7 -0
- package/packages/server/dist/tools/reindex.tool.js +70 -0
- package/packages/server/dist/tools/remember.tool.d.ts +4 -0
- package/packages/server/dist/tools/remember.tool.js +45 -0
- package/packages/server/dist/tools/replay.tool.d.ts +3 -0
- package/packages/server/dist/tools/replay.tool.js +89 -0
- package/packages/server/dist/tools/search.tool.d.ts +5 -0
- package/packages/server/dist/tools/search.tool.js +331 -0
- package/packages/server/dist/tools/status.tool.d.ts +4 -0
- package/packages/server/dist/tools/status.tool.js +68 -0
- package/packages/server/dist/tools/toolkit.tools.d.ts +35 -0
- package/packages/server/dist/tools/toolkit.tools.js +1674 -0
- package/packages/server/dist/tools/update.tool.d.ts +4 -0
- package/packages/server/dist/tools/update.tool.js +42 -0
- package/packages/server/dist/tools/utility.tools.d.ts +15 -0
- package/packages/server/dist/tools/utility.tools.js +461 -0
- package/packages/store/dist/graph-store.interface.d.ts +104 -0
- package/packages/store/dist/graph-store.interface.js +1 -0
- package/packages/store/dist/index.d.ts +6 -0
- package/packages/store/dist/index.js +9 -0
- package/packages/store/dist/lance-store.d.ts +32 -0
- package/packages/store/dist/lance-store.js +258 -0
- package/packages/store/dist/sqlite-graph-store.d.ts +43 -0
- package/packages/store/dist/sqlite-graph-store.js +374 -0
- package/packages/store/dist/store-factory.d.ts +9 -0
- package/packages/store/dist/store-factory.js +14 -0
- package/packages/store/dist/store.interface.d.ts +48 -0
- package/packages/store/dist/store.interface.js +1 -0
- package/packages/tools/dist/batch.d.ts +21 -0
- package/packages/tools/dist/batch.js +45 -0
- package/packages/tools/dist/changelog.d.ts +34 -0
- package/packages/tools/dist/changelog.js +112 -0
- package/packages/tools/dist/check.d.ts +26 -0
- package/packages/tools/dist/check.js +59 -0
- package/packages/tools/dist/checkpoint.d.ts +17 -0
- package/packages/tools/dist/checkpoint.js +43 -0
- package/packages/tools/dist/codemod.d.ts +37 -0
- package/packages/tools/dist/codemod.js +69 -0
- package/packages/tools/dist/compact.d.ts +41 -0
- package/packages/tools/dist/compact.js +60 -0
- package/packages/tools/dist/data-transform.d.ts +10 -0
- package/packages/tools/dist/data-transform.js +124 -0
- package/packages/tools/dist/dead-symbols.d.ts +21 -0
- package/packages/tools/dist/dead-symbols.js +71 -0
- package/packages/tools/dist/delegate.d.ts +34 -0
- package/packages/tools/dist/delegate.js +130 -0
- package/packages/tools/dist/diff-parse.d.ts +26 -0
- package/packages/tools/dist/diff-parse.js +153 -0
- package/packages/tools/dist/digest.d.ts +53 -0
- package/packages/tools/dist/digest.js +242 -0
- package/packages/tools/dist/encode.d.ts +14 -0
- package/packages/tools/dist/encode.js +46 -0
- package/packages/tools/dist/env-info.d.ts +28 -0
- package/packages/tools/dist/env-info.js +58 -0
- package/packages/tools/dist/eval.d.ts +13 -0
- package/packages/tools/dist/eval.js +79 -0
- package/packages/tools/dist/evidence-map.d.ts +79 -0
- package/packages/tools/dist/evidence-map.js +203 -0
- package/packages/tools/dist/file-summary.d.ts +32 -0
- package/packages/tools/dist/file-summary.js +106 -0
- package/packages/tools/dist/file-walk.d.ts +4 -0
- package/packages/tools/dist/file-walk.js +75 -0
- package/packages/tools/dist/find-examples.d.ts +25 -0
- package/packages/tools/dist/find-examples.js +48 -0
- package/packages/tools/dist/find.d.ts +47 -0
- package/packages/tools/dist/find.js +120 -0
- package/packages/tools/dist/forge-classify.d.ts +44 -0
- package/packages/tools/dist/forge-classify.js +319 -0
- package/packages/tools/dist/forge-ground.d.ts +64 -0
- package/packages/tools/dist/forge-ground.js +184 -0
- package/packages/tools/dist/git-context.d.ts +22 -0
- package/packages/tools/dist/git-context.js +46 -0
- package/packages/tools/dist/graph-query.d.ts +89 -0
- package/packages/tools/dist/graph-query.js +194 -0
- package/packages/tools/dist/health.d.ts +14 -0
- package/packages/tools/dist/health.js +118 -0
- package/packages/tools/dist/http-request.d.ts +23 -0
- package/packages/tools/dist/http-request.js +58 -0
- package/packages/tools/dist/index.d.ts +49 -0
- package/packages/tools/dist/index.js +273 -0
- package/packages/tools/dist/lane.d.ts +39 -0
- package/packages/tools/dist/lane.js +227 -0
- package/packages/tools/dist/measure.d.ts +38 -0
- package/packages/tools/dist/measure.js +119 -0
- package/packages/tools/dist/onboard.d.ts +41 -0
- package/packages/tools/dist/onboard.js +1139 -0
- package/packages/tools/dist/parse-output.d.ts +80 -0
- package/packages/tools/dist/parse-output.js +158 -0
- package/packages/tools/dist/process-manager.d.ts +18 -0
- package/packages/tools/dist/process-manager.js +69 -0
- package/packages/tools/dist/queue.d.ts +38 -0
- package/packages/tools/dist/queue.js +126 -0
- package/packages/tools/dist/regex-test.d.ts +31 -0
- package/packages/tools/dist/regex-test.js +39 -0
- package/packages/tools/dist/rename.d.ts +29 -0
- package/packages/tools/dist/rename.js +70 -0
- package/packages/tools/dist/replay.d.ts +56 -0
- package/packages/tools/dist/replay.js +108 -0
- package/packages/tools/dist/schema-validate.d.ts +23 -0
- package/packages/tools/dist/schema-validate.js +141 -0
- package/packages/tools/dist/scope-map.d.ts +52 -0
- package/packages/tools/dist/scope-map.js +72 -0
- package/packages/tools/dist/snippet.d.ts +34 -0
- package/packages/tools/dist/snippet.js +80 -0
- package/packages/tools/dist/stash.d.ts +12 -0
- package/packages/tools/dist/stash.js +60 -0
- package/packages/tools/dist/stratum-card.d.ts +31 -0
- package/packages/tools/dist/stratum-card.js +239 -0
- package/packages/tools/dist/symbol.d.ts +28 -0
- package/packages/tools/dist/symbol.js +87 -0
- package/packages/tools/dist/test-run.d.ts +23 -0
- package/packages/tools/dist/test-run.js +55 -0
- package/packages/tools/dist/text-utils.d.ts +16 -0
- package/packages/tools/dist/text-utils.js +31 -0
- package/packages/tools/dist/time-utils.d.ts +18 -0
- package/packages/tools/dist/time-utils.js +135 -0
- package/packages/tools/dist/trace.d.ts +24 -0
- package/packages/tools/dist/trace.js +114 -0
- package/packages/tools/dist/truncation.d.ts +22 -0
- package/packages/tools/dist/truncation.js +45 -0
- package/packages/tools/dist/watch.d.ts +30 -0
- package/packages/tools/dist/watch.js +61 -0
- package/packages/tools/dist/web-fetch.d.ts +45 -0
- package/packages/tools/dist/web-fetch.js +249 -0
- package/packages/tools/dist/web-search.d.ts +23 -0
- package/packages/tools/dist/web-search.js +46 -0
- package/packages/tools/dist/workset.d.ts +45 -0
- package/packages/tools/dist/workset.js +77 -0
- package/packages/tui/dist/App.d.ts +8 -0
- package/packages/tui/dist/App.js +52659 -0
- package/packages/tui/dist/index.d.ts +19 -0
- package/packages/tui/dist/index.js +54742 -0
- package/packages/tui/dist/panels/CuratedPanel.d.ts +8 -0
- package/packages/tui/dist/panels/CuratedPanel.js +34452 -0
- package/packages/tui/dist/panels/LogPanel.d.ts +3 -0
- package/packages/tui/dist/panels/LogPanel.js +51894 -0
- package/packages/tui/dist/panels/SearchPanel.d.ts +10 -0
- package/packages/tui/dist/panels/SearchPanel.js +34985 -0
- package/packages/tui/dist/panels/StatusPanel.d.ts +8 -0
- package/packages/tui/dist/panels/StatusPanel.js +34465 -0
- package/skills/knowledge-base/SKILL.md +316 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Analyzer interfaces and result types.
|
|
3
|
+
*/
|
|
4
|
+
/** Result returned by an analyzer run. */
export interface AnalysisResult {
    /** Rendered text of the result, formatted as Markdown, JSON, or Mermaid. */
    output: string;
    /** Structured form of the result, intended for programmatic consumers. */
    data: Record<string, unknown>;
    /** Metadata describing the analysis run itself. */
    meta: {
        analyzedAt: string;
        scope: string;
        fileCount: number;
        durationMs: number;
    };
}
|
|
17
|
+
/**
 * Base options accepted by analyzers.
 * Besides the optional output format, any additional string-keyed option is allowed.
 */
export interface AnalyzerOptions {
    /** Requested output format. */
    format?: 'json' | 'markdown' | 'mermaid';
    /** Open-ended extension point for analyzer-specific options. */
    [key: string]: unknown;
}
|
|
21
|
+
/**
 * Contract implemented by analyzers.
 *
 * @typeParam TOpts - Options type accepted by `analyze`; defaults to the base `AnalyzerOptions`.
 */
export interface IAnalyzer<TOpts extends AnalyzerOptions = AnalyzerOptions> {
    /** Analyzer name (read-only). */
    readonly name: string;
    /**
     * Run the analysis.
     *
     * @param rootPath - Path the analysis starts from.
     * @param options - Optional analyzer options.
     * @returns A promise resolving to the analysis result.
     */
    analyze(rootPath: string, options?: TOpts): Promise<AnalysisResult>;
}
|
|
25
|
+
/** Options for the structure analyzer; narrows `format` to JSON or Markdown. */
export interface StructureAnalyzerOptions extends AnalyzerOptions {
    /** Requested output format. */
    format?: 'json' | 'markdown';
    /** Optional depth limit. */
    maxDepth?: number;
    /** When true, restrict the result to source code and config files (docs, images, fonts, etc. are left out). */
    sourceOnly?: boolean;
}
|
|
31
|
+
/** Options for the dependency analyzer. */
export interface DependencyAnalyzerOptions extends AnalyzerOptions {
    /** Requested output format. */
    format?: 'json' | 'markdown' | 'mermaid';
    /** Optional cap on the number of nodes. */
    maxNodes?: number;
}
|
|
35
|
+
/** Options for the symbol analyzer; narrows `format` to JSON or Markdown. */
export interface SymbolAnalyzerOptions extends AnalyzerOptions {
    /** Optional filter string. */
    filter?: string;
    /** Requested output format. */
    format?: 'json' | 'markdown';
}
|
|
39
|
+
/** Options for diagram generation. */
export interface DiagramOptions extends AnalyzerOptions {
    /** Which kind of diagram to produce. */
    diagramType?: 'architecture' | 'dependencies';
    /** Optional scope string. */
    scope?: string;
}
|
|
43
|
+
/** Node of a file tree: either a file or a directory, optionally carrying child nodes. */
export interface TreeNode {
    name: string;
    /** Whether this node is a file or a directory. */
    type: 'file' | 'directory';
    purpose?: string;
    language?: string;
    size?: number;
    /** Nested nodes, when present. */
    children?: Array<TreeNode>;
}
|
|
51
|
+
/** Aggregate project statistics. */
export interface ProjectStats {
    totalFiles: number;
    totalSize: number;
    /** Numeric value per language key. */
    languages: Record<string, number>;
}
|
|
56
|
+
/** Description of a single import found in a file. */
export interface ImportInfo {
    source: string;
    specifiers: Array<string>;
    filePath: string;
    isExternal: boolean;
    /** Confidence level, determined by how the import was resolved. */
    confidence: 'high' | 'medium' | 'low';
}
|
|
64
|
+
/** Description of a single symbol found in a file. */
export interface SymbolInfo {
    name: string;
    /** Category of the symbol. */
    kind: 'function' | 'class' | 'interface' | 'type' | 'const' | 'enum' | 'variable' | 'method';
    exported: boolean;
    filePath: string;
    line: number;
    /** For functions/methods: the full signature (parameter list plus return type). */
    signature?: string;
}
|
|
73
|
+
/** A detected pattern together with where it was found and how confident the match is. */
export interface PatternMatch {
    pattern: string;
    description: string;
    locations: Array<string>;
    confidence: 'high' | 'medium' | 'low';
}
|
|
79
|
+
/** A detected entry point. */
export interface EntryPoint {
    name: string;
    /** Category of the entry point. */
    type: 'lambda-handler' | 'main' | 'bin' | 'server' | 'cli';
    filePath: string;
    trigger?: string;
}
|
|
85
|
+
/** Bundle of optional analysis results, one slot per analysis kind. */
export interface ExtractionBaselines {
    structure?: AnalysisResult;
    dependencies?: AnalysisResult;
    symbols?: AnalysisResult;
    patterns?: AnalysisResult;
    entryPoints?: AnalysisResult;
    /** Zero or more diagram results. */
    diagrams?: Array<AnalysisResult>;
}
|
|
93
|
+
/** Summary of knowledge that already exists. */
export interface ExistingKnowledge {
    categories: Array<string>;
    /** String value per string key. */
    lastProduced: Record<string, string>;
    documentCount: number;
}
|
|
98
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Call-graph extractor using tree-sitter AST.
|
|
3
|
+
*
|
|
4
|
+
* Extracts function-level call sites from parsed code, producing
|
|
5
|
+
* caller→callee edges for blast-radius and dependency analysis.
|
|
6
|
+
*/
|
|
7
|
+
/** One caller→callee relation discovered at a call site. */
export interface CallEdge {
    /** Path of the file in which the call occurs. */
    callerFile: string;
    /** Name of the calling function/method; '<module>' for top-level calls. */
    callerName: string;
    /** Name of the function/method that is called. */
    calleeName: string;
    /** Line of the call site, counted from 1. */
    line: number;
}
|
|
17
|
+
/**
 * Extract call edges from one source file by walking its tree-sitter AST.
 *
 * @param content - Source text of the file.
 * @param filePath - Path of the file.
 * @returns The call edges, or null when tree-sitter is unavailable or does not support the language.
 */
export declare function extractCallEdges(content: string, filePath: string): CallEdge[] | null;
|
|
22
|
+
//# sourceMappingURL=call-graph-extractor.d.ts.map
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { extname } from "node:path";
|
|
2
|
+
import { TreeSitterRuntime } from "./treesitter-chunker.js";
|
|
3
|
+
/**
 * Tree-sitter node types that open a new function scope during the AST walk.
 * Some type names are shared by several grammars; each name appears once.
 */
const FUNCTION_NODE_TYPES = /* @__PURE__ */ new Set([
  "function_declaration", // TS/JS, Go
  "method_definition", // TS/JS
  "arrow_function", // TS/JS
  "function_definition", // Python
  "method_declaration", // Go, Java
  "function_item", // Rust
  "constructor_declaration", // Java
]);
|
|
19
|
+
/** Tree-sitter node types that are treated as call sites during the AST walk. */
const CALL_NODE_TYPES = /* @__PURE__ */ new Set([
  "call_expression", // TS/JS/Go/Rust
  "new_expression", // TS/JS (new Foo())
  "call", // Python
]);
|
|
27
|
+
/**
 * Collect caller→callee edges for one source file via its tree-sitter AST.
 *
 * @param {string} content - Source text to parse.
 * @param {string} filePath - File path; its lower-cased extension selects the
 *   grammar, and the path is recorded as `callerFile` on every edge.
 * @returns {Array<{callerFile: string, callerName: string, calleeName: string, line: number}> | null}
 *   The edges found, or `null` when no runtime is available, the extension has
 *   no registered language, or parsing produced no tree.
 */
function extractCallEdges(content, filePath) {
  const runtime = TreeSitterRuntime.get();
  if (!runtime) {
    return null;
  }

  const extension = extname(filePath).toLowerCase();
  const tree = runtime.hasLanguage(extension) ? runtime.parse(content, extension) : null;
  if (!tree) {
    return null;
  }

  // Calls outside any function are attributed to the "<module>" pseudo-scope.
  const collected = [];
  walkNode(tree.rootNode, filePath, "<module>", collected);
  return collected;
}
|
|
39
|
+
/**
 * Visit `node` and all of its descendants in pre-order, appending one edge to
 * `edges` for every call-like node whose callee name can be determined.
 *
 * @param {object} node - AST node to start from (falsy nodes are ignored).
 * @param {string} filePath - Recorded as `callerFile` on each edge.
 * @param {string} currentScope - Name of the function enclosing `node`.
 * @param {Array<object>} edges - Output array, mutated in place.
 */
function walkNode(node, filePath, currentScope, edges) {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (and therefore visited) in source order.
  const pending = [{ current: node, enclosing: currentScope }];

  while (pending.length > 0) {
    const { current, enclosing } = pending.pop();
    if (!current) {
      continue;
    }

    // A function-like node renames the scope for itself and its subtree;
    // when no name can be extracted the enclosing scope is kept.
    const scope = FUNCTION_NODE_TYPES.has(current.type)
      ? (extractFunctionName(current) ?? enclosing)
      : enclosing;

    if (CALL_NODE_TYPES.has(current.type)) {
      const calleeName = extractCalleeName(current);
      if (calleeName) {
        edges.push({
          callerFile: filePath,
          callerName: scope,
          calleeName,
          // Rows are 0-based on the node; edges carry 1-based lines.
          line: (current.startPosition?.row ?? 0) + 1,
        });
      }
    }

    const total = current.childCount ?? 0;
    for (let index = total - 1; index >= 0; index--) {
      const child = current.child(index);
      if (child) {
        pending.push({ current: child, enclosing: scope });
      }
    }
  }
}
|
|
61
|
+
/**
 * Determine the name of a function-like AST node.
 *
 * Resolution order:
 *  1. The node's `name` field, when the node exposes `childForFieldName`.
 *  2. For `arrow_function` nodes (which have no name of their own): the
 *     identifier of the directly enclosing `variable_declarator`, if any;
 *     otherwise `null`. Arrow functions are never scanned positionally,
 *     because the first identifier child of `x => ...` is the parameter,
 *     not a name.
 *  3. Fallback: the first direct child of type `identifier`,
 *     `property_identifier`, `field_identifier` or `name`.
 *
 * @param {object} node - Function-like AST node.
 * @returns {string | null} The name, or `null` when none can be determined
 *   (callers then keep the enclosing scope).
 */
function extractFunctionName(node) {
  const nameNode = node.childForFieldName?.("name");
  if (nameNode?.text) {
    return nameNode.text;
  }

  if (node.type === "arrow_function") {
    const owner = node.parent;
    if (owner?.type === "variable_declarator") {
      const binding = owner.childForFieldName?.("name");
      // Destructuring patterns are not usable as a function name.
      if (binding?.type === "identifier" && binding.text) {
        return binding.text;
      }
    }
    return null;
  }

  const nameLikeTypes = ["identifier", "property_identifier", "field_identifier", "name"];
  const total = node.childCount ?? 0;
  for (let index = 0; index < total; index++) {
    const child = node.child(index);
    if (child && nameLikeTypes.includes(child.type)) {
      return child.text ?? null;
    }
  }
  return null;
}
|
|
71
|
+
/**
 * Determine the name of the function/method/constructor invoked by a
 * call-like AST node.
 *
 * - `new_expression`: text of the `constructor` field (falling back to the
 *   second child when field lookup is unavailable).
 * - Otherwise the callee is the `function` field, or the first child when
 *   that field is absent, and is resolved by its node type:
 *     - `identifier` / `name`: its text
 *     - `member_expression` / `attribute`: text of the `property` field,
 *       else of the `attribute` field
 *     - `selector_expression` (Go) / `field_expression` (Rust): text of the
 *       `field` field
 *     - `scoped_identifier` (Rust paths such as `Vec::new`): text of the
 *       `name` field
 *
 * @param {object} node - Call-like AST node.
 * @returns {string | null} The callee name, or `null` when the callee shape
 *   is not recognised.
 */
function extractCalleeName(node) {
  if (node.type === "new_expression") {
    // Field lookup stays correct when a comment sits between `new` and the
    // constructor; positional child(1) is only the fallback.
    const ctor = node.childForFieldName?.("constructor") ?? node.child(1);
    return ctor?.text ?? null;
  }

  const target = node.childForFieldName?.("function") ?? node.child(0);
  if (!target) {
    return null;
  }

  switch (target.type) {
    case "identifier":
    case "name":
      return target.text ?? null;

    case "member_expression":
    case "attribute": {
      const member = target.childForFieldName?.("property") ?? target.childForFieldName?.("attribute");
      return member?.text ?? null;
    }

    case "selector_expression":
    case "field_expression":
      return target.childForFieldName?.("field")?.text ?? null;

    case "scoped_identifier":
      return target.childForFieldName?.("name")?.text ?? null;

    default:
      return null;
  }
}
|
|
87
|
+
export {
|
|
88
|
+
extractCallEdges
|
|
89
|
+
};
|
|
90
|
+
//# sourceMappingURL=call-graph-extractor.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { IChunker } from './chunker.interface.js';
|
|
2
|
+
/**
 * Pick the chunker to use for a given file extension.
 * Tree-sitter (AST-based) chunking is preferred when available, with
 * regex-based chunking as the fallback.
 *
 * @param fileExtension - Extension of the file to be chunked.
 * @returns A chunker instance.
 */
export declare function createChunker(fileExtension: string): IChunker;
|
|
7
|
+
//# sourceMappingURL=chunker-factory.d.ts.map
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { CodeChunker } from "./code-chunker.js";
|
|
2
|
+
import { GenericChunker } from "./generic-chunker.js";
|
|
3
|
+
import { MarkdownChunker } from "./markdown-chunker.js";
|
|
4
|
+
import { TreeSitterChunker, TreeSitterRuntime } from "./treesitter-chunker.js";
|
|
5
|
+
/**
 * Pick the chunker for a file extension (matched case-insensitively).
 *
 * - `.md` / `.mdx`: `MarkdownChunker`
 * - TS/JS, Python, Go, Rust and Java extensions: `TreeSitterChunker` when a
 *   runtime exists and reports a language for the extension, otherwise
 *   `CodeChunker`
 * - anything else: `GenericChunker`
 *
 * @param {string} fileExtension - Extension including the leading dot.
 * @returns {object} A new chunker instance.
 */
function createChunker(fileExtension) {
  const ext = fileExtension.toLowerCase();

  if (ext === ".md" || ext === ".mdx") {
    return new MarkdownChunker();
  }

  const codeExtensions = [
    ".ts", ".tsx", ".mts", ".cts",
    ".js", ".jsx", ".mjs", ".cjs",
    ".py", ".go", ".rs", ".java",
  ];
  if (!codeExtensions.includes(ext)) {
    return new GenericChunker();
  }

  const runtime = TreeSitterRuntime.get();
  return runtime?.hasLanguage(ext) ? new TreeSitterChunker(runtime) : new CodeChunker();
}
|
|
33
|
+
export {
|
|
34
|
+
createChunker
|
|
35
|
+
};
|
|
36
|
+
//# sourceMappingURL=chunker-factory.js.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { ChunkMetadata, RawChunk } from '@kb/core';
|
|
2
|
+
/**
 * Contract for content chunkers: each implementation splits one kind of
 * content into searchable chunks.
 */
export interface IChunker {
    /**
     * Split `content` into chunks.
     *
     * @param content - Text to split.
     * @param metadata - Metadata describing the content.
     * @returns The resulting chunks.
     */
    chunk(content: string, metadata: ChunkMetadata): RawChunk[];
}
|
|
10
|
+
//# sourceMappingURL=chunker.interface.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
//# sourceMappingURL=chunker.interface.js.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ChunkMetadata, RawChunk } from '@kb/core';
|
|
2
|
+
import type { IChunker } from './chunker.interface.js';
|
|
3
|
+
/** Chunker for source code; the maximum chunk size is configurable. */
export declare class CodeChunker implements IChunker {
    private readonly maxChunkSize;
    /**
     * @param options - Optional settings; `maxChunkSize` overrides the default size limit.
     */
    constructor(options?: {
        maxChunkSize?: number;
    });
    /** Split `content` into chunks. */
    chunk(content: string, metadata: ChunkMetadata): RawChunk[];
    private findDeclarationBoundaries;
    private fallbackChunk;
    private splitByLines;
    private getLineNumber;
}
|
|
14
|
+
//# sourceMappingURL=code-chunker.d.ts.map
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { CHUNK_SIZES } from "@kb/core";
|
|
2
|
+
/**
 * Regex-based chunker for source code.
 *
 * Content is cut at top-level declarations (`function`, `class`, `interface`,
 * `type`, `const`, `enum`, `abstract class`, optionally prefixed by `export`,
 * `default`, `async`). Comment, decorator and blank lines directly above a
 * declaration travel with it. Every chunk text starts with a
 * `// File: <sourcePath>` header line; reported line numbers always refer to
 * the original content and never count that header.
 */
class CodeChunker {
  maxChunkSize;

  /**
   * @param {{ maxChunkSize?: number }} [options] - `maxChunkSize` is the
   *   maximum chunk text length in characters; defaults to `CHUNK_SIZES.code.max`.
   */
  constructor(options) {
    this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.code.max;
  }

  /**
   * Split `content` into declaration-aligned chunks.
   *
   * @param {string} content - Source text.
   * @param {{ sourcePath: string, contentType: string }} metadata - Copied onto every chunk.
   * @returns {Array<object>} Chunks in source order, each with `text`,
   *   `sourcePath`, `contentType`, `chunkIndex`, `totalChunks`, and 1-based
   *   inclusive `startLine` / `endLine`.
   */
  chunk(content, metadata) {
    const boundaries = this.findDeclarationBoundaries(content);
    if (boundaries.length === 0) {
      return this.fallbackChunk(content, metadata);
    }

    const header = `// File: ${metadata.sourcePath}\n`;
    const chunks = [];
    const pushChunk = (text, startLine, endLine) => {
      chunks.push({
        text,
        sourcePath: metadata.sourcePath,
        contentType: metadata.contentType,
        chunkIndex: chunks.length,
        totalChunks: 0,
        startLine,
        endLine,
      });
    };
    // Number of characters that trim() removes from the front of `raw`.
    const leadingTrimmed = (raw) => raw.length - raw.trimStart().length;

    // Anything before the first declaration (imports etc.) becomes a preamble chunk.
    const firstOffset = boundaries[0].offset;
    if (firstOffset > 0) {
      const raw = content.slice(0, firstOffset);
      const preamble = raw.trim();
      if (preamble.length > 0) {
        const startLine = this.getLineNumber(content, leadingTrimmed(raw));
        pushChunk(`${header}${preamble}`, startLine, startLine + preamble.split("\n").length - 1);
      }
    }

    boundaries.forEach(({ offset }, index) => {
      const end = boundaries[index + 1]?.offset ?? content.length;
      const raw = content.slice(offset, end);
      const body = raw.trim();
      // Line numbers describe the trimmed body, so skip whitespace that trim() dropped.
      const startLine = this.getLineNumber(content, offset + leadingTrimmed(raw));
      const text = header + body;

      if (text.length <= this.maxChunkSize) {
        pushChunk(text, startLine, startLine + body.split("\n").length - 1);
        return;
      }

      let currentLine = startLine;
      this.splitByLines(text, this.maxChunkSize).forEach((piece, pieceIndex) => {
        // Only the first piece contains the header line, which is not a source line.
        const sourceLines = piece.split("\n").length - (pieceIndex === 0 ? 1 : 0);
        // A header-only piece (first source line too long to share a piece
        // with the header) is pinned to the current line.
        pushChunk(piece, currentLine, currentLine + Math.max(sourceLines, 1) - 1);
        currentLine += sourceLines;
      });
    });

    return chunks.map((chunk, index) => ({ ...chunk, chunkIndex: index, totalChunks: chunks.length }));
  }

  /**
   * Locate top-level declarations and the offset at which each one's chunk
   * should begin (the start of the run of blank/comment/decorator lines
   * directly above the declaration, or the declaration line itself).
   *
   * @param {string} content - Source text.
   * @returns {Array<{ offset: number, name: string }>} Boundaries in source order.
   */
  findDeclarationBoundaries(content) {
    const pattern = /^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm;
    // "*" also covers lines starting with "*/".
    const leadingPrefixes = ["//", "/*", "*", "@"];
    const isLeadingLine = (line) => line === "" || leadingPrefixes.some((prefix) => line.startsWith(prefix));

    const boundaries = [];
    for (const match of content.matchAll(pattern)) {
      let actualStart = content.lastIndexOf("\n", match.index - 1) + 1;

      // Walk upwards one line at a time. content[actualStart - 1] is always the
      // "\n" terminating the previous line, so that line spans
      // [previousStart, actualStart - 1).
      while (actualStart > 0) {
        const previousStart = actualStart >= 2 ? content.lastIndexOf("\n", actualStart - 2) + 1 : 0;
        const previousLine = content.slice(previousStart, actualStart - 1).trim();
        if (!isLeadingLine(previousLine)) {
          break;
        }
        actualStart = previousStart;
      }

      boundaries.push({ offset: actualStart, name: match[1] });
    }
    return boundaries;
  }

  /**
   * Produce a single chunk holding the whole content (used when no
   * declaration is found).
   *
   * @param {string} content - Source text.
   * @param {{ sourcePath: string, contentType: string }} metadata - Copied onto the chunk.
   * @returns {Array<object>} A one-element array.
   */
  fallbackChunk(content, metadata) {
    return [
      {
        text: `// File: ${metadata.sourcePath}\n${content}`,
        sourcePath: metadata.sourcePath,
        contentType: metadata.contentType,
        chunkIndex: 0,
        totalChunks: 1,
        startLine: 1,
        endLine: content.split("\n").length,
      },
    ];
  }

  /**
   * Greedily pack whole lines into pieces of at most roughly `maxSize`
   * characters. A single line longer than `maxSize` is kept intact in a piece
   * of its own.
   *
   * @param {string} text - Text to split.
   * @param {number} maxSize - Size budget per piece.
   * @returns {string[]} Pieces; joining them with "\n" yields `text`.
   */
  splitByLines(text, maxSize) {
    const pieces = [];
    let current = [];
    let currentSize = 0;

    for (const line of text.split("\n")) {
      if (current.length > 0 && currentSize + line.length + 1 > maxSize) {
        pieces.push(current.join("\n"));
        current = [line];
        currentSize = line.length;
      } else {
        current.push(line);
        currentSize += line.length + 1;
      }
    }

    if (current.length > 0) {
      pieces.push(current.join("\n"));
    }
    return pieces;
  }

  /**
   * @param {string} content - Source text.
   * @param {number} offset - Character offset into `content`.
   * @returns {number} 1-based number of the line containing `offset`.
   */
  getLineNumber(content, offset) {
    return content.slice(0, offset).split("\n").length;
  }
}
|
|
131
|
+
export {
|
|
132
|
+
CodeChunker
|
|
133
|
+
};
|
|
134
|
+
//# sourceMappingURL=code-chunker.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ChunkMetadata, RawChunk } from '@kb/core';
|
|
2
|
+
import type { IChunker } from './chunker.interface.js';
|
|
3
|
+
/** General-purpose chunker; maximum chunk size and overlap are configurable. */
export declare class GenericChunker implements IChunker {
    private readonly maxChunkSize;
    private readonly overlap;
    /**
     * @param options - Optional settings; `maxChunkSize` and `overlap` override the defaults.
     */
    constructor(options?: {
        maxChunkSize?: number;
        overlap?: number;
    });
    /** Split `content` into chunks. */
    chunk(content: string, metadata: ChunkMetadata): RawChunk[];
}
|
|
12
|
+
//# sourceMappingURL=generic-chunker.d.ts.map
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { CHUNK_SIZES } from "@kb/core";
|
|
2
|
+
/**
 * Line-oriented chunker for content without a dedicated strategy.
 *
 * Whole lines are packed greedily into chunks of about `maxChunkSize`
 * characters; the trailing lines of each emitted chunk (up to `overlap`
 * characters) are repeated at the start of the next chunk.
 */
class GenericChunker {
  maxChunkSize;
  overlap;

  /**
   * @param {{maxChunkSize?: number, overlap?: number}} [options] - Size
   *   overrides; missing values come from `CHUNK_SIZES.default`.
   */
  constructor(options) {
    this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.default.max;
    this.overlap = options?.overlap ?? CHUNK_SIZES.default.overlap;
  }

  /**
   * Splits `content` into line-aligned chunks.
   *
   * @param {string} content - Text to split.
   * @param {{sourcePath: string, contentType: string}} metadata - Copied onto every chunk.
   * @returns {Array<object>} Chunks with `text`, `sourcePath`, `contentType`,
   *   `chunkIndex`, `totalChunks` and 1-based inclusive `startLine`/`endLine`.
   */
  chunk(content, metadata) {
    // Builds one chunk record; index/total are filled in by `finalize`.
    const describe = (rows, firstLine) => ({
      text: rows.join("\n"),
      sourcePath: metadata.sourcePath,
      contentType: metadata.contentType,
      chunkIndex: 0,
      totalChunks: 0,
      startLine: firstLine,
      endLine: firstLine + rows.length - 1
    });
    const finalize = (pieces) =>
      pieces.map((piece, position) => ({
        ...piece,
        chunkIndex: position,
        totalChunks: pieces.length
      }));

    const rows = content.split("\n");

    // Small inputs are returned untouched as a single chunk.
    if (content.length <= this.maxChunkSize) {
      return finalize([describe(rows, 1)]);
    }

    const emitted = [];
    let window = [];
    let windowSize = 0;
    let windowStart = 1;

    for (const row of rows) {
      const cost = row.length + 1; // +1 accounts for the newline
      const overflows = windowSize + cost > this.maxChunkSize;
      if (!(overflows && window.length > 0)) {
        window.push(row);
        windowSize += cost;
        continue;
      }

      emitted.push(describe(window, windowStart));

      // Walk backwards to find how many trailing rows fit in the overlap budget.
      let keepFrom = window.length;
      let carriedSize = 0;
      while (keepFrom > 0) {
        const tailCost = window[keepFrom - 1].length + 1;
        if (carriedSize + tailCost > this.overlap) break;
        carriedSize += tailCost;
        keepFrom -= 1;
      }

      // The next chunk begins at the first carried-over row.
      windowStart += keepFrom;
      window = [...window.slice(keepFrom), row];
      windowSize = carriedSize + cost;
    }

    if (window.length > 0) {
      emitted.push(describe(window, windowStart));
    }
    return finalize(emitted);
  }
}
|
|
69
|
+
export {
|
|
70
|
+
GenericChunker
|
|
71
|
+
};
|
|
72
|
+
//# sourceMappingURL=generic-chunker.js.map
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { type CallEdge, extractCallEdges } from './call-graph-extractor.js';
|
|
2
|
+
export type { IChunker } from './chunker.interface.js';
|
|
3
|
+
export { createChunker } from './chunker-factory.js';
|
|
4
|
+
export { CodeChunker } from './code-chunker.js';
|
|
5
|
+
export { GenericChunker } from './generic-chunker.js';
|
|
6
|
+
export { MarkdownChunker } from './markdown-chunker.js';
|
|
7
|
+
export { initializeTreeSitter, TreeSitterChunker, TreeSitterRuntime, } from './treesitter-chunker.js';
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { extractCallEdges } from "./call-graph-extractor.js";
|
|
2
|
+
import { createChunker } from "./chunker-factory.js";
|
|
3
|
+
import { CodeChunker } from "./code-chunker.js";
|
|
4
|
+
import { GenericChunker } from "./generic-chunker.js";
|
|
5
|
+
import { MarkdownChunker } from "./markdown-chunker.js";
|
|
6
|
+
import {
|
|
7
|
+
initializeTreeSitter,
|
|
8
|
+
TreeSitterChunker,
|
|
9
|
+
TreeSitterRuntime
|
|
10
|
+
} from "./treesitter-chunker.js";
|
|
11
|
+
export {
|
|
12
|
+
CodeChunker,
|
|
13
|
+
GenericChunker,
|
|
14
|
+
MarkdownChunker,
|
|
15
|
+
TreeSitterChunker,
|
|
16
|
+
TreeSitterRuntime,
|
|
17
|
+
createChunker,
|
|
18
|
+
extractCallEdges,
|
|
19
|
+
initializeTreeSitter
|
|
20
|
+
};
|
|
21
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ChunkMetadata, RawChunk } from '@kb/core';
|
|
2
|
+
import type { IChunker } from './chunker.interface.js';
|
|
3
|
+
/**
 * Markdown chunker: content is cut into sections at `#`-style heading lines
 * found outside ``` fenced blocks, and each chunk records its heading path.
 */
export declare class MarkdownChunker implements IChunker {
|
|
4
|
+
/** Sections whose text is longer than this are split further at blank-line paragraph breaks. */
private readonly maxChunkSize;
|
|
5
|
+
/** Sections whose trimmed text is shorter than this are appended to the previous chunk, if any. */
private readonly minChunkSize;
|
|
6
|
+
/** Both options are optional; omitted values fall back to defaults from `@kb/core`. */
constructor(options?: {
|
|
7
|
+
maxChunkSize?: number;
|
|
8
|
+
minChunkSize?: number;
|
|
9
|
+
});
|
|
10
|
+
/** Splits `content` into heading-scoped chunks with line ranges and index/total counters. */
chunk(content: string, metadata: ChunkMetadata): RawChunk[];
|
|
11
|
+
/** Cuts content into sections, one per heading, plus an "(intro)" section for text before the first heading. */
private splitByHeadings;
|
|
12
|
+
/** Groups blank-line-separated paragraphs into pieces bounded by a maximum size. */
private splitByParagraphs;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=markdown-chunker.d.ts.map
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { CHUNK_SIZES } from "@kb/core";
|
|
2
|
+
/**
 * Heading-aware chunker for Markdown.
 *
 * Content is cut into sections at ATX headings (`#` .. `######`) that appear
 * outside ``` fenced code blocks. Tiny sections are folded into the previous
 * chunk; oversized sections are split further at blank-line paragraph breaks.
 */
class MarkdownChunker {
  maxChunkSize;
  minChunkSize;

  /**
   * @param {{maxChunkSize?: number, minChunkSize?: number}} [options] - Size
   *   overrides; missing values come from `CHUNK_SIZES.markdown`.
   */
  constructor(options) {
    this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.markdown.max;
    this.minChunkSize = options?.minChunkSize ?? CHUNK_SIZES.markdown.min;
  }

  /**
   * Splits Markdown `content` into chunks.
   *
   * @param {string} content - Markdown text.
   * @param {{sourcePath: string, contentType: string}} metadata - Copied onto every chunk.
   * @returns {Array<object>} Chunks with `text`, `sourcePath`, `contentType`,
   *   `headingPath`, `chunkIndex`, `totalChunks` and 1-based inclusive
   *   `startLine`/`endLine` referring to lines of `content`.
   */
  chunk(content, metadata) {
    const sections = this.splitByHeadings(content);
    const chunks = [];
    for (const section of sections) {
      // A section too small to stand alone is appended to the previous chunk.
      // With no previous chunk it falls through and is emitted as-is.
      if (section.text.trim().length < this.minChunkSize && chunks.length > 0) {
        const prev = chunks[chunks.length - 1];
        prev.text += `\n\n${section.text}`;
        prev.endLine = section.endLine;
        continue;
      }

      if (section.text.length > this.maxChunkSize) {
        // Line ranges come from the paragraph positions in the source, so the
        // blank separator lines dropped from the text are still accounted for.
        const parts = this.splitByParagraphsWithLines(
          section.text,
          this.maxChunkSize,
          section.startLine
        );
        for (const part of parts) {
          chunks.push({
            text: part.text,
            sourcePath: metadata.sourcePath,
            contentType: metadata.contentType,
            headingPath: section.headingPath,
            chunkIndex: chunks.length,
            totalChunks: 0, // set in the final map below
            startLine: part.startLine,
            endLine: part.endLine
          });
        }
      } else {
        chunks.push({
          text: section.text,
          sourcePath: metadata.sourcePath,
          contentType: metadata.contentType,
          headingPath: section.headingPath,
          chunkIndex: chunks.length,
          totalChunks: 0, // set in the final map below
          startLine: section.startLine,
          endLine: section.endLine
        });
      }
    }
    return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
  }

  /**
   * Cuts `content` into sections, one per heading. Text before the first
   * heading forms a section whose heading path is "(intro)".
   *
   * @param {string} content - Markdown text.
   * @returns {Array<{text: string, headingPath: string, startLine: number, endLine: number}>}
   */
  splitByHeadings(content) {
    const fencePattern = /^```/;
    const headingPattern = /^(#{1,6})\s+(.+)/;
    const lines = content.split("\n");
    const sections = [];
    // Ancestor headings of the current position. Each entry remembers its own
    // level so that skipped levels (e.g. "#" followed by "###") do not make a
    // later sibling look like a child.
    const headingStack = [];
    let currentSection = null;
    let inFencedCodeBlock = false;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (fencePattern.test(line)) {
        inFencedCodeBlock = !inFencedCodeBlock;
      }
      // Lines inside a fenced block are never treated as headings.
      const match = inFencedCodeBlock ? null : line.match(headingPattern);

      if (match) {
        if (currentSection) {
          sections.push(currentSection);
        }
        const level = match[1].length;
        const title = match[2].trim();
        // Drop every heading that is at the same depth or deeper.
        while (headingStack.length > 0 && headingStack[headingStack.length - 1].level >= level) {
          headingStack.pop();
        }
        headingStack.push({ level, label: `${"#".repeat(level)} ${title}` });
        currentSection = {
          text: line,
          headingPath: headingStack.map((entry) => entry.label).join(" > "),
          startLine: i + 1,
          endLine: i + 1
        };
      } else if (currentSection) {
        currentSection.text += `\n${line}`;
        currentSection.endLine = i + 1;
      } else {
        currentSection = {
          text: line,
          headingPath: "(intro)",
          startLine: i + 1,
          endLine: i + 1
        };
      }
    }
    if (currentSection) sections.push(currentSection);
    return sections;
  }

  /**
   * Groups blank-line-separated paragraphs of `text` into pieces of at most
   * `maxSize` characters where possible (a single paragraph longer than
   * `maxSize` is kept whole). Paragraphs inside a piece are joined by one
   * blank line and each piece is trimmed.
   *
   * @param {string} text - Text to split.
   * @param {number} maxSize - Size budget per piece, in characters.
   * @returns {string[]} The pieces.
   */
  splitByParagraphs(text, maxSize) {
    return this.splitByParagraphsWithLines(text, maxSize, 1).map((part) => part.text);
  }

  /**
   * Same grouping as `splitByParagraphs`, but each piece also reports the
   * source lines it covers.
   *
   * @param {string} text - Text to split.
   * @param {number} maxSize - Size budget per piece, in characters.
   * @param {number} [baseLine=1] - Line number of the first line of `text`.
   * @returns {Array<{text: string, startLine: number, endLine: number}>}
   *   `startLine`/`endLine` are the lines of the first and last
   *   non-whitespace characters of the piece.
   */
  splitByParagraphsWithLines(text, maxSize, baseLine = 1) {
    const countNewlines = (value) => value.split("\n").length - 1;

    // Pass 1: locate every paragraph and the line it starts on.
    const separator = /\n\n+/g;
    const paragraphs = [];
    let cursor = 0;
    let line = baseLine;
    let found = separator.exec(text);
    while (found !== null) {
      const body = text.slice(cursor, found.index);
      paragraphs.push({ text: body, startLine: line });
      // A separator of m newlines ends the current line and adds m - 1 blank lines.
      line += countNewlines(body) + found[0].length;
      cursor = found.index + found[0].length;
      found = separator.exec(text);
    }
    paragraphs.push({ text: text.slice(cursor), startLine: line });

    // Pass 2: pack paragraphs greedily, tracking the visible line extent.
    const result = [];
    let current = "";
    let groupLine = baseLine; // fallback when a group has no visible text
    let firstVisible = null;
    let lastVisible = null;
    const flush = () => {
      result.push({
        text: current.trim(),
        startLine: firstVisible ?? groupLine,
        endLine: lastVisible ?? groupLine
      });
    };

    for (const para of paragraphs) {
      const overflows = `${current}\n\n${para.text}`.length > maxSize && current.length > 0;
      if (overflows) {
        flush();
      }
      if (overflows || !current) {
        // Start a new group with this paragraph.
        current = para.text;
        groupLine = para.startLine;
        firstVisible = null;
        lastVisible = null;
      } else {
        current = `${current}\n\n${para.text}`;
      }

      if (para.text.trim() !== "") {
        const leading = para.text.length - para.text.trimStart().length;
        if (firstVisible === null) {
          firstVisible = para.startLine + countNewlines(para.text.slice(0, leading));
        }
        lastVisible = para.startLine + countNewlines(para.text.trimEnd());
      }
    }
    if (current.trim()) flush();
    return result;
  }
}
|
|
119
|
+
export {
|
|
120
|
+
MarkdownChunker
|
|
121
|
+
};
|
|
122
|
+
//# sourceMappingURL=markdown-chunker.js.map
|