viberag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +219 -0
- package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
- package/dist/cli/__tests__/mcp-setup.test.js +597 -0
- package/dist/cli/app.d.ts +2 -0
- package/dist/cli/app.js +238 -0
- package/dist/cli/commands/handlers.d.ts +57 -0
- package/dist/cli/commands/handlers.js +231 -0
- package/dist/cli/commands/index.d.ts +2 -0
- package/dist/cli/commands/index.js +2 -0
- package/dist/cli/commands/mcp-setup.d.ts +107 -0
- package/dist/cli/commands/mcp-setup.js +509 -0
- package/dist/cli/commands/useRagCommands.d.ts +23 -0
- package/dist/cli/commands/useRagCommands.js +180 -0
- package/dist/cli/components/CleanWizard.d.ts +17 -0
- package/dist/cli/components/CleanWizard.js +169 -0
- package/dist/cli/components/InitWizard.d.ts +20 -0
- package/dist/cli/components/InitWizard.js +370 -0
- package/dist/cli/components/McpSetupWizard.d.ts +37 -0
- package/dist/cli/components/McpSetupWizard.js +387 -0
- package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
- package/dist/cli/components/SearchResultsDisplay.js +130 -0
- package/dist/cli/components/WelcomeBanner.d.ts +10 -0
- package/dist/cli/components/WelcomeBanner.js +26 -0
- package/dist/cli/components/index.d.ts +1 -0
- package/dist/cli/components/index.js +1 -0
- package/dist/cli/data/mcp-editors.d.ts +80 -0
- package/dist/cli/data/mcp-editors.js +270 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +26 -0
- package/dist/cli-bundle.cjs +5269 -0
- package/dist/common/commands/terminalSetup.d.ts +2 -0
- package/dist/common/commands/terminalSetup.js +144 -0
- package/dist/common/components/CommandSuggestions.d.ts +9 -0
- package/dist/common/components/CommandSuggestions.js +20 -0
- package/dist/common/components/StaticWithResize.d.ts +23 -0
- package/dist/common/components/StaticWithResize.js +62 -0
- package/dist/common/components/StatusBar.d.ts +8 -0
- package/dist/common/components/StatusBar.js +64 -0
- package/dist/common/components/TextInput.d.ts +12 -0
- package/dist/common/components/TextInput.js +239 -0
- package/dist/common/components/index.d.ts +3 -0
- package/dist/common/components/index.js +3 -0
- package/dist/common/hooks/index.d.ts +4 -0
- package/dist/common/hooks/index.js +4 -0
- package/dist/common/hooks/useCommandHistory.d.ts +7 -0
- package/dist/common/hooks/useCommandHistory.js +51 -0
- package/dist/common/hooks/useCtrlC.d.ts +9 -0
- package/dist/common/hooks/useCtrlC.js +40 -0
- package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
- package/dist/common/hooks/useKittyKeyboard.js +26 -0
- package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
- package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
- package/dist/common/hooks/useTerminalResize.d.ts +28 -0
- package/dist/common/hooks/useTerminalResize.js +51 -0
- package/dist/common/hooks/useTextBuffer.d.ts +13 -0
- package/dist/common/hooks/useTextBuffer.js +165 -0
- package/dist/common/index.d.ts +13 -0
- package/dist/common/index.js +17 -0
- package/dist/common/types.d.ts +162 -0
- package/dist/common/types.js +1 -0
- package/dist/mcp/index.d.ts +12 -0
- package/dist/mcp/index.js +66 -0
- package/dist/mcp/server.d.ts +25 -0
- package/dist/mcp/server.js +837 -0
- package/dist/mcp/watcher.d.ts +86 -0
- package/dist/mcp/watcher.js +334 -0
- package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
- package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
- package/dist/rag/__tests__/helpers.d.ts +30 -0
- package/dist/rag/__tests__/helpers.js +67 -0
- package/dist/rag/__tests__/merkle.test.d.ts +5 -0
- package/dist/rag/__tests__/merkle.test.js +161 -0
- package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
- package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
- package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
- package/dist/rag/__tests__/multi-language.test.js +535 -0
- package/dist/rag/__tests__/rag.test.d.ts +10 -0
- package/dist/rag/__tests__/rag.test.js +311 -0
- package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
- package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
- package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
- package/dist/rag/__tests__/search-filters.test.js +250 -0
- package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
- package/dist/rag/__tests__/search-modes.test.js +133 -0
- package/dist/rag/config/index.d.ts +61 -0
- package/dist/rag/config/index.js +111 -0
- package/dist/rag/constants.d.ts +41 -0
- package/dist/rag/constants.js +57 -0
- package/dist/rag/embeddings/fastembed.d.ts +62 -0
- package/dist/rag/embeddings/fastembed.js +124 -0
- package/dist/rag/embeddings/gemini.d.ts +26 -0
- package/dist/rag/embeddings/gemini.js +116 -0
- package/dist/rag/embeddings/index.d.ts +10 -0
- package/dist/rag/embeddings/index.js +9 -0
- package/dist/rag/embeddings/local-4b.d.ts +28 -0
- package/dist/rag/embeddings/local-4b.js +51 -0
- package/dist/rag/embeddings/local.d.ts +29 -0
- package/dist/rag/embeddings/local.js +119 -0
- package/dist/rag/embeddings/mistral.d.ts +22 -0
- package/dist/rag/embeddings/mistral.js +85 -0
- package/dist/rag/embeddings/openai.d.ts +22 -0
- package/dist/rag/embeddings/openai.js +85 -0
- package/dist/rag/embeddings/types.d.ts +37 -0
- package/dist/rag/embeddings/types.js +1 -0
- package/dist/rag/gitignore/index.d.ts +57 -0
- package/dist/rag/gitignore/index.js +178 -0
- package/dist/rag/index.d.ts +15 -0
- package/dist/rag/index.js +25 -0
- package/dist/rag/indexer/chunker.d.ts +129 -0
- package/dist/rag/indexer/chunker.js +1352 -0
- package/dist/rag/indexer/index.d.ts +6 -0
- package/dist/rag/indexer/index.js +6 -0
- package/dist/rag/indexer/indexer.d.ts +73 -0
- package/dist/rag/indexer/indexer.js +356 -0
- package/dist/rag/indexer/types.d.ts +68 -0
- package/dist/rag/indexer/types.js +47 -0
- package/dist/rag/logger/index.d.ts +20 -0
- package/dist/rag/logger/index.js +75 -0
- package/dist/rag/manifest/index.d.ts +50 -0
- package/dist/rag/manifest/index.js +97 -0
- package/dist/rag/merkle/diff.d.ts +26 -0
- package/dist/rag/merkle/diff.js +95 -0
- package/dist/rag/merkle/hash.d.ts +34 -0
- package/dist/rag/merkle/hash.js +165 -0
- package/dist/rag/merkle/index.d.ts +68 -0
- package/dist/rag/merkle/index.js +298 -0
- package/dist/rag/merkle/node.d.ts +51 -0
- package/dist/rag/merkle/node.js +69 -0
- package/dist/rag/search/filters.d.ts +21 -0
- package/dist/rag/search/filters.js +100 -0
- package/dist/rag/search/fts.d.ts +32 -0
- package/dist/rag/search/fts.js +61 -0
- package/dist/rag/search/hybrid.d.ts +17 -0
- package/dist/rag/search/hybrid.js +58 -0
- package/dist/rag/search/index.d.ts +89 -0
- package/dist/rag/search/index.js +367 -0
- package/dist/rag/search/types.d.ts +130 -0
- package/dist/rag/search/types.js +4 -0
- package/dist/rag/search/vector.d.ts +25 -0
- package/dist/rag/search/vector.js +44 -0
- package/dist/rag/storage/index.d.ts +92 -0
- package/dist/rag/storage/index.js +287 -0
- package/dist/rag/storage/lancedb-native.d.ts +7 -0
- package/dist/rag/storage/lancedb-native.js +10 -0
- package/dist/rag/storage/schema.d.ts +23 -0
- package/dist/rag/storage/schema.js +50 -0
- package/dist/rag/storage/types.d.ts +100 -0
- package/dist/rag/storage/types.js +68 -0
- package/package.json +67 -0
- package/scripts/check-node-version.js +37 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gitignore-based file filtering.
|
|
3
|
+
*
|
|
4
|
+
* Uses the `ignore` package to parse .gitignore files and filter paths.
|
|
5
|
+
* This replaces the hardcoded excludePatterns approach.
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'node:fs/promises';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
import { createRequire } from 'node:module';
|
|
10
|
+
// ignore is a CJS module, use createRequire to import it
|
|
11
|
+
const require = createRequire(import.meta.url);
|
|
12
|
+
const ignore = require('ignore');
|
|
13
|
+
/**
 * Entries excluded from indexing no matter what the project's .gitignore says.
 * `.git` and `.viberag` are internal/system directories; `node_modules` is
 * listed as a fallback for projects whose .gitignore does not mention it.
 */
const ALWAYS_IGNORED = ['.git', '.viberag', 'node_modules'];
/**
 * Ignore instances that have already been built, keyed by project root.
 */
const ignoreCache = new Map();
|
|
26
|
+
/**
 * Build (or fetch from cache) the Ignore instance for a project.
 *
 * The instance always contains the ALWAYS_IGNORED entries; the rules from
 * `<projectRoot>/.gitignore` are added on top when that file can be read.
 *
 * @param projectRoot - Project root directory
 * @returns Ignore instance for filtering paths relative to projectRoot
 */
export async function loadGitignore(projectRoot) {
    const hit = ignoreCache.get(projectRoot);
    if (!hit) {
        const matcher = ignore();
        // Built-in exclusions go in first, independent of the project's file.
        matcher.add(ALWAYS_IGNORED);
        const rulesFile = path.join(projectRoot, '.gitignore');
        try {
            matcher.add(await fs.readFile(rulesFile, 'utf-8'));
        }
        catch {
            // Best effort: a project without a readable .gitignore only gets
            // the built-in exclusions.
        }
        // Remember the instance so later calls skip the file read.
        ignoreCache.set(projectRoot, matcher);
        return matcher;
    }
    return hit;
}
|
|
55
|
+
/**
 * Tell whether a single path is excluded by the project's ignore rules.
 *
 * @param projectRoot - Project root directory
 * @param relativePath - Path relative to project root
 * @returns true if the path should be ignored
 */
export async function shouldIgnore(projectRoot, relativePath) {
    return (await loadGitignore(projectRoot)).ignores(relativePath);
}
|
|
66
|
+
/**
 * Produce a predicate suitable for filtering a file listing.
 * The predicate answers "keep this path?", i.e. it is true for paths that
 * are NOT ignored.
 *
 * @param projectRoot - Project root directory
 * @returns Filter function that returns true for non-ignored files
 */
export async function createGitignoreFilter(projectRoot) {
    const matcher = await loadGitignore(projectRoot);
    const isIncluded = (relativePath) => !matcher.ignores(relativePath);
    return isIncluded;
}
|
|
77
|
+
/**
 * Drop the cached Ignore instance of one project so the next
 * loadGitignore() call re-reads its .gitignore.
 * Intended to be called after .gitignore has been modified.
 *
 * @param projectRoot - Project root directory whose cache entry is removed
 */
export function clearGitignoreCache(projectRoot) {
    // The boolean result of Map#delete is intentionally not returned.
    ignoreCache.delete(projectRoot);
}
|
|
86
|
+
/**
 * Forget every cached Ignore instance, for all project roots.
 */
export function clearAllGitignoreCache() {
    ignoreCache.clear();
}
|
|
92
|
+
/**
 * Translate the project's .gitignore into fast-glob ignore patterns, so
 * fast-glob can skip directories upfront rather than scanning them and
 * filtering afterwards.
 *
 * @param projectRoot - Project root directory
 * @returns Array of fast-glob compatible ignore patterns
 */
export async function getGlobIgnorePatterns(projectRoot) {
    // Built-in exclusions (mirrors ALWAYS_IGNORED).
    const globs = ['**/.git/**', '**/.viberag/**', '**/node_modules/**'];
    const rulesFile = path.join(projectRoot, '.gitignore');
    try {
        const text = await fs.readFile(rulesFile, 'utf-8');
        const rules = text
            .split('\n')
            .map((rawLine) => rawLine.trim())
            // Blank lines and comments carry no rule; negations ("!...") are
            // skipped because fast-glob handles them differently.
            .filter((rule) => rule && !rule.startsWith('#') && !rule.startsWith('!'));
        for (const rule of rules) {
            const glob = gitignoreToGlob(rule);
            if (glob) {
                globs.push(glob);
            }
        }
    }
    catch {
        // .gitignore doesn't exist, that's fine
    }
    return globs;
}
|
|
131
|
+
// Convert a single gitignore pattern to a fast-glob ignore pattern.
//
// NOTE: this header deliberately uses line comments. The examples contain
// the character sequence "*/", which would terminate a block comment early.
//
// Mapping (gitignore -> glob):
//   foo            ->  **/foo/**        bare name, may be a directory anywhere
//   foo/           ->  **/foo/**        directory anywhere
//   *.log, .env    ->  **/*.log, **/.env   file-like pattern anywhere
//   /foo           ->  foo              anchored at the project root
//   /foo/          ->  foo/**           anchored directory
//   src/gen        ->  src/gen          an internal slash anchors the pattern
//   **/tmp/        ->  **/tmp/**        existing globstar is kept as written
//
// @param pattern - One trimmed, non-empty, non-comment, non-negated line
//                  of a .gitignore file
// @returns The glob pattern, or '' when nothing is left after stripping
//          the leading/trailing slashes (callers skip falsy results)
function gitignoreToGlob(pattern) {
    let name = pattern;
    // A leading slash anchors the pattern to the project root.
    const hasLeadingSlash = name.startsWith('/');
    if (hasLeadingSlash) {
        name = name.slice(1);
    }
    // A trailing slash restricts the pattern to directories.
    const isDirectory = name.endsWith('/');
    if (isDirectory) {
        name = name.slice(0, -1);
    }
    // Nothing left (e.g. "/" or "//"): emit no pattern rather than "/**".
    if (name === '') {
        return '';
    }
    const directorySuffix = isDirectory ? '/**' : '';
    // Per gitignore rules a separator at the beginning or in the middle makes
    // the pattern relative to the .gitignore location, so it must not get a
    // match-anywhere prefix. Patterns that already contain a globstar are
    // passed through as written. In both cases a directory pattern still
    // needs the suffix that covers its contents.
    if (hasLeadingSlash || name.includes('/') || name.includes('**')) {
        return `${name}${directorySuffix}`;
    }
    if (isDirectory) {
        return `**/${name}/**`;
    }
    // Heuristic: an extension or a wildcard suggests a file pattern. The check
    // runs on the bare name, before any prefix is added, so that a plain
    // name such as "node_modules" is recognised and treated as a directory.
    const looksLikeFile = name.includes('.') || name.includes('*') || name.includes('?');
    return looksLikeFile ? `**/${name}` : `**/${name}/**`;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Engine Core
|
|
3
|
+
*
|
|
4
|
+
* Local codebase indexing with hybrid search (vector + BM25).
|
|
5
|
+
*/
|
|
6
|
+
// Constants
export {
    VIBERAG_DIR,
    getViberagDir,
    getConfigPath,
    getManifestPath,
    getLanceDbPath,
    getLogsDir,
    TABLE_NAMES,
    EXTENSION_TO_LANGUAGE,
    DEFAULT_EMBEDDING_DIMENSIONS,
} from './constants.js';
// Logger
export {
    createLogger,
    createNullLogger,
    getLogPath,
    type Logger,
    type LogLevel,
} from './logger/index.js';
// Config
export {
    loadConfig,
    saveConfig,
    configExists,
    DEFAULT_CONFIG,
    DEFAULT_WATCH_CONFIG,
    PROVIDER_CONFIGS,
    type ViberagConfig,
    type WatchConfig,
    type EmbeddingProviderType,
} from './config/index.js';
// Manifest
export {
    loadManifest,
    saveManifest,
    manifestExists,
    createEmptyManifest,
    updateManifestStats,
    updateManifestTree,
    isSchemaVersionCurrent,
    getSchemaVersionInfo,
    type Manifest,
    type ManifestStats,
} from './manifest/index.js';
// Storage
export {
    Storage,
    SCHEMA_VERSION,
    createCodeChunksSchema,
    createEmbeddingCacheSchema,
    chunkToRow,
    rowToChunk,
    embeddingToRow,
    rowToEmbedding,
    type CodeChunk,
    type CodeChunkRow,
    type CachedEmbedding,
    type CachedEmbeddingRow,
    type ChunkType,
} from './storage/index.js';
// Merkle Tree
export {
    MerkleTree,
    compareTrees,
    createEmptyDiff,
    computeFileHash,
    computeStringHash,
    computeDirectoryHash,
    isBinaryFile,
    shouldExclude,
    hasValidExtension,
    serializeNode,
    deserializeNode,
    createFileNode,
    createDirectoryNode,
    type MerkleNode,
    type NodeType,
    type SerializedNode,
    type TreeDiff,
    type BuildStats,
} from './merkle/index.js';
// Indexer (Chunking & Orchestration)
export {
    Chunker,
    Indexer,
    createEmptyIndexStats,
    type Chunk,
    type IndexOptions,
    type IndexStats,
    type ProgressCallback,
    type SupportedLanguage,
} from './indexer/index.js';
// Embeddings
export {
    GeminiEmbeddingProvider,
    MistralEmbeddingProvider,
    OpenAIEmbeddingProvider,
    type EmbeddingProvider,
} from './embeddings/index.js';
// Search
export {
    SearchEngine,
    vectorSearch,
    ftsSearch,
    ensureFtsIndex,
    hybridRerank,
    type SearchFilters,
    type SearchMode,
    type SearchOptions,
    type SearchResult,
    type SearchResults,
} from './search/index.js';
// Gitignore
export {
    loadGitignore,
    shouldIgnore,
    createGitignoreFilter,
    clearGitignoreCache,
    clearAllGitignoreCache,
} from './gitignore/index.js';
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Engine Core
|
|
3
|
+
*
|
|
4
|
+
* Local codebase indexing with hybrid search (vector + BM25).
|
|
5
|
+
*/
|
|
6
|
+
// Constants: directory/file locations, table names, language map
export {
    VIBERAG_DIR,
    getViberagDir,
    getConfigPath,
    getManifestPath,
    getLanceDbPath,
    getLogsDir,
    TABLE_NAMES,
    EXTENSION_TO_LANGUAGE,
    DEFAULT_EMBEDDING_DIMENSIONS,
} from './constants.js';
// Logger
export {
    createLogger,
    createNullLogger,
    getLogPath,
} from './logger/index.js';
// Config
export {
    loadConfig,
    saveConfig,
    configExists,
    DEFAULT_CONFIG,
    DEFAULT_WATCH_CONFIG,
    PROVIDER_CONFIGS,
} from './config/index.js';
// Manifest
export {
    loadManifest,
    saveManifest,
    manifestExists,
    createEmptyManifest,
    updateManifestStats,
    updateManifestTree,
    isSchemaVersionCurrent,
    getSchemaVersionInfo,
} from './manifest/index.js';
// Storage
export {
    Storage,
    SCHEMA_VERSION,
    createCodeChunksSchema,
    createEmbeddingCacheSchema,
    chunkToRow,
    rowToChunk,
    embeddingToRow,
    rowToEmbedding,
} from './storage/index.js';
// Merkle Tree
export {
    MerkleTree,
    compareTrees,
    createEmptyDiff,
    computeFileHash,
    computeStringHash,
    computeDirectoryHash,
    isBinaryFile,
    shouldExclude,
    hasValidExtension,
    serializeNode,
    deserializeNode,
    createFileNode,
    createDirectoryNode,
} from './merkle/index.js';
// Indexer (Chunking & Orchestration)
export {
    Chunker,
    Indexer,
    createEmptyIndexStats,
} from './indexer/index.js';
// Embeddings
export {
    GeminiEmbeddingProvider,
    MistralEmbeddingProvider,
    OpenAIEmbeddingProvider,
} from './embeddings/index.js';
// Search
export {
    SearchEngine,
    vectorSearch,
    ftsSearch,
    ensureFtsIndex,
    hybridRerank,
} from './search/index.js';
// Gitignore
export {
    loadGitignore,
    shouldIgnore,
    createGitignoreFilter,
    clearGitignoreCache,
    clearAllGitignoreCache,
} from './gitignore/index.js';
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { type Chunk, type SupportedLanguage } from './types.js';
|
|
2
|
+
/**
 * Semantic code chunker built on web-tree-sitter (WASM).
 * Because the parser is WASM-based, no native compilation is required.
 */
export declare class Chunker {
    private parser;
    private languages;
    private initialized;
    private wasmBasePath;
    constructor();
    /**
     * Set up web-tree-sitter and load the language grammars.
     * chunkFile() may only be used after this has been called.
     */
    initialize(): Promise<void>;
    /**
     * Look up the language associated with a file extension.
     *
     * @param ext - File extension to look up
     * @returns The matching language, or null
     */
    getLanguageForExtension(ext: string): SupportedLanguage | null;
    /**
     * Report whether the given language is supported.
     */
    isLanguageSupported(lang: SupportedLanguage): boolean;
    /**
     * Report whether a file is a markdown file.
     */
    private isMarkdownFile;
    /**
     * Split a file into chunks.
     *
     * @param filepath - Path to the file; used for extension detection and for context headers
     * @param content - Text of the file to parse
     * @param maxChunkSize - Upper bound on chunk size, in characters (default: 2000)
     * @returns The chunks extracted from the file
     */
    chunkFile(filepath: string, content: string, maxChunkSize?: number): Chunk[];
    /**
     * Produce chunks from a parsed syntax tree.
     */
    private extractChunks;
    /**
     * Walk the nodes recursively, collecting chunks.
     * The parent context (class name) is tracked for use in context headers.
     */
    private traverseNode;
    /**
     * Turn one syntax node into a chunk.
     */
    private nodeToChunk;
    /**
     * Get the signature line, i.e. the first line of a function/class declaration.
     */
    private extractSignature;
    /**
     * Get the docstring attached to a function/class node.
     */
    private extractDocstring;
    /**
     * Determine whether a node is exported/public.
     */
    private extractIsExported;
    /**
     * Helper: look for visibility modifiers on a node.
     */
    private hasVisibilityModifier;
    /**
     * Collect the names of the decorators on a node.
     */
    private extractDecoratorNames;
    /**
     * Helper: find a child node whose type is one of the requested types.
     */
    private findChildOfType;
    /**
     * Compose the context header of a chunk.
     */
    private buildContextHeader;
    /**
     * Get the name of a function/class/method from its node.
     */
    private extractName;
    /**
     * Build a module-level chunk that covers the entire file.
     */
    private createModuleChunk;
    /**
     * Chunk a markdown file using heading-aware splitting with overlap.
     *
     * Strategy:
     * 1. Prefer splitting at heading boundaries (# lines)
     * 2. Slide a window over the text, overlapping consecutive chunks
     * 3. Merge a small final chunk into its neighbour to avoid orphans
     */
    private chunkMarkdown;
    /**
     * Build a chunk from a piece of markdown content.
     */
    private createMarkdownChunk;
    /**
     * Apply the size limits: oversized chunks are split, tiny ones are merged.
     *
     * @param overlapLines - How many lines consecutive chunks share (for context continuity)
     */
    private enforceSizeLimits;
    /**
     * Break an oversized chunk apart line by line, preferring natural
     * boundaries (empty lines, statement ends) as split points.
     *
     * @param overlapLines - How many lines of the previous chunk are repeated for context
     */
    private splitChunkByLines;
    /**
     * Build a chunk from one portion of a split.
     */
    private createSplitChunk;
    /**
     * Pull the class name out of a context header string.
     */
    private extractClassFromContext;
    /**
     * Combine small adjacent chunks of the same type, so that a file does not
     * explode into fragments.
     */
    private mergeSmallChunks;
    /**
     * Shut the parser down and release its resources.
     */
    close(): void;
}
|