@milo4jo/contextkit 0.3.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -11
- package/dist/commands/cache.d.ts +3 -0
- package/dist/commands/cache.d.ts.map +1 -0
- package/dist/commands/cache.js +50 -0
- package/dist/commands/cache.js.map +1 -0
- package/dist/commands/select.d.ts.map +1 -1
- package/dist/commands/select.js +17 -9
- package/dist/commands/select.js.map +1 -1
- package/dist/config/index.d.ts +11 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +11 -0
- package/dist/config/index.js.map +1 -1
- package/dist/config/validation.d.ts +33 -0
- package/dist/config/validation.d.ts.map +1 -0
- package/dist/config/validation.js +241 -0
- package/dist/config/validation.js.map +1 -0
- package/dist/db/index.d.ts +47 -0
- package/dist/db/index.d.ts.map +1 -1
- package/dist/db/index.js +98 -0
- package/dist/db/index.js.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/indexer/chunker.d.ts +15 -2
- package/dist/indexer/chunker.d.ts.map +1 -1
- package/dist/indexer/chunker.js +222 -3
- package/dist/indexer/chunker.js.map +1 -1
- package/dist/parsers/index.d.ts +45 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +71 -0
- package/dist/parsers/index.js.map +1 -0
- package/dist/parsers/typescript.d.ts +43 -0
- package/dist/parsers/typescript.d.ts.map +1 -0
- package/dist/parsers/typescript.js +306 -0
- package/dist/parsers/typescript.js.map +1 -0
- package/dist/retrieval/imports.d.ts +76 -0
- package/dist/retrieval/imports.d.ts.map +1 -0
- package/dist/retrieval/imports.js +258 -0
- package/dist/retrieval/imports.js.map +1 -0
- package/dist/selector/formatter.d.ts +15 -0
- package/dist/selector/formatter.d.ts.map +1 -1
- package/dist/selector/formatter.js +132 -0
- package/dist/selector/formatter.js.map +1 -1
- package/dist/selector/index.d.ts +15 -4
- package/dist/selector/index.d.ts.map +1 -1
- package/dist/selector/index.js +100 -12
- package/dist/selector/index.js.map +1 -1
- package/dist/selector/scoring.d.ts +20 -0
- package/dist/selector/scoring.d.ts.map +1 -1
- package/dist/selector/scoring.js +103 -10
- package/dist/selector/scoring.js.map +1 -1
- package/package.json +1 -1
- package/dist/commands/source.d.ts +0 -3
- package/dist/commands/source.d.ts.map +0 -1
- package/dist/commands/source.js +0 -153
- package/dist/commands/source.js.map +0 -1
- package/dist/utils/output.d.ts +0 -42
- package/dist/utils/output.d.ts.map +0 -1
- package/dist/utils/output.js +0 -62
- package/dist/utils/output.js.map +0 -1
package/dist/db/index.d.ts
CHANGED
|
@@ -11,4 +11,51 @@ export declare function openDatabase(): Database.Database;
|
|
|
11
11
|
* Close the database connection
|
|
12
12
|
*/
|
|
13
13
|
export declare function closeDatabase(db: Database.Database): void;
|
|
14
|
+
/** Cache entry structure */
|
|
15
|
+
export interface CacheEntry {
|
|
16
|
+
cacheKey: string;
|
|
17
|
+
result: string;
|
|
18
|
+
indexVersion: string;
|
|
19
|
+
createdAt: string;
|
|
20
|
+
hitCount: number;
|
|
21
|
+
}
|
|
22
|
+
/** Parameters that affect cache key */
|
|
23
|
+
export interface CacheKeyParams {
|
|
24
|
+
query: string;
|
|
25
|
+
budget: number;
|
|
26
|
+
sources?: string[];
|
|
27
|
+
format: string;
|
|
28
|
+
includeImports?: boolean;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Compute index version hash based on chunk count and last indexed time
|
|
32
|
+
* This invalidates cache when the index changes
|
|
33
|
+
*/
|
|
34
|
+
export declare function computeIndexVersion(db: Database.Database): string;
|
|
35
|
+
/**
|
|
36
|
+
* Generate cache key from query parameters
|
|
37
|
+
*/
|
|
38
|
+
export declare function generateCacheKey(params: CacheKeyParams): string;
|
|
39
|
+
/**
|
|
40
|
+
* Get cached result if valid
|
|
41
|
+
* Returns null if not found or index has changed
|
|
42
|
+
*/
|
|
43
|
+
export declare function getCachedResult(db: Database.Database, cacheKey: string, currentIndexVersion: string): string | null;
|
|
44
|
+
/**
|
|
45
|
+
* Store result in cache
|
|
46
|
+
*/
|
|
47
|
+
export declare function setCachedResult(db: Database.Database, cacheKey: string, result: string, indexVersion: string): void;
|
|
48
|
+
/**
|
|
49
|
+
* Clear all cache entries
|
|
50
|
+
*/
|
|
51
|
+
export declare function clearCache(db: Database.Database): number;
|
|
52
|
+
/**
|
|
53
|
+
* Get cache statistics
|
|
54
|
+
*/
|
|
55
|
+
export declare function getCacheStats(db: Database.Database): {
|
|
56
|
+
entryCount: number;
|
|
57
|
+
totalHits: number;
|
|
58
|
+
oldestEntry: string | null;
|
|
59
|
+
newestEntry: string | null;
|
|
60
|
+
};
|
|
14
61
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/db/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/db/index.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/db/index.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AA0DtC;;GAEG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAU9D;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAKhD;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,GAAG,IAAI,CAEzD;AAMD,4BAA4B;AAC5B,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,uCAAuC;AACvC,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,GAAG,MAAM,CAcjE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAW/D;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,QAAQ,EAAE,MAAM,EAChB,mBAAmB,EAAE,MAAM,GAC1B,MAAM,GAAG,IAAI,CAyBf;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,IAAI,CAKN;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,GAAG,MAAM,CAGxD;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,GAAG;IACpD,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B,CAgBA"}
|
package/dist/db/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
|
+
import { createHash } from 'crypto';
|
|
2
3
|
import { getDbPath } from '../config/index.js';
|
|
3
4
|
/**
|
|
4
5
|
* Database schema for ContextKit
|
|
@@ -37,11 +38,21 @@ CREATE TABLE IF NOT EXISTS chunks (
|
|
|
37
38
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
38
39
|
);
|
|
39
40
|
|
|
41
|
+
-- Query cache table
|
|
42
|
+
CREATE TABLE IF NOT EXISTS query_cache (
|
|
43
|
+
cache_key TEXT PRIMARY KEY,
|
|
44
|
+
result JSON NOT NULL,
|
|
45
|
+
index_version TEXT NOT NULL,
|
|
46
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
47
|
+
hit_count INTEGER DEFAULT 0
|
|
48
|
+
);
|
|
49
|
+
|
|
40
50
|
-- Indexes for performance
|
|
41
51
|
CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_id);
|
|
42
52
|
CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_path);
|
|
43
53
|
CREATE INDEX IF NOT EXISTS idx_files_source ON files(source_id);
|
|
44
54
|
CREATE INDEX IF NOT EXISTS idx_files_path ON files(source_id, file_path);
|
|
55
|
+
CREATE INDEX IF NOT EXISTS idx_cache_created ON query_cache(created_at);
|
|
45
56
|
`;
|
|
46
57
|
/**
|
|
47
58
|
* Initialize the database with schema
|
|
@@ -69,4 +80,91 @@ export function openDatabase() {
|
|
|
69
80
|
export function closeDatabase(db) {
|
|
70
81
|
db.close();
|
|
71
82
|
}
|
|
83
|
+
/**
|
|
84
|
+
* Compute index version hash based on chunk count and last indexed time
|
|
85
|
+
* This invalidates cache when the index changes
|
|
86
|
+
*/
|
|
87
|
+
export function computeIndexVersion(db) {
|
|
88
|
+
const stats = db.prepare(`
|
|
89
|
+
SELECT
|
|
90
|
+
COUNT(*) as chunkCount,
|
|
91
|
+
MAX(created_at) as lastIndexed
|
|
92
|
+
FROM chunks
|
|
93
|
+
`).get();
|
|
94
|
+
const sourceStats = db.prepare(`
|
|
95
|
+
SELECT COUNT(*) as sourceCount FROM sources
|
|
96
|
+
`).get();
|
|
97
|
+
const versionData = `${stats.chunkCount}:${sourceStats.sourceCount}:${stats.lastIndexed || ''}`;
|
|
98
|
+
return createHash('sha256').update(versionData).digest('hex').slice(0, 16);
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Generate cache key from query parameters
|
|
102
|
+
*/
|
|
103
|
+
export function generateCacheKey(params) {
|
|
104
|
+
const normalized = {
|
|
105
|
+
query: params.query,
|
|
106
|
+
budget: params.budget,
|
|
107
|
+
sources: params.sources?.slice().sort() || [],
|
|
108
|
+
format: params.format,
|
|
109
|
+
includeImports: params.includeImports || false,
|
|
110
|
+
};
|
|
111
|
+
const keyData = JSON.stringify(normalized);
|
|
112
|
+
return createHash('sha256').update(keyData).digest('hex');
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Get cached result if valid
|
|
116
|
+
* Returns null if not found or index has changed
|
|
117
|
+
*/
|
|
118
|
+
export function getCachedResult(db, cacheKey, currentIndexVersion) {
|
|
119
|
+
const row = db.prepare(`
|
|
120
|
+
SELECT result, index_version, hit_count
|
|
121
|
+
FROM query_cache
|
|
122
|
+
WHERE cache_key = ?
|
|
123
|
+
`).get(cacheKey);
|
|
124
|
+
if (!row) {
|
|
125
|
+
return null;
|
|
126
|
+
}
|
|
127
|
+
// Invalidate if index has changed
|
|
128
|
+
if (row.index_version !== currentIndexVersion) {
|
|
129
|
+
db.prepare('DELETE FROM query_cache WHERE cache_key = ?').run(cacheKey);
|
|
130
|
+
return null;
|
|
131
|
+
}
|
|
132
|
+
// Update hit count
|
|
133
|
+
db.prepare(`
|
|
134
|
+
UPDATE query_cache
|
|
135
|
+
SET hit_count = hit_count + 1
|
|
136
|
+
WHERE cache_key = ?
|
|
137
|
+
`).run(cacheKey);
|
|
138
|
+
return row.result;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Store result in cache
|
|
142
|
+
*/
|
|
143
|
+
export function setCachedResult(db, cacheKey, result, indexVersion) {
|
|
144
|
+
db.prepare(`
|
|
145
|
+
INSERT OR REPLACE INTO query_cache (cache_key, result, index_version, created_at, hit_count)
|
|
146
|
+
VALUES (?, ?, ?, datetime('now'), 0)
|
|
147
|
+
`).run(cacheKey, result, indexVersion);
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Clear all cache entries
|
|
151
|
+
*/
|
|
152
|
+
export function clearCache(db) {
|
|
153
|
+
const result = db.prepare('DELETE FROM query_cache').run();
|
|
154
|
+
return result.changes;
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Get cache statistics
|
|
158
|
+
*/
|
|
159
|
+
export function getCacheStats(db) {
|
|
160
|
+
const stats = db.prepare(`
|
|
161
|
+
SELECT
|
|
162
|
+
COUNT(*) as entryCount,
|
|
163
|
+
COALESCE(SUM(hit_count), 0) as totalHits,
|
|
164
|
+
MIN(created_at) as oldestEntry,
|
|
165
|
+
MAX(created_at) as newestEntry
|
|
166
|
+
FROM query_cache
|
|
167
|
+
`).get();
|
|
168
|
+
return stats;
|
|
169
|
+
}
|
|
72
170
|
//# sourceMappingURL=index.js.map
|
package/dist/db/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/db/index.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAE/C;;GAEG;AACH,MAAM,MAAM,GAAG
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/db/index.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAE/C;;GAEG;AACH,MAAM,MAAM,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiDd,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,MAAc;IACzC,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;IAEhC,sBAAsB;IACtB,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAE/B,gBAAgB;IAChB,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;IAChC,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAC/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,EAAqB;IACjD,EAAE,CAAC,KAAK,EAAE,CAAC;AACb,CAAC;AAwBD;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CAAC,EAAqB;IACvD,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;GAKxB,CAAC,CAAC,GAAG,EAAwD,CAAC;IAE/D,MAAM,WAAW,GAAG,EAAE,CAAC,OAAO,CAAC;;GAE9B,CAAC,CAAC,GAAG,EAA6B,CAAC;IAEpC,MAAM,WAAW,GAAG,GAAG,KAAK,CAAC,UAAU,IAAI,WAAW,CAAC,WAAW,IAAI,KAAK,CAAC,WAAW,IAAI,EAAE,EAAE,CAAC;IAChG,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC7E,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAsB;IACrD,MAAM,UAAU,GAAG;QACjB,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE;QAC7C,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,cAAc,EAAE,MAAM,CAAC,cAAc,IAAI,KAAK;KAC/C,CAAC;IAEF,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC3C,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAC7B,EAAqB,EACrB,QAAgB,EAChB,mBAA2B;IAE3B,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC;;;;GAItB,CAAC,CAAC,GAAG,CAAC,QAAQ,CAA6E,CAAC;IAE7F,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,IAAI,CAAC;IACd,CAAC;IAED,kCAAkC;IAClC,IAAI,GAAG,CAAC,aAAa,KAAK,mBAAmB,EAAE,CAAC;QAC9C,EAAE,CAAC,OAAO,CAAC,6CAA6C,CAAC,CA
AC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxE,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mBAAmB;IACnB,EAAE,CAAC,OAAO,CAAC;;;;GAIV,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAEjB,OAAO,GAAG,CAAC,MAAM,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,EAAqB,EACrB,QAAgB,EAChB,MAAc,EACd,YAAoB;IAEpB,EAAE,CAAC,OAAO,CAAC;;;GAGV,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,EAAqB;IAC9C,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC,yBAAyB,CAAC,CAAC,GAAG,EAAE,CAAC;IAC3D,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,EAAqB;IAMjD,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;GAOxB,CAAC,CAAC,GAAG,EAKL,CAAC;IAEF,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -7,6 +7,7 @@ import { initCommand } from './commands/init.js';
|
|
|
7
7
|
import { sourceCommand } from './commands/source/index.js';
|
|
8
8
|
import { indexCommand } from './commands/index-cmd.js';
|
|
9
9
|
import { selectCommand } from './commands/select.js';
|
|
10
|
+
import { cacheCommand } from './commands/cache.js';
|
|
10
11
|
import { mcpCommand } from './commands/mcp.js';
|
|
11
12
|
import { watchCommand } from './commands/watch.js';
|
|
12
13
|
import { ContextKitError, InvalidUsageError } from './errors/index.js';
|
|
@@ -62,6 +63,7 @@ program.addCommand(initCommand);
|
|
|
62
63
|
program.addCommand(sourceCommand);
|
|
63
64
|
program.addCommand(indexCommand);
|
|
64
65
|
program.addCommand(selectCommand);
|
|
66
|
+
program.addCommand(cacheCommand);
|
|
65
67
|
program.addCommand(mcpCommand);
|
|
66
68
|
program.addCommand(watchCommand);
|
|
67
69
|
// Default action when no command given
|
|
@@ -80,6 +82,7 @@ Commands:
|
|
|
80
82
|
source Manage source directories
|
|
81
83
|
index Index all sources (incremental by default)
|
|
82
84
|
select Select context for a query
|
|
85
|
+
cache Manage query cache
|
|
83
86
|
watch Watch sources and auto-reindex on changes
|
|
84
87
|
mcp Start MCP server for AI assistants
|
|
85
88
|
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAE9D,gCAAgC;AAChC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AACrF,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;AAE5B,+BAA+B;AAC/B,MAAM,UAAU,GAAG,CAAC,CAAC;AACrB,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B;;GAEG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;QACrC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAE1B,MAAM,QAAQ,GAAG,KAAK,YAAY,iBAAiB,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,UAAU,CAAC;QAEtF,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzB,CAAC;IAED,gDAAgD;IAChD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;YACtB,YAAY,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,UAAU,CAAC,8BAA8B,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;AAC3B,CAAC;AAED,wBAAwB;AACxB,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAC;AAC7C,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,WAAW,CAAC,CAAC;AAE9C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,kCAAkC,CAAC;KAC/C,OAAO,CAAC,OAAO,EAAE,eAAe,EAAE,qBAAqB,CAAC;KACxD,kBAAkB,EAAE;KACpB,aAAa,CAAC;IACb,eAAe,EAAE,IAAI;IACrB,cAAc,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE;CACpC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MA
AM,CAAC,SAAS,EAAE,qCAAqC,CAAC;KACxD,MAAM,CAAC,SAAS,EAAE,+BAA+B,CAAC,CAAC;AAEtD,oBAAoB;AACpB,OAAO,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;AAChC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAClC,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;AACjC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAClC,OAAO,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;AAC/B,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;AAEjC,uCAAuC;AACvC,OAAO,CAAC,MAAM,CAAC,GAAG,EAAE;IAClB,OAAO,CAAC,GAAG,CAAC
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAE9D,gCAAgC;AAChC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AACrF,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;AAE5B,+BAA+B;AAC/B,MAAM,UAAU,GAAG,CAAC,CAAC;AACrB,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B;;GAEG;AACH,SAAS,WAAW,CAAC,KAAc;IACjC,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;QACrC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAE1B,MAAM,QAAQ,GAAG,KAAK,YAAY,iBAAiB,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,UAAU,CAAC;QAEtF,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzB,CAAC;IAED,gDAAgD;IAChD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;YACtB,YAAY,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,UAAU,CAAC,8BAA8B,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;AAC3B,CAAC;AAED,wBAAwB;AACxB,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAC;AAC7C,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,WAAW,CAAC,CAAC;AAE9C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,kCAAkC,CAAC;KAC/C,OAAO,CAAC,OAAO,EAAE,eAAe,EAAE,qBAAqB,CAAC;KACxD,kBAAkB,EAAE;KACpB,aAAa,CAAC;IACb,eAAe,EAAE,IAAI;IACrB,cAAc,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE;CACpC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KA
CJ,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,SAAS,EAAE,qCAAqC,CAAC;KACxD,MAAM,CAAC,SAAS,EAAE,+BAA+B,CAAC,CAAC;AAEtD,oBAAoB;AACpB,OAAO,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;AAChC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAClC,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;AACjC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;AAClC,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;AACjC,OAAO,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;AAC/B,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;AAEjC,uCAAuC;AACvC,OAAO,CAAC,MAAM,CAAC,GAAG,EAAE;IAClB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;CAyBb,CAAC,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
* Chunking Module
|
|
3
3
|
*
|
|
4
4
|
* Splits files into chunks for embedding.
|
|
5
|
-
* Uses
|
|
5
|
+
* Uses AST-aware chunking for supported languages (TS/JS),
|
|
6
|
+
* with token-based fallback for other files.
|
|
6
7
|
*/
|
|
7
8
|
import type { DiscoveredFile } from './discovery.js';
|
|
9
|
+
import { type CodeUnitType } from '../parsers/index.js';
|
|
8
10
|
/** Chunk of content ready for embedding */
|
|
9
11
|
export interface Chunk {
|
|
10
12
|
/** Unique chunk ID */
|
|
@@ -21,6 +23,12 @@ export interface Chunk {
|
|
|
21
23
|
endLine: number;
|
|
22
24
|
/** Token count */
|
|
23
25
|
tokens: number;
|
|
26
|
+
/** Type of code unit (for AST-aware chunks) */
|
|
27
|
+
chunkType?: CodeUnitType | 'file' | 'token-block';
|
|
28
|
+
/** Name of the code unit (for AST-aware chunks) */
|
|
29
|
+
unitName?: string;
|
|
30
|
+
/** Whether the code unit is exported */
|
|
31
|
+
exported?: boolean;
|
|
24
32
|
}
|
|
25
33
|
/** Chunking options */
|
|
26
34
|
export interface ChunkOptions {
|
|
@@ -28,13 +36,18 @@ export interface ChunkOptions {
|
|
|
28
36
|
chunkSize: number;
|
|
29
37
|
/** Overlap tokens between chunks */
|
|
30
38
|
chunkOverlap: number;
|
|
39
|
+
/** Use AST-aware chunking when available (default: true) */
|
|
40
|
+
useAst?: boolean;
|
|
41
|
+
/** Maximum tokens for a single code unit before splitting (default: 2x chunkSize) */
|
|
42
|
+
maxUnitTokens?: number;
|
|
31
43
|
}
|
|
32
44
|
/**
|
|
33
45
|
* Count tokens in a string
|
|
34
46
|
*/
|
|
35
47
|
export declare function countTokens(text: string): number;
|
|
36
48
|
/**
|
|
37
|
-
* Chunk a single file into pieces
|
|
49
|
+
* Chunk a single file into pieces.
|
|
50
|
+
* Uses AST-aware chunking for supported languages, falls back to token-based.
|
|
38
51
|
*/
|
|
39
52
|
export declare function chunkFile(file: DiscoveredFile, options?: ChunkOptions): Chunk[];
|
|
40
53
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,EAAuB,KAAK,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAE7E,2CAA2C;AAC3C,MAAM,WAAW,KAAK;IACpB,sBAAsB;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,kBAAkB;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,+CAA+C;IAC/C,SAAS,CAAC,EAAE,YAAY,GAAG,MAAM,GAAG,aAAa,CAAC;IAClD,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,uBAAuB;AACvB,MAAM,WAAW,YAAY;IAC3B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;IACrB,4DAA4D;IAC5D,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,qFAAqF;IACrF,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAWD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD;AAsVD;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,cAAc,EAAE,OAAO,GAAE,YAA8B,GAAG,KAAK,EAAE,CAUhG;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,cAAc,EAAE,EACvB,OAAO,GAAE,YAA8B,GACtC,KAAK,EAAE,CAST"}
|
package/dist/indexer/chunker.js
CHANGED
|
@@ -2,13 +2,16 @@
|
|
|
2
2
|
* Chunking Module
|
|
3
3
|
*
|
|
4
4
|
* Splits files into chunks for embedding.
|
|
5
|
-
* Uses
|
|
5
|
+
* Uses AST-aware chunking for supported languages (TS/JS),
|
|
6
|
+
* with token-based fallback for other files.
|
|
6
7
|
*/
|
|
7
8
|
import { createHash } from 'crypto';
|
|
8
9
|
import { encodingForModel } from 'js-tiktoken';
|
|
10
|
+
import { parseFile, canParse } from '../parsers/index.js';
|
|
9
11
|
const DEFAULT_OPTIONS = {
|
|
10
12
|
chunkSize: 500,
|
|
11
13
|
chunkOverlap: 50,
|
|
14
|
+
useAst: true,
|
|
12
15
|
};
|
|
13
16
|
// Use cl100k_base encoding (GPT-4/Claude compatible)
|
|
14
17
|
const encoder = encodingForModel('gpt-4');
|
|
@@ -29,9 +32,210 @@ function generateChunkId(sourceId, filePath, startLine, endLine) {
|
|
|
29
32
|
return `chunk_${hash}`;
|
|
30
33
|
}
|
|
31
34
|
/**
|
|
32
|
-
*
|
|
35
|
+
* Extract lines from content between start and end line numbers.
|
|
33
36
|
*/
|
|
34
|
-
|
|
37
|
+
function extractLines(lines, startLine, endLine) {
|
|
38
|
+
// Convert to 0-indexed
|
|
39
|
+
const start = startLine - 1;
|
|
40
|
+
const end = endLine; // endLine is inclusive, so slice end is exclusive
|
|
41
|
+
return lines.slice(start, end).join('\n');
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Chunk a file using AST-aware boundaries.
|
|
45
|
+
* Falls back to token-based chunking if parsing fails.
|
|
46
|
+
*/
|
|
47
|
+
function chunkFileWithAst(file, options) {
|
|
48
|
+
const maxUnitTokens = options.maxUnitTokens ?? options.chunkSize * 2;
|
|
49
|
+
const lines = file.content.split('\n');
|
|
50
|
+
const totalLines = lines.length;
|
|
51
|
+
// Try to parse the file
|
|
52
|
+
const parseResult = parseFile(file.content, file.relativePath);
|
|
53
|
+
if (!parseResult.success || parseResult.boundaries.length === 0) {
|
|
54
|
+
// Fall back to token-based chunking
|
|
55
|
+
return chunkFileTokenBased(file, options);
|
|
56
|
+
}
|
|
57
|
+
const chunks = [];
|
|
58
|
+
const boundaries = parseResult.boundaries;
|
|
59
|
+
// Filter to top-level boundaries (exclude methods if their class is present)
|
|
60
|
+
// This prevents duplication when a class and its methods are both boundaries
|
|
61
|
+
const classBoundaries = new Set(boundaries.filter((b) => b.type === 'class').map((b) => b.name));
|
|
62
|
+
const topLevelBoundaries = boundaries.filter((b) => {
|
|
63
|
+
if (b.type === 'method') {
|
|
64
|
+
// Include method only if its class spans more than maxUnitTokens
|
|
65
|
+
const className = b.name.split('.')[0];
|
|
66
|
+
const classB = boundaries.find((cb) => cb.type === 'class' && cb.name === className);
|
|
67
|
+
if (classB) {
|
|
68
|
+
const classContent = extractLines(lines, classB.startLine, classB.endLine);
|
|
69
|
+
const classTokens = countTokens(classContent);
|
|
70
|
+
return classTokens > maxUnitTokens;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
return true;
|
|
74
|
+
});
|
|
75
|
+
// Track which lines have been covered
|
|
76
|
+
let lastCoveredLine = 0;
|
|
77
|
+
for (const boundary of topLevelBoundaries) {
|
|
78
|
+
// Check for gaps between boundaries (imports, comments, etc.)
|
|
79
|
+
if (boundary.startLine > lastCoveredLine + 1) {
|
|
80
|
+
const gapContent = extractLines(lines, lastCoveredLine + 1, boundary.startLine - 1);
|
|
81
|
+
const gapTokens = countTokens(gapContent);
|
|
82
|
+
if (gapTokens > 0 && gapContent.trim().length > 0) {
|
|
83
|
+
// Add gap as a "block" chunk if it's substantial
|
|
84
|
+
if (gapTokens > 20) {
|
|
85
|
+
chunks.push({
|
|
86
|
+
id: generateChunkId(file.sourceId, file.relativePath, lastCoveredLine + 1, boundary.startLine - 1),
|
|
87
|
+
sourceId: file.sourceId,
|
|
88
|
+
filePath: file.relativePath,
|
|
89
|
+
content: gapContent,
|
|
90
|
+
startLine: lastCoveredLine + 1,
|
|
91
|
+
endLine: boundary.startLine - 1,
|
|
92
|
+
tokens: gapTokens,
|
|
93
|
+
chunkType: 'block',
|
|
94
|
+
unitName: 'imports/header',
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
// Skip methods if we're not splitting classes
|
|
100
|
+
if (boundary.type === 'method') {
|
|
101
|
+
const className = boundary.name.split('.')[0];
|
|
102
|
+
if (classBoundaries.has(className)) {
|
|
103
|
+
const classB = boundaries.find((cb) => cb.type === 'class' && cb.name === className);
|
|
104
|
+
if (classB) {
|
|
105
|
+
const classContent = extractLines(lines, classB.startLine, classB.endLine);
|
|
106
|
+
const classTokens = countTokens(classContent);
|
|
107
|
+
if (classTokens <= maxUnitTokens) {
|
|
108
|
+
// Class is small enough, skip individual methods
|
|
109
|
+
continue;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
// Skip class if we're including its methods separately
|
|
115
|
+
if (boundary.type === 'class') {
|
|
116
|
+
const classContent = extractLines(lines, boundary.startLine, boundary.endLine);
|
|
117
|
+
const classTokens = countTokens(classContent);
|
|
118
|
+
if (classTokens > maxUnitTokens) {
|
|
119
|
+
// Class is too large, we'll include methods separately
|
|
120
|
+
// Just update lastCoveredLine and continue
|
|
121
|
+
lastCoveredLine = Math.max(lastCoveredLine, boundary.endLine);
|
|
122
|
+
continue;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
const content = extractLines(lines, boundary.startLine, boundary.endLine);
|
|
126
|
+
const tokens = countTokens(content);
|
|
127
|
+
// If the unit is too large, split it with token-based chunking
|
|
128
|
+
if (tokens > maxUnitTokens) {
|
|
129
|
+
const subChunks = chunkLargeUnit(file, boundary.startLine, boundary.endLine, options, boundary.type, boundary.name);
|
|
130
|
+
chunks.push(...subChunks);
|
|
131
|
+
}
|
|
132
|
+
else {
|
|
133
|
+
chunks.push({
|
|
134
|
+
id: generateChunkId(file.sourceId, file.relativePath, boundary.startLine, boundary.endLine),
|
|
135
|
+
sourceId: file.sourceId,
|
|
136
|
+
filePath: file.relativePath,
|
|
137
|
+
content,
|
|
138
|
+
startLine: boundary.startLine,
|
|
139
|
+
endLine: boundary.endLine,
|
|
140
|
+
tokens,
|
|
141
|
+
chunkType: boundary.type,
|
|
142
|
+
unitName: boundary.name,
|
|
143
|
+
exported: boundary.exported,
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
lastCoveredLine = Math.max(lastCoveredLine, boundary.endLine);
|
|
147
|
+
}
|
|
148
|
+
// Handle any trailing content after the last boundary
|
|
149
|
+
if (lastCoveredLine < totalLines) {
|
|
150
|
+
const trailingContent = extractLines(lines, lastCoveredLine + 1, totalLines);
|
|
151
|
+
const trailingTokens = countTokens(trailingContent);
|
|
152
|
+
if (trailingTokens > 0 && trailingContent.trim().length > 0) {
|
|
153
|
+
chunks.push({
|
|
154
|
+
id: generateChunkId(file.sourceId, file.relativePath, lastCoveredLine + 1, totalLines),
|
|
155
|
+
sourceId: file.sourceId,
|
|
156
|
+
filePath: file.relativePath,
|
|
157
|
+
content: trailingContent,
|
|
158
|
+
startLine: lastCoveredLine + 1,
|
|
159
|
+
endLine: totalLines,
|
|
160
|
+
tokens: trailingTokens,
|
|
161
|
+
chunkType: 'block',
|
|
162
|
+
unitName: 'footer',
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
// If no chunks were created (edge case), fall back to token-based
|
|
167
|
+
if (chunks.length === 0) {
|
|
168
|
+
return chunkFileTokenBased(file, options);
|
|
169
|
+
}
|
|
170
|
+
return chunks;
|
|
171
|
+
}
|
|
172
|
+
/**
 * Split a large code unit using token-based chunking.
 *
 * Lines `startLine`..`endLine` (1-indexed, inclusive) of `file.content` are
 * packed greedily: the current chunk is closed as soon as adding the next
 * line would push it past `options.chunkSize` tokens. The following chunk is
 * seeded with trailing lines of the closed one, worth roughly
 * `options.chunkOverlap` tokens.
 *
 * @param file - File record; reads `content`, `sourceId` and `relativePath`.
 * @param startLine - First line of the unit (1-indexed).
 * @param endLine - Last line of the unit (inclusive).
 * @param options - Reads `chunkSize` and `chunkOverlap` (both in tokens).
 * @param unitType - Stored as `chunkType` on every produced chunk.
 * @param unitName - Base name; suffixed with ` (part N)` once the unit is split.
 * @returns The chunks in source order, each starting strictly after the previous one.
 */
function chunkLargeUnit(file, startLine, endLine, options, unitType, unitName) {
    const lines = file.content.split('\n').slice(startLine - 1, endLine);
    const chunks = [];
    let currentLines = [];
    let currentTokens = 0;
    let currentStartLine = startLine;
    let partNum = 1;
    for (const line of lines) {
        // Each line is counted together with its newline.
        const lineTokens = countTokens(line + '\n');
        if (currentTokens + lineTokens > options.chunkSize && currentLines.length > 0) {
            const chunkEndLine = currentStartLine + currentLines.length - 1;
            chunks.push({
                id: generateChunkId(file.sourceId, file.relativePath, currentStartLine, chunkEndLine),
                sourceId: file.sourceId,
                filePath: file.relativePath,
                content: currentLines.join('\n'),
                startLine: currentStartLine,
                endLine: chunkEndLine,
                tokens: currentTokens,
                chunkType: unitType,
                unitName: `${unitName} (part ${partNum})`,
            });
            partNum++;
            // Calculate overlap: walk backwards from the end of the closed chunk.
            // The walk stops at index 1 so the chunk's first line is never carried
            // over. Otherwise an oversized line (or an overlap budget larger than
            // the chunk) would re-seed the whole chunk, the start line would not
            // advance, and every following line would emit a nested duplicate.
            const overlapLines = [];
            let overlapTokens = 0;
            for (let j = currentLines.length - 1; j >= 1 && overlapTokens < options.chunkOverlap; j--) {
                const overlapLine = currentLines[j];
                overlapLines.unshift(overlapLine);
                overlapTokens += countTokens(overlapLine + '\n');
            }
            currentLines = overlapLines;
            currentTokens = overlapTokens;
            currentStartLine = chunkEndLine + 1 - overlapLines.length;
        }
        currentLines.push(line);
        currentTokens += lineTokens;
    }
    // Last chunk: whatever is still buffered after the final line.
    if (currentLines.length > 0) {
        const content = currentLines.join('\n');
        const chunkEndLine = currentStartLine + currentLines.length - 1;
        chunks.push({
            id: generateChunkId(file.sourceId, file.relativePath, currentStartLine, chunkEndLine),
            sourceId: file.sourceId,
            filePath: file.relativePath,
            content,
            startLine: currentStartLine,
            endLine: chunkEndLine,
            tokens: countTokens(content),
            chunkType: unitType,
            // Keep the plain name when the unit was never split.
            unitName: partNum > 1 ? `${unitName} (part ${partNum})` : unitName,
        });
    }
    return chunks;
}
|
|
235
|
+
/**
|
|
236
|
+
* Chunk a file using token-based line splitting (fallback method).
|
|
237
|
+
*/
|
|
238
|
+
function chunkFileTokenBased(file, options) {
|
|
35
239
|
const lines = file.content.split('\n');
|
|
36
240
|
const chunks = [];
|
|
37
241
|
let currentLines = [];
|
|
@@ -52,6 +256,7 @@ export function chunkFile(file, options = DEFAULT_OPTIONS) {
|
|
|
52
256
|
startLine,
|
|
53
257
|
endLine,
|
|
54
258
|
tokens: currentTokens,
|
|
259
|
+
chunkType: 'token-block',
|
|
55
260
|
});
|
|
56
261
|
// Calculate overlap: keep last N tokens worth of lines
|
|
57
262
|
const overlapLines = [];
|
|
@@ -83,10 +288,24 @@ export function chunkFile(file, options = DEFAULT_OPTIONS) {
|
|
|
83
288
|
startLine,
|
|
84
289
|
endLine,
|
|
85
290
|
tokens: countTokens(content),
|
|
291
|
+
chunkType: 'token-block',
|
|
86
292
|
});
|
|
87
293
|
}
|
|
88
294
|
return chunks;
|
|
89
295
|
}
|
|
296
|
+
/**
 * Chunk a single file into pieces.
 *
 * Caller options are layered over DEFAULT_OPTIONS. The AST-aware chunker is
 * chosen when `useAst` is truthy and `canParse` accepts the file's relative
 * path; in every other case the token-based chunker is used.
 */
export function chunkFile(file, options = DEFAULT_OPTIONS) {
    const merged = { ...DEFAULT_OPTIONS, ...options };
    // canParse is only consulted when AST chunking is enabled.
    const strategy = merged.useAst && canParse(file.relativePath)
        ? chunkFileWithAst
        : chunkFileTokenBased;
    return strategy(file, merged);
}
|
|
90
309
|
/**
|
|
91
310
|
* Chunk multiple files
|
|
92
311
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AA6B/C,MAAM,eAAe,GAAiB;IACpC,SAAS,EAAE,GAAG;IACd,YAAY,EAAE,EAAE;CACjB,CAAC;AAEF,qDAAqD;AACrD,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;AAE1C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CACtB,QAAgB,EAChB,QAAgB,EAChB,SAAiB,EACjB,OAAe;IAEf,MAAM,IAAI,GAAG,GAAG,QAAQ,IAAI,QAAQ,IAAI,SAAS,IAAI,OAAO,EAAE,CAAC;IAC/D,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC1E,OAAO,SAAS,IAAI,EAAE,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAoB,EAAE,UAAwB,eAAe;IACrF,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE5C,6DAA6D;QAC7D,IAAI,aAAa,GAAG,UAAU,GAAG,OAAO,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9E,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,OAAO,GAAG,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC;gBACzE,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO;gBACP,SAAS;gBACT,OAAO;gBACP,MAAM,EAAE,aAAa;aACtB,CAAC,CAAC;YAEH,uDAAuD;YACvD,MAAM,YAAY,GAAa,EAAE,CAAC;YAClC,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,iBAAiB,GAAG,WAAW,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,OAAO
,CAAC,WAAW,CAAC,CAAC;gBAClC,aAAa,IAAI,iBAAiB,CAAC;YACrC,CAAC;YAED,gCAAgC;YAChC,YAAY,GAAG,YAAY,CAAC;YAC5B,aAAa,GAAG,aAAa,CAAC;YAC9B,SAAS;gBACP,SAAS,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC;QAC1F,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC;YACzE,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;YAC3B,OAAO;YACP,SAAS;YACT,OAAO;YACP,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC;SAC7B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CACxB,KAAuB,EACvB,UAAwB,eAAe;IAEvC,MAAM,SAAS,GAAY,EAAE,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAE/C,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAqB,MAAM,qBAAqB,CAAC;AAsC7E,MAAM,eAAe,GAAiB;IACpC,SAAS,EAAE,GAAG;IACd,YAAY,EAAE,EAAE;IAChB,MAAM,EAAE,IAAI;CACb,CAAC;AAEF,qDAAqD;AACrD,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;AAE1C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CACtB,QAAgB,EAChB,QAAgB,EAChB,SAAiB,EACjB,OAAe;IAEf,MAAM,IAAI,GAAG,GAAG,QAAQ,IAAI,QAAQ,IAAI,SAAS,IAAI,OAAO,EAAE,CAAC;IAC/D,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC1E,OAAO,SAAS,IAAI,EAAE,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,KAAe,EAAE,SAAiB,EAAE,OAAe;IACvE,uBAAuB;IACvB,MAAM,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC;IAC5B,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,kDAAkD;IACvE,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,IAAoB,EAAE,OAAqB;IACnE,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACrE,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAEhC,wBAAwB;IACxB,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;IAE/D,IAAI,CAAC,WAAW,CAAC,OAAO,IAAI,WAAW,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChE,oCAAoC;QACpC,OAAO,mBAAmB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,MAAM,UAAU,GAAG,WAAW,CAAC,UAAU,CAAC;IAE1C,6EAA6E;IAC7E,6EAA6E;IAC7E,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAChE,CAAC;IAEF,MAAM,kBAAkB,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACjD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACxB,iEAAiE;YACjE,MAAM,SAAS,GAAG,CAAC,CAAC,IA
AI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACvC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,OAAO,IAAI,EAAE,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;YACrF,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;gBAC3E,MAAM,WAAW,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;gBAC9C,OAAO,WAAW,GAAG,aAAa,CAAC;YACrC,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,sCAAsC;IACtC,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,KAAK,MAAM,QAAQ,IAAI,kBAAkB,EAAE,CAAC;QAC1C,8DAA8D;QAC9D,IAAI,QAAQ,CAAC,SAAS,GAAG,eAAe,GAAG,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,EAAE,eAAe,GAAG,CAAC,EAAE,QAAQ,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YACpF,MAAM,SAAS,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC;YAE1C,IAAI,SAAS,GAAG,CAAC,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClD,iDAAiD;gBACjD,IAAI,SAAS,GAAG,EAAE,EAAE,CAAC;oBACnB,MAAM,CAAC,IAAI,CAAC;wBACV,EAAE,EAAE,eAAe,CACjB,IAAI,CAAC,QAAQ,EACb,IAAI,CAAC,YAAY,EACjB,eAAe,GAAG,CAAC,EACnB,QAAQ,CAAC,SAAS,GAAG,CAAC,CACvB;wBACD,QAAQ,EAAE,IAAI,CAAC,QAAQ;wBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;wBAC3B,OAAO,EAAE,UAAU;wBACnB,SAAS,EAAE,eAAe,GAAG,CAAC;wBAC9B,OAAO,EAAE,QAAQ,CAAC,SAAS,GAAG,CAAC;wBAC/B,MAAM,EAAE,SAAS;wBACjB,SAAS,EAAE,OAAO;wBAClB,QAAQ,EAAE,gBAAgB;qBAC3B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,8CAA8C;QAC9C,IAAI,QAAQ,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC/B,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBACnC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,OAAO,IAAI,EAAE,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;gBACrF,IAAI,MAAM,EAAE,CAAC;oBACX,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;oBAC3E,MAAM,WAAW,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;oBAC9C,IAAI,WAAW,IAAI,aAAa,EAAE,CAAC;wBACjC,iDAAiD;wBACjD,SAAS;oBACX,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,IAAI,QAAQ,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC9B,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC,OA
AO,CAAC,CAAC;YAC/E,MAAM,WAAW,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;YAC9C,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;gBAChC,uDAAuD;gBACvD,2CAA2C;gBAC3C,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;gBAC9D,SAAS;YACX,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC1E,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QAEpC,+DAA+D;QAC/D,IAAI,MAAM,GAAG,aAAa,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,QAAQ,CAAC,SAAS,EAClB,QAAQ,CAAC,OAAO,EAChB,OAAO,EACP,QAAQ,CAAC,IAAI,EACb,QAAQ,CAAC,IAAI,CACd,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC;gBAC3F,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO;gBACP,SAAS,EAAE,QAAQ,CAAC,SAAS;gBAC7B,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,MAAM;gBACN,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,QAAQ,EAAE,QAAQ,CAAC,IAAI;gBACvB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAChE,CAAC;IAED,sDAAsD;IACtD,IAAI,eAAe,GAAG,UAAU,EAAE,CAAC;QACjC,MAAM,eAAe,GAAG,YAAY,CAAC,KAAK,EAAE,eAAe,GAAG,CAAC,EAAE,UAAU,CAAC,CAAC;QAC7E,MAAM,cAAc,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;QAEpD,IAAI,cAAc,GAAG,CAAC,IAAI,eAAe,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5D,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,eAAe,GAAG,CAAC,EAAE,UAAU,CAAC;gBACtF,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO,EAAE,eAAe;gBACxB,SAAS,EAAE,eAAe,GAAG,CAAC;gBAC9B,OAAO,EAAE,UAAU;gBACnB,MAAM,EAAE,cAAc;gBACtB,SAAS,EAAE,OAAO;gBAClB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,kEAAkE;IAClE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,mBAAmB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5C,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,IAAoB,EACpB,SAAiB,EACjB,OAAe,EACf,OAAqB,EACrB,QAAsB,EACtB,QAAgB;IAEhB,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK
,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC;IAErD,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,gBAAgB,GAAG,SAAS,CAAC;IACjC,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE5C,IAAI,aAAa,GAAG,UAAU,GAAG,OAAO,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9E,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;YAEhE,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,gBAAgB,EAAE,YAAY,CAAC;gBACrF,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO;gBACP,SAAS,EAAE,gBAAgB;gBAC3B,OAAO,EAAE,YAAY;gBACrB,MAAM,EAAE,aAAa;gBACrB,SAAS,EAAE,QAAQ;gBACnB,QAAQ,EAAE,GAAG,QAAQ,UAAU,OAAO,GAAG;aAC1C,CAAC,CAAC;YAEH,OAAO,EAAE,CAAC;YAEV,oBAAoB;YACpB,MAAM,YAAY,GAAa,EAAE,CAAC;YAClC,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,iBAAiB,GAAG,WAAW,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;gBAClC,aAAa,IAAI,iBAAiB,CAAC;YACrC,CAAC;YAED,YAAY,GAAG,YAAY,CAAC;YAC5B,aAAa,GAAG,aAAa,CAAC;YAC9B,gBAAgB,GAAG,YAAY,GAAG,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC;QAC5D,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,aAAa;IACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,YAAY,GAAG,gBAAgB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;QAEhE,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,gBAAgB,EAAE,YAAY,CAAC;YACrF,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;YAC3B,OAAO;YACP,SAAS,EAAE,gBAAgB;YAC3B,OAAO,EAAE,YAAY;YACrB,MA
AM,EAAE,WAAW,CAAC,OAAO,CAAC;YAC5B,SAAS,EAAE,QAAQ;YACnB,QAAQ,EAAE,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,UAAU,OAAO,GAAG,CAAC,CAAC,CAAC,QAAQ;SACnE,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAoB,EAAE,OAAqB;IACtE,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE5C,6DAA6D;QAC7D,IAAI,aAAa,GAAG,UAAU,GAAG,OAAO,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9E,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,OAAO,GAAG,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC;gBACzE,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;gBAC3B,OAAO;gBACP,SAAS;gBACT,OAAO;gBACP,MAAM,EAAE,aAAa;gBACrB,SAAS,EAAE,aAAa;aACzB,CAAC,CAAC;YAEH,uDAAuD;YACvD,MAAM,YAAY,GAAa,EAAE,CAAC;YAClC,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1F,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,iBAAiB,GAAG,WAAW,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;gBAC1D,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;gBAClC,aAAa,IAAI,iBAAiB,CAAC;YACrC,CAAC;YAED,gCAAgC;YAChC,YAAY,GAAG,YAAY,CAAC;YAC5B,aAAa,GAAG,aAAa,CAAC;YAC9B,SAAS;gBACP,SAAS,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC;QAC1F,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,SAAS,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,eA
Ae,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC;YACzE,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,YAAY;YAC3B,OAAO;YACP,SAAS;YACT,OAAO;YACP,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC;YAC5B,SAAS,EAAE,aAAa;SACzB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,IAAoB,EAAE,UAAwB,eAAe;IACrF,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAEhD,mEAAmE;IACnE,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;QAC/C,OAAO,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,oCAAoC;IACpC,OAAO,mBAAmB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CACxB,KAAuB,EACvB,UAAwB,eAAe;IAEvC,MAAM,SAAS,GAAY,EAAE,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser Registry
|
|
3
|
+
*
|
|
4
|
+
* Central registry for language-specific parsers.
|
|
5
|
+
* Determines which parser to use based on file extension.
|
|
6
|
+
*/
|
|
7
|
+
import { isTypeScriptOrJavaScript, type ParseResult, type CodeBoundary, type CodeUnitType } from './typescript.js';
|
|
8
|
+
export { type ParseResult, type CodeBoundary, type CodeUnitType };
|
|
9
|
+
/** Parser function signature */
|
|
10
|
+
export type ParserFn = (content: string, filePath?: string) => ParseResult;
|
|
11
|
+
/**
|
|
12
|
+
* Get the parser for a file based on its extension.
|
|
13
|
+
*
|
|
14
|
+
* @param filePath - Path to the file
|
|
15
|
+
* @returns Parser function or undefined if no parser is available
|
|
16
|
+
*/
|
|
17
|
+
export declare function getParser(filePath: string): ParserFn | undefined;
|
|
18
|
+
/**
|
|
19
|
+
* Check if a file can be parsed by any registered parser.
|
|
20
|
+
*
|
|
21
|
+
* @param filePath - Path to the file
|
|
22
|
+
* @returns True if a parser is available for this file type
|
|
23
|
+
*/
|
|
24
|
+
export declare function canParse(filePath: string): boolean;
|
|
25
|
+
/**
|
|
26
|
+
* Parse a file and extract code boundaries.
|
|
27
|
+
*
|
|
28
|
+
* @param content - File content
|
|
29
|
+
* @param filePath - Path to the file (used to determine parser)
|
|
30
|
+
* @returns Parse result with boundaries, or failure result if no parser available
|
|
31
|
+
*/
|
|
32
|
+
export declare function parseFile(content: string, filePath: string): ParseResult;
|
|
33
|
+
/**
|
|
34
|
+
* Register a custom parser for a file extension.
|
|
35
|
+
*
|
|
36
|
+
* @param extension - File extension (without dot)
|
|
37
|
+
* @param parser - Parser function
|
|
38
|
+
*/
|
|
39
|
+
export declare function registerParser(extension: string, parser: ParserFn): void;
|
|
40
|
+
/**
|
|
41
|
+
* Get list of supported file extensions.
|
|
42
|
+
*/
|
|
43
|
+
export declare function getSupportedExtensions(): string[];
|
|
44
|
+
export { isTypeScriptOrJavaScript };
|
|
45
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsers/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAEL,wBAAwB,EACxB,KAAK,WAAW,EAChB,KAAK,YAAY,EACjB,KAAK,YAAY,EAClB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAE,KAAK,YAAY,EAAE,CAAC;AAElE,gCAAgC;AAChC,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,WAAW,CAAC;AAW3E;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAIhE;AAED;;;;;GAKG;AACH,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAElD;AAED;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,WAAW,CAYxE;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,GAAG,IAAI,CAExE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,EAAE,CAEjD;AAGD,OAAO,EAAE,wBAAwB,EAAE,CAAC"}
|