grepmax 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +33 -0
  3. package/README.md +375 -0
  4. package/dist/commands/claude-code.js +60 -0
  5. package/dist/commands/codex.js +98 -0
  6. package/dist/commands/doctor.js +92 -0
  7. package/dist/commands/droid.js +189 -0
  8. package/dist/commands/index.js +125 -0
  9. package/dist/commands/list.js +120 -0
  10. package/dist/commands/mcp.js +572 -0
  11. package/dist/commands/opencode.js +199 -0
  12. package/dist/commands/search.js +539 -0
  13. package/dist/commands/serve.js +512 -0
  14. package/dist/commands/setup.js +162 -0
  15. package/dist/commands/skeleton.js +288 -0
  16. package/dist/commands/symbols.js +129 -0
  17. package/dist/commands/trace.js +50 -0
  18. package/dist/commands/verify.js +174 -0
  19. package/dist/config.js +120 -0
  20. package/dist/eval.js +618 -0
  21. package/dist/index.js +82 -0
  22. package/dist/lib/core/languages.js +237 -0
  23. package/dist/lib/graph/graph-builder.js +105 -0
  24. package/dist/lib/index/chunker.js +663 -0
  25. package/dist/lib/index/grammar-loader.js +110 -0
  26. package/dist/lib/index/ignore-patterns.js +63 -0
  27. package/dist/lib/index/index-config.js +86 -0
  28. package/dist/lib/index/sync-helpers.js +97 -0
  29. package/dist/lib/index/syncer.js +396 -0
  30. package/dist/lib/index/walker.js +164 -0
  31. package/dist/lib/index/watcher.js +245 -0
  32. package/dist/lib/output/formatter.js +161 -0
  33. package/dist/lib/output/json-formatter.js +6 -0
  34. package/dist/lib/search/intent.js +23 -0
  35. package/dist/lib/search/searcher.js +475 -0
  36. package/dist/lib/setup/model-loader.js +107 -0
  37. package/dist/lib/setup/setup-helpers.js +106 -0
  38. package/dist/lib/skeleton/body-fields.js +175 -0
  39. package/dist/lib/skeleton/index.js +24 -0
  40. package/dist/lib/skeleton/retriever.js +36 -0
  41. package/dist/lib/skeleton/skeletonizer.js +483 -0
  42. package/dist/lib/skeleton/summary-formatter.js +90 -0
  43. package/dist/lib/store/meta-cache.js +143 -0
  44. package/dist/lib/store/types.js +2 -0
  45. package/dist/lib/store/vector-db.js +340 -0
  46. package/dist/lib/utils/cleanup.js +33 -0
  47. package/dist/lib/utils/exit.js +38 -0
  48. package/dist/lib/utils/file-utils.js +131 -0
  49. package/dist/lib/utils/filter-builder.js +17 -0
  50. package/dist/lib/utils/formatter.js +230 -0
  51. package/dist/lib/utils/git.js +83 -0
  52. package/dist/lib/utils/lock.js +157 -0
  53. package/dist/lib/utils/project-root.js +107 -0
  54. package/dist/lib/utils/server-registry.js +97 -0
  55. package/dist/lib/workers/colbert-math.js +107 -0
  56. package/dist/lib/workers/colbert-tokenizer.js +113 -0
  57. package/dist/lib/workers/download-worker.js +169 -0
  58. package/dist/lib/workers/embeddings/colbert.js +213 -0
  59. package/dist/lib/workers/embeddings/granite.js +180 -0
  60. package/dist/lib/workers/embeddings/mlx-client.js +144 -0
  61. package/dist/lib/workers/orchestrator.js +350 -0
  62. package/dist/lib/workers/pool.js +373 -0
  63. package/dist/lib/workers/process-child.js +92 -0
  64. package/dist/lib/workers/worker.js +31 -0
  65. package/package.json +80 -0
  66. package/plugins/osgrep/.claude-plugin/plugin.json +20 -0
  67. package/plugins/osgrep/hooks/start.js +92 -0
  68. package/plugins/osgrep/hooks/stop.js +3 -0
  69. package/plugins/osgrep/hooks.json +26 -0
  70. package/plugins/osgrep/skills/osgrep/SKILL.md +82 -0
@@ -0,0 +1,288 @@
1
+ "use strict";
2
+ /**
3
+ * osgrep skeleton - Show code skeleton (signatures without implementation)
4
+ *
5
+ * Usage:
6
+ * osgrep skeleton <file> # Skeleton of a file
7
+ * osgrep skeleton <symbol> # Find symbol and skeleton its file
8
+ * osgrep skeleton "query" # Search and skeleton top results
9
+ */
10
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ var desc = Object.getOwnPropertyDescriptor(m, k);
13
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
14
+ desc = { enumerable: true, get: function() { return m[k]; } };
15
+ }
16
+ Object.defineProperty(o, k2, desc);
17
+ }) : (function(o, m, k, k2) {
18
+ if (k2 === undefined) k2 = k;
19
+ o[k2] = m[k];
20
+ }));
21
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
22
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
23
+ }) : function(o, v) {
24
+ o["default"] = v;
25
+ });
26
+ var __importStar = (this && this.__importStar) || (function () {
27
+ var ownKeys = function(o) {
28
+ ownKeys = Object.getOwnPropertyNames || function (o) {
29
+ var ar = [];
30
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
31
+ return ar;
32
+ };
33
+ return ownKeys(o);
34
+ };
35
+ return function (mod) {
36
+ if (mod && mod.__esModule) return mod;
37
+ var result = {};
38
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
39
+ __setModuleDefault(result, mod);
40
+ return result;
41
+ };
42
+ })();
43
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
44
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
45
+ return new (P || (P = Promise))(function (resolve, reject) {
46
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
47
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
48
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
49
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
50
+ });
51
+ };
52
+ Object.defineProperty(exports, "__esModule", { value: true });
53
+ exports.skeleton = void 0;
54
+ const fs = __importStar(require("node:fs"));
55
+ const path = __importStar(require("node:path"));
56
+ const commander_1 = require("commander");
57
+ const sync_helpers_1 = require("../lib/index/sync-helpers");
58
+ const syncer_1 = require("../lib/index/syncer");
59
+ const searcher_1 = require("../lib/search/searcher");
60
+ const setup_helpers_1 = require("../lib/setup/setup-helpers");
61
+ const retriever_1 = require("../lib/skeleton/retriever");
62
+ const skeletonizer_1 = require("../lib/skeleton/skeletonizer");
63
+ const vector_db_1 = require("../lib/store/vector-db");
64
+ const exit_1 = require("../lib/utils/exit");
65
+ const project_root_1 = require("../lib/utils/project-root");
66
/**
 * Decide whether a CLI target should be handled as a file path.
 *
 * A target qualifies when it contains a forward slash or a backslash, or
 * when it ends with a dot followed by 1-10 word characters (an extension).
 *
 * @param {string} target - Raw command-line argument.
 * @returns {boolean} True when the argument looks like a path.
 */
function isFilePath(target) {
    if (target.includes("/")) {
        return true;
    }
    if (target.includes("\\")) {
        return true;
    }
    const extensionSuffix = /\.\w{1,10}$/;
    return extensionSuffix.test(target);
}
73
/**
 * Decide whether a CLI target looks like a single identifier.
 *
 * Accepts one word made of ASCII letters, digits and underscores that does
 * not start with a digit (e.g. a PascalCase class or camelCase function name).
 *
 * @param {string} target - Raw command-line argument.
 * @returns {boolean} True when the argument is a bare identifier.
 */
function isSymbolLike(target) {
    if (target.includes(" ")) {
        return false;
    }
    const identifier = /^[A-Za-z_][A-Za-z0-9_]*$/;
    return identifier.test(target);
}
81
/**
 * Look up the file that defines `symbol` in the index.
 *
 * Runs a search for the symbol limited to 10 rows. The path of the first row
 * whose `defined_symbols` includes the symbol wins; when no row defines it,
 * the path of the first row is returned as a fallback.
 *
 * @param {string} symbol - Symbol name to look for.
 * @param {object} db - Index handle exposing `ensureTable()`.
 * @returns {Promise<string|null>} Path stored on the matching row, or null
 *   when there are no rows or any step of the lookup throws.
 */
async function findFileBySymbol(symbol, db) {
    try {
        const table = await db.ensureTable();
        const candidates = await table.search(symbol).limit(10).toArray();
        // Prefer a row that actually lists the symbol among its definitions.
        const definingRow = candidates.find((row) => {
            const defined = row.defined_symbols;
            return defined != null && defined.includes(symbol);
        });
        if (definingRow !== undefined) {
            return definingRow.path;
        }
        // Otherwise settle for the best-ranked row, if there is one.
        return candidates.length > 0 ? candidates[0].path : null;
    }
    catch (_err) {
        // Lookup is best-effort: any failure is reported as "not found".
        return null;
    }
}
108
+ exports.skeleton = new commander_1.Command("skeleton")
109
+ .description("Show code skeleton (signatures without implementation)")
110
+ .argument("<target>", "File path, symbol name, or search query")
111
+ .option("-l, --limit <n>", "Max files for query mode", "3")
112
+ .option("--json", "Output as JSON", false)
113
+ .option("--no-summary", "Omit call/complexity summary in bodies", false)
114
+ .option("-s, --sync", "Sync index before searching", false)
115
+ .action((target, options, _cmd) => __awaiter(void 0, void 0, void 0, function* () {
116
+ var _a, _b;
117
+ let vectorDb = null;
118
+ try {
119
+ // Initialize
120
+ yield (0, setup_helpers_1.ensureSetup)();
121
+ const projectRoot = (_a = (0, project_root_1.findProjectRoot)(process.cwd())) !== null && _a !== void 0 ? _a : process.cwd();
122
+ const paths = (0, project_root_1.ensureProjectPaths)(projectRoot);
123
+ vectorDb = new vector_db_1.VectorDB(paths.lancedbDir);
124
+ // Sync if requested
125
+ if (options.sync) {
126
+ const { spinner, onProgress } = (0, sync_helpers_1.createIndexingSpinner)(projectRoot, "Syncing...", { verbose: false });
127
+ yield (0, syncer_1.initialSync)({ projectRoot, onProgress });
128
+ spinner.succeed("Sync complete");
129
+ }
130
+ // Initialize skeletonizer
131
+ const skeletonizer = new skeletonizer_1.Skeletonizer();
132
+ yield skeletonizer.init();
133
+ const skeletonOpts = {
134
+ includeSummary: !options.noSummary,
135
+ };
136
+ // Determine mode based on target
137
+ if (isFilePath(target)) {
138
+ // === FILE MODE ===
139
+ const filePath = path.resolve(target);
140
+ if (!fs.existsSync(filePath)) {
141
+ console.error(`File not found: ${filePath}`);
142
+ process.exitCode = 1;
143
+ return;
144
+ }
145
+ if (vectorDb) {
146
+ const relativeToProject = path.relative(projectRoot, filePath);
147
+ const cached = yield (0, retriever_1.getStoredSkeleton)(vectorDb, relativeToProject);
148
+ if (cached) {
149
+ outputResult({
150
+ success: true,
151
+ skeleton: cached,
152
+ tokenEstimate: Math.ceil(cached.length / 4),
153
+ }, options);
154
+ return;
155
+ }
156
+ }
157
+ const content = fs.readFileSync(filePath, "utf-8");
158
+ const result = yield skeletonizer.skeletonizeFile(filePath, content, skeletonOpts);
159
+ outputResult(result, options);
160
+ }
161
+ else if (isSymbolLike(target) && !target.includes(" ")) {
162
+ // === SYMBOL MODE ===
163
+ const filePath = yield findFileBySymbol(target, vectorDb);
164
+ if (!filePath) {
165
+ console.error(`Symbol not found in index: ${target}`);
166
+ console.error("Try running 'osgrep index' first or use a search query.");
167
+ process.exitCode = 1;
168
+ return;
169
+ }
170
+ const absolutePath = path.resolve(projectRoot, filePath);
171
+ if (!fs.existsSync(absolutePath)) {
172
+ console.error(`File not found: ${absolutePath}`);
173
+ process.exitCode = 1;
174
+ return;
175
+ }
176
+ const cached = yield (0, retriever_1.getStoredSkeleton)(vectorDb, filePath);
177
+ if (cached) {
178
+ outputResult({
179
+ success: true,
180
+ skeleton: cached,
181
+ tokenEstimate: Math.ceil(cached.length / 4),
182
+ }, options);
183
+ return;
184
+ }
185
+ const content = fs.readFileSync(absolutePath, "utf-8");
186
+ const result = yield skeletonizer.skeletonizeFile(filePath, content, skeletonOpts);
187
+ outputResult(result, options);
188
+ }
189
+ else {
190
+ // === QUERY MODE ===
191
+ const searcher = new searcher_1.Searcher(vectorDb);
192
+ const limit = Math.min(Number.parseInt(options.limit, 10) || 3, 10);
193
+ const searchResults = yield searcher.search(target, limit);
194
+ if (!searchResults.data || searchResults.data.length === 0) {
195
+ console.error(`No results found for: ${target}`);
196
+ process.exitCode = 1;
197
+ return;
198
+ }
199
+ // Get unique file paths from results
200
+ const seenPaths = new Set();
201
+ const filePaths = [];
202
+ for (const result of searchResults.data) {
203
+ const resultPath = (_b = result.metadata) === null || _b === void 0 ? void 0 : _b.path;
204
+ if (resultPath && !seenPaths.has(resultPath)) {
205
+ seenPaths.add(resultPath);
206
+ filePaths.push(resultPath);
207
+ if (filePaths.length >= limit)
208
+ break;
209
+ }
210
+ }
211
+ // Skeletonize each file
212
+ const results = [];
213
+ for (const filePath of filePaths) {
214
+ const absolutePath = path.resolve(projectRoot, filePath);
215
+ if (!fs.existsSync(absolutePath)) {
216
+ results.push({
217
+ file: filePath,
218
+ skeleton: `// File not found: ${filePath}`,
219
+ tokens: 0,
220
+ error: "File not found",
221
+ });
222
+ continue;
223
+ }
224
+ // Try cache first
225
+ const cached = yield (0, retriever_1.getStoredSkeleton)(vectorDb, filePath);
226
+ if (cached) {
227
+ results.push({
228
+ file: filePath,
229
+ skeleton: cached,
230
+ tokens: Math.ceil(cached.length / 4),
231
+ });
232
+ continue;
233
+ }
234
+ const content = fs.readFileSync(absolutePath, "utf-8");
235
+ const result = yield skeletonizer.skeletonizeFile(filePath, content, skeletonOpts);
236
+ results.push({
237
+ file: filePath,
238
+ skeleton: result.skeleton,
239
+ tokens: result.tokenEstimate,
240
+ error: result.error,
241
+ });
242
+ }
243
+ // Output results
244
+ if (options.json) {
245
+ console.log(JSON.stringify(results, null, 2));
246
+ }
247
+ else {
248
+ for (const result of results) {
249
+ console.log(result.skeleton);
250
+ console.log(""); // Blank line between files
251
+ }
252
+ }
253
+ }
254
+ }
255
+ catch (error) {
256
+ const message = error instanceof Error ? error.message : String(error);
257
+ console.error("Error:", message);
258
+ process.exitCode = 1;
259
+ }
260
+ finally {
261
+ if (vectorDb) {
262
+ try {
263
+ yield vectorDb.close();
264
+ }
265
+ catch (_c) {
266
+ // Ignore close errors
267
+ }
268
+ }
269
+ const code = typeof process.exitCode === "number" ? process.exitCode : 0;
270
+ yield (0, exit_1.gracefulExit)(code);
271
+ }
272
+ }));
273
/**
 * Print a single skeleton result to stdout.
 *
 * In JSON mode the result is emitted as a pretty-printed object with the keys
 * `success`, `skeleton`, `tokens` (taken from `tokenEstimate`) and `error`;
 * otherwise only the skeleton text is printed.
 *
 * @param {object} result - Skeleton result to print.
 * @param {object} options - Parsed CLI options; only `json` is read.
 */
function outputResult(result, options) {
    if (!options.json) {
        console.log(result.skeleton);
        return;
    }
    const { success, skeleton, tokenEstimate, error } = result;
    const payload = { success, skeleton, tokens: tokenEstimate, error };
    console.log(JSON.stringify(payload, null, 2));
}
@@ -0,0 +1,129 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.symbols = void 0;
13
+ const commander_1 = require("commander");
14
+ const vector_db_1 = require("../lib/store/vector-db");
15
+ const exit_1 = require("../lib/utils/exit");
16
+ const filter_builder_1 = require("../lib/utils/filter-builder");
17
+ const project_root_1 = require("../lib/utils/project-root");
18
+ const style = {
19
+ bold: (s) => `\x1b[1m${s}\x1b[22m`,
20
+ dim: (s) => `\x1b[2m${s}\x1b[22m`,
21
+ green: (s) => `\x1b[32m${s}\x1b[39m`,
22
+ };
23
/**
 * Coerce a column value into a plain array of strings.
 *
 * Accepts either a real array or any object exposing `toArray()`; non-string
 * entries are dropped. Anything else, or a `toArray()` that throws or does
 * not return an array, yields an empty array.
 *
 * @param {*} val - Value read from a row.
 * @returns {string[]} The string entries of the value.
 */
function toStringArray(val) {
    const onlyStrings = (items) => items.filter((item) => typeof item === "string");
    if (Array.isArray(val)) {
        return onlyStrings(val);
    }
    if (!val || typeof val.toArray !== "function") {
        return [];
    }
    try {
        const converted = val.toArray();
        return Array.isArray(converted) ? onlyStrings(converted) : [];
    }
    catch (_err) {
        // A failing conversion is treated the same as "no symbols".
        return [];
    }
}
37
/**
 * Collect defined symbols from the index, aggregated by name.
 *
 * Reads rows that define at least one symbol (optionally restricted to a
 * path prefix), counts how many rows define each symbol, and remembers the
 * path and start line of the first row seen for it.
 *
 * @param {object} options
 * @param {string} options.projectRoot - Project whose index is queried.
 * @param {number} options.limit - Maximum number of entries to return.
 * @param {string} [options.pathPrefix] - Only include rows whose path starts with this.
 * @param {string} [options.pattern] - Case-insensitive substring the symbol must contain.
 * @returns {Promise<Array<{symbol: string, count: number, path: string, line: number}>>}
 *   Entries sorted by count (descending), then symbol name (ascending).
 */
async function collectSymbols(options) {
    const paths = (0, project_root_1.ensureProjectPaths)(options.projectRoot);
    const db = new vector_db_1.VectorDB(paths.lancedbDir);
    try {
        const table = await db.ensureTable();
        // Build ONE predicate. Chaining a second where() on the query builder
        // may replace the first filter instead of extending it, which would
        // drop the "has symbols" condition whenever a path prefix is given.
        const predicates = ["array_length(defined_symbols) > 0"];
        if (options.pathPrefix) {
            const prefix = (0, filter_builder_1.escapeSqlString)((0, filter_builder_1.normalizePath)(options.pathPrefix));
            predicates.push(`path LIKE '${prefix}%'`);
        }
        // Fetch more rows than requested so enough remain after
        // filtering/aggregation.
        const rowLimit = options.pattern
            ? 10000
            : Math.max(options.limit * 50, 2000);
        const rows = await table
            .query()
            .select(["defined_symbols", "path", "start_line"])
            .where(predicates.join(" AND "))
            .limit(rowLimit)
            .toArray();
        // Lowercase the pattern once rather than once per symbol.
        const needle = options.pattern ? options.pattern.toLowerCase() : null;
        const bySymbol = new Map();
        for (const row of rows) {
            const defs = toStringArray(row.defined_symbols);
            const rowPath = String(row.path || "");
            const line = Number(row.start_line || 0);
            for (const sym of defs) {
                if (needle !== null && !sym.toLowerCase().includes(needle)) {
                    continue;
                }
                const existing = bySymbol.get(sym);
                if (existing) {
                    existing.count += 1;
                }
                else {
                    // The first row seen for a symbol supplies its location.
                    bySymbol.set(sym, { symbol: sym, count: 1, path: rowPath, line });
                }
            }
        }
        return Array.from(bySymbol.values())
            .sort((a, b) => {
                // Sort by count desc, then symbol asc
                if (b.count !== a.count) {
                    return b.count - a.count;
                }
                return a.symbol.localeCompare(b.symbol);
            })
            .slice(0, options.limit);
    }
    finally {
        await db.close();
    }
}
86
/**
 * Render symbol entries as a three-column text table.
 *
 * Columns are Symbol, Count and Path:Line; the header row is bold and is
 * followed by a dashed rule. The displayed line number is `line + 1`,
 * never less than 1.
 *
 * @param {Array<{symbol: string, count: number, path: string, line: number}>} entries
 * @returns {string} The table, or a hint to build the index when `entries` is empty.
 */
function formatTable(entries) {
    if (entries.length === 0) {
        return "No symbols found. Run `osgrep index` to build the index.";
    }
    const headers = { symbol: "Symbol", count: "Count", loc: "Path:Line" };
    const rows = entries.map((entry) => {
        const lineNumber = Math.max(1, entry.line + 1);
        return {
            symbol: entry.symbol,
            count: entry.count.toString(),
            loc: `${entry.path}:${lineNumber}`,
        };
    });
    const all = [headers, ...rows];
    // Widest cell per column, header included.
    const widest = (key) => all.reduce((width, row) => Math.max(width, row[key].length), 0);
    const widths = {
        symbol: widest("symbol"),
        count: widest("count"),
        loc: widest("loc"),
    };
    const headerLine = [
        style.bold(headers.symbol.padEnd(widths.symbol)),
        style.bold(headers.count.padEnd(widths.count)),
        style.bold(headers.loc),
    ].join(" ");
    const ruleLine = [widths.symbol, widths.count, widths.loc]
        .map((width) => "-".repeat(width))
        .join(" ");
    const render = (row) => {
        const symbolCell = row.symbol.padEnd(widths.symbol);
        // Counts are right-aligned, then padded by two columns before the location.
        const countCell = row.count
            .padStart(widths.count)
            .padEnd(widths.count + 2);
        return `${symbolCell} ${countCell}${row.loc}`;
    };
    return [headerLine, ruleLine, ...rows.map(render)].join("\n");
}
112
+ exports.symbols = new commander_1.Command("symbols")
113
+ .description("List indexed symbols and where they are defined")
114
+ .argument("[pattern]", "Optional pattern to filter symbols by name")
115
+ .option("-l, --limit <number>", "Max symbols to list (default 20)", "20")
116
+ .option("-p, --path <prefix>", "Only include symbols under this path prefix")
117
+ .action((pattern, cmd) => __awaiter(void 0, void 0, void 0, function* () {
118
+ var _a;
119
+ const projectRoot = (_a = (0, project_root_1.findProjectRoot)(process.cwd())) !== null && _a !== void 0 ? _a : process.cwd();
120
+ const limit = Number.parseInt(cmd.limit, 10);
121
+ const entries = yield collectSymbols({
122
+ projectRoot,
123
+ limit: Number.isFinite(limit) && limit > 0 ? limit : 20,
124
+ pathPrefix: cmd.path,
125
+ pattern: pattern,
126
+ });
127
+ console.log(`${style.bold("Project")}: ${style.green(projectRoot)}\n${formatTable(entries)}`);
128
+ yield (0, exit_1.gracefulExit)();
129
+ }));
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.trace = void 0;
13
+ const commander_1 = require("commander");
14
+ const graph_builder_1 = require("../lib/graph/graph-builder");
15
+ const formatter_1 = require("../lib/output/formatter");
16
+ const vector_db_1 = require("../lib/store/vector-db");
17
+ const exit_1 = require("../lib/utils/exit");
18
+ const project_root_1 = require("../lib/utils/project-root");
19
+ exports.trace = new commander_1.Command("trace")
20
+ .description("Trace the call graph for a symbol")
21
+ .argument("<symbol>", "The symbol to trace")
22
+ .action((symbol) => __awaiter(void 0, void 0, void 0, function* () {
23
+ var _a;
24
+ const root = process.cwd();
25
+ let vectorDb = null;
26
+ try {
27
+ const projectRoot = (_a = (0, project_root_1.findProjectRoot)(root)) !== null && _a !== void 0 ? _a : root;
28
+ const paths = (0, project_root_1.ensureProjectPaths)(projectRoot);
29
+ vectorDb = new vector_db_1.VectorDB(paths.lancedbDir);
30
+ const graphBuilder = new graph_builder_1.GraphBuilder(vectorDb);
31
+ const graph = yield graphBuilder.buildGraph(symbol);
32
+ console.log((0, formatter_1.formatTrace)(graph));
33
+ }
34
+ catch (error) {
35
+ const message = error instanceof Error ? error.message : "Unknown error";
36
+ console.error("Trace failed:", message);
37
+ process.exitCode = 1;
38
+ }
39
+ finally {
40
+ if (vectorDb) {
41
+ try {
42
+ yield vectorDb.close();
43
+ }
44
+ catch (err) {
45
+ console.error("Failed to close VectorDB:", err);
46
+ }
47
+ }
48
+ yield (0, exit_1.gracefulExit)();
49
+ }
50
+ }));
@@ -0,0 +1,174 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
+ return new (P || (P = Promise))(function (resolve, reject) {
38
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
42
+ });
43
+ };
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ const fs = __importStar(require("node:fs"));
46
+ const path = __importStar(require("node:path"));
47
+ const transformers_1 = require("@huggingface/transformers");
48
+ const ort = __importStar(require("onnxruntime-node"));
49
// CONFIGURATION
// Resolved against the current working directory, so run the script from the
// directory that contains ./osgrep-models.
const MODEL_DIR = path.resolve("./osgrep-models/colbert"); // Adjust if your path differs
const MODEL_PATH = path.join(MODEL_DIR, "model.onnx");
const SKIPLIST_PATH = path.join(MODEL_DIR, "skiplist.json");
/**
 * Run the ColBERT model integrity checks and print a report.
 *
 * Checks, in order: model and skiplist files exist; the tokenizer maps the
 * "[Q] " and "[D] " markers to the expected ids; the skiplist contains "," and
 * "."; an ONNX inference on a four-token input yields a last output dimension
 * of 48; and a simulated count of the dot products the skiplist would avoid.
 *
 * @returns {Promise<void>} Resolves only when every check passed.
 * @throws {Error} When a required file is missing, or at the end of the run
 *   when the tokenizer-map or skiplist check failed. A wrong output dimension
 *   exits the process with code 1 immediately.
 */
async function main() {
    // Soft failures are counted so the report can finish before failing.
    let failedChecks = 0;
    console.log("šŸ” Starting ColBERT Integrity Check...\n");
    // --- CHECK 1: FILES EXIST ---
    if (!fs.existsSync(MODEL_PATH)) {
        throw new Error(`Missing model at ${MODEL_PATH}`);
    }
    if (!fs.existsSync(SKIPLIST_PATH)) {
        throw new Error(`Missing skiplist at ${SKIPLIST_PATH}`);
    }
    console.log("āœ… Files found.");
    // --- CHECK 2: TOKENIZER & MARKERS ---
    console.log("ā³ Loading Tokenizer...");
    const tokenizer = await transformers_1.AutoTokenizer.from_pretrained(MODEL_DIR);
    const queryText = "function test(a, b)";
    // The query is tokenized WITHOUT special tokens or a [Q] marker; the marker
    // ids are checked separately through the vocabulary lookups below.
    const encoded = await tokenizer(queryText, { add_special_tokens: false });
    const inputIds = encoded.input_ids; // BigInt64Array in newer transformers versions
    // Convert to standard array for inspection
    const ids = Array.from(inputIds).map(Number);
    // Mixedbread expects: [CLS] [Q] ...tokens... [SEP]
    const Q_ID = 50368;
    const CLS_ID = tokenizer.model.tokens_to_ids.get("[CLS]") ?? 50281; // Fallback to standard if null
    console.log(`\n--- Tokenizer Check ---`);
    console.log(`Query: "${queryText}"`);
    console.log(`Raw IDs:`, ids);
    // Check if tokenizer recognizes the special tokens by text
    const qCheck = tokenizer.model.tokens_to_ids.get("[Q] ");
    const dCheck = tokenizer.model.tokens_to_ids.get("[D] ");
    if (qCheck === 50368 && dCheck === 50369) {
        console.log(`āœ… Tokenizer Map Correct: [Q] -> ${qCheck}, [D] -> ${dCheck}`);
    }
    else {
        console.error(`āŒ Tokenizer Map Mismatch! Found [Q]->${qCheck}, [D]->${dCheck}`);
        console.error(` Expected 50368 and 50369.`);
        failedChecks += 1;
    }
    // --- CHECK 3: SKIPLIST ---
    const skiplist = new Set(JSON.parse(fs.readFileSync(SKIPLIST_PATH, "utf-8")));
    console.log(`\n--- Skiplist Check ---`);
    console.log(`Skiplist size: ${skiplist.size}`);
    // Check common punctuation
    const commaId = tokenizer.model.tokens_to_ids.get(",");
    const dotId = tokenizer.model.tokens_to_ids.get(".");
    if (skiplist.has(commaId) && skiplist.has(dotId)) {
        console.log(`āœ… Skiplist contains punctuation ('.'=${dotId}, ','=${commaId})`);
    }
    else {
        console.error(`āŒ Skiplist missing basic punctuation!`);
        failedChecks += 1;
    }
    // --- CHECK 4: ONNX INFERENCE ---
    console.log(`\n--- ONNX Inference Check ---`);
    const session = await ort.InferenceSession.create(MODEL_PATH);
    console.log(`Session loaded. Input names: ${session.inputNames}`);
    // Construct a dummy batch: [CLS] [Q] test [SEP]
    const batchIds = [
        BigInt(CLS_ID),
        BigInt(Q_ID),
        BigInt(1234),
        BigInt(tokenizer.sep_token_id ?? 50282),
    ];
    const tensorIds = new ort.Tensor("int64", new BigInt64Array(batchIds), [1, 4]);
    const tensorMask = new ort.Tensor("int64", new BigInt64Array([BigInt(1), BigInt(1), BigInt(1), BigInt(1)]), [1, 4]);
    const start = performance.now();
    const feeds = { input_ids: tensorIds, attention_mask: tensorMask };
    const results = await session.run(feeds);
    const end = performance.now();
    const outputName = session.outputNames[0];
    const embeddings = results[outputName];
    // Dims should be [1, 4, 48]
    const dims = embeddings.dims;
    console.log(`Output Dimensions: [${dims.join(", ")}]`);
    console.log(`Inference Time (cold): ${(end - start).toFixed(2)}ms`);
    if (dims[2] !== 48) {
        console.error(`āŒ CRITICAL: Expected dimension 48, got ${dims[2]}`);
        process.exit(1);
    }
    else {
        console.log(`āœ… Correct dimension (48d) detected.`);
    }
    // --- CHECK 5: MAXSIM PERFORMANCE SIMULATION ---
    console.log(`\n--- MaxSim Logic Benchmark ---`);
    // Create dummy token ids for a fake document (1000 tokens)
    const docLen = 1000;
    const docIds = Array.from({ length: docLen }, () => Math.floor(Math.random() * 50000));
    // Inject some punctuation into the dummy document to simulate real text
    // Let's say 15% of the doc is punctuation
    let punctuationCount = 0;
    for (let i = 0; i < docLen; i++) {
        if (Math.random() < 0.15) {
            docIds[i] = commaId ?? 0; // Force a comma
            punctuationCount++;
        }
    }
    const qLen = 32;
    // Naive Dot Product count
    const naiveOps = qLen * docLen;
    // Skiplist Dot Product count: every query token is scored against each
    // document token that is not skipped, so count those once and multiply.
    const scoredDocTokens = docIds.filter((id) => !skiplist.has(id)).length;
    const optimizedOps = qLen * scoredDocTokens;
    console.log(`Document Length: ${docLen} tokens`);
    console.log(`Punctuation/Skip tokens: ${punctuationCount} (~${((punctuationCount / docLen) * 100).toFixed(1)}%)`);
    console.log(`Naive Operations: ${naiveOps}`);
    console.log(`Skiplist Operations: ${optimizedOps}`);
    console.log(`Savings: ${naiveOps - optimizedOps} operations avoided`);
    console.log(`⚔ Speedup: ${(naiveOps / optimizedOps).toFixed(2)}x (theoretical)`);
    // Do not report success when an earlier check only logged its failure.
    if (failedChecks > 0) {
        throw new Error(`${failedChecks} integrity check(s) failed`);
    }
    console.log("\nāœ… VERIFICATION COMPLETE. MODEL IS GOOD TO GO.");
}
// Any rejection, including the failed-checks error above, ends the process
// with a non-zero exit code.
main().catch((err) => {
    console.error("\nāŒ TEST FAILED:", err);
    process.exit(1);
});