grepmax 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of grepmax might be problematic. Click here for more details.

Files changed (70) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +33 -0
  3. package/README.md +375 -0
  4. package/dist/commands/claude-code.js +60 -0
  5. package/dist/commands/codex.js +98 -0
  6. package/dist/commands/doctor.js +92 -0
  7. package/dist/commands/droid.js +189 -0
  8. package/dist/commands/index.js +125 -0
  9. package/dist/commands/list.js +120 -0
  10. package/dist/commands/mcp.js +572 -0
  11. package/dist/commands/opencode.js +199 -0
  12. package/dist/commands/search.js +539 -0
  13. package/dist/commands/serve.js +512 -0
  14. package/dist/commands/setup.js +162 -0
  15. package/dist/commands/skeleton.js +288 -0
  16. package/dist/commands/symbols.js +129 -0
  17. package/dist/commands/trace.js +50 -0
  18. package/dist/commands/verify.js +174 -0
  19. package/dist/config.js +120 -0
  20. package/dist/eval.js +618 -0
  21. package/dist/index.js +82 -0
  22. package/dist/lib/core/languages.js +237 -0
  23. package/dist/lib/graph/graph-builder.js +105 -0
  24. package/dist/lib/index/chunker.js +663 -0
  25. package/dist/lib/index/grammar-loader.js +110 -0
  26. package/dist/lib/index/ignore-patterns.js +63 -0
  27. package/dist/lib/index/index-config.js +86 -0
  28. package/dist/lib/index/sync-helpers.js +97 -0
  29. package/dist/lib/index/syncer.js +396 -0
  30. package/dist/lib/index/walker.js +164 -0
  31. package/dist/lib/index/watcher.js +245 -0
  32. package/dist/lib/output/formatter.js +161 -0
  33. package/dist/lib/output/json-formatter.js +6 -0
  34. package/dist/lib/search/intent.js +23 -0
  35. package/dist/lib/search/searcher.js +475 -0
  36. package/dist/lib/setup/model-loader.js +107 -0
  37. package/dist/lib/setup/setup-helpers.js +106 -0
  38. package/dist/lib/skeleton/body-fields.js +175 -0
  39. package/dist/lib/skeleton/index.js +24 -0
  40. package/dist/lib/skeleton/retriever.js +36 -0
  41. package/dist/lib/skeleton/skeletonizer.js +483 -0
  42. package/dist/lib/skeleton/summary-formatter.js +90 -0
  43. package/dist/lib/store/meta-cache.js +143 -0
  44. package/dist/lib/store/types.js +2 -0
  45. package/dist/lib/store/vector-db.js +340 -0
  46. package/dist/lib/utils/cleanup.js +33 -0
  47. package/dist/lib/utils/exit.js +38 -0
  48. package/dist/lib/utils/file-utils.js +131 -0
  49. package/dist/lib/utils/filter-builder.js +17 -0
  50. package/dist/lib/utils/formatter.js +230 -0
  51. package/dist/lib/utils/git.js +83 -0
  52. package/dist/lib/utils/lock.js +157 -0
  53. package/dist/lib/utils/project-root.js +107 -0
  54. package/dist/lib/utils/server-registry.js +97 -0
  55. package/dist/lib/workers/colbert-math.js +107 -0
  56. package/dist/lib/workers/colbert-tokenizer.js +113 -0
  57. package/dist/lib/workers/download-worker.js +169 -0
  58. package/dist/lib/workers/embeddings/colbert.js +213 -0
  59. package/dist/lib/workers/embeddings/granite.js +180 -0
  60. package/dist/lib/workers/embeddings/mlx-client.js +144 -0
  61. package/dist/lib/workers/orchestrator.js +350 -0
  62. package/dist/lib/workers/pool.js +373 -0
  63. package/dist/lib/workers/process-child.js +92 -0
  64. package/dist/lib/workers/worker.js +31 -0
  65. package/package.json +80 -0
  66. package/plugins/osgrep/.claude-plugin/plugin.json +20 -0
  67. package/plugins/osgrep/hooks/start.js +92 -0
  68. package/plugins/osgrep/hooks/stop.js +3 -0
  69. package/plugins/osgrep/hooks.json +26 -0
  70. package/plugins/osgrep/skills/osgrep/SKILL.md +82 -0
@@ -0,0 +1,350 @@
1
+ "use strict";
2
// __createBinding(o, m, k, k2): re-expose property `k` of module object `m` on
// object `o` under the name `k2` (defaults to `k`).
// NOTE(review): this looks like the standard compiler-emitted CommonJS interop
// helper; confirm before hand-editing.
// An already-installed helper on `this` is reused when one exists.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Fall back to a live getter when there is no own descriptor, when the
// descriptor is an accessor on a non-ES module, or when it is a data property
// that is writable or configurable.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
// Variant used when Object.create is unavailable: a plain value copy.
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): attach `v` to `o` as its "default" property.
// The primary variant defines an enumerable property via Object.defineProperty
// (no `writable` flag is passed); the variant used when Object.create is
// unavailable performs a plain assignment.
// An already-installed helper on `this` is reused when one exists.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): produce a namespace-style object for a required module.
// - If `mod` is truthy and flagged `__esModule`, it is returned unchanged.
// - Otherwise a new object is built: every own property name of `mod` except
//   "default" is bound onto it via __createBinding, and `mod` itself becomes
//   the object's "default" via __setModuleDefault.
// An already-installed helper on `this` is reused when one exists.
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in + hasOwnProperty loop when that function is missing.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
// `mod != null` skips the copy for null/undefined; "default" is set separately below.
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
// __awaiter(thisArg, _arguments, P, generator): run a generator function as an
// asynchronous routine and return a promise for its final value.
// - `P` is the promise constructor to use; it defaults to the global Promise.
// - `generator` is invoked with `thisArg` and `_arguments` (or an empty list).
// - Each yielded value is wrapped in a `P` (adopt) and its settlement is fed
//   back into the generator through next() or throw().
// - Any exception thrown while stepping rejects the returned promise.
// An already-installed helper on `this` is reused when one exists.
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
+ return new (P || (P = Promise))(function (resolve, reject) {
38
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
// Resolve when the generator is done; otherwise wait on the yielded value.
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
// Kick off the generator; `generator` is rebound from the function to its iterator.
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
42
+ });
43
+ };
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ exports.WorkerOrchestrator = void 0;
46
+ const fs = __importStar(require("node:fs"));
47
+ const path = __importStar(require("node:path"));
48
+ const transformers_1 = require("@huggingface/transformers");
49
+ const ort = __importStar(require("onnxruntime-node"));
50
+ const uuid_1 = require("uuid");
51
+ const config_1 = require("../../config");
52
+ const chunker_1 = require("../index/chunker");
53
+ const skeleton_1 = require("../skeleton");
54
+ const file_utils_1 = require("../utils/file-utils");
55
+ const colbert_math_1 = require("./colbert-math");
56
+ const colbert_1 = require("./embeddings/colbert");
57
+ const granite_1 = require("./embeddings/granite");
58
+ const mlx_client_1 = require("./embeddings/mlx-client");
59
// Directory used as the transformers cache, taken from the project config.
const CACHE_DIR = config_1.PATHS.models;

// Model debug logging is opt-in: OSGREP_DEBUG_MODELS must be "1" or "true".
const LOG_MODELS = ["1", "true"].includes(process.env.OSGREP_DEBUG_MODELS);

/** Forward the arguments to console.log, but only when LOG_MODELS is enabled. */
const log = (...args) => {
    if (!LOG_MODELS) {
        return;
    }
    console.log(...args);
};

// Point the transformers runtime at the cache and allow both local and remote models.
Object.assign(transformers_1.env, {
    cacheDir: CACHE_DIR,
    allowLocalModels: true,
    allowRemoteModels: true,
});

// Project root: OSGREP_PROJECT_ROOT (resolved to an absolute path) when set and
// non-empty, otherwise the current working directory.
const PROJECT_ROOT = process.env.OSGREP_PROJECT_ROOT
    ? path.resolve(process.env.OSGREP_PROJECT_ROOT)
    : process.cwd();

// A "models" directory under the project root, when present, is registered as
// the local model path.
const LOCAL_MODELS = path.join(PROJECT_ROOT, "models");
if (fs.existsSync(LOCAL_MODELS)) {
    transformers_1.env.localModelPath = LOCAL_MODELS;
    log(`Worker: Using local models from ${LOCAL_MODELS}`);
}
77
/**
 * Coerce a stored ColBERT payload into an Int8Array.
 *
 * Accepted shapes: an Int8Array (returned as-is), a Node Buffer (viewed in
 * place, no copy), a `{ type: "Buffer", data: [...] }` object as produced by
 * JSON/IPC serialization of a Buffer (copied), or a plain array (copied).
 * Anything else yields an empty Int8Array.
 *
 * @param {*} col - The payload to coerce.
 * @returns {Int8Array}
 */
function toInt8Colbert(col) {
    if (col instanceof Int8Array) {
        return col;
    }
    if (Buffer.isBuffer(col)) {
        return new Int8Array(col.buffer, col.byteOffset, col.byteLength);
    }
    if (col &&
        typeof col === "object" &&
        "type" in col &&
        col.type === "Buffer" &&
        Array.isArray(col.data)) {
        // IPC serialization fallback (still copies, but unavoidable without SharedArrayBuffer)
        return new Int8Array(col.data);
    }
    if (Array.isArray(col)) {
        return new Int8Array(col);
    }
    return new Int8Array(0);
}

/**
 * Split a flat `seq * dim` buffer into `seq` rows and L2-normalize each row.
 * Rows whose norm is at most 1e-9 are copied through unscaled.
 *
 * @param {ArrayLike<number>} data - Flat values, row-major.
 * @param {number} seq - Number of rows.
 * @param {number} dim - Values per row.
 * @returns {number[][]}
 */
function normalizeTokenRows(data, seq, dim) {
    const matrix = [];
    for (let s = 0; s < seq; s++) {
        const offset = s * dim;
        let sumSq = 0;
        for (let d = 0; d < dim; d++) {
            const val = data[offset + d];
            sumSq += val * val;
        }
        const norm = Math.sqrt(sumSq);
        const row = [];
        if (norm > 1e-9) {
            for (let d = 0; d < dim; d++) {
                row.push(data[offset + d] / norm);
            }
        }
        else {
            for (let d = 0; d < dim; d++) {
                row.push(data[offset + d]);
            }
        }
        matrix.push(row);
    }
    return matrix;
}

/**
 * Mean-pool the rows of `matrix` into one Float32Array of length `dim`, then
 * L2-normalize it (skipped when the norm is at most 1e-9).
 *
 * @param {number[][]} matrix - Rows to pool.
 * @param {number} dim - Values per row.
 * @returns {Float32Array}
 */
function meanPoolNormalized(matrix, dim) {
    const pooled = new Float32Array(dim);
    for (const row of matrix) {
        for (let d = 0; d < dim; d++) {
            pooled[d] += row[d];
        }
    }
    let sumSq = 0;
    for (let d = 0; d < dim; d++) {
        // `|| 1` avoids dividing by zero when there are no rows.
        pooled[d] /= matrix.length || 1;
        sumSq += pooled[d] * pooled[d];
    }
    const norm = Math.sqrt(sumSq);
    if (norm > 1e-9) {
        for (let d = 0; d < dim; d++) {
            pooled[d] /= norm;
        }
    }
    return pooled;
}

/**
 * Coordinates chunking, skeleton generation, embedding and reranking for a
 * worker. Holds one instance each of the Granite model, the ColBERT model,
 * the tree-sitter chunker and the skeletonizer, and initializes them lazily.
 */
class WorkerOrchestrator {
    constructor() {
        this.granite = new granite_1.GraniteModel();
        this.colbert = new colbert_1.ColbertModel();
        this.chunker = new chunker_1.TreeSitterChunker();
        this.skeletonizer = new skeleton_1.Skeletonizer();
        // In-flight initialization, shared by concurrent ensureReady() callers.
        this.initPromise = null;
        this.vectorDimensions = config_1.CONFIG.VECTOR_DIM;
    }

    /**
     * Initialize the chunker, skeletonizer and both models if the models are
     * not ready yet. Concurrent callers share one in-flight initialization;
     * the shared promise is cleared once it settles, so a failed
     * initialization is retried by the next call.
     *
     * @returns {Promise<void>}
     */
    async ensureReady() {
        if (this.granite.isReady() && this.colbert.isReady()) {
            return;
        }
        if (this.initPromise) {
            return this.initPromise;
        }
        this.initPromise = (async () => {
            await Promise.all([
                this.chunker.init(),
                this.skeletonizer.init(),
                this.granite.load(),
                this.colbert.load(),
            ]);
        })().finally(() => {
            this.initPromise = null;
        });
        return this.initPromise;
    }

    /**
     * Embed `texts` in batches and return one hybrid result per text, in order.
     *
     * The batch size is 16 by default. A positive integer in
     * OSGREP_WORKER_BATCH_SIZE overrides it, clamped to the range 4..16.
     * For each batch the dense vectors come from mlxEmbed when it returns a
     * non-null result, otherwise from the Granite model; the ColBERT model
     * then produces the batch's results from the texts and dense vectors.
     *
     * @param {string[]} texts - Texts to embed.
     * @param {Function} [onProgress] - Called between batches and once at the end.
     * @returns {Promise<Array>} Results as returned by `colbert.runBatch`.
     */
    async computeHybrid(texts, onProgress) {
        if (!texts.length) {
            return [];
        }
        await this.ensureReady();
        const results = [];
        const rawBatch = process.env.OSGREP_WORKER_BATCH_SIZE;
        const envBatch = Number.parseInt(rawBatch == null ? "" : rawBatch, 10);
        const BATCH_SIZE = Number.isFinite(envBatch) && envBatch > 0
            ? Math.max(4, Math.min(16, envBatch))
            : 16;
        for (let i = 0; i < texts.length; i += BATCH_SIZE) {
            if (i > 0 && onProgress != null) {
                onProgress();
            }
            const batchTexts = texts.slice(i, i + BATCH_SIZE);
            // Try MLX GPU server first, fall back to ONNX CPU
            const mlxBatch = await mlx_client_1.mlxEmbed(batchTexts);
            const denseBatch = mlxBatch != null
                ? mlxBatch
                : await this.granite.runBatch(batchTexts);
            const colbertBatch = await this.colbert.runBatch(batchTexts, denseBatch, this.vectorDimensions);
            results.push(...colbertBatch);
        }
        if (onProgress != null) {
            onProgress();
        }
        return results;
    }

    /**
     * Chunk a file and normalize the per-chunk bookkeeping fields.
     *
     * When buildAnchorChunk returns a chunk it is placed first. Every returned
     * chunk has `context` (array), `chunkIndex` (number), `isAnchor` (boolean)
     * and `imports` (from the chunker metadata). A chunk without a numeric
     * `chunkIndex` gets its position among the parsed chunks, which puts the
     * anchor at -1.
     *
     * @param {string} pathname - Path passed to the chunker.
     * @param {string} content - File content.
     * @returns {Promise<Object[]>}
     */
    async chunkFile(pathname, content) {
        await this.ensureReady();
        const { chunks: parsedChunks, metadata } = await this.chunker.chunk(pathname, content);
        const anchorChunk = chunker_1.buildAnchorChunk(pathname, content, metadata);
        const baseChunks = anchorChunk
            ? [anchorChunk, ...parsedChunks]
            : parsedChunks;
        return baseChunks.map((chunk, idx) => {
            const positionalIndex = anchorChunk ? idx - 1 : idx;
            return {
                ...chunk,
                context: Array.isArray(chunk.context) ? chunk.context : [],
                chunkIndex: typeof chunk.chunkIndex === "number"
                    ? chunk.chunkIndex
                    : positionalIndex,
                isAnchor: chunk.isAnchor === true ||
                    (anchorChunk ? idx === 0 : false),
                imports: metadata.imports,
            };
        });
    }

    /**
     * Turn chunks into flat records ready for embedding and storage.
     *
     * Each record gets a fresh UUID, the formatted `content` and
     * `display_text` from formatChunkText, and the display text of its
     * neighbours as `context_prev` / `context_next`. The file skeleton is
     * attached only to chunks whose `isAnchor` is truthy.
     *
     * @param {string} path - File path stored on every record.
     * @param {string} hash - File hash stored on every record.
     * @param {Object[]} chunks - Chunks as returned by chunkFile.
     * @param {string} [skeleton] - File skeleton, if one was generated.
     * @returns {Object[]}
     */
    toPreparedChunks(path, hash, chunks, skeleton) {
        const texts = chunks.map((chunk) => chunker_1.formatChunkText(chunk, path));
        // Display text of the neighbour at index j, or undefined when out of range.
        const neighbourText = (j) => {
            const neighbour = texts[j];
            return neighbour == null ? undefined : neighbour.displayText;
        };
        return texts.map(({ content, displayText }, i) => {
            const chunk = chunks[i];
            const prev = neighbourText(i - 1);
            const next = neighbourText(i + 1);
            return {
                id: uuid_1.v4(),
                path,
                hash,
                content: content, // Now minimal
                display_text: displayText, // Now rich
                context_prev: typeof prev === "string" ? prev : undefined,
                context_next: typeof next === "string" ? next : undefined,
                start_line: chunk.startLine,
                end_line: chunk.endLine,
                chunk_index: chunk.chunkIndex,
                is_anchor: chunk.isAnchor === true,
                chunk_type: typeof chunk.type === "string" ? chunk.type : undefined,
                complexity: chunk.complexity,
                is_exported: chunk.isExported,
                defined_symbols: chunk.definedSymbols,
                referenced_symbols: chunk.referencedSymbols,
                role: chunk.role,
                parent_symbol: chunk.parentSymbol,
                file_skeleton: chunk.isAnchor ? skeleton : undefined,
            };
        });
    }

    /**
     * Read, chunk, skeletonize and embed one file.
     *
     * The file is read from `input.path` when that is absolute, else from
     * `input.absolutePath` when set, else from `input.path` joined onto
     * PROJECT_ROOT. Files that are not indexable, are empty, or contain a
     * null byte produce `{ vectors: [], shouldDelete: true, ... }`.
     *
     * @param {{path: string, absolutePath?: string}} input - File to process.
     * @param {Function} [onProgress] - Called at several points during processing.
     * @returns {Promise<Object>} `{ vectors, hash, mtimeMs, size }`, plus
     *   `shouldDelete: true` for skipped files.
     */
    async processFile(input, onProgress) {
        const notify = () => {
            if (onProgress != null) {
                onProgress();
            }
        };
        let absolutePath;
        if (path.isAbsolute(input.path)) {
            absolutePath = input.path;
        }
        else if (input.absolutePath) {
            absolutePath = input.absolutePath;
        }
        else {
            absolutePath = path.join(PROJECT_ROOT, input.path);
        }
        const { buffer, mtimeMs, size } = await file_utils_1.readFileSnapshot(absolutePath);
        const hash = file_utils_1.computeBufferHash(buffer);
        if (!file_utils_1.isIndexableFile(absolutePath, size) ||
            buffer.length === 0 ||
            file_utils_1.hasNullByte(buffer)) {
            return { vectors: [], hash, mtimeMs, size, shouldDelete: true };
        }
        notify();
        await this.ensureReady();
        notify();
        const content = buffer.toString("utf-8");
        // Chunking and skeleton generation run in parallel.
        const [chunks, skeletonResult] = await Promise.all([
            this.chunkFile(input.path, content),
            this.skeletonizer.skeletonizeFile(input.path, content, {
                includeSummary: true,
            }),
        ]);
        notify();
        if (!chunks.length) {
            return { vectors: [], hash, mtimeMs, size };
        }
        const preparedChunks = this.toPreparedChunks(input.path, hash, chunks, skeletonResult.success ? skeletonResult.skeleton : undefined);
        const hybrids = await this.computeHybrid(preparedChunks.map((chunk) => chunk.content), onProgress);
        const vectors = preparedChunks.map((chunk, idx) => {
            // A missing embedding result degrades to empty vectors rather than failing the file.
            const hybrid = hybrids[idx] != null
                ? hybrids[idx]
                : {
                    dense: new Float32Array(),
                    colbert: new Int8Array(),
                    scale: 1,
                };
            return {
                ...chunk,
                vector: hybrid.dense,
                colbert: Buffer.from(hybrid.colbert),
                colbert_scale: hybrid.scale,
                pooled_colbert_48d: hybrid.pooled_colbert_48d,
                doc_token_ids: hybrid.token_ids,
            };
        });
        notify();
        return { vectors, hash, mtimeMs, size };
    }

    /**
     * Encode a search query into dense, per-token and pooled embeddings.
     *
     * The dense vector comes from mlxEmbed when available, otherwise from
     * the Granite model. The per-token matrix is the ColBERT session output
     * with each row L2-normalized; the pooled vector is the normalized mean
     * of those rows.
     *
     * @param {string} text - Query text.
     * @returns {Promise<{dense: number[], colbert: number[][], colbertDim: number, pooled_colbert_48d: number[]}>}
     * @throws {Error} When the ColBERT session output lacks the expected tensor.
     */
    async encodeQuery(text) {
        await this.ensureReady();
        // Try MLX GPU server first, fall back to ONNX CPU
        const mlxResult = await mlx_client_1.mlxEmbed([text]);
        const mlxVector = mlxResult == null ? undefined : mlxResult[0];
        const denseVector = mlxVector != null
            ? mlxVector
            : (await this.granite.runBatch([text]))[0];
        const encoded = await this.colbert.encodeQuery(text);
        const feeds = {
            input_ids: new ort.Tensor("int64", encoded.input_ids, [
                1,
                encoded.input_ids.length,
            ]),
            attention_mask: new ort.Tensor("int64", encoded.attention_mask, [
                1,
                encoded.attention_mask.length,
            ]),
        };
        const sessionOut = await this.colbert.runSession(feeds);
        const outputName = this.colbert.getOutputName();
        const output = sessionOut[outputName];
        if (!output) {
            throw new Error("ColBERT session output missing embeddings tensor");
        }
        // dims are read as [batch, seq, dim]; the batch entry is ignored.
        const [, seq, dim] = output.dims;
        const matrix = normalizeTokenRows(output.data, seq, dim);
        // Compute pooled embedding (mean of tokens), then normalize it.
        const pooled = meanPoolNormalized(matrix, dim);
        return {
            dense: Array.from(denseVector != null ? denseVector : []),
            colbert: matrix,
            colbertDim: dim,
            pooled_colbert_48d: Array.from(pooled),
        };
    }

    /**
     * Score each document against the query with maxSim.
     *
     * Each document's int8 payload is cut into rows of `input.colbertDim`
     * values and rescaled as `value * doc.scale / 127`. `doc.token_ids` is
     * forwarded only when it is an array with exactly one id per row.
     *
     * A `colbertDim` that is not greater than zero yields an empty document
     * matrix. Without that guard a dimension of 0 makes the row count
     * Infinity and the row loop never terminates.
     *
     * @param {{query: Array, docs: Array, colbertDim: number}} input
     * @returns {Promise<Array>} One maxSim result per document, in order.
     */
    async rerank(input) {
        await this.ensureReady();
        const queryMatrix = input.query.map((row) => row instanceof Float32Array ? row : new Float32Array(row));
        const dim = input.colbertDim;
        return input.docs.map((doc) => {
            const colbert = toInt8Colbert(doc.colbert);
            const seqLen = dim > 0 ? Math.floor(colbert.length / dim) : 0;
            const docMatrix = [];
            for (let i = 0; i < seqLen; i++) {
                const start = i * dim;
                const row = new Float32Array(dim);
                for (let d = 0; d < dim; d++) {
                    row[d] = (colbert[start + d] * doc.scale) / 127.0;
                }
                docMatrix.push(row);
            }
            const tokenIds = Array.isArray(doc.token_ids) && doc.token_ids.length === seqLen
                ? doc.token_ids
                : undefined;
            return colbert_math_1.maxSim(queryMatrix, docMatrix, tokenIds);
        });
    }
}
350
+ exports.WorkerOrchestrator = WorkerOrchestrator;