grepmax 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/NOTICE +33 -0
- package/README.md +375 -0
- package/dist/commands/claude-code.js +60 -0
- package/dist/commands/codex.js +98 -0
- package/dist/commands/doctor.js +92 -0
- package/dist/commands/droid.js +189 -0
- package/dist/commands/index.js +125 -0
- package/dist/commands/list.js +120 -0
- package/dist/commands/mcp.js +567 -0
- package/dist/commands/opencode.js +199 -0
- package/dist/commands/search.js +539 -0
- package/dist/commands/serve.js +502 -0
- package/dist/commands/setup.js +160 -0
- package/dist/commands/skeleton.js +288 -0
- package/dist/commands/symbols.js +129 -0
- package/dist/commands/trace.js +50 -0
- package/dist/commands/verify.js +174 -0
- package/dist/config.js +120 -0
- package/dist/eval.js +618 -0
- package/dist/index.js +82 -0
- package/dist/lib/core/languages.js +237 -0
- package/dist/lib/graph/graph-builder.js +105 -0
- package/dist/lib/index/chunker.js +663 -0
- package/dist/lib/index/grammar-loader.js +110 -0
- package/dist/lib/index/ignore-patterns.js +63 -0
- package/dist/lib/index/index-config.js +86 -0
- package/dist/lib/index/sync-helpers.js +97 -0
- package/dist/lib/index/syncer.js +396 -0
- package/dist/lib/index/walker.js +164 -0
- package/dist/lib/index/watcher.js +245 -0
- package/dist/lib/output/formatter.js +161 -0
- package/dist/lib/output/json-formatter.js +6 -0
- package/dist/lib/search/intent.js +23 -0
- package/dist/lib/search/searcher.js +475 -0
- package/dist/lib/setup/model-loader.js +107 -0
- package/dist/lib/setup/setup-helpers.js +106 -0
- package/dist/lib/skeleton/body-fields.js +175 -0
- package/dist/lib/skeleton/index.js +24 -0
- package/dist/lib/skeleton/retriever.js +36 -0
- package/dist/lib/skeleton/skeletonizer.js +483 -0
- package/dist/lib/skeleton/summary-formatter.js +92 -0
- package/dist/lib/store/meta-cache.js +143 -0
- package/dist/lib/store/types.js +2 -0
- package/dist/lib/store/vector-db.js +340 -0
- package/dist/lib/utils/cleanup.js +33 -0
- package/dist/lib/utils/exit.js +38 -0
- package/dist/lib/utils/file-utils.js +131 -0
- package/dist/lib/utils/filter-builder.js +17 -0
- package/dist/lib/utils/formatter.js +230 -0
- package/dist/lib/utils/git.js +83 -0
- package/dist/lib/utils/lock.js +157 -0
- package/dist/lib/utils/project-root.js +107 -0
- package/dist/lib/utils/server-registry.js +97 -0
- package/dist/lib/workers/colbert-math.js +107 -0
- package/dist/lib/workers/colbert-tokenizer.js +113 -0
- package/dist/lib/workers/download-worker.js +169 -0
- package/dist/lib/workers/embeddings/colbert.js +213 -0
- package/dist/lib/workers/embeddings/granite.js +180 -0
- package/dist/lib/workers/embeddings/mlx-client.js +144 -0
- package/dist/lib/workers/orchestrator.js +350 -0
- package/dist/lib/workers/pool.js +373 -0
- package/dist/lib/workers/process-child.js +92 -0
- package/dist/lib/workers/worker.js +31 -0
- package/package.json +80 -0
- package/plugins/osgrep/.claude-plugin/plugin.json +20 -0
- package/plugins/osgrep/hooks/start.js +90 -0
- package/plugins/osgrep/hooks/stop.js +3 -0
- package/plugins/osgrep/hooks.json +26 -0
- package/plugins/osgrep/skills/osgrep/SKILL.md +82 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
36
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
37
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
38
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
39
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
40
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
41
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
42
|
+
});
|
|
43
|
+
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
exports.ColbertModel = void 0;
const fs = __importStar(require("node:fs"));
const path = __importStar(require("node:path"));
const ort = __importStar(require("onnxruntime-node"));
const config_1 = require("../../../config");
const colbert_tokenizer_1 = require("../colbert-tokenizer");
// Directory where downloaded model artifacts are cached on disk.
const CACHE_DIR = config_1.PATHS.models;
// Single intra-op thread per session; parallelism presumably comes from the
// surrounding worker pool rather than ONNX threads — confirm against pool.js.
const ONNX_THREADS = 1;
// Model-loading debug logging, opt-in via OSGREP_DEBUG_MODELS=1|true.
const LOG_MODELS = process.env.OSGREP_DEBUG_MODELS === "1" ||
    process.env.OSGREP_DEBUG_MODELS === "true";
// Logs only when debug logging is enabled; otherwise a no-op.
const log = (...args) => {
    if (LOG_MODELS)
        console.log(...args);
};
|
|
59
|
+
/**
 * ColBERT late-interaction embedding model backed by a local ONNX Runtime
 * session. For each input text it produces per-token embeddings quantized to
 * int8 (with a per-chunk scale factor) plus a mean-pooled unit vector.
 */
class ColbertModel {
    constructor() {
        // ort.InferenceSession after load(); null until then.
        this.session = null;
        // ColBERTTokenizer after load(); null until then.
        this.tokenizer = null;
    }
    /**
     * Load the tokenizer and the int8 ONNX model from the model cache.
     * Idempotent: returns immediately when both are already loaded.
     * Throws if the model file is missing or session creation fails.
     */
    load() {
        return __awaiter(this, void 0, void 0, function* () {
            if (this.session && this.tokenizer)
                return;
            this.tokenizer = new colbert_tokenizer_1.ColBERTTokenizer();
            const basePath = path.join(CACHE_DIR, config_1.MODEL_IDS.colbert);
            const onnxDir = path.join(basePath, "onnx");
            // Only the int8-quantized variant is supported; fail fast if absent.
            const modelPath = path.join(onnxDir, "model_int8.onnx");
            if (!fs.existsSync(modelPath)) {
                throw new Error(`ColBERT ONNX model not found at ${modelPath}`);
            }
            yield this.tokenizer.init(basePath);
            const sessionOptions = {
                executionProviders: ["cpu"],
                intraOpNumThreads: ONNX_THREADS,
                interOpNumThreads: 1,
                graphOptimizationLevel: "all",
            };
            log(`Worker: Loading ColBERT ONNX session from ${modelPath}`);
            this.session = yield ort.InferenceSession.create(modelPath, sessionOptions);
            if (!this.session) {
                throw new Error(`ColBERT ONNX load failed; tried ${modelPath}`);
            }
        });
    }
    /** True once both the session and the tokenizer are loaded. */
    isReady() {
        return !!(this.session && this.tokenizer);
    }
    /**
     * Embed a batch of document texts.
     *
     * @param texts            Strings to encode (one result per text).
     * @param denseVectors     Pre-computed dense vectors, aligned by index with
     *                         `texts`; missing entries become zero vectors.
     * @param vectorDimensions Length of the zero-fill fallback dense vector.
     * @returns Array of result records, or [] when the model is not loaded.
     */
    runBatch(texts, denseVectors, vectorDimensions) {
        return __awaiter(this, void 0, void 0, function* () {
            var _a;
            if (!this.session || !this.tokenizer)
                return [];
            const tokenizer = this.tokenizer;
            const session = this.session;
            // Tokenize every text, then right-pad all sequences to the batch max.
            const encodedBatch = yield Promise.all(texts.map((t) => tokenizer.encodeDoc(t)));
            const maxLen = Math.max(...encodedBatch.map((e) => e.input_ids.length));
            const batchInputIds = new BigInt64Array(texts.length * maxLen);
            const batchAttentionMask = new BigInt64Array(texts.length * maxLen);
            // Hard-coded pad token id — presumably matches the ColBERT tokenizer's
            // [PAD]; verify against the tokenizer config if the model changes.
            const padId = BigInt(50283);
            for (let i = 0; i < encodedBatch.length; i++) {
                const encoded = encodedBatch[i];
                const offset = i * maxLen;
                for (let j = 0; j < maxLen; j++) {
                    if (j < encoded.input_ids.length) {
                        batchInputIds[offset + j] = encoded.input_ids[j];
                        batchAttentionMask[offset + j] = encoded.attention_mask[j];
                    }
                    else {
                        // Padding positions: pad id with attention masked off.
                        batchInputIds[offset + j] = padId;
                        batchAttentionMask[offset + j] = BigInt(0);
                    }
                }
            }
            const feeds = {
                input_ids: new ort.Tensor("int64", batchInputIds, [texts.length, maxLen]),
                attention_mask: new ort.Tensor("int64", batchAttentionMask, [
                    texts.length,
                    maxLen,
                ]),
            };
            const sessionOut = yield session.run(feeds);
            // The model's first output is assumed to hold the token embeddings.
            const outputName = session.outputNames[0];
            const output = sessionOut[outputName];
            if (!output) {
                throw new Error("ColBERT session output missing embeddings tensor");
            }
            const data = output.data;
            const [batch, seq, dim] = output.dims;
            const results = [];
            for (let b = 0; b < batch; b++) {
                const batchOffset = b * seq * dim;
                // Only the text's real tokens are kept; padded tail is dropped.
                const originalLen = encodedBatch[b].input_ids.length;
                const normalized = new Float32Array(originalLen * dim);
                // Track the max |component| across all tokens for quantization scale.
                let maxVal = 0;
                for (let s = 0; s < originalLen; s++) {
                    const offset = batchOffset + s * dim;
                    // L2-normalize each token vector (|| 1 guards against zero norm).
                    let sumSq = 0;
                    for (let d = 0; d < dim; d++) {
                        const val = data[offset + d];
                        sumSq += val * val;
                    }
                    const norm = Math.sqrt(sumSq) || 1;
                    for (let d = 0; d < dim; d++) {
                        const val = data[offset + d] / norm;
                        const idx = s * dim + d;
                        normalized[idx] = val;
                        if (Math.abs(val) > maxVal)
                            maxVal = Math.abs(val);
                    }
                }
                if (maxVal === 0)
                    maxVal = 1;
                // Symmetric int8 quantization: value/maxVal scaled into [-127, 127].
                const int8Array = new Int8Array(normalized.length);
                for (let i = 0; i < normalized.length; i++) {
                    int8Array[i] = Math.max(-127, Math.min(127, Math.round((normalized[i] / maxVal) * 127)));
                }
                // Mean-pool the normalized token vectors, then re-normalize to unit
                // length, producing a single summary vector per text.
                const pooled = new Float32Array(dim);
                const tokenCount = Math.max(1, originalLen);
                for (let s = 0; s < originalLen; s++) {
                    const tokenOffset = s * dim;
                    for (let d = 0; d < dim; d++) {
                        pooled[d] += normalized[tokenOffset + d];
                    }
                }
                let pooledNorm = 0;
                for (let d = 0; d < dim; d++) {
                    pooled[d] /= tokenCount;
                    pooledNorm += pooled[d] * pooled[d];
                }
                pooledNorm = Math.sqrt(pooledNorm) || 1;
                for (let d = 0; d < dim; d++) {
                    pooled[d] /= pooledNorm;
                }
                results.push({
                    // Fallback: all-zeros dense vector when none was supplied.
                    dense: (_a = denseVectors[b]) !== null && _a !== void 0 ? _a : new Float32Array(vectorDimensions).fill(0),
                    colbert: int8Array,
                    // Dequantization scale: float ≈ int8 * scale / 127.
                    scale: maxVal,
                    // Field name suggests dim is 48 for this model — not enforced here.
                    pooled_colbert_48d: pooled,
                    token_ids: Array.from(encodedBatch[b].input_ids, (v) => Number(v)),
                });
            }
            return results;
        });
    }
    /**
     * Tokenize a query string into int64 tensors ready for runSession().
     * Throws if load() has not completed.
     */
    encodeQuery(text) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.tokenizer)
                throw new Error("ColBERT tokenizer not initialized");
            const encoded = yield this.tokenizer.encodeQuery(text);
            return {
                input_ids: new BigInt64Array(encoded.input_ids),
                attention_mask: new BigInt64Array(encoded.attention_mask),
            };
        });
    }
    /** Run the raw ONNX session on caller-prepared feeds. Throws if unloaded. */
    runSession(feeds) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.session)
                throw new Error("ColBERT session not initialized");
            return this.session.run(feeds);
        });
    }
    /** Name of the session's first output tensor. Throws if unloaded. */
    getOutputName() {
        if (!this.session)
            throw new Error("ColBERT session not initialized");
        return this.session.outputNames[0];
    }
}
exports.ColbertModel = ColbertModel;
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
36
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
37
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
38
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
39
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
40
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
41
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
42
|
+
});
|
|
43
|
+
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
exports.GraniteModel = void 0;
const fs = __importStar(require("node:fs"));
const path = __importStar(require("node:path"));
const transformers_1 = require("@huggingface/transformers");
const ort = __importStar(require("onnxruntime-node"));
const config_1 = require("../../../config");
// Directory where downloaded model artifacts are cached on disk.
const CACHE_DIR = config_1.PATHS.models;
// Single intra-op thread per session; parallelism presumably comes from the
// surrounding worker pool rather than ONNX threads — confirm against pool.js.
const ONNX_THREADS = 1;
// Model-loading debug logging, opt-in via OSGREP_DEBUG_MODELS=1|true.
const LOG_MODELS = process.env.OSGREP_DEBUG_MODELS === "1" ||
    process.env.OSGREP_DEBUG_MODELS === "true";
// Logs only when debug logging is enabled; otherwise a no-op.
const log = (...args) => {
    if (LOG_MODELS)
        console.log(...args);
};
|
|
59
|
+
/**
 * Granite dense embedding model backed by a local ONNX Runtime session and a
 * Hugging Face tokenizer. Produces one L2-normalized Float32Array per input
 * text via attention-masked mean pooling of the final hidden states.
 */
class GraniteModel {
    constructor() {
        // ort.InferenceSession after load(); null until then.
        this.session = null;
        // Hugging Face tokenizer (callable) after load(); null until then.
        this.tokenizer = null;
        // Output vector length; pooled vectors are truncated/padded to this.
        this.vectorDimensions = config_1.CONFIG.VECTOR_DIM;
    }
    /**
     * Locate the model file on disk, preferring the q4-quantized variant.
     * @returns { modelPath, tokenizerPath }
     * @throws when neither candidate file exists.
     */
    resolvePaths() {
        const basePath = path.join(CACHE_DIR, config_1.MODEL_IDS.embed);
        const onnxDir = path.join(basePath, "onnx");
        const candidates = ["model_q4.onnx", "model.onnx"];
        for (const candidate of candidates) {
            const candidatePath = path.join(onnxDir, candidate);
            if (fs.existsSync(candidatePath)) {
                return { modelPath: candidatePath, tokenizerPath: basePath };
            }
        }
        throw new Error(`Granite ONNX model not found. Looked for ${candidates.join(", ")} in ${onnxDir}`);
    }
    /**
     * Load the tokenizer and ONNX session. Idempotent: returns immediately
     * when both are already loaded. Throws if no model file is found.
     */
    load() {
        return __awaiter(this, void 0, void 0, function* () {
            if (this.session && this.tokenizer)
                return;
            const { modelPath, tokenizerPath } = this.resolvePaths();
            log(`Worker: Loading Granite ONNX session from ${modelPath}`);
            this.tokenizer = yield transformers_1.AutoTokenizer.from_pretrained(tokenizerPath);
            const sessionOptions = {
                executionProviders: ["cpu"],
                intraOpNumThreads: ONNX_THREADS,
                interOpNumThreads: 1,
                graphOptimizationLevel: "all",
            };
            this.session = yield ort.InferenceSession.create(modelPath, sessionOptions);
        });
    }
    /** True once both the session and the tokenizer are loaded. */
    isReady() {
        return !!(this.session && this.tokenizer);
    }
    /**
     * Attention-masked mean pooling over token hidden states.
     *
     * @param hidden    Flat hidden-state buffer laid out [batch, seq, hiddenDim].
     * @param attention Flat attention mask; its length defines the mask's own
     *                  per-batch sequence length, which may differ from `seq`.
     * @param batch     Number of sequences.
     * @param seq       Sequence length of the hidden-state tensor.
     * @param hiddenDim Hidden size of the model output.
     * @param targetDim Desired output dimensionality.
     * @returns One L2-normalized Float32Array of length targetDim per sequence
     *          (zero-padded when hiddenDim < targetDim, truncated otherwise).
     */
    meanPool(hidden, attention, batch, seq, hiddenDim, targetDim) {
        const vectors = [];
        // Defensive: derive the mask's own seq length in case it disagrees with
        // the hidden tensor's, and iterate only over the overlap.
        const seqFromMask = attention.length / Math.max(1, batch);
        const usableSeq = Math.min(seq, seqFromMask);
        const dim = Math.min(hiddenDim, targetDim);
        for (let b = 0; b < batch; b++) {
            const sum = new Float32Array(dim);
            let count = 0;
            const attOffset = b * seqFromMask;
            const hiddenOffset = b * seq * hiddenDim;
            for (let s = 0; s < usableSeq; s++) {
                // Only attended (non-padding) tokens contribute to the mean.
                if (attention[attOffset + s] > 0) {
                    count++;
                    const tokenOffset = hiddenOffset + s * hiddenDim;
                    for (let d = 0; d < dim; d++) {
                        sum[d] += hidden[tokenOffset + d];
                    }
                }
            }
            if (count === 0)
                count = 1; // avoid 0/0 for fully-masked sequences
            // Divide by token count, then L2-normalize (|| 1 guards zero norm).
            let norm = 0;
            for (let d = 0; d < dim; d++) {
                sum[d] /= count;
                norm += sum[d] * sum[d];
            }
            norm = Math.sqrt(norm) || 1;
            for (let d = 0; d < dim; d++) {
                sum[d] /= norm;
            }
            if (dim < targetDim) {
                // Zero-pad up to the requested output dimensionality.
                const padded = new Float32Array(targetDim);
                padded.set(sum);
                vectors.push(padded);
            }
            else {
                vectors.push(sum);
            }
        }
        return vectors;
    }
    /**
     * Embed a batch of texts.
     * @param texts Strings to encode.
     * @returns One Float32Array (length vectorDimensions) per text, or []
     *          when the model is not loaded.
     */
    runBatch(texts) {
        return __awaiter(this, void 0, void 0, function* () {
            var _a, _b, _c;
            if (!this.session || !this.tokenizer)
                return [];
            // Hugging Face tokenizer: batch-tokenize with padding/truncation.
            const encoded = yield this.tokenizer(texts, {
                padding: true,
                truncation: true,
                max_length: 256,
            });
            const inputTensor = encoded.input_ids;
            const attentionTensor = encoded.attention_mask;
            const inputIds = inputTensor.data;
            const attentionMask = attentionTensor.data;
            // Prefer the tokenizer-reported seq length; fall back to dividing the
            // flat buffer by the batch size when dims are unavailable.
            const seqLen = (_b = (_a = inputTensor.dims) === null || _a === void 0 ? void 0 : _a[1]) !== null && _b !== void 0 ? _b : Math.max(1, Math.floor(inputIds.length / texts.length));
            // token_type_ids may be absent or malformed; substitute all-zeros then.
            const tokenTypeIdsRaw = encoded.token_type_ids;
            const tokenTypeIds = tokenTypeIdsRaw &&
                tokenTypeIdsRaw.data.length === inputIds.length &&
                tokenTypeIdsRaw.data.length === attentionMask.length
                ? tokenTypeIdsRaw.data
                : new BigInt64Array(inputIds.length).fill(BigInt(0));
            const feeds = {
                input_ids: new ort.Tensor("int64", inputIds, [texts.length, seqLen]),
                attention_mask: new ort.Tensor("int64", attentionMask, [
                    texts.length,
                    seqLen,
                ]),
                token_type_ids: new ort.Tensor("int64", tokenTypeIds, [
                    texts.length,
                    seqLen,
                ]),
            };
            const sessionOut = yield this.session.run(feeds);
            // Prefer the conventional output name; fall back to the first output.
            const hidden = (_c = sessionOut.last_hidden_state) !== null && _c !== void 0 ? _c : sessionOut[this.session.outputNames[0]];
            if (!hidden) {
                throw new Error("Granite ONNX output missing last_hidden_state");
            }
            const hiddenData = hidden.data;
            const [batch, seq, dim] = hidden.dims;
            return this.meanPool(hiddenData, attentionMask, batch, seq, dim, this.vectorDimensions);
        });
    }
}
exports.GraniteModel = GraniteModel;
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* MLX embedding server HTTP client.
|
|
4
|
+
* Tries the local MLX GPU server for dense embeddings.
|
|
5
|
+
* Returns null if the server isn't running — caller falls back to ONNX.
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
41
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
42
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
43
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
44
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
45
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
46
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
47
|
+
});
|
|
48
|
+
};
|
|
49
|
+
Object.defineProperty(exports, "__esModule", { value: true });
exports.mlxEmbed = mlxEmbed;
exports.resetMlxCache = resetMlxCache;
const http = __importStar(require("node:http"));
// Local MLX server endpoint; port overridable via MLX_EMBED_PORT.
const MLX_PORT = parseInt(process.env.MLX_EMBED_PORT || "8100", 10);
const MLX_HOST = "127.0.0.1";
// Per-request timeout for embed POSTs.
const MLX_TIMEOUT_MS = 10000;
// OSGREP_EMBED_MODE=cpu disables the MLX path entirely (see mlxEmbed).
const EMBED_MODE = process.env.OSGREP_EMBED_MODE || "auto";
// Memoized health-check state: null = unknown, otherwise last probe result.
let mlxAvailable = null;
let lastCheck = 0;
// How long a health-check result stays valid before re-probing.
const CHECK_INTERVAL_MS = 30000;
|
|
60
|
+
/**
 * POST a JSON body to the local MLX server and parse the JSON response.
 * Never rejects: transport errors, timeouts, and unparsable responses all
 * resolve to { ok: false }, so callers can treat the server as unavailable.
 *
 * @param {string} path - Request path on the MLX server (e.g. "/embed").
 * @param {object} body - Value to JSON-serialize as the request body.
 * @returns {Promise<{ok: boolean, data?: any}>}
 */
function postJSON(path, body) {
    return new Promise((resolve) => {
        const payload = JSON.stringify(body);
        const requestOptions = {
            hostname: MLX_HOST,
            port: MLX_PORT,
            path,
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "Content-Length": Buffer.byteLength(payload),
            },
            timeout: MLX_TIMEOUT_MS,
        };
        const onResponse = (res) => {
            const pieces = [];
            res.on("data", (piece) => {
                pieces.push(piece);
            });
            res.on("end", () => {
                let parsed;
                try {
                    parsed = JSON.parse(Buffer.concat(pieces).toString("utf-8"));
                }
                catch (_err) {
                    // Malformed body — report failure without a data field.
                    resolve({ ok: false });
                    return;
                }
                resolve({ ok: res.statusCode === 200, data: parsed });
            });
        };
        const req = http.request(requestOptions, onResponse);
        req.on("error", () => resolve({ ok: false }));
        req.on("timeout", () => {
            req.destroy();
            resolve({ ok: false });
        });
        req.write(payload);
        req.end();
    });
}
|
|
95
|
+
/**
 * Check if the MLX server is reachable via its /health endpoint.
 * The probe result is memoized for CHECK_INTERVAL_MS so repeated embed
 * calls don't pay a fresh health check each time.
 *
 * @returns {Promise<boolean>} true when /health answered 200 recently.
 */
async function isMlxUp() {
    const now = Date.now();
    const cacheIsFresh = mlxAvailable !== null && now - lastCheck < CHECK_INTERVAL_MS;
    if (cacheIsFresh) {
        return mlxAvailable;
    }
    // Probe /health with a short timeout; any failure counts as "down".
    const up = await new Promise((resolve) => {
        const probe = http.get({ hostname: MLX_HOST, port: MLX_PORT, path: "/health", timeout: 2000 }, (res) => {
            res.resume(); // drain the body so the socket is released
            resolve(res.statusCode === 200);
        });
        probe.on("error", () => resolve(false));
        probe.on("timeout", () => {
            probe.destroy();
            resolve(false);
        });
    });
    mlxAvailable = up;
    lastCheck = now;
    return up;
}
|
|
120
|
+
/**
 * Get dense embeddings from the MLX server for a batch of texts.
 * Returns Float32Array[] on success, or null when the MLX path is disabled
 * (OSGREP_EMBED_MODE=cpu) or the server is unreachable/unusable — the
 * caller then falls back to the ONNX path.
 *
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<Float32Array[] | null>}
 */
async function mlxEmbed(texts) {
    if (EMBED_MODE === "cpu") {
        return null;
    }
    const serverUp = await isMlxUp();
    if (!serverUp) {
        return null;
    }
    const response = await postJSON("/embed", { texts });
    const vectors = response.ok && response.data ? response.data.vectors : undefined;
    if (!vectors) {
        // Server answered but not usefully; mark it down until reset/re-probe.
        mlxAvailable = false;
        return null;
    }
    return vectors.map((v) => new Float32Array(v));
}
|
|
138
|
+
/**
 * Forget the memoized availability result so the next isMlxUp() call
 * re-probes the server (e.g. right after starting it).
 */
function resetMlxCache() {
    lastCheck = 0;
    mlxAvailable = null;
}
|