@lucas-bur/pix 0.5.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +329 -150
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -54,8 +54,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
54
54
|
".js",
|
|
55
55
|
".jsx"
|
|
56
56
|
];
|
|
57
|
-
const
|
|
58
|
-
const allChunks = (yield* Effect.forEach(files, (file) => chunker.chunkFile(file), { concurrency:
|
|
57
|
+
const scanResult = yield* scanner.scanFiles(extensions);
|
|
58
|
+
const allChunks = (yield* Effect.forEach(scanResult.files, (file) => chunker.chunkFile(file), { concurrency: Math.max(1, config.chunkConcurrency ?? 8) })).flat();
|
|
59
59
|
const totalChunks = allChunks.length;
|
|
60
60
|
const totalFiles = new Set(allChunks.map((c) => c.file)).size;
|
|
61
61
|
const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
|
|
@@ -97,6 +97,7 @@ const DEFAULT_CONFIG = {
|
|
|
97
97
|
dims: 384,
|
|
98
98
|
chunkLines: 60,
|
|
99
99
|
overlapLines: 10,
|
|
100
|
+
chunkConcurrency: 8,
|
|
100
101
|
files: {}
|
|
101
102
|
};
|
|
102
103
|
//#endregion
|
|
@@ -141,9 +142,21 @@ var ResetIndex = class extends Effect.Service()("ResetIndex", {
|
|
|
141
142
|
}) {};
|
|
142
143
|
//#endregion
|
|
143
144
|
//#region src/lib/error-format.ts
|
|
145
|
+
/**
|
|
146
|
+
* Maps Data.TaggedError _tag values to JSON error codes for structured output. Used by formatError
|
|
147
|
+
* to produce the spec-mandated `{ error: true, code, message }` format.
|
|
148
|
+
*/
|
|
144
149
|
const errorCodes = {
|
|
145
|
-
ConfigError: "
|
|
146
|
-
|
|
150
|
+
ConfigError: "CONFIG_ERROR",
|
|
151
|
+
ConfigNotFoundError: "CONFIG_NOT_FOUND",
|
|
152
|
+
ConfigMalformedError: "CONFIG_MALFORMED",
|
|
153
|
+
NoIndexError: "NO_INDEX",
|
|
154
|
+
DiskFullError: "DISK_FULL",
|
|
155
|
+
StoreError: "STORE_ERROR",
|
|
156
|
+
ChunkerError: "CHUNK_ERROR",
|
|
157
|
+
ModelLoadError: "MODEL_LOAD_ERROR",
|
|
158
|
+
InferenceError: "INFERENCE_ERROR",
|
|
159
|
+
ScanFailed: "SCAN_FAILED"
|
|
147
160
|
};
|
|
148
161
|
const messageFromError = (error) => {
|
|
149
162
|
if (typeof error === "string") return error;
|
|
@@ -154,32 +167,39 @@ const codeFromError = (error) => {
|
|
|
154
167
|
if (error && typeof error === "object" && "_tag" in error) return errorCodes[String(error._tag)] ?? "UNKNOWN";
|
|
155
168
|
return "UNKNOWN";
|
|
156
169
|
};
|
|
170
|
+
/** Format an error as spec-mandated JSON: `{ error: true, code: "...", message: "..." }`. */
|
|
157
171
|
const formatError = (error) => JSON.stringify({
|
|
158
172
|
error: true,
|
|
159
173
|
code: codeFromError(error),
|
|
160
174
|
message: messageFromError(error)
|
|
161
175
|
});
|
|
176
|
+
/** Log the error as JSON to stdout, then re-fail to preserve non-zero exit code. */
|
|
177
|
+
const reportError = (error) => Console.log(formatError(error)).pipe(Effect.flatMap(() => Effect.fail(error)));
|
|
162
178
|
//#endregion
|
|
163
179
|
//#region src/commands/index-cmd.ts
|
|
180
|
+
const logFlagWarnings = (force, verbose, json) => {
|
|
181
|
+
if (json) return Effect.void;
|
|
182
|
+
const warnings = [force ? "--force is currently not implemented and only a placeholder." : void 0, verbose ? "--verbose is currently not implemented and only a placeholder." : void 0].filter((msg) => msg !== void 0);
|
|
183
|
+
return Effect.forEach(warnings, (msg) => Effect.logInfo(msg), { discard: true });
|
|
184
|
+
};
|
|
185
|
+
const logHumanOutput = (chunks, files, duration) => Effect.logInfo(`Indexed ${chunks} chunks from ${files} files in ${duration}.`);
|
|
164
186
|
/** CLI command: pix index [--force] [--verbose] [--json] */
|
|
165
187
|
const indexCommand = Command.make("index", {
|
|
166
188
|
force: Options.boolean("force").pipe(Options.withDefault(false)),
|
|
167
189
|
verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
|
|
168
190
|
json: Options.boolean("json").pipe(Options.withDefault(false))
|
|
169
191
|
}, ({ force, verbose, json }) => Effect.gen(function* () {
|
|
170
|
-
|
|
171
|
-
if (verbose && !json) yield* Effect.logInfo("--verbose is currently not implemented and only a placeholder.");
|
|
192
|
+
yield* logFlagWarnings(force, verbose, json);
|
|
172
193
|
const startTime = Date.now();
|
|
173
|
-
const result = yield* IndexProject.index()
|
|
174
|
-
if (result._tag === "Left") return yield* Effect.fail(result.left);
|
|
194
|
+
const result = yield* IndexProject.index();
|
|
175
195
|
const duration = `${((Date.now() - startTime) / 1e3).toFixed(1)}s`;
|
|
176
196
|
if (json) return yield* Console.log(JSON.stringify({
|
|
177
|
-
chunks: result.
|
|
178
|
-
files: result.
|
|
197
|
+
chunks: result.status.chunks,
|
|
198
|
+
files: result.status.files,
|
|
179
199
|
duration
|
|
180
200
|
}));
|
|
181
|
-
yield*
|
|
182
|
-
}).pipe(Effect.
|
|
201
|
+
yield* logHumanOutput(result.status.chunks, result.status.files, duration);
|
|
202
|
+
}).pipe(Effect.catchAll(reportError)));
|
|
183
203
|
//#endregion
|
|
184
204
|
//#region src/commands/init.ts
|
|
185
205
|
/** CLI command: pix init [--json] */
|
|
@@ -188,7 +208,10 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
|
|
|
188
208
|
if (json) return yield* Console.log(JSON.stringify(result, null, 2));
|
|
189
209
|
yield* Effect.logInfo("Created .pix/config.json with default settings.");
|
|
190
210
|
yield* Effect.logInfo("Reminder: Add `.pix` to your `.gitignore` file to avoid committing the index.");
|
|
191
|
-
}).pipe(Effect.
|
|
211
|
+
}).pipe(Effect.catchTags({
|
|
212
|
+
ConfigError: reportError,
|
|
213
|
+
DiskFullError: reportError
|
|
214
|
+
})));
|
|
192
215
|
//#endregion
|
|
193
216
|
//#region src/commands/query.ts
|
|
194
217
|
const DEFAULT_TOP_K = 5;
|
|
@@ -216,6 +239,25 @@ const formatResult = (result) => {
|
|
|
216
239
|
const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
|
|
217
240
|
return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
|
|
218
241
|
};
|
|
242
|
+
const toJsonOutput = (results, ctxLines) => results.map((r) => ({
|
|
243
|
+
score: r.score,
|
|
244
|
+
file: r.file,
|
|
245
|
+
startLine: r.startLine,
|
|
246
|
+
endLine: r.endLine,
|
|
247
|
+
text: r.text,
|
|
248
|
+
...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
|
|
249
|
+
...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
|
|
250
|
+
}));
|
|
251
|
+
const renderResults = (results) => Effect.gen(function* () {
|
|
252
|
+
if (results.length === 0) {
|
|
253
|
+
yield* Effect.logInfo("No results found");
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
256
|
+
for (const result of results) {
|
|
257
|
+
yield* Console.log(formatResult(result));
|
|
258
|
+
yield* Console.log("---");
|
|
259
|
+
}
|
|
260
|
+
});
|
|
219
261
|
/** CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] */
|
|
220
262
|
const queryCommand = Command.make("query", {
|
|
221
263
|
queryText: Args.text({ name: "query" }),
|
|
@@ -226,29 +268,17 @@ const queryCommand = Command.make("query", {
|
|
|
226
268
|
const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
|
|
227
269
|
const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
|
|
228
270
|
const clamped = clampTopK(topK);
|
|
229
|
-
if (clamped.clamped) yield* Effect.logDebug(`topK clamped from ${topK} to ${clamped.value}`);
|
|
271
|
+
if (clamped.clamped && !json) yield* Effect.logDebug(`topK clamped from ${topK} to ${clamped.value}`);
|
|
230
272
|
const results = yield* QueryProject.queryProject(queryText, clamped.value);
|
|
231
|
-
if (json)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
}));
|
|
241
|
-
return yield* Console.log(JSON.stringify(output, null, 2));
|
|
242
|
-
}
|
|
243
|
-
if (results.length === 0) {
|
|
244
|
-
yield* Effect.logInfo("No results found");
|
|
245
|
-
return;
|
|
246
|
-
}
|
|
247
|
-
for (const result of results) {
|
|
248
|
-
yield* Console.log(formatResult(result));
|
|
249
|
-
yield* Console.log("---");
|
|
250
|
-
}
|
|
251
|
-
}).pipe(Effect.tapError((error) => Console.log(formatError(error)))));
|
|
273
|
+
if (json) return yield* Console.log(JSON.stringify(toJsonOutput(results, ctxLines), null, 2));
|
|
274
|
+
yield* renderResults(results);
|
|
275
|
+
}).pipe(Effect.catchTags({
|
|
276
|
+
ModelLoadError: reportError,
|
|
277
|
+
InferenceError: reportError,
|
|
278
|
+
DiskFullError: reportError,
|
|
279
|
+
StoreError: reportError,
|
|
280
|
+
NoIndexError: reportError
|
|
281
|
+
})));
|
|
252
282
|
//#endregion
|
|
253
283
|
//#region src/lib/format.ts
|
|
254
284
|
/** Format byte count as human-readable string (e.g. "1.5 MB") */
|
|
@@ -265,29 +295,34 @@ const formatBytes = (bytes) => {
|
|
|
265
295
|
};
|
|
266
296
|
//#endregion
|
|
267
297
|
//#region src/commands/reset.ts
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
elapsedMs
|
|
279
|
-
}));
|
|
280
|
-
if (!result.deletedChunks && !result.deletedVectors) {
|
|
298
|
+
const logJsonResult = (result, elapsedMs) => Console.log(JSON.stringify({
|
|
299
|
+
status: "ok",
|
|
300
|
+
deletedChunks: result.deletedChunks,
|
|
301
|
+
deletedVectors: result.deletedVectors,
|
|
302
|
+
freedBytes: result.freedBytes,
|
|
303
|
+
elapsedMs
|
|
304
|
+
}));
|
|
305
|
+
const logHumanResult = (result, elapsedMs) => Effect.gen(function* () {
|
|
306
|
+
const deletedParts = [result.deletedChunks ? "chunks.jsonl" : null, result.deletedVectors ? "vectors.bin" : null].filter((part) => part !== null);
|
|
307
|
+
if (deletedParts.length === 0) {
|
|
281
308
|
yield* Effect.logInfo("Nothing to reset.");
|
|
282
309
|
return;
|
|
283
310
|
}
|
|
284
|
-
|
|
285
|
-
if (result.deletedChunks) parts.push("chunks.jsonl");
|
|
286
|
-
if (result.deletedVectors) parts.push("vectors.bin");
|
|
287
|
-
yield* Effect.logInfo(`Deleted: ${parts.join(", ")}`);
|
|
311
|
+
yield* Effect.logInfo(`Deleted: ${deletedParts.join(", ")}`);
|
|
288
312
|
yield* Effect.logInfo(`Freed: ${formatBytes(result.freedBytes)}`);
|
|
289
313
|
yield* Effect.logInfo(`Time: ${elapsedMs}ms`);
|
|
290
|
-
})
|
|
314
|
+
});
|
|
315
|
+
/** CLI command: pix reset [--json] */
|
|
316
|
+
const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, ({ json }) => Effect.gen(function* () {
|
|
317
|
+
const start = yield* Clock.currentTimeMillis;
|
|
318
|
+
const result = yield* ResetIndex.reset();
|
|
319
|
+
const elapsedMs = (yield* Clock.currentTimeMillis) - start;
|
|
320
|
+
if (json) return yield* logJsonResult(result, elapsedMs);
|
|
321
|
+
yield* logHumanResult(result, elapsedMs);
|
|
322
|
+
}).pipe(Effect.catchTags({
|
|
323
|
+
DiskFullError: reportError,
|
|
324
|
+
StoreError: reportError
|
|
325
|
+
})));
|
|
291
326
|
//#endregion
|
|
292
327
|
//#region src/commands/status.ts
|
|
293
328
|
/** CLI command: pix status [--json] */
|
|
@@ -300,7 +335,7 @@ const statusCommand = Command.make("status", { json: Options.boolean("json").pip
|
|
|
300
335
|
yield* Effect.logInfo(`Total lines: ${result.totalLines.toLocaleString()}`);
|
|
301
336
|
yield* Effect.logInfo(`Index size: ${formatBytes(result.byteSize)}`);
|
|
302
337
|
yield* Effect.logInfo(`Last indexed: ${lastIndexStr}`);
|
|
303
|
-
}).pipe(Effect.
|
|
338
|
+
}).pipe(Effect.catchTags({ StoreError: reportError })));
|
|
304
339
|
//#endregion
|
|
305
340
|
//#region src/cli.ts
|
|
306
341
|
const VERSION = createRequire(import.meta.url)("../package.json").version;
|
|
@@ -319,36 +354,67 @@ const cli = (args) => Command.run(pix, {
|
|
|
319
354
|
version: VERSION
|
|
320
355
|
})(args).pipe(Effect.provide(CliConfig.layer({ showTypes: false })));
|
|
321
356
|
//#endregion
|
|
357
|
+
//#region src/domain/errors.ts
|
|
358
|
+
/** Config file or directory does not exist. Run pix init first. */
|
|
359
|
+
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
|
|
360
|
+
/** Config file exists but contains invalid JSON. */
|
|
361
|
+
var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
|
|
362
|
+
/** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
|
|
363
|
+
var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
|
|
364
|
+
/** Disk is full — write operation could not complete. */
|
|
365
|
+
var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
|
|
366
|
+
/** Generic index store I/O failure (read, write, delete). */
|
|
367
|
+
var StoreError = class extends Data.TaggedError("StoreError") {};
|
|
368
|
+
/** Source file could not be read during chunking (binary, permissions, encoding). */
|
|
369
|
+
var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
|
|
370
|
+
/** Embedding model could not be downloaded or loaded. */
|
|
371
|
+
var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
|
|
372
|
+
/** Embedding model failed during inference. */
|
|
373
|
+
var InferenceError = class extends Data.TaggedError("InferenceError") {};
|
|
374
|
+
/**
|
|
375
|
+
* Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
|
|
376
|
+
* via ScanResult.skipped.
|
|
377
|
+
*/
|
|
378
|
+
var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
|
|
379
|
+
//#endregion
|
|
322
380
|
//#region src/services/chunker.ts
|
|
323
381
|
const MIN_CHUNK_CHARS = 20;
|
|
382
|
+
const readFileContent = (fs, file) => fs.readFileString(file).pipe(Effect.mapError((cause) => new ChunkerError({
|
|
383
|
+
message: "Could not read source file for chunking",
|
|
384
|
+
file,
|
|
385
|
+
cause
|
|
386
|
+
})));
|
|
387
|
+
const buildChunks = (file, content, config) => {
|
|
388
|
+
const lines = content.split("\n");
|
|
389
|
+
const chunks = [];
|
|
390
|
+
let idx = 0;
|
|
391
|
+
let startLine = 1;
|
|
392
|
+
while (startLine <= lines.length) {
|
|
393
|
+
const endLine = Math.min(startLine + config.chunkLines - 1, lines.length);
|
|
394
|
+
const text = lines.slice(startLine - 1, endLine).join("\n");
|
|
395
|
+
if (text.length >= MIN_CHUNK_CHARS) {
|
|
396
|
+
const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
|
|
397
|
+
chunks.push({
|
|
398
|
+
id,
|
|
399
|
+
idx,
|
|
400
|
+
file,
|
|
401
|
+
startLine,
|
|
402
|
+
endLine,
|
|
403
|
+
text
|
|
404
|
+
});
|
|
405
|
+
idx++;
|
|
406
|
+
}
|
|
407
|
+
startLine += config.chunkLines - config.overlapLines;
|
|
408
|
+
}
|
|
409
|
+
return chunks;
|
|
410
|
+
};
|
|
324
411
|
const make$4 = Effect.gen(function* () {
|
|
325
412
|
const fs = yield* FileSystem.FileSystem;
|
|
326
413
|
const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
|
|
327
414
|
const chunkFile = (file) => Effect.gen(function* () {
|
|
328
|
-
const content = yield* fs
|
|
415
|
+
const content = yield* readFileContent(fs, file);
|
|
329
416
|
if (content === "") return [];
|
|
330
|
-
|
|
331
|
-
const chunks = [];
|
|
332
|
-
let idx = 0;
|
|
333
|
-
let startLine = 1;
|
|
334
|
-
while (startLine <= lines.length) {
|
|
335
|
-
const endLine = Math.min(startLine + config.chunkLines - 1, lines.length);
|
|
336
|
-
const text = lines.slice(startLine - 1, endLine).join("\n");
|
|
337
|
-
if (text.length >= MIN_CHUNK_CHARS) {
|
|
338
|
-
const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
|
|
339
|
-
chunks.push({
|
|
340
|
-
id,
|
|
341
|
-
idx,
|
|
342
|
-
file,
|
|
343
|
-
startLine,
|
|
344
|
-
endLine,
|
|
345
|
-
text
|
|
346
|
-
});
|
|
347
|
-
idx++;
|
|
348
|
-
}
|
|
349
|
-
startLine += config.chunkLines - config.overlapLines;
|
|
350
|
-
}
|
|
351
|
-
return chunks;
|
|
417
|
+
return buildChunks(file, content, config);
|
|
352
418
|
});
|
|
353
419
|
return { chunkFile };
|
|
354
420
|
});
|
|
@@ -357,23 +423,46 @@ const ChunkerLive = Layer.effect(Chunker, make$4);
|
|
|
357
423
|
//#region src/services/config-store.ts
|
|
358
424
|
const CONFIG_DIR = ".pix";
|
|
359
425
|
const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
|
|
426
|
+
const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
|
|
427
|
+
const mapConfigWriteError = (cause, path, action) => {
|
|
428
|
+
if (isPlatformReason$1(cause, "BadResource")) return new DiskFullError({
|
|
429
|
+
message: `Disk full: could not ${action}`,
|
|
430
|
+
path,
|
|
431
|
+
cause
|
|
432
|
+
});
|
|
433
|
+
return new ConfigError({
|
|
434
|
+
message: `Failed to ${action}`,
|
|
435
|
+
cause
|
|
436
|
+
});
|
|
437
|
+
};
|
|
360
438
|
const make$3 = Effect.gen(function* () {
|
|
361
439
|
const fs = yield* FileSystem.FileSystem;
|
|
362
440
|
const writeConfig = (config) => Effect.gen(function* () {
|
|
363
441
|
const configJson = JSON.stringify(config, null, 2);
|
|
364
|
-
yield* fs.makeDirectory(CONFIG_DIR, { recursive: true });
|
|
365
|
-
yield* fs.writeFileString(CONFIG_PATH, configJson);
|
|
366
|
-
})
|
|
367
|
-
message: "Failed to write config.json",
|
|
368
|
-
cause
|
|
369
|
-
})));
|
|
442
|
+
yield* fs.makeDirectory(CONFIG_DIR, { recursive: true }).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_DIR, "create .pix directory")));
|
|
443
|
+
yield* fs.writeFileString(CONFIG_PATH, configJson).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_PATH, "write config.json")));
|
|
444
|
+
});
|
|
370
445
|
const readConfig = () => Effect.gen(function* () {
|
|
371
|
-
const content = yield* fs.readFileString(CONFIG_PATH)
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
446
|
+
const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
|
|
447
|
+
if (isPlatformReason$1(cause, "NotFound")) return new ConfigNotFoundError({
|
|
448
|
+
message: "Config file not found. Run pix init first.",
|
|
449
|
+
path: CONFIG_PATH,
|
|
450
|
+
cause
|
|
451
|
+
});
|
|
452
|
+
return new ConfigError({
|
|
453
|
+
message: "Failed to read config.json",
|
|
454
|
+
cause
|
|
455
|
+
});
|
|
456
|
+
}));
|
|
457
|
+
return yield* Effect.try({
|
|
458
|
+
try: () => JSON.parse(content),
|
|
459
|
+
catch: (error) => new ConfigMalformedError({
|
|
460
|
+
message: "Invalid JSON in config.json",
|
|
461
|
+
path: CONFIG_PATH,
|
|
462
|
+
cause: error
|
|
463
|
+
})
|
|
464
|
+
});
|
|
465
|
+
});
|
|
377
466
|
const configExists = () => Effect.gen(function* () {
|
|
378
467
|
return yield* fs.exists(CONFIG_PATH);
|
|
379
468
|
}).pipe(Effect.catchAll(() => Effect.succeed(false)));
|
|
@@ -407,21 +496,25 @@ const make$2 = Effect.gen(function* () {
|
|
|
407
496
|
device: "cpu",
|
|
408
497
|
dtype: "q8"
|
|
409
498
|
});
|
|
410
|
-
}))
|
|
499
|
+
}).pipe(Effect.mapError((cause) => new ModelLoadError({
|
|
500
|
+
message: "Failed to load embedding model",
|
|
501
|
+
model: MODEL_NAME,
|
|
502
|
+
cause
|
|
503
|
+
}))));
|
|
411
504
|
const embed = (text) => Effect.gen(function* () {
|
|
412
505
|
const extractor = yield* getExtractor;
|
|
413
506
|
const data = (yield* Effect.tryPromise(() => extractor(text, {
|
|
414
507
|
pooling: "mean",
|
|
415
508
|
normalize: false
|
|
416
|
-
})))
|
|
509
|
+
})).pipe(Effect.mapError((cause) => new InferenceError({
|
|
510
|
+
message: "Embedding inference failed",
|
|
511
|
+
cause
|
|
512
|
+
})))).data;
|
|
417
513
|
return {
|
|
418
514
|
vector: normalize(data),
|
|
419
515
|
dims: DIMS
|
|
420
516
|
};
|
|
421
|
-
})
|
|
422
|
-
vector: new Float32Array(DIMS),
|
|
423
|
-
dims: DIMS
|
|
424
|
-
})));
|
|
517
|
+
});
|
|
425
518
|
const batch = (texts) => Effect.gen(function* () {
|
|
426
519
|
const extractor = yield* getExtractor;
|
|
427
520
|
const results = [];
|
|
@@ -430,7 +523,10 @@ const make$2 = Effect.gen(function* () {
|
|
|
430
523
|
const tensor = yield* Effect.tryPromise(() => extractor(slice, {
|
|
431
524
|
pooling: "mean",
|
|
432
525
|
normalize: false
|
|
433
|
-
}))
|
|
526
|
+
})).pipe(Effect.mapError((cause) => new InferenceError({
|
|
527
|
+
message: "Batch embedding inference failed",
|
|
528
|
+
cause
|
|
529
|
+
})));
|
|
434
530
|
const data = tensor.data;
|
|
435
531
|
const n = tensor.dims[0];
|
|
436
532
|
for (let j = 0; j < n; j++) {
|
|
@@ -442,7 +538,7 @@ const make$2 = Effect.gen(function* () {
|
|
|
442
538
|
vector,
|
|
443
539
|
dims: DIMS
|
|
444
540
|
}));
|
|
445
|
-
})
|
|
541
|
+
});
|
|
446
542
|
return {
|
|
447
543
|
embed,
|
|
448
544
|
batch
|
|
@@ -461,43 +557,96 @@ const ALWAYS_IGNORE = new Set([
|
|
|
461
557
|
]);
|
|
462
558
|
const make$1 = Effect.gen(function* () {
|
|
463
559
|
const fs = yield* FileSystem.FileSystem;
|
|
560
|
+
const readFileWithSkip = (path, mkReason) => fs.readFileString(path).pipe(Effect.map((content) => ({
|
|
561
|
+
content,
|
|
562
|
+
skipped: null
|
|
563
|
+
})), Effect.catchAll((error) => Effect.succeed({
|
|
564
|
+
content: "",
|
|
565
|
+
skipped: {
|
|
566
|
+
path,
|
|
567
|
+
reason: mkReason(error)
|
|
568
|
+
}
|
|
569
|
+
})));
|
|
570
|
+
const readDirectoryWithSkip = (dir) => fs.readDirectory(dir).pipe(Effect.map((entries) => ({
|
|
571
|
+
entries,
|
|
572
|
+
skipped: null
|
|
573
|
+
})), Effect.catchAll((error) => Effect.succeed({
|
|
574
|
+
entries: [],
|
|
575
|
+
skipped: {
|
|
576
|
+
path: dir,
|
|
577
|
+
reason: `Could not read directory: ${String(error)}`
|
|
578
|
+
}
|
|
579
|
+
})));
|
|
580
|
+
const statWithSkip = (fullPath) => fs.stat(fullPath).pipe(Effect.map((info) => ({
|
|
581
|
+
info,
|
|
582
|
+
skipped: null
|
|
583
|
+
})), Effect.catchAll((error) => Effect.succeed({
|
|
584
|
+
info: null,
|
|
585
|
+
skipped: {
|
|
586
|
+
path: fullPath,
|
|
587
|
+
reason: `Could not stat: ${String(error)}`
|
|
588
|
+
}
|
|
589
|
+
})));
|
|
464
590
|
const loadGitignoreRules = Effect.gen(function* () {
|
|
465
591
|
const ig = ignore();
|
|
466
592
|
const cwd = process.cwd();
|
|
467
|
-
const
|
|
468
|
-
|
|
593
|
+
const skipped = [];
|
|
594
|
+
const rootContent = yield* readFileWithSkip(`${cwd}/.gitignore`, (error) => `Could not read gitignore: ${String(error)}`);
|
|
595
|
+
if (rootContent.skipped) skipped.push(rootContent.skipped);
|
|
596
|
+
if (rootContent.content.trim()) ig.add(rootContent.content.split("\n"));
|
|
469
597
|
const excludePath = `${cwd}/.git/info/exclude`;
|
|
470
598
|
if (yield* fs.exists(excludePath)) {
|
|
471
|
-
const excludeContent = yield*
|
|
472
|
-
if (excludeContent.
|
|
599
|
+
const excludeContent = yield* readFileWithSkip(excludePath, (error) => `Could not read exclude file: ${String(error)}`);
|
|
600
|
+
if (excludeContent.skipped) skipped.push(excludeContent.skipped);
|
|
601
|
+
if (excludeContent.content.trim()) ig.add(excludeContent.content.split("\n"));
|
|
473
602
|
}
|
|
474
|
-
return
|
|
475
|
-
|
|
603
|
+
return {
|
|
604
|
+
ig,
|
|
605
|
+
skipped
|
|
606
|
+
};
|
|
607
|
+
});
|
|
476
608
|
const walk = (dir, extensions) => Effect.gen(function* () {
|
|
477
|
-
const
|
|
478
|
-
let
|
|
479
|
-
|
|
609
|
+
const result = yield* readDirectoryWithSkip(dir);
|
|
610
|
+
let files = [];
|
|
611
|
+
const skipped = [];
|
|
612
|
+
if (result.skipped) skipped.push(result.skipped);
|
|
613
|
+
for (const entry of result.entries) {
|
|
480
614
|
if (ALWAYS_IGNORE.has(entry)) continue;
|
|
481
615
|
const fullPath = `${dir}/${entry}`;
|
|
482
|
-
const info = yield*
|
|
483
|
-
if (
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
616
|
+
const info = yield* statWithSkip(fullPath);
|
|
617
|
+
if (info.skipped) {
|
|
618
|
+
skipped.push(info.skipped);
|
|
619
|
+
continue;
|
|
620
|
+
}
|
|
621
|
+
if (!info.info) continue;
|
|
622
|
+
if (info.info.type === "Directory") {
|
|
623
|
+
const sub = yield* walk(fullPath, extensions);
|
|
624
|
+
files.push(...sub.files);
|
|
625
|
+
skipped.push(...sub.skipped);
|
|
626
|
+
} else if (info.info.type === "File") {
|
|
488
627
|
const dotIndex = entry.lastIndexOf(".");
|
|
489
628
|
if (dotIndex === -1) continue;
|
|
490
629
|
const ext = entry.slice(dotIndex);
|
|
491
|
-
if (extensions.has(ext))
|
|
630
|
+
if (extensions.has(ext)) files.push(fullPath);
|
|
492
631
|
}
|
|
493
632
|
}
|
|
494
|
-
return
|
|
633
|
+
return {
|
|
634
|
+
files,
|
|
635
|
+
skipped
|
|
636
|
+
};
|
|
495
637
|
});
|
|
496
638
|
const scanFiles = (extensions) => Effect.gen(function* () {
|
|
497
|
-
const ig = yield* loadGitignoreRules
|
|
639
|
+
const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules.pipe(Effect.mapError((cause) => new ScanFailed({
|
|
640
|
+
message: `Failed to load gitignore rules: ${String(cause)}`,
|
|
641
|
+
cause
|
|
642
|
+
})));
|
|
498
643
|
const cwd = process.cwd();
|
|
499
|
-
const
|
|
500
|
-
|
|
644
|
+
const { files: paths, skipped: walkSkipped } = yield* walk(cwd, new Set(extensions));
|
|
645
|
+
const relativePaths = paths.map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
|
|
646
|
+
return {
|
|
647
|
+
files: ig.filter(relativePaths).map((p) => `${cwd}/${p}`),
|
|
648
|
+
skipped: [...ignoreSkipped, ...walkSkipped]
|
|
649
|
+
};
|
|
501
650
|
});
|
|
502
651
|
return { scanFiles };
|
|
503
652
|
});
|
|
@@ -507,6 +656,7 @@ const ScannerLive = Layer.effect(Scanner, make$1);
|
|
|
507
656
|
const STORE_DIR = ".pix";
|
|
508
657
|
const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
|
|
509
658
|
const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
|
|
659
|
+
const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
|
|
510
660
|
/**
|
|
511
661
|
* FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
|
|
512
662
|
* statistics.
|
|
@@ -533,8 +683,50 @@ const make = Effect.gen(function* () {
|
|
|
533
683
|
} catch {}
|
|
534
684
|
return files;
|
|
535
685
|
};
|
|
686
|
+
const toStoreError = (operation, path) => (cause) => {
|
|
687
|
+
if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
|
|
688
|
+
message: `Disk full during ${operation}`,
|
|
689
|
+
path,
|
|
690
|
+
cause
|
|
691
|
+
});
|
|
692
|
+
return new StoreError({
|
|
693
|
+
message: `Failed to ${operation}`,
|
|
694
|
+
path,
|
|
695
|
+
cause
|
|
696
|
+
});
|
|
697
|
+
};
|
|
698
|
+
const toReadError = (operation, path) => (cause) => new StoreError({
|
|
699
|
+
message: `Failed to ${operation}`,
|
|
700
|
+
path,
|
|
701
|
+
cause
|
|
702
|
+
});
|
|
703
|
+
/** Wrap any fs Effect so failures become StoreError | DiskFullError. */
|
|
704
|
+
const withStoreError = (op, operation, path) => op.pipe(Effect.mapError(toStoreError(operation, path)));
|
|
705
|
+
/** Wrap any fs Effect so failures become StoreError (read-only). */
|
|
706
|
+
const withReadError = (op, operation, path) => op.pipe(Effect.mapError(toReadError(operation, path)));
|
|
707
|
+
/** Ensure a directory exists, creating it recursively if absent. */
|
|
708
|
+
const ensureDirExists = (dir, description = dir) => Effect.gen(function* () {
|
|
709
|
+
if (!(yield* withStoreError(fs.exists(dir), `check ${description}`))) yield* withStoreError(fs.makeDirectory(dir, { recursive: true }), `create ${description}`);
|
|
710
|
+
});
|
|
711
|
+
/**
|
|
712
|
+
* Remove a file if it exists, accumulating freed bytes. Returns the number of freed bytes (0 if
|
|
713
|
+
* the file was absent).
|
|
714
|
+
*/
|
|
715
|
+
const removeIfExists = (file, description) => Effect.gen(function* () {
|
|
716
|
+
if (!(yield* withStoreError(fs.exists(file), `check ${description}`))) return {
|
|
717
|
+
freed: 0,
|
|
718
|
+
deleted: false
|
|
719
|
+
};
|
|
720
|
+
const stat = yield* withStoreError(fs.stat(file), `stat ${description}`, file);
|
|
721
|
+
const freed = stat && "size" in stat ? Number(stat.size) : 0;
|
|
722
|
+
yield* withStoreError(fs.remove(file), `delete ${description}`, file);
|
|
723
|
+
return {
|
|
724
|
+
freed,
|
|
725
|
+
deleted: true
|
|
726
|
+
};
|
|
727
|
+
});
|
|
536
728
|
const store = (chunks, embeddings) => Effect.gen(function* () {
|
|
537
|
-
|
|
729
|
+
yield* ensureDirExists(STORE_DIR, ".pix directory");
|
|
538
730
|
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
539
731
|
const chunksLines = chunks.map((c) => JSON.stringify({
|
|
540
732
|
id: c.id,
|
|
@@ -544,23 +736,23 @@ const make = Effect.gen(function* () {
|
|
|
544
736
|
endLine: c.endLine,
|
|
545
737
|
text: c.text
|
|
546
738
|
}));
|
|
547
|
-
yield* fs.writeFileString(chunksTemp, chunksLines.join("\n"));
|
|
548
|
-
yield* fs.rename(chunksTemp, CHUNKS_FILE);
|
|
739
|
+
yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
|
|
740
|
+
yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
|
|
549
741
|
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
550
742
|
const dims = embeddings[0]?.dims ?? 384;
|
|
551
743
|
const totalFloats = embeddings.length * dims;
|
|
552
744
|
const vectorsArray = new Float32Array(totalFloats);
|
|
553
745
|
for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
|
|
554
746
|
const buffer = Buffer.from(vectorsArray.buffer);
|
|
555
|
-
yield* fs.writeFile(vectorsTemp, buffer);
|
|
556
|
-
yield* fs.rename(vectorsTemp, VECTORS_FILE);
|
|
747
|
+
yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
|
|
748
|
+
yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
|
|
557
749
|
});
|
|
558
750
|
const search = (query, topK) => Effect.gen(function* () {
|
|
559
|
-
const chunksExists = yield* fs.exists(CHUNKS_FILE);
|
|
560
|
-
const vectorsExists = yield* fs.exists(VECTORS_FILE);
|
|
561
|
-
if (!chunksExists || !vectorsExists) return
|
|
562
|
-
const chunkLines = (yield* fs.readFileString(CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
|
|
563
|
-
const vectorsBuffer = yield* fs.readFile(VECTORS_FILE);
|
|
751
|
+
const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
|
|
752
|
+
const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
|
|
753
|
+
if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
|
|
754
|
+
const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
|
|
755
|
+
const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
|
|
564
756
|
const vectors = new Float32Array(vectorsBuffer.buffer);
|
|
565
757
|
const results = [];
|
|
566
758
|
for (let i = 0; i < chunkLines.length; i++) try {
|
|
@@ -583,8 +775,8 @@ const make = Effect.gen(function* () {
|
|
|
583
775
|
return results.slice(0, topK);
|
|
584
776
|
});
|
|
585
777
|
const getStatus = () => Effect.gen(function* () {
|
|
586
|
-
const chunksExists = yield* fs.exists(CHUNKS_FILE);
|
|
587
|
-
const vectorsExists = yield* fs.exists(VECTORS_FILE);
|
|
778
|
+
const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
|
|
779
|
+
const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
|
|
588
780
|
if (!chunksExists || !vectorsExists) return {
|
|
589
781
|
chunks: 0,
|
|
590
782
|
files: 0,
|
|
@@ -593,13 +785,13 @@ const make = Effect.gen(function* () {
|
|
|
593
785
|
totalLines: 0,
|
|
594
786
|
byteSize: 0
|
|
595
787
|
};
|
|
596
|
-
const lines = (yield* fs.readFileString(CHUNKS_FILE)
|
|
788
|
+
const lines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
|
|
597
789
|
const chunks = lines.length;
|
|
598
790
|
const files = countUniqueFiles(lines).size;
|
|
599
791
|
const model = "";
|
|
600
792
|
const totalLines = countTotalLines(lines);
|
|
601
|
-
const vectorsStat = yield* fs.stat(VECTORS_FILE)
|
|
602
|
-
const byteSize =
|
|
793
|
+
const vectorsStat = yield* withReadError(fs.stat(VECTORS_FILE), "stat vectors", VECTORS_FILE);
|
|
794
|
+
const byteSize = "size" in vectorsStat ? Number(vectorsStat.size) : 0;
|
|
603
795
|
return {
|
|
604
796
|
chunks,
|
|
605
797
|
files,
|
|
@@ -610,25 +802,12 @@ const make = Effect.gen(function* () {
|
|
|
610
802
|
};
|
|
611
803
|
});
|
|
612
804
|
const reset = () => Effect.gen(function* () {
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
let freedBytes = 0;
|
|
616
|
-
if (yield* fs.exists(CHUNKS_FILE)) {
|
|
617
|
-
const stat = yield* fs.stat(CHUNKS_FILE);
|
|
618
|
-
freedBytes += stat && "size" in stat ? Number(stat.size) : 0;
|
|
619
|
-
yield* fs.remove(CHUNKS_FILE);
|
|
620
|
-
deletedChunks = true;
|
|
621
|
-
}
|
|
622
|
-
if (yield* fs.exists(VECTORS_FILE)) {
|
|
623
|
-
const stat = yield* fs.stat(VECTORS_FILE);
|
|
624
|
-
freedBytes += stat && "size" in stat ? Number(stat.size) : 0;
|
|
625
|
-
yield* fs.remove(VECTORS_FILE);
|
|
626
|
-
deletedVectors = true;
|
|
627
|
-
}
|
|
805
|
+
const chunks = yield* removeIfExists(CHUNKS_FILE, "chunks");
|
|
806
|
+
const vectors = yield* removeIfExists(VECTORS_FILE, "vectors");
|
|
628
807
|
return {
|
|
629
|
-
deletedChunks,
|
|
630
|
-
deletedVectors,
|
|
631
|
-
freedBytes
|
|
808
|
+
deletedChunks: chunks.deleted,
|
|
809
|
+
deletedVectors: vectors.deleted,
|
|
810
|
+
freedBytes: chunks.freed + vectors.freed
|
|
632
811
|
};
|
|
633
812
|
});
|
|
634
813
|
return {
|