@lucas-bur/pix 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.mjs +414 -168
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -23,7 +23,7 @@ var GetStatus = class extends Effect.Service()("GetStatus", {
23
23
  const configStore = yield* ConfigStore;
24
24
  const getStatus = () => Effect.gen(function* () {
25
25
  const status = yield* store.getStatus();
26
- const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.model), Effect.catchAll(() => Effect.succeed(status.model)));
26
+ const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.embedder.model), Effect.catchAll(() => Effect.succeed(status.model)));
27
27
  return {
28
28
  ...status,
29
29
  model: configModel
@@ -54,8 +54,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
54
54
  ".js",
55
55
  ".jsx"
56
56
  ];
57
- const files = yield* scanner.scanFiles(extensions);
58
- const allChunks = (yield* Effect.forEach(files, (file) => chunker.chunkFile(file), { concurrency: "unbounded" })).flat();
57
+ const scanResult = yield* scanner.scanFiles(extensions);
58
+ const allChunks = (yield* Effect.forEach(scanResult.files, (file) => chunker.chunkFile(file), { concurrency: Math.max(1, config.chunkConcurrency ?? 8) })).flat();
59
59
  const totalChunks = allChunks.length;
60
60
  const totalFiles = new Set(allChunks.map((c) => c.file)).size;
61
61
  const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
@@ -93,11 +93,15 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
93
93
  var ConfigError = class extends Data.TaggedError("ConfigError") {};
94
94
  const DEFAULT_CONFIG = {
95
95
  schema: "1",
96
- model: "Xenova/all-MiniLM-L6-v2",
97
- dims: 384,
98
96
  chunkLines: 60,
99
97
  overlapLines: 10,
100
- files: {}
98
+ chunkConcurrency: 8,
99
+ files: {},
100
+ embedder: {
101
+ model: "Xenova/all-MiniLM-L6-v2",
102
+ device: "auto",
103
+ dtype: "fp32"
104
+ }
101
105
  };
102
106
  //#endregion
103
107
  //#region src/application/init-project.ts
@@ -141,9 +145,21 @@ var ResetIndex = class extends Effect.Service()("ResetIndex", {
141
145
  }) {};
142
146
  //#endregion
143
147
  //#region src/lib/error-format.ts
148
+ /**
149
+ * Maps Data.TaggedError _tag values to JSON error codes for structured output. Read by codeFromError
150
+ * (unmapped tags become "UNKNOWN"), which formatError uses to fill the `code` field of its JSON output.
151
+ */
144
152
  const errorCodes = {
145
- ConfigError: "CONFIG_MISSING",
146
- PlatformError: "PLATFORM_ERROR"
153
+ ConfigError: "CONFIG_ERROR",
154
+ ConfigNotFoundError: "CONFIG_NOT_FOUND",
155
+ ConfigMalformedError: "CONFIG_MALFORMED",
156
+ NoIndexError: "NO_INDEX",
157
+ DiskFullError: "DISK_FULL",
158
+ StoreError: "STORE_ERROR",
159
+ ChunkerError: "CHUNK_ERROR",
160
+ ModelLoadError: "MODEL_LOAD_ERROR",
161
+ InferenceError: "INFERENCE_ERROR",
162
+ ScanFailed: "SCAN_FAILED"
147
163
  };
148
164
  const messageFromError = (error) => {
149
165
  if (typeof error === "string") return error;
@@ -154,32 +170,45 @@ const codeFromError = (error) => {
154
170
  if (error && typeof error === "object" && "_tag" in error) return errorCodes[String(error._tag)] ?? "UNKNOWN";
155
171
  return "UNKNOWN";
156
172
  };
173
+ const causeFromError = (error) => {
174
+ if (typeof error === "string") return error;
175
+ if (error && typeof error === "object" && "cause" in error) return String(error.cause);
176
+ return "Unknown cause";
177
+ };
178
+ /** Format an error as JSON: the spec-mandated `{ error: true, code: "...", message: "..." }` plus a `cause` string. */
157
179
  const formatError = (error) => JSON.stringify({
158
180
  error: true,
159
181
  code: codeFromError(error),
160
- message: messageFromError(error)
182
+ message: messageFromError(error),
183
+ cause: causeFromError(error)
161
184
  });
185
+ /** Log the error as JSON to stdout, then re-fail to preserve non-zero exit code. */
186
+ const reportError = (error) => Console.log(formatError(error)).pipe(Effect.flatMap(() => Effect.fail(error)));
162
187
  //#endregion
163
188
  //#region src/commands/index-cmd.ts
189
+ const logFlagWarnings = (force, verbose, json) => {
190
+ if (json) return Effect.void;
191
+ const warnings = [force ? "--force is currently not implemented and only a placeholder." : void 0, verbose ? "--verbose is currently not implemented and only a placeholder." : void 0].filter((msg) => msg !== void 0);
192
+ return Effect.forEach(warnings, (msg) => Effect.logInfo(msg), { discard: true });
193
+ };
194
+ const logHumanOutput = (chunks, files, duration) => Effect.logInfo(`Indexed ${chunks} chunks from ${files} files in ${duration}.`);
164
195
  /** CLI command: pix index [--force] [--verbose] [--json] */
165
196
  const indexCommand = Command.make("index", {
166
197
  force: Options.boolean("force").pipe(Options.withDefault(false)),
167
198
  verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
168
199
  json: Options.boolean("json").pipe(Options.withDefault(false))
169
200
  }, ({ force, verbose, json }) => Effect.gen(function* () {
170
- if (force && !json) yield* Effect.logInfo("--force is currently not implemented and only a placeholder.");
171
- if (verbose && !json) yield* Effect.logInfo("--verbose is currently not implemented and only a placeholder.");
201
+ yield* logFlagWarnings(force, verbose, json);
172
202
  const startTime = Date.now();
173
- const result = yield* IndexProject.index().pipe(Effect.either);
174
- if (result._tag === "Left") return yield* Effect.fail(result.left);
203
+ const result = yield* IndexProject.index();
175
204
  const duration = `${((Date.now() - startTime) / 1e3).toFixed(1)}s`;
176
205
  if (json) return yield* Console.log(JSON.stringify({
177
- chunks: result.right.status.chunks,
178
- files: result.right.status.files,
206
+ chunks: result.status.chunks,
207
+ files: result.status.files,
179
208
  duration
180
209
  }));
181
- yield* Effect.logInfo(`Indexed ${result.right.status.chunks} chunks from ${result.right.status.files} files in ${duration}.`);
182
- }).pipe(Effect.tapError((error) => Console.log(formatError(error)))));
210
+ yield* logHumanOutput(result.status.chunks, result.status.files, duration);
211
+ }).pipe(Effect.catchAll(reportError)));
183
212
  //#endregion
184
213
  //#region src/commands/init.ts
185
214
  /** CLI command: pix init [--json] */
@@ -188,7 +217,10 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
188
217
  if (json) return yield* Console.log(JSON.stringify(result, null, 2));
189
218
  yield* Effect.logInfo("Created .pix/config.json with default settings.");
190
219
  yield* Effect.logInfo("Reminder: Add `.pix` to your `.gitignore` file to avoid committing the index.");
191
- }).pipe(Effect.tapError((error) => Console.log(formatError(error)))));
220
+ }).pipe(Effect.catchTags({
221
+ ConfigError: reportError,
222
+ DiskFullError: reportError
223
+ })));
192
224
  //#endregion
193
225
  //#region src/commands/query.ts
194
226
  const DEFAULT_TOP_K = 5;
@@ -216,6 +248,25 @@ const formatResult = (result) => {
216
248
  const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
217
249
  return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
218
250
  };
251
+ const toJsonOutput = (results, ctxLines) => results.map((r) => ({
252
+ score: r.score,
253
+ file: r.file,
254
+ startLine: r.startLine,
255
+ endLine: r.endLine,
256
+ text: r.text,
257
+ ...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
258
+ ...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
259
+ }));
260
+ const renderResults = (results) => Effect.gen(function* () {
261
+ if (results.length === 0) {
262
+ yield* Effect.logInfo("No results found");
263
+ return;
264
+ }
265
+ for (const result of results) {
266
+ yield* Console.log(formatResult(result));
267
+ yield* Console.log("---");
268
+ }
269
+ });
219
270
  /** CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] */
220
271
  const queryCommand = Command.make("query", {
221
272
  queryText: Args.text({ name: "query" }),
@@ -226,29 +277,17 @@ const queryCommand = Command.make("query", {
226
277
  const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
227
278
  const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
228
279
  const clamped = clampTopK(topK);
229
- if (clamped.clamped) yield* Effect.logDebug(`topK clamped from ${topK} to ${clamped.value}`);
280
+ if (clamped.clamped && !json) yield* Effect.logDebug(`topK clamped from ${topK} to ${clamped.value}`);
230
281
  const results = yield* QueryProject.queryProject(queryText, clamped.value);
231
- if (json) {
232
- const output = results.map((r) => ({
233
- score: r.score,
234
- file: r.file,
235
- startLine: r.startLine,
236
- endLine: r.endLine,
237
- text: r.text,
238
- ...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
239
- ...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
240
- }));
241
- return yield* Console.log(JSON.stringify(output, null, 2));
242
- }
243
- if (results.length === 0) {
244
- yield* Effect.logInfo("No results found");
245
- return;
246
- }
247
- for (const result of results) {
248
- yield* Console.log(formatResult(result));
249
- yield* Console.log("---");
250
- }
251
- }).pipe(Effect.tapError((error) => Console.log(formatError(error)))));
282
+ if (json) return yield* Console.log(JSON.stringify(toJsonOutput(results, ctxLines), null, 2));
283
+ yield* renderResults(results);
284
+ }).pipe(Effect.catchTags({
285
+ ModelLoadError: reportError,
286
+ InferenceError: reportError,
287
+ DiskFullError: reportError,
288
+ StoreError: reportError,
289
+ NoIndexError: reportError
290
+ })));
252
291
  //#endregion
253
292
  //#region src/lib/format.ts
254
293
  /** Format byte count as human-readable string (e.g. "1.5 MB") */
@@ -265,29 +304,34 @@ const formatBytes = (bytes) => {
265
304
  };
266
305
  //#endregion
267
306
  //#region src/commands/reset.ts
268
- /** CLI command: pix reset [--json] */
269
- const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, ({ json }) => Effect.gen(function* () {
270
- const start = yield* Clock.currentTimeMillis;
271
- const result = yield* ResetIndex.reset();
272
- const elapsedMs = (yield* Clock.currentTimeMillis) - start;
273
- if (json) return yield* Console.log(JSON.stringify({
274
- status: "ok",
275
- deletedChunks: result.deletedChunks,
276
- deletedVectors: result.deletedVectors,
277
- freedBytes: result.freedBytes,
278
- elapsedMs
279
- }));
280
- if (!result.deletedChunks && !result.deletedVectors) {
307
+ const logJsonResult = (result, elapsedMs) => Console.log(JSON.stringify({
308
+ status: "ok",
309
+ deletedChunks: result.deletedChunks,
310
+ deletedVectors: result.deletedVectors,
311
+ freedBytes: result.freedBytes,
312
+ elapsedMs
313
+ }));
314
+ const logHumanResult = (result, elapsedMs) => Effect.gen(function* () {
315
+ const deletedParts = [result.deletedChunks ? "chunks.jsonl" : null, result.deletedVectors ? "vectors.bin" : null].filter((part) => part !== null);
316
+ if (deletedParts.length === 0) {
281
317
  yield* Effect.logInfo("Nothing to reset.");
282
318
  return;
283
319
  }
284
- const parts = [];
285
- if (result.deletedChunks) parts.push("chunks.jsonl");
286
- if (result.deletedVectors) parts.push("vectors.bin");
287
- yield* Effect.logInfo(`Deleted: ${parts.join(", ")}`);
320
+ yield* Effect.logInfo(`Deleted: ${deletedParts.join(", ")}`);
288
321
  yield* Effect.logInfo(`Freed: ${formatBytes(result.freedBytes)}`);
289
322
  yield* Effect.logInfo(`Time: ${elapsedMs}ms`);
290
- }).pipe(Effect.tapError((error) => Console.log(formatError(error)))));
323
+ });
324
+ /** CLI command: pix reset [--json] */
325
+ const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, ({ json }) => Effect.gen(function* () {
326
+ const start = yield* Clock.currentTimeMillis;
327
+ const result = yield* ResetIndex.reset();
328
+ const elapsedMs = (yield* Clock.currentTimeMillis) - start;
329
+ if (json) return yield* logJsonResult(result, elapsedMs);
330
+ yield* logHumanResult(result, elapsedMs);
331
+ }).pipe(Effect.catchTags({
332
+ DiskFullError: reportError,
333
+ StoreError: reportError
334
+ })));
291
335
  //#endregion
292
336
  //#region src/commands/status.ts
293
337
  /** CLI command: pix status [--json] */
@@ -300,7 +344,7 @@ const statusCommand = Command.make("status", { json: Options.boolean("json").pip
300
344
  yield* Effect.logInfo(`Total lines: ${result.totalLines.toLocaleString()}`);
301
345
  yield* Effect.logInfo(`Index size: ${formatBytes(result.byteSize)}`);
302
346
  yield* Effect.logInfo(`Last indexed: ${lastIndexStr}`);
303
- }).pipe(Effect.tapError((error) => Console.log(formatError(error)))));
347
+ }).pipe(Effect.catchTags({ StoreError: reportError })));
304
348
  //#endregion
305
349
  //#region src/cli.ts
306
350
  const VERSION = createRequire(import.meta.url)("../package.json").version;
@@ -319,36 +363,67 @@ const cli = (args) => Command.run(pix, {
319
363
  version: VERSION
320
364
  })(args).pipe(Effect.provide(CliConfig.layer({ showTypes: false })));
321
365
  //#endregion
366
+ //#region src/domain/errors.ts
367
+ /** Config file or directory does not exist. Run pix init first. */
368
+ var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
369
+ /** Config file exists but contains invalid JSON. */
370
+ var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
371
+ /** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
372
+ var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
373
+ /** Disk is full — write operation could not complete. */
374
+ var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
375
+ /** Generic index store I/O failure (read, write, delete). */
376
+ var StoreError = class extends Data.TaggedError("StoreError") {};
377
+ /** Source file could not be read during chunking (binary, permissions, encoding). */
378
+ var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
379
+ /** Embedding model could not be downloaded or loaded. */
380
+ var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
381
+ /** Embedding model failed during inference. */
382
+ var InferenceError = class extends Data.TaggedError("InferenceError") {};
383
+ /**
384
+ * Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
385
+ * via ScanResult.skipped.
386
+ */
387
+ var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
388
+ //#endregion
322
389
  //#region src/services/chunker.ts
323
390
  const MIN_CHUNK_CHARS = 20;
391
+ const readFileContent = (fs, file) => fs.readFileString(file).pipe(Effect.mapError((cause) => new ChunkerError({
392
+ message: "Could not read source file for chunking",
393
+ file,
394
+ cause
395
+ })));
396
+ const buildChunks = (file, content, config) => {
397
+ const lines = content.split("\n");
398
+ const chunks = [];
399
+ let idx = 0;
400
+ let startLine = 1;
401
+ while (startLine <= lines.length) {
402
+ const endLine = Math.min(startLine + config.chunkLines - 1, lines.length);
403
+ const text = lines.slice(startLine - 1, endLine).join("\n");
404
+ if (text.length >= MIN_CHUNK_CHARS) {
405
+ const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
406
+ chunks.push({
407
+ id,
408
+ idx,
409
+ file,
410
+ startLine,
411
+ endLine,
412
+ text
413
+ });
414
+ idx++;
415
+ }
416
+ startLine += config.chunkLines - config.overlapLines;
417
+ }
418
+ return chunks;
419
+ };
324
420
  const make$4 = Effect.gen(function* () {
325
421
  const fs = yield* FileSystem.FileSystem;
326
422
  const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
327
423
  const chunkFile = (file) => Effect.gen(function* () {
328
- const content = yield* fs.readFileString(file).pipe(Effect.tapError((err) => Effect.logWarning(`[Chunker] Skipping unreadable file: ${file} — ${String(err)}`)), Effect.catchAll(() => Effect.succeed("")));
424
+ const content = yield* readFileContent(fs, file);
329
425
  if (content === "") return [];
330
- const lines = content.split("\n");
331
- const chunks = [];
332
- let idx = 0;
333
- let startLine = 1;
334
- while (startLine <= lines.length) {
335
- const endLine = Math.min(startLine + config.chunkLines - 1, lines.length);
336
- const text = lines.slice(startLine - 1, endLine).join("\n");
337
- if (text.length >= MIN_CHUNK_CHARS) {
338
- const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
339
- chunks.push({
340
- id,
341
- idx,
342
- file,
343
- startLine,
344
- endLine,
345
- text
346
- });
347
- idx++;
348
- }
349
- startLine += config.chunkLines - config.overlapLines;
350
- }
351
- return chunks;
426
+ return buildChunks(file, content, config);
352
427
  });
353
428
  return { chunkFile };
354
429
  });
@@ -357,23 +432,46 @@ const ChunkerLive = Layer.effect(Chunker, make$4);
357
432
  //#region src/services/config-store.ts
358
433
  const CONFIG_DIR = ".pix";
359
434
  const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
435
+ const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
436
+ const mapConfigWriteError = (cause, path, action) => {
437
+ if (isPlatformReason$1(cause, "BadResource")) return new DiskFullError({
438
+ message: `Disk full: could not ${action}`,
439
+ path,
440
+ cause
441
+ });
442
+ return new ConfigError({
443
+ message: `Failed to ${action}`,
444
+ cause
445
+ });
446
+ };
360
447
  const make$3 = Effect.gen(function* () {
361
448
  const fs = yield* FileSystem.FileSystem;
362
449
  const writeConfig = (config) => Effect.gen(function* () {
363
450
  const configJson = JSON.stringify(config, null, 2);
364
- yield* fs.makeDirectory(CONFIG_DIR, { recursive: true });
365
- yield* fs.writeFileString(CONFIG_PATH, configJson);
366
- }).pipe(Effect.mapError((cause) => new ConfigError({
367
- message: "Failed to write config.json",
368
- cause
369
- })));
451
+ yield* fs.makeDirectory(CONFIG_DIR, { recursive: true }).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_DIR, "create .pix directory")));
452
+ yield* fs.writeFileString(CONFIG_PATH, configJson).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_PATH, "write config.json")));
453
+ });
370
454
  const readConfig = () => Effect.gen(function* () {
371
- const content = yield* fs.readFileString(CONFIG_PATH);
372
- return JSON.parse(content);
373
- }).pipe(Effect.mapError((cause) => new ConfigError({
374
- message: "Failed to read config.json",
375
- cause
376
- })));
455
+ const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
456
+ if (isPlatformReason$1(cause, "NotFound")) return new ConfigNotFoundError({
457
+ message: "Config file not found. Run pix init first.",
458
+ path: CONFIG_PATH,
459
+ cause
460
+ });
461
+ return new ConfigError({
462
+ message: "Failed to read config.json",
463
+ cause
464
+ });
465
+ }));
466
+ return yield* Effect.try({
467
+ try: () => JSON.parse(content),
468
+ catch: (error) => new ConfigMalformedError({
469
+ message: "Invalid JSON in config.json",
470
+ path: CONFIG_PATH,
471
+ cause: error
472
+ })
473
+ });
474
+ });
377
475
  const configExists = () => Effect.gen(function* () {
378
476
  return yield* fs.exists(CONFIG_PATH);
379
477
  }).pipe(Effect.catchAll(() => Effect.succeed(false)));
@@ -385,9 +483,34 @@ const make$3 = Effect.gen(function* () {
385
483
  });
386
484
  const ConfigStoreLive = Layer.effect(ConfigStore, make$3);
387
485
  //#endregion
486
+ //#region src/domain/models.ts
487
+ /** Registry of supported embedding models. */
488
+ const MODEL_REGISTRY = {
489
+ "Xenova/all-MiniLM-L6-v2": {
490
+ id: "Xenova/all-MiniLM-L6-v2",
491
+ dims: 384,
492
+ dtypes: [
493
+ "fp32",
494
+ "fp16",
495
+ "q8",
496
+ "q4"
497
+ ],
498
+ description: "General-purpose sentence embeddings, 23MB q8"
499
+ },
500
+ "Xenova/bge-small-en-v1.5": {
501
+ id: "Xenova/bge-small-en-v1.5",
502
+ dims: 384,
503
+ dtypes: [
504
+ "fp32",
505
+ "fp16",
506
+ "q8",
507
+ "q4"
508
+ ],
509
+ description: "BGE retrieval-optimized embeddings, 34MB q8"
510
+ }
511
+ };
512
+ //#endregion
388
513
  //#region src/services/embedder.ts
389
- const MODEL_NAME = "Xenova/all-MiniLM-L6-v2";
390
- const DIMS = 384;
391
514
  const CACHE_DIR = ".pix/cache";
392
515
  const BATCH_SIZE = 16;
393
516
  env.cacheDir = CACHE_DIR;
@@ -400,28 +523,65 @@ const normalize = (arr) => {
400
523
  for (let i = 0; i < arr.length; i++) result[i] = arr[i] / norm;
401
524
  return result;
402
525
  };
526
+ const resolveEmbedderConfig = (configStore) => Effect.gen(function* () {
527
+ const config = yield* configStore.readConfig().pipe(Effect.catchAll(() => Effect.succeed(void 0)));
528
+ const model = config?.embedder.model ?? "Xenova/all-MiniLM-L6-v2";
529
+ const device = config?.embedder.device ?? "auto";
530
+ const dtype = config?.embedder.dtype ?? "fp32";
531
+ const modelInfo = MODEL_REGISTRY[model];
532
+ if (!modelInfo) return yield* new ModelLoadError({
533
+ message: `Unknown embedding model "${model}". Available: ${Object.keys(MODEL_REGISTRY).join(", ")}`,
534
+ model
535
+ });
536
+ if (!modelInfo.dtypes.includes(dtype)) return yield* new ModelLoadError({
537
+ message: `Unsupported dtype "${dtype}" for model "${model}". Supported: ${modelInfo.dtypes.join(", ")}`,
538
+ model
539
+ });
540
+ return {
541
+ model,
542
+ device,
543
+ dtype,
544
+ dims: modelInfo.dims
545
+ };
546
+ });
547
+ const createExtractor = (opts) => Effect.tryPromise(async () => {
548
+ const { pipeline } = await import("@huggingface/transformers");
549
+ return pipeline("feature-extraction", opts.model, {
550
+ device: opts.device,
551
+ dtype: opts.dtype
552
+ });
553
+ }).pipe(Effect.mapError((cause) => new ModelLoadError({
554
+ message: `Failed to load embedding model with device "${opts.device}"`,
555
+ model: opts.model,
556
+ cause
557
+ })));
558
+ const createExtractorWithFallback = (opts) => {
559
+ if (opts.device === "cpu") return createExtractor(opts);
560
+ return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
561
+ yield* Effect.logWarning(`Embedding device "${opts.device}" failed, falling back to "cpu": ${originalError.message}`);
562
+ return yield* createExtractor({
563
+ ...opts,
564
+ device: "cpu"
565
+ }).pipe(Effect.catchAll(() => Effect.fail(originalError)));
566
+ })));
567
+ };
403
568
  const make$2 = Effect.gen(function* () {
404
- const getExtractor = yield* Effect.cached(Effect.tryPromise(async () => {
405
- const { pipeline } = await import("@huggingface/transformers");
406
- return pipeline("feature-extraction", MODEL_NAME, {
407
- device: "cpu",
408
- dtype: "q8"
409
- });
410
- }));
569
+ const cfg = yield* resolveEmbedderConfig(yield* ConfigStore);
570
+ const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
411
571
  const embed = (text) => Effect.gen(function* () {
412
572
  const extractor = yield* getExtractor;
413
573
  const data = (yield* Effect.tryPromise(() => extractor(text, {
414
574
  pooling: "mean",
415
575
  normalize: false
416
- }))).data;
576
+ })).pipe(Effect.mapError((cause) => new InferenceError({
577
+ message: "Embedding inference failed",
578
+ cause
579
+ })))).data;
417
580
  return {
418
581
  vector: normalize(data),
419
- dims: DIMS
582
+ dims: cfg.dims
420
583
  };
421
- }).pipe(Effect.catchAll(() => Effect.succeed({
422
- vector: new Float32Array(DIMS),
423
- dims: DIMS
424
- })));
584
+ });
425
585
  const batch = (texts) => Effect.gen(function* () {
426
586
  const extractor = yield* getExtractor;
427
587
  const results = [];
@@ -430,25 +590,28 @@ const make$2 = Effect.gen(function* () {
430
590
  const tensor = yield* Effect.tryPromise(() => extractor(slice, {
431
591
  pooling: "mean",
432
592
  normalize: false
433
- }));
593
+ })).pipe(Effect.mapError((cause) => new InferenceError({
594
+ message: "Batch embedding inference failed",
595
+ cause
596
+ })));
434
597
  const data = tensor.data;
435
598
  const n = tensor.dims[0];
436
599
  for (let j = 0; j < n; j++) {
437
- const offset = j * DIMS;
438
- results.push(normalize(data.slice(offset, offset + DIMS)));
600
+ const offset = j * cfg.dims;
601
+ results.push(normalize(data.slice(offset, offset + cfg.dims)));
439
602
  }
440
603
  }
441
604
  return results.map((vector) => ({
442
605
  vector,
443
- dims: DIMS
606
+ dims: cfg.dims
444
607
  }));
445
- }).pipe(Effect.catchAll(() => Effect.succeed([])));
608
+ });
446
609
  return {
447
610
  embed,
448
611
  batch
449
612
  };
450
613
  });
451
- const OnnxEmbedderLive = Layer.effect(Embedder, make$2);
614
+ const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
452
615
  //#endregion
453
616
  //#region src/services/scanner.ts
454
617
  const ALWAYS_IGNORE = new Set([
@@ -461,43 +624,96 @@ const ALWAYS_IGNORE = new Set([
461
624
  ]);
462
625
  const make$1 = Effect.gen(function* () {
463
626
  const fs = yield* FileSystem.FileSystem;
627
+ const readFileWithSkip = (path, mkReason) => fs.readFileString(path).pipe(Effect.map((content) => ({
628
+ content,
629
+ skipped: null
630
+ })), Effect.catchAll((error) => Effect.succeed({
631
+ content: "",
632
+ skipped: {
633
+ path,
634
+ reason: mkReason(error)
635
+ }
636
+ })));
637
+ const readDirectoryWithSkip = (dir) => fs.readDirectory(dir).pipe(Effect.map((entries) => ({
638
+ entries,
639
+ skipped: null
640
+ })), Effect.catchAll((error) => Effect.succeed({
641
+ entries: [],
642
+ skipped: {
643
+ path: dir,
644
+ reason: `Could not read directory: ${String(error)}`
645
+ }
646
+ })));
647
+ const statWithSkip = (fullPath) => fs.stat(fullPath).pipe(Effect.map((info) => ({
648
+ info,
649
+ skipped: null
650
+ })), Effect.catchAll((error) => Effect.succeed({
651
+ info: null,
652
+ skipped: {
653
+ path: fullPath,
654
+ reason: `Could not stat: ${String(error)}`
655
+ }
656
+ })));
464
657
  const loadGitignoreRules = Effect.gen(function* () {
465
658
  const ig = ignore();
466
659
  const cwd = process.cwd();
467
- const rootContent = yield* fs.readFileString(`${cwd}/.gitignore`).pipe(Effect.catchAll(() => Effect.succeed("")));
468
- if (rootContent.trim()) ig.add(rootContent.split("\n"));
660
+ const skipped = [];
661
+ const rootContent = yield* readFileWithSkip(`${cwd}/.gitignore`, (error) => `Could not read gitignore: ${String(error)}`);
662
+ if (rootContent.skipped) skipped.push(rootContent.skipped);
663
+ if (rootContent.content.trim()) ig.add(rootContent.content.split("\n"));
469
664
  const excludePath = `${cwd}/.git/info/exclude`;
470
665
  if (yield* fs.exists(excludePath)) {
471
- const excludeContent = yield* fs.readFileString(excludePath).pipe(Effect.catchAll(() => Effect.succeed("")));
472
- if (excludeContent.trim()) ig.add(excludeContent.split("\n"));
666
+ const excludeContent = yield* readFileWithSkip(excludePath, (error) => `Could not read exclude file: ${String(error)}`);
667
+ if (excludeContent.skipped) skipped.push(excludeContent.skipped);
668
+ if (excludeContent.content.trim()) ig.add(excludeContent.content.split("\n"));
473
669
  }
474
- return ig;
475
- }).pipe(Effect.catchAll(() => Effect.succeed(ignore())));
670
+ return {
671
+ ig,
672
+ skipped
673
+ };
674
+ });
476
675
  const walk = (dir, extensions) => Effect.gen(function* () {
477
- const entries = yield* fs.readDirectory(dir).pipe(Effect.catchAll(() => Effect.succeed([])));
478
- let results = [];
479
- for (const entry of entries) {
676
+ const result = yield* readDirectoryWithSkip(dir);
677
+ let files = [];
678
+ const skipped = [];
679
+ if (result.skipped) skipped.push(result.skipped);
680
+ for (const entry of result.entries) {
480
681
  if (ALWAYS_IGNORE.has(entry)) continue;
481
682
  const fullPath = `${dir}/${entry}`;
482
- const info = yield* fs.stat(fullPath).pipe(Effect.catchAll(() => Effect.succeed(null)));
483
- if (!info) continue;
484
- if (info.type === "Directory") {
485
- const subResults = yield* walk(fullPath, extensions);
486
- results.push(...subResults);
487
- } else if (info.type === "File") {
683
+ const info = yield* statWithSkip(fullPath);
684
+ if (info.skipped) {
685
+ skipped.push(info.skipped);
686
+ continue;
687
+ }
688
+ if (!info.info) continue;
689
+ if (info.info.type === "Directory") {
690
+ const sub = yield* walk(fullPath, extensions);
691
+ files.push(...sub.files);
692
+ skipped.push(...sub.skipped);
693
+ } else if (info.info.type === "File") {
488
694
  const dotIndex = entry.lastIndexOf(".");
489
695
  if (dotIndex === -1) continue;
490
696
  const ext = entry.slice(dotIndex);
491
- if (extensions.has(ext)) results.push(fullPath);
697
+ if (extensions.has(ext)) files.push(fullPath);
492
698
  }
493
699
  }
494
- return results;
700
+ return {
701
+ files,
702
+ skipped
703
+ };
495
704
  });
496
705
  const scanFiles = (extensions) => Effect.gen(function* () {
497
- const ig = yield* loadGitignoreRules;
706
+ const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules.pipe(Effect.mapError((cause) => new ScanFailed({
707
+ message: `Failed to load gitignore rules: ${String(cause)}`,
708
+ cause
709
+ })));
498
710
  const cwd = process.cwd();
499
- const relativePaths = (yield* walk(cwd, new Set(extensions))).map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
500
- return ig.filter(relativePaths).map((p) => `${cwd}/${p}`);
711
+ const { files: paths, skipped: walkSkipped } = yield* walk(cwd, new Set(extensions));
712
+ const relativePaths = paths.map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
713
+ return {
714
+ files: ig.filter(relativePaths).map((p) => `${cwd}/${p}`),
715
+ skipped: [...ignoreSkipped, ...walkSkipped]
716
+ };
501
717
  });
502
718
  return { scanFiles };
503
719
  });
@@ -507,6 +723,7 @@ const ScannerLive = Layer.effect(Scanner, make$1);
507
723
  const STORE_DIR = ".pix";
508
724
  const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
509
725
  const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
726
+ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
510
727
  /**
511
728
  * FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
512
729
  * statistics.
@@ -533,8 +750,50 @@ const make = Effect.gen(function* () {
533
750
  } catch {}
534
751
  return files;
535
752
  };
753
+ const toStoreError = (operation, path) => (cause) => {
754
+ if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
755
+ message: `Disk full during ${operation}`,
756
+ path,
757
+ cause
758
+ });
759
+ return new StoreError({
760
+ message: `Failed to ${operation}`,
761
+ path,
762
+ cause
763
+ });
764
+ };
765
+ const toReadError = (operation, path) => (cause) => new StoreError({
766
+ message: `Failed to ${operation}`,
767
+ path,
768
+ cause
769
+ });
770
+ /** Wrap any fs Effect so failures become StoreError | DiskFullError. */
771
+ const withStoreError = (op, operation, path) => op.pipe(Effect.mapError(toStoreError(operation, path)));
772
+ /** Wrap any fs Effect so failures become StoreError (read-only). */
773
+ const withReadError = (op, operation, path) => op.pipe(Effect.mapError(toReadError(operation, path)));
774
+ /** Ensure a directory exists, creating it recursively if absent. */
775
+ const ensureDirExists = (dir, description = dir) => Effect.gen(function* () {
776
+ if (!(yield* withStoreError(fs.exists(dir), `check ${description}`))) yield* withStoreError(fs.makeDirectory(dir, { recursive: true }), `create ${description}`);
777
+ });
778
+ /**
779
+ * Remove a file if it exists, accumulating freed bytes. Returns the number of freed bytes (0 if
780
+ * the file was absent).
781
+ */
782
+ const removeIfExists = (file, description) => Effect.gen(function* () {
783
+ if (!(yield* withStoreError(fs.exists(file), `check ${description}`))) return {
784
+ freed: 0,
785
+ deleted: false
786
+ };
787
+ const stat = yield* withStoreError(fs.stat(file), `stat ${description}`, file);
788
+ const freed = stat && "size" in stat ? Number(stat.size) : 0;
789
+ yield* withStoreError(fs.remove(file), `delete ${description}`, file);
790
+ return {
791
+ freed,
792
+ deleted: true
793
+ };
794
+ });
536
795
  const store = (chunks, embeddings) => Effect.gen(function* () {
537
- if (!(yield* fs.exists(STORE_DIR))) yield* fs.makeDirectory(STORE_DIR, { recursive: true });
796
+ yield* ensureDirExists(STORE_DIR, ".pix directory");
538
797
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
539
798
  const chunksLines = chunks.map((c) => JSON.stringify({
540
799
  id: c.id,
@@ -544,23 +803,23 @@ const make = Effect.gen(function* () {
544
803
  endLine: c.endLine,
545
804
  text: c.text
546
805
  }));
547
- yield* fs.writeFileString(chunksTemp, chunksLines.join("\n"));
548
- yield* fs.rename(chunksTemp, CHUNKS_FILE);
806
+ yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
807
+ yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
549
808
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
550
809
  const dims = embeddings[0]?.dims ?? 384;
551
810
  const totalFloats = embeddings.length * dims;
552
811
  const vectorsArray = new Float32Array(totalFloats);
553
812
  for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
554
813
  const buffer = Buffer.from(vectorsArray.buffer);
555
- yield* fs.writeFile(vectorsTemp, buffer);
556
- yield* fs.rename(vectorsTemp, VECTORS_FILE);
814
+ yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
815
+ yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
557
816
  });
558
817
  const search = (query, topK) => Effect.gen(function* () {
559
- const chunksExists = yield* fs.exists(CHUNKS_FILE);
560
- const vectorsExists = yield* fs.exists(VECTORS_FILE);
561
- if (!chunksExists || !vectorsExists) return [];
562
- const chunkLines = (yield* fs.readFileString(CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
563
- const vectorsBuffer = yield* fs.readFile(VECTORS_FILE);
818
+ const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
819
+ const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
820
+ if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
821
+ const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
822
+ const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
564
823
  const vectors = new Float32Array(vectorsBuffer.buffer);
565
824
  const results = [];
566
825
  for (let i = 0; i < chunkLines.length; i++) try {
@@ -583,8 +842,8 @@ const make = Effect.gen(function* () {
583
842
  return results.slice(0, topK);
584
843
  });
585
844
  const getStatus = () => Effect.gen(function* () {
586
- const chunksExists = yield* fs.exists(CHUNKS_FILE);
587
- const vectorsExists = yield* fs.exists(VECTORS_FILE);
845
+ const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
846
+ const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
588
847
  if (!chunksExists || !vectorsExists) return {
589
848
  chunks: 0,
590
849
  files: 0,
@@ -593,13 +852,13 @@ const make = Effect.gen(function* () {
593
852
  totalLines: 0,
594
853
  byteSize: 0
595
854
  };
596
- const lines = (yield* fs.readFileString(CHUNKS_FILE).pipe(Effect.catchAll(() => Effect.succeed("")))).split("\n").filter((l) => l.trim().length > 0);
855
+ const lines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
597
856
  const chunks = lines.length;
598
857
  const files = countUniqueFiles(lines).size;
599
858
  const model = "";
600
859
  const totalLines = countTotalLines(lines);
601
- const vectorsStat = yield* fs.stat(VECTORS_FILE).pipe(Effect.catchAll(() => Effect.succeed(null)));
602
- const byteSize = vectorsStat && "size" in vectorsStat ? Number(vectorsStat.size) : 0;
860
+ const vectorsStat = yield* withReadError(fs.stat(VECTORS_FILE), "stat vectors", VECTORS_FILE);
861
+ const byteSize = "size" in vectorsStat ? Number(vectorsStat.size) : 0;
603
862
  return {
604
863
  chunks,
605
864
  files,
@@ -610,25 +869,12 @@ const make = Effect.gen(function* () {
610
869
  };
611
870
  });
612
871
  const reset = () => Effect.gen(function* () {
613
- let deletedChunks = false;
614
- let deletedVectors = false;
615
- let freedBytes = 0;
616
- if (yield* fs.exists(CHUNKS_FILE)) {
617
- const stat = yield* fs.stat(CHUNKS_FILE);
618
- freedBytes += stat && "size" in stat ? Number(stat.size) : 0;
619
- yield* fs.remove(CHUNKS_FILE);
620
- deletedChunks = true;
621
- }
622
- if (yield* fs.exists(VECTORS_FILE)) {
623
- const stat = yield* fs.stat(VECTORS_FILE);
624
- freedBytes += stat && "size" in stat ? Number(stat.size) : 0;
625
- yield* fs.remove(VECTORS_FILE);
626
- deletedVectors = true;
627
- }
872
+ const chunks = yield* removeIfExists(CHUNKS_FILE, "chunks");
873
+ const vectors = yield* removeIfExists(VECTORS_FILE, "vectors");
628
874
  return {
629
- deletedChunks,
630
- deletedVectors,
631
- freedBytes
875
+ deletedChunks: chunks.deleted,
876
+ deletedVectors: vectors.deleted,
877
+ freedBytes: chunks.freed + vectors.freed
632
878
  };
633
879
  });
634
880
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.6.0",
3
+ "version": "0.8.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",