@lucas-bur/pix 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.mjs +90 -23
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -23,7 +23,7 @@ var GetStatus = class extends Effect.Service()("GetStatus", {
23
23
  const configStore = yield* ConfigStore;
24
24
  const getStatus = () => Effect.gen(function* () {
25
25
  const status = yield* store.getStatus();
26
- const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.model), Effect.catchAll(() => Effect.succeed(status.model)));
26
+ const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.embedder.model), Effect.catchAll(() => Effect.succeed(status.model)));
27
27
  return {
28
28
  ...status,
29
29
  model: configModel
@@ -93,12 +93,15 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
93
93
  var ConfigError = class extends Data.TaggedError("ConfigError") {};
94
94
  const DEFAULT_CONFIG = {
95
95
  schema: "1",
96
- model: "Xenova/all-MiniLM-L6-v2",
97
- dims: 384,
98
96
  chunkLines: 60,
99
97
  overlapLines: 10,
100
98
  chunkConcurrency: 8,
101
- files: {}
99
+ files: {},
100
+ embedder: {
101
+ model: "Xenova/all-MiniLM-L6-v2",
102
+ device: "auto",
103
+ dtype: "fp32"
104
+ }
102
105
  };
103
106
  //#endregion
104
107
  //#region src/application/init-project.ts
@@ -167,11 +170,17 @@ const codeFromError = (error) => {
167
170
  if (error && typeof error === "object" && "_tag" in error) return errorCodes[String(error._tag)] ?? "UNKNOWN";
168
171
  return "UNKNOWN";
169
172
  };
173
+ const causeFromError = (error) => {
174
+ if (typeof error === "string") return error;
175
+ if (error && typeof error === "object" && "cause" in error) return String(error.cause);
176
+ return "Unknown cause";
177
+ };
170
178
  /** Format an error as spec-mandated JSON: `{ error: true, code: "...", message: "..." }`. */
171
179
  const formatError = (error) => JSON.stringify({
172
180
  error: true,
173
181
  code: codeFromError(error),
174
- message: messageFromError(error)
182
+ message: messageFromError(error),
183
+ cause: causeFromError(error)
175
184
  });
176
185
  /** Log the error as JSON to stdout, then re-fail to preserve non-zero exit code. */
177
186
  const reportError = (error) => Console.log(formatError(error)).pipe(Effect.flatMap(() => Effect.fail(error)));
@@ -474,9 +483,34 @@ const make$3 = Effect.gen(function* () {
474
483
  });
475
484
  const ConfigStoreLive = Layer.effect(ConfigStore, make$3);
476
485
  //#endregion
486
+ //#region src/domain/models.ts
487
+ /** Registry of supported embedding models. */
488
+ const MODEL_REGISTRY = {
489
+ "Xenova/all-MiniLM-L6-v2": {
490
+ id: "Xenova/all-MiniLM-L6-v2",
491
+ dims: 384,
492
+ dtypes: [
493
+ "fp32",
494
+ "fp16",
495
+ "q8",
496
+ "q4"
497
+ ],
498
+ description: "General-purpose sentence embeddings, 23MB q8"
499
+ },
500
+ "Xenova/bge-small-en-v1.5": {
501
+ id: "Xenova/bge-small-en-v1.5",
502
+ dims: 384,
503
+ dtypes: [
504
+ "fp32",
505
+ "fp16",
506
+ "q8",
507
+ "q4"
508
+ ],
509
+ description: "BGE retrieval-optimized embeddings, 34MB q8"
510
+ }
511
+ };
512
+ //#endregion
477
513
  //#region src/services/embedder.ts
478
- const MODEL_NAME = "Xenova/all-MiniLM-L6-v2";
479
- const DIMS = 384;
480
514
  const CACHE_DIR = ".pix/cache";
481
515
  const BATCH_SIZE = 16;
482
516
  env.cacheDir = CACHE_DIR;
@@ -489,18 +523,51 @@ const normalize = (arr) => {
489
523
  for (let i = 0; i < arr.length; i++) result[i] = arr[i] / norm;
490
524
  return result;
491
525
  };
526
+ const resolveEmbedderConfig = (configStore) => Effect.gen(function* () {
527
+ const config = yield* configStore.readConfig().pipe(Effect.catchAll(() => Effect.succeed(void 0)));
528
+ const model = config?.embedder.model ?? "Xenova/all-MiniLM-L6-v2";
529
+ const device = config?.embedder.device ?? "auto";
530
+ const dtype = config?.embedder.dtype ?? "fp32";
531
+ const modelInfo = MODEL_REGISTRY[model];
532
+ if (!modelInfo) return yield* new ModelLoadError({
533
+ message: `Unknown embedding model "${model}". Available: ${Object.keys(MODEL_REGISTRY).join(", ")}`,
534
+ model
535
+ });
536
+ if (!modelInfo.dtypes.includes(dtype)) return yield* new ModelLoadError({
537
+ message: `Unsupported dtype "${dtype}" for model "${model}". Supported: ${modelInfo.dtypes.join(", ")}`,
538
+ model
539
+ });
540
+ return {
541
+ model,
542
+ device,
543
+ dtype,
544
+ dims: modelInfo.dims
545
+ };
546
+ });
547
+ const createExtractor = (opts) => Effect.tryPromise(async () => {
548
+ const { pipeline } = await import("@huggingface/transformers");
549
+ return pipeline("feature-extraction", opts.model, {
550
+ device: opts.device,
551
+ dtype: opts.dtype
552
+ });
553
+ }).pipe(Effect.mapError((cause) => new ModelLoadError({
554
+ message: `Failed to load embedding model with device "${opts.device}"`,
555
+ model: opts.model,
556
+ cause
557
+ })));
558
+ const createExtractorWithFallback = (opts) => {
559
+ if (opts.device === "cpu") return createExtractor(opts);
560
+ return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
561
+ yield* Effect.logWarning(`Embedding device "${opts.device}" failed, falling back to "cpu": ${originalError.message}`);
562
+ return yield* createExtractor({
563
+ ...opts,
564
+ device: "cpu"
565
+ }).pipe(Effect.catchAll(() => Effect.fail(originalError)));
566
+ })));
567
+ };
492
568
  const make$2 = Effect.gen(function* () {
493
- const getExtractor = yield* Effect.cached(Effect.tryPromise(async () => {
494
- const { pipeline } = await import("@huggingface/transformers");
495
- return pipeline("feature-extraction", MODEL_NAME, {
496
- device: "cpu",
497
- dtype: "q8"
498
- });
499
- }).pipe(Effect.mapError((cause) => new ModelLoadError({
500
- message: "Failed to load embedding model",
501
- model: MODEL_NAME,
502
- cause
503
- }))));
569
+ const cfg = yield* resolveEmbedderConfig(yield* ConfigStore);
570
+ const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
504
571
  const embed = (text) => Effect.gen(function* () {
505
572
  const extractor = yield* getExtractor;
506
573
  const data = (yield* Effect.tryPromise(() => extractor(text, {
@@ -512,7 +579,7 @@ const make$2 = Effect.gen(function* () {
512
579
  })))).data;
513
580
  return {
514
581
  vector: normalize(data),
515
- dims: DIMS
582
+ dims: cfg.dims
516
583
  };
517
584
  });
518
585
  const batch = (texts) => Effect.gen(function* () {
@@ -530,13 +597,13 @@ const make$2 = Effect.gen(function* () {
530
597
  const data = tensor.data;
531
598
  const n = tensor.dims[0];
532
599
  for (let j = 0; j < n; j++) {
533
- const offset = j * DIMS;
534
- results.push(normalize(data.slice(offset, offset + DIMS)));
600
+ const offset = j * cfg.dims;
601
+ results.push(normalize(data.slice(offset, offset + cfg.dims)));
535
602
  }
536
603
  }
537
604
  return results.map((vector) => ({
538
605
  vector,
539
- dims: DIMS
606
+ dims: cfg.dims
540
607
  }));
541
608
  });
542
609
  return {
@@ -544,7 +611,7 @@ const make$2 = Effect.gen(function* () {
544
611
  batch
545
612
  };
546
613
  });
547
- const OnnxEmbedderLive = Layer.effect(Embedder, make$2);
614
+ const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
548
615
  //#endregion
549
616
  //#region src/services/scanner.ts
550
617
  const ALWAYS_IGNORE = new Set([
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",