@lucas-bur/pix 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +100 -32
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -23,7 +23,7 @@ var GetStatus = class extends Effect.Service()("GetStatus", {
|
|
|
23
23
|
const configStore = yield* ConfigStore;
|
|
24
24
|
const getStatus = () => Effect.gen(function* () {
|
|
25
25
|
const status = yield* store.getStatus();
|
|
26
|
-
const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.model), Effect.catchAll(() => Effect.succeed(status.model)));
|
|
26
|
+
const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.embedder.model), Effect.catchAll(() => Effect.succeed(status.model)));
|
|
27
27
|
return {
|
|
28
28
|
...status,
|
|
29
29
|
model: configModel
|
|
@@ -33,6 +33,21 @@ var GetStatus = class extends Effect.Service()("GetStatus", {
|
|
|
33
33
|
})
|
|
34
34
|
}) {};
|
|
35
35
|
//#endregion
|
|
36
|
+
//#region src/domain/config.ts
|
|
37
|
+
var ConfigError = class extends Data.TaggedError("ConfigError") {};
|
|
38
|
+
/**
 * Default project configuration. IndexProject's index() passes this object to
 * configStore.writeConfig when configStore.configExists() yields false.
 */
const DEFAULT_CONFIG = {
|
|
39
|
+
schema: "1",
|
|
40
|
+
// NOTE(review): presumably the number of lines per chunk — usage is not visible here; confirm.
chunkLines: 60,
|
|
41
|
+
// NOTE(review): presumably the number of lines shared by adjacent chunks — confirm.
overlapLines: 10,
|
|
42
|
+
chunkConcurrency: 8,
|
|
43
|
+
// Empty by default: index() falls back to its built-in extension list when this has no keys.
files: {},
|
|
44
|
+
// Embedder settings; resolveEmbedderConfig checks model and dtype against MODEL_REGISTRY.
embedder: {
|
|
45
|
+
model: "Xenova/all-MiniLM-L6-v2",
|
|
46
|
+
// Any device other than "cpu" is retried on "cpu" if loading fails (see createExtractorWithFallback).
device: "auto",
|
|
47
|
+
dtype: "fp32"
|
|
48
|
+
}
|
|
49
|
+
};
|
|
50
|
+
//#endregion
|
|
36
51
|
//#region src/application/index-project.ts
|
|
37
52
|
/**
|
|
38
53
|
* Use case: index project files. Pipeline: scan → chunk → embed → store. Depends on ConfigStore,
|
|
@@ -47,6 +62,7 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
47
62
|
const embedder = yield* Embedder;
|
|
48
63
|
const vectorStore = yield* VectorStore;
|
|
49
64
|
const index = () => Effect.gen(function* () {
|
|
65
|
+
if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
|
|
50
66
|
const config = yield* configStore.readConfig();
|
|
51
67
|
const extensions = Object.keys(config.files).length > 0 ? Object.keys(config.files) : [
|
|
52
68
|
".ts",
|
|
@@ -89,18 +105,6 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
89
105
|
})
|
|
90
106
|
}) {};
|
|
91
107
|
//#endregion
|
|
92
|
-
//#region src/domain/config.ts
|
|
93
|
-
var ConfigError = class extends Data.TaggedError("ConfigError") {};
|
|
94
|
-
const DEFAULT_CONFIG = {
|
|
95
|
-
schema: "1",
|
|
96
|
-
model: "Xenova/all-MiniLM-L6-v2",
|
|
97
|
-
dims: 384,
|
|
98
|
-
chunkLines: 60,
|
|
99
|
-
overlapLines: 10,
|
|
100
|
-
chunkConcurrency: 8,
|
|
101
|
-
files: {}
|
|
102
|
-
};
|
|
103
|
-
//#endregion
|
|
104
108
|
//#region src/application/init-project.ts
|
|
105
109
|
/**
|
|
106
110
|
* Use case: initialize a pix project by writing default config. Depends on ConfigStore via Effect
|
|
@@ -167,11 +171,17 @@ const codeFromError = (error) => {
|
|
|
167
171
|
if (error && typeof error === "object" && "_tag" in error) return errorCodes[String(error._tag)] ?? "UNKNOWN";
|
|
168
172
|
return "UNKNOWN";
|
|
169
173
|
};
|
|
174
|
+
/**
 * Derive a human-readable cause string for the JSON error report.
 *
 * - A string error is returned unchanged.
 * - An object carrying a `cause` property yields a description of that cause.
 * - Anything else, or a `cause` that is null/undefined, yields "Unknown cause".
 *
 * @param error - The failure value (string, tagged error object, or anything else).
 * @returns A string describing the cause; this function never throws.
 */
const causeFromError = (error) => {
	if (typeof error === "string") return error;
	if (error === null || typeof error !== "object" || !("cause" in error)) return "Unknown cause";
	const { cause } = error;
	// A `cause` key that is present but empty would otherwise print "undefined" / "null".
	if (cause === null || cause === undefined) return "Unknown cause";
	// Primitives and Error instances already stringify to something meaningful.
	if (typeof cause !== "object" || cause instanceof Error) return String(cause);
	// Error-like plain objects: prefer their message over "[object Object]".
	if (typeof cause.message === "string" && cause.message !== "") return cause.message;
	let text;
	try {
		text = String(cause);
	} catch {
		// e.g. objects without a prototype have no toString to call.
		text = "[object Object]";
	}
	if (text !== "[object Object]") return text;
	try {
		return JSON.stringify(cause) ?? text;
	} catch {
		// Circular structures cannot be serialized; fall back to the generic text.
		return text;
	}
};
|
|
170
179
|
/** Format an error as spec-mandated JSON: `{ error: true, code: "...", message: "...", cause: "..." }`. */
|
|
171
180
|
const formatError = (error) => JSON.stringify({
|
|
172
181
|
error: true,
|
|
173
182
|
code: codeFromError(error),
|
|
174
|
-
message: messageFromError(error)
|
|
183
|
+
message: messageFromError(error),
|
|
184
|
+
cause: causeFromError(error)
|
|
175
185
|
});
|
|
176
186
|
/**
 * Log the error as JSON (via formatError) to stdout, then re-fail with the same
 * error value to preserve the non-zero exit code.
 */
|
|
177
187
|
const reportError = (error) => Console.log(formatError(error)).pipe(Effect.flatMap(() => Effect.fail(error)));
|
|
@@ -474,9 +484,34 @@ const make$3 = Effect.gen(function* () {
|
|
|
474
484
|
});
|
|
475
485
|
const ConfigStoreLive = Layer.effect(ConfigStore, make$3);
|
|
476
486
|
//#endregion
|
|
487
|
+
//#region src/domain/models.ts
|
|
488
|
+
/**
 * Registry of supported embedding models, keyed by model id.
 *
 * resolveEmbedderConfig looks the configured model up here: an id that is not
 * listed, or a dtype missing from the entry's `dtypes`, is reported as a
 * ModelLoadError, and the entry's `dims` is returned as the resolved `dims`.
 */
|
|
489
|
+
const MODEL_REGISTRY = {
|
|
490
|
+
"Xenova/all-MiniLM-L6-v2": {
|
|
491
|
+
id: "Xenova/all-MiniLM-L6-v2",
|
|
492
|
+
dims: 384,
|
|
493
|
+
// dtype values accepted for this model in the embedder config.
dtypes: [
|
|
494
|
+
"fp32",
|
|
495
|
+
"fp16",
|
|
496
|
+
"q8",
|
|
497
|
+
"q4"
|
|
498
|
+
],
|
|
499
|
+
description: "General-purpose sentence embeddings, 23MB q8"
|
|
500
|
+
},
|
|
501
|
+
"Xenova/bge-small-en-v1.5": {
|
|
502
|
+
id: "Xenova/bge-small-en-v1.5",
|
|
503
|
+
dims: 384,
|
|
504
|
+
// dtype values accepted for this model in the embedder config.
dtypes: [
|
|
505
|
+
"fp32",
|
|
506
|
+
"fp16",
|
|
507
|
+
"q8",
|
|
508
|
+
"q4"
|
|
509
|
+
],
|
|
510
|
+
description: "BGE retrieval-optimized embeddings, 34MB q8"
|
|
511
|
+
}
|
|
512
|
+
};
|
|
513
|
+
//#endregion
|
|
477
514
|
//#region src/services/embedder.ts
|
|
478
|
-
const MODEL_NAME = "Xenova/all-MiniLM-L6-v2";
|
|
479
|
-
const DIMS = 384;
|
|
480
515
|
const CACHE_DIR = ".pix/cache";
|
|
481
516
|
const BATCH_SIZE = 16;
|
|
482
517
|
env.cacheDir = CACHE_DIR;
|
|
@@ -489,18 +524,51 @@ const normalize = (arr) => {
|
|
|
489
524
|
for (let i = 0; i < arr.length; i++) result[i] = arr[i] / norm;
|
|
490
525
|
return result;
|
|
491
526
|
};
|
|
527
|
+
/**
 * Resolve the embedder settings from the stored config.
 *
 * A config that cannot be read is treated as absent, and any missing embedder
 * field falls back to the same values DEFAULT_CONFIG.embedder declares.
 *
 * @param configStore - Service exposing `readConfig()`.
 * @returns Effect yielding `{ model, device, dtype, dims }`; fails with
 *   ModelLoadError when the model is not in MODEL_REGISTRY or the dtype is not
 *   listed for that model.
 */
const resolveEmbedderConfig = (configStore) => Effect.gen(function* () {
	const config = yield* configStore.readConfig().pipe(Effect.catchAll(() => Effect.succeed(void 0)));
	// `embedder` can be missing from configs written before this section existed
	// (the previous default config kept `model` at the top level), so guard the
	// nested access instead of throwing a TypeError.
	const embedderConfig = config?.embedder;
	const model = embedderConfig?.model ?? "Xenova/all-MiniLM-L6-v2";
	const device = embedderConfig?.device ?? "auto";
	const dtype = embedderConfig?.dtype ?? "fp32";
	// Own-property lookup only: a model named e.g. "constructor" must be
	// reported as unknown rather than resolving to an inherited Object member.
	const modelInfo = Object.hasOwn(MODEL_REGISTRY, model) ? MODEL_REGISTRY[model] : void 0;
	if (!modelInfo) return yield* new ModelLoadError({
		message: `Unknown embedding model "${model}". Available: ${Object.keys(MODEL_REGISTRY).join(", ")}`,
		model
	});
	if (!modelInfo.dtypes.includes(dtype)) return yield* new ModelLoadError({
		message: `Unsupported dtype "${dtype}" for model "${model}". Supported: ${modelInfo.dtypes.join(", ")}`,
		model
	});
	return {
		model,
		device,
		dtype,
		dims: modelInfo.dims
	};
});
|
|
548
|
+
/**
 * Build an Effect that imports @huggingface/transformers on demand and creates
 * a "feature-extraction" pipeline for the requested model.
 *
 * @param opts - Resolved embedder options; `model`, `device` and `dtype` are read.
 * @returns Effect yielding the pipeline, or failing with ModelLoadError.
 */
const createExtractor = (opts) => Effect.tryPromise({
	try: async () => {
		const { pipeline } = await import("@huggingface/transformers");
		return pipeline("feature-extraction", opts.model, {
			device: opts.device,
			dtype: opts.dtype
		});
	},
	// Map the rejection here so `cause` is the error actually thrown by the
	// import or the pipeline call, not the generic wrapper that the
	// single-function form of tryPromise puts around it.
	catch: (cause) => new ModelLoadError({
		message: `Failed to load embedding model with device "${opts.device}"`,
		model: opts.model,
		cause
	})
});
|
|
559
|
+
/**
 * Create the extractor for the requested device and, unless that device is
 * already "cpu", retry once on "cpu" when loading fails.
 *
 * When the retry fails as well, the error from the first attempt is the one
 * reported.
 *
 * @param opts - Resolved embedder options (`model`, `device`, `dtype`).
 * @returns Effect yielding the extractor, or failing with the first load error.
 */
const createExtractorWithFallback = (opts) => {
	const primaryAttempt = createExtractor(opts);
	// Nothing to fall back to when "cpu" was requested in the first place.
	if (opts.device === "cpu") return primaryAttempt;
	const retryOnCpu = (originalError) => Effect.gen(function* () {
		yield* Effect.logWarning(`Embedding device "${opts.device}" failed, falling back to "cpu": ${originalError.message}`);
		const cpuAttempt = createExtractor({
			...opts,
			device: "cpu"
		});
		// Surface the first failure rather than the fallback's.
		return yield* cpuAttempt.pipe(Effect.mapError(() => originalError));
	});
	return primaryAttempt.pipe(Effect.catchAll(retryOnCpu));
};
|
|
492
569
|
const make$2 = Effect.gen(function* () {
|
|
493
|
-
const
|
|
494
|
-
|
|
495
|
-
return pipeline("feature-extraction", MODEL_NAME, {
|
|
496
|
-
device: "cpu",
|
|
497
|
-
dtype: "q8"
|
|
498
|
-
});
|
|
499
|
-
}).pipe(Effect.mapError((cause) => new ModelLoadError({
|
|
500
|
-
message: "Failed to load embedding model",
|
|
501
|
-
model: MODEL_NAME,
|
|
502
|
-
cause
|
|
503
|
-
}))));
|
|
570
|
+
const cfg = yield* resolveEmbedderConfig(yield* ConfigStore);
|
|
571
|
+
const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
|
|
504
572
|
const embed = (text) => Effect.gen(function* () {
|
|
505
573
|
const extractor = yield* getExtractor;
|
|
506
574
|
const data = (yield* Effect.tryPromise(() => extractor(text, {
|
|
@@ -512,7 +580,7 @@ const make$2 = Effect.gen(function* () {
|
|
|
512
580
|
})))).data;
|
|
513
581
|
return {
|
|
514
582
|
vector: normalize(data),
|
|
515
|
-
dims:
|
|
583
|
+
dims: cfg.dims
|
|
516
584
|
};
|
|
517
585
|
});
|
|
518
586
|
const batch = (texts) => Effect.gen(function* () {
|
|
@@ -530,13 +598,13 @@ const make$2 = Effect.gen(function* () {
|
|
|
530
598
|
const data = tensor.data;
|
|
531
599
|
const n = tensor.dims[0];
|
|
532
600
|
for (let j = 0; j < n; j++) {
|
|
533
|
-
const offset = j *
|
|
534
|
-
results.push(normalize(data.slice(offset, offset +
|
|
601
|
+
const offset = j * cfg.dims;
|
|
602
|
+
results.push(normalize(data.slice(offset, offset + cfg.dims)));
|
|
535
603
|
}
|
|
536
604
|
}
|
|
537
605
|
return results.map((vector) => ({
|
|
538
606
|
vector,
|
|
539
|
-
dims:
|
|
607
|
+
dims: cfg.dims
|
|
540
608
|
}));
|
|
541
609
|
});
|
|
542
610
|
return {
|
|
@@ -544,7 +612,7 @@ const make$2 = Effect.gen(function* () {
|
|
|
544
612
|
batch
|
|
545
613
|
};
|
|
546
614
|
});
|
|
547
|
-
const OnnxEmbedderLive = Layer.effect(Embedder, make$2);
|
|
615
|
+
/**
 * Embedder layer built from make$2. ConfigStoreLive is supplied through
 * Layer.provideMerge because make$2 reads ConfigStore.
 * NOTE(review): provideMerge presumably also re-exposes ConfigStore in the
 * resulting layer — confirm against the Effect Layer documentation.
 */
const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
|
|
548
616
|
//#endregion
|
|
549
617
|
//#region src/services/scanner.ts
|
|
550
618
|
const ALWAYS_IGNORE = new Set([
|