npm - @lucas-bur/pix - Versions diffs - 0.7.0 → 0.8.0 - Mend

@lucas-bur/pix 0.7.0 → 0.8.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (2)

package/dist/index.mjs +90 -23
package/package.json +1 -1

package/dist/index.mjs CHANGED Viewed

@@ -23,7 +23,7 @@ var GetStatus = class extends Effect.Service()("GetStatus", {
 		const configStore = yield* ConfigStore;
 		const getStatus = () => Effect.gen(function* () {
 			const status = yield* store.getStatus();
-			const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.model), Effect.catchAll(() => Effect.succeed(status.model)));
+			const configModel = yield* configStore.readConfig().pipe(Effect.map((c) => c.embedder.model), Effect.catchAll(() => Effect.succeed(status.model)));
 			return {
 				...status,
 				model: configModel
@@ -93,12 +93,15 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
 var ConfigError = class extends Data.TaggedError("ConfigError") {};
 const DEFAULT_CONFIG = {
 	schema: "1",
-	model: "Xenova/all-MiniLM-L6-v2",
-	dims: 384,
 	chunkLines: 60,
 	overlapLines: 10,
 	chunkConcurrency: 8,
-	files: {}
+	files: {},
+	embedder: {
+		model: "Xenova/all-MiniLM-L6-v2",
+		device: "auto",
+		dtype: "fp32"
+	}
 };
 //#endregion
 //#region src/application/init-project.ts
@@ -167,11 +170,17 @@ const codeFromError = (error) => {
 	if (error && typeof error === "object" && "_tag" in error) return errorCodes[String(error._tag)] ?? "UNKNOWN";
 	return "UNKNOWN";
 };
+const causeFromError = (error) => {
+	if (typeof error === "string") return error;
+	if (error && typeof error === "object" && "cause" in error) return String(error.cause);
+	return "Unknown cause";
+};
 /** Format an error as spec-mandated JSON: `{ error: true, code: "...", message: "..." }`. */
 const formatError = (error) => JSON.stringify({
 	error: true,
 	code: codeFromError(error),
-	message: messageFromError(error)
+	message: messageFromError(error),
+	cause: causeFromError(error)
 });
 /** Log the error as JSON to stdout, then re-fail to preserve non-zero exit code. */
 const reportError = (error) => Console.log(formatError(error)).pipe(Effect.flatMap(() => Effect.fail(error)));
@@ -474,9 +483,34 @@ const make$3 = Effect.gen(function* () {
 });
 const ConfigStoreLive = Layer.effect(ConfigStore, make$3);
 //#endregion
+//#region src/domain/models.ts
+/** Registry of supported embedding models. */
+const MODEL_REGISTRY = {
+	"Xenova/all-MiniLM-L6-v2": {
+		id: "Xenova/all-MiniLM-L6-v2",
+		dims: 384,
+		dtypes: [
+			"fp32",
+			"fp16",
+			"q8",
+			"q4"
+		],
+		description: "General-purpose sentence embeddings, 23MB q8"
+	},
+	"Xenova/bge-small-en-v1.5": {
+		id: "Xenova/bge-small-en-v1.5",
+		dims: 384,
+		dtypes: [
+			"fp32",
+			"fp16",
+			"q8",
+			"q4"
+		],
+		description: "BGE retrieval-optimized embeddings, 34MB q8"
+	}
+};
+//#endregion
 //#region src/services/embedder.ts
-const MODEL_NAME = "Xenova/all-MiniLM-L6-v2";
-const DIMS = 384;
 const CACHE_DIR = ".pix/cache";
 const BATCH_SIZE = 16;
 env.cacheDir = CACHE_DIR;
@@ -489,18 +523,51 @@ const normalize = (arr) => {
 	for (let i = 0; i < arr.length; i++) result[i] = arr[i] / norm;
 	return result;
 };
+const resolveEmbedderConfig = (configStore) => Effect.gen(function* () {
+	const config = yield* configStore.readConfig().pipe(Effect.catchAll(() => Effect.succeed(void 0)));
+	const model = config?.embedder.model ?? "Xenova/all-MiniLM-L6-v2";
+	const device = config?.embedder.device ?? "auto";
+	const dtype = config?.embedder.dtype ?? "fp32";
+	const modelInfo = MODEL_REGISTRY[model];
+	if (!modelInfo) return yield* new ModelLoadError({
+		message: `Unknown embedding model "${model}". Available: ${Object.keys(MODEL_REGISTRY).join(", ")}`,
+		model
+	});
+	if (!modelInfo.dtypes.includes(dtype)) return yield* new ModelLoadError({
+		message: `Unsupported dtype "${dtype}" for model "${model}". Supported: ${modelInfo.dtypes.join(", ")}`,
+		model
+	});
+	return {
+		model,
+		device,
+		dtype,
+		dims: modelInfo.dims
+	};
+});
+const createExtractor = (opts) => Effect.tryPromise(async () => {
+	const { pipeline } = await import("@huggingface/transformers");
+	return pipeline("feature-extraction", opts.model, {
+		device: opts.device,
+		dtype: opts.dtype
+	});
+}).pipe(Effect.mapError((cause) => new ModelLoadError({
+	message: `Failed to load embedding model with device "${opts.device}"`,
+	model: opts.model,
+	cause
+})));
+const createExtractorWithFallback = (opts) => {
+	if (opts.device === "cpu") return createExtractor(opts);
+	return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
+		yield* Effect.logWarning(`Embedding device "${opts.device}" failed, falling back to "cpu": ${originalError.message}`);
+		return yield* createExtractor({
+			...opts,
+			device: "cpu"
+		}).pipe(Effect.catchAll(() => Effect.fail(originalError)));
+	})));
+};
 const make$2 = Effect.gen(function* () {
-	const getExtractor = yield* Effect.cached(Effect.tryPromise(async () => {
-		const { pipeline } = await import("@huggingface/transformers");
-		return pipeline("feature-extraction", MODEL_NAME, {
-			device: "cpu",
-			dtype: "q8"
-		});
-	}).pipe(Effect.mapError((cause) => new ModelLoadError({
-		message: "Failed to load embedding model",
-		model: MODEL_NAME,
-		cause
-	}))));
+	const cfg = yield* resolveEmbedderConfig(yield* ConfigStore);
+	const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
 	const embed = (text) => Effect.gen(function* () {
 		const extractor = yield* getExtractor;
 		const data = (yield* Effect.tryPromise(() => extractor(text, {
@@ -512,7 +579,7 @@ const make$2 = Effect.gen(function* () {
 		})))).data;
 		return {
 			vector: normalize(data),
-			dims: DIMS
+			dims: cfg.dims
 		};
 	});
 	const batch = (texts) => Effect.gen(function* () {
@@ -530,13 +597,13 @@ const make$2 = Effect.gen(function* () {
 			const data = tensor.data;
 			const n = tensor.dims[0];
 			for (let j = 0; j < n; j++) {
-				const offset = j * DIMS;
-				results.push(normalize(data.slice(offset, offset + DIMS)));
+				const offset = j * cfg.dims;
+				results.push(normalize(data.slice(offset, offset + cfg.dims)));
 			}
 		}
 		return results.map((vector) => ({
 			vector,
-			dims: DIMS
+			dims: cfg.dims
 		}));
 	});
 	return {
@@ -544,7 +611,7 @@ const make$2 = Effect.gen(function* () {
 		batch
 	};
 });
-const OnnxEmbedderLive = Layer.effect(Embedder, make$2);
+const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
 //#endregion
 //#region src/services/scanner.ts
 const ALWAYS_IGNORE = new Set([

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lucas-bur/pix",
-  "version": "0.7.0",
+  "version": "0.8.0",
   "description": "Lightweight local semantic project indexer",
   "keywords": [
     "cli",