@lucas-bur/pix 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.mjs +323 -118
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -2,7 +2,8 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Args, CliConfig, Command, Options } from "@effect/cli";
4
4
  import { NodeContext, NodeRuntime } from "@effect/platform-node";
5
- import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
5
+ import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
6
+ import * as Chunk from "effect/Chunk";
6
7
  import { styleText } from "node:util";
7
8
  import * as clack from "@clack/prompts";
8
9
  import { FileSystem } from "@effect/platform";
@@ -74,6 +75,7 @@ const withInteractive = (activeRef, acquire, setActive, release, effect) => Ref.
74
75
  /** Display implementation using @clack/prompts for interactive terminal output */
75
76
  const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
76
77
  const activeRef = yield* Ref.make(null);
78
+ const lastSpinnerMsg = yield* Ref.make("");
77
79
  return {
78
80
  intro: (title) => Effect.sync(() => clack.intro(styleText("inverse", ` ${title} `))),
79
81
  outro: (message) => Effect.sync(() => clack.outro(message)),
@@ -84,11 +86,17 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
84
86
  const s = clack.spinner();
85
87
  s.start(message);
86
88
  return s;
87
- }), (s) => ({
89
+ }).pipe(Effect.tap(() => Ref.set(lastSpinnerMsg, message))), (s) => ({
88
90
  type: "spinner",
89
91
  handle: s
90
- }), (s, exit) => Effect.sync(() => s.stop(exit._tag === "Success" ? message : `${message} (failed)`)), effect),
91
- progress: (opts, effect) => withInteractive(activeRef, Effect.sync(() => {
92
+ }), (s, exit) => lastSpinnerMsg.pipe(Effect.flatMap((lastMsg) => Effect.sync(() => s.stop(exit._tag === "Success" && lastMsg ? lastMsg : `${message} (failed)`)))), effect),
93
+ progress: (opts, effect) => Effect.gen(function* () {
94
+ const current = yield* Ref.get(activeRef);
95
+ if (current && current.type === "spinner") {
96
+ const msg = yield* Ref.get(lastSpinnerMsg);
97
+ current.handle.stop(msg || opts.message);
98
+ yield* Ref.set(activeRef, null);
99
+ }
92
100
  const bar = clack.progress({
93
101
  max: opts.max,
94
102
  style: opts.style ?? "heavy",
@@ -96,16 +104,27 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
96
104
  indicator: opts.indicator ?? "dots"
97
105
  });
98
106
  bar.start(opts.message);
99
- return bar;
100
- }), (bar) => ({
101
- type: "progress",
102
- handle: bar,
103
- value: 0,
104
- max: opts.max
105
- }), (bar, exit) => Effect.sync(() => exit._tag === "Success" ? bar.stop(opts.message) : bar.error(opts.message)), effect),
107
+ yield* Ref.set(activeRef, {
108
+ type: "progress",
109
+ handle: bar,
110
+ value: 0,
111
+ max: opts.max
112
+ });
113
+ const exit = yield* Effect.exit(effect);
114
+ yield* Ref.set(activeRef, null);
115
+ if (Exit.isSuccess(exit)) {
116
+ bar.stop(opts.message);
117
+ return exit.value;
118
+ }
119
+ bar.error(opts.message);
120
+ return yield* Effect.failCause(exit.cause);
121
+ }),
106
122
  updateInteractive: (payload) => Ref.get(activeRef).pipe(Effect.flatMap((active) => {
107
123
  if (!active) return Effect.void;
108
- if (active.type === "spinner") return Effect.sync(() => active.handle.message(payloadText(payload)));
124
+ if (active.type === "spinner") {
125
+ const msg = payloadText(payload);
126
+ return Effect.sync(() => active.handle.message(msg)).pipe(Effect.andThen(Ref.set(lastSpinnerMsg, msg)));
127
+ }
109
128
  const delta = computeDelta(payload, {
110
129
  value: active.value,
111
130
  max: active.max
@@ -149,19 +168,19 @@ const DEFAULT_CONFIG = {
149
168
  "dist",
150
169
  "build",
151
170
  ".next",
152
- ".agents",
153
- ".claude",
154
171
  ".vscode",
155
- ".github",
156
172
  "coverage",
157
173
  "*-lock.yaml",
158
174
  "*-lock.json",
159
- "*.lock"
175
+ "*.lock",
176
+ ".vite-hooks",
177
+ ".fallow"
160
178
  ],
161
179
  embedder: {
162
180
  model: "Xenova/all-MiniLM-L6-v2",
163
181
  device: "auto",
164
- dtype: "fp32"
182
+ dtype: "fp32",
183
+ batchSize: 16
165
184
  }
166
185
  };
167
186
  //#endregion
@@ -289,6 +308,13 @@ function buildProcessorMap(skipExtensions) {
289
308
  }
290
309
  //#endregion
291
310
  //#region src/application/index-project.ts
311
+ const deriveEffectiveConfig = (opts, config) => ({
312
+ batchSize: opts.batchSize ?? config.embedder.batchSize ?? 16,
313
+ concurrency: Math.max(1, opts.chunkConcurrency ?? config.chunkConcurrency ?? 8),
314
+ skipExtensions: opts.skipExtensions ? [...config.skipExtensions, ...opts.skipExtensions] : config.skipExtensions,
315
+ ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
316
+ ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
317
+ });
292
318
  function getExtension(file) {
293
319
  const lastSlash = file.lastIndexOf("/");
294
320
  const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
@@ -313,11 +339,19 @@ const classifyFiles = (files, processorMap) => {
313
339
  unknownExtensions
314
340
  };
315
341
  };
316
- /**
317
- * Use case: index project files. Pipeline: scan → ContentExtractor → chunk → embed → store. Depends
318
- * on ConfigStore, Scanner, Chunker, Embedder, VectorStore, Display, ContentExtractor via Effect
319
- * tags.
320
- */
342
+ const classifyAndCollectChunks = (knownFiles, extractor, chunker, concurrency, skipped) => Stream.fromIterable(knownFiles).pipe(Stream.mapEffect((file) => extractor.extract(file).pipe(Effect.flatMap((text) => Effect.succeed(Option.some({
343
+ file,
344
+ text
345
+ }))), Effect.catchAll((err) => Ref.update(skipped, (prev) => [...prev, {
346
+ path: file,
347
+ reason: err.message
348
+ }]).pipe(Effect.flatMap(() => Effect.succeed(Option.none()))))), { concurrency }), Stream.filterMap((opt) => opt), Stream.mapEffect(({ file, text }) => chunker.chunkText(text, file), { concurrency }), Stream.flatMap((chunks) => Stream.fromIterable(chunks)), Stream.runCollect, Effect.map((allChunks) => {
349
+ const chunks = Chunk.toArray(allChunks);
350
+ return {
351
+ chunks,
352
+ totalChunks: chunks.length
353
+ };
354
+ }));
321
355
  var IndexProject = class extends Effect.Service()("IndexProject", {
322
356
  accessors: true,
323
357
  effect: Effect.gen(function* () {
@@ -328,65 +362,123 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
328
362
  const vectorStore = yield* VectorStore;
329
363
  const d = yield* Display;
330
364
  const extractor = yield* ContentExtractor;
331
- const index = () => Effect.gen(function* () {
365
+ const index = (opts = {}) => Effect.gen(function* () {
366
+ const start = Date.now();
332
367
  if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
333
- const config = yield* configStore.readConfig();
334
- const processorMap = buildProcessorMap(config.skipExtensions);
368
+ const eff = deriveEffectiveConfig(opts, yield* configStore.readConfig());
369
+ const processorMap = buildProcessorMap(eff.skipExtensions);
335
370
  yield* d.updateInteractive("Scanning source files...");
336
- const ignoredPaths = config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths;
337
- const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles((yield* scanner.scanFiles(ignoredPaths)).files, processorMap);
338
- if (unknownExtensions.size > 0) yield* d.log(`Skipped ${skippedFiles.length} files with unknown extensions: ${[...unknownExtensions].join(", ")}`, "warn");
339
- if (knownFiles.length === 0) return {
340
- success: true,
341
- status: {
342
- chunks: 0,
343
- files: 0,
344
- totalLines: 0,
345
- byteSize: 0
346
- }
347
- };
371
+ const scanResult = yield* scanner.scanFiles(eff.ignoredPaths, eff.ignoreGitignore);
372
+ const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(scanResult.files, processorMap);
373
+ const skipped = yield* Ref.make(scanResult.skipped.filter((s) => !s.reason.startsWith("Ignored by config pattern")).map((s) => ({
374
+ path: s.path,
375
+ reason: s.reason
376
+ })));
377
+ if (unknownExtensions.size > 0) yield* Ref.update(skipped, (prev) => [...prev, ...skippedFiles.map((f) => ({
378
+ path: f,
379
+ reason: "unknown extension"
380
+ }))]);
381
+ if (knownFiles.length === 0) {
382
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
383
+ return {
384
+ success: true,
385
+ status: {
386
+ chunks: 0,
387
+ files: 0,
388
+ totalLines: 0,
389
+ byteSize: 0
390
+ },
391
+ durationMs: Date.now() - start
392
+ };
393
+ }
348
394
  yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
349
- const allChunks = (yield* Effect.forEach(knownFiles, (file) => Effect.gen(function* () {
350
- const result = yield* Effect.either(extractor.extract(file));
351
- if (result._tag === "Left") {
352
- if (result.left._tag === "UnsupportedFormat") {
353
- yield* d.log(`Skipping ${file}: ${result.left.message}`, "warn");
354
- return [];
355
- }
356
- return yield* Effect.fail(result.left);
357
- }
358
- return yield* chunker.chunkText(result.right, file);
359
- }), { concurrency: Math.max(1, config.chunkConcurrency ?? 8) })).flat();
360
- const totalChunks = allChunks.length;
361
- const totalFiles = new Set(allChunks.map((c) => c.file)).size;
362
- const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
363
- if (totalChunks === 0) return {
364
- success: true,
365
- status: {
366
- chunks: 0,
367
- files: 0,
368
- totalLines: 0,
369
- byteSize: 0
370
- }
371
- };
372
- yield* d.updateInteractive(`Embedding ${totalChunks} chunks...`);
373
- const texts = allChunks.map((c) => c.text);
374
- const embeddings = yield* embedder.batch(texts);
375
- yield* vectorStore.store(allChunks, embeddings);
376
- const dims = embeddings[0]?.dims ?? 384;
395
+ const { chunks, totalChunks } = yield* classifyAndCollectChunks(knownFiles, extractor, chunker, eff.concurrency, skipped);
396
+ if (totalChunks === 0) {
397
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
398
+ return {
399
+ success: true,
400
+ status: {
401
+ chunks: 0,
402
+ files: 0,
403
+ totalLines: 0,
404
+ byteSize: 0
405
+ },
406
+ durationMs: Date.now() - start
407
+ };
408
+ }
409
+ yield* vectorStore.storeBegin();
410
+ const embeddedRef = yield* Ref.make(0);
411
+ const stats = yield* d.progress({
412
+ message: `Embedding ${totalChunks} chunks...`,
413
+ max: totalChunks
414
+ }, Stream.fromIterable(chunks).pipe(Stream.grouped(eff.batchSize), Stream.mapEffect((batchChunk) => Effect.gen(function* () {
415
+ const batch = Chunk.toArray(batchChunk);
416
+ const texts = batch.map((c) => c.text);
417
+ const embeddings = yield* embedder.batch(texts);
418
+ yield* vectorStore.storeBatch(batch, embeddings);
419
+ const count = yield* Ref.updateAndGet(embeddedRef, (n) => n + batch.length);
420
+ yield* d.updateInteractive({
421
+ message: `Embedding ${count} of ${totalChunks} chunks`,
422
+ setTo: count
423
+ });
424
+ })), Stream.runDrain, Effect.matchEffect({
425
+ onSuccess: () => vectorStore.storeCommit(),
426
+ onFailure: (err) => vectorStore.storeAbort().pipe(Effect.flatMap(() => Effect.fail(err)))
427
+ })));
428
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
429
+ const durationSec = ((Date.now() - start) / 1e3).toFixed(1);
430
+ yield* d.log(`Indexed ${stats.chunks} chunks from ${stats.files} files in ${durationSec}s`, "success");
431
+ const fallbackInfo = yield* embedder.getFallbackInfo();
377
432
  return {
378
433
  success: true,
379
434
  status: {
380
- chunks: totalChunks,
381
- files: totalFiles,
382
- totalLines,
383
- byteSize: embeddings.length * dims * 4
384
- }
435
+ chunks: stats.chunks,
436
+ files: stats.files,
437
+ totalLines: stats.totalLines,
438
+ byteSize: stats.byteSize
439
+ },
440
+ durationMs: Date.now() - start,
441
+ embedderFallback: fallbackInfo
385
442
  };
386
443
  });
387
444
  return { index };
388
445
  })
389
446
  }) {};
447
+ const getFilename = (path) => path.split("/").pop() ?? path;
448
+ const getFileExtension = (filename) => {
449
+ const dotIndex = filename.lastIndexOf(".");
450
+ return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
451
+ };
452
+ const groupByExtension = (entries) => {
453
+ const byExt = /* @__PURE__ */ new Map();
454
+ for (const s of entries) {
455
+ const name = getFilename(s.path);
456
+ const ext = getFileExtension(name);
457
+ if (!byExt.has(ext)) byExt.set(ext, []);
458
+ byExt.get(ext).push(name);
459
+ }
460
+ return byExt;
461
+ };
462
+ const formatFileList = (files, maxDisplay = 5) => files.length > maxDisplay ? `${files.slice(0, maxDisplay).join(", ")} +${files.length - maxDisplay} more` : files.join(", ");
463
+ const buildSkippedLines = (extFailures, extractErrors) => {
464
+ const lines = [];
465
+ if (extFailures.length > 0) {
466
+ lines.push(`Unknown extensions (${extFailures.length})`);
467
+ for (const [ext, files] of groupByExtension(extFailures)) lines.push(` ${ext} (${files.length}): ${formatFileList(files)}`);
468
+ }
469
+ if (extractErrors.length > 0) {
470
+ if (lines.length > 0) lines.push("");
471
+ lines.push(`Extraction errors (${extractErrors.length})`);
472
+ for (const s of extractErrors) lines.push(` ${getFilename(s.path)}: ${s.reason}`);
473
+ }
474
+ return lines;
475
+ };
476
+ const displaySkippedNote = (d, skipped) => {
477
+ if (skipped.length === 0) return Effect.void;
478
+ const extFailures = skipped.filter((s) => s.reason === "unknown extension");
479
+ const extractErrors = skipped.filter((s) => s.reason !== "unknown extension");
480
+ return d.note(buildSkippedLines(extFailures, extractErrors).join("\n"), `Skipped ${skipped.length} files`);
481
+ };
390
482
  //#endregion
391
483
  //#region src/application/init-project.ts
392
484
  /**
@@ -475,22 +567,60 @@ const reportError = (error) => Effect.gen(function* () {
475
567
  });
476
568
  //#endregion
477
569
  //#region src/commands/index-cmd.ts
478
- /** CLI command: pix index [--force] [--verbose] [--json] */
570
+ const batchSizeOption = Options.integer("batch-size").pipe(Options.withAlias("b"), Options.optional);
571
+ const chunkConcurrencyOption = Options.integer("chunk-concurrency").pipe(Options.withAlias("c"), Options.optional);
572
+ const skipExtensionsOption = Options.text("skip-extensions").pipe(Options.withAlias("s"), Options.repeated);
573
+ const ignorePathOption = Options.text("ignore-path").pipe(Options.repeated);
574
+ const ignorePathsOption = Options.text("ignore-paths").pipe(Options.repeated);
575
+ const ignoreGitignoreOption = Options.boolean("ignore-gitignore").pipe(Options.withDefault(false));
576
+ const splitCsv = (values) => values.flatMap((v) => v.split(",").map((s) => s.trim()).filter((s) => s.length > 0));
577
+ const buildIndexOptions = (args) => {
578
+ if (Option.isSome(args.batchSize) && args.batchSize.value <= 0) throw new Error(`--batch-size must be positive, got ${args.batchSize.value}`);
579
+ if (Option.isSome(args.chunkConcurrency) && args.chunkConcurrency.value <= 0) throw new Error(`--chunk-concurrency must be positive, got ${args.chunkConcurrency.value}`);
580
+ const cliSkipExtensions = splitCsv(args.skipExtensions);
581
+ const cliIgnorePaths = [...args.ignorePath.map((s) => s.trim()).filter((s) => s.length > 0), ...splitCsv(args.ignorePaths)];
582
+ return {
583
+ batchSize: Option.getOrUndefined(args.batchSize),
584
+ chunkConcurrency: Option.getOrUndefined(args.chunkConcurrency),
585
+ skipExtensions: cliSkipExtensions.length > 0 ? cliSkipExtensions : void 0,
586
+ ignorePaths: cliIgnorePaths.length > 0 ? cliIgnorePaths : void 0,
587
+ ignoreGitignore: args.ignoreGitignore || void 0
588
+ };
589
+ };
590
+ const emitIndexResult = (d, result) => Effect.gen(function* () {
591
+ yield* d.json({
592
+ chunks: result.status.chunks,
593
+ files: result.status.files,
594
+ totalLines: result.status.totalLines,
595
+ byteSize: result.status.byteSize,
596
+ durationMs: result.durationMs,
597
+ ...result.embedderFallback && { embedderFallback: result.embedderFallback }
598
+ });
599
+ if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
600
+ });
479
601
  const indexCommand = Command.make("index", {
480
602
  force: Options.boolean("force").pipe(Options.withDefault(false)),
481
603
  verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
482
- json: Options.boolean("json").pipe(Options.withDefault(false))
483
- }, ({ force, verbose }) => Effect.gen(function* () {
604
+ json: Options.boolean("json").pipe(Options.withDefault(false)),
605
+ batchSize: batchSizeOption,
606
+ chunkConcurrency: chunkConcurrencyOption,
607
+ skipExtensions: skipExtensionsOption,
608
+ ignorePath: ignorePathOption,
609
+ ignorePaths: ignorePathsOption,
610
+ ignoreGitignore: ignoreGitignoreOption
611
+ }, ({ force, verbose, batchSize, chunkConcurrency, skipExtensions, ignorePath, ignorePaths, ignoreGitignore }) => Effect.gen(function* () {
484
612
  const d = yield* Display;
485
613
  if (force) yield* d.log("--force is currently not implemented and only a placeholder.", "warn");
486
614
  if (verbose) yield* d.log("--verbose is currently not implemented and only a placeholder.", "warn");
487
- const result = yield* d.spinner("Indexing project...", IndexProject.index());
488
- yield* d.json({
489
- chunks: result.status.chunks,
490
- files: result.status.files
615
+ const options = buildIndexOptions({
616
+ batchSize,
617
+ chunkConcurrency,
618
+ skipExtensions,
619
+ ignorePath,
620
+ ignorePaths,
621
+ ignoreGitignore
491
622
  });
492
- if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
493
- else yield* d.log(`Indexed ${result.status.chunks} chunks from ${result.status.files} files.`, "success");
623
+ yield* emitIndexResult(d, yield* d.spinner("Indexing project...", IndexProject.index(options)));
494
624
  }).pipe(Effect.catchAll(reportError)));
495
625
  //#endregion
496
626
  //#region src/commands/init.ts
@@ -828,9 +958,7 @@ const MODEL_REGISTRY = {
828
958
  };
829
959
  //#endregion
830
960
  //#region src/services/embedder.ts
831
- const CACHE_DIR = ".pix/cache";
832
- const BATCH_SIZE = 16;
833
- env.cacheDir = CACHE_DIR;
961
+ env.cacheDir = ".pix/cache";
834
962
  const normalize = (arr) => {
835
963
  let norm = 0;
836
964
  for (let i = 0; i < arr.length; i++) norm += arr[i] * arr[i];
@@ -872,16 +1000,14 @@ const createExtractor = (opts) => Effect.tryPromise(async () => {
872
1000
  model: opts.model,
873
1001
  cause
874
1002
  })));
875
- const createExtractorWithFallback = (opts) => {
1003
+ const createExtractorWithFallback = (opts, fallbackRef) => {
876
1004
  if (opts.device === "cpu") return createExtractor(opts);
877
1005
  return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
878
- const d = yield* Display;
879
- yield* d.log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
880
- yield* d.json({
881
- event: "embedder_fallback",
1006
+ yield* (yield* Display).log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
1007
+ yield* Ref.set(fallbackRef, Option.some({
882
1008
  originalDevice: opts.device,
883
1009
  reason: originalError.message
884
- });
1010
+ }));
885
1011
  return yield* createExtractor({
886
1012
  ...opts,
887
1013
  device: "cpu"
@@ -892,7 +1018,8 @@ const make$2 = Effect.gen(function* () {
892
1018
  const configStore = yield* ConfigStore;
893
1019
  const d = yield* Display;
894
1020
  const cfg = yield* resolveEmbedderConfig(configStore);
895
- const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
1021
+ const fallbackRef = yield* Ref.make(Option.none());
1022
+ const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg, fallbackRef));
896
1023
  const embed = (text) => Effect.gen(function* () {
897
1024
  const extractor = yield* getExtractor;
898
1025
  const data = (yield* Effect.tryPromise(() => extractor(text, {
@@ -909,31 +1036,30 @@ const make$2 = Effect.gen(function* () {
909
1036
  }).pipe(Effect.provideService(Display, d));
910
1037
  const batch = (texts) => Effect.gen(function* () {
911
1038
  const extractor = yield* getExtractor;
1039
+ const tensor = yield* Effect.tryPromise(() => extractor([...texts], {
1040
+ pooling: "mean",
1041
+ normalize: false
1042
+ })).pipe(Effect.mapError((cause) => new InferenceError({
1043
+ message: "Batch embedding inference failed",
1044
+ cause
1045
+ })));
1046
+ const data = tensor.data;
1047
+ const n = tensor.dims[0];
912
1048
  const results = [];
913
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
914
- const slice = texts.slice(i, i + BATCH_SIZE);
915
- const tensor = yield* Effect.tryPromise(() => extractor(slice, {
916
- pooling: "mean",
917
- normalize: false
918
- })).pipe(Effect.mapError((cause) => new InferenceError({
919
- message: "Batch embedding inference failed",
920
- cause
921
- })));
922
- const data = tensor.data;
923
- const n = tensor.dims[0];
924
- for (let j = 0; j < n; j++) {
925
- const offset = j * cfg.dims;
926
- results.push(normalize(data.slice(offset, offset + cfg.dims)));
927
- }
1049
+ for (let j = 0; j < n; j++) {
1050
+ const offset = j * cfg.dims;
1051
+ results.push({
1052
+ vector: normalize(data.slice(offset, offset + cfg.dims)),
1053
+ dims: cfg.dims
1054
+ });
928
1055
  }
929
- return results.map((vector) => ({
930
- vector,
931
- dims: cfg.dims
932
- }));
1056
+ return results;
933
1057
  }).pipe(Effect.provideService(Display, d));
1058
+ const getFallbackInfo = () => Ref.get(fallbackRef).pipe(Effect.map(Option.getOrElse(() => void 0)));
934
1059
  return {
935
1060
  embed,
936
- batch
1061
+ batch,
1062
+ getFallbackInfo
937
1063
  };
938
1064
  });
939
1065
  const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
@@ -977,7 +1103,16 @@ const make$1 = Effect.gen(function* () {
977
1103
  if (result.skipped) skipped.push(result.skipped);
978
1104
  if (result.content.trim()) ig.add(result.content.split("\n"));
979
1105
  });
980
- const loadGitignoreRules = (ignoredPaths, cwd) => Effect.gen(function* () {
1106
+ const loadGitignoreRules = (ignoredPaths) => {
1107
+ const ig = ignore();
1108
+ const skipped = [];
1109
+ if (ignoredPaths.length > 0) ig.add(ignoredPaths);
1110
+ return Effect.succeed({
1111
+ ig,
1112
+ skipped
1113
+ });
1114
+ };
1115
+ const loadGitignoreRulesWithFiles = (ignoredPaths, cwd) => Effect.gen(function* () {
981
1116
  const ig = ignore();
982
1117
  const skipped = [];
983
1118
  if (ignoredPaths.length > 0) ig.add(ignoredPaths);
@@ -1056,10 +1191,10 @@ const make$1 = Effect.gen(function* () {
1056
1191
  skipped
1057
1192
  };
1058
1193
  });
1059
- const scanFiles = (ignoredPaths) => Effect.gen(function* () {
1194
+ const scanFiles = (ignoredPaths, ignoreGitignore) => Effect.gen(function* () {
1060
1195
  const cwd = process.cwd();
1061
- const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules(ignoredPaths, cwd).pipe(Effect.mapError((cause) => new ScanFailed({
1062
- message: `Failed to load gitignore rules: ${String(cause)}`,
1196
+ const { ig, skipped: ignoreSkipped } = yield* (ignoreGitignore ? loadGitignoreRules(ignoredPaths) : loadGitignoreRulesWithFiles(ignoredPaths, cwd)).pipe(Effect.mapError((cause) => new ScanFailed({
1197
+ message: `Failed to load ignore rules: ${String(cause)}`,
1063
1198
  cause
1064
1199
  })));
1065
1200
  const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
@@ -1083,6 +1218,22 @@ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !
1083
1218
  */
1084
1219
  const make = Effect.gen(function* () {
1085
1220
  const fs = yield* FileSystem.FileSystem;
1221
+ const chunksTemp = `${CHUNKS_FILE}.tmp`;
1222
+ const vectorsTemp = `${VECTORS_FILE}.tmp`;
1223
+ const seenFiles = yield* Ref.make(/* @__PURE__ */ new Set());
1224
+ const statsAccumulator = yield* Ref.make({
1225
+ chunks: 0,
1226
+ files: 0,
1227
+ totalLines: 0,
1228
+ byteSize: 0
1229
+ });
1230
+ const serializeVectors = (embeddings) => {
1231
+ const dims = embeddings[0]?.dims ?? 384;
1232
+ const totalFloats = embeddings.length * dims;
1233
+ const vectorsArray = new Float32Array(totalFloats);
1234
+ for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
1235
+ return Buffer.from(vectorsArray.buffer);
1236
+ };
1086
1237
  /**
1087
1238
  * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
1088
1239
  * field contains the source code.
@@ -1145,6 +1296,60 @@ const make = Effect.gen(function* () {
1145
1296
  deleted: true
1146
1297
  };
1147
1298
  });
1299
+ const storeBegin = () => Effect.gen(function* () {
1300
+ yield* ensureDirExists(STORE_DIR, ".pix directory");
1301
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1302
+ yield* Ref.set(statsAccumulator, {
1303
+ chunks: 0,
1304
+ files: 0,
1305
+ totalLines: 0,
1306
+ byteSize: 0
1307
+ });
1308
+ if (yield* withStoreError(fs.exists(chunksTemp), "check chunks temp")) yield* withStoreError(fs.remove(chunksTemp), "clean stale chunks temp", chunksTemp);
1309
+ if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
1310
+ });
1311
+ const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
1312
+ const content = chunks.map((c) => JSON.stringify({
1313
+ id: c.id,
1314
+ idx: c.idx,
1315
+ file: c.file,
1316
+ startLine: c.startLine,
1317
+ endLine: c.endLine,
1318
+ text: c.text
1319
+ })).join("\n") + "\n";
1320
+ yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
1321
+ const buffer = serializeVectors(embeddings);
1322
+ yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
1323
+ const dims = embeddings[0]?.dims ?? 384;
1324
+ const batchLines = chunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
1325
+ const batchBytes = embeddings.length * dims * 4;
1326
+ yield* Ref.update(seenFiles, (prev) => {
1327
+ for (const c of chunks) prev.add(c.file);
1328
+ return prev;
1329
+ });
1330
+ yield* Ref.update(statsAccumulator, (prev) => ({
1331
+ chunks: prev.chunks + chunks.length,
1332
+ files: 0,
1333
+ totalLines: prev.totalLines + batchLines,
1334
+ byteSize: prev.byteSize + batchBytes
1335
+ }));
1336
+ });
1337
+ const storeCommit = () => Effect.gen(function* () {
1338
+ yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1339
+ yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1340
+ const stats = yield* Ref.get(statsAccumulator);
1341
+ const files = yield* Ref.get(seenFiles);
1342
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1343
+ return {
1344
+ ...stats,
1345
+ files: files.size
1346
+ };
1347
+ });
1348
+ const storeAbort = () => Effect.gen(function* () {
1349
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1350
+ if (yield* withReadError(fs.exists(chunksTemp), "check chunks temp")) yield* withReadError(fs.remove(chunksTemp), "abort chunks temp", chunksTemp);
1351
+ if (yield* withReadError(fs.exists(vectorsTemp), "check vectors temp")) yield* withReadError(fs.remove(vectorsTemp), "abort vectors temp", vectorsTemp);
1352
+ });
1148
1353
  const store = (chunks, embeddings) => Effect.gen(function* () {
1149
1354
  yield* ensureDirExists(STORE_DIR, ".pix directory");
1150
1355
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
@@ -1159,11 +1364,7 @@ const make = Effect.gen(function* () {
1159
1364
  yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
1160
1365
  yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1161
1366
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
1162
- const dims = embeddings[0]?.dims ?? 384;
1163
- const totalFloats = embeddings.length * dims;
1164
- const vectorsArray = new Float32Array(totalFloats);
1165
- for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
1166
- const buffer = Buffer.from(vectorsArray.buffer);
1367
+ const buffer = serializeVectors(embeddings);
1167
1368
  yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
1168
1369
  yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1169
1370
  });
@@ -1232,6 +1433,10 @@ const make = Effect.gen(function* () {
1232
1433
  });
1233
1434
  return {
1234
1435
  store,
1436
+ storeBegin,
1437
+ storeBatch,
1438
+ storeCommit,
1439
+ storeAbort,
1235
1440
  search,
1236
1441
  getStatus,
1237
1442
  reset
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.10.0",
3
+ "version": "0.11.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",