@lucas-bur/pix 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.mjs +585 -165
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -2,16 +2,18 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Args, CliConfig, Command, Options } from "@effect/cli";
4
4
  import { NodeContext, NodeRuntime } from "@effect/platform-node";
5
- import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
5
+ import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
6
+ import * as Chunk from "effect/Chunk";
6
7
  import { styleText } from "node:util";
7
8
  import * as clack from "@clack/prompts";
8
- import crypto from "node:crypto";
9
9
  import { FileSystem } from "@effect/platform";
10
+ import crypto from "node:crypto";
10
11
  import { env } from "@huggingface/transformers";
11
12
  import ignore from "ignore";
12
13
  //#region src/domain/ports.ts
13
14
  var ConfigStore = class extends Context.Tag("ConfigStore")() {};
14
15
  var Scanner = class extends Context.Tag("Scanner")() {};
16
+ var ContentExtractor = class extends Context.Tag("ContentExtractor")() {};
15
17
  var Chunker = class extends Context.Tag("Chunker")() {};
16
18
  var Embedder = class extends Context.Tag("Embedder")() {};
17
19
  var VectorStore = class extends Context.Tag("VectorStore")() {};
@@ -73,6 +75,7 @@ const withInteractive = (activeRef, acquire, setActive, release, effect) => Ref.
73
75
  /** Display implementation using @clack/prompts for interactive terminal output */
74
76
  const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
75
77
  const activeRef = yield* Ref.make(null);
78
+ const lastSpinnerMsg = yield* Ref.make("");
76
79
  return {
77
80
  intro: (title) => Effect.sync(() => clack.intro(styleText("inverse", ` ${title} `))),
78
81
  outro: (message) => Effect.sync(() => clack.outro(message)),
@@ -83,11 +86,17 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
83
86
  const s = clack.spinner();
84
87
  s.start(message);
85
88
  return s;
86
- }), (s) => ({
89
+ }).pipe(Effect.tap(() => Ref.set(lastSpinnerMsg, message))), (s) => ({
87
90
  type: "spinner",
88
91
  handle: s
89
- }), (s, exit) => Effect.sync(() => s.stop(exit._tag === "Success" ? message : `${message} (failed)`)), effect),
90
- progress: (opts, effect) => withInteractive(activeRef, Effect.sync(() => {
92
+ }), (s, exit) => lastSpinnerMsg.pipe(Effect.flatMap((lastMsg) => Effect.sync(() => s.stop(exit._tag === "Success" && lastMsg ? lastMsg : `${message} (failed)`)))), effect),
93
+ progress: (opts, effect) => Effect.gen(function* () {
94
+ const current = yield* Ref.get(activeRef);
95
+ if (current && current.type === "spinner") {
96
+ const msg = yield* Ref.get(lastSpinnerMsg);
97
+ current.handle.stop(msg || opts.message);
98
+ yield* Ref.set(activeRef, null);
99
+ }
91
100
  const bar = clack.progress({
92
101
  max: opts.max,
93
102
  style: opts.style ?? "heavy",
@@ -95,16 +104,27 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
95
104
  indicator: opts.indicator ?? "dots"
96
105
  });
97
106
  bar.start(opts.message);
98
- return bar;
99
- }), (bar) => ({
100
- type: "progress",
101
- handle: bar,
102
- value: 0,
103
- max: opts.max
104
- }), (bar, exit) => Effect.sync(() => exit._tag === "Success" ? bar.stop(opts.message) : bar.error(opts.message)), effect),
107
+ yield* Ref.set(activeRef, {
108
+ type: "progress",
109
+ handle: bar,
110
+ value: 0,
111
+ max: opts.max
112
+ });
113
+ const exit = yield* Effect.exit(effect);
114
+ yield* Ref.set(activeRef, null);
115
+ if (Exit.isSuccess(exit)) {
116
+ bar.stop(opts.message);
117
+ return exit.value;
118
+ }
119
+ bar.error(opts.message);
120
+ return yield* Effect.failCause(exit.cause);
121
+ }),
105
122
  updateInteractive: (payload) => Ref.get(activeRef).pipe(Effect.flatMap((active) => {
106
123
  if (!active) return Effect.void;
107
- if (active.type === "spinner") return Effect.sync(() => active.handle.message(payloadText(payload)));
124
+ if (active.type === "spinner") {
125
+ const msg = payloadText(payload);
126
+ return Effect.sync(() => active.handle.message(msg)).pipe(Effect.andThen(Ref.set(lastSpinnerMsg, msg)));
127
+ }
108
128
  const delta = computeDelta(payload, {
109
129
  value: active.value,
110
130
  max: active.max
@@ -140,19 +160,198 @@ const DEFAULT_CONFIG = {
140
160
  chunkLines: 60,
141
161
  overlapLines: 10,
142
162
  chunkConcurrency: 8,
143
- files: {},
163
+ skipExtensions: [],
164
+ ignoredPaths: [
165
+ ".pix",
166
+ "node_modules",
167
+ ".git",
168
+ "dist",
169
+ "build",
170
+ ".next",
171
+ ".vscode",
172
+ "coverage",
173
+ "*-lock.yaml",
174
+ "*-lock.json",
175
+ "*.lock",
176
+ ".vite-hooks",
177
+ ".fallow"
178
+ ],
144
179
  embedder: {
145
180
  model: "Xenova/all-MiniLM-L6-v2",
146
181
  device: "auto",
147
- dtype: "fp32"
182
+ dtype: "fp32",
183
+ batchSize: 16
148
184
  }
149
185
  };
150
186
  //#endregion
151
- //#region src/application/index-project.ts
187
+ //#region src/domain/errors.ts
188
+ /** Config file or directory does not exist. Run pix init first. */
189
+ var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
190
+ /** Config file exists but contains invalid JSON. */
191
+ var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
192
+ /** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
193
+ var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
194
+ /** Disk is full — write operation could not complete. */
195
+ var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
196
+ /** Generic index store I/O failure (read, write, delete). */
197
+ var StoreError = class extends Data.TaggedError("StoreError") {};
198
+ /** Source file could not be read during chunking (binary, permissions, encoding). */
199
+ var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
200
+ /** Embedding model could not be downloaded or loaded. */
201
+ var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
202
+ /** Embedding model failed during inference. */
203
+ var InferenceError = class extends Data.TaggedError("InferenceError") {};
152
204
  /**
153
- * Use case: index project files. Pipeline: scan → chunk → embed → store. Depends on ConfigStore,
154
- * Scanner, Chunker, Embedder, VectorStore, Display via Effect tags.
205
+ * Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
206
+ * via ScanResult.skipped.
155
207
  */
208
+ var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
209
+ /** File type is unsupported for text extraction. */
210
+ var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
211
+ /** Text extraction failed for a supported file type. */
212
+ var ExtractionFailed = class extends Data.TaggedError("ExtractionFailed") {};
213
+ //#endregion
214
+ //#region src/services/processors/identity.ts
215
+ const identityProcessor = (file) => FileSystem.FileSystem.pipe(Effect.flatMap((fs) => fs.readFileString(file)), Effect.mapError((cause) => new ExtractionFailed({
216
+ message: `Failed to read file for extraction: ${file}`,
217
+ file,
218
+ cause
219
+ })));
220
+ //#endregion
221
+ //#region src/services/processors/skip.ts
222
+ const skipProcessor = (extension) => {
223
+ const error = new UnsupportedFormat({
224
+ message: `Unsupported file type: ${extension}`,
225
+ extension
226
+ });
227
+ return (_file) => Effect.fail(error);
228
+ };
229
+ //#endregion
230
+ //#region src/services/processors/index.ts
231
+ const DEFAULT_PROCESSOR_MAP = {
232
+ ".ts": identityProcessor,
233
+ ".tsx": identityProcessor,
234
+ ".js": identityProcessor,
235
+ ".jsx": identityProcessor,
236
+ ".py": identityProcessor,
237
+ ".rs": identityProcessor,
238
+ ".go": identityProcessor,
239
+ ".java": identityProcessor,
240
+ ".c": identityProcessor,
241
+ ".cpp": identityProcessor,
242
+ ".h": identityProcessor,
243
+ ".hpp": identityProcessor,
244
+ ".json": identityProcessor,
245
+ ".yaml": identityProcessor,
246
+ ".yml": identityProcessor,
247
+ ".toml": identityProcessor,
248
+ ".xml": identityProcessor,
249
+ ".csv": identityProcessor,
250
+ ".md": identityProcessor,
251
+ ".mdx": identityProcessor,
252
+ ".txt": identityProcessor,
253
+ ".rst": identityProcessor,
254
+ ".html": identityProcessor,
255
+ ".css": identityProcessor,
256
+ ".scss": identityProcessor,
257
+ ".less": identityProcessor,
258
+ ".sql": identityProcessor,
259
+ ".graphql": identityProcessor,
260
+ ".sh": identityProcessor,
261
+ ".bash": identityProcessor,
262
+ ".ps1": identityProcessor,
263
+ ".bat": identityProcessor,
264
+ ".cmake": identityProcessor,
265
+ ".dockerfile": identityProcessor,
266
+ dockerfile: identityProcessor,
267
+ makefile: identityProcessor,
268
+ gemfile: identityProcessor,
269
+ ".pdf": skipProcessor(".pdf"),
270
+ ".png": skipProcessor(".png"),
271
+ ".jpg": skipProcessor(".jpg"),
272
+ ".jpeg": skipProcessor(".jpeg"),
273
+ ".gif": skipProcessor(".gif"),
274
+ ".svg": identityProcessor,
275
+ ".ico": skipProcessor(".ico"),
276
+ ".webp": skipProcessor(".webp"),
277
+ ".mp3": skipProcessor(".mp3"),
278
+ ".mp4": skipProcessor(".mp4"),
279
+ ".wav": skipProcessor(".wav"),
280
+ ".avi": skipProcessor(".avi"),
281
+ ".mov": skipProcessor(".mov"),
282
+ ".mkv": skipProcessor(".mkv"),
283
+ ".exe": skipProcessor(".exe"),
284
+ ".dll": skipProcessor(".dll"),
285
+ ".so": skipProcessor(".so"),
286
+ ".zip": skipProcessor(".zip"),
287
+ ".tar": skipProcessor(".tar"),
288
+ ".gz": skipProcessor(".gz"),
289
+ ".7z": skipProcessor(".7z"),
290
+ ".rar": skipProcessor(".rar"),
291
+ ".ttf": skipProcessor(".ttf"),
292
+ ".woff": skipProcessor(".woff"),
293
+ ".woff2": skipProcessor(".woff2"),
294
+ ".eot": skipProcessor(".eot"),
295
+ ".otf": skipProcessor(".otf"),
296
+ ".lock": identityProcessor,
297
+ lock: identityProcessor
298
+ };
299
+ /**
300
+ * Builds the processor map by merging domain defaults with user-specified skip extensions. Skip
301
+ * extensions override any existing mapping with a skip processor. Unknown extensions remain absent
302
+ * from the map — callers decide how to handle them.
303
+ */
304
+ function buildProcessorMap(skipExtensions) {
305
+ const mapped = { ...DEFAULT_PROCESSOR_MAP };
306
+ for (const ext of skipExtensions) mapped[ext] = skipProcessor(ext);
307
+ return mapped;
308
+ }
309
+ //#endregion
310
+ //#region src/application/index-project.ts
311
+ const deriveEffectiveConfig = (opts, config) => ({
312
+ batchSize: opts.batchSize ?? config.embedder.batchSize ?? 16,
313
+ concurrency: Math.max(1, opts.chunkConcurrency ?? config.chunkConcurrency ?? 8),
314
+ skipExtensions: opts.skipExtensions ? [...config.skipExtensions, ...opts.skipExtensions] : config.skipExtensions,
315
+ ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
316
+ ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
317
+ });
318
+ function getExtension(file) {
319
+ const lastSlash = file.lastIndexOf("/");
320
+ const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
321
+ const dotIndex = name.lastIndexOf(".");
322
+ if (dotIndex === -1) return name.toLowerCase();
323
+ return name.slice(dotIndex).toLowerCase();
324
+ }
325
+ const classifyFiles = (files, processorMap) => {
326
+ const knownFiles = [];
327
+ const skippedFiles = [];
328
+ const unknownExtensions = /* @__PURE__ */ new Set();
329
+ for (const file of files) {
330
+ const ext = getExtension(file);
331
+ if (!processorMap[ext]) {
332
+ unknownExtensions.add(ext);
333
+ skippedFiles.push(file);
334
+ } else knownFiles.push(file);
335
+ }
336
+ return {
337
+ knownFiles,
338
+ skippedFiles,
339
+ unknownExtensions
340
+ };
341
+ };
342
+ const classifyAndCollectChunks = (knownFiles, extractor, chunker, concurrency, skipped) => Stream.fromIterable(knownFiles).pipe(Stream.mapEffect((file) => extractor.extract(file).pipe(Effect.flatMap((text) => Effect.succeed(Option.some({
343
+ file,
344
+ text
345
+ }))), Effect.catchAll((err) => Ref.update(skipped, (prev) => [...prev, {
346
+ path: file,
347
+ reason: err.message
348
+ }]).pipe(Effect.flatMap(() => Effect.succeed(Option.none()))))), { concurrency }), Stream.filterMap((opt) => opt), Stream.mapEffect(({ file, text }) => chunker.chunkText(text, file), { concurrency }), Stream.flatMap((chunks) => Stream.fromIterable(chunks)), Stream.runCollect, Effect.map((allChunks) => {
349
+ const chunks = Chunk.toArray(allChunks);
350
+ return {
351
+ chunks,
352
+ totalChunks: chunks.length
353
+ };
354
+ }));
156
355
  var IndexProject = class extends Effect.Service()("IndexProject", {
157
356
  accessors: true,
158
357
  effect: Effect.gen(function* () {
@@ -162,49 +361,124 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
162
361
  const embedder = yield* Embedder;
163
362
  const vectorStore = yield* VectorStore;
164
363
  const d = yield* Display;
165
- const index = () => Effect.gen(function* () {
364
+ const extractor = yield* ContentExtractor;
365
+ const index = (opts = {}) => Effect.gen(function* () {
366
+ const start = Date.now();
166
367
  if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
167
- const config = yield* configStore.readConfig();
168
- const extensions = Object.keys(config.files).length > 0 ? Object.keys(config.files) : [
169
- ".ts",
170
- ".tsx",
171
- ".js",
172
- ".jsx"
173
- ];
368
+ const eff = deriveEffectiveConfig(opts, yield* configStore.readConfig());
369
+ const processorMap = buildProcessorMap(eff.skipExtensions);
174
370
  yield* d.updateInteractive("Scanning source files...");
175
- const scanResult = yield* scanner.scanFiles(extensions);
176
- yield* d.updateInteractive(`Chunking ${scanResult.files.length} files...`);
177
- const allChunks = (yield* Effect.forEach(scanResult.files, (file) => chunker.chunkFile(file), { concurrency: Math.max(1, config.chunkConcurrency ?? 8) })).flat();
178
- const totalChunks = allChunks.length;
179
- const totalFiles = new Set(allChunks.map((c) => c.file)).size;
180
- const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
181
- if (totalChunks === 0) return {
182
- success: true,
183
- status: {
184
- chunks: 0,
185
- files: 0,
186
- totalLines: 0,
187
- byteSize: 0
188
- }
189
- };
190
- yield* d.updateInteractive(`Embedding ${totalChunks} chunks...`);
191
- const texts = allChunks.map((c) => c.text);
192
- const embeddings = yield* embedder.batch(texts);
193
- yield* vectorStore.store(allChunks, embeddings);
194
- const dims = embeddings[0]?.dims ?? 384;
371
+ const scanResult = yield* scanner.scanFiles(eff.ignoredPaths, eff.ignoreGitignore);
372
+ const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(scanResult.files, processorMap);
373
+ const skipped = yield* Ref.make(scanResult.skipped.filter((s) => !s.reason.startsWith("Ignored by config pattern")).map((s) => ({
374
+ path: s.path,
375
+ reason: s.reason
376
+ })));
377
+ if (unknownExtensions.size > 0) yield* Ref.update(skipped, (prev) => [...prev, ...skippedFiles.map((f) => ({
378
+ path: f,
379
+ reason: "unknown extension"
380
+ }))]);
381
+ if (knownFiles.length === 0) {
382
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
383
+ return {
384
+ success: true,
385
+ status: {
386
+ chunks: 0,
387
+ files: 0,
388
+ totalLines: 0,
389
+ byteSize: 0
390
+ },
391
+ durationMs: Date.now() - start
392
+ };
393
+ }
394
+ yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
395
+ const { chunks, totalChunks } = yield* classifyAndCollectChunks(knownFiles, extractor, chunker, eff.concurrency, skipped);
396
+ if (totalChunks === 0) {
397
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
398
+ return {
399
+ success: true,
400
+ status: {
401
+ chunks: 0,
402
+ files: 0,
403
+ totalLines: 0,
404
+ byteSize: 0
405
+ },
406
+ durationMs: Date.now() - start
407
+ };
408
+ }
409
+ yield* vectorStore.storeBegin();
410
+ const embeddedRef = yield* Ref.make(0);
411
+ const stats = yield* d.progress({
412
+ message: `Embedding ${totalChunks} chunks...`,
413
+ max: totalChunks
414
+ }, Stream.fromIterable(chunks).pipe(Stream.grouped(eff.batchSize), Stream.mapEffect((batchChunk) => Effect.gen(function* () {
415
+ const batch = Chunk.toArray(batchChunk);
416
+ const texts = batch.map((c) => c.text);
417
+ const embeddings = yield* embedder.batch(texts);
418
+ yield* vectorStore.storeBatch(batch, embeddings);
419
+ const count = yield* Ref.updateAndGet(embeddedRef, (n) => n + batch.length);
420
+ yield* d.updateInteractive({
421
+ message: `Embedding ${count} of ${totalChunks} chunks`,
422
+ setTo: count
423
+ });
424
+ })), Stream.runDrain, Effect.matchEffect({
425
+ onSuccess: () => vectorStore.storeCommit(),
426
+ onFailure: (err) => vectorStore.storeAbort().pipe(Effect.flatMap(() => Effect.fail(err)))
427
+ })));
428
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
429
+ const durationSec = ((Date.now() - start) / 1e3).toFixed(1);
430
+ yield* d.log(`Indexed ${stats.chunks} chunks from ${stats.files} files in ${durationSec}s`, "success");
431
+ const fallbackInfo = yield* embedder.getFallbackInfo();
195
432
  return {
196
433
  success: true,
197
434
  status: {
198
- chunks: totalChunks,
199
- files: totalFiles,
200
- totalLines,
201
- byteSize: embeddings.length * dims * 4
202
- }
435
+ chunks: stats.chunks,
436
+ files: stats.files,
437
+ totalLines: stats.totalLines,
438
+ byteSize: stats.byteSize
439
+ },
440
+ durationMs: Date.now() - start,
441
+ embedderFallback: fallbackInfo
203
442
  };
204
443
  });
205
444
  return { index };
206
445
  })
207
446
  }) {};
447
+ const getFilename = (path) => path.split("/").pop() ?? path;
448
+ const getFileExtension = (filename) => {
449
+ const dotIndex = filename.lastIndexOf(".");
450
+ return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
451
+ };
452
+ const groupByExtension = (entries) => {
453
+ const byExt = /* @__PURE__ */ new Map();
454
+ for (const s of entries) {
455
+ const name = getFilename(s.path);
456
+ const ext = getFileExtension(name);
457
+ if (!byExt.has(ext)) byExt.set(ext, []);
458
+ byExt.get(ext).push(name);
459
+ }
460
+ return byExt;
461
+ };
462
+ const formatFileList = (files, maxDisplay = 5) => files.length > maxDisplay ? `${files.slice(0, maxDisplay).join(", ")} +${files.length - maxDisplay} more` : files.join(", ");
463
+ const buildSkippedLines = (extFailures, extractErrors) => {
464
+ const lines = [];
465
+ if (extFailures.length > 0) {
466
+ lines.push(`Unknown extensions (${extFailures.length})`);
467
+ for (const [ext, files] of groupByExtension(extFailures)) lines.push(` ${ext} (${files.length}): ${formatFileList(files)}`);
468
+ }
469
+ if (extractErrors.length > 0) {
470
+ if (lines.length > 0) lines.push("");
471
+ lines.push(`Extraction errors (${extractErrors.length})`);
472
+ for (const s of extractErrors) lines.push(` ${getFilename(s.path)}: ${s.reason}`);
473
+ }
474
+ return lines;
475
+ };
476
+ const displaySkippedNote = (d, skipped) => {
477
+ if (skipped.length === 0) return Effect.void;
478
+ const extFailures = skipped.filter((s) => s.reason === "unknown extension");
479
+ const extractErrors = skipped.filter((s) => s.reason !== "unknown extension");
480
+ return d.note(buildSkippedLines(extFailures, extractErrors).join("\n"), `Skipped ${skipped.length} files`);
481
+ };
208
482
  //#endregion
209
483
  //#region src/application/init-project.ts
210
484
  /**
@@ -293,22 +567,60 @@ const reportError = (error) => Effect.gen(function* () {
293
567
  });
294
568
  //#endregion
295
569
  //#region src/commands/index-cmd.ts
296
- /** CLI command: pix index [--force] [--verbose] [--json] */
570
+ const batchSizeOption = Options.integer("batch-size").pipe(Options.withAlias("b"), Options.optional);
571
+ const chunkConcurrencyOption = Options.integer("chunk-concurrency").pipe(Options.withAlias("c"), Options.optional);
572
+ const skipExtensionsOption = Options.text("skip-extensions").pipe(Options.withAlias("s"), Options.repeated);
573
+ const ignorePathOption = Options.text("ignore-path").pipe(Options.repeated);
574
+ const ignorePathsOption = Options.text("ignore-paths").pipe(Options.repeated);
575
+ const ignoreGitignoreOption = Options.boolean("ignore-gitignore").pipe(Options.withDefault(false));
576
+ const splitCsv = (values) => values.flatMap((v) => v.split(",").map((s) => s.trim()).filter((s) => s.length > 0));
577
+ const buildIndexOptions = (args) => {
578
+ if (Option.isSome(args.batchSize) && args.batchSize.value <= 0) throw new Error(`--batch-size must be positive, got ${args.batchSize.value}`);
579
+ if (Option.isSome(args.chunkConcurrency) && args.chunkConcurrency.value <= 0) throw new Error(`--chunk-concurrency must be positive, got ${args.chunkConcurrency.value}`);
580
+ const cliSkipExtensions = splitCsv(args.skipExtensions);
581
+ const cliIgnorePaths = [...args.ignorePath.map((s) => s.trim()).filter((s) => s.length > 0), ...splitCsv(args.ignorePaths)];
582
+ return {
583
+ batchSize: Option.getOrUndefined(args.batchSize),
584
+ chunkConcurrency: Option.getOrUndefined(args.chunkConcurrency),
585
+ skipExtensions: cliSkipExtensions.length > 0 ? cliSkipExtensions : void 0,
586
+ ignorePaths: cliIgnorePaths.length > 0 ? cliIgnorePaths : void 0,
587
+ ignoreGitignore: args.ignoreGitignore || void 0
588
+ };
589
+ };
590
+ const emitIndexResult = (d, result) => Effect.gen(function* () {
591
+ yield* d.json({
592
+ chunks: result.status.chunks,
593
+ files: result.status.files,
594
+ totalLines: result.status.totalLines,
595
+ byteSize: result.status.byteSize,
596
+ durationMs: result.durationMs,
597
+ ...result.embedderFallback && { embedderFallback: result.embedderFallback }
598
+ });
599
+ if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
600
+ });
297
601
  const indexCommand = Command.make("index", {
298
602
  force: Options.boolean("force").pipe(Options.withDefault(false)),
299
603
  verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
300
- json: Options.boolean("json").pipe(Options.withDefault(false))
301
- }, ({ force, verbose }) => Effect.gen(function* () {
604
+ json: Options.boolean("json").pipe(Options.withDefault(false)),
605
+ batchSize: batchSizeOption,
606
+ chunkConcurrency: chunkConcurrencyOption,
607
+ skipExtensions: skipExtensionsOption,
608
+ ignorePath: ignorePathOption,
609
+ ignorePaths: ignorePathsOption,
610
+ ignoreGitignore: ignoreGitignoreOption
611
+ }, ({ force, verbose, batchSize, chunkConcurrency, skipExtensions, ignorePath, ignorePaths, ignoreGitignore }) => Effect.gen(function* () {
302
612
  const d = yield* Display;
303
613
  if (force) yield* d.log("--force is currently not implemented and only a placeholder.", "warn");
304
614
  if (verbose) yield* d.log("--verbose is currently not implemented and only a placeholder.", "warn");
305
- const result = yield* d.spinner("Indexing project...", IndexProject.index());
306
- yield* d.json({
307
- chunks: result.status.chunks,
308
- files: result.status.files
615
+ const options = buildIndexOptions({
616
+ batchSize,
617
+ chunkConcurrency,
618
+ skipExtensions,
619
+ ignorePath,
620
+ ignorePaths,
621
+ ignoreGitignore
309
622
  });
310
- if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
311
- else yield* d.log(`Indexed ${result.status.chunks} chunks from ${result.status.files} files.`, "success");
623
+ yield* emitIndexResult(d, yield* d.spinner("Indexing project...", IndexProject.index(options)));
312
624
  }).pipe(Effect.catchAll(reportError)));
313
625
  //#endregion
314
626
  //#region src/commands/init.ts
@@ -493,29 +805,6 @@ const setupTerminalCleanup = () => {
493
805
  process.on("exit", makeTerminalCleanupHandler(process.stdin, process.stdout));
494
806
  };
495
807
  //#endregion
496
- //#region src/domain/errors.ts
497
- /** Config file or directory does not exist. Run pix init first. */
498
- var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
499
- /** Config file exists but contains invalid JSON. */
500
- var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
501
- /** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
502
- var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
503
- /** Disk is full — write operation could not complete. */
504
- var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
505
- /** Generic index store I/O failure (read, write, delete). */
506
- var StoreError = class extends Data.TaggedError("StoreError") {};
507
- /** Source file could not be read during chunking (binary, permissions, encoding). */
508
- var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
509
- /** Embedding model could not be downloaded or loaded. */
510
- var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
511
- /** Embedding model failed during inference. */
512
- var InferenceError = class extends Data.TaggedError("InferenceError") {};
513
- /**
514
- * Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
515
- * via ScanResult.skipped.
516
- */
517
- var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
518
- //#endregion
519
808
  //#region src/services/chunker.ts
520
809
  const MIN_CHUNK_CHARS = 20;
521
810
  const readFileContent = (fs, file) => fs.readFileString(file).pipe(Effect.mapError((cause) => new ChunkerError({
@@ -547,17 +836,24 @@ const buildChunks = (file, content, config) => {
547
836
  }
548
837
  return chunks;
549
838
  };
550
- const make$4 = Effect.gen(function* () {
839
+ const make$5 = Effect.gen(function* () {
551
840
  const fs = yield* FileSystem.FileSystem;
552
841
  const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
842
+ const chunkText = (text, file) => Effect.sync(() => {
843
+ if (text === "") return [];
844
+ return buildChunks(file, text, config);
845
+ });
553
846
  const chunkFile = (file) => Effect.gen(function* () {
554
847
  const content = yield* readFileContent(fs, file);
555
848
  if (content === "") return [];
556
849
  return buildChunks(file, content, config);
557
850
  });
558
- return { chunkFile };
851
+ return {
852
+ chunkFile,
853
+ chunkText
854
+ };
559
855
  });
560
- const ChunkerLive = Layer.effect(Chunker, make$4);
856
+ const ChunkerLive = Layer.effect(Chunker, make$5);
561
857
  //#endregion
562
858
  //#region src/services/config-store.ts
563
859
  const CONFIG_DIR = ".pix";
@@ -574,7 +870,7 @@ const mapConfigWriteError = (cause, path, action) => {
574
870
  cause
575
871
  });
576
872
  };
577
- const make$3 = Effect.gen(function* () {
873
+ const make$4 = Effect.gen(function* () {
578
874
  const fs = yield* FileSystem.FileSystem;
579
875
  const writeConfig = (config) => Effect.gen(function* () {
580
876
  const configJson = JSON.stringify(config, null, 2);
@@ -611,7 +907,28 @@ const make$3 = Effect.gen(function* () {
611
907
  configExists
612
908
  };
613
909
  });
614
- const ConfigStoreLive = Layer.effect(ConfigStore, make$3);
910
+ const ConfigStoreLive = Layer.effect(ConfigStore, make$4);
911
+ //#endregion
912
+ //#region src/services/content-extractor.ts
913
+ const make$3 = Effect.gen(function* () {
914
+ const fs = yield* FileSystem.FileSystem;
915
+ const processorMap = buildProcessorMap([]);
916
+ const extract = (file) => {
917
+ const lastSlash = file.lastIndexOf("/");
918
+ const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
919
+ const dotIndex = name.lastIndexOf(".");
920
+ const ext = dotIndex === -1 ? name.toLowerCase() : name.slice(dotIndex).toLowerCase();
921
+ const processor = processorMap[ext];
922
+ if (!processor) return Effect.fail({
923
+ _tag: "UnsupportedFormat",
924
+ message: `No processor for extension: ${ext}`,
925
+ extension: ext
926
+ });
927
+ return processor(file).pipe(Effect.provideService(FileSystem.FileSystem, fs));
928
+ };
929
+ return { extract };
930
+ });
931
+ const ContentExtractorLive = Layer.effect(ContentExtractor, make$3);
615
932
  //#endregion
616
933
  //#region src/domain/models.ts
617
934
  /** Registry of supported embedding models. */
@@ -641,9 +958,7 @@ const MODEL_REGISTRY = {
641
958
  };
642
959
  //#endregion
643
960
  //#region src/services/embedder.ts
644
- const CACHE_DIR = ".pix/cache";
645
- const BATCH_SIZE = 16;
646
- env.cacheDir = CACHE_DIR;
961
+ env.cacheDir = ".pix/cache";
647
962
  const normalize = (arr) => {
648
963
  let norm = 0;
649
964
  for (let i = 0; i < arr.length; i++) norm += arr[i] * arr[i];
@@ -685,16 +1000,14 @@ const createExtractor = (opts) => Effect.tryPromise(async () => {
685
1000
  model: opts.model,
686
1001
  cause
687
1002
  })));
688
- const createExtractorWithFallback = (opts) => {
1003
+ const createExtractorWithFallback = (opts, fallbackRef) => {
689
1004
  if (opts.device === "cpu") return createExtractor(opts);
690
1005
  return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
691
- const d = yield* Display;
692
- yield* d.log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
693
- yield* d.json({
694
- event: "embedder_fallback",
1006
+ yield* (yield* Display).log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
1007
+ yield* Ref.set(fallbackRef, Option.some({
695
1008
  originalDevice: opts.device,
696
1009
  reason: originalError.message
697
- });
1010
+ }));
698
1011
  return yield* createExtractor({
699
1012
  ...opts,
700
1013
  device: "cpu"
@@ -705,7 +1018,8 @@ const make$2 = Effect.gen(function* () {
705
1018
  const configStore = yield* ConfigStore;
706
1019
  const d = yield* Display;
707
1020
  const cfg = yield* resolveEmbedderConfig(configStore);
708
- const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
1021
+ const fallbackRef = yield* Ref.make(Option.none());
1022
+ const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg, fallbackRef));
709
1023
  const embed = (text) => Effect.gen(function* () {
710
1024
  const extractor = yield* getExtractor;
711
1025
  const data = (yield* Effect.tryPromise(() => extractor(text, {
@@ -722,44 +1036,35 @@ const make$2 = Effect.gen(function* () {
722
1036
  }).pipe(Effect.provideService(Display, d));
723
1037
  const batch = (texts) => Effect.gen(function* () {
724
1038
  const extractor = yield* getExtractor;
1039
+ const tensor = yield* Effect.tryPromise(() => extractor([...texts], {
1040
+ pooling: "mean",
1041
+ normalize: false
1042
+ })).pipe(Effect.mapError((cause) => new InferenceError({
1043
+ message: "Batch embedding inference failed",
1044
+ cause
1045
+ })));
1046
+ const data = tensor.data;
1047
+ const n = tensor.dims[0];
725
1048
  const results = [];
726
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
727
- const slice = texts.slice(i, i + BATCH_SIZE);
728
- const tensor = yield* Effect.tryPromise(() => extractor(slice, {
729
- pooling: "mean",
730
- normalize: false
731
- })).pipe(Effect.mapError((cause) => new InferenceError({
732
- message: "Batch embedding inference failed",
733
- cause
734
- })));
735
- const data = tensor.data;
736
- const n = tensor.dims[0];
737
- for (let j = 0; j < n; j++) {
738
- const offset = j * cfg.dims;
739
- results.push(normalize(data.slice(offset, offset + cfg.dims)));
740
- }
1049
+ for (let j = 0; j < n; j++) {
1050
+ const offset = j * cfg.dims;
1051
+ results.push({
1052
+ vector: normalize(data.slice(offset, offset + cfg.dims)),
1053
+ dims: cfg.dims
1054
+ });
741
1055
  }
742
- return results.map((vector) => ({
743
- vector,
744
- dims: cfg.dims
745
- }));
1056
+ return results;
746
1057
  }).pipe(Effect.provideService(Display, d));
1058
+ const getFallbackInfo = () => Ref.get(fallbackRef).pipe(Effect.map(Option.getOrElse(() => void 0)));
747
1059
  return {
748
1060
  embed,
749
- batch
1061
+ batch,
1062
+ getFallbackInfo
750
1063
  };
751
1064
  });
752
1065
  const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
753
1066
  //#endregion
754
1067
  //#region src/services/scanner.ts
755
- const ALWAYS_IGNORE = new Set([
756
- ".pix",
757
- "node_modules",
758
- ".git",
759
- "dist",
760
- "build",
761
- ".next"
762
- ]);
763
1068
  const make$1 = Effect.gen(function* () {
764
1069
  const fs = yield* FileSystem.FileSystem;
765
1070
  const readFileWithSkip = (path, mkReason) => fs.readFileString(path).pipe(Effect.map((content) => ({
@@ -792,47 +1097,93 @@ const make$1 = Effect.gen(function* () {
792
1097
  reason: `Could not stat: ${String(error)}`
793
1098
  }
794
1099
  })));
795
- const loadGitignoreRules = Effect.gen(function* () {
1100
+ const computeRelative = (fullPath, cwd) => fullPath.startsWith(cwd) ? fullPath.slice(cwd.length + 1) : fullPath;
1101
+ const loadIgnoreFile = (filePath, ig, skipped) => Effect.gen(function* () {
1102
+ const result = yield* readFileWithSkip(filePath, (error) => `Could not read ignore file: ${String(error)}`);
1103
+ if (result.skipped) skipped.push(result.skipped);
1104
+ if (result.content.trim()) ig.add(result.content.split("\n"));
1105
+ });
1106
+ const loadGitignoreRules = (ignoredPaths) => {
796
1107
  const ig = ignore();
797
- const cwd = process.cwd();
798
1108
  const skipped = [];
799
- const rootContent = yield* readFileWithSkip(`${cwd}/.gitignore`, (error) => `Could not read gitignore: ${String(error)}`);
800
- if (rootContent.skipped) skipped.push(rootContent.skipped);
801
- if (rootContent.content.trim()) ig.add(rootContent.content.split("\n"));
1109
+ if (ignoredPaths.length > 0) ig.add(ignoredPaths);
1110
+ return Effect.succeed({
1111
+ ig,
1112
+ skipped
1113
+ });
1114
+ };
1115
+ const loadGitignoreRulesWithFiles = (ignoredPaths, cwd) => Effect.gen(function* () {
1116
+ const ig = ignore();
1117
+ const skipped = [];
1118
+ if (ignoredPaths.length > 0) ig.add(ignoredPaths);
1119
+ const gitignorePath = `${cwd}/.gitignore`;
1120
+ if (yield* fs.exists(gitignorePath).pipe(Effect.catchAll(() => Effect.succeed(false)))) yield* loadIgnoreFile(gitignorePath, ig, skipped);
802
1121
  const excludePath = `${cwd}/.git/info/exclude`;
803
- if (yield* fs.exists(excludePath)) {
804
- const excludeContent = yield* readFileWithSkip(excludePath, (error) => `Could not read exclude file: ${String(error)}`);
805
- if (excludeContent.skipped) skipped.push(excludeContent.skipped);
806
- if (excludeContent.content.trim()) ig.add(excludeContent.content.split("\n"));
807
- }
1122
+ if (yield* fs.exists(excludePath).pipe(Effect.catchAll(() => Effect.succeed(false)))) yield* loadIgnoreFile(excludePath, ig, skipped);
808
1123
  return {
809
1124
  ig,
810
1125
  skipped
811
1126
  };
812
1127
  });
813
- const walk = (dir, extensions) => Effect.gen(function* () {
1128
+ const processEntry = (entry, dir, ig, cwd) => Effect.gen(function* () {
1129
+ const fullPath = `${dir}/${entry}`;
1130
+ const statResult = yield* statWithSkip(fullPath);
1131
+ if (statResult.skipped) return {
1132
+ files: [],
1133
+ skipped: [statResult.skipped]
1134
+ };
1135
+ if (!statResult.info) return {
1136
+ files: [],
1137
+ skipped: []
1138
+ };
1139
+ const info = statResult.info;
1140
+ if (info.type === "Directory") {
1141
+ const relativeDir = computeRelative(fullPath, cwd);
1142
+ if (ig.ignores(relativeDir)) return {
1143
+ files: [],
1144
+ skipped: [{
1145
+ path: fullPath,
1146
+ reason: `Ignored by config pattern: ${relativeDir}`
1147
+ }]
1148
+ };
1149
+ return {
1150
+ files: [],
1151
+ skipped: [],
1152
+ recurse: true
1153
+ };
1154
+ }
1155
+ if (info.type === "File") {
1156
+ const relativePath = computeRelative(fullPath, cwd);
1157
+ if (ig.ignores(relativePath)) return {
1158
+ files: [],
1159
+ skipped: [{
1160
+ path: fullPath,
1161
+ reason: `Ignored by config pattern: ${relativePath}`
1162
+ }]
1163
+ };
1164
+ return {
1165
+ files: [fullPath],
1166
+ skipped: []
1167
+ };
1168
+ }
1169
+ return {
1170
+ files: [],
1171
+ skipped: []
1172
+ };
1173
+ });
1174
+ const walk = (dir, ig, cwd) => Effect.gen(function* () {
814
1175
  const result = yield* readDirectoryWithSkip(dir);
815
1176
  let files = [];
816
1177
  const skipped = [];
817
1178
  if (result.skipped) skipped.push(result.skipped);
818
1179
  for (const entry of result.entries) {
819
- if (ALWAYS_IGNORE.has(entry)) continue;
820
- const fullPath = `${dir}/${entry}`;
821
- const info = yield* statWithSkip(fullPath);
822
- if (info.skipped) {
823
- skipped.push(info.skipped);
824
- continue;
825
- }
826
- if (!info.info) continue;
827
- if (info.info.type === "Directory") {
828
- const sub = yield* walk(fullPath, extensions);
1180
+ const entryResult = yield* processEntry(entry, dir, ig, cwd);
1181
+ files.push(...entryResult.files);
1182
+ skipped.push(...entryResult.skipped);
1183
+ if ("recurse" in entryResult) {
1184
+ const sub = yield* walk(`${dir}/${entry}`, ig, cwd);
829
1185
  files.push(...sub.files);
830
1186
  skipped.push(...sub.skipped);
831
- } else if (info.info.type === "File") {
832
- const dotIndex = entry.lastIndexOf(".");
833
- if (dotIndex === -1) continue;
834
- const ext = entry.slice(dotIndex);
835
- if (extensions.has(ext)) files.push(fullPath);
836
1187
  }
837
1188
  }
838
1189
  return {
@@ -840,16 +1191,15 @@ const make$1 = Effect.gen(function* () {
840
1191
  skipped
841
1192
  };
842
1193
  });
843
- const scanFiles = (extensions) => Effect.gen(function* () {
844
- const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules.pipe(Effect.mapError((cause) => new ScanFailed({
845
- message: `Failed to load gitignore rules: ${String(cause)}`,
1194
+ const scanFiles = (ignoredPaths, ignoreGitignore) => Effect.gen(function* () {
1195
+ const cwd = process.cwd();
1196
+ const { ig, skipped: ignoreSkipped } = yield* (ignoreGitignore ? loadGitignoreRules(ignoredPaths) : loadGitignoreRulesWithFiles(ignoredPaths, cwd)).pipe(Effect.mapError((cause) => new ScanFailed({
1197
+ message: `Failed to load ignore rules: ${String(cause)}`,
846
1198
  cause
847
1199
  })));
848
- const cwd = process.cwd();
849
- const { files: paths, skipped: walkSkipped } = yield* walk(cwd, new Set(extensions));
850
- const relativePaths = paths.map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
1200
+ const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
851
1201
  return {
852
- files: ig.filter(relativePaths).map((p) => `${cwd}/${p}`),
1202
+ files,
853
1203
  skipped: [...ignoreSkipped, ...walkSkipped]
854
1204
  };
855
1205
  });
@@ -868,6 +1218,22 @@ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !
868
1218
  */
869
1219
  const make = Effect.gen(function* () {
870
1220
  const fs = yield* FileSystem.FileSystem;
1221
+ const chunksTemp = `${CHUNKS_FILE}.tmp`;
1222
+ const vectorsTemp = `${VECTORS_FILE}.tmp`;
1223
+ const seenFiles = yield* Ref.make(/* @__PURE__ */ new Set());
1224
+ const statsAccumulator = yield* Ref.make({
1225
+ chunks: 0,
1226
+ files: 0,
1227
+ totalLines: 0,
1228
+ byteSize: 0
1229
+ });
1230
+ const serializeVectors = (embeddings) => {
1231
+ const dims = embeddings[0]?.dims ?? 384;
1232
+ const totalFloats = embeddings.length * dims;
1233
+ const vectorsArray = new Float32Array(totalFloats);
1234
+ for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
1235
+ return Buffer.from(vectorsArray.buffer);
1236
+ };
871
1237
  /**
872
1238
  * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
873
1239
  * field contains the source code.
@@ -930,6 +1296,60 @@ const make = Effect.gen(function* () {
930
1296
  deleted: true
931
1297
  };
932
1298
  });
1299
+ const storeBegin = () => Effect.gen(function* () {
1300
+ yield* ensureDirExists(STORE_DIR, ".pix directory");
1301
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1302
+ yield* Ref.set(statsAccumulator, {
1303
+ chunks: 0,
1304
+ files: 0,
1305
+ totalLines: 0,
1306
+ byteSize: 0
1307
+ });
1308
+ if (yield* withStoreError(fs.exists(chunksTemp), "check chunks temp")) yield* withStoreError(fs.remove(chunksTemp), "clean stale chunks temp", chunksTemp);
1309
+ if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
1310
+ });
1311
+ const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
1312
+ const content = chunks.map((c) => JSON.stringify({
1313
+ id: c.id,
1314
+ idx: c.idx,
1315
+ file: c.file,
1316
+ startLine: c.startLine,
1317
+ endLine: c.endLine,
1318
+ text: c.text
1319
+ })).join("\n") + "\n";
1320
+ yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
1321
+ const buffer = serializeVectors(embeddings);
1322
+ yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
1323
+ const dims = embeddings[0]?.dims ?? 384;
1324
+ const batchLines = chunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
1325
+ const batchBytes = embeddings.length * dims * 4;
1326
+ yield* Ref.update(seenFiles, (prev) => {
1327
+ for (const c of chunks) prev.add(c.file);
1328
+ return prev;
1329
+ });
1330
+ yield* Ref.update(statsAccumulator, (prev) => ({
1331
+ chunks: prev.chunks + chunks.length,
1332
+ files: 0,
1333
+ totalLines: prev.totalLines + batchLines,
1334
+ byteSize: prev.byteSize + batchBytes
1335
+ }));
1336
+ });
1337
+ const storeCommit = () => Effect.gen(function* () {
1338
+ yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1339
+ yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1340
+ const stats = yield* Ref.get(statsAccumulator);
1341
+ const files = yield* Ref.get(seenFiles);
1342
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1343
+ return {
1344
+ ...stats,
1345
+ files: files.size
1346
+ };
1347
+ });
1348
+ const storeAbort = () => Effect.gen(function* () {
1349
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1350
+ if (yield* withReadError(fs.exists(chunksTemp), "check chunks temp")) yield* withReadError(fs.remove(chunksTemp), "abort chunks temp", chunksTemp);
1351
+ if (yield* withReadError(fs.exists(vectorsTemp), "check vectors temp")) yield* withReadError(fs.remove(vectorsTemp), "abort vectors temp", vectorsTemp);
1352
+ });
933
1353
  const store = (chunks, embeddings) => Effect.gen(function* () {
934
1354
  yield* ensureDirExists(STORE_DIR, ".pix directory");
935
1355
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
@@ -944,11 +1364,7 @@ const make = Effect.gen(function* () {
944
1364
  yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
945
1365
  yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
946
1366
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
947
- const dims = embeddings[0]?.dims ?? 384;
948
- const totalFloats = embeddings.length * dims;
949
- const vectorsArray = new Float32Array(totalFloats);
950
- for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
951
- const buffer = Buffer.from(vectorsArray.buffer);
1367
+ const buffer = serializeVectors(embeddings);
952
1368
  yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
953
1369
  yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
954
1370
  });
@@ -1017,6 +1433,10 @@ const make = Effect.gen(function* () {
1017
1433
  });
1018
1434
  return {
1019
1435
  store,
1436
+ storeBegin,
1437
+ storeBatch,
1438
+ storeCommit,
1439
+ storeAbort,
1020
1440
  search,
1021
1441
  getStatus,
1022
1442
  reset
@@ -1025,7 +1445,7 @@ const make = Effect.gen(function* () {
1025
1445
  const VectorStoreLive = Layer.effect(VectorStore, make);
1026
1446
  //#endregion
1027
1447
  //#region src/index.ts
1028
- const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive);
1448
+ const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive, ContentExtractorLive);
1029
1449
  const ChunkerLayer = ChunkerLive.pipe(Layer.provide(ServicesLayer));
1030
1450
  const InfraLayer = Layer.mergeAll(ServicesLayer, ChunkerLayer).pipe(Layer.provide(NodeContext.layer));
1031
1451
  const UseCaseLayer = Layer.mergeAll(InitProject.Default, GetStatus.Default, QueryProject.Default, IndexProject.Default, ResetIndex.Default);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.9.1",
3
+ "version": "0.11.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",