@lucas-bur/pix 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +7 -7
  2. package/dist/index.mjs +498 -180
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -19,13 +19,13 @@ pix query "authentication middleware"
19
19
 
20
20
  ## Commands
21
21
 
22
- | Command | Description | JSON flag |
23
- | -------------------- | ------------------------------------------- | --------- |
24
- | `pix init` | Create `.pix/config.json` with defaults | `--json` |
25
- | `pix index` | Scan, chunk, embed, and store project files | `--json` |
26
- | `pix query "<text>"` | Semantic search via cosine similarity | `--json` |
27
- | `pix status` | Show index statistics | `--json` |
28
- | `pix reset` | Delete index files (chunks + vectors) | `--json` |
22
+ | Command | Description | JSON flag |
23
+ | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- |
24
+ | `pix init` | Create `.pix/config.json` with defaults | `--json` |
25
+ | `pix index` | Scan, chunk, embed, and store project files | `--json` |
26
+ | `pix query "<text>" [flags]` | Semantic search via cosine similarity (`--top`, `--context-lines`, `--ignore-path`, `--only-path`, `--max-characters`, `--no-content`) | `--json` |
27
+ | `pix status` | Show index statistics | `--json` |
28
+ | `pix reset` | Delete index files (chunks + vectors) | `--json` |
29
29
 
30
30
  All commands support `--json` for structured output on stdout — ideal for piping to AI agents.
31
31
 
package/dist/index.mjs CHANGED
@@ -2,7 +2,8 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Args, CliConfig, Command, Options } from "@effect/cli";
4
4
  import { NodeContext, NodeRuntime } from "@effect/platform-node";
5
- import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
5
+ import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
6
+ import * as Chunk from "effect/Chunk";
6
7
  import { styleText } from "node:util";
7
8
  import * as clack from "@clack/prompts";
8
9
  import { FileSystem } from "@effect/platform";
@@ -74,6 +75,7 @@ const withInteractive = (activeRef, acquire, setActive, release, effect) => Ref.
74
75
  /** Display implementation using @clack/prompts for interactive terminal output */
75
76
  const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
76
77
  const activeRef = yield* Ref.make(null);
78
+ const lastSpinnerMsg = yield* Ref.make("");
77
79
  return {
78
80
  intro: (title) => Effect.sync(() => clack.intro(styleText("inverse", ` ${title} `))),
79
81
  outro: (message) => Effect.sync(() => clack.outro(message)),
@@ -84,11 +86,17 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
84
86
  const s = clack.spinner();
85
87
  s.start(message);
86
88
  return s;
87
- }), (s) => ({
89
+ }).pipe(Effect.tap(() => Ref.set(lastSpinnerMsg, message))), (s) => ({
88
90
  type: "spinner",
89
91
  handle: s
90
- }), (s, exit) => Effect.sync(() => s.stop(exit._tag === "Success" ? message : `${message} (failed)`)), effect),
91
- progress: (opts, effect) => withInteractive(activeRef, Effect.sync(() => {
92
+ }), (s, exit) => lastSpinnerMsg.pipe(Effect.flatMap((lastMsg) => Effect.sync(() => s.stop(exit._tag === "Success" && lastMsg ? lastMsg : `${message} (failed)`)))), effect),
93
+ progress: (opts, effect) => Effect.gen(function* () {
94
+ const current = yield* Ref.get(activeRef);
95
+ if (current && current.type === "spinner") {
96
+ const msg = yield* Ref.get(lastSpinnerMsg);
97
+ current.handle.stop(msg || opts.message);
98
+ yield* Ref.set(activeRef, null);
99
+ }
92
100
  const bar = clack.progress({
93
101
  max: opts.max,
94
102
  style: opts.style ?? "heavy",
@@ -96,16 +104,27 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
96
104
  indicator: opts.indicator ?? "dots"
97
105
  });
98
106
  bar.start(opts.message);
99
- return bar;
100
- }), (bar) => ({
101
- type: "progress",
102
- handle: bar,
103
- value: 0,
104
- max: opts.max
105
- }), (bar, exit) => Effect.sync(() => exit._tag === "Success" ? bar.stop(opts.message) : bar.error(opts.message)), effect),
107
+ yield* Ref.set(activeRef, {
108
+ type: "progress",
109
+ handle: bar,
110
+ value: 0,
111
+ max: opts.max
112
+ });
113
+ const exit = yield* Effect.exit(effect);
114
+ yield* Ref.set(activeRef, null);
115
+ if (Exit.isSuccess(exit)) {
116
+ bar.stop(opts.message);
117
+ return exit.value;
118
+ }
119
+ bar.error(opts.message);
120
+ return yield* Effect.failCause(exit.cause);
121
+ }),
106
122
  updateInteractive: (payload) => Ref.get(activeRef).pipe(Effect.flatMap((active) => {
107
123
  if (!active) return Effect.void;
108
- if (active.type === "spinner") return Effect.sync(() => active.handle.message(payloadText(payload)));
124
+ if (active.type === "spinner") {
125
+ const msg = payloadText(payload);
126
+ return Effect.sync(() => active.handle.message(msg)).pipe(Effect.andThen(Ref.set(lastSpinnerMsg, msg)));
127
+ }
109
128
  const delta = computeDelta(payload, {
110
129
  value: active.value,
111
130
  max: active.max
@@ -149,22 +168,48 @@ const DEFAULT_CONFIG = {
149
168
  "dist",
150
169
  "build",
151
170
  ".next",
152
- ".agents",
153
- ".claude",
154
171
  ".vscode",
155
- ".github",
156
172
  "coverage",
157
173
  "*-lock.yaml",
158
174
  "*-lock.json",
159
- "*.lock"
175
+ "*.lock",
176
+ ".vite-hooks",
177
+ ".fallow"
160
178
  ],
161
179
  embedder: {
162
180
  model: "Xenova/all-MiniLM-L6-v2",
163
181
  device: "auto",
164
- dtype: "fp32"
182
+ dtype: "fp32",
183
+ batchSize: 16
165
184
  }
166
185
  };
167
186
  //#endregion
187
+ //#region src/lib/extension.ts
188
+ /** Extract the last path segment (filename) from a file path. Handles both `/` and `\\` separators. */
189
+ const getFilename = (path) => {
190
+ const sepIndex = Math.max(path.lastIndexOf("/"), path.lastIndexOf("\\"));
191
+ return sepIndex >= 0 ? path.slice(sepIndex + 1) : path;
192
+ };
193
+ /**
194
+ * Extract the lowercase extension (including dot) from a file path. Used for processor dispatch.
195
+ * Strips the directory, then returns the part after the last dot. If no dot, returns the full
196
+ * filename lowercased.
197
+ */
198
+ const getExtension = (file) => {
199
+ const name = getFilename(file);
200
+ const dotIndex = name.lastIndexOf(".");
201
+ if (dotIndex === -1) return name.toLowerCase();
202
+ return name.slice(dotIndex).toLowerCase();
203
+ };
204
+ /**
205
+ * Extract the extension from a filename (not full path). Returns `"(no extension)"` if no dot
206
+ * exists. Used for display grouping of skipped files.
207
+ */
208
+ const getFileExtension = (filename) => {
209
+ const dotIndex = filename.lastIndexOf(".");
210
+ return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
211
+ };
212
+ //#endregion
168
213
  //#region src/domain/errors.ts
169
214
  /** Config file or directory does not exist. Run pix init first. */
170
215
  var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
@@ -289,13 +334,13 @@ function buildProcessorMap(skipExtensions) {
289
334
  }
290
335
  //#endregion
291
336
  //#region src/application/index-project.ts
292
- function getExtension(file) {
293
- const lastSlash = file.lastIndexOf("/");
294
- const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
295
- const dotIndex = name.lastIndexOf(".");
296
- if (dotIndex === -1) return name.toLowerCase();
297
- return name.slice(dotIndex).toLowerCase();
298
- }
337
+ const deriveEffectiveConfig = (opts, config) => ({
338
+ batchSize: opts.batchSize ?? config.embedder.batchSize ?? 16,
339
+ concurrency: Math.max(1, opts.chunkConcurrency ?? config.chunkConcurrency ?? 8),
340
+ skipExtensions: opts.skipExtensions ? [...config.skipExtensions, ...opts.skipExtensions] : config.skipExtensions,
341
+ ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
342
+ ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
343
+ });
299
344
  const classifyFiles = (files, processorMap) => {
300
345
  const knownFiles = [];
301
346
  const skippedFiles = [];
@@ -313,11 +358,19 @@ const classifyFiles = (files, processorMap) => {
313
358
  unknownExtensions
314
359
  };
315
360
  };
316
- /**
317
- * Use case: index project files. Pipeline: scan → ContentExtractor → chunk → embed → store. Depends
318
- * on ConfigStore, Scanner, Chunker, Embedder, VectorStore, Display, ContentExtractor via Effect
319
- * tags.
320
- */
361
+ const classifyAndCollectChunks = (knownFiles, extractor, chunker, concurrency, skipped) => Stream.fromIterable(knownFiles).pipe(Stream.mapEffect((file) => extractor.extract(file).pipe(Effect.flatMap((text) => Effect.succeed(Option.some({
362
+ file,
363
+ text
364
+ }))), Effect.catchAll((err) => Ref.update(skipped, (prev) => [...prev, {
365
+ path: file,
366
+ reason: err.message
367
+ }]).pipe(Effect.flatMap(() => Effect.succeed(Option.none()))))), { concurrency }), Stream.filterMap((opt) => opt), Stream.mapEffect(({ file, text }) => chunker.chunkText(text, file), { concurrency }), Stream.flatMap((chunks) => Stream.fromIterable(chunks)), Stream.runCollect, Effect.map((allChunks) => {
368
+ const chunks = Chunk.toArray(allChunks);
369
+ return {
370
+ chunks,
371
+ totalChunks: chunks.length
372
+ };
373
+ }));
321
374
  var IndexProject = class extends Effect.Service()("IndexProject", {
322
375
  accessors: true,
323
376
  effect: Effect.gen(function* () {
@@ -328,65 +381,118 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
328
381
  const vectorStore = yield* VectorStore;
329
382
  const d = yield* Display;
330
383
  const extractor = yield* ContentExtractor;
331
- const index = () => Effect.gen(function* () {
384
+ const index = (opts = {}) => Effect.gen(function* () {
385
+ const start = Date.now();
332
386
  if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
333
- const config = yield* configStore.readConfig();
334
- const processorMap = buildProcessorMap(config.skipExtensions);
387
+ const eff = deriveEffectiveConfig(opts, yield* configStore.readConfig());
388
+ const processorMap = buildProcessorMap(eff.skipExtensions);
335
389
  yield* d.updateInteractive("Scanning source files...");
336
- const ignoredPaths = config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths;
337
- const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles((yield* scanner.scanFiles(ignoredPaths)).files, processorMap);
338
- if (unknownExtensions.size > 0) yield* d.log(`Skipped ${skippedFiles.length} files with unknown extensions: ${[...unknownExtensions].join(", ")}`, "warn");
339
- if (knownFiles.length === 0) return {
340
- success: true,
341
- status: {
342
- chunks: 0,
343
- files: 0,
344
- totalLines: 0,
345
- byteSize: 0
346
- }
347
- };
390
+ const scanResult = yield* scanner.scanFiles(eff.ignoredPaths, eff.ignoreGitignore);
391
+ const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(scanResult.files, processorMap);
392
+ const skipped = yield* Ref.make(scanResult.skipped.filter((s) => !s.reason.startsWith("Ignored by config pattern")).map((s) => ({
393
+ path: s.path,
394
+ reason: s.reason
395
+ })));
396
+ if (unknownExtensions.size > 0) yield* Ref.update(skipped, (prev) => [...prev, ...skippedFiles.map((f) => ({
397
+ path: f,
398
+ reason: "unknown extension"
399
+ }))]);
400
+ if (knownFiles.length === 0) {
401
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
402
+ return {
403
+ success: true,
404
+ status: {
405
+ chunks: 0,
406
+ files: 0,
407
+ totalLines: 0,
408
+ byteSize: 0
409
+ },
410
+ durationMs: Date.now() - start
411
+ };
412
+ }
348
413
  yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
349
- const allChunks = (yield* Effect.forEach(knownFiles, (file) => Effect.gen(function* () {
350
- const result = yield* Effect.either(extractor.extract(file));
351
- if (result._tag === "Left") {
352
- if (result.left._tag === "UnsupportedFormat") {
353
- yield* d.log(`Skipping ${file}: ${result.left.message}`, "warn");
354
- return [];
355
- }
356
- return yield* Effect.fail(result.left);
357
- }
358
- return yield* chunker.chunkText(result.right, file);
359
- }), { concurrency: Math.max(1, config.chunkConcurrency ?? 8) })).flat();
360
- const totalChunks = allChunks.length;
361
- const totalFiles = new Set(allChunks.map((c) => c.file)).size;
362
- const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
363
- if (totalChunks === 0) return {
364
- success: true,
365
- status: {
366
- chunks: 0,
367
- files: 0,
368
- totalLines: 0,
369
- byteSize: 0
370
- }
371
- };
372
- yield* d.updateInteractive(`Embedding ${totalChunks} chunks...`);
373
- const texts = allChunks.map((c) => c.text);
374
- const embeddings = yield* embedder.batch(texts);
375
- yield* vectorStore.store(allChunks, embeddings);
376
- const dims = embeddings[0]?.dims ?? 384;
414
+ const { chunks, totalChunks } = yield* classifyAndCollectChunks(knownFiles, extractor, chunker, eff.concurrency, skipped);
415
+ if (totalChunks === 0) {
416
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
417
+ return {
418
+ success: true,
419
+ status: {
420
+ chunks: 0,
421
+ files: 0,
422
+ totalLines: 0,
423
+ byteSize: 0
424
+ },
425
+ durationMs: Date.now() - start
426
+ };
427
+ }
428
+ yield* vectorStore.storeBegin();
429
+ const embeddedRef = yield* Ref.make(0);
430
+ const stats = yield* d.progress({
431
+ message: `Embedding ${totalChunks} chunks...`,
432
+ max: totalChunks
433
+ }, Stream.fromIterable(chunks).pipe(Stream.grouped(eff.batchSize), Stream.mapEffect((batchChunk) => Effect.gen(function* () {
434
+ const batch = Chunk.toArray(batchChunk);
435
+ const texts = batch.map((c) => c.text);
436
+ const embeddings = yield* embedder.batch(texts);
437
+ yield* vectorStore.storeBatch(batch, embeddings);
438
+ const count = yield* Ref.updateAndGet(embeddedRef, (n) => n + batch.length);
439
+ yield* d.updateInteractive({
440
+ message: `Embedding ${count} of ${totalChunks} chunks`,
441
+ setTo: count
442
+ });
443
+ })), Stream.runDrain, Effect.matchEffect({
444
+ onSuccess: () => vectorStore.storeCommit(),
445
+ onFailure: (err) => vectorStore.storeAbort().pipe(Effect.flatMap(() => Effect.fail(err)))
446
+ })));
447
+ yield* displaySkippedNote(d, yield* Ref.get(skipped));
448
+ const durationSec = ((Date.now() - start) / 1e3).toFixed(1);
449
+ yield* d.log(`Indexed ${stats.chunks} chunks from ${stats.files} files in ${durationSec}s`, "success");
450
+ const fallbackInfo = yield* embedder.getFallbackInfo();
377
451
  return {
378
452
  success: true,
379
453
  status: {
380
- chunks: totalChunks,
381
- files: totalFiles,
382
- totalLines,
383
- byteSize: embeddings.length * dims * 4
384
- }
454
+ chunks: stats.chunks,
455
+ files: stats.files,
456
+ totalLines: stats.totalLines,
457
+ byteSize: stats.byteSize
458
+ },
459
+ durationMs: Date.now() - start,
460
+ embedderFallback: fallbackInfo
385
461
  };
386
462
  });
387
463
  return { index };
388
464
  })
389
465
  }) {};
466
+ const groupByExtension = (entries) => {
467
+ const byExt = /* @__PURE__ */ new Map();
468
+ for (const s of entries) {
469
+ const name = getFilename(s.path);
470
+ const ext = getFileExtension(name);
471
+ if (!byExt.has(ext)) byExt.set(ext, []);
472
+ byExt.get(ext).push(name);
473
+ }
474
+ return byExt;
475
+ };
476
+ const formatFileList = (files, maxDisplay = 5) => files.length > maxDisplay ? `${files.slice(0, maxDisplay).join(", ")} +${files.length - maxDisplay} more` : files.join(", ");
477
+ const buildSkippedLines = (extFailures, extractErrors) => {
478
+ const lines = [];
479
+ if (extFailures.length > 0) {
480
+ lines.push(`Unknown extensions (${extFailures.length})`);
481
+ for (const [ext, files] of groupByExtension(extFailures)) lines.push(` ${ext} (${files.length}): ${formatFileList(files)}`);
482
+ }
483
+ if (extractErrors.length > 0) {
484
+ if (lines.length > 0) lines.push("");
485
+ lines.push(`Extraction errors (${extractErrors.length})`);
486
+ for (const s of extractErrors) lines.push(` ${getFilename(s.path)}: ${s.reason}`);
487
+ }
488
+ return lines;
489
+ };
490
+ const displaySkippedNote = (d, skipped) => {
491
+ if (skipped.length === 0) return Effect.void;
492
+ const extFailures = skipped.filter((s) => s.reason === "unknown extension");
493
+ const extractErrors = skipped.filter((s) => s.reason !== "unknown extension");
494
+ return d.note(buildSkippedLines(extFailures, extractErrors).join("\n"), `Skipped ${skipped.length} files`);
495
+ };
390
496
  //#endregion
391
497
  //#region src/application/init-project.ts
392
498
  /**
@@ -412,7 +518,7 @@ var QueryProject = class extends Effect.Service()("QueryProject", {
412
518
  effect: Effect.gen(function* () {
413
519
  const embedder = yield* Embedder;
414
520
  const store = yield* VectorStore;
415
- const queryProject = (queryText, topK) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, topK)));
521
+ const queryProject = (queryText, options) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, options)));
416
522
  return { queryProject };
417
523
  })
418
524
  }) {};
@@ -475,22 +581,60 @@ const reportError = (error) => Effect.gen(function* () {
475
581
  });
476
582
  //#endregion
477
583
  //#region src/commands/index-cmd.ts
478
- /** CLI command: pix index [--force] [--verbose] [--json] */
584
+ const batchSizeOption = Options.integer("batch-size").pipe(Options.withAlias("b"), Options.optional);
585
+ const chunkConcurrencyOption = Options.integer("chunk-concurrency").pipe(Options.withAlias("c"), Options.optional);
586
+ const skipExtensionsOption = Options.text("skip-extensions").pipe(Options.withAlias("s"), Options.repeated);
587
+ const ignorePathOption = Options.text("ignore-path").pipe(Options.repeated);
588
+ const ignorePathsOption = Options.text("ignore-paths").pipe(Options.repeated);
589
+ const ignoreGitignoreOption = Options.boolean("ignore-gitignore").pipe(Options.withDefault(false));
590
+ const splitCsv = (values) => values.flatMap((v) => v.split(",").map((s) => s.trim()).filter((s) => s.length > 0));
591
+ const buildIndexOptions = (args) => {
592
+ if (Option.isSome(args.batchSize) && args.batchSize.value <= 0) throw new Error(`--batch-size must be positive, got ${args.batchSize.value}`);
593
+ if (Option.isSome(args.chunkConcurrency) && args.chunkConcurrency.value <= 0) throw new Error(`--chunk-concurrency must be positive, got ${args.chunkConcurrency.value}`);
594
+ const cliSkipExtensions = splitCsv(args.skipExtensions);
595
+ const cliIgnorePaths = [...args.ignorePath.map((s) => s.trim()).filter((s) => s.length > 0), ...splitCsv(args.ignorePaths)];
596
+ return {
597
+ batchSize: Option.getOrUndefined(args.batchSize),
598
+ chunkConcurrency: Option.getOrUndefined(args.chunkConcurrency),
599
+ skipExtensions: cliSkipExtensions.length > 0 ? cliSkipExtensions : void 0,
600
+ ignorePaths: cliIgnorePaths.length > 0 ? cliIgnorePaths : void 0,
601
+ ignoreGitignore: args.ignoreGitignore || void 0
602
+ };
603
+ };
604
+ const emitIndexResult = (d, result) => Effect.gen(function* () {
605
+ yield* d.json({
606
+ chunks: result.status.chunks,
607
+ files: result.status.files,
608
+ totalLines: result.status.totalLines,
609
+ byteSize: result.status.byteSize,
610
+ durationMs: result.durationMs,
611
+ ...result.embedderFallback && { embedderFallback: result.embedderFallback }
612
+ });
613
+ if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
614
+ });
479
615
  const indexCommand = Command.make("index", {
480
616
  force: Options.boolean("force").pipe(Options.withDefault(false)),
481
617
  verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
482
- json: Options.boolean("json").pipe(Options.withDefault(false))
483
- }, ({ force, verbose }) => Effect.gen(function* () {
618
+ json: Options.boolean("json").pipe(Options.withDefault(false)),
619
+ batchSize: batchSizeOption,
620
+ chunkConcurrency: chunkConcurrencyOption,
621
+ skipExtensions: skipExtensionsOption,
622
+ ignorePath: ignorePathOption,
623
+ ignorePaths: ignorePathsOption,
624
+ ignoreGitignore: ignoreGitignoreOption
625
+ }, ({ force, verbose, batchSize, chunkConcurrency, skipExtensions, ignorePath, ignorePaths, ignoreGitignore }) => Effect.gen(function* () {
484
626
  const d = yield* Display;
485
627
  if (force) yield* d.log("--force is currently not implemented and only a placeholder.", "warn");
486
628
  if (verbose) yield* d.log("--verbose is currently not implemented and only a placeholder.", "warn");
487
- const result = yield* d.spinner("Indexing project...", IndexProject.index());
488
- yield* d.json({
489
- chunks: result.status.chunks,
490
- files: result.status.files
629
+ const options = buildIndexOptions({
630
+ batchSize,
631
+ chunkConcurrency,
632
+ skipExtensions,
633
+ ignorePath,
634
+ ignorePaths,
635
+ ignoreGitignore
491
636
  });
492
- if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
493
- else yield* d.log(`Indexed ${result.status.chunks} chunks from ${result.status.files} files.`, "success");
637
+ yield* emitIndexResult(d, yield* d.spinner("Indexing project...", IndexProject.index(options)));
494
638
  }).pipe(Effect.catchAll(reportError)));
495
639
  //#endregion
496
640
  //#region src/commands/init.ts
@@ -506,6 +650,51 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
506
650
  DiskFullError: reportError
507
651
  })));
508
652
  //#endregion
653
+ //#region src/lib/format.ts
654
+ /** Format byte count as human-readable string (e.g. "1.5 MB") */
655
+ const formatBytes = (bytes) => {
656
+ if (bytes === 0) return "0 B";
657
+ const units = [
658
+ "B",
659
+ "KB",
660
+ "MB",
661
+ "GB"
662
+ ];
663
+ const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
664
+ return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
665
+ };
666
+ /**
667
+ * Apply a character budget to search results. Returns results in score order capped by the budget.
668
+ * The last result may be truncated to fit the remaining budget. Character count includes file path,
669
+ * line numbers, chunk text, and context lines.
670
+ */
671
+ const applyCharBudget = (results, maxChars) => {
672
+ if (!maxChars || maxChars <= 0) return { results };
673
+ const budgeted = [];
674
+ let remaining = maxChars;
675
+ for (const result of results) {
676
+ const indicator = " [...]";
677
+ const metadata = `${result.file}:${result.startLine}-${result.endLine}\n`;
678
+ const chars = `${metadata}${result.text}${result.contextBefore ? `\n${result.contextBefore}` : ""}${result.contextAfter ? `\n${result.contextAfter}` : ""}`.length;
679
+ if (chars <= remaining) {
680
+ budgeted.push(result);
681
+ remaining -= chars;
682
+ } else {
683
+ const textBudget = remaining - metadata.length - 6;
684
+ if (textBudget <= 0) break;
685
+ const truncated = result.text.slice(0, textBudget);
686
+ budgeted.push({
687
+ ...result,
688
+ text: `${truncated}${indicator}`,
689
+ contextBefore: null,
690
+ contextAfter: null
691
+ });
692
+ break;
693
+ }
694
+ }
695
+ return { results: budgeted };
696
+ };
697
+ //#endregion
509
698
  //#region src/commands/query.ts
510
699
  const DEFAULT_TOP_K = 5;
511
700
  const DEFAULT_CONTEXT_LINES = 0;
@@ -532,31 +721,64 @@ const formatResult = (result) => {
532
721
  const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
533
722
  return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
534
723
  };
535
- const toJsonOutput = (results, ctxLines) => results.map((r) => ({
724
+ /** Format a result as a lightweight location reference (no text content). */
725
+ const formatLocation = (result) => `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})`;
726
+ /** Build optional content fields for a single JSON output entry. */
727
+ const buildContentFields = (r, ctxLines, noContent) => {
728
+ if (noContent) return {};
729
+ return {
730
+ text: r.text,
731
+ ...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
732
+ ...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
733
+ };
734
+ };
735
+ const toJsonOutput = (results, ctxLines, noContent = false) => results.map((r) => ({
536
736
  score: r.score,
537
737
  file: r.file,
538
738
  startLine: r.startLine,
539
739
  endLine: r.endLine,
540
- text: r.text,
541
- ...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
542
- ...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
740
+ ...buildContentFields(r, ctxLines, noContent)
543
741
  }));
544
- /** CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] */
742
+ /** Build SearchOptions from parsed CLI args, clamping topK. */
743
+ const buildSearchOptions = (top, ignorePath, onlyPath) => {
744
+ const rawValue = Option.getOrElse(top, () => DEFAULT_TOP_K);
745
+ const clamped = clampTopK(rawValue);
746
+ return {
747
+ options: {
748
+ topK: clamped.value,
749
+ ...ignorePath.length > 0 && { ignorePaths: [...ignorePath] },
750
+ ...onlyPath.length > 0 && { onlyPaths: [...onlyPath] }
751
+ },
752
+ clamped: clamped.clamped,
753
+ rawValue
754
+ };
755
+ };
756
+ /** Render search results via Display — JSON + human-readable text. */
757
+ const renderResults = (d, results, ctxLines, noContent) => Effect.gen(function* () {
758
+ yield* d.json(toJsonOutput(results, ctxLines, noContent));
759
+ if (results.length === 0) yield* d.log("No results found", "warn");
760
+ else for (const result of results) yield* d.text(noContent ? formatLocation(result) : formatResult(result));
761
+ });
762
+ /**
763
+ * CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] [--ignore-path P]
764
+ * [--only-path P] [--max-characters N] [--no-content]
765
+ */
545
766
  const queryCommand = Command.make("query", {
546
767
  queryText: Args.text({ name: "query" }),
547
768
  top: Options.integer("top").pipe(Options.withDefault(DEFAULT_TOP_K), Options.optional),
548
769
  json: Options.boolean("json").pipe(Options.withDefault(false)),
549
- contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional)
550
- }, ({ queryText, top, contextLines }) => Effect.gen(function* () {
770
+ contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional),
771
+ ignorePath: Options.text("ignore-path").pipe(Options.repeated),
772
+ onlyPath: Options.text("only-path").pipe(Options.repeated),
773
+ maxCharacters: Options.integer("max-characters").pipe(Options.optional),
774
+ noContent: Options.boolean("no-content").pipe(Options.withDefault(false))
775
+ }, ({ queryText, top, contextLines, ignorePath, onlyPath, maxCharacters, noContent }) => Effect.gen(function* () {
551
776
  const d = yield* Display;
552
- const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
553
777
  const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
554
- const clamped = clampTopK(topK);
555
- if (clamped.clamped) yield* d.log(`topK clamped from ${topK} to ${clamped.value}`, "warn");
556
- const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, clamped.value));
557
- yield* d.json(toJsonOutput(results, ctxLines));
558
- if (results.length === 0) yield* d.log("No results found", "warn");
559
- else for (const result of results) yield* d.text(formatResult(result));
778
+ const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
779
+ if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
780
+ const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
781
+ yield* renderResults(d, noContent ? results : applyCharBudget(results, Option.getOrUndefined(maxCharacters)).results, ctxLines, noContent);
560
782
  }).pipe(Effect.catchTags({
561
783
  ModelLoadError: reportError,
562
784
  InferenceError: reportError,
@@ -565,20 +787,6 @@ const queryCommand = Command.make("query", {
565
787
  NoIndexError: reportError
566
788
  })));
567
789
  //#endregion
568
- //#region src/lib/format.ts
569
- /** Format byte count as human-readable string (e.g. "1.5 MB") */
570
- const formatBytes = (bytes) => {
571
- if (bytes === 0) return "0 B";
572
- const units = [
573
- "B",
574
- "KB",
575
- "MB",
576
- "GB"
577
- ];
578
- const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
579
- return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
580
- };
581
- //#endregion
582
790
  //#region src/commands/reset.ts
583
791
  /** CLI command: pix reset [--json] */
584
792
  const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, () => Effect.gen(function* () {
@@ -692,13 +900,19 @@ const buildChunks = (file, content, config) => {
692
900
  const text = lines.slice(startLine - 1, endLine).join("\n");
693
901
  if (text.length >= MIN_CHUNK_CHARS) {
694
902
  const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
903
+ const contextBeforeStart = Math.max(0, startLine - 1 - config.overlapLines);
904
+ const contextBefore = lines.slice(contextBeforeStart, startLine - 1).join("\n");
905
+ const contextAfterEnd = Math.min(lines.length, endLine + config.overlapLines);
906
+ const contextAfter = lines.slice(endLine, contextAfterEnd).join("\n");
695
907
  chunks.push({
696
908
  id,
697
909
  idx,
698
910
  file,
699
911
  startLine,
700
912
  endLine,
701
- text
913
+ text,
914
+ contextBefore: contextBefore || null,
915
+ contextAfter: contextAfter || null
702
916
  });
703
917
  idx++;
704
918
  }
@@ -725,12 +939,19 @@ const make$5 = Effect.gen(function* () {
725
939
  });
726
940
  const ChunkerLive = Layer.effect(Chunker, make$5);
727
941
  //#endregion
942
+ //#region src/lib/platform-error.ts
943
+ /**
944
+ * Check if a platform error has a specific `reason` string (e.g. "BadResource" for disk full,
945
+ * "NotFound" for missing files). Platform errors from @effect/platform include a `reason` property
946
+ * that categorizes the failure.
947
+ */
948
+ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
949
+ //#endregion
728
950
  //#region src/services/config-store.ts
729
951
  const CONFIG_DIR = ".pix";
730
952
  const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
731
- const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
732
953
  const mapConfigWriteError = (cause, path, action) => {
733
- if (isPlatformReason$1(cause, "BadResource")) return new DiskFullError({
954
+ if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
734
955
  message: `Disk full: could not ${action}`,
735
956
  path,
736
957
  cause
@@ -749,7 +970,7 @@ const make$4 = Effect.gen(function* () {
749
970
  });
750
971
  const readConfig = () => Effect.gen(function* () {
751
972
  const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
752
- if (isPlatformReason$1(cause, "NotFound")) return new ConfigNotFoundError({
973
+ if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
753
974
  message: "Config file not found. Run pix init first.",
754
975
  path: CONFIG_PATH,
755
976
  cause
@@ -784,10 +1005,7 @@ const make$3 = Effect.gen(function* () {
784
1005
  const fs = yield* FileSystem.FileSystem;
785
1006
  const processorMap = buildProcessorMap([]);
786
1007
  const extract = (file) => {
787
- const lastSlash = file.lastIndexOf("/");
788
- const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
789
- const dotIndex = name.lastIndexOf(".");
790
- const ext = dotIndex === -1 ? name.toLowerCase() : name.slice(dotIndex).toLowerCase();
1008
+ const ext = getExtension(file);
791
1009
  const processor = processorMap[ext];
792
1010
  if (!processor) return Effect.fail({
793
1011
  _tag: "UnsupportedFormat",
@@ -828,9 +1046,7 @@ const MODEL_REGISTRY = {
828
1046
  };
829
1047
  //#endregion
830
1048
  //#region src/services/embedder.ts
831
- const CACHE_DIR = ".pix/cache";
832
- const BATCH_SIZE = 16;
833
- env.cacheDir = CACHE_DIR;
1049
+ env.cacheDir = ".pix/cache";
834
1050
  const normalize = (arr) => {
835
1051
  let norm = 0;
836
1052
  for (let i = 0; i < arr.length; i++) norm += arr[i] * arr[i];
@@ -872,16 +1088,14 @@ const createExtractor = (opts) => Effect.tryPromise(async () => {
872
1088
  model: opts.model,
873
1089
  cause
874
1090
  })));
875
- const createExtractorWithFallback = (opts) => {
1091
+ const createExtractorWithFallback = (opts, fallbackRef) => {
876
1092
  if (opts.device === "cpu") return createExtractor(opts);
877
1093
  return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
878
- const d = yield* Display;
879
- yield* d.log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
880
- yield* d.json({
881
- event: "embedder_fallback",
1094
+ yield* (yield* Display).log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
1095
+ yield* Ref.set(fallbackRef, Option.some({
882
1096
  originalDevice: opts.device,
883
1097
  reason: originalError.message
884
- });
1098
+ }));
885
1099
  return yield* createExtractor({
886
1100
  ...opts,
887
1101
  device: "cpu"
@@ -892,7 +1106,8 @@ const make$2 = Effect.gen(function* () {
892
1106
  const configStore = yield* ConfigStore;
893
1107
  const d = yield* Display;
894
1108
  const cfg = yield* resolveEmbedderConfig(configStore);
895
- const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
1109
+ const fallbackRef = yield* Ref.make(Option.none());
1110
+ const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg, fallbackRef));
896
1111
  const embed = (text) => Effect.gen(function* () {
897
1112
  const extractor = yield* getExtractor;
898
1113
  const data = (yield* Effect.tryPromise(() => extractor(text, {
@@ -909,31 +1124,30 @@ const make$2 = Effect.gen(function* () {
909
1124
  }).pipe(Effect.provideService(Display, d));
910
1125
  const batch = (texts) => Effect.gen(function* () {
911
1126
  const extractor = yield* getExtractor;
1127
+ const tensor = yield* Effect.tryPromise(() => extractor([...texts], {
1128
+ pooling: "mean",
1129
+ normalize: false
1130
+ })).pipe(Effect.mapError((cause) => new InferenceError({
1131
+ message: "Batch embedding inference failed",
1132
+ cause
1133
+ })));
1134
+ const data = tensor.data;
1135
+ const n = tensor.dims[0];
912
1136
  const results = [];
913
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
914
- const slice = texts.slice(i, i + BATCH_SIZE);
915
- const tensor = yield* Effect.tryPromise(() => extractor(slice, {
916
- pooling: "mean",
917
- normalize: false
918
- })).pipe(Effect.mapError((cause) => new InferenceError({
919
- message: "Batch embedding inference failed",
920
- cause
921
- })));
922
- const data = tensor.data;
923
- const n = tensor.dims[0];
924
- for (let j = 0; j < n; j++) {
925
- const offset = j * cfg.dims;
926
- results.push(normalize(data.slice(offset, offset + cfg.dims)));
927
- }
1137
+ for (let j = 0; j < n; j++) {
1138
+ const offset = j * cfg.dims;
1139
+ results.push({
1140
+ vector: normalize(data.slice(offset, offset + cfg.dims)),
1141
+ dims: cfg.dims
1142
+ });
928
1143
  }
929
- return results.map((vector) => ({
930
- vector,
931
- dims: cfg.dims
932
- }));
1144
+ return results;
933
1145
  }).pipe(Effect.provideService(Display, d));
1146
+ const getFallbackInfo = () => Ref.get(fallbackRef).pipe(Effect.map(Option.getOrElse(() => void 0)));
934
1147
  return {
935
1148
  embed,
936
- batch
1149
+ batch,
1150
+ getFallbackInfo
937
1151
  };
938
1152
  });
939
1153
  const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
@@ -977,7 +1191,16 @@ const make$1 = Effect.gen(function* () {
977
1191
  if (result.skipped) skipped.push(result.skipped);
978
1192
  if (result.content.trim()) ig.add(result.content.split("\n"));
979
1193
  });
980
- const loadGitignoreRules = (ignoredPaths, cwd) => Effect.gen(function* () {
1194
+ const loadGitignoreRules = (ignoredPaths) => {
1195
+ const ig = ignore();
1196
+ const skipped = [];
1197
+ if (ignoredPaths.length > 0) ig.add(ignoredPaths);
1198
+ return Effect.succeed({
1199
+ ig,
1200
+ skipped
1201
+ });
1202
+ };
1203
+ const loadGitignoreRulesWithFiles = (ignoredPaths, cwd) => Effect.gen(function* () {
981
1204
  const ig = ignore();
982
1205
  const skipped = [];
983
1206
  if (ignoredPaths.length > 0) ig.add(ignoredPaths);
@@ -1056,10 +1279,10 @@ const make$1 = Effect.gen(function* () {
1056
1279
  skipped
1057
1280
  };
1058
1281
  });
1059
- const scanFiles = (ignoredPaths) => Effect.gen(function* () {
1282
+ const scanFiles = (ignoredPaths, ignoreGitignore) => Effect.gen(function* () {
1060
1283
  const cwd = process.cwd();
1061
- const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules(ignoredPaths, cwd).pipe(Effect.mapError((cause) => new ScanFailed({
1062
- message: `Failed to load gitignore rules: ${String(cause)}`,
1284
+ const { ig, skipped: ignoreSkipped } = yield* (ignoreGitignore ? loadGitignoreRules(ignoredPaths) : loadGitignoreRulesWithFiles(ignoredPaths, cwd)).pipe(Effect.mapError((cause) => new ScanFailed({
1285
+ message: `Failed to load ignore rules: ${String(cause)}`,
1063
1286
  cause
1064
1287
  })));
1065
1288
  const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
@@ -1073,16 +1296,66 @@ const make$1 = Effect.gen(function* () {
1073
1296
  const ScannerLive = Layer.effect(Scanner, make$1);
1074
1297
  //#endregion
1075
1298
  //#region src/services/vector-store.ts
1299
+ /**
1300
+ * Parse a single JSON line from chunks.jsonl and normalize context fields (old indexes may lack
1301
+ * them).
1302
+ */
1303
+ const parseChunkLine = (line) => {
1304
+ const raw = JSON.parse(line);
1305
+ return {
1306
+ file: typeof raw.file === "string" ? raw.file : "",
1307
+ startLine: typeof raw.startLine === "number" ? raw.startLine : 0,
1308
+ endLine: typeof raw.endLine === "number" ? raw.endLine : 0,
1309
+ text: typeof raw.text === "string" ? raw.text : "",
1310
+ contextBefore: typeof raw.contextBefore === "string" ? raw.contextBefore : null,
1311
+ contextAfter: typeof raw.contextAfter === "string" ? raw.contextAfter : null
1312
+ };
1313
+ };
1314
+ /** Compute dot-product similarity between a chunk vector and the query embedding. */
1315
+ const computeDotProduct = (chunkVector, query) => {
1316
+ let dot = 0;
1317
+ for (let j = 0; j < query.dims; j++) dot += chunkVector[j] * query.vector[j];
1318
+ return dot;
1319
+ };
1076
1320
  const STORE_DIR = ".pix";
1077
1321
  const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
1078
1322
  const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
1079
- const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
1323
+ /**
1324
+ * Serialize a Chunk to a JSON object for storage in chunks.jsonl. Always includes context fields
1325
+ * for schema consistency.
1326
+ */
1327
+ const serializeChunk = (c) => ({
1328
+ id: c.id,
1329
+ idx: c.idx,
1330
+ file: c.file,
1331
+ startLine: c.startLine,
1332
+ endLine: c.endLine,
1333
+ text: c.text,
1334
+ contextBefore: c.contextBefore,
1335
+ contextAfter: c.contextAfter
1336
+ });
1080
1337
  /**
1081
1338
  * FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
1082
1339
  * statistics.
1083
1340
  */
1084
1341
  const make = Effect.gen(function* () {
1085
1342
  const fs = yield* FileSystem.FileSystem;
1343
+ const chunksTemp = `${CHUNKS_FILE}.tmp`;
1344
+ const vectorsTemp = `${VECTORS_FILE}.tmp`;
1345
+ const seenFiles = yield* Ref.make(/* @__PURE__ */ new Set());
1346
+ const statsAccumulator = yield* Ref.make({
1347
+ chunks: 0,
1348
+ files: 0,
1349
+ totalLines: 0,
1350
+ byteSize: 0
1351
+ });
1352
+ const serializeVectors = (embeddings) => {
1353
+ const dims = embeddings[0]?.dims ?? 384;
1354
+ const totalFloats = embeddings.length * dims;
1355
+ const vectorsArray = new Float32Array(totalFloats);
1356
+ for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
1357
+ return Buffer.from(vectorsArray.buffer);
1358
+ };
1086
1359
  /**
1087
1360
  * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
1088
1361
  * field contains the source code.
@@ -1145,44 +1418,82 @@ const make = Effect.gen(function* () {
1145
1418
  deleted: true
1146
1419
  };
1147
1420
  });
1421
+ const storeBegin = () => Effect.gen(function* () {
1422
+ yield* ensureDirExists(STORE_DIR, ".pix directory");
1423
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1424
+ yield* Ref.set(statsAccumulator, {
1425
+ chunks: 0,
1426
+ files: 0,
1427
+ totalLines: 0,
1428
+ byteSize: 0
1429
+ });
1430
+ if (yield* withStoreError(fs.exists(chunksTemp), "check chunks temp")) yield* withStoreError(fs.remove(chunksTemp), "clean stale chunks temp", chunksTemp);
1431
+ if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
1432
+ });
1433
+ const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
1434
+ const content = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n") + "\n";
1435
+ yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
1436
+ const buffer = serializeVectors(embeddings);
1437
+ yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
1438
+ const dims = embeddings[0]?.dims ?? 384;
1439
+ const batchLines = chunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
1440
+ const batchBytes = embeddings.length * dims * 4;
1441
+ yield* Ref.update(seenFiles, (prev) => {
1442
+ for (const c of chunks) prev.add(c.file);
1443
+ return prev;
1444
+ });
1445
+ yield* Ref.update(statsAccumulator, (prev) => ({
1446
+ chunks: prev.chunks + chunks.length,
1447
+ files: 0,
1448
+ totalLines: prev.totalLines + batchLines,
1449
+ byteSize: prev.byteSize + batchBytes
1450
+ }));
1451
+ });
1452
+ const storeCommit = () => Effect.gen(function* () {
1453
+ yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1454
+ yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1455
+ const stats = yield* Ref.get(statsAccumulator);
1456
+ const files = yield* Ref.get(seenFiles);
1457
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1458
+ return {
1459
+ ...stats,
1460
+ files: files.size
1461
+ };
1462
+ });
1463
+ const storeAbort = () => Effect.gen(function* () {
1464
+ yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
1465
+ if (yield* withReadError(fs.exists(chunksTemp), "check chunks temp")) yield* withReadError(fs.remove(chunksTemp), "abort chunks temp", chunksTemp);
1466
+ if (yield* withReadError(fs.exists(vectorsTemp), "check vectors temp")) yield* withReadError(fs.remove(vectorsTemp), "abort vectors temp", vectorsTemp);
1467
+ });
1148
1468
  const store = (chunks, embeddings) => Effect.gen(function* () {
1149
1469
  yield* ensureDirExists(STORE_DIR, ".pix directory");
1150
1470
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
1151
- const chunksLines = chunks.map((c) => JSON.stringify({
1152
- id: c.id,
1153
- idx: c.idx,
1154
- file: c.file,
1155
- startLine: c.startLine,
1156
- endLine: c.endLine,
1157
- text: c.text
1158
- }));
1159
- yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
1471
+ const chunksJson = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n");
1472
+ yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
1160
1473
  yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1161
1474
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
1162
- const dims = embeddings[0]?.dims ?? 384;
1163
- const totalFloats = embeddings.length * dims;
1164
- const vectorsArray = new Float32Array(totalFloats);
1165
- for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
1166
- const buffer = Buffer.from(vectorsArray.buffer);
1475
+ const buffer = serializeVectors(embeddings);
1167
1476
  yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
1168
1477
  yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1169
1478
  });
1170
- const search = (query, topK) => Effect.gen(function* () {
1479
+ const search = (query, options) => Effect.gen(function* () {
1171
1480
  const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
1172
1481
  const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
1173
1482
  if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
1174
1483
  const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
1175
1484
  const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
1176
- const vectors = new Float32Array(vectorsBuffer.buffer);
1485
+ const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
1486
+ const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
1487
+ const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
1177
1488
  const results = [];
1178
1489
  for (let i = 0; i < chunkLines.length; i++) try {
1179
- const chunk = JSON.parse(chunkLines[i]);
1490
+ const chunk = parseChunkLine(chunkLines[i]);
1491
+ if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
1492
+ if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
1180
1493
  const startIdx = i * query.dims;
1181
- const chunkVector = vectors.slice(startIdx, startIdx + query.dims);
1182
- let dotProduct = 0;
1183
- for (let j = 0; j < query.dims; j++) dotProduct += chunkVector[j] * query.vector[j];
1494
+ const score = computeDotProduct(vectors.slice(startIdx, startIdx + query.dims), query);
1184
1495
  results.push({
1185
- score: dotProduct,
1496
+ score,
1186
1497
  file: chunk.file,
1187
1498
  startLine: chunk.startLine,
1188
1499
  endLine: chunk.endLine,
@@ -1192,7 +1503,10 @@ const make = Effect.gen(function* () {
1192
1503
  });
1193
1504
  } catch {}
1194
1505
  results.sort((a, b) => b.score - a.score);
1195
- return results.slice(0, topK);
1506
+ const topK = options?.topK;
1507
+ if (topK == null) return results;
1508
+ const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
1509
+ return results.slice(0, clamped);
1196
1510
  });
1197
1511
  const getStatus = () => Effect.gen(function* () {
1198
1512
  const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
@@ -1232,6 +1546,10 @@ const make = Effect.gen(function* () {
1232
1546
  });
1233
1547
  return {
1234
1548
  store,
1549
+ storeBegin,
1550
+ storeBatch,
1551
+ storeCommit,
1552
+ storeAbort,
1235
1553
  search,
1236
1554
  getStatus,
1237
1555
  reset
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.10.0",
3
+ "version": "0.12.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",