@lucas-bur/pix 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +7 -7
  2. package/dist/index.mjs +341 -113
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -19,13 +19,13 @@ pix query "authentication middleware"
19
19
 
20
20
  ## Commands
21
21
 
22
- | Command | Description | JSON flag |
23
- | -------------------- | ------------------------------------------- | --------- |
24
- | `pix init` | Create `.pix/config.json` with defaults | `--json` |
25
- | `pix index` | Scan, chunk, embed, and store project files | `--json` |
26
- | `pix query "<text>"` | Semantic search via cosine similarity | `--json` |
27
- | `pix status` | Show index statistics | `--json` |
28
- | `pix reset` | Delete index files (chunks + vectors) | `--json` |
22
+ | Command | Description | JSON flag |
23
+ | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- |
24
+ | `pix init` | Create `.pix/config.json` with defaults | `--json` |
25
+ | `pix index` | Scan, chunk, embed, and store project files | `--json` |
26
+ | `pix query "<text>" [flags]` | Semantic search via cosine similarity (`--top`, `--context-lines`, `--ignore-path`, `--only-path`, `--max-characters`, `--no-content`) | `--json` |
27
+ | `pix status` | Show index statistics | `--json` |
28
+ | `pix reset` | Delete index files (chunks + vectors) | `--json` |
29
29
 
30
30
  All commands support `--json` for structured output on stdout — ideal for piping to AI agents.
31
31
 
package/dist/index.mjs CHANGED
@@ -2,12 +2,13 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Args, CliConfig, Command, Options } from "@effect/cli";
4
4
  import { NodeContext, NodeRuntime } from "@effect/platform-node";
5
- import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
5
+ import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Schema, Stream } from "effect";
6
6
  import * as Chunk from "effect/Chunk";
7
7
  import { styleText } from "node:util";
8
8
  import * as clack from "@clack/prompts";
9
9
  import { FileSystem } from "@effect/platform";
10
10
  import crypto from "node:crypto";
11
+ import * as ParseResult from "effect/ParseResult";
11
12
  import { env } from "@huggingface/transformers";
12
13
  import ignore from "ignore";
13
14
  //#region src/domain/ports.ts
@@ -154,7 +155,26 @@ const JsonDisplay = { layer: Layer.succeed(Display, {
154
155
  }) };
155
156
  //#endregion
156
157
  //#region src/domain/config.ts
157
- var ConfigError = class extends Data.TaggedError("ConfigError") {};
158
+ const EmbedderConfigSchema = Schema.Struct({
159
+ model: Schema.String,
160
+ device: Schema.Literal("auto", "cpu", "cuda", "dml", "coreml"),
161
+ dtype: Schema.Literal("fp32", "fp16", "q8"),
162
+ batchSize: Schema.Number
163
+ });
164
+ /**
165
+ * Runtime schema for persisted project configuration. Defines the structure and validation rules
166
+ * for `.pix/config.json`.
167
+ */
168
+ const ConfigSchema = Schema.Struct({
169
+ schema: Schema.Literal("1"),
170
+ chunkLines: Schema.Number,
171
+ overlapLines: Schema.Number,
172
+ chunkConcurrency: Schema.optionalWith(Schema.Number, { exact: true }),
173
+ skipExtensions: Schema.Array(Schema.String),
174
+ ignoredPaths: Schema.Array(Schema.String),
175
+ ignoreGitignore: Schema.optionalWith(Schema.Boolean, { exact: true }),
176
+ embedder: EmbedderConfigSchema
177
+ });
158
178
  const DEFAULT_CONFIG = {
159
179
  schema: "1",
160
180
  chunkLines: 60,
@@ -184,11 +204,41 @@ const DEFAULT_CONFIG = {
184
204
  }
185
205
  };
186
206
  //#endregion
207
+ //#region src/lib/extension.ts
208
+ /** Extract the last path segment (filename) from a file path. Handles both `/` and `\\` separators. */
209
+ const getFilename = (path) => {
210
+ const sepIndex = Math.max(path.lastIndexOf("/"), path.lastIndexOf("\\"));
211
+ return sepIndex >= 0 ? path.slice(sepIndex + 1) : path;
212
+ };
213
+ /**
214
+ * Extract the lowercase extension (including dot) from a file path. Used for processor dispatch.
215
+ * Strips the directory, then returns the part after the last dot. If no dot, returns the full
216
+ * filename lowercased.
217
+ */
218
+ const getExtension = (file) => {
219
+ const name = getFilename(file);
220
+ const dotIndex = name.lastIndexOf(".");
221
+ if (dotIndex === -1) return name.toLowerCase();
222
+ return name.slice(dotIndex).toLowerCase();
223
+ };
224
+ /**
225
+ * Extract the extension from a filename (not full path). Returns `"(no extension)"` if no dot
226
+ * exists. Used for display grouping of skipped files.
227
+ */
228
+ const getFileExtension = (filename) => {
229
+ const dotIndex = filename.lastIndexOf(".");
230
+ return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
231
+ };
232
+ //#endregion
187
233
  //#region src/domain/errors.ts
234
+ /** Generic config I/O failure (read, write, encode). */
235
+ var ConfigError = class extends Data.TaggedError("ConfigError") {};
188
236
  /** Config file or directory does not exist. Run pix init first. */
189
237
  var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
190
238
  /** Config file exists but contains invalid JSON. */
191
239
  var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
240
+ /** Config failed schema validation — missing/invalid fields. */
241
+ var ConfigValidationError = class extends Data.TaggedError("ConfigValidationError") {};
192
242
  /** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
193
243
  var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
194
244
  /** Disk is full — write operation could not complete. */
@@ -206,6 +256,8 @@ var InferenceError = class extends Data.TaggedError("InferenceError") {};
206
256
  * via ScanResult.skipped.
207
257
  */
208
258
  var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
259
+ /** A chunk line in chunks.jsonl failed schema validation. */
260
+ var ChunkValidationError = class extends Data.TaggedError("ChunkValidationError") {};
209
261
  /** File type is unsupported for text extraction. */
210
262
  var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
211
263
  /** Text extraction failed for a supported file type. */
@@ -315,13 +367,6 @@ const deriveEffectiveConfig = (opts, config) => ({
315
367
  ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
316
368
  ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
317
369
  });
318
- function getExtension(file) {
319
- const lastSlash = file.lastIndexOf("/");
320
- const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
321
- const dotIndex = name.lastIndexOf(".");
322
- if (dotIndex === -1) return name.toLowerCase();
323
- return name.slice(dotIndex).toLowerCase();
324
- }
325
370
  const classifyFiles = (files, processorMap) => {
326
371
  const knownFiles = [];
327
372
  const skippedFiles = [];
@@ -386,7 +431,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
386
431
  chunks: 0,
387
432
  files: 0,
388
433
  totalLines: 0,
389
- byteSize: 0
434
+ byteSize: 0,
435
+ validationErrors: []
390
436
  },
391
437
  durationMs: Date.now() - start
392
438
  };
@@ -401,7 +447,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
401
447
  chunks: 0,
402
448
  files: 0,
403
449
  totalLines: 0,
404
- byteSize: 0
450
+ byteSize: 0,
451
+ validationErrors: []
405
452
  },
406
453
  durationMs: Date.now() - start
407
454
  };
@@ -435,7 +482,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
435
482
  chunks: stats.chunks,
436
483
  files: stats.files,
437
484
  totalLines: stats.totalLines,
438
- byteSize: stats.byteSize
485
+ byteSize: stats.byteSize,
486
+ validationErrors: []
439
487
  },
440
488
  durationMs: Date.now() - start,
441
489
  embedderFallback: fallbackInfo
@@ -444,11 +492,6 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
444
492
  return { index };
445
493
  })
446
494
  }) {};
447
- const getFilename = (path) => path.split("/").pop() ?? path;
448
- const getFileExtension = (filename) => {
449
- const dotIndex = filename.lastIndexOf(".");
450
- return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
451
- };
452
495
  const groupByExtension = (entries) => {
453
496
  const byExt = /* @__PURE__ */ new Map();
454
497
  for (const s of entries) {
@@ -504,7 +547,7 @@ var QueryProject = class extends Effect.Service()("QueryProject", {
504
547
  effect: Effect.gen(function* () {
505
548
  const embedder = yield* Embedder;
506
549
  const store = yield* VectorStore;
507
- const queryProject = (queryText, topK) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, topK)));
550
+ const queryProject = (queryText, options) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, options)));
508
551
  return { queryProject };
509
552
  })
510
553
  }) {};
@@ -529,6 +572,8 @@ const errorCodes = {
529
572
  ConfigError: "CONFIG_ERROR",
530
573
  ConfigNotFoundError: "CONFIG_NOT_FOUND",
531
574
  ConfigMalformedError: "CONFIG_MALFORMED",
575
+ ConfigValidationError: "CONFIG_VALIDATION_ERROR",
576
+ ChunkValidationError: "CHUNK_VALIDATION_ERROR",
532
577
  NoIndexError: "NO_INDEX",
533
578
  DiskFullError: "DISK_FULL",
534
579
  StoreError: "STORE_ERROR",
@@ -636,6 +681,51 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
636
681
  DiskFullError: reportError
637
682
  })));
638
683
  //#endregion
684
+ //#region src/lib/format.ts
685
+ /** Format byte count as human-readable string (e.g. "1.5 MB") */
686
+ const formatBytes = (bytes) => {
687
+ if (bytes === 0) return "0 B";
688
+ const units = [
689
+ "B",
690
+ "KB",
691
+ "MB",
692
+ "GB"
693
+ ];
694
+ const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
695
+ return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
696
+ };
697
+ /**
698
+ * Apply a character budget to search results. Returns results in score order capped by the budget.
699
+ * The last result may be truncated to fit the remaining budget. Character count includes file path,
700
+ * line numbers, chunk text, and context lines.
701
+ */
702
+ const applyCharBudget = (results, maxChars) => {
703
+ if (!maxChars || maxChars <= 0) return { results };
704
+ const budgeted = [];
705
+ let remaining = maxChars;
706
+ for (const result of results) {
707
+ const indicator = " [...]";
708
+ const metadata = `${result.file}:${result.startLine}-${result.endLine}\n`;
709
+ const chars = `${metadata}${result.text}${result.contextBefore ? `\n${result.contextBefore}` : ""}${result.contextAfter ? `\n${result.contextAfter}` : ""}`.length;
710
+ if (chars <= remaining) {
711
+ budgeted.push(result);
712
+ remaining -= chars;
713
+ } else {
714
+ const textBudget = remaining - metadata.length - 6;
715
+ if (textBudget <= 0) break;
716
+ const truncated = result.text.slice(0, textBudget);
717
+ budgeted.push({
718
+ ...result,
719
+ text: `${truncated}${indicator}`,
720
+ contextBefore: null,
721
+ contextAfter: null
722
+ });
723
+ break;
724
+ }
725
+ }
726
+ return { results: budgeted };
727
+ };
728
+ //#endregion
639
729
  //#region src/commands/query.ts
640
730
  const DEFAULT_TOP_K = 5;
641
731
  const DEFAULT_CONTEXT_LINES = 0;
@@ -662,31 +752,72 @@ const formatResult = (result) => {
662
752
  const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
663
753
  return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
664
754
  };
665
- const toJsonOutput = (results, ctxLines) => results.map((r) => ({
755
+ /** Format a result as a lightweight location reference (no text content). */
756
+ const formatLocation = (result) => `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})`;
757
+ /** Build optional content fields for a single JSON output entry. */
758
+ const buildContentFields = (r, ctxLines, noContent) => {
759
+ if (noContent) return {};
760
+ return {
761
+ text: r.text,
762
+ ...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
763
+ ...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
764
+ };
765
+ };
766
+ const toJsonOutput = (results, ctxLines, noContent = false) => results.map((r) => ({
666
767
  score: r.score,
667
768
  file: r.file,
668
769
  startLine: r.startLine,
669
770
  endLine: r.endLine,
670
- text: r.text,
671
- ...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
672
- ...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
771
+ ...buildContentFields(r, ctxLines, noContent)
673
772
  }));
674
- /** CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] */
773
+ /** Build SearchOptions from parsed CLI args, clamping topK. */
774
+ const buildSearchOptions = (top, ignorePath, onlyPath) => {
775
+ const rawValue = Option.getOrElse(top, () => DEFAULT_TOP_K);
776
+ const clamped = clampTopK(rawValue);
777
+ return {
778
+ options: {
779
+ topK: clamped.value,
780
+ ...ignorePath.length > 0 && { ignorePaths: [...ignorePath] },
781
+ ...onlyPath.length > 0 && { onlyPaths: [...onlyPath] }
782
+ },
783
+ clamped: clamped.clamped,
784
+ rawValue
785
+ };
786
+ };
787
+ /** Render search results via Display — JSON + human-readable text. */
788
+ const renderResults = (d, response, ctxLines, noContent) => Effect.gen(function* () {
789
+ const { results, validationErrors } = response;
790
+ yield* d.json({
791
+ results: toJsonOutput(results, ctxLines, noContent),
792
+ ...validationErrors.length > 0 && { validationErrors }
793
+ });
794
+ if (results.length === 0) yield* d.log("No results found", "warn");
795
+ else for (const result of results) yield* d.text(noContent ? formatLocation(result) : formatResult(result));
796
+ });
797
+ /**
798
+ * CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] [--ignore-path P]
799
+ * [--only-path P] [--max-characters N] [--no-content]
800
+ */
675
801
  const queryCommand = Command.make("query", {
676
802
  queryText: Args.text({ name: "query" }),
677
803
  top: Options.integer("top").pipe(Options.withDefault(DEFAULT_TOP_K), Options.optional),
678
804
  json: Options.boolean("json").pipe(Options.withDefault(false)),
679
- contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional)
680
- }, ({ queryText, top, contextLines }) => Effect.gen(function* () {
805
+ contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional),
806
+ ignorePath: Options.text("ignore-path").pipe(Options.repeated),
807
+ onlyPath: Options.text("only-path").pipe(Options.repeated),
808
+ maxCharacters: Options.integer("max-characters").pipe(Options.optional),
809
+ noContent: Options.boolean("no-content").pipe(Options.withDefault(false))
810
+ }, ({ queryText, top, contextLines, ignorePath, onlyPath, maxCharacters, noContent }) => Effect.gen(function* () {
681
811
  const d = yield* Display;
682
- const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
683
812
  const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
684
- const clamped = clampTopK(topK);
685
- if (clamped.clamped) yield* d.log(`topK clamped from ${topK} to ${clamped.value}`, "warn");
686
- const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, clamped.value));
687
- yield* d.json(toJsonOutput(results, ctxLines));
688
- if (results.length === 0) yield* d.log("No results found", "warn");
689
- else for (const result of results) yield* d.text(formatResult(result));
813
+ const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
814
+ if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
815
+ const searchResponse = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
816
+ const finalResults = noContent ? searchResponse.results : applyCharBudget(searchResponse.results, Option.getOrUndefined(maxCharacters)).results;
817
+ yield* renderResults(d, {
818
+ ...searchResponse,
819
+ results: finalResults
820
+ }, ctxLines, noContent);
690
821
  }).pipe(Effect.catchTags({
691
822
  ModelLoadError: reportError,
692
823
  InferenceError: reportError,
@@ -695,20 +826,6 @@ const queryCommand = Command.make("query", {
695
826
  NoIndexError: reportError
696
827
  })));
697
828
  //#endregion
698
- //#region src/lib/format.ts
699
- /** Format byte count as human-readable string (e.g. "1.5 MB") */
700
- const formatBytes = (bytes) => {
701
- if (bytes === 0) return "0 B";
702
- const units = [
703
- "B",
704
- "KB",
705
- "MB",
706
- "GB"
707
- ];
708
- const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
709
- return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
710
- };
711
- //#endregion
712
829
  //#region src/commands/reset.ts
713
830
  /** CLI command: pix reset [--json] */
714
831
  const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, () => Effect.gen(function* () {
@@ -747,6 +864,7 @@ const statusCommand = Command.make("status", { json: Options.boolean("json").pip
747
864
  yield* d.log(`Total lines: ${result.totalLines.toLocaleString()}`, "info");
748
865
  yield* d.log(`Index size: ${result.byteSize.toLocaleString()} bytes`, "info");
749
866
  yield* d.log(`Last indexed: ${lastIndexStr}`, "info");
867
+ if (result.validationErrors.length > 0) yield* d.log(`Warnings: ${result.validationErrors[0].message}`, "warn");
750
868
  }).pipe(Effect.catchTags({ StoreError: reportError })));
751
869
  //#endregion
752
870
  //#region src/cli.ts
@@ -822,13 +940,19 @@ const buildChunks = (file, content, config) => {
822
940
  const text = lines.slice(startLine - 1, endLine).join("\n");
823
941
  if (text.length >= MIN_CHUNK_CHARS) {
824
942
  const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
943
+ const contextBeforeStart = Math.max(0, startLine - 1 - config.overlapLines);
944
+ const contextBefore = lines.slice(contextBeforeStart, startLine - 1).join("\n");
945
+ const contextAfterEnd = Math.min(lines.length, endLine + config.overlapLines);
946
+ const contextAfter = lines.slice(endLine, contextAfterEnd).join("\n");
825
947
  chunks.push({
826
948
  id,
827
949
  idx,
828
950
  file,
829
951
  startLine,
830
952
  endLine,
831
- text
953
+ text,
954
+ contextBefore: contextBefore || null,
955
+ contextAfter: contextAfter || null
832
956
  });
833
957
  idx++;
834
958
  }
@@ -855,12 +979,59 @@ const make$5 = Effect.gen(function* () {
855
979
  });
856
980
  const ChunkerLive = Layer.effect(Chunker, make$5);
857
981
  //#endregion
982
+ //#region src/lib/platform-error.ts
983
+ /**
984
+ * Check if a platform error has a specific `reason` string (e.g. "BadResource" for disk full,
985
+ * "NotFound" for missing files). Platform errors from @effect/platform include a `reason` property
986
+ * that categorizes the failure.
987
+ */
988
+ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
989
+ //#endregion
990
+ //#region src/lib/validation.ts
991
+ const mergeMessages = (messages) => {
992
+ if (messages.length === 1) return messages[0];
993
+ const uniq = [...new Set(messages)];
994
+ if (uniq.every((m) => m.startsWith("Expected"))) {
995
+ const actualMatch = uniq[0].match(/actual (.+)$/);
996
+ const actual = actualMatch ? actualMatch[1] : "";
997
+ return `Expected ${uniq.map((m) => m.replace(/^Expected /, "").replace(/, actual .+$/, "")).join(" | ")}, actual ${actual}`;
998
+ }
999
+ return uniq.join("\n");
1000
+ };
1001
+ const formatSchemaErrors = (error) => {
1002
+ const issues = ParseResult.ArrayFormatter.formatErrorSync(error);
1003
+ const byPath = /* @__PURE__ */ new Map();
1004
+ for (const issue of issues) {
1005
+ const path = issue.path.join(".");
1006
+ if (!byPath.has(path)) byPath.set(path, []);
1007
+ byPath.get(path).push(issue.message);
1008
+ }
1009
+ return Array.from(byPath.entries()).map(([path, messages]) => ({
1010
+ path,
1011
+ message: mergeMessages(messages)
1012
+ }));
1013
+ };
1014
+ const formatSchemaMessage = (error) => ParseResult.TreeFormatter.formatErrorSync(error);
1015
+ const isJsonSyntaxError = (error) => error.issue._tag === "Transformation" && error.issue.kind === "Transformation";
1016
+ const decodeJsonWithErrors = (schema, json) => Schema.decodeUnknown(Schema.parseJson(schema))(json).pipe(Effect.mapError((error) => {
1017
+ const base = {
1018
+ message: formatSchemaMessage(error),
1019
+ errors: formatSchemaErrors(error)
1020
+ };
1021
+ return isJsonSyntaxError(error) ? {
1022
+ ...base,
1023
+ _tag: "JsonSyntaxError"
1024
+ } : {
1025
+ ...base,
1026
+ _tag: "SchemaValidationError"
1027
+ };
1028
+ }));
1029
+ //#endregion
858
1030
  //#region src/services/config-store.ts
859
1031
  const CONFIG_DIR = ".pix";
860
1032
  const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
861
- const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
862
1033
  const mapConfigWriteError = (cause, path, action) => {
863
- if (isPlatformReason$1(cause, "BadResource")) return new DiskFullError({
1034
+ if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
864
1035
  message: `Disk full: could not ${action}`,
865
1036
  path,
866
1037
  cause
@@ -873,13 +1044,17 @@ const mapConfigWriteError = (cause, path, action) => {
873
1044
  const make$4 = Effect.gen(function* () {
874
1045
  const fs = yield* FileSystem.FileSystem;
875
1046
  const writeConfig = (config) => Effect.gen(function* () {
876
- const configJson = JSON.stringify(config, null, 2);
1047
+ const encodeJson = Schema.parseJson(ConfigSchema, { space: 2 });
1048
+ const configJson = yield* Schema.encode(encodeJson)(config).pipe(Effect.mapError((e) => new ConfigError({
1049
+ message: "Failed to encode config",
1050
+ cause: e
1051
+ })));
877
1052
  yield* fs.makeDirectory(CONFIG_DIR, { recursive: true }).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_DIR, "create .pix directory")));
878
1053
  yield* fs.writeFileString(CONFIG_PATH, configJson).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_PATH, "write config.json")));
879
1054
  });
880
1055
  const readConfig = () => Effect.gen(function* () {
881
- const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
882
- if (isPlatformReason$1(cause, "NotFound")) return new ConfigNotFoundError({
1056
+ return yield* decodeJsonWithErrors(ConfigSchema, yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
1057
+ if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
883
1058
  message: "Config file not found. Run pix init first.",
884
1059
  path: CONFIG_PATH,
885
1060
  cause
@@ -888,15 +1063,17 @@ const make$4 = Effect.gen(function* () {
888
1063
  message: "Failed to read config.json",
889
1064
  cause
890
1065
  });
891
- }));
892
- return yield* Effect.try({
893
- try: () => JSON.parse(content),
894
- catch: (error) => new ConfigMalformedError({
1066
+ }))).pipe(Effect.mapError((err) => {
1067
+ if (err._tag === "JsonSyntaxError") return new ConfigMalformedError({
895
1068
  message: "Invalid JSON in config.json",
896
1069
  path: CONFIG_PATH,
897
- cause: error
898
- })
899
- });
1070
+ cause: err
1071
+ });
1072
+ return new ConfigValidationError({
1073
+ message: err.message,
1074
+ errors: err.errors
1075
+ });
1076
+ }));
900
1077
  });
901
1078
  const configExists = () => Effect.gen(function* () {
902
1079
  return yield* fs.exists(CONFIG_PATH);
@@ -914,10 +1091,7 @@ const make$3 = Effect.gen(function* () {
914
1091
  const fs = yield* FileSystem.FileSystem;
915
1092
  const processorMap = buildProcessorMap([]);
916
1093
  const extract = (file) => {
917
- const lastSlash = file.lastIndexOf("/");
918
- const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
919
- const dotIndex = name.lastIndexOf(".");
920
- const ext = dotIndex === -1 ? name.toLowerCase() : name.slice(dotIndex).toLowerCase();
1094
+ const ext = getExtension(file);
921
1095
  const processor = processorMap[ext];
922
1096
  if (!processor) return Effect.fail({
923
1097
  _tag: "UnsupportedFormat",
@@ -1207,11 +1381,46 @@ const make$1 = Effect.gen(function* () {
1207
1381
  });
1208
1382
  const ScannerLive = Layer.effect(Scanner, make$1);
1209
1383
  //#endregion
1384
+ //#region src/domain/chunk.ts
1385
+ /** Runtime schema for persisted/searchable chunk entries. */
1386
+ const ChunkSchema = Schema.Struct({
1387
+ id: Schema.String,
1388
+ idx: Schema.Number,
1389
+ file: Schema.String,
1390
+ startLine: Schema.Number,
1391
+ endLine: Schema.Number,
1392
+ text: Schema.String,
1393
+ contextBefore: Schema.Union(Schema.String, Schema.Null),
1394
+ contextAfter: Schema.Union(Schema.String, Schema.Null)
1395
+ });
1396
+ //#endregion
1210
1397
  //#region src/services/vector-store.ts
1398
+ const parseChunkLine = (line) => {
1399
+ try {
1400
+ return Option.some(Schema.decodeUnknownSync(Schema.parseJson(ChunkSchema))(line));
1401
+ } catch {
1402
+ return Option.none();
1403
+ }
1404
+ };
1405
+ /** Compute dot-product similarity between a chunk vector and the query embedding. */
1406
+ const computeDotProduct = (chunkVector, query) => {
1407
+ let dot = 0;
1408
+ for (let j = 0; j < query.dims; j++) dot += chunkVector[j] * query.vector[j];
1409
+ return dot;
1410
+ };
1211
1411
  const STORE_DIR = ".pix";
1212
1412
  const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
1213
1413
  const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
1214
- const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
1414
+ /** Pre-built Schema instance for chunk encode/decode. */
1415
+ const parseJsonChunk = Schema.parseJson(ChunkSchema);
1416
+ /** Build ChunkValidationError array from malformed line count, or [] if none. */
1417
+ const buildChunkValidationErrors = (malformedLines) => malformedLines > 0 ? [new ChunkValidationError({
1418
+ message: `Skipped ${malformedLines} malformed chunk line(s) in chunks.jsonl`,
1419
+ errors: [{
1420
+ path: "chunks.jsonl",
1421
+ message: `${malformedLines} line(s) failed schema validation`
1422
+ }]
1423
+ })] : [];
1215
1424
  /**
1216
1425
  * FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
1217
1426
  * statistics.
@@ -1238,22 +1447,30 @@ const make = Effect.gen(function* () {
1238
1447
  * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
1239
1448
  * field contains the source code.
1240
1449
  */
1241
- const countTotalLines = (lines) => lines.reduce((sum, line) => {
1242
- try {
1243
- return sum + JSON.parse(line).text.split("\n").length;
1244
- } catch {
1245
- return sum;
1246
- }
1247
- }, 0);
1248
- /** Count unique files across all chunks in chunks.jsonl. */
1249
- const countUniqueFiles = (lines) => {
1450
+ /** Count files, total lines, and malformed lines in a single pass. */
1451
+ const countChunkStats = (lines) => {
1250
1452
  const files = /* @__PURE__ */ new Set();
1251
- for (const line of lines) try {
1252
- const chunk = JSON.parse(line);
1253
- files.add(chunk.file);
1254
- } catch {}
1255
- return files;
1453
+ let totalLines = 0;
1454
+ let malformedLines = 0;
1455
+ for (const line of lines) {
1456
+ const chunk = parseChunkLine(line);
1457
+ if (Option.isSome(chunk)) {
1458
+ files.add(chunk.value.file);
1459
+ totalLines += chunk.value.text.split("\n").length;
1460
+ } else malformedLines++;
1461
+ }
1462
+ return {
1463
+ files,
1464
+ totalLines,
1465
+ malformedLines
1466
+ };
1256
1467
  };
1468
+ /** Check that index files exist; fail with NoIndexError if either is missing. */
1469
+ const requireIndex = () => Effect.gen(function* () {
1470
+ const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
1471
+ const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
1472
+ if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
1473
+ });
1257
1474
  const toStoreError = (operation, path) => (cause) => {
1258
1475
  if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
1259
1476
  message: `Disk full during ${operation}`,
@@ -1309,14 +1526,10 @@ const make = Effect.gen(function* () {
1309
1526
  if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
1310
1527
  });
1311
1528
  const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
1312
- const content = chunks.map((c) => JSON.stringify({
1313
- id: c.id,
1314
- idx: c.idx,
1315
- file: c.file,
1316
- startLine: c.startLine,
1317
- endLine: c.endLine,
1318
- text: c.text
1319
- })).join("\n") + "\n";
1529
+ const content = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
1530
+ message: "Failed to encode chunk",
1531
+ cause: e
1532
+ }))))).join("\n") + "\n";
1320
1533
  yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
1321
1534
  const buffer = serializeVectors(embeddings);
1322
1535
  yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
@@ -1353,37 +1566,39 @@ const make = Effect.gen(function* () {
1353
1566
  const store = (chunks, embeddings) => Effect.gen(function* () {
1354
1567
  yield* ensureDirExists(STORE_DIR, ".pix directory");
1355
1568
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
1356
- const chunksLines = chunks.map((c) => JSON.stringify({
1357
- id: c.id,
1358
- idx: c.idx,
1359
- file: c.file,
1360
- startLine: c.startLine,
1361
- endLine: c.endLine,
1362
- text: c.text
1363
- }));
1364
- yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
1569
+ const chunksJson = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
1570
+ message: "Failed to encode chunk",
1571
+ cause: e
1572
+ }))))).join("\n");
1573
+ yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
1365
1574
  yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1366
1575
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
1367
1576
  const buffer = serializeVectors(embeddings);
1368
1577
  yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
1369
1578
  yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1370
1579
  });
1371
- const search = (query, topK) => Effect.gen(function* () {
1372
- const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
1373
- const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
1374
- if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
1580
+ const search = (query, options) => Effect.gen(function* () {
1581
+ yield* requireIndex();
1375
1582
  const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
1376
1583
  const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
1377
- const vectors = new Float32Array(vectorsBuffer.buffer);
1584
+ const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
1585
+ const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
1586
+ const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
1378
1587
  const results = [];
1379
- for (let i = 0; i < chunkLines.length; i++) try {
1380
- const chunk = JSON.parse(chunkLines[i]);
1588
+ let malformedLines = 0;
1589
+ for (let i = 0; i < chunkLines.length; i++) {
1590
+ const parsed = parseChunkLine(chunkLines[i]);
1591
+ if (Option.isNone(parsed)) {
1592
+ malformedLines++;
1593
+ continue;
1594
+ }
1595
+ const chunk = parsed.value;
1596
+ if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
1597
+ if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
1381
1598
  const startIdx = i * query.dims;
1382
- const chunkVector = vectors.slice(startIdx, startIdx + query.dims);
1383
- let dotProduct = 0;
1384
- for (let j = 0; j < query.dims; j++) dotProduct += chunkVector[j] * query.vector[j];
1599
+ const score = computeDotProduct(vectors.slice(startIdx, startIdx + query.dims), query);
1385
1600
  results.push({
1386
- score: dotProduct,
1601
+ score,
1387
1602
  file: chunk.file,
1388
1603
  startLine: chunk.startLine,
1389
1604
  endLine: chunk.endLine,
@@ -1391,9 +1606,19 @@ const make = Effect.gen(function* () {
1391
1606
  contextBefore: chunk.contextBefore,
1392
1607
  contextAfter: chunk.contextAfter
1393
1608
  });
1394
- } catch {}
1609
+ }
1610
+ const validationErrors = buildChunkValidationErrors(malformedLines);
1395
1611
  results.sort((a, b) => b.score - a.score);
1396
- return results.slice(0, topK);
1612
+ const topK = options?.topK;
1613
+ if (topK == null) return {
1614
+ results,
1615
+ validationErrors
1616
+ };
1617
+ const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
1618
+ return {
1619
+ results: results.slice(0, clamped),
1620
+ validationErrors
1621
+ };
1397
1622
  });
1398
1623
  const getStatus = () => Effect.gen(function* () {
1399
1624
  const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
@@ -1404,13 +1629,15 @@ const make = Effect.gen(function* () {
1404
1629
  model: "",
1405
1630
  lastIndex: 0,
1406
1631
  totalLines: 0,
1407
- byteSize: 0
1632
+ byteSize: 0,
1633
+ validationErrors: []
1408
1634
  };
1409
1635
  const lines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
1410
- const chunks = lines.length;
1411
- const files = countUniqueFiles(lines).size;
1636
+ const { files: uniqueFiles, totalLines, malformedLines } = countChunkStats(lines);
1637
+ const chunks = lines.length - malformedLines;
1638
+ const files = uniqueFiles.size;
1412
1639
  const model = "";
1413
- const totalLines = countTotalLines(lines);
1640
+ const validationErrors = buildChunkValidationErrors(malformedLines);
1414
1641
  const vectorsStat = yield* withReadError(fs.stat(VECTORS_FILE), "stat vectors", VECTORS_FILE);
1415
1642
  const byteSize = "size" in vectorsStat ? Number(vectorsStat.size) : 0;
1416
1643
  return {
@@ -1419,7 +1646,8 @@ const make = Effect.gen(function* () {
1419
1646
  model,
1420
1647
  lastIndex: Option.map(vectorsStat?.mtime ?? Option.none(), (d) => d instanceof Date ? d.getTime() : 0).pipe(Option.getOrElse(() => 0)),
1421
1648
  totalLines,
1422
- byteSize
1649
+ byteSize,
1650
+ validationErrors
1423
1651
  };
1424
1652
  });
1425
1653
  const reset = () => Effect.gen(function* () {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.11.0",
3
+ "version": "0.13.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",