@lucas-bur/pix 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.mjs +189 -74
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -2,12 +2,13 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Args, CliConfig, Command, Options } from "@effect/cli";
4
4
  import { NodeContext, NodeRuntime } from "@effect/platform-node";
5
- import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
5
+ import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Schema, Stream } from "effect";
6
6
  import * as Chunk from "effect/Chunk";
7
7
  import { styleText } from "node:util";
8
8
  import * as clack from "@clack/prompts";
9
9
  import { FileSystem } from "@effect/platform";
10
10
  import crypto from "node:crypto";
11
+ import * as ParseResult from "effect/ParseResult";
11
12
  import { env } from "@huggingface/transformers";
12
13
  import ignore from "ignore";
13
14
  //#region src/domain/ports.ts
@@ -154,7 +155,26 @@ const JsonDisplay = { layer: Layer.succeed(Display, {
154
155
  }) };
155
156
  //#endregion
156
157
  //#region src/domain/config.ts
157
- var ConfigError = class extends Data.TaggedError("ConfigError") {};
158
+ const EmbedderConfigSchema = Schema.Struct({
159
+ model: Schema.String,
160
+ device: Schema.Literal("auto", "cpu", "cuda", "dml", "coreml"),
161
+ dtype: Schema.Literal("fp32", "fp16", "q8"),
162
+ batchSize: Schema.Number
163
+ });
164
+ /**
165
+ * Runtime schema for persisted project configuration. Defines the structure and validation rules
166
+ * for `.pix/config.json`.
167
+ */
168
+ const ConfigSchema = Schema.Struct({
169
+ schema: Schema.Literal("1"),
170
+ chunkLines: Schema.Number,
171
+ overlapLines: Schema.Number,
172
+ chunkConcurrency: Schema.optionalWith(Schema.Number, { exact: true }),
173
+ skipExtensions: Schema.Array(Schema.String),
174
+ ignoredPaths: Schema.Array(Schema.String),
175
+ ignoreGitignore: Schema.optionalWith(Schema.Boolean, { exact: true }),
176
+ embedder: EmbedderConfigSchema
177
+ });
158
178
  const DEFAULT_CONFIG = {
159
179
  schema: "1",
160
180
  chunkLines: 60,
@@ -211,10 +231,14 @@ const getFileExtension = (filename) => {
211
231
  };
212
232
  //#endregion
213
233
  //#region src/domain/errors.ts
234
+ /** Generic config I/O failure (read, write, encode). */
235
+ var ConfigError = class extends Data.TaggedError("ConfigError") {};
214
236
  /** Config file or directory does not exist. Run pix init first. */
215
237
  var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
216
238
  /** Config file exists but contains invalid JSON. */
217
239
  var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
240
+ /** Config failed schema validation — missing/invalid fields. */
241
+ var ConfigValidationError = class extends Data.TaggedError("ConfigValidationError") {};
218
242
  /** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
219
243
  var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
220
244
  /** Disk is full — write operation could not complete. */
@@ -232,6 +256,8 @@ var InferenceError = class extends Data.TaggedError("InferenceError") {};
232
256
  * via ScanResult.skipped.
233
257
  */
234
258
  var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
259
+ /** A chunk line in chunks.jsonl failed schema validation. */
260
+ var ChunkValidationError = class extends Data.TaggedError("ChunkValidationError") {};
235
261
  /** File type is unsupported for text extraction. */
236
262
  var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
237
263
  /** Text extraction failed for a supported file type. */
@@ -405,7 +431,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
405
431
  chunks: 0,
406
432
  files: 0,
407
433
  totalLines: 0,
408
- byteSize: 0
434
+ byteSize: 0,
435
+ validationErrors: []
409
436
  },
410
437
  durationMs: Date.now() - start
411
438
  };
@@ -420,7 +447,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
420
447
  chunks: 0,
421
448
  files: 0,
422
449
  totalLines: 0,
423
- byteSize: 0
450
+ byteSize: 0,
451
+ validationErrors: []
424
452
  },
425
453
  durationMs: Date.now() - start
426
454
  };
@@ -454,7 +482,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
454
482
  chunks: stats.chunks,
455
483
  files: stats.files,
456
484
  totalLines: stats.totalLines,
457
- byteSize: stats.byteSize
485
+ byteSize: stats.byteSize,
486
+ validationErrors: []
458
487
  },
459
488
  durationMs: Date.now() - start,
460
489
  embedderFallback: fallbackInfo
@@ -543,6 +572,8 @@ const errorCodes = {
543
572
  ConfigError: "CONFIG_ERROR",
544
573
  ConfigNotFoundError: "CONFIG_NOT_FOUND",
545
574
  ConfigMalformedError: "CONFIG_MALFORMED",
575
+ ConfigValidationError: "CONFIG_VALIDATION_ERROR",
576
+ ChunkValidationError: "CHUNK_VALIDATION_ERROR",
546
577
  NoIndexError: "NO_INDEX",
547
578
  DiskFullError: "DISK_FULL",
548
579
  StoreError: "STORE_ERROR",
@@ -754,8 +785,12 @@ const buildSearchOptions = (top, ignorePath, onlyPath) => {
754
785
  };
755
786
  };
756
787
  /** Render search results via Display — JSON + human-readable text. */
757
- const renderResults = (d, results, ctxLines, noContent) => Effect.gen(function* () {
758
- yield* d.json(toJsonOutput(results, ctxLines, noContent));
788
+ const renderResults = (d, response, ctxLines, noContent) => Effect.gen(function* () {
789
+ const { results, validationErrors } = response;
790
+ yield* d.json({
791
+ results: toJsonOutput(results, ctxLines, noContent),
792
+ ...validationErrors.length > 0 && { validationErrors }
793
+ });
759
794
  if (results.length === 0) yield* d.log("No results found", "warn");
760
795
  else for (const result of results) yield* d.text(noContent ? formatLocation(result) : formatResult(result));
761
796
  });
@@ -777,8 +812,12 @@ const queryCommand = Command.make("query", {
777
812
  const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
778
813
  const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
779
814
  if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
780
- const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
781
- yield* renderResults(d, noContent ? results : applyCharBudget(results, Option.getOrUndefined(maxCharacters)).results, ctxLines, noContent);
815
+ const searchResponse = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
816
+ const finalResults = noContent ? searchResponse.results : applyCharBudget(searchResponse.results, Option.getOrUndefined(maxCharacters)).results;
817
+ yield* renderResults(d, {
818
+ ...searchResponse,
819
+ results: finalResults
820
+ }, ctxLines, noContent);
782
821
  }).pipe(Effect.catchTags({
783
822
  ModelLoadError: reportError,
784
823
  InferenceError: reportError,
@@ -825,6 +864,7 @@ const statusCommand = Command.make("status", { json: Options.boolean("json").pip
825
864
  yield* d.log(`Total lines: ${result.totalLines.toLocaleString()}`, "info");
826
865
  yield* d.log(`Index size: ${result.byteSize.toLocaleString()} bytes`, "info");
827
866
  yield* d.log(`Last indexed: ${lastIndexStr}`, "info");
867
+ if (result.validationErrors.length > 0) yield* d.log(`Warnings: ${result.validationErrors[0].message}`, "warn");
828
868
  }).pipe(Effect.catchTags({ StoreError: reportError })));
829
869
  //#endregion
830
870
  //#region src/cli.ts
@@ -947,6 +987,46 @@ const ChunkerLive = Layer.effect(Chunker, make$5);
947
987
  */
948
988
  const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
949
989
  //#endregion
990
+ //#region src/lib/validation.ts
991
+ const mergeMessages = (messages) => {
992
+ if (messages.length === 1) return messages[0];
993
+ const uniq = [...new Set(messages)];
994
+ if (uniq.every((m) => m.startsWith("Expected"))) {
995
+ const actualMatch = uniq[0].match(/actual (.+)$/);
996
+ const actual = actualMatch ? actualMatch[1] : "";
997
+ return `Expected ${uniq.map((m) => m.replace(/^Expected /, "").replace(/, actual .+$/, "")).join(" | ")}, actual ${actual}`;
998
+ }
999
+ return uniq.join("\n");
1000
+ };
1001
+ const formatSchemaErrors = (error) => {
1002
+ const issues = ParseResult.ArrayFormatter.formatErrorSync(error);
1003
+ const byPath = /* @__PURE__ */ new Map();
1004
+ for (const issue of issues) {
1005
+ const path = issue.path.join(".");
1006
+ if (!byPath.has(path)) byPath.set(path, []);
1007
+ byPath.get(path).push(issue.message);
1008
+ }
1009
+ return Array.from(byPath.entries()).map(([path, messages]) => ({
1010
+ path,
1011
+ message: mergeMessages(messages)
1012
+ }));
1013
+ };
1014
+ const formatSchemaMessage = (error) => ParseResult.TreeFormatter.formatErrorSync(error);
1015
+ const isJsonSyntaxError = (error) => error.issue._tag === "Transformation" && error.issue.kind === "Transformation";
1016
+ const decodeJsonWithErrors = (schema, json) => Schema.decodeUnknown(Schema.parseJson(schema))(json).pipe(Effect.mapError((error) => {
1017
+ const base = {
1018
+ message: formatSchemaMessage(error),
1019
+ errors: formatSchemaErrors(error)
1020
+ };
1021
+ return isJsonSyntaxError(error) ? {
1022
+ ...base,
1023
+ _tag: "JsonSyntaxError"
1024
+ } : {
1025
+ ...base,
1026
+ _tag: "SchemaValidationError"
1027
+ };
1028
+ }));
1029
+ //#endregion
950
1030
  //#region src/services/config-store.ts
951
1031
  const CONFIG_DIR = ".pix";
952
1032
  const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
@@ -964,12 +1044,16 @@ const mapConfigWriteError = (cause, path, action) => {
964
1044
  const make$4 = Effect.gen(function* () {
965
1045
  const fs = yield* FileSystem.FileSystem;
966
1046
  const writeConfig = (config) => Effect.gen(function* () {
967
- const configJson = JSON.stringify(config, null, 2);
1047
+ const encodeJson = Schema.parseJson(ConfigSchema, { space: 2 });
1048
+ const configJson = yield* Schema.encode(encodeJson)(config).pipe(Effect.mapError((e) => new ConfigError({
1049
+ message: "Failed to encode config",
1050
+ cause: e
1051
+ })));
968
1052
  yield* fs.makeDirectory(CONFIG_DIR, { recursive: true }).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_DIR, "create .pix directory")));
969
1053
  yield* fs.writeFileString(CONFIG_PATH, configJson).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_PATH, "write config.json")));
970
1054
  });
971
1055
  const readConfig = () => Effect.gen(function* () {
972
- const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
1056
+ return yield* decodeJsonWithErrors(ConfigSchema, yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
973
1057
  if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
974
1058
  message: "Config file not found. Run pix init first.",
975
1059
  path: CONFIG_PATH,
@@ -979,15 +1063,17 @@ const make$4 = Effect.gen(function* () {
979
1063
  message: "Failed to read config.json",
980
1064
  cause
981
1065
  });
982
- }));
983
- return yield* Effect.try({
984
- try: () => JSON.parse(content),
985
- catch: (error) => new ConfigMalformedError({
1066
+ }))).pipe(Effect.mapError((err) => {
1067
+ if (err._tag === "JsonSyntaxError") return new ConfigMalformedError({
986
1068
  message: "Invalid JSON in config.json",
987
1069
  path: CONFIG_PATH,
988
- cause: error
989
- })
990
- });
1070
+ cause: err
1071
+ });
1072
+ return new ConfigValidationError({
1073
+ message: err.message,
1074
+ errors: err.errors
1075
+ });
1076
+ }));
991
1077
  });
992
1078
  const configExists = () => Effect.gen(function* () {
993
1079
  return yield* fs.exists(CONFIG_PATH);
@@ -1295,21 +1381,26 @@ const make$1 = Effect.gen(function* () {
1295
1381
  });
1296
1382
  const ScannerLive = Layer.effect(Scanner, make$1);
1297
1383
  //#endregion
1384
+ //#region src/domain/chunk.ts
1385
+ /** Runtime schema for persisted/searchable chunk entries. */
1386
+ const ChunkSchema = Schema.Struct({
1387
+ id: Schema.String,
1388
+ idx: Schema.Number,
1389
+ file: Schema.String,
1390
+ startLine: Schema.Number,
1391
+ endLine: Schema.Number,
1392
+ text: Schema.String,
1393
+ contextBefore: Schema.Union(Schema.String, Schema.Null),
1394
+ contextAfter: Schema.Union(Schema.String, Schema.Null)
1395
+ });
1396
+ //#endregion
1298
1397
  //#region src/services/vector-store.ts
1299
- /**
1300
- * Parse a single JSON line from chunks.jsonl and normalize context fields (old indexes may lack
1301
- * them).
1302
- */
1303
1398
  const parseChunkLine = (line) => {
1304
- const raw = JSON.parse(line);
1305
- return {
1306
- file: typeof raw.file === "string" ? raw.file : "",
1307
- startLine: typeof raw.startLine === "number" ? raw.startLine : 0,
1308
- endLine: typeof raw.endLine === "number" ? raw.endLine : 0,
1309
- text: typeof raw.text === "string" ? raw.text : "",
1310
- contextBefore: typeof raw.contextBefore === "string" ? raw.contextBefore : null,
1311
- contextAfter: typeof raw.contextAfter === "string" ? raw.contextAfter : null
1312
- };
1399
+ try {
1400
+ return Option.some(Schema.decodeUnknownSync(Schema.parseJson(ChunkSchema))(line));
1401
+ } catch {
1402
+ return Option.none();
1403
+ }
1313
1404
  };
1314
1405
  /** Compute dot-product similarity between a chunk vector and the query embedding. */
1315
1406
  const computeDotProduct = (chunkVector, query) => {
@@ -1320,20 +1411,16 @@ const computeDotProduct = (chunkVector, query) => {
1320
1411
  const STORE_DIR = ".pix";
1321
1412
  const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
1322
1413
  const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
1323
- /**
1324
- * Serialize a Chunk to a JSON object for storage in chunks.jsonl. Always includes context fields
1325
- * for schema consistency.
1326
- */
1327
- const serializeChunk = (c) => ({
1328
- id: c.id,
1329
- idx: c.idx,
1330
- file: c.file,
1331
- startLine: c.startLine,
1332
- endLine: c.endLine,
1333
- text: c.text,
1334
- contextBefore: c.contextBefore,
1335
- contextAfter: c.contextAfter
1336
- });
1414
+ /** Pre-built Schema instance for chunk encode/decode. */
1415
+ const parseJsonChunk = Schema.parseJson(ChunkSchema);
1416
+ /** Build ChunkValidationError array from malformed line count, or [] if none. */
1417
+ const buildChunkValidationErrors = (malformedLines) => malformedLines > 0 ? [new ChunkValidationError({
1418
+ message: `Skipped ${malformedLines} malformed chunk line(s) in chunks.jsonl`,
1419
+ errors: [{
1420
+ path: "chunks.jsonl",
1421
+ message: `${malformedLines} line(s) failed schema validation`
1422
+ }]
1423
+ })] : [];
1337
1424
  /**
1338
1425
  * FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
1339
1426
  * statistics.
@@ -1360,22 +1447,30 @@ const make = Effect.gen(function* () {
1360
1447
  * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
1361
1448
  * field contains the source code.
1362
1449
  */
1363
- const countTotalLines = (lines) => lines.reduce((sum, line) => {
1364
- try {
1365
- return sum + JSON.parse(line).text.split("\n").length;
1366
- } catch {
1367
- return sum;
1368
- }
1369
- }, 0);
1370
- /** Count unique files across all chunks in chunks.jsonl. */
1371
- const countUniqueFiles = (lines) => {
1450
+ /** Count files, total lines, and malformed lines in a single pass. */
1451
+ const countChunkStats = (lines) => {
1372
1452
  const files = /* @__PURE__ */ new Set();
1373
- for (const line of lines) try {
1374
- const chunk = JSON.parse(line);
1375
- files.add(chunk.file);
1376
- } catch {}
1377
- return files;
1453
+ let totalLines = 0;
1454
+ let malformedLines = 0;
1455
+ for (const line of lines) {
1456
+ const chunk = parseChunkLine(line);
1457
+ if (Option.isSome(chunk)) {
1458
+ files.add(chunk.value.file);
1459
+ totalLines += chunk.value.text.split("\n").length;
1460
+ } else malformedLines++;
1461
+ }
1462
+ return {
1463
+ files,
1464
+ totalLines,
1465
+ malformedLines
1466
+ };
1378
1467
  };
1468
+ /** Check that index files exist; fail with NoIndexError if either is missing. */
1469
+ const requireIndex = () => Effect.gen(function* () {
1470
+ const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
1471
+ const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
1472
+ if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
1473
+ });
1379
1474
  const toStoreError = (operation, path) => (cause) => {
1380
1475
  if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
1381
1476
  message: `Disk full during ${operation}`,
@@ -1431,7 +1526,10 @@ const make = Effect.gen(function* () {
1431
1526
  if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
1432
1527
  });
1433
1528
  const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
1434
- const content = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n") + "\n";
1529
+ const content = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
1530
+ message: "Failed to encode chunk",
1531
+ cause: e
1532
+ }))))).join("\n") + "\n";
1435
1533
  yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
1436
1534
  const buffer = serializeVectors(embeddings);
1437
1535
  yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
@@ -1468,7 +1566,10 @@ const make = Effect.gen(function* () {
1468
1566
  const store = (chunks, embeddings) => Effect.gen(function* () {
1469
1567
  yield* ensureDirExists(STORE_DIR, ".pix directory");
1470
1568
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
1471
- const chunksJson = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n");
1569
+ const chunksJson = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
1570
+ message: "Failed to encode chunk",
1571
+ cause: e
1572
+ }))))).join("\n");
1472
1573
  yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
1473
1574
  yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
1474
1575
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
@@ -1477,17 +1578,21 @@ const make = Effect.gen(function* () {
1477
1578
  yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
1478
1579
  });
1479
1580
  const search = (query, options) => Effect.gen(function* () {
1480
- const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
1481
- const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
1482
- if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
1581
+ yield* requireIndex();
1483
1582
  const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
1484
1583
  const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
1485
1584
  const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
1486
1585
  const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
1487
1586
  const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
1488
1587
  const results = [];
1489
- for (let i = 0; i < chunkLines.length; i++) try {
1490
- const chunk = parseChunkLine(chunkLines[i]);
1588
+ let malformedLines = 0;
1589
+ for (let i = 0; i < chunkLines.length; i++) {
1590
+ const parsed = parseChunkLine(chunkLines[i]);
1591
+ if (Option.isNone(parsed)) {
1592
+ malformedLines++;
1593
+ continue;
1594
+ }
1595
+ const chunk = parsed.value;
1491
1596
  if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
1492
1597
  if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
1493
1598
  const startIdx = i * query.dims;
@@ -1501,12 +1606,19 @@ const make = Effect.gen(function* () {
1501
1606
  contextBefore: chunk.contextBefore,
1502
1607
  contextAfter: chunk.contextAfter
1503
1608
  });
1504
- } catch {}
1609
+ }
1610
+ const validationErrors = buildChunkValidationErrors(malformedLines);
1505
1611
  results.sort((a, b) => b.score - a.score);
1506
1612
  const topK = options?.topK;
1507
- if (topK == null) return results;
1613
+ if (topK == null) return {
1614
+ results,
1615
+ validationErrors
1616
+ };
1508
1617
  const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
1509
- return results.slice(0, clamped);
1618
+ return {
1619
+ results: results.slice(0, clamped),
1620
+ validationErrors
1621
+ };
1510
1622
  });
1511
1623
  const getStatus = () => Effect.gen(function* () {
1512
1624
  const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
@@ -1517,13 +1629,15 @@ const make = Effect.gen(function* () {
1517
1629
  model: "",
1518
1630
  lastIndex: 0,
1519
1631
  totalLines: 0,
1520
- byteSize: 0
1632
+ byteSize: 0,
1633
+ validationErrors: []
1521
1634
  };
1522
1635
  const lines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
1523
- const chunks = lines.length;
1524
- const files = countUniqueFiles(lines).size;
1636
+ const { files: uniqueFiles, totalLines, malformedLines } = countChunkStats(lines);
1637
+ const chunks = lines.length - malformedLines;
1638
+ const files = uniqueFiles.size;
1525
1639
  const model = "";
1526
- const totalLines = countTotalLines(lines);
1640
+ const validationErrors = buildChunkValidationErrors(malformedLines);
1527
1641
  const vectorsStat = yield* withReadError(fs.stat(VECTORS_FILE), "stat vectors", VECTORS_FILE);
1528
1642
  const byteSize = "size" in vectorsStat ? Number(vectorsStat.size) : 0;
1529
1643
  return {
@@ -1532,7 +1646,8 @@ const make = Effect.gen(function* () {
1532
1646
  model,
1533
1647
  lastIndex: Option.map(vectorsStat?.mtime ?? Option.none(), (d) => d instanceof Date ? d.getTime() : 0).pipe(Option.getOrElse(() => 0)),
1534
1648
  totalLines,
1535
- byteSize
1649
+ byteSize,
1650
+ validationErrors
1536
1651
  };
1537
1652
  });
1538
1653
  const reset = () => Effect.gen(function* () {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lucas-bur/pix",
3
- "version": "0.12.0",
3
+ "version": "0.13.0",
4
4
  "description": "Lightweight local semantic project indexer",
5
5
  "keywords": [
6
6
  "cli",