@lucas-bur/pix 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +189 -74
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import { Args, CliConfig, Command, Options } from "@effect/cli";
|
|
4
4
|
import { NodeContext, NodeRuntime } from "@effect/platform-node";
|
|
5
|
-
import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
|
|
5
|
+
import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Schema, Stream } from "effect";
|
|
6
6
|
import * as Chunk from "effect/Chunk";
|
|
7
7
|
import { styleText } from "node:util";
|
|
8
8
|
import * as clack from "@clack/prompts";
|
|
9
9
|
import { FileSystem } from "@effect/platform";
|
|
10
10
|
import crypto from "node:crypto";
|
|
11
|
+
import * as ParseResult from "effect/ParseResult";
|
|
11
12
|
import { env } from "@huggingface/transformers";
|
|
12
13
|
import ignore from "ignore";
|
|
13
14
|
//#region src/domain/ports.ts
|
|
@@ -154,7 +155,26 @@ const JsonDisplay = { layer: Layer.succeed(Display, {
|
|
|
154
155
|
}) };
|
|
155
156
|
//#endregion
|
|
156
157
|
//#region src/domain/config.ts
|
|
157
|
-
|
|
158
|
+
/**
 * Runtime schema for the `embedder` section of the persisted project configuration.
 *
 * - `model`: any string.
 * - `device`: one of "auto", "cpu", "cuda", "dml", "coreml".
 * - `dtype`: one of "fp32", "fp16", "q8".
 * - `batchSize`: any number.
 */
const EmbedderConfigSchema = Schema.Struct({
	model: Schema.String,
	device: Schema.Literal("auto", "cpu", "cuda", "dml", "coreml"),
	dtype: Schema.Literal("fp32", "fp16", "q8"),
	batchSize: Schema.Number
});
|
|
164
|
+
/**
 * Runtime schema describing the contents of `.pix/config.json`.
 *
 * It is used both to validate a config read from disk and to encode a config before writing.
 * `chunkConcurrency` and `ignoreGitignore` are declared with `optionalWith(..., { exact: true })`,
 * so those keys may be left out; every other key is required.
 */
const ConfigSchema = Schema.Struct({
	// Only schema version "1" is accepted.
	schema: Schema.Literal("1"),
	chunkLines: Schema.Number,
	overlapLines: Schema.Number,
	chunkConcurrency: Schema.optionalWith(Schema.Number, { exact: true }),
	skipExtensions: Schema.Array(Schema.String),
	ignoredPaths: Schema.Array(Schema.String),
	ignoreGitignore: Schema.optionalWith(Schema.Boolean, { exact: true }),
	// Nested embedder settings are validated by their own schema.
	embedder: EmbedderConfigSchema
});
|
|
158
178
|
const DEFAULT_CONFIG = {
|
|
159
179
|
schema: "1",
|
|
160
180
|
chunkLines: 60,
|
|
@@ -211,10 +231,14 @@ const getFileExtension = (filename) => {
|
|
|
211
231
|
};
|
|
212
232
|
//#endregion
|
|
213
233
|
//#region src/domain/errors.ts
|
|
234
|
+
/** Generic config I/O failure (read, write, encode). */
|
|
235
|
+
var ConfigError = class extends Data.TaggedError("ConfigError") {};
|
|
214
236
|
/** Config file or directory does not exist. Run pix init first. */
|
|
215
237
|
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
|
|
216
238
|
/** Config file exists but contains invalid JSON. */
|
|
217
239
|
var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
|
|
240
|
+
/** Config failed schema validation — missing/invalid fields. */
|
|
241
|
+
var ConfigValidationError = class extends Data.TaggedError("ConfigValidationError") {};
|
|
218
242
|
/** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
|
|
219
243
|
var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
|
|
220
244
|
/** Disk is full — write operation could not complete. */
|
|
@@ -232,6 +256,8 @@ var InferenceError = class extends Data.TaggedError("InferenceError") {};
|
|
|
232
256
|
* via ScanResult.skipped.
|
|
233
257
|
*/
|
|
234
258
|
var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
|
|
259
|
+
/** A chunk line in chunks.jsonl failed schema validation. */
|
|
260
|
+
var ChunkValidationError = class extends Data.TaggedError("ChunkValidationError") {};
|
|
235
261
|
/** File type is unsupported for text extraction. */
|
|
236
262
|
var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
|
|
237
263
|
/** Text extraction failed for a supported file type. */
|
|
@@ -405,7 +431,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
405
431
|
chunks: 0,
|
|
406
432
|
files: 0,
|
|
407
433
|
totalLines: 0,
|
|
408
|
-
byteSize: 0
|
|
434
|
+
byteSize: 0,
|
|
435
|
+
validationErrors: []
|
|
409
436
|
},
|
|
410
437
|
durationMs: Date.now() - start
|
|
411
438
|
};
|
|
@@ -420,7 +447,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
420
447
|
chunks: 0,
|
|
421
448
|
files: 0,
|
|
422
449
|
totalLines: 0,
|
|
423
|
-
byteSize: 0
|
|
450
|
+
byteSize: 0,
|
|
451
|
+
validationErrors: []
|
|
424
452
|
},
|
|
425
453
|
durationMs: Date.now() - start
|
|
426
454
|
};
|
|
@@ -454,7 +482,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
454
482
|
chunks: stats.chunks,
|
|
455
483
|
files: stats.files,
|
|
456
484
|
totalLines: stats.totalLines,
|
|
457
|
-
byteSize: stats.byteSize
|
|
485
|
+
byteSize: stats.byteSize,
|
|
486
|
+
validationErrors: []
|
|
458
487
|
},
|
|
459
488
|
durationMs: Date.now() - start,
|
|
460
489
|
embedderFallback: fallbackInfo
|
|
@@ -543,6 +572,8 @@ const errorCodes = {
|
|
|
543
572
|
ConfigError: "CONFIG_ERROR",
|
|
544
573
|
ConfigNotFoundError: "CONFIG_NOT_FOUND",
|
|
545
574
|
ConfigMalformedError: "CONFIG_MALFORMED",
|
|
575
|
+
ConfigValidationError: "CONFIG_VALIDATION_ERROR",
|
|
576
|
+
ChunkValidationError: "CHUNK_VALIDATION_ERROR",
|
|
546
577
|
NoIndexError: "NO_INDEX",
|
|
547
578
|
DiskFullError: "DISK_FULL",
|
|
548
579
|
StoreError: "STORE_ERROR",
|
|
@@ -754,8 +785,12 @@ const buildSearchOptions = (top, ignorePath, onlyPath) => {
|
|
|
754
785
|
};
|
|
755
786
|
};
|
|
756
787
|
/**
 * Render a search response through the Display service.
 *
 * First emits a JSON payload (`results`, plus `validationErrors` only when there are any),
 * then the human-readable output: a "No results found" warning when the result list is empty,
 * otherwise one text entry per result (location only when `noContent` is set).
 *
 * @param d          Display service providing `json`, `log` and `text`.
 * @param response   Object with `results` and `validationErrors` arrays.
 * @param ctxLines   Context-line count forwarded to `toJsonOutput`.
 * @param noContent  When truthy, print locations instead of full results.
 */
const renderResults = (d, response, ctxLines, noContent) => Effect.gen(function* () {
	const hits = response.results;
	const problems = response.validationErrors;
	const payload = { results: toJsonOutput(hits, ctxLines, noContent) };
	// Keep the key out of the payload entirely when nothing was skipped.
	if (problems.length > 0) payload.validationErrors = problems;
	yield* d.json(payload);
	if (hits.length === 0) {
		yield* d.log("No results found", "warn");
		return;
	}
	for (const hit of hits) {
		const rendered = noContent ? formatLocation(hit) : formatResult(hit);
		yield* d.text(rendered);
	}
});
|
|
@@ -777,8 +812,12 @@ const queryCommand = Command.make("query", {
|
|
|
777
812
|
const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
|
|
778
813
|
const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
|
|
779
814
|
if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
|
|
780
|
-
const
|
|
781
|
-
|
|
815
|
+
const searchResponse = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
|
|
816
|
+
const finalResults = noContent ? searchResponse.results : applyCharBudget(searchResponse.results, Option.getOrUndefined(maxCharacters)).results;
|
|
817
|
+
yield* renderResults(d, {
|
|
818
|
+
...searchResponse,
|
|
819
|
+
results: finalResults
|
|
820
|
+
}, ctxLines, noContent);
|
|
782
821
|
}).pipe(Effect.catchTags({
|
|
783
822
|
ModelLoadError: reportError,
|
|
784
823
|
InferenceError: reportError,
|
|
@@ -825,6 +864,7 @@ const statusCommand = Command.make("status", { json: Options.boolean("json").pip
|
|
|
825
864
|
yield* d.log(`Total lines: ${result.totalLines.toLocaleString()}`, "info");
|
|
826
865
|
yield* d.log(`Index size: ${result.byteSize.toLocaleString()} bytes`, "info");
|
|
827
866
|
yield* d.log(`Last indexed: ${lastIndexStr}`, "info");
|
|
867
|
+
if (result.validationErrors.length > 0) yield* d.log(`Warnings: ${result.validationErrors[0].message}`, "warn");
|
|
828
868
|
}).pipe(Effect.catchTags({ StoreError: reportError })));
|
|
829
869
|
//#endregion
|
|
830
870
|
//#region src/cli.ts
|
|
@@ -947,6 +987,46 @@ const ChunkerLive = Layer.effect(Chunker, make$5);
|
|
|
947
987
|
*/
|
|
948
988
|
const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
|
|
949
989
|
//#endregion
|
|
990
|
+
//#region src/lib/validation.ts
|
|
991
|
+
/**
 * Collapse the validation messages reported for one path into a single string.
 *
 * - No messages: returns "" instead of throwing.
 * - One distinct message (after de-duplication): returned unchanged.
 * - All messages start with "Expected": the expectations are joined with " | " and the
 *   ", actual <value>" suffix of the first message is appended once. If the messages carry
 *   no such suffix, nothing is appended (no dangling ", actual ").
 * - Anything else: the distinct messages are joined with newlines.
 *
 * @param {ReadonlyArray<string>} messages Messages collected for the same path.
 * @returns {string} The merged message.
 */
const mergeMessages = (messages) => {
	const uniq = [...new Set(messages)];
	if (uniq.length === 0) return "";
	if (uniq.length === 1) return uniq[0];
	if (!uniq.every((m) => m.startsWith("Expected"))) return uniq.join("\n");
	const expectations = uniq.map((m) => m.replace(/^Expected /, "").replace(/, actual .+$/, ""));
	// Extract with the same ", actual " delimiter that is stripped above, so both agree on
	// where the expectation ends and the actual value begins.
	const actualMatch = uniq[0].match(/, actual (.+)$/);
	const merged = `Expected ${expectations.join(" | ")}`;
	return actualMatch ? `${merged}, actual ${actualMatch[1]}` : merged;
};
|
|
1001
|
+
/**
 * Turn a schema parse error into a flat list of `{ path, message }` entries.
 *
 * Issues produced by `ParseResult.ArrayFormatter` are grouped by their dot-joined path
 * (first-seen order is kept), and the messages of each group are merged with `mergeMessages`.
 */
const formatSchemaErrors = (error) => {
	const grouped = /* @__PURE__ */ new Map();
	for (const { path, message } of ParseResult.ArrayFormatter.formatErrorSync(error)) {
		const key = path.join(".");
		const bucket = grouped.get(key);
		if (bucket === undefined) grouped.set(key, [message]);
		else bucket.push(message);
	}
	return [...grouped].map(([path, messages]) => ({
		path,
		message: mergeMessages(messages)
	}));
};
|
|
1014
|
+
/** Format a schema parse error as a single string using `ParseResult.TreeFormatter`. */
const formatSchemaMessage = (error) => {
	return ParseResult.TreeFormatter.formatErrorSync(error);
};
|
|
1015
|
+
/**
 * Decide whether a parse error from `Schema.parseJson(schema)` comes from the JSON text itself
 * (as opposed to valid JSON that fails `schema`).
 *
 * `Schema.parseJson(schema)` composes a JSON-parsing transformation with `schema`. A failure
 * inside `JSON.parse` is therefore reported as an outer `Transformation` issue of kind
 * "Encoded" that wraps the inner `Transformation` issue of kind "Transformation". Both that
 * nested shape and a bare top-level "Transformation" kind are treated as syntax errors.
 * NOTE(review): the issue nesting follows Effect's ParseResult model; confirm against the
 * installed `effect` version.
 *
 * @param error Parse error exposing an `issue` tree.
 * @returns {boolean} true when the failure is a JSON syntax error.
 */
const isJsonSyntaxError = (error) => {
	const outer = error?.issue;
	if (outer?._tag !== "Transformation") return false;
	if (outer.kind === "Transformation") return true;
	// Composition case: the encoded (JSON string) side failed during its own transformation step.
	return outer.kind === "Encoded" && outer.issue?._tag === "Transformation" && outer.issue.kind === "Transformation";
};
|
|
1016
|
+
/**
 * Parse a JSON string and validate it against `schema` in one step.
 *
 * On failure the parse error is replaced by a plain object carrying a formatted `message`,
 * a per-path `errors` list, and a `_tag` of "JsonSyntaxError" or "SchemaValidationError"
 * so callers can tell malformed JSON apart from well-formed JSON with invalid content.
 */
const decodeJsonWithErrors = (schema, json) => {
	const decode = Schema.decodeUnknown(Schema.parseJson(schema));
	return Effect.mapError(decode(json), (error) => ({
		message: formatSchemaMessage(error),
		errors: formatSchemaErrors(error),
		_tag: isJsonSyntaxError(error) ? "JsonSyntaxError" : "SchemaValidationError"
	}));
};
|
|
1029
|
+
//#endregion
|
|
950
1030
|
//#region src/services/config-store.ts
|
|
951
1031
|
const CONFIG_DIR = ".pix";
|
|
952
1032
|
const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
|
|
@@ -964,12 +1044,16 @@ const mapConfigWriteError = (cause, path, action) => {
|
|
|
964
1044
|
const make$4 = Effect.gen(function* () {
|
|
965
1045
|
const fs = yield* FileSystem.FileSystem;
|
|
966
1046
|
/**
 * Persist `config` to `.pix/config.json`.
 *
 * The config is encoded through `ConfigSchema` as 2-space-indented JSON first, so an encoding
 * failure (mapped to `ConfigError`) happens before anything touches the disk. The directory is
 * then created (recursively) and the file written; filesystem failures go through
 * `mapConfigWriteError`.
 */
const writeConfig = (config) => Effect.gen(function* () {
	const serialized = yield* Effect.mapError(
		Schema.encode(Schema.parseJson(ConfigSchema, { space: 2 }))(config),
		(e) => new ConfigError({
			message: "Failed to encode config",
			cause: e
		})
	);
	yield* Effect.mapError(
		fs.makeDirectory(CONFIG_DIR, { recursive: true }),
		(cause) => mapConfigWriteError(cause, CONFIG_DIR, "create .pix directory")
	);
	yield* Effect.mapError(
		fs.writeFileString(CONFIG_PATH, serialized),
		(cause) => mapConfigWriteError(cause, CONFIG_PATH, "write config.json")
	);
});
|
|
971
1055
|
const readConfig = () => Effect.gen(function* () {
|
|
972
|
-
|
|
1056
|
+
return yield* decodeJsonWithErrors(ConfigSchema, yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
|
|
973
1057
|
if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
|
|
974
1058
|
message: "Config file not found. Run pix init first.",
|
|
975
1059
|
path: CONFIG_PATH,
|
|
@@ -979,15 +1063,17 @@ const make$4 = Effect.gen(function* () {
|
|
|
979
1063
|
message: "Failed to read config.json",
|
|
980
1064
|
cause
|
|
981
1065
|
});
|
|
982
|
-
}))
|
|
983
|
-
|
|
984
|
-
try: () => JSON.parse(content),
|
|
985
|
-
catch: (error) => new ConfigMalformedError({
|
|
1066
|
+
}))).pipe(Effect.mapError((err) => {
|
|
1067
|
+
if (err._tag === "JsonSyntaxError") return new ConfigMalformedError({
|
|
986
1068
|
message: "Invalid JSON in config.json",
|
|
987
1069
|
path: CONFIG_PATH,
|
|
988
|
-
cause:
|
|
989
|
-
})
|
|
990
|
-
|
|
1070
|
+
cause: err
|
|
1071
|
+
});
|
|
1072
|
+
return new ConfigValidationError({
|
|
1073
|
+
message: err.message,
|
|
1074
|
+
errors: err.errors
|
|
1075
|
+
});
|
|
1076
|
+
}));
|
|
991
1077
|
});
|
|
992
1078
|
const configExists = () => Effect.gen(function* () {
|
|
993
1079
|
return yield* fs.exists(CONFIG_PATH);
|
|
@@ -1295,21 +1381,26 @@ const make$1 = Effect.gen(function* () {
|
|
|
1295
1381
|
});
|
|
1296
1382
|
const ScannerLive = Layer.effect(Scanner, make$1);
|
|
1297
1383
|
//#endregion
|
|
1384
|
+
//#region src/domain/chunk.ts
|
|
1385
|
+
/**
 * Runtime schema for persisted/searchable chunk entries (one JSON object per line of
 * chunks.jsonl).
 *
 * `contextBefore` and `contextAfter` fall back to `null` when the key is absent. Indexes
 * written before the context fields existed do not contain them; requiring the keys would make
 * every line of such an index fail validation and be skipped as malformed.
 */
const ChunkSchema = Schema.Struct({
	id: Schema.String,
	idx: Schema.Number,
	file: Schema.String,
	startLine: Schema.Number,
	endLine: Schema.Number,
	text: Schema.String,
	contextBefore: Schema.optionalWith(Schema.Union(Schema.String, Schema.Null), { default: () => null }),
	contextAfter: Schema.optionalWith(Schema.Union(Schema.String, Schema.Null), { default: () => null })
});
|
|
1396
|
+
//#endregion
|
|
1298
1397
|
//#region src/services/vector-store.ts
|
|
1299
|
-
/**
|
|
1300
|
-
* Parse a single JSON line from chunks.jsonl and normalize context fields (old indexes may lack
|
|
1301
|
-
* them).
|
|
1302
|
-
*/
|
|
1303
1398
|
/**
 * Decode one line of chunks.jsonl.
 *
 * Returns `Option.some(chunk)` when the line is valid JSON that matches the chunk schema and
 * `Option.none()` otherwise, so callers can count and skip malformed lines.
 *
 * The shared `parseJsonChunk` schema is reused instead of constructing a new
 * `Schema.parseJson(ChunkSchema)` for every line. `parseJsonChunk` is declared further down the
 * module; it is only read when this function is called, after module initialisation.
 *
 * @param {string} line A single non-empty line from chunks.jsonl.
 */
const parseChunkLine = (line) => Schema.decodeUnknownOption(parseJsonChunk)(line);
|
|
1314
1405
|
/** Compute dot-product similarity between a chunk vector and the query embedding. */
|
|
1315
1406
|
const computeDotProduct = (chunkVector, query) => {
|
|
@@ -1320,20 +1411,16 @@ const computeDotProduct = (chunkVector, query) => {
|
|
|
1320
1411
|
const STORE_DIR = ".pix";
|
|
1321
1412
|
const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
|
|
1322
1413
|
const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
|
|
1323
|
-
/**
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
text: c.text,
|
|
1334
|
-
contextBefore: c.contextBefore,
|
|
1335
|
-
contextAfter: c.contextAfter
|
|
1336
|
-
});
|
|
1414
|
+
/** Pre-built Schema instance for chunk encode/decode. */
|
|
1415
|
+
const parseJsonChunk = Schema.parseJson(ChunkSchema);
|
|
1416
|
+
/**
 * Summarise skipped chunk lines as validation errors.
 *
 * Returns an empty array when `malformedLines` is not greater than zero; otherwise a
 * one-element array holding a `ChunkValidationError` that reports the count.
 */
const buildChunkValidationErrors = (malformedLines) => {
	if (!(malformedLines > 0)) return [];
	const summary = `Skipped ${malformedLines} malformed chunk line(s) in chunks.jsonl`;
	const detail = {
		path: "chunks.jsonl",
		message: `${malformedLines} line(s) failed schema validation`
	};
	return [new ChunkValidationError({
		message: summary,
		errors: [detail]
	})];
};
|
|
1337
1424
|
/**
|
|
1338
1425
|
* FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
|
|
1339
1426
|
* statistics.
|
|
@@ -1360,22 +1447,30 @@ const make = Effect.gen(function* () {
|
|
|
1360
1447
|
* Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
|
|
1361
1448
|
* field contains the source code.
|
|
1362
1449
|
*/
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
return sum + JSON.parse(line).text.split("\n").length;
|
|
1366
|
-
} catch {
|
|
1367
|
-
return sum;
|
|
1368
|
-
}
|
|
1369
|
-
}, 0);
|
|
1370
|
-
/** Count unique files across all chunks in chunks.jsonl. */
|
|
1371
|
-
const countUniqueFiles = (lines) => {
|
|
1450
|
+
/**
 * Walk the chunk lines once and gather index statistics.
 *
 * @param lines Non-empty lines of chunks.jsonl.
 * @returns `files` (Set of distinct `file` values), `totalLines` (sum of the newline-separated
 *   line counts of every chunk's `text`) and `malformedLines` (lines that failed to parse).
 */
const countChunkStats = (lines) => {
	const files = /* @__PURE__ */ new Set();
	let totalLines = 0;
	let malformedLines = 0;
	for (const line of lines) {
		const parsed = parseChunkLine(line);
		if (Option.isNone(parsed)) {
			malformedLines += 1;
			continue;
		}
		const { file, text } = parsed.value;
		files.add(file);
		totalLines += text.split("\n").length;
	}
	return { files, totalLines, malformedLines };
};
|
|
1468
|
+
/**
 * Guard used before reading the index: succeeds when both chunks.jsonl and vectors.bin exist,
 * and fails with `NoIndexError` when either one is missing. Both existence checks are always
 * performed, each wrapped by `withReadError`.
 */
const requireIndex = () => Effect.gen(function* () {
	const hasChunks = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
	const hasVectors = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
	if (hasChunks && hasVectors) return;
	return yield* new NoIndexError({ message: "No index found. Run pix index first." });
});
|
|
1379
1474
|
const toStoreError = (operation, path) => (cause) => {
|
|
1380
1475
|
if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
|
|
1381
1476
|
message: `Disk full during ${operation}`,
|
|
@@ -1431,7 +1526,10 @@ const make = Effect.gen(function* () {
|
|
|
1431
1526
|
if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
|
|
1432
1527
|
});
|
|
1433
1528
|
const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
|
|
1434
|
-
const content =
|
|
1529
|
+
const content = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
|
|
1530
|
+
message: "Failed to encode chunk",
|
|
1531
|
+
cause: e
|
|
1532
|
+
}))))).join("\n") + "\n";
|
|
1435
1533
|
yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
|
|
1436
1534
|
const buffer = serializeVectors(embeddings);
|
|
1437
1535
|
yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
|
|
@@ -1468,7 +1566,10 @@ const make = Effect.gen(function* () {
|
|
|
1468
1566
|
const store = (chunks, embeddings) => Effect.gen(function* () {
|
|
1469
1567
|
yield* ensureDirExists(STORE_DIR, ".pix directory");
|
|
1470
1568
|
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
1471
|
-
const chunksJson =
|
|
1569
|
+
const chunksJson = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
|
|
1570
|
+
message: "Failed to encode chunk",
|
|
1571
|
+
cause: e
|
|
1572
|
+
}))))).join("\n");
|
|
1472
1573
|
yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
|
|
1473
1574
|
yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
|
|
1474
1575
|
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
@@ -1477,17 +1578,21 @@ const make = Effect.gen(function* () {
|
|
|
1477
1578
|
yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
|
|
1478
1579
|
});
|
|
1479
1580
|
const search = (query, options) => Effect.gen(function* () {
|
|
1480
|
-
|
|
1481
|
-
const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
|
|
1482
|
-
if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
|
|
1581
|
+
yield* requireIndex();
|
|
1483
1582
|
const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
|
|
1484
1583
|
const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
|
|
1485
1584
|
const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
|
|
1486
1585
|
const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
|
|
1487
1586
|
const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
|
|
1488
1587
|
const results = [];
|
|
1489
|
-
|
|
1490
|
-
|
|
1588
|
+
let malformedLines = 0;
|
|
1589
|
+
for (let i = 0; i < chunkLines.length; i++) {
|
|
1590
|
+
const parsed = parseChunkLine(chunkLines[i]);
|
|
1591
|
+
if (Option.isNone(parsed)) {
|
|
1592
|
+
malformedLines++;
|
|
1593
|
+
continue;
|
|
1594
|
+
}
|
|
1595
|
+
const chunk = parsed.value;
|
|
1491
1596
|
if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
|
|
1492
1597
|
if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
|
|
1493
1598
|
const startIdx = i * query.dims;
|
|
@@ -1501,12 +1606,19 @@ const make = Effect.gen(function* () {
|
|
|
1501
1606
|
contextBefore: chunk.contextBefore,
|
|
1502
1607
|
contextAfter: chunk.contextAfter
|
|
1503
1608
|
});
|
|
1504
|
-
}
|
|
1609
|
+
}
|
|
1610
|
+
const validationErrors = buildChunkValidationErrors(malformedLines);
|
|
1505
1611
|
results.sort((a, b) => b.score - a.score);
|
|
1506
1612
|
const topK = options?.topK;
|
|
1507
|
-
if (topK == null) return
|
|
1613
|
+
if (topK == null) return {
|
|
1614
|
+
results,
|
|
1615
|
+
validationErrors
|
|
1616
|
+
};
|
|
1508
1617
|
const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
|
|
1509
|
-
return
|
|
1618
|
+
return {
|
|
1619
|
+
results: results.slice(0, clamped),
|
|
1620
|
+
validationErrors
|
|
1621
|
+
};
|
|
1510
1622
|
});
|
|
1511
1623
|
const getStatus = () => Effect.gen(function* () {
|
|
1512
1624
|
const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
|
|
@@ -1517,13 +1629,15 @@ const make = Effect.gen(function* () {
|
|
|
1517
1629
|
model: "",
|
|
1518
1630
|
lastIndex: 0,
|
|
1519
1631
|
totalLines: 0,
|
|
1520
|
-
byteSize: 0
|
|
1632
|
+
byteSize: 0,
|
|
1633
|
+
validationErrors: []
|
|
1521
1634
|
};
|
|
1522
1635
|
const lines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
|
|
1523
|
-
const
|
|
1524
|
-
const
|
|
1636
|
+
const { files: uniqueFiles, totalLines, malformedLines } = countChunkStats(lines);
|
|
1637
|
+
const chunks = lines.length - malformedLines;
|
|
1638
|
+
const files = uniqueFiles.size;
|
|
1525
1639
|
const model = "";
|
|
1526
|
-
const
|
|
1640
|
+
const validationErrors = buildChunkValidationErrors(malformedLines);
|
|
1527
1641
|
const vectorsStat = yield* withReadError(fs.stat(VECTORS_FILE), "stat vectors", VECTORS_FILE);
|
|
1528
1642
|
const byteSize = "size" in vectorsStat ? Number(vectorsStat.size) : 0;
|
|
1529
1643
|
return {
|
|
@@ -1532,7 +1646,8 @@ const make = Effect.gen(function* () {
|
|
|
1532
1646
|
model,
|
|
1533
1647
|
lastIndex: Option.map(vectorsStat?.mtime ?? Option.none(), (d) => d instanceof Date ? d.getTime() : 0).pipe(Option.getOrElse(() => 0)),
|
|
1534
1648
|
totalLines,
|
|
1535
|
-
byteSize
|
|
1649
|
+
byteSize,
|
|
1650
|
+
validationErrors
|
|
1536
1651
|
};
|
|
1537
1652
|
});
|
|
1538
1653
|
const reset = () => Effect.gen(function* () {
|