@lucas-bur/pix 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/dist/index.mjs +341 -113
- package/package.json +1 -1
package/README.md
CHANGED
@@ -19,13 +19,13 @@ pix query "authentication middleware"
 
 ## Commands
 
-| Command
-|
-| `pix init`
-| `pix index`
-| `pix query "<text>"` | Semantic search via cosine similarity
-| `pix status`
-| `pix reset`
+| Command | Description | JSON flag |
+| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- |
+| `pix init` | Create `.pix/config.json` with defaults | `--json` |
+| `pix index` | Scan, chunk, embed, and store project files | `--json` |
+| `pix query "<text>" [flags]` | Semantic search via cosine similarity (`--top`, `--context-lines`, `--ignore-path`, `--only-path`, `--max-characters`, `--no-content`) | `--json` |
+| `pix status` | Show index statistics | `--json` |
+| `pix reset` | Delete index files (chunks + vectors) | `--json` |
 
 All commands support `--json` for structured output on stdout — ideal for piping to AI agents.
 
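The `query` flags listed above map onto the structured `--json` output produced by `toJsonOutput` in `dist/index.mjs` below. A minimal sketch of that output shape, with invented values: the `text` field is omitted under `--no-content`, and the context fields appear only when `--context-lines` is greater than 0.

    // Sketch of `pix query "auth middleware" --top 3 --json` output.
    // Field names come from toJsonOutput below; every value here is illustrative.
    const exampleOutput = {
      results: [
        {
          score: 0.482,                    // similarity score, higher is closer
          file: "src/middleware/auth.ts",  // hypothetical indexed file
          startLine: 12,
          endLine: 71,
          text: "export const authMiddleware = ..." // omitted with --no-content
        }
      ]
      // a validationErrors array is appended only when malformed chunk lines were skipped
    };
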
package/dist/index.mjs
CHANGED
@@ -2,12 +2,13 @@
 import { createRequire } from "node:module";
 import { Args, CliConfig, Command, Options } from "@effect/cli";
 import { NodeContext, NodeRuntime } from "@effect/platform-node";
-import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
+import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Schema, Stream } from "effect";
 import * as Chunk from "effect/Chunk";
 import { styleText } from "node:util";
 import * as clack from "@clack/prompts";
 import { FileSystem } from "@effect/platform";
 import crypto from "node:crypto";
+import * as ParseResult from "effect/ParseResult";
 import { env } from "@huggingface/transformers";
 import ignore from "ignore";
 //#region src/domain/ports.ts
@@ -154,7 +155,26 @@ const JsonDisplay = { layer: Layer.succeed(Display, {
 }) };
 //#endregion
 //#region src/domain/config.ts
-
+const EmbedderConfigSchema = Schema.Struct({
+model: Schema.String,
+device: Schema.Literal("auto", "cpu", "cuda", "dml", "coreml"),
+dtype: Schema.Literal("fp32", "fp16", "q8"),
+batchSize: Schema.Number
+});
+/**
+ * Runtime schema for persisted project configuration. Defines the structure and validation rules
+ * for `.pix/config.json`.
+ */
+const ConfigSchema = Schema.Struct({
+schema: Schema.Literal("1"),
+chunkLines: Schema.Number,
+overlapLines: Schema.Number,
+chunkConcurrency: Schema.optionalWith(Schema.Number, { exact: true }),
+skipExtensions: Schema.Array(Schema.String),
+ignoredPaths: Schema.Array(Schema.String),
+ignoreGitignore: Schema.optionalWith(Schema.Boolean, { exact: true }),
+embedder: EmbedderConfigSchema
+});
 const DEFAULT_CONFIG = {
 schema: "1",
 chunkLines: 60,
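For reference, a `.pix/config.json` that the new `ConfigSchema` accepts looks roughly like the following sketch. Field names and the literal unions come from the schema above; apart from `schema: "1"` and `chunkLines: 60`, which appear in `DEFAULT_CONFIG`, the concrete values are placeholders, not package defaults.

    // Sketch of a .pix/config.json accepted by ConfigSchema above.
    const exampleConfig = {
      schema: "1",                          // literal "1" is the only accepted value
      chunkLines: 60,
      overlapLines: 10,                     // placeholder
      skipExtensions: [".png", ".lock"],    // placeholder
      ignoredPaths: ["dist", ".git"],       // placeholder
      embedder: {
        model: "some/embedding-model",      // placeholder model id
        device: "auto",                     // "auto" | "cpu" | "cuda" | "dml" | "coreml"
        dtype: "q8",                        // "fp32" | "fp16" | "q8"
        batchSize: 8                        // placeholder
      }
      // chunkConcurrency and ignoreGitignore are optional fields
    };
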
@@ -184,11 +204,41 @@ const DEFAULT_CONFIG = {
 }
 };
 //#endregion
+//#region src/lib/extension.ts
+/** Extract the last path segment (filename) from a file path. Handles both `/` and `\\` separators. */
+const getFilename = (path) => {
+const sepIndex = Math.max(path.lastIndexOf("/"), path.lastIndexOf("\\"));
+return sepIndex >= 0 ? path.slice(sepIndex + 1) : path;
+};
+/**
+ * Extract the lowercase extension (including dot) from a file path. Used for processor dispatch.
+ * Strips the directory, then returns the part after the last dot. If no dot, returns the full
+ * filename lowercased.
+ */
+const getExtension = (file) => {
+const name = getFilename(file);
+const dotIndex = name.lastIndexOf(".");
+if (dotIndex === -1) return name.toLowerCase();
+return name.slice(dotIndex).toLowerCase();
+};
+/**
+ * Extract the extension from a filename (not full path). Returns `"(no extension)"` if no dot
+ * exists. Used for display grouping of skipped files.
+ */
+const getFileExtension = (filename) => {
+const dotIndex = filename.lastIndexOf(".");
+return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
+};
+//#endregion
 //#region src/domain/errors.ts
+/** Generic config I/O failure (read, write, encode). */
+var ConfigError = class extends Data.TaggedError("ConfigError") {};
 /** Config file or directory does not exist. Run pix init first. */
 var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
 /** Config file exists but contains invalid JSON. */
 var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
+/** Config failed schema validation — missing/invalid fields. */
+var ConfigValidationError = class extends Data.TaggedError("ConfigValidationError") {};
 /** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
 var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
 /** Disk is full — write operation could not complete. */
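The extracted helpers behave like the inline versions they replace later in this diff; a quick sketch of the expected results, assuming the functions above are in scope (paths invented):

    // Expected behavior of the helpers above:
    // getExtension("src/lib/Index.TS")    -> ".ts"             (lowercased, includes the dot)
    // getExtension("C:\\repo\\Makefile")  -> "makefile"        (no dot: whole filename, lowercased)
    // getFileExtension("notes.md")        -> ".md"
    // getFileExtension("LICENSE")         -> "(no extension)"
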
@@ -206,6 +256,8 @@ var InferenceError = class extends Data.TaggedError("InferenceError") {};
 * via ScanResult.skipped.
 */
 var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
+/** A chunk line in chunks.jsonl failed schema validation. */
+var ChunkValidationError = class extends Data.TaggedError("ChunkValidationError") {};
 /** File type is unsupported for text extraction. */
 var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
 /** Text extraction failed for a supported file type. */
@@ -315,13 +367,6 @@ const deriveEffectiveConfig = (opts, config) => ({
 ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
 ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
 });
-function getExtension(file) {
-const lastSlash = file.lastIndexOf("/");
-const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
-const dotIndex = name.lastIndexOf(".");
-if (dotIndex === -1) return name.toLowerCase();
-return name.slice(dotIndex).toLowerCase();
-}
 const classifyFiles = (files, processorMap) => {
 const knownFiles = [];
 const skippedFiles = [];
@@ -386,7 +431,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
 chunks: 0,
 files: 0,
 totalLines: 0,
-byteSize: 0
+byteSize: 0,
+validationErrors: []
 },
 durationMs: Date.now() - start
 };
@@ -401,7 +447,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
 chunks: 0,
 files: 0,
 totalLines: 0,
-byteSize: 0
+byteSize: 0,
+validationErrors: []
 },
 durationMs: Date.now() - start
 };
@@ -435,7 +482,8 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
 chunks: stats.chunks,
 files: stats.files,
 totalLines: stats.totalLines,
-byteSize: stats.byteSize
+byteSize: stats.byteSize,
+validationErrors: []
 },
 durationMs: Date.now() - start,
 embedderFallback: fallbackInfo
@@ -444,11 +492,6 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
 return { index };
 })
 }) {};
-const getFilename = (path) => path.split("/").pop() ?? path;
-const getFileExtension = (filename) => {
-const dotIndex = filename.lastIndexOf(".");
-return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
-};
 const groupByExtension = (entries) => {
 const byExt = /* @__PURE__ */ new Map();
 for (const s of entries) {
@@ -504,7 +547,7 @@ var QueryProject = class extends Effect.Service()("QueryProject", {
 effect: Effect.gen(function* () {
 const embedder = yield* Embedder;
 const store = yield* VectorStore;
-const queryProject = (queryText,
+const queryProject = (queryText, options) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, options)));
 return { queryProject };
 })
 }) {};
@@ -529,6 +572,8 @@ const errorCodes = {
 ConfigError: "CONFIG_ERROR",
 ConfigNotFoundError: "CONFIG_NOT_FOUND",
 ConfigMalformedError: "CONFIG_MALFORMED",
+ConfigValidationError: "CONFIG_VALIDATION_ERROR",
+ChunkValidationError: "CHUNK_VALIDATION_ERROR",
 NoIndexError: "NO_INDEX",
 DiskFullError: "DISK_FULL",
 StoreError: "STORE_ERROR",
@@ -636,6 +681,51 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
 DiskFullError: reportError
 })));
 //#endregion
+//#region src/lib/format.ts
+/** Format byte count as human-readable string (e.g. "1.5 MB") */
+const formatBytes = (bytes) => {
+if (bytes === 0) return "0 B";
+const units = [
+"B",
+"KB",
+"MB",
+"GB"
+];
+const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
+return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
+};
+/**
+ * Apply a character budget to search results. Returns results in score order capped by the budget.
+ * The last result may be truncated to fit the remaining budget. Character count includes file path,
+ * line numbers, chunk text, and context lines.
+ */
+const applyCharBudget = (results, maxChars) => {
+if (!maxChars || maxChars <= 0) return { results };
+const budgeted = [];
+let remaining = maxChars;
+for (const result of results) {
+const indicator = " [...]";
+const metadata = `${result.file}:${result.startLine}-${result.endLine}\n`;
+const chars = `${metadata}${result.text}${result.contextBefore ? `\n${result.contextBefore}` : ""}${result.contextAfter ? `\n${result.contextAfter}` : ""}`.length;
+if (chars <= remaining) {
+budgeted.push(result);
+remaining -= chars;
+} else {
+const textBudget = remaining - metadata.length - 6;
+if (textBudget <= 0) break;
+const truncated = result.text.slice(0, textBudget);
+budgeted.push({
+...result,
+text: `${truncated}${indicator}`,
+contextBefore: null,
+contextAfter: null
+});
+break;
+}
+}
+return { results: budgeted };
+};
+//#endregion
 //#region src/commands/query.ts
 const DEFAULT_TOP_K = 5;
 const DEFAULT_CONTEXT_LINES = 0;
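A small worked example of the character budget logic added above; all numbers are invented and `applyCharBudget` is assumed to be in scope.

    const results = [
      { file: "a.ts", startLine: 1, endLine: 5,  text: "x".repeat(60), contextBefore: null, contextAfter: null },
      { file: "b.ts", startLine: 9, endLine: 14, text: "y".repeat(80), contextBefore: null, contextAfter: null }
    ];
    // With maxChars = 120:
    //   result 1 costs "a.ts:1-5\n" (9) + 60 text chars = 69, fits; 51 remain.
    //   result 2 costs "b.ts:9-14\n" (10) + 80 = 90 > 51, so its text budget is 51 - 10 - 6 = 35;
    //   it is truncated to 35 chars plus " [...]", its context fields are nulled, and iteration stops.
    // applyCharBudget(results, 120) -> { results: [full result 1, truncated result 2] }
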
@@ -662,31 +752,72 @@ const formatResult = (result) => {
 const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
 return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
 };
-
+/** Format a result as a lightweight location reference (no text content). */
+const formatLocation = (result) => `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})`;
+/** Build optional content fields for a single JSON output entry. */
+const buildContentFields = (r, ctxLines, noContent) => {
+if (noContent) return {};
+return {
+text: r.text,
+...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
+...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
+};
+};
+const toJsonOutput = (results, ctxLines, noContent = false) => results.map((r) => ({
 score: r.score,
 file: r.file,
 startLine: r.startLine,
 endLine: r.endLine,
-
-...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
-...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
+...buildContentFields(r, ctxLines, noContent)
 }));
-/**
+/** Build SearchOptions from parsed CLI args, clamping topK. */
+const buildSearchOptions = (top, ignorePath, onlyPath) => {
+const rawValue = Option.getOrElse(top, () => DEFAULT_TOP_K);
+const clamped = clampTopK(rawValue);
+return {
+options: {
+topK: clamped.value,
+...ignorePath.length > 0 && { ignorePaths: [...ignorePath] },
+...onlyPath.length > 0 && { onlyPaths: [...onlyPath] }
+},
+clamped: clamped.clamped,
+rawValue
+};
+};
+/** Render search results via Display — JSON + human-readable text. */
+const renderResults = (d, response, ctxLines, noContent) => Effect.gen(function* () {
+const { results, validationErrors } = response;
+yield* d.json({
+results: toJsonOutput(results, ctxLines, noContent),
+...validationErrors.length > 0 && { validationErrors }
+});
+if (results.length === 0) yield* d.log("No results found", "warn");
+else for (const result of results) yield* d.text(noContent ? formatLocation(result) : formatResult(result));
+});
+/**
+ * CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] [--ignore-path P]
+ * [--only-path P] [--max-characters N] [--no-content]
+ */
 const queryCommand = Command.make("query", {
 queryText: Args.text({ name: "query" }),
 top: Options.integer("top").pipe(Options.withDefault(DEFAULT_TOP_K), Options.optional),
 json: Options.boolean("json").pipe(Options.withDefault(false)),
-contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional)
-
+contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional),
+ignorePath: Options.text("ignore-path").pipe(Options.repeated),
+onlyPath: Options.text("only-path").pipe(Options.repeated),
+maxCharacters: Options.integer("max-characters").pipe(Options.optional),
+noContent: Options.boolean("no-content").pipe(Options.withDefault(false))
+}, ({ queryText, top, contextLines, ignorePath, onlyPath, maxCharacters, noContent }) => Effect.gen(function* () {
 const d = yield* Display;
-const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
 const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
-const clamped =
-if (clamped
-const
-
-
-
+const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
+if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
+const searchResponse = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
+const finalResults = noContent ? searchResponse.results : applyCharBudget(searchResponse.results, Option.getOrUndefined(maxCharacters)).results;
+yield* renderResults(d, {
+...searchResponse,
+results: finalResults
+}, ctxLines, noContent);
 }).pipe(Effect.catchTags({
 ModelLoadError: reportError,
 InferenceError: reportError,
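The new flags funnel into `buildSearchOptions`; a sketch of the mapping for one hypothetical invocation. `clampTopK` is defined elsewhere in the bundle, so its exact bounds are not shown here and the clamped value below is an assumption.

    // pix query "jwt verify" --top 3 --ignore-path "dist/**" --only-path "src/**"
    // buildSearchOptions(Option.some(3), ["dist/**"], ["src/**"]) returns roughly:
    const built = {
      options: {
        topK: 3,                  // value after clampTopK
        ignorePaths: ["dist/**"], // present only when --ignore-path was given
        onlyPaths: ["src/**"]     // present only when --only-path was given
      },
      clamped: false,             // true triggers the "topK clamped" warning
      rawValue: 3
    };
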
@@ -695,20 +826,6 @@ const queryCommand = Command.make("query", {
 NoIndexError: reportError
 })));
 //#endregion
-//#region src/lib/format.ts
-/** Format byte count as human-readable string (e.g. "1.5 MB") */
-const formatBytes = (bytes) => {
-if (bytes === 0) return "0 B";
-const units = [
-"B",
-"KB",
-"MB",
-"GB"
-];
-const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
-return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
-};
-//#endregion
 //#region src/commands/reset.ts
 /** CLI command: pix reset [--json] */
 const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, () => Effect.gen(function* () {
@@ -747,6 +864,7 @@ const statusCommand = Command.make("status", { json: Options.boolean("json").pip
 yield* d.log(`Total lines: ${result.totalLines.toLocaleString()}`, "info");
 yield* d.log(`Index size: ${result.byteSize.toLocaleString()} bytes`, "info");
 yield* d.log(`Last indexed: ${lastIndexStr}`, "info");
+if (result.validationErrors.length > 0) yield* d.log(`Warnings: ${result.validationErrors[0].message}`, "warn");
 }).pipe(Effect.catchTags({ StoreError: reportError })));
 //#endregion
 //#region src/cli.ts
@@ -822,13 +940,19 @@ const buildChunks = (file, content, config) => {
 const text = lines.slice(startLine - 1, endLine).join("\n");
 if (text.length >= MIN_CHUNK_CHARS) {
 const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
+const contextBeforeStart = Math.max(0, startLine - 1 - config.overlapLines);
+const contextBefore = lines.slice(contextBeforeStart, startLine - 1).join("\n");
+const contextAfterEnd = Math.min(lines.length, endLine + config.overlapLines);
+const contextAfter = lines.slice(endLine, contextAfterEnd).join("\n");
 chunks.push({
 id,
 idx,
 file,
 startLine,
 endLine,
-text
+text,
+contextBefore: contextBefore || null,
+contextAfter: contextAfter || null
 });
 idx++;
 }
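With the new context fields, each chunk now carries up to `overlapLines` of surrounding source. A sketch of the slicing with invented numbers, for a chunk spanning source lines 61 to 120 and `overlapLines: 10`:

    // Mirrors the context slicing added to buildChunks above (0-based `lines` array).
    const lines = Array.from({ length: 500 }, (_, i) => `line ${i + 1}`); // fake file contents
    const startLine = 61, endLine = 120, overlapLines = 10;
    const contextBeforeStart = Math.max(0, startLine - 1 - overlapLines);              // 50
    const contextBefore = lines.slice(contextBeforeStart, startLine - 1).join("\n");   // source lines 51..60
    const contextAfterEnd = Math.min(lines.length, endLine + overlapLines);            // 130 (or fewer at EOF)
    const contextAfter = lines.slice(endLine, contextAfterEnd).join("\n");             // source lines 121..130
    // Empty strings are stored as null in the chunk (contextBefore || null).
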
@@ -855,12 +979,59 @@ const make$5 = Effect.gen(function* () {
 });
 const ChunkerLive = Layer.effect(Chunker, make$5);
 //#endregion
+//#region src/lib/platform-error.ts
+/**
+ * Check if a platform error has a specific `reason` string (e.g. "BadResource" for disk full,
+ * "NotFound" for missing files). Platform errors from @effect/platform include a `reason` property
+ * that categorizes the failure.
+ */
+const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
+//#endregion
+//#region src/lib/validation.ts
+const mergeMessages = (messages) => {
+if (messages.length === 1) return messages[0];
+const uniq = [...new Set(messages)];
+if (uniq.every((m) => m.startsWith("Expected"))) {
+const actualMatch = uniq[0].match(/actual (.+)$/);
+const actual = actualMatch ? actualMatch[1] : "";
+return `Expected ${uniq.map((m) => m.replace(/^Expected /, "").replace(/, actual .+$/, "")).join(" | ")}, actual ${actual}`;
+}
+return uniq.join("\n");
+};
+const formatSchemaErrors = (error) => {
+const issues = ParseResult.ArrayFormatter.formatErrorSync(error);
+const byPath = /* @__PURE__ */ new Map();
+for (const issue of issues) {
+const path = issue.path.join(".");
+if (!byPath.has(path)) byPath.set(path, []);
+byPath.get(path).push(issue.message);
+}
+return Array.from(byPath.entries()).map(([path, messages]) => ({
+path,
+message: mergeMessages(messages)
+}));
+};
+const formatSchemaMessage = (error) => ParseResult.TreeFormatter.formatErrorSync(error);
+const isJsonSyntaxError = (error) => error.issue._tag === "Transformation" && error.issue.kind === "Transformation";
+const decodeJsonWithErrors = (schema, json) => Schema.decodeUnknown(Schema.parseJson(schema))(json).pipe(Effect.mapError((error) => {
+const base = {
+message: formatSchemaMessage(error),
+errors: formatSchemaErrors(error)
+};
+return isJsonSyntaxError(error) ? {
+...base,
+_tag: "JsonSyntaxError"
+} : {
+...base,
+_tag: "SchemaValidationError"
+};
+}));
+//#endregion
 //#region src/services/config-store.ts
 const CONFIG_DIR = ".pix";
 const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
-const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
 const mapConfigWriteError = (cause, path, action) => {
-if (isPlatformReason
+if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
 message: `Disk full: could not ${action}`,
 path,
 cause
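When `.pix/config.json` parses as JSON but fails the schema, `decodeJsonWithErrors` maps the failure into a tagged value that `readConfig` (below) turns into `ConfigValidationError`. A hedged sketch of the failure shape; the exact messages come from effect's ParseResult formatters, so the wording here is approximate.

    // Roughly what decodeJsonWithErrors(ConfigSchema, '{"schema":"2"}') fails with:
    const exampleFailure = {
      _tag: "SchemaValidationError",            // "JsonSyntaxError" when the JSON itself is unparsable
      message: "<TreeFormatter summary of the parse error>",
      errors: [
        { path: "schema", message: 'Expected "1", actual "2"' }  // approximate wording
      ]
    };
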
@@ -873,13 +1044,17 @@ const mapConfigWriteError = (cause, path, action) => {
 const make$4 = Effect.gen(function* () {
 const fs = yield* FileSystem.FileSystem;
 const writeConfig = (config) => Effect.gen(function* () {
-const
+const encodeJson = Schema.parseJson(ConfigSchema, { space: 2 });
+const configJson = yield* Schema.encode(encodeJson)(config).pipe(Effect.mapError((e) => new ConfigError({
+message: "Failed to encode config",
+cause: e
+})));
 yield* fs.makeDirectory(CONFIG_DIR, { recursive: true }).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_DIR, "create .pix directory")));
 yield* fs.writeFileString(CONFIG_PATH, configJson).pipe(Effect.mapError((cause) => mapConfigWriteError(cause, CONFIG_PATH, "write config.json")));
 });
 const readConfig = () => Effect.gen(function* () {
-
-if (isPlatformReason
+return yield* decodeJsonWithErrors(ConfigSchema, yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
+if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
 message: "Config file not found. Run pix init first.",
 path: CONFIG_PATH,
 cause
@@ -888,15 +1063,17 @@ const make$4 = Effect.gen(function* () {
 message: "Failed to read config.json",
 cause
 });
-}))
-
-try: () => JSON.parse(content),
-catch: (error) => new ConfigMalformedError({
+}))).pipe(Effect.mapError((err) => {
+if (err._tag === "JsonSyntaxError") return new ConfigMalformedError({
 message: "Invalid JSON in config.json",
 path: CONFIG_PATH,
-cause:
-})
-
+cause: err
+});
+return new ConfigValidationError({
+message: err.message,
+errors: err.errors
+});
+}));
 });
 const configExists = () => Effect.gen(function* () {
 return yield* fs.exists(CONFIG_PATH);
@@ -914,10 +1091,7 @@ const make$3 = Effect.gen(function* () {
 const fs = yield* FileSystem.FileSystem;
 const processorMap = buildProcessorMap([]);
 const extract = (file) => {
-const
-const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
-const dotIndex = name.lastIndexOf(".");
-const ext = dotIndex === -1 ? name.toLowerCase() : name.slice(dotIndex).toLowerCase();
+const ext = getExtension(file);
 const processor = processorMap[ext];
 if (!processor) return Effect.fail({
 _tag: "UnsupportedFormat",
@@ -1207,11 +1381,46 @@ const make$1 = Effect.gen(function* () {
 });
 const ScannerLive = Layer.effect(Scanner, make$1);
 //#endregion
+//#region src/domain/chunk.ts
+/** Runtime schema for persisted/searchable chunk entries. */
+const ChunkSchema = Schema.Struct({
+id: Schema.String,
+idx: Schema.Number,
+file: Schema.String,
+startLine: Schema.Number,
+endLine: Schema.Number,
+text: Schema.String,
+contextBefore: Schema.Union(Schema.String, Schema.Null),
+contextAfter: Schema.Union(Schema.String, Schema.Null)
+});
+//#endregion
 //#region src/services/vector-store.ts
+const parseChunkLine = (line) => {
+try {
+return Option.some(Schema.decodeUnknownSync(Schema.parseJson(ChunkSchema))(line));
+} catch {
+return Option.none();
+}
+};
+/** Compute dot-product similarity between a chunk vector and the query embedding. */
+const computeDotProduct = (chunkVector, query) => {
+let dot = 0;
+for (let j = 0; j < query.dims; j++) dot += chunkVector[j] * query.vector[j];
+return dot;
+};
 const STORE_DIR = ".pix";
 const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
 const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
-
+/** Pre-built Schema instance for chunk encode/decode. */
+const parseJsonChunk = Schema.parseJson(ChunkSchema);
+/** Build ChunkValidationError array from malformed line count, or [] if none. */
+const buildChunkValidationErrors = (malformedLines) => malformedLines > 0 ? [new ChunkValidationError({
+message: `Skipped ${malformedLines} malformed chunk line(s) in chunks.jsonl`,
+errors: [{
+path: "chunks.jsonl",
+message: `${malformedLines} line(s) failed schema validation`
+}]
+})] : [];
 /**
 * FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
 * statistics.
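Each line of `.pix/chunks.jsonl` must now decode against `ChunkSchema`; `parseChunkLine` returns `Option.none()` for anything that does not. A sketch of one accepted line, with invented values:

    const line = JSON.stringify({
      id: "3f2a9c1d0b4e",              // 12-char sha1 prefix, as produced by buildChunks
      idx: 0,
      file: "src/index.ts",
      startLine: 1,
      endLine: 60,
      text: "import { something } from \"somewhere\";",
      contextBefore: null,
      contextAfter: "export const next = 1;"
    });
    // parseChunkLine(line)             -> Option.some(chunk)
    // parseChunkLine("not valid json") -> Option.none()
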
@@ -1238,22 +1447,30 @@ const make = Effect.gen(function* () {
 * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
 * field contains the source code.
 */
-
-
-return sum + JSON.parse(line).text.split("\n").length;
-} catch {
-return sum;
-}
-}, 0);
-/** Count unique files across all chunks in chunks.jsonl. */
-const countUniqueFiles = (lines) => {
+/** Count files, total lines, and malformed lines in a single pass. */
+const countChunkStats = (lines) => {
 const files = /* @__PURE__ */ new Set();
-
-
-
-
-
+let totalLines = 0;
+let malformedLines = 0;
+for (const line of lines) {
+const chunk = parseChunkLine(line);
+if (Option.isSome(chunk)) {
+files.add(chunk.value.file);
+totalLines += chunk.value.text.split("\n").length;
+} else malformedLines++;
+}
+return {
+files,
+totalLines,
+malformedLines
+};
 };
+/** Check that index files exist; fail with NoIndexError if either is missing. */
+const requireIndex = () => Effect.gen(function* () {
+const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
+const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
+if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
+});
 const toStoreError = (operation, path) => (cause) => {
 if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
 message: `Disk full during ${operation}`,
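`countChunkStats` replaces the two single-purpose counters with one pass that also tallies malformed lines; a sketch of its result on three hypothetical input lines, two valid and one not:

    const lines = [
      '{"id":"a","idx":0,"file":"a.ts","startLine":1,"endLine":2,"text":"x\\ny","contextBefore":null,"contextAfter":null}',
      '{"id":"b","idx":1,"file":"b.ts","startLine":1,"endLine":1,"text":"z","contextBefore":null,"contextAfter":null}',
      'not json'
    ];
    // countChunkStats(lines) -> { files: Set { "a.ts", "b.ts" }, totalLines: 3, malformedLines: 1 }
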
@@ -1309,14 +1526,10 @@ const make = Effect.gen(function* () {
 if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
 });
 const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
-const content =
-
-
-
-startLine: c.startLine,
-endLine: c.endLine,
-text: c.text
-})).join("\n") + "\n";
+const content = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
+message: "Failed to encode chunk",
+cause: e
+}))))).join("\n") + "\n";
 yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
 const buffer = serializeVectors(embeddings);
 yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
@@ -1353,37 +1566,39 @@ const make = Effect.gen(function* () {
 const store = (chunks, embeddings) => Effect.gen(function* () {
 yield* ensureDirExists(STORE_DIR, ".pix directory");
 const chunksTemp = `${CHUNKS_FILE}.tmp`;
-const
-
-
-
-
-endLine: c.endLine,
-text: c.text
-}));
-yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
+const chunksJson = (yield* Effect.forEach(chunks, (c) => Schema.encode(parseJsonChunk)(c).pipe(Effect.mapError((e) => new StoreError({
+message: "Failed to encode chunk",
+cause: e
+}))))).join("\n");
+yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
 yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
 const vectorsTemp = `${VECTORS_FILE}.tmp`;
 const buffer = serializeVectors(embeddings);
 yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
 yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
 });
-const search = (query,
-
-const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
-if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
+const search = (query, options) => Effect.gen(function* () {
+yield* requireIndex();
 const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
 const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
-const vectors = new Float32Array(vectorsBuffer.buffer);
+const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
+const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
+const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
 const results = [];
-
-
+let malformedLines = 0;
+for (let i = 0; i < chunkLines.length; i++) {
+const parsed = parseChunkLine(chunkLines[i]);
+if (Option.isNone(parsed)) {
+malformedLines++;
+continue;
+}
+const chunk = parsed.value;
+if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
+if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
 const startIdx = i * query.dims;
-const
-let dotProduct = 0;
-for (let j = 0; j < query.dims; j++) dotProduct += chunkVector[j] * query.vector[j];
+const score = computeDotProduct(vectors.slice(startIdx, startIdx + query.dims), query);
 results.push({
-score
+score,
 file: chunk.file,
 startLine: chunk.startLine,
 endLine: chunk.endLine,
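The path filters reuse the `ignore` package already imported at the top of the bundle: a chunk is skipped when an `--ignore-path` pattern matches its file, and kept only when some `--only-path` pattern matches (if any were given). A self-contained sketch with invented patterns:

    import ignore from "ignore";
    // Same matching the new search() applies for --ignore-path / --only-path.
    const ignoreIg = ignore().add(["dist/**"]);
    const onlyIg = ignore().add(["src/**"]);
    ignoreIg.ignores("dist/index.mjs"); // true  -> chunk skipped
    onlyIg.ignores("src/cli.ts");       // true  -> chunk kept
    onlyIg.ignores("README.md");        // false -> chunk skipped when --only-path is set
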
@@ -1391,9 +1606,19 @@ const make = Effect.gen(function* () {
 contextBefore: chunk.contextBefore,
 contextAfter: chunk.contextAfter
 });
-}
+}
+const validationErrors = buildChunkValidationErrors(malformedLines);
 results.sort((a, b) => b.score - a.score);
-
+const topK = options?.topK;
+if (topK == null) return {
+results,
+validationErrors
+};
+const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
+return {
+results: results.slice(0, clamped),
+validationErrors
+};
 });
 const getStatus = () => Effect.gen(function* () {
 const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
@@ -1404,13 +1629,15 @@ const make = Effect.gen(function* () {
 model: "",
 lastIndex: 0,
 totalLines: 0,
-byteSize: 0
+byteSize: 0,
+validationErrors: []
 };
 const lines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
-const
-const
+const { files: uniqueFiles, totalLines, malformedLines } = countChunkStats(lines);
+const chunks = lines.length - malformedLines;
+const files = uniqueFiles.size;
 const model = "";
-const
+const validationErrors = buildChunkValidationErrors(malformedLines);
 const vectorsStat = yield* withReadError(fs.stat(VECTORS_FILE), "stat vectors", VECTORS_FILE);
 const byteSize = "size" in vectorsStat ? Number(vectorsStat.size) : 0;
 return {
@@ -1419,7 +1646,8 @@ const make = Effect.gen(function* () {
 model,
 lastIndex: Option.map(vectorsStat?.mtime ?? Option.none(), (d) => d instanceof Date ? d.getTime() : 0).pipe(Option.getOrElse(() => 0)),
 totalLines,
-byteSize
+byteSize,
+validationErrors
 };
 });
 const reset = () => Effect.gen(function* () {
|