@lucas-bur/pix 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/dist/index.mjs +188 -75
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -19,13 +19,13 @@ pix query "authentication middleware"
|
|
|
19
19
|
|
|
20
20
|
## Commands
|
|
21
21
|
|
|
22
|
-
| Command
|
|
23
|
-
|
|
|
24
|
-
| `pix init`
|
|
25
|
-
| `pix index`
|
|
26
|
-
| `pix query "<text>"` | Semantic search via cosine similarity
|
|
27
|
-
| `pix status`
|
|
28
|
-
| `pix reset`
|
|
22
|
+
| Command | Description | JSON flag |
|
|
23
|
+
| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- |
|
|
24
|
+
| `pix init` | Create `.pix/config.json` with defaults | `--json` |
|
|
25
|
+
| `pix index` | Scan, chunk, embed, and store project files | `--json` |
|
|
26
|
+
| `pix query "<text>" [flags]` | Semantic search via cosine similarity (`--top`, `--context-lines`, `--ignore-path`, `--only-path`, `--max-characters`, `--no-content`) | `--json` |
|
|
27
|
+
| `pix status` | Show index statistics | `--json` |
|
|
28
|
+
| `pix reset` | Delete index files (chunks + vectors) | `--json` |
|
|
29
29
|
|
|
30
30
|
All commands support `--json` for structured output on stdout — ideal for piping to AI agents.
|
|
31
31
|
|
package/dist/index.mjs
CHANGED
|
@@ -184,6 +184,32 @@ const DEFAULT_CONFIG = {
|
|
|
184
184
|
}
|
|
185
185
|
};
|
|
186
186
|
//#endregion
|
|
187
|
+
//#region src/lib/extension.ts
/** Extract the last path segment (filename) from a file path. Handles both `/` and `\\` separators. */
const getFilename = (path) => {
  const lastSep = Math.max(path.lastIndexOf("/"), path.lastIndexOf("\\"));
  return lastSep === -1 ? path : path.slice(lastSep + 1);
};
/**
 * Extract the lowercase extension (including dot) from a file path. Used for processor dispatch.
 * Strips the directory, then returns the part after the last dot. If no dot, returns the full
 * filename lowercased.
 */
const getExtension = (file) => {
  const basename = getFilename(file);
  const dot = basename.lastIndexOf(".");
  return dot === -1 ? basename.toLowerCase() : basename.slice(dot).toLowerCase();
};
/**
 * Extract the extension from a filename (not full path). Returns `"(no extension)"` if no dot
 * exists. Used for display grouping of skipped files. Note: unlike `getExtension`, the result is
 * NOT lowercased.
 */
const getFileExtension = (filename) => {
  const dot = filename.lastIndexOf(".");
  if (dot === -1) return "(no extension)";
  return filename.slice(dot);
};
//#endregion
|
|
187
213
|
//#region src/domain/errors.ts
|
|
188
214
|
/** Config file or directory does not exist. Run pix init first. */
|
|
189
215
|
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
|
|
@@ -315,13 +341,6 @@ const deriveEffectiveConfig = (opts, config) => ({
|
|
|
315
341
|
ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
|
|
316
342
|
ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
|
|
317
343
|
});
|
|
318
|
-
function getExtension(file) {
|
|
319
|
-
const lastSlash = file.lastIndexOf("/");
|
|
320
|
-
const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
|
|
321
|
-
const dotIndex = name.lastIndexOf(".");
|
|
322
|
-
if (dotIndex === -1) return name.toLowerCase();
|
|
323
|
-
return name.slice(dotIndex).toLowerCase();
|
|
324
|
-
}
|
|
325
344
|
const classifyFiles = (files, processorMap) => {
|
|
326
345
|
const knownFiles = [];
|
|
327
346
|
const skippedFiles = [];
|
|
@@ -444,11 +463,6 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
444
463
|
return { index };
|
|
445
464
|
})
|
|
446
465
|
}) {};
|
|
447
|
-
const getFilename = (path) => path.split("/").pop() ?? path;
|
|
448
|
-
const getFileExtension = (filename) => {
|
|
449
|
-
const dotIndex = filename.lastIndexOf(".");
|
|
450
|
-
return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
|
|
451
|
-
};
|
|
452
466
|
const groupByExtension = (entries) => {
|
|
453
467
|
const byExt = /* @__PURE__ */ new Map();
|
|
454
468
|
for (const s of entries) {
|
|
@@ -504,7 +518,7 @@ var QueryProject = class extends Effect.Service()("QueryProject", {
|
|
|
504
518
|
effect: Effect.gen(function* () {
|
|
505
519
|
const embedder = yield* Embedder;
|
|
506
520
|
const store = yield* VectorStore;
|
|
507
|
-
const queryProject = (queryText,
|
|
521
|
+
const queryProject = (queryText, options) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, options)));
|
|
508
522
|
return { queryProject };
|
|
509
523
|
})
|
|
510
524
|
}) {};
|
|
@@ -636,6 +650,51 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
|
|
|
636
650
|
DiskFullError: reportError
|
|
637
651
|
})));
|
|
638
652
|
//#endregion
|
|
653
|
+
//#region src/lib/format.ts
|
|
654
|
+
/**
 * Format byte count as human-readable string (e.g. "1.5 MB").
 * Fix: the unit index is clamped to [0, units.length - 1]. Previously, 0 < bytes < 1 made
 * Math.log(bytes) negative (and negative/NaN input made it NaN), so `units[i]` was `undefined`
 * and the result read like "NaN undefined". Non-positive or non-finite input now renders "0 B".
 */
const formatBytes = (bytes) => {
  if (!Number.isFinite(bytes) || bytes <= 0) return "0 B";
  const units = [
    "B",
    "KB",
    "MB",
    "GB"
  ];
  // Clamp below as well as above: Math.log(bytes) < 0 when bytes < 1.
  const i = Math.min(Math.max(Math.floor(Math.log(bytes) / Math.log(1024)), 0), units.length - 1);
  return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
};
|
|
666
|
+
/**
 * Apply a character budget to search results. Returns results in score order capped by the budget.
 * The last result may be truncated to fit the remaining budget. Character count includes file path,
 * line numbers, chunk text, and context lines.
 * @param results - search results, assumed already sorted by descending score
 * @param maxChars - total character budget; undefined, 0, or negative disables budgeting
 * @returns `{ results }` — the budgeted (possibly truncated) result list
 */
const applyCharBudget = (results, maxChars) => {
  if (!maxChars || maxChars <= 0) return { results };
  // Suffix appended to a truncated result so consumers can tell it was cut short.
  // Hoisted out of the loop (invariant) and used for the reservation below instead
  // of the previous magic constant `6`, which silently duplicated its length.
  const indicator = " [...]";
  const budgeted = [];
  let remaining = maxChars;
  for (const result of results) {
    const metadata = `${result.file}:${result.startLine}-${result.endLine}\n`;
    const chars = `${metadata}${result.text}${result.contextBefore ? `\n${result.contextBefore}` : ""}${result.contextAfter ? `\n${result.contextAfter}` : ""}`.length;
    if (chars <= remaining) {
      budgeted.push(result);
      remaining -= chars;
    } else {
      // Reserve room for the metadata line and the truncation indicator.
      const textBudget = remaining - metadata.length - indicator.length;
      if (textBudget <= 0) break;
      const truncated = result.text.slice(0, textBudget);
      // Context lines are dropped on the truncated entry — only the chunk text survives.
      budgeted.push({
        ...result,
        text: `${truncated}${indicator}`,
        contextBefore: null,
        contextAfter: null
      });
      break;
    }
  }
  return { results: budgeted };
};
|
|
697
|
+
//#endregion
|
|
639
698
|
//#region src/commands/query.ts
|
|
640
699
|
const DEFAULT_TOP_K = 5;
|
|
641
700
|
const DEFAULT_CONTEXT_LINES = 0;
|
|
@@ -662,31 +721,64 @@ const formatResult = (result) => {
|
|
|
662
721
|
const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
|
|
663
722
|
return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
|
|
664
723
|
};
|
|
665
|
-
|
|
724
|
+
/** Format a result as a lightweight location reference (no text content). */
const formatLocation = (result) => {
  const { file, startLine, endLine, score } = result;
  return `${file}:${startLine}-${endLine} (score: ${score.toFixed(3)})`;
};
/** Build optional content fields for a single JSON output entry. */
const buildContentFields = (r, ctxLines, noContent) => {
  if (noContent) return {};
  const fields = { text: r.text };
  if (ctxLines > 0 && r.contextBefore) fields.contextBefore = r.contextBefore;
  if (ctxLines > 0 && r.contextAfter) fields.contextAfter = r.contextAfter;
  return fields;
};
/** Map search results to plain JSON entries; content fields are omitted when noContent is set. */
const toJsonOutput = (results, ctxLines, noContent = false) =>
  results.map((r) => Object.assign(
    { score: r.score, file: r.file, startLine: r.startLine, endLine: r.endLine },
    buildContentFields(r, ctxLines, noContent)
  ));
|
|
674
|
-
/**
|
|
742
|
+
/**
 * Build SearchOptions from parsed CLI args, clamping topK.
 * @param top - Option<number> from --top; falls back to DEFAULT_TOP_K when absent
 * @param ignorePath / onlyPath - repeated path-pattern flags (copied defensively)
 * @returns { options, clamped, rawValue } — clamped flags whether topK was adjusted
 */
const buildSearchOptions = (top, ignorePath, onlyPath) => {
  const rawValue = Option.getOrElse(top, () => DEFAULT_TOP_K);
  const { value, clamped } = clampTopK(rawValue);
  const options = { topK: value };
  if (ignorePath.length > 0) options.ignorePaths = [...ignorePath];
  if (onlyPath.length > 0) options.onlyPaths = [...onlyPath];
  return {
    options,
    clamped,
    rawValue
  };
};
|
|
756
|
+
/** Render search results via Display — JSON + human-readable text. */
const renderResults = (d, results, ctxLines, noContent) => Effect.gen(function* () {
  // JSON channel always gets the structured payload first.
  yield* d.json(toJsonOutput(results, ctxLines, noContent));
  if (results.length === 0) {
    yield* d.log("No results found", "warn");
    return;
  }
  const render = noContent ? formatLocation : formatResult;
  for (const result of results) {
    yield* d.text(render(result));
  }
});
|
|
762
|
+
/**
|
|
763
|
+
* CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] [--ignore-path P]
|
|
764
|
+
* [--only-path P] [--max-characters N] [--no-content]
|
|
765
|
+
*/
|
|
675
766
|
const queryCommand = Command.make("query", {
|
|
676
767
|
queryText: Args.text({ name: "query" }),
|
|
677
768
|
top: Options.integer("top").pipe(Options.withDefault(DEFAULT_TOP_K), Options.optional),
|
|
678
769
|
json: Options.boolean("json").pipe(Options.withDefault(false)),
|
|
679
|
-
contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional)
|
|
680
|
-
|
|
770
|
+
contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional),
|
|
771
|
+
ignorePath: Options.text("ignore-path").pipe(Options.repeated),
|
|
772
|
+
onlyPath: Options.text("only-path").pipe(Options.repeated),
|
|
773
|
+
maxCharacters: Options.integer("max-characters").pipe(Options.optional),
|
|
774
|
+
noContent: Options.boolean("no-content").pipe(Options.withDefault(false))
|
|
775
|
+
}, ({ queryText, top, contextLines, ignorePath, onlyPath, maxCharacters, noContent }) => Effect.gen(function* () {
|
|
681
776
|
const d = yield* Display;
|
|
682
|
-
const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
|
|
683
777
|
const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
|
|
684
|
-
const clamped =
|
|
685
|
-
if (clamped
|
|
686
|
-
const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText,
|
|
687
|
-
yield* d.
|
|
688
|
-
if (results.length === 0) yield* d.log("No results found", "warn");
|
|
689
|
-
else for (const result of results) yield* d.text(formatResult(result));
|
|
778
|
+
const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
|
|
779
|
+
if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
|
|
780
|
+
const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
|
|
781
|
+
yield* renderResults(d, noContent ? results : applyCharBudget(results, Option.getOrUndefined(maxCharacters)).results, ctxLines, noContent);
|
|
690
782
|
}).pipe(Effect.catchTags({
|
|
691
783
|
ModelLoadError: reportError,
|
|
692
784
|
InferenceError: reportError,
|
|
@@ -695,20 +787,6 @@ const queryCommand = Command.make("query", {
|
|
|
695
787
|
NoIndexError: reportError
|
|
696
788
|
})));
|
|
697
789
|
//#endregion
|
|
698
|
-
//#region src/lib/format.ts
|
|
699
|
-
/** Format byte count as human-readable string (e.g. "1.5 MB") */
|
|
700
|
-
const formatBytes = (bytes) => {
|
|
701
|
-
if (bytes === 0) return "0 B";
|
|
702
|
-
const units = [
|
|
703
|
-
"B",
|
|
704
|
-
"KB",
|
|
705
|
-
"MB",
|
|
706
|
-
"GB"
|
|
707
|
-
];
|
|
708
|
-
const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
|
|
709
|
-
return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
|
|
710
|
-
};
|
|
711
|
-
//#endregion
|
|
712
790
|
//#region src/commands/reset.ts
|
|
713
791
|
/** CLI command: pix reset [--json] */
|
|
714
792
|
const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, () => Effect.gen(function* () {
|
|
@@ -822,13 +900,19 @@ const buildChunks = (file, content, config) => {
|
|
|
822
900
|
const text = lines.slice(startLine - 1, endLine).join("\n");
|
|
823
901
|
if (text.length >= MIN_CHUNK_CHARS) {
|
|
824
902
|
const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
|
|
903
|
+
const contextBeforeStart = Math.max(0, startLine - 1 - config.overlapLines);
|
|
904
|
+
const contextBefore = lines.slice(contextBeforeStart, startLine - 1).join("\n");
|
|
905
|
+
const contextAfterEnd = Math.min(lines.length, endLine + config.overlapLines);
|
|
906
|
+
const contextAfter = lines.slice(endLine, contextAfterEnd).join("\n");
|
|
825
907
|
chunks.push({
|
|
826
908
|
id,
|
|
827
909
|
idx,
|
|
828
910
|
file,
|
|
829
911
|
startLine,
|
|
830
912
|
endLine,
|
|
831
|
-
text
|
|
913
|
+
text,
|
|
914
|
+
contextBefore: contextBefore || null,
|
|
915
|
+
contextAfter: contextAfter || null
|
|
832
916
|
});
|
|
833
917
|
idx++;
|
|
834
918
|
}
|
|
@@ -855,12 +939,19 @@ const make$5 = Effect.gen(function* () {
|
|
|
855
939
|
});
|
|
856
940
|
const ChunkerLive = Layer.effect(Chunker, make$5);
|
|
857
941
|
//#endregion
|
|
942
|
+
//#region src/lib/platform-error.ts
|
|
943
|
+
/**
 * Check if a platform error has a specific `reason` string (e.g. "BadResource" for disk full,
 * "NotFound" for missing files). Platform errors from @effect/platform include a `reason` property
 * that categorizes the failure. Non-object/null causes and missing `reason` properties yield false;
 * the reason value is string-coerced before comparison.
 */
const isPlatformReason = (cause, reason) => {
  if (typeof cause !== "object" || cause === null) return false;
  if (!("reason" in cause)) return false;
  return String(cause.reason) === reason;
};
|
|
949
|
+
//#endregion
|
|
858
950
|
//#region src/services/config-store.ts
|
|
859
951
|
const CONFIG_DIR = ".pix";
|
|
860
952
|
const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
|
|
861
|
-
const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
|
|
862
953
|
const mapConfigWriteError = (cause, path, action) => {
|
|
863
|
-
if (isPlatformReason
|
|
954
|
+
if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
|
|
864
955
|
message: `Disk full: could not ${action}`,
|
|
865
956
|
path,
|
|
866
957
|
cause
|
|
@@ -879,7 +970,7 @@ const make$4 = Effect.gen(function* () {
|
|
|
879
970
|
});
|
|
880
971
|
const readConfig = () => Effect.gen(function* () {
|
|
881
972
|
const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
|
|
882
|
-
if (isPlatformReason
|
|
973
|
+
if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
|
|
883
974
|
message: "Config file not found. Run pix init first.",
|
|
884
975
|
path: CONFIG_PATH,
|
|
885
976
|
cause
|
|
@@ -914,10 +1005,7 @@ const make$3 = Effect.gen(function* () {
|
|
|
914
1005
|
const fs = yield* FileSystem.FileSystem;
|
|
915
1006
|
const processorMap = buildProcessorMap([]);
|
|
916
1007
|
const extract = (file) => {
|
|
917
|
-
const
|
|
918
|
-
const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
|
|
919
|
-
const dotIndex = name.lastIndexOf(".");
|
|
920
|
-
const ext = dotIndex === -1 ? name.toLowerCase() : name.slice(dotIndex).toLowerCase();
|
|
1008
|
+
const ext = getExtension(file);
|
|
921
1009
|
const processor = processorMap[ext];
|
|
922
1010
|
if (!processor) return Effect.fail({
|
|
923
1011
|
_tag: "UnsupportedFormat",
|
|
@@ -1208,10 +1296,44 @@ const make$1 = Effect.gen(function* () {
|
|
|
1208
1296
|
const ScannerLive = Layer.effect(Scanner, make$1);
|
|
1209
1297
|
//#endregion
|
|
1210
1298
|
//#region src/services/vector-store.ts
|
|
1299
|
+
/**
 * Parse a single JSON line from chunks.jsonl and normalize context fields (old indexes may lack
 * them). Missing or wrongly-typed fields fall back to "" / 0 / null; malformed JSON propagates
 * the JSON.parse exception to the caller.
 */
const parseChunkLine = (line) => {
  const raw = JSON.parse(line);
  const asString = (v, fallback) => (typeof v === "string" ? v : fallback);
  const asNumber = (v) => (typeof v === "number" ? v : 0);
  return {
    file: asString(raw.file, ""),
    startLine: asNumber(raw.startLine),
    endLine: asNumber(raw.endLine),
    text: asString(raw.text, ""),
    contextBefore: asString(raw.contextBefore, null),
    contextAfter: asString(raw.contextAfter, null)
  };
};
|
|
1314
|
+
/**
 * Compute dot-product similarity between a chunk vector and the query embedding.
 * Iterates exactly `query.dims` elements in ascending index order (order preserved so
 * floating-point accumulation matches the original bit-for-bit).
 */
const computeDotProduct = (chunkVector, query) => {
  const { dims, vector } = query;
  let acc = 0;
  for (let j = 0; j < dims; j += 1) {
    acc += chunkVector[j] * vector[j];
  }
  return acc;
};
|
|
1211
1320
|
const STORE_DIR = ".pix";
|
|
1212
1321
|
const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
|
|
1213
1322
|
const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
|
|
1214
|
-
|
|
1323
|
+
/**
 * Serialize a Chunk to a JSON object for storage in chunks.jsonl. Always includes context fields
 * for schema consistency. Any extra properties on the chunk are deliberately dropped so the
 * on-disk schema stays fixed.
 */
const serializeChunk = ({ id, idx, file, startLine, endLine, text, contextBefore, contextAfter }) => ({
  id,
  idx,
  file,
  startLine,
  endLine,
  text,
  contextBefore,
  contextAfter
});
|
|
1215
1337
|
/**
|
|
1216
1338
|
* FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
|
|
1217
1339
|
* statistics.
|
|
@@ -1309,14 +1431,7 @@ const make = Effect.gen(function* () {
|
|
|
1309
1431
|
if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
|
|
1310
1432
|
});
|
|
1311
1433
|
const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
|
|
1312
|
-
const content = chunks.map((c) => JSON.stringify(
|
|
1313
|
-
id: c.id,
|
|
1314
|
-
idx: c.idx,
|
|
1315
|
-
file: c.file,
|
|
1316
|
-
startLine: c.startLine,
|
|
1317
|
-
endLine: c.endLine,
|
|
1318
|
-
text: c.text
|
|
1319
|
-
})).join("\n") + "\n";
|
|
1434
|
+
const content = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n") + "\n";
|
|
1320
1435
|
yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
|
|
1321
1436
|
const buffer = serializeVectors(embeddings);
|
|
1322
1437
|
yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
|
|
@@ -1353,37 +1468,32 @@ const make = Effect.gen(function* () {
|
|
|
1353
1468
|
const store = (chunks, embeddings) => Effect.gen(function* () {
|
|
1354
1469
|
yield* ensureDirExists(STORE_DIR, ".pix directory");
|
|
1355
1470
|
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
1356
|
-
const
|
|
1357
|
-
|
|
1358
|
-
idx: c.idx,
|
|
1359
|
-
file: c.file,
|
|
1360
|
-
startLine: c.startLine,
|
|
1361
|
-
endLine: c.endLine,
|
|
1362
|
-
text: c.text
|
|
1363
|
-
}));
|
|
1364
|
-
yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
|
|
1471
|
+
const chunksJson = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n");
|
|
1472
|
+
yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
|
|
1365
1473
|
yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
|
|
1366
1474
|
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
1367
1475
|
const buffer = serializeVectors(embeddings);
|
|
1368
1476
|
yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
|
|
1369
1477
|
yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
|
|
1370
1478
|
});
|
|
1371
|
-
const search = (query,
|
|
1479
|
+
const search = (query, options) => Effect.gen(function* () {
|
|
1372
1480
|
const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
|
|
1373
1481
|
const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
|
|
1374
1482
|
if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
|
|
1375
1483
|
const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
|
|
1376
1484
|
const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
|
|
1377
|
-
const vectors = new Float32Array(vectorsBuffer.buffer);
|
|
1485
|
+
const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
|
|
1486
|
+
const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
|
|
1487
|
+
const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
|
|
1378
1488
|
const results = [];
|
|
1379
1489
|
for (let i = 0; i < chunkLines.length; i++) try {
|
|
1380
|
-
const chunk =
|
|
1490
|
+
const chunk = parseChunkLine(chunkLines[i]);
|
|
1491
|
+
if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
|
|
1492
|
+
if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
|
|
1381
1493
|
const startIdx = i * query.dims;
|
|
1382
|
-
const
|
|
1383
|
-
let dotProduct = 0;
|
|
1384
|
-
for (let j = 0; j < query.dims; j++) dotProduct += chunkVector[j] * query.vector[j];
|
|
1494
|
+
const score = computeDotProduct(vectors.slice(startIdx, startIdx + query.dims), query);
|
|
1385
1495
|
results.push({
|
|
1386
|
-
score
|
|
1496
|
+
score,
|
|
1387
1497
|
file: chunk.file,
|
|
1388
1498
|
startLine: chunk.startLine,
|
|
1389
1499
|
endLine: chunk.endLine,
|
|
@@ -1393,7 +1503,10 @@ const make = Effect.gen(function* () {
|
|
|
1393
1503
|
});
|
|
1394
1504
|
} catch {}
|
|
1395
1505
|
results.sort((a, b) => b.score - a.score);
|
|
1396
|
-
|
|
1506
|
+
const topK = options?.topK;
|
|
1507
|
+
if (topK == null) return results;
|
|
1508
|
+
const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
|
|
1509
|
+
return results.slice(0, clamped);
|
|
1397
1510
|
});
|
|
1398
1511
|
const getStatus = () => Effect.gen(function* () {
|
|
1399
1512
|
const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
|