@lucas-bur/pix 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/dist/index.mjs +498 -180
- package/package.json +1 -1
package/README.md
CHANGED
@@ -19,13 +19,13 @@ pix query "authentication middleware"
 
 ## Commands
 
-| Command
-
-| `pix init`
-| `pix index`
-| `pix query "<text>"` | Semantic search via cosine similarity
-| `pix status`
-| `pix reset`
+| Command | Description | JSON flag |
+| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------- |
+| `pix init` | Create `.pix/config.json` with defaults | `--json` |
+| `pix index` | Scan, chunk, embed, and store project files | `--json` |
+| `pix query "<text>" [flags]` | Semantic search via cosine similarity (`--top`, `--context-lines`, `--ignore-path`, `--only-path`, `--max-characters`, `--no-content`) | `--json` |
+| `pix status` | Show index statistics | `--json` |
+| `pix reset` | Delete index files (chunks + vectors) | `--json` |
 
 All commands support `--json` for structured output on stdout — ideal for piping to AI agents.
 
package/dist/index.mjs
CHANGED
@@ -2,7 +2,8 @@
 import { createRequire } from "node:module";
 import { Args, CliConfig, Command, Options } from "@effect/cli";
 import { NodeContext, NodeRuntime } from "@effect/platform-node";
-import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
+import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
+import * as Chunk from "effect/Chunk";
 import { styleText } from "node:util";
 import * as clack from "@clack/prompts";
 import { FileSystem } from "@effect/platform";
@@ -74,6 +75,7 @@ const withInteractive = (activeRef, acquire, setActive, release, effect) => Ref.
 /** Display implementation using @clack/prompts for interactive terminal output */
 const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
 const activeRef = yield* Ref.make(null);
+const lastSpinnerMsg = yield* Ref.make("");
 return {
 intro: (title) => Effect.sync(() => clack.intro(styleText("inverse", ` ${title} `))),
 outro: (message) => Effect.sync(() => clack.outro(message)),
@@ -84,11 +86,17 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
 const s = clack.spinner();
 s.start(message);
 return s;
-}), (s) => ({
+}).pipe(Effect.tap(() => Ref.set(lastSpinnerMsg, message))), (s) => ({
 type: "spinner",
 handle: s
-}), (s, exit) => Effect.sync(() => s.stop(exit._tag === "Success" ?
-progress: (opts, effect) =>
+}), (s, exit) => lastSpinnerMsg.pipe(Effect.flatMap((lastMsg) => Effect.sync(() => s.stop(exit._tag === "Success" && lastMsg ? lastMsg : `${message} (failed)`)))), effect),
+progress: (opts, effect) => Effect.gen(function* () {
+const current = yield* Ref.get(activeRef);
+if (current && current.type === "spinner") {
+const msg = yield* Ref.get(lastSpinnerMsg);
+current.handle.stop(msg || opts.message);
+yield* Ref.set(activeRef, null);
+}
 const bar = clack.progress({
 max: opts.max,
 style: opts.style ?? "heavy",
@@ -96,16 +104,27 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
 indicator: opts.indicator ?? "dots"
 });
 bar.start(opts.message);
-
-
-
-
-
-
-
+yield* Ref.set(activeRef, {
+type: "progress",
+handle: bar,
+value: 0,
+max: opts.max
+});
+const exit = yield* Effect.exit(effect);
+yield* Ref.set(activeRef, null);
+if (Exit.isSuccess(exit)) {
+bar.stop(opts.message);
+return exit.value;
+}
+bar.error(opts.message);
+return yield* Effect.failCause(exit.cause);
+}),
 updateInteractive: (payload) => Ref.get(activeRef).pipe(Effect.flatMap((active) => {
 if (!active) return Effect.void;
-if (active.type === "spinner")
+if (active.type === "spinner") {
+const msg = payloadText(payload);
+return Effect.sync(() => active.handle.message(msg)).pipe(Effect.andThen(Ref.set(lastSpinnerMsg, msg)));
+}
 const delta = computeDelta(payload, {
 value: active.value,
 max: active.max
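
For orientation, a hedged sketch of how the reworked progress contract above is driven. `Display` and the `{ message, setTo }` payload shape come from this diff; the four-step loop itself is purely illustrative:

```js
// Hypothetical caller of Display.progress (assumes the Display tag from this
// bundle is in scope). The wrapped effect reports absolute progress via setTo,
// which updateInteractive converts to a delta for the clack progress bar.
const demo = Effect.gen(function* () {
  const d = yield* Display;
  yield* d.progress(
    { message: "Embedding 4 chunks...", max: 4 },
    Effect.forEach([1, 2, 3, 4], (i) =>
      d.updateInteractive({ message: `Embedding ${i} of 4 chunks`, setTo: i })
    )
  );
});
```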
@@ -149,22 +168,48 @@ const DEFAULT_CONFIG = {
 "dist",
 "build",
 ".next",
-".agents",
-".claude",
 ".vscode",
-".github",
 "coverage",
 "*-lock.yaml",
 "*-lock.json",
-"*.lock"
+"*.lock",
+".vite-hooks",
+".fallow"
 ],
 embedder: {
 model: "Xenova/all-MiniLM-L6-v2",
 device: "auto",
-dtype: "fp32"
+dtype: "fp32",
+batchSize: 16
 }
 };
 //#endregion
+//#region src/lib/extension.ts
+/** Extract the last path segment (filename) from a file path. Handles both `/` and `\\` separators. */
+const getFilename = (path) => {
+const sepIndex = Math.max(path.lastIndexOf("/"), path.lastIndexOf("\\"));
+return sepIndex >= 0 ? path.slice(sepIndex + 1) : path;
+};
+/**
+* Extract the lowercase extension (including dot) from a file path. Used for processor dispatch.
+* Strips the directory, then returns the part after the last dot. If no dot, returns the full
+* filename lowercased.
+*/
+const getExtension = (file) => {
+const name = getFilename(file);
+const dotIndex = name.lastIndexOf(".");
+if (dotIndex === -1) return name.toLowerCase();
+return name.slice(dotIndex).toLowerCase();
+};
+/**
+* Extract the extension from a filename (not full path). Returns `"(no extension)"` if no dot
+* exists. Used for display grouping of skipped files.
+*/
+const getFileExtension = (filename) => {
+const dotIndex = filename.lastIndexOf(".");
+return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
+};
+//#endregion
 //#region src/domain/errors.ts
 /** Config file or directory does not exist. Run pix init first. */
 var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
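
Quick sanity checks for the three new helpers, with expected values derived from the implementations above:

```js
getFilename("src/lib/format.ts");   // "format.ts"
getFilename("C:\\repo\\README.md"); // "README.md" (backslash separator)
getExtension("src/Index.TS");       // ".ts" (lowercased, includes the dot)
getExtension("Makefile");           // "makefile" (no dot: whole name, lowercased)
getFileExtension("notes");          // "(no extension)"
```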
@@ -289,13 +334,13 @@ function buildProcessorMap(skipExtensions) {
 }
 //#endregion
 //#region src/application/index-project.ts
-
-
-
-
-
-
-}
+const deriveEffectiveConfig = (opts, config) => ({
+batchSize: opts.batchSize ?? config.embedder.batchSize ?? 16,
+concurrency: Math.max(1, opts.chunkConcurrency ?? config.chunkConcurrency ?? 8),
+skipExtensions: opts.skipExtensions ? [...config.skipExtensions, ...opts.skipExtensions] : config.skipExtensions,
+ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
+ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
+});
 const classifyFiles = (files, processorMap) => {
 const knownFiles = [];
 const skippedFiles = [];
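
Option precedence in `deriveEffectiveConfig` is CLI flag, then config file, then hard-coded default, and the extension/path lists are additive. A hedged illustration (the config literal is assumed for the example):

```js
const config = { embedder: { batchSize: 32 }, skipExtensions: [".min.js"] };
deriveEffectiveConfig({}, config).batchSize;               // 32 (config file)
deriveEffectiveConfig({ batchSize: 8 }, config).batchSize; // 8  (CLI wins)
deriveEffectiveConfig({}, { embedder: {}, skipExtensions: [] }).batchSize; // 16 (default)
// Lists append rather than replace:
deriveEffectiveConfig({ skipExtensions: [".snap"] }, config).skipExtensions; // [".min.js", ".snap"]
```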
@@ -313,11 +358,19 @@ const classifyFiles = (files, processorMap) => {
 unknownExtensions
 };
 };
-
-
-
-
-
+const classifyAndCollectChunks = (knownFiles, extractor, chunker, concurrency, skipped) => Stream.fromIterable(knownFiles).pipe(Stream.mapEffect((file) => extractor.extract(file).pipe(Effect.flatMap((text) => Effect.succeed(Option.some({
+file,
+text
+}))), Effect.catchAll((err) => Ref.update(skipped, (prev) => [...prev, {
+path: file,
+reason: err.message
+}]).pipe(Effect.flatMap(() => Effect.succeed(Option.none()))))), { concurrency }), Stream.filterMap((opt) => opt), Stream.mapEffect(({ file, text }) => chunker.chunkText(text, file), { concurrency }), Stream.flatMap((chunks) => Stream.fromIterable(chunks)), Stream.runCollect, Effect.map((allChunks) => {
+const chunks = Chunk.toArray(allChunks);
+return {
+chunks,
+totalChunks: chunks.length
+};
+}));
 var IndexProject = class extends Effect.Service()("IndexProject", {
 accessors: true,
 effect: Effect.gen(function* () {
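
The new `classifyAndCollectChunks` is easier to read as a shape: map each file through an effect under a concurrency limit, turn failures into `Option.none()` so they drop out at `filterMap`, then flatten and collect. A minimal standalone sketch of that shape (file names and the failure condition are illustrative):

```js
import { Chunk, Effect, Option, Stream } from "effect";

const program = Stream.fromIterable(["a.ts", "b.ts", "c.bin"]).pipe(
  Stream.mapEffect(
    (file) =>
      file.endsWith(".bin")
        ? Effect.succeed(Option.none())      // "failed" item, filtered out below
        : Effect.succeed(Option.some(file)), // survivor
    { concurrency: 8 }
  ),
  Stream.filterMap((opt) => opt),
  Stream.runCollect,
  Effect.map(Chunk.toArray)
);

Effect.runPromise(program).then(console.log); // ["a.ts", "b.ts"]
```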
@@ -328,65 +381,118 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
 const vectorStore = yield* VectorStore;
 const d = yield* Display;
 const extractor = yield* ContentExtractor;
-const index = () => Effect.gen(function* () {
+const index = (opts = {}) => Effect.gen(function* () {
+const start = Date.now();
 if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
-const
-const processorMap = buildProcessorMap(
+const eff = deriveEffectiveConfig(opts, yield* configStore.readConfig());
+const processorMap = buildProcessorMap(eff.skipExtensions);
 yield* d.updateInteractive("Scanning source files...");
-const
-const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(
-
-
-
-
-
-
-
-
-
-
+const scanResult = yield* scanner.scanFiles(eff.ignoredPaths, eff.ignoreGitignore);
+const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(scanResult.files, processorMap);
+const skipped = yield* Ref.make(scanResult.skipped.filter((s) => !s.reason.startsWith("Ignored by config pattern")).map((s) => ({
+path: s.path,
+reason: s.reason
+})));
+if (unknownExtensions.size > 0) yield* Ref.update(skipped, (prev) => [...prev, ...skippedFiles.map((f) => ({
+path: f,
+reason: "unknown extension"
+}))]);
+if (knownFiles.length === 0) {
+yield* displaySkippedNote(d, yield* Ref.get(skipped));
+return {
+success: true,
+status: {
+chunks: 0,
+files: 0,
+totalLines: 0,
+byteSize: 0
+},
+durationMs: Date.now() - start
+};
+}
 yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
-const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+const { chunks, totalChunks } = yield* classifyAndCollectChunks(knownFiles, extractor, chunker, eff.concurrency, skipped);
+if (totalChunks === 0) {
+yield* displaySkippedNote(d, yield* Ref.get(skipped));
+return {
+success: true,
+status: {
+chunks: 0,
+files: 0,
+totalLines: 0,
+byteSize: 0
+},
+durationMs: Date.now() - start
+};
+}
+yield* vectorStore.storeBegin();
+const embeddedRef = yield* Ref.make(0);
+const stats = yield* d.progress({
+message: `Embedding ${totalChunks} chunks...`,
+max: totalChunks
+}, Stream.fromIterable(chunks).pipe(Stream.grouped(eff.batchSize), Stream.mapEffect((batchChunk) => Effect.gen(function* () {
+const batch = Chunk.toArray(batchChunk);
+const texts = batch.map((c) => c.text);
+const embeddings = yield* embedder.batch(texts);
+yield* vectorStore.storeBatch(batch, embeddings);
+const count = yield* Ref.updateAndGet(embeddedRef, (n) => n + batch.length);
+yield* d.updateInteractive({
+message: `Embedding ${count} of ${totalChunks} chunks`,
+setTo: count
+});
+})), Stream.runDrain, Effect.matchEffect({
+onSuccess: () => vectorStore.storeCommit(),
+onFailure: (err) => vectorStore.storeAbort().pipe(Effect.flatMap(() => Effect.fail(err)))
+})));
+yield* displaySkippedNote(d, yield* Ref.get(skipped));
+const durationSec = ((Date.now() - start) / 1e3).toFixed(1);
+yield* d.log(`Indexed ${stats.chunks} chunks from ${stats.files} files in ${durationSec}s`, "success");
+const fallbackInfo = yield* embedder.getFallbackInfo();
 return {
 success: true,
 status: {
-chunks:
-files:
-totalLines,
-byteSize:
-}
+chunks: stats.chunks,
+files: stats.files,
+totalLines: stats.totalLines,
+byteSize: stats.byteSize
+},
+durationMs: Date.now() - start,
+embedderFallback: fallbackInfo
 };
 });
 return { index };
 })
 }) {};
+const groupByExtension = (entries) => {
+const byExt = /* @__PURE__ */ new Map();
+for (const s of entries) {
+const name = getFilename(s.path);
+const ext = getFileExtension(name);
+if (!byExt.has(ext)) byExt.set(ext, []);
+byExt.get(ext).push(name);
+}
+return byExt;
+};
+const formatFileList = (files, maxDisplay = 5) => files.length > maxDisplay ? `${files.slice(0, maxDisplay).join(", ")} +${files.length - maxDisplay} more` : files.join(", ");
+const buildSkippedLines = (extFailures, extractErrors) => {
+const lines = [];
+if (extFailures.length > 0) {
+lines.push(`Unknown extensions (${extFailures.length})`);
+for (const [ext, files] of groupByExtension(extFailures)) lines.push(`  ${ext} (${files.length}): ${formatFileList(files)}`);
+}
+if (extractErrors.length > 0) {
+if (lines.length > 0) lines.push("");
+lines.push(`Extraction errors (${extractErrors.length})`);
+for (const s of extractErrors) lines.push(`  ${getFilename(s.path)}: ${s.reason}`);
+}
+return lines;
+};
+const displaySkippedNote = (d, skipped) => {
+if (skipped.length === 0) return Effect.void;
+const extFailures = skipped.filter((s) => s.reason === "unknown extension");
+const extractErrors = skipped.filter((s) => s.reason !== "unknown extension");
+return d.note(buildSkippedLines(extFailures, extractErrors).join("\n"), `Skipped ${skipped.length} files`);
+};
 //#endregion
 //#region src/application/init-project.ts
 /**
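
The embedding loop feeds `Stream.grouped(eff.batchSize)`, which re-chunks the stream into fixed-size batches. A tiny sketch of the operator (batch size 2 is illustrative):

```js
import { Chunk, Effect, Stream } from "effect";

// Stream.grouped(n) emits Chunks of up to n elements; the last batch may be short.
const batches = Stream.fromIterable([1, 2, 3, 4, 5]).pipe(
  Stream.grouped(2),
  Stream.map(Chunk.toArray),
  Stream.runCollect,
  Effect.map(Chunk.toArray)
);

Effect.runPromise(batches).then(console.log); // [[1, 2], [3, 4], [5]]
```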
@@ -412,7 +518,7 @@ var QueryProject = class extends Effect.Service()("QueryProject", {
 effect: Effect.gen(function* () {
 const embedder = yield* Embedder;
 const store = yield* VectorStore;
-const queryProject = (queryText,
+const queryProject = (queryText, options) => embedder.embed(queryText).pipe(Effect.flatMap((embedding) => store.search(embedding, options)));
 return { queryProject };
 })
 }) {};
@@ -475,22 +581,60 @@ const reportError = (error) => Effect.gen(function* () {
 });
 //#endregion
 //#region src/commands/index-cmd.ts
-
+const batchSizeOption = Options.integer("batch-size").pipe(Options.withAlias("b"), Options.optional);
+const chunkConcurrencyOption = Options.integer("chunk-concurrency").pipe(Options.withAlias("c"), Options.optional);
+const skipExtensionsOption = Options.text("skip-extensions").pipe(Options.withAlias("s"), Options.repeated);
+const ignorePathOption = Options.text("ignore-path").pipe(Options.repeated);
+const ignorePathsOption = Options.text("ignore-paths").pipe(Options.repeated);
+const ignoreGitignoreOption = Options.boolean("ignore-gitignore").pipe(Options.withDefault(false));
+const splitCsv = (values) => values.flatMap((v) => v.split(",").map((s) => s.trim()).filter((s) => s.length > 0));
+const buildIndexOptions = (args) => {
+if (Option.isSome(args.batchSize) && args.batchSize.value <= 0) throw new Error(`--batch-size must be positive, got ${args.batchSize.value}`);
+if (Option.isSome(args.chunkConcurrency) && args.chunkConcurrency.value <= 0) throw new Error(`--chunk-concurrency must be positive, got ${args.chunkConcurrency.value}`);
+const cliSkipExtensions = splitCsv(args.skipExtensions);
+const cliIgnorePaths = [...args.ignorePath.map((s) => s.trim()).filter((s) => s.length > 0), ...splitCsv(args.ignorePaths)];
+return {
+batchSize: Option.getOrUndefined(args.batchSize),
+chunkConcurrency: Option.getOrUndefined(args.chunkConcurrency),
+skipExtensions: cliSkipExtensions.length > 0 ? cliSkipExtensions : void 0,
+ignorePaths: cliIgnorePaths.length > 0 ? cliIgnorePaths : void 0,
+ignoreGitignore: args.ignoreGitignore || void 0
+};
+};
+const emitIndexResult = (d, result) => Effect.gen(function* () {
+yield* d.json({
+chunks: result.status.chunks,
+files: result.status.files,
+totalLines: result.status.totalLines,
+byteSize: result.status.byteSize,
+durationMs: result.durationMs,
+...result.embedderFallback && { embedderFallback: result.embedderFallback }
+});
+if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
+});
 const indexCommand = Command.make("index", {
 force: Options.boolean("force").pipe(Options.withDefault(false)),
 verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
-json: Options.boolean("json").pipe(Options.withDefault(false))
-
+json: Options.boolean("json").pipe(Options.withDefault(false)),
+batchSize: batchSizeOption,
+chunkConcurrency: chunkConcurrencyOption,
+skipExtensions: skipExtensionsOption,
+ignorePath: ignorePathOption,
+ignorePaths: ignorePathsOption,
+ignoreGitignore: ignoreGitignoreOption
+}, ({ force, verbose, batchSize, chunkConcurrency, skipExtensions, ignorePath, ignorePaths, ignoreGitignore }) => Effect.gen(function* () {
 const d = yield* Display;
 if (force) yield* d.log("--force is currently not implemented and only a placeholder.", "warn");
 if (verbose) yield* d.log("--verbose is currently not implemented and only a placeholder.", "warn");
-const
-
-
-
+const options = buildIndexOptions({
+batchSize,
+chunkConcurrency,
+skipExtensions,
+ignorePath,
+ignorePaths,
+ignoreGitignore
 });
-
-else yield* d.log(`Indexed ${result.status.chunks} chunks from ${result.status.files} files.`, "success");
+yield* emitIndexResult(d, yield* d.spinner("Indexing project...", IndexProject.index(options)));
 }).pipe(Effect.catchAll(reportError)));
 //#endregion
 //#region src/commands/init.ts
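
`splitCsv` lets repeated flags and comma-separated values mix freely; expected results per the implementation above:

```js
splitCsv([".snap,.min.js", " .map "]); // [".snap", ".min.js", ".map"]
splitCsv([]);                          // []
// So `--skip-extensions .snap,.min.js --skip-extensions .map` and three
// separate flags yield the same option list.
```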
@@ -506,6 +650,51 @@ const initCommand = Command.make("init", { json: Options.boolean("json").pipe(Op
 DiskFullError: reportError
 })));
 //#endregion
+//#region src/lib/format.ts
+/** Format byte count as human-readable string (e.g. "1.5 MB") */
+const formatBytes = (bytes) => {
+if (bytes === 0) return "0 B";
+const units = [
+"B",
+"KB",
+"MB",
+"GB"
+];
+const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
+return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
+};
+/**
+* Apply a character budget to search results. Returns results in score order capped by the budget.
+* The last result may be truncated to fit the remaining budget. Character count includes file path,
+* line numbers, chunk text, and context lines.
+*/
+const applyCharBudget = (results, maxChars) => {
+if (!maxChars || maxChars <= 0) return { results };
+const budgeted = [];
+let remaining = maxChars;
+for (const result of results) {
+const indicator = " [...]";
+const metadata = `${result.file}:${result.startLine}-${result.endLine}\n`;
+const chars = `${metadata}${result.text}${result.contextBefore ? `\n${result.contextBefore}` : ""}${result.contextAfter ? `\n${result.contextAfter}` : ""}`.length;
+if (chars <= remaining) {
+budgeted.push(result);
+remaining -= chars;
+} else {
+const textBudget = remaining - metadata.length - 6;
+if (textBudget <= 0) break;
+const truncated = result.text.slice(0, textBudget);
+budgeted.push({
+...result,
+text: `${truncated}${indicator}`,
+contextBefore: null,
+contextAfter: null
+});
+break;
+}
+}
+return { results: budgeted };
+};
+//#endregion
 //#region src/commands/query.ts
 const DEFAULT_TOP_K = 5;
 const DEFAULT_CONTEXT_LINES = 0;
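
Expected outputs of `formatBytes`, plus the one non-obvious constant in `applyCharBudget` (the `- 6` reserves room for the six-character `" [...]"` truncation indicator):

```js
formatBytes(0);       // "0 B"
formatBytes(1536);    // "1.5 KB"  (1536 / 1024^1)
formatBytes(1572864); // "1.5 MB"  (1.5 * 1024^2)
// applyCharBudget keeps whole results while they fit, then truncates the last
// one to the remaining budget and drops its context lines.
```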
@@ -532,31 +721,64 @@ const formatResult = (result) => {
 const contextAfter = result.contextAfter ? `\n${result.contextAfter}` : "";
 return `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})${contextBefore}\n${result.text}${contextAfter}`;
 };
-
+/** Format a result as a lightweight location reference (no text content). */
+const formatLocation = (result) => `${result.file}:${result.startLine}-${result.endLine} (score: ${result.score.toFixed(3)})`;
+/** Build optional content fields for a single JSON output entry. */
+const buildContentFields = (r, ctxLines, noContent) => {
+if (noContent) return {};
+return {
+text: r.text,
+...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
+...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
+};
+};
+const toJsonOutput = (results, ctxLines, noContent = false) => results.map((r) => ({
 score: r.score,
 file: r.file,
 startLine: r.startLine,
 endLine: r.endLine,
-
-...ctxLines > 0 && r.contextBefore && { contextBefore: r.contextBefore },
-...ctxLines > 0 && r.contextAfter && { contextAfter: r.contextAfter }
+...buildContentFields(r, ctxLines, noContent)
 }));
-/**
+/** Build SearchOptions from parsed CLI args, clamping topK. */
+const buildSearchOptions = (top, ignorePath, onlyPath) => {
+const rawValue = Option.getOrElse(top, () => DEFAULT_TOP_K);
+const clamped = clampTopK(rawValue);
+return {
+options: {
+topK: clamped.value,
+...ignorePath.length > 0 && { ignorePaths: [...ignorePath] },
+...onlyPath.length > 0 && { onlyPaths: [...onlyPath] }
+},
+clamped: clamped.clamped,
+rawValue
+};
+};
+/** Render search results via Display — JSON + human-readable text. */
+const renderResults = (d, results, ctxLines, noContent) => Effect.gen(function* () {
+yield* d.json(toJsonOutput(results, ctxLines, noContent));
+if (results.length === 0) yield* d.log("No results found", "warn");
+else for (const result of results) yield* d.text(noContent ? formatLocation(result) : formatResult(result));
+});
+/**
+* CLI command: pix query "<text>" [--top N] [--json] [--context-lines N] [--ignore-path P]
+* [--only-path P] [--max-characters N] [--no-content]
+*/
 const queryCommand = Command.make("query", {
 queryText: Args.text({ name: "query" }),
 top: Options.integer("top").pipe(Options.withDefault(DEFAULT_TOP_K), Options.optional),
 json: Options.boolean("json").pipe(Options.withDefault(false)),
-contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional)
-
+contextLines: Options.integer("context-lines").pipe(Options.withDefault(DEFAULT_CONTEXT_LINES), Options.optional),
+ignorePath: Options.text("ignore-path").pipe(Options.repeated),
+onlyPath: Options.text("only-path").pipe(Options.repeated),
+maxCharacters: Options.integer("max-characters").pipe(Options.optional),
+noContent: Options.boolean("no-content").pipe(Options.withDefault(false))
+}, ({ queryText, top, contextLines, ignorePath, onlyPath, maxCharacters, noContent }) => Effect.gen(function* () {
 const d = yield* Display;
-const topK = Option.getOrElse(top, () => DEFAULT_TOP_K);
 const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
-const clamped =
-if (clamped
-const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText,
-yield* d.
-if (results.length === 0) yield* d.log("No results found", "warn");
-else for (const result of results) yield* d.text(formatResult(result));
+const { options: searchOptions, clamped, rawValue } = buildSearchOptions(top, ignorePath, onlyPath);
+if (clamped) yield* d.log(`topK clamped from ${rawValue} to ${searchOptions.topK}`, "warn");
+const results = yield* d.spinner("Searching...", QueryProject.queryProject(queryText, searchOptions));
+yield* renderResults(d, noContent ? results : applyCharBudget(results, Option.getOrUndefined(maxCharacters)).results, ctxLines, noContent);
 }).pipe(Effect.catchTags({
 ModelLoadError: reportError,
 InferenceError: reportError,
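
Putting `toJsonOutput` and the new flags together, one entry of `pix query --json` looks like this (field values are illustrative, not real output):

```js
[{
  score: 0.873,
  file: "src/middleware/auth.ts",
  startLine: 12,
  endLine: 40,
  text: "...matched chunk...", // omitted entirely with --no-content
  contextBefore: "...",        // only when --context-lines > 0 and present
  contextAfter: "..."          // only when --context-lines > 0 and present
}]
```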
@@ -565,20 +787,6 @@ const queryCommand = Command.make("query", {
 NoIndexError: reportError
 })));
 //#endregion
-//#region src/lib/format.ts
-/** Format byte count as human-readable string (e.g. "1.5 MB") */
-const formatBytes = (bytes) => {
-if (bytes === 0) return "0 B";
-const units = [
-"B",
-"KB",
-"MB",
-"GB"
-];
-const i = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1);
-return `${(bytes / 1024 ** i).toFixed(1)} ${units[i]}`;
-};
-//#endregion
 //#region src/commands/reset.ts
 /** CLI command: pix reset [--json] */
 const resetCommand = Command.make("reset", { json: Options.boolean("json").pipe(Options.withDefault(false)) }, () => Effect.gen(function* () {
@@ -692,13 +900,19 @@ const buildChunks = (file, content, config) => {
 const text = lines.slice(startLine - 1, endLine).join("\n");
 if (text.length >= MIN_CHUNK_CHARS) {
 const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
+const contextBeforeStart = Math.max(0, startLine - 1 - config.overlapLines);
+const contextBefore = lines.slice(contextBeforeStart, startLine - 1).join("\n");
+const contextAfterEnd = Math.min(lines.length, endLine + config.overlapLines);
+const contextAfter = lines.slice(endLine, contextAfterEnd).join("\n");
 chunks.push({
 id,
 idx,
 file,
 startLine,
 endLine,
-text
+text,
+contextBefore: contextBefore || null,
+contextAfter: contextAfter || null
 });
 idx++;
 }
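
A worked example of the new context slices, assuming `overlapLines: 2` and a chunk covering lines 10 to 20 of a 25-line file (`lines` is a 0-indexed array of 1-based source lines):

```js
// contextBeforeStart = Math.max(0, 10 - 1 - 2) = 7  -> lines.slice(7, 9)   = lines 8-9
// contextAfterEnd    = Math.min(25, 20 + 2)    = 22 -> lines.slice(20, 22) = lines 21-22
// At file boundaries the slices shrink to empty strings, which are stored as null.
```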
@@ -725,12 +939,19 @@ const make$5 = Effect.gen(function* () {
 });
 const ChunkerLive = Layer.effect(Chunker, make$5);
 //#endregion
+//#region src/lib/platform-error.ts
+/**
+* Check if a platform error has a specific `reason` string (e.g. "BadResource" for disk full,
+* "NotFound" for missing files). Platform errors from @effect/platform include a `reason` property
+* that categorizes the failure.
+*/
+const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
+//#endregion
 //#region src/services/config-store.ts
 const CONFIG_DIR = ".pix";
 const CONFIG_PATH = `${CONFIG_DIR}/config.json`;
-const isPlatformReason$1 = (cause, reason) => typeof cause === "object" && cause !== null && "reason" in cause && String(cause.reason) === reason;
 const mapConfigWriteError = (cause, path, action) => {
-if (isPlatformReason$1(cause, "BadResource")) return new DiskFullError({
+if (isPlatformReason(cause, "BadResource")) return new DiskFullError({
 message: `Disk full: could not ${action}`,
 path,
 cause
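
The extracted predicate is a plain structural check, not an `instanceof` test, so its behavior is easy to pin down:

```js
isPlatformReason({ reason: "NotFound" }, "NotFound");    // true
isPlatformReason({ reason: "BadResource" }, "NotFound"); // false
isPlatformReason(null, "NotFound");                      // false (the !== null guard)
```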
@@ -749,7 +970,7 @@ const make$4 = Effect.gen(function* () {
 });
 const readConfig = () => Effect.gen(function* () {
 const content = yield* fs.readFileString(CONFIG_PATH).pipe(Effect.mapError((cause) => {
-if (isPlatformReason$1(cause, "NotFound")) return new ConfigNotFoundError({
+if (isPlatformReason(cause, "NotFound")) return new ConfigNotFoundError({
 message: "Config file not found. Run pix init first.",
 path: CONFIG_PATH,
 cause
@@ -784,10 +1005,7 @@ const make$3 = Effect.gen(function* () {
 const fs = yield* FileSystem.FileSystem;
 const processorMap = buildProcessorMap([]);
 const extract = (file) => {
-const
-const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
-const dotIndex = name.lastIndexOf(".");
-const ext = dotIndex === -1 ? name.toLowerCase() : name.slice(dotIndex).toLowerCase();
+const ext = getExtension(file);
 const processor = processorMap[ext];
 if (!processor) return Effect.fail({
 _tag: "UnsupportedFormat",
@@ -828,9 +1046,7 @@ const MODEL_REGISTRY = {
 };
 //#endregion
 //#region src/services/embedder.ts
-
-const BATCH_SIZE = 16;
-env.cacheDir = CACHE_DIR;
+env.cacheDir = ".pix/cache";
 const normalize = (arr) => {
 let norm = 0;
 for (let i = 0; i < arr.length; i++) norm += arr[i] * arr[i];
@@ -872,16 +1088,14 @@ const createExtractor = (opts) => Effect.tryPromise(async () => {
 model: opts.model,
 cause
 })));
-const createExtractorWithFallback = (opts) => {
+const createExtractorWithFallback = (opts, fallbackRef) => {
 if (opts.device === "cpu") return createExtractor(opts);
 return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
-
-yield*
-yield* d.json({
-event: "embedder_fallback",
+yield* (yield* Display).log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
+yield* Ref.set(fallbackRef, Option.some({
 originalDevice: opts.device,
 reason: originalError.message
-});
+}));
 return yield* createExtractor({
 ...opts,
 device: "cpu"
@@ -892,7 +1106,8 @@ const make$2 = Effect.gen(function* () {
 const configStore = yield* ConfigStore;
 const d = yield* Display;
 const cfg = yield* resolveEmbedderConfig(configStore);
-const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg));
+const fallbackRef = yield* Ref.make(Option.none());
+const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg, fallbackRef));
 const embed = (text) => Effect.gen(function* () {
 const extractor = yield* getExtractor;
 const data = (yield* Effect.tryPromise(() => extractor(text, {
@@ -909,31 +1124,30 @@ const make$2 = Effect.gen(function* () {
 }).pipe(Effect.provideService(Display, d));
 const batch = (texts) => Effect.gen(function* () {
 const extractor = yield* getExtractor;
+const tensor = yield* Effect.tryPromise(() => extractor([...texts], {
+pooling: "mean",
+normalize: false
+})).pipe(Effect.mapError((cause) => new InferenceError({
+message: "Batch embedding inference failed",
+cause
+})));
+const data = tensor.data;
+const n = tensor.dims[0];
 const results = [];
-for (let
-const
-
-
-
-})
-message: "Batch embedding inference failed",
-cause
-})));
-const data = tensor.data;
-const n = tensor.dims[0];
-for (let j = 0; j < n; j++) {
-const offset = j * cfg.dims;
-results.push(normalize(data.slice(offset, offset + cfg.dims)));
-}
+for (let j = 0; j < n; j++) {
+const offset = j * cfg.dims;
+results.push({
+vector: normalize(data.slice(offset, offset + cfg.dims)),
+dims: cfg.dims
+});
 }
-return results
-vector,
-dims: cfg.dims
-}));
+return results;
 }).pipe(Effect.provideService(Display, d));
+const getFallbackInfo = () => Ref.get(fallbackRef).pipe(Effect.map(Option.getOrElse(() => void 0)));
 return {
 embed,
-batch
+batch,
+getFallbackInfo
 };
 });
 const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
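
Why the offset loop in `batch` works: the extractor returns one flat `Float32Array` for the whole batch, laid out row-major, so row `j` starts at `j * dims`:

```js
// For n = 3 texts and dims = 384 (the MiniLM default in this bundle),
// tensor.data.length === 1152, and data.slice(2 * 384, 3 * 384) is the third
// text's raw vector, which the loop above L2-normalizes before returning.
```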
@@ -977,7 +1191,16 @@ const make$1 = Effect.gen(function* () {
 if (result.skipped) skipped.push(result.skipped);
 if (result.content.trim()) ig.add(result.content.split("\n"));
 });
-const loadGitignoreRules = (ignoredPaths
+const loadGitignoreRules = (ignoredPaths) => {
+const ig = ignore();
+const skipped = [];
+if (ignoredPaths.length > 0) ig.add(ignoredPaths);
+return Effect.succeed({
+ig,
+skipped
+});
+};
+const loadGitignoreRulesWithFiles = (ignoredPaths, cwd) => Effect.gen(function* () {
 const ig = ignore();
 const skipped = [];
 if (ignoredPaths.length > 0) ig.add(ignoredPaths);
@@ -1056,10 +1279,10 @@ const make$1 = Effect.gen(function* () {
 skipped
 };
 });
-const scanFiles = (ignoredPaths) => Effect.gen(function* () {
+const scanFiles = (ignoredPaths, ignoreGitignore) => Effect.gen(function* () {
 const cwd = process.cwd();
-const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules(ignoredPaths, cwd).pipe(Effect.mapError((cause) => new ScanFailed({
-message: `Failed to load
+const { ig, skipped: ignoreSkipped } = yield* (ignoreGitignore ? loadGitignoreRules(ignoredPaths) : loadGitignoreRulesWithFiles(ignoredPaths, cwd)).pipe(Effect.mapError((cause) => new ScanFailed({
+message: `Failed to load ignore rules: ${String(cause)}`,
 cause
 })));
 const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
@@ -1073,16 +1296,66 @@ const make$1 = Effect.gen(function* () {
 const ScannerLive = Layer.effect(Scanner, make$1);
 //#endregion
 //#region src/services/vector-store.ts
+/**
+* Parse a single JSON line from chunks.jsonl and normalize context fields (old indexes may lack
+* them).
+*/
+const parseChunkLine = (line) => {
+const raw = JSON.parse(line);
+return {
+file: typeof raw.file === "string" ? raw.file : "",
+startLine: typeof raw.startLine === "number" ? raw.startLine : 0,
+endLine: typeof raw.endLine === "number" ? raw.endLine : 0,
+text: typeof raw.text === "string" ? raw.text : "",
+contextBefore: typeof raw.contextBefore === "string" ? raw.contextBefore : null,
+contextAfter: typeof raw.contextAfter === "string" ? raw.contextAfter : null
+};
+};
+/** Compute dot-product similarity between a chunk vector and the query embedding. */
+const computeDotProduct = (chunkVector, query) => {
+let dot = 0;
+for (let j = 0; j < query.dims; j++) dot += chunkVector[j] * query.vector[j];
+return dot;
+};
 const STORE_DIR = ".pix";
 const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
 const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
-
+/**
+* Serialize a Chunk to a JSON object for storage in chunks.jsonl. Always includes context fields
+* for schema consistency.
+*/
+const serializeChunk = (c) => ({
+id: c.id,
+idx: c.idx,
+file: c.file,
+startLine: c.startLine,
+endLine: c.endLine,
+text: c.text,
+contextBefore: c.contextBefore,
+contextAfter: c.contextAfter
+});
 /**
 * FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
 * statistics.
 */
 const make = Effect.gen(function* () {
 const fs = yield* FileSystem.FileSystem;
+const chunksTemp = `${CHUNKS_FILE}.tmp`;
+const vectorsTemp = `${VECTORS_FILE}.tmp`;
+const seenFiles = yield* Ref.make(/* @__PURE__ */ new Set());
+const statsAccumulator = yield* Ref.make({
+chunks: 0,
+files: 0,
+totalLines: 0,
+byteSize: 0
+});
+const serializeVectors = (embeddings) => {
+const dims = embeddings[0]?.dims ?? 384;
+const totalFloats = embeddings.length * dims;
+const vectorsArray = new Float32Array(totalFloats);
+for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
+return Buffer.from(vectorsArray.buffer);
+};
 /**
 * Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
 * field contains the source code.
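
A round-trip check of the binary vector format (names from this diff, shown outside their closure, with `dims` shrunk to 4 for the sketch):

```js
// serializeVectors packs embeddings row-major as float32; reading them back is
// a Float32Array view over the buffer, exactly as search() does below.
const buf = serializeVectors([
  { vector: Float32Array.from([1, 0, 0, 0]), dims: 4 },
  { vector: Float32Array.from([0, 1, 0, 0]), dims: 4 }
]);
const vectors = new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4);
vectors.slice(1 * 4, 2 * 4); // Float32Array [0, 1, 0, 0], the second stored vector
```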
@@ -1145,44 +1418,82 @@ const make = Effect.gen(function* () {
 deleted: true
 };
 });
+const storeBegin = () => Effect.gen(function* () {
+yield* ensureDirExists(STORE_DIR, ".pix directory");
+yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
+yield* Ref.set(statsAccumulator, {
+chunks: 0,
+files: 0,
+totalLines: 0,
+byteSize: 0
+});
+if (yield* withStoreError(fs.exists(chunksTemp), "check chunks temp")) yield* withStoreError(fs.remove(chunksTemp), "clean stale chunks temp", chunksTemp);
+if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
+});
+const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
+const content = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n") + "\n";
+yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
+const buffer = serializeVectors(embeddings);
+yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
+const dims = embeddings[0]?.dims ?? 384;
+const batchLines = chunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
+const batchBytes = embeddings.length * dims * 4;
+yield* Ref.update(seenFiles, (prev) => {
+for (const c of chunks) prev.add(c.file);
+return prev;
+});
+yield* Ref.update(statsAccumulator, (prev) => ({
+chunks: prev.chunks + chunks.length,
+files: 0,
+totalLines: prev.totalLines + batchLines,
+byteSize: prev.byteSize + batchBytes
+}));
+});
+const storeCommit = () => Effect.gen(function* () {
+yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
+yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
+const stats = yield* Ref.get(statsAccumulator);
+const files = yield* Ref.get(seenFiles);
+yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
+return {
+...stats,
+files: files.size
+};
+});
+const storeAbort = () => Effect.gen(function* () {
+yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
+if (yield* withReadError(fs.exists(chunksTemp), "check chunks temp")) yield* withReadError(fs.remove(chunksTemp), "abort chunks temp", chunksTemp);
+if (yield* withReadError(fs.exists(vectorsTemp), "check vectors temp")) yield* withReadError(fs.remove(vectorsTemp), "abort vectors temp", vectorsTemp);
+});
 const store = (chunks, embeddings) => Effect.gen(function* () {
 yield* ensureDirExists(STORE_DIR, ".pix directory");
 const chunksTemp = `${CHUNKS_FILE}.tmp`;
-const chunksLines = chunks.map((c) => JSON.stringify({
-
-idx: c.idx,
-file: c.file,
-startLine: c.startLine,
-endLine: c.endLine,
-text: c.text
-}));
-yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
+const chunksJson = chunks.map((c) => JSON.stringify(serializeChunk(c))).join("\n");
+yield* withStoreError(fs.writeFileString(chunksTemp, chunksJson), "write chunks", chunksTemp);
 yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
 const vectorsTemp = `${VECTORS_FILE}.tmp`;
-const dims = embeddings[0]?.dims ?? 384;
-const totalFloats = embeddings.length * dims;
-const vectorsArray = new Float32Array(totalFloats);
-for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
-const buffer = Buffer.from(vectorsArray.buffer);
+const buffer = serializeVectors(embeddings);
 yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
 yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
 });
-const search = (query,
+const search = (query, options) => Effect.gen(function* () {
 const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
 const vectorsExists = yield* withReadError(fs.exists(VECTORS_FILE), "check vectors file");
 if (!chunksExists || !vectorsExists) return yield* new NoIndexError({ message: "No index found. Run pix index first." });
 const chunkLines = (yield* withReadError(fs.readFileString(CHUNKS_FILE), "read chunks", CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
 const vectorsBuffer = yield* withReadError(fs.readFile(VECTORS_FILE), "read vectors", VECTORS_FILE);
-const vectors = new Float32Array(vectorsBuffer.buffer);
+const vectors = new Float32Array(vectorsBuffer.buffer, vectorsBuffer.byteOffset, vectorsBuffer.byteLength / Float32Array.BYTES_PER_ELEMENT);
+const ignoreIg = options?.ignorePaths?.length ? ignore().add([...options.ignorePaths]) : null;
+const onlyIg = options?.onlyPaths?.length ? ignore().add([...options.onlyPaths]) : null;
 const results = [];
 for (let i = 0; i < chunkLines.length; i++) try {
-const chunk = JSON.parse(chunkLines[i]);
+const chunk = parseChunkLine(chunkLines[i]);
+if (ignoreIg && ignoreIg.ignores(chunk.file)) continue;
+if (onlyIg && !onlyIg.ignores(chunk.file)) continue;
 const startIdx = i * query.dims;
-const chunkVector = vectors.slice(startIdx, startIdx + query.dims);
-let dotProduct = 0;
-for (let j = 0; j < query.dims; j++) dotProduct += chunkVector[j] * query.vector[j];
+const score = computeDotProduct(vectors.slice(startIdx, startIdx + query.dims), query);
 results.push({
-score: dotProduct,
+score,
 file: chunk.file,
 startLine: chunk.startLine,
 endLine: chunk.endLine,
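
Note the inverted use of the `ignore` package for `--only-path`: a chunk is kept only when the only-pattern would *ignore* it. Expected behavior:

```js
import ignore from "ignore";

// --ignore-path drops matches; --only-path keeps matches (the negated test above).
const onlyIg = ignore().add(["src/**"]);
onlyIg.ignores("src/auth/middleware.ts"); // true  -> chunk kept
onlyIg.ignores("docs/readme.md");         // false -> chunk skipped
```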
@@ -1192,7 +1503,10 @@ const make = Effect.gen(function* () {
 });
 } catch {}
 results.sort((a, b) => b.score - a.score);
-
+const topK = options?.topK;
+if (topK == null) return results;
+const clamped = Math.max(0, Math.min(Math.floor(topK), results.length));
+return results.slice(0, clamped);
 });
 const getStatus = () => Effect.gen(function* () {
 const chunksExists = yield* withReadError(fs.exists(CHUNKS_FILE), "check chunks file");
@@ -1232,6 +1546,10 @@ const make = Effect.gen(function* () {
 });
 return {
 store,
+storeBegin,
+storeBatch,
+storeCommit,
+storeAbort,
 search,
 getStatus,
 reset