@lucas-bur/pix 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +323 -118
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import { Args, CliConfig, Command, Options } from "@effect/cli";
|
|
4
4
|
import { NodeContext, NodeRuntime } from "@effect/platform-node";
|
|
5
|
-
import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
|
|
5
|
+
import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
|
|
6
|
+
import * as Chunk from "effect/Chunk";
|
|
6
7
|
import { styleText } from "node:util";
|
|
7
8
|
import * as clack from "@clack/prompts";
|
|
8
9
|
import { FileSystem } from "@effect/platform";
|
|
@@ -74,6 +75,7 @@ const withInteractive = (activeRef, acquire, setActive, release, effect) => Ref.
|
|
|
74
75
|
/** Display implementation using @clack/prompts for interactive terminal output */
|
|
75
76
|
const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
|
|
76
77
|
const activeRef = yield* Ref.make(null);
|
|
78
|
+
const lastSpinnerMsg = yield* Ref.make("");
|
|
77
79
|
return {
|
|
78
80
|
intro: (title) => Effect.sync(() => clack.intro(styleText("inverse", ` ${title} `))),
|
|
79
81
|
outro: (message) => Effect.sync(() => clack.outro(message)),
|
|
@@ -84,11 +86,17 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
|
|
|
84
86
|
const s = clack.spinner();
|
|
85
87
|
s.start(message);
|
|
86
88
|
return s;
|
|
87
|
-
}), (s) => ({
|
|
89
|
+
}).pipe(Effect.tap(() => Ref.set(lastSpinnerMsg, message))), (s) => ({
|
|
88
90
|
type: "spinner",
|
|
89
91
|
handle: s
|
|
90
|
-
}), (s, exit) => Effect.sync(() => s.stop(exit._tag === "Success" ?
|
|
91
|
-
progress: (opts, effect) =>
|
|
92
|
+
}), (s, exit) => lastSpinnerMsg.pipe(Effect.flatMap((lastMsg) => Effect.sync(() => s.stop(exit._tag === "Success" && lastMsg ? lastMsg : `${message} (failed)`)))), effect),
|
|
93
|
+
progress: (opts, effect) => Effect.gen(function* () {
|
|
94
|
+
const current = yield* Ref.get(activeRef);
|
|
95
|
+
if (current && current.type === "spinner") {
|
|
96
|
+
const msg = yield* Ref.get(lastSpinnerMsg);
|
|
97
|
+
current.handle.stop(msg || opts.message);
|
|
98
|
+
yield* Ref.set(activeRef, null);
|
|
99
|
+
}
|
|
92
100
|
const bar = clack.progress({
|
|
93
101
|
max: opts.max,
|
|
94
102
|
style: opts.style ?? "heavy",
|
|
@@ -96,16 +104,27 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
|
|
|
96
104
|
indicator: opts.indicator ?? "dots"
|
|
97
105
|
});
|
|
98
106
|
bar.start(opts.message);
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
107
|
+
yield* Ref.set(activeRef, {
|
|
108
|
+
type: "progress",
|
|
109
|
+
handle: bar,
|
|
110
|
+
value: 0,
|
|
111
|
+
max: opts.max
|
|
112
|
+
});
|
|
113
|
+
const exit = yield* Effect.exit(effect);
|
|
114
|
+
yield* Ref.set(activeRef, null);
|
|
115
|
+
if (Exit.isSuccess(exit)) {
|
|
116
|
+
bar.stop(opts.message);
|
|
117
|
+
return exit.value;
|
|
118
|
+
}
|
|
119
|
+
bar.error(opts.message);
|
|
120
|
+
return yield* Effect.failCause(exit.cause);
|
|
121
|
+
}),
|
|
106
122
|
updateInteractive: (payload) => Ref.get(activeRef).pipe(Effect.flatMap((active) => {
|
|
107
123
|
if (!active) return Effect.void;
|
|
108
|
-
if (active.type === "spinner")
|
|
124
|
+
if (active.type === "spinner") {
|
|
125
|
+
const msg = payloadText(payload);
|
|
126
|
+
return Effect.sync(() => active.handle.message(msg)).pipe(Effect.andThen(Ref.set(lastSpinnerMsg, msg)));
|
|
127
|
+
}
|
|
109
128
|
const delta = computeDelta(payload, {
|
|
110
129
|
value: active.value,
|
|
111
130
|
max: active.max
|
|
@@ -149,19 +168,19 @@ const DEFAULT_CONFIG = {
|
|
|
149
168
|
"dist",
|
|
150
169
|
"build",
|
|
151
170
|
".next",
|
|
152
|
-
".agents",
|
|
153
|
-
".claude",
|
|
154
171
|
".vscode",
|
|
155
|
-
".github",
|
|
156
172
|
"coverage",
|
|
157
173
|
"*-lock.yaml",
|
|
158
174
|
"*-lock.json",
|
|
159
|
-
"*.lock"
|
|
175
|
+
"*.lock",
|
|
176
|
+
".vite-hooks",
|
|
177
|
+
".fallow"
|
|
160
178
|
],
|
|
161
179
|
embedder: {
|
|
162
180
|
model: "Xenova/all-MiniLM-L6-v2",
|
|
163
181
|
device: "auto",
|
|
164
|
-
dtype: "fp32"
|
|
182
|
+
dtype: "fp32",
|
|
183
|
+
batchSize: 16
|
|
165
184
|
}
|
|
166
185
|
};
|
|
167
186
|
//#endregion
|
|
@@ -289,6 +308,13 @@ function buildProcessorMap(skipExtensions) {
|
|
|
289
308
|
}
|
|
290
309
|
//#endregion
|
|
291
310
|
//#region src/application/index-project.ts
|
|
311
|
+
const deriveEffectiveConfig = (opts, config) => ({
|
|
312
|
+
batchSize: opts.batchSize ?? config.embedder.batchSize ?? 16,
|
|
313
|
+
concurrency: Math.max(1, opts.chunkConcurrency ?? config.chunkConcurrency ?? 8),
|
|
314
|
+
skipExtensions: opts.skipExtensions ? [...config.skipExtensions, ...opts.skipExtensions] : config.skipExtensions,
|
|
315
|
+
ignoredPaths: opts.ignorePaths ? [...config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths, ...opts.ignorePaths] : config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths,
|
|
316
|
+
ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
|
|
317
|
+
});
|
|
292
318
|
function getExtension(file) {
|
|
293
319
|
const lastSlash = file.lastIndexOf("/");
|
|
294
320
|
const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
|
|
@@ -313,11 +339,19 @@ const classifyFiles = (files, processorMap) => {
|
|
|
313
339
|
unknownExtensions
|
|
314
340
|
};
|
|
315
341
|
};
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
342
|
+
const classifyAndCollectChunks = (knownFiles, extractor, chunker, concurrency, skipped) => Stream.fromIterable(knownFiles).pipe(Stream.mapEffect((file) => extractor.extract(file).pipe(Effect.flatMap((text) => Effect.succeed(Option.some({
|
|
343
|
+
file,
|
|
344
|
+
text
|
|
345
|
+
}))), Effect.catchAll((err) => Ref.update(skipped, (prev) => [...prev, {
|
|
346
|
+
path: file,
|
|
347
|
+
reason: err.message
|
|
348
|
+
}]).pipe(Effect.flatMap(() => Effect.succeed(Option.none()))))), { concurrency }), Stream.filterMap((opt) => opt), Stream.mapEffect(({ file, text }) => chunker.chunkText(text, file), { concurrency }), Stream.flatMap((chunks) => Stream.fromIterable(chunks)), Stream.runCollect, Effect.map((allChunks) => {
|
|
349
|
+
const chunks = Chunk.toArray(allChunks);
|
|
350
|
+
return {
|
|
351
|
+
chunks,
|
|
352
|
+
totalChunks: chunks.length
|
|
353
|
+
};
|
|
354
|
+
}));
|
|
321
355
|
var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
322
356
|
accessors: true,
|
|
323
357
|
effect: Effect.gen(function* () {
|
|
@@ -328,65 +362,123 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
328
362
|
const vectorStore = yield* VectorStore;
|
|
329
363
|
const d = yield* Display;
|
|
330
364
|
const extractor = yield* ContentExtractor;
|
|
331
|
-
const index = () => Effect.gen(function* () {
|
|
365
|
+
const index = (opts = {}) => Effect.gen(function* () {
|
|
366
|
+
const start = Date.now();
|
|
332
367
|
if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
|
|
333
|
-
const
|
|
334
|
-
const processorMap = buildProcessorMap(
|
|
368
|
+
const eff = deriveEffectiveConfig(opts, yield* configStore.readConfig());
|
|
369
|
+
const processorMap = buildProcessorMap(eff.skipExtensions);
|
|
335
370
|
yield* d.updateInteractive("Scanning source files...");
|
|
336
|
-
const
|
|
337
|
-
const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
371
|
+
const scanResult = yield* scanner.scanFiles(eff.ignoredPaths, eff.ignoreGitignore);
|
|
372
|
+
const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(scanResult.files, processorMap);
|
|
373
|
+
const skipped = yield* Ref.make(scanResult.skipped.filter((s) => !s.reason.startsWith("Ignored by config pattern")).map((s) => ({
|
|
374
|
+
path: s.path,
|
|
375
|
+
reason: s.reason
|
|
376
|
+
})));
|
|
377
|
+
if (unknownExtensions.size > 0) yield* Ref.update(skipped, (prev) => [...prev, ...skippedFiles.map((f) => ({
|
|
378
|
+
path: f,
|
|
379
|
+
reason: "unknown extension"
|
|
380
|
+
}))]);
|
|
381
|
+
if (knownFiles.length === 0) {
|
|
382
|
+
yield* displaySkippedNote(d, yield* Ref.get(skipped));
|
|
383
|
+
return {
|
|
384
|
+
success: true,
|
|
385
|
+
status: {
|
|
386
|
+
chunks: 0,
|
|
387
|
+
files: 0,
|
|
388
|
+
totalLines: 0,
|
|
389
|
+
byteSize: 0
|
|
390
|
+
},
|
|
391
|
+
durationMs: Date.now() - start
|
|
392
|
+
};
|
|
393
|
+
}
|
|
348
394
|
yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
|
|
349
|
-
const
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
395
|
+
const { chunks, totalChunks } = yield* classifyAndCollectChunks(knownFiles, extractor, chunker, eff.concurrency, skipped);
|
|
396
|
+
if (totalChunks === 0) {
|
|
397
|
+
yield* displaySkippedNote(d, yield* Ref.get(skipped));
|
|
398
|
+
return {
|
|
399
|
+
success: true,
|
|
400
|
+
status: {
|
|
401
|
+
chunks: 0,
|
|
402
|
+
files: 0,
|
|
403
|
+
totalLines: 0,
|
|
404
|
+
byteSize: 0
|
|
405
|
+
},
|
|
406
|
+
durationMs: Date.now() - start
|
|
407
|
+
};
|
|
408
|
+
}
|
|
409
|
+
yield* vectorStore.storeBegin();
|
|
410
|
+
const embeddedRef = yield* Ref.make(0);
|
|
411
|
+
const stats = yield* d.progress({
|
|
412
|
+
message: `Embedding ${totalChunks} chunks...`,
|
|
413
|
+
max: totalChunks
|
|
414
|
+
}, Stream.fromIterable(chunks).pipe(Stream.grouped(eff.batchSize), Stream.mapEffect((batchChunk) => Effect.gen(function* () {
|
|
415
|
+
const batch = Chunk.toArray(batchChunk);
|
|
416
|
+
const texts = batch.map((c) => c.text);
|
|
417
|
+
const embeddings = yield* embedder.batch(texts);
|
|
418
|
+
yield* vectorStore.storeBatch(batch, embeddings);
|
|
419
|
+
const count = yield* Ref.updateAndGet(embeddedRef, (n) => n + batch.length);
|
|
420
|
+
yield* d.updateInteractive({
|
|
421
|
+
message: `Embedding ${count} of ${totalChunks} chunks`,
|
|
422
|
+
setTo: count
|
|
423
|
+
});
|
|
424
|
+
})), Stream.runDrain, Effect.matchEffect({
|
|
425
|
+
onSuccess: () => vectorStore.storeCommit(),
|
|
426
|
+
onFailure: (err) => vectorStore.storeAbort().pipe(Effect.flatMap(() => Effect.fail(err)))
|
|
427
|
+
})));
|
|
428
|
+
yield* displaySkippedNote(d, yield* Ref.get(skipped));
|
|
429
|
+
const durationSec = ((Date.now() - start) / 1e3).toFixed(1);
|
|
430
|
+
yield* d.log(`Indexed ${stats.chunks} chunks from ${stats.files} files in ${durationSec}s`, "success");
|
|
431
|
+
const fallbackInfo = yield* embedder.getFallbackInfo();
|
|
377
432
|
return {
|
|
378
433
|
success: true,
|
|
379
434
|
status: {
|
|
380
|
-
chunks:
|
|
381
|
-
files:
|
|
382
|
-
totalLines,
|
|
383
|
-
byteSize:
|
|
384
|
-
}
|
|
435
|
+
chunks: stats.chunks,
|
|
436
|
+
files: stats.files,
|
|
437
|
+
totalLines: stats.totalLines,
|
|
438
|
+
byteSize: stats.byteSize
|
|
439
|
+
},
|
|
440
|
+
durationMs: Date.now() - start,
|
|
441
|
+
embedderFallback: fallbackInfo
|
|
385
442
|
};
|
|
386
443
|
});
|
|
387
444
|
return { index };
|
|
388
445
|
})
|
|
389
446
|
}) {};
|
|
447
|
+
const getFilename = (path) => path.split("/").pop() ?? path;
|
|
448
|
+
const getFileExtension = (filename) => {
|
|
449
|
+
const dotIndex = filename.lastIndexOf(".");
|
|
450
|
+
return dotIndex >= 0 ? filename.slice(dotIndex) : "(no extension)";
|
|
451
|
+
};
|
|
452
|
+
const groupByExtension = (entries) => {
|
|
453
|
+
const byExt = /* @__PURE__ */ new Map();
|
|
454
|
+
for (const s of entries) {
|
|
455
|
+
const name = getFilename(s.path);
|
|
456
|
+
const ext = getFileExtension(name);
|
|
457
|
+
if (!byExt.has(ext)) byExt.set(ext, []);
|
|
458
|
+
byExt.get(ext).push(name);
|
|
459
|
+
}
|
|
460
|
+
return byExt;
|
|
461
|
+
};
|
|
462
|
+
const formatFileList = (files, maxDisplay = 5) => files.length > maxDisplay ? `${files.slice(0, maxDisplay).join(", ")} +${files.length - maxDisplay} more` : files.join(", ");
|
|
463
|
+
const buildSkippedLines = (extFailures, extractErrors) => {
|
|
464
|
+
const lines = [];
|
|
465
|
+
if (extFailures.length > 0) {
|
|
466
|
+
lines.push(`Unknown extensions (${extFailures.length})`);
|
|
467
|
+
for (const [ext, files] of groupByExtension(extFailures)) lines.push(` ${ext} (${files.length}): ${formatFileList(files)}`);
|
|
468
|
+
}
|
|
469
|
+
if (extractErrors.length > 0) {
|
|
470
|
+
if (lines.length > 0) lines.push("");
|
|
471
|
+
lines.push(`Extraction errors (${extractErrors.length})`);
|
|
472
|
+
for (const s of extractErrors) lines.push(` ${getFilename(s.path)}: ${s.reason}`);
|
|
473
|
+
}
|
|
474
|
+
return lines;
|
|
475
|
+
};
|
|
476
|
+
const displaySkippedNote = (d, skipped) => {
|
|
477
|
+
if (skipped.length === 0) return Effect.void;
|
|
478
|
+
const extFailures = skipped.filter((s) => s.reason === "unknown extension");
|
|
479
|
+
const extractErrors = skipped.filter((s) => s.reason !== "unknown extension");
|
|
480
|
+
return d.note(buildSkippedLines(extFailures, extractErrors).join("\n"), `Skipped ${skipped.length} files`);
|
|
481
|
+
};
|
|
390
482
|
//#endregion
|
|
391
483
|
//#region src/application/init-project.ts
|
|
392
484
|
/**
|
|
@@ -475,22 +567,60 @@ const reportError = (error) => Effect.gen(function* () {
|
|
|
475
567
|
});
|
|
476
568
|
//#endregion
|
|
477
569
|
//#region src/commands/index-cmd.ts
|
|
478
|
-
|
|
570
|
+
const batchSizeOption = Options.integer("batch-size").pipe(Options.withAlias("b"), Options.optional);
|
|
571
|
+
const chunkConcurrencyOption = Options.integer("chunk-concurrency").pipe(Options.withAlias("c"), Options.optional);
|
|
572
|
+
const skipExtensionsOption = Options.text("skip-extensions").pipe(Options.withAlias("s"), Options.repeated);
|
|
573
|
+
const ignorePathOption = Options.text("ignore-path").pipe(Options.repeated);
|
|
574
|
+
const ignorePathsOption = Options.text("ignore-paths").pipe(Options.repeated);
|
|
575
|
+
const ignoreGitignoreOption = Options.boolean("ignore-gitignore").pipe(Options.withDefault(false));
|
|
576
|
+
const splitCsv = (values) => values.flatMap((v) => v.split(",").map((s) => s.trim()).filter((s) => s.length > 0));
|
|
577
|
+
const buildIndexOptions = (args) => {
|
|
578
|
+
if (Option.isSome(args.batchSize) && args.batchSize.value <= 0) throw new Error(`--batch-size must be positive, got ${args.batchSize.value}`);
|
|
579
|
+
if (Option.isSome(args.chunkConcurrency) && args.chunkConcurrency.value <= 0) throw new Error(`--chunk-concurrency must be positive, got ${args.chunkConcurrency.value}`);
|
|
580
|
+
const cliSkipExtensions = splitCsv(args.skipExtensions);
|
|
581
|
+
const cliIgnorePaths = [...args.ignorePath.map((s) => s.trim()).filter((s) => s.length > 0), ...splitCsv(args.ignorePaths)];
|
|
582
|
+
return {
|
|
583
|
+
batchSize: Option.getOrUndefined(args.batchSize),
|
|
584
|
+
chunkConcurrency: Option.getOrUndefined(args.chunkConcurrency),
|
|
585
|
+
skipExtensions: cliSkipExtensions.length > 0 ? cliSkipExtensions : void 0,
|
|
586
|
+
ignorePaths: cliIgnorePaths.length > 0 ? cliIgnorePaths : void 0,
|
|
587
|
+
ignoreGitignore: args.ignoreGitignore || void 0
|
|
588
|
+
};
|
|
589
|
+
};
|
|
590
|
+
const emitIndexResult = (d, result) => Effect.gen(function* () {
|
|
591
|
+
yield* d.json({
|
|
592
|
+
chunks: result.status.chunks,
|
|
593
|
+
files: result.status.files,
|
|
594
|
+
totalLines: result.status.totalLines,
|
|
595
|
+
byteSize: result.status.byteSize,
|
|
596
|
+
durationMs: result.durationMs,
|
|
597
|
+
...result.embedderFallback && { embedderFallback: result.embedderFallback }
|
|
598
|
+
});
|
|
599
|
+
if (result.status.chunks === 0) yield* d.log("No chunks to index.", "warn");
|
|
600
|
+
});
|
|
479
601
|
const indexCommand = Command.make("index", {
|
|
480
602
|
force: Options.boolean("force").pipe(Options.withDefault(false)),
|
|
481
603
|
verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
|
|
482
|
-
json: Options.boolean("json").pipe(Options.withDefault(false))
|
|
483
|
-
|
|
604
|
+
json: Options.boolean("json").pipe(Options.withDefault(false)),
|
|
605
|
+
batchSize: batchSizeOption,
|
|
606
|
+
chunkConcurrency: chunkConcurrencyOption,
|
|
607
|
+
skipExtensions: skipExtensionsOption,
|
|
608
|
+
ignorePath: ignorePathOption,
|
|
609
|
+
ignorePaths: ignorePathsOption,
|
|
610
|
+
ignoreGitignore: ignoreGitignoreOption
|
|
611
|
+
}, ({ force, verbose, batchSize, chunkConcurrency, skipExtensions, ignorePath, ignorePaths, ignoreGitignore }) => Effect.gen(function* () {
|
|
484
612
|
const d = yield* Display;
|
|
485
613
|
if (force) yield* d.log("--force is currently not implemented and only a placeholder.", "warn");
|
|
486
614
|
if (verbose) yield* d.log("--verbose is currently not implemented and only a placeholder.", "warn");
|
|
487
|
-
const
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
615
|
+
const options = buildIndexOptions({
|
|
616
|
+
batchSize,
|
|
617
|
+
chunkConcurrency,
|
|
618
|
+
skipExtensions,
|
|
619
|
+
ignorePath,
|
|
620
|
+
ignorePaths,
|
|
621
|
+
ignoreGitignore
|
|
491
622
|
});
|
|
492
|
-
|
|
493
|
-
else yield* d.log(`Indexed ${result.status.chunks} chunks from ${result.status.files} files.`, "success");
|
|
623
|
+
yield* emitIndexResult(d, yield* d.spinner("Indexing project...", IndexProject.index(options)));
|
|
494
624
|
}).pipe(Effect.catchAll(reportError)));
|
|
495
625
|
//#endregion
|
|
496
626
|
//#region src/commands/init.ts
|
|
@@ -828,9 +958,7 @@ const MODEL_REGISTRY = {
|
|
|
828
958
|
};
|
|
829
959
|
//#endregion
|
|
830
960
|
//#region src/services/embedder.ts
|
|
831
|
-
|
|
832
|
-
const BATCH_SIZE = 16;
|
|
833
|
-
env.cacheDir = CACHE_DIR;
|
|
961
|
+
env.cacheDir = ".pix/cache";
|
|
834
962
|
const normalize = (arr) => {
|
|
835
963
|
let norm = 0;
|
|
836
964
|
for (let i = 0; i < arr.length; i++) norm += arr[i] * arr[i];
|
|
@@ -872,16 +1000,14 @@ const createExtractor = (opts) => Effect.tryPromise(async () => {
|
|
|
872
1000
|
model: opts.model,
|
|
873
1001
|
cause
|
|
874
1002
|
})));
|
|
875
|
-
const createExtractorWithFallback = (opts) => {
|
|
1003
|
+
const createExtractorWithFallback = (opts, fallbackRef) => {
|
|
876
1004
|
if (opts.device === "cpu") return createExtractor(opts);
|
|
877
1005
|
return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
|
|
878
|
-
|
|
879
|
-
yield*
|
|
880
|
-
yield* d.json({
|
|
881
|
-
event: "embedder_fallback",
|
|
1006
|
+
yield* (yield* Display).log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
|
|
1007
|
+
yield* Ref.set(fallbackRef, Option.some({
|
|
882
1008
|
originalDevice: opts.device,
|
|
883
1009
|
reason: originalError.message
|
|
884
|
-
});
|
|
1010
|
+
}));
|
|
885
1011
|
return yield* createExtractor({
|
|
886
1012
|
...opts,
|
|
887
1013
|
device: "cpu"
|
|
@@ -892,7 +1018,8 @@ const make$2 = Effect.gen(function* () {
|
|
|
892
1018
|
const configStore = yield* ConfigStore;
|
|
893
1019
|
const d = yield* Display;
|
|
894
1020
|
const cfg = yield* resolveEmbedderConfig(configStore);
|
|
895
|
-
const
|
|
1021
|
+
const fallbackRef = yield* Ref.make(Option.none());
|
|
1022
|
+
const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg, fallbackRef));
|
|
896
1023
|
const embed = (text) => Effect.gen(function* () {
|
|
897
1024
|
const extractor = yield* getExtractor;
|
|
898
1025
|
const data = (yield* Effect.tryPromise(() => extractor(text, {
|
|
@@ -909,31 +1036,30 @@ const make$2 = Effect.gen(function* () {
|
|
|
909
1036
|
}).pipe(Effect.provideService(Display, d));
|
|
910
1037
|
const batch = (texts) => Effect.gen(function* () {
|
|
911
1038
|
const extractor = yield* getExtractor;
|
|
1039
|
+
const tensor = yield* Effect.tryPromise(() => extractor([...texts], {
|
|
1040
|
+
pooling: "mean",
|
|
1041
|
+
normalize: false
|
|
1042
|
+
})).pipe(Effect.mapError((cause) => new InferenceError({
|
|
1043
|
+
message: "Batch embedding inference failed",
|
|
1044
|
+
cause
|
|
1045
|
+
})));
|
|
1046
|
+
const data = tensor.data;
|
|
1047
|
+
const n = tensor.dims[0];
|
|
912
1048
|
const results = [];
|
|
913
|
-
for (let
|
|
914
|
-
const
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
})
|
|
919
|
-
message: "Batch embedding inference failed",
|
|
920
|
-
cause
|
|
921
|
-
})));
|
|
922
|
-
const data = tensor.data;
|
|
923
|
-
const n = tensor.dims[0];
|
|
924
|
-
for (let j = 0; j < n; j++) {
|
|
925
|
-
const offset = j * cfg.dims;
|
|
926
|
-
results.push(normalize(data.slice(offset, offset + cfg.dims)));
|
|
927
|
-
}
|
|
1049
|
+
for (let j = 0; j < n; j++) {
|
|
1050
|
+
const offset = j * cfg.dims;
|
|
1051
|
+
results.push({
|
|
1052
|
+
vector: normalize(data.slice(offset, offset + cfg.dims)),
|
|
1053
|
+
dims: cfg.dims
|
|
1054
|
+
});
|
|
928
1055
|
}
|
|
929
|
-
return results
|
|
930
|
-
vector,
|
|
931
|
-
dims: cfg.dims
|
|
932
|
-
}));
|
|
1056
|
+
return results;
|
|
933
1057
|
}).pipe(Effect.provideService(Display, d));
|
|
1058
|
+
const getFallbackInfo = () => Ref.get(fallbackRef).pipe(Effect.map(Option.getOrElse(() => void 0)));
|
|
934
1059
|
return {
|
|
935
1060
|
embed,
|
|
936
|
-
batch
|
|
1061
|
+
batch,
|
|
1062
|
+
getFallbackInfo
|
|
937
1063
|
};
|
|
938
1064
|
});
|
|
939
1065
|
const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
|
|
@@ -977,7 +1103,16 @@ const make$1 = Effect.gen(function* () {
|
|
|
977
1103
|
if (result.skipped) skipped.push(result.skipped);
|
|
978
1104
|
if (result.content.trim()) ig.add(result.content.split("\n"));
|
|
979
1105
|
});
|
|
980
|
-
const loadGitignoreRules = (ignoredPaths
|
|
1106
|
+
const loadGitignoreRules = (ignoredPaths) => {
|
|
1107
|
+
const ig = ignore();
|
|
1108
|
+
const skipped = [];
|
|
1109
|
+
if (ignoredPaths.length > 0) ig.add(ignoredPaths);
|
|
1110
|
+
return Effect.succeed({
|
|
1111
|
+
ig,
|
|
1112
|
+
skipped
|
|
1113
|
+
});
|
|
1114
|
+
};
|
|
1115
|
+
const loadGitignoreRulesWithFiles = (ignoredPaths, cwd) => Effect.gen(function* () {
|
|
981
1116
|
const ig = ignore();
|
|
982
1117
|
const skipped = [];
|
|
983
1118
|
if (ignoredPaths.length > 0) ig.add(ignoredPaths);
|
|
@@ -1056,10 +1191,10 @@ const make$1 = Effect.gen(function* () {
|
|
|
1056
1191
|
skipped
|
|
1057
1192
|
};
|
|
1058
1193
|
});
|
|
1059
|
-
const scanFiles = (ignoredPaths) => Effect.gen(function* () {
|
|
1194
|
+
const scanFiles = (ignoredPaths, ignoreGitignore) => Effect.gen(function* () {
|
|
1060
1195
|
const cwd = process.cwd();
|
|
1061
|
-
const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules(ignoredPaths, cwd).pipe(Effect.mapError((cause) => new ScanFailed({
|
|
1062
|
-
message: `Failed to load
|
|
1196
|
+
const { ig, skipped: ignoreSkipped } = yield* (ignoreGitignore ? loadGitignoreRules(ignoredPaths) : loadGitignoreRulesWithFiles(ignoredPaths, cwd)).pipe(Effect.mapError((cause) => new ScanFailed({
|
|
1197
|
+
message: `Failed to load ignore rules: ${String(cause)}`,
|
|
1063
1198
|
cause
|
|
1064
1199
|
})));
|
|
1065
1200
|
const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
|
|
@@ -1083,6 +1218,22 @@ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !
|
|
|
1083
1218
|
*/
|
|
1084
1219
|
const make = Effect.gen(function* () {
|
|
1085
1220
|
const fs = yield* FileSystem.FileSystem;
|
|
1221
|
+
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
1222
|
+
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
1223
|
+
const seenFiles = yield* Ref.make(/* @__PURE__ */ new Set());
|
|
1224
|
+
const statsAccumulator = yield* Ref.make({
|
|
1225
|
+
chunks: 0,
|
|
1226
|
+
files: 0,
|
|
1227
|
+
totalLines: 0,
|
|
1228
|
+
byteSize: 0
|
|
1229
|
+
});
|
|
1230
|
+
const serializeVectors = (embeddings) => {
|
|
1231
|
+
const dims = embeddings[0]?.dims ?? 384;
|
|
1232
|
+
const totalFloats = embeddings.length * dims;
|
|
1233
|
+
const vectorsArray = new Float32Array(totalFloats);
|
|
1234
|
+
for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
|
|
1235
|
+
return Buffer.from(vectorsArray.buffer);
|
|
1236
|
+
};
|
|
1086
1237
|
/**
|
|
1087
1238
|
* Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
|
|
1088
1239
|
* field contains the source code.
|
|
@@ -1145,6 +1296,60 @@ const make = Effect.gen(function* () {
|
|
|
1145
1296
|
deleted: true
|
|
1146
1297
|
};
|
|
1147
1298
|
});
|
|
1299
|
+
const storeBegin = () => Effect.gen(function* () {
|
|
1300
|
+
yield* ensureDirExists(STORE_DIR, ".pix directory");
|
|
1301
|
+
yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
|
|
1302
|
+
yield* Ref.set(statsAccumulator, {
|
|
1303
|
+
chunks: 0,
|
|
1304
|
+
files: 0,
|
|
1305
|
+
totalLines: 0,
|
|
1306
|
+
byteSize: 0
|
|
1307
|
+
});
|
|
1308
|
+
if (yield* withStoreError(fs.exists(chunksTemp), "check chunks temp")) yield* withStoreError(fs.remove(chunksTemp), "clean stale chunks temp", chunksTemp);
|
|
1309
|
+
if (yield* withStoreError(fs.exists(vectorsTemp), "check vectors temp")) yield* withStoreError(fs.remove(vectorsTemp), "clean stale vectors temp", vectorsTemp);
|
|
1310
|
+
});
|
|
1311
|
+
const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
|
|
1312
|
+
const content = chunks.map((c) => JSON.stringify({
|
|
1313
|
+
id: c.id,
|
|
1314
|
+
idx: c.idx,
|
|
1315
|
+
file: c.file,
|
|
1316
|
+
startLine: c.startLine,
|
|
1317
|
+
endLine: c.endLine,
|
|
1318
|
+
text: c.text
|
|
1319
|
+
})).join("\n") + "\n";
|
|
1320
|
+
yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
|
|
1321
|
+
const buffer = serializeVectors(embeddings);
|
|
1322
|
+
yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
|
|
1323
|
+
const dims = embeddings[0]?.dims ?? 384;
|
|
1324
|
+
const batchLines = chunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
|
|
1325
|
+
const batchBytes = embeddings.length * dims * 4;
|
|
1326
|
+
yield* Ref.update(seenFiles, (prev) => {
|
|
1327
|
+
for (const c of chunks) prev.add(c.file);
|
|
1328
|
+
return prev;
|
|
1329
|
+
});
|
|
1330
|
+
yield* Ref.update(statsAccumulator, (prev) => ({
|
|
1331
|
+
chunks: prev.chunks + chunks.length,
|
|
1332
|
+
files: 0,
|
|
1333
|
+
totalLines: prev.totalLines + batchLines,
|
|
1334
|
+
byteSize: prev.byteSize + batchBytes
|
|
1335
|
+
}));
|
|
1336
|
+
});
|
|
1337
|
+
const storeCommit = () => Effect.gen(function* () {
|
|
1338
|
+
yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
|
|
1339
|
+
yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
|
|
1340
|
+
const stats = yield* Ref.get(statsAccumulator);
|
|
1341
|
+
const files = yield* Ref.get(seenFiles);
|
|
1342
|
+
yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
|
|
1343
|
+
return {
|
|
1344
|
+
...stats,
|
|
1345
|
+
files: files.size
|
|
1346
|
+
};
|
|
1347
|
+
});
|
|
1348
|
+
const storeAbort = () => Effect.gen(function* () {
|
|
1349
|
+
yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
|
|
1350
|
+
if (yield* withReadError(fs.exists(chunksTemp), "check chunks temp")) yield* withReadError(fs.remove(chunksTemp), "abort chunks temp", chunksTemp);
|
|
1351
|
+
if (yield* withReadError(fs.exists(vectorsTemp), "check vectors temp")) yield* withReadError(fs.remove(vectorsTemp), "abort vectors temp", vectorsTemp);
|
|
1352
|
+
});
|
|
1148
1353
|
const store = (chunks, embeddings) => Effect.gen(function* () {
|
|
1149
1354
|
yield* ensureDirExists(STORE_DIR, ".pix directory");
|
|
1150
1355
|
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
@@ -1159,11 +1364,7 @@ const make = Effect.gen(function* () {
|
|
|
1159
1364
|
yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
|
|
1160
1365
|
yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
|
|
1161
1366
|
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
1162
|
-
const
|
|
1163
|
-
const totalFloats = embeddings.length * dims;
|
|
1164
|
-
const vectorsArray = new Float32Array(totalFloats);
|
|
1165
|
-
for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
|
|
1166
|
-
const buffer = Buffer.from(vectorsArray.buffer);
|
|
1367
|
+
const buffer = serializeVectors(embeddings);
|
|
1167
1368
|
yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
|
|
1168
1369
|
yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
|
|
1169
1370
|
});
|
|
@@ -1232,6 +1433,10 @@ const make = Effect.gen(function* () {
|
|
|
1232
1433
|
});
|
|
1233
1434
|
return {
|
|
1234
1435
|
store,
|
|
1436
|
+
storeBegin,
|
|
1437
|
+
storeBatch,
|
|
1438
|
+
storeCommit,
|
|
1439
|
+
storeAbort,
|
|
1235
1440
|
search,
|
|
1236
1441
|
getStatus,
|
|
1237
1442
|
reset
|