@lucas-bur/pix 0.9.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +585 -165
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -2,16 +2,18 @@
|
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import { Args, CliConfig, Command, Options } from "@effect/cli";
|
|
4
4
|
import { NodeContext, NodeRuntime } from "@effect/platform-node";
|
|
5
|
-
import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
|
|
5
|
+
import { Clock, Context, Data, Effect, Exit, Layer, Option, Ref, Stream } from "effect";
|
|
6
|
+
import * as Chunk from "effect/Chunk";
|
|
6
7
|
import { styleText } from "node:util";
|
|
7
8
|
import * as clack from "@clack/prompts";
|
|
8
|
-
import crypto from "node:crypto";
|
|
9
9
|
import { FileSystem } from "@effect/platform";
|
|
10
|
+
import crypto from "node:crypto";
|
|
10
11
|
import { env } from "@huggingface/transformers";
|
|
11
12
|
import ignore from "ignore";
|
|
12
13
|
//#region src/domain/ports.ts
|
|
13
14
|
var ConfigStore = class extends Context.Tag("ConfigStore")() {};
|
|
14
15
|
var Scanner = class extends Context.Tag("Scanner")() {};
|
|
16
|
+
var ContentExtractor = class extends Context.Tag("ContentExtractor")() {};
|
|
15
17
|
var Chunker = class extends Context.Tag("Chunker")() {};
|
|
16
18
|
var Embedder = class extends Context.Tag("Embedder")() {};
|
|
17
19
|
var VectorStore = class extends Context.Tag("VectorStore")() {};
|
|
@@ -73,6 +75,7 @@ const withInteractive = (activeRef, acquire, setActive, release, effect) => Ref.
|
|
|
73
75
|
/** Display implementation using @clack/prompts for interactive terminal output */
|
|
74
76
|
const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
|
|
75
77
|
const activeRef = yield* Ref.make(null);
|
|
78
|
+
const lastSpinnerMsg = yield* Ref.make("");
|
|
76
79
|
return {
|
|
77
80
|
intro: (title) => Effect.sync(() => clack.intro(styleText("inverse", ` ${title} `))),
|
|
78
81
|
outro: (message) => Effect.sync(() => clack.outro(message)),
|
|
@@ -83,11 +86,17 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
|
|
|
83
86
|
const s = clack.spinner();
|
|
84
87
|
s.start(message);
|
|
85
88
|
return s;
|
|
86
|
-
}), (s) => ({
|
|
89
|
+
}).pipe(Effect.tap(() => Ref.set(lastSpinnerMsg, message))), (s) => ({
|
|
87
90
|
type: "spinner",
|
|
88
91
|
handle: s
|
|
89
|
-
}), (s, exit) => Effect.sync(() => s.stop(exit._tag === "Success" ?
|
|
90
|
-
progress: (opts, effect) =>
|
|
92
|
+
}), (s, exit) => lastSpinnerMsg.pipe(Effect.flatMap((lastMsg) => Effect.sync(() => s.stop(exit._tag === "Success" && lastMsg ? lastMsg : `${message} (failed)`)))), effect),
|
|
93
|
+
progress: (opts, effect) => Effect.gen(function* () {
|
|
94
|
+
const current = yield* Ref.get(activeRef);
|
|
95
|
+
if (current && current.type === "spinner") {
|
|
96
|
+
const msg = yield* Ref.get(lastSpinnerMsg);
|
|
97
|
+
current.handle.stop(msg || opts.message);
|
|
98
|
+
yield* Ref.set(activeRef, null);
|
|
99
|
+
}
|
|
91
100
|
const bar = clack.progress({
|
|
92
101
|
max: opts.max,
|
|
93
102
|
style: opts.style ?? "heavy",
|
|
@@ -95,16 +104,27 @@ const ClackDisplay = { layer: Layer.effect(Display, Effect.gen(function* () {
|
|
|
95
104
|
indicator: opts.indicator ?? "dots"
|
|
96
105
|
});
|
|
97
106
|
bar.start(opts.message);
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
107
|
+
yield* Ref.set(activeRef, {
|
|
108
|
+
type: "progress",
|
|
109
|
+
handle: bar,
|
|
110
|
+
value: 0,
|
|
111
|
+
max: opts.max
|
|
112
|
+
});
|
|
113
|
+
const exit = yield* Effect.exit(effect);
|
|
114
|
+
yield* Ref.set(activeRef, null);
|
|
115
|
+
if (Exit.isSuccess(exit)) {
|
|
116
|
+
bar.stop(opts.message);
|
|
117
|
+
return exit.value;
|
|
118
|
+
}
|
|
119
|
+
bar.error(opts.message);
|
|
120
|
+
return yield* Effect.failCause(exit.cause);
|
|
121
|
+
}),
|
|
105
122
|
updateInteractive: (payload) => Ref.get(activeRef).pipe(Effect.flatMap((active) => {
|
|
106
123
|
if (!active) return Effect.void;
|
|
107
|
-
if (active.type === "spinner")
|
|
124
|
+
if (active.type === "spinner") {
|
|
125
|
+
const msg = payloadText(payload);
|
|
126
|
+
return Effect.sync(() => active.handle.message(msg)).pipe(Effect.andThen(Ref.set(lastSpinnerMsg, msg)));
|
|
127
|
+
}
|
|
108
128
|
const delta = computeDelta(payload, {
|
|
109
129
|
value: active.value,
|
|
110
130
|
max: active.max
|
|
@@ -140,19 +160,198 @@ const DEFAULT_CONFIG = {
|
|
|
140
160
|
chunkLines: 60,
|
|
141
161
|
overlapLines: 10,
|
|
142
162
|
chunkConcurrency: 8,
|
|
143
|
-
|
|
163
|
+
skipExtensions: [],
|
|
164
|
+
ignoredPaths: [
|
|
165
|
+
".pix",
|
|
166
|
+
"node_modules",
|
|
167
|
+
".git",
|
|
168
|
+
"dist",
|
|
169
|
+
"build",
|
|
170
|
+
".next",
|
|
171
|
+
".vscode",
|
|
172
|
+
"coverage",
|
|
173
|
+
"*-lock.yaml",
|
|
174
|
+
"*-lock.json",
|
|
175
|
+
"*.lock",
|
|
176
|
+
".vite-hooks",
|
|
177
|
+
".fallow"
|
|
178
|
+
],
|
|
144
179
|
embedder: {
|
|
145
180
|
model: "Xenova/all-MiniLM-L6-v2",
|
|
146
181
|
device: "auto",
|
|
147
|
-
dtype: "fp32"
|
|
182
|
+
dtype: "fp32",
|
|
183
|
+
batchSize: 16
|
|
148
184
|
}
|
|
149
185
|
};
|
|
150
186
|
//#endregion
|
|
151
|
-
//#region src/
|
|
187
|
+
//#region src/domain/errors.ts
|
|
188
|
+
/** Config file or directory does not exist. Run pix init first. */
|
|
189
|
+
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
|
|
190
|
+
/** Config file exists but contains invalid JSON. */
|
|
191
|
+
var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
|
|
192
|
+
/** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
|
|
193
|
+
var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
|
|
194
|
+
/** Disk is full — write operation could not complete. */
|
|
195
|
+
var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
|
|
196
|
+
/** Generic index store I/O failure (read, write, delete). */
|
|
197
|
+
var StoreError = class extends Data.TaggedError("StoreError") {};
|
|
198
|
+
/** Source file could not be read during chunking (binary, permissions, encoding). */
|
|
199
|
+
var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
|
|
200
|
+
/** Embedding model could not be downloaded or loaded. */
|
|
201
|
+
var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
|
|
202
|
+
/** Embedding model failed during inference. */
|
|
203
|
+
var InferenceError = class extends Data.TaggedError("InferenceError") {};
|
|
152
204
|
/**
|
|
153
|
-
*
|
|
154
|
-
*
|
|
205
|
+
* Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
|
|
206
|
+
* via ScanResult.skipped.
|
|
155
207
|
*/
|
|
208
|
+
var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
|
|
209
|
+
/** File type is unsupported for text extraction. */
|
|
210
|
+
var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
|
|
211
|
+
/** Text extraction failed for a supported file type. */
|
|
212
|
+
var ExtractionFailed = class extends Data.TaggedError("ExtractionFailed") {};
|
|
213
|
+
//#endregion
|
|
214
|
+
//#region src/services/processors/identity.ts
|
|
215
|
+
/**
 * Processor for formats that are already plain text: the file content is read as a string and
 * returned unchanged. Needs the `FileSystem` service; a failed read is reported as
 * `ExtractionFailed` carrying the file path and the underlying cause.
 */
const identityProcessor = (file) => {
	const readText = Effect.flatMap(FileSystem.FileSystem, (fs) => fs.readFileString(file));
	const toExtractionFailed = (cause) => new ExtractionFailed({
		message: `Failed to read file for extraction: ${file}`,
		file,
		cause
	});
	return Effect.mapError(readText, toExtractionFailed);
};
|
|
220
|
+
//#endregion
|
|
221
|
+
//#region src/services/processors/skip.ts
|
|
222
|
+
/**
 * Creates a processor that refuses every file of the given extension. The `UnsupportedFormat`
 * error is built once per extension and reused for each rejected file.
 */
const skipProcessor = (extension) => {
	const message = `Unsupported file type: ${extension}`;
	const unsupported = new UnsupportedFormat({ message, extension });
	return (_file) => Effect.fail(unsupported);
};
|
|
229
|
+
//#endregion
|
|
230
|
+
//#region src/services/processors/index.ts
|
|
231
|
+
/**
 * Default lookup table from lower-cased file extension (or, for extension-less files, the
 * lower-cased file name) to its processor. Text formats are read as-is; binary and media
 * formats are mapped to a skip processor. Key insertion order is kept stable.
 */
const DEFAULT_PROCESSOR_MAP = (() => {
	// Entries whose content is indexed verbatim.
	const readAsText = (...keys) => keys.map((key) => [key, identityProcessor]);
	// Entries that are rejected as unsupported; each gets its own skip processor.
	const unsupported = (...keys) => keys.map((key) => [key, skipProcessor(key)]);
	return Object.fromEntries([
		...readAsText(
			".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go", ".java",
			".c", ".cpp", ".h", ".hpp",
			".json", ".yaml", ".yml", ".toml", ".xml", ".csv",
			".md", ".mdx", ".txt", ".rst",
			".html", ".css", ".scss", ".less", ".sql", ".graphql",
			".sh", ".bash", ".ps1", ".bat", ".cmake", ".dockerfile",
			"dockerfile", "makefile", "gemfile"
		),
		...unsupported(".pdf", ".png", ".jpg", ".jpeg", ".gif"),
		// SVG is XML text, so it is read like any other text format.
		...readAsText(".svg"),
		...unsupported(
			".ico", ".webp",
			".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv",
			".exe", ".dll", ".so",
			".zip", ".tar", ".gz", ".7z", ".rar",
			".ttf", ".woff", ".woff2", ".eot", ".otf"
		),
		...readAsText(".lock", "lock")
	]);
})();
|
|
299
|
+
/**
 * Builds the processor map by merging domain defaults with user-specified skip extensions. Skip
 * extensions override any existing mapping with a skip processor. Unknown extensions remain absent
 * from the map — callers decide how to handle them.
 *
 * Lookups against this map use lower-cased keys, so each skip extension is trimmed and
 * lower-cased before it is stored; otherwise an entry such as ".MD" could never match. Blank
 * entries are ignored, and a missing list is treated as empty.
 *
 * @param skipExtensions Extensions (or extension-less file names) to force-skip; may be nullish.
 * @returns A fresh object; the shared default map is never mutated.
 */
function buildProcessorMap(skipExtensions) {
	const mapped = { ...DEFAULT_PROCESSOR_MAP };
	for (const raw of skipExtensions ?? []) {
		const ext = raw.trim().toLowerCase();
		if (ext.length > 0) mapped[ext] = skipProcessor(ext);
	}
	return mapped;
}
|
|
309
|
+
//#endregion
|
|
310
|
+
//#region src/application/index-project.ts
|
|
311
|
+
/**
 * Combines per-run CLI options with the persisted config into the settings used by one indexing
 * run. CLI scalars win over config values; CLI lists are appended to the config lists.
 *
 * Config files written by older versions may lack `skipExtensions` (and possibly other newer
 * fields), so every config field is read with a fallback instead of being assumed present.
 *
 * @param opts Options from the command line; every field may be undefined.
 * @param config Parsed config file content.
 * @returns `{ batchSize, concurrency, skipExtensions, ignoredPaths, ignoreGitignore }`.
 */
const deriveEffectiveConfig = (opts, config) => {
	const configSkipExtensions = config.skipExtensions ?? [];
	const configIgnoredPaths = config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths;
	return {
		// Clamped like `concurrency`: a zero or negative size from a hand-edited config cannot form batches.
		batchSize: Math.max(1, opts.batchSize ?? config.embedder?.batchSize ?? 16),
		concurrency: Math.max(1, opts.chunkConcurrency ?? config.chunkConcurrency ?? 8),
		skipExtensions: opts.skipExtensions ? [...configSkipExtensions, ...opts.skipExtensions] : configSkipExtensions,
		ignoredPaths: opts.ignorePaths ? [...configIgnoredPaths, ...opts.ignorePaths] : configIgnoredPaths,
		ignoreGitignore: opts.ignoreGitignore ?? config.ignoreGitignore ?? false
	};
};
|
|
318
|
+
/**
 * Returns the lookup key for a file path: the lower-cased extension including its dot, or the
 * lower-cased file name itself when the name contains no dot. Only "/" is treated as a path
 * separator.
 */
function getExtension(file) {
	const name = file.slice(file.lastIndexOf("/") + 1);
	const dotIndex = name.lastIndexOf(".");
	const key = dotIndex === -1 ? name : name.slice(dotIndex);
	return key.toLowerCase();
}
|
|
325
|
+
/**
 * Splits scanned files into those that have a processor and those that do not.
 *
 * The membership check uses own properties only. `processorMap` is a plain object, so a bare
 * `processorMap[ext]` lookup would also find inherited members: an extension-less file named
 * `constructor` or `__proto__` would be classified as known and then fail when "processed".
 *
 * @param files File paths from the scanner.
 * @param processorMap Map from lower-cased extension (or bare file name) to processor.
 * @returns `knownFiles` (have a processor), `skippedFiles` (do not), and the set of
 *   `unknownExtensions` seen among the skipped files.
 */
const classifyFiles = (files, processorMap) => {
	const knownFiles = [];
	const skippedFiles = [];
	const unknownExtensions = /* @__PURE__ */ new Set();
	for (const file of files) {
		const ext = getExtension(file);
		if (Object.hasOwn(processorMap, ext) && processorMap[ext]) {
			knownFiles.push(file);
			continue;
		}
		unknownExtensions.add(ext);
		skippedFiles.push(file);
	}
	return {
		knownFiles,
		skippedFiles,
		unknownExtensions
	};
};
|
|
342
|
+
/**
 * Extracts text from every known file and chunks it, collecting all chunks into one array.
 *
 * A file whose extraction fails does not abort the run: its path and the error message are
 * appended to the `skipped` ref and the file is dropped from the stream. Extraction and
 * chunking each run with the given `concurrency`.
 *
 * @returns An effect yielding `{ chunks, totalChunks }`.
 */
const classifyAndCollectChunks = (knownFiles, extractor, chunker, concurrency, skipped) => {
	// Remember why a file was dropped so it can be reported after indexing.
	const recordSkip = (file, err) => Ref.update(skipped, (prev) => [...prev, {
		path: file,
		reason: err.message
	}]);
	// Some({ file, text }) on success, None (after recording the skip) on failure.
	const extractOrSkip = (file) => extractor.extract(file).pipe(
		Effect.map((text) => Option.some({
			file,
			text
		})),
		Effect.catchAll((err) => Effect.as(recordSkip(file, err), Option.none()))
	);
	const summarize = (allChunks) => {
		const chunks = Chunk.toArray(allChunks);
		return {
			chunks,
			totalChunks: chunks.length
		};
	};
	return Stream.fromIterable(knownFiles).pipe(
		Stream.mapEffect(extractOrSkip, { concurrency }),
		Stream.filterMap((opt) => opt),
		Stream.mapEffect(({ file, text }) => chunker.chunkText(text, file), { concurrency }),
		Stream.flatMap((chunks) => Stream.fromIterable(chunks)),
		Stream.runCollect,
		Effect.map(summarize)
	);
};
|
|
156
355
|
var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
157
356
|
accessors: true,
|
|
158
357
|
effect: Effect.gen(function* () {
|
|
@@ -162,49 +361,124 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
162
361
|
const embedder = yield* Embedder;
|
|
163
362
|
const vectorStore = yield* VectorStore;
|
|
164
363
|
const d = yield* Display;
|
|
165
|
-
const
|
|
364
|
+
const extractor = yield* ContentExtractor;
|
|
365
|
+
const index = (opts = {}) => Effect.gen(function* () {
|
|
366
|
+
const start = Date.now();
|
|
166
367
|
if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
|
|
167
|
-
const
|
|
168
|
-
const
|
|
169
|
-
".ts",
|
|
170
|
-
".tsx",
|
|
171
|
-
".js",
|
|
172
|
-
".jsx"
|
|
173
|
-
];
|
|
368
|
+
const eff = deriveEffectiveConfig(opts, yield* configStore.readConfig());
|
|
369
|
+
const processorMap = buildProcessorMap(eff.skipExtensions);
|
|
174
370
|
yield* d.updateInteractive("Scanning source files...");
|
|
175
|
-
const scanResult = yield* scanner.scanFiles(
|
|
176
|
-
|
|
177
|
-
const
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
if (
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
371
|
+
const scanResult = yield* scanner.scanFiles(eff.ignoredPaths, eff.ignoreGitignore);
|
|
372
|
+
const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles(scanResult.files, processorMap);
|
|
373
|
+
const skipped = yield* Ref.make(scanResult.skipped.filter((s) => !s.reason.startsWith("Ignored by config pattern")).map((s) => ({
|
|
374
|
+
path: s.path,
|
|
375
|
+
reason: s.reason
|
|
376
|
+
})));
|
|
377
|
+
if (unknownExtensions.size > 0) yield* Ref.update(skipped, (prev) => [...prev, ...skippedFiles.map((f) => ({
|
|
378
|
+
path: f,
|
|
379
|
+
reason: "unknown extension"
|
|
380
|
+
}))]);
|
|
381
|
+
if (knownFiles.length === 0) {
|
|
382
|
+
yield* displaySkippedNote(d, yield* Ref.get(skipped));
|
|
383
|
+
return {
|
|
384
|
+
success: true,
|
|
385
|
+
status: {
|
|
386
|
+
chunks: 0,
|
|
387
|
+
files: 0,
|
|
388
|
+
totalLines: 0,
|
|
389
|
+
byteSize: 0
|
|
390
|
+
},
|
|
391
|
+
durationMs: Date.now() - start
|
|
392
|
+
};
|
|
393
|
+
}
|
|
394
|
+
yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
|
|
395
|
+
const { chunks, totalChunks } = yield* classifyAndCollectChunks(knownFiles, extractor, chunker, eff.concurrency, skipped);
|
|
396
|
+
if (totalChunks === 0) {
|
|
397
|
+
yield* displaySkippedNote(d, yield* Ref.get(skipped));
|
|
398
|
+
return {
|
|
399
|
+
success: true,
|
|
400
|
+
status: {
|
|
401
|
+
chunks: 0,
|
|
402
|
+
files: 0,
|
|
403
|
+
totalLines: 0,
|
|
404
|
+
byteSize: 0
|
|
405
|
+
},
|
|
406
|
+
durationMs: Date.now() - start
|
|
407
|
+
};
|
|
408
|
+
}
|
|
409
|
+
yield* vectorStore.storeBegin();
|
|
410
|
+
const embeddedRef = yield* Ref.make(0);
|
|
411
|
+
const stats = yield* d.progress({
|
|
412
|
+
message: `Embedding ${totalChunks} chunks...`,
|
|
413
|
+
max: totalChunks
|
|
414
|
+
}, Stream.fromIterable(chunks).pipe(Stream.grouped(eff.batchSize), Stream.mapEffect((batchChunk) => Effect.gen(function* () {
|
|
415
|
+
const batch = Chunk.toArray(batchChunk);
|
|
416
|
+
const texts = batch.map((c) => c.text);
|
|
417
|
+
const embeddings = yield* embedder.batch(texts);
|
|
418
|
+
yield* vectorStore.storeBatch(batch, embeddings);
|
|
419
|
+
const count = yield* Ref.updateAndGet(embeddedRef, (n) => n + batch.length);
|
|
420
|
+
yield* d.updateInteractive({
|
|
421
|
+
message: `Embedding ${count} of ${totalChunks} chunks`,
|
|
422
|
+
setTo: count
|
|
423
|
+
});
|
|
424
|
+
})), Stream.runDrain, Effect.matchEffect({
|
|
425
|
+
onSuccess: () => vectorStore.storeCommit(),
|
|
426
|
+
onFailure: (err) => vectorStore.storeAbort().pipe(Effect.flatMap(() => Effect.fail(err)))
|
|
427
|
+
})));
|
|
428
|
+
yield* displaySkippedNote(d, yield* Ref.get(skipped));
|
|
429
|
+
const durationSec = ((Date.now() - start) / 1e3).toFixed(1);
|
|
430
|
+
yield* d.log(`Indexed ${stats.chunks} chunks from ${stats.files} files in ${durationSec}s`, "success");
|
|
431
|
+
const fallbackInfo = yield* embedder.getFallbackInfo();
|
|
195
432
|
return {
|
|
196
433
|
success: true,
|
|
197
434
|
status: {
|
|
198
|
-
chunks:
|
|
199
|
-
files:
|
|
200
|
-
totalLines,
|
|
201
|
-
byteSize:
|
|
202
|
-
}
|
|
435
|
+
chunks: stats.chunks,
|
|
436
|
+
files: stats.files,
|
|
437
|
+
totalLines: stats.totalLines,
|
|
438
|
+
byteSize: stats.byteSize
|
|
439
|
+
},
|
|
440
|
+
durationMs: Date.now() - start,
|
|
441
|
+
embedderFallback: fallbackInfo
|
|
203
442
|
};
|
|
204
443
|
});
|
|
205
444
|
return { index };
|
|
206
445
|
})
|
|
207
446
|
}) {};
|
|
447
|
+
/** Returns the part of `path` after its last "/" (the whole string when there is no "/"). */
const getFilename = (path) => path.slice(path.lastIndexOf("/") + 1);
|
|
448
|
+
/**
 * Returns the display extension of a bare file name, used to group skipped files in the report.
 *
 * The extension is lower-cased because file classification is case-insensitive; without this,
 * `a.XYZ` and `b.xyz` are skipped for the same reason but listed as two separate groups.
 *
 * @param filename File name without directory part.
 * @returns The lower-cased extension including its dot, or "(no extension)" when the name has no dot.
 */
const getFileExtension = (filename) => {
	const dotIndex = filename.lastIndexOf(".");
	if (dotIndex < 0) return "(no extension)";
	return filename.slice(dotIndex).toLowerCase();
};
|
|
452
|
+
/**
 * Groups skipped entries by the extension of their file name.
 *
 * @param entries Objects with a `path` property.
 * @returns A Map from extension label to the file names (not full paths) carrying it, in
 *   first-seen order.
 */
const groupByExtension = (entries) => {
	const byExt = /* @__PURE__ */ new Map();
	for (const { path } of entries) {
		const name = getFilename(path);
		const ext = getFileExtension(name);
		const bucket = byExt.get(ext);
		if (bucket === void 0) byExt.set(ext, [name]);
		else bucket.push(name);
	}
	return byExt;
};
|
|
462
|
+
/**
 * Joins file names with ", ", showing at most `maxDisplay` of them followed by a
 * "+N more" suffix for the remainder.
 */
const formatFileList = (files, maxDisplay = 5) => {
	if (files.length > maxDisplay) {
		const shown = files.slice(0, maxDisplay).join(", ");
		return `${shown} +${files.length - maxDisplay} more`;
	}
	return files.join(", ");
};
|
|
463
|
+
/**
 * Builds the text lines of the "skipped files" note: an "Unknown extensions" section grouped
 * by extension, then an "Extraction errors" section with one line per file. A blank line
 * separates the sections when both are present.
 */
const buildSkippedLines = (extFailures, extractErrors) => {
	const extensionSection = extFailures.length > 0 ? [
		`Unknown extensions (${extFailures.length})`,
		...Array.from(groupByExtension(extFailures), ([ext, files]) => ` ${ext} (${files.length}): ${formatFileList(files)}`)
	] : [];
	if (extractErrors.length === 0) return extensionSection;
	const separator = extensionSection.length > 0 ? [""] : [];
	return [
		...extensionSection,
		...separator,
		`Extraction errors (${extractErrors.length})`,
		...extractErrors.map((s) => ` ${getFilename(s.path)}: ${s.reason}`)
	];
};
|
|
476
|
+
/**
 * Shows a note listing every skipped file, or does nothing when none were skipped. Entries
 * whose reason is exactly "unknown extension" go into the extension section; all other
 * entries are listed as extraction errors.
 */
const displaySkippedNote = (d, skipped) => {
	if (skipped.length === 0) return Effect.void;
	const isUnknownExtension = (s) => s.reason === "unknown extension";
	const extFailures = skipped.filter(isUnknownExtension);
	const extractErrors = skipped.filter((s) => !isUnknownExtension(s));
	const body = buildSkippedLines(extFailures, extractErrors).join("\n");
	return d.note(body, `Skipped ${skipped.length} files`);
};
|
|
208
482
|
//#endregion
|
|
209
483
|
//#region src/application/init-project.ts
|
|
210
484
|
/**
|
|
@@ -293,22 +567,60 @@ const reportError = (error) => Effect.gen(function* () {
|
|
|
293
567
|
});
|
|
294
568
|
//#endregion
|
|
295
569
|
//#region src/commands/index-cmd.ts
|
|
296
|
-
|
|
570
|
+
const batchSizeOption = Options.integer("batch-size").pipe(Options.withAlias("b"), Options.optional);
|
|
571
|
+
const chunkConcurrencyOption = Options.integer("chunk-concurrency").pipe(Options.withAlias("c"), Options.optional);
|
|
572
|
+
const skipExtensionsOption = Options.text("skip-extensions").pipe(Options.withAlias("s"), Options.repeated);
|
|
573
|
+
const ignorePathOption = Options.text("ignore-path").pipe(Options.repeated);
|
|
574
|
+
const ignorePathsOption = Options.text("ignore-paths").pipe(Options.repeated);
|
|
575
|
+
const ignoreGitignoreOption = Options.boolean("ignore-gitignore").pipe(Options.withDefault(false));
|
|
576
|
+
/**
 * Flattens a list of comma-separated strings into individual trimmed items, dropping items
 * that are empty after trimming.
 */
const splitCsv = (values) => {
	const items = [];
	for (const value of values) {
		for (const piece of value.split(",")) {
			const trimmed = piece.trim();
			if (trimmed.length > 0) items.push(trimmed);
		}
	}
	return items;
};
|
|
577
|
+
/**
 * Converts parsed CLI arguments into the options object passed to the indexer.
 *
 * Throws an `Error` when `--batch-size` or `--chunk-concurrency` is given but not positive.
 * List-valued options that end up empty, and an unset `--ignore-gitignore`, are returned as
 * `undefined` so the config file value applies.
 */
const buildIndexOptions = (args) => {
	const { batchSize, chunkConcurrency } = args;
	if (Option.isSome(batchSize) && batchSize.value <= 0) {
		throw new Error(`--batch-size must be positive, got ${batchSize.value}`);
	}
	if (Option.isSome(chunkConcurrency) && chunkConcurrency.value <= 0) {
		throw new Error(`--chunk-concurrency must be positive, got ${chunkConcurrency.value}`);
	}
	const nonEmptyOrUndefined = (list) => list.length > 0 ? list : void 0;
	const skipList = splitCsv(args.skipExtensions);
	// `--ignore-path` values are whole paths; only `--ignore-paths` values are comma-split.
	const singlePaths = args.ignorePath.map((s) => s.trim()).filter((s) => s.length > 0);
	const ignoreList = [...singlePaths, ...splitCsv(args.ignorePaths)];
	return {
		batchSize: Option.getOrUndefined(batchSize),
		chunkConcurrency: Option.getOrUndefined(chunkConcurrency),
		skipExtensions: nonEmptyOrUndefined(skipList),
		ignorePaths: nonEmptyOrUndefined(ignoreList),
		ignoreGitignore: args.ignoreGitignore || void 0
	};
};
|
|
590
|
+
/**
 * Reports the outcome of an indexing run: emits the summary through `d.json` (including
 * `embedderFallback` only when it is set) and logs a warning when no chunks were produced.
 */
const emitIndexResult = (d, result) => Effect.gen(function* () {
	const { status, durationMs, embedderFallback } = result;
	const summary = {
		chunks: status.chunks,
		files: status.files,
		totalLines: status.totalLines,
		byteSize: status.byteSize,
		durationMs
	};
	if (embedderFallback) summary.embedderFallback = embedderFallback;
	yield* d.json(summary);
	if (status.chunks !== 0) return;
	yield* d.log("No chunks to index.", "warn");
});
|
|
297
601
|
const indexCommand = Command.make("index", {
|
|
298
602
|
force: Options.boolean("force").pipe(Options.withDefault(false)),
|
|
299
603
|
verbose: Options.boolean("verbose").pipe(Options.withDefault(false)),
|
|
300
|
-
json: Options.boolean("json").pipe(Options.withDefault(false))
|
|
301
|
-
|
|
604
|
+
json: Options.boolean("json").pipe(Options.withDefault(false)),
|
|
605
|
+
batchSize: batchSizeOption,
|
|
606
|
+
chunkConcurrency: chunkConcurrencyOption,
|
|
607
|
+
skipExtensions: skipExtensionsOption,
|
|
608
|
+
ignorePath: ignorePathOption,
|
|
609
|
+
ignorePaths: ignorePathsOption,
|
|
610
|
+
ignoreGitignore: ignoreGitignoreOption
|
|
611
|
+
}, ({ force, verbose, batchSize, chunkConcurrency, skipExtensions, ignorePath, ignorePaths, ignoreGitignore }) => Effect.gen(function* () {
|
|
302
612
|
const d = yield* Display;
|
|
303
613
|
if (force) yield* d.log("--force is currently not implemented and only a placeholder.", "warn");
|
|
304
614
|
if (verbose) yield* d.log("--verbose is currently not implemented and only a placeholder.", "warn");
|
|
305
|
-
const
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
615
|
+
const options = buildIndexOptions({
|
|
616
|
+
batchSize,
|
|
617
|
+
chunkConcurrency,
|
|
618
|
+
skipExtensions,
|
|
619
|
+
ignorePath,
|
|
620
|
+
ignorePaths,
|
|
621
|
+
ignoreGitignore
|
|
309
622
|
});
|
|
310
|
-
|
|
311
|
-
else yield* d.log(`Indexed ${result.status.chunks} chunks from ${result.status.files} files.`, "success");
|
|
623
|
+
yield* emitIndexResult(d, yield* d.spinner("Indexing project...", IndexProject.index(options)));
|
|
312
624
|
}).pipe(Effect.catchAll(reportError)));
|
|
313
625
|
//#endregion
|
|
314
626
|
//#region src/commands/init.ts
|
|
@@ -493,29 +805,6 @@ const setupTerminalCleanup = () => {
|
|
|
493
805
|
process.on("exit", makeTerminalCleanupHandler(process.stdin, process.stdout));
|
|
494
806
|
};
|
|
495
807
|
//#endregion
|
|
496
|
-
//#region src/domain/errors.ts
|
|
497
|
-
/** Config file or directory does not exist. Run pix init first. */
|
|
498
|
-
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
|
|
499
|
-
/** Config file exists but contains invalid JSON. */
|
|
500
|
-
var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
|
|
501
|
-
/** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
|
|
502
|
-
var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
|
|
503
|
-
/** Disk is full — write operation could not complete. */
|
|
504
|
-
var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
|
|
505
|
-
/** Generic index store I/O failure (read, write, delete). */
|
|
506
|
-
var StoreError = class extends Data.TaggedError("StoreError") {};
|
|
507
|
-
/** Source file could not be read during chunking (binary, permissions, encoding). */
|
|
508
|
-
var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
|
|
509
|
-
/** Embedding model could not be downloaded or loaded. */
|
|
510
|
-
var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
|
|
511
|
-
/** Embedding model failed during inference. */
|
|
512
|
-
var InferenceError = class extends Data.TaggedError("InferenceError") {};
|
|
513
|
-
/**
|
|
514
|
-
* Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
|
|
515
|
-
* via ScanResult.skipped.
|
|
516
|
-
*/
|
|
517
|
-
var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
|
|
518
|
-
//#endregion
|
|
519
808
|
//#region src/services/chunker.ts
|
|
520
809
|
const MIN_CHUNK_CHARS = 20;
|
|
521
810
|
const readFileContent = (fs, file) => fs.readFileString(file).pipe(Effect.mapError((cause) => new ChunkerError({
|
|
@@ -547,17 +836,24 @@ const buildChunks = (file, content, config) => {
|
|
|
547
836
|
}
|
|
548
837
|
return chunks;
|
|
549
838
|
};
|
|
550
|
-
const make$
|
|
839
|
+
const make$5 = Effect.gen(function* () {
|
|
551
840
|
const fs = yield* FileSystem.FileSystem;
|
|
552
841
|
const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
|
|
842
|
+
// Chunks text that was already extracted; an empty string yields no chunks.
const chunkText = (text, file) => Effect.sync(() => text === "" ? [] : buildChunks(file, text, config));
|
|
553
846
|
const chunkFile = (file) => Effect.gen(function* () {
|
|
554
847
|
const content = yield* readFileContent(fs, file);
|
|
555
848
|
if (content === "") return [];
|
|
556
849
|
return buildChunks(file, content, config);
|
|
557
850
|
});
|
|
558
|
-
return {
|
|
851
|
+
return {
|
|
852
|
+
chunkFile,
|
|
853
|
+
chunkText
|
|
854
|
+
};
|
|
559
855
|
});
|
|
560
|
-
const ChunkerLive = Layer.effect(Chunker, make$
|
|
856
|
+
const ChunkerLive = Layer.effect(Chunker, make$5);
|
|
561
857
|
//#endregion
|
|
562
858
|
//#region src/services/config-store.ts
|
|
563
859
|
const CONFIG_DIR = ".pix";
|
|
@@ -574,7 +870,7 @@ const mapConfigWriteError = (cause, path, action) => {
|
|
|
574
870
|
cause
|
|
575
871
|
});
|
|
576
872
|
};
|
|
577
|
-
const make$
|
|
873
|
+
const make$4 = Effect.gen(function* () {
|
|
578
874
|
const fs = yield* FileSystem.FileSystem;
|
|
579
875
|
const writeConfig = (config) => Effect.gen(function* () {
|
|
580
876
|
const configJson = JSON.stringify(config, null, 2);
|
|
@@ -611,7 +907,28 @@ const make$3 = Effect.gen(function* () {
|
|
|
611
907
|
configExists
|
|
612
908
|
};
|
|
613
909
|
});
|
|
614
|
-
const ConfigStoreLive = Layer.effect(ConfigStore, make$
|
|
910
|
+
const ConfigStoreLive = Layer.effect(ConfigStore, make$4);
|
|
911
|
+
//#endregion
|
|
912
|
+
//#region src/services/content-extractor.ts
|
|
913
|
+
/**
 * Builds the content extractor service. The `FileSystem` service is captured once and provided
 * to each processor, so `extract` itself has no remaining requirements.
 *
 * `extract(file)` selects a processor by the file's lower-cased extension (or bare name) and
 * runs it. A file without a processor fails with `UnsupportedFormat`. The lookup checks own
 * properties only, so names such as `constructor` cannot resolve to members inherited from
 * `Object.prototype`.
 */
const make$3 = Effect.gen(function* () {
	const fs = yield* FileSystem.FileSystem;
	const processorMap = buildProcessorMap([]);
	const extract = (file) => {
		// Same key derivation the indexer uses when classifying files.
		const ext = getExtension(file);
		const processor = Object.hasOwn(processorMap, ext) ? processorMap[ext] : void 0;
		if (!processor) return Effect.fail(new UnsupportedFormat({
			message: `No processor for extension: ${ext}`,
			extension: ext
		}));
		return processor(file).pipe(Effect.provideService(FileSystem.FileSystem, fs));
	};
	return { extract };
});
|
|
931
|
+
const ContentExtractorLive = Layer.effect(ContentExtractor, make$3);
|
|
615
932
|
//#endregion
|
|
616
933
|
//#region src/domain/models.ts
|
|
617
934
|
/** Registry of supported embedding models. */
|
|
@@ -641,9 +958,7 @@ const MODEL_REGISTRY = {
|
|
|
641
958
|
};
|
|
642
959
|
//#endregion
|
|
643
960
|
//#region src/services/embedder.ts
|
|
644
|
-
|
|
645
|
-
const BATCH_SIZE = 16;
|
|
646
|
-
env.cacheDir = CACHE_DIR;
|
|
961
|
+
env.cacheDir = ".pix/cache";
|
|
647
962
|
const normalize = (arr) => {
|
|
648
963
|
let norm = 0;
|
|
649
964
|
for (let i = 0; i < arr.length; i++) norm += arr[i] * arr[i];
|
|
@@ -685,16 +1000,14 @@ const createExtractor = (opts) => Effect.tryPromise(async () => {
|
|
|
685
1000
|
model: opts.model,
|
|
686
1001
|
cause
|
|
687
1002
|
})));
|
|
688
|
-
const createExtractorWithFallback = (opts) => {
|
|
1003
|
+
const createExtractorWithFallback = (opts, fallbackRef) => {
|
|
689
1004
|
if (opts.device === "cpu") return createExtractor(opts);
|
|
690
1005
|
return createExtractor(opts).pipe(Effect.catchAll((originalError) => Effect.gen(function* () {
|
|
691
|
-
|
|
692
|
-
yield*
|
|
693
|
-
yield* d.json({
|
|
694
|
-
event: "embedder_fallback",
|
|
1006
|
+
yield* (yield* Display).log(`GPU (${opts.device}) failed, falling back to CPU...`, "warn");
|
|
1007
|
+
yield* Ref.set(fallbackRef, Option.some({
|
|
695
1008
|
originalDevice: opts.device,
|
|
696
1009
|
reason: originalError.message
|
|
697
|
-
});
|
|
1010
|
+
}));
|
|
698
1011
|
return yield* createExtractor({
|
|
699
1012
|
...opts,
|
|
700
1013
|
device: "cpu"
|
|
@@ -705,7 +1018,8 @@ const make$2 = Effect.gen(function* () {
|
|
|
705
1018
|
const configStore = yield* ConfigStore;
|
|
706
1019
|
const d = yield* Display;
|
|
707
1020
|
const cfg = yield* resolveEmbedderConfig(configStore);
|
|
708
|
-
const
|
|
1021
|
+
const fallbackRef = yield* Ref.make(Option.none());
|
|
1022
|
+
const getExtractor = yield* Effect.cached(createExtractorWithFallback(cfg, fallbackRef));
|
|
709
1023
|
const embed = (text) => Effect.gen(function* () {
|
|
710
1024
|
const extractor = yield* getExtractor;
|
|
711
1025
|
const data = (yield* Effect.tryPromise(() => extractor(text, {
|
|
@@ -722,44 +1036,35 @@ const make$2 = Effect.gen(function* () {
|
|
|
722
1036
|
}).pipe(Effect.provideService(Display, d));
|
|
723
1037
|
const batch = (texts) => Effect.gen(function* () {
|
|
724
1038
|
const extractor = yield* getExtractor;
|
|
1039
|
+
const tensor = yield* Effect.tryPromise(() => extractor([...texts], {
|
|
1040
|
+
pooling: "mean",
|
|
1041
|
+
normalize: false
|
|
1042
|
+
})).pipe(Effect.mapError((cause) => new InferenceError({
|
|
1043
|
+
message: "Batch embedding inference failed",
|
|
1044
|
+
cause
|
|
1045
|
+
})));
|
|
1046
|
+
const data = tensor.data;
|
|
1047
|
+
const n = tensor.dims[0];
|
|
725
1048
|
const results = [];
|
|
726
|
-
for (let
|
|
727
|
-
const
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
})
|
|
732
|
-
message: "Batch embedding inference failed",
|
|
733
|
-
cause
|
|
734
|
-
})));
|
|
735
|
-
const data = tensor.data;
|
|
736
|
-
const n = tensor.dims[0];
|
|
737
|
-
for (let j = 0; j < n; j++) {
|
|
738
|
-
const offset = j * cfg.dims;
|
|
739
|
-
results.push(normalize(data.slice(offset, offset + cfg.dims)));
|
|
740
|
-
}
|
|
1049
|
+
for (let j = 0; j < n; j++) {
|
|
1050
|
+
const offset = j * cfg.dims;
|
|
1051
|
+
results.push({
|
|
1052
|
+
vector: normalize(data.slice(offset, offset + cfg.dims)),
|
|
1053
|
+
dims: cfg.dims
|
|
1054
|
+
});
|
|
741
1055
|
}
|
|
742
|
-
return results
|
|
743
|
-
vector,
|
|
744
|
-
dims: cfg.dims
|
|
745
|
-
}));
|
|
1056
|
+
return results;
|
|
746
1057
|
}).pipe(Effect.provideService(Display, d));
|
|
1058
|
+
/**
 * Read the fallback record held in `fallbackRef`.
 *
 * @returns An effect yielding the stored fallback details, or `undefined`
 *   when the ref still holds `Option.none()`.
 */
const getFallbackInfo = () => Effect.map(
	Ref.get(fallbackRef),
	(recorded) => Option.getOrElse(recorded, () => void 0)
);
|
|
747
1059
|
return {
|
|
748
1060
|
embed,
|
|
749
|
-
batch
|
|
1061
|
+
batch,
|
|
1062
|
+
getFallbackInfo
|
|
750
1063
|
};
|
|
751
1064
|
});
|
|
752
1065
|
/**
 * Layer for the `Embedder` service built from `make$2`. `ConfigStoreLive` is
 * attached through `Layer.provideMerge`, so it both feeds the embedder and is
 * part of this layer's output.
 */
const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
|
|
753
1066
|
//#endregion
|
|
754
1067
|
//#region src/services/scanner.ts
|
|
755
|
-
const ALWAYS_IGNORE = new Set([
|
|
756
|
-
".pix",
|
|
757
|
-
"node_modules",
|
|
758
|
-
".git",
|
|
759
|
-
"dist",
|
|
760
|
-
"build",
|
|
761
|
-
".next"
|
|
762
|
-
]);
|
|
763
1068
|
const make$1 = Effect.gen(function* () {
|
|
764
1069
|
const fs = yield* FileSystem.FileSystem;
|
|
765
1070
|
const readFileWithSkip = (path, mkReason) => fs.readFileString(path).pipe(Effect.map((content) => ({
|
|
@@ -792,47 +1097,93 @@ const make$1 = Effect.gen(function* () {
|
|
|
792
1097
|
reason: `Could not stat: ${String(error)}`
|
|
793
1098
|
}
|
|
794
1099
|
})));
|
|
795
|
-
const
|
|
1100
|
+
/**
 * Convert a path into one relative to `cwd`.
 *
 * The prefix is only stripped at a path-separator boundary, so a sibling
 * directory that merely shares a name prefix with `cwd` (e.g. `/a/bc` when
 * `cwd` is `/a/b`) is returned unchanged instead of being truncated.
 *
 * @param {string} fullPath - Path to relativize.
 * @param {string} cwd - Base directory, without a trailing slash.
 * @returns {string} The portion of `fullPath` below `cwd`; `""` when both are
 *   equal; otherwise `fullPath` untouched.
 */
const computeRelative = (fullPath, cwd) => {
	if (fullPath === cwd) return "";
	const prefix = `${cwd}/`;
	return fullPath.startsWith(prefix) ? fullPath.slice(prefix.length) : fullPath;
};
|
|
1101
|
+
/**
 * Read one ignore file and register its patterns on an `ignore` matcher.
 *
 * @param filePath - Location of the ignore file to read.
 * @param ig - Matcher that receives the file's lines via `ig.add`.
 * @param skipped - Mutable list; a skip record is appended when the read
 *   reports one.
 * @returns An effect that completes once the matcher and list are updated.
 */
const loadIgnoreFile = (filePath, ig, skipped) => Effect.gen(function* () {
	const describeFailure = (error) => `Could not read ignore file: ${String(error)}`;
	const outcome = yield* readFileWithSkip(filePath, describeFailure);
	if (outcome.skipped) {
		skipped.push(outcome.skipped);
	}
	// Whitespace-only files contribute no patterns.
	const hasPatterns = Boolean(outcome.content.trim());
	if (hasPatterns) {
		ig.add(outcome.content.split("\n"));
	}
});
|
|
1106
|
+
const loadGitignoreRules = (ignoredPaths) => {
|
|
796
1107
|
const ig = ignore();
|
|
797
|
-
const cwd = process.cwd();
|
|
798
1108
|
const skipped = [];
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
1109
|
+
if (ignoredPaths.length > 0) ig.add(ignoredPaths);
|
|
1110
|
+
return Effect.succeed({
|
|
1111
|
+
ig,
|
|
1112
|
+
skipped
|
|
1113
|
+
});
|
|
1114
|
+
};
|
|
1115
|
+
const loadGitignoreRulesWithFiles = (ignoredPaths, cwd) => Effect.gen(function* () {
|
|
1116
|
+
const ig = ignore();
|
|
1117
|
+
const skipped = [];
|
|
1118
|
+
if (ignoredPaths.length > 0) ig.add(ignoredPaths);
|
|
1119
|
+
const gitignorePath = `${cwd}/.gitignore`;
|
|
1120
|
+
if (yield* fs.exists(gitignorePath).pipe(Effect.catchAll(() => Effect.succeed(false)))) yield* loadIgnoreFile(gitignorePath, ig, skipped);
|
|
802
1121
|
const excludePath = `${cwd}/.git/info/exclude`;
|
|
803
|
-
if (yield* fs.exists(excludePath))
|
|
804
|
-
const excludeContent = yield* readFileWithSkip(excludePath, (error) => `Could not read exclude file: ${String(error)}`);
|
|
805
|
-
if (excludeContent.skipped) skipped.push(excludeContent.skipped);
|
|
806
|
-
if (excludeContent.content.trim()) ig.add(excludeContent.content.split("\n"));
|
|
807
|
-
}
|
|
1122
|
+
if (yield* fs.exists(excludePath).pipe(Effect.catchAll(() => Effect.succeed(false)))) yield* loadIgnoreFile(excludePath, ig, skipped);
|
|
808
1123
|
return {
|
|
809
1124
|
ig,
|
|
810
1125
|
skipped
|
|
811
1126
|
};
|
|
812
1127
|
});
|
|
813
|
-
const
|
|
1128
|
+
/**
 * Classify a single directory entry for the file walker.
 *
 * @param entry - Entry name as listed in `dir`.
 * @param dir - Directory containing the entry.
 * @param ig - `ignore` matcher holding the active ignore patterns.
 * @param cwd - Root used to turn the entry path into a relative path.
 * @returns An effect yielding `{ files, skipped }`. A directory that is not
 *   ignored additionally carries `recurse: true`, which the caller detects
 *   with `"recurse" in result`.
 */
const processEntry = (entry, dir, ig, cwd) => Effect.gen(function* () {
	const fullPath = `${dir}/${entry}`;
	const statResult = yield* statWithSkip(fullPath);
	if (statResult.skipped) return {
		files: [],
		skipped: [statResult.skipped]
	};
	if (!statResult.info) return {
		files: [],
		skipped: []
	};
	const { type } = statResult.info;
	const isDirectory = type === "Directory";
	// Anything that is neither a directory nor a regular file is silently dropped.
	if (!isDirectory && type !== "File") return {
		files: [],
		skipped: []
	};
	const relativePath = computeRelative(fullPath, cwd);
	// The `ignore` matcher never stats paths: directory-only patterns such as
	// "node_modules/" match only when the tested path ends with a slash.
	// Without it the walker would descend into ignored directory trees.
	const candidate = isDirectory ? `${relativePath}/` : relativePath;
	if (ig.ignores(candidate)) return {
		files: [],
		skipped: [{
			path: fullPath,
			reason: `Ignored by config pattern: ${relativePath}`
		}]
	};
	if (isDirectory) return {
		files: [],
		skipped: [],
		recurse: true
	};
	return {
		files: [fullPath],
		skipped: []
	};
});
|
|
1174
|
+
const walk = (dir, ig, cwd) => Effect.gen(function* () {
|
|
814
1175
|
const result = yield* readDirectoryWithSkip(dir);
|
|
815
1176
|
let files = [];
|
|
816
1177
|
const skipped = [];
|
|
817
1178
|
if (result.skipped) skipped.push(result.skipped);
|
|
818
1179
|
for (const entry of result.entries) {
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
if (
|
|
823
|
-
|
|
824
|
-
continue;
|
|
825
|
-
}
|
|
826
|
-
if (!info.info) continue;
|
|
827
|
-
if (info.info.type === "Directory") {
|
|
828
|
-
const sub = yield* walk(fullPath, extensions);
|
|
1180
|
+
const entryResult = yield* processEntry(entry, dir, ig, cwd);
|
|
1181
|
+
files.push(...entryResult.files);
|
|
1182
|
+
skipped.push(...entryResult.skipped);
|
|
1183
|
+
if ("recurse" in entryResult) {
|
|
1184
|
+
const sub = yield* walk(`${dir}/${entry}`, ig, cwd);
|
|
829
1185
|
files.push(...sub.files);
|
|
830
1186
|
skipped.push(...sub.skipped);
|
|
831
|
-
} else if (info.info.type === "File") {
|
|
832
|
-
const dotIndex = entry.lastIndexOf(".");
|
|
833
|
-
if (dotIndex === -1) continue;
|
|
834
|
-
const ext = entry.slice(dotIndex);
|
|
835
|
-
if (extensions.has(ext)) files.push(fullPath);
|
|
836
1187
|
}
|
|
837
1188
|
}
|
|
838
1189
|
return {
|
|
@@ -840,16 +1191,15 @@ const make$1 = Effect.gen(function* () {
|
|
|
840
1191
|
skipped
|
|
841
1192
|
};
|
|
842
1193
|
});
|
|
843
|
-
const scanFiles = (
|
|
844
|
-
const
|
|
845
|
-
|
|
1194
|
+
const scanFiles = (ignoredPaths, ignoreGitignore) => Effect.gen(function* () {
|
|
1195
|
+
const cwd = process.cwd();
|
|
1196
|
+
const { ig, skipped: ignoreSkipped } = yield* (ignoreGitignore ? loadGitignoreRules(ignoredPaths) : loadGitignoreRulesWithFiles(ignoredPaths, cwd)).pipe(Effect.mapError((cause) => new ScanFailed({
|
|
1197
|
+
message: `Failed to load ignore rules: ${String(cause)}`,
|
|
846
1198
|
cause
|
|
847
1199
|
})));
|
|
848
|
-
const
|
|
849
|
-
const { files: paths, skipped: walkSkipped } = yield* walk(cwd, new Set(extensions));
|
|
850
|
-
const relativePaths = paths.map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
|
|
1200
|
+
const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
|
|
851
1201
|
return {
|
|
852
|
-
files
|
|
1202
|
+
files,
|
|
853
1203
|
skipped: [...ignoreSkipped, ...walkSkipped]
|
|
854
1204
|
};
|
|
855
1205
|
});
|
|
@@ -868,6 +1218,22 @@ const isPlatformReason = (cause, reason) => typeof cause === "object" && cause !
|
|
|
868
1218
|
*/
|
|
869
1219
|
const make = Effect.gen(function* () {
|
|
870
1220
|
const fs = yield* FileSystem.FileSystem;
|
|
1221
|
+
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
1222
|
+
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
1223
|
+
const seenFiles = yield* Ref.make(/* @__PURE__ */ new Set());
|
|
1224
|
+
const statsAccumulator = yield* Ref.make({
|
|
1225
|
+
chunks: 0,
|
|
1226
|
+
files: 0,
|
|
1227
|
+
totalLines: 0,
|
|
1228
|
+
byteSize: 0
|
|
1229
|
+
});
|
|
1230
|
+
/**
 * Pack embedding vectors back-to-back into one binary buffer of 32-bit floats.
 *
 * The per-vector width is taken from the first embedding's `dims` and falls
 * back to 384 when the list is empty or `dims` is missing.
 *
 * @param embeddings - Array of `{ vector, dims }` records.
 * @returns {Buffer} Buffer over the packed Float32Array storage.
 */
const serializeVectors = (embeddings) => {
	const width = embeddings[0]?.dims ?? 384;
	const packed = new Float32Array(embeddings.length * width);
	for (const [position, embedding] of embeddings.entries()) {
		packed.set(embedding.vector, position * width);
	}
	return Buffer.from(packed.buffer);
};
|
|
871
1237
|
/**
|
|
872
1238
|
* Count total lines across all chunks in chunks.jsonl. Each line is a JSON object; the 'text'
|
|
873
1239
|
* field contains the source code.
|
|
@@ -930,6 +1296,60 @@ const make = Effect.gen(function* () {
|
|
|
930
1296
|
deleted: true
|
|
931
1297
|
};
|
|
932
1298
|
});
|
|
1299
|
+
/**
 * Start a batched store session.
 *
 * Ensures the store directory exists, clears the seen-file set and the
 * running statistics, then removes any temp files left on disk.
 *
 * @returns An effect that completes once the session state is clean.
 */
const storeBegin = () => Effect.gen(function* () {
	yield* ensureDirExists(STORE_DIR, ".pix directory");
	yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
	const emptyStats = {
		chunks: 0,
		files: 0,
		totalLines: 0,
		byteSize: 0
	};
	yield* Ref.set(statsAccumulator, emptyStats);
	// Chunks temp first, then vectors temp; each is only removed when it exists.
	const staleTargets = [
		{
			path: chunksTemp,
			checkLabel: "check chunks temp",
			removeLabel: "clean stale chunks temp"
		},
		{
			path: vectorsTemp,
			checkLabel: "check vectors temp",
			removeLabel: "clean stale vectors temp"
		}
	];
	for (const target of staleTargets) {
		const present = yield* withStoreError(fs.exists(target.path), target.checkLabel);
		if (present) {
			yield* withStoreError(fs.remove(target.path), target.removeLabel, target.path);
		}
	}
});
|
|
1311
|
+
/**
 * Append one batch of chunks and their embeddings to the temp store files and
 * fold the batch into the running statistics.
 *
 * Each chunk becomes one newline-terminated JSON line. An empty `chunks`
 * array appends nothing (the temp file is still created by the append call),
 * so no blank record ends up in the JSONL output.
 *
 * @param chunks - Chunk records with `id`, `idx`, `file`, `startLine`,
 *   `endLine` and `text`.
 * @param embeddings - `{ vector, dims }` records.
 *   NOTE(review): presumably one per chunk in the same order; not checked here.
 * @returns An effect that completes once both appends and both ref updates ran.
 */
const storeBatch = (chunks, embeddings) => Effect.gen(function* () {
	// Terminate every record individually so an empty batch yields "" rather than "\n".
	const content = chunks.map((c) => `${JSON.stringify({
		id: c.id,
		idx: c.idx,
		file: c.file,
		startLine: c.startLine,
		endLine: c.endLine,
		text: c.text
	})}\n`).join("");
	yield* withStoreError(fs.writeFile(chunksTemp, Buffer.from(content), { flag: "a" }), "append chunks", chunksTemp);
	const buffer = serializeVectors(embeddings);
	yield* withStoreError(fs.writeFile(vectorsTemp, buffer, { flag: "a" }), "append vectors", vectorsTemp);
	const dims = embeddings[0]?.dims ?? 384;
	// Line ranges are inclusive, hence the +1 per chunk.
	const batchLines = chunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
	// Four bytes per 32-bit float.
	const batchBytes = embeddings.length * dims * 4;
	yield* Ref.update(seenFiles, (prev) => {
		// The set is extended in place and handed back to the ref.
		for (const c of chunks) prev.add(c.file);
		return prev;
	});
	yield* Ref.update(statsAccumulator, (prev) => ({
		chunks: prev.chunks + chunks.length,
		// The file count is filled in from the seen-file set at commit time.
		files: 0,
		totalLines: prev.totalLines + batchLines,
		byteSize: prev.byteSize + batchBytes
	}));
});
|
|
1337
|
+
/**
 * Finish a batched store session by renaming both temp files onto their
 * final paths (chunks first, then vectors).
 *
 * @returns An effect yielding the accumulated statistics, with `files`
 *   replaced by the number of distinct files seen. The seen-file set is
 *   cleared afterwards.
 */
const storeCommit = () => Effect.gen(function* () {
	yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
	yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
	const totals = yield* Ref.get(statsAccumulator);
	const indexedFiles = yield* Ref.get(seenFiles);
	yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
	const fileCount = indexedFiles.size;
	return Object.assign({}, totals, { files: fileCount });
});
|
|
1348
|
+
/**
 * Abandon a batched store session.
 *
 * Clears the seen-file set and the running statistics (mirroring what
 * `storeBegin` resets, so no stale totals survive an abort) and removes
 * whichever temp files exist.
 *
 * @returns An effect that completes once the session state is discarded.
 */
const storeAbort = () => Effect.gen(function* () {
	yield* Ref.set(seenFiles, /* @__PURE__ */ new Set());
	yield* Ref.set(statsAccumulator, {
		chunks: 0,
		files: 0,
		totalLines: 0,
		byteSize: 0
	});
	if (yield* withReadError(fs.exists(chunksTemp), "check chunks temp")) yield* withReadError(fs.remove(chunksTemp), "abort chunks temp", chunksTemp);
	if (yield* withReadError(fs.exists(vectorsTemp), "check vectors temp")) yield* withReadError(fs.remove(vectorsTemp), "abort vectors temp", vectorsTemp);
});
|
|
933
1353
|
const store = (chunks, embeddings) => Effect.gen(function* () {
|
|
934
1354
|
yield* ensureDirExists(STORE_DIR, ".pix directory");
|
|
935
1355
|
const chunksTemp = `${CHUNKS_FILE}.tmp`;
|
|
@@ -944,11 +1364,7 @@ const make = Effect.gen(function* () {
|
|
|
944
1364
|
yield* withStoreError(fs.writeFileString(chunksTemp, chunksLines.join("\n")), "write chunks", chunksTemp);
|
|
945
1365
|
yield* withStoreError(fs.rename(chunksTemp, CHUNKS_FILE), "commit chunks", CHUNKS_FILE);
|
|
946
1366
|
const vectorsTemp = `${VECTORS_FILE}.tmp`;
|
|
947
|
-
const
|
|
948
|
-
const totalFloats = embeddings.length * dims;
|
|
949
|
-
const vectorsArray = new Float32Array(totalFloats);
|
|
950
|
-
for (let i = 0; i < embeddings.length; i++) vectorsArray.set(embeddings[i].vector, i * dims);
|
|
951
|
-
const buffer = Buffer.from(vectorsArray.buffer);
|
|
1367
|
+
const buffer = serializeVectors(embeddings);
|
|
952
1368
|
yield* withStoreError(fs.writeFile(vectorsTemp, buffer), "write vectors", vectorsTemp);
|
|
953
1369
|
yield* withStoreError(fs.rename(vectorsTemp, VECTORS_FILE), "commit vectors", VECTORS_FILE);
|
|
954
1370
|
});
|
|
@@ -1017,6 +1433,10 @@ const make = Effect.gen(function* () {
|
|
|
1017
1433
|
});
|
|
1018
1434
|
return {
|
|
1019
1435
|
store,
|
|
1436
|
+
storeBegin,
|
|
1437
|
+
storeBatch,
|
|
1438
|
+
storeCommit,
|
|
1439
|
+
storeAbort,
|
|
1020
1440
|
search,
|
|
1021
1441
|
getStatus,
|
|
1022
1442
|
reset
|
|
@@ -1025,7 +1445,7 @@ const make = Effect.gen(function* () {
|
|
|
1025
1445
|
const VectorStoreLive = Layer.effect(VectorStore, make);
|
|
1026
1446
|
//#endregion
|
|
1027
1447
|
//#region src/index.ts
|
|
1028
|
-
const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive);
|
|
1448
|
+
/** Merge of the config store, scanner, embedder, vector store and content extractor layers. */
const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive, ContentExtractorLive);
|
|
1029
1449
|
const ChunkerLayer = ChunkerLive.pipe(Layer.provide(ServicesLayer));
|
|
1030
1450
|
const InfraLayer = Layer.mergeAll(ServicesLayer, ChunkerLayer).pipe(Layer.provide(NodeContext.layer));
|
|
1031
1451
|
const UseCaseLayer = Layer.mergeAll(InitProject.Default, GetStatus.Default, QueryProject.Default, IndexProject.Default, ResetIndex.Default);
|