@lucas-bur/pix 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +297 -82
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -5,13 +5,14 @@ import { NodeContext, NodeRuntime } from "@effect/platform-node";
|
|
|
5
5
|
import { Clock, Context, Data, Effect, Layer, Option, Ref } from "effect";
|
|
6
6
|
import { styleText } from "node:util";
|
|
7
7
|
import * as clack from "@clack/prompts";
|
|
8
|
-
import crypto from "node:crypto";
|
|
9
8
|
import { FileSystem } from "@effect/platform";
|
|
9
|
+
import crypto from "node:crypto";
|
|
10
10
|
import { env } from "@huggingface/transformers";
|
|
11
11
|
import ignore from "ignore";
|
|
12
12
|
//#region src/domain/ports.ts
|
|
13
13
|
var ConfigStore = class extends Context.Tag("ConfigStore")() {};
|
|
14
14
|
var Scanner = class extends Context.Tag("Scanner")() {};
|
|
15
|
+
var ContentExtractor = class extends Context.Tag("ContentExtractor")() {};
|
|
15
16
|
var Chunker = class extends Context.Tag("Chunker")() {};
|
|
16
17
|
var Embedder = class extends Context.Tag("Embedder")() {};
|
|
17
18
|
var VectorStore = class extends Context.Tag("VectorStore")() {};
|
|
@@ -140,7 +141,23 @@ const DEFAULT_CONFIG = {
|
|
|
140
141
|
chunkLines: 60,
|
|
141
142
|
overlapLines: 10,
|
|
142
143
|
chunkConcurrency: 8,
|
|
143
|
-
|
|
144
|
+
skipExtensions: [],
|
|
145
|
+
ignoredPaths: [
|
|
146
|
+
".pix",
|
|
147
|
+
"node_modules",
|
|
148
|
+
".git",
|
|
149
|
+
"dist",
|
|
150
|
+
"build",
|
|
151
|
+
".next",
|
|
152
|
+
".agents",
|
|
153
|
+
".claude",
|
|
154
|
+
".vscode",
|
|
155
|
+
".github",
|
|
156
|
+
"coverage",
|
|
157
|
+
"*-lock.yaml",
|
|
158
|
+
"*-lock.json",
|
|
159
|
+
"*.lock"
|
|
160
|
+
],
|
|
144
161
|
embedder: {
|
|
145
162
|
model: "Xenova/all-MiniLM-L6-v2",
|
|
146
163
|
device: "auto",
|
|
@@ -148,10 +165,158 @@ const DEFAULT_CONFIG = {
|
|
|
148
165
|
}
|
|
149
166
|
};
|
|
150
167
|
//#endregion
|
|
168
|
+
//#region src/domain/errors.ts
/*
 * Domain error types. Each class extends Data.TaggedError with its own name as the tag; callers in
 * this file discriminate on that tag (e.g. `error._tag === "UnsupportedFormat"`).
 */
/** Config file or directory does not exist. Run pix init first. */
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
/** Config file exists but contains invalid JSON. */
var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
/** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
/** Disk is full — write operation could not complete. */
var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
/** Generic index store I/O failure (read, write, delete). */
var StoreError = class extends Data.TaggedError("StoreError") {};
/** Source file could not be read during chunking (binary, permissions, encoding). */
var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
/** Embedding model could not be downloaded or loaded. */
var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
/** Embedding model failed during inference. */
var InferenceError = class extends Data.TaggedError("InferenceError") {};
/**
 * Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
 * via ScanResult.skipped.
 */
var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
/**
 * File type is unsupported for text extraction. Constructed in this file with
 * `{ message, extension }`.
 */
var UnsupportedFormat = class extends Data.TaggedError("UnsupportedFormat") {};
/**
 * Text extraction failed for a supported file type. Constructed in this file with
 * `{ message, file, cause }`.
 */
var ExtractionFailed = class extends Data.TaggedError("ExtractionFailed") {};
//#endregion
|
|
195
|
+
//#region src/services/processors/identity.ts
|
|
196
|
+
/**
 * Processor for plain-text formats: the extracted text is the file's content, unchanged.
 *
 * Requires the FileSystem service. Any read failure is surfaced as ExtractionFailed carrying the
 * offending path and the underlying cause.
 *
 * @param file Path of the file to read.
 */
const identityProcessor = (file) => {
	const toExtractionFailure = (cause) => new ExtractionFailed({
		message: `Failed to read file for extraction: ${file}`,
		file,
		cause
	});
	const readText = Effect.flatMap(FileSystem.FileSystem, (fs) => fs.readFileString(file));
	return readText.pipe(Effect.mapError(toExtractionFailure));
};
|
|
201
|
+
//#endregion
|
|
202
|
+
//#region src/services/processors/skip.ts
|
|
203
|
+
/**
 * Builds a processor that rejects every file it is given with UnsupportedFormat.
 *
 * The error (and the failing effect wrapping it) is created once per extension and shared by all
 * calls of the returned processor.
 *
 * @param extension Extension reported in the error message and payload.
 */
const skipProcessor = (extension) => {
	const rejection = Effect.fail(new UnsupportedFormat({
		message: `Unsupported file type: ${extension}`,
		extension
	}));
	return (_file) => rejection;
};
|
|
210
|
+
//#endregion
|
|
211
|
+
//#region src/services/processors/index.ts
|
|
212
|
+
/**
 * Default routing from a lookup key to the processor that extracts a file's text. Keys are
 * lowercase: either a dotted extension (".ts") or, for extension-less files, the bare file name
 * ("dockerfile"). Text formats are read verbatim; binary formats are rejected with
 * UnsupportedFormat. Groups are listed so that key insertion order matches the original literal.
 */
const DEFAULT_PROCESSOR_MAP = (() => {
	const readAsText = (keys) => keys.map((key) => [key, identityProcessor]);
	const reject = (keys) => keys.map((key) => [key, skipProcessor(key)]);
	return Object.fromEntries([
		...readAsText([
			".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go", ".java",
			".c", ".cpp", ".h", ".hpp",
			".json", ".yaml", ".yml", ".toml", ".xml", ".csv",
			".md", ".mdx", ".txt", ".rst",
			".html", ".css", ".scss", ".less",
			".sql", ".graphql",
			".sh", ".bash", ".ps1", ".bat", ".cmake",
			".dockerfile", "dockerfile", "makefile", "gemfile"
		]),
		...reject([".pdf", ".png", ".jpg", ".jpeg", ".gif"]),
		// SVG is XML text, so unlike the other image formats it is read as-is.
		...readAsText([".svg"]),
		...reject([
			".ico", ".webp",
			".mp3", ".mp4", ".wav", ".avi", ".mov", ".mkv",
			".exe", ".dll", ".so",
			".zip", ".tar", ".gz", ".7z", ".rar",
			".ttf", ".woff", ".woff2", ".eot", ".otf"
		]),
		...readAsText([".lock", "lock"])
	]);
})();
|
|
280
|
+
/**
 * Builds the processor map by merging domain defaults with user-specified skip extensions. Skip
 * extensions override any existing mapping with a skip processor. Unknown extensions remain absent
 * from the map — callers decide how to handle them.
 *
 * The returned object has no prototype, so a lookup such as `map["constructor"]` or
 * `map["__proto__"]` only ever sees real entries, and a user entry named "__proto__" becomes an
 * ordinary key instead of replacing the prototype.
 *
 * @param skipExtensions Keys to force-skip. May be omitted or nullish (a config file that lacks
 *   the `skipExtensions` key); non-string and empty entries are ignored. Entries are lowercased
 *   because lookup keys are always lowercase.
 * @returns A fresh map from lookup key to processor.
 */
function buildProcessorMap(skipExtensions = []) {
	const mapped = Object.assign(Object.create(null), DEFAULT_PROCESSOR_MAP);
	for (const raw of skipExtensions ?? []) {
		if (typeof raw !== "string" || raw === "") continue;
		const ext = raw.toLowerCase();
		mapped[ext] = skipProcessor(ext);
	}
	return mapped;
}
|
|
290
|
+
//#endregion
|
|
151
291
|
//#region src/application/index-project.ts
|
|
292
|
+
/**
 * Derives the processor-map lookup key for a path.
 *
 * @param file Path using "/" as separator.
 * @returns The lowercased extension including its leading dot (".ts"), taken from the last "." of
 *   the final path segment; when that segment has no ".", the lowercased segment itself
 *   ("makefile").
 */
function getExtension(file) {
	const lastSlash = file.lastIndexOf("/");
	const name = lastSlash >= 0 ? file.slice(lastSlash + 1) : file;
	const dotIndex = name.lastIndexOf(".");
	if (dotIndex === -1) return name.toLowerCase();
	return name.slice(dotIndex).toLowerCase();
}
/**
 * Splits scanned files into those with a registered processor and those without.
 *
 * Only the map's own entries count: an extension-less file named "constructor" or "__proto__"
 * must not be treated as known just because the map object inherits a property of that name.
 *
 * @param files Paths to classify; input order is preserved in both output lists.
 * @param processorMap Map from lookup key (see getExtension) to processor.
 * @returns `knownFiles` (a processor exists), `skippedFiles` (none exists) and
 *   `unknownExtensions`, the distinct lookup keys of the skipped files.
 */
const classifyFiles = (files, processorMap) => {
	const knownFiles = [];
	const skippedFiles = [];
	const unknownExtensions = /* @__PURE__ */ new Set();
	for (const file of files) {
		const ext = getExtension(file);
		const isKnown = Object.hasOwn(processorMap, ext) && Boolean(processorMap[ext]);
		if (isKnown) {
			knownFiles.push(file);
			continue;
		}
		unknownExtensions.add(ext);
		skippedFiles.push(file);
	}
	return {
		knownFiles,
		skippedFiles,
		unknownExtensions
	};
};
|
|
152
316
|
/**
|
|
153
|
-
* Use case: index project files. Pipeline: scan → chunk → embed → store. Depends
|
|
154
|
-
* Scanner, Chunker, Embedder, VectorStore, Display via Effect
|
|
317
|
+
* Use case: index project files. Pipeline: scan → ContentExtractor → chunk → embed → store. Depends
|
|
318
|
+
* on ConfigStore, Scanner, Chunker, Embedder, VectorStore, Display, ContentExtractor via Effect
|
|
319
|
+
* tags.
|
|
155
320
|
*/
|
|
156
321
|
var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
157
322
|
accessors: true,
|
|
@@ -162,19 +327,36 @@ var IndexProject = class extends Effect.Service()("IndexProject", {
|
|
|
162
327
|
const embedder = yield* Embedder;
|
|
163
328
|
const vectorStore = yield* VectorStore;
|
|
164
329
|
const d = yield* Display;
|
|
330
|
+
const extractor = yield* ContentExtractor;
|
|
165
331
|
const index = () => Effect.gen(function* () {
|
|
166
332
|
if (!(yield* configStore.configExists())) yield* configStore.writeConfig(DEFAULT_CONFIG);
|
|
167
333
|
const config = yield* configStore.readConfig();
|
|
168
|
-
const
|
|
169
|
-
".ts",
|
|
170
|
-
".tsx",
|
|
171
|
-
".js",
|
|
172
|
-
".jsx"
|
|
173
|
-
];
|
|
334
|
+
const processorMap = buildProcessorMap(config.skipExtensions);
|
|
174
335
|
yield* d.updateInteractive("Scanning source files...");
|
|
175
|
-
const
|
|
176
|
-
yield*
|
|
177
|
-
|
|
336
|
+
const ignoredPaths = config.ignoredPaths ?? DEFAULT_CONFIG.ignoredPaths;
|
|
337
|
+
const { knownFiles, skippedFiles, unknownExtensions } = classifyFiles((yield* scanner.scanFiles(ignoredPaths)).files, processorMap);
|
|
338
|
+
if (unknownExtensions.size > 0) yield* d.log(`Skipped ${skippedFiles.length} files with unknown extensions: ${[...unknownExtensions].join(", ")}`, "warn");
|
|
339
|
+
if (knownFiles.length === 0) return {
|
|
340
|
+
success: true,
|
|
341
|
+
status: {
|
|
342
|
+
chunks: 0,
|
|
343
|
+
files: 0,
|
|
344
|
+
totalLines: 0,
|
|
345
|
+
byteSize: 0
|
|
346
|
+
}
|
|
347
|
+
};
|
|
348
|
+
yield* d.updateInteractive(`Processing ${knownFiles.length} files...`);
|
|
349
|
+
const allChunks = (yield* Effect.forEach(knownFiles, (file) => Effect.gen(function* () {
|
|
350
|
+
const result = yield* Effect.either(extractor.extract(file));
|
|
351
|
+
if (result._tag === "Left") {
|
|
352
|
+
if (result.left._tag === "UnsupportedFormat") {
|
|
353
|
+
yield* d.log(`Skipping ${file}: ${result.left.message}`, "warn");
|
|
354
|
+
return [];
|
|
355
|
+
}
|
|
356
|
+
return yield* Effect.fail(result.left);
|
|
357
|
+
}
|
|
358
|
+
return yield* chunker.chunkText(result.right, file);
|
|
359
|
+
}), { concurrency: Math.max(1, config.chunkConcurrency ?? 8) })).flat();
|
|
178
360
|
const totalChunks = allChunks.length;
|
|
179
361
|
const totalFiles = new Set(allChunks.map((c) => c.file)).size;
|
|
180
362
|
const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
|
|
@@ -493,29 +675,6 @@ const setupTerminalCleanup = () => {
|
|
|
493
675
|
process.on("exit", makeTerminalCleanupHandler(process.stdin, process.stdout));
|
|
494
676
|
};
|
|
495
677
|
//#endregion
|
|
496
|
-
//#region src/domain/errors.ts
|
|
497
|
-
/** Config file or directory does not exist. Run pix init first. */
|
|
498
|
-
var ConfigNotFoundError = class extends Data.TaggedError("ConfigNotFoundError") {};
|
|
499
|
-
/** Config file exists but contains invalid JSON. */
|
|
500
|
-
var ConfigMalformedError = class extends Data.TaggedError("ConfigMalformedError") {};
|
|
501
|
-
/** Index files (chunks.jsonl, vectors.bin) do not exist. Run pix index first. */
|
|
502
|
-
var NoIndexError = class extends Data.TaggedError("NoIndexError") {};
|
|
503
|
-
/** Disk is full — write operation could not complete. */
|
|
504
|
-
var DiskFullError = class extends Data.TaggedError("DiskFullError") {};
|
|
505
|
-
/** Generic index store I/O failure (read, write, delete). */
|
|
506
|
-
var StoreError = class extends Data.TaggedError("StoreError") {};
|
|
507
|
-
/** Source file could not be read during chunking (binary, permissions, encoding). */
|
|
508
|
-
var ChunkerError = class extends Data.TaggedError("ChunkerError") {};
|
|
509
|
-
/** Embedding model could not be downloaded or loaded. */
|
|
510
|
-
var ModelLoadError = class extends Data.TaggedError("ModelLoadError") {};
|
|
511
|
-
/** Embedding model failed during inference. */
|
|
512
|
-
var InferenceError = class extends Data.TaggedError("InferenceError") {};
|
|
513
|
-
/**
|
|
514
|
-
* Fatal scan failure — gitignore loading failed entirely. Non-fatal per-entry skips are reported
|
|
515
|
-
* via ScanResult.skipped.
|
|
516
|
-
*/
|
|
517
|
-
var ScanFailed = class extends Data.TaggedError("ScanFailed") {};
|
|
518
|
-
//#endregion
|
|
519
678
|
//#region src/services/chunker.ts
|
|
520
679
|
const MIN_CHUNK_CHARS = 20;
|
|
521
680
|
const readFileContent = (fs, file) => fs.readFileString(file).pipe(Effect.mapError((cause) => new ChunkerError({
|
|
@@ -547,17 +706,24 @@ const buildChunks = (file, content, config) => {
|
|
|
547
706
|
}
|
|
548
707
|
return chunks;
|
|
549
708
|
};
|
|
550
|
-
const make$
|
|
709
|
+
const make$5 = Effect.gen(function* () {
|
|
551
710
|
const fs = yield* FileSystem.FileSystem;
|
|
552
711
|
const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
|
|
712
|
+
const chunkText = (text, file) => Effect.sync(() => {
|
|
713
|
+
if (text === "") return [];
|
|
714
|
+
return buildChunks(file, text, config);
|
|
715
|
+
});
|
|
553
716
|
const chunkFile = (file) => Effect.gen(function* () {
|
|
554
717
|
const content = yield* readFileContent(fs, file);
|
|
555
718
|
if (content === "") return [];
|
|
556
719
|
return buildChunks(file, content, config);
|
|
557
720
|
});
|
|
558
|
-
return {
|
|
721
|
+
return {
|
|
722
|
+
chunkFile,
|
|
723
|
+
chunkText
|
|
724
|
+
};
|
|
559
725
|
});
|
|
560
|
-
const ChunkerLive = Layer.effect(Chunker, make$
|
|
726
|
+
const ChunkerLive = Layer.effect(Chunker, make$5);
|
|
561
727
|
//#endregion
|
|
562
728
|
//#region src/services/config-store.ts
|
|
563
729
|
const CONFIG_DIR = ".pix";
|
|
@@ -574,7 +740,7 @@ const mapConfigWriteError = (cause, path, action) => {
|
|
|
574
740
|
cause
|
|
575
741
|
});
|
|
576
742
|
};
|
|
577
|
-
const make$
|
|
743
|
+
const make$4 = Effect.gen(function* () {
|
|
578
744
|
const fs = yield* FileSystem.FileSystem;
|
|
579
745
|
const writeConfig = (config) => Effect.gen(function* () {
|
|
580
746
|
const configJson = JSON.stringify(config, null, 2);
|
|
@@ -611,7 +777,28 @@ const make$3 = Effect.gen(function* () {
|
|
|
611
777
|
configExists
|
|
612
778
|
};
|
|
613
779
|
});
|
|
614
|
-
const ConfigStoreLive = Layer.effect(ConfigStore, make$
|
|
780
|
+
const ConfigStoreLive = Layer.effect(ConfigStore, make$4);
|
|
781
|
+
//#endregion
|
|
782
|
+
//#region src/services/content-extractor.ts
|
|
783
|
+
/**
 * ContentExtractor implementation: routes a file to the processor registered for its lookup key
 * and runs it against the FileSystem captured when the layer is built.
 *
 * The processor map honours the user's `skipExtensions`. The config is read once at construction
 * and falls back to DEFAULT_CONFIG when it cannot be read, mirroring how the chunker obtains its
 * config. Previously the map was built from an empty skip list, so a user-skipped text extension
 * was still extracted and indexed.
 */
const make$3 = Effect.gen(function* () {
	const fs = yield* FileSystem.FileSystem;
	const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
	// `?? []` covers config files that do not contain a skipExtensions key.
	const processorMap = buildProcessorMap(config.skipExtensions ?? []);
	/**
	 * Extracts the text of `file`.
	 * Fails with UnsupportedFormat when no processor is registered for the file's lookup key or
	 * the registered processor is a skip processor; other failures come from the processor itself.
	 */
	const extract = (file) => {
		const ext = getExtension(file);
		// Own-property lookup: names like "constructor" must not resolve to inherited members.
		const processor = Object.hasOwn(processorMap, ext) ? processorMap[ext] : void 0;
		if (!processor) return Effect.fail(new UnsupportedFormat({
			message: `No processor for extension: ${ext}`,
			extension: ext
		}));
		return processor(file).pipe(Effect.provideService(FileSystem.FileSystem, fs));
	};
	return { extract };
});
// ConfigStore is supplied here so the layer's outward requirements stay limited to what
// ConfigStoreLive itself needs (FileSystem).
const ContentExtractorLive = Layer.effect(ContentExtractor, make$3).pipe(Layer.provide(ConfigStoreLive));
|
|
615
802
|
//#endregion
|
|
616
803
|
//#region src/domain/models.ts
|
|
617
804
|
/** Registry of supported embedding models. */
|
|
@@ -752,14 +939,6 @@ const make$2 = Effect.gen(function* () {
|
|
|
752
939
|
const OnnxEmbedderLive = Layer.provideMerge(Layer.effect(Embedder, make$2), ConfigStoreLive);
|
|
753
940
|
//#endregion
|
|
754
941
|
//#region src/services/scanner.ts
|
|
755
|
-
const ALWAYS_IGNORE = new Set([
|
|
756
|
-
".pix",
|
|
757
|
-
"node_modules",
|
|
758
|
-
".git",
|
|
759
|
-
"dist",
|
|
760
|
-
"build",
|
|
761
|
-
".next"
|
|
762
|
-
]);
|
|
763
942
|
const make$1 = Effect.gen(function* () {
|
|
764
943
|
const fs = yield* FileSystem.FileSystem;
|
|
765
944
|
const readFileWithSkip = (path, mkReason) => fs.readFileString(path).pipe(Effect.map((content) => ({
|
|
@@ -792,47 +971,84 @@ const make$1 = Effect.gen(function* () {
|
|
|
792
971
|
reason: `Could not stat: ${String(error)}`
|
|
793
972
|
}
|
|
794
973
|
})));
|
|
795
|
-
const
|
|
974
|
+
/**
 * Makes `fullPath` relative to `cwd` when it lies inside it; otherwise returns it unchanged.
 *
 * The prefix test includes the "/" separator so that a sibling directory sharing a name prefix
 * (cwd "/a/b", path "/a/bc/x") is not mistaken for a descendant and mangled.
 *
 * @param fullPath Path built as `${dir}/${entry}` by the walker.
 * @param cwd Directory the scan is rooted at.
 */
const computeRelative = (fullPath, cwd) => {
	const prefix = `${cwd}/`;
	return fullPath.startsWith(prefix) ? fullPath.slice(prefix.length) : fullPath;
};
|
|
975
|
+
/**
 * Reads one ignore file and feeds its lines to the matcher.
 *
 * @param filePath Path of the ignore file to read.
 * @param ig Matcher whose `add` receives the file's lines; mutated in place.
 * @param skipped Accumulator that receives the skip record when the read reports one; mutated in
 *   place.
 */
const loadIgnoreFile = (filePath, ig, skipped) => Effect.gen(function* () {
	const describeFailure = (error) => `Could not read ignore file: ${String(error)}`;
	const { content, skipped: failure } = yield* readFileWithSkip(filePath, describeFailure);
	if (failure) skipped.push(failure);
	// Whitespace-only files contribute no rules.
	if (content.trim() === "") return;
	ig.add(content.split("\n"));
});
|
|
980
|
+
const loadGitignoreRules = (ignoredPaths, cwd) => Effect.gen(function* () {
|
|
796
981
|
const ig = ignore();
|
|
797
|
-
const cwd = process.cwd();
|
|
798
982
|
const skipped = [];
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
if (
|
|
983
|
+
if (ignoredPaths.length > 0) ig.add(ignoredPaths);
|
|
984
|
+
const gitignorePath = `${cwd}/.gitignore`;
|
|
985
|
+
if (yield* fs.exists(gitignorePath).pipe(Effect.catchAll(() => Effect.succeed(false)))) yield* loadIgnoreFile(gitignorePath, ig, skipped);
|
|
802
986
|
const excludePath = `${cwd}/.git/info/exclude`;
|
|
803
|
-
if (yield* fs.exists(excludePath))
|
|
804
|
-
const excludeContent = yield* readFileWithSkip(excludePath, (error) => `Could not read exclude file: ${String(error)}`);
|
|
805
|
-
if (excludeContent.skipped) skipped.push(excludeContent.skipped);
|
|
806
|
-
if (excludeContent.content.trim()) ig.add(excludeContent.content.split("\n"));
|
|
807
|
-
}
|
|
987
|
+
if (yield* fs.exists(excludePath).pipe(Effect.catchAll(() => Effect.succeed(false)))) yield* loadIgnoreFile(excludePath, ig, skipped);
|
|
808
988
|
return {
|
|
809
989
|
ig,
|
|
810
990
|
skipped
|
|
811
991
|
};
|
|
812
992
|
});
|
|
813
|
-
const
|
|
993
|
+
/**
 * Classifies a single directory entry for the walker.
 *
 * @param entry Entry name inside `dir`.
 * @param dir Directory currently being walked.
 * @param ig Ignore matcher; consulted with the entry's path relative to `cwd`.
 * @param cwd Root of the scan.
 * @returns `{ files, skipped }`, plus `recurse: true` only for a directory that is not ignored.
 *   The walker tests for the presence of the `recurse` key, so it is omitted in every other
 *   case. Entries that are neither files nor directories produce empty lists.
 */
const processEntry = (entry, dir, ig, cwd) => Effect.gen(function* () {
	const nothing = () => ({
		files: [],
		skipped: []
	});
	const fullPath = `${dir}/${entry}`;
	const statResult = yield* statWithSkip(fullPath);
	if (statResult.skipped) return {
		files: [],
		skipped: [statResult.skipped]
	};
	const info = statResult.info;
	if (!info) return nothing();
	const isDirectory = info.type === "Directory";
	if (!isDirectory && info.type !== "File") return nothing();
	const relativePath = computeRelative(fullPath, cwd);
	// The matcher does no stat of its own: a directory-only pattern such as "build/" matches only
	// a path that ends in "/". Without the suffix such directories were never pruned and their
	// whole subtree was walked.
	const candidate = isDirectory ? `${relativePath}/` : relativePath;
	if (ig.ignores(candidate)) return {
		files: [],
		skipped: [{
			path: fullPath,
			reason: `Ignored by config pattern: ${relativePath}`
		}]
	};
	if (isDirectory) return {
		files: [],
		skipped: [],
		recurse: true
	};
	return {
		files: [fullPath],
		skipped: []
	};
});
|
|
1039
|
+
const walk = (dir, ig, cwd) => Effect.gen(function* () {
|
|
814
1040
|
const result = yield* readDirectoryWithSkip(dir);
|
|
815
1041
|
let files = [];
|
|
816
1042
|
const skipped = [];
|
|
817
1043
|
if (result.skipped) skipped.push(result.skipped);
|
|
818
1044
|
for (const entry of result.entries) {
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
if (
|
|
823
|
-
|
|
824
|
-
continue;
|
|
825
|
-
}
|
|
826
|
-
if (!info.info) continue;
|
|
827
|
-
if (info.info.type === "Directory") {
|
|
828
|
-
const sub = yield* walk(fullPath, extensions);
|
|
1045
|
+
const entryResult = yield* processEntry(entry, dir, ig, cwd);
|
|
1046
|
+
files.push(...entryResult.files);
|
|
1047
|
+
skipped.push(...entryResult.skipped);
|
|
1048
|
+
if ("recurse" in entryResult) {
|
|
1049
|
+
const sub = yield* walk(`${dir}/${entry}`, ig, cwd);
|
|
829
1050
|
files.push(...sub.files);
|
|
830
1051
|
skipped.push(...sub.skipped);
|
|
831
|
-
} else if (info.info.type === "File") {
|
|
832
|
-
const dotIndex = entry.lastIndexOf(".");
|
|
833
|
-
if (dotIndex === -1) continue;
|
|
834
|
-
const ext = entry.slice(dotIndex);
|
|
835
|
-
if (extensions.has(ext)) files.push(fullPath);
|
|
836
1052
|
}
|
|
837
1053
|
}
|
|
838
1054
|
return {
|
|
@@ -840,16 +1056,15 @@ const make$1 = Effect.gen(function* () {
|
|
|
840
1056
|
skipped
|
|
841
1057
|
};
|
|
842
1058
|
});
|
|
843
|
-
const scanFiles = (
|
|
844
|
-
const
|
|
1059
|
+
const scanFiles = (ignoredPaths) => Effect.gen(function* () {
|
|
1060
|
+
const cwd = process.cwd();
|
|
1061
|
+
const { ig, skipped: ignoreSkipped } = yield* loadGitignoreRules(ignoredPaths, cwd).pipe(Effect.mapError((cause) => new ScanFailed({
|
|
845
1062
|
message: `Failed to load gitignore rules: ${String(cause)}`,
|
|
846
1063
|
cause
|
|
847
1064
|
})));
|
|
848
|
-
const
|
|
849
|
-
const { files: paths, skipped: walkSkipped } = yield* walk(cwd, new Set(extensions));
|
|
850
|
-
const relativePaths = paths.map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
|
|
1065
|
+
const { files, skipped: walkSkipped } = yield* walk(cwd, ig, cwd);
|
|
851
1066
|
return {
|
|
852
|
-
files
|
|
1067
|
+
files,
|
|
853
1068
|
skipped: [...ignoreSkipped, ...walkSkipped]
|
|
854
1069
|
};
|
|
855
1070
|
});
|
|
@@ -1025,7 +1240,7 @@ const make = Effect.gen(function* () {
|
|
|
1025
1240
|
const VectorStoreLive = Layer.effect(VectorStore, make);
|
|
1026
1241
|
//#endregion
|
|
1027
1242
|
//#region src/index.ts
|
|
1028
|
-
const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive);
|
|
1243
|
+
const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive, ContentExtractorLive);
|
|
1029
1244
|
const ChunkerLayer = ChunkerLive.pipe(Layer.provide(ServicesLayer));
|
|
1030
1245
|
const InfraLayer = Layer.mergeAll(ServicesLayer, ChunkerLayer).pipe(Layer.provide(NodeContext.layer));
|
|
1031
1246
|
const UseCaseLayer = Layer.mergeAll(InitProject.Default, GetStatus.Default, QueryProject.Default, IndexProject.Default, ResetIndex.Default);
|