@lucas-bur/pix 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +9 -0
- package/README.md +46 -0
- package/dist/index.mjs +565 -0
- package/package.json +81 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Lucas Burmeister
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# @lucas-bur/pix
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/lucas-bur/pix/actions/workflows/ci.yml)
|
|
4
|
+
[Coverage](https://codecov.io/gh/lucas-bur/pix)
|
|
5
|
+
[npm](https://www.npmjs.com/package/@lucas-bur/pix)
|
|
6
|
+
[](https://www.npmjs.com/package/@lucas-bur/pix)
|
|
7
|
+
[fallow](https://github.com/fallow-rs/fallow)
|
|
8
|
+
|
|
9
|
+
Lightweight local semantic project indexer (`pix` for short).
|
|
10
|
+
|
|
11
|
+
Zero external services, 100% local + offline. Installs as a devDependency and provides agent-ready structured JSON output.
|
|
12
|
+
|
|
13
|
+
## Status
|
|
14
|
+
|
|
15
|
+
MVP in development. See [CONTEXT.md](./CONTEXT.md) for architecture decisions and [.scratch/pix-mvp/PRD.md](./.scratch/pix-mvp/PRD.md) for the product requirements.
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm install --save-dev @lucas-bur/pix
|
|
21
|
+
pix init
|
|
22
|
+
pix index
|
|
23
|
+
pix query "authentication middleware"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quality Gates
|
|
27
|
+
|
|
28
|
+
This project uses [fallow](https://github.com/fallow-rs/fallow) for static analysis (dead code, duplication, complexity).
|
|
29
|
+
|
|
30
|
+
### Commands
|
|
31
|
+
|
|
32
|
+
- `vp run lint:fallow` — Run fallow with JSON output (used in CI)
|
|
33
|
+
- `fallow audit --summary` — Check only changed files (used in pre-commit hook)
|
|
34
|
+
|
|
35
|
+
### Pre-commit Hook
|
|
36
|
+
|
|
37
|
+
The pre-commit hook is managed by vite-plus and runs:
|
|
38
|
+
|
|
39
|
+
1. `vp staged` — Formats, lints, and type-checks staged files
|
|
40
|
+
2. `fallow audit --summary` — Audits changed files for quality issues
|
|
41
|
+
|
|
42
|
+
To set up hooks after cloning: `vp config`
|
|
43
|
+
|
|
44
|
+
## License
|
|
45
|
+
|
|
46
|
+
[MIT](./LICENSE)
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
import { NodeContext, NodeRuntime } from "@effect/platform-node";
|
|
4
|
+
import { Context, Data, Effect, Layer, Option } from "effect";
|
|
5
|
+
import { Args, Command, Options } from "@effect/cli";
|
|
6
|
+
import crypto from "node:crypto";
|
|
7
|
+
import { FileSystem } from "@effect/platform";
|
|
8
|
+
import { env } from "@huggingface/transformers";
|
|
9
|
+
import fg from "fast-glob";
|
|
10
|
+
import ignore from "ignore";
|
|
11
|
+
//#region src/domain/ports.ts
|
|
12
|
+
/** Port tag for configuration persistence (the live adapter provides writeConfig, readConfig, configExists). */
class ConfigStore extends Context.Tag("ConfigStore")() {}

/** Port tag for source-file discovery (the live adapter provides scanFiles). */
class Scanner extends Context.Tag("Scanner")() {}

/** Port tag for splitting a file into chunks (the live adapter provides chunkFile). */
class Chunker extends Context.Tag("Chunker")() {}

/** Port tag for text embedding (the live adapter provides embed and batch). */
class Embedder extends Context.Tag("Embedder")() {}

/** Port tag for the on-disk index (the live adapter provides store, search, getStats). */
class VectorStore extends Context.Tag("VectorStore")() {}
|
|
17
|
+
//#endregion
|
|
18
|
+
//#region src/application/get-status.ts
|
|
19
|
+
/** Use case: report index statistics. Resolves the VectorStore port through its Effect tag. */
class GetStatus extends Effect.Service()("GetStatus", {
  accessors: true,
  effect: Effect.gen(function* () {
    const vectorStore = yield* VectorStore;
    return {
      // Thin delegation: the store computes the statistics.
      getStatus: () => vectorStore.getStats()
    };
  })
}) {}
|
|
28
|
+
//#endregion
|
|
29
|
+
//#region src/application/index-project.ts
|
|
30
|
+
/**
 * Use case: index project files. Pipeline: scan → chunk → embed → store.
 * Depends on ConfigStore, Scanner, Chunker, Embedder, VectorStore via Effect tags.
 *
 * `index()` yields `{ success: true, stats: { chunks, files, totalLines, byteSize } }`.
 * It fails with whatever the ports fail with, and additionally with an `Error`
 * when the embedder returns a different number of vectors than there are chunks.
 */
var IndexProject = class extends Effect.Service()("IndexProject", {
  accessors: true,
  effect: Effect.gen(function* () {
    const configStore = yield* ConfigStore;
    const scanner = yield* Scanner;
    const chunker = yield* Chunker;
    const embedder = yield* Embedder;
    const vectorStore = yield* VectorStore;

    /** Extensions scanned when the config lists none. */
    const FALLBACK_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx"];
    /**
     * Upper bound on files chunked at once. Unbounded concurrency opens every
     * file simultaneously; on large repositories that can exhaust file
     * descriptors, and the chunker then skips those files with only a warning.
     */
    const CHUNK_CONCURRENCY = 16;

    const index = () => Effect.gen(function* () {
      const config = yield* configStore.readConfig();
      // `files` may be absent in a hand-edited config; treat that as "no extensions configured".
      const configured = Object.keys(config.files ?? {});
      const extensions = configured.length > 0 ? configured : FALLBACK_EXTENSIONS;
      const files = yield* scanner.scanFiles(extensions);
      const allChunks = (yield* Effect.forEach(files, (file) => chunker.chunkFile(file), { concurrency: CHUNK_CONCURRENCY })).flat();
      const totalChunks = allChunks.length;
      // Counted from chunks, so files that produced no chunk are not included.
      const totalFiles = new Set(allChunks.map((c) => c.file)).size;
      const totalLines = allChunks.reduce((sum, c) => sum + (c.endLine - c.startLine + 1), 0);
      if (totalChunks === 0) {
        yield* Effect.logInfo("No chunks to index.");
        return {
          success: true,
          stats: {
            chunks: 0,
            files: 0,
            totalLines: 0,
            byteSize: 0
          }
        };
      }
      const texts = allChunks.map((c) => c.text);
      const embeddings = yield* embedder.batch(texts);
      // The store pairs chunk i with vector i; a count mismatch would persist
      // an index whose rows do not line up, so refuse to write it.
      if (embeddings.length !== totalChunks) {
        return yield* Effect.fail(new Error(`Embedding failed: expected ${totalChunks} vectors but received ${embeddings.length}.`));
      }
      yield* vectorStore.store(allChunks, embeddings);
      const dims = embeddings[0]?.dims ?? 384;
      // 4 bytes per float32 component.
      const byteSize = embeddings.length * dims * 4;
      yield* Effect.logInfo(`Indexed ${totalChunks} chunks from ${totalFiles} files.`);
      return {
        success: true,
        stats: {
          chunks: totalChunks,
          files: totalFiles,
          totalLines,
          byteSize
        }
      };
    });
    return { index };
  })
}) {};
|
|
86
|
+
//#endregion
|
|
87
|
+
//#region src/domain/config.ts
|
|
88
|
+
var ConfigError = class extends Data.TaggedError("ConfigError") {};
|
|
89
|
+
/**
 * Default project configuration. Written to disk by the init use case and used
 * by the chunker as a fallback when the config cannot be read.
 *
 * Frozen (including the nested `files` map) because the same object is handed
 * to several consumers; an accidental mutation would otherwise change the
 * defaults for the rest of the process. Key order is kept as-is because it
 * determines the layout of the serialized config file.
 */
const DEFAULT_CONFIG = Object.freeze({
  schema: "1",
  model: "Xenova/all-MiniLM-L6-v2",
  dims: 384,
  chunkLines: 60,
  overlapLines: 10,
  files: Object.freeze({})
});
|
|
97
|
+
//#endregion
|
|
98
|
+
//#region src/application/init-project.ts
|
|
99
|
+
/**
 * Use case: bootstrap a pix project by persisting the default configuration.
 * Resolves the ConfigStore port through its Effect tag.
 */
class InitProject extends Effect.Service()("InitProject", {
  accessors: true,
  effect: Effect.gen(function* () {
    const configStore = yield* ConfigStore;
    // Writes the defaults, then reports them back to the caller.
    const init = () => {
      const outcome = {
        success: true,
        config: DEFAULT_CONFIG
      };
      return Effect.as(configStore.writeConfig(DEFAULT_CONFIG), outcome);
    };
    return { init };
  })
}) {}
|
|
114
|
+
//#endregion
|
|
115
|
+
//#region src/application/query-project.ts
|
|
116
|
+
/** Use case: semantic search over the indexed code. Resolves Embedder and VectorStore through their Effect tags. */
class QueryProject extends Effect.Service()("QueryProject", {
  accessors: true,
  effect: Effect.gen(function* () {
    const embedder = yield* Embedder;
    const vectorStore = yield* VectorStore;
    // Embed the query text, then hand the embedding to the store's search.
    const queryProject = (queryText, topK) =>
      Effect.flatMap(embedder.embed(queryText), (queryEmbedding) => vectorStore.search(queryEmbedding, topK));
    return { queryProject };
  })
}) {}
|
|
126
|
+
//#endregion
|
|
127
|
+
//#region src/commands/index-cmd.ts
|
|
128
|
+
/**
 * CLI command: pix index [--force] [--verbose] [--json]
 *
 * Runs the indexing use case and reports chunk/file counts plus the elapsed
 * time. On failure the error message is printed as a JSON object and the
 * original error is re-raised.
 */
const indexCommand = Command.make("index", {
  force: Options.withDefault(Options.boolean("force"), false),
  verbose: Options.withDefault(Options.boolean("verbose"), false),
  json: Options.withDefault(Options.boolean("json"), false)
}, (flags) => Effect.gen(function* () {
  if (flags.force) {
    yield* Effect.logInfo(`--force is currently not implemented and only a placeholder.`);
  }
  if (flags.verbose) {
    yield* Effect.logInfo(`--verbose is currently not implemented and only a placeholder.`);
  }
  const startedAt = Date.now();
  const outcome = yield* Effect.either(IndexProject.index());
  if (outcome._tag === "Left") {
    const failure = outcome.left;
    const message = failure.message ?? String(failure);
    yield* Effect.sync(() => {
      console.log(JSON.stringify({ error: message }));
    });
    return yield* Effect.fail(failure);
  }
  const { chunks, files } = outcome.right.stats;
  const duration = `${((Date.now() - startedAt) / 1e3).toFixed(1)}s`;
  if (flags.json) {
    yield* Effect.sync(() => {
      console.log(JSON.stringify({
        chunks,
        files,
        duration
      }));
    });
    return;
  }
  yield* Effect.logInfo(`Indexed ${chunks} chunks from ${files} files in ${duration}.`);
}));
|
|
156
|
+
//#endregion
|
|
157
|
+
//#region src/commands/init.ts
|
|
158
|
+
/**
 * CLI command: pix init [--json]
 *
 * Runs the init use case; with --json the result is printed as pretty JSON,
 * otherwise two informational lines are logged.
 */
const initCommand = Command.make("init", {
  json: Options.withDefault(Options.boolean("json"), false)
}, ({ json }) => Effect.gen(function* () {
  const outcome = yield* InitProject.init();
  if (!json) {
    yield* Effect.logInfo("Created .pix/config.json with default settings.");
    yield* Effect.logInfo("Reminder: Add `.pix` to your `.gitignore` file to avoid committing the index.");
    return;
  }
  yield* Effect.sync(() => {
    console.log(JSON.stringify(outcome, null, 2));
  });
}));
|
|
167
|
+
//#endregion
|
|
168
|
+
//#region src/commands/query.ts
|
|
169
|
+
/** Number of results returned when --top is not given. */
const DEFAULT_TOP_K = 5;
/** Context-line count used when --context-lines is not given. */
const DEFAULT_CONTEXT_LINES = 0;
/** Smallest accepted result count. */
const MIN_TOP_K = 1;
/** Largest accepted result count. */
const MAX_TOP_K = 100;

/**
 * Restricts topK to the inclusive range [MIN_TOP_K, MAX_TOP_K].
 *
 * @param {number} value - requested result count
 * @returns {{ value: number, clamped: boolean }} the bounded value, and whether it differs from the request because a bound was hit
 */
const clampTopK = (value) => {
  const belowRange = value < MIN_TOP_K;
  const aboveRange = !belowRange && value > MAX_TOP_K;
  let bounded = value;
  if (belowRange) {
    bounded = MIN_TOP_K;
  } else if (aboveRange) {
    bounded = MAX_TOP_K;
  }
  return {
    value: bounded,
    clamped: belowRange || aboveRange
  };
};
|
|
188
|
+
/**
 * Renders one search hit for human-readable output.
 *
 * The first line is `file:start-end (score: x.xxx)`; the chunk text follows,
 * with the optional context blocks placed before and after it when present.
 */
const formatResult = (result) => {
  const { file, startLine, endLine, score, text, contextBefore, contextAfter } = result;
  const pieces = [`${file}:${startLine}-${endLine} (score: ${score.toFixed(3)})`];
  if (contextBefore) {
    pieces.push(`${contextBefore}`);
  }
  pieces.push(`${text}`);
  if (contextAfter) {
    pieces.push(`${contextAfter}`);
  }
  return pieces.join("\n");
};
|
|
194
|
+
/**
 * CLI command: pix query "<text>" [--top N] [--json] [--context-lines N]
 *
 * Clamps the requested result count, runs the query use case, then prints
 * either a JSON array or one formatted block per hit followed by `---`.
 */
const queryCommand = Command.make("query", {
  queryText: Args.text({ name: "query" }),
  top: Options.optional(Options.withDefault(Options.integer("top"), DEFAULT_TOP_K)),
  json: Options.withDefault(Options.boolean("json"), false),
  contextLines: Options.optional(Options.withDefault(Options.integer("context-lines"), DEFAULT_CONTEXT_LINES))
}, ({ queryText, top, json, contextLines }) => Effect.gen(function* () {
  const requestedTopK = Option.getOrElse(top, () => DEFAULT_TOP_K);
  const ctxLines = Option.getOrElse(contextLines, () => DEFAULT_CONTEXT_LINES);
  const { value: effectiveTopK, clamped: wasClamped } = clampTopK(requestedTopK);
  if (wasClamped) {
    yield* Effect.logDebug(`topK clamped from ${requestedTopK} to ${effectiveTopK}`);
  }
  const results = yield* QueryProject.queryProject(queryText, effectiveTopK);

  if (json) {
    yield* Effect.sync(() => {
      const output = results.map((hit) => {
        const entry = {
          score: hit.score,
          file: hit.file,
          startLine: hit.startLine,
          endLine: hit.endLine,
          text: hit.text
        };
        // Context fields are emitted only when context was requested and the hit carries them.
        if (ctxLines > 0 && hit.contextBefore) {
          entry.contextBefore = hit.contextBefore;
        }
        if (ctxLines > 0 && hit.contextAfter) {
          entry.contextAfter = hit.contextAfter;
        }
        return entry;
      });
      console.log(JSON.stringify(output, null, 2));
    });
    return;
  }

  if (results.length === 0) {
    yield* Effect.logInfo("No results found");
    return;
  }
  yield* Effect.forEach(results, (hit) => Effect.sync(() => {
    console.log(formatResult(hit));
    console.log("---");
  }), { discard: true });
}));
|
|
227
|
+
//#endregion
|
|
228
|
+
//#region src/commands/status.ts
|
|
229
|
+
/**
 * CLI command: pix status [--json]
 *
 * Prints the index statistics, either as pretty JSON or as five log lines.
 */
const statusCommand = Command.make("status", {
  json: Options.withDefault(Options.boolean("json"), false)
}, ({ json }) => Effect.gen(function* () {
  const stats = yield* GetStatus.getStatus();
  if (json) {
    yield* Effect.sync(() => {
      console.log(JSON.stringify(stats, null, 2));
    });
    return;
  }
  // A non-positive timestamp means no index has been written yet.
  const lastIndexStr = stats.lastIndex > 0 ? new Date(stats.lastIndex).toISOString() : "never";
  const report = [
    `Indexed: ${stats.chunks} chunks across ${stats.files} files`,
    `Model: ${stats.model || "none"}`,
    `Total lines: ${stats.totalLines.toLocaleString()}`,
    `Index size: ${formatBytes(stats.byteSize)}`,
    `Last indexed: ${lastIndexStr}`
  ];
  for (const line of report) {
    yield* Effect.logInfo(line);
  }
}));
|
|
242
|
+
/**
 * Format a byte count as a human-readable string (e.g. "1.5 MB").
 *
 * Uses 1024-based steps and the units B, KB, MB, GB. Values beyond the largest
 * unit stay in GB (e.g. "2048.0 GB") instead of indexing past the unit table,
 * and values between 0 and 1 stay in B. Zero, negative, and NaN inputs are
 * reported as "0 B".
 *
 * @param {number} bytes - size in bytes
 * @returns {string} value with one decimal place followed by its unit
 */
const formatBytes = (bytes) => {
  const units = [
    "B",
    "KB",
    "MB",
    "GB"
  ];
  // `!(bytes > 0)` also catches NaN, which a plain `bytes <= 0` would let through.
  if (!(bytes > 0)) return "0 B";
  let value = bytes;
  let unitIndex = 0;
  // Repeated division is exact at powers of 1024, unlike a log ratio, and the
  // bound keeps the index inside the unit table.
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex++;
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`;
};
|
|
254
|
+
//#endregion
|
|
255
|
+
//#region src/cli.ts
|
|
256
|
+
// The manifest is loaded through a CommonJS-style require anchored at this module's URL.
const packageManifest = createRequire(import.meta.url)("../package.json");
/** Version string reported by the CLI, taken from the package manifest. */
const VERSION = packageManifest.version;

/** Root `pix` command: logs a short banner when invoked without a subcommand. */
const pix = Command.withSubcommands(
  Command.make("pix", {}, () => Effect.gen(function* () {
    yield* Effect.logInfo("pix - Lightweight local semantic project indexer");
    yield* Effect.logInfo("Use `pix --help` to see available commands.");
  })),
  [
    initCommand,
    statusCommand,
    indexCommand,
    queryCommand
  ]
);

/** Turns the command tree into a function that takes the process arguments. */
const cli = Command.run(pix, {
  name: "pix",
  version: VERSION
});
|
|
270
|
+
//#endregion
|
|
271
|
+
//#region src/services/chunker.ts
|
|
272
|
+
/** Minimum text length (in characters) a line window must have to be kept as a chunk. */
const MIN_CHUNK_CHARS = 20;
|
|
273
|
+
const make$4 = Effect.gen(function* () {
|
|
274
|
+
const fs = yield* FileSystem.FileSystem;
|
|
275
|
+
const config = yield* (yield* ConfigStore).readConfig().pipe(Effect.catchAll(() => Effect.succeed(DEFAULT_CONFIG)));
|
|
276
|
+
/**
 * Splits file content into overlapping line windows.
 *
 * Each kept window becomes `{ id, idx, file, startLine, endLine, text }` with
 * 1-based inclusive line numbers; `id` is the first 12 hex characters of the
 * SHA-1 of `file:startLine`, and `idx` counts kept windows from 0. Windows
 * whose text is shorter than `minChars` are dropped.
 *
 * @param {string} file - path recorded on every chunk
 * @param {string} content - full file text
 * @param {number} chunkLines - window height in lines
 * @param {number} overlapLines - lines shared between consecutive windows
 * @param {number} minChars - minimum text length for a window to be kept
 */
const splitIntoChunks = (file, content, chunkLines, overlapLines, minChars) => {
  const lines = content.split("\n");
  // With overlapLines >= chunkLines the stride would be zero or negative and
  // the scan would never advance; always move forward by at least one line.
  const rawStride = chunkLines - overlapLines;
  const stride = rawStride >= 1 ? rawStride : 1;
  const chunks = [];
  let idx = 0;
  for (let startLine = 1; startLine <= lines.length; startLine += stride) {
    const endLine = Math.min(startLine + chunkLines - 1, lines.length);
    const text = lines.slice(startLine - 1, endLine).join("\n");
    if (text.length >= minChars) {
      const id = crypto.createHash("sha1").update(`${file}:${startLine}`).digest("hex").slice(0, 12);
      chunks.push({
        id,
        idx,
        file,
        startLine,
        endLine,
        text
      });
      idx++;
    }
    // Once a window reaches the last line, any later window would only repeat
    // a tail that is already fully contained in this one.
    if (endLine >= lines.length) break;
  }
  return chunks;
};

/**
 * Reads one file and splits it into chunks using the configured window size
 * and overlap. Unreadable files are logged and yield no chunks, as do empty files.
 */
const chunkFile = (file) => Effect.gen(function* () {
  const content = yield* fs.readFileString(file).pipe(Effect.tapError((err) => Effect.logWarning(`[Chunker] Skipping unreadable file: ${file} — ${String(err)}`)), Effect.catchAll(() => Effect.succeed("")));
  if (content === "") return [];
  return splitIntoChunks(file, content, config.chunkLines, config.overlapLines, MIN_CHUNK_CHARS);
});
|
|
302
|
+
return { chunkFile };
|
|
303
|
+
});
|
|
304
|
+
/** Layer that provides the Chunker tag using the file-system-backed implementation built by make$4. */
const ChunkerLive = Layer.effect(Chunker, make$4);
|
|
305
|
+
//#endregion
|
|
306
|
+
//#region src/services/config-store.ts
|
|
307
|
+
/** Directory (relative to the working directory) that holds pix state. */
const CONFIG_DIR = ".pix";
/** Location of the JSON config file inside CONFIG_DIR. */
const CONFIG_PATH = `${CONFIG_DIR}/config.json`;

/**
 * File-system adapter for the ConfigStore port.
 *
 * - `writeConfig(config)` creates CONFIG_DIR if needed and writes pretty-printed JSON.
 * - `readConfig()` reads and parses the file.
 * - `configExists()` reports whether the file exists, treating errors as "no".
 *
 * Read and write failures are mapped to ConfigError with the original cause attached.
 */
const make$3 = Effect.gen(function* () {
  const fs = yield* FileSystem.FileSystem;

  const writeConfig = (config) => Effect.gen(function* () {
    const configJson = JSON.stringify(config, null, 2);
    yield* fs.makeDirectory(CONFIG_DIR, { recursive: true });
    yield* fs.writeFileString(CONFIG_PATH, configJson);
  }).pipe(Effect.mapError((cause) => new ConfigError({
    message: "Failed to write config.json",
    cause
  })));

  const readConfig = () => Effect.gen(function* () {
    const content = yield* fs.readFileString(CONFIG_PATH);
    // JSON.parse throws synchronously. A throw inside Effect.gen becomes a
    // defect that bypasses mapError/catchAll, so route it through Effect.try
    // to make malformed JSON a regular failure that is wrapped below.
    return yield* Effect.try(() => JSON.parse(content));
  }).pipe(Effect.mapError((cause) => new ConfigError({
    message: "Failed to read config.json",
    cause
  })));

  const configExists = () => Effect.gen(function* () {
    return yield* fs.exists(CONFIG_PATH);
  }).pipe(Effect.catchAll(() => Effect.succeed(false)));

  return {
    writeConfig,
    readConfig,
    configExists
  };
});

/** Layer that provides the ConfigStore tag using the file-system adapter above. */
const ConfigStoreLive = Layer.effect(ConfigStore, make$3);
|
|
336
|
+
//#endregion
|
|
337
|
+
//#region src/services/embedder.ts
|
|
338
|
+
const MODEL_NAME = "Xenova/all-MiniLM-L6-v2";
|
|
339
|
+
const DIMS = 384;
|
|
340
|
+
const CACHE_DIR = ".pix/cache";
|
|
341
|
+
const BATCH_SIZE = 16;
|
|
342
|
+
// Module-load side effect: sets the transformers library's cache directory to CACHE_DIR.
env.cacheDir = CACHE_DIR;
|
|
343
|
+
/**
 * Scales a vector to unit Euclidean length.
 *
 * @param {Float32Array} arr - input vector; never modified
 * @returns {Float32Array} a new Float32Array of the same length, or `arr`
 *   itself when its magnitude is zero (there is nothing to scale)
 */
const normalize = (arr) => {
  let sumOfSquares = 0;
  for (const component of arr) {
    sumOfSquares += component * component;
  }
  const magnitude = Math.sqrt(sumOfSquares);
  if (magnitude === 0) {
    return arr;
  }
  return Float32Array.from(arr, (component) => component / magnitude);
};
|
|
352
|
+
/**
 * Embedder adapter backed by a feature-extraction pipeline from the
 * transformers library (model MODEL_NAME, CPU, q8 dtype).
 *
 * - `embed(text)` yields `{ vector, dims }` for one text.
 * - `batch(texts)` yields one `{ vector, dims }` per text, processed in slices of BATCH_SIZE.
 *
 * Both operations are best-effort: on failure `embed` yields an all-zero
 * vector and `batch` yields an empty array. The failure is logged as a
 * warning first so that the fallback is visible to the user.
 */
const make$2 = Effect.gen(function* () {
  // Effect.cached wraps the pipeline construction so repeated use shares one result.
  const getExtractor = yield* Effect.cached(Effect.tryPromise(async () => {
    const { pipeline } = await import("@huggingface/transformers");
    return pipeline("feature-extraction", MODEL_NAME, {
      device: "cpu",
      dtype: "q8"
    });
  }));

  const embed = (text) => Effect.gen(function* () {
    const extractor = yield* getExtractor;
    const data = (yield* Effect.tryPromise(() => extractor(text, {
      pooling: "mean",
      normalize: false
    }))).data;
    return {
      vector: normalize(data),
      dims: DIMS
    };
  }).pipe(
    Effect.tapError((err) => Effect.logWarning(`[Embedder] Embedding failed, falling back to a zero vector — ${String(err)}`)),
    Effect.catchAll(() => Effect.succeed({
      vector: new Float32Array(DIMS),
      dims: DIMS
    }))
  );

  const batch = (texts) => Effect.gen(function* () {
    const extractor = yield* getExtractor;
    const results = [];
    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
      const slice = texts.slice(i, i + BATCH_SIZE);
      const tensor = yield* Effect.tryPromise(() => extractor(slice, {
        pooling: "mean",
        normalize: false
      }));
      const data = tensor.data;
      // First tensor dimension is used as the number of rows in this slice.
      const n = tensor.dims[0];
      for (let j = 0; j < n; j++) {
        const offset = j * DIMS;
        // slice() copies the row, so the stored vector never aliases tensor data.
        results.push(normalize(data.slice(offset, offset + DIMS)));
      }
    }
    return results.map((vector) => ({
      vector,
      dims: DIMS
    }));
  }).pipe(
    Effect.tapError((err) => Effect.logWarning(`[Embedder] Batch embedding failed, returning no vectors — ${String(err)}`)),
    Effect.catchAll(() => Effect.succeed([]))
  );

  return {
    embed,
    batch
  };
});

/** Layer that provides the Embedder tag using the adapter above. */
const OnnxEmbedderLive = Layer.effect(Embedder, make$2);
|
|
401
|
+
//#endregion
|
|
402
|
+
//#region src/services/scanner.ts
|
|
403
|
+
/**
 * File-system adapter for the Scanner port.
 *
 * `scanFiles(extensions)` globs the working directory for files ending in any
 * of the given extensions, drops paths matched by the repository's ignore
 * rules, and returns absolute paths.
 */
const make$1 = Effect.gen(function* () {
  const fs = yield* FileSystem.FileSystem;

  /** Reads a rule file; a missing or unreadable file counts as "no rules". */
  const readRules = (path) => fs.readFileString(path).pipe(Effect.catchAll(() => Effect.succeed("")));

  /**
   * Loads ignore patterns from the root .gitignore and from .git/info/exclude.
   * Each file is read independently, so a problem with one never discards the
   * rules already loaded from the other.
   */
  const loadGitignoreRules = Effect.gen(function* () {
    const ig = ignore();
    const cwd = process.cwd();
    for (const rulesPath of [`${cwd}/.gitignore`, `${cwd}/.git/info/exclude`]) {
      const content = yield* readRules(rulesPath);
      // Passing the raw text lets the ignore library split the lines itself,
      // which also copes with CRLF line endings.
      if (content.trim()) ig.add(content);
    }
    return ig;
  });

  const scanFiles = (extensions) => Effect.gen(function* () {
    const ig = yield* loadGitignoreRules;
    const cwd = process.cwd();
    const pattern = extensions.map((ext) => `**/*${ext}`);
    const found = yield* Effect.tryPromise(() => fg(pattern, {
      // Same base directory that is used below to rebuild absolute paths.
      cwd,
      dot: false,
      // Prune dependency trees during traversal instead of walking them and
      // filtering afterwards; this also covers projects without a .gitignore.
      ignore: ["**/node_modules/**"]
    })).pipe(
      Effect.tapError((err) => Effect.logWarning(`[Scanner] File scan failed, no files will be indexed — ${String(err)}`)),
      Effect.catchAll(() => Effect.succeed([]))
    );
    // The ignore matcher expects paths relative to the working directory.
    const relativePaths = found.map((p) => p.startsWith(cwd) ? p.slice(cwd.length + 1) : p);
    return ig.filter(relativePaths).map((p) => `${cwd}/${p}`);
  });

  return { scanFiles };
});

/** Layer that provides the Scanner tag using the adapter above. */
const ScannerLive = Layer.effect(Scanner, make$1);
|
|
430
|
+
//#endregion
|
|
431
|
+
//#region src/services/vector-store.ts
|
|
432
|
+
const STORE_DIR = ".pix";
|
|
433
|
+
const CHUNKS_FILE = `${STORE_DIR}/chunks.jsonl`;
|
|
434
|
+
const VECTORS_FILE = `${STORE_DIR}/vectors.bin`;
|
|
435
|
+
/**
 * Extracts the `model` field from the first chunk record.
 *
 * @param {string[]} lines - non-empty lines of chunks.jsonl
 * @returns the first record's `model` value, or "" when there are no lines,
 *   the first line is not valid JSON, or the field is null/undefined
 */
const readModelFromChunks = (lines) => {
  if (lines.length > 0) {
    try {
      const firstRecord = JSON.parse(lines[0]);
      return firstRecord.model ?? "";
    } catch {
      // Malformed first line: fall through to the "no model" result.
    }
  }
  return "";
};
|
|
447
|
+
/**
|
|
448
|
+
* FileSystem adapter for VectorStore port. Reads from chunks.jsonl and vectors.bin to provide index
|
|
449
|
+
* statistics.
|
|
450
|
+
*/
|
|
451
|
+
const make = Effect.gen(function* () {
|
|
452
|
+
const fs = yield* FileSystem.FileSystem;
|
|
453
|
+
/**
 * Sums the number of text lines over all chunk records. Every entry of `lines`
 * is parsed as a JSON object whose `text` field is split on newlines; entries
 * that cannot be parsed or lack a usable `text` are skipped.
 */
const countTotalLines = (lines) => {
  let total = 0;
  for (const line of lines) {
    try {
      total += JSON.parse(line).text.split("\n").length;
    } catch {
      // Unusable record: contributes nothing to the total.
    }
  }
  return total;
};
|
|
464
|
+
/**
 * Collects the distinct `file` values over all chunk records.
 *
 * @param {string[]} lines - lines of chunks.jsonl
 * @returns {Set} the set of file values; unparsable lines are skipped
 */
const countUniqueFiles = (lines) => {
  const seen = /* @__PURE__ */ new Set();
  for (let i = 0; i < lines.length; i++) {
    try {
      seen.add(JSON.parse(lines[i]).file);
    } catch {
      continue;
    }
  }
  return seen;
};
|
|
473
|
+
/**
 * Concatenates embedding vectors into one contiguous Float32Array, reserving
 * `dims` floats per embedding. `dims` comes from the first embedding, or 384
 * when the list is empty.
 */
const packVectors = (embeddings) => {
  const dims = embeddings[0]?.dims ?? 384;
  const packed = new Float32Array(embeddings.length * dims);
  for (let i = 0; i < embeddings.length; i++) packed.set(embeddings[i].vector, i * dims);
  return packed;
};

/**
 * Persists chunks (one JSON object per line) and their vectors (raw float32
 * data) to CHUNKS_FILE and VECTORS_FILE.
 *
 * Both files are fully written to temporary paths before either one is renamed
 * into place. A failure while writing therefore leaves the previous index
 * untouched, instead of pairing a new chunks file with an old vectors file.
 */
const store = (chunks, embeddings) => Effect.gen(function* () {
  if (!(yield* fs.exists(STORE_DIR))) yield* fs.makeDirectory(STORE_DIR, { recursive: true });
  const chunksTemp = `${CHUNKS_FILE}.tmp`;
  const vectorsTemp = `${VECTORS_FILE}.tmp`;
  const chunksLines = chunks.map((c) => JSON.stringify({
    id: c.id,
    idx: c.idx,
    file: c.file,
    startLine: c.startLine,
    endLine: c.endLine,
    text: c.text
  }));
  const vectorsArray = packVectors(embeddings);
  // Stage both files first.
  yield* fs.writeFileString(chunksTemp, chunksLines.join("\n"));
  yield* fs.writeFile(vectorsTemp, Buffer.from(vectorsArray.buffer));
  // Then swap them in back to back to keep the inconsistency window minimal.
  yield* fs.rename(chunksTemp, CHUNKS_FILE);
  yield* fs.rename(vectorsTemp, VECTORS_FILE);
});
|
|
495
|
+
/**
 * Reinterprets raw bytes as float32 values, honouring the view's own offset
 * and length. Using `bytes.buffer` directly would expose the whole backing
 * ArrayBuffer, which may be larger than, or offset from, the data of interest.
 * Trailing bytes that do not form a complete float are ignored.
 */
const toFloat32View = (bytes) => {
  const width = Float32Array.BYTES_PER_ELEMENT;
  const count = Math.floor(bytes.byteLength / width);
  if (bytes.byteOffset % width === 0) {
    return new Float32Array(bytes.buffer, bytes.byteOffset, count);
  }
  // Float32Array views require 4-byte alignment; copy when the data is misaligned.
  const aligned = new Uint8Array(count * width);
  aligned.set(bytes.subarray(0, count * width));
  return new Float32Array(aligned.buffer);
};

/**
 * Scores every chunk against the query by dot product and returns the best
 * `topK` results, highest score first. Chunk i is paired with the i-th run of
 * `query.dims` floats in `vectors`. Chunks that have no complete stored vector
 * and lines that fail to parse are skipped, so no NaN score can reach the sort.
 */
const rankChunks = (chunkLines, vectors, query, topK) => {
  const results = [];
  for (let i = 0; i < chunkLines.length; i++) {
    const startIdx = i * query.dims;
    // Rows are stored in order, so once one is missing all later ones are too.
    if (startIdx + query.dims > vectors.length) break;
    try {
      const chunk = JSON.parse(chunkLines[i]);
      // subarray is a view; no per-chunk copy is needed for a read-only dot product.
      const chunkVector = vectors.subarray(startIdx, startIdx + query.dims);
      let dotProduct = 0;
      for (let j = 0; j < query.dims; j++) dotProduct += chunkVector[j] * query.vector[j];
      results.push({
        score: dotProduct,
        file: chunk.file,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        text: chunk.text,
        contextBefore: chunk.contextBefore,
        contextAfter: chunk.contextAfter
      });
    } catch {
      // Unparsable chunk record: leave it out of the results.
    }
  }
  results.sort((a, b) => b.score - a.score);
  return results.slice(0, topK);
};

/**
 * Searches the stored index for the chunks most similar to `query`
 * (`{ vector, dims }`). Yields an empty list when either index file is missing.
 */
const search = (query, topK) => Effect.gen(function* () {
  const chunksExists = yield* fs.exists(CHUNKS_FILE);
  const vectorsExists = yield* fs.exists(VECTORS_FILE);
  if (!chunksExists || !vectorsExists) return [];
  const chunkLines = (yield* fs.readFileString(CHUNKS_FILE)).split("\n").filter((l) => l.trim().length > 0);
  const vectors = toFloat32View(yield* fs.readFile(VECTORS_FILE));
  return rankChunks(chunkLines, vectors, query, topK);
});
|
|
522
|
+
/**
 * Computes index statistics from the two index files.
 *
 * Yields `{ chunks, files, model, lastIndex, totalLines, byteSize }`. All
 * values are zero/empty when either file is missing. `lastIndex` is the
 * vectors file's modification time in milliseconds (0 when unavailable) and
 * `byteSize` is the vectors file's size.
 */
const getStats = () => Effect.gen(function* () {
  const hasChunks = yield* fs.exists(CHUNKS_FILE);
  const hasVectors = yield* fs.exists(VECTORS_FILE);
  if (!(hasChunks && hasVectors)) {
    return {
      chunks: 0,
      files: 0,
      model: "",
      lastIndex: 0,
      totalLines: 0,
      byteSize: 0
    };
  }

  // An unreadable chunks file is treated like an empty one.
  const rawChunks = yield* Effect.catchAll(fs.readFileString(CHUNKS_FILE), () => Effect.succeed(""));
  const lines = rawChunks.split("\n").filter((l) => l.trim().length > 0);

  // A failed stat degrades to null, which zeroes size and timestamp below.
  const vectorsStat = yield* Effect.catchAll(fs.stat(VECTORS_FILE), () => Effect.succeed(null));
  const byteSize = vectorsStat && "size" in vectorsStat ? vectorsStat.size : 0;
  const modifiedAt = vectorsStat?.mtime ?? Option.none();
  const lastIndex = Option.getOrElse(
    Option.map(modifiedAt, (d) => d instanceof Date ? d.getTime() : 0),
    () => 0
  );

  return {
    chunks: lines.length,
    files: countUniqueFiles(lines).size,
    model: readModelFromChunks(lines),
    lastIndex,
    totalLines: countTotalLines(lines),
    byteSize
  };
});
|
|
549
|
+
return {
|
|
550
|
+
store,
|
|
551
|
+
search,
|
|
552
|
+
getStats
|
|
553
|
+
};
|
|
554
|
+
});
|
|
555
|
+
/** Layer that provides the VectorStore tag using the file-system adapter built by make. */
const VectorStoreLive = Layer.effect(VectorStore, make);
|
|
556
|
+
//#endregion
|
|
557
|
+
//#region src/index.ts
|
|
558
|
+
// Adapters that the chunker itself depends on (it reads the config store).
const ServicesLayer = Layer.mergeAll(ConfigStoreLive, ScannerLive, OnnxEmbedderLive, VectorStoreLive);
// The chunker with its service dependencies satisfied.
const ChunkerLayer = Layer.provide(ChunkerLive, ServicesLayer);
// All infrastructure adapters, with the Node platform services supplied to them.
const InfraLayer = Layer.provide(Layer.mergeAll(ServicesLayer, ChunkerLayer), NodeContext.layer);
// Application use cases, still requiring the ports.
const UseCaseLayer = Layer.mergeAll(InitProject.Default, GetStatus.Default, QueryProject.Default, IndexProject.Default);
// Final layer: use cases wired to infrastructure, plus the Node platform services for the CLI.
const AppLayer = Layer.merge(Layer.provide(UseCaseLayer, InfraLayer), NodeContext.layer);

// Entry point: parse the process arguments and run the selected command.
NodeRuntime.runMain(Effect.provide(cli(process.argv), AppLayer));
|
|
564
|
+
//#endregion
|
|
565
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@lucas-bur/pix",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Lightweight local semantic project indexer",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"cli",
|
|
7
|
+
"embeddings",
|
|
8
|
+
"local",
|
|
9
|
+
"rag",
|
|
10
|
+
"semantic-search"
|
|
11
|
+
],
|
|
12
|
+
"homepage": "https://github.com/lucas-bur/pix",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/lucas-bur/pix/issues"
|
|
15
|
+
},
|
|
16
|
+
"license": "MIT",
|
|
17
|
+
"author": "Lucas Burmeister <lucas.burmeister@gmx.de>",
|
|
18
|
+
"repository": {
|
|
19
|
+
"type": "git",
|
|
20
|
+
"url": "git+https://github.com/lucas-bur/pix.git"
|
|
21
|
+
},
|
|
22
|
+
"bin": {
|
|
23
|
+
"pix": "./dist/index.mjs"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"dist"
|
|
27
|
+
],
|
|
28
|
+
"type": "module",
|
|
29
|
+
"exports": {
|
|
30
|
+
".": "./dist/index.mjs",
|
|
31
|
+
"./package.json": "./package.json"
|
|
32
|
+
},
|
|
33
|
+
"publishConfig": {
|
|
34
|
+
"access": "public"
|
|
35
|
+
},
|
|
36
|
+
"scripts": {
|
|
37
|
+
"build": "vp pack",
|
|
38
|
+
"dev": "vp pack --watch",
|
|
39
|
+
"test": "vp test",
|
|
40
|
+
"test:coverage": "vp test --coverage",
|
|
41
|
+
"check": "vp check",
|
|
42
|
+
"ci": "vp check && vp test --coverage && vp run build && fallow audit",
|
|
43
|
+
"lint:fallow": "fallow --format json",
|
|
44
|
+
"fallow": "fallow",
|
|
45
|
+
"prepublishOnly": "vp run build",
|
|
46
|
+
"pix": ""
|
|
47
|
+
},
|
|
48
|
+
"dependencies": {
|
|
49
|
+
"@effect/cli": "^0.75.1",
|
|
50
|
+
"@effect/platform": "^0.96.1",
|
|
51
|
+
"@effect/platform-node": "^0.106.0",
|
|
52
|
+
"@huggingface/transformers": "^4.2.0",
|
|
53
|
+
"effect": "^3.21.2",
|
|
54
|
+
"fast-glob": "^3.3.3",
|
|
55
|
+
"ignore": "^7.0.5"
|
|
56
|
+
},
|
|
57
|
+
"devDependencies": {
|
|
58
|
+
"@effect/language-service": "^0.85.1",
|
|
59
|
+
"@types/node": "^25.5.0",
|
|
60
|
+
"@typescript/native-preview": "7.0.0-dev.20260328.1",
|
|
61
|
+
"@vitest/coverage-v8": "^4.1.6",
|
|
62
|
+
"fallow": "^2.65.0",
|
|
63
|
+
"typescript": "^6.0.2",
|
|
64
|
+
"vite-plus": "^0.1.14"
|
|
65
|
+
},
|
|
66
|
+
"packageManager": "pnpm@10.33.3",
|
|
67
|
+
"pnpm": {
|
|
68
|
+
"overrides": {
|
|
69
|
+
"vite": "npm:@voidzero-dev/vite-plus-core@latest",
|
|
70
|
+
"vitest": "npm:@voidzero-dev/vite-plus-test@latest"
|
|
71
|
+
},
|
|
72
|
+
"onlyBuiltDependencies": [
|
|
73
|
+
"@parcel/watcher",
|
|
74
|
+
"fallow",
|
|
75
|
+
"msgpackr-extract",
|
|
76
|
+
"onnxruntime-node",
|
|
77
|
+
"protobufjs",
|
|
78
|
+
"sharp"
|
|
79
|
+
]
|
|
80
|
+
}
|
|
81
|
+
}
|