@cruxy/cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -83
- package/dist/cli/commands/index.d.ts +7 -0
- package/dist/cli/commands/index.js +59 -0
- package/dist/cli/program.js +2 -0
- package/dist/config/schema.d.ts +199 -0
- package/dist/config/schema.js +55 -0
- package/dist/indexing/chunker.d.ts +28 -0
- package/dist/indexing/chunker.js +65 -0
- package/dist/indexing/embedder.d.ts +98 -0
- package/dist/indexing/embedder.js +140 -0
- package/dist/indexing/index.d.ts +9 -0
- package/dist/indexing/index.js +9 -0
- package/dist/indexing/indexer.d.ts +45 -0
- package/dist/indexing/indexer.js +104 -0
- package/dist/indexing/retriever.d.ts +32 -0
- package/dist/indexing/retriever.js +53 -0
- package/dist/indexing/service.d.ts +49 -0
- package/dist/indexing/service.js +132 -0
- package/dist/indexing/store.d.ts +103 -0
- package/dist/indexing/store.js +279 -0
- package/dist/indexing/types.d.ts +71 -0
- package/dist/indexing/types.js +6 -0
- package/dist/indexing/util.d.ts +34 -0
- package/dist/indexing/util.js +97 -0
- package/dist/indexing/walker.d.ts +42 -0
- package/dist/indexing/walker.js +166 -0
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.js +1 -0
- package/dist/tools/registry.js +2 -0
- package/dist/tools/search-codebase.d.ts +25 -0
- package/dist/tools/search-codebase.js +70 -0
- package/package.json +4 -1
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { promises as fs } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { globToRegexBody, isBinary } from "./util.js";
|
|
4
|
+
/** Directories never descended into, regardless of ignore files. */
|
|
5
|
+
const ALWAYS_IGNORE_DIRS = new Set([".git", "node_modules", ".cruxy"]);
|
|
6
|
+
const DEFAULT_IGNORE_FILES = [".gitignore", ".cruxyignore"];
|
|
7
|
+
/**
 * Deny-list of path shapes that typically hold credentials. It is consulted on
 * its own, separately from ignore files and their `!` negations, so a match
 * cannot be re-included by an ignore rule. Every expression is tested against
 * the project-relative path written with `/` separators.
 */
const SECRET_PATTERNS = [
    // .env, .env.local, .env.production, .envrc
    /(^|\/)\.env($|\.|rc)/i,
    // ssh private keys
    /(^|\/)id_(rsa|dsa|ecdsa|ed25519)(\.|$)/,
    // keys / certs / keystores
    /\.(pem|key|pfx|p12|p8|keystore|jks|asc|gpg)$/i,
    // credential dotfiles
    /(^|\/)\.(npmrc|netrc|pgpass)$/i,
    // `.aws/credentials` at any depth
    /(^|\/)\.aws\/credentials$/i,
];
/**
 * Report whether `relPath` matches at least one entry of the secret deny-list.
 *
 * @param relPath project-relative POSIX path
 * @returns `true` on the first matching pattern, otherwise `false`
 */
function isSecretPath(relPath) {
    for (const pattern of SECRET_PATTERNS) {
        if (pattern.test(relPath)) {
            return true;
        }
    }
    return false;
}
|
|
22
|
+
/**
 * A compiled set of gitignore-style patterns, matched against paths relative to
 * the directory that holds the ignore file. Supports comments, blank lines,
 * `!` negation, leading-`/` anchoring, trailing-`/` directory-only rules, and
 * whatever glob syntax `globToRegexBody` translates.
 */
export class IgnoreMatcher {
    /** Compiled rules in declaration order; later rules override earlier ones. */
    rules = [];
    /**
     * @param patterns iterable of raw ignore-file lines
     */
    constructor(patterns) {
        for (const line of patterns)
            this.add(line);
    }
    /**
     * Compile one raw ignore-file line and append it to `rules`. Comments,
     * blank lines, and lines that are empty once their `!` / `/` markers are
     * removed are dropped.
     *
     * @param raw a single line, optionally ending in CR or trailing whitespace
     */
    add(raw) {
        // Strip a trailing CR and unescaped trailing whitespace.
        let line = raw.replace(/\r$/, "").replace(/(?<!\\)\s+$/, "");
        if (line === "" || line.startsWith("#"))
            return;
        let negate = false;
        if (line.startsWith("!")) {
            negate = true;
            line = line.slice(1);
        }
        // Unescape a leading "\#" / "\!".
        line = line.replace(/^\\([#!])/, "$1");
        let dirOnly = false;
        if (line.endsWith("/")) {
            dirOnly = true;
            line = line.slice(0, -1);
        }
        const anchored = line.startsWith("/");
        if (anchored)
            line = line.slice(1);
        // A line such as "/" or "!" has no pattern left. Compiling it would
        // produce a regex that matches the empty path, so skip it.
        if (line === "")
            return;
        // A pattern with an internal separator is anchored to this directory;
        // otherwise it may match at any depth.
        const hasInternalSlash = line.includes("/");
        const prefix = anchored || hasInternalSlash ? "" : "(?:.*/)?";
        const body = `${prefix}${globToRegexBody(line)}`;
        // `re` matches the entry itself or anything beneath it; `inside`
        // matches only paths strictly beneath the entry.
        const re = new RegExp(`^${body}(?:/.*)?$`);
        const inside = new RegExp(`^${body}/.*$`);
        this.rules.push({ re, negate, dirOnly, inside });
    }
    /**
     * Decide whether `relPath` is ignored.
     *
     * @param relPath path relative to the ignore file's directory, `/`-separated
     * @param isDir whether `relPath` names a directory
     * @returns `true`/`false` when a rule matches (last match wins), or
     *          `undefined` when no rule applies
     */
    decide(relPath, isDir) {
        let decision;
        for (const rule of this.rules) {
            // A directory-only rule can never match a file by name, but it
            // still covers files that live inside a matching directory.
            const re = rule.dirOnly && !isDir ? rule.inside : rule.re;
            if (re.test(relPath))
                decision = !rule.negate;
        }
        return decision;
    }
}
|
|
76
|
+
/**
 * Fold the scope stack (ordered shallow→deep) into one ignore verdict for
 * `absPath`. Each scope sees the path relative to its own `baseDir`; a scope
 * that returns `undefined` leaves the running verdict untouched, so the last
 * scope with an opinion decides. With no opinion at all the path is kept.
 */
function isIgnored(stack, absPath, isDir) {
    return stack.reduce((verdict, scope) => {
        const relative = path
            .relative(scope.baseDir, absPath)
            .split(path.sep)
            .join("/");
        const outcome = scope.matcher.decide(relative, isDir);
        return outcome === undefined ? verdict : outcome;
    }, false);
}
|
|
90
|
+
/**
 * Build the ignore scope for one directory. Each name in `ignoreFileNames` is
 * read from `dir` in the order given and its lines are appended to a single
 * list; a file that cannot be read contributes nothing.
 *
 * @returns `{ baseDir, matcher }` rooted at `dir`, or `null` when no lines
 *          were collected
 */
async function loadIgnoreScope(dir, ignoreFileNames) {
    const lines = [];
    for (const fileName of ignoreFileNames) {
        try {
            const contents = await fs.readFile(path.join(dir, fileName), "utf8");
            lines.push(...contents.split("\n"));
        }
        catch {
            // Best effort: this directory has no usable ignore file of that name.
            continue;
        }
    }
    if (lines.length === 0) {
        return null;
    }
    const matcher = new IgnoreMatcher(lines);
    return { baseDir: dir, matcher };
}
|
|
105
|
+
/**
 * Sniff the start of a file: read up to 4096 bytes from offset 0 and hand the
 * bytes actually read to `isBinary`. The file handle is closed whether or not
 * the read succeeds.
 *
 * @param absPath path of the file to inspect
 * @returns the result of `isBinary` for the leading bytes
 */
async function isBinaryFile(absPath) {
    const sniffLength = 4096;
    const handle = await fs.open(absPath, "r");
    try {
        const head = Buffer.alloc(sniffLength);
        const outcome = await handle.read(head, 0, sniffLength, 0);
        const filled = head.subarray(0, outcome.bytesRead);
        return isBinary(filled);
    }
    finally {
        await handle.close();
    }
}
|
|
116
|
+
/**
 * Walk `root` and yield every indexable text file. Directories in the
 * always-ignore set are skipped wholesale; ignore files accumulate down the
 * tree; secret-bearing, oversized, and binary files are filtered out.
 *
 * @param root directory to walk; resolved to an absolute path first
 * @param opts optional settings:
 *   - `ignoreFileNames`: ignore-file names to honour in each directory
 *     (defaults to `DEFAULT_IGNORE_FILES`)
 *   - `maxFileBytes`: files larger than this are skipped; when missing or
 *     `null`, no size limit is applied
 * @yields `{ relPath, absPath, size }` for each file that passes the filters
 */
export async function* walkRepo(root, opts = {}) {
    const { ignoreFileNames, maxFileBytes } = opts ?? {};
    const absRoot = path.resolve(root);
    // A missing limit means "no limit". Passing `null` through would make
    // `size > null` true for every non-empty file and silently skip them all.
    const sizeLimit = maxFileBytes ?? Number.POSITIVE_INFINITY;
    yield* walkDir(absRoot, absRoot, [], ignoreFileNames ?? DEFAULT_IGNORE_FILES, sizeLimit);
}
|
|
126
|
+
/**
 * Recursively yield the indexable files beneath `dir`, visiting entries in
 * `localeCompare` name order.
 *
 * @param dir absolute directory currently being listed
 * @param root absolute walk root; yielded `relPath` values are relative to it
 * @param parentStack ignore scopes inherited from ancestor directories
 * @param ignoreFileNames ignore-file names to load in every directory
 * @param maxFileBytes files larger than this are skipped
 * @yields `{ relPath, absPath, size }` for each file that passes the filters
 */
async function* walkDir(dir, root, parentStack, ignoreFileNames, maxFileBytes) {
    const scope = await loadIgnoreScope(dir, ignoreFileNames);
    // Copy rather than push so sibling directories never see this scope.
    const stack = scope ? [...parentStack, scope] : parentStack;
    let entries;
    try {
        entries = await fs.readdir(dir, { withFileTypes: true });
    }
    catch {
        return; // unreadable directory — skip
    }
    entries.sort((a, b) => a.name.localeCompare(b.name));
    for (const entry of entries) {
        const absPath = path.join(dir, entry.name);
        const relPath = path.relative(root, absPath).split(path.sep).join("/");
        const isDir = entry.isDirectory();
        if (isDir && ALWAYS_IGNORE_DIRS.has(entry.name))
            continue;
        if (!isDir && !entry.isFile())
            continue; // symlinks, sockets, fifos, etc.
        if (isSecretPath(relPath))
            continue;
        if (isIgnored(stack, absPath, isDir))
            continue;
        if (isDir) {
            yield* walkDir(absPath, root, stack, ignoreFileNames, maxFileBytes);
            continue;
        }
        let size;
        try {
            size = (await fs.stat(absPath)).size;
        }
        catch {
            continue; // vanished or unreadable — skip
        }
        if (size > maxFileBytes)
            continue;
        let binary;
        try {
            binary = await isBinaryFile(absPath);
        }
        catch {
            // The file could be stat'ed but not opened or read (permissions,
            // or removed in between). Skip it like any other unreadable entry
            // instead of aborting the whole walk.
            continue;
        }
        if (binary)
            continue;
        yield { relPath, absPath, size };
    }
}
|
package/dist/tools/index.d.ts
CHANGED
package/dist/tools/index.js
CHANGED
package/dist/tools/registry.js
CHANGED
|
@@ -3,6 +3,7 @@ import { listFilesTool } from "./list-files.js";
|
|
|
3
3
|
import { gitStatusTool } from "./git-status.js";
|
|
4
4
|
import { readFileTool, writeFileTool, editFileTool, applyPatchTool, globTool, grepFilesTool, } from "./file/index.js";
|
|
5
5
|
import { runCommandTool } from "./shell/index.js";
|
|
6
|
+
import { searchCodebaseTool } from "./search-codebase.js";
|
|
6
7
|
/**
|
|
7
8
|
* In-memory catalogue of the tools available to the agent. Names are unique;
|
|
8
9
|
* `toToolSpecs()` projects the catalogue into the `@cruxy/sdk` wire format.
|
|
@@ -59,5 +60,6 @@ export function buildDefaultRegistry() {
|
|
|
59
60
|
registry.register(grepFilesTool);
|
|
60
61
|
registry.register(gitStatusTool);
|
|
61
62
|
registry.register(runCommandTool);
|
|
63
|
+
registry.register(searchCodebaseTool);
|
|
62
64
|
return registry;
|
|
63
65
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import type { Tool } from "./types.js";
|
|
3
|
+
/**
 * Static type of the zod schema describing the tool's input: `query` is a
 * required string, `k` an optional number and `pathGlob` an optional string.
 * The two trailing object types spell out the same three fields; presumably
 * they are zod's parsed-output and accepted-input shapes — confirm against the
 * zod `ZodObject` typings.
 */
declare const parameters: z.ZodObject<{
|
|
4
|
+
query: z.ZodString;
|
|
5
|
+
k: z.ZodOptional<z.ZodNumber>;
|
|
6
|
+
pathGlob: z.ZodOptional<z.ZodString>;
|
|
7
|
+
}, "strip", z.ZodTypeAny, {
|
|
8
|
+
query: string;
|
|
9
|
+
k?: number | undefined;
|
|
10
|
+
pathGlob?: string | undefined;
|
|
11
|
+
}, {
|
|
12
|
+
query: string;
|
|
13
|
+
k?: number | undefined;
|
|
14
|
+
pathGlob?: string | undefined;
|
|
15
|
+
}>;
|
|
16
|
+
/**
|
|
17
|
+
* Semantic search over the project's local code index (C.17). Read-only — no
|
|
18
|
+
* approval — like read_file and grep_files. The index is built/refreshed lazily
|
|
19
|
+
* on first use; results are ranked by cosine similarity and token-budgeted.
|
|
20
|
+
*
|
|
21
|
+
* Complements `grep_files`: prefer this for conceptual "where / how does X work"
|
|
22
|
+
* questions, and grep for exact strings or symbols.
|
|
23
|
+
*/
|
|
24
|
+
export declare const searchCodebaseTool: Tool<typeof parameters>;
|
|
25
|
+
export {};
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { getIndexService } from "../indexing/index.js";
|
|
3
|
+
/** Largest `k` the schema accepts; the original note says this mirrors the retriever's cap. */
const MAX_K = 50;
/** Required, non-empty free-text query. */
const querySchema = z
    .string()
    .min(1)
    .describe("A natural-language description of the code you're looking for (e.g. 'where are tool results fed back to the model', 'JWT verification'). Concepts work better than exact tokens.");
/** Optional result count: a positive integer no greater than `MAX_K`. */
const resultCountSchema = z
    .number()
    .int()
    .positive()
    .max(MAX_K)
    .optional()
    .describe("Number of results to return (default 8).");
/** Optional path filter expressed as a glob. */
const pathGlobSchema = z
    .string()
    .optional()
    .describe("Optional glob to restrict results by path, e.g. 'src/**/*.ts' or 'packages/cli/**'.");
/** Input schema for the `search_codebase` tool. */
const parameters = z.object({
    query: querySchema,
    k: resultCountSchema,
    pathGlob: pathGlobSchema,
});
|
|
22
|
+
/**
 * Semantic search over the project's local code index (C.17). Read-only — no
 * approval — like read_file and grep_files. The index is built/refreshed lazily
 * on first use; results are ranked by cosine similarity and token-budgeted.
 *
 * Complements `grep_files`: prefer this for conceptual "where / how does X work"
 * questions, and grep for exact strings or symbols.
 *
 * `execute(input, ctx)` never throws for failures inside the search path: it
 * resolves to `{ ok: true, output }` on success, or `{ ok: false, error }`
 * when indexing is disabled or the index service fails.
 */
export const searchCodebaseTool = {
    name: "search_codebase",
    description: "Semantically search the project's code index for the snippets most relevant to a natural-language query. Returns ranked matches as 'path:startLine-endLine (score)' with a code snippet. Read-only, no approval. Prefer this for conceptual 'where is / how does X work' questions; use grep_files for exact strings or symbol names.",
    parameters,
    async execute(input, ctx) {
        if (!ctx.config.index.enabled) {
            return {
                ok: false,
                error: "codebase indexing is disabled (set index.enabled = true to use search_codebase)",
            };
        }
        try {
            const service = await getIndexService(ctx.cwd, ctx.config, ctx.logger);
            const hits = await service.search({
                query: input.query,
                k: input.k,
                pathGlob: input.pathGlob,
            });
            if (hits.length === 0) {
                return { ok: true, output: "(no matches in the codebase index)" };
            }
            return { ok: true, output: formatHits(hits) };
        }
        catch (err) {
            // `throw` accepts any value. Reading `.message` off a non-Error
            // gives `undefined`, and off `null`/`undefined` it throws, so only
            // trust it on real Error objects and stringify everything else.
            const message = err instanceof Error ? err.message : String(err);
            return { ok: false, error: message };
        }
    },
};
|
|
58
|
+
/**
 * Turn search hits into plain text for the model. Each hit becomes a
 * `path:startLine-endLine (score x.xxx)` header followed by its snippet with
 * every line prefixed by one space; hits are separated by a blank line.
 */
function formatHits(hits) {
    const blocks = [];
    for (const hit of hits) {
        const rendered = [
            `${hit.path}:${hit.startLine}-${hit.endLine} (score ${hit.score.toFixed(3)})`,
        ];
        for (const text of hit.snippet.split("\n")) {
            rendered.push(` ${text}`);
        }
        blocks.push(rendered.join("\n"));
    }
    return blocks.join("\n\n");
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cruxy/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "an agentic coding CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -28,7 +28,9 @@
|
|
|
28
28
|
"directory": "packages/cli"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
+
"better-sqlite3": "^12.11.1",
|
|
31
32
|
"commander": "^12.1.0",
|
|
33
|
+
"fastembed": "^2.1.0",
|
|
32
34
|
"picocolors": "^1.1.1",
|
|
33
35
|
"tinyglobby": "^0.2.10",
|
|
34
36
|
"zod": "^3.23.8",
|
|
@@ -36,6 +38,7 @@
|
|
|
36
38
|
"@cruxy/sdk": "0.1.0"
|
|
37
39
|
},
|
|
38
40
|
"devDependencies": {
|
|
41
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
39
42
|
"@types/node": "^22.10.0",
|
|
40
43
|
"tsx": "^4.19.2",
|
|
41
44
|
"typescript": "^5.7.2",
|