@cruxy/cli 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -1
- package/dist/cli/commands/index.d.ts +7 -0
- package/dist/cli/commands/index.js +59 -0
- package/dist/cli/commands/skills.d.ts +8 -0
- package/dist/cli/commands/skills.js +51 -0
- package/dist/cli/program.js +4 -0
- package/dist/config/schema.d.ts +199 -0
- package/dist/config/schema.js +55 -0
- package/dist/constants.d.ts +13 -0
- package/dist/constants.js +13 -0
- package/dist/indexing/chunker.d.ts +28 -0
- package/dist/indexing/chunker.js +65 -0
- package/dist/indexing/embedder.d.ts +98 -0
- package/dist/indexing/embedder.js +140 -0
- package/dist/indexing/index.d.ts +9 -0
- package/dist/indexing/index.js +9 -0
- package/dist/indexing/indexer.d.ts +45 -0
- package/dist/indexing/indexer.js +104 -0
- package/dist/indexing/retriever.d.ts +32 -0
- package/dist/indexing/retriever.js +53 -0
- package/dist/indexing/service.d.ts +49 -0
- package/dist/indexing/service.js +132 -0
- package/dist/indexing/store.d.ts +103 -0
- package/dist/indexing/store.js +279 -0
- package/dist/indexing/types.d.ts +71 -0
- package/dist/indexing/types.js +6 -0
- package/dist/indexing/util.d.ts +34 -0
- package/dist/indexing/util.js +97 -0
- package/dist/indexing/walker.d.ts +42 -0
- package/dist/indexing/walker.js +166 -0
- package/dist/skills/index.d.ts +4 -0
- package/dist/skills/index.js +4 -0
- package/dist/skills/loader.d.ts +42 -0
- package/dist/skills/loader.js +0 -0
- package/dist/skills/parser.d.ts +29 -0
- package/dist/skills/parser.js +90 -0
- package/dist/skills/service.d.ts +41 -0
- package/dist/skills/service.js +92 -0
- package/dist/skills/types.d.ts +94 -0
- package/dist/skills/types.js +21 -0
- package/dist/tools/index.d.ts +3 -0
- package/dist/tools/index.js +3 -0
- package/dist/tools/list-skills.d.ts +9 -0
- package/dist/tools/list-skills.js +34 -0
- package/dist/tools/load-skill.d.ts +21 -0
- package/dist/tools/load-skill.js +49 -0
- package/dist/tools/registry.js +6 -0
- package/dist/tools/search-codebase.d.ts +25 -0
- package/dist/tools/search-codebase.js +70 -0
- package/package.json +6 -2
- package/skills/git-commit/SKILL.md +60 -0
- package/skills/using-skills/SKILL.md +62 -0
package/README.md
CHANGED
|
@@ -25,7 +25,18 @@ cruxy run # interactive session
|
|
|
25
25
|
## Features
|
|
26
26
|
|
|
27
27
|
- **Tools** — `read_file`, `write_file`, `edit_file`, `glob`, `list_files`,
|
|
28
|
-
`grep_files`, `run_command`, `git_status`, `apply_patch
|
|
28
|
+
`grep_files`, `run_command`, `git_status`, `apply_patch`, `search_codebase`,
|
|
29
|
+
`list_skills`, `load_skill`.
|
|
30
|
+
- **Codebase index** — a local, incremental semantic index (`cruxy index`)
|
|
31
|
+
behind the `search_codebase` tool. Embeddings run on-device via fastembed
|
|
32
|
+
(ONNX, bge-small-en-v1.5) with no network calls after the one-time model download; the store is SQLite at
|
|
33
|
+
`.cruxy/index.db`. Only changed files are re-embedded; `.gitignore` /
|
|
34
|
+
`.cruxyignore` are respected and secrets (`.env*`, keys) are never indexed.
|
|
35
|
+
- **Skills** — reusable, task-specific instructions in a `SKILL.md`
|
|
36
|
+
(frontmatter + markdown), discovered via `list_skills` and pulled on demand
|
|
37
|
+
with `load_skill` (progressive disclosure: only name + description are in
|
|
38
|
+
context until a skill is loaded). Layered project > user > builtin, strictly
|
|
39
|
+
validated, and never auto-executed.
|
|
29
40
|
- **Agent** — streaming output, multi-turn interactive sessions, context
|
|
30
41
|
compaction, and awareness of git state and project instructions (`CRUXY.md`).
|
|
31
42
|
- **Safety** — a single approval gate with diff previews that fails closed;
|
|
@@ -36,6 +47,29 @@ Interactive slash commands: `/help`, `/clear`, `/compact`, `/reload`, `/exit`.
|
|
|
36
47
|
|
|
37
48
|
For unattended runs, pass `--yes` to approve every action.
|
|
38
49
|
|
|
50
|
+
### Codebase search
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
cruxy index # build / refresh the local index (.cruxy/index.db)
|
|
54
|
+
cruxy index --status # show what's indexed
|
|
55
|
+
cruxy index --force # re-embed every file
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
The index refreshes itself lazily the first time the agent calls
|
|
59
|
+
`search_codebase`, so it works even without running `cruxy index` first. The
|
|
60
|
+
bge-small embedding model (~130 MB) downloads and caches on first use.
|
|
61
|
+
|
|
62
|
+
### Skills
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
cruxy skills # list the resolved skill catalog
|
|
66
|
+
cruxy skills --status # show source directories and validation errors
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Add a skill by creating `<name>/SKILL.md` under `.cruxy/skills/` (project) or
|
|
70
|
+
`~/.cruxy/skills/` (user); shipped builtins are the lowest layer. See the
|
|
71
|
+
built-in `using-skills` skill for the authoring rules.
|
|
72
|
+
|
|
39
73
|
The LLM client is [`@cruxy/sdk`](https://www.npmjs.com/package/@cruxy/sdk) —
|
|
40
74
|
provider-agnostic, built over `fetch`, with no vendor SDKs.
|
|
41
75
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
/**
|
|
3
|
+
* `cruxy index` — build or refresh the local codebase search index that backs
|
|
4
|
+
* the `search_codebase` tool. `--status` reports the current state without
|
|
5
|
+
* modifying it; `--force` re-embeds every file regardless of content hashes.
|
|
6
|
+
*/
|
|
7
|
+
export declare function indexCommand(): Command;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import pc from "picocolors";
|
|
4
|
+
import { loadConfig } from "../../config/index.js";
|
|
5
|
+
import { getIndexService, indexDbPath, resetIndexServices, } from "../../indexing/index.js";
|
|
6
|
+
import { logger } from "../../utils/logger.js";
|
|
7
|
+
/**
 * `cruxy index` — build or refresh the local codebase search index that backs
 * the `search_codebase` tool.
 *
 * Flags:
 * - `--status` reports the current index state without building anything. For
 *   a non-memory store it returns early when the database file does not exist,
 *   so no empty DB is created just to report "not built yet".
 * - `--force` is forwarded as `service.index({ force })` to re-embed every
 *   file regardless of content hashes.
 *
 * When `index.enabled` is false the command only logs a warning. A failed
 * build (including a failure to obtain the index service) is logged as
 * "indexing failed: …" and signalled through `process.exitCode = 1`. Every
 * path that asks for an index service resets the services afterwards, even
 * when an error is thrown.
 *
 * @returns {Command} the configured `index` subcommand.
 */
export function indexCommand() {
    return new Command("index")
        .description("build or refresh the local codebase search index")
        .option("--status", "show index status without building")
        .option("--force", "re-embed every file, ignoring content hashes")
        .action(async (opts) => {
            const { config } = loadConfig();
            if (!config.index.enabled) {
                logger.warn("codebase indexing is disabled (index.enabled = false)");
                return;
            }
            const cwd = process.cwd();
            if (opts.status) {
                // Don't create an empty DB just to report "no index built yet".
                if (config.index.store !== "memory" && !existsSync(indexDbPath(cwd))) {
                    logger.print(`${pc.bold("index:")} ${pc.dim("not built yet")} — run ${pc.bold("cruxy index")}`);
                    return;
                }
                // Errors still propagate to the caller, but the services are
                // always released first.
                try {
                    const service = await getIndexService(cwd, config, logger);
                    printStatus(service.status());
                }
                finally {
                    await resetIndexServices();
                }
                return;
            }
            logger.info(pc.dim(`indexing ${cwd} …`));
            logger.info(pc.dim("(first run may download the local embedding model)"));
            try {
                // Acquired inside the try so that a service that cannot be
                // created is reported and cleaned up like any other failure.
                const service = await getIndexService(cwd, config, logger);
                const stats = await service.index({ force: opts.force });
                logger.print(`${pc.green("indexed")} ${stats.filesIndexed} file(s), ${stats.chunksIndexed} chunk(s) ` +
                    pc.dim(`(${stats.filesSkipped} unchanged, ${stats.filesPurged} purged, ${stats.durationMs}ms)`));
            }
            catch (err) {
                // Non-Error throwables have no `.message`; fall back to their
                // string form instead of printing "undefined".
                const reason = err?.message ?? String(err);
                logger.error(`indexing failed: ${reason}`);
                process.exitCode = 1;
            }
            finally {
                await resetIndexServices();
            }
        });
}
|
|
52
|
+
/**
 * Print a five-line summary of an index status object through `logger.print`:
 * whether the index is built, where the store lives, which embedder produced
 * it (with its dimension when known), and the file and chunk counts.
 *
 * @param {object} status - value returned by the index service's `status()`;
 *   the fields read are `exists`, `storePath`, `embedderId`, `dim`, `files`
 *   and `chunks`.
 */
function printStatus(status) {
    const { exists, storePath, embedderId, dim, files, chunks } = status;
    const state = exists ? pc.green("built") : pc.yellow("empty");
    // The dimension suffix is only shown when `dim` is truthy.
    const dimSuffix = dim ? pc.dim(` · ${dim}d`) : "";
    const rows = [
        `${pc.bold("index:")} ${state}`,
        `${pc.bold("store:")} ${storePath ?? "(in-memory)"}`,
        `${pc.bold("embedder:")} ${embedderId ?? pc.dim("(none)")}${dimSuffix}`,
        `${pc.bold("files:")} ${files}`,
        `${pc.bold("chunks:")} ${chunks}`,
    ];
    for (const row of rows) {
        logger.print(row);
    }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
/**
|
|
3
|
+
* `cruxy skills` — list the skills available to the agent (the same catalog the
|
|
4
|
+
* `list_skills` tool sees). `--status` additionally shows the three source
|
|
5
|
+
* directories in precedence order and any validation errors (skills that were
|
|
6
|
+
* excluded for being malformed).
|
|
7
|
+
*/
|
|
8
|
+
export declare function skillsCommand(): Command;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import pc from "picocolors";
|
|
3
|
+
import { getSkillService, resetSkillServices } from "../../skills/index.js";
|
|
4
|
+
import { logger } from "../../utils/logger.js";
|
|
5
|
+
/**
 * `cruxy skills` — list the skills available to the agent (the same catalog the
 * `list_skills` tool sees).
 *
 * Without flags it prints one name/source line and one description line per
 * skill, plus a one-line hint when some skills failed validation. With
 * `--status` it additionally prints the source directories in precedence
 * order and each validation error (skills that were excluded for being
 * malformed).
 *
 * The skill services are reset once the command finishes, whether it
 * completes normally or throws.
 *
 * @returns {Command} the configured `skills` subcommand.
 */
export function skillsCommand() {
    return new Command("skills")
        .description("list the skills available to the agent")
        .option("--status", "show source directories and validation errors")
        .action(async (opts) => {
            // A single finally replaces the per-branch reset calls, so the
            // cleanup also runs when status() or the printing throws.
            try {
                const service = getSkillService(process.cwd(), logger);
                const status = await service.status();
                if (status.entries.length === 0) {
                    logger.print(pc.dim("no skills found"));
                }
                else {
                    for (const entry of status.entries) {
                        logger.print(`${pc.bold(entry.name)} ${pc.dim(`[${entry.source}]`)}`);
                        logger.print(` ${entry.description}`);
                    }
                }
                if (!opts.status) {
                    // Plain listing: only hint that details are available.
                    if (status.errors.length > 0) {
                        logger.print(pc.yellow(`\n${status.errors.length} skill(s) failed validation — run ${pc.bold("cruxy skills --status")} for details`));
                    }
                    return;
                }
                logger.print(`\n${pc.bold("sources")} ${pc.dim("(precedence high → low):")}`);
                for (const { source, dir } of status.sources) {
                    logger.print(` ${source.padEnd(8)} ${pc.dim(dir)}`);
                }
                logger.print("");
                if (status.errors.length === 0) {
                    logger.print(pc.green("no validation errors"));
                }
                else {
                    logger.print(pc.yellow(`validation errors (${status.errors.length}):`));
                    for (const err of status.errors) {
                        logger.print(` ${pc.red("✗")} ${pc.bold(err.name)} ${pc.dim(`[${err.source}]`)}: ${err.message}`);
                        logger.print(` ${pc.dim(err.dir)}`);
                    }
                }
            }
            finally {
                resetSkillServices();
            }
        });
}
|
package/dist/cli/program.js
CHANGED
|
@@ -4,6 +4,8 @@ import { APP_NAME, APP_VERSION, APP_DESCRIPTION } from "../constants.js";
|
|
|
4
4
|
import { logger } from "../utils/logger.js";
|
|
5
5
|
import { runCommand } from "./commands/run.js";
|
|
6
6
|
import { configCommand } from "./commands/config.js";
|
|
7
|
+
import { indexCommand } from "./commands/index.js";
|
|
8
|
+
import { skillsCommand } from "./commands/skills.js";
|
|
7
9
|
export function buildProgram() {
|
|
8
10
|
const program = new Command();
|
|
9
11
|
program
|
|
@@ -24,6 +26,8 @@ export function buildProgram() {
|
|
|
24
26
|
});
|
|
25
27
|
program.addCommand(runCommand());
|
|
26
28
|
program.addCommand(configCommand());
|
|
29
|
+
program.addCommand(indexCommand());
|
|
30
|
+
program.addCommand(skillsCommand());
|
|
27
31
|
// Default action: no subcommand -> interactive entrypoint (stub for now).
|
|
28
32
|
program.action(() => {
|
|
29
33
|
logger.print(pc.cyan(`${APP_NAME} v${APP_VERSION}`));
|
package/dist/config/schema.d.ts
CHANGED
|
@@ -97,6 +97,93 @@ export declare const ApprovalConfigSchema: z.ZodObject<{
|
|
|
97
97
|
}, {
|
|
98
98
|
mode?: "auto" | "prompt" | undefined;
|
|
99
99
|
}>;
|
|
100
|
+
/**
|
|
101
|
+
* Codebase semantic index (C.17): the local embedding index that backs the
|
|
102
|
+
* `search_codebase` tool and `cruxy index`. Every field is defaulted so the
|
|
103
|
+
* feature works with zero configuration.
|
|
104
|
+
*/
|
|
105
|
+
export declare const IndexConfigSchema: z.ZodObject<{
|
|
106
|
+
/** Master switch for `search_codebase` and `cruxy index`. */
|
|
107
|
+
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
108
|
+
/**
|
|
109
|
+
* Embedding backend. Only `fastembed` (bge-small-en-v1.5, local ONNX; the
|
|
110
|
+
* model downloads and caches on first use) is selectable — if it cannot be
|
|
111
|
+
* loaded, indexing fails loudly rather than degrading. (The deterministic
|
|
112
|
+
* hashing embedder exists for tests and is injected directly, never chosen
|
|
113
|
+
* here.)
|
|
114
|
+
*/
|
|
115
|
+
embedder: z.ZodDefault<z.ZodEnum<["fastembed"]>>;
|
|
116
|
+
/**
|
|
117
|
+
* Vector store backend. `sqlite` persists to `.cruxy/index.db` via
|
|
118
|
+
* better-sqlite3; `memory` is ephemeral (tests). `auto` prefers sqlite and
|
|
119
|
+
* falls back to memory when the native dependency cannot be loaded.
|
|
120
|
+
*/
|
|
121
|
+
store: z.ZodDefault<z.ZodEnum<["auto", "sqlite", "memory"]>>;
|
|
122
|
+
/** Hard per-file size cap, in bytes; larger files are skipped entirely. */
|
|
123
|
+
maxFileBytes: z.ZodDefault<z.ZodNumber>;
|
|
124
|
+
chunk: z.ZodEffects<z.ZodDefault<z.ZodObject<{
|
|
125
|
+
/** Target chunk window, in lines. */
|
|
126
|
+
windowLines: z.ZodDefault<z.ZodNumber>;
|
|
127
|
+
/** Lines shared between consecutive chunks; must be < windowLines. */
|
|
128
|
+
overlapLines: z.ZodDefault<z.ZodNumber>;
|
|
129
|
+
}, "strict", z.ZodTypeAny, {
|
|
130
|
+
windowLines: number;
|
|
131
|
+
overlapLines: number;
|
|
132
|
+
}, {
|
|
133
|
+
windowLines?: number | undefined;
|
|
134
|
+
overlapLines?: number | undefined;
|
|
135
|
+
}>>, {
|
|
136
|
+
windowLines: number;
|
|
137
|
+
overlapLines: number;
|
|
138
|
+
}, {
|
|
139
|
+
windowLines?: number | undefined;
|
|
140
|
+
overlapLines?: number | undefined;
|
|
141
|
+
} | undefined>;
|
|
142
|
+
search: z.ZodDefault<z.ZodObject<{
|
|
143
|
+
/** Number of hits returned when the tool omits `k`. */
|
|
144
|
+
defaultK: z.ZodDefault<z.ZodNumber>;
|
|
145
|
+
/** Approximate token budget for combined snippets (chars/4 heuristic). */
|
|
146
|
+
tokenBudget: z.ZodDefault<z.ZodNumber>;
|
|
147
|
+
/** Maximum lines kept in any single returned snippet. */
|
|
148
|
+
maxSnippetLines: z.ZodDefault<z.ZodNumber>;
|
|
149
|
+
}, "strict", z.ZodTypeAny, {
|
|
150
|
+
defaultK: number;
|
|
151
|
+
tokenBudget: number;
|
|
152
|
+
maxSnippetLines: number;
|
|
153
|
+
}, {
|
|
154
|
+
defaultK?: number | undefined;
|
|
155
|
+
tokenBudget?: number | undefined;
|
|
156
|
+
maxSnippetLines?: number | undefined;
|
|
157
|
+
}>>;
|
|
158
|
+
}, "strict", z.ZodTypeAny, {
|
|
159
|
+
search: {
|
|
160
|
+
defaultK: number;
|
|
161
|
+
tokenBudget: number;
|
|
162
|
+
maxSnippetLines: number;
|
|
163
|
+
};
|
|
164
|
+
enabled: boolean;
|
|
165
|
+
embedder: "fastembed";
|
|
166
|
+
store: "auto" | "sqlite" | "memory";
|
|
167
|
+
maxFileBytes: number;
|
|
168
|
+
chunk: {
|
|
169
|
+
windowLines: number;
|
|
170
|
+
overlapLines: number;
|
|
171
|
+
};
|
|
172
|
+
}, {
|
|
173
|
+
search?: {
|
|
174
|
+
defaultK?: number | undefined;
|
|
175
|
+
tokenBudget?: number | undefined;
|
|
176
|
+
maxSnippetLines?: number | undefined;
|
|
177
|
+
} | undefined;
|
|
178
|
+
enabled?: boolean | undefined;
|
|
179
|
+
embedder?: "fastembed" | undefined;
|
|
180
|
+
store?: "auto" | "sqlite" | "memory" | undefined;
|
|
181
|
+
maxFileBytes?: number | undefined;
|
|
182
|
+
chunk?: {
|
|
183
|
+
windowLines?: number | undefined;
|
|
184
|
+
overlapLines?: number | undefined;
|
|
185
|
+
} | undefined;
|
|
186
|
+
}>;
|
|
100
187
|
/** MCP server entry — stdio or URL transport (wired up in a later phase). */
|
|
101
188
|
export declare const McpServerSchema: z.ZodObject<{
|
|
102
189
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -203,6 +290,88 @@ export declare const CruxyConfigSchema: z.ZodObject<{
|
|
|
203
290
|
}, {
|
|
204
291
|
mode?: "auto" | "prompt" | undefined;
|
|
205
292
|
}>>;
|
|
293
|
+
index: z.ZodDefault<z.ZodObject<{
|
|
294
|
+
/** Master switch for `search_codebase` and `cruxy index`. */
|
|
295
|
+
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
296
|
+
/**
|
|
297
|
+
* Embedding backend. Only `fastembed` (bge-small-en-v1.5, local ONNX; the
|
|
298
|
+
* model downloads and caches on first use) is selectable — if it cannot be
|
|
299
|
+
* loaded, indexing fails loudly rather than degrading. (The deterministic
|
|
300
|
+
* hashing embedder exists for tests and is injected directly, never chosen
|
|
301
|
+
* here.)
|
|
302
|
+
*/
|
|
303
|
+
embedder: z.ZodDefault<z.ZodEnum<["fastembed"]>>;
|
|
304
|
+
/**
|
|
305
|
+
* Vector store backend. `sqlite` persists to `.cruxy/index.db` via
|
|
306
|
+
* better-sqlite3; `memory` is ephemeral (tests). `auto` prefers sqlite and
|
|
307
|
+
* falls back to memory when the native dependency cannot be loaded.
|
|
308
|
+
*/
|
|
309
|
+
store: z.ZodDefault<z.ZodEnum<["auto", "sqlite", "memory"]>>;
|
|
310
|
+
/** Hard per-file size cap, in bytes; larger files are skipped entirely. */
|
|
311
|
+
maxFileBytes: z.ZodDefault<z.ZodNumber>;
|
|
312
|
+
chunk: z.ZodEffects<z.ZodDefault<z.ZodObject<{
|
|
313
|
+
/** Target chunk window, in lines. */
|
|
314
|
+
windowLines: z.ZodDefault<z.ZodNumber>;
|
|
315
|
+
/** Lines shared between consecutive chunks; must be < windowLines. */
|
|
316
|
+
overlapLines: z.ZodDefault<z.ZodNumber>;
|
|
317
|
+
}, "strict", z.ZodTypeAny, {
|
|
318
|
+
windowLines: number;
|
|
319
|
+
overlapLines: number;
|
|
320
|
+
}, {
|
|
321
|
+
windowLines?: number | undefined;
|
|
322
|
+
overlapLines?: number | undefined;
|
|
323
|
+
}>>, {
|
|
324
|
+
windowLines: number;
|
|
325
|
+
overlapLines: number;
|
|
326
|
+
}, {
|
|
327
|
+
windowLines?: number | undefined;
|
|
328
|
+
overlapLines?: number | undefined;
|
|
329
|
+
} | undefined>;
|
|
330
|
+
search: z.ZodDefault<z.ZodObject<{
|
|
331
|
+
/** Number of hits returned when the tool omits `k`. */
|
|
332
|
+
defaultK: z.ZodDefault<z.ZodNumber>;
|
|
333
|
+
/** Approximate token budget for combined snippets (chars/4 heuristic). */
|
|
334
|
+
tokenBudget: z.ZodDefault<z.ZodNumber>;
|
|
335
|
+
/** Maximum lines kept in any single returned snippet. */
|
|
336
|
+
maxSnippetLines: z.ZodDefault<z.ZodNumber>;
|
|
337
|
+
}, "strict", z.ZodTypeAny, {
|
|
338
|
+
defaultK: number;
|
|
339
|
+
tokenBudget: number;
|
|
340
|
+
maxSnippetLines: number;
|
|
341
|
+
}, {
|
|
342
|
+
defaultK?: number | undefined;
|
|
343
|
+
tokenBudget?: number | undefined;
|
|
344
|
+
maxSnippetLines?: number | undefined;
|
|
345
|
+
}>>;
|
|
346
|
+
}, "strict", z.ZodTypeAny, {
|
|
347
|
+
search: {
|
|
348
|
+
defaultK: number;
|
|
349
|
+
tokenBudget: number;
|
|
350
|
+
maxSnippetLines: number;
|
|
351
|
+
};
|
|
352
|
+
enabled: boolean;
|
|
353
|
+
embedder: "fastembed";
|
|
354
|
+
store: "auto" | "sqlite" | "memory";
|
|
355
|
+
maxFileBytes: number;
|
|
356
|
+
chunk: {
|
|
357
|
+
windowLines: number;
|
|
358
|
+
overlapLines: number;
|
|
359
|
+
};
|
|
360
|
+
}, {
|
|
361
|
+
search?: {
|
|
362
|
+
defaultK?: number | undefined;
|
|
363
|
+
tokenBudget?: number | undefined;
|
|
364
|
+
maxSnippetLines?: number | undefined;
|
|
365
|
+
} | undefined;
|
|
366
|
+
enabled?: boolean | undefined;
|
|
367
|
+
embedder?: "fastembed" | undefined;
|
|
368
|
+
store?: "auto" | "sqlite" | "memory" | undefined;
|
|
369
|
+
maxFileBytes?: number | undefined;
|
|
370
|
+
chunk?: {
|
|
371
|
+
windowLines?: number | undefined;
|
|
372
|
+
overlapLines?: number | undefined;
|
|
373
|
+
} | undefined;
|
|
374
|
+
}>>;
|
|
206
375
|
mcpServers: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
207
376
|
command: z.ZodOptional<z.ZodString>;
|
|
208
377
|
args: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
@@ -251,6 +420,21 @@ export declare const CruxyConfigSchema: z.ZodObject<{
|
|
|
251
420
|
approval: {
|
|
252
421
|
mode: "auto" | "prompt";
|
|
253
422
|
};
|
|
423
|
+
index: {
|
|
424
|
+
search: {
|
|
425
|
+
defaultK: number;
|
|
426
|
+
tokenBudget: number;
|
|
427
|
+
maxSnippetLines: number;
|
|
428
|
+
};
|
|
429
|
+
enabled: boolean;
|
|
430
|
+
embedder: "fastembed";
|
|
431
|
+
store: "auto" | "sqlite" | "memory";
|
|
432
|
+
maxFileBytes: number;
|
|
433
|
+
chunk: {
|
|
434
|
+
windowLines: number;
|
|
435
|
+
overlapLines: number;
|
|
436
|
+
};
|
|
437
|
+
};
|
|
254
438
|
mcpServers: Record<string, {
|
|
255
439
|
command?: string | undefined;
|
|
256
440
|
args?: string[] | undefined;
|
|
@@ -291,6 +475,21 @@ export declare const CruxyConfigSchema: z.ZodObject<{
|
|
|
291
475
|
approval?: {
|
|
292
476
|
mode?: "auto" | "prompt" | undefined;
|
|
293
477
|
} | undefined;
|
|
478
|
+
index?: {
|
|
479
|
+
search?: {
|
|
480
|
+
defaultK?: number | undefined;
|
|
481
|
+
tokenBudget?: number | undefined;
|
|
482
|
+
maxSnippetLines?: number | undefined;
|
|
483
|
+
} | undefined;
|
|
484
|
+
enabled?: boolean | undefined;
|
|
485
|
+
embedder?: "fastembed" | undefined;
|
|
486
|
+
store?: "auto" | "sqlite" | "memory" | undefined;
|
|
487
|
+
maxFileBytes?: number | undefined;
|
|
488
|
+
chunk?: {
|
|
489
|
+
windowLines?: number | undefined;
|
|
490
|
+
overlapLines?: number | undefined;
|
|
491
|
+
} | undefined;
|
|
492
|
+
} | undefined;
|
|
294
493
|
mcpServers?: Record<string, {
|
|
295
494
|
command?: string | undefined;
|
|
296
495
|
args?: string[] | undefined;
|
package/dist/config/schema.js
CHANGED
|
@@ -72,6 +72,60 @@ export const ApprovalConfigSchema = z
|
|
|
72
72
|
mode: z.enum(["prompt", "auto"]).default("prompt"),
|
|
73
73
|
})
|
|
74
74
|
.strict();
|
|
75
|
+
/**
 * Configuration for the codebase semantic index (C.17) — the local embedding
 * index used by the `search_codebase` tool and the `cruxy index` command.
 * Unknown keys are rejected at every level, and every field carries a default,
 * so an empty object is a valid configuration.
 */
export const IndexConfigSchema = z.strictObject({
    /** Master switch for `search_codebase` and `cruxy index` (default: on). */
    enabled: z.boolean().default(true),
    /**
     * Embedding backend. `fastembed` (bge-small-en-v1.5, local ONNX; the model
     * downloads and caches on first use) is the only selectable value — if it
     * cannot be loaded, indexing fails loudly rather than degrading. The
     * deterministic hashing embedder is for tests, where it is injected
     * directly and never chosen through this setting.
     */
    embedder: z.enum(["fastembed"]).default("fastembed"),
    /**
     * Vector store backend. `sqlite` persists to `.cruxy/index.db` via
     * better-sqlite3; `memory` is ephemeral (tests). `auto` (the default)
     * prefers sqlite and falls back to memory when the native dependency
     * cannot be loaded.
     */
    store: z.enum(["auto", "sqlite", "memory"]).default("auto"),
    /** Hard per-file size cap, in bytes (default 1 MiB); larger files are skipped entirely. */
    maxFileBytes: z.number().int().positive().default(1024 * 1024),
    chunk: z
        .strictObject({
            /** Target chunk window, in lines (default 60). */
            windowLines: z.number().int().positive().default(60),
            /** Lines shared between consecutive chunks (default 15); must be < windowLines. */
            overlapLines: z.number().int().nonnegative().default(15),
        })
        .default({})
        // The overlap check runs on the defaulted object.
        .refine(({ overlapLines, windowLines }) => overlapLines < windowLines, {
            message: "index.chunk.overlapLines must be less than windowLines",
        }),
    search: z
        .strictObject({
            /** Number of hits returned when the tool omits `k` (default 8). */
            defaultK: z.number().int().positive().default(8),
            /** Approximate token budget for combined snippets, chars/4 heuristic (default 1500). */
            tokenBudget: z.number().int().positive().default(1500),
            /** Maximum lines kept in any single returned snippet (default 40). */
            maxSnippetLines: z.number().int().positive().default(40),
        })
        .default({}),
});
|
|
75
129
|
/** MCP server entry — stdio or URL transport (wired up in a later phase). */
|
|
76
130
|
export const McpServerSchema = z
|
|
77
131
|
.object({
|
|
@@ -90,6 +144,7 @@ export const CruxyConfigSchema = z
|
|
|
90
144
|
shell: ShellConfigSchema.default({}),
|
|
91
145
|
context: ContextConfigSchema.default({}),
|
|
92
146
|
approval: ApprovalConfigSchema.default({}),
|
|
147
|
+
index: IndexConfigSchema.default({}),
|
|
93
148
|
mcpServers: z.record(z.string(), McpServerSchema).default({}),
|
|
94
149
|
logLevel: z.enum(LOG_LEVELS).default("info"),
|
|
95
150
|
})
|
package/dist/constants.d.ts
CHANGED
|
@@ -9,3 +9,16 @@ export declare const CONFIG_FILE_NAME = "config.json";
|
|
|
9
9
|
export declare const PROJECT_CONFIG_FILENAMES: string[];
|
|
10
10
|
/** Project-instruction filenames, checked in order (first match wins). */
|
|
11
11
|
export declare const PROJECT_INSTRUCTION_FILENAMES: string[];
|
|
12
|
+
/**
|
|
13
|
+
* Name of the skills subdirectory, used under the project dir
|
|
14
|
+
* (`<cwd>/.cruxy/skills`), the global dir (`~/.cruxy/skills`), and the package
|
|
15
|
+
* root for shipped builtins.
|
|
16
|
+
*/
|
|
17
|
+
export declare const SKILLS_DIR_NAME = "skills";
|
|
18
|
+
/**
|
|
19
|
+
* Absolute path of the shipped builtin skills directory (`<pkg>/skills`).
|
|
20
|
+
* Anchored the same way as the package.json lookup above: both `dist/` and
|
|
21
|
+
* `src/` sit one level below the package root, so `../skills` resolves to the
|
|
22
|
+
* shipped `skills/` directory in dev, in `dist`, and when published.
|
|
23
|
+
*/
|
|
24
|
+
export declare const BUILTIN_SKILLS_DIR: string;
|
package/dist/constants.js
CHANGED
|
@@ -29,3 +29,16 @@ export const PROJECT_CONFIG_FILENAMES = [
|
|
|
29
29
|
];
|
|
30
30
|
/** Project-instruction filenames, checked in order (first match wins). */
|
|
31
31
|
export const PROJECT_INSTRUCTION_FILENAMES = ["CRUXY.md", "AGENTS.md"];
|
|
32
|
+
/**
|
|
33
|
+
* Name of the skills subdirectory, used under the project dir
|
|
34
|
+
* (`<cwd>/.cruxy/skills`), the global dir (`~/.cruxy/skills`), and the package
|
|
35
|
+
* root for shipped builtins.
|
|
36
|
+
*/
|
|
37
|
+
export const SKILLS_DIR_NAME = "skills";
|
|
38
|
+
/**
|
|
39
|
+
* Absolute path of the shipped builtin skills directory (`<pkg>/skills`).
|
|
40
|
+
* Anchored the same way as the package.json lookup above: both `dist/` and
|
|
41
|
+
* `src/` sit one level below the package root, so `../skills` resolves to the
|
|
42
|
+
* shipped `skills/` directory in dev, in `dist`, and when published.
|
|
43
|
+
*/
|
|
44
|
+
export const BUILTIN_SKILLS_DIR = join(__dirname, "..", SKILLS_DIR_NAME);
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { Chunk } from "./types.js";
|
|
2
|
+
/** Knobs for {@link chunkFile}. */
|
|
3
|
+
export interface ChunkOptions {
|
|
4
|
+
/** Target window size, in lines. */
|
|
5
|
+
windowLines: number;
|
|
6
|
+
/** Lines shared between consecutive windows (clamped to < windowLines). */
|
|
7
|
+
overlapLines: number;
|
|
8
|
+
/**
|
|
9
|
+
* How far (in lines) the window end may slide to land on a blank-line
|
|
10
|
+
* boundary. Keeps chunks aligned to natural gaps without drifting too far
|
|
11
|
+
* from the target size. Defaults to 8 lines when omitted.
|
|
12
|
+
*/
|
|
13
|
+
snapSlack?: number;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Split a file's text into overlapping line windows.
|
|
17
|
+
*
|
|
18
|
+
* Windows are `windowLines` long and advance by `windowLines - overlapLines`, so
|
|
19
|
+
* consecutive chunks share `overlapLines` lines (context that would otherwise be
|
|
20
|
+
* severed at a hard cut). Where possible the window end is nudged — within
|
|
21
|
+
* `snapSlack` lines — to fall right after a blank line, so chunks break at
|
|
22
|
+
* natural boundaries (between functions, paragraphs, etc.).
|
|
23
|
+
*
|
|
24
|
+
* Line numbers are 1-based and inclusive. A whitespace-only (or empty) file
|
|
25
|
+
* yields no chunks. A file shorter than one window yields exactly one chunk.
|
|
26
|
+
* Coverage is gap-free: every source line belongs to at least one chunk.
|
|
27
|
+
*/
|
|
28
|
+
export declare function chunkFile(path: string, text: string, opts: ChunkOptions): Chunk[];
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/** Snap slack, in lines, used when `opts.snapSlack` is not provided. */
const DEFAULT_SNAP_SLACK = 8;
/**
 * Find the window end closest to `end` that lands on a blank-line boundary.
 *
 * A candidate end `e` (0-based, exclusive) qualifies when the last included
 * line (`lines[e - 1]`) is blank, or when `e` is the end of the file.
 * Candidates are searched within `slack` lines on either side of `end`, never
 * producing an empty window. On a distance tie the earlier candidate wins.
 *
 * @param {string[]} lines - all lines of the file.
 * @param {number} start - 0-based inclusive start of the current window.
 * @param {number} end - 0-based exclusive target end of the current window.
 * @param {number} slack - whole number of lines the end may move.
 * @returns {number} the snapped end, or `end` when no boundary is in range.
 */
function snapToBlankLine(lines, start, end, slack) {
    const n = lines.length;
    const lo = Math.max(start + 1, end - slack);
    const hi = Math.min(n, end + slack);
    let best = end;
    let bestDist = Infinity;
    for (let e = lo; e <= hi; e++) {
        const endsOnBlank = e === n || lines[e - 1].trim() === "";
        if (!endsOnBlank)
            continue;
        const dist = Math.abs(e - end);
        if (dist < bestDist) {
            bestDist = dist;
            best = e;
        }
    }
    return best;
}
/**
 * Split a file's text into overlapping line windows.
 *
 * Windows target `windowLines` lines, and each next window starts
 * `overlapLines` lines before the previous window's end, so consecutive
 * chunks share context that a hard cut would sever. Where possible the window
 * end is nudged — within `snapSlack` lines — to fall right after a blank
 * line, so chunks break at natural boundaries (between functions, paragraphs,
 * etc.).
 *
 * Line numbers are 1-based and inclusive. A whitespace-only (or empty) file
 * yields no chunks. A file shorter than one window yields exactly one chunk.
 * Coverage is gap-free: every source line belongs to at least one chunk.
 *
 * `windowLines`, `overlapLines` and `snapSlack` are all floored to whole
 * lines; `windowLines` is clamped to at least 1, `overlapLines` to
 * `[0, windowLines - 1]`, and `snapSlack` to at least 0.
 *
 * @param {string} path - file path recorded on every chunk.
 * @param {string} text - full file contents, split on "\n".
 * @param {{ windowLines: number, overlapLines: number, snapSlack?: number }} opts
 * @returns {Array<{ path: string, startLine: number, endLine: number, text: string }>}
 */
export function chunkFile(path, text, opts) {
    if (text.trim() === "")
        return [];
    const lines = text.split("\n");
    // A trailing newline produces a phantom empty final element; drop it so line
    // counts match what an editor shows.
    if (lines.length > 1 && lines[lines.length - 1] === "")
        lines.pop();
    const n = lines.length;
    const window = Math.max(1, Math.floor(opts.windowLines));
    const overlap = Math.min(Math.max(0, Math.floor(opts.overlapLines)), window - 1);
    // Floor the slack like the other knobs: a fractional slack would make the
    // snap search probe fractional line indices and crash on `undefined.trim()`.
    const slack = Math.max(0, Math.floor(opts.snapSlack ?? DEFAULT_SNAP_SLACK));
    const chunks = [];
    let start = 0; // 0-based, inclusive
    while (start < n) {
        let end = Math.min(start + window, n); // 0-based, exclusive
        // Snap the end to a nearby blank-line boundary, unless we've already
        // reached EOF.
        if (end < n && slack > 0)
            end = snapToBlankLine(lines, start, end, slack);
        chunks.push({
            path,
            startLine: start + 1,
            endLine: end,
            text: lines.slice(start, end).join("\n"),
        });
        if (end >= n)
            break;
        // Step back by the overlap, but never regress (a short snapped window
        // could otherwise leave nextStart <= start).
        const nextStart = end - overlap;
        start = nextStart > start ? nextStart : end;
    }
    return chunks;
}
|