reasonix 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/{chunk-DVBNMXA6.js → chunk-WRG56OKI.js} +130 -2
- package/dist/cli/chunk-WRG56OKI.js.map +1 -0
- package/dist/cli/index.js +692 -209
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/{prompt-POARCKKR.js → prompt-LJ44NWSU.js} +2 -2
- package/dist/index.d.ts +33 -13
- package/dist/index.js +133 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/cli/chunk-DVBNMXA6.js.map +0 -1
- package/dist/cli/{prompt-POARCKKR.js.map → prompt-LJ44NWSU.js.map} +0 -0
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
import {
|
|
3
3
|
CODE_SYSTEM_PROMPT,
|
|
4
4
|
codeSystemPrompt
|
|
5
|
-
} from "./chunk-DVBNMXA6.js";
|
|
5
|
+
} from "./chunk-WRG56OKI.js";
|
|
6
6
|
export {
|
|
7
7
|
CODE_SYSTEM_PROMPT,
|
|
8
8
|
codeSystemPrompt
|
|
9
9
|
};
|
|
10
|
-
//# sourceMappingURL=prompt-POARCKKR.js.map
|
|
10
|
+
//# sourceMappingURL=prompt-LJ44NWSU.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -1622,12 +1622,12 @@ declare function applyUserMemory(basePrompt: string, opts?: {
|
|
|
1622
1622
|
projectRoot?: string;
|
|
1623
1623
|
}): string;
|
|
1624
1624
|
/**
|
|
1625
|
-
* Compose every lazy-loaded prefix block in one call: REASONIX.md,
|
|
1626
|
-
*
|
|
1627
|
-
*
|
|
1628
|
-
*
|
|
1629
|
-
*
|
|
1630
|
-
* filesystem state.
|
|
1625
|
+
* Compose every lazy-loaded prefix block in one call: project REASONIX.md,
|
|
1626
|
+
* global REASONIX.md (`#g` destination), user memory indexes (global +
|
|
1627
|
+
* per-project), and the skills index. Drop-in replacement for
|
|
1628
|
+
* `applyProjectMemory` at CLI entry points. Stacking order is stable —
|
|
1629
|
+
* the prefix hash only changes when block *content* changes, not when
|
|
1630
|
+
* this helper is called a second time with the same filesystem state.
|
|
1631
1631
|
*/
|
|
1632
1632
|
declare function applyMemoryStack(basePrompt: string, rootDir: string): string;
|
|
1633
1633
|
|
|
@@ -2214,8 +2214,14 @@ interface ShellToolsOptions {
|
|
|
2214
2214
|
* When true, skip the allowlist entirely and auto-run every command.
|
|
2215
2215
|
* Off by default — this is an escape hatch for non-interactive use
|
|
2216
2216
|
* (CI, benchmarks) where a human can't be in the loop to confirm.
|
|
2217
|
+
*
|
|
2218
|
+
* Accepts either a static boolean (captured once) or a getter called
|
|
2219
|
+
* on every dispatch. The getter form is what `reasonix code` uses to
|
|
2220
|
+
* wire `editMode === "yolo"` into the registry: flipping the mode
|
|
2221
|
+
* mid-session must take effect on the next tool call without forcing
|
|
2222
|
+
* a re-registration. Static `true` is fine for CI / benchmark code.
|
|
2217
2223
|
*/
|
|
2218
|
-
allowAll?: boolean;
|
|
2224
|
+
allowAll?: boolean | (() => boolean);
|
|
2219
2225
|
/**
|
|
2220
2226
|
* Background-process registry shared between `run_background`,
|
|
2221
2227
|
* `job_output`, `stop_job`, `list_jobs`, and the /jobs /kill slashes.
|
|
@@ -3452,13 +3458,27 @@ declare function codeSystemPrompt(rootDir: string): string;
|
|
|
3452
3458
|
/** One of the preset bundles (model + harvest + branch combo). */
|
|
3453
3459
|
type PresetName = "fast" | "smart" | "max";
|
|
3454
3460
|
/**
|
|
3455
|
-
* How `reasonix code` handles model-issued
|
|
3456
|
-
*
|
|
3457
|
-
*
|
|
3458
|
-
*
|
|
3459
|
-
*
|
|
3461
|
+
* How `reasonix code` handles model-issued tool calls. Two axes folded
|
|
3462
|
+
* into one enum because users think about "how trusting am I right now?"
|
|
3463
|
+
* as a single dial, not as "writes vs shell" pairs.
|
|
3464
|
+
*
|
|
3465
|
+
* - "review" — queue edits into pendingEdits (user /apply or `y` to
|
|
3466
|
+
* commit); shell commands NOT on the read-only allowlist
|
|
3467
|
+
* hit ShellConfirm. Default.
|
|
3468
|
+
* - "auto" — apply edits immediately, snapshot for /undo, show a
|
|
3469
|
+
* short undo banner. Shell still goes through ShellConfirm
|
|
3470
|
+
* for non-allowlisted commands.
|
|
3471
|
+
* - "yolo" — apply edits immediately AND auto-approve every shell
|
|
3472
|
+
* command. No prompts at all. Use when you trust the
|
|
3473
|
+
* current direction and just want to iterate fast; /undo
|
|
3474
|
+
* still rolls back individual edit batches.
|
|
3475
|
+
*
|
|
3476
|
+
* Persisted so `/mode <x>` survives a relaunch. Missing → "review".
|
|
3477
|
+
*
|
|
3478
|
+
* Codex-equivalence note: review ≈ untrusted, auto ≈ on-request,
|
|
3479
|
+
* yolo ≈ never.
|
|
3460
3480
|
*/
|
|
3461
|
-
type EditMode = "review" | "auto";
|
|
3481
|
+
type EditMode = "review" | "auto" | "yolo";
|
|
3462
3482
|
/**
|
|
3463
3483
|
* reasoning_effort cap for the model. "max" is the agent-class default;
|
|
3464
3484
|
* "high" is cheaper / faster. Persisted so `/effort high` survives a
|
package/dist/index.js
CHANGED
|
@@ -3553,6 +3553,83 @@ ${NEGATIVE_CLAIM_RULE}
|
|
|
3553
3553
|
${TUI_FORMATTING_RULES}
|
|
3554
3554
|
|
|
3555
3555
|
The 'task' the parent gave you describes WHAT to review (a branch, a file set, or "the pending changes"). Stay on it; don't redesign the feature.`;
|
|
3556
|
+
var BUILTIN_SECURITY_REVIEW_BODY = `You are running as a security-review subagent. Your job is to inspect the changes the user is about to ship \u2014 usually the current git branch vs its upstream \u2014 through a security lens specifically, and report exploitable issues.
|
|
3557
|
+
|
|
3558
|
+
How to operate:
|
|
3559
|
+
- Default scope: the current branch's diff vs the default branch. If the user names a different range or a directory, honor that.
|
|
3560
|
+
- Discover scope first: \`git status\`, \`git diff --stat\`, \`git diff <base>...HEAD\`. Read touched files (\`read_file\`) when the diff alone doesn't carry security context \u2014 auth checks, input validation, the actual handler that calls into the changed function.
|
|
3561
|
+
- Use \`search_content\` to verify "is this user-controlled input ever sanitized later?" / "are there other call sites that depend on this validation?" before asserting impact.
|
|
3562
|
+
- Stay read-only. Never write, never run destructive commands, never propose SEARCH/REPLACE blocks. The parent decides what to act on.
|
|
3563
|
+
- Cap yourself at ~12 tool calls. If the diff is too big, focus on the riskiest 2-3 files and say so explicitly.
|
|
3564
|
+
|
|
3565
|
+
Threat model \u2014 flag with severity:
|
|
3566
|
+
|
|
3567
|
+
**CRITICAL** (do-not-ship):
|
|
3568
|
+
- SQL / NoSQL / shell / template injection \u2014 user input concatenated into a query, command, or template without parameterization.
|
|
3569
|
+
- Path traversal \u2014 user-controlled filenames touching the filesystem without canonicalization + sandbox check.
|
|
3570
|
+
- Authentication / authorization missing \u2014 endpoints / actions that should require a session check but don't.
|
|
3571
|
+
- Hardcoded secrets \u2014 API keys, passwords, signing tokens visible in the diff.
|
|
3572
|
+
- Deserialization of untrusted input \u2014 \`pickle.loads\`, \`yaml.load\` (non-safe), \`eval\`, \`Function()\`, \`unserialize()\`.
|
|
3573
|
+
- Cryptographic mistakes \u2014 homemade crypto, weak hashes (MD5/SHA-1) for passwords, missing IVs, ECB mode, predictable nonces.
|
|
3574
|
+
|
|
3575
|
+
**HIGH**:
|
|
3576
|
+
- XSS \u2014 user input rendered into HTML without escaping (or wrong escaping context).
|
|
3577
|
+
- SSRF \u2014 fetching URLs from user input without an allowlist.
|
|
3578
|
+
- Race conditions in security-relevant code \u2014 TOCTOU on auth/file checks.
|
|
3579
|
+
- Open redirects \u2014 user-controlled URL passed to a redirect helper.
|
|
3580
|
+
- Insufficient logging on security events (login failure, permission denial) \u2014 only flag if the codebase clearly DOES log elsewhere.
|
|
3581
|
+
|
|
3582
|
+
**MEDIUM**:
|
|
3583
|
+
- Verbose error messages leaking internal paths / stack traces / SQL.
|
|
3584
|
+
- Missing rate limiting on a credential / token endpoint.
|
|
3585
|
+
- Cross-origin / cookie-flag issues (missing \`Secure\` / \`HttpOnly\` / \`SameSite\`).
|
|
3586
|
+
|
|
3587
|
+
Things to NOT pile on (out of scope here \u2014 the regular /review covers them):
|
|
3588
|
+
- Style, formatting, naming.
|
|
3589
|
+
- Performance, refactor opportunities, test coverage gaps that aren't security-relevant.
|
|
3590
|
+
- "Should be a constant" / "extract this helper" \u2014 irrelevant to ship-blocking.
|
|
3591
|
+
|
|
3592
|
+
Your final answer:
|
|
3593
|
+
- Lead with a one-sentence verdict: "no security issues found", "minor concerns", or "blocking issues".
|
|
3594
|
+
- Then a list grouped by severity. Each item: file:line + 1-sentence threat + 1-sentence fix direction (no full SEARCH/REPLACE \u2014 the user / parent agent will write that).
|
|
3595
|
+
- If clean, say so plainly. Don't manufacture findings.
|
|
3596
|
+
|
|
3597
|
+
${NEGATIVE_CLAIM_RULE}
|
|
3598
|
+
|
|
3599
|
+
${TUI_FORMATTING_RULES}
|
|
3600
|
+
|
|
3601
|
+
The 'task' the parent gave you names what to review. Stay on it; don't redesign the feature.`;
|
|
3602
|
+
var BUILTIN_TEST_BODY = `You are running as the parent agent \u2014 this skill is INLINED, not a subagent. The user invoked /test (or asked you to "run the tests and fix failures"). Your job: run the project's test suite, diagnose any failure, propose fixes as SEARCH/REPLACE edit blocks, then re-run. Repeat until green or you hit a wall you should escalate.
|
|
3603
|
+
|
|
3604
|
+
How to operate:
|
|
3605
|
+
|
|
3606
|
+
1. **Detect the test command**.
|
|
3607
|
+
- Look for \`package.json\` \u2192 \`scripts.test\` first (most common: \`npm test\`, \`pnpm test\`, \`yarn test\`).
|
|
3608
|
+
- If no package.json or no test script: try \`pytest\`, \`go test ./...\`, \`cargo test\` based on what files exist (pyproject.toml/requirements.txt \u2192 pytest; go.mod \u2192 go test; Cargo.toml \u2192 cargo test).
|
|
3609
|
+
- If you can't tell, ASK the user for the command \u2014 don't guess. One question, one tool call to confirm.
|
|
3610
|
+
|
|
3611
|
+
2. **Run it via run_command** (typical timeout 120s, bigger if the suite is large). Capture stdout + stderr.
|
|
3612
|
+
|
|
3613
|
+
3. **Read the failures**. Pull out: which test names failed, the actual error/traceback, the file + line that threw. Don't just paraphrase \u2014 locate the exact assertion or stack frame.
|
|
3614
|
+
|
|
3615
|
+
4. **Propose fixes**. For each distinct failure:
|
|
3616
|
+
- If the failure is in PRODUCTION code (test catches a real bug) \u2192 propose a SEARCH/REPLACE that fixes the production code.
|
|
3617
|
+
- If the failure is in TEST code (test is wrong, codebase is right) \u2192 propose a SEARCH/REPLACE that updates the test, AND say so explicitly: "This is a test bug, not a production bug \u2014 updating the assertion."
|
|
3618
|
+
- If the failure is environmental (missing dep, wrong node version, missing fixture file) \u2192 say so and stop. Don't try to install packages or change config without checking with the user.
|
|
3619
|
+
|
|
3620
|
+
5. **Apply + re-run**. After the user accepts the edit blocks, run the test command again. Iterate.
|
|
3621
|
+
|
|
3622
|
+
6. **Stop conditions**:
|
|
3623
|
+
- All tests pass \u2192 report green, summarize what changed.
|
|
3624
|
+
- Same test still failing after 2 fix attempts on the same line \u2192 STOP. Tell the user "I've tried twice, it's still failing \u2014 here's what I think is happening, want me to try a different angle?". Don't loop indefinitely.
|
|
3625
|
+
- 3+ unrelated failures \u2192 fix one at a time, smallest first, so each pass narrows the surface.
|
|
3626
|
+
|
|
3627
|
+
Don't:
|
|
3628
|
+
- Run \`npm install\` / \`pip install\` / \`cargo update\` without asking \u2014 those mutate lockfiles and have global effects.
|
|
3629
|
+
- Disable, skip, or delete failing tests to "make it green". If a test seems wrong, update its assertion with a one-sentence explanation, but never add \`.skip\` / \`it.skip\` / \`@pytest.mark.skip\`.
|
|
3630
|
+
- Modify the test runner config (vitest.config, jest.config, etc.) to silence failures.
|
|
3631
|
+
|
|
3632
|
+
Lead each turn with a one-line status: "\u25B8 running \`npm test\` ..." \u2192 "\u25B8 2 failures in tests/foo.test.ts \u2014 first is \u2026" \u2192 so the user always knows where you are without scrolling tool output.`;
|
|
3556
3633
|
var BUILTIN_SKILLS = Object.freeze([
|
|
3557
3634
|
Object.freeze({
|
|
3558
3635
|
name: "explore",
|
|
@@ -3577,6 +3654,22 @@ var BUILTIN_SKILLS = Object.freeze([
|
|
|
3577
3654
|
scope: "builtin",
|
|
3578
3655
|
path: "(builtin)",
|
|
3579
3656
|
runAs: "subagent"
|
|
3657
|
+
}),
|
|
3658
|
+
Object.freeze({
|
|
3659
|
+
name: "security-review",
|
|
3660
|
+
description: "Security-focused review of the current branch diff in an isolated subagent \u2014 flags injection/authz/secrets/deserialization/path-traversal/crypto issues, severity-tagged. Read-only. Use when shipping changes that touch auth, input parsing, file IO, or external requests.",
|
|
3661
|
+
body: BUILTIN_SECURITY_REVIEW_BODY,
|
|
3662
|
+
scope: "builtin",
|
|
3663
|
+
path: "(builtin)",
|
|
3664
|
+
runAs: "subagent"
|
|
3665
|
+
}),
|
|
3666
|
+
Object.freeze({
|
|
3667
|
+
name: "test",
|
|
3668
|
+
description: "Run the project's test suite, diagnose failures, propose SEARCH/REPLACE fixes, re-run until green (or stop after 2 fix attempts on the same failure). Inlined \u2014 runs in the parent loop so you see the edit blocks and can /apply them. Detects npm/pnpm/yarn/pytest/go/cargo.",
|
|
3669
|
+
body: BUILTIN_TEST_BODY,
|
|
3670
|
+
scope: "builtin",
|
|
3671
|
+
path: "(builtin)",
|
|
3672
|
+
runAs: "inline"
|
|
3580
3673
|
})
|
|
3581
3674
|
]);
|
|
3582
3675
|
|
|
@@ -3816,6 +3909,40 @@ var MemoryStore = class {
|
|
|
3816
3909
|
`, "utf8");
|
|
3817
3910
|
}
|
|
3818
3911
|
};
|
|
3912
|
+
function readGlobalReasonixMemory(homeDir = join7(homedir4(), ".reasonix")) {
|
|
3913
|
+
const path = join7(homeDir, "REASONIX.md");
|
|
3914
|
+
if (!existsSync7(path)) return null;
|
|
3915
|
+
let raw;
|
|
3916
|
+
try {
|
|
3917
|
+
raw = readFileSync7(path, "utf8");
|
|
3918
|
+
} catch {
|
|
3919
|
+
return null;
|
|
3920
|
+
}
|
|
3921
|
+
const trimmed = raw.trim();
|
|
3922
|
+
if (!trimmed) return null;
|
|
3923
|
+
const originalChars = trimmed.length;
|
|
3924
|
+
const truncated = originalChars > 8e3;
|
|
3925
|
+
const content = truncated ? `${trimmed.slice(0, 8e3)}
|
|
3926
|
+
\u2026 (truncated ${originalChars - 8e3} chars)` : trimmed;
|
|
3927
|
+
return { path, content, originalChars, truncated };
|
|
3928
|
+
}
|
|
3929
|
+
function applyGlobalReasonixMemory(basePrompt, homeDir) {
|
|
3930
|
+
if (!memoryEnabled()) return basePrompt;
|
|
3931
|
+
const dir = homeDir ?? join7(homedir4(), ".reasonix");
|
|
3932
|
+
const mem = readGlobalReasonixMemory(dir);
|
|
3933
|
+
if (!mem) return basePrompt;
|
|
3934
|
+
return [
|
|
3935
|
+
basePrompt,
|
|
3936
|
+
"",
|
|
3937
|
+
"# Global memory (~/.reasonix/REASONIX.md)",
|
|
3938
|
+
"",
|
|
3939
|
+
"Cross-project notes the user pinned via the `#g` prompt prefix. Treat as authoritative \u2014 same level of trust as project memory.",
|
|
3940
|
+
"",
|
|
3941
|
+
"```",
|
|
3942
|
+
mem.content,
|
|
3943
|
+
"```"
|
|
3944
|
+
].join("\n");
|
|
3945
|
+
}
|
|
3819
3946
|
function applyUserMemory(basePrompt, opts = {}) {
|
|
3820
3947
|
if (!memoryEnabled()) return basePrompt;
|
|
3821
3948
|
const store = new MemoryStore(opts);
|
|
@@ -3851,7 +3978,8 @@ function applyUserMemory(basePrompt, opts = {}) {
|
|
|
3851
3978
|
}
|
|
3852
3979
|
function applyMemoryStack(basePrompt, rootDir) {
|
|
3853
3980
|
const withProject = applyProjectMemory(basePrompt, rootDir);
|
|
3854
|
-
const withMemory = applyUserMemory(withProject, { projectRoot: rootDir });
|
|
3981
|
+
const withGlobal = applyGlobalReasonixMemory(withProject);
|
|
3982
|
+
const withMemory = applyUserMemory(withGlobal, { projectRoot: rootDir });
|
|
3855
3983
|
return applySkillsIndex(withMemory, { projectRoot: rootDir });
|
|
3856
3984
|
}
|
|
3857
3985
|
|
|
@@ -5774,7 +5902,7 @@ function registerShellTools(registry, opts) {
|
|
|
5774
5902
|
const snapshot2 = opts.extraAllowed ?? [];
|
|
5775
5903
|
return () => snapshot2;
|
|
5776
5904
|
})();
|
|
5777
|
-
const
|
|
5905
|
+
const isAllowAll = typeof opts.allowAll === "function" ? opts.allowAll : () => opts.allowAll === true;
|
|
5778
5906
|
registry.register({
|
|
5779
5907
|
name: "run_command",
|
|
5780
5908
|
description: "Run a shell command in the project root and return its combined stdout+stderr.\n\nConstraints (read these before the first call):\n\u2022 ONE process per call, NO shell expansion. `&&`, `||`, `|`, `;`, `>`, `<`, `2>&1` are all rejected up-front \u2014 split into separate calls and combine results in reasoning. Example: instead of `grep foo *.ts | wc -l`, use `grep -c foo *.ts`; instead of `cd sub && npm test`, use `npm test --prefix sub` (or whatever --cwd flag the binary accepts).\n\u2022 `cd` DOES NOT PERSIST between calls \u2014 each call spawns a fresh process rooted at the project. If a tool needs a subdirectory, pass it via the tool's own flag (`npm --prefix`, `cargo -C`, `git -C`, `pytest tests/\u2026`), NOT via a preceding `cd`.\n\u2022 Avoid commands with unbounded output (`netstat -ano`, `find /`, etc.) \u2014 they waste tokens. Filter at source: `netstat -ano -p TCP`, `find src -name '*.ts'`, `grep -c`, `wc -l`.\n\nCommon read-only inspection and test/lint/typecheck commands run immediately; anything that could mutate state, install dependencies, or touch the network is refused until the user confirms it in the TUI. Prefer this over asking the user to run a command manually \u2014 after edits, run the project's tests to verify.",
|
|
@@ -5783,7 +5911,7 @@ function registerShellTools(registry, opts) {
|
|
|
5783
5911
|
// during planning. Anything that would otherwise trigger a
|
|
5784
5912
|
// confirmation prompt is treated as "not read-only" and bounced.
|
|
5785
5913
|
readOnlyCheck: (args) => {
|
|
5786
|
-
if (
|
|
5914
|
+
if (isAllowAll()) return true;
|
|
5787
5915
|
const cmd = typeof args?.command === "string" ? args.command.trim() : "";
|
|
5788
5916
|
if (!cmd) return false;
|
|
5789
5917
|
return isAllowed(cmd, getExtraAllowed());
|
|
@@ -5805,7 +5933,7 @@ function registerShellTools(registry, opts) {
|
|
|
5805
5933
|
fn: async (args, ctx) => {
|
|
5806
5934
|
const cmd = args.command.trim();
|
|
5807
5935
|
if (!cmd) throw new Error("run_command: empty command");
|
|
5808
|
-
if (!
|
|
5936
|
+
if (!isAllowAll() && !isAllowed(cmd, getExtraAllowed())) {
|
|
5809
5937
|
throw new NeedsConfirmationError(cmd);
|
|
5810
5938
|
}
|
|
5811
5939
|
const effectiveTimeout = Math.max(1, Math.min(600, args.timeoutSec ?? timeoutSec));
|
|
@@ -5838,7 +5966,7 @@ function registerShellTools(registry, opts) {
|
|
|
5838
5966
|
fn: async (args, ctx) => {
|
|
5839
5967
|
const cmd = args.command.trim();
|
|
5840
5968
|
if (!cmd) throw new Error("run_background: empty command");
|
|
5841
|
-
if (!
|
|
5969
|
+
if (!isAllowAll() && !isAllowed(cmd, getExtraAllowed())) {
|
|
5842
5970
|
throw new NeedsConfirmationError(cmd);
|
|
5843
5971
|
}
|
|
5844
5972
|
const result = await jobs.start(cmd, {
|