@scira/cli 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +54 -10
  2. package/dist/agent/background-tasks.js +173 -0
  3. package/dist/agent/research-agent.js +95 -38
  4. package/dist/agent/todos.js +140 -0
  5. package/dist/agent/tools.js +146 -143
  6. package/dist/agent/tools.test.js +33 -0
  7. package/dist/agent/workspace.js +85 -0
  8. package/dist/cli/commands/init.js +51 -39
  9. package/dist/cli/index.js +30 -14
  10. package/dist/config/env-guide.js +151 -0
  11. package/dist/config/env-guide.test.js +18 -0
  12. package/dist/config/env-store.js +53 -0
  13. package/dist/config/env-store.test.js +60 -0
  14. package/dist/tools/agent-tools.js +621 -0
  15. package/dist/tools/background-tasks.js +261 -0
  16. package/dist/tools/bash-policy.test.js +38 -0
  17. package/dist/tools/file-tools.js +6 -1
  18. package/dist/tools/search-web.js +24 -6
  19. package/dist/tools/search-web.test.js +24 -0
  20. package/dist/tools/todos.js +140 -0
  21. package/dist/tools/workspace.js +91 -0
  22. package/dist/tools/workspace.test.js +75 -0
  23. package/dist/tools/x-search.js +142 -0
  24. package/dist/ui/ink/SciraApp.js +11 -8
  25. package/dist/ui/ink/components/overlays.js +4 -4
  26. package/dist/ui/ink/constants.js +11 -3
  27. package/dist/ui/ink/hooks/use-agent-turn.js +24 -5
  28. package/dist/ui/ink/hooks/use-keyboard.js +3 -0
  29. package/dist/ui/ink/hooks/use-session.js +5 -3
  30. package/dist/ui/ink/hooks/use-settings.js +10 -8
  31. package/dist/ui/ink/hooks/use-submit.js +13 -2
  32. package/dist/ui/ink/hooks/use-theme.js +1 -1
  33. package/dist/ui/ink/lib/tool-result.js +72 -5
  34. package/dist/ui/ink/lib/utils.js +40 -3
  35. package/dist/ui/ink/theme-context.js +29 -26
  36. package/dist/ui/ink/theme.js +36 -9
  37. package/dist/ui/ink/theme.test.js +32 -5
  38. package/package.json +5 -2
package/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  Terminal-native AI research and coding agent. Ask a question, get a grounded report with cited sources and verified claims — all stored locally and inspectable.
4
4
 
5
+ **Documentation:** [docs site](./docs) (local: `cd docs && bun run dev`) · MDX sources in `docs/content/docs/`
6
+
5
7
  ## Install
6
8
 
7
9
  ```bash
@@ -14,12 +16,13 @@ Requires **Node.js ≥ 20**. Run the interactive setup:
14
16
  scira init
15
17
  ```
16
18
 
17
- This walks you through API keys and configuration. Keys go in `~/.scira/.env` so they work from any directory.
19
+ This walks you through API keys and configuration with signup links and step-by-step instructions.
18
20
 
19
21
  Check your setup:
20
22
 
21
23
  ```bash
22
- scira doctor
24
+ scira doctor # verify keys are detected
25
+ scira keys # show where to get any missing keys
23
26
  ```
24
27
 
25
28
  ## Quickstart
@@ -38,20 +41,55 @@ scira new "history of the Silk Road" --tui
38
41
  scira new "history of the Silk Road" --shell
39
42
  ```
40
43
 
41
- ## Setup
44
+ ## API keys
45
+
46
+ Scira needs credentials for an **LLM provider** (model calls) and a **search provider** (web search). Run `scira init` for a guided setup, or copy `.env.example` and fill in keys manually.
42
47
 
43
- Put your API keys in `~/.scira/.env` (loaded automatically from any working directory):
48
+ **Where keys are loaded from** (highest priority first):
49
+
50
+ 1. Shell environment (already exported in your terminal)
51
+ 2. `<project>/.scira/.env` when you run Scira from that project
52
+ 3. `~/.scira/.env` for global defaults
44
53
 
45
54
  ```bash
55
+ # Option A: interactive wizard (saves to ~/.scira/.env)
56
+ scira init
57
+
58
+ # Option B: manual — global keys
46
59
  mkdir -p ~/.scira && cp .env.example ~/.scira/.env
47
- # then edit ~/.scira/.env
60
+
61
+ # Option B: manual — project keys only
62
+ mkdir -p .scira && cp .env.example .scira/.env
63
+
64
+ scira doctor # confirm keys are detected
65
+ scira keys # signup links + steps for anything still missing
48
66
  ```
49
67
 
68
+ ### LLM providers (set one in config via `scira init` or `/llm`)
69
+
70
+ | Key | Provider | Where to get it |
71
+ |---|---|---|
72
+ | `AI_GATEWAY_API_KEY` | Vercel AI Gateway (default) | [vercel.com/docs/ai-gateway](https://vercel.com/docs/ai-gateway) → dashboard → AI Gateway → API Keys |
73
+ | `XAI_API_KEY` | xAI (Grok) | [console.x.ai](https://console.x.ai/) → API Keys |
74
+ | `CLOUDFLARE_ACCOUNT_ID` + `CLOUDFLARE_API_TOKEN` | Cloudflare Workers AI | [dash.cloudflare.com](https://dash.cloudflare.com/) (account ID) + [API Tokens](https://dash.cloudflare.com/profile/api-tokens) with Workers AI permission |
75
+ | `HF_API_KEY` | Hugging Face Inference | [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) |
76
+
77
+ ### Search providers (set one via `scira init` or `/provider`)
78
+
79
+ | Key | Provider | Where to get it |
80
+ |---|---|---|
81
+ | `EXA_API_KEY` | Exa (default) | [dashboard.exa.ai/api-keys](https://dashboard.exa.ai/api-keys) |
82
+ | `FIRECRAWL_API_KEY` | Firecrawl | [firecrawl.dev/app/api-keys](https://www.firecrawl.dev/app/api-keys) |
83
+ | `PARALLEL_API_KEY` | Parallel | [platform.parallel.ai](https://platform.parallel.ai/) |
84
+
85
+ `FIRECRAWL_API_KEY` is also used as an automatic fallback when Exa or Parallel search fails, so it is worth setting even if Firecrawl is not your primary search provider.
86
+
50
87
  ## Commands
51
88
 
52
89
  | Command | Description |
53
90
  |---|---|
54
91
  | `scira init` | Interactive setup for API keys and configuration |
92
+ | `scira keys` | Show where to get and save missing API keys |
55
93
  | `scira [question]` | Open TUI home, or run headlessly if a question is given |
56
94
  | `scira new <question>` | Start a run; add `--tui` or `--shell` to open interactive UI |
57
95
  | `scira resume <run-id>` | Resume a run; add `--tui` or `--shell` to specify UI |
@@ -101,13 +139,19 @@ Config merges `~/.scira/config.json` (global) with `.scira/config.json` (project
101
139
  | `search.provider` | `exa` | `exa`, `firecrawl`, or `parallel` |
102
140
  | `search.maxResults` | `8` | Max results per search query |
103
141
 
104
- ## Environment Variables
142
+ ## Environment variables
143
+
144
+ See [API keys](#api-keys) for signup links. Required keys depend on your `llmProvider` and `search.provider` in config.
105
145
 
106
- | Variable | Required | Purpose |
146
+ | Variable | Required when | Purpose |
107
147
  |---|---|---|
108
- | `AI_GATEWAY_API_KEY` | Yes | Vercel AI Gateway — all model calls |
109
- | `EXA_API_KEY` | With Exa | Web search via Exa |
110
- | `FIRECRAWL_API_KEY` | With Firecrawl | Web scraping via Firecrawl |
148
+ | `AI_GATEWAY_API_KEY` | `llmProvider: gateway` | Vercel AI Gateway model calls |
149
+ | `XAI_API_KEY` | `llmProvider: xai` or xSearch | Grok model calls; also enables the `xSearch` tool for real-time X/Twitter posts |
150
+ | `CLOUDFLARE_ACCOUNT_ID`, `CLOUDFLARE_API_TOKEN` | `llmProvider: workers-ai` | Workers AI model calls |
151
+ | `HF_API_KEY` | `llmProvider: huggingface` | Hugging Face Inference |
152
+ | `EXA_API_KEY` | `search.provider: exa` | Web search via Exa |
153
+ | `FIRECRAWL_API_KEY` | `search.provider: firecrawl` | Web search + scrape via Firecrawl |
154
+ | `PARALLEL_API_KEY` | `search.provider: parallel` | Web search via Parallel |
111
155
 
112
156
  ## Run Directory
113
157
 
@@ -0,0 +1,173 @@
1
+ import { spawn } from "node:child_process";
2
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
3
+ import { dirname, join } from "node:path";
4
+ const MAX_OUTPUT_LINES = 500;
5
+ const MAX_TAIL_CHARS = 4000;
6
/**
 * Compute the id for the next background task.
 *
 * Scans `existing` for ids of the exact form `task_<digits>`, takes the
 * largest number found, and returns that number plus one, zero-padded to at
 * least three digits. Ids that do not match the pattern are ignored; with no
 * matching ids the result is `task_001`.
 *
 * @param {Array<{ id: string }>} existing - Already known task records.
 * @returns {string} The next id, e.g. `task_004`.
 */
function nextTaskId(existing) {
    let highest = 0;
    for (const task of existing) {
        const match = /^task_(\d+)$/u.exec(task.id);
        if (match === null) {
            continue;
        }
        const value = Number.parseInt(match[1], 10);
        if (value > highest) {
            highest = value;
        }
    }
    const suffix = String(highest + 1).padStart(3, "0");
    return `task_${suffix}`;
}
14
/**
 * Join `lines` with newlines and keep at most the last `maxChars` characters.
 *
 * When the joined text fits, it is returned unchanged. Otherwise the result is
 * the marker line `…[truncated]` followed by the tail of the text.
 *
 * @param {string[]} lines - Output lines, oldest first.
 * @param {number} [maxChars=MAX_TAIL_CHARS] - Maximum number of UTF-16 code
 *   units of the joined text to keep. Zero or negative values keep nothing.
 * @returns {string} The full text, or the truncation marker plus the tail.
 */
function tailText(lines, maxChars = MAX_TAIL_CHARS) {
    const joined = lines.join("\n");
    const limit = Math.max(0, maxChars);
    if (joined.length <= limit)
        return joined;
    // Compute the start index explicitly: `slice(-limit)` with limit 0 is
    // `slice(-0)`, which returns the whole string instead of an empty tail.
    let start = joined.length - limit;
    // Do not begin the tail on the second half of a surrogate pair; that would
    // leave a lone, unprintable code unit at the front of the output.
    const first = joined.charCodeAt(start);
    if (first >= 0xdc00 && first <= 0xdfff)
        start += 1;
    return `…[truncated]\n${joined.slice(start)}`;
}
20
/**
 * Runs shell commands as background child processes, keeps a bounded
 * in-memory log of their combined stdout/stderr, and mirrors task metadata to
 * a JSON file so tasks can be listed after the spawning call returns.
 *
 * `records` holds the persisted view of every known task; `runtime` holds the
 * live process and output buffer for tasks started by this instance that have
 * not yet closed.
 */
export class BackgroundTaskManager {
    persistPath;
    defaultCwd;
    runtime = new Map();
    records = [];
    loaded = false;
    // In-flight load shared by concurrent ensureLoaded() callers.
    #loading = null;
    // Tail of the write queue; keeps persist() calls from overlapping.
    #writeChain = Promise.resolve();
    /**
     * @param {string} persistPath - JSON file that stores the task records.
     * @param {string} defaultCwd - Working directory used when spawn() gets none.
     */
    constructor(persistPath, defaultCwd) {
        this.persistPath = persistPath;
        this.defaultCwd = defaultCwd;
    }
    /**
     * Load persisted records once. Concurrent callers await the same read, so
     * nobody observes an empty list while the file is still being parsed.
     *
     * @returns {Promise<void>}
     */
    async ensureLoaded() {
        if (this.loaded)
            return;
        this.#loading ??= this.#readPersisted();
        await this.#loading;
    }
    /** Read and validate the persisted records; any failure yields an empty list. */
    async #readPersisted() {
        try {
            const raw = await readFile(this.persistPath, "utf8");
            const parsed = JSON.parse(raw);
            if (Array.isArray(parsed)) {
                this.records = parsed.filter((t) => typeof t === "object" && t !== null && typeof t.id === "string");
            }
        }
        catch {
            // Missing or unreadable state is treated as "no tasks yet".
            this.records = [];
        }
        finally {
            this.loaded = true;
        }
    }
    /**
     * Write all records to `persistPath`, creating the parent directory if
     * needed. Writes are queued so two calls never write the file at once.
     *
     * @returns {Promise<void>} Settles when this call's write has finished;
     *   rejects if the directory or file cannot be written.
     */
    persist() {
        const write = async () => {
            await mkdir(dirname(this.persistPath), { recursive: true });
            await writeFile(this.persistPath, `${JSON.stringify(this.records, null, 2)}\n`);
        };
        const pending = this.#writeChain.then(write);
        // A failed write must not block the writes queued behind it.
        this.#writeChain = pending.catch(() => { });
        return pending;
    }
    /** Persist from an event handler, where there is no caller to report a failure to. */
    #persistBestEffort() {
        this.persist().catch(() => { });
    }
    /**
     * Refresh the task's output tail and copy its record into `records`,
     * replacing the entry with the same id or appending a new one.
     *
     * @param {{ record: object, output: string[] }} task - Live task entry.
     */
    syncRecord(task) {
        const idx = this.records.findIndex((r) => r.id === task.record.id);
        task.record.outputTail = tailText(task.output);
        if (idx === -1)
            this.records.push({ ...task.record });
        else
            this.records[idx] = { ...task.record };
    }
    /**
     * Record the end of a process and stop tracking it.
     *
     * A task that kill() already marked "killed" keeps that status, and a null
     * exit code (process ended by a signal) does not erase a code set earlier.
     */
    #finalize(runtime, exitCode) {
        const { record } = runtime;
        if (record.status !== "killed")
            record.status = "exited";
        record.exitCode = exitCode ?? record.exitCode;
        this.syncRecord(runtime);
        this.#persistBestEffort();
        this.runtime.delete(record.id);
    }
    /**
     * Start `command` through /bin/bash as a background task.
     *
     * @param {string} command - Shell command line to run.
     * @param {string} [cwd] - Working directory; defaults to `defaultCwd`.
     * @returns {Promise<object>} A copy of the task record as of the moment the
     *   initial state was persisted.
     */
    async spawn(command, cwd) {
        await this.ensureLoaded();
        const id = nextTaskId(this.records);
        const workDir = cwd ?? this.defaultCwd;
        const proc = spawn(command, {
            cwd: workDir,
            shell: "/bin/bash",
            env: process.env,
            detached: false,
            stdio: ["ignore", "pipe", "pipe"]
        });
        const record = {
            id,
            command,
            cwd: workDir,
            pid: proc.pid ?? 0,
            startedAt: new Date().toISOString(),
            status: "running",
            exitCode: null,
            outputTail: ""
        };
        const output = [];
        const runtime = { record, proc, output };
        const append = (chunk) => {
            for (const line of chunk.toString().split("\n")) {
                if (line.length > 0)
                    output.push(line);
            }
            // Keep only the newest MAX_OUTPUT_LINES lines.
            if (output.length > MAX_OUTPUT_LINES)
                output.splice(0, output.length - MAX_OUTPUT_LINES);
            record.outputTail = tailText(output);
        };
        // Attach every listener before the first await. A spawn failure is
        // reported asynchronously through "error"; with no listener attached
        // in time it would surface as an uncaught exception, and an early
        // "close" would be missed, leaving the task "running" forever.
        proc.stdout?.on("data", append);
        proc.stderr?.on("data", append);
        proc.on("close", (code) => this.#finalize(runtime, code));
        proc.on("error", (err) => {
            output.push(`[spawn error] ${err.message}`);
            this.#finalize(runtime, 1);
        });
        this.runtime.set(id, runtime);
        this.records.push({ ...record });
        await this.persist();
        return { ...record };
    }
    /**
     * List every known task, refreshing the records of live tasks first.
     *
     * @returns {Promise<object[]>} Copies of the task records.
     */
    async list() {
        await this.ensureLoaded();
        for (const rt of this.runtime.values()) {
            rt.record.outputTail = tailText(rt.output);
            this.syncRecord(rt);
        }
        return this.records.map((r) => {
            const live = this.runtime.get(r.id);
            return live ? { ...live.record } : { ...r };
        });
    }
    /**
     * Return the last lines of a task's output.
     *
     * Live tasks are read from the in-memory buffer; other tasks from the
     * persisted `outputTail`.
     *
     * @param {string} taskId - Id of the task to read.
     * @param {number} [tailLines=50] - Number of trailing lines wanted. Values
     *   that are not positive integers fall back to 50.
     * @returns {Promise<string>} The output, a "(no output…)" placeholder, or a
     *   not-found message.
     */
    async getOutput(taskId, tailLines = 50) {
        await this.ensureLoaded();
        // `slice(-0)` would return everything, so never pass a non-positive count.
        const count = Number.isInteger(tailLines) && tailLines > 0 ? tailLines : 50;
        const live = this.runtime.get(taskId);
        if (live) {
            const lines = live.output.slice(-count);
            return lines.length > 0 ? lines.join("\n") : "(no output yet)";
        }
        const rec = this.records.find((r) => r.id === taskId);
        if (!rec)
            return `Task "${taskId}" not found.`;
        // Records loaded from disk are only checked for an id, so the tail may be absent.
        const tail = typeof rec.outputTail === "string" ? rec.outputTail : "";
        if (tail.length === 0)
            return "(no output)";
        return tail.split("\n").slice(-count).join("\n");
    }
    /**
     * Stop a task. A live task is sent SIGTERM and marked "killed"; a task that
     * is only known from persisted records is just marked "killed".
     *
     * @param {string} taskId - Id of the task to stop.
     * @returns {Promise<string>} Human-readable result message.
     */
    async kill(taskId) {
        await this.ensureLoaded();
        const live = this.runtime.get(taskId);
        if (live) {
            live.proc.kill("SIGTERM");
            live.record.status = "killed";
            // 143 is presumably 128 + 15 (SIGTERM), the conventional shell exit code.
            live.record.exitCode = live.record.exitCode ?? 143;
            this.syncRecord(live);
            await this.persist();
            return `Killed ${taskId} (pid ${live.record.pid}).`;
        }
        const rec = this.records.find((r) => r.id === taskId);
        if (!rec)
            return `Task "${taskId}" not found.`;
        if (rec.status !== "running")
            return `${taskId} is already ${rec.status}.`;
        rec.status = "killed";
        await this.persist();
        return `Marked ${taskId} as killed (process not tracked in this session).`;
    }
    /**
     * Build the text block describing running tasks that is appended to the
     * agent's instructions.
     *
     * @returns {Promise<string>} The block, or "" when no task is running.
     */
    async formatContextForAgent() {
        const tasks = await this.list();
        const active = tasks.filter((t) => t.status === "running");
        if (active.length === 0)
            return "";
        const lines = active.map((t) => `  - ${t.id}: [running pid ${t.pid}] ${t.command} (cwd: ${t.cwd})`);
        return `\nActive background tasks:\n${lines.join("\n")}\nUse bash with action "output" and taskId to read logs, or action "kill" to stop a task.\n`;
    }
}
171
/**
 * Build a BackgroundTaskManager for one run.
 *
 * @param {string} runPath - Run directory; task records are stored in
 *   `background-tasks.json` inside it.
 * @param {string} workspacePath - Passed to the manager as its default
 *   working directory.
 * @returns {BackgroundTaskManager} A new manager instance.
 */
export function createBackgroundTaskManager(runPath, workspacePath) {
    const persistFile = join(runPath, "background-tasks.json");
    return new BackgroundTaskManager(persistFile, workspacePath);
}
@@ -3,10 +3,31 @@ import { stdin, stdout } from "node:process";
3
3
  import { ToolLoopAgent, isLoopFinished } from "ai";
4
4
  import { Spinner } from "picospinner";
5
5
  import { getLanguageModel, requireLlmKeys } from "../providers/llm/registry.js";
6
- import { createResearchTools, createOneShotTools, createCodingTools } from "./tools.js";
6
+ import { createResearchTools, createOneShotTools, createCodingTools, wrapToolsForPlanMode } from "../tools/agent-tools.js";
7
7
  import { SKILL_CATALOG } from "./skills.js";
8
8
  import { createMcpBridge } from "../tools/mcp-bridge.js";
9
- function instructions(goal, config, workspacePath) {
9
+ import { createBackgroundTaskManager } from "../tools/background-tasks.js";
10
/**
 * Work out whether plan mode is active for the given agent options.
 *
 * A `getPlanMode` callback, when present, takes precedence and its return
 * value is used as-is; otherwise the `planMode` value is used, defaulting to
 * `false` when it is null or undefined.
 *
 * @param {{ getPlanMode?: () => unknown, planMode?: unknown }} options
 * @returns {unknown} The plan-mode flag.
 */
function resolvePlanMode(options) {
    if (options.getPlanMode) {
        return options.getPlanMode();
    }
    return options.planMode ?? false;
}
13
/**
 * Return the plan-mode section that is appended to the agent instructions.
 *
 * @param {boolean} active - Whether plan mode is currently on.
 * @returns {string} The instruction text (starting with a blank line), or ""
 *   when plan mode is off.
 */
function planModeBlock(active) {
    const block = `

PLAN MODE (active):
You are in plan mode. Explore and plan before making changes.
- Use readFile, grepWorkspace, listWorkspaceDir, webSearch, and readUrl to understand the task
- Use the todo tool to break work into trackable steps (create, mark in_progress when starting, completed when done)
- Write or update plan.md with your approach (harness file, bare name)
- Do NOT use writeFile or editFile except plan.md, and do not use bash action=run/background
- Do NOT use MCP or browser tools while plan mode is active
- Read-only bash is OK: ls, cat, git status, git log, git diff, find, grep (workspace-relative paths only)
- When the plan is ready, summarize it and tell the user to type /plan to exit plan mode and begin execution`;
    return active ? block : "";
}
28
+ function instructions(goal, config, options = {}) {
29
+ const { workspacePath } = options;
30
+ const planMode = resolvePlanMode(options);
10
31
  const now = new Date();
11
32
  const temporalContext = now.toLocaleDateString("en-US", {
12
33
  weekday: "long",
@@ -19,26 +40,29 @@ function instructions(goal, config, workspacePath) {
19
40
  : "Citation policy (balanced): cite source IDs for all major claims; minor background context may be uncited but must not be overstated.";
20
41
  const codingSection = workspacePath ? `
21
42
 
22
- CODING CAPABILITIES:
23
- You also have workspace-aware coding tools to build, modify, and debug code:
24
- - readWorkspaceFile: Read any file in the workspace
25
- - writeWorkspaceFile: Create or overwrite files (requires approval)
26
- - editWorkspaceFile: Make surgical edits by replacing exact strings (requires approval)
27
- - listWorkspaceDir: List files and directories
28
- - grepWorkspace: Search for patterns across the codebase
29
- - runWorkspaceCommand: Execute shell commands like builds, tests, installs (requires approval)
43
+ PROJECT LAYOUT:
44
+ - Project root (codebase): ${workspacePath}
45
+ - Run harness (.scira/runs/…): plan.md, notes.md, report.md, sources.jsonl, claims.jsonl, todos.json
30
46
 
31
- Workspace: ${workspacePath}
47
+ FILE TOOLS:
48
+ - readFile / writeFile / editFile route automatically:
49
+ - Harness files by bare name: plan.md, notes.md, report.md, sources.jsonl → stored under .scira/runs/
50
+ - Everything else (src/…, package.json, …) → project root
51
+ - Never write source code under .scira. Never put harness files at the project root.
32
52
 
33
- When the task involves code:
34
- - Use grepWorkspace and readWorkspaceFile to understand existing code structure
35
- - Use editWorkspaceFile for precise changes, writeWorkspaceFile for new files
36
- - Run tests/builds with runWorkspaceCommand to verify changes
37
- - Research APIs, libraries, or error messages with webSearch + readUrl when needed
38
- - Match existing code style and patterns
53
+ CODING TOOLS:
54
+ - listWorkspaceDir, grepWorkspace: explore the codebase
55
+ - bash: shell in the project root. action=run (default), action=background for dev servers, action=list/output/kill for background tasks
56
+ - runBash: shell in the run harness directory for grepping or listing harness artifacts (notes.md, sources.jsonl, etc.)
57
+ - todo: structured task list (create, edit, mark, remove, rewrite, list)
39
58
 
40
- You can seamlessly combine research and coding - e.g., research how to implement a feature, then implement it, or debug an issue by researching the error and fixing the code.` : "";
41
- return `You are Scira AI CLI, made by Zaid Mukaddam, an autonomous research ${workspacePath ? "and coding " : ""}agent operating inside a single run directory on the user's machine.
59
+ When the task involves code:
60
+ - Use todo to track multi-step work
61
+ - Use grepWorkspace and readFile to understand the codebase
62
+ - Use editFile for precise changes, writeFile for new source files (paths like src/foo.ts)
63
+ - Run tests/builds with bash; use bash action=background for servers then action=output to check logs
64
+ - Match existing code style and patterns` : "";
65
+ return `You are Scira AI CLI, made by Zaid Mukaddam, an autonomous research ${workspacePath ? "and coding " : ""}agent.${workspacePath ? " Source code lives at the project root; harness artifacts live under .scira/runs/." : " You operate inside a single run directory on the user's machine."}
42
66
 
43
67
  Your goal:
44
68
  ${goal}
@@ -53,7 +77,7 @@ You have shell, file, search, skill${config.files ? ", and local files" : ""}${w
53
77
  0. Bootstrap: these built-in research skills are available — pull the relevant ones with readSkill before you begin. This is mandatory — skills contain concrete tactics for search, source quality, claim verification, and report writing.
54
78
  ${SKILL_CATALOG}
55
79
  1. Plan: write a short plan.md outlining your approach (use the research-plan skill as a template).
56
- 2. Gather: use webSearch with 3-5 parallel query variations to find real, citable sources, then readUrl to read the most relevant ones. Record findings in notes.md as you go. Never invent sources or URLs.
80
+ 2. Gather: use webSearch with 3-5 parallel query variations to find real, citable sources, then readUrl to read the most relevant ones. Use xSearch for current reactions, announcements, and real-time opinions on X/Twitter (requires XAI_API_KEY). Record findings in notes.md as you go. Never invent sources or URLs.
57
81
  3. Extract claims: after reading each source, use createClaim to record significant findings. Assign a short ID like claim_001, set confidence, and link source IDs.
58
82
  4. Verify: once all claims are recorded, use verifyClaim to update each claim's status (verified / weak / contradicted / needs_review). Be honest — flag weak or vendor-only evidence.
59
83
  5. Record sources: write all sources you actually used to sources.jsonl (include the snapshotPath reported by readUrl for each one) — STRICT JSONL rules: one compact JSON object per line, no literal newlines inside string values, no trailing commas. Use writeFile to write the entire file at once.
@@ -62,10 +86,10 @@ ${SKILL_CATALOG}
62
86
 
63
87
  Rules:
64
88
  - Prefer primary sources. Cross-check important claims across multiple sources.
65
- - Keep files inside the run directory (paths are relative to it).
89
+ ${workspacePath ? "- Harness files (plan.md, notes.md, report.md, sources.jsonl) go in the run directory. All source code changes go under the project root." : "- Keep files inside the run directory (paths are relative to it)."}
66
90
  - Be terse in your narration between tool calls — say what you're doing and why in one line.
67
91
  - Do not claim something is done before you have actually written report.md.
68
- - Re-read a skill with readSkill any time you are uncertain how to proceed.`;
92
+ - Re-read a skill with readSkill any time you are uncertain how to proceed.${planModeBlock(planMode ?? false)}`;
69
93
  }
70
94
  function devtoolsInstructionsBlock(toolNames) {
71
95
  if (toolNames.length === 0)
@@ -103,21 +127,32 @@ Rules for browser tools:
103
127
  - Browser observations are primary evidence for page state but not independent corroboration; cross-check important factual claims with separate sources.
104
128
  - Never paste secrets or credentials into the browser.`;
105
129
  }
106
- export async function createResearchAgent(runPath, goal, config, onApprovalRequired, workspacePath) {
130
+ export async function createResearchAgent(runPath, goal, config, onApprovalRequired, options = {}) {
107
131
  requireLlmKeys(config);
108
132
  const bridge = await createMcpBridge(config);
109
- const researchTools = createResearchTools(runPath, config, onApprovalRequired);
110
- const codingTools = workspacePath ? createCodingTools(workspacePath, config, onApprovalRequired) : {};
111
- const tools = { ...researchTools, ...codingTools, ...bridge.tools };
133
+ const getPlanMode = options.getPlanMode ?? (() => options.planMode ?? false);
134
+ const researchTools = createResearchTools(runPath, config, onApprovalRequired, options.workspacePath, getPlanMode);
135
+ const codingTools = options.workspacePath
136
+ ? createCodingTools(options.workspacePath, config, onApprovalRequired, options.backgroundTasks, runPath, getPlanMode)
137
+ : {};
138
+ const tools = { ...researchTools, ...codingTools, ...wrapToolsForPlanMode(bridge.tools, getPlanMode) };
139
+ const bgContext = options.backgroundTasks ? await options.backgroundTasks.formatContextForAgent() : "";
112
140
  const agent = new ToolLoopAgent({
113
141
  model: getLanguageModel(config),
114
- instructions: instructions(goal, config, workspacePath) + devtoolsInstructionsBlock(bridge.toolNames),
142
+ instructions: instructions(goal, config, options) + bgContext + devtoolsInstructionsBlock(bridge.toolNames),
115
143
  tools,
116
144
  stopWhen: isLoopFinished()
117
145
  });
118
146
  return { agent, close: bridge.close };
119
147
  }
120
- function oneShotInstructions(goal, hasDevtools) {
148
+ function oneShotInstructions(goal, hasDevtools, options = {}) {
149
+ const { workspacePath } = options;
150
+ const planMode = resolvePlanMode(options);
151
+ const codingHint = workspacePath ? `
152
+
153
+ Project root: ${workspacePath}. readFile/writeFile/editFile route code paths to the project root; harness files (plan.md, notes.md, …) stay under .scira/runs/.
154
+ - listWorkspaceDir, grepWorkspace, bash (with background tasks), todo
155
+ Use them for code questions, debugging, and implementation tasks.` : "";
121
156
  const now = new Date();
122
157
  const temporalContext = now.toLocaleDateString("en-US", {
123
158
  weekday: "long",
@@ -152,18 +187,23 @@ Step 1 — Decide the depth required:
152
187
  - When in doubt, escalate.${browserHint}
153
188
 
154
189
  Step 2 — If you decide to answer directly:
155
- - Default path: use webSearch (2-3 query variations) to find relevant, recent sources, then readUrl to read the best 1-2.
190
+ - Default path: use webSearch (2-3 query variations) to find relevant, recent sources, then readUrl to read the best 1-2. Use xSearch to surface real-time X posts when the question involves public reactions, announcements, or social discussions.
156
191
  - Browser path (only if the routing rules above triggered): use the devtools_* tools to drive a real Chromium session, then summarize what you observed (cite the URL you visited).
157
192
  - Synthesize a clear, direct answer in a few short paragraphs. Cite sources inline as [title](url). Never invent sources or URLs.
158
- - Do NOT write files, create claims, or produce a formal report — just answer in chat.`;
193
+ - Do NOT write files, create claims, or produce a formal report — just answer in chat.${codingHint}${planModeBlock(planMode ?? false)}`;
159
194
  }
160
- export async function createOneShotAgent(runPath, goal, config, onApprovalRequired, onEscalate) {
195
+ export async function createOneShotAgent(runPath, goal, config, onApprovalRequired, onEscalate, options = {}) {
161
196
  requireLlmKeys(config);
162
197
  const bridge = await createMcpBridge(config);
163
- const tools = { ...createOneShotTools(runPath, config, onApprovalRequired, onEscalate), ...bridge.tools };
198
+ const getPlanMode = options.getPlanMode ?? (() => options.planMode ?? false);
199
+ const tools = {
200
+ ...createOneShotTools(runPath, config, onApprovalRequired, onEscalate, options.workspacePath, options.backgroundTasks, getPlanMode),
201
+ ...wrapToolsForPlanMode(bridge.tools, getPlanMode)
202
+ };
203
+ const bgContext = options.backgroundTasks ? await options.backgroundTasks.formatContextForAgent() : "";
164
204
  const agent = new ToolLoopAgent({
165
205
  model: getLanguageModel(config),
166
- instructions: oneShotInstructions(goal, bridge.toolNames.length > 0) + devtoolsInstructionsBlock(bridge.toolNames),
206
+ instructions: oneShotInstructions(goal, bridge.toolNames.length > 0, options) + bgContext + devtoolsInstructionsBlock(bridge.toolNames),
167
207
  tools,
168
208
  stopWhen: isLoopFinished()
169
209
  });
@@ -173,6 +213,15 @@ export async function createOneShotAgent(runPath, goal, config, onApprovalRequir
173
213
  * Run the research agent headlessly, streaming a compact timeline to stdout.
174
214
  */
175
215
  export async function runResearchAgent(runPath, goal, config, workspacePath) {
216
+ const options = {
217
+ ...(workspacePath
218
+ ? {
219
+ workspacePath,
220
+ backgroundTasks: createBackgroundTaskManager(runPath, workspacePath)
221
+ }
222
+ : {}),
223
+ getPlanMode: () => false
224
+ };
176
225
  const spinner = new Spinner();
177
226
  const onApprovalRequired = async (toolName, description) => {
178
227
  spinner.stop();
@@ -188,7 +237,7 @@ export async function runResearchAgent(runPath, goal, config, workspacePath) {
188
237
  spinner.start();
189
238
  return approved;
190
239
  };
191
- const bundle = await createResearchAgent(runPath, goal, config, onApprovalRequired, workspacePath);
240
+ const bundle = await createResearchAgent(runPath, goal, config, onApprovalRequired, options);
192
241
  try {
193
242
  const result = await bundle.agent.stream({ prompt: goal });
194
243
  for await (const part of result.fullStream) {
@@ -230,6 +279,7 @@ const TOOL_ICONS = {
230
279
  createClaim: "◎",
231
280
  verifyClaim: "✓",
232
281
  webSearch: "⌕",
282
+ xSearch: "𝕏",
233
283
  readUrl: "↗",
234
284
  listSkills: "★",
235
285
  readSkill: "★",
@@ -238,17 +288,24 @@ const TOOL_ICONS = {
238
288
  getFile: "▤",
239
289
  fileExists: "▤",
240
290
  moveFile: "✎",
241
- deleteFile: "✗"
291
+ deleteFile: "✗",
292
+ todo: "☐"
242
293
  };
243
294
  function summarize(input) {
244
295
  const obj = (input ?? {});
245
- return String(obj.command ?? obj.query ?? obj.url ?? obj.path ?? obj.key ?? obj.pattern ?? obj.source ?? "").slice(0, 100);
296
+ if (obj.action && obj.action !== "run") {
297
+ return `${obj.action}${obj.taskId ? ` ${obj.taskId}` : ""}`.slice(0, 100);
298
+ }
299
+ if (Array.isArray(obj.queries)) {
300
+ const qs = obj.queries;
301
+ return (qs.slice(0, 2).join(" · ") + (qs.length > 2 ? ` +${qs.length - 2}` : "")).slice(0, 100);
302
+ }
303
+ return String(obj.command ?? obj.query ?? obj.url ?? obj.path ?? obj.key ?? obj.pattern ?? obj.source ?? obj.action ?? "").slice(0, 100);
246
304
  }
247
305
  const CODING_ICONS = {
248
306
  readWorkspaceFile: "▤",
249
307
  writeWorkspaceFile: "✎",
250
308
  editWorkspaceFile: "✎",
251
309
  listWorkspaceDir: "▤",
252
- grepWorkspace: "⌕",
253
- runWorkspaceCommand: "⌘"
310
+ grepWorkspace: "⌕"
254
311
  };
@@ -0,0 +1,140 @@
1
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
2
+ import { dirname, join } from "node:path";
3
+ import { tool } from "ai";
4
+ import { z } from "zod";
5
+ import { logEvent } from "../storage/run-store.js";
6
+ const TodoStatusSchema = z.enum(["pending", "in_progress", "completed", "cancelled"]);
7
/**
 * Compute the id for the next todo item.
 *
 * Looks for ids of the exact form `todo_<digits>` in `existing`, takes the
 * largest number found, and returns that number plus one, zero-padded to at
 * least three digits. Other ids are ignored; with no matching ids the result
 * is `todo_001`.
 *
 * @param {Array<{ id: string }>} existing - Current todo items.
 * @returns {string} The next id, e.g. `todo_004`.
 */
function nextTodoId(existing) {
    let highest = 0;
    for (const todo of existing) {
        const match = /^todo_(\d+)$/u.exec(todo.id);
        if (match === null) {
            continue;
        }
        const value = Number.parseInt(match[1], 10);
        if (value > highest) {
            highest = value;
        }
    }
    const suffix = String(highest + 1).padStart(3, "0");
    return `todo_${suffix}`;
}
15
/**
 * Read the todo list stored at `path`.
 *
 * Any read or parse failure, and any JSON value that is not an array, yields
 * an empty list. Array entries are kept only when they are non-null objects
 * with a string `id`.
 *
 * @param {string} path - Location of the todos JSON file.
 * @returns {Promise<object[]>} The stored todo items, possibly empty.
 */
async function loadTodos(path) {
    let parsed;
    try {
        const text = await readFile(path, "utf8");
        parsed = JSON.parse(text);
    }
    catch {
        return [];
    }
    if (!Array.isArray(parsed)) {
        return [];
    }
    const looksLikeTodo = (entry) => typeof entry === "object" && entry !== null && typeof entry.id === "string";
    return parsed.filter(looksLikeTodo);
}
27
/**
 * Write the todo list to `path` as pretty-printed JSON followed by a newline,
 * creating the parent directory first if it does not exist.
 *
 * @param {string} path - Destination file.
 * @param {object[]} items - Todo items to store.
 * @returns {Promise<void>}
 */
async function saveTodos(path, items) {
    const parentDir = dirname(path);
    await mkdir(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(items, null, 2)}\n`;
    await writeFile(path, serialized);
}
31
/**
 * Render todo items as one line each: a status checkbox, the id, the content,
 * and the status name in parentheses.
 *
 * @param {Array<{ id: string, content: string, status: string }>} items
 * @returns {string} The rendered list, or "No todos." for an empty list.
 */
function formatTodoList(items) {
    if (items.length === 0) {
        return "No todos.";
    }
    const checkbox = {
        pending: "[ ]",
        in_progress: "[~]",
        completed: "[x]",
        cancelled: "[-]"
    };
    const rows = [];
    for (const todo of items) {
        rows.push(`${checkbox[todo.status]} ${todo.id}: ${todo.content} (${todo.status})`);
    }
    return rows.join("\n");
}
44
/**
 * Create the `todo` agent tool for one run.
 *
 * The tool keeps a structured task list in `todos.json` inside `runPath`.
 * Every call reloads the list from disk, applies one action, saves the result
 * (except for `list`), logs an event, and returns a human-readable message
 * that includes the updated list.
 *
 * Ids are kept unique: `create` and `rewrite` reject caller-supplied ids that
 * are already in use or repeated within the same call, and generated ids never
 * reuse an id supplied elsewhere in the same call.
 *
 * @param {string} runPath - Run directory that holds `todos.json`.
 * @returns {object} The tool definition produced by `tool(...)`.
 */
export function createTodoTool(runPath) {
    const todosPath = join(runPath, "todos.json");
    const formatId = (n) => `todo_${String(n).padStart(3, "0")}`;
    // Return the distinct ids from `ids` that are in `taken` or repeated in `ids`.
    const findClashes = (ids, taken) => {
        const seen = new Set(taken);
        const clashes = new Set();
        for (const candidate of ids) {
            if (seen.has(candidate))
                clashes.add(candidate);
            seen.add(candidate);
        }
        return [...clashes];
    };
    return tool({
        description: "Manage structured task todos for the current session. " +
            "Actions: create (add items), edit (change content), mark (set status), remove (delete one), rewrite (replace entire list), list (show all). " +
            "Statuses: pending, in_progress, completed, cancelled.",
        inputSchema: z.object({
            action: z.enum(["create", "edit", "mark", "remove", "rewrite", "list"]),
            id: z.string().optional().describe("Todo id for edit, mark, or remove."),
            content: z.string().optional().describe("Todo text for create, edit, or rewrite items."),
            status: TodoStatusSchema.optional().describe("Status for mark action or rewrite items."),
            items: z
                .array(z.object({
                id: z.string().optional(),
                content: z.string(),
                status: TodoStatusSchema.optional()
            }))
                .optional()
                .describe("Items for create or rewrite.")
        }),
        execute: async ({ action, id, content, status, items }) => {
            const now = new Date().toISOString();
            let todos = await loadTodos(todosPath);
            switch (action) {
                case "list":
                    return formatTodoList(todos);
                case "create": {
                    // An empty `items` array must not hide a provided `content`.
                    const toAdd = items && items.length > 0
                        ? items
                        : (content ? [{ content, status: status ?? "pending" }] : []);
                    if (toAdd.length === 0)
                        return "create requires content or items.";
                    const explicitIds = toAdd.map((item) => item.id).filter((x) => x !== undefined);
                    const clashes = findClashes(explicitIds, todos.map((t) => t.id));
                    if (clashes.length > 0)
                        return `Todo id(s) already in use: ${clashes.join(", ")}.`;
                    // Reserve explicit ids up front so a generated id cannot
                    // collide with one supplied later in the same batch.
                    const reserved = explicitIds.map((x) => ({ id: x }));
                    for (const item of toAdd) {
                        const todoId = item.id ?? nextTodoId([...todos, ...reserved]);
                        todos.push({
                            id: todoId,
                            content: item.content,
                            status: item.status ?? "pending",
                            createdAt: now,
                            updatedAt: now
                        });
                    }
                    await saveTodos(todosPath, todos);
                    await logEvent(runPath, "todo.created", { count: toAdd.length });
                    return `Created ${toAdd.length} todo(s).\n\n${formatTodoList(todos)}`;
                }
                case "edit": {
                    if (!id || !content)
                        return "edit requires id and content.";
                    const idx = todos.findIndex((t) => t.id === id);
                    if (idx === -1)
                        return `Todo "${id}" not found.`;
                    todos[idx] = { ...todos[idx], content, updatedAt: now };
                    await saveTodos(todosPath, todos);
                    await logEvent(runPath, "todo.edited", { id });
                    return `Updated ${id}.\n\n${formatTodoList(todos)}`;
                }
                case "mark": {
                    if (!id || !status)
                        return "mark requires id and status.";
                    const idx = todos.findIndex((t) => t.id === id);
                    if (idx === -1)
                        return `Todo "${id}" not found.`;
                    todos[idx] = { ...todos[idx], status, updatedAt: now };
                    await saveTodos(todosPath, todos);
                    await logEvent(runPath, "todo.marked", { id, status });
                    return `Marked ${id} as ${status}.\n\n${formatTodoList(todos)}`;
                }
                case "remove": {
                    if (!id)
                        return "remove requires id.";
                    const before = todos.length;
                    todos = todos.filter((t) => t.id !== id);
                    if (todos.length === before)
                        return `Todo "${id}" not found.`;
                    await saveTodos(todosPath, todos);
                    await logEvent(runPath, "todo.removed", { id });
                    return `Removed ${id}.\n\n${formatTodoList(todos)}`;
                }
                case "rewrite": {
                    if (!items || items.length === 0)
                        return "rewrite requires a non-empty items array.";
                    const explicitIds = items.map((item) => item.id).filter((x) => x !== undefined);
                    const clashes = findClashes(explicitIds, []);
                    if (clashes.length > 0)
                        return `rewrite items contain duplicate id(s): ${clashes.join(", ")}.`;
                    const taken = new Set(explicitIds);
                    const previous = new Map(todos.map((t) => [t.id, t]));
                    todos = items.map((item, i) => {
                        let todoId = item.id;
                        let createdAt = now;
                        if (todoId === undefined) {
                            // Default to the positional id, skipping any id
                            // that another item in this list already uses.
                            let n = i + 1;
                            while (taken.has(formatId(n)))
                                n += 1;
                            todoId = formatId(n);
                            taken.add(todoId);
                        }
                        else {
                            // An item kept under the same explicit id is the
                            // same todo, so it keeps its creation time.
                            createdAt = previous.get(todoId)?.createdAt ?? now;
                        }
                        return {
                            id: todoId,
                            content: item.content,
                            status: item.status ?? "pending",
                            createdAt,
                            updatedAt: now
                        };
                    });
                    await saveTodos(todosPath, todos);
                    await logEvent(runPath, "todo.rewritten", { count: todos.length });
                    return `Rewrote todo list (${todos.length} items).\n\n${formatTodoList(todos)}`;
                }
                default:
                    return `Unknown action: ${action}`;
            }
        }
    });
}