@nanhara/hara 0.0.2 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/CHANGELOG.md +582 -0
  2. package/CLA.md +1 -1
  3. package/README.md +207 -10
  4. package/dist/activity.js +30 -0
  5. package/dist/agent/loop.js +184 -0
  6. package/dist/config.js +114 -0
  7. package/dist/context/agents-md.js +64 -0
  8. package/dist/context/mentions.js +90 -0
  9. package/dist/diff.js +103 -0
  10. package/dist/fs-walk.js +103 -0
  11. package/dist/fuzzy.js +62 -0
  12. package/dist/images.js +146 -0
  13. package/dist/index.js +1589 -0
  14. package/dist/mcp/client.js +54 -0
  15. package/dist/md.js +52 -0
  16. package/dist/memory/guard.js +51 -0
  17. package/dist/memory/store.js +93 -0
  18. package/dist/org/planner.js +174 -0
  19. package/dist/org/roles.js +140 -0
  20. package/dist/org/router.js +39 -0
  21. package/dist/plugins/plugins.js +124 -0
  22. package/dist/providers/anthropic.js +83 -0
  23. package/dist/providers/openai.js +125 -0
  24. package/dist/providers/qwen-oauth.js +139 -0
  25. package/dist/providers/types.js +2 -0
  26. package/dist/recall.js +76 -0
  27. package/dist/sandbox.js +78 -0
  28. package/dist/search/embed.js +42 -0
  29. package/dist/search/hybrid.js +38 -0
  30. package/dist/search/semindex.js +192 -0
  31. package/dist/session/store.js +109 -0
  32. package/dist/skills/skills.js +141 -0
  33. package/dist/statusbar.js +69 -0
  34. package/dist/tools/agent.js +26 -0
  35. package/dist/tools/apply-core.js +63 -0
  36. package/dist/tools/builtin.js +106 -0
  37. package/dist/tools/codebase.js +102 -0
  38. package/dist/tools/computer.js +376 -0
  39. package/dist/tools/edit.js +62 -0
  40. package/dist/tools/memory.js +147 -0
  41. package/dist/tools/patch.js +123 -0
  42. package/dist/tools/registry.js +18 -0
  43. package/dist/tools/search.js +176 -0
  44. package/dist/tools/skill.js +30 -0
  45. package/dist/tools/web.js +73 -0
  46. package/dist/tui/App.js +200 -0
  47. package/dist/tui/InputBox.js +208 -0
  48. package/dist/tui/run.js +10 -0
  49. package/dist/tui/theme.js +11 -0
  50. package/dist/ui.js +17 -0
  51. package/dist/undo.js +40 -0
  52. package/dist/vision.js +130 -0
  53. package/package.json +34 -9
  54. package/plugins/browser/.hara-plugin/plugin.json +9 -0
  55. package/plugins/browser/skills/web/SKILL.md +27 -0
  56. package/plugins/chrome/.hara-plugin/plugin.json +9 -0
  57. package/plugins/chrome/skills/chrome/SKILL.md +26 -0
  58. package/LICENSE-MIT +0 -21
  59. package/bin/hara.mjs +0 -25
  60. /package/{LICENSE-APACHE → LICENSE} +0 -0
@@ -0,0 +1,106 @@
1
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
2
+ import { dirname, resolve, isAbsolute } from "node:path";
3
+ import { stdout as procOut } from "node:process";
4
+ import { registerTool } from "./registry.js";
5
+ import { runShell } from "../sandbox.js";
6
+ import { nearestPaths } from "../fs-walk.js";
7
+ import { emitDiff } from "../diff.js";
8
+ import { recordEdit } from "../undo.js";
9
/** Upper bound, in characters, on any text a tool hands back to the model. */
const MAX = 100_000;

/**
 * Turn a tool-supplied path into an absolute one.
 * @param {string} p - absolute path, or a path relative to `cwd`
 * @param {string} cwd - directory relative paths are resolved against
 * @returns {string} `p` untouched when it is already absolute, otherwise `cwd` + `p` resolved
 */
function abs(p, cwd) {
    if (isAbsolute(p)) {
        return p;
    }
    return resolve(cwd, p);
}

/**
 * Truncate over-long output to MAX characters and append a marker saying how many were dropped.
 * @param {string} s - text to bound
 * @returns {string} `s` itself when it fits, otherwise its first MAX characters plus the marker
 */
function cap(s) {
    const tooLong = s.length > MAX;
    if (!tooLong) {
        return s;
    }
    const dropped = s.length - MAX;
    return `${s.slice(0, MAX)}\n…[truncated ${dropped} chars]`;
}
16
// read_file — returns a file's text (bounded by cap). Failures come back as an "Error: …" string, with
// near-miss path suggestions when nearestPaths finds any, instead of being thrown.
registerTool({
    name: "read_file",
    description: "Read a UTF-8 text file and return its contents.",
    input_schema: {
        type: "object",
        properties: {
            path: { type: "string", description: "File path, relative to cwd or absolute" },
        },
        required: ["path"],
    },
    kind: "read",
    async run(input, ctx) {
        let contents;
        try {
            contents = cap(await readFile(abs(input.path, ctx.cwd), "utf8"));
        }
        catch (e) {
            // Prefer the errno-style code (ENOENT, EISDIR, …); fall back to the message when there is none.
            const reason = e.code ?? e.message;
            const suggestions = nearestPaths(ctx.cwd, input.path);
            const hint = suggestions.length ? ` Did you mean: ${suggestions.join(", ")}?` : "";
            return `Error: cannot read ${input.path}: ${reason}.${hint}`;
        }
        return contents;
    },
});
37
// write_file — creates or overwrites a file, emitting a diff and recording the previous contents for undo.
registerTool({
    name: "write_file",
    description: "Create or overwrite a UTF-8 text file (creates parent directories).",
    input_schema: {
        type: "object",
        properties: {
            path: { type: "string" },
            content: { type: "string" },
        },
        required: ["path", "content"],
    },
    kind: "edit",
    async run(input, ctx) {
        const target = abs(input.path, ctx.cwd);
        // Previous contents, or null when the file could not be read (treated as a new file).
        const before = await readFile(target, "utf8").catch(() => null);
        await mkdir(dirname(target), { recursive: true });
        await writeFile(target, input.content, "utf8");
        emitDiff(input.path, before ?? "", input.content, ctx.ui);
        recordEdit([{ path: input.path, absPath: target, before }]);
        const written = String(input.content).length;
        return `Wrote ${written} chars to ${target}`;
    },
});
65
// bash — runs a shell command through runShell and returns its (capped) combined output. While the command
// runs, output is streamed either to the TUI sink (one notice per complete line) or straight to a TTY stdout.
registerTool({
    name: "bash",
    description: "Run a shell command in the working directory; returns combined stdout/stderr.",
    input_schema: {
        type: "object",
        properties: {
            command: { type: "string" },
            timeout_ms: { type: "number", description: "default 120000" },
        },
        required: ["command"],
    },
    kind: "exec",
    async run(input, ctx) {
        let buf = ""; // TUI: line-buffer live output into the sink (one notice per line)
        const live = ctx.ui
            ? (s) => {
                buf += s;
                let i;
                while ((i = buf.indexOf("\n")) >= 0) {
                    ctx.ui.notice(buf.slice(0, i));
                    buf = buf.slice(i + 1);
                }
            }
            : procOut.isTTY
                ? (s) => procOut.write(s) // stream output in a plain terminal
                : undefined;
        try {
            const { stdout, stderr } = await runShell(input.command, ctx.cwd, ctx.sandbox ?? "off", {
                timeout: input.timeout_ms ?? 120_000,
                maxBuffer: 10 * 1024 * 1024,
                onData: live,
            });
            const combined = (stdout || "") + (stderr ? `\n[stderr]\n${stderr}` : "");
            return cap(combined.trim() || "(no output)");
        }
        catch (e) {
            // A failed/timed-out command is reported as text (with whatever output it produced), not thrown.
            return cap(`Command failed: ${e.message}\n${e.stdout || ""}${e.stderr || ""}`);
        }
        finally {
            // Flush the trailing partial line on BOTH paths: a failing command's last line often has no
            // newline (e.g. the error itself), and it used to be dropped from the live view.
            if (ctx.ui && buf) {
                ctx.ui.notice(buf);
                buf = "";
            }
        }
    },
});
@@ -0,0 +1,102 @@
1
+ // codebase_search — treat the current project as a knowledge base. Lexical relevance search over the
2
+ // repo's code/text (respects .gitignore via listProjectFiles), ranked by how many distinct query words a
3
+ // file contains, returning the densest snippet. Distinct from grep (exact pattern): this finds *related*
4
+ // code from a natural-language query. The interface a semantic (zvec) index slots into later.
5
+ import { readFileSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ import { registerTool } from "./registry.js";
8
+ import { listProjectFiles, isProbablyBinary, fileSize } from "../fs-walk.js";
9
+ import { findProjectRoot } from "../context/agents-md.js";
10
+ import { loadConfig } from "../config.js";
11
+ import { getEmbedder } from "../search/embed.js";
12
+ import { queryIndex, indexExists } from "../search/semindex.js";
13
+ const MAX_FILE = 200_000; // skip very large files
14
+ const CODE_RE = /\.(ts|tsx|js|jsx|mjs|cjs|py|go|rs|java|kt|rb|php|c|h|cc|cpp|hpp|cs|swift|scala|sh|bash|sql|md|mdx|json|ya?ml|toml|html|css|scss|less|vue|svelte|astro|tf|proto|graphql|gql|gradle|txt)$/i;
15
// codebase_search tool. Lexical pass: every project file whose name matches CODE_RE (and is not larger than
// MAX_FILE or binary) is scored by how many distinct query words it contains; the line matching the most of
// them becomes the snippet. Optional semantic pass: when an embedder and a "repo" index exist, their hits
// are listed first and lexical hits fill the remaining slots.
registerTool({
    name: "codebase_search",
    description: "Find code in THIS project relevant to a natural-language query — ranked by relevance (not exact match). " +
        "Use it to locate similar/related code while working ('where is auth handled?', 'retry logic'); use grep " +
        "for exact strings/regex. Returns the top files with their most relevant snippet (file:line).",
    input_schema: {
        type: "object",
        properties: { query: { type: "string" }, limit: { type: "number", description: "default 6 (max 20)" } },
        required: ["query"],
    },
    kind: "read",
    async run(input, ctx) {
        const words = [...new Set(String(input.query ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 1))];
        if (!words.length)
            return "(empty query)";
        const need = Math.min(2, words.length); // a file must contain at least this many distinct query words
        // Clamp to an integer in 1..20. Without the lower bound a negative limit made `out.length >= limit`
        // true straight away (no lexical results at all), and a fractional one over-filled the result list.
        const limit = Math.max(1, Math.min(Math.floor(Number(input.limit)) || 6, 20));
        const root = findProjectRoot(ctx.cwd);
        const hits = [];
        for (const rel of listProjectFiles(root)) {
            if (!CODE_RE.test(rel))
                continue;
            const full = join(root, rel);
            if (fileSize(full) > MAX_FILE)
                continue;
            let buf;
            try {
                buf = readFileSync(full);
            }
            catch {
                continue; // unreadable file: skip it rather than fail the whole search
            }
            if (isProbablyBinary(buf))
                continue;
            const text = buf.toString("utf8");
            const lower = text.toLowerCase();
            const present = words.filter((w) => lower.includes(w));
            if (present.length < need)
                continue;
            // densest line = the one matching the most distinct query words; show it with a little context
            const lines = text.split("\n");
            let bestLine = 0;
            let bestHits = 0;
            for (let i = 0; i < lines.length; i++) {
                const ll = lines[i].toLowerCase();
                const h = present.reduce((n, w) => (ll.includes(w) ? n + 1 : n), 0);
                if (h > bestHits) {
                    bestHits = h;
                    bestLine = i;
                }
            }
            const snippet = lines.slice(Math.max(0, bestLine - 2), bestLine + 4).join("\n");
            // File-level word coverage dominates the score; the densest line only breaks ties.
            hits.push({ file: rel, score: present.length * 100 + bestHits, line: bestLine + 1, snippet });
        }
        hits.sort((a, b) => b.score - a.score || a.file.length - b.file.length);
        // Semantic layer (opt-in): if a repo index + embedder are configured, prepend the most relevant
        // chunks (more precise than word overlap), then fill remaining slots with lexical hits. Falls back
        // to pure lexical when no index/embedder — zero behaviour change for the default install.
        const out = [];
        const seen = new Set();
        const cfg = loadConfig();
        const embed = getEmbedder(cfg);
        if (embed && indexExists("repo", ctx.cwd)) {
            try {
                for (const s of await queryIndex("repo", String(input.query), embed, ctx.cwd, limit)) {
                    if (out.length >= limit)
                        break; // never exceed the requested number of results, whatever the index returns
                    if (s.score < 0.2 || seen.has(s.file))
                        continue;
                    seen.add(s.file);
                    out.push(`${s.file} (semantic ${s.score.toFixed(2)})\n${s.text.split("\n").slice(0, 6).join("\n")}`);
                }
            }
            catch {
                /* embedding endpoint down → degrade to lexical */
            }
        }
        for (const h of hits) {
            if (out.length >= limit)
                break;
            if (seen.has(h.file))
                continue;
            seen.add(h.file);
            out.push(`${h.file}:${h.line}\n${h.snippet}`);
        }
        if (!out.length)
            return "(no relevant code found)";
        return out.join("\n\n---\n\n");
    },
});
@@ -0,0 +1,376 @@
1
+ // computer — native screen control (operate desktop software, not just the browser). Shell-out per OS, no
2
+ // heavy deps: mac = screencapture + cliclick · windows = PowerShell + .NET/user32 · linux = scrot + xdotool.
3
+ // Safety: opt-in tier (config computerUse off|read|click|full) + per-app allowlist (config computerApps:
4
+ // frontmost-window check before any pointer/keyboard action) + dangerous-key blocklist + a once-per-session
5
+ // grant (tool kind "computer" always confirms once, even in full-auto). Screenshots are read via the vision
6
+ // sidecar (ctx.describeImage) so a text main model can still "see" them.
7
+ import { spawnSync } from "node:child_process";
8
+ import { existsSync, statSync } from "node:fs";
9
+ import { tmpdir } from "node:os";
10
+ import { join } from "node:path";
11
+ import { registerTool } from "./registry.js";
12
+ import { loadConfig } from "../config.js";
13
// Tier ordering: a tier permits every action whose minimum tier has an equal or lower rank.
const RANK = { off: 0, read: 1, click: 2, full: 3 };
// Minimum tier per action. Actions missing from this table are treated as requiring "full".
const ACTION_MIN = { screenshot: "read", find: "read", activate: "click", move: "click", click: "click", type: "full", key: "full" };
// dangerous combos refused even at full tier (quit / close / delete / task-switch-kill), written in
// "modifier+key" notation (e.g. "cmd+q", "alt+F4")
const KEY_BLOCK = /(?:\b(cmd|command|ctrl|control|alt|option|win|super|meta)\b.*\+.*\b(q|w|delete|del|f4|escape|esc)\b)|ctrl\+alt\+(?:delete|del|backspace)/i;
// The same combos in Windows SendKeys notation, which is what the win32 `key` backend receives verbatim:
// "^" = Ctrl, "%" = Alt, "+" = Shift, special keys in braces, and "(…)" holds the modifier for a whole group.
// KEY_BLOCK never matches these ("^w", "%{F4}") because they contain no modifier *word*.
const SENDKEYS_BLOCK = /[\^%][\^%+]*(?:[qw]|\([^)]*[qw]|\{(?:f4|del|delete|esc)\})/i;
/**
 * Whether the configured tier permits the action. Exported for tests.
 * @param {string} tier - configured computerUse tier ("off" | "read" | "click" | "full")
 * @param {string} action - requested action name
 * @returns {boolean} false for an unknown tier; unknown actions need the "full" tier
 */
export function actionAllowed(tier, action) {
    return RANK[tier] >= RANK[ACTION_MIN[action] ?? "full"];
}
/**
 * Whether a key combo is on the dangerous blocklist. Exported for tests.
 * Recognises both "modifier+key" notation and Windows SendKeys notation.
 * @param {string} keys - key or combo as supplied to the `key` action
 * @returns {boolean} true when the combo must be refused
 */
export function keyIsBlocked(keys) {
    return KEY_BLOCK.test(keys) || SENDKEYS_BLOCK.test(keys);
}
25
// Circuit breaker (learned from codex): consecutive screen-control failures are counted so the agent cannot
// loop forever on a broken setup. Any success clears the count; the FAIL_LIMIT-th failure in a row returns a
// stop message (and clears the count so a later, deliberate retry starts fresh).
const FAIL_LIMIT = 3;
let consecFails = 0;

/** Clear the consecutive-failure count. */
export function resetComputerFails() {
    consecFails = 0;
}

/** Record a success: clears the failure count and passes `msg` through unchanged. */
function ok(msg) {
    consecFails = 0;
    return msg;
}

/**
 * Record a failure and build the text returned to the model.
 * @param {string} msg - what went wrong
 * @returns {string} a "Failed: …" line with the running count, or the stop message on the FAIL_LIMIT-th failure
 */
function fail(msg) {
    consecFails += 1;
    if (consecFails < FAIL_LIMIT) {
        return `Failed: ${msg} [${consecFails}/${FAIL_LIMIT} before I stop]`;
    }
    consecFails = 0;
    return `⛔ Stopping screen control — ${FAIL_LIMIT} actions failed in a row (last: ${msg}). Most likely a missing macOS permission (Accessibility for click/type, Screen Recording for screenshots) or the target app isn't reachable. Fix that, then ask me to try again — I won't keep retrying blindly.`;
}
44
/**
 * Run a command synchronously (15 s timeout) and summarise the outcome.
 * @param {string} cmd - executable to launch
 * @param {string[]} args - arguments, passed without a shell
 * @returns {{ ok: boolean, out: string }} `ok` is true only for exit status 0; `out` is trimmed
 *   stdout+stderr, or the spawn error's message when the process produced no output of its own
 */
function run(cmd, args) {
    try {
        const r = spawnSync(cmd, args, { encoding: "utf8", timeout: 15000 });
        const out = ((r.stdout || "") + (r.stderr || "")).trim();
        // spawnSync does not throw for a missing binary or a timeout — it sets `error` and leaves `status`
        // null. Surface that message so callers that report `out` don't show an empty failure reason.
        if (r.error) {
            return { ok: false, out: out || r.error.message || "spawn failed" };
        }
        return { ok: r.status === 0, out };
    }
    catch (e) {
        return { ok: false, out: e?.message || "spawn failed" };
    }
}
53
/** Whether `cmd` can be found on PATH, asked of `where` on Windows and `which` everywhere else. */
function has(cmd) {
    const locator = process.platform === "win32" ? "where" : "which";
    const { ok: found } = run(locator, [cmd]);
    return found;
}
56
/** Run a PowerShell script (profile loading disabled) through `run`; returns its `{ ok, out }` result. */
function ps(script) {
    const argv = ["-NoProfile", "-Command", script];
    return run("powershell", argv);
}
57
/**
 * Put text on the OS clipboard so `type` can paste it instead of injecting keystrokes.
 * Uses pbcopy on macOS and clip on Windows; elsewhere the first of wl-copy / xclip that is installed.
 * @param {string} text - text to place on the clipboard
 * @returns {boolean} true only when the clipboard command exited with status 0
 */
function setClipboard(text) {
    // Feed `text` to a clipboard command on stdin; success means exit status 0.
    const pipeInto = (cmd, args = []) => spawnSync(cmd, args, { input: text, timeout: 5000 }).status === 0;
    try {
        switch (process.platform) {
            case "darwin":
                return pipeInto("pbcopy");
            case "win32":
                return pipeInto("clip");
            default:
                if (has("wl-copy")) {
                    return pipeInto("wl-copy");
                }
                if (has("xclip")) {
                    return pipeInto("xclip", ["-selection", "clipboard"]);
                }
                return false;
        }
    }
    catch {
        // any spawn problem simply means "clipboard unavailable"
        return false;
    }
}
74
// Per-process counter that keeps screenshot names unique even within the same millisecond.
let seq = 0;

/** Build a fresh screenshot path in the OS temp dir: hara-screen-<pid>-<epoch ms>-<counter>.png. */
function tmpShot() {
    const n = ++seq;
    const fileName = `hara-screen-${process.pid}-${Date.now()}-${n}.png`;
    return join(tmpdir(), fileName);
}
79
/**
 * Capture the screen to a fresh temp PNG using the platform's tool.
 * mac: screencapture · linux: scrot, then ImageMagick `import`, then grim · windows: PowerShell + System.Drawing.
 * @returns {{ path: string } | { error: string }} the file path on success, otherwise a reason
 */
function screenshot() {
    const out = tmpShot();
    // True once a capture tool has left a non-empty file at `out`.
    const produced = () => {
        try {
            return existsSync(out) && statSync(out).size > 0;
        }
        catch {
            return false;
        }
    };
    if (process.platform === "darwin") {
        if (!run("screencapture", ["-x", out]).ok)
            return { error: "screencapture failed (grant Screen Recording permission)" };
    }
    else if (process.platform === "linux") {
        const tools = [
            ["scrot", ["-o", out]],
            ["import", ["-window", "root", out]],
            ["grim", [out]],
        ];
        let anyInstalled = false;
        for (const [cmd, args] of tools) {
            if (!has(cmd))
                continue;
            anyInstalled = true;
            run(cmd, args);
            // An installed tool can still produce nothing on this session; only stop once a file exists,
            // otherwise fall through to the next installed tool instead of giving up.
            if (produced())
                break;
        }
        if (!anyInstalled)
            return { error: "no screenshot tool — install scrot / imagemagick / grim" };
    }
    else if (process.platform === "win32") {
        // Single-quoted PowerShell literal (quotes doubled): unlike a JSON/double-quoted string it neither
        // doubles the path's backslashes nor expands `$` sequences.
        const savePath = `'${out.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&")}'`;
        const script = `Add-Type -AssemblyName System.Windows.Forms,System.Drawing; $b=[System.Windows.Forms.Screen]::PrimaryScreen.Bounds; $bmp=New-Object System.Drawing.Bitmap($b.Width,$b.Height); $g=[System.Drawing.Graphics]::FromImage($bmp); $g.CopyFromScreen($b.Location,[System.Drawing.Point]::Empty,$b.Size); $bmp.Save(${savePath})`;
        if (!ps(script).ok)
            return { error: "PowerShell screenshot failed" };
    }
    else {
        return { error: `unsupported platform ${process.platform}` };
    }
    return produced() ? { path: out } : { error: "screenshot produced no file" };
}
112
/**
 * Bring an app to the foreground so screenshots/clicks land on IT, not the terminal hara runs in.
 * mac: `open -a` · windows: WScript.Shell AppActivate · linux: wmctrl if installed, otherwise xdotool.
 * @param {string} app - application name / window title to activate
 * @returns {{ ok: boolean, msg: string }} outcome plus a human-readable message
 */
function activateApp(app) {
    // Shared result shape: success text, else the tool's own output, else a generic message (+ hint).
    const report = (r, hint = "") => ({
        ok: r.ok,
        msg: r.ok ? `activated ${app}` : r.out || `couldn't activate ${app}${hint}`,
    });
    if (process.platform === "darwin") {
        // `open -a` reliably launches+foregrounds; `osascript … activate` often leaves another window on top.
        return report(run("open", ["-a", app]));
    }
    if (process.platform === "win32") {
        // The name goes in as a single-quoted PowerShell literal (quote characters doubled). A JSON/double-quoted
        // string would let `$(...)` in the name run as PowerShell code and would break on an embedded `"`.
        const literal = `'${String(app).replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&")}'`;
        // AppActivate returns a boolean and does not fail the script by itself, so map "false" to a non-zero
        // exit code — otherwise a missing window was reported as a successful activation.
        return report(ps(`if (-not (New-Object -ComObject WScript.Shell).AppActivate(${literal})) { exit 1 }`));
    }
    if (process.platform === "linux") {
        const r = has("wmctrl") ? run("wmctrl", ["-a", app]) : run("xdotool", ["search", "--name", app, "windowactivate"]);
        return report(r, " (need wmctrl/xdotool)");
    }
    return { ok: false, msg: `activate unsupported on ${process.platform}` };
}
129
/**
 * Logical screen size in the coordinate space the click backends use (points on mac, pixels on win/linux).
 * Grounding returns 0..1 fractions, so click = fraction × this.
 * @returns {{ w: number, h: number } | null} null when the size cannot be determined
 */
function screenSize() {
    // Parse "<width> <height>" text; null unless both numbers are non-zero.
    const parsePair = (text) => {
        const [w, h] = text.trim().split(/\s+/).map(Number);
        return w && h ? { w, h } : null;
    };
    try {
        switch (process.platform) {
            case "darwin": {
                const r = run("osascript", ["-e", 'tell application "Finder" to get bounds of window of desktop']);
                // bounds come back as four integers; the 3rd and 4th are used as width and height
                const nums = r.out.match(/-?\d+/g);
                return nums && nums.length >= 4 ? { w: Number(nums[2]), h: Number(nums[3]) } : null;
            }
            case "linux":
                return parsePair(run("xdotool", ["getdisplaygeometry"]).out);
            case "win32":
                return parsePair(ps('Add-Type -AssemblyName System.Windows.Forms; $b=[System.Windows.Forms.Screen]::PrimaryScreen.Bounds; "$($b.Width) $($b.Height)"').out);
            default:
                return null;
        }
    }
    catch {
        return null;
    }
}
155
+ /**
+  * Name of the frontmost application/window (for the allowlist check). "" if undetectable.
+  * Each platform shells out to one command and returns its trimmed output when that command exits 0;
+  * a failed command, or a platform other than darwin/linux/win32, yields "".
+  * Callers must treat "" as "unknown" rather than as a name.
+  */
156
+ function frontmostApp() {
157
+ if (process.platform === "darwin") { // macOS: ask System Events for the name of the frontmost process
158
+ const r = run("osascript", ["-e", 'tell application "System Events" to get name of first application process whose frontmost is true']);
159
+ return r.ok ? r.out : "";
160
+ }
161
+ if (process.platform === "linux") { // Linux: class name of the active window, via xdotool
162
+ const r = run("xdotool", ["getactivewindow", "getwindowclassname"]);
163
+ return r.ok ? r.out : "";
164
+ }
165
+ if (process.platform === "win32") { // Windows: foreground window → owning process id → process name (user32 via Add-Type)
166
+ const script = `Add-Type @"
167
+ using System;using System.Runtime.InteropServices;public class Hw{[DllImport("user32.dll")]public static extern IntPtr GetForegroundWindow();[DllImport("user32.dll")]public static extern int GetWindowThreadProcessId(IntPtr h,out int p);}
168
+ "@; $p=0;[void][Hw]::GetWindowThreadProcessId([Hw]::GetForegroundWindow(),[ref]$p);(Get-Process -Id $p).ProcessName`;
169
+ const r = ps(script);
170
+ return r.ok ? r.out : "";
171
+ }
172
+ return ""; // unsupported platform
173
+ }
174
/**
 * Quote a value as a PowerShell single-quoted literal. Nothing is expanded inside single quotes, so the
 * value cannot run code; every quote character PowerShell accepts as a single quote is doubled.
 */
function psLiteral(value) {
    return `'${String(value).replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&")}'`;
}
/**
 * Escape text for SendKeys so it is typed literally: + ^ % ~ ( ) { } [ ] are wrapped in braces, and
 * line breaks become {ENTER}.
 */
function sendKeysLiteral(text) {
    return String(text).replace(/[+^%~(){}[\]]/g, "{$&}").replace(/\r?\n/g, "{ENTER}");
}
/**
 * Perform one pointer/keyboard action with the platform backend
 * (mac: cliclick · linux: xdotool · windows: PowerShell + System.Windows.Forms / user32).
 * @param {string} action - "click" | "move" | "type" | "key"
 * @param {object} input - tool input; reads `x`,`y` (click/move), `text` (type), `keys` (key)
 * @returns {{ ok: boolean, msg: string }} outcome plus a message describing it (or the failure)
 */
function pointerOrKeyboard(action, input) {
    // null / undefined / "" count as "missing": Number(null) and Number("") are 0, which used to turn an
    // absent coordinate into a real click at 0,0.
    const coord = (v) => (v == null || v === "" ? NaN : Math.round(Number(v)));
    const x = coord(input.x);
    const y = coord(input.y);
    const mac = process.platform === "darwin";
    const lin = process.platform === "linux";
    const win = process.platform === "win32";
    if (action === "click" || action === "move") {
        if (!Number.isFinite(x) || !Number.isFinite(y))
            return { ok: false, msg: `${action} needs x,y` };
        if (mac) {
            if (!has("cliclick"))
                return { ok: false, msg: "cliclick not found — install with `brew install cliclick`" };
            const r = run("cliclick", [`${action === "click" ? "c" : "m"}:${x},${y}`]);
            return { ok: r.ok, msg: r.ok ? `${action} at ${x},${y}` : r.out };
        }
        if (lin) {
            if (!has("xdotool"))
                return { ok: false, msg: "xdotool not found" };
            const r = run("xdotool", action === "click" ? ["mousemove", `${x}`, `${y}`, "click", "1"] : ["mousemove", `${x}`, `${y}`]);
            return { ok: r.ok, msg: r.ok ? `${action} at ${x},${y}` : r.out };
        }
        if (win) {
            const moved = ps(`Add-Type -AssemblyName System.Drawing;Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.Cursor]::Position=New-Object System.Drawing.Point(${x},${y})`);
            if (!moved.ok)
                return { ok: false, msg: moved.out };
            if (action === "click") {
                // Left button down (0x2) then up (0x4). The here-string markers must sit at the start of
                // their lines, so these template-literal lines are deliberately unindented.
                const clicked = ps(`Add-Type @"
using System;using System.Runtime.InteropServices;public class Ms{[DllImport("user32.dll")]public static extern void mouse_event(int f,int x,int y,int d,int e);}
"@; [Ms]::mouse_event(0x2,0,0,0,0);[Ms]::mouse_event(0x4,0,0,0,0)`);
                // The click result used to be discarded, so a failed click was reported as a success.
                if (!clicked.ok)
                    return { ok: false, msg: clicked.out || "click failed" };
            }
            return { ok: true, msg: `${action} at ${x},${y}` };
        }
    }
    if (action === "type") {
        const text = String(input.text ?? "");
        if (!text)
            return { ok: false, msg: "type needs text" };
        // IME-safe path: set the clipboard and paste. Keystroke injection (below) is intercepted/garbled by a
        // CJK input method and can't enter Chinese/emoji reliably; pasting is immune and Unicode-safe.
        if (setClipboard(text)) {
            if (mac && has("cliclick")) {
                const r = run("cliclick", ["kd:cmd", "t:v", "ku:cmd"]); // Cmd+V
                if (r.ok)
                    return { ok: true, msg: `pasted ${text.length} chars` };
            }
            else if (lin && has("xdotool")) {
                const r = run("xdotool", ["key", "ctrl+v"]);
                if (r.ok)
                    return { ok: true, msg: `pasted ${text.length} chars` };
            }
            else if (win) {
                const r = ps("Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait('^v')");
                if (r.ok)
                    return { ok: true, msg: `pasted ${text.length} chars` };
            }
        }
        // Fallback: keystroke injection (fine for ASCII when no IME is active).
        if (mac) {
            if (!has("cliclick"))
                return { ok: false, msg: "cliclick not found — install with `brew install cliclick`" };
            const r = run("cliclick", [`t:${text}`]);
            return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
        }
        if (lin) {
            if (!has("xdotool"))
                return { ok: false, msg: "xdotool not found" };
            const r = run("xdotool", ["type", "--clearmodifiers", text]);
            return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
        }
        if (win) {
            // Literal text: escape SendKeys metacharacters (otherwise "+", "^", "%" act as modifiers), then
            // embed it as a single-quoted PowerShell literal so `$(...)` / `"` in the text cannot run code.
            const r = ps(`Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait(${psLiteral(sendKeysLiteral(text))})`);
            return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
        }
    }
    if (action === "key") {
        const keys = String(input.keys ?? "");
        if (!keys)
            return { ok: false, msg: "key needs a key/combo" };
        if (keyIsBlocked(keys))
            return { ok: false, msg: `refused dangerous key combo: ${keys}` };
        if (mac) {
            if (!has("cliclick"))
                return { ok: false, msg: "cliclick not found — install with `brew install cliclick`" };
            const r = run("cliclick", [`kp:${keys}`]);
            return { ok: r.ok, msg: r.ok ? `pressed ${keys}` : r.out };
        }
        if (lin) {
            if (!has("xdotool"))
                return { ok: false, msg: "xdotool not found" };
            const r = run("xdotool", ["key", keys]);
            return { ok: r.ok, msg: r.ok ? `pressed ${keys}` : r.out };
        }
        if (win) {
            // `keys` is sent as given (its SendKeys metacharacters are the point), but still only as a
            // single-quoted PowerShell literal — never interpolated into the script.
            const r = ps(`Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait(${psLiteral(keys)})`);
            return { ok: r.ok, msg: r.ok ? `pressed ${keys}` : r.out };
        }
    }
    return { ok: false, msg: `unknown or unsupported action '${action}' on ${process.platform}` };
}
272
/**
 * Per-OS backend availability — for `hara doctor`.
 * @returns {string} one line listing this platform's screen-control tools with a ✓/✗ per tool
 */
export function computerBackends() {
    // ✓ when the command is on PATH, otherwise the given "missing" marker.
    const mark = (cmd, missing = "✗") => (has(cmd) ? "✓" : missing);
    switch (process.platform) {
        case "darwin":
            return `screencapture ✓ · cliclick ${mark("cliclick", "✗ (brew install cliclick)")}`;
        case "linux":
            return `scrot ${mark("scrot")} · xdotool ${mark("xdotool")}`;
        case "win32":
            return "PowerShell (built-in)";
        default:
            return `unsupported (${process.platform})`;
    }
}
282
// computer tool. Order of checks in run(): tier gate → `activate` (allowlist on the requested app) →
// frontmost-app allowlist for every pointer/keyboard action → screenshot → grounding (find / target) → act.
// Every backend outcome goes through ok()/fail() so the consecutive-failure circuit breaker stays accurate.
registerTool({
    name: "computer",
    description: "Control the screen to operate desktop software (not just the browser). ALWAYS `activate` the target app " +
        "FIRST (e.g. activate WeChat) — otherwise screenshots/clicks hit the terminal hara runs in, not the app. " +
        "Then prefer grounding over guessing pixels: pass `target` (e.g. 'the Send button') to click/move and it's " +
        "located by a vision model; or `find` to just get coordinates. Workflow: activate → screenshot → click a " +
        "target → re-screenshot to verify. When typing, type the ACTUAL text — never placeholders. Opt-in and " +
        "permission-gated (tier + per-app allowlist).",
    input_schema: {
        type: "object",
        properties: {
            action: { type: "string", enum: ["screenshot", "activate", "find", "click", "move", "type", "key"] },
            app: { type: "string", description: "app to bring to the foreground (activate) — e.g. 'WeChat'. Do this BEFORE screenshot/click so they hit the app, not the terminal." },
            target: { type: "string", description: "describe a UI element to locate (find) or click/move to — e.g. 'the Send button'. Preferred over x,y." },
            x: { type: "number", description: "x pixel (click/move; or use `target`)" },
            y: { type: "number", description: "y pixel (click/move; or use `target`)" },
            text: { type: "string", description: "text to type (type)" },
            keys: { type: "string", description: "key or combo, e.g. 'return', 'cmd+c' (key)" },
            focus: { type: "string", description: "screenshot only: what to look for — focuses the read" },
        },
        required: ["action"],
    },
    kind: "computer",
    async run(input, ctx) {
        const cfg = loadConfig();
        const tier = cfg.computerUse ?? "off"; // an unset tier means the feature was never opted into
        if (tier === "off")
            return "Screen control is off. Enable it: `hara config set computerUse read|click|full` (and `hara config set computerApps \"App Name, …\"` for the click/type allowlist).";
        const action = String(input.action ?? "");
        if (!actionAllowed(tier, action))
            return `'${action}' needs a higher tier (current computerUse=${tier}). Raise it with \`hara config set computerUse …\`.`;
        // Normalised allowlist: blank entries are dropped because "" is a substring of every name and
        // would silently allow every app.
        const allowlist = (Array.isArray(cfg.computerApps) ? cfg.computerApps : [])
            .map((a) => String(a).trim())
            .filter(Boolean);
        // Case-insensitive, either-direction substring match. An empty/undetectable name is NEVER allowed:
        // `entry.includes("")` is always true, so an unknown frontmost app used to pass this check.
        const isAllowlisted = (name) => {
            const wanted = String(name ?? "").trim().toLowerCase();
            if (!wanted)
                return false;
            return allowlist.some((entry) => {
                const e = entry.toLowerCase();
                return wanted.includes(e) || e.includes(wanted);
            });
        };
        // Bring the target app to the foreground first — without this, clicks land on the terminal hara runs in.
        if (action === "activate") {
            const app = String(input.app ?? input.target ?? "");
            if (!app)
                return "activate needs an `app` name (e.g. 'WeChat').";
            if (!isAllowlisted(app))
                return `Refused: "${app}" isn't in your allowlist (${allowlist.join(", ") || "empty"}). Add it: \`hara config set computerApps "${app}"\`.`;
            const r = activateApp(app);
            return r.ok ? ok(`✓ ${r.msg} — now screenshot/find/click to act on it`) : fail(r.msg);
        }
        if (action !== "screenshot" && action !== "find") {
            // per-app allowlist: only act when an allowlisted app is frontmost (the key guard against wrong-window clicks)
            if (!allowlist.length)
                return "No apps allowlisted — set `hara config set computerApps \"App Name, …\"` before clicking/typing.";
            const app = frontmostApp();
            if (!isAllowlisted(app))
                return `Refused: frontmost app "${app || "unknown"}" isn't in your allowlist (${allowlist.join(", ")}). Switch to an allowed app or update computerApps.`;
        }
        if (action === "screenshot") {
            const s = screenshot();
            if (s.error)
                return fail(`screenshot — ${s.error}`);
            if (ctx.describeImage) {
                try {
                    const desc = await ctx.describeImage(s.path, input.focus ? String(input.focus) : undefined);
                    if (desc)
                        return ok(`Screenshot (read via vision):\n${desc}`);
                }
                catch {
                    /* fall through to path */
                }
            }
            return ok(`Screenshot saved to ${s.path}. Configure a vision model so I can read it: \`hara config set visionModel <model>\`.`);
        }
        // Grounding: locate a described element and turn it into screen coordinates (more reliable than guessing
        // pixels from a text description). Used for `find`, and for click/move when given a `target` and no x,y.
        const needsLocate = action === "find" || ((action === "click" || action === "move") && input.target != null && (input.x == null || input.y == null));
        // Work on a copy when coordinates get filled in, so the caller's input object is left untouched.
        let args = input;
        if (needsLocate) {
            const target = String(input.target ?? "");
            if (!target)
                return action === "find" ? "find needs a `target` (what to locate)." : "click/move needs `x,y` or a `target`.";
            if (!ctx.locate)
                return "Grounding needs a vision model that can see images — set one: `hara config set visionModel <model>`.";
            const s = screenshot();
            if (s.error)
                return fail(`screenshot — ${s.error}`);
            let loc;
            try {
                loc = await ctx.locate(s.path, target);
            }
            catch (e) {
                // A rejected locate call is a failed action like any other: report it and count it.
                return fail(`locating "${target}" failed — ${e?.message ?? e}`);
            }
            // locate is expected to yield fractional x/y; anything non-numeric cannot be converted to a click.
            if (!loc || !Number.isFinite(Number(loc.x)) || !Number.isFinite(Number(loc.y)))
                return fail(`couldn't locate "${target}" on screen — try a screenshot first, or rephrase the target`);
            const size = screenSize();
            if (!size)
                return fail(`located "${target}" but couldn't read the screen size to convert coordinates`);
            const gx = Math.round(loc.x * size.w);
            const gy = Math.round(loc.y * size.h);
            if (action === "find")
                return ok(`"${target}" is at ~${gx},${gy} (${Math.round(loc.x * 100)}% across, ${Math.round(loc.y * 100)}% down).`);
            args = { ...input, x: gx, y: gy };
        }
        const r = pointerOrKeyboard(action, args);
        return r.ok ? ok(`✓ ${r.msg}${needsLocate ? ` (located "${input.target}")` : ""}`) : fail(r.msg);
    },
});
@@ -0,0 +1,62 @@
1
+ import { readFile, writeFile } from "node:fs/promises";
2
+ import { isAbsolute, resolve } from "node:path";
3
+ import { registerTool } from "./registry.js";
4
+ import { nearestPaths } from "../fs-walk.js";
5
+ import { emitDiff } from "../diff.js";
6
+ import { applyEdits } from "./apply-core.js";
7
+ import { recordEdit } from "../undo.js";
8
// edit_file — exact-string replacement inside an existing file. The edits are applied by applyEdits; on any
// error nothing is written. A successful edit emits a diff and records the prior contents for undo.
registerTool({
    name: "edit_file",
    description: "Edit an existing file by replacing exact strings. Provide a single `old_string`/`new_string`, " +
        "or `edits` (an array of {old_string,new_string,replace_all?}) applied in order. Each `old_string` " +
        "must match exactly and appear once (include surrounding context) unless `replace_all` is true. " +
        "Quote variants (straight/curly) are matched leniently. Use write_file to create a new file, or " +
        "apply_patch to change several files at once.",
    input_schema: {
        type: "object",
        properties: {
            path: { type: "string" },
            old_string: { type: "string", description: "exact text to replace (verbatim, incl. whitespace)" },
            new_string: { type: "string", description: "replacement text" },
            replace_all: { type: "boolean", description: "replace every occurrence (default false)" },
            edits: {
                type: "array",
                description: "multiple edits applied in sequence (alternative to a single old/new)",
                items: {
                    type: "object",
                    properties: {
                        old_string: { type: "string" },
                        new_string: { type: "string" },
                        replace_all: { type: "boolean" },
                    },
                    required: ["old_string", "new_string"],
                },
            },
        },
        required: ["path"],
    },
    kind: "edit",
    async run(input, ctx) {
        const target = isAbsolute(input.path) ? input.path : resolve(ctx.cwd, input.path);
        // A non-empty `edits` array wins; otherwise the single old/new pair becomes a one-element list.
        const useBatch = Array.isArray(input.edits) && input.edits.length;
        const edits = useBatch
            ? input.edits
            : [{ old_string: input.old_string, new_string: input.new_string, replace_all: input.replace_all }];
        let original;
        try {
            original = await readFile(target, "utf8");
        }
        catch {
            const suggestions = nearestPaths(ctx.cwd, input.path);
            const hint = suggestions.length ? ` Did you mean: ${suggestions.join(", ")}?` : "";
            return `Error: cannot read ${input.path} (use write_file to create a new file).${hint}`;
        }
        const res = applyEdits(original, edits);
        if ("error" in res) {
            return `Error: ${res.error} in ${input.path}. No changes written.`;
        }
        await writeFile(target, res.text, "utf8");
        emitDiff(input.path, original, res.text, ctx.ui);
        recordEdit([{ path: input.path, absPath: target, before: original }]);
        // "1 edit" / "2 edits"
        const count = (n, noun) => `${n} ${noun}${n === 1 ? "" : "s"}`;
        const suffix = res.fuzzy ? " (quote-normalized)" : "";
        return `Edited ${input.path}: ${count(edits.length, "edit")}, ${count(res.total, "replacement")}${suffix}.`;
    },
});