@nanhara/hara 0.33.0 → 0.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +216 -1
- package/README.md +15 -4
- package/dist/agent/loop.js +16 -1
- package/dist/config.js +4 -2
- package/dist/hooks.js +64 -0
- package/dist/index.js +331 -77
- package/dist/notify.js +42 -0
- package/dist/org/planner.js +19 -0
- package/dist/plugins/plugins.js +14 -0
- package/dist/providers/anthropic.js +21 -11
- package/dist/search/semindex.js +62 -11
- package/dist/session/store.js +14 -0
- package/dist/tools/computer.js +156 -16
- package/dist/tools/todo.js +51 -0
- package/dist/tools/web.js +97 -0
- package/dist/tui/App.js +55 -7
- package/dist/tui/InputBox.js +2 -2
- package/dist/vision.js +52 -3
- package/package.json +3 -2
- package/plugins/browser/.hara-plugin/plugin.json +9 -0
- package/plugins/browser/skills/web/SKILL.md +27 -0
- package/plugins/chrome/.hara-plugin/plugin.json +9 -0
- package/plugins/chrome/skills/chrome/SKILL.md +26 -0
package/dist/org/planner.js
CHANGED
|
@@ -91,6 +91,25 @@ export function topoOrder(atoms) {
|
|
|
91
91
|
return { error: "plan has a dependency cycle — cannot sequence" };
|
|
92
92
|
return { ok: order };
|
|
93
93
|
}
|
|
94
|
+
/**
 * Group atoms into dependency "waves": every atom in a wave depends only on atoms in EARLIER waves, so a
 * wave's atoms are mutually independent and may run concurrently. Atom order is preserved inside a wave.
 *
 * A dependency that names an id not present in `atoms` is ignored (treated as already satisfied), and an
 * atom without a `deps` array is treated as having no dependencies.
 *
 * @param {Array<{id: string, deps?: string[]}>} atoms  plan atoms to sequence
 * @returns {{ok: Array<Array<object>>} | {error: string}}  the waves, or an error when a cycle blocks progress
 */
export function topoWaves(atoms) {
    const remaining = new Map(atoms.map((a) => [a.id, a]));
    // Snapshot of every id in the plan — `remaining` shrinks as waves are emitted, this does not.
    const known = new Set(remaining.keys());
    const done = new Set();
    const waves = [];
    while (remaining.size) {
        const wave = [...remaining.values()].filter((a) => (a.deps ?? []).every((d) => !known.has(d) || done.has(d)));
        // Nothing became runnable although atoms remain → they are waiting on each other.
        if (!wave.length)
            return { error: "plan has a dependency cycle — cannot sequence" };
        // Mark the wave finished only AFTER it was computed, so its members never unlock each other.
        for (const a of wave) {
            remaining.delete(a.id);
            done.add(a.id);
        }
        waves.push(wave);
    }
    return { ok: waves };
}
|
|
94
113
|
/** Prompt to execute a single atom in the context of the overall plan. */
|
|
95
114
|
export function atomPrompt(atom, plan, done) {
|
|
96
115
|
const priors = done.length ? `Already completed: ${done.map((a) => a.title).join("; ")}.\n` : "";
|
package/dist/plugins/plugins.js
CHANGED
|
@@ -66,6 +66,20 @@ export function pluginMcpServers() {
|
|
|
66
66
|
Object.assign(out, p.manifest.mcpServers ?? {});
|
|
67
67
|
return out;
|
|
68
68
|
}
|
|
69
|
+
/**
 * Lifecycle hooks contributed by enabled plugins (appended after user-config hooks).
 * Walks the enabled plugins in order and concatenates each manifest's `PreToolUse` / `PostToolUse` arrays;
 * manifests without a `hooks` object, or with a non-array entry for an event, contribute nothing for it.
 *
 * @returns {{PreToolUse: Array, PostToolUse: Array}}
 */
export function pluginHooks() {
    const collected = { PreToolUse: [], PostToolUse: [] };
    for (const plugin of enabledPlugins()) {
        const declared = plugin.manifest.hooks;
        if (declared && typeof declared === "object") {
            for (const event of ["PreToolUse", "PostToolUse"]) {
                const entries = declared[event];
                if (Array.isArray(entries))
                    collected[event].push(...entries);
            }
        }
    }
    return collected;
}
|
|
69
83
|
/** Install a plugin from `file:<path>`, `github:<owner/repo>`, or `git:<url>` into ~/.hara/plugins/<name>. */
|
|
70
84
|
export function installPlugin(source) {
|
|
71
85
|
mkdirSync(pluginsDir(), { recursive: true });
|
|
@@ -2,6 +2,19 @@ import Anthropic from "@anthropic-ai/sdk";
|
|
|
2
2
|
import { imageToBase64 } from "../images.js";
|
|
3
3
|
export function toAnthropic(history) {
|
|
4
4
|
const msgs = [];
|
|
5
|
+
// Append a user message, merging into the previous one if it's also `user` — Anthropic requires
|
|
6
|
+
// alternating roles, and tool-results map to a user message, so a mid-turn-injected user message
|
|
7
|
+
// (type-ahead steering) lands right after one. Merging keeps the request valid; dormant otherwise.
|
|
8
|
+
const pushUser = (content) => {
|
|
9
|
+
const last = msgs[msgs.length - 1];
|
|
10
|
+
if (last && last.role === "user") {
|
|
11
|
+
const toBlocks = (c) => typeof c === "string" ? [{ type: "text", text: c }] : c;
|
|
12
|
+
last.content = [...toBlocks(last.content), ...toBlocks(content)];
|
|
13
|
+
}
|
|
14
|
+
else {
|
|
15
|
+
msgs.push({ role: "user", content });
|
|
16
|
+
}
|
|
17
|
+
};
|
|
5
18
|
for (const m of history) {
|
|
6
19
|
if (m.role === "user") {
|
|
7
20
|
if (m.images?.length) {
|
|
@@ -13,10 +26,10 @@ export function toAnthropic(history) {
|
|
|
13
26
|
if (data)
|
|
14
27
|
blocks.push({ type: "image", source: { type: "base64", media_type: img.mediaType, data } });
|
|
15
28
|
}
|
|
16
|
-
|
|
29
|
+
pushUser(blocks.length ? blocks : m.content);
|
|
17
30
|
}
|
|
18
31
|
else {
|
|
19
|
-
|
|
32
|
+
pushUser(m.content);
|
|
20
33
|
}
|
|
21
34
|
}
|
|
22
35
|
else if (m.role === "assistant") {
|
|
@@ -28,15 +41,12 @@ export function toAnthropic(history) {
|
|
|
28
41
|
msgs.push({ role: "assistant", content: content.length ? content : [{ type: "text", text: "(no output)" }] });
|
|
29
42
|
}
|
|
30
43
|
else {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
is_error: r.isError,
|
|
38
|
-
})),
|
|
39
|
-
});
|
|
44
|
+
pushUser(m.results.map((r) => ({
|
|
45
|
+
type: "tool_result",
|
|
46
|
+
tool_use_id: r.id,
|
|
47
|
+
content: r.content,
|
|
48
|
+
is_error: r.isError,
|
|
49
|
+
})));
|
|
40
50
|
}
|
|
41
51
|
}
|
|
42
52
|
return msgs;
|
package/dist/search/semindex.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// code-asset / repo / knowledge-base scale (hundreds–low-thousands of chunks); the optional zvec adapter is
|
|
3
3
|
// the scale-up path later. Markdown/code stays the SSOT; this index is a derived, rebuildable, gitignored
|
|
4
4
|
// artifact. The embedder is injected (see embed.ts) so the store + chunking are testable without a model.
|
|
5
|
-
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
5
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, statSync } from "node:fs";
|
|
6
6
|
import { homedir } from "node:os";
|
|
7
7
|
import { join, dirname } from "node:path";
|
|
8
8
|
import { findProjectRoot } from "../context/agents-md.js";
|
|
@@ -15,14 +15,22 @@ export function indexPath(name, cwd) {
|
|
|
15
15
|
return join(findProjectRoot(cwd), ".hara", "index", "repo.json");
|
|
16
16
|
return join(homedir(), ".hara", "index", `${name}.json`);
|
|
17
17
|
}
|
|
18
|
+
/** Modification time of `p` in milliseconds (`mtimeMs`), or 0 when the path cannot be stat'ed. */
function statMtime(p) {
    let stamp = 0;
    try {
        stamp = statSync(p).mtimeMs;
    }
    catch {
        /* missing / unreadable → keep 0 */
    }
    return stamp;
}
|
|
18
26
|
/** Split a file into chunks: Markdown by `#` headings, code by ~40-line windows. Heuristic, zero-dep —
|
|
19
|
-
* also the substrate embeddings reuse. */
|
|
20
|
-
export function chunkText(text, file, source) {
|
|
27
|
+
* also the substrate embeddings reuse. `mtime` (when given) is stamped on every chunk for incremental reuse. */
|
|
28
|
+
export function chunkText(text, file, source, mtime) {
|
|
21
29
|
const out = [];
|
|
22
30
|
const push = (body, n) => {
|
|
23
31
|
const t = body.trim();
|
|
24
32
|
if (t.length >= 12)
|
|
25
|
-
out.push({ id: `${file}#${n}`, text: t.slice(0, 2000), file, source });
|
|
33
|
+
out.push({ id: `${file}#${n}`, text: t.slice(0, 2000), file, source, mtime });
|
|
26
34
|
};
|
|
27
35
|
if (/\.(md|mdx)$/i.test(file)) {
|
|
28
36
|
const parts = text.split(/^(?=#{1,6}\s)/m);
|
|
@@ -49,23 +57,66 @@ function cosine(a, b) {
|
|
|
49
57
|
}
|
|
50
58
|
return na && nb ? dot / (Math.sqrt(na) * Math.sqrt(nb)) : 0;
|
|
51
59
|
}
|
|
52
|
-
/**
|
|
60
|
+
/** Group items by their `file` field, keeping first-seen file order and per-file item order. */
function groupChunksByFile(list) {
    const groups = new Map();
    for (const it of list) {
        const arr = groups.get(it.file);
        if (arr)
            arr.push(it);
        else
            groups.set(it.file, [it]);
    }
    return groups;
}
/**
 * Build/refresh the index. **Incremental**: a file keeps its existing vectors (no re-embed) only when the
 * embedding model is unchanged, its mtime is known and equal to the stored one on every stored chunk, AND the
 * stored chunk count equals the fresh chunk count. The count check matters because chunks whose embedding
 * came back empty are dropped on write — without it, a partially embedded file would be "reused" forever.
 * New/changed files are embedded in batches; files absent from `chunks` drop out of the index. A changed
 * embedding model forces a full rebuild (old vectors aren't comparable).
 *
 * @param {string} name    index name (resolved to a path by `indexPath`)
 * @param {Array<{id: string, text: string, file: string, source: string, mtime?: number}>} chunks
 * @param {(texts: string[]) => Promise<Array>} embed   returns one vector per input text (falsy = failed)
 * @param {string} [cwd]
 * @param {string} [model="embed"]  embedding-model tag stored alongside the vectors
 * @returns {Promise<{total: number, embedded: number, reused: number}>} `total` items written,
 *   `embedded` chunks that were embedded and stored in this run, `reused` chunks carried over unchanged
 */
export async function buildIndex(name, chunks, embed, cwd, model = "embed") {
    const p = indexPath(name, cwd);
    // Load the previous index → reuse vectors for unchanged files.
    let prevByFile = new Map();
    let prevModel = "";
    if (existsSync(p)) {
        try {
            const old = JSON.parse(readFileSync(p, "utf8"));
            prevModel = old.model;
            prevByFile = groupChunksByFile(old.items ?? []);
        }
        catch {
            /* corrupt index → full rebuild */
            prevByFile = new Map();
        }
    }
    const sameModel = prevModel === model;
    const items = [];
    const toEmbed = [];
    let reused = 0;
    for (const [file, fchunks] of groupChunksByFile(chunks)) {
        const mtime = fchunks[0].mtime ?? 0;
        const prev = prevByFile.get(file);
        const unchanged = sameModel &&
            mtime > 0 &&
            prev?.length === fchunks.length &&
            prev.every((it) => it.mtime === mtime);
        if (unchanged) {
            items.push(...prev); // file unchanged → keep its vectors
            reused += prev.length;
        }
        else {
            toEmbed.push(...fchunks);
        }
    }
    const B = 64;
    let embedded = 0;
    for (let i = 0; i < toEmbed.length; i += B) {
        const batch = toEmbed.slice(i, i + B);
        const vecs = await embed(batch.map((c) => c.text));
        batch.forEach((c, j) => {
            // A missing vector means the embedder failed for this chunk — leave it out so the next build retries.
            if (vecs[j]) {
                items.push({ ...c, vec: vecs[j] });
                embedded += 1;
            }
        });
    }
    const dir = dirname(p);
    mkdirSync(dir, { recursive: true });
    // The index is derived + rebuildable (and may embed file contents) — never let it be committed.
    if (!existsSync(join(dir, ".gitignore")))
        writeFileSync(join(dir, ".gitignore"), "*\n", "utf8");
    writeFileSync(p, JSON.stringify({ model, items }), "utf8");
    return { total: items.length, embedded, reused };
}
|
|
70
121
|
export function indexExists(name, cwd) {
|
|
71
122
|
return existsSync(indexPath(name, cwd));
|
|
@@ -91,7 +142,7 @@ export function collectDirChunks(dir, source) {
|
|
|
91
142
|
}
|
|
92
143
|
if (isProbablyBinary(buf))
|
|
93
144
|
continue;
|
|
94
|
-
chunks.push(...chunkText(buf.toString("utf8"), abs, source));
|
|
145
|
+
chunks.push(...chunkText(buf.toString("utf8"), abs, source, statMtime(abs)));
|
|
95
146
|
}
|
|
96
147
|
return chunks;
|
|
97
148
|
}
|
|
@@ -113,7 +164,7 @@ export function collectRepoChunks(root) {
|
|
|
113
164
|
}
|
|
114
165
|
if (isProbablyBinary(buf))
|
|
115
166
|
continue;
|
|
116
|
-
chunks.push(...chunkText(buf.toString("utf8"), rel, "repo"));
|
|
167
|
+
chunks.push(...chunkText(buf.toString("utf8"), rel, "repo", statMtime(abs)));
|
|
117
168
|
}
|
|
118
169
|
return chunks;
|
|
119
170
|
}
|
package/dist/session/store.js
CHANGED
|
@@ -56,6 +56,20 @@ export function titleFrom(history) {
|
|
|
56
56
|
const firstUser = history.find((h) => h.role === "user");
|
|
57
57
|
return deriveTitle(firstUser && firstUser.role === "user" ? firstUser.content : "");
|
|
58
58
|
}
|
|
59
|
+
/**
 * Normalize a phrase to an ASCII kebab-case slug (lowercase, a–z0–9 + single hyphens, capped). Non-ASCII
 * is dropped — used to clean a model-generated English session name.
 *
 * @param {unknown} text  the phrase; null/undefined count as empty, other non-strings are stringified
 * @param {number} [max=40]  maximum slug length (a hyphen left dangling by the cut is removed)
 * @returns {string} the slug, or "" if nothing ASCII remains
 */
export function slugify(text, max = 40) {
    return String(text ?? "")
        .toLowerCase()
        .replace(/[^a-z0-9\s-]/g, "")
        .trim()
        // any run of whitespace and/or hyphens becomes exactly one hyphen
        .replace(/[\s-]+/g, "-")
        .replace(/^-+/, "")
        .slice(0, max)
        .replace(/-+$/, "");
}
|
|
59
73
|
export function saveSession(meta, history) {
|
|
60
74
|
meta.updatedAt = new Date().toISOString();
|
|
61
75
|
const data = { meta, history };
|
package/dist/tools/computer.js
CHANGED
|
@@ -11,7 +11,7 @@ import { join } from "node:path";
|
|
|
11
11
|
import { registerTool } from "./registry.js";
|
|
12
12
|
import { loadConfig } from "../config.js";
|
|
13
13
|
const RANK = { off: 0, read: 1, click: 2, full: 3 };
|
|
14
|
-
const ACTION_MIN = { screenshot: "read", move: "click", click: "click", type: "full", key: "full" };
|
|
14
|
+
const ACTION_MIN = { screenshot: "read", find: "read", activate: "click", move: "click", click: "click", type: "full", key: "full" };
|
|
15
15
|
// dangerous combos refused even at full tier (quit / close / delete / task-switch-kill)
|
|
16
16
|
const KEY_BLOCK = /(?:\b(cmd|command|ctrl|control|alt|option|win|super|meta)\b.*\+.*\b(q|w|delete|del|f4|escape|esc)\b)|ctrl\+alt\+(?:delete|del|backspace)/i;
|
|
17
17
|
/** Whether the configured tier permits the action. Exported for tests. */
|
|
@@ -22,6 +22,25 @@ export function actionAllowed(tier, action) {
|
|
|
22
22
|
export function keyIsBlocked(keys) {
|
|
23
23
|
return KEY_BLOCK.test(keys);
|
|
24
24
|
}
|
|
25
|
+
// Circuit breaker (learned from codex): caps how many screen-control actions may fail back-to-back, so the
// agent can't loop forever on a broken setup. Any success clears the streak; the FAIL_LIMIT-th failure in a
// row yields a clear stop message (with how to fix it) and starts a fresh streak.
const FAIL_LIMIT = 3;
let consecFails = 0;
/** Clear the consecutive-failure streak. */
export function resetComputerFails() {
    consecFails = 0;
}
/** Record a success: clears the streak and passes `msg` through unchanged. */
function ok(msg) {
    consecFails = 0;
    return msg;
}
/** Record a failure: returns either a "Failed … [n/limit]" notice or, on the limit-th in a row, the stop message. */
function fail(msg) {
    const streak = consecFails + 1;
    if (streak < FAIL_LIMIT) {
        consecFails = streak;
        return `Failed: ${msg} [${streak}/${FAIL_LIMIT} before I stop]`;
    }
    // Limit reached — start counting afresh for whatever the user asks next.
    consecFails = 0;
    return `⛔ Stopping screen control — ${FAIL_LIMIT} actions failed in a row (last: ${msg}). Most likely a missing macOS permission (Accessibility for click/type, Screen Recording for screenshots) or the target app isn't reachable. Fix that, then ask me to try again — I won't keep retrying blindly.`;
}
|
|
25
44
|
function run(cmd, args) {
|
|
26
45
|
try {
|
|
27
46
|
const r = spawnSync(cmd, args, { encoding: "utf8", timeout: 15000 });
|
|
@@ -35,6 +54,23 @@ function has(cmd) {
|
|
|
35
54
|
return (process.platform === "win32" ? run("where", [cmd]) : run("which", [cmd])).ok;
|
|
36
55
|
}
|
|
37
56
|
const ps = (script) => run("powershell", ["-NoProfile", "-Command", script]);
|
|
57
|
+
/**
 * Put text on the OS clipboard (so `type` can paste it — IME-safe + Unicode-safe, unlike keystroke injection).
 *
 * @param {string} text
 * @returns {boolean} true when a clipboard helper ran and exited with status 0; false when none is available,
 *   it failed, or it timed out (5s)
 */
function setClipboard(text) {
    const base = { timeout: 5000 };
    try {
        if (process.platform === "darwin")
            return spawnSync("pbcopy", [], { ...base, input: text }).status === 0;
        if (process.platform === "win32") {
            // clip.exe does not read piped stdin as UTF-8, so CJK/emoji sent as UTF-8 arrive garbled;
            // UTF-16LE input is the encoding it accepts for Unicode text.
            return spawnSync("clip", [], { ...base, input: Buffer.from(text, "utf16le") }).status === 0;
        }
        // wl-copy / xclip stay alive in the background to serve the selection. Their stdout/stderr are never
        // read here, so don't hand them pipes — an inherited open pipe can keep spawnSync waiting until timeout.
        const detached = { ...base, input: text, stdio: ["pipe", "ignore", "ignore"] };
        if (has("wl-copy"))
            return spawnSync("wl-copy", [], detached).status === 0;
        if (has("xclip"))
            return spawnSync("xclip", ["-selection", "clipboard"], detached).status === 0;
    }
    catch {
        /* fall through */
    }
    return false;
}
|
|
38
74
|
let seq = 0;
|
|
39
75
|
function tmpShot() {
|
|
40
76
|
seq += 1;
|
|
@@ -73,6 +109,49 @@ function screenshot() {
|
|
|
73
109
|
}
|
|
74
110
|
return { path: out };
|
|
75
111
|
}
|
|
112
|
+
/**
 * Bring an app to the foreground so screenshots/clicks land on IT, not the terminal hara runs in.
 *
 * @param {string} app  application / window name to activate
 * @returns {{ok: boolean, msg: string}} `msg` is "activated <app>" on success, otherwise the backend's
 *   output or a "couldn't activate" notice
 */
function activateApp(app) {
    const report = (r, hint = "") => ({ ok: r.ok, msg: r.ok ? `activated ${app}` : r.out || `couldn't activate ${app}${hint}` });
    if (process.platform === "darwin") {
        // `open -a` reliably launches+foregrounds; `osascript … activate` often leaves another window on top.
        return report(run("open", ["-a", app]));
    }
    if (process.platform === "win32") {
        // SECURITY: `app` is caller-supplied. Inside a double-quoted PowerShell string `$(...)` and `$var`
        // are expanded, so it must be passed as a single-quoted literal with every quote character doubled
        // (PowerShell also accepts the typographic single quotes as string delimiters).
        const literal = `'${app.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&")}'`;
        // AppActivate returns a boolean; turn `False` into a non-zero exit code so a window that was not
        // found is not reported as success (assumes `run` derives `ok` from the exit status — TODO confirm).
        return report(ps(`if (-not (New-Object -ComObject WScript.Shell).AppActivate(${literal})) { exit 1 }`));
    }
    if (process.platform === "linux") {
        const r = has("wmctrl") ? run("wmctrl", ["-a", app]) : run("xdotool", ["search", "--name", app, "windowactivate"]);
        return report(r, " (need wmctrl/xdotool)");
    }
    return { ok: false, msg: `activate unsupported on ${process.platform}` };
}
|
|
129
|
+
/**
 * Logical screen size in the coordinate space the click backends use (points on mac, pixels on win/linux).
 * Grounding returns 0..1 fractions, so click = fraction × this.
 *
 * @returns {{w: number, h: number} | null} null if the size can't be detected (unsupported platform,
 *   unparsable output, or the probe threw)
 */
function screenSize() {
    // "W H" text → {w, h}; null when either number is missing, zero or NaN.
    const parsePair = (out) => {
        const [w, h] = out.trim().split(/\s+/).map(Number);
        return w && h ? { w, h } : null;
    };
    try {
        switch (process.platform) {
            case "darwin": {
                const bounds = run("osascript", ["-e", 'tell application "Finder" to get bounds of window of desktop']);
                const nums = bounds.out.match(/-?\d+/g);
                // bounds come back as four integers; the 3rd and 4th are used as width and height
                return nums && nums.length >= 4 ? { w: Number(nums[2]), h: Number(nums[3]) } : null;
            }
            case "linux":
                return parsePair(run("xdotool", ["getdisplaygeometry"]).out);
            case "win32":
                return parsePair(ps('Add-Type -AssemblyName System.Windows.Forms; $b=[System.Windows.Forms.Screen]::PrimaryScreen.Bounds; "$($b.Width) $($b.Height)"').out);
            default:
                return null;
        }
    }
    catch {
        return null;
    }
}
|
|
76
155
|
/** Name of the frontmost application/window (for the allowlist check). "" if undetectable. */
|
|
77
156
|
function frontmostApp() {
|
|
78
157
|
if (process.platform === "darwin") {
|
|
@@ -128,21 +207,41 @@ using System;using System.Runtime.InteropServices;public class Ms{[DllImport("us
|
|
|
128
207
|
const text = String(input.text ?? "");
|
|
129
208
|
if (!text)
|
|
130
209
|
return { ok: false, msg: "type needs text" };
|
|
210
|
+
// IME-safe path: set the clipboard and paste. Keystroke injection (below) is intercepted/garbled by a
|
|
211
|
+
// CJK input method and can't enter Chinese/emoji reliably; pasting is immune and Unicode-safe.
|
|
212
|
+
if (setClipboard(text)) {
|
|
213
|
+
if (mac && has("cliclick")) {
|
|
214
|
+
const r = run("cliclick", ["kd:cmd", "t:v", "ku:cmd"]); // Cmd+V
|
|
215
|
+
if (r.ok)
|
|
216
|
+
return { ok: true, msg: `pasted ${text.length} chars` };
|
|
217
|
+
}
|
|
218
|
+
else if (lin && has("xdotool")) {
|
|
219
|
+
const r = run("xdotool", ["key", "ctrl+v"]);
|
|
220
|
+
if (r.ok)
|
|
221
|
+
return { ok: true, msg: `pasted ${text.length} chars` };
|
|
222
|
+
}
|
|
223
|
+
else if (win) {
|
|
224
|
+
const r = ps("Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait('^v')");
|
|
225
|
+
if (r.ok)
|
|
226
|
+
return { ok: true, msg: `pasted ${text.length} chars` };
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
// Fallback: keystroke injection (fine for ASCII when no IME is active).
|
|
131
230
|
if (mac) {
|
|
132
231
|
if (!has("cliclick"))
|
|
133
232
|
return { ok: false, msg: "cliclick not found — install with `brew install cliclick`" };
|
|
134
233
|
const r = run("cliclick", [`t:${text}`]);
|
|
135
|
-
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars` : r.out };
|
|
234
|
+
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
|
|
136
235
|
}
|
|
137
236
|
if (lin) {
|
|
138
237
|
if (!has("xdotool"))
|
|
139
238
|
return { ok: false, msg: "xdotool not found" };
|
|
140
239
|
const r = run("xdotool", ["type", "--clearmodifiers", text]);
|
|
141
|
-
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars` : r.out };
|
|
240
|
+
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
|
|
142
241
|
}
|
|
143
242
|
if (win) {
|
|
144
243
|
const r = ps(`Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait(${JSON.stringify(text)})`);
|
|
145
|
-
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars` : r.out };
|
|
244
|
+
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
|
|
146
245
|
}
|
|
147
246
|
}
|
|
148
247
|
if (action === "key") {
|
|
@@ -182,17 +281,23 @@ export function computerBackends() {
|
|
|
182
281
|
}
|
|
183
282
|
registerTool({
|
|
184
283
|
name: "computer",
|
|
185
|
-
description: "Control the screen to operate desktop software (not just the browser)
|
|
186
|
-
"
|
|
187
|
-
"
|
|
284
|
+
description: "Control the screen to operate desktop software (not just the browser). ALWAYS `activate` the target app " +
|
|
285
|
+
"FIRST (e.g. activate WeChat) — otherwise screenshots/clicks hit the terminal hara runs in, not the app. " +
|
|
286
|
+
"Then prefer grounding over guessing pixels: pass `target` (e.g. 'the Send button') to click/move and it's " +
|
|
287
|
+
"located by a vision model; or `find` to just get coordinates. Workflow: activate → screenshot → click a " +
|
|
288
|
+
"target → re-screenshot to verify. When typing, type the ACTUAL text — never placeholders. Opt-in and " +
|
|
289
|
+
"permission-gated (tier + per-app allowlist).",
|
|
188
290
|
input_schema: {
|
|
189
291
|
type: "object",
|
|
190
292
|
properties: {
|
|
191
|
-
action: { type: "string", enum: ["screenshot", "click", "move", "type", "key"] },
|
|
192
|
-
|
|
193
|
-
|
|
293
|
+
action: { type: "string", enum: ["screenshot", "activate", "find", "click", "move", "type", "key"] },
|
|
294
|
+
app: { type: "string", description: "app to bring to the foreground (activate) — e.g. 'WeChat'. Do this BEFORE screenshot/click so they hit the app, not the terminal." },
|
|
295
|
+
target: { type: "string", description: "describe a UI element to locate (find) or click/move to — e.g. 'the Send button'. Preferred over x,y." },
|
|
296
|
+
x: { type: "number", description: "x pixel (click/move; or use `target`)" },
|
|
297
|
+
y: { type: "number", description: "y pixel (click/move; or use `target`)" },
|
|
194
298
|
text: { type: "string", description: "text to type (type)" },
|
|
195
299
|
keys: { type: "string", description: "key or combo, e.g. 'return', 'cmd+c' (key)" },
|
|
300
|
+
focus: { type: "string", description: "screenshot only: what to look for — focuses the read" },
|
|
196
301
|
},
|
|
197
302
|
required: ["action"],
|
|
198
303
|
},
|
|
@@ -205,7 +310,17 @@ registerTool({
|
|
|
205
310
|
const action = String(input.action ?? "");
|
|
206
311
|
if (!actionAllowed(tier, action))
|
|
207
312
|
return `'${action}' needs a higher tier (current computerUse=${tier}). Raise it with \`hara config set computerUse …\`.`;
|
|
208
|
-
|
|
313
|
+
// Bring the target app to the foreground first — without this, clicks land on the terminal hara runs in.
|
|
314
|
+
if (action === "activate") {
|
|
315
|
+
const app = String(input.app ?? input.target ?? "");
|
|
316
|
+
if (!app)
|
|
317
|
+
return "activate needs an `app` name (e.g. 'WeChat').";
|
|
318
|
+
if (!cfg.computerApps.some((a) => app.toLowerCase().includes(a.toLowerCase()) || a.toLowerCase().includes(app.toLowerCase())))
|
|
319
|
+
return `Refused: "${app}" isn't in your allowlist (${cfg.computerApps.join(", ") || "empty"}). Add it: \`hara config set computerApps "${app}"\`.`;
|
|
320
|
+
const r = activateApp(app);
|
|
321
|
+
return r.ok ? ok(`✓ ${r.msg} — now screenshot/find/click to act on it`) : fail(r.msg);
|
|
322
|
+
}
|
|
323
|
+
if (action !== "screenshot" && action !== "find") {
|
|
209
324
|
// per-app allowlist: only act when an allowlisted app is frontmost (the key guard against wrong-window clicks)
|
|
210
325
|
if (!cfg.computerApps.length)
|
|
211
326
|
return "No apps allowlisted — set `hara config set computerApps \"App Name, …\"` before clicking/typing.";
|
|
@@ -217,20 +332,45 @@ registerTool({
|
|
|
217
332
|
if (action === "screenshot") {
|
|
218
333
|
const s = screenshot();
|
|
219
334
|
if (s.error)
|
|
220
|
-
return `
|
|
335
|
+
return fail(`screenshot — ${s.error}`);
|
|
221
336
|
if (ctx.describeImage) {
|
|
222
337
|
try {
|
|
223
|
-
const desc = await ctx.describeImage(s.path);
|
|
338
|
+
const desc = await ctx.describeImage(s.path, input.focus ? String(input.focus) : undefined);
|
|
224
339
|
if (desc)
|
|
225
|
-
return `Screenshot (read via vision):\n${desc}
|
|
340
|
+
return ok(`Screenshot (read via vision):\n${desc}`);
|
|
226
341
|
}
|
|
227
342
|
catch {
|
|
228
343
|
/* fall through to path */
|
|
229
344
|
}
|
|
230
345
|
}
|
|
231
|
-
return `Screenshot saved to ${s.path}. Configure a vision model so I can read it: \`hara config set visionModel <model
|
|
346
|
+
return ok(`Screenshot saved to ${s.path}. Configure a vision model so I can read it: \`hara config set visionModel <model>\`.`);
|
|
347
|
+
}
|
|
348
|
+
// Grounding: locate a described element and turn it into screen coordinates (more reliable than guessing
|
|
349
|
+
// pixels from a text description). Used for `find`, and for click/move when given a `target` and no x,y.
|
|
350
|
+
const needsLocate = action === "find" || ((action === "click" || action === "move") && input.target != null && (input.x == null || input.y == null));
|
|
351
|
+
if (needsLocate) {
|
|
352
|
+
const target = String(input.target ?? "");
|
|
353
|
+
if (!target)
|
|
354
|
+
return action === "find" ? "find needs a `target` (what to locate)." : "click/move needs `x,y` or a `target`.";
|
|
355
|
+
if (!ctx.locate)
|
|
356
|
+
return "Grounding needs a vision model that can see images — set one: `hara config set visionModel <model>`.";
|
|
357
|
+
const s = screenshot();
|
|
358
|
+
if (s.error)
|
|
359
|
+
return fail(`screenshot — ${s.error}`);
|
|
360
|
+
const loc = await ctx.locate(s.path, target);
|
|
361
|
+
if (!loc)
|
|
362
|
+
return fail(`couldn't locate "${target}" on screen — try a screenshot first, or rephrase the target`);
|
|
363
|
+
const size = screenSize();
|
|
364
|
+
if (!size)
|
|
365
|
+
return fail(`located "${target}" but couldn't read the screen size to convert coordinates`);
|
|
366
|
+
const gx = Math.round(loc.x * size.w);
|
|
367
|
+
const gy = Math.round(loc.y * size.h);
|
|
368
|
+
if (action === "find")
|
|
369
|
+
return ok(`"${target}" is at ~${gx},${gy} (${Math.round(loc.x * 100)}% across, ${Math.round(loc.y * 100)}% down).`);
|
|
370
|
+
input.x = gx;
|
|
371
|
+
input.y = gy;
|
|
232
372
|
}
|
|
233
373
|
const r = pointerOrKeyboard(action, input);
|
|
234
|
-
return r.ok ? `✓ ${r.msg}` : `
|
|
374
|
+
return r.ok ? ok(`✓ ${r.msg}${needsLocate ? ` (located "${input.target}")` : ""}`) : fail(r.msg);
|
|
235
375
|
},
|
|
236
376
|
});
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// todo_write — an inline task checklist the agent maintains during a turn (like codex's update_plan /
|
|
2
|
+
// Claude Code's TodoWrite). Keeps the model organized on multi-step work and shows the user live progress.
|
|
3
|
+
// In-memory, replace-whole-list semantics; kind:"read" so it never prompts and is safe to call freely.
|
|
4
|
+
import { registerTool } from "./registry.js";
|
|
5
|
+
// Module-level checklist state; replaced wholesale on every todo_write call.
let todos = [];
/** The current checklist (latest todo_write wins) — for a TUI/statusline to render. Returns the live array. */
export function currentTodos() {
    return todos;
}
// Glyph shown in front of each item, keyed by status.
const MARK = { pending: "☐", in_progress: "▶", done: "☑" };
/**
 * Render a checklist as text: a "Todos (done/total done):" header followed by one marked line per item.
 * An empty list renders as the "(todo list cleared)" notice.
 *
 * @param {Array<{text: string, status: string}>} list
 * @returns {string}
 */
export function renderTodos(list) {
    if (!list.length) {
        return "(todo list cleared)";
    }
    let finished = 0;
    const rows = [];
    for (const item of list) {
        if (item.status === "done") {
            finished += 1;
        }
        rows.push(` ${MARK[item.status]} ${item.text}`);
    }
    return `Todos (${finished}/${list.length} done):\n` + rows.join("\n");
}
|
|
17
|
+
// Registers `todo_write`: the model sends the whole checklist, it replaces the stored one, and the rendered
// list is returned as the tool result.
registerTool({
    name: "todo_write",
    description: "Maintain a short task checklist for the CURRENT work. Use it to plan a multi-step task up front, then " +
        "update it as you go: keep exactly one item 'in_progress', flip items to 'done' as you finish, add items " +
        "you discover. Pass the FULL list each call (it replaces the previous). Skip it for trivial one-step tasks.",
    input_schema: {
        type: "object",
        properties: {
            todos: {
                type: "array",
                description: "the full checklist, in order",
                items: {
                    type: "object",
                    properties: {
                        text: { type: "string", description: "the task, a short imperative phrase" },
                        status: { type: "string", enum: ["pending", "in_progress", "done"] },
                    },
                    required: ["text", "status"],
                },
            },
        },
        required: ["todos"],
    },
    kind: "read", // pure state + display: never prompts, parallel-safe
    async run(input) {
        const incoming = Array.isArray(input.todos) ? input.todos : [];
        const next = [];
        for (const entry of incoming) {
            const text = String(entry?.text ?? "").trim();
            // unknown / missing status falls back to "pending"
            const status = ["pending", "in_progress", "done"].includes(entry?.status) ? entry.status : "pending";
            // items with no text are dropped
            if (text) {
                next.push({ text, status });
            }
        }
        todos = next;
        return renderTodos(todos);
    },
});
|
package/dist/tools/web.js
CHANGED
|
@@ -24,6 +24,103 @@ export function htmlToText(html) {
|
|
|
24
24
|
.replace(/\n{3,}/g, "\n\n")
|
|
25
25
|
.trim();
|
|
26
26
|
}
|
|
27
|
+
/**
 * Parse DuckDuckGo HTML results → [{title, url, snippet}]. Best-effort HTML scrape (no key, no dependency).
 *
 * Each `result__a` link becomes one result. Its snippet is the first `result__snippet` found between that
 * link and the next one, so a result without a snippet gets "" instead of shifting every later snippet up.
 *
 * @param {string} html    raw HTML of the results page
 * @param {number} [limit=10]  maximum number of results to return
 * @returns {Array<{title: string, url: string, snippet: string}>}
 */
export function parseSearchResults(html, limit = 10) {
    // Drop tags, decode the handful of entities that appear in titles/snippets, collapse whitespace.
    // `&amp;` is decoded LAST so that e.g. "&amp;lt;" yields the literal text "&lt;" rather than "<".
    const strip = (s) => s
        .replace(/<[^>]+>/g, "")
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, '"')
        .replace(/&#39;|&#x27;/gi, "'")
        .replace(/&amp;/g, "&")
        .replace(/\s+/g, " ")
        .trim();
    const linkRe = /class="result__a"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/g;
    const snipRe = /class="result__snippet"[^>]*>([\s\S]*?)<\/a>/;
    const links = [...html.matchAll(linkRe)];
    const out = [];
    for (let i = 0; i < links.length && out.length < limit; i += 1) {
        const m = links[i];
        let href = m[1].replace(/&amp;/g, "&");
        const uddg = /[?&]uddg=([^&]+)/.exec(href); // DuckDuckGo wraps results in a /l/?uddg=<real-url> redirect
        if (uddg) {
            try {
                href = decodeURIComponent(uddg[1]);
            }
            catch {
                /* malformed percent-encoding → keep the redirect URL as-is rather than failing the whole parse */
            }
        }
        else if (href.startsWith("//")) {
            href = "https:" + href;
        }
        // Only look for the snippet inside this result's own slice of the page.
        const sliceEnd = i + 1 < links.length ? links[i + 1].index : html.length;
        const snip = snipRe.exec(html.slice(m.index + m[0].length, sliceEnd));
        out.push({ title: strip(m[2]), url: href, snippet: snip ? strip(snip[1]) : "" });
    }
    return out;
}
|
|
57
|
+
// Registers `web_search`: Tavily when an API key is configured, otherwise (or when Tavily fails in ANY way —
// HTTP error, empty result, network error, timeout) a keyless DuckDuckGo HTML scrape. Each attempt gets its
// own 20s abort timer, so a Tavily stall cannot consume the fallback's time budget.
registerTool({
    name: "web_search",
    description: "Search the web and return the top results (title, URL, snippet). Use it to FIND information or pages you " +
        "don't already have a URL for, then `web_fetch` a result to read it. Read-only. Reliable with a Tavily key " +
        "(env HARA_SEARCH_API_KEY); otherwise a best-effort keyless fallback that may be rate-limited.",
    input_schema: {
        type: "object",
        properties: {
            query: { type: "string" },
            limit: { type: "number", description: "max results (default 6, max 10)" },
        },
        required: ["query"],
    },
    kind: "read",
    async run(input) {
        const q = String(input.query ?? "").trim();
        if (!q)
            return "(empty query)";
        // Whole number in 1..10; anything unusable (missing, NaN, 0) falls back to 6.
        const limit = Math.min(Math.max(1, Math.trunc(Number(input.limit) || 6)), 10);
        const fmt = (rs) => rs.map((r, n) => `${n + 1}. ${r.title}\n ${r.url}${r.snippet ? `\n ${r.snippet}` : ""}`).join("\n\n");
        // Run one request attempt under its own 20s abort timer; the timer is always cleared.
        const attempt = async (fn) => {
            const ctrl = new AbortController();
            const timer = setTimeout(() => ctrl.abort(), 20_000);
            try {
                return await fn(ctrl.signal);
            }
            finally {
                clearTimeout(timer);
            }
        };
        // Reliable path: Tavily (designed for agents, free tier) when a key is configured.
        const key = process.env.HARA_SEARCH_API_KEY || process.env.TAVILY_API_KEY;
        if (key) {
            try {
                const rs = await attempt(async (signal) => {
                    const res = await fetch("https://api.tavily.com/search", {
                        method: "POST",
                        signal,
                        headers: { "content-type": "application/json" },
                        body: JSON.stringify({ api_key: key, query: q, max_results: limit }),
                    });
                    if (!res.ok)
                        return [];
                    const j = (await res.json());
                    return (j.results ?? []).map((x) => ({ title: String(x.title ?? x.url ?? ""), url: String(x.url ?? ""), snippet: String(x.content ?? "").slice(0, 200) }));
                });
                if (rs.length)
                    return fmt(rs);
            }
            catch {
                /* Tavily unreachable / timed out / bad JSON → deliberately fall through to the keyless path */
            }
        }
        try {
            // Keyless fallback: DuckDuckGo HTML (POST — GET returns a 202 challenge). May be rate-limited.
            return await attempt(async (signal) => {
                const res = await fetch("https://html.duckduckgo.com/html/", {
                    method: "POST",
                    signal,
                    redirect: "follow",
                    headers: {
                        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
                        "content-type": "application/x-www-form-urlencoded",
                        accept: "text/html",
                    },
                    body: `q=${encodeURIComponent(q)}`,
                });
                if (!res.ok)
                    return `Search failed: HTTP ${res.status}. Keyless search is rate-limited — set HARA_SEARCH_API_KEY (Tavily) for reliable search, or web_fetch a known URL.`;
                const results = parseSearchResults(await res.text(), limit);
                if (!results.length)
                    return "(no results — the keyless endpoint is rate-limited or changed. Set HARA_SEARCH_API_KEY (Tavily) for reliable search, or web_fetch a known URL.)";
                return fmt(results);
            });
        }
        catch (e) {
            return `Search failed: ${e?.name === "AbortError" ? "timed out (20s)" : (e?.message ?? e)}`;
        }
    },
});
|
|
27
124
|
registerTool({
|
|
28
125
|
name: "web_fetch",
|
|
29
126
|
description: "Fetch an http(s) URL and return its text content (HTML is reduced to readable text). Read-only. " +
|