@nanhara/hara 0.33.0 → 0.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +152 -1
- package/README.md +12 -4
- package/dist/index.js +303 -76
- package/dist/org/planner.js +19 -0
- package/dist/search/semindex.js +62 -11
- package/dist/session/store.js +14 -0
- package/dist/tools/computer.js +156 -16
- package/dist/tui/App.js +40 -5
- package/dist/tui/InputBox.js +2 -2
- package/dist/vision.js +52 -3
- package/package.json +3 -2
- package/plugins/browser/.hara-plugin/plugin.json +9 -0
- package/plugins/browser/skills/web/SKILL.md +27 -0
- package/plugins/chrome/.hara-plugin/plugin.json +9 -0
- package/plugins/chrome/skills/chrome/SKILL.md +26 -0
package/dist/search/semindex.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// code-asset / repo / knowledge-base scale (hundreds–low-thousands of chunks); the optional zvec adapter is
|
|
3
3
|
// the scale-up path later. Markdown/code stays the SSOT; this index is a derived, rebuildable, gitignored
|
|
4
4
|
// artifact. The embedder is injected (see embed.ts) so the store + chunking are testable without a model.
|
|
5
|
-
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
5
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, statSync } from "node:fs";
|
|
6
6
|
import { homedir } from "node:os";
|
|
7
7
|
import { join, dirname } from "node:path";
|
|
8
8
|
import { findProjectRoot } from "../context/agents-md.js";
|
|
@@ -15,14 +15,22 @@ export function indexPath(name, cwd) {
|
|
|
15
15
|
return join(findProjectRoot(cwd), ".hara", "index", "repo.json");
|
|
16
16
|
return join(homedir(), ".hara", "index", `${name}.json`);
|
|
17
17
|
}
|
|
18
|
+
function statMtime(p) {
|
|
19
|
+
try {
|
|
20
|
+
return statSync(p).mtimeMs;
|
|
21
|
+
}
|
|
22
|
+
catch {
|
|
23
|
+
return 0;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
18
26
|
/** Split a file into chunks: Markdown by `#` headings, code by ~40-line windows. Heuristic, zero-dep —
|
|
19
|
-
* also the substrate embeddings reuse. */
|
|
20
|
-
export function chunkText(text, file, source) {
|
|
27
|
+
* also the substrate embeddings reuse. `mtime` (when given) is stamped on every chunk for incremental reuse. */
|
|
28
|
+
export function chunkText(text, file, source, mtime) {
|
|
21
29
|
const out = [];
|
|
22
30
|
const push = (body, n) => {
|
|
23
31
|
const t = body.trim();
|
|
24
32
|
if (t.length >= 12)
|
|
25
|
-
out.push({ id: `${file}#${n}`, text: t.slice(0, 2000), file, source });
|
|
33
|
+
out.push({ id: `${file}#${n}`, text: t.slice(0, 2000), file, source, mtime });
|
|
26
34
|
};
|
|
27
35
|
if (/\.(md|mdx)$/i.test(file)) {
|
|
28
36
|
const parts = text.split(/^(?=#{1,6}\s)/m);
|
|
@@ -49,23 +57,66 @@ function cosine(a, b) {
|
|
|
49
57
|
}
|
|
50
58
|
return na && nb ? dot / (Math.sqrt(na) * Math.sqrt(nb)) : 0;
|
|
51
59
|
}
|
|
52
|
-
/**
|
|
60
|
+
/** Build/refresh the index. **Incremental**: files whose mtime is unchanged since the last build keep their
|
|
61
|
+
* existing vectors (no re-embed); only new/changed files are embedded, and deleted files drop out. A changed
|
|
62
|
+
* embedding model forces a full rebuild (old vectors aren't comparable). Returns counts. */
|
|
53
63
|
export async function buildIndex(name, chunks, embed, cwd, model = "embed") {
|
|
64
|
+
const p = indexPath(name, cwd);
|
|
65
|
+
// Load the previous index → reuse vectors for unchanged files.
|
|
66
|
+
const prevByFile = new Map();
|
|
67
|
+
let prevModel = "";
|
|
68
|
+
if (existsSync(p)) {
|
|
69
|
+
try {
|
|
70
|
+
const old = JSON.parse(readFileSync(p, "utf8"));
|
|
71
|
+
prevModel = old.model;
|
|
72
|
+
for (const it of old.items ?? []) {
|
|
73
|
+
const arr = prevByFile.get(it.file);
|
|
74
|
+
if (arr)
|
|
75
|
+
arr.push(it);
|
|
76
|
+
else
|
|
77
|
+
prevByFile.set(it.file, [it]);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
catch {
|
|
81
|
+
/* corrupt index → full rebuild */
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
const sameModel = prevModel === model;
|
|
85
|
+
const byFile = new Map();
|
|
86
|
+
for (const c of chunks) {
|
|
87
|
+
const arr = byFile.get(c.file);
|
|
88
|
+
if (arr)
|
|
89
|
+
arr.push(c);
|
|
90
|
+
else
|
|
91
|
+
byFile.set(c.file, [c]);
|
|
92
|
+
}
|
|
54
93
|
const items = [];
|
|
94
|
+
const toEmbed = [];
|
|
95
|
+
let reused = 0;
|
|
96
|
+
for (const [file, fchunks] of byFile) {
|
|
97
|
+
const mtime = fchunks[0].mtime ?? 0;
|
|
98
|
+
const prev = prevByFile.get(file);
|
|
99
|
+
if (sameModel && prev?.length && mtime > 0 && prev.every((it) => it.mtime === mtime)) {
|
|
100
|
+
items.push(...prev); // file unchanged → keep its vectors
|
|
101
|
+
reused += prev.length;
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
toEmbed.push(...fchunks);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
55
107
|
const B = 64;
|
|
56
|
-
for (let i = 0; i <
|
|
57
|
-
const batch =
|
|
108
|
+
for (let i = 0; i < toEmbed.length; i += B) {
|
|
109
|
+
const batch = toEmbed.slice(i, i + B);
|
|
58
110
|
const vecs = await embed(batch.map((c) => c.text));
|
|
59
111
|
batch.forEach((c, j) => vecs[j] && items.push({ ...c, vec: vecs[j] }));
|
|
60
112
|
}
|
|
61
|
-
const p = indexPath(name, cwd);
|
|
62
113
|
const dir = dirname(p);
|
|
63
114
|
mkdirSync(dir, { recursive: true });
|
|
64
115
|
// The index is derived + rebuildable (and may embed file contents) — never let it be committed.
|
|
65
116
|
if (!existsSync(join(dir, ".gitignore")))
|
|
66
117
|
writeFileSync(join(dir, ".gitignore"), "*\n", "utf8");
|
|
67
118
|
writeFileSync(p, JSON.stringify({ model, items }), "utf8");
|
|
68
|
-
return items.length;
|
|
119
|
+
return { total: items.length, embedded: toEmbed.length, reused };
|
|
69
120
|
}
|
|
70
121
|
export function indexExists(name, cwd) {
|
|
71
122
|
return existsSync(indexPath(name, cwd));
|
|
@@ -91,7 +142,7 @@ export function collectDirChunks(dir, source) {
|
|
|
91
142
|
}
|
|
92
143
|
if (isProbablyBinary(buf))
|
|
93
144
|
continue;
|
|
94
|
-
chunks.push(...chunkText(buf.toString("utf8"), abs, source));
|
|
145
|
+
chunks.push(...chunkText(buf.toString("utf8"), abs, source, statMtime(abs)));
|
|
95
146
|
}
|
|
96
147
|
return chunks;
|
|
97
148
|
}
|
|
@@ -113,7 +164,7 @@ export function collectRepoChunks(root) {
|
|
|
113
164
|
}
|
|
114
165
|
if (isProbablyBinary(buf))
|
|
115
166
|
continue;
|
|
116
|
-
chunks.push(...chunkText(buf.toString("utf8"), rel, "repo"));
|
|
167
|
+
chunks.push(...chunkText(buf.toString("utf8"), rel, "repo", statMtime(abs)));
|
|
117
168
|
}
|
|
118
169
|
return chunks;
|
|
119
170
|
}
|
package/dist/session/store.js
CHANGED
|
@@ -56,6 +56,20 @@ export function titleFrom(history) {
|
|
|
56
56
|
const firstUser = history.find((h) => h.role === "user");
|
|
57
57
|
return deriveTitle(firstUser && firstUser.role === "user" ? firstUser.content : "");
|
|
58
58
|
}
|
|
59
|
+
/** Normalize a phrase to an ASCII kebab-case slug (lowercase, a–z0–9 + single hyphens, capped). Non-ASCII
|
|
60
|
+
* is dropped — used to clean a model-generated English session name. Returns "" if nothing ASCII remains. */
|
|
61
|
+
export function slugify(text, max = 40) {
|
|
62
|
+
return text
|
|
63
|
+
.trim()
|
|
64
|
+
.toLowerCase()
|
|
65
|
+
.replace(/[^a-z0-9\s-]/g, "")
|
|
66
|
+
.trim()
|
|
67
|
+
.replace(/\s+/g, "-")
|
|
68
|
+
.replace(/-+/g, "-")
|
|
69
|
+
.replace(/^-+/, "")
|
|
70
|
+
.slice(0, max)
|
|
71
|
+
.replace(/-+$/, "");
|
|
72
|
+
}
|
|
59
73
|
export function saveSession(meta, history) {
|
|
60
74
|
meta.updatedAt = new Date().toISOString();
|
|
61
75
|
const data = { meta, history };
|
package/dist/tools/computer.js
CHANGED
|
@@ -11,7 +11,7 @@ import { join } from "node:path";
|
|
|
11
11
|
import { registerTool } from "./registry.js";
|
|
12
12
|
import { loadConfig } from "../config.js";
|
|
13
13
|
const RANK = { off: 0, read: 1, click: 2, full: 3 };
|
|
14
|
-
const ACTION_MIN = { screenshot: "read", move: "click", click: "click", type: "full", key: "full" };
|
|
14
|
+
const ACTION_MIN = { screenshot: "read", find: "read", activate: "click", move: "click", click: "click", type: "full", key: "full" };
|
|
15
15
|
// dangerous combos refused even at full tier (quit / close / delete / task-switch-kill)
|
|
16
16
|
const KEY_BLOCK = /(?:\b(cmd|command|ctrl|control|alt|option|win|super|meta)\b.*\+.*\b(q|w|delete|del|f4|escape|esc)\b)|ctrl\+alt\+(?:delete|del|backspace)/i;
|
|
17
17
|
/** Whether the configured tier permits the action. Exported for tests. */
|
|
@@ -22,6 +22,25 @@ export function actionAllowed(tier, action) {
|
|
|
22
22
|
export function keyIsBlocked(keys) {
|
|
23
23
|
return KEY_BLOCK.test(keys);
|
|
24
24
|
}
|
|
25
|
+
// Circuit breaker (learned from codex): bound consecutive screen-control failures so the agent can't loop
|
|
26
|
+
// forever on a broken setup. Reset on any success; after FAIL_LIMIT in a row, return a clear stop + how to fix.
|
|
27
|
+
const FAIL_LIMIT = 3;
|
|
28
|
+
let consecFails = 0;
|
|
29
|
+
export function resetComputerFails() {
|
|
30
|
+
consecFails = 0;
|
|
31
|
+
}
|
|
32
|
+
function ok(msg) {
|
|
33
|
+
consecFails = 0;
|
|
34
|
+
return msg;
|
|
35
|
+
}
|
|
36
|
+
function fail(msg) {
|
|
37
|
+
consecFails += 1;
|
|
38
|
+
if (consecFails >= FAIL_LIMIT) {
|
|
39
|
+
consecFails = 0;
|
|
40
|
+
return `⛔ Stopping screen control — ${FAIL_LIMIT} actions failed in a row (last: ${msg}). Most likely a missing macOS permission (Accessibility for click/type, Screen Recording for screenshots) or the target app isn't reachable. Fix that, then ask me to try again — I won't keep retrying blindly.`;
|
|
41
|
+
}
|
|
42
|
+
return `Failed: ${msg} [${consecFails}/${FAIL_LIMIT} before I stop]`;
|
|
43
|
+
}
|
|
25
44
|
function run(cmd, args) {
|
|
26
45
|
try {
|
|
27
46
|
const r = spawnSync(cmd, args, { encoding: "utf8", timeout: 15000 });
|
|
@@ -35,6 +54,23 @@ function has(cmd) {
|
|
|
35
54
|
return (process.platform === "win32" ? run("where", [cmd]) : run("which", [cmd])).ok;
|
|
36
55
|
}
|
|
37
56
|
const ps = (script) => run("powershell", ["-NoProfile", "-Command", script]);
|
|
57
|
+
/** Put text on the OS clipboard (so `type` can paste it — IME-safe + Unicode-safe, unlike keystroke injection). */
|
|
58
|
+
function setClipboard(text) {
|
|
59
|
+
try {
|
|
60
|
+
if (process.platform === "darwin")
|
|
61
|
+
return spawnSync("pbcopy", [], { input: text, timeout: 5000 }).status === 0;
|
|
62
|
+
if (process.platform === "win32")
|
|
63
|
+
return spawnSync("clip", [], { input: text, timeout: 5000 }).status === 0;
|
|
64
|
+
if (has("wl-copy"))
|
|
65
|
+
return spawnSync("wl-copy", [], { input: text, timeout: 5000 }).status === 0;
|
|
66
|
+
if (has("xclip"))
|
|
67
|
+
return spawnSync("xclip", ["-selection", "clipboard"], { input: text, timeout: 5000 }).status === 0;
|
|
68
|
+
}
|
|
69
|
+
catch {
|
|
70
|
+
/* fall through */
|
|
71
|
+
}
|
|
72
|
+
return false;
|
|
73
|
+
}
|
|
38
74
|
let seq = 0;
|
|
39
75
|
function tmpShot() {
|
|
40
76
|
seq += 1;
|
|
@@ -73,6 +109,49 @@ function screenshot() {
|
|
|
73
109
|
}
|
|
74
110
|
return { path: out };
|
|
75
111
|
}
|
|
112
|
+
/** Bring an app to the foreground so screenshots/clicks land on IT, not the terminal hara runs in. */
|
|
113
|
+
function activateApp(app) {
|
|
114
|
+
if (process.platform === "darwin") {
|
|
115
|
+
// `open -a` reliably launches+foregrounds; `osascript … activate` often leaves another window on top.
|
|
116
|
+
const r = run("open", ["-a", app]);
|
|
117
|
+
return { ok: r.ok, msg: r.ok ? `activated ${app}` : r.out || `couldn't activate ${app}` };
|
|
118
|
+
}
|
|
119
|
+
if (process.platform === "win32") {
|
|
120
|
+
const r = ps(`(New-Object -ComObject WScript.Shell).AppActivate(${JSON.stringify(app)})`);
|
|
121
|
+
return { ok: r.ok, msg: r.ok ? `activated ${app}` : r.out || `couldn't activate ${app}` };
|
|
122
|
+
}
|
|
123
|
+
if (process.platform === "linux") {
|
|
124
|
+
const r = has("wmctrl") ? run("wmctrl", ["-a", app]) : run("xdotool", ["search", "--name", app, "windowactivate"]);
|
|
125
|
+
return { ok: r.ok, msg: r.ok ? `activated ${app}` : r.out || `couldn't activate ${app} (need wmctrl/xdotool)` };
|
|
126
|
+
}
|
|
127
|
+
return { ok: false, msg: `activate unsupported on ${process.platform}` };
|
|
128
|
+
}
|
|
129
|
+
/** Logical screen size in the coordinate space the click backends use (points on mac, pixels on win/linux).
|
|
130
|
+
* Grounding returns 0..1 fractions, so click = fraction × this. null if undetectable. */
|
|
131
|
+
function screenSize() {
|
|
132
|
+
try {
|
|
133
|
+
if (process.platform === "darwin") {
|
|
134
|
+
const r = run("osascript", ["-e", 'tell application "Finder" to get bounds of window of desktop']);
|
|
135
|
+
const n = r.out.match(/-?\d+/g);
|
|
136
|
+
if (n && n.length >= 4)
|
|
137
|
+
return { w: Number(n[2]), h: Number(n[3]) };
|
|
138
|
+
}
|
|
139
|
+
else if (process.platform === "linux") {
|
|
140
|
+
const [w, h] = run("xdotool", ["getdisplaygeometry"]).out.trim().split(/\s+/).map(Number);
|
|
141
|
+
if (w && h)
|
|
142
|
+
return { w, h };
|
|
143
|
+
}
|
|
144
|
+
else if (process.platform === "win32") {
|
|
145
|
+
const [w, h] = ps('Add-Type -AssemblyName System.Windows.Forms; $b=[System.Windows.Forms.Screen]::PrimaryScreen.Bounds; "$($b.Width) $($b.Height)"').out.trim().split(/\s+/).map(Number);
|
|
146
|
+
if (w && h)
|
|
147
|
+
return { w, h };
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
catch {
|
|
151
|
+
/* fall through */
|
|
152
|
+
}
|
|
153
|
+
return null;
|
|
154
|
+
}
|
|
76
155
|
/** Name of the frontmost application/window (for the allowlist check). "" if undetectable. */
|
|
77
156
|
function frontmostApp() {
|
|
78
157
|
if (process.platform === "darwin") {
|
|
@@ -128,21 +207,41 @@ using System;using System.Runtime.InteropServices;public class Ms{[DllImport("us
|
|
|
128
207
|
const text = String(input.text ?? "");
|
|
129
208
|
if (!text)
|
|
130
209
|
return { ok: false, msg: "type needs text" };
|
|
210
|
+
// IME-safe path: set the clipboard and paste. Keystroke injection (below) is intercepted/garbled by a
|
|
211
|
+
// CJK input method and can't enter Chinese/emoji reliably; pasting is immune and Unicode-safe.
|
|
212
|
+
if (setClipboard(text)) {
|
|
213
|
+
if (mac && has("cliclick")) {
|
|
214
|
+
const r = run("cliclick", ["kd:cmd", "t:v", "ku:cmd"]); // Cmd+V
|
|
215
|
+
if (r.ok)
|
|
216
|
+
return { ok: true, msg: `pasted ${text.length} chars` };
|
|
217
|
+
}
|
|
218
|
+
else if (lin && has("xdotool")) {
|
|
219
|
+
const r = run("xdotool", ["key", "ctrl+v"]);
|
|
220
|
+
if (r.ok)
|
|
221
|
+
return { ok: true, msg: `pasted ${text.length} chars` };
|
|
222
|
+
}
|
|
223
|
+
else if (win) {
|
|
224
|
+
const r = ps("Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait('^v')");
|
|
225
|
+
if (r.ok)
|
|
226
|
+
return { ok: true, msg: `pasted ${text.length} chars` };
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
// Fallback: keystroke injection (fine for ASCII when no IME is active).
|
|
131
230
|
if (mac) {
|
|
132
231
|
if (!has("cliclick"))
|
|
133
232
|
return { ok: false, msg: "cliclick not found — install with `brew install cliclick`" };
|
|
134
233
|
const r = run("cliclick", [`t:${text}`]);
|
|
135
|
-
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars` : r.out };
|
|
234
|
+
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
|
|
136
235
|
}
|
|
137
236
|
if (lin) {
|
|
138
237
|
if (!has("xdotool"))
|
|
139
238
|
return { ok: false, msg: "xdotool not found" };
|
|
140
239
|
const r = run("xdotool", ["type", "--clearmodifiers", text]);
|
|
141
|
-
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars` : r.out };
|
|
240
|
+
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
|
|
142
241
|
}
|
|
143
242
|
if (win) {
|
|
144
243
|
const r = ps(`Add-Type -AssemblyName System.Windows.Forms;[System.Windows.Forms.SendKeys]::SendWait(${JSON.stringify(text)})`);
|
|
145
|
-
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars` : r.out };
|
|
244
|
+
return { ok: r.ok, msg: r.ok ? `typed ${text.length} chars (keystroke)` : r.out };
|
|
146
245
|
}
|
|
147
246
|
}
|
|
148
247
|
if (action === "key") {
|
|
@@ -182,17 +281,23 @@ export function computerBackends() {
|
|
|
182
281
|
}
|
|
183
282
|
registerTool({
|
|
184
283
|
name: "computer",
|
|
185
|
-
description: "Control the screen to operate desktop software (not just the browser)
|
|
186
|
-
"
|
|
187
|
-
"
|
|
284
|
+
description: "Control the screen to operate desktop software (not just the browser). ALWAYS `activate` the target app " +
|
|
285
|
+
"FIRST (e.g. activate WeChat) — otherwise screenshots/clicks hit the terminal hara runs in, not the app. " +
|
|
286
|
+
"Then prefer grounding over guessing pixels: pass `target` (e.g. 'the Send button') to click/move and it's " +
|
|
287
|
+
"located by a vision model; or `find` to just get coordinates. Workflow: activate → screenshot → click a " +
|
|
288
|
+
"target → re-screenshot to verify. When typing, type the ACTUAL text — never placeholders. Opt-in and " +
|
|
289
|
+
"permission-gated (tier + per-app allowlist).",
|
|
188
290
|
input_schema: {
|
|
189
291
|
type: "object",
|
|
190
292
|
properties: {
|
|
191
|
-
action: { type: "string", enum: ["screenshot", "click", "move", "type", "key"] },
|
|
192
|
-
|
|
193
|
-
|
|
293
|
+
action: { type: "string", enum: ["screenshot", "activate", "find", "click", "move", "type", "key"] },
|
|
294
|
+
app: { type: "string", description: "app to bring to the foreground (activate) — e.g. 'WeChat'. Do this BEFORE screenshot/click so they hit the app, not the terminal." },
|
|
295
|
+
target: { type: "string", description: "describe a UI element to locate (find) or click/move to — e.g. 'the Send button'. Preferred over x,y." },
|
|
296
|
+
x: { type: "number", description: "x pixel (click/move; or use `target`)" },
|
|
297
|
+
y: { type: "number", description: "y pixel (click/move; or use `target`)" },
|
|
194
298
|
text: { type: "string", description: "text to type (type)" },
|
|
195
299
|
keys: { type: "string", description: "key or combo, e.g. 'return', 'cmd+c' (key)" },
|
|
300
|
+
focus: { type: "string", description: "screenshot only: what to look for — focuses the read" },
|
|
196
301
|
},
|
|
197
302
|
required: ["action"],
|
|
198
303
|
},
|
|
@@ -205,7 +310,17 @@ registerTool({
|
|
|
205
310
|
const action = String(input.action ?? "");
|
|
206
311
|
if (!actionAllowed(tier, action))
|
|
207
312
|
return `'${action}' needs a higher tier (current computerUse=${tier}). Raise it with \`hara config set computerUse …\`.`;
|
|
208
|
-
|
|
313
|
+
// Bring the target app to the foreground first — without this, clicks land on the terminal hara runs in.
|
|
314
|
+
if (action === "activate") {
|
|
315
|
+
const app = String(input.app ?? input.target ?? "");
|
|
316
|
+
if (!app)
|
|
317
|
+
return "activate needs an `app` name (e.g. 'WeChat').";
|
|
318
|
+
if (!cfg.computerApps.some((a) => app.toLowerCase().includes(a.toLowerCase()) || a.toLowerCase().includes(app.toLowerCase())))
|
|
319
|
+
return `Refused: "${app}" isn't in your allowlist (${cfg.computerApps.join(", ") || "empty"}). Add it: \`hara config set computerApps "${app}"\`.`;
|
|
320
|
+
const r = activateApp(app);
|
|
321
|
+
return r.ok ? ok(`✓ ${r.msg} — now screenshot/find/click to act on it`) : fail(r.msg);
|
|
322
|
+
}
|
|
323
|
+
if (action !== "screenshot" && action !== "find") {
|
|
209
324
|
// per-app allowlist: only act when an allowlisted app is frontmost (the key guard against wrong-window clicks)
|
|
210
325
|
if (!cfg.computerApps.length)
|
|
211
326
|
return "No apps allowlisted — set `hara config set computerApps \"App Name, …\"` before clicking/typing.";
|
|
@@ -217,20 +332,45 @@ registerTool({
|
|
|
217
332
|
if (action === "screenshot") {
|
|
218
333
|
const s = screenshot();
|
|
219
334
|
if (s.error)
|
|
220
|
-
return `
|
|
335
|
+
return fail(`screenshot — ${s.error}`);
|
|
221
336
|
if (ctx.describeImage) {
|
|
222
337
|
try {
|
|
223
|
-
const desc = await ctx.describeImage(s.path);
|
|
338
|
+
const desc = await ctx.describeImage(s.path, input.focus ? String(input.focus) : undefined);
|
|
224
339
|
if (desc)
|
|
225
|
-
return `Screenshot (read via vision):\n${desc}
|
|
340
|
+
return ok(`Screenshot (read via vision):\n${desc}`);
|
|
226
341
|
}
|
|
227
342
|
catch {
|
|
228
343
|
/* fall through to path */
|
|
229
344
|
}
|
|
230
345
|
}
|
|
231
|
-
return `Screenshot saved to ${s.path}. Configure a vision model so I can read it: \`hara config set visionModel <model
|
|
346
|
+
return ok(`Screenshot saved to ${s.path}. Configure a vision model so I can read it: \`hara config set visionModel <model>\`.`);
|
|
347
|
+
}
|
|
348
|
+
// Grounding: locate a described element and turn it into screen coordinates (more reliable than guessing
|
|
349
|
+
// pixels from a text description). Used for `find`, and for click/move when given a `target` and no x,y.
|
|
350
|
+
const needsLocate = action === "find" || ((action === "click" || action === "move") && input.target != null && (input.x == null || input.y == null));
|
|
351
|
+
if (needsLocate) {
|
|
352
|
+
const target = String(input.target ?? "");
|
|
353
|
+
if (!target)
|
|
354
|
+
return action === "find" ? "find needs a `target` (what to locate)." : "click/move needs `x,y` or a `target`.";
|
|
355
|
+
if (!ctx.locate)
|
|
356
|
+
return "Grounding needs a vision model that can see images — set one: `hara config set visionModel <model>`.";
|
|
357
|
+
const s = screenshot();
|
|
358
|
+
if (s.error)
|
|
359
|
+
return fail(`screenshot — ${s.error}`);
|
|
360
|
+
const loc = await ctx.locate(s.path, target);
|
|
361
|
+
if (!loc)
|
|
362
|
+
return fail(`couldn't locate "${target}" on screen — try a screenshot first, or rephrase the target`);
|
|
363
|
+
const size = screenSize();
|
|
364
|
+
if (!size)
|
|
365
|
+
return fail(`located "${target}" but couldn't read the screen size to convert coordinates`);
|
|
366
|
+
const gx = Math.round(loc.x * size.w);
|
|
367
|
+
const gy = Math.round(loc.y * size.h);
|
|
368
|
+
if (action === "find")
|
|
369
|
+
return ok(`"${target}" is at ~${gx},${gy} (${Math.round(loc.x * 100)}% across, ${Math.round(loc.y * 100)}% down).`);
|
|
370
|
+
input.x = gx;
|
|
371
|
+
input.y = gy;
|
|
232
372
|
}
|
|
233
373
|
const r = pointerOrKeyboard(action, input);
|
|
234
|
-
return r.ok ? `✓ ${r.msg}` : `
|
|
374
|
+
return r.ok ? ok(`✓ ${r.msg}${needsLocate ? ` (located "${input.target}")` : ""}`) : fail(r.msg);
|
|
235
375
|
},
|
|
236
376
|
});
|
package/dist/tui/App.js
CHANGED
|
@@ -13,6 +13,7 @@ import { InputBox } from "./InputBox.js";
|
|
|
13
13
|
import { activity } from "../activity.js";
|
|
14
14
|
import { ctxPctFor } from "../statusbar.js";
|
|
15
15
|
import { accent } from "./theme.js";
|
|
16
|
+
import { renderMarkdown } from "../md.js";
|
|
16
17
|
let _id = 0;
|
|
17
18
|
const nid = () => ++_id;
|
|
18
19
|
const stripAnsi = (s) => s.replace(/\x1b\[[0-9;]*m/g, "");
|
|
@@ -21,7 +22,7 @@ function Block({ item, open }) {
|
|
|
21
22
|
case "user":
|
|
22
23
|
return (_jsxs(Box, { marginTop: 1, children: [_jsx(Text, { color: "cyan", children: "\u203A " }), _jsx(Text, { children: item.text })] }));
|
|
23
24
|
case "assistant":
|
|
24
|
-
return _jsx(Text, { children: item.text });
|
|
25
|
+
return _jsx(Text, { children: renderMarkdown(item.text) }); // headers/bold/inline-code/bullets + verbatim fences
|
|
25
26
|
case "reasoning": {
|
|
26
27
|
// fixed-height window: show the last 5 lines while thinking; ctrl-r toggles the full text.
|
|
27
28
|
const lines = item.text.replace(/\n+$/, "").split("\n");
|
|
@@ -71,6 +72,9 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
|
|
|
71
72
|
const [promptSel, setPromptSel] = useState(0);
|
|
72
73
|
const [reasoningOpen, setReasoningOpen] = useState(false);
|
|
73
74
|
const ctrlRef = useRef(null);
|
|
75
|
+
const queueRef = useRef([]); // type-ahead: FIFO of messages entered while working
|
|
76
|
+
const [pool, setPool] = useState([]); // type-ahead pool: queued message lines, shown above the input
|
|
77
|
+
const drainingRef = useRef(false); // idempotency guard so the drain effect can't double-send one item
|
|
74
78
|
const currentRef = useRef([]);
|
|
75
79
|
currentRef.current = current;
|
|
76
80
|
const statusRef = useRef(status);
|
|
@@ -90,8 +94,14 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
|
|
|
90
94
|
}, []);
|
|
91
95
|
const handleSubmit = useCallback(async (line, images) => {
|
|
92
96
|
const t = line.trim();
|
|
93
|
-
if ((!t && !images?.length) ||
|
|
94
|
-
return; //
|
|
97
|
+
if ((!t && !images?.length) || prompt)
|
|
98
|
+
return; // nothing to send, or a choice is pending
|
|
99
|
+
if (working) {
|
|
100
|
+
// type-ahead: hold the message in the pool; all pooled messages are sent together when the turn ends
|
|
101
|
+
queueRef.current.push({ line, images });
|
|
102
|
+
setPool(queueRef.current.map((q) => q.line.trim() || "🖼 (image)"));
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
95
105
|
setHistory((h) => [...h, { id: nid(), kind: "user", text: t }]); // t already carries any [Image #N] tokens
|
|
96
106
|
const ctrl = new AbortController();
|
|
97
107
|
ctrlRef.current = ctrl;
|
|
@@ -130,6 +140,21 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
|
|
|
130
140
|
setWorking(false);
|
|
131
141
|
ctrlRef.current = null;
|
|
132
142
|
}, [working, prompt, onSubmit, pushCurrent, model, exit]);
|
|
143
|
+
// Drain the type-ahead pool: when the turn finishes (working → false) and nothing awaits a choice, COALESCE
|
|
144
|
+
// every pooled message into ONE turn and send it — additions/clarifications go to the agent together, in order.
|
|
145
|
+
useEffect(() => {
|
|
146
|
+
if (working || prompt || drainingRef.current || !queueRef.current.length)
|
|
147
|
+
return;
|
|
148
|
+
drainingRef.current = true;
|
|
149
|
+
const batch = queueRef.current;
|
|
150
|
+
queueRef.current = [];
|
|
151
|
+
setPool([]);
|
|
152
|
+
const line = batch.map((b) => b.line).join("\n\n");
|
|
153
|
+
const images = batch.flatMap((b) => b.images ?? []);
|
|
154
|
+
void Promise.resolve(handleSubmit(line, images.length ? images : undefined)).finally(() => {
|
|
155
|
+
drainingRef.current = false;
|
|
156
|
+
});
|
|
157
|
+
}, [working, prompt, handleSubmit]);
|
|
133
158
|
useInput((input, key) => {
|
|
134
159
|
if (prompt) {
|
|
135
160
|
const opts = prompt.options;
|
|
@@ -145,6 +170,10 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
|
|
|
145
170
|
prompt.resolve(opts[opts.length - 1].value); // last option = cancel/no
|
|
146
171
|
setPrompt(null);
|
|
147
172
|
}
|
|
173
|
+
else if (/^[1-9]$/.test(input) && Number(input) <= opts.length) {
|
|
174
|
+
prompt.resolve(opts[Number(input) - 1].value); // type a number to pick directly
|
|
175
|
+
setPrompt(null);
|
|
176
|
+
}
|
|
148
177
|
else if (input) {
|
|
149
178
|
const hit = opts.find((o) => o.key && o.key === input.toLowerCase());
|
|
150
179
|
if (hit) {
|
|
@@ -156,10 +185,16 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
|
|
|
156
185
|
}
|
|
157
186
|
if (key.ctrl && input === "r")
|
|
158
187
|
return setReasoningOpen((x) => !x);
|
|
159
|
-
if (key.escape && working)
|
|
188
|
+
if (key.escape && working) {
|
|
189
|
+
// Esc = stop everything: abort the turn AND drop any type-ahead (a stopped turn shouldn't fire queued msgs)
|
|
190
|
+
if (queueRef.current.length) {
|
|
191
|
+
queueRef.current = [];
|
|
192
|
+
setPool([]);
|
|
193
|
+
}
|
|
160
194
|
ctrlRef.current?.abort();
|
|
195
|
+
}
|
|
161
196
|
else if (key.tab && key.shift && cycleApproval)
|
|
162
197
|
setStatus((s) => ({ ...s, approval: cycleApproval(s.approval) }));
|
|
163
198
|
});
|
|
164
|
-
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Static, { items: header ? [{ id: -1, kind: "notice", text: "" }, ...history] : history, children: (item) => (item.id === -1 ? _jsx(HeaderCard, { ...header }, "hdr") : _jsx(Block, { item: item }, item.id)) }), current.map((item) => (_jsx(Block, { item: item, open: reasoningOpen }, item.id))), working && !prompt && _jsx(Working, {}), prompt && (_jsxs(Box, { flexDirection: "column", marginTop: 1, children: [_jsx(Text, { color: "yellow", children: ` ${stripAnsi(prompt.title)}` }), prompt.options.map((o, i) => (_jsx(Text, { color: i === promptSel ? "cyan" : undefined, bold: i === promptSel, children: (i === promptSel ? " ❯ " : " ") + o.label }, i)))] })), _jsx(InputBox, { status: status, cwd: cwd, isActive: !working
|
|
199
|
+
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Static, { items: header ? [{ id: -1, kind: "notice", text: "" }, ...history] : history, children: (item) => (item.id === -1 ? _jsx(HeaderCard, { ...header }, "hdr") : _jsx(Block, { item: item }, item.id)) }), current.map((item) => (_jsx(Block, { item: item, open: reasoningOpen }, item.id))), working && !prompt && _jsx(Working, {}), prompt && (_jsxs(Box, { flexDirection: "column", marginTop: 1, children: [_jsx(Text, { color: "yellow", children: ` ${stripAnsi(prompt.title)}` }), prompt.options.map((o, i) => (_jsx(Text, { color: i === promptSel ? "cyan" : undefined, bold: i === promptSel, children: (i === promptSel ? " ❯ " : " ") + `${i + 1}. ` + o.label }, i))), _jsx(Text, { dimColor: true, children: ` ↑↓ or 1–${prompt.options.length} to choose · Enter · Esc cancels` })] })), pool.length > 0 && !prompt && (_jsx(Box, { flexDirection: "column", children: pool.map((l, i) => (_jsx(Text, { color: accent(), children: ` › ${l.length > 72 ? l.slice(0, 72) + "…" : l}` }, i))) })), _jsx(InputBox, { status: status, cwd: cwd, isActive: !prompt, working: working, queued: pool.length, onSubmit: handleSubmit, onClipboardImage: onClipboardImage })] }));
|
|
165
200
|
}
|
package/dist/tui/InputBox.js
CHANGED
|
@@ -84,7 +84,7 @@ function InputLine({ value, cursor }) {
|
|
|
84
84
|
return _jsx(Text, { children: nodes });
|
|
85
85
|
}
|
|
86
86
|
/** Top border (session) + prompt line + bottom border (usage) + ModeBar, with an @path popup. */
|
|
87
|
-
export function InputBox({ status, cwd, width, onSubmit, onClipboardImage, isActive = true, placeholder = "Type a task · /help · @file · Ctrl+V paste image · shift+tab mode · Esc interrupts", }) {
|
|
87
|
+
export function InputBox({ status, cwd, width, onSubmit, onClipboardImage, isActive = true, working = false, queued = 0, placeholder = "Type a task · /help · @file · Ctrl+V paste image · shift+tab mode · Esc interrupts", }) {
|
|
88
88
|
const { stdout } = useStdout();
|
|
89
89
|
const w = width ?? stdout?.columns ?? 80;
|
|
90
90
|
const [value, setValue] = useState("");
|
|
@@ -204,5 +204,5 @@ export function InputBox({ status, cwd, width, onSubmit, onClipboardImage, isAct
|
|
|
204
204
|
set(value.slice(0, cursor) + input + value.slice(cursor), cursor + input.length);
|
|
205
205
|
}
|
|
206
206
|
}, { isActive });
|
|
207
|
-
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(TopBorder, { name: status.sessionName || "session", width: w }), _jsxs(Box, { children: [_jsx(Text, { color: "cyan", children: "› " }), value.length === 0 ? (_jsxs(Text, { children: [_jsx(Text, { inverse: true, children: " " }), _jsx(Text, { dimColor: true, children: placeholder })] })) : (_jsx(InputLine, { value: value, cursor: cursor }))] }), _jsx(BottomBorder, { s: status, width: w }), popupOpen ? _jsx(MentionPopup, { items: candidates, selected: selIdx, query: mention.query }) : null, _jsx(ModeBar, { approval: status.approval })] }));
|
|
207
|
+
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(TopBorder, { name: status.sessionName || "session", width: w }), _jsxs(Box, { children: [_jsx(Text, { color: "cyan", children: "› " }), value.length === 0 ? (_jsxs(Text, { children: [_jsx(Text, { inverse: true, children: " " }), _jsx(Text, { dimColor: true, children: placeholder })] })) : (_jsx(InputLine, { value: value, cursor: cursor }))] }), _jsx(BottomBorder, { s: status, width: w }), working ? _jsx(Text, { dimColor: true, children: ` ⌨ working — Enter queues your message${queued ? ` · ${queued} queued` : ""} · Esc interrupts` }) : null, popupOpen ? _jsx(MentionPopup, { items: candidates, selected: selIdx, query: mention.query }) : null, _jsx(ModeBar, { approval: status.approval })] }));
|
|
208
208
|
}
|
package/dist/vision.js
CHANGED
|
@@ -65,12 +65,61 @@ export const DESCRIBE_SYSTEM = [
|
|
|
65
65
|
"4. Quote any error or warning messages exactly.",
|
|
66
66
|
"5. Be thorough and factual; do not speculate beyond what is visible.",
|
|
67
67
|
].join("\n");
|
|
68
|
+
// System prompt for the screenshot (desktop-driving / RPA) variant. The main model is text-only and cannot
// see the screen, so the vision model is told to report actionable detail (what can be interacted with and
// roughly where) instead of a plain transcription.
// NOTE: the continuation line of item 1 starts with three spaces; they are part of the prompt text.
export const SCREENSHOT_SYSTEM = `You are the eyes of an assistant operating this computer; it cannot see the screen and acts only on your
words. Describe the screenshot so it can ACT. Prioritise, in order:
1. INTERACTIVE elements — buttons, links, text fields, checkboxes, menus, tabs, icons — each with its
   visible label and an approximate location: a region (e.g. top-right) AND a rough pixel x,y if you can.
2. The currently focused/active element or selection, and any open dialog/modal/popup.
3. Errors, warnings, and key visible text/headings — quote them exactly.
4. One line on what app/screen this appears to be.
Positions guide clicks, so always estimate them. Be concise and factual; never invent elements.`;
|
|
80
|
+
// System prompt for grounding: the vision model is asked WHERE one named UI element is, so a click can be
// aimed at it. The answer is requested as per-mille of the image width/height rather than pixels, which keeps
// it independent of the screenshot's resolution (Retina/DPI scaling). {"x": -1, "y": -1} means "not visible".
export const LOCATE_SYSTEM = `You are given a screenshot. The user names ONE UI element (button, field, icon, menu item, link).
Return ONLY its CENTER as JSON: {"x": <0-1000>, "y": <0-1000>}, where x is the position as per-mille of
the image WIDTH (0=left, 1000=right) and y as per-mille of the HEIGHT (0=top, 1000=bottom).
If the element is not visible, return {"x": -1, "y": -1}. Output ONLY the JSON, nothing else.`;
|
|
88
|
+
/**
 * Parse a grounding reply into the target's centre as 0..1 fractions of the image width/height.
 *
 * Two reply shapes are recognised, tried in this order:
 *   1. JSON-like text containing `"x": <number>` followed later by `"y": <number>`;
 *   2. otherwise, the first two numbers separated by a comma and/or whitespace.
 *
 * The unit is inferred ONCE from the larger of the two coordinates and applied to both axes:
 * above 100 → per-mille, above 1.5 → percent, otherwise already a fraction. Inferring it per axis would
 * misread a per-mille point near an edge (e.g. {"x": 50, "y": 500}) by treating x as percent and y as
 * per-mille. Results are clamped to [0, 1].
 *
 * @param {string} text - Raw model reply.
 * @returns {{x: number, y: number} | null} The centre as fractions, or null when the reply is not a string,
 *   holds no coordinate pair, or reports a negative coordinate (the "not visible" sentinel).
 */
export function parseLocate(text) {
    // A provider may hand back no text at all; treat that as "unparseable" instead of throwing on .match().
    if (typeof text !== "string")
        return null;
    const m = text.match(/"x"\s*:\s*(-?\d+(?:\.\d+)?)[\s,}]+.*?"y"\s*:\s*(-?\d+(?:\.\d+)?)/s) || text.match(/(-?\d+(?:\.\d+)?)\s*[,\s]\s*(-?\d+(?:\.\d+)?)/);
    if (!m)
        return null;
    const rawX = Number(m[1]);
    const rawY = Number(m[2]);
    if (Number.isNaN(rawX) || Number.isNaN(rawY) || rawX < 0 || rawY < 0)
        return null; // not found / unparseable
    // Both coordinates come from the same reply, so they share one unit: decide it from the larger value.
    const peak = Math.max(rawX, rawY);
    const scale = peak > 100 ? 1000 : peak > 1.5 ? 100 : 1; // per-mille | percent | fraction
    const toFraction = (v) => Math.min(1, Math.max(0, v / scale));
    return { x: toFraction(rawX), y: toFraction(rawY) };
}
|
|
102
|
+
/**
 * Ask a (grounding-capable) vision model where `target` is in a screenshot.
 *
 * Sends a single user turn carrying the image and the element's name under LOCATE_SYSTEM, with no tools
 * and streamed text ignored, then parses the reply with parseLocate.
 *
 * @param provider - Object exposing an async `turn(request)` method.
 * @param image - The screenshot, passed through unchanged as the turn's only image.
 * @param target - Name/description of the UI element to find.
 * @param opts - Optional settings; `opts.signal` is forwarded to the provider.
 * @returns The target's centre as 0..1 fractions, or null when the provider reports `stop === "error"`
 *   or the reply cannot be parsed.
 */
export async function locateImage(provider, image, target, opts = {}) {
    const question = { role: "user", content: `Locate this element: ${target}`, images: [image] };
    const reply = await provider.turn({
        system: LOCATE_SYSTEM,
        history: [question],
        tools: [],
        onText: () => { },
        signal: opts.signal,
    });
    return reply.stop === "error" ? null : parseLocate(reply.text);
}
|
|
68
115
|
// Default user-message text sent with the image(s) by describeImages; when a hint is supplied, a
// "Focus especially on: …" line is appended to it.
const PROMPT = "Describe the attached image(s) per your instructions.";
|
|
69
|
-
/** Send images to the vision provider and return its textual description. Throws on a provider error.
|
|
116
|
+
/** Send images to the vision provider and return its textual description. Throws on a provider error.
|
|
117
|
+
* `system` overrides the default prompt (e.g. SCREENSHOT_SYSTEM); `hint` focuses it on a specific goal. */
|
|
70
118
|
export async function describeImages(provider, images, opts = {}) {
|
|
119
|
+
const content = opts.hint ? `${PROMPT}\nFocus especially on: ${opts.hint}` : PROMPT;
|
|
71
120
|
const r = await provider.turn({
|
|
72
|
-
system: DESCRIBE_SYSTEM,
|
|
73
|
-
history: [{ role: "user", content
|
|
121
|
+
system: opts.system ?? DESCRIBE_SYSTEM,
|
|
122
|
+
history: [{ role: "user", content, images }],
|
|
74
123
|
tools: [],
|
|
75
124
|
onText: () => { },
|
|
76
125
|
signal: opts.signal,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nanhara/hara",
|
|
3
|
-
"version": "0.33.0",
|
|
3
|
+
"version": "0.48.0",
|
|
4
4
|
"description": "hara — a coding agent CLI that runs like an engineering org.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"hara": "dist/index.js"
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
"README.md",
|
|
12
12
|
"CHANGELOG.md",
|
|
13
13
|
"LICENSE",
|
|
14
|
-
"CLA.md"
|
|
14
|
+
"CLA.md",
|
|
15
|
+
"plugins"
|
|
15
16
|
],
|
|
16
17
|
"keywords": [
|
|
17
18
|
"ai",
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "browser",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Reliable web automation for hara via the Playwright MCP — acts on the DOM/accessibility tree (selectors, auto-wait), not pixels. navigate / click / type / fill / snapshot.",
|
|
5
|
+
"skills": ["skills"],
|
|
6
|
+
"mcpServers": {
|
|
7
|
+
"browser": { "command": "npx", "args": ["-y", "@playwright/mcp@latest"] }
|
|
8
|
+
}
|
|
9
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: web-automation
|
|
3
|
+
description: Operate web pages reliably — navigate, click, fill forms, log in, extract — via the Playwright MCP. Acts on the DOM/accessibility tree by selector/role (deterministic, auto-waiting), NOT screenshots or pixel coordinates. Far more reliable than desktop screen control.
|
|
4
|
+
when_to_use: when the user wants to do anything on a website — open a page, click, fill/submit a form, log in, scrape data, automate a web flow.
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Web automation (Playwright MCP)
|
|
8
|
+
|
|
9
|
+
Reliable browser tools are available as `mcp__browser__*` (navigate, snapshot, click, type, fill_form,
|
|
10
|
+
select_option, evaluate, …). They act on the page's **accessibility tree by element ref/role/text** — not
|
|
11
|
+
screenshots or pixel coordinates — so they're deterministic and auto-wait for elements. This is the reliable
|
|
12
|
+
counterpart to the fragile desktop `computer` tool: prefer it for anything on the web.
|
|
13
|
+
|
|
14
|
+
## Workflow
|
|
15
|
+
1. `browser_navigate` to the URL.
|
|
16
|
+
2. `browser_snapshot` — read the accessibility tree (elements + their `ref`s). This is your "eyes": use the
|
|
17
|
+
refs to act precisely. Prefer it over a screenshot.
|
|
18
|
+
3. Act by ref/role/text: `browser_click`, `browser_type`, `browser_fill_form`, `browser_select_option`.
|
|
19
|
+
4. `browser_snapshot` again to verify before the next step.
|
|
20
|
+
|
|
21
|
+
## Notes
|
|
22
|
+
- First run downloads a browser once: `npx playwright install chromium`.
|
|
23
|
+
- The Playwright MCP uses its **own** browser (no logins). For tasks needing your **real logged-in Chrome**, use
|
|
24
|
+
`chrome-devtools-mcp` instead (drives your actual Chrome via CDP) — swap the mcpServers command to
|
|
25
|
+
`npx chrome-devtools-mcp@latest`. (This is what openclaw/cc-haha use.)
|
|
26
|
+
- **Confirm before irreversible actions** — purchases, posting, sending messages, deleting. Verify the page/state
|
|
27
|
+
with a snapshot first.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "chrome",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Drive a real, persistent-login Chrome from hara via chrome-devtools-mcp (CDP) — for web tasks on sites you're already signed into (logins persist across runs). Alternative to the `browser` plugin's isolated Playwright browser — enable one, not both.",
|
|
5
|
+
"skills": ["skills"],
|
|
6
|
+
"mcpServers": {
|
|
7
|
+
"chrome": { "command": "npx", "args": ["-y", "chrome-devtools-mcp@latest"] }
|
|
8
|
+
}
|
|
9
|
+
}
|