@nanhara/hara 0.33.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tui/App.js CHANGED
@@ -13,6 +13,7 @@ import { InputBox } from "./InputBox.js";
13
13
  import { activity } from "../activity.js";
14
14
  import { ctxPctFor } from "../statusbar.js";
15
15
  import { accent } from "./theme.js";
16
+ import { renderMarkdown } from "../md.js";
16
17
  let _id = 0;
17
18
  const nid = () => ++_id;
18
19
  const stripAnsi = (s) => s.replace(/\x1b\[[0-9;]*m/g, "");
@@ -21,7 +22,7 @@ function Block({ item, open }) {
21
22
  case "user":
22
23
  return (_jsxs(Box, { marginTop: 1, children: [_jsx(Text, { color: "cyan", children: "\u203A " }), _jsx(Text, { children: item.text })] }));
23
24
  case "assistant":
24
- return _jsx(Text, { children: item.text });
25
+ return _jsx(Text, { children: renderMarkdown(item.text) }); // headers/bold/inline-code/bullets + verbatim fences
25
26
  case "reasoning": {
26
27
  // fixed-height window: show the last 5 lines while thinking; ctrl-r toggles the full text.
27
28
  const lines = item.text.replace(/\n+$/, "").split("\n");
@@ -71,6 +72,9 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
71
72
  const [promptSel, setPromptSel] = useState(0);
72
73
  const [reasoningOpen, setReasoningOpen] = useState(false);
73
74
  const ctrlRef = useRef(null);
75
+ const queueRef = useRef([]); // type-ahead: FIFO of messages entered while working
76
+ const [pool, setPool] = useState([]); // type-ahead pool: queued message lines, shown above the input
77
+ const drainingRef = useRef(false); // idempotency guard so the drain effect can't double-send one item
74
78
  const currentRef = useRef([]);
75
79
  currentRef.current = current;
76
80
  const statusRef = useRef(status);
@@ -88,10 +92,29 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
88
92
  return [...cur, { id: nid(), kind, text }];
89
93
  });
90
94
  }, []);
95
+ // Type-ahead steering: hand the runner everything queued while the turn ran, showing each message
96
+ // inline (as a user block) at the point it gets folded into the conversation. Drained mid-turn so an
97
+ // addition reaches the model on its next call; whatever's still queued at turn end is the effect below.
98
+ const drainQueue = useCallback(() => {
99
+ if (!queueRef.current.length)
100
+ return [];
101
+ const batch = queueRef.current;
102
+ queueRef.current = [];
103
+ setPool([]);
104
+ for (const b of batch)
105
+ pushCurrent("user", b.line.trim() || "πŸ–Ό (image)");
106
+ return batch;
107
+ }, [pushCurrent]);
91
108
  const handleSubmit = useCallback(async (line, images) => {
92
109
  const t = line.trim();
93
- if ((!t && !images?.length) || working || prompt)
94
- return; // allow image-only turns
110
+ if ((!t && !images?.length) || prompt)
111
+ return; // nothing to send, or a choice is pending
112
+ if (working) {
113
+ // type-ahead: hold the message in the pool; all pooled messages are sent together when the turn ends
114
+ queueRef.current.push({ line, images });
115
+ setPool(queueRef.current.map((q) => q.line.trim() || "πŸ–Ό (image)"));
116
+ return;
117
+ }
95
118
  setHistory((h) => [...h, { id: nid(), kind: "user", text: t }]); // t already carries any [Image #N] tokens
96
119
  const ctrl = new AbortController();
97
120
  ctrlRef.current = ctrl;
@@ -117,7 +140,7 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
117
140
  const selectFn = (title, options) => openPrompt(title, options);
118
141
  const setApprovalFn = (m) => setStatus((s) => ({ ...s, approval: m }));
119
142
  try {
120
- await onSubmit(t, { sink, confirm: confirmFn, select: selectFn, setApproval: setApprovalFn, signal: ctrl.signal, exit, approval: statusRef.current.approval }, images);
143
+ await onSubmit(t, { sink, confirm: confirmFn, select: selectFn, setApproval: setApprovalFn, signal: ctrl.signal, exit, approval: statusRef.current.approval, drainQueue }, images);
121
144
  }
122
145
  catch (e) {
123
146
  pushCurrent("notice", `error: ${e instanceof Error ? e.message : String(e)}`);
@@ -129,7 +152,22 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
129
152
  setCurrent([]);
130
153
  setWorking(false);
131
154
  ctrlRef.current = null;
132
- }, [working, prompt, onSubmit, pushCurrent, model, exit]);
155
+ }, [working, prompt, onSubmit, pushCurrent, model, exit, drainQueue]);
156
+ // Drain the type-ahead pool: when the turn finishes (working → false) and nothing awaits a choice, COALESCE
157
+ // every pooled message into ONE turn and send it — additions/clarifications go to the agent together, in order.
158
+ useEffect(() => {
159
+ if (working || prompt || drainingRef.current || !queueRef.current.length)
160
+ return;
161
+ drainingRef.current = true;
162
+ const batch = queueRef.current;
163
+ queueRef.current = [];
164
+ setPool([]);
165
+ const line = batch.map((b) => b.line).join("\n\n");
166
+ const images = batch.flatMap((b) => b.images ?? []);
167
+ void Promise.resolve(handleSubmit(line, images.length ? images : undefined)).finally(() => {
168
+ drainingRef.current = false;
169
+ });
170
+ }, [working, prompt, handleSubmit]);
133
171
  useInput((input, key) => {
134
172
  if (prompt) {
135
173
  const opts = prompt.options;
@@ -145,6 +183,10 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
145
183
  prompt.resolve(opts[opts.length - 1].value); // last option = cancel/no
146
184
  setPrompt(null);
147
185
  }
186
+ else if (/^[1-9]$/.test(input) && Number(input) <= opts.length) {
187
+ prompt.resolve(opts[Number(input) - 1].value); // type a number to pick directly
188
+ setPrompt(null);
189
+ }
148
190
  else if (input) {
149
191
  const hit = opts.find((o) => o.key && o.key === input.toLowerCase());
150
192
  if (hit) {
@@ -156,10 +198,16 @@ export function App({ initialStatus, model, cwd, header, onSubmit, cycleApproval
156
198
  }
157
199
  if (key.ctrl && input === "r")
158
200
  return setReasoningOpen((x) => !x);
159
- if (key.escape && working)
201
+ if (key.escape && working) {
202
+ // Esc = stop everything: abort the turn AND drop any type-ahead (a stopped turn shouldn't fire queued msgs)
203
+ if (queueRef.current.length) {
204
+ queueRef.current = [];
205
+ setPool([]);
206
+ }
160
207
  ctrlRef.current?.abort();
208
+ }
161
209
  else if (key.tab && key.shift && cycleApproval)
162
210
  setStatus((s) => ({ ...s, approval: cycleApproval(s.approval) }));
163
211
  });
164
- return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Static, { items: header ? [{ id: -1, kind: "notice", text: "" }, ...history] : history, children: (item) => (item.id === -1 ? _jsx(HeaderCard, { ...header }, "hdr") : _jsx(Block, { item: item }, item.id)) }), current.map((item) => (_jsx(Block, { item: item, open: reasoningOpen }, item.id))), working && !prompt && _jsx(Working, {}), prompt && (_jsxs(Box, { flexDirection: "column", marginTop: 1, children: [_jsx(Text, { color: "yellow", children: ` ${stripAnsi(prompt.title)}` }), prompt.options.map((o, i) => (_jsx(Text, { color: i === promptSel ? "cyan" : undefined, bold: i === promptSel, children: (i === promptSel ? " ❯ " : " ") + o.label }, i)))] })), _jsx(InputBox, { status: status, cwd: cwd, isActive: !working && !prompt, onSubmit: handleSubmit, onClipboardImage: onClipboardImage })] }));
212
+ return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Static, { items: header ? [{ id: -1, kind: "notice", text: "" }, ...history] : history, children: (item) => (item.id === -1 ? _jsx(HeaderCard, { ...header }, "hdr") : _jsx(Block, { item: item }, item.id)) }), current.map((item) => (_jsx(Block, { item: item, open: reasoningOpen }, item.id))), working && !prompt && _jsx(Working, {}), prompt && (_jsxs(Box, { flexDirection: "column", marginTop: 1, children: [_jsx(Text, { color: "yellow", children: ` ${stripAnsi(prompt.title)}` }), prompt.options.map((o, i) => (_jsx(Text, { color: i === promptSel ? "cyan" : undefined, bold: i === promptSel, children: (i === promptSel ? " ❯ " : " ") + `${i + 1}. ` + o.label }, i))), _jsx(Text, { dimColor: true, children: ` ↑↓ or 1–${prompt.options.length} to choose Β· Enter Β· Esc cancels` })] })), pool.length > 0 && !prompt && (_jsx(Box, { flexDirection: "column", children: pool.map((l, i) => (_jsx(Text, { color: accent(), children: ` β€Ί ${l.length > 72 ? l.slice(0, 72) + "…" : l}` }, i))) })), _jsx(InputBox, { status: status, cwd: cwd, isActive: !prompt, working: working, queued: pool.length, onSubmit: handleSubmit, onClipboardImage: onClipboardImage })] }));
165
213
  }
@@ -84,7 +84,7 @@ function InputLine({ value, cursor }) {
84
84
  return _jsx(Text, { children: nodes });
85
85
  }
86
86
  /** Top border (session) + prompt line + bottom border (usage) + ModeBar, with an @path popup. */
87
- export function InputBox({ status, cwd, width, onSubmit, onClipboardImage, isActive = true, placeholder = "Type a task Β· /help Β· @file Β· Ctrl+V paste image Β· shift+tab mode Β· Esc interrupts", }) {
87
+ export function InputBox({ status, cwd, width, onSubmit, onClipboardImage, isActive = true, working = false, queued = 0, placeholder = "Type a task Β· /help Β· @file Β· Ctrl+V paste image Β· shift+tab mode Β· Esc interrupts", }) {
88
88
  const { stdout } = useStdout();
89
89
  const w = width ?? stdout?.columns ?? 80;
90
90
  const [value, setValue] = useState("");
@@ -204,5 +204,5 @@ export function InputBox({ status, cwd, width, onSubmit, onClipboardImage, isAct
204
204
  set(value.slice(0, cursor) + input + value.slice(cursor), cursor + input.length);
205
205
  }
206
206
  }, { isActive });
207
- return (_jsxs(Box, { flexDirection: "column", children: [_jsx(TopBorder, { name: status.sessionName || "session", width: w }), _jsxs(Box, { children: [_jsx(Text, { color: "cyan", children: "β€Ί " }), value.length === 0 ? (_jsxs(Text, { children: [_jsx(Text, { inverse: true, children: " " }), _jsx(Text, { dimColor: true, children: placeholder })] })) : (_jsx(InputLine, { value: value, cursor: cursor }))] }), _jsx(BottomBorder, { s: status, width: w }), popupOpen ? _jsx(MentionPopup, { items: candidates, selected: selIdx, query: mention.query }) : null, _jsx(ModeBar, { approval: status.approval })] }));
207
+ return (_jsxs(Box, { flexDirection: "column", children: [_jsx(TopBorder, { name: status.sessionName || "session", width: w }), _jsxs(Box, { children: [_jsx(Text, { color: "cyan", children: "β€Ί " }), value.length === 0 ? (_jsxs(Text, { children: [_jsx(Text, { inverse: true, children: " " }), _jsx(Text, { dimColor: true, children: placeholder })] })) : (_jsx(InputLine, { value: value, cursor: cursor }))] }), _jsx(BottomBorder, { s: status, width: w }), working ? _jsx(Text, { dimColor: true, children: ` ⌨ working β€” Enter queues your message${queued ? ` Β· ${queued} queued` : ""} Β· Esc interrupts` }) : null, popupOpen ? _jsx(MentionPopup, { items: candidates, selected: selIdx, query: mention.query }) : null, _jsx(ModeBar, { approval: status.approval })] }));
208
208
  }
package/dist/vision.js CHANGED
@@ -65,12 +65,61 @@ export const DESCRIBE_SYSTEM = [
65
65
  "4. Quote any error or warning messages exactly.",
66
66
  "5. Be thorough and factual; do not speculate beyond what is visible.",
67
67
  ].join("\n");
68
+ // Screenshot variant — tuned for driving the desktop (RPA) rather than transcription. A text-only main
69
+ // model can't see, so it needs *actionable* output: where things are, so it can issue clicks.
70
+ export const SCREENSHOT_SYSTEM = [
71
+ "You are the eyes of an assistant operating this computer; it cannot see the screen and acts only on your",
72
+ "words. Describe the screenshot so it can ACT. Prioritise, in order:",
73
+ "1. INTERACTIVE elements β€” buttons, links, text fields, checkboxes, menus, tabs, icons β€” each with its",
74
+ " visible label and an approximate location: a region (e.g. top-right) AND a rough pixel x,y if you can.",
75
+ "2. The currently focused/active element or selection, and any open dialog/modal/popup.",
76
+ "3. Errors, warnings, and key visible text/headings β€” quote them exactly.",
77
+ "4. One line on what app/screen this appears to be.",
78
+ "Positions guide clicks, so always estimate them. Be concise and factual; never invent elements.",
79
+ ].join("\n");
80
+ // Grounding — ask a vision model WHERE a UI element is (for accurate RPA clicking), as resolution-independent
81
+ // fractions so it works regardless of Retina/DPI scaling.
82
+ export const LOCATE_SYSTEM = [
83
+ "You are given a screenshot. The user names ONE UI element (button, field, icon, menu item, link).",
84
+ "Return ONLY its CENTER as JSON: {\"x\": <0-1000>, \"y\": <0-1000>}, where x is the position as per-mille of",
85
+ "the image WIDTH (0=left, 1000=right) and y as per-mille of the HEIGHT (0=top, 1000=bottom).",
86
+ "If the element is not visible, return {\"x\": -1, \"y\": -1}. Output ONLY the JSON, nothing else.",
87
+ ].join("\n");
88
/**
 * Parse a grounding reply into the target's center, expressed as 0..1 fractions of the image size.
 *
 * Accepts either a JSON-like `"x": <n> ... "y": <n>` reply or, failing that, the first bare
 * `<n>, <n>` / `<n> <n>` number pair found in the text.
 *
 * The unit is inferred ONCE from the larger of the two coordinates and applied to both, so a
 * per-mille reply with one small coordinate (e.g. `{"x": 50, "y": 500}`) is not read as a mix of
 * percent and per-mille:
 *   - larger value > 100  → per-mille (divide by 1000)
 *   - larger value > 1.5  → percent   (divide by 100)
 *   - otherwise           → already a fraction
 * A reply whose coordinates are both <= 100 remains inherently ambiguous between percent and
 * per-mille; it is treated as percent (or as a fraction when both are <= 1.5).
 *
 * @param {string} text - Raw model reply.
 * @returns {{x: number, y: number} | null} Center clamped to [0, 1], or null when the reply is not
 *   a string, contains no coordinate pair, or reports a negative coordinate (the "not visible"
 *   sentinel `{"x": -1, "y": -1}`).
 */
export function parseLocate(text) {
    if (typeof text !== "string")
        return null; // nothing to parse (e.g. the provider returned no text)
    const m = text.match(/"x"\s*:\s*(-?\d+(?:\.\d+)?)[\s,}]+.*?"y"\s*:\s*(-?\d+(?:\.\d+)?)/s) ||
        text.match(/(-?\d+(?:\.\d+)?)\s*[,\s]\s*(-?\d+(?:\.\d+)?)/);
    if (!m)
        return null;
    const x = Number(m[1]);
    const y = Number(m[2]);
    if (Number.isNaN(x) || Number.isNaN(y) || x < 0 || y < 0)
        return null; // not found / unparseable
    // One scale for both axes: per-coordinate inference would map x=50‰ to 0.5 instead of 0.05.
    const peak = Math.max(x, y);
    const scale = peak > 100 ? 1000 : peak > 1.5 ? 100 : 1;
    const toFraction = (v) => Math.min(1, Math.max(0, v / scale));
    return { x: toFraction(x), y: toFraction(y) };
}
102
/**
 * Ask a (grounding-capable) vision model where `target` is in a screenshot.
 *
 * Issues a single `provider.turn` call with `LOCATE_SYSTEM` as the system prompt, one user message
 * carrying the screenshot, and no tools; streamed text is ignored.
 *
 * @param provider - Object exposing an async `turn(request)` method.
 * @param image - The screenshot, passed through unchanged as the message's only image.
 * @param target - Description of the UI element to locate; interpolated into the user message.
 * @param opts - Optional settings; `opts.signal` is forwarded to the provider call.
 * @returns The target's center as 0..1 fractions (whatever `parseLocate` yields for the reply
 *   text), or null when the provider reports `stop === "error"`.
 */
export async function locateImage(provider, image, target, opts = {}) {
    const request = {
        system: LOCATE_SYSTEM,
        history: [{ role: "user", content: `Locate this element: ${target}`, images: [image] }],
        tools: [],
        onText: () => { },
        signal: opts.signal,
    };
    const reply = await provider.turn(request);
    return reply.stop === "error" ? null : parseLocate(reply.text);
}
68
115
  const PROMPT = "Describe the attached image(s) per your instructions.";
69
- /** Send images to the vision provider and return its textual description. Throws on a provider error. */
116
+ /** Send images to the vision provider and return its textual description. Throws on a provider error.
117
+ * `system` overrides the default prompt (e.g. SCREENSHOT_SYSTEM); `hint` focuses it on a specific goal. */
70
118
  export async function describeImages(provider, images, opts = {}) {
119
+ const content = opts.hint ? `${PROMPT}\nFocus especially on: ${opts.hint}` : PROMPT;
71
120
  const r = await provider.turn({
72
- system: DESCRIBE_SYSTEM,
73
- history: [{ role: "user", content: PROMPT, images }],
121
+ system: opts.system ?? DESCRIBE_SYSTEM,
122
+ history: [{ role: "user", content, images }],
74
123
  tools: [],
75
124
  onText: () => { },
76
125
  signal: opts.signal,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nanhara/hara",
3
- "version": "0.33.0",
3
+ "version": "0.53.0",
4
4
  "description": "hara β€” a coding agent CLI that runs like an engineering org.",
5
5
  "bin": {
6
6
  "hara": "dist/index.js"
@@ -11,7 +11,8 @@
11
11
  "README.md",
12
12
  "CHANGELOG.md",
13
13
  "LICENSE",
14
- "CLA.md"
14
+ "CLA.md",
15
+ "plugins"
15
16
  ],
16
17
  "keywords": [
17
18
  "ai",
@@ -0,0 +1,9 @@
1
+ {
2
+ "name": "browser",
3
+ "version": "0.1.0",
4
+ "description": "Reliable web automation for hara via the Playwright MCP β€” acts on the DOM/accessibility tree (selectors, auto-wait), not pixels. navigate / click / type / fill / snapshot.",
5
+ "skills": ["skills"],
6
+ "mcpServers": {
7
+ "browser": { "command": "npx", "args": ["-y", "@playwright/mcp@latest"] }
8
+ }
9
+ }
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: web-automation
3
+ description: Operate web pages reliably — navigate, click, fill forms, log in, extract — via the Playwright MCP. Acts on the DOM/accessibility tree by selector/role (deterministic, auto-waiting), NOT screenshots or pixel coordinates. Far more reliable than desktop screen control.
4
+ when_to_use: when the user wants to do anything on a website — open a page, click, fill/submit a form, log in, scrape data, automate a web flow.
5
+ ---
6
+
7
+ # Web automation (Playwright MCP)
8
+
9
+ Reliable browser tools are available as `mcp__browser__*` (navigate, snapshot, click, type, fill_form,
10
+ select_option, evaluate, …). They act on the page's **accessibility tree by element ref/role/text** — not
11
+ screenshots or pixel coordinates — so they're deterministic and auto-wait for elements. This is the reliable
12
+ counterpart to the fragile desktop `computer` tool: prefer it for anything on the web.
13
+
14
+ ## Workflow
15
+ 1. `browser_navigate` to the URL.
16
+ 2. `browser_snapshot` — read the accessibility tree (elements + their `ref`s). This is your "eyes": use the
17
+ refs to act precisely. Prefer it over a screenshot.
18
+ 3. Act by ref/role/text: `browser_click`, `browser_type`, `browser_fill_form`, `browser_select_option`.
19
+ 4. `browser_snapshot` again to verify before the next step.
20
+
21
+ ## Notes
22
+ - First run downloads a browser once: `npx playwright install chromium`.
23
+ - The Playwright MCP uses its **own** browser (no logins). For tasks needing your **real logged-in Chrome**, use
24
+ `chrome-devtools-mcp` instead (drives your actual Chrome via CDP) — swap the mcpServers command to
25
+ `npx chrome-devtools-mcp@latest`. (This is what openclaw/cc-haha use.)
26
+ - **Confirm before irreversible actions** — purchases, posting, sending messages, deleting. Verify the page/state
27
+ with a snapshot first.
@@ -0,0 +1,9 @@
1
+ {
2
+ "name": "chrome",
3
+ "version": "0.1.0",
4
+ "description": "Drive a real, persistent-login Chrome from hara via chrome-devtools-mcp (CDP) β€” for web tasks on sites you're already signed into (logins persist across runs). Alternative to the `browser` plugin's isolated Playwright browser β€” enable one, not both.",
5
+ "skills": ["skills"],
6
+ "mcpServers": {
7
+ "chrome": { "command": "npx", "args": ["-y", "chrome-devtools-mcp@latest"] }
8
+ }
9
+ }
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: chrome-control
3
+ description: Operate a REAL Chrome (with your persistent logins) for web tasks on signed-in sites — via chrome-devtools-mcp (Chrome DevTools Protocol). Use instead of the isolated Playwright `browser` plugin when the task needs your existing accounts/sessions.
4
+ when_to_use: when a web task must run on a site you're logged into (your dashboards, accounts, web apps) rather than a fresh anonymous browser.
5
+ ---
6
+
7
+ # Chrome (real, logged-in) via chrome-devtools-mcp
8
+
9
+ Tools appear as `mcp__chrome__*` (navigate, click, fill, snapshot, network, performance…). Same
10
+ DOM/accessibility-tree reliability as the `browser` plugin, but it drives a **real Chrome with a persistent
11
+ profile** — log into a site once and the session is remembered across runs.
12
+
13
+ ## Modes
14
+ - **Persistent profile (default):** `npx chrome-devtools-mcp@latest` launches Chrome with a saved profile at
15
+ `~/.cache/chrome-devtools-mcp/chrome-profile`. Log in once; it persists. Good default.
16
+ - **Attach to YOUR running Chrome:** launch Chrome with `--remote-debugging-port=9222`, then set the MCP command
17
+ to `npx chrome-devtools-mcp@latest --browserUrl http://127.0.0.1:9222` — hara then drives your actual browser
18
+ and all its logins.
19
+
20
+ ## Enable (alternative to `browser`, not both)
21
+ Running two browser MCPs at once is confusing. To switch from the default Playwright `browser`:
22
+ `hara plugin add file:<repo>/plugins/chrome && hara plugin disable browser`.
23
+
24
+ ## Caution
25
+ This controls a **real** browser session. Confirm before destructive/irreversible actions (purchases, posting,
26
+ sending, deleting); take a snapshot to verify the page/state first.