@hybridaione/hybridclaw 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +22 -7
  3. package/config.example.json +5 -0
  4. package/container/package-lock.json +2 -2
  5. package/container/package.json +1 -1
  6. package/container/src/approval-policy.ts +136 -34
  7. package/container/src/browser-tools.ts +241 -0
  8. package/container/src/hybridai-client.ts +31 -3
  9. package/container/src/index.ts +12 -1
  10. package/container/src/token-usage.ts +89 -10
  11. package/container/src/tools.ts +1 -0
  12. package/container/src/types.ts +17 -0
  13. package/dist/agent.d.ts.map +1 -1
  14. package/dist/agent.js +5 -2
  15. package/dist/agent.js.map +1 -1
  16. package/dist/channels/discord/delivery.d.ts.map +1 -1
  17. package/dist/channels/discord/delivery.js +5 -1
  18. package/dist/channels/discord/delivery.js.map +1 -1
  19. package/dist/channels/discord/inbound.d.ts +27 -0
  20. package/dist/channels/discord/inbound.d.ts.map +1 -1
  21. package/dist/channels/discord/inbound.js +125 -16
  22. package/dist/channels/discord/inbound.js.map +1 -1
  23. package/dist/channels/discord/runtime.d.ts +1 -1
  24. package/dist/channels/discord/runtime.d.ts.map +1 -1
  25. package/dist/channels/discord/runtime.js +166 -57
  26. package/dist/channels/discord/runtime.js.map +1 -1
  27. package/dist/channels/discord/stream.d.ts.map +1 -1
  28. package/dist/channels/discord/stream.js +3 -4
  29. package/dist/channels/discord/stream.js.map +1 -1
  30. package/dist/cli.js +30 -1
  31. package/dist/cli.js.map +1 -1
  32. package/dist/config.d.ts +5 -0
  33. package/dist/config.d.ts.map +1 -1
  34. package/dist/config.js +10 -0
  35. package/dist/config.js.map +1 -1
  36. package/dist/container-runner.d.ts.map +1 -1
  37. package/dist/container-runner.js +2 -1
  38. package/dist/container-runner.js.map +1 -1
  39. package/dist/gateway-service.d.ts.map +1 -1
  40. package/dist/gateway-service.js +62 -15
  41. package/dist/gateway-service.js.map +1 -1
  42. package/dist/gateway.js +124 -2
  43. package/dist/gateway.js.map +1 -1
  44. package/dist/heartbeat.d.ts.map +1 -1
  45. package/dist/heartbeat.js +14 -0
  46. package/dist/heartbeat.js.map +1 -1
  47. package/dist/hybridai-models.d.ts +8 -0
  48. package/dist/hybridai-models.d.ts.map +1 -0
  49. package/dist/hybridai-models.js +94 -0
  50. package/dist/hybridai-models.js.map +1 -0
  51. package/dist/prompt-hooks.d.ts.map +1 -1
  52. package/dist/prompt-hooks.js +2 -0
  53. package/dist/prompt-hooks.js.map +1 -1
  54. package/dist/runtime-config.d.ts +6 -0
  55. package/dist/runtime-config.d.ts.map +1 -1
  56. package/dist/runtime-config.js +26 -1
  57. package/dist/runtime-config.js.map +1 -1
  58. package/dist/scheduled-task-runner.d.ts.map +1 -1
  59. package/dist/scheduled-task-runner.js +14 -0
  60. package/dist/scheduled-task-runner.js.map +1 -1
  61. package/dist/types.d.ts +4 -0
  62. package/dist/types.d.ts.map +1 -1
  63. package/dist/types.js.map +1 -1
  64. package/docs/index.html +36 -1
  65. package/package.json +1 -1
  66. package/src/agent.ts +15 -1
  67. package/src/channels/discord/delivery.ts +8 -1
  68. package/src/channels/discord/inbound.ts +152 -23
  69. package/src/channels/discord/runtime.ts +222 -63
  70. package/src/channels/discord/stream.ts +12 -4
  71. package/src/cli.ts +33 -1
  72. package/src/config.ts +20 -0
  73. package/src/container-runner.ts +2 -0
  74. package/src/gateway-service.ts +66 -13
  75. package/src/gateway.ts +166 -4
  76. package/src/heartbeat.ts +15 -0
  77. package/src/hybridai-models.ts +158 -0
  78. package/src/prompt-hooks.ts +2 -0
  79. package/src/runtime-config.ts +57 -5
  80. package/src/scheduled-task-runner.ts +14 -0
  81. package/src/types.ts +4 -0
  82. package/tests/approval-policy.test.ts +111 -0
  83. package/tests/discord.basic.test.ts +182 -1
  84. package/tests/hybridai-client.test.ts +112 -0
  85. package/tests/hybridai-models.test.ts +46 -0
  86. package/tests/token-usage.cache.test.ts +128 -0
package/CHANGELOG.md CHANGED
@@ -4,6 +4,31 @@
4
4
 
5
5
  No unreleased changes.
6
6
 
7
+ ## [0.2.7](https://github.com/HybridAIOne/hybridclaw/tree/v0.2.7)
8
+
9
+ ### Added
10
+
11
+ - **Private approval slash command**: Added `/approve` with private (ephemeral) responses for `view`, `yes`, `session`, `agent`, and `no`, including optional `approval_id`.
12
+ - **Static model context-window catalog**: Added curated context-window mappings (Claude/Gemini/GPT-5 families) plus family-aware model-id fallback matching for session status metrics without runtime model-list fetches.
13
+ - **Discord command access + output controls**: Added runtime config support for `discord.commandMode`, `discord.commandAllowedUserIds`, `discord.textChunkLimit`, and `discord.maxLinesPerMessage`.
14
+ - **HybridAI completion budget control**: Added `hybridai.maxTokens` runtime setting and request wiring (`max_tokens`) for container model calls.
15
+
16
+ ### Changed
17
+
18
+ - **Approval prompt visibility in Discord**: Channel responses now post a minimal “approval required” notice and move full approval details/decisions into private slash-command responses (`/approve`), matching the visibility pattern of `/status`.
19
+ - **Discord command handler context**: Command execution now receives invoking `userId` and `username` so approval actions can be scoped to the requesting user.
20
+ - **Discord slash command discoverability**: `/status` and `/approve` are now upserted globally for DM visibility while guild-only authorization checks remain enforced in servers.
21
+ - **Discord free-mode message relevance gating**: Free-mode replies now skip low-signal acknowledgements/URL-only chatter and avoid jumping in when other users are explicitly mentioned.
22
+ - **Status context usage reporting**: Session status now derives context usage from usage telemetry and static model context-window resolution instead of char-budget estimation only.
23
+ - **Approval parsing and trust scoping**: Approval response parsing now handles mention-prefixed/batched messages, and network trust scopes now normalize hosts to broader domain scopes.
24
+ - **Prompt dump diagnostics**: `data/last_prompt.jsonl` now includes media context plus allowed/blocked tool lists for richer debugging context.
25
+
26
+ ### Fixed
27
+
28
+ - **Google Images/Lens upload compatibility**: `browser_upload` now supports CSS-selector targets and automatically falls back from wrapper refs to detected `input[type="file"]` selectors when upload fails with non-input elements.
29
+ - **Install-root container bootstrap**: CLI container readiness checks now resolve the package install root, preventing false build failures when invoked from non-package working directories.
30
+ - **DM slash command registration regression**: Restored reliable discovery/usage of HybridClaw slash commands in Discord DMs.
31
+
7
32
  ## [0.2.6](https://github.com/HybridAIOne/hybridclaw/tree/v0.2.6)
8
33
 
9
34
  ### Added
package/README.md CHANGED
@@ -11,7 +11,17 @@ npm install -g @hybridaione/hybridclaw
11
11
  hybridclaw onboarding
12
12
  ```
13
13
 
14
- Latest release: [v0.2.6](https://github.com/HybridAIOne/hybridclaw/releases/tag/v0.2.6)
14
+ Latest release: [v0.2.7](https://github.com/HybridAIOne/hybridclaw/releases/tag/v0.2.7)
15
+
16
+ ## Release highlights (v0.2.7)
17
+
18
+ - Private approval flow via `/approve [view|yes|session|agent|no] [approval_id]` with ephemeral responses, and DM-ready global slash registration for `/status` + `/approve`.
19
+ - Discord command access controls now support `discord.commandMode` (`public|restricted`) plus `discord.commandAllowedUserIds` (with legacy `commandUserId` compatibility).
20
+ - Free-mode Discord replies now apply stronger relevance gating (for example short acknowledgements/URL-only chatter are skipped, and messages mentioning other users are de-prioritized).
21
+ - `browser_upload` now supports selector targets and automatic fallback from wrapper refs to detected `input[type="file"]` elements.
22
+ - HybridAI status/usage context metrics now use a curated static context-window catalog (Claude/Gemini/GPT-5) with family-aware fallback matching, and runtime supports `hybridai.maxTokens` for default completion budgeting.
23
+ - Model-usage telemetry now captures cache read/write token counters where providers expose them, and prompt-dump diagnostics include media plus allowed/blocked tool context.
24
+ - CLI container readiness checks now resolve the package install root to avoid non-root invocation failures.
15
25
 
16
26
  ## HybridAI Advantage
17
27
 
@@ -32,7 +42,7 @@ Latest release: [v0.2.6](https://github.com/HybridAIOne/hybridclaw/releases/tag/
32
42
  ## Quick start
33
43
 
34
44
  ```bash
35
- # Install dependencies (this also installs container deps via postinstall)
45
+ # Install dependencies
36
46
  npm install
37
47
 
38
48
  # Run onboarding (also auto-runs on first `gateway`/`tui` start if API key is missing)
@@ -100,10 +110,14 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
100
110
  - `discord.guildMembersIntent` enables richer guild member context and better `@name` mention resolution in replies (requires enabling **Server Members Intent** in Discord Developer Portal)
101
111
  - `discord.presenceIntent` enables Discord presence events (requires enabling **Presence Intent** in Discord Developer Portal)
102
112
  - `discord.respondToAllMessages` is a global fallback for open-policy guild channels without explicit mode config (`false` mention-gated, `true` free-response)
103
- - `discord.commandUserId` restricts `!claw <command>` admin commands to a single Discord user ID (all other messages still use normal chat handling)
104
- - `discord.commandsOnly` optional hard mode: if `true`, the bot ignores non-`!claw` messages and only accepts prefixed commands (optionally limited by `discord.commandUserId`)
105
- - `discord.groupPolicy` controls guild channel scope: `open` (default), `allowlist`, or `disabled`
113
+ - `discord.commandMode` controls command access: `public` (any user can run slash/`!claw` commands) or `restricted` (only allowlisted users can run slash/`!claw` commands)
114
+ - `discord.commandAllowedUserIds` is the allowlist used when `discord.commandMode` is `restricted`
115
+ - `discord.commandUserId` is a legacy single-user allowlist alias; when set without `commandMode`, runtime treats command access as `restricted` for backward compatibility
116
+ - `discord.commandsOnly` optional hard mode: if `true`, the bot ignores non-`!claw` messages and only accepts prefixed commands (still subject to `discord.commandMode`)
117
+ - `discord.groupPolicy` controls guild channel scope: `open` (default, mention-first unless a channel is set to `free`), `allowlist`, or `disabled`
106
118
  - `discord.freeResponseChannels` is a Hermes-style channel ID list that gets free-response behavior while other channels remain mention-gated
119
+ - `discord.textChunkLimit` controls Discord message chunk size (default `2000`)
120
+ - `discord.maxLinesPerMessage` controls max lines per Discord chunk (default `17`)
107
121
  - `discord.humanDelay` controls natural delays between multi-part messages (`off|natural|custom`)
108
122
  - `discord.typingMode` controls typing indicator lifecycle (`instant|thinking|streaming|never`)
109
123
  - `discord.presence.*` enables dynamic self-presence health states (healthy/degraded/exhausted mapped to `online|idle|dnd`, plus maintenance `invisible` during shutdown)
@@ -124,7 +138,7 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
124
138
  - `sessionCompaction.tokenBudget` and `sessionCompaction.budgetRatio` tune compaction token budgeting behavior
125
139
  - Built-in Discord humanization behaviors include night/weekend pacing, post-exchange cooldown scaling (after 5+ exchanges, reset after 20 minutes idle), selective silence in active free-mode channels, short-ack read reactions, and reconnect staggered dequeue
126
140
  - Per-guild/per-channel mode takes precedence over `discord.respondToAllMessages`
127
- - Discord slash commands: `/status`, `/channel-mode <off|mention|free>`, and `/channel-policy <open|allowlist|disabled>` (ephemeral replies)
141
+ - Discord slash commands: `/status`, `/approve [view|yes|session|agent|no] [approval_id]`, `/channel-mode <off|mention|free>`, and `/channel-policy <open|allowlist|disabled>` (ephemeral replies)
128
142
  - `skills.extraDirs` adds additional enterprise/shared skill roots (lowest precedence tier)
129
143
  - `proactive.*` controls autonomous behavior (`activeHours`, `delegation`, `autoRetry`, `ralph`)
130
144
  - `proactive.ralph.maxIterations` enables Ralph loop (`0` off, `-1` unlimited, `>0` extra autonomous iterations before forcing completion)
@@ -132,6 +146,7 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
132
146
  - `observability.*` controls push ingest into HybridAI (`events:batch` endpoint, batching, identity metadata)
133
147
  - Some settings require restart to fully apply (for example HTTP bind host/port)
134
148
  - Default bot is configured via `hybridai.defaultChatbotId` in `config.json`
149
+ - `hybridai.maxTokens` sets the default completion budget per model call (default `4096`)
135
150
 
136
151
  Secrets remain in `.env`:
137
152
 
@@ -328,7 +343,7 @@ The agent has access to these sandboxed tools inside the container:
328
343
  - `session_search` — search/summarize historical sessions from transcript archives
329
344
  - `delegate` — push-based background subagent tasks (`single`, `parallel`, `chain`) with auto-announced completion (no polling)
330
345
  - `web_fetch` — plain HTTP fetch + extraction for static/read-only content (docs, articles, READMEs, JSON/text APIs, direct files)
331
- - `browser_*` (optional) — full browser automation for JS-rendered or interactive pages (`navigate`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
346
+ - `browser_*` (optional) — full browser automation for JS-rendered or interactive pages (`navigate`, `snapshot`, `click`, `type`, `upload`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
332
347
 
333
348
  `delegate` mode examples:
334
349
 
package/config.example.json CHANGED
@@ -15,9 +15,13 @@
15
15
  "presenceIntent": false,
16
16
  "respondToAllMessages": false,
17
17
  "commandsOnly": false,
18
+ "commandMode": "public",
19
+ "commandAllowedUserIds": [],
18
20
  "commandUserId": "",
19
21
  "groupPolicy": "open",
20
22
  "freeResponseChannels": [],
23
+ "textChunkLimit": 2000,
24
+ "maxLinesPerMessage": 17,
21
25
  "humanDelay": {
22
26
  "mode": "natural",
23
27
  "minMs": 800,
@@ -58,6 +62,7 @@
58
62
  "baseUrl": "https://hybridai.one",
59
63
  "defaultModel": "gpt-5-nano",
60
64
  "defaultChatbotId": "",
65
+ "maxTokens": 4096,
61
66
  "enableRag": true,
62
67
  "models": ["gpt-5-nano", "gpt-5-mini", "gpt-5"]
63
68
  },
package/container/package-lock.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "hybridclaw-agent",
3
- "version": "0.2.6",
3
+ "version": "0.2.7",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "hybridclaw-agent",
9
- "version": "0.2.6",
9
+ "version": "0.2.7",
10
10
  "dependencies": {
11
11
  "@mozilla/readability": "^0.6.0",
12
12
  "agent-browser": "^0.15.1",
package/container/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hybridclaw-agent",
3
- "version": "0.2.6",
3
+ "version": "0.2.7",
4
4
  "type": "module",
5
5
  "scripts": {
6
6
  "build": "tsc",
package/container/src/approval-policy.ts CHANGED
@@ -437,7 +437,8 @@ function latestUserMessageText(messages: ChatMessage[]): string {
437
437
  for (let i = messages.length - 1; i >= 0; i -= 1) {
438
438
  if (messages[i].role !== 'user') continue;
439
439
  const content = messages[i].content;
440
- if (typeof content === 'string') return normalizePrompt(content);
440
+ if (typeof content === 'string')
441
+ return content.trim().slice(0, MAX_PROMPT_CHARS);
441
442
  if (!Array.isArray(content)) continue;
442
443
  const textParts: string[] = [];
443
444
  for (const part of content) {
@@ -448,7 +449,7 @@ function latestUserMessageText(messages: ChatMessage[]): string {
448
449
  if (trimmed) textParts.push(trimmed);
449
450
  }
450
451
  if (textParts.length > 0) {
451
- return normalizePrompt(textParts.join('\n'));
452
+ return textParts.join('\n').trim().slice(0, MAX_PROMPT_CHARS);
452
453
  }
453
454
  }
454
455
  return '';
@@ -474,6 +475,44 @@ function extractHostsFromUrlLikeText(input: string): string[] {
474
475
  return [...hosts];
475
476
  }
476
477
 
478
+ function normalizeHostScope(host: string): string {
479
+ const normalized = host.trim().toLowerCase().replace(/\.$/, '');
480
+ if (!normalized) return 'unknown-host';
481
+ if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(normalized)) return normalized;
482
+ if (normalized.includes(':')) return normalized; // IPv6/host:port fragments
483
+
484
+ const labels = normalized.split('.').filter(Boolean);
485
+ if (labels.length <= 2) return normalized;
486
+
487
+ const secondLevel = labels[labels.length - 2];
488
+ const topLevel = labels[labels.length - 1];
489
+ const commonSecondLevelTlds = new Set([
490
+ 'ac',
491
+ 'co',
492
+ 'com',
493
+ 'edu',
494
+ 'gov',
495
+ 'net',
496
+ 'org',
497
+ ]);
498
+ if (
499
+ topLevel.length === 2 &&
500
+ commonSecondLevelTlds.has(secondLevel) &&
501
+ labels.length >= 3
502
+ ) {
503
+ return labels.slice(-3).join('.');
504
+ }
505
+ return labels.slice(-2).join('.');
506
+ }
507
+
508
+ function extractHostScopes(hosts: string[]): string[] {
509
+ const scopes = new Set<string>();
510
+ for (const host of hosts) {
511
+ scopes.add(normalizeHostScope(host));
512
+ }
513
+ return [...scopes];
514
+ }
515
+
477
516
  function extractAbsolutePaths(input: string): string[] {
478
517
  const paths = new Set<string>();
479
518
  for (const match of input.matchAll(ABS_PATH_RE)) {
@@ -500,39 +539,91 @@ function parseModeFromApproveMatch(
500
539
  return 'once';
501
540
  }
502
541
 
503
- function parseApprovalUserResponse(input: string): {
542
+ function parseApprovalDirective(input: string): {
504
543
  kind: 'approve' | 'deny';
505
544
  mode?: 'once' | 'session' | 'agent';
506
545
  requestId: string;
507
546
  } | null {
508
- const menuMatch = input.match(MENU_SELECTION_RE);
509
- if (menuMatch) {
510
- const requestId = String(menuMatch[2] || '').trim();
511
- const selection = menuMatch[1];
512
- if (selection === '1') return { kind: 'approve', mode: 'once', requestId };
513
- if (selection === '2')
514
- return { kind: 'approve', mode: 'session', requestId };
515
- if (selection === '3') return { kind: 'approve', mode: 'agent', requestId };
516
- return { kind: 'deny', requestId };
547
+ const normalized = input.trim();
548
+ if (!normalized) return null;
549
+
550
+ const directiveCandidates = [
551
+ normalized,
552
+ normalized.replace(/^(?:<@!?\d+>\s*)+/, ''),
553
+ ];
554
+
555
+ for (const candidate of directiveCandidates) {
556
+ if (!candidate) continue;
557
+ const menuMatch = candidate.match(MENU_SELECTION_RE);
558
+ if (menuMatch) {
559
+ const requestId = String(menuMatch[2] || '').trim();
560
+ const selection = menuMatch[1];
561
+ if (selection === '1')
562
+ return { kind: 'approve', mode: 'once', requestId };
563
+ if (selection === '2')
564
+ return { kind: 'approve', mode: 'session', requestId };
565
+ if (selection === '3')
566
+ return { kind: 'approve', mode: 'agent', requestId };
567
+ return { kind: 'deny', requestId };
568
+ }
569
+
570
+ const approveMatch = candidate.match(APPROVE_RE);
571
+ if (approveMatch) {
572
+ return {
573
+ kind: 'approve',
574
+ mode: parseModeFromApproveMatch(approveMatch),
575
+ requestId: String(approveMatch[1] || '').trim(),
576
+ };
577
+ }
578
+
579
+ const denyMatch = candidate.match(DENY_RE);
580
+ if (denyMatch) {
581
+ return {
582
+ kind: 'deny',
583
+ requestId: String(denyMatch[1] || '').trim(),
584
+ };
585
+ }
517
586
  }
518
587
 
519
- const approveMatch = input.match(APPROVE_RE);
520
- if (approveMatch) {
521
- return {
522
- kind: 'approve',
523
- mode: parseModeFromApproveMatch(approveMatch),
524
- requestId: String(approveMatch[1] || '').trim(),
525
- };
588
+ return null;
589
+ }
590
+
591
+ function parseApprovalUserResponse(input: string): {
592
+ kind: 'approve' | 'deny';
593
+ mode?: 'once' | 'session' | 'agent';
594
+ requestId: string;
595
+ } | null {
596
+ const normalized = input.trim();
597
+ if (!normalized) return null;
598
+
599
+ const candidates: string[] = [];
600
+ const pushCandidate = (value: string): void => {
601
+ const trimmed = value.trim();
602
+ if (!trimmed) return;
603
+ if (candidates.includes(trimmed)) return;
604
+ candidates.push(trimmed);
605
+ };
606
+
607
+ pushCandidate(normalized);
608
+ pushCandidate(normalized.replace(/^(?:<@!?\d+>\s*)+/, ''));
609
+
610
+ const batchTailMatch = normalized.match(/Message\s+\d+\s*:\s*([\s\S]+)$/i);
611
+ if (batchTailMatch?.[1]) {
612
+ pushCandidate(batchTailMatch[1]);
526
613
  }
527
614
 
528
- const denyMatch = input.match(DENY_RE);
529
- if (denyMatch) {
530
- return {
531
- kind: 'deny',
532
- requestId: String(denyMatch[1] || '').trim(),
533
- };
615
+ const lines = normalized
616
+ .split(/\r?\n/)
617
+ .map((line) => line.trim())
618
+ .filter(Boolean);
619
+ if (lines.length > 0) {
620
+ pushCandidate(lines[lines.length - 1]);
534
621
  }
535
622
 
623
+ for (const candidate of candidates) {
624
+ const parsed = parseApprovalDirective(candidate);
625
+ if (parsed) return parsed;
626
+ }
536
627
  return null;
537
628
  }
538
629
 
@@ -879,16 +970,25 @@ export class TrustedCoworkerApprovalRuntime {
879
970
  const requestLabel = evaluation.requestId
880
971
  ? `Approval ID: ${evaluation.requestId}`
881
972
  : '';
882
- const trustHint = evaluation.pinned
883
- ? 'This action is pinned sensitive, so session/agent trust is disabled.'
884
- : 'Reply `yes for session` (or `2`) to trust this action for this session, or `yes for agent` (or `3`) to trust it for this agent.';
973
+ const optionLines = evaluation.pinned
974
+ ? [
975
+ 'Reply `yes` (or `1`) to approve once.',
976
+ 'Reply `yes for session` (or `2`) is unavailable for pinned-sensitive actions.',
977
+ 'Reply `yes for agent` (or `3`) is unavailable for pinned-sensitive actions.',
978
+ 'Reply `no` (or `4`) to deny.',
979
+ ]
980
+ : [
981
+ 'Reply `yes` (or `1`) to approve once.',
982
+ 'Reply `yes for session` (or `2`) to trust this action for this session.',
983
+ 'Reply `yes for agent` (or `3`) to trust it for this agent.',
984
+ 'Reply `no` (or `4`) to deny.',
985
+ ];
885
986
  return [
886
987
  `I need your approval before I ${evaluation.intent.toLowerCase()}.`,
887
988
  `Why: ${evaluation.reason}`,
888
989
  `If you skip this, ${evaluation.consequenceIfDenied.charAt(0).toLowerCase()}${evaluation.consequenceIfDenied.slice(1)}`,
889
990
  requestLabel,
890
- `Reply \`yes\` (or \`1\`) to approve once, or \`no\` (or \`4\`) to deny.`,
891
- trustHint,
991
+ ...optionLines,
892
992
  `Approval expires in ${expiresIn}s.`,
893
993
  ]
894
994
  .filter(Boolean)
@@ -1023,9 +1123,11 @@ export class TrustedCoworkerApprovalRuntime {
1023
1123
 
1024
1124
  if (lowerTool === 'web_fetch' || lowerTool === 'browser_navigate') {
1025
1125
  const rawUrl = normalizeText(args.url);
1026
- const hosts = extractHostsFromUrlLikeText(rawUrl);
1027
- const primaryHost = hosts[0] || 'unknown-host';
1028
- const unseen = hosts.filter((host) => !this.seenNetworkHosts.has(host));
1126
+ const hostScopes = extractHostScopes(extractHostsFromUrlLikeText(rawUrl));
1127
+ const primaryHost = hostScopes[0] || 'unknown-host';
1128
+ const unseen = hostScopes.filter(
1129
+ (host) => !this.seenNetworkHosts.has(host),
1130
+ );
1029
1131
  return {
1030
1132
  tier: unseen.length > 0 ? 'red' : 'yellow',
1031
1133
  actionKey: `network:${primaryHost}`,
@@ -1038,7 +1140,7 @@ export class TrustedCoworkerApprovalRuntime {
1038
1140
  : 'this is an external network action',
1039
1141
  commandPreview: normalizePreview(rawUrl),
1040
1142
  pathHints: [],
1041
- hostHints: hosts,
1143
+ hostHints: hostScopes,
1042
1144
  writeIntent: false,
1043
1145
  promotableRed: unseen.length > 0,
1044
1146
  stickyYellow: true,
package/container/src/browser-tools.ts CHANGED
@@ -13,6 +13,7 @@ const execFileAsync = promisify(execFile);
13
13
  const WORKSPACE_ROOT = '/workspace';
14
14
  const BROWSER_SOCKET_ROOT = '/tmp/hybridclaw-browser';
15
15
  const BROWSER_ARTIFACT_ROOT = path.join(WORKSPACE_ROOT, '.browser-artifacts');
16
+ const DISCORD_MEDIA_CACHE_ROOT = '/discord-media-cache';
16
17
  const BROWSER_DEFAULT_TIMEOUT_MS = 45_000;
17
18
  const BROWSER_MAX_SNAPSHOT_CHARS = 12_000;
18
19
  const BROWSER_RUNTIME_ROOT = path.join(WORKSPACE_ROOT, '.hybridclaw-runtime');
@@ -98,11 +99,55 @@ const CLEAR_NETWORK_TIMINGS_SCRIPT = `(() => {
98
99
  return true;
99
100
  })()`;
100
101
 
102
+ const FIND_FILE_INPUT_SELECTORS_SCRIPT = `(() => {
103
+ const selectors = [];
104
+ const seen = new Set();
105
+ const esc = (value) => {
106
+ const text = String(value || '');
107
+ if (typeof CSS !== 'undefined' && CSS && typeof CSS.escape === 'function') {
108
+ return CSS.escape(text);
109
+ }
110
+ return text.replace(/["\\\\]/g, '\\\\$&');
111
+ };
112
+ const push = (selector) => {
113
+ const normalized = String(selector || '').trim();
114
+ if (!normalized || seen.has(normalized)) return;
115
+ seen.add(normalized);
116
+ selectors.push(normalized);
117
+ };
118
+ const inputs = Array.from(document.querySelectorAll('input[type="file"]'));
119
+ for (const input of inputs) {
120
+ const id = input.getAttribute('id');
121
+ if (id) push(\`#\${esc(id)}\`);
122
+
123
+ const name = input.getAttribute('name');
124
+ if (name) push(\`input[type="file"][name="\${esc(name)}"]\`);
125
+
126
+ const accept = input.getAttribute('accept');
127
+ if (accept) push(\`input[type="file"][accept="\${esc(accept)}"]\`);
128
+
129
+ const form = input.closest('form');
130
+ const formId = form ? form.getAttribute('id') : null;
131
+ if (formId) {
132
+ if (name) {
133
+ push(\`#\${esc(formId)} input[type="file"][name="\${esc(name)}"]\`);
134
+ }
135
+ push(\`#\${esc(formId)} input[type="file"]\`);
136
+ }
137
+ }
138
+ push('input[type="file"]');
139
+ return selectors.slice(0, 10);
140
+ })()`;
141
+
101
142
  type SnapshotMode = 'default' | 'interactive' | 'full';
102
143
  type FrameTarget = {
103
144
  raw: string;
104
145
  isMain: boolean;
105
146
  };
147
+ type UploadTarget = {
148
+ raw: string;
149
+ source: 'ref' | 'selector';
150
+ };
106
151
  type BrowserModelContext = {
107
152
  baseUrl: string;
108
153
  apiKey: string;
@@ -428,6 +473,85 @@ function ensureRef(raw: unknown): string {
428
473
  return ref.startsWith('@') ? ref : `@${ref}`;
429
474
  }
430
475
 
476
+ function resolveUploadTarget(args: Record<string, unknown>): UploadTarget {
477
+ const selector = String(args.selector || args.target || '').trim();
478
+ if (selector) return { raw: selector, source: 'selector' };
479
+
480
+ const ref = String(args.ref || '').trim();
481
+ if (!ref) {
482
+ throw new Error('ref is required (or provide selector)');
483
+ }
484
+ return {
485
+ raw: ref.startsWith('@') ? ref : `@${ref}`,
486
+ source: 'ref',
487
+ };
488
+ }
489
+
490
+ function normalizeUploadPath(rawPath: string): string | null {
491
+ const trimmed = rawPath.trim();
492
+ if (!trimmed) return null;
493
+ const normalizedInput = trimmed.replace(/\\/g, '/');
494
+ const candidate = normalizedInput.startsWith('/')
495
+ ? path.posix.normalize(normalizedInput)
496
+ : path.posix.normalize(path.posix.join(WORKSPACE_ROOT, normalizedInput));
497
+ if (
498
+ !(
499
+ candidate === WORKSPACE_ROOT || candidate.startsWith(`${WORKSPACE_ROOT}/`)
500
+ ) &&
501
+ !(
502
+ candidate === DISCORD_MEDIA_CACHE_ROOT ||
503
+ candidate.startsWith(`${DISCORD_MEDIA_CACHE_ROOT}/`)
504
+ )
505
+ ) {
506
+ return null;
507
+ }
508
+ return candidate;
509
+ }
510
+
511
+ function resolveUploadPaths(args: Record<string, unknown>): string[] {
512
+ const candidates: string[] = [];
513
+ const addPath = (value: unknown): void => {
514
+ if (typeof value === 'string') {
515
+ const trimmed = value.trim();
516
+ if (trimmed) candidates.push(trimmed);
517
+ return;
518
+ }
519
+ if (Array.isArray(value)) {
520
+ for (const item of value) {
521
+ if (typeof item !== 'string') continue;
522
+ const trimmed = item.trim();
523
+ if (trimmed) candidates.push(trimmed);
524
+ }
525
+ }
526
+ };
527
+
528
+ addPath(args.path);
529
+ addPath(args.file);
530
+ addPath(args.files);
531
+ addPath(args.paths);
532
+
533
+ const deduped: string[] = [];
534
+ const seen = new Set<string>();
535
+ for (const raw of candidates) {
536
+ const normalized = normalizeUploadPath(raw);
537
+ if (!normalized) {
538
+ throw new Error(
539
+ `invalid upload path "${raw}" (must stay within /workspace or /discord-media-cache)`,
540
+ );
541
+ }
542
+ if (!fs.existsSync(normalized)) {
543
+ throw new Error(`upload file not found: ${normalized}`);
544
+ }
545
+ if (seen.has(normalized)) continue;
546
+ seen.add(normalized);
547
+ deduped.push(normalized);
548
+ }
549
+ if (deduped.length === 0) {
550
+ throw new Error('path is required (or provide files/paths)');
551
+ }
552
+ return deduped;
553
+ }
554
+
431
555
  function resolveOutputPath(rawPath: unknown, extension: 'png' | 'pdf'): string {
432
556
  fs.mkdirSync(BROWSER_ARTIFACT_ROOT, { recursive: true });
433
557
 
@@ -563,6 +687,30 @@ function normalizeImageList(raw: unknown): Record<string, unknown>[] {
563
687
  return images;
564
688
  }
565
689
 
690
+ function normalizeStringList(raw: unknown, max = 10): string[] {
691
+ if (!Array.isArray(raw)) return [];
692
+ const values: string[] = [];
693
+ const seen = new Set<string>();
694
+ for (const item of raw) {
695
+ if (typeof item !== 'string') continue;
696
+ const normalized = item.trim();
697
+ if (!normalized || seen.has(normalized)) continue;
698
+ seen.add(normalized);
699
+ values.push(normalized);
700
+ if (values.length >= max) break;
701
+ }
702
+ return values;
703
+ }
704
+
705
+ function isUploadTypeMismatchError(message: string): boolean {
706
+ const normalized = String(message || '').toLowerCase();
707
+ if (!normalized) return false;
708
+ return (
709
+ normalized.includes('setinputfiles') ||
710
+ normalized.includes('not an htmlinputelement')
711
+ );
712
+ }
713
+
566
714
  function normalizeTrackedRequests(raw: unknown): Record<string, unknown>[] {
567
715
  if (!Array.isArray(raw)) return [];
568
716
  const requests: Record<string, unknown>[] = [];
@@ -1025,6 +1173,59 @@ export async function executeBrowserTool(
1025
1173
  });
1026
1174
  }
1027
1175
 
1176
+ case 'browser_upload': {
1177
+ const target = resolveUploadTarget(args);
1178
+ const filePaths = resolveUploadPaths(args);
1179
+ const frame = parseOptionalFrame(args.frame);
1180
+ await applyFrameTarget(effectiveSessionId, frame);
1181
+ const result = await runAgentBrowser(effectiveSessionId, 'upload', [
1182
+ target.raw,
1183
+ ...filePaths,
1184
+ ]);
1185
+ if (
1186
+ !result.success &&
1187
+ target.source === 'ref' &&
1188
+ isUploadTypeMismatchError(result.error || '')
1189
+ ) {
1190
+ const selectorEval = await runBrowserEval(
1191
+ effectiveSessionId,
1192
+ FIND_FILE_INPUT_SELECTORS_SCRIPT,
1193
+ 15_000,
1194
+ );
1195
+ const selectors = selectorEval.success
1196
+ ? normalizeStringList(selectorEval.result, 10)
1197
+ : [];
1198
+ for (const selector of selectors) {
1199
+ const retry = await runAgentBrowser(effectiveSessionId, 'upload', [
1200
+ selector,
1201
+ ...filePaths,
1202
+ ]);
1203
+ if (!retry.success) continue;
1204
+ return success({
1205
+ element: target.raw,
1206
+ selector,
1207
+ target: selector,
1208
+ uploaded_count: filePaths.length,
1209
+ files: filePaths,
1210
+ fallback_from_ref: true,
1211
+ ...(frame ? { frame: frame.raw } : {}),
1212
+ });
1213
+ }
1214
+ }
1215
+ if (!result.success) {
1216
+ return failure(result.error || `failed to upload via ${target.raw}`);
1217
+ }
1218
+ return success({
1219
+ target: target.raw,
1220
+ ...(target.source === 'ref'
1221
+ ? { element: target.raw }
1222
+ : { selector: target.raw }),
1223
+ uploaded_count: filePaths.length,
1224
+ files: filePaths,
1225
+ ...(frame ? { frame: frame.raw } : {}),
1226
+ });
1227
+ }
1228
+
1028
1229
  case 'browser_press': {
1029
1230
  const key = String(args.key || '').trim();
1030
1231
  if (!key) return failure('key is required');
@@ -1378,6 +1579,46 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
1378
1579
  },
1379
1580
  },
1380
1581
  },
1582
+ {
1583
+ type: 'function',
1584
+ function: {
1585
+ name: 'browser_upload',
1586
+ description:
1587
+ 'Upload one or more local files to a file input. Prefer a snapshot ref (for example "@e12"); if that ref points to a wrapper (like a span/button), provide selector for the underlying input[type=file].',
1588
+ parameters: {
1589
+ type: 'object',
1590
+ properties: {
1591
+ ref: {
1592
+ type: 'string',
1593
+ description:
1594
+ 'Optional element reference from browser_snapshot (for example "@e12").',
1595
+ },
1596
+ selector: {
1597
+ type: 'string',
1598
+ description:
1599
+ 'Optional CSS selector for the actual file input (for example input[type="file"]).',
1600
+ },
1601
+ path: {
1602
+ type: 'string',
1603
+ description:
1604
+ 'Primary local file path to upload (relative to /workspace or absolute /discord-media-cache path).',
1605
+ },
1606
+ files: {
1607
+ type: 'array',
1608
+ items: { type: 'string' },
1609
+ description:
1610
+ 'Optional additional local file paths for multi-file inputs.',
1611
+ },
1612
+ frame: {
1613
+ type: 'string',
1614
+ description:
1615
+ 'Optional frame selector. Use "main" to target the main document again.',
1616
+ },
1617
+ },
1618
+ required: ['path'],
1619
+ },
1620
+ },
1621
+ },
1381
1622
  {
1382
1623
  type: 'function',
1383
1624
  function: {