npm - @hybridaione/hybridclaw - Versions diffs - 0.2.6 → 0.2.7 - Mend

@hybridaione/hybridclaw 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86)

package/CHANGELOG.md +25 -0
package/README.md +22 -7
package/config.example.json +5 -0
package/container/package-lock.json +2 -2
package/container/package.json +1 -1
package/container/src/approval-policy.ts +136 -34
package/container/src/browser-tools.ts +241 -0
package/container/src/hybridai-client.ts +31 -3
package/container/src/index.ts +12 -1
package/container/src/token-usage.ts +89 -10
package/container/src/tools.ts +1 -0
package/container/src/types.ts +17 -0
package/dist/agent.d.ts.map +1 -1
package/dist/agent.js +5 -2
package/dist/agent.js.map +1 -1
package/dist/channels/discord/delivery.d.ts.map +1 -1
package/dist/channels/discord/delivery.js +5 -1
package/dist/channels/discord/delivery.js.map +1 -1
package/dist/channels/discord/inbound.d.ts +27 -0
package/dist/channels/discord/inbound.d.ts.map +1 -1
package/dist/channels/discord/inbound.js +125 -16
package/dist/channels/discord/inbound.js.map +1 -1
package/dist/channels/discord/runtime.d.ts +1 -1
package/dist/channels/discord/runtime.d.ts.map +1 -1
package/dist/channels/discord/runtime.js +166 -57
package/dist/channels/discord/runtime.js.map +1 -1
package/dist/channels/discord/stream.d.ts.map +1 -1
package/dist/channels/discord/stream.js +3 -4
package/dist/channels/discord/stream.js.map +1 -1
package/dist/cli.js +30 -1
package/dist/cli.js.map +1 -1
package/dist/config.d.ts +5 -0
package/dist/config.d.ts.map +1 -1
package/dist/config.js +10 -0
package/dist/config.js.map +1 -1
package/dist/container-runner.d.ts.map +1 -1
package/dist/container-runner.js +2 -1
package/dist/container-runner.js.map +1 -1
package/dist/gateway-service.d.ts.map +1 -1
package/dist/gateway-service.js +62 -15
package/dist/gateway-service.js.map +1 -1
package/dist/gateway.js +124 -2
package/dist/gateway.js.map +1 -1
package/dist/heartbeat.d.ts.map +1 -1
package/dist/heartbeat.js +14 -0
package/dist/heartbeat.js.map +1 -1
package/dist/hybridai-models.d.ts +8 -0
package/dist/hybridai-models.d.ts.map +1 -0
package/dist/hybridai-models.js +94 -0
package/dist/hybridai-models.js.map +1 -0
package/dist/prompt-hooks.d.ts.map +1 -1
package/dist/prompt-hooks.js +2 -0
package/dist/prompt-hooks.js.map +1 -1
package/dist/runtime-config.d.ts +6 -0
package/dist/runtime-config.d.ts.map +1 -1
package/dist/runtime-config.js +26 -1
package/dist/runtime-config.js.map +1 -1
package/dist/scheduled-task-runner.d.ts.map +1 -1
package/dist/scheduled-task-runner.js +14 -0
package/dist/scheduled-task-runner.js.map +1 -1
package/dist/types.d.ts +4 -0
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/docs/index.html +36 -1
package/package.json +1 -1
package/src/agent.ts +15 -1
package/src/channels/discord/delivery.ts +8 -1
package/src/channels/discord/inbound.ts +152 -23
package/src/channels/discord/runtime.ts +222 -63
package/src/channels/discord/stream.ts +12 -4
package/src/cli.ts +33 -1
package/src/config.ts +20 -0
package/src/container-runner.ts +2 -0
package/src/gateway-service.ts +66 -13
package/src/gateway.ts +166 -4
package/src/heartbeat.ts +15 -0
package/src/hybridai-models.ts +158 -0
package/src/prompt-hooks.ts +2 -0
package/src/runtime-config.ts +57 -5
package/src/scheduled-task-runner.ts +14 -0
package/src/types.ts +4 -0
package/tests/approval-policy.test.ts +111 -0
package/tests/discord.basic.test.ts +182 -1
package/tests/hybridai-client.test.ts +112 -0
package/tests/hybridai-models.test.ts +46 -0
package/tests/token-usage.cache.test.ts +128 -0

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,31 @@
 No unreleased changes.
+## [0.2.7](https://github.com/HybridAIOne/hybridclaw/tree/v0.2.7)
+### Added
+- **Private approval slash command**: Added `/approve` with private (ephemeral) responses for `view`, `yes`, `session`, `agent`, and `no`, including optional `approval_id`.
+- **Static model context-window catalog**: Added curated context-window mappings (Claude/Gemini/GPT-5 families) plus family-aware model-id fallback matching for session status metrics without runtime model-list fetches.
+- **Discord command access + output controls**: Added runtime config support for `discord.commandMode`, `discord.commandAllowedUserIds`, `discord.textChunkLimit`, and `discord.maxLinesPerMessage`.
+- **HybridAI completion budget control**: Added `hybridai.maxTokens` runtime setting and request wiring (`max_tokens`) for container model calls.
+### Changed
+- **Approval prompt visibility in Discord**: Channel responses now post a minimal “approval required” notice and move full approval details/decisions into private slash-command responses (`/approve`), matching the visibility pattern of `/status`.
+- **Discord command handler context**: Command execution now receives invoking `userId` and `username` so approval actions can be scoped to the requesting user.
+- **Discord slash command discoverability**: `/status` and `/approve` are now upserted globally for DM visibility while guild-only authorization checks remain enforced in servers.
+- **Discord free-mode message relevance gating**: Free-mode replies now skip low-signal acknowledgements/URL-only chatter and avoid jumping in when other users are explicitly mentioned.
+- **Status context usage reporting**: Session status now derives context usage from usage telemetry and static model context-window resolution instead of char-budget estimation only.
+- **Approval parsing and trust scoping**: Approval response parsing now handles mention-prefixed/batched messages, and network trust scopes now normalize hosts to broader domain scopes.
+- **Prompt dump diagnostics**: `data/last_prompt.jsonl` now includes media context plus allowed/blocked tool lists for richer debugging context.
+### Fixed
+- **Google Images/Lens upload compatibility**: `browser_upload` now supports CSS-selector targets and automatically falls back from wrapper refs to detected `input[type="file"]` selectors when upload fails with non-input elements.
+- **Install-root container bootstrap**: CLI container readiness checks now resolve the package install root, preventing false build failures when invoked from non-package working directories.
+- **DM slash command registration regression**: Restored reliable discovery/usage of HybridClaw slash commands in Discord DMs.
 ## [0.2.6](https://github.com/HybridAIOne/hybridclaw/tree/v0.2.6)
 ### Added

package/README.md CHANGED Viewed

@@ -11,7 +11,17 @@ npm install -g @hybridaione/hybridclaw
 hybridclaw onboarding
 ```
-Latest release: [v0.2.6](https://github.com/HybridAIOne/hybridclaw/releases/tag/v0.2.6)
+Latest release: [v0.2.7](https://github.com/HybridAIOne/hybridclaw/releases/tag/v0.2.7)
+## Release highlights (v0.2.7)
+- Private approval flow via `/approve [view|yes|session|agent|no] [approval_id]` with ephemeral responses, and DM-ready global slash registration for `/status` + `/approve`.
+- Discord command access controls now support `discord.commandMode` (`public|restricted`) plus `discord.commandAllowedUserIds` (with legacy `commandUserId` compatibility).
+- Free-mode Discord replies now apply stronger relevance gating (for example short acknowledgements/URL-only chatter are skipped, and messages mentioning other users are de-prioritized).
+- `browser_upload` now supports selector targets and automatic fallback from wrapper refs to detected `input[type="file"]` elements.
+- HybridAI status/usage context metrics now use a curated static context-window catalog (Claude/Gemini/GPT-5) with family-aware fallback matching, and runtime supports `hybridai.maxTokens` for default completion budgeting.
+- Model-usage telemetry now captures cache read/write token counters where providers expose them, and prompt-dump diagnostics include media plus allowed/blocked tool context.
+- CLI container readiness checks now resolve the package install root to avoid non-root invocation failures.
 ## HybridAI Advantage
@@ -32,7 +42,7 @@ Latest release: [v0.2.6](https://github.com/HybridAIOne/hybridclaw/releases/tag/
 ## Quick start
 ```bash
-# Install dependencies (this also installs container deps via postinstall)
+# Install dependencies
 npm install
 # Run onboarding (also auto-runs on first `gateway`/`tui` start if API key is missing)
@@ -100,10 +110,14 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
 - `discord.guildMembersIntent` enables richer guild member context and better `@name` mention resolution in replies (requires enabling **Server Members Intent** in Discord Developer Portal)
 - `discord.presenceIntent` enables Discord presence events (requires enabling **Presence Intent** in Discord Developer Portal)
 - `discord.respondToAllMessages` is a global fallback for open-policy guild channels without explicit mode config (`false` mention-gated, `true` free-response)
-- `discord.commandUserId` restricts `!claw <command>` admin commands to a single Discord user ID (all other messages still use normal chat handling)
-- `discord.commandsOnly` optional hard mode: if `true`, the bot ignores non-`!claw` messages and only accepts prefixed commands (optionally limited by `discord.commandUserId`)
-- `discord.groupPolicy` controls guild channel scope: `open` (default), `allowlist`, or `disabled`
+- `discord.commandMode` controls command access: `public` (any user can run slash/`!claw` commands) or `restricted` (only allowlisted users can run slash/`!claw` commands)
+- `discord.commandAllowedUserIds` is the allowlist used when `discord.commandMode` is `restricted`
+- `discord.commandUserId` is a legacy single-user allowlist alias; when set without `commandMode`, runtime treats command access as `restricted` for backward compatibility
+- `discord.commandsOnly` optional hard mode: if `true`, the bot ignores non-`!claw` messages and only accepts prefixed commands (still subject to `discord.commandMode`)
+- `discord.groupPolicy` controls guild channel scope: `open` (default, mention-first unless a channel is set to `free`), `allowlist`, or `disabled`
 - `discord.freeResponseChannels` is a Hermes-style channel ID list that gets free-response behavior while other channels remain mention-gated
+- `discord.textChunkLimit` controls Discord message chunk size (default `2000`)
+- `discord.maxLinesPerMessage` controls max lines per Discord chunk (default `17`)
 - `discord.humanDelay` controls natural delays between multi-part messages (`off|natural|custom`)
 - `discord.typingMode` controls typing indicator lifecycle (`instant|thinking|streaming|never`)
 - `discord.presence.*` enables dynamic self-presence health states (healthy/degraded/exhausted mapped to `online|idle|dnd`, plus maintenance `invisible` during shutdown)
@@ -124,7 +138,7 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
 - `sessionCompaction.tokenBudget` and `sessionCompaction.budgetRatio` tune compaction token budgeting behavior
 - Built-in Discord humanization behaviors include night/weekend pacing, post-exchange cooldown scaling (after 5+ exchanges, reset after 20 minutes idle), selective silence in active free-mode channels, short-ack read reactions, and reconnect staggered dequeue
 - Per-guild/per-channel mode takes precedence over `discord.respondToAllMessages`
-- Discord slash commands: `/status`, `/channel-mode <off|mention|free>`, and `/channel-policy <open|allowlist|disabled>` (ephemeral replies)
+- Discord slash commands: `/status`, `/approve [view|yes|session|agent|no] [approval_id]`, `/channel-mode <off|mention|free>`, and `/channel-policy <open|allowlist|disabled>` (ephemeral replies)
 - `skills.extraDirs` adds additional enterprise/shared skill roots (lowest precedence tier)
 - `proactive.*` controls autonomous behavior (`activeHours`, `delegation`, `autoRetry`, `ralph`)
 - `proactive.ralph.maxIterations` enables Ralph loop (`0` off, `-1` unlimited, `>0` extra autonomous iterations before forcing completion)
@@ -132,6 +146,7 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
 - `observability.*` controls push ingest into HybridAI (`events:batch` endpoint, batching, identity metadata)
 - Some settings require restart to fully apply (for example HTTP bind host/port)
 - Default bot is configured via `hybridai.defaultChatbotId` in `config.json`
+- `hybridai.maxTokens` sets the default completion budget per model call (default `4096`)
 Secrets remain in `.env`:
@@ -328,7 +343,7 @@ The agent has access to these sandboxed tools inside the container:
 - `session_search` — search/summarize historical sessions from transcript archives
 - `delegate` — push-based background subagent tasks (`single`, `parallel`, `chain`) with auto-announced completion (no polling)
 - `web_fetch` — plain HTTP fetch + extraction for static/read-only content (docs, articles, READMEs, JSON/text APIs, direct files)
-- `browser_*` (optional) — full browser automation for JS-rendered or interactive pages (`navigate`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
+- `browser_*` (optional) — full browser automation for JS-rendered or interactive pages (`navigate`, `snapshot`, `click`, `type`, `upload`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
 `delegate` mode examples:

package/config.example.json CHANGED Viewed

@@ -15,9 +15,13 @@
     "presenceIntent": false,
     "respondToAllMessages": false,
     "commandsOnly": false,
+    "commandMode": "public",
+    "commandAllowedUserIds": [],
     "commandUserId": "",
     "groupPolicy": "open",
     "freeResponseChannels": [],
+    "textChunkLimit": 2000,
+    "maxLinesPerMessage": 17,
     "humanDelay": {
       "mode": "natural",
       "minMs": 800,
@@ -58,6 +62,7 @@
     "baseUrl": "https://hybridai.one",
     "defaultModel": "gpt-5-nano",
     "defaultChatbotId": "",
+    "maxTokens": 4096,
     "enableRag": true,
     "models": ["gpt-5-nano", "gpt-5-mini", "gpt-5"]
   },

package/container/package-lock.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "hybridclaw-agent",
-  "version": "0.2.6",
+  "version": "0.2.7",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "hybridclaw-agent",
-      "version": "0.2.6",
+      "version": "0.2.7",
       "dependencies": {
         "@mozilla/readability": "^0.6.0",
         "agent-browser": "^0.15.1",

package/container/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "hybridclaw-agent",
-  "version": "0.2.6",
+  "version": "0.2.7",
   "type": "module",
   "scripts": {
     "build": "tsc",

package/container/src/approval-policy.ts CHANGED Viewed

@@ -437,7 +437,8 @@ function latestUserMessageText(messages: ChatMessage[]): string {
   for (let i = messages.length - 1; i >= 0; i -= 1) {
     if (messages[i].role !== 'user') continue;
     const content = messages[i].content;
-    if (typeof content === 'string') return normalizePrompt(content);
+    if (typeof content === 'string')
+      return content.trim().slice(0, MAX_PROMPT_CHARS);
     if (!Array.isArray(content)) continue;
     const textParts: string[] = [];
     for (const part of content) {
@@ -448,7 +449,7 @@ function latestUserMessageText(messages: ChatMessage[]): string {
       if (trimmed) textParts.push(trimmed);
     }
     if (textParts.length > 0) {
-      return normalizePrompt(textParts.join('\n'));
+      return textParts.join('\n').trim().slice(0, MAX_PROMPT_CHARS);
     }
   }
   return '';
@@ -474,6 +475,44 @@ function extractHostsFromUrlLikeText(input: string): string[] {
   return [...hosts];
 }
+/**
+ * Second-level labels that act as registries under two-letter country TLDs
+ * (`co.uk`, `com.au`, `ac.jp`, ...). Hosts below them need three labels to
+ * name a single registrant.
+ */
+const SECOND_LEVEL_REGISTRY_LABELS: ReadonlySet<string> = new Set([
+  'ac',
+  'co',
+  'com',
+  'edu',
+  'gov',
+  'net',
+  'org',
+]);
+/**
+ * Shared-hosting suffixes under which every direct subdomain belongs to a
+ * different tenant. Non-exhaustive subset of the Public Suffix List's private
+ * section; collapsing these to two labels would let one approval trust every
+ * tenant on the platform.
+ */
+const SHARED_HOSTING_SUFFIXES: ReadonlySet<string> = new Set([
+  'appspot.com',
+  'azurewebsites.net',
+  'blogspot.com',
+  'cloudfront.net',
+  'firebaseapp.com',
+  'github.io',
+  'gitlab.io',
+  'herokuapp.com',
+  'netlify.app',
+  'pages.dev',
+  'vercel.app',
+  'web.app',
+  'workers.dev',
+]);
+/**
+ * Reduces a host name to the domain scope used for network trust decisions.
+ *
+ * - Lower-cases, trims, and drops one trailing dot.
+ * - IPv4 literals and anything containing `:` are returned unchanged.
+ * - Hosts with one or two labels are returned unchanged.
+ * - Longer hosts collapse to their last two labels, or to their last three
+ *   when the last two form a ccTLD registry (`co.uk`) or a shared-hosting
+ *   suffix (`github.io`).
+ *
+ * @param host Host name as extracted from a URL-like string.
+ * @returns The normalized scope, or `'unknown-host'` when no label remains.
+ */
+function normalizeHostScope(host: string): string {
+  const normalized = host.trim().toLowerCase().replace(/\.$/, '');
+  if (!normalized) return 'unknown-host';
+  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(normalized)) return normalized;
+  if (normalized.includes(':')) return normalized; // IPv6/host:port fragments
+  const labels = normalized.split('.').filter(Boolean);
+  // Dot-only input (for example "..") carries no usable label.
+  if (labels.length === 0) return 'unknown-host';
+  if (labels.length <= 2) return normalized;
+  const [secondLevel = '', topLevel = ''] = labels.slice(-2);
+  const needsThreeLabels =
+    SHARED_HOSTING_SUFFIXES.has(`${secondLevel}.${topLevel}`) ||
+    (topLevel.length === 2 && SECOND_LEVEL_REGISTRY_LABELS.has(secondLevel));
+  return labels.slice(needsThreeLabels ? -3 : -2).join('.');
+}
+/**
+ * Maps every host to its trust scope via `normalizeHostScope` and returns the
+ * distinct scopes in first-seen order.
+ */
+function extractHostScopes(hosts: string[]): string[] {
+  const distinctScopes = new Set(hosts.map((host) => normalizeHostScope(host)));
+  return Array.from(distinctScopes);
+}
 function extractAbsolutePaths(input: string): string[] {
   const paths = new Set<string>();
   for (const match of input.matchAll(ABS_PATH_RE)) {
@@ -500,39 +539,91 @@ function parseModeFromApproveMatch(
   return 'once';
 }
-function parseApprovalUserResponse(input: string): {
+function parseApprovalDirective(input: string): {
   kind: 'approve' | 'deny';
   mode?: 'once' | 'session' | 'agent';
   requestId: string;
 } | null {
-  const menuMatch = input.match(MENU_SELECTION_RE);
-  if (menuMatch) {
-    const requestId = String(menuMatch[2] || '').trim();
-    const selection = menuMatch[1];
-    if (selection === '1') return { kind: 'approve', mode: 'once', requestId };
-    if (selection === '2')
-      return { kind: 'approve', mode: 'session', requestId };
-    if (selection === '3') return { kind: 'approve', mode: 'agent', requestId };
-    return { kind: 'deny', requestId };
+  const normalized = input.trim();
+  if (!normalized) return null;
+  const directiveCandidates = [
+    normalized,
+    normalized.replace(/^(?:<@!?\d+>\s*)+/, ''),
+  ];
+  for (const candidate of directiveCandidates) {
+    if (!candidate) continue;
+    const menuMatch = candidate.match(MENU_SELECTION_RE);
+    if (menuMatch) {
+      const requestId = String(menuMatch[2] || '').trim();
+      const selection = menuMatch[1];
+      if (selection === '1')
+        return { kind: 'approve', mode: 'once', requestId };
+      if (selection === '2')
+        return { kind: 'approve', mode: 'session', requestId };
+      if (selection === '3')
+        return { kind: 'approve', mode: 'agent', requestId };
+      return { kind: 'deny', requestId };
+    }
+    const approveMatch = candidate.match(APPROVE_RE);
+    if (approveMatch) {
+      return {
+        kind: 'approve',
+        mode: parseModeFromApproveMatch(approveMatch),
+        requestId: String(approveMatch[1] || '').trim(),
+      };
+    }
+    const denyMatch = candidate.match(DENY_RE);
+    if (denyMatch) {
+      return {
+        kind: 'deny',
+        requestId: String(denyMatch[1] || '').trim(),
+      };
+    }
   }
-  const approveMatch = input.match(APPROVE_RE);
-  if (approveMatch) {
-    return {
-      kind: 'approve',
-      mode: parseModeFromApproveMatch(approveMatch),
-      requestId: String(approveMatch[1] || '').trim(),
-    };
+  return null;
+}
+function parseApprovalUserResponse(input: string): {
+  kind: 'approve' | 'deny';
+  mode?: 'once' | 'session' | 'agent';
+  requestId: string;
+} | null {
+  const normalized = input.trim();
+  if (!normalized) return null;
+  const candidates: string[] = [];
+  const pushCandidate = (value: string): void => {
+    const trimmed = value.trim();
+    if (!trimmed) return;
+    if (candidates.includes(trimmed)) return;
+    candidates.push(trimmed);
+  };
+  pushCandidate(normalized);
+  pushCandidate(normalized.replace(/^(?:<@!?\d+>\s*)+/, ''));
+  const batchTailMatch = normalized.match(/Message\s+\d+\s*:\s*([\s\S]+)$/i);
+  if (batchTailMatch?.[1]) {
+    pushCandidate(batchTailMatch[1]);
   }
-  const denyMatch = input.match(DENY_RE);
-  if (denyMatch) {
-    return {
-      kind: 'deny',
-      requestId: String(denyMatch[1] || '').trim(),
-    };
+  const lines = normalized
+    .split(/\r?\n/)
+    .map((line) => line.trim())
+    .filter(Boolean);
+  if (lines.length > 0) {
+    pushCandidate(lines[lines.length - 1]);
   }
+  for (const candidate of candidates) {
+    const parsed = parseApprovalDirective(candidate);
+    if (parsed) return parsed;
+  }
   return null;
 }
@@ -879,16 +970,25 @@ export class TrustedCoworkerApprovalRuntime {
     const requestLabel = evaluation.requestId
       ? `Approval ID: ${evaluation.requestId}`
       : '';
-    const trustHint = evaluation.pinned
-      ? 'This action is pinned sensitive, so session/agent trust is disabled.'
-      : 'Reply `yes for session` (or `2`) to trust this action for this session, or `yes for agent` (or `3`) to trust it for this agent.';
+    const optionLines = evaluation.pinned
+      ? [
+          'Reply `yes` (or `1`) to approve once.',
+          'Reply `yes for session` (or `2`) is unavailable for pinned-sensitive actions.',
+          'Reply `yes for agent` (or `3`) is unavailable for pinned-sensitive actions.',
+          'Reply `no` (or `4`) to deny.',
+        ]
+      : [
+          'Reply `yes` (or `1`) to approve once.',
+          'Reply `yes for session` (or `2`) to trust this action for this session.',
+          'Reply `yes for agent` (or `3`) to trust it for this agent.',
+          'Reply `no` (or `4`) to deny.',
+        ];
     return [
       `I need your approval before I ${evaluation.intent.toLowerCase()}.`,
       `Why: ${evaluation.reason}`,
       `If you skip this, ${evaluation.consequenceIfDenied.charAt(0).toLowerCase()}${evaluation.consequenceIfDenied.slice(1)}`,
       requestLabel,
-      `Reply \`yes\` (or \`1\`) to approve once, or \`no\` (or \`4\`) to deny.`,
-      trustHint,
+      ...optionLines,
       `Approval expires in ${expiresIn}s.`,
     ]
       .filter(Boolean)
@@ -1023,9 +1123,11 @@ export class TrustedCoworkerApprovalRuntime {
     if (lowerTool === 'web_fetch' || lowerTool === 'browser_navigate') {
       const rawUrl = normalizeText(args.url);
-      const hosts = extractHostsFromUrlLikeText(rawUrl);
-      const primaryHost = hosts[0] || 'unknown-host';
-      const unseen = hosts.filter((host) => !this.seenNetworkHosts.has(host));
+      const hostScopes = extractHostScopes(extractHostsFromUrlLikeText(rawUrl));
+      const primaryHost = hostScopes[0] || 'unknown-host';
+      const unseen = hostScopes.filter(
+        (host) => !this.seenNetworkHosts.has(host),
+      );
       return {
         tier: unseen.length > 0 ? 'red' : 'yellow',
         actionKey: `network:${primaryHost}`,
@@ -1038,7 +1140,7 @@ export class TrustedCoworkerApprovalRuntime {
             : 'this is an external network action',
         commandPreview: normalizePreview(rawUrl),
         pathHints: [],
-        hostHints: hosts,
+        hostHints: hostScopes,
         writeIntent: false,
         promotableRed: unseen.length > 0,
         stickyYellow: true,

package/container/src/browser-tools.ts CHANGED Viewed

@@ -13,6 +13,7 @@ const execFileAsync = promisify(execFile);
 const WORKSPACE_ROOT = '/workspace';
 const BROWSER_SOCKET_ROOT = '/tmp/hybridclaw-browser';
 const BROWSER_ARTIFACT_ROOT = path.join(WORKSPACE_ROOT, '.browser-artifacts');
+const DISCORD_MEDIA_CACHE_ROOT = '/discord-media-cache';
 const BROWSER_DEFAULT_TIMEOUT_MS = 45_000;
 const BROWSER_MAX_SNAPSHOT_CHARS = 12_000;
 const BROWSER_RUNTIME_ROOT = path.join(WORKSPACE_ROOT, '.hybridclaw-runtime');
@@ -98,11 +99,55 @@ const CLEAR_NETWORK_TIMINGS_SCRIPT = `(() => {
   return true;
 })()`;
+// In-page script (evaluated through runBrowserEval by the browser_upload
+// fallback) that lists CSS selectors for every `input[type="file"]` in the
+// current document. For each input it emits, in order: `#id`, a name-based
+// selector, an accept-based selector, and form-scoped variants when the input
+// sits inside a form with an id. A generic `input[type="file"]` is appended
+// last. Duplicates are dropped and at most 10 selectors are returned.
+// Values are escaped with CSS.escape when the page provides it; otherwise only
+// double quotes and backslashes are escaped.
+const FIND_FILE_INPUT_SELECTORS_SCRIPT = `(() => {
+  const selectors = [];
+  const seen = new Set();
+  const esc = (value) => {
+    const text = String(value || '');
+    if (typeof CSS !== 'undefined' && CSS && typeof CSS.escape === 'function') {
+      return CSS.escape(text);
+    }
+    return text.replace(/["\\\\]/g, '\\\\$&');
+  };
+  const push = (selector) => {
+    const normalized = String(selector || '').trim();
+    if (!normalized || seen.has(normalized)) return;
+    seen.add(normalized);
+    selectors.push(normalized);
+  };
+  const inputs = Array.from(document.querySelectorAll('input[type="file"]'));
+  for (const input of inputs) {
+    const id = input.getAttribute('id');
+    if (id) push(\`#\${esc(id)}\`);
+    const name = input.getAttribute('name');
+    if (name) push(\`input[type="file"][name="\${esc(name)}"]\`);
+    const accept = input.getAttribute('accept');
+    if (accept) push(\`input[type="file"][accept="\${esc(accept)}"]\`);
+    const form = input.closest('form');
+    const formId = form ? form.getAttribute('id') : null;
+    if (formId) {
+      if (name) {
+        push(\`#\${esc(formId)} input[type="file"][name="\${esc(name)}"]\`);
+      }
+      push(\`#\${esc(formId)} input[type="file"]\`);
+    }
+  }
+  push('input[type="file"]');
+  return selectors.slice(0, 10);
+})()`;
 type SnapshotMode = 'default' | 'interactive' | 'full';
 type FrameTarget = {
   raw: string;
   isMain: boolean;
 };
+// Resolved destination for a browser_upload call.
+type UploadTarget = {
+  // Value handed to the upload command: an `@`-prefixed snapshot ref or a CSS selector.
+  raw: string;
+  // Which argument produced `raw`; the file-input selector fallback only runs for 'ref'.
+  source: 'ref' | 'selector';
+};
 type BrowserModelContext = {
   baseUrl: string;
   apiKey: string;
@@ -428,6 +473,85 @@ function ensureRef(raw: unknown): string {
   return ref.startsWith('@') ? ref : `@${ref}`;
 }
+/**
+ * Picks the element a browser_upload call should target.
+ *
+ * A non-empty `selector` (or its alias `target`) wins; otherwise `ref` is
+ * used and given a leading `@` when missing. Only string values (and finite
+ * numbers, stringified) are accepted: objects and arrays are ignored rather
+ * than coerced into meaningless selectors such as `[object Object]`. A
+ * whitespace-only `selector` no longer hides a usable `target`.
+ *
+ * @param args Raw tool-call arguments.
+ * @returns The target string and whether it came from a ref or a selector.
+ * @throws Error when neither a selector/target nor a ref is provided.
+ */
+function resolveUploadTarget(args: Record<string, unknown>): UploadTarget {
+  const readText = (value: unknown): string => {
+    if (typeof value === 'string') return value.trim();
+    if (typeof value === 'number' && Number.isFinite(value)) {
+      return String(value);
+    }
+    return '';
+  };
+  const selector = readText(args.selector) || readText(args.target);
+  if (selector) return { raw: selector, source: 'selector' };
+  const ref = readText(args.ref);
+  if (!ref) {
+    throw new Error('ref is required (or provide selector)');
+  }
+  return {
+    raw: ref.startsWith('@') ? ref : `@${ref}`,
+    source: 'ref',
+  };
+}
+/**
+ * Resolves an upload path to a normalized absolute POSIX path and confines it
+ * to the allowed roots.
+ *
+ * Backslashes are treated as `/`. Relative paths are resolved against
+ * WORKSPACE_ROOT. The result must equal, or sit below, WORKSPACE_ROOT or
+ * DISCORD_MEDIA_CACHE_ROOT.
+ *
+ * @returns The normalized path, or `null` for blank input or a path that
+ *   escapes both roots.
+ */
+function normalizeUploadPath(rawPath: string): string | null {
+  const input = rawPath.trim();
+  if (input === '') return null;
+  const posixInput = input.replace(/\\/g, '/');
+  const absolute = posixInput.startsWith('/')
+    ? posixInput
+    : path.posix.join(WORKSPACE_ROOT, posixInput);
+  const resolved = path.posix.normalize(absolute);
+  const isInside = (root: string): boolean =>
+    resolved === root || resolved.startsWith(`${root}/`);
+  if (isInside(WORKSPACE_ROOT) || isInside(DISCORD_MEDIA_CACHE_ROOT)) {
+    return resolved;
+  }
+  return null;
+}
+/**
+ * Collects the files a browser_upload call should send.
+ *
+ * Reads `path`, `file`, `files` and `paths` (each a string or an array of
+ * strings), normalizes every entry with `normalizeUploadPath`, and returns
+ * the distinct results in first-seen order.
+ *
+ * @param args Raw tool-call arguments.
+ * @returns Normalized absolute paths of existing regular files.
+ * @throws Error when a path leaves the allowed roots, does not exist, is not
+ *   a regular file (for example a directory such as /workspace itself), or
+ *   when no path was supplied at all.
+ */
+function resolveUploadPaths(args: Record<string, unknown>): string[] {
+  const candidates: string[] = [];
+  const addPath = (value: unknown): void => {
+    const items: unknown[] = Array.isArray(value) ? value : [value];
+    for (const item of items) {
+      if (typeof item !== 'string') continue;
+      const trimmed = item.trim();
+      if (trimmed) candidates.push(trimmed);
+    }
+  };
+  addPath(args.path);
+  addPath(args.file);
+  addPath(args.files);
+  addPath(args.paths);
+
+  // `null` means the path cannot be stat'ed (missing or unreadable).
+  const isRegularFile = (filePath: string): boolean | null => {
+    try {
+      return fs.statSync(filePath).isFile();
+    } catch {
+      return null;
+    }
+  };
+
+  const resolved = new Set<string>();
+  for (const raw of candidates) {
+    const normalized = normalizeUploadPath(raw);
+    if (!normalized) {
+      throw new Error(
+        `invalid upload path "${raw}" (must stay within /workspace or /discord-media-cache)`,
+      );
+    }
+    const regular = isRegularFile(normalized);
+    if (regular === null) {
+      throw new Error(`upload file not found: ${normalized}`);
+    }
+    // existsSync also accepted directories, which the upload command cannot send.
+    if (!regular) {
+      throw new Error(`upload path is not a regular file: ${normalized}`);
+    }
+    resolved.add(normalized);
+  }
+  if (resolved.size === 0) {
+    throw new Error('path is required (or provide files/paths)');
+  }
+  return [...resolved];
+}
 function resolveOutputPath(rawPath: unknown, extension: 'png' | 'pdf'): string {
   fs.mkdirSync(BROWSER_ARTIFACT_ROOT, { recursive: true });
@@ -563,6 +687,30 @@ function normalizeImageList(raw: unknown): Record<string, unknown>[] {
   return images;
 }
+/**
+ * Extracts a de-duplicated list of trimmed, non-empty strings from an
+ * arbitrary value.
+ *
+ * @param raw Value expected to be an array; anything else yields `[]`.
+ * @param max Maximum number of entries to return (default 10). A value of 0
+ *   or less yields `[]`.
+ * @returns Up to `max` distinct strings in first-seen order; non-string items
+ *   are skipped.
+ */
+function normalizeStringList(raw: unknown, max = 10): string[] {
+  if (!Array.isArray(raw)) return [];
+  const unique = new Set<string>();
+  for (const item of raw) {
+    // Check the cap before adding so a non-positive `max` never lets one item through.
+    if (unique.size >= max) break;
+    if (typeof item !== 'string') continue;
+    const normalized = item.trim();
+    if (normalized) unique.add(normalized);
+  }
+  return [...unique];
+}
+/**
+ * Reports whether an upload error message contains "setinputfiles" or
+ * "not an htmlinputelement" (case-insensitive) — the condition that gates the
+ * file-input selector fallback in browser_upload.
+ */
+function isUploadTypeMismatchError(message: string): boolean {
+  const haystack = String(message || '').toLowerCase();
+  const markers = ['setinputfiles', 'not an htmlinputelement'];
+  return markers.some((marker) => haystack.includes(marker));
+}
 function normalizeTrackedRequests(raw: unknown): Record<string, unknown>[] {
   if (!Array.isArray(raw)) return [];
   const requests: Record<string, unknown>[] = [];
@@ -1025,6 +1173,59 @@ export async function executeBrowserTool(
         });
       }
+      case 'browser_upload': {
+        const target = resolveUploadTarget(args);
+        const filePaths = resolveUploadPaths(args);
+        const frame = parseOptionalFrame(args.frame);
+        await applyFrameTarget(effectiveSessionId, frame);
+        const result = await runAgentBrowser(effectiveSessionId, 'upload', [
+          target.raw,
+          ...filePaths,
+        ]);
+        if (
+          !result.success &&
+          target.source === 'ref' &&
+          isUploadTypeMismatchError(result.error || '')
+        ) {
+          const selectorEval = await runBrowserEval(
+            effectiveSessionId,
+            FIND_FILE_INPUT_SELECTORS_SCRIPT,
+            15_000,
+          );
+          const selectors = selectorEval.success
+            ? normalizeStringList(selectorEval.result, 10)
+            : [];
+          for (const selector of selectors) {
+            const retry = await runAgentBrowser(effectiveSessionId, 'upload', [
+              selector,
+              ...filePaths,
+            ]);
+            if (!retry.success) continue;
+            return success({
+              element: target.raw,
+              selector,
+              target: selector,
+              uploaded_count: filePaths.length,
+              files: filePaths,
+              fallback_from_ref: true,
+              ...(frame ? { frame: frame.raw } : {}),
+            });
+          }
+        }
+        if (!result.success) {
+          return failure(result.error || `failed to upload via ${target.raw}`);
+        }
+        return success({
+          target: target.raw,
+          ...(target.source === 'ref'
+            ? { element: target.raw }
+            : { selector: target.raw }),
+          uploaded_count: filePaths.length,
+          files: filePaths,
+          ...(frame ? { frame: frame.raw } : {}),
+        });
+      }
       case 'browser_press': {
         const key = String(args.key || '').trim();
         if (!key) return failure('key is required');
@@ -1378,6 +1579,46 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
       },
     },
   },
+  {
+    type: 'function',
+    function: {
+      name: 'browser_upload',
+      description:
+        'Upload one or more local files to a file input. Prefer a snapshot ref (for example "@e12"); if that ref points to a wrapper (like a span/button), provide selector for the underlying input[type=file].',
+      parameters: {
+        type: 'object',
+        properties: {
+          ref: {
+            type: 'string',
+            description:
+              'Optional element reference from browser_snapshot (for example "@e12").',
+          },
+          selector: {
+            type: 'string',
+            description:
+              'Optional CSS selector for the actual file input (for example input[type="file"]).',
+          },
+          path: {
+            type: 'string',
+            description:
+              'Primary local file path to upload (relative to /workspace or absolute /discord-media-cache path).',
+          },
+          files: {
+            type: 'array',
+            items: { type: 'string' },
+            description:
+              'Optional additional local file paths for multi-file inputs.',
+          },
+          frame: {
+            type: 'string',
+            description:
+              'Optional frame selector. Use "main" to target the main document again.',
+          },
+        },
+        required: ['path'],
+      },
+    },
+  },
   {
     type: 'function',
     function: {