@hybridaione/hybridclaw 0.1.21 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/CHANGELOG.md +59 -0
  2. package/README.md +50 -8
  3. package/config.example.json +3 -0
  4. package/container/package-lock.json +2 -2
  5. package/container/package.json +1 -1
  6. package/container/src/browser-tools.ts +53 -3
  7. package/container/src/hybridai-client.ts +270 -8
  8. package/container/src/index.ts +66 -3
  9. package/container/src/token-usage.ts +89 -0
  10. package/container/src/tools.ts +9 -2
  11. package/container/src/types.ts +19 -0
  12. package/container/src/web-fetch.ts +98 -7
  13. package/dist/agent.d.ts +1 -1
  14. package/dist/agent.d.ts.map +1 -1
  15. package/dist/agent.js +2 -2
  16. package/dist/agent.js.map +1 -1
  17. package/dist/chunk.d.ts +6 -0
  18. package/dist/chunk.d.ts.map +1 -0
  19. package/dist/chunk.js +129 -0
  20. package/dist/chunk.js.map +1 -0
  21. package/dist/container-runner.d.ts +1 -1
  22. package/dist/container-runner.d.ts.map +1 -1
  23. package/dist/container-runner.js +25 -1
  24. package/dist/container-runner.js.map +1 -1
  25. package/dist/conversation.d.ts +4 -0
  26. package/dist/conversation.d.ts.map +1 -1
  27. package/dist/conversation.js +13 -3
  28. package/dist/conversation.js.map +1 -1
  29. package/dist/discord-stream.d.ts +32 -0
  30. package/dist/discord-stream.d.ts.map +1 -0
  31. package/dist/discord-stream.js +196 -0
  32. package/dist/discord-stream.js.map +1 -0
  33. package/dist/discord.d.ts +9 -2
  34. package/dist/discord.d.ts.map +1 -1
  35. package/dist/discord.js +452 -23
  36. package/dist/discord.js.map +1 -1
  37. package/dist/gateway-client.d.ts.map +1 -1
  38. package/dist/gateway-client.js +5 -0
  39. package/dist/gateway-client.js.map +1 -1
  40. package/dist/gateway-service.d.ts +1 -0
  41. package/dist/gateway-service.d.ts.map +1 -1
  42. package/dist/gateway-service.js +60 -2
  43. package/dist/gateway-service.js.map +1 -1
  44. package/dist/gateway-types.d.ts +7 -1
  45. package/dist/gateway-types.d.ts.map +1 -1
  46. package/dist/gateway-types.js.map +1 -1
  47. package/dist/gateway.js +55 -4
  48. package/dist/gateway.js.map +1 -1
  49. package/dist/health.d.ts.map +1 -1
  50. package/dist/health.js +7 -0
  51. package/dist/health.js.map +1 -1
  52. package/dist/heartbeat.d.ts.map +1 -1
  53. package/dist/heartbeat.js +20 -0
  54. package/dist/heartbeat.js.map +1 -1
  55. package/dist/observability-ingest.d.ts.map +1 -1
  56. package/dist/observability-ingest.js +26 -0
  57. package/dist/observability-ingest.js.map +1 -1
  58. package/dist/prompt-hooks.d.ts +2 -0
  59. package/dist/prompt-hooks.d.ts.map +1 -1
  60. package/dist/prompt-hooks.js +29 -0
  61. package/dist/prompt-hooks.js.map +1 -1
  62. package/dist/runtime-config.d.ts +3 -0
  63. package/dist/runtime-config.d.ts.map +1 -1
  64. package/dist/runtime-config.js +17 -1
  65. package/dist/runtime-config.js.map +1 -1
  66. package/dist/scheduled-task-runner.d.ts.map +1 -1
  67. package/dist/scheduled-task-runner.js +20 -0
  68. package/dist/scheduled-task-runner.js.map +1 -1
  69. package/dist/session-maintenance.d.ts.map +1 -1
  70. package/dist/session-maintenance.js +1 -0
  71. package/dist/session-maintenance.js.map +1 -1
  72. package/dist/skills-guard.d.ts +36 -0
  73. package/dist/skills-guard.d.ts.map +1 -0
  74. package/dist/skills-guard.js +607 -0
  75. package/dist/skills-guard.js.map +1 -0
  76. package/dist/skills.d.ts +13 -2
  77. package/dist/skills.d.ts.map +1 -1
  78. package/dist/skills.js +494 -59
  79. package/dist/skills.js.map +1 -1
  80. package/dist/token-efficiency.d.ts +41 -0
  81. package/dist/token-efficiency.d.ts.map +1 -0
  82. package/dist/token-efficiency.js +164 -0
  83. package/dist/token-efficiency.js.map +1 -0
  84. package/dist/types.d.ts +11 -0
  85. package/dist/types.d.ts.map +1 -1
  86. package/dist/workspace.d.ts.map +1 -1
  87. package/dist/workspace.js +2 -1
  88. package/dist/workspace.js.map +1 -1
  89. package/docs/index.html +33 -7
  90. package/package.json +1 -1
  91. package/src/agent.ts +15 -1
  92. package/src/chunk.ts +153 -0
  93. package/src/container-runner.ts +24 -0
  94. package/src/conversation.ts +28 -4
  95. package/src/discord-stream.ts +240 -0
  96. package/src/discord.ts +517 -23
  97. package/src/gateway-client.ts +7 -0
  98. package/src/gateway-service.ts +72 -1
  99. package/src/gateway-types.ts +12 -1
  100. package/src/gateway.ts +65 -4
  101. package/src/health.ts +8 -0
  102. package/src/heartbeat.ts +20 -0
  103. package/src/observability-ingest.ts +24 -0
  104. package/src/prompt-hooks.ts +29 -0
  105. package/src/runtime-config.ts +18 -1
  106. package/src/scheduled-task-runner.ts +20 -0
  107. package/src/session-maintenance.ts +1 -0
  108. package/src/skills-guard.ts +736 -0
  109. package/src/skills.ts +570 -61
  110. package/src/token-efficiency.ts +228 -0
  111. package/src/types.ts +12 -0
  112. package/src/workspace.ts +2 -2
  113. package/.hybridclaw/container-image-state.json +0 -5
package/CHANGELOG.md CHANGED
@@ -8,6 +8,65 @@
8
8
 
9
9
  ### Fixed
10
10
 
11
+ ## [0.1.24](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.24)
12
+
13
+ ### Added
14
+
15
+ - **Discord edit-in-place streaming pipeline**: Added end-to-end assistant text delta streaming from container runtime to Discord delivery, including NDJSON `text` events and incremental Discord message edits.
16
+ - **Discord stream/chunk primitives**: Added `src/discord-stream.ts` (stream lifecycle manager with throttled edits and rollover) and `src/chunk.ts` (boundary-aware chunking with code-fence preservation and line limits).
17
+ - **Discord conversational event handling**: Added message debounce batching, in-flight run tracking, message edit/delete interruption handling, and thumbs-down reaction feedback capture for subsequent context.
18
+
19
+ ### Changed
20
+
21
+ - **Discord reply delivery semantics**: Replaced fixed 2000-char truncation with complete multi-message delivery and chunk-safe send/edit behavior.
22
+ - **Discord responsiveness model**: Message handling now keeps typing indicators alive during long turns, updates presence while processing, and acknowledges queued work with processing reactions.
23
+ - **Discord context assembly**: Conversation turns now prepend reply-chain/thread context and include parsed attachment context (inline text/code where readable, metadata fallback for unsupported types).
24
+
25
+ ### Fixed
26
+
27
+ - **Long response truncation**: Removed `.slice(0, 2000)` response truncation paths that dropped tail content and broke code blocks.
28
+ - **Perceived Discord stalls**: Fixed single-shot typing behavior by introducing a periodic typing loop for long-running turns.
29
+ - **Mid-turn user correction handling**: Edited/deleted source messages now cancel in-flight processing and clean up partial streamed output to prevent orphaned replies.
30
+ - **Screenshot reply verbosity in Discord**: Image-attachment responses now suppress workspace-path narration and default to concise delivery text (`Here it is.`/`Here they are.`).
31
+
32
+ ## [0.1.23](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.23)
33
+
34
+ ### Added
35
+
36
+ - **Token usage observability fields**: `model.usage` audit events now include prompt/completion/total token counts (API-reported when available, deterministic estimates as fallback), model-call counts, and char-level prompt/completion sizing.
37
+ - **Context optimization telemetry**: Added `context.optimization` audit events with history compression statistics (per-message truncation count, dropped chars/messages, and applied history budgets).
38
+
39
+ ### Changed
40
+
41
+ - **Runtime-config migration logging clarity**: Startup schema normalization now logs a dedicated `normalized config schema vN` message when version is unchanged, instead of reporting a misleading `migrated ... from vN to vN`.
42
+ - **History prompt assembly**: Conversation history now applies per-message truncation plus head/tail-aware budget compression to reduce token load while preserving recent context.
43
+ - **Bootstrap file truncation strategy**: Oversized workspace context files now use head/tail truncation (70/20 split) instead of head-only clipping.
44
+ - **Prompt mode tiers**: Prompt hooks now support `full`/`minimal`/`none` modes; pre-compaction memory flush uses `minimal` mode to reduce static prompt overhead.
45
+
46
+ ### Fixed
47
+
48
+ - **Local runtime-state git noise**: Added `.hybridclaw/` to `.gitignore` so container image fingerprint state files are no longer reported as untracked changes.
49
+
50
+ ## [0.1.22](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.22)
51
+
52
+ ### Added
53
+
54
+ - **Skills trust scanner**: Added `src/skills-guard.ts` with Hermes-derived regex threat detection (exfiltration, prompt injection, destructive ops, persistence, reverse shells, obfuscation, supply chain, credential exposure), structural checks (file count/size limits, binary blocking, symlink escape checks), and invisible-unicode detection.
55
+ - **Skill scan cache**: Added mtime-signature + content-hash scanner caching to skip re-scan on unchanged skills.
56
+ - **Extended SKILL frontmatter**: Added support for `always`, `requires.bins`, `requires.env`, and `metadata.hybridclaw.{tags,related_skills}` while preserving backward compatibility for existing fields.
57
+
58
+ ### Changed
59
+
60
+ - **Skill discovery tiers**: Expanded skill discovery precedence to `extra < bundled < codex < claude < agents-personal < agents-project < workspace`, including `config.skills.extraDirs[]` and `.agents/skills` interop paths.
61
+ - **Skill prompt embedding modes**: Implemented Always/Summary/Hidden behavior via frontmatter flags (`always`, `disable-model-invocation`) with `maxAlwaysChars=10000`, `maxSkillsPromptChars=30000`, and `maxSkillsInPrompt=150`.
62
+ - **Skill eligibility gating**: Skills with unmet `requires` are now silently excluded from both prompt availability and slash-command resolution.
63
+ - **Skill slash commands**: Added command-name sanitization (32-char max), reserved built-in command blocking, and deterministic collision deduplication (`-2`, `-3`, ...), while keeping `/skill name`, `/skill:name`, and `/<name>` invocation compatibility.
64
+ - **Web tool routing guidance**: Tool descriptions and runtime prompt guidance now include explicit `web_fetch` vs browser decision rules, concrete SPA/auth/app categories, and quantified cost asymmetry.
65
+ - **web_fetch escalation signaling**: `web_fetch` now emits structured escalation hints (`javascript_required`, `spa_shell_only`, `empty_extraction`, `boilerplate_only`, `bot_blocked`) and surfaces them in tool output for browser fallback routing.
66
+ - **Browser extraction steering**: `browser_navigate` responses now include text preview metadata and explicit next-step hints (`browser_snapshot` with `mode="full"`), and docs/prompts now clarify that `browser_pdf` is export-only (not text extraction).
67
+
68
+ ### Fixed
69
+
11
70
  ## [0.1.21](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.21)
12
71
 
13
72
  ### Added
package/README.md CHANGED
@@ -11,6 +11,8 @@ npm install -g @hybridaione/hybridclaw
11
11
  hybridclaw onboarding
12
12
  ```
13
13
 
14
+ Latest release: [v0.1.24](https://github.com/HybridAIOne/hybridclaw/releases/tag/v0.1.24)
15
+
14
16
  ## HybridAI Advantage
15
17
 
16
18
  - Security-focused foundation
@@ -75,9 +77,12 @@ HybridClaw best-in-class capabilities:
75
77
  - explicit trust-model acceptance during onboarding (recorded in `config.json`)
76
78
  - typed `config.json` runtime settings with defaults, validation, and hot reload
77
79
  - formal prompt hook orchestration (`bootstrap`, `memory`, `safety`)
80
+ - Discord conversational UX: edit-in-place streaming responses, fence-safe chunking beyond Discord's 2000-char limit, typing keepalive, debounce batching, reply-chain-aware context, and concise attachment-first screenshot replies
81
+ - token-efficient context assembly: per-message history truncation, hard history budgets with head/tail preservation, and head/tail truncation for oversized bootstrap files
78
82
  - proactive runtime layer with active-hours gating, push delegation (`single`/`parallel`/`chain`), depth-aware tool policy, and retry controls
79
83
  - structured audit trail: append-only hash-chained wire logs (`data/audit/<session>/wire.jsonl`) with tamper-evident immutability, normalized SQLite audit tables, and verification/search CLI commands
80
84
  - observability export: incremental `events:batch` forwarding with durable cursor tracking and bot-scoped ingest token lifecycle via `ingest-token:ensure`
85
+ - model token telemetry in audit/observability events (`model.usage`) with API usage + deterministic fallback estimates
81
86
  - gateway lifecycle controls: managed + unmanaged restart/stop flows with graceful shutdown fallback paths
82
87
  - instruction-integrity approval flow: core instruction docs (`AGENTS.md`, `SECURITY.md`, `TRUST_MODEL.md`) are hash-verified against a local approved baseline before TUI start
83
88
 
@@ -87,6 +92,7 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
87
92
 
88
93
  - Start from `config.example.json` (reference)
89
94
  - Runtime watches `config.json` and hot-reloads most settings (model defaults, heartbeat, prompt hooks, limits, etc.)
95
+ - `skills.extraDirs` adds additional enterprise/shared skill roots (lowest precedence tier)
90
96
  - `proactive.*` controls autonomous behavior (`activeHours`, `delegation`, `autoRetry`)
91
97
  - `observability.*` controls push ingest into HybridAI (`events:batch` endpoint, batching, identity metadata)
92
98
  - Some settings require restart to fully apply (for example HTTP bind host/port)
@@ -138,6 +144,7 @@ HybridClaw can forward structured audit records to HybridAI's ingest API:
138
144
  - transport: bearer ingest token auto-fetched via `POST /api/v1/agent-observability/ingest-token:ensure` using `HYBRIDAI_API_KEY`
139
145
  - delivery: incremental batches with persisted cursor (`observability_offsets` table), max 1000 events and max 2,000,000-byte payload per request
140
146
  - token handling: token cache is stored locally in SQLite (`observability_ingest_tokens`) and automatically refreshed on ingest auth failures
147
+ - token visibility: `model.usage` payloads include `promptTokens`, `completionTokens`, `totalTokens`, plus estimated and API-native counters for accuracy/coverage
141
148
 
142
149
  Config keys (in `config.json`):
143
150
 
@@ -178,13 +185,25 @@ HybridClaw supports `SKILL.md`-based skills (`<skill-name>/SKILL.md`).
178
185
 
179
186
  You can place skills in:
180
187
 
181
- - `./skills/<skill-name>/SKILL.md` (project-level)
182
- - `<agent workspace>/skills/<skill-name>/SKILL.md` (agent-specific)
183
- - `$CODEX_HOME/skills/<skill-name>/SKILL.md`, `~/.codex/skills/<skill-name>/SKILL.md`, or `~/.claude/skills/<skill-name>/SKILL.md` (managed/shared)
188
+ - any directory listed in `config.skills.extraDirs[]` (enterprise/shared)
189
+ - bundled package skills (`<hybridclaw install>/skills/<skill-name>/SKILL.md`)
190
+ - `$CODEX_HOME/skills/<skill-name>/SKILL.md` or `~/.codex/skills/<skill-name>/SKILL.md`
191
+ - `~/.claude/skills/<skill-name>/SKILL.md`
192
+ - `~/.agents/skills/<skill-name>/SKILL.md`
193
+ - `./.agents/skills/<skill-name>/SKILL.md` (project)
194
+ - `./skills/<skill-name>/SKILL.md` (workspace)
184
195
 
185
196
  Load precedence is:
186
197
 
187
- - managed/shared < project < agent workspace
198
+ - `extra < bundled < codex < claude < agents-personal < agents-project < workspace`
199
+ - skills are merged by `name`; higher-precedence sources override lower-precedence ones
200
+
201
+ Security scanning is trust-aware:
202
+
203
+ - `bundled` sources are treated as `builtin` and not scanned
204
+ - `workspace` sources (`./skills/`, `./.agents/skills/`) are scanned; `caution` is allowed, `dangerous` is blocked
205
+ - `personal` sources (`~/.codex/skills/`, `~/.claude/skills/`, `~/.agents/skills/`) are scanned and blocked on `caution`/`dangerous`
206
+ - scanner includes Hermes-derived regex checks, structural limits (50 files, 1MB total, 256KB/file, binary/symlink checks), invisible-unicode detection, and mtime+content-hash cache reuse
188
207
 
189
208
  ### Required format
190
209
 
@@ -196,6 +215,14 @@ name: repo-orientation
196
215
  description: Quickly map an unfamiliar repository and identify where a requested feature should be implemented.
197
216
  user-invocable: true
198
217
  disable-model-invocation: false
218
+ always: false
219
+ requires:
220
+ bins: [docker, git]
221
+ env: [GITHUB_TOKEN]
222
+ metadata:
223
+ hybridclaw:
224
+ tags: [devops, docker]
225
+ related_skills: [kubernetes]
199
226
  ---
200
227
 
201
228
  # Repo Orientation
@@ -208,16 +235,25 @@ Supported frontmatter keys:
208
235
  - `description` (required)
209
236
  - `user-invocable` (optional, default `true`)
210
237
  - `disable-model-invocation` (optional, default `false`)
238
+ - `always` (optional, default `false`; embeds full skill body in the system prompt up to `maxAlwaysChars=10000`, then demotes to summary)
239
+ - `requires.bins` / `requires.env` (optional; skill is excluded unless requirements are met)
240
+ - `metadata.hybridclaw.tags` / `metadata.hybridclaw.related_skills` (optional metadata namespace)
211
241
 
212
242
  ### Using skills
213
243
 
214
- Skills are listed to the model as metadata (`name`, `description`, `location`), and the model reads `SKILL.md` on demand with the `read` tool.
244
+ Skills are listed to the model as metadata (`name`, `description`, `location`), and the model reads `SKILL.md` on demand with the `read` tool. Skills with `always: true` are embedded directly in the system prompt.
245
+
246
+ Prompt embedding modes:
247
+
248
+ - `Always`: `always: true` embeds full body in `<skill_always ...>` (budgeted by `maxAlwaysChars=10000`)
249
+ - `Summary`: default mode, emits only XML metadata under `<available_skills>`
250
+ - `Hidden`: `disable-model-invocation: true` excludes the skill from model prompt metadata (still invocable by slash command when `user-invocable: true`)
215
251
 
216
252
  Explicit invocation is supported via:
217
253
 
218
254
  - `/skill <name> [input]`
219
255
  - `/skill:<name> [input]`
220
- - `/<name> [input]` (when `user-invocable: true`)
256
+ - `/<name> [input]` (when `user-invocable: true`; command names are sanitized to lowercase `a-z0-9-`, max 32 chars, with `-2`/`-3` dedup and built-in command-name blocking)
221
257
 
222
258
  Example skill in this repo:
223
259
 
@@ -233,8 +269,8 @@ The agent has access to these sandboxed tools inside the container:
233
269
  - `memory` — durable memory files (`MEMORY.md`, `USER.md`, `memory/YYYY-MM-DD.md`)
234
270
  - `session_search` — search/summarize historical sessions from transcript archives
235
271
  - `delegate` — push-based background subagent tasks (`single`, `parallel`, `chain`) with auto-announced completion (no polling)
236
- - `web_fetch` — fetch a URL and extract readable content (HTML markdown/text)
237
- - `browser_*` (optional) — interactive browser automation (`navigate`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
272
+ - `web_fetch` — plain HTTP fetch + extraction for static/read-only content (docs, articles, READMEs, JSON/text APIs, direct files)
273
+ - `browser_*` (optional) — full browser automation for JS-rendered or interactive pages (`navigate`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
238
274
 
239
275
  `delegate` mode examples:
240
276
 
@@ -244,6 +280,12 @@ The agent has access to these sandboxed tools inside the container:
244
280
 
245
281
  Browser tooling notes:
246
282
 
283
+ - Routing default: prefer `web_fetch` first for read-only retrieval.
284
+ - Use browser tools for SPAs/web apps/auth flows/interaction tasks, or when `web_fetch` returns escalation hints (`javascript_required`, `spa_shell_only`, `empty_extraction`, `boilerplate_only`, `bot_blocked`).
285
+ - Cost profile: browser calls are typically ~10-100x slower/more expensive than `web_fetch`.
286
+ - Browser read flow: after `browser_navigate`, use `browser_snapshot` with `mode="full"` to extract content, then `browser_scroll` + `browser_snapshot` for additional lazy-loaded sections.
287
+ - `browser_pdf` is for export artifacts, not text extraction.
288
+
247
289
  - The shipped container image preinstalls `agent-browser` and Chromium (Playwright).
248
290
  - You can override the binary via `AGENT_BROWSER_BIN` if needed.
249
291
  - User-directed authenticated browser-flow testing is supported (including filling/submitting login forms on the requested site).
@@ -6,6 +6,9 @@
6
6
  "trustModelVersion": "",
7
7
  "trustModelAcceptedBy": ""
8
8
  },
9
+ "skills": {
10
+ "extraDirs": []
11
+ },
9
12
  "discord": {
10
13
  "prefix": "!claw"
11
14
  },
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "hybridclaw-agent",
3
- "version": "0.1.21",
3
+ "version": "0.1.24",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "hybridclaw-agent",
9
- "version": "0.1.21",
9
+ "version": "0.1.24",
10
10
  "dependencies": {
11
11
  "@mozilla/readability": "^0.6.0",
12
12
  "agent-browser": "^0.15.1",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hybridclaw-agent",
3
- "version": "0.1.21",
3
+ "version": "0.1.24",
4
4
  "type": "module",
5
5
  "scripts": {
6
6
  "build": "tsc",
@@ -56,6 +56,24 @@ const EXTRACT_IFRAMES_SCRIPT = `(() => {
56
56
  }));
57
57
  })()`;
58
58
 
59
+ const EXTRACT_TEXT_PREVIEW_SCRIPT = `(() => {
60
+ const bodyText = document.body ? String(document.body.innerText || '') : '';
61
+ const normalized = bodyText
62
+ .replace(/\\r/g, '')
63
+ .replace(/[ \\t]+\\n/g, '\\n')
64
+ .replace(/\\n{3,}/g, '\\n\\n')
65
+ .trim();
66
+ const previewLimit = 6000;
67
+ return {
68
+ text_length: normalized.length,
69
+ preview: normalized.slice(0, previewLimit),
70
+ preview_truncated: normalized.length > previewLimit,
71
+ has_noscript: Boolean(document.querySelector('noscript')),
72
+ root_shell: Boolean(document.querySelector('div#root:empty, div#app:empty, div#__next:empty')),
73
+ ready_state: String(document.readyState || ''),
74
+ };
75
+ })()`;
76
+
59
77
  const NETWORK_TIMINGS_SCRIPT = `(() => {
60
78
  const entries = performance.getEntriesByType('resource');
61
79
  return entries
@@ -563,6 +581,19 @@ function buildBotDetectionWarning(titleValue: unknown): Record<string, unknown>
563
581
  };
564
582
  }
565
583
 
584
+ function buildReadExtractionHint(params: {
585
+ contentLength: number;
586
+ hasNoscript: boolean;
587
+ rootShell: boolean;
588
+ }): string {
589
+ const base =
590
+ 'For content extraction, call browser_snapshot with {"mode":"full"} next. For long or lazy-loaded pages, run browser_scroll then browser_snapshot again.';
591
+ if (params.hasNoscript || params.rootShell || params.contentLength < 200) {
592
+ return `${base} This page currently looks dynamic/app-shell-like; do not conclude "inaccessible" before snapshot attempts.`;
593
+ }
594
+ return `${base} Avoid browser_pdf for text extraction; PDF export is for artifact output.`;
595
+ }
596
+
566
597
  function extractVisionTextContent(content: unknown): string {
567
598
  if (typeof content === 'string') return content.trim();
568
599
  if (!Array.isArray(content)) return '';
@@ -754,12 +785,31 @@ export async function executeBrowserTool(name: string, args: Record<string, unkn
754
785
  const data = (result.data || {}) as Record<string, unknown>;
755
786
  const title = String(data.title || '');
756
787
  const botWarning = buildBotDetectionWarning(title);
788
+ const textEval = await runBrowserEval(effectiveSessionId, EXTRACT_TEXT_PREVIEW_SCRIPT, 20_000);
789
+ const textData = textEval.success ? asRecord(textEval.result) : null;
790
+ const contentPreview = typeof textData?.preview === 'string' ? textData.preview : '';
791
+ const contentLength =
792
+ typeof textData?.text_length === 'number' && Number.isFinite(textData.text_length)
793
+ ? Math.max(0, Math.floor(textData.text_length))
794
+ : 0;
795
+ const contentPreviewTruncated = textData?.preview_truncated === true;
796
+ const hasNoscript = textData?.has_noscript === true;
797
+ const rootShell = textData?.root_shell === true;
798
+ const readyState = typeof textData?.ready_state === 'string' ? textData.ready_state : '';
799
+ const extractionHint = buildReadExtractionHint({ contentLength, hasNoscript, rootShell });
757
800
  // Best-effort priming so browser_network has request listeners active quickly.
758
801
  await runAgentBrowser(effectiveSessionId, 'network', ['requests']).catch(() => undefined);
759
802
  return success({
760
803
  url: data.url || parsed.toString(),
761
804
  title,
762
805
  session_id: effectiveSessionId,
806
+ content_text_length: contentLength,
807
+ ...(contentPreview ? { content_preview: contentPreview } : {}),
808
+ ...(contentPreview ? { content_preview_truncated: contentPreviewTruncated } : {}),
809
+ ...(readyState ? { ready_state: readyState } : {}),
810
+ ...(hasNoscript ? { has_noscript: true } : {}),
811
+ ...(rootShell ? { root_shell: true } : {}),
812
+ read_extraction_hint: extractionHint,
763
813
  ...(botWarning ? { bot_detection_warning: botWarning } : {}),
764
814
  });
765
815
  }
@@ -1000,7 +1050,7 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
1000
1050
  function: {
1001
1051
  name: 'browser_navigate',
1002
1052
  description:
1003
- 'Navigate to an HTTP/HTTPS URL in a browser session. Private/loopback hosts are blocked by default (SSRF guard).',
1053
+ 'Navigate to a URL in a full browser session with JavaScript execution and dynamic rendering. Use for SPAs (React/Vue/Angular/Svelte), auth/login flows, dashboards/web apps (Notion, Google Docs, Airtable, Jira, etc.), interaction tasks (click/type/submit/scroll), bot/captcha/consent flows, or when web_fetch returns escalation hints (javascript_required, spa_shell_only, empty_extraction, boilerplate_only, bot_blocked). Prefer web_fetch instead for static docs/articles/wikis, direct API JSON/XML/text endpoints, and simple read-only retrieval. Important: browser_navigate opens the page but does not replace content extraction; for read/summarize tasks call browser_snapshot with mode="full" next. Browser usage is typically ~10-100x slower/more expensive than web_fetch. Private/loopback hosts are blocked by default (SSRF guard).',
1004
1054
  parameters: {
1005
1055
  type: 'object',
1006
1056
  properties: {
@@ -1015,7 +1065,7 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
1015
1065
  function: {
1016
1066
  name: 'browser_snapshot',
1017
1067
  description:
1018
- 'Return an accessibility-tree snapshot of the current page with element refs usable by browser_click/browser_type.',
1068
+ 'Return an accessibility-tree snapshot of the current page with element refs usable by browser_click/browser_type. Use this to actually read page content after browser_navigate; for extraction tasks prefer mode="full" and repeat after browser_scroll on long/lazy-loaded pages.',
1019
1069
  parameters: {
1020
1070
  type: 'object',
1021
1071
  properties: {
@@ -1134,7 +1184,7 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
1134
1184
  function: {
1135
1185
  name: 'browser_pdf',
1136
1186
  description:
1137
- 'Save the current page as PDF. Output path is constrained under /workspace/.browser-artifacts for safety.',
1187
+ 'Save the current page as PDF artifact. Output path is constrained under /workspace/.browser-artifacts for safety. Use for export/sharing only, not for text extraction or summarization.',
1138
1188
  parameters: {
1139
1189
  type: 'object',
1140
1190
  properties: {
@@ -1,4 +1,4 @@
1
- import type { ChatCompletionResponse, ChatMessage, ToolDefinition } from './types.js';
1
+ import type { ChatCompletionResponse, ChatMessage, ToolCall, ToolDefinition } from './types.js';
2
2
 
3
3
  export class HybridAIRequestError extends Error {
4
4
  status: number;
@@ -12,18 +12,45 @@ export class HybridAIRequestError extends Error {
12
12
  }
13
13
  }
14
14
 
15
- export async function callHybridAI(
16
- baseUrl: string,
17
- apiKey: string,
15
+ interface StreamToolCallDelta {
16
+ index?: number;
17
+ id?: string;
18
+ type?: 'function';
19
+ function?: {
20
+ name?: string;
21
+ arguments?: string;
22
+ };
23
+ }
24
+
25
+ interface StreamChoiceChunk {
26
+ delta?: {
27
+ role?: string;
28
+ content?: string | null;
29
+ tool_calls?: StreamToolCallDelta[];
30
+ };
31
+ message?: {
32
+ role?: string;
33
+ content?: string | null;
34
+ tool_calls?: ToolCall[];
35
+ };
36
+ finish_reason?: string | null;
37
+ }
38
+
39
+ interface StreamChunkPayload {
40
+ id?: string;
41
+ model?: string;
42
+ usage?: ChatCompletionResponse['usage'];
43
+ choices?: StreamChoiceChunk[];
44
+ }
45
+
46
+ function buildRequestBody(
18
47
  model: string,
19
48
  chatbotId: string,
20
49
  enableRag: boolean,
21
50
  messages: ChatMessage[],
22
51
  tools: ToolDefinition[],
23
- ): Promise<ChatCompletionResponse> {
24
- const url = `${baseUrl}/v1/chat/completions`;
25
-
26
- const body: Record<string, unknown> = {
52
+ ): Record<string, unknown> {
53
+ return {
27
54
  model,
28
55
  chatbot_id: chatbotId,
29
56
  messages,
@@ -31,6 +58,65 @@ export async function callHybridAI(
31
58
  tool_choice: 'auto',
32
59
  enable_rag: enableRag,
33
60
  };
61
+ }
62
+
63
+ function parseStreamPayloadLine(rawLine: string): string | null {
64
+ const trimmed = rawLine.trim();
65
+ if (!trimmed) return null;
66
+ if (trimmed.startsWith(':')) return null;
67
+ if (trimmed.startsWith('event:')) return null;
68
+ if (trimmed.startsWith('id:')) return null;
69
+ if (trimmed.startsWith('data:')) {
70
+ return trimmed.slice(5).trim();
71
+ }
72
+ return trimmed;
73
+ }
74
+
75
+ function ensureToolCall(toolCalls: ToolCall[], index: number): ToolCall {
76
+ while (toolCalls.length <= index) {
77
+ toolCalls.push({
78
+ id: '',
79
+ type: 'function',
80
+ function: {
81
+ name: '',
82
+ arguments: '',
83
+ },
84
+ });
85
+ }
86
+ return toolCalls[index];
87
+ }
88
+
89
+ function mergeToolCallDelta(target: ToolCall, delta: StreamToolCallDelta): void {
90
+ if (typeof delta.id === 'string' && delta.id) {
91
+ target.id = target.id ? `${target.id}${delta.id}` : delta.id;
92
+ }
93
+ if (typeof delta.type === 'string') {
94
+ target.type = delta.type;
95
+ }
96
+ if (delta.function) {
97
+ if (typeof delta.function.name === 'string' && delta.function.name) {
98
+ target.function.name = target.function.name
99
+ ? `${target.function.name}${delta.function.name}`
100
+ : delta.function.name;
101
+ }
102
+ if (typeof delta.function.arguments === 'string' && delta.function.arguments) {
103
+ target.function.arguments += delta.function.arguments;
104
+ }
105
+ }
106
+ }
107
+
108
+ export async function callHybridAI(
109
+ baseUrl: string,
110
+ apiKey: string,
111
+ model: string,
112
+ chatbotId: string,
113
+ enableRag: boolean,
114
+ messages: ChatMessage[],
115
+ tools: ToolDefinition[],
116
+ ): Promise<ChatCompletionResponse> {
117
+ const url = `${baseUrl}/v1/chat/completions`;
118
+
119
+ const body = buildRequestBody(model, chatbotId, enableRag, messages, tools);
34
120
 
35
121
  const response = await fetch(url, {
36
122
  method: 'POST',
@@ -48,3 +134,179 @@ export async function callHybridAI(
48
134
 
49
135
  return (await response.json()) as ChatCompletionResponse;
50
136
  }
137
+
138
/**
 * Calls the chat-completions endpoint with `stream: true` and assembles the
 * streamed chunks into a single `ChatCompletionResponse`.
 *
 * Text is reported incrementally through `onTextDelta` while the stream is
 * read. Chunks may carry either incremental `delta` objects or cumulative
 * `message` snapshots; both are folded into the final message. Only the first
 * entry of `choices` is considered. If the server answers with plain JSON
 * (or exposes no readable body), the parsed JSON is returned as-is and
 * `onTextDelta` is never invoked.
 *
 * @param baseUrl API base URL; `/v1/chat/completions` is appended to it.
 * @param apiKey Bearer token sent in the `Authorization` header.
 * @param model Model name; also used in the result if the stream names none.
 * @param chatbotId Forwarded to `buildRequestBody`.
 * @param enableRag Forwarded to `buildRequestBody`.
 * @param messages Conversation messages for the request.
 * @param tools Tool definitions for the request.
 * @param onTextDelta Invoked with each newly received piece of text.
 * @returns The assembled completion; `finish_reason` defaults to
 *   `tool_calls` when tool calls were collected and to `stop` otherwise.
 * @throws HybridAIRequestError when the HTTP status is not OK.
 * @throws Error when the stream ends without a single parseable JSON payload.
 */
export async function callHybridAIStream(
  baseUrl: string,
  apiKey: string,
  model: string,
  chatbotId: string,
  enableRag: boolean,
  messages: ChatMessage[],
  tools: ToolDefinition[],
  onTextDelta: (delta: string) => void,
): Promise<ChatCompletionResponse> {
  const url = `${baseUrl}/v1/chat/completions`;
  const body = {
    ...buildRequestBody(model, chatbotId, enableRag, messages, tools),
    stream: true,
  };

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Accept: 'text/event-stream, application/x-ndjson, application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const text = await response.text();
    throw new HybridAIRequestError(response.status, text);
  }

  // The server may ignore `stream: true` and reply with one JSON document.
  const contentType = (response.headers.get('content-type') || '').toLowerCase();
  if (
    contentType.includes('application/json')
    && !contentType.includes('ndjson')
    && !contentType.includes('event-stream')
  ) {
    return (await response.json()) as ChatCompletionResponse;
  }

  if (!response.body) {
    return (await response.json()) as ChatCompletionResponse;
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let buffer = '';
  let streamId = '';
  let streamModel = model;
  let finishReason: string | null = null;
  let usage: ChatCompletionResponse['usage'] | undefined;
  let role: string = 'assistant';
  let textContent = '';
  const toolCalls: ToolCall[] = [];
  let sawPayload = false;
  let streamDone = false;
  // True only when the body was read to its natural end.
  let exhausted = false;

  // Folds one payload (a JSON chunk or the `[DONE]` sentinel) into the state above.
  const consumePayload = (payloadText: string): void => {
    if (!payloadText || payloadText === '[DONE]') {
      if (payloadText === '[DONE]') streamDone = true;
      return;
    }

    let payload: StreamChunkPayload;
    try {
      payload = JSON.parse(payloadText) as StreamChunkPayload;
    } catch {
      // Deliberately best-effort: lines that are not JSON are skipped.
      return;
    }

    sawPayload = true;
    if (typeof payload.id === 'string' && payload.id) streamId = payload.id;
    if (typeof payload.model === 'string' && payload.model) streamModel = payload.model;
    if (payload.usage && typeof payload.usage === 'object') usage = payload.usage;

    const choice = Array.isArray(payload.choices) ? payload.choices[0] : undefined;
    if (!choice) return;

    // Cumulative snapshot: replaces the accumulated state.
    if (choice.message) {
      const message = choice.message;
      if (typeof message.role === 'string' && message.role) role = message.role;
      if (typeof message.content === 'string') {
        const nextContent = message.content;
        // Report only the new suffix when the snapshot extends the known text.
        const delta = nextContent.startsWith(textContent)
          ? nextContent.slice(textContent.length)
          : nextContent;
        textContent = nextContent;
        if (delta) onTextDelta(delta);
      }
      if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
        toolCalls.length = 0;
        for (const call of message.tool_calls) {
          toolCalls.push({
            id: call.id || '',
            type: call.type || 'function',
            function: {
              name: call.function?.name || '',
              arguments: call.function?.arguments || '',
            },
          });
        }
      }
    }

    // Incremental fragment: appended to the accumulated state.
    if (choice.delta) {
      const delta = choice.delta;
      if (typeof delta.role === 'string' && delta.role) role = delta.role;
      if (typeof delta.content === 'string' && delta.content) {
        textContent += delta.content;
        onTextDelta(delta.content);
      }
      if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
        for (const callDelta of delta.tool_calls) {
          // Only a non-negative integer is a usable slot; a fractional or
          // infinite index would make the accumulator lookup fail or never end.
          const index = typeof callDelta.index === 'number'
            && Number.isInteger(callDelta.index)
            && callDelta.index >= 0
            ? callDelta.index
            : 0;
          const target = ensureToolCall(toolCalls, index);
          mergeToolCallDelta(target, callDelta);
        }
      }
    }

    if (typeof choice.finish_reason === 'string' && choice.finish_reason) {
      finishReason = choice.finish_reason;
    }
  };

  try {
    while (!streamDone) {
      const { done, value } = await reader.read();
      if (done) {
        exhausted = true;
        break;
      }

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line; keep it for the next read.
      buffer = lines.pop() || '';

      for (const rawLine of lines) {
        const payloadText = parseStreamPayloadLine(rawLine);
        if (!payloadText) continue;
        consumePayload(payloadText);
        if (streamDone) break;
      }
    }

    if (exhausted) {
      // Flush bytes the decoder is still holding so they reach the final line.
      buffer += decoder.decode();
    }

    if (!streamDone && buffer.trim()) {
      const payloadText = parseStreamPayloadLine(buffer);
      if (payloadText) {
        consumePayload(payloadText);
      }
    }
  } finally {
    if (!exhausted) {
      // Left early ([DONE] seen or an error thrown): cancel so the connection
      // is not kept open. Cleanup is best-effort and must not mask that error.
      await reader.cancel().catch(() => undefined);
    }
    reader.releaseLock();
  }

  if (!sawPayload) {
    throw new Error('Streaming response ended without payload');
  }

  const finalFinishReason = finishReason || (toolCalls.length > 0 ? 'tool_calls' : 'stop');
  return {
    id: streamId || 'stream',
    model: streamModel,
    choices: [
      {
        message: {
          role,
          content: textContent || null,
          ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
        },
        finish_reason: finalFinishReason,
      },
    ],
    ...(usage ? { usage } : {}),
  };
}