@hybridaione/hybridclaw 0.1.21 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -0
- package/README.md +50 -8
- package/config.example.json +3 -0
- package/container/package-lock.json +2 -2
- package/container/package.json +1 -1
- package/container/src/browser-tools.ts +53 -3
- package/container/src/hybridai-client.ts +270 -8
- package/container/src/index.ts +66 -3
- package/container/src/token-usage.ts +89 -0
- package/container/src/tools.ts +9 -2
- package/container/src/types.ts +19 -0
- package/container/src/web-fetch.ts +98 -7
- package/dist/agent.d.ts +1 -1
- package/dist/agent.d.ts.map +1 -1
- package/dist/agent.js +2 -2
- package/dist/agent.js.map +1 -1
- package/dist/chunk.d.ts +6 -0
- package/dist/chunk.d.ts.map +1 -0
- package/dist/chunk.js +129 -0
- package/dist/chunk.js.map +1 -0
- package/dist/container-runner.d.ts +1 -1
- package/dist/container-runner.d.ts.map +1 -1
- package/dist/container-runner.js +25 -1
- package/dist/container-runner.js.map +1 -1
- package/dist/conversation.d.ts +4 -0
- package/dist/conversation.d.ts.map +1 -1
- package/dist/conversation.js +13 -3
- package/dist/conversation.js.map +1 -1
- package/dist/discord-stream.d.ts +32 -0
- package/dist/discord-stream.d.ts.map +1 -0
- package/dist/discord-stream.js +196 -0
- package/dist/discord-stream.js.map +1 -0
- package/dist/discord.d.ts +9 -2
- package/dist/discord.d.ts.map +1 -1
- package/dist/discord.js +452 -23
- package/dist/discord.js.map +1 -1
- package/dist/gateway-client.d.ts.map +1 -1
- package/dist/gateway-client.js +5 -0
- package/dist/gateway-client.js.map +1 -1
- package/dist/gateway-service.d.ts +1 -0
- package/dist/gateway-service.d.ts.map +1 -1
- package/dist/gateway-service.js +60 -2
- package/dist/gateway-service.js.map +1 -1
- package/dist/gateway-types.d.ts +7 -1
- package/dist/gateway-types.d.ts.map +1 -1
- package/dist/gateway-types.js.map +1 -1
- package/dist/gateway.js +55 -4
- package/dist/gateway.js.map +1 -1
- package/dist/health.d.ts.map +1 -1
- package/dist/health.js +7 -0
- package/dist/health.js.map +1 -1
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +20 -0
- package/dist/heartbeat.js.map +1 -1
- package/dist/observability-ingest.d.ts.map +1 -1
- package/dist/observability-ingest.js +26 -0
- package/dist/observability-ingest.js.map +1 -1
- package/dist/prompt-hooks.d.ts +2 -0
- package/dist/prompt-hooks.d.ts.map +1 -1
- package/dist/prompt-hooks.js +29 -0
- package/dist/prompt-hooks.js.map +1 -1
- package/dist/runtime-config.d.ts +3 -0
- package/dist/runtime-config.d.ts.map +1 -1
- package/dist/runtime-config.js +17 -1
- package/dist/runtime-config.js.map +1 -1
- package/dist/scheduled-task-runner.d.ts.map +1 -1
- package/dist/scheduled-task-runner.js +20 -0
- package/dist/scheduled-task-runner.js.map +1 -1
- package/dist/session-maintenance.d.ts.map +1 -1
- package/dist/session-maintenance.js +1 -0
- package/dist/session-maintenance.js.map +1 -1
- package/dist/skills-guard.d.ts +36 -0
- package/dist/skills-guard.d.ts.map +1 -0
- package/dist/skills-guard.js +607 -0
- package/dist/skills-guard.js.map +1 -0
- package/dist/skills.d.ts +13 -2
- package/dist/skills.d.ts.map +1 -1
- package/dist/skills.js +494 -59
- package/dist/skills.js.map +1 -1
- package/dist/token-efficiency.d.ts +41 -0
- package/dist/token-efficiency.d.ts.map +1 -0
- package/dist/token-efficiency.js +164 -0
- package/dist/token-efficiency.js.map +1 -0
- package/dist/types.d.ts +11 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/workspace.d.ts.map +1 -1
- package/dist/workspace.js +2 -1
- package/dist/workspace.js.map +1 -1
- package/docs/index.html +33 -7
- package/package.json +1 -1
- package/src/agent.ts +15 -1
- package/src/chunk.ts +153 -0
- package/src/container-runner.ts +24 -0
- package/src/conversation.ts +28 -4
- package/src/discord-stream.ts +240 -0
- package/src/discord.ts +517 -23
- package/src/gateway-client.ts +7 -0
- package/src/gateway-service.ts +72 -1
- package/src/gateway-types.ts +12 -1
- package/src/gateway.ts +65 -4
- package/src/health.ts +8 -0
- package/src/heartbeat.ts +20 -0
- package/src/observability-ingest.ts +24 -0
- package/src/prompt-hooks.ts +29 -0
- package/src/runtime-config.ts +18 -1
- package/src/scheduled-task-runner.ts +20 -0
- package/src/session-maintenance.ts +1 -0
- package/src/skills-guard.ts +736 -0
- package/src/skills.ts +570 -61
- package/src/token-efficiency.ts +228 -0
- package/src/types.ts +12 -0
- package/src/workspace.ts +2 -2
- package/.hybridclaw/container-image-state.json +0 -5
package/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,65 @@
|
|
|
8
8
|
|
|
9
9
|
### Fixed
|
|
10
10
|
|
|
11
|
+
## [0.1.24](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.24)
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
|
|
15
|
+
- **Discord edit-in-place streaming pipeline**: Added end-to-end assistant text delta streaming from container runtime to Discord delivery, including NDJSON `text` events and incremental Discord message edits.
|
|
16
|
+
- **Discord stream/chunk primitives**: Added `src/discord-stream.ts` (stream lifecycle manager with throttled edits and rollover) and `src/chunk.ts` (boundary-aware chunking with code-fence preservation and line limits).
|
|
17
|
+
- **Discord conversational event handling**: Added message debounce batching, in-flight run tracking, message edit/delete interruption handling, and thumbs-down reaction feedback capture for subsequent context.
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- **Discord reply delivery semantics**: Replaced fixed 2000-char truncation with complete multi-message delivery and chunk-safe send/edit behavior.
|
|
22
|
+
- **Discord responsiveness model**: Message handling now keeps typing indicators alive during long turns, updates presence while processing, and acknowledges queued work with processing reactions.
|
|
23
|
+
- **Discord context assembly**: Conversation turns now prepend reply-chain/thread context and include parsed attachment context (inline text/code where readable, metadata fallback for unsupported types).
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
|
|
27
|
+
- **Long response truncation**: Removed `.slice(0, 2000)` response truncation paths that dropped tail content and broke code blocks.
|
|
28
|
+
- **Perceived Discord stalls**: Fixed single-shot typing behavior by introducing a periodic typing loop for long-running turns.
|
|
29
|
+
- **Mid-turn user correction handling**: Edited/deleted source messages now cancel in-flight processing and clean up partial streamed output to prevent orphaned replies.
|
|
30
|
+
- **Screenshot reply verbosity in Discord**: Image-attachment responses now suppress workspace-path narration and default to concise delivery text (`Here it is.`/`Here they are.`).
|
|
31
|
+
|
|
32
|
+
## [0.1.23](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.23)
|
|
33
|
+
|
|
34
|
+
### Added
|
|
35
|
+
|
|
36
|
+
- **Token usage observability fields**: `model.usage` audit events now include prompt/completion/total token counts (API-reported when available, deterministic estimates as fallback), model-call counts, and char-level prompt/completion sizing.
|
|
37
|
+
- **Context optimization telemetry**: Added `context.optimization` audit events with history compression statistics (per-message truncation count, dropped chars/messages, and applied history budgets).
|
|
38
|
+
|
|
39
|
+
### Changed
|
|
40
|
+
|
|
41
|
+
- **Runtime-config migration logging clarity**: Startup schema normalization now logs a dedicated `normalized config schema vN` message when version is unchanged, instead of reporting a misleading `migrated ... from vN to vN`.
|
|
42
|
+
- **History prompt assembly**: Conversation history now applies per-message truncation plus head/tail-aware budget compression to reduce token load while preserving recent context.
|
|
43
|
+
- **Bootstrap file truncation strategy**: Oversized workspace context files now use head/tail truncation (70/20 split) instead of head-only clipping.
|
|
44
|
+
- **Prompt mode tiers**: Prompt hooks now support `full`/`minimal`/`none` modes; pre-compaction memory flush uses `minimal` mode to reduce static prompt overhead.
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
|
|
48
|
+
- **Local runtime-state git noise**: Added `.hybridclaw/` to `.gitignore` so container image fingerprint state files are no longer reported as untracked changes.
|
|
49
|
+
|
|
50
|
+
## [0.1.22](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.22)
|
|
51
|
+
|
|
52
|
+
### Added
|
|
53
|
+
|
|
54
|
+
- **Skills trust scanner**: Added `src/skills-guard.ts` with Hermes-derived regex threat detection (exfiltration, prompt injection, destructive ops, persistence, reverse shells, obfuscation, supply chain, credential exposure), structural checks (file count/size limits, binary blocking, symlink escape checks), and invisible-unicode detection.
|
|
55
|
+
- **Skill scan cache**: Added mtime-signature + content-hash scanner caching to skip re-scan on unchanged skills.
|
|
56
|
+
- **Extended SKILL frontmatter**: Added support for `always`, `requires.bins`, `requires.env`, and `metadata.hybridclaw.{tags,related_skills}` while preserving backward compatibility for existing fields.
|
|
57
|
+
|
|
58
|
+
### Changed
|
|
59
|
+
|
|
60
|
+
- **Skill discovery tiers**: Expanded skill discovery precedence to `extra < bundled < codex < claude < agents-personal < agents-project < workspace`, including `config.skills.extraDirs[]` and `.agents/skills` interop paths.
|
|
61
|
+
- **Skill prompt embedding modes**: Implemented Always/Summary/Hidden behavior via frontmatter flags (`always`, `disable-model-invocation`) with `maxAlwaysChars=10000`, `maxSkillsPromptChars=30000`, and `maxSkillsInPrompt=150`.
|
|
62
|
+
- **Skill eligibility gating**: Skills with unmet `requires` are now silently excluded from both prompt availability and slash-command resolution.
|
|
63
|
+
- **Skill slash commands**: Added command-name sanitization (32-char max), reserved built-in command blocking, and deterministic collision deduplication (`-2`, `-3`, ...), while keeping `/skill name`, `/skill:name`, and `/<name>` invocation compatibility.
|
|
64
|
+
- **Web tool routing guidance**: Tool descriptions and runtime prompt guidance now include explicit `web_fetch` vs browser decision rules, concrete SPA/auth/app categories, and quantified cost asymmetry.
|
|
65
|
+
- **web_fetch escalation signaling**: `web_fetch` now emits structured escalation hints (`javascript_required`, `spa_shell_only`, `empty_extraction`, `boilerplate_only`, `bot_blocked`) and surfaces them in tool output for browser fallback routing.
|
|
66
|
+
- **Browser extraction steering**: `browser_navigate` responses now include text preview metadata and explicit next-step hints (`browser_snapshot` with `mode="full"`), and docs/prompts now clarify that `browser_pdf` is export-only (not text extraction).
|
|
67
|
+
|
|
68
|
+
### Fixed
|
|
69
|
+
|
|
11
70
|
## [0.1.21](https://github.com/HybridAIOne/hybridclaw/tree/v0.1.21)
|
|
12
71
|
|
|
13
72
|
### Added
|
package/README.md
CHANGED
|
@@ -11,6 +11,8 @@ npm install -g @hybridaione/hybridclaw
|
|
|
11
11
|
hybridclaw onboarding
|
|
12
12
|
```
|
|
13
13
|
|
|
14
|
+
Latest release: [v0.1.24](https://github.com/HybridAIOne/hybridclaw/releases/tag/v0.1.24)
|
|
15
|
+
|
|
14
16
|
## HybridAI Advantage
|
|
15
17
|
|
|
16
18
|
- Security-focused foundation
|
|
@@ -75,9 +77,12 @@ HybridClaw best-in-class capabilities:
|
|
|
75
77
|
- explicit trust-model acceptance during onboarding (recorded in `config.json`)
|
|
76
78
|
- typed `config.json` runtime settings with defaults, validation, and hot reload
|
|
77
79
|
- formal prompt hook orchestration (`bootstrap`, `memory`, `safety`)
|
|
80
|
+
- Discord conversational UX: edit-in-place streaming responses, fence-safe chunking beyond Discord's 2000-char limit, typing keepalive, debounce batching, reply-chain-aware context, and concise attachment-first screenshot replies
|
|
81
|
+
- token-efficient context assembly: per-message history truncation, hard history budgets with head/tail preservation, and head/tail truncation for oversized bootstrap files
|
|
78
82
|
- proactive runtime layer with active-hours gating, push delegation (`single`/`parallel`/`chain`), depth-aware tool policy, and retry controls
|
|
79
83
|
- structured audit trail: append-only hash-chained wire logs (`data/audit/<session>/wire.jsonl`) with tamper-evident immutability, normalized SQLite audit tables, and verification/search CLI commands
|
|
80
84
|
- observability export: incremental `events:batch` forwarding with durable cursor tracking and bot-scoped ingest token lifecycle via `ingest-token:ensure`
|
|
85
|
+
- model token telemetry in audit/observability events (`model.usage`) with API usage + deterministic fallback estimates
|
|
81
86
|
- gateway lifecycle controls: managed + unmanaged restart/stop flows with graceful shutdown fallback paths
|
|
82
87
|
- instruction-integrity approval flow: core instruction docs (`AGENTS.md`, `SECURITY.md`, `TRUST_MODEL.md`) are hash-verified against a local approved baseline before TUI start
|
|
83
88
|
|
|
@@ -87,6 +92,7 @@ HybridClaw uses typed runtime config in `config.json` (auto-created on first run
|
|
|
87
92
|
|
|
88
93
|
- Start from `config.example.json` (reference)
|
|
89
94
|
- Runtime watches `config.json` and hot-reloads most settings (model defaults, heartbeat, prompt hooks, limits, etc.)
|
|
95
|
+
- `skills.extraDirs` adds additional enterprise/shared skill roots (lowest precedence tier)
|
|
90
96
|
- `proactive.*` controls autonomous behavior (`activeHours`, `delegation`, `autoRetry`)
|
|
91
97
|
- `observability.*` controls push ingest into HybridAI (`events:batch` endpoint, batching, identity metadata)
|
|
92
98
|
- Some settings require restart to fully apply (for example HTTP bind host/port)
|
|
@@ -138,6 +144,7 @@ HybridClaw can forward structured audit records to HybridAI's ingest API:
|
|
|
138
144
|
- transport: bearer ingest token auto-fetched via `POST /api/v1/agent-observability/ingest-token:ensure` using `HYBRIDAI_API_KEY`
|
|
139
145
|
- delivery: incremental batches with persisted cursor (`observability_offsets` table), max 1000 events and max 2,000,000-byte payload per request
|
|
140
146
|
- token handling: token cache is stored locally in SQLite (`observability_ingest_tokens`) and automatically refreshed on ingest auth failures
|
|
147
|
+
- token visibility: `model.usage` payloads include `promptTokens`, `completionTokens`, `totalTokens`, plus estimated and API-native counters for accuracy/coverage
|
|
141
148
|
|
|
142
149
|
Config keys (in `config.json`):
|
|
143
150
|
|
|
@@ -178,13 +185,25 @@ HybridClaw supports `SKILL.md`-based skills (`<skill-name>/SKILL.md`).
|
|
|
178
185
|
|
|
179
186
|
You can place skills in:
|
|
180
187
|
|
|
181
|
-
-
|
|
182
|
-
- `<
|
|
183
|
-
- `$CODEX_HOME/skills/<skill-name>/SKILL.md
|
|
188
|
+
- any directory listed in `config.skills.extraDirs[]` (enterprise/shared)
|
|
189
|
+
- bundled package skills (`<hybridclaw install>/skills/<skill-name>/SKILL.md`)
|
|
190
|
+
- `$CODEX_HOME/skills/<skill-name>/SKILL.md` or `~/.codex/skills/<skill-name>/SKILL.md`
|
|
191
|
+
- `~/.claude/skills/<skill-name>/SKILL.md`
|
|
192
|
+
- `~/.agents/skills/<skill-name>/SKILL.md`
|
|
193
|
+
- `./.agents/skills/<skill-name>/SKILL.md` (project)
|
|
194
|
+
- `./skills/<skill-name>/SKILL.md` (workspace)
|
|
184
195
|
|
|
185
196
|
Load precedence is:
|
|
186
197
|
|
|
187
|
-
-
|
|
198
|
+
- `extra < bundled < codex < claude < agents-personal < agents-project < workspace`
|
|
199
|
+
- skills are merged by `name`; higher-precedence sources override lower-precedence ones
|
|
200
|
+
|
|
201
|
+
Security scanning is trust-aware:
|
|
202
|
+
|
|
203
|
+
- `bundled` sources are treated as `builtin` and not scanned
|
|
204
|
+
- `workspace` sources (`./skills/`, `./.agents/skills/`) are scanned; `caution` is allowed, `dangerous` is blocked
|
|
205
|
+
- `personal` sources (`~/.codex/skills/`, `~/.claude/skills/`, `~/.agents/skills/`) are scanned and blocked on `caution`/`dangerous`
|
|
206
|
+
- scanner includes Hermes-derived regex checks, structural limits (50 files, 1MB total, 256KB/file, binary/symlink checks), invisible-unicode detection, and mtime+content-hash cache reuse
|
|
188
207
|
|
|
189
208
|
### Required format
|
|
190
209
|
|
|
@@ -196,6 +215,14 @@ name: repo-orientation
|
|
|
196
215
|
description: Quickly map an unfamiliar repository and identify where a requested feature should be implemented.
|
|
197
216
|
user-invocable: true
|
|
198
217
|
disable-model-invocation: false
|
|
218
|
+
always: false
|
|
219
|
+
requires:
|
|
220
|
+
bins: [docker, git]
|
|
221
|
+
env: [GITHUB_TOKEN]
|
|
222
|
+
metadata:
|
|
223
|
+
hybridclaw:
|
|
224
|
+
tags: [devops, docker]
|
|
225
|
+
related_skills: [kubernetes]
|
|
199
226
|
---
|
|
200
227
|
|
|
201
228
|
# Repo Orientation
|
|
@@ -208,16 +235,25 @@ Supported frontmatter keys:
|
|
|
208
235
|
- `description` (required)
|
|
209
236
|
- `user-invocable` (optional, default `true`)
|
|
210
237
|
- `disable-model-invocation` (optional, default `false`)
|
|
238
|
+
- `always` (optional, default `false`; embeds full skill body in the system prompt up to `maxAlwaysChars=10000`, then demotes to summary)
|
|
239
|
+
- `requires.bins` / `requires.env` (optional; skill is excluded unless requirements are met)
|
|
240
|
+
- `metadata.hybridclaw.tags` / `metadata.hybridclaw.related_skills` (optional metadata namespace)
|
|
211
241
|
|
|
212
242
|
### Using skills
|
|
213
243
|
|
|
214
|
-
Skills are listed to the model as metadata (`name`, `description`, `location`), and the model reads `SKILL.md` on demand with the `read` tool.
|
|
244
|
+
Skills are listed to the model as metadata (`name`, `description`, `location`), and the model reads `SKILL.md` on demand with the `read` tool. Skills with `always: true` are embedded directly in the system prompt.
|
|
245
|
+
|
|
246
|
+
Prompt embedding modes:
|
|
247
|
+
|
|
248
|
+
- `Always`: `always: true` embeds full body in `<skill_always ...>` (budgeted by `maxAlwaysChars=10000`)
|
|
249
|
+
- `Summary`: default mode, emits only XML metadata under `<available_skills>`
|
|
250
|
+
- `Hidden`: `disable-model-invocation: true` excludes the skill from model prompt metadata (still invocable by slash command when `user-invocable: true`)
|
|
215
251
|
|
|
216
252
|
Explicit invocation is supported via:
|
|
217
253
|
|
|
218
254
|
- `/skill <name> [input]`
|
|
219
255
|
- `/skill:<name> [input]`
|
|
220
|
-
- `/<name> [input]` (when `user-invocable: true`)
|
|
256
|
+
- `/<name> [input]` (when `user-invocable: true`; command names are sanitized to lowercase `a-z0-9-`, max 32 chars, with `-2`/`-3` dedup and built-in command-name blocking)
|
|
221
257
|
|
|
222
258
|
Example skill in this repo:
|
|
223
259
|
|
|
@@ -233,8 +269,8 @@ The agent has access to these sandboxed tools inside the container:
|
|
|
233
269
|
- `memory` — durable memory files (`MEMORY.md`, `USER.md`, `memory/YYYY-MM-DD.md`)
|
|
234
270
|
- `session_search` — search/summarize historical sessions from transcript archives
|
|
235
271
|
- `delegate` — push-based background subagent tasks (`single`, `parallel`, `chain`) with auto-announced completion (no polling)
|
|
236
|
-
- `web_fetch` — fetch
|
|
237
|
-
- `browser_*` (optional) —
|
|
272
|
+
- `web_fetch` — plain HTTP fetch + extraction for static/read-only content (docs, articles, READMEs, JSON/text APIs, direct files)
|
|
273
|
+
- `browser_*` (optional) — full browser automation for JS-rendered or interactive pages (`navigate`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `screenshot`, `pdf`, `close`)
|
|
238
274
|
|
|
239
275
|
`delegate` mode examples:
|
|
240
276
|
|
|
@@ -244,6 +280,12 @@ The agent has access to these sandboxed tools inside the container:
|
|
|
244
280
|
|
|
245
281
|
Browser tooling notes:
|
|
246
282
|
|
|
283
|
+
- Routing default: prefer `web_fetch` first for read-only retrieval.
|
|
284
|
+
- Use browser tools for SPAs/web apps/auth flows/interaction tasks, or when `web_fetch` returns escalation hints (`javascript_required`, `spa_shell_only`, `empty_extraction`, `boilerplate_only`, `bot_blocked`).
|
|
285
|
+
- Cost profile: browser calls are typically ~10-100x slower/more expensive than `web_fetch`.
|
|
286
|
+
- Browser read flow: after `browser_navigate`, use `browser_snapshot` with `mode="full"` to extract content, then `browser_scroll` + `browser_snapshot` for additional lazy-loaded sections.
|
|
287
|
+
- `browser_pdf` is for export artifacts, not text extraction.
|
|
288
|
+
|
|
247
289
|
- The shipped container image preinstalls `agent-browser` and Chromium (Playwright).
|
|
248
290
|
- You can override the binary via `AGENT_BROWSER_BIN` if needed.
|
|
249
291
|
- User-directed authenticated browser-flow testing is supported (including filling/submitting login forms on the requested site).
|
package/container/package-lock.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hybridclaw-agent",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.24",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "hybridclaw-agent",
|
|
9
|
-
"version": "0.1.
|
|
9
|
+
"version": "0.1.24",
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@mozilla/readability": "^0.6.0",
|
|
12
12
|
"agent-browser": "^0.15.1",
|
package/container/src/browser-tools.ts
CHANGED
|
@@ -56,6 +56,24 @@ const EXTRACT_IFRAMES_SCRIPT = `(() => {
|
|
|
56
56
|
}));
|
|
57
57
|
})()`;
|
|
58
58
|
|
|
59
|
+
const EXTRACT_TEXT_PREVIEW_SCRIPT = `(() => {
|
|
60
|
+
const bodyText = document.body ? String(document.body.innerText || '') : '';
|
|
61
|
+
const normalized = bodyText
|
|
62
|
+
.replace(/\\r/g, '')
|
|
63
|
+
.replace(/[ \\t]+\\n/g, '\\n')
|
|
64
|
+
.replace(/\\n{3,}/g, '\\n\\n')
|
|
65
|
+
.trim();
|
|
66
|
+
const previewLimit = 6000;
|
|
67
|
+
return {
|
|
68
|
+
text_length: normalized.length,
|
|
69
|
+
preview: normalized.slice(0, previewLimit),
|
|
70
|
+
preview_truncated: normalized.length > previewLimit,
|
|
71
|
+
has_noscript: Boolean(document.querySelector('noscript')),
|
|
72
|
+
root_shell: Boolean(document.querySelector('div#root:empty, div#app:empty, div#__next:empty')),
|
|
73
|
+
ready_state: String(document.readyState || ''),
|
|
74
|
+
};
|
|
75
|
+
})()`;
|
|
76
|
+
|
|
59
77
|
const NETWORK_TIMINGS_SCRIPT = `(() => {
|
|
60
78
|
const entries = performance.getEntriesByType('resource');
|
|
61
79
|
return entries
|
|
@@ -563,6 +581,19 @@ function buildBotDetectionWarning(titleValue: unknown): Record<string, unknown>
|
|
|
563
581
|
};
|
|
564
582
|
}
|
|
565
583
|
|
|
584
|
+
function buildReadExtractionHint(params: {
|
|
585
|
+
contentLength: number;
|
|
586
|
+
hasNoscript: boolean;
|
|
587
|
+
rootShell: boolean;
|
|
588
|
+
}): string {
|
|
589
|
+
const base =
|
|
590
|
+
'For content extraction, call browser_snapshot with {"mode":"full"} next. For long or lazy-loaded pages, run browser_scroll then browser_snapshot again.';
|
|
591
|
+
if (params.hasNoscript || params.rootShell || params.contentLength < 200) {
|
|
592
|
+
return `${base} This page currently looks dynamic/app-shell-like; do not conclude "inaccessible" before snapshot attempts.`;
|
|
593
|
+
}
|
|
594
|
+
return `${base} Avoid browser_pdf for text extraction; PDF export is for artifact output.`;
|
|
595
|
+
}
|
|
596
|
+
|
|
566
597
|
function extractVisionTextContent(content: unknown): string {
|
|
567
598
|
if (typeof content === 'string') return content.trim();
|
|
568
599
|
if (!Array.isArray(content)) return '';
|
|
@@ -754,12 +785,31 @@ export async function executeBrowserTool(name: string, args: Record<string, unkn
|
|
|
754
785
|
const data = (result.data || {}) as Record<string, unknown>;
|
|
755
786
|
const title = String(data.title || '');
|
|
756
787
|
const botWarning = buildBotDetectionWarning(title);
|
|
788
|
+
const textEval = await runBrowserEval(effectiveSessionId, EXTRACT_TEXT_PREVIEW_SCRIPT, 20_000);
|
|
789
|
+
const textData = textEval.success ? asRecord(textEval.result) : null;
|
|
790
|
+
const contentPreview = typeof textData?.preview === 'string' ? textData.preview : '';
|
|
791
|
+
const contentLength =
|
|
792
|
+
typeof textData?.text_length === 'number' && Number.isFinite(textData.text_length)
|
|
793
|
+
? Math.max(0, Math.floor(textData.text_length))
|
|
794
|
+
: 0;
|
|
795
|
+
const contentPreviewTruncated = textData?.preview_truncated === true;
|
|
796
|
+
const hasNoscript = textData?.has_noscript === true;
|
|
797
|
+
const rootShell = textData?.root_shell === true;
|
|
798
|
+
const readyState = typeof textData?.ready_state === 'string' ? textData.ready_state : '';
|
|
799
|
+
const extractionHint = buildReadExtractionHint({ contentLength, hasNoscript, rootShell });
|
|
757
800
|
// Best-effort priming so browser_network has request listeners active quickly.
|
|
758
801
|
await runAgentBrowser(effectiveSessionId, 'network', ['requests']).catch(() => undefined);
|
|
759
802
|
return success({
|
|
760
803
|
url: data.url || parsed.toString(),
|
|
761
804
|
title,
|
|
762
805
|
session_id: effectiveSessionId,
|
|
806
|
+
content_text_length: contentLength,
|
|
807
|
+
...(contentPreview ? { content_preview: contentPreview } : {}),
|
|
808
|
+
...(contentPreview ? { content_preview_truncated: contentPreviewTruncated } : {}),
|
|
809
|
+
...(readyState ? { ready_state: readyState } : {}),
|
|
810
|
+
...(hasNoscript ? { has_noscript: true } : {}),
|
|
811
|
+
...(rootShell ? { root_shell: true } : {}),
|
|
812
|
+
read_extraction_hint: extractionHint,
|
|
763
813
|
...(botWarning ? { bot_detection_warning: botWarning } : {}),
|
|
764
814
|
});
|
|
765
815
|
}
|
|
@@ -1000,7 +1050,7 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
|
|
|
1000
1050
|
function: {
|
|
1001
1051
|
name: 'browser_navigate',
|
|
1002
1052
|
description:
|
|
1003
|
-
'Navigate to
|
|
1053
|
+
'Navigate to a URL in a full browser session with JavaScript execution and dynamic rendering. Use for SPAs (React/Vue/Angular/Svelte), auth/login flows, dashboards/web apps (Notion, Google Docs, Airtable, Jira, etc.), interaction tasks (click/type/submit/scroll), bot/captcha/consent flows, or when web_fetch returns escalation hints (javascript_required, spa_shell_only, empty_extraction, boilerplate_only, bot_blocked). Prefer web_fetch instead for static docs/articles/wikis, direct API JSON/XML/text endpoints, and simple read-only retrieval. Important: browser_navigate opens the page but does not replace content extraction; for read/summarize tasks call browser_snapshot with mode="full" next. Browser usage is typically ~10-100x slower/more expensive than web_fetch. Private/loopback hosts are blocked by default (SSRF guard).',
|
|
1004
1054
|
parameters: {
|
|
1005
1055
|
type: 'object',
|
|
1006
1056
|
properties: {
|
|
@@ -1015,7 +1065,7 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
|
|
|
1015
1065
|
function: {
|
|
1016
1066
|
name: 'browser_snapshot',
|
|
1017
1067
|
description:
|
|
1018
|
-
'Return an accessibility-tree snapshot of the current page with element refs usable by browser_click/browser_type.',
|
|
1068
|
+
'Return an accessibility-tree snapshot of the current page with element refs usable by browser_click/browser_type. Use this to actually read page content after browser_navigate; for extraction tasks prefer mode="full" and repeat after browser_scroll on long/lazy-loaded pages.',
|
|
1019
1069
|
parameters: {
|
|
1020
1070
|
type: 'object',
|
|
1021
1071
|
properties: {
|
|
@@ -1134,7 +1184,7 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [
|
|
|
1134
1184
|
function: {
|
|
1135
1185
|
name: 'browser_pdf',
|
|
1136
1186
|
description:
|
|
1137
|
-
'Save the current page as PDF. Output path is constrained under /workspace/.browser-artifacts for safety.',
|
|
1187
|
+
'Save the current page as PDF artifact. Output path is constrained under /workspace/.browser-artifacts for safety. Use for export/sharing only, not for text extraction or summarization.',
|
|
1138
1188
|
parameters: {
|
|
1139
1189
|
type: 'object',
|
|
1140
1190
|
properties: {
|
package/container/src/hybridai-client.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ChatCompletionResponse, ChatMessage, ToolDefinition } from './types.js';
|
|
1
|
+
import type { ChatCompletionResponse, ChatMessage, ToolCall, ToolDefinition } from './types.js';
|
|
2
2
|
|
|
3
3
|
export class HybridAIRequestError extends Error {
|
|
4
4
|
status: number;
|
|
@@ -12,18 +12,45 @@ export class HybridAIRequestError extends Error {
|
|
|
12
12
|
}
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
interface StreamToolCallDelta {
|
|
16
|
+
index?: number;
|
|
17
|
+
id?: string;
|
|
18
|
+
type?: 'function';
|
|
19
|
+
function?: {
|
|
20
|
+
name?: string;
|
|
21
|
+
arguments?: string;
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
interface StreamChoiceChunk {
|
|
26
|
+
delta?: {
|
|
27
|
+
role?: string;
|
|
28
|
+
content?: string | null;
|
|
29
|
+
tool_calls?: StreamToolCallDelta[];
|
|
30
|
+
};
|
|
31
|
+
message?: {
|
|
32
|
+
role?: string;
|
|
33
|
+
content?: string | null;
|
|
34
|
+
tool_calls?: ToolCall[];
|
|
35
|
+
};
|
|
36
|
+
finish_reason?: string | null;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
interface StreamChunkPayload {
|
|
40
|
+
id?: string;
|
|
41
|
+
model?: string;
|
|
42
|
+
usage?: ChatCompletionResponse['usage'];
|
|
43
|
+
choices?: StreamChoiceChunk[];
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function buildRequestBody(
|
|
18
47
|
model: string,
|
|
19
48
|
chatbotId: string,
|
|
20
49
|
enableRag: boolean,
|
|
21
50
|
messages: ChatMessage[],
|
|
22
51
|
tools: ToolDefinition[],
|
|
23
|
-
):
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
const body: Record<string, unknown> = {
|
|
52
|
+
): Record<string, unknown> {
|
|
53
|
+
return {
|
|
27
54
|
model,
|
|
28
55
|
chatbot_id: chatbotId,
|
|
29
56
|
messages,
|
|
@@ -31,6 +58,65 @@ export async function callHybridAI(
|
|
|
31
58
|
tool_choice: 'auto',
|
|
32
59
|
enable_rag: enableRag,
|
|
33
60
|
};
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function parseStreamPayloadLine(rawLine: string): string | null {
|
|
64
|
+
const trimmed = rawLine.trim();
|
|
65
|
+
if (!trimmed) return null;
|
|
66
|
+
if (trimmed.startsWith(':')) return null;
|
|
67
|
+
if (trimmed.startsWith('event:')) return null;
|
|
68
|
+
if (trimmed.startsWith('id:')) return null;
|
|
69
|
+
if (trimmed.startsWith('data:')) {
|
|
70
|
+
return trimmed.slice(5).trim();
|
|
71
|
+
}
|
|
72
|
+
return trimmed;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
function ensureToolCall(toolCalls: ToolCall[], index: number): ToolCall {
|
|
76
|
+
while (toolCalls.length <= index) {
|
|
77
|
+
toolCalls.push({
|
|
78
|
+
id: '',
|
|
79
|
+
type: 'function',
|
|
80
|
+
function: {
|
|
81
|
+
name: '',
|
|
82
|
+
arguments: '',
|
|
83
|
+
},
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
return toolCalls[index];
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
function mergeToolCallDelta(target: ToolCall, delta: StreamToolCallDelta): void {
|
|
90
|
+
if (typeof delta.id === 'string' && delta.id) {
|
|
91
|
+
target.id = target.id ? `${target.id}${delta.id}` : delta.id;
|
|
92
|
+
}
|
|
93
|
+
if (typeof delta.type === 'string') {
|
|
94
|
+
target.type = delta.type;
|
|
95
|
+
}
|
|
96
|
+
if (delta.function) {
|
|
97
|
+
if (typeof delta.function.name === 'string' && delta.function.name) {
|
|
98
|
+
target.function.name = target.function.name
|
|
99
|
+
? `${target.function.name}${delta.function.name}`
|
|
100
|
+
: delta.function.name;
|
|
101
|
+
}
|
|
102
|
+
if (typeof delta.function.arguments === 'string' && delta.function.arguments) {
|
|
103
|
+
target.function.arguments += delta.function.arguments;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
export async function callHybridAI(
|
|
109
|
+
baseUrl: string,
|
|
110
|
+
apiKey: string,
|
|
111
|
+
model: string,
|
|
112
|
+
chatbotId: string,
|
|
113
|
+
enableRag: boolean,
|
|
114
|
+
messages: ChatMessage[],
|
|
115
|
+
tools: ToolDefinition[],
|
|
116
|
+
): Promise<ChatCompletionResponse> {
|
|
117
|
+
const url = `${baseUrl}/v1/chat/completions`;
|
|
118
|
+
|
|
119
|
+
const body = buildRequestBody(model, chatbotId, enableRag, messages, tools);
|
|
34
120
|
|
|
35
121
|
const response = await fetch(url, {
|
|
36
122
|
method: 'POST',
|
|
@@ -48,3 +134,179 @@ export async function callHybridAI(
|
|
|
48
134
|
|
|
49
135
|
return (await response.json()) as ChatCompletionResponse;
|
|
50
136
|
}
|
|
137
|
+
|
|
138
|
+
/**
 * Sends a chat-completion request with `stream: true` and assembles the
 * streamed chunks into a single `ChatCompletionResponse`.
 *
 * The response body is read line by line. Each line is passed through
 * `parseStreamPayloadLine`, and every JSON payload is merged into the running
 * result: text is accumulated (and reported through `onTextDelta`), tool-call
 * fragments are merged by index, and the latest id / model / usage /
 * finish reason win. Both incremental `delta` chunks and cumulative `message`
 * snapshots are accepted. A `[DONE]` payload ends processing. If the server
 * answers with a plain JSON content type instead of a stream, that document is
 * returned as-is and `onTextDelta` is never called.
 *
 * @param baseUrl Base URL of the API; `/v1/chat/completions` is appended.
 * @param apiKey Bearer token sent in the `Authorization` header.
 * @param model Model name; also the fallback for the returned `model` field.
 * @param chatbotId Chatbot identifier forwarded in the request body.
 * @param enableRag Forwarded as `enable_rag` in the request body.
 * @param messages Conversation sent to the model.
 * @param tools Tool definitions offered to the model.
 * @param onTextDelta Called with each new piece of assistant text as it arrives.
 * @returns The assembled completion with a single choice.
 * @throws HybridAIRequestError When the HTTP status is not OK.
 * @throws Error When the stream ends without any parseable JSON payload.
 */
export async function callHybridAIStream(
  baseUrl: string,
  apiKey: string,
  model: string,
  chatbotId: string,
  enableRag: boolean,
  messages: ChatMessage[],
  tools: ToolDefinition[],
  onTextDelta: (delta: string) => void,
): Promise<ChatCompletionResponse> {
  const url = `${baseUrl}/v1/chat/completions`;
  const body = {
    ...buildRequestBody(model, chatbotId, enableRag, messages, tools),
    stream: true,
  };

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Accept: 'text/event-stream, application/x-ndjson, application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const text = await response.text();
    throw new HybridAIRequestError(response.status, text);
  }

  // A plain JSON content type means the reply is one complete document, not a stream.
  const contentType = (response.headers.get('content-type') || '').toLowerCase();
  if (
    contentType.includes('application/json')
    && !contentType.includes('ndjson')
    && !contentType.includes('event-stream')
  ) {
    return (await response.json()) as ChatCompletionResponse;
  }

  if (!response.body) {
    return (await response.json()) as ChatCompletionResponse;
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let buffer = '';
  let streamId = '';
  let streamModel = model;
  let finishReason: string | null = null;
  let usage: ChatCompletionResponse['usage'] | undefined;
  let role: string = 'assistant';
  let textContent = '';
  const toolCalls: ToolCall[] = [];
  let sawPayload = false;
  let streamDone = false;

  // Merges one payload (the text after `data:` or a bare JSON line) into the running state.
  const consumePayload = (payloadText: string): void => {
    if (!payloadText || payloadText === '[DONE]') {
      if (payloadText === '[DONE]') streamDone = true;
      return;
    }

    let payload: StreamChunkPayload;
    try {
      payload = JSON.parse(payloadText) as StreamChunkPayload;
    } catch {
      // Best effort: lines that are not valid JSON are skipped rather than aborting the stream.
      return;
    }

    sawPayload = true;
    if (typeof payload.id === 'string' && payload.id) streamId = payload.id;
    if (typeof payload.model === 'string' && payload.model) streamModel = payload.model;
    if (payload.usage && typeof payload.usage === 'object') usage = payload.usage;

    const choice = Array.isArray(payload.choices) ? payload.choices[0] : undefined;
    if (!choice) return;

    // Cumulative form: `message` carries the full content so far, not an increment.
    if (choice.message) {
      const message = choice.message;
      if (typeof message.role === 'string' && message.role) role = message.role;
      if (typeof message.content === 'string') {
        const nextContent = message.content;
        // Report only the newly added suffix when the snapshot extends the known text;
        // otherwise the whole snapshot is reported.
        const delta = nextContent.startsWith(textContent)
          ? nextContent.slice(textContent.length)
          : nextContent;
        textContent = nextContent;
        if (delta) onTextDelta(delta);
      }
      if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
        // A snapshot replaces whatever tool calls were collected before.
        toolCalls.length = 0;
        for (const call of message.tool_calls) {
          toolCalls.push({
            id: call.id || '',
            type: call.type || 'function',
            function: {
              name: call.function?.name || '',
              arguments: call.function?.arguments || '',
            },
          });
        }
      }
    }

    // Incremental form: `delta` carries only what is new in this chunk.
    if (choice.delta) {
      const delta = choice.delta;
      if (typeof delta.role === 'string' && delta.role) role = delta.role;
      if (typeof delta.content === 'string' && delta.content) {
        textContent += delta.content;
        onTextDelta(delta.content);
      }
      if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) {
        for (const callDelta of delta.tool_calls) {
          // Only a non-negative integer addresses a real array slot; anything else
          // (missing, negative, fractional, NaN) is merged into the first tool call.
          const index = typeof callDelta.index === 'number'
            && Number.isInteger(callDelta.index)
            && callDelta.index >= 0
            ? callDelta.index
            : 0;
          const target = ensureToolCall(toolCalls, index);
          mergeToolCallDelta(target, callDelta);
        }
      }
    }

    if (typeof choice.finish_reason === 'string' && choice.finish_reason) {
      finishReason = choice.finish_reason;
    }
  };

  try {
    while (!streamDone) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last piece may be an incomplete line; keep it for the next read.
      buffer = lines.pop() || '';

      for (const rawLine of lines) {
        const payloadText = parseStreamPayloadLine(rawLine);
        if (!payloadText) continue;
        consumePayload(payloadText);
        if (streamDone) break;
      }
    }

    // The stream may end without a trailing newline; process what is left.
    if (!streamDone && buffer.trim()) {
      const payloadText = parseStreamPayloadLine(buffer);
      if (payloadText) {
        consumePayload(payloadText);
      }
    }
  } finally {
    // Cancel before releasing the lock: when processing stops early ([DONE] seen,
    // or an error was thrown) the body would otherwise be left open and unread.
    // A failing cancel must not replace the error that is already propagating.
    await reader.cancel().catch(() => undefined);
    reader.releaseLock();
  }

  if (!sawPayload) {
    throw new Error('Streaming response ended without payload');
  }

  // When the server never sent a finish reason, infer one from what was collected.
  const finalFinishReason = finishReason || (toolCalls.length > 0 ? 'tool_calls' : 'stop');
  return {
    id: streamId || 'stream',
    model: streamModel,
    choices: [
      {
        message: {
          role,
          content: textContent || null,
          ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
        },
        finish_reason: finalFinishReason,
      },
    ],
    ...(usage ? { usage } : {}),
  };
}
|