@mercuryo-ai/agentbrowse 0.2.52 → 0.2.54

This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ - No unreleased changes yet.
6
+
7
+ ## 0.2.52
8
+
5
9
  - switched the library extraction API to `extract(session, schema, scopeRef?)`, where
6
10
  `schema` is a plain schema object or a Zod schema
7
11
  - made `observe(...)` return a top-level flat `targets` array alongside grouped
package/README.md CHANGED
@@ -111,6 +111,7 @@ try {
111
111
 
112
112
  Runnable examples live in [`examples/`](./examples/README.md):
113
113
 
114
+ - first run `npm run build` when executing them from this repo
114
115
  - `npx tsx examples/basic.ts`
115
116
  - `npx tsx examples/attach.ts`
116
117
  - `npx tsx examples/extract.ts`
@@ -201,29 +202,12 @@ The runtime contract is intentionally small: you provide an object that can
201
202
  create an OpenAI-compatible chat-completions client.
202
203
 
203
204
  ```ts
205
+ // Pseudocode shape only. For a runnable fetch-based adapter, see
206
+ // `examples/extract.ts` and `docs/assistive-runtime.md`.
204
207
  import { createAgentbrowseClient } from '@mercuryo-ai/agentbrowse';
205
208
 
206
209
  const client = createAgentbrowseClient({
207
- assistiveRuntime: {
208
- createLlmClient: () => ({
209
- async createChatCompletion(args) {
210
- const { messages, response_model, image, temperature, maxOutputTokens } = args.options;
211
-
212
- const result = await callStructuredProvider({
213
- messages,
214
- responseModel: response_model,
215
- image,
216
- temperature,
217
- maxOutputTokens,
218
- });
219
-
220
- return {
221
- data: result.data,
222
- usage: result.usage,
223
- };
224
- },
225
- }),
226
- },
210
+ assistiveRuntime: createMyFetchBackedRuntime(),
227
211
  });
228
212
  ```
229
213
 
package/docs/assistive-runtime.md CHANGED
@@ -47,26 +47,11 @@ your adapter returns the expected response shape.
47
47
  import { createAgentbrowseClient } from '@mercuryo-ai/agentbrowse';
48
48
 
49
49
  const client = createAgentbrowseClient({
50
- assistiveRuntime: {
51
- createLlmClient: () => ({
52
- async createChatCompletion(args) {
53
- const { messages, response_model, image, temperature, maxOutputTokens } = args.options;
54
-
55
- const json = await callStructuredProvider({
56
- messages,
57
- responseModel: response_model,
58
- image,
59
- temperature,
60
- maxOutputTokens,
61
- });
62
-
63
- return {
64
- data: json.data,
65
- usage: json.usage,
66
- };
67
- },
68
- }),
69
- },
50
+ assistiveRuntime: createOpenAiCompatibleAssistiveRuntime({
51
+ baseUrl: 'https://api.openai.com/v1',
52
+ apiKey: process.env.OPENAI_API_KEY!,
53
+ model: 'gpt-4.1-mini',
54
+ }),
70
55
  });
71
56
  ```
72
57
 
@@ -81,27 +66,101 @@ This pattern works well when:
81
66
  You can wrap the adapter once and reuse it:
82
67
 
83
68
  ```ts
69
+ import { toJsonSchema } from '@browserbasehq/stagehand';
70
+ import type {
71
+ AgentbrowseAssistiveChatCompletionOptions,
72
+ AgentbrowseAssistiveLlmUsage,
73
+ } from '@mercuryo-ai/agentbrowse';
74
+
75
+ type StructuredChatResponse = {
76
+ choices?: Array<{
77
+ message?: {
78
+ content?: string;
79
+ };
80
+ }>;
81
+ usage?: AgentbrowseAssistiveLlmUsage;
82
+ };
83
+
84
+ function buildMessages(options: AgentbrowseAssistiveChatCompletionOptions) {
85
+ const messages = [...options.messages];
86
+ if (!options.image) {
87
+ return messages;
88
+ }
89
+
90
+ const content: Array<
91
+ | { type: 'text'; text: string }
92
+ | { type: 'image_url'; image_url: { url: string; detail: 'auto' } }
93
+ > = [];
94
+
95
+ if (options.image.description?.trim()) {
96
+ content.push({ type: 'text', text: options.image.description.trim() });
97
+ }
98
+
99
+ content.push({
100
+ type: 'image_url',
101
+ image_url: {
102
+ url: `data:image/jpeg;base64,${options.image.buffer.toString('base64')}`,
103
+ detail: 'auto',
104
+ },
105
+ });
106
+
107
+ messages.push({
108
+ role: 'user',
109
+ content,
110
+ });
111
+
112
+ return messages;
113
+ }
114
+
84
115
  function createOpenAiCompatibleAssistiveRuntime(input: {
85
116
  baseUrl: string;
86
117
  apiKey: string;
118
+ model: string;
87
119
  }) {
120
+ const baseUrl = input.baseUrl.replace(/\/$/, '');
121
+
88
122
  return {
89
123
  createLlmClient: () => ({
90
- async createChatCompletion(args) {
91
- const { messages, response_model, image, temperature, maxOutputTokens } = args.options;
92
-
93
- const json = await callStructuredProvider({
94
- baseUrl: input.baseUrl,
95
- apiKey: input.apiKey,
96
- messages,
97
- responseModel: response_model,
98
- image,
99
- temperature,
100
- maxOutputTokens,
124
+ async createChatCompletion({ options }) {
125
+ if (!options.response_model) {
126
+ throw new Error('AgentBrowse assistive extract requires response_model.');
127
+ }
128
+
129
+ const response = await fetch(`${baseUrl}/chat/completions`, {
130
+ method: 'POST',
131
+ headers: {
132
+ Authorization: `Bearer ${input.apiKey}`,
133
+ Accept: 'application/json',
134
+ 'Content-Type': 'application/json',
135
+ },
136
+ body: JSON.stringify({
137
+ model: input.model,
138
+ messages: buildMessages(options),
139
+ response_format: {
140
+ type: 'json_schema',
141
+ json_schema: {
142
+ name: options.response_model.name,
143
+ strict: true,
144
+ schema: toJsonSchema(options.response_model.schema),
145
+ },
146
+ },
147
+ temperature: options.temperature,
148
+ max_completion_tokens: options.maxOutputTokens,
149
+ }),
101
150
  });
102
151
 
152
+ if (!response.ok) {
153
+ throw new Error(`assistive_provider_http_${response.status}`);
154
+ }
155
+
156
+ const json = (await response.json()) as StructuredChatResponse;
157
+ const content = json.choices?.[0]?.message?.content;
158
+ if (typeof content !== 'string' || content.trim().length === 0) {
159
+ throw new Error('assistive_provider_missing_content');
160
+ }
161
+
103
162
  return {
104
- data: json.data,
163
+ data: JSON.parse(content),
105
164
  usage: json.usage,
106
165
  };
107
166
  },
package/examples/README.md CHANGED
@@ -1,6 +1,15 @@
1
1
  # AgentBrowse Examples
2
2
 
3
- Run these examples from `packages/agentbrowse`:
3
+ The published package includes these same files under `examples/`.
4
+
5
+ If you run them from this repo, build once first so the self-referenced package
6
+ entrypoint resolves to `dist/`:
7
+
8
+ ```bash
9
+ npm run build
10
+ ```
11
+
12
+ Then run the examples from `packages/agentbrowse`:
4
13
 
5
14
  ```bash
6
15
  npx tsx examples/basic.ts
package/examples/attach.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { attach, observe } from '../src/library.ts';
1
+ import { attach, observe } from '@mercuryo-ai/agentbrowse';
2
2
 
3
3
  const cdpUrl = process.env.AGENTBROWSE_CDP_URL;
4
4
 
package/examples/basic.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { close, launch, observe, screenshot } from '../src/library.ts';
1
+ import { close, launch, observe, screenshot } from '@mercuryo-ai/agentbrowse';
2
2
 
3
3
  const launchResult = await launch('https://example.com', {
4
4
  headless: false,
package/examples/extract.ts CHANGED
@@ -1,6 +1,59 @@
1
- import { createAgentbrowseClient } from '../src/library.ts';
1
+ import { toJsonSchema } from '@browserbasehq/stagehand';
2
+ import {
3
+ createAgentbrowseClient,
4
+ type AgentbrowseAssistiveChatCompletionOptions,
5
+ } from '@mercuryo-ai/agentbrowse';
6
+
7
+ type StructuredChatResponse = {
8
+ choices?: Array<{
9
+ message?: {
10
+ content?: string;
11
+ };
12
+ }>;
13
+ usage?: {
14
+ prompt_tokens?: number;
15
+ completion_tokens?: number;
16
+ total_tokens?: number;
17
+ };
18
+ };
19
+
20
+ function buildMessages(options: AgentbrowseAssistiveChatCompletionOptions) {
21
+ const messages = [...options.messages];
22
+ if (!options.image) {
23
+ return messages;
24
+ }
25
+
26
+ const content: Array<
27
+ | { type: 'text'; text: string }
28
+ | { type: 'image_url'; image_url: { url: string; detail: 'auto' } }
29
+ > = [];
30
+
31
+ if (options.image.description?.trim()) {
32
+ content.push({ type: 'text', text: options.image.description.trim() });
33
+ }
34
+
35
+ content.push({
36
+ type: 'image_url',
37
+ image_url: {
38
+ url: `data:image/jpeg;base64,${options.image.buffer.toString('base64')}`,
39
+ detail: 'auto',
40
+ },
41
+ });
42
+
43
+ messages.push({
44
+ role: 'user',
45
+ content,
46
+ });
47
+
48
+ return messages;
49
+ }
2
50
 
3
51
  const openAiApiKey = process.env.OPENAI_API_KEY;
52
+ const openAiBaseUrl = (process.env.OPENAI_BASE_URL ?? 'https://api.openai.com/v1').replace(
53
+ /\/$/,
54
+ ''
55
+ );
56
+ const openAiModel = process.env.OPENAI_MODEL ?? 'gpt-4.1-mini';
4
57
 
5
58
  if (!openAiApiKey) {
6
59
  throw new Error('Set OPENAI_API_KEY before running this example.');
@@ -9,21 +62,47 @@ if (!openAiApiKey) {
9
62
  const client = createAgentbrowseClient({
10
63
  assistiveRuntime: {
11
64
  createLlmClient: () => ({
12
- async createChatCompletion(args) {
13
- const response = await fetch('https://api.openai.com/v1/chat/completions', {
65
+ async createChatCompletion({ options }) {
66
+ if (!options.response_model) {
67
+ throw new Error('AgentBrowse extract requires response_model in the assistive runtime.');
68
+ }
69
+
70
+ const response = await fetch(`${openAiBaseUrl}/chat/completions`, {
14
71
  method: 'POST',
15
72
  headers: {
16
73
  'content-type': 'application/json',
17
74
  authorization: `Bearer ${openAiApiKey}`,
18
75
  },
19
- body: JSON.stringify(args),
76
+ body: JSON.stringify({
77
+ model: openAiModel,
78
+ messages: buildMessages(options),
79
+ response_format: {
80
+ type: 'json_schema',
81
+ json_schema: {
82
+ name: options.response_model.name,
83
+ strict: true,
84
+ schema: toJsonSchema(options.response_model.schema),
85
+ },
86
+ },
87
+ temperature: options.temperature,
88
+ max_completion_tokens: options.maxOutputTokens,
89
+ }),
20
90
  });
21
91
 
22
92
  if (!response.ok) {
23
93
  throw new Error(`openai_request_failed:${response.status}`);
24
94
  }
25
95
 
26
- return (await response.json()) as any;
96
+ const json = (await response.json()) as StructuredChatResponse;
97
+ const content = json.choices?.[0]?.message?.content;
98
+ if (typeof content !== 'string' || content.trim().length === 0) {
99
+ throw new Error('openai_response_missing_content');
100
+ }
101
+
102
+ return {
103
+ data: JSON.parse(content),
104
+ usage: json.usage,
105
+ };
27
106
  },
28
107
  }),
29
108
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mercuryo-ai/agentbrowse",
3
- "version": "0.2.52",
3
+ "version": "0.2.54",
4
4
  "type": "module",
5
5
  "description": "Browser automation primitives library for AI agents",
6
6
  "license": "MIT",