@darbotlabs/darbot-browser-mcp 0.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +249 -158
- package/cli.js +1 -1
- package/config.d.ts +77 -1
- package/index.d.ts +1 -1
- package/index.js +1 -1
- package/lib/ai/context.js +150 -0
- package/lib/ai/guardrails.js +382 -0
- package/lib/ai/integration.js +397 -0
- package/lib/ai/intent.js +237 -0
- package/lib/ai/manualPromise.js +111 -0
- package/lib/ai/memory.js +273 -0
- package/lib/ai/ml-scorer.js +265 -0
- package/lib/ai/orchestrator-tools.js +292 -0
- package/lib/ai/orchestrator.js +473 -0
- package/lib/ai/planner.js +300 -0
- package/lib/ai/reporter.js +493 -0
- package/lib/ai/workflow.js +407 -0
- package/lib/auth/apiKeyAuth.js +46 -0
- package/lib/auth/entraAuth.js +110 -0
- package/lib/auth/entraJwtVerifier.js +117 -0
- package/lib/auth/index.js +210 -0
- package/lib/auth/managedIdentityAuth.js +175 -0
- package/lib/auth/mcpOAuthProvider.js +186 -0
- package/lib/auth/tunnelAuth.js +120 -0
- package/lib/browserContextFactory.js +1 -1
- package/lib/browserServer.js +1 -1
- package/lib/cdpRelay.js +2 -2
- package/lib/common.js +68 -0
- package/lib/config.js +62 -3
- package/lib/connection.js +1 -1
- package/lib/context.js +1 -1
- package/lib/fileUtils.js +1 -1
- package/lib/guardrails.js +382 -0
- package/lib/health.js +178 -0
- package/lib/httpServer.js +1 -1
- package/lib/index.js +1 -1
- package/lib/javascript.js +1 -1
- package/lib/manualPromise.js +1 -1
- package/lib/memory.js +273 -0
- package/lib/openapi.js +373 -0
- package/lib/orchestrator.js +473 -0
- package/lib/package.js +1 -1
- package/lib/pageSnapshot.js +17 -2
- package/lib/planner.js +302 -0
- package/lib/program.js +17 -5
- package/lib/reporter.js +493 -0
- package/lib/resources/resource.js +1 -1
- package/lib/server.js +5 -3
- package/lib/tab.js +1 -1
- package/lib/tools/ai-native.js +298 -0
- package/lib/tools/autonomous.js +147 -0
- package/lib/tools/clock.js +183 -0
- package/lib/tools/common.js +1 -1
- package/lib/tools/console.js +1 -1
- package/lib/tools/diagnostics.js +132 -0
- package/lib/tools/dialogs.js +1 -1
- package/lib/tools/emulation.js +155 -0
- package/lib/tools/files.js +1 -1
- package/lib/tools/install.js +1 -1
- package/lib/tools/keyboard.js +1 -1
- package/lib/tools/navigate.js +1 -1
- package/lib/tools/network.js +1 -1
- package/lib/tools/pageSnapshot.js +58 -0
- package/lib/tools/pdf.js +1 -1
- package/lib/tools/profiles.js +76 -25
- package/lib/tools/screenshot.js +1 -1
- package/lib/tools/scroll.js +93 -0
- package/lib/tools/snapshot.js +1 -1
- package/lib/tools/storage.js +328 -0
- package/lib/tools/tab.js +16 -0
- package/lib/tools/tabs.js +1 -1
- package/lib/tools/testing.js +1 -1
- package/lib/tools/tool.js +1 -1
- package/lib/tools/utils.js +1 -1
- package/lib/tools/vision.js +1 -1
- package/lib/tools/wait.js +1 -1
- package/lib/tools.js +22 -1
- package/lib/transport.js +251 -31
- package/package.json +54 -21
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) DarbotLabs.
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
import debug from 'debug';
|
|
17
|
+
import { MemoryManager } from './memory.js';
|
|
18
|
+
import { BFSPlanner } from './planner.js';
|
|
19
|
+
import { CrawlReporter } from './reporter.js';
|
|
20
|
+
import { GuardrailSystem } from './guardrails.js';
|
|
21
|
+
const log = debug('darbot:orchestrator');
|
|
22
|
+
/** Maximum number of links reported per page observation. */
const MAX_OBSERVED_LINKS = 50;

/** Maximum number of clickable elements reported per page observation. */
const MAX_OBSERVED_CLICKABLES = 20;

/**
 * Returns a copy of `source` without the keys whose value is `undefined`.
 *
 * Used when merging caller options over defaults: spreading an object that
 * carries `key: undefined` would otherwise replace the default with
 * `undefined`.
 *
 * @param {object|null|undefined} source Options object supplied by the caller.
 * @returns {object} A new object holding only the defined entries.
 */
function omitUndefined(source) {
  return Object.fromEntries(Object.entries(source ?? {}).filter(([, value]) => value !== undefined));
}

/**
 * Returns the tab reported by `context.currentTabOrDie()`.
 *
 * @param {object} context Browser context wrapper handed to the orchestrator.
 * @returns {object} The active tab.
 * @throws {Error} When no tab is returned. NOTE(review): `currentTabOrDie`
 *   presumably throws on its own; the explicit check is kept from the
 *   original code as a safety net.
 */
function requireActiveTab(context) {
  const tab = context.currentTabOrDie();
  if (!tab)
    throw new Error('No active tab available');
  return tab;
}

/**
 * Collects links or clickable elements from the current document.
 *
 * This function is handed to `page.evaluate`, so it is serialized and executed
 * inside the page. It must stay self-contained: it may only use its argument
 * and page globals, never names from this module.
 *
 * @param {{ kind: 'link' | 'clickable', maxResults: number }} options
 *   `kind` selects what to collect, `maxResults` caps the returned list.
 * @returns {Array<object>} For links `{ text, href, selector }`, for clickable
 *   elements `{ text, selector, tag }`.
 */
function collectPageElements({ kind, maxResults }) {
  // Builds a child-combinator path from the root element down to `element`.
  // `:nth-of-type` counts siblings of the same type under one parent, so the
  // position is computed per parent rather than from the document-wide match
  // index.
  const cssPath = element => {
    const segments = [];
    for (let node = element; node; node = node.parentElement) {
      const tag = node.tagName.toLowerCase();
      const parent = node.parentElement;
      if (!parent) {
        segments.unshift(tag);
        break;
      }
      const sameTypeSiblings = Array.from(parent.children).filter(sibling => sibling.tagName === node.tagName);
      segments.unshift(`${tag}:nth-of-type(${sameTypeSiblings.indexOf(node) + 1})`);
    }
    return segments.join(' > ');
  };
  const isLink = kind === 'link';
  const query = isLink
    ? 'a[href]'
    : 'button, input[type="button"], input[type="submit"], [role="button"], .btn, .button';
  const collected = [];
  for (const element of document.querySelectorAll(query)) {
    if (collected.length >= maxResults)
      break;
    const ownText = element.textContent?.trim() || '';
    if (isLink) {
      const href = element.getAttribute('href');
      if (href && ownText) {
        collected.push({
          text: ownText.substring(0, 100), // Limit text length
          href,
          selector: cssPath(element)
        });
      }
      continue;
    }
    const label = ownText || element.getAttribute('value') || element.getAttribute('aria-label') || '';
    // `offsetParent === null` is used as the visibility test.
    if (label && element.offsetParent !== null) {
      collected.push({
        text: label.substring(0, 50), // Limit text length
        selector: cssPath(element),
        tag: element.tagName.toLowerCase()
      });
    }
  }
  return collected;
}

/**
 * Main orchestrator for autonomous browser crawling.
 *
 * Wires a memory store, a planner, a reporter and a guardrail system together
 * and drives an observe -> plan -> validate -> execute loop on the active tab
 * of the supplied browser context.
 */
export class CrawlOrchestrator {
  config;
  memory;
  planner;
  reporter;
  guardrails;
  context;
  session;
  isRunning = false;
  shouldStop = false;

  /**
   * @param {object} context Browser context wrapper; must expose `currentTabOrDie()`.
   * @param {object} [config] Crawl options. Keys that are missing or
   *   `undefined` fall back to the defaults listed in the constructor body.
   */
  constructor(context, config) {
    this.context = context;
    this.config = {
      sessionId: this.generateSessionId(),
      maxDepth: 3,
      maxPages: 50,
      timeoutMs: 300000,
      generateReport: true,
      takeScreenshots: true,
      verbose: false,
      // Drop undefined values so `{ maxDepth: undefined }` keeps the default.
      ...omitUndefined(config)
    };
    // Initialize subsystems
    this.memory = new MemoryManager(this.config.memory || { enabled: true });
    const plannerConfig = {
      maxDepth: this.config.maxDepth,
      maxPages: this.config.maxPages,
      timeout: this.config.timeoutMs,
      allowedDomains: this.config.allowedDomains,
      strategy: 'bfs',
      goalDescription: this.config.goal,
      ...this.config.planner
    };
    this.planner = new BFSPlanner(plannerConfig, this.memory);
    this.reporter = new CrawlReporter(this.config.sessionId, this.config.startUrl, this.config.goal, this.config.reporter);
    this.guardrails = new GuardrailSystem({
      maxDepth: this.config.maxDepth,
      timeoutMs: this.config.timeoutMs,
      allowedDomains: this.config.allowedDomains,
      ...this.config.guardrails
    });
    // Initialize session
    this.session = {
      sessionId: this.config.sessionId,
      startTime: Date.now(),
      stats: {
        pagesVisited: 0,
        actionsPerformed: 0,
        errorsEncountered: 0
      },
      status: 'running'
    };
    log('Initialized orchestrator with config:', this.config);
  }

  /**
   * Start autonomous crawling session.
   *
   * Initializes the planner, navigates to `config.startUrl` and repeats crawl
   * steps until a step reports it should stop, `stopCrawling()` is called, or
   * `config.timeoutMs` has elapsed since this call. Errors thrown by a single
   * step are reported and the loop continues; errors thrown during setup mark
   * the session as `'error'`.
   *
   * @returns {Promise<object>} The session record, after finalization.
   * @throws {Error} If a crawl is already running on this instance.
   */
  async startCrawling() {
    if (this.isRunning)
      throw new Error('Crawling session already running');
    this.isRunning = true;
    this.shouldStop = false;
    // Without this deadline a step that throws on every iteration would keep
    // the loop alive forever. A non-positive or non-finite timeout disables it.
    const { timeoutMs } = this.config;
    const deadline = Number.isFinite(timeoutMs) && timeoutMs > 0 ? Date.now() + timeoutMs : Infinity;
    try {
      log('Starting autonomous crawling session:', this.session.sessionId);
      // Initialize planner with start URL
      await this.planner.initialize(this.config.startUrl);
      // Navigate to start URL
      await this.navigateToUrl(this.config.startUrl);
      // Main crawling loop
      while (!this.shouldStop && this.isRunning) {
        if (Date.now() >= deadline) {
          log('Session timeout reached, stopping crawl');
          break;
        }
        try {
          const success = await this.performCrawlStep();
          if (!success)
            break;
          // Small delay between actions
          await this.sleep(1000);
        }
        catch (error) {
          log('Error in crawl step:', error);
          this.reporter.addError(this.getCurrentUrl(), String(error));
          this.session.stats.errorsEncountered++;
          // Continue with next action after error
          await this.sleep(2000);
        }
      }
    }
    catch (error) {
      log('Fatal error in crawling session:', error);
      this.session.status = 'error';
      this.reporter.addError(this.config.startUrl, String(error));
    }
    finally {
      await this.finalizeCrawling();
    }
    return this.session;
  }

  /**
   * Stop the crawling session.
   *
   * Requests the main loop to exit after the current step. The status becomes
   * `'cancelled'` only while the session has not ended yet, so a final
   * `'completed'` or `'error'` status is never overwritten.
   *
   * @returns {Promise<void>}
   */
  async stopCrawling() {
    log('Stopping crawling session');
    this.shouldStop = true;
    if (this.session.endTime === undefined)
      this.session.status = 'cancelled';
  }

  /**
   * Perform a single crawl step: observe the page, ask the planner for the
   * next action, validate it with the guardrails and execute it.
   *
   * @returns {Promise<boolean>} `true` to keep crawling; `false` when the
   *   action was blocked, failed, or was a `'finish'` action.
   * @throws Propagates errors from observation, planning or validation; the
   *   caller is responsible for reporting them.
   */
  async performCrawlStep() {
    // Get current page observation
    const observation = await this.getCurrentObservation();
    // Plan next action
    const action = await this.planner.planNextAction(observation);
    log('Planned action:', action.type, action.target || action.url || action.reason);
    // Validate action with guardrails
    const actionContext = this.getActionContext();
    const validation = await this.guardrails.validateAction(action, actionContext);
    if (!validation.allowed) {
      log('Action blocked by guardrails:', validation.reason);
      this.reporter.addError(observation.url, `Action blocked: ${validation.reason}`);
      return false;
    }
    // Execute action
    const success = await this.executeAction(action);
    if (success) {
      this.session.stats.actionsPerformed++;
      this.guardrails.recordAction(action, observation.url);
    }
    // Check if we should finish
    if (action.type === 'finish') {
      log('Crawling finished:', action.reason);
      return false;
    }
    return success;
  }

  /**
   * Get current page observation.
   *
   * @returns {Promise<{url: string, title: string, domSnapshot: string, links: Array<object>, clickableElements: Array<object>}>}
   *   URL, title (`'Untitled'` if it cannot be read), aria snapshot text
   *   (empty string on failure), links and clickable elements.
   * @throws {Error} When there is no active tab.
   */
  async getCurrentObservation() {
    const { page } = requireActiveTab(this.context);
    const url = page.url();
    // The four reads are independent of each other, so they run concurrently.
    const [title, snapshot, links, clickableElements] = await Promise.all([
      page.title().catch(() => 'Untitled'),
      // Accessibility snapshot using aria snapshot (Playwright 1.57+)
      page.locator(':root').ariaSnapshot().catch(() => ''),
      this.extractLinks(page),
      this.extractClickableElements(page)
    ]);
    return {
      url,
      title,
      domSnapshot: snapshot || '',
      links,
      clickableElements
    };
  }

  /**
   * Extract links from current page.
   *
   * @param {object} page Page object exposing `evaluate(fn, arg)`.
   * @returns {Promise<Array<{text: string, href: string, selector: string}>>}
   *   Up to 50 anchors that have both an `href` attribute and visible text;
   *   an empty list if the evaluation fails.
   */
  async extractLinks(page) {
    try {
      return await page.evaluate(collectPageElements, { kind: 'link', maxResults: MAX_OBSERVED_LINKS });
    }
    catch (error) {
      log('Error extracting links:', error);
      return [];
    }
  }

  /**
   * Extract clickable elements from current page.
   *
   * @param {object} page Page object exposing `evaluate(fn, arg)`.
   * @returns {Promise<Array<{text: string, selector: string, tag: string}>>}
   *   Up to 20 labelled, visible button-like elements; an empty list if the
   *   evaluation fails.
   */
  async extractClickableElements(page) {
    try {
      return await page.evaluate(collectPageElements, { kind: 'clickable', maxResults: MAX_OBSERVED_CLICKABLES });
    }
    catch (error) {
      log('Error extracting clickable elements:', error);
      return [];
    }
  }

  /**
   * Execute a crawl action.
   *
   * @param {{type: string, url?: string, target?: string, text?: string}} action
   *   Action produced by the planner.
   * @returns {Promise<boolean>} `true` when the action was carried out;
   *   `false` for unknown types, missing arguments or any failure.
   */
  async executeAction(action) {
    try {
      switch (action.type) {
        case 'navigate':
          if (action.url)
            return await this.navigateToUrl(action.url);
          break;
        case 'click':
          if (action.target)
            return await this.clickElement(action.target);
          break;
        case 'type':
          if (action.target && action.text)
            return await this.typeText(action.target, action.text);
          break;
        case 'wait':
          await this.sleep(2000);
          return true;
        case 'snapshot':
          return await this.takeSnapshot();
        case 'finish':
          return true;
      }
      return false;
    }
    catch (error) {
      log('Error executing action:', error);
      return false;
    }
  }

  /**
   * Navigate to a URL, then record the resulting page in memory and in the
   * report.
   *
   * @param {string} url Destination URL.
   * @returns {Promise<boolean>} `true` on success; `false` if navigation or
   *   any of the follow-up bookkeeping fails (the error is added to the report).
   */
  async navigateToUrl(url) {
    try {
      const tab = requireActiveTab(this.context);
      log('Navigating to:', url);
      await tab.page.goto(url, {
        waitUntil: 'domcontentloaded',
        timeout: 30000
      });
      this.session.stats.pagesVisited++;
      // Take screenshot if enabled
      if (this.config.takeScreenshots)
        await this.takeScreenshot();
      // Store state in memory
      const observation = await this.getCurrentObservation();
      if (this.memory.enabled) {
        await this.memory.storeState(url, observation.title, observation.domSnapshot, undefined, // Screenshot handled separately
        observation.links.map(link => link.href));
      }
      // Add to report
      this.reporter.addState({
        url,
        title: observation.title,
        // NOTE(review): this is a constructor name plus a timestamp, not a
        // content hash — confirm whether the memory layer should supply one.
        stateHash: this.memory.constructor.name + Date.now().toString(),
        timestamp: Date.now(),
        links: observation.links.map(link => link.href),
        visited: true
      });
      return true;
    }
    catch (error) {
      log('Navigation error:', error);
      this.reporter.addError(url, String(error));
      return false;
    }
  }

  /**
   * Click an element.
   *
   * @param {string} selector Selector of the element to click.
   * @returns {Promise<boolean>} `true` if the click succeeded, `false` otherwise.
   */
  async clickElement(selector) {
    try {
      const tab = requireActiveTab(this.context);
      log('Clicking element:', selector);
      await tab.page.click(selector, { timeout: 10000 });
      await tab.page.waitForTimeout(2000); // Wait for potential page changes
      return true;
    }
    catch (error) {
      log('Click error:', error);
      return false;
    }
  }

  /**
   * Type text into an element.
   *
   * @param {string} selector Selector of the input element.
   * @param {string} text Text to fill in.
   * @returns {Promise<boolean>} `true` if the text was filled, `false` otherwise.
   */
  async typeText(selector, text) {
    try {
      const tab = requireActiveTab(this.context);
      log('Typing text into:', selector);
      await tab.page.fill(selector, text);
      return true;
    }
    catch (error) {
      log('Type error:', error);
      return false;
    }
  }

  /**
   * Take a viewport screenshot and, when memory is enabled, store it together
   * with the current page observation.
   *
   * @returns {Promise<boolean>} `true` on success, `false` on any failure.
   */
  async takeScreenshot() {
    try {
      const tab = requireActiveTab(this.context);
      const screenshot = await tab.page.screenshot({
        type: 'png',
        fullPage: false
      });
      // Store screenshot in memory system
      const observation = await this.getCurrentObservation();
      if (this.memory.enabled) {
        await this.memory.storeState(observation.url, observation.title, observation.domSnapshot, screenshot, observation.links.map(link => link.href));
      }
      return true;
    }
    catch (error) {
      log('Screenshot error:', error);
      return false;
    }
  }

  /**
   * Take accessibility snapshot. The observation is only logged here; it is
   * not stored.
   *
   * @returns {Promise<boolean>} `true` if the observation could be taken.
   */
  async takeSnapshot() {
    try {
      const observation = await this.getCurrentObservation();
      log('Took accessibility snapshot for:', observation.url);
      return true;
    }
    catch (error) {
      log('Snapshot error:', error);
      return false;
    }
  }

  /**
   * Get action context for guardrails.
   *
   * @returns {{currentUrl: string, visitedUrls: Array, currentDepth: number, sessionStartTime: number, lastActionTime: number}}
   */
  getActionContext() {
    const stats = this.planner.getStats();
    return {
      currentUrl: this.getCurrentUrl(),
      visitedUrls: stats.visitedUrls || [],
      currentDepth: stats.currentDepth || 0,
      sessionStartTime: this.session.startTime,
      lastActionTime: Date.now()
    };
  }

  /**
   * Get current URL from the active tab.
   *
   * @returns {string} The URL, or an empty string when no tab is available.
   */
  getCurrentUrl() {
    try {
      const tab = this.context.currentTabOrDie();
      return tab.page.url();
    }
    catch {
      return '';
    }
  }

  /**
   * Finalize crawling session: clear the running flag, stamp the end time,
   * promote a still-`'running'` status to `'completed'` and generate the
   * report when enabled. Report failures are logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async finalizeCrawling() {
    this.isRunning = false;
    this.session.endTime = Date.now();
    if (this.session.status === 'running')
      this.session.status = 'completed';
    // Generate report if enabled
    if (this.config.generateReport) {
      try {
        const reportPath = await this.reporter.generateReport();
        this.session.stats.reportPath = reportPath;
        log('Generated report:', reportPath);
      }
      catch (error) {
        log('Error generating report:', error);
      }
    }
    log('Crawling session finalized:', this.session);
  }

  /**
   * Generate a session ID of the form `crawl_<timestamp>_<6 base-36 chars>`.
   * Not cryptographically secure; it only needs to tell sessions apart.
   *
   * @returns {string}
   */
  generateSessionId() {
    const timestamp = Date.now();
    // The base-36 fraction can be shorter than 6 characters (0.5 -> "0.i"),
    // so pad to keep the suffix width fixed.
    const random = Math.random().toString(36).substring(2, 8).padEnd(6, '0');
    return `crawl_${timestamp}_${random}`;
  }

  /**
   * Sleep for specified milliseconds.
   *
   * @param {number} ms Delay in milliseconds.
   * @returns {Promise<void>}
   */
  async sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Get current session status.
   *
   * @returns {object} A snapshot of the session. `stats` is copied as well,
   *   so mutating the result does not affect the live session.
   */
  getSession() {
    return { ...this.session, stats: { ...this.session.stats } };
  }

  /**
   * Get crawling statistics.
   *
   * @returns {{session: object, planner: object, guardrails: object, memory: {enabled: boolean}}}
   *   Note that `session` is the live session object, not a copy.
   */
  getStats() {
    return {
      session: this.session,
      planner: this.planner.getStats(),
      guardrails: this.guardrails.getStats(),
      memory: {
        enabled: this.memory.enabled
      }
    };
  }
}
|
package/lib/package.js
CHANGED
package/lib/pageSnapshot.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) DarbotLabs.
|
|
3
3
|
*
|
|
4
4
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
* you may not use this file except in compliance with the License.
|
|
@@ -29,7 +29,22 @@ export class PageSnapshot {
|
|
|
29
29
|
return this._text;
|
|
30
30
|
}
|
|
31
31
|
async _build() {
|
|
32
|
-
const
|
|
32
|
+
const snapshotResult = await callOnPageNoTrace(this._page, page => page._snapshotForAI());
|
|
33
|
+
// Handle both old (string) and new (object with full property) Playwright snapshot formats
|
|
34
|
+
let snapshot;
|
|
35
|
+
if (typeof snapshotResult === 'string') {
|
|
36
|
+
snapshot = snapshotResult;
|
|
37
|
+
}
|
|
38
|
+
else if (snapshotResult && typeof snapshotResult === 'object') {
|
|
39
|
+
// Try different known property names for the snapshot text
|
|
40
|
+
snapshot = snapshotResult.full
|
|
41
|
+
?? snapshotResult.text
|
|
42
|
+
?? snapshotResult.snapshot
|
|
43
|
+
?? JSON.stringify(snapshotResult, null, 2);
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
snapshot = String(snapshotResult);
|
|
47
|
+
}
|
|
33
48
|
this._text = [
|
|
34
49
|
`- Page Snapshot`,
|
|
35
50
|
'```yaml',
|