@darbotlabs/darbot-browser-mcp 0.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +249 -158
  3. package/cli.js +1 -1
  4. package/config.d.ts +77 -1
  5. package/index.d.ts +1 -1
  6. package/index.js +1 -1
  7. package/lib/ai/context.js +150 -0
  8. package/lib/ai/guardrails.js +382 -0
  9. package/lib/ai/integration.js +397 -0
  10. package/lib/ai/intent.js +237 -0
  11. package/lib/ai/manualPromise.js +111 -0
  12. package/lib/ai/memory.js +273 -0
  13. package/lib/ai/ml-scorer.js +265 -0
  14. package/lib/ai/orchestrator-tools.js +292 -0
  15. package/lib/ai/orchestrator.js +473 -0
  16. package/lib/ai/planner.js +300 -0
  17. package/lib/ai/reporter.js +493 -0
  18. package/lib/ai/workflow.js +407 -0
  19. package/lib/auth/apiKeyAuth.js +46 -0
  20. package/lib/auth/entraAuth.js +110 -0
  21. package/lib/auth/entraJwtVerifier.js +117 -0
  22. package/lib/auth/index.js +210 -0
  23. package/lib/auth/managedIdentityAuth.js +175 -0
  24. package/lib/auth/mcpOAuthProvider.js +186 -0
  25. package/lib/auth/tunnelAuth.js +120 -0
  26. package/lib/browserContextFactory.js +1 -1
  27. package/lib/browserServer.js +1 -1
  28. package/lib/cdpRelay.js +2 -2
  29. package/lib/common.js +68 -0
  30. package/lib/config.js +62 -3
  31. package/lib/connection.js +1 -1
  32. package/lib/context.js +1 -1
  33. package/lib/fileUtils.js +1 -1
  34. package/lib/guardrails.js +382 -0
  35. package/lib/health.js +178 -0
  36. package/lib/httpServer.js +1 -1
  37. package/lib/index.js +1 -1
  38. package/lib/javascript.js +1 -1
  39. package/lib/manualPromise.js +1 -1
  40. package/lib/memory.js +273 -0
  41. package/lib/openapi.js +373 -0
  42. package/lib/orchestrator.js +473 -0
  43. package/lib/package.js +1 -1
  44. package/lib/pageSnapshot.js +17 -2
  45. package/lib/planner.js +302 -0
  46. package/lib/program.js +17 -5
  47. package/lib/reporter.js +493 -0
  48. package/lib/resources/resource.js +1 -1
  49. package/lib/server.js +5 -3
  50. package/lib/tab.js +1 -1
  51. package/lib/tools/ai-native.js +298 -0
  52. package/lib/tools/autonomous.js +147 -0
  53. package/lib/tools/clock.js +183 -0
  54. package/lib/tools/common.js +1 -1
  55. package/lib/tools/console.js +1 -1
  56. package/lib/tools/diagnostics.js +132 -0
  57. package/lib/tools/dialogs.js +1 -1
  58. package/lib/tools/emulation.js +155 -0
  59. package/lib/tools/files.js +1 -1
  60. package/lib/tools/install.js +1 -1
  61. package/lib/tools/keyboard.js +1 -1
  62. package/lib/tools/navigate.js +1 -1
  63. package/lib/tools/network.js +1 -1
  64. package/lib/tools/pageSnapshot.js +58 -0
  65. package/lib/tools/pdf.js +1 -1
  66. package/lib/tools/profiles.js +76 -25
  67. package/lib/tools/screenshot.js +1 -1
  68. package/lib/tools/scroll.js +93 -0
  69. package/lib/tools/snapshot.js +1 -1
  70. package/lib/tools/storage.js +328 -0
  71. package/lib/tools/tab.js +16 -0
  72. package/lib/tools/tabs.js +1 -1
  73. package/lib/tools/testing.js +1 -1
  74. package/lib/tools/tool.js +1 -1
  75. package/lib/tools/utils.js +1 -1
  76. package/lib/tools/vision.js +1 -1
  77. package/lib/tools/wait.js +1 -1
  78. package/lib/tools.js +22 -1
  79. package/lib/transport.js +251 -31
  80. package/package.json +54 -21
@@ -0,0 +1,300 @@
1
+ /**
2
+ * Copyright (c) DarbotLabs.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+ import debug from 'debug';
17
+ import { MLBasedScorer } from '../ai/ml-scorer.js';
18
+ const log = debug('darbot:planner');
19
/**
 * Breadth-first planner for autonomous site crawling.
 *
 * The planner is fed one page observation at a time through
 * {@link BFSPlanner#planNextAction} and answers with the next action to
 * perform: `navigate` to a queued URL, `click` an element on the current
 * page, or `finish`.
 *
 * Observation fields read by this class: `url`, `title`, `domSnapshot`,
 * `error`, `links` (items with `href`) and `clickableElements` (items with
 * `text`, `tag`, `selector`).
 */
export class BFSPlanner {
  config;
  memory;
  visitQueue;
  visited;
  mlScorer;
  currentDepth = 0;
  pagesVisited = 0;
  /** URLs whose most recent observation carried no `error`. */
  successfulUrls = new Set();
  /** Depth at which each URL was queued; survives removal from the queue. */
  urlDepths = new Map();

  /**
   * @param {object} config Crawl settings. Fields read here: `strategy`
   *   (defaults to 'bfs'), `goalDescription`, `maxDepth`, `maxPages`,
   *   `allowedDomains`, `excludePatterns`.
   * @param {object} memory State store; `enabled` and `storeState()` are used.
   */
  constructor(config, memory) {
    this.config = {
      ...config,
      // Set default strategy if not provided
      strategy: config.strategy || 'bfs'
    };
    this.memory = memory;
    this.visitQueue = [];
    this.visited = new Set();
    // Initialize ML-based scorer with goal description
    this.mlScorer = new MLBasedScorer(config.goalDescription || '');
    log('Initialized ML-based scorer with goal:', config.goalDescription);
  }

  /**
   * Plan the next action based on the current observation and goal.
   *
   * Never rejects: any error raised while planning is converted into a
   * `finish` action whose reason carries the error.
   *
   * @param {object} observation Snapshot of the page that was just loaded.
   * @returns {Promise<object>} Action with `type`, `reason`, `priority` and,
   *   depending on the type, `url` or `target`.
   */
  async planNextAction(observation) {
    try {
      log('Planning next action for:', observation.url);
      // Check if we should finish crawling
      if (this.shouldFinish(observation)) {
        return {
          type: 'finish',
          reason: this.getFinishReason(),
          priority: 1
        };
      }
      // Resolve the depth of the observed page so that links found on it are
      // queued one level deeper and maxDepth is actually enforced.
      this.currentDepth = this.getCurrentDepth(observation.url);
      // Mark current URL as visited
      this.visited.add(observation.url);
      this.pagesVisited++;
      // Learn from the outcome of the navigation
      if (!observation.error) {
        this.successfulUrls.add(observation.url);
        this.mlScorer.learn(observation.url, true);
        log('ML scorer learned from successful visit:', observation.url);
      } else {
        this.successfulUrls.delete(observation.url);
        this.mlScorer.learn(observation.url, false);
        log('ML scorer learned from failed visit:', observation.url);
      }
      // Store current state in memory
      if (this.memory.enabled) {
        await this.memory.storeState(
          observation.url,
          observation.title,
          observation.domSnapshot,
          undefined, // Screenshots handled separately
          (observation.links ?? []).map(link => link.href)
        );
      }
      // Extract and queue new links
      await this.extractAndQueueLinks(observation);
      // Get next URL to visit
      const nextTarget = this.getNextTarget();
      if (nextTarget) {
        const priority = this.calculatePriority(nextTarget.url);
        // If the navigation lands on a URL we never queued (e.g. a redirect),
        // getCurrentDepth() falls back to this value.
        this.currentDepth = nextTarget.depth;
        return {
          type: 'navigate',
          url: nextTarget.url,
          reason: `BFS navigation to depth ${nextTarget.depth}: ${nextTarget.url}`,
          priority
        };
      }
      // If no more URLs to visit, check for clickable elements on current page
      const clickTarget = this.findBestClickTarget(observation);
      if (clickTarget) {
        return {
          type: 'click',
          target: clickTarget.selector,
          reason: `Clicking "${clickTarget.text}" to discover new content`,
          priority: 2
        };
      }
      // No more actions possible
      return {
        type: 'finish',
        reason: 'No more discoverable content within constraints',
        priority: 1
      };
    } catch (error) {
      log('Error in planning:', error);
      return {
        type: 'finish',
        reason: `Planning error: ${error}`,
        priority: 1
      };
    }
  }

  /**
   * Seed the queue with the starting URL at depth 0.
   *
   * @param {string} startUrl URL the crawl starts from.
   * @returns {Promise<void>}
   */
  async initialize(startUrl) {
    this.visitQueue.push({ url: startUrl, depth: 0 });
    this.urlDepths.set(startUrl, 0);
    log('Initialized BFS planner with start URL:', startUrl);
  }

  /**
   * Queue every not-yet-seen, permitted link of the observation one level
   * below the observed page, then re-order the queue by depth (shallowest
   * first) and, within a depth, by descending URL priority.
   *
   * @param {object} observation Page observation providing `url` and `links`.
   * @returns {Promise<void>}
   */
  async extractAndQueueLinks(observation) {
    const nextDepth = this.getCurrentDepth(observation.url) + 1;
    for (const link of observation.links ?? []) {
      // Anchors without an href would otherwise resolve to "<base>/undefined".
      if (!link || !link.href)
        continue;
      const normalizedUrl = this.normalizeUrl(link.href, observation.url);
      if (this.visited.has(normalizedUrl) || this.isQueued(normalizedUrl))
        continue;
      if (!this.shouldVisitUrl(normalizedUrl, nextDepth))
        continue;
      this.visitQueue.push({
        url: normalizedUrl,
        depth: nextDepth,
        parent: observation.url
      });
      this.urlDepths.set(normalizedUrl, nextDepth);
      log('Queued URL:', normalizedUrl, 'at depth', nextDepth);
    }
    // Sort queue by priority (breadth-first). Scores are memoised so the
    // scorer runs at most once per URL instead of once per comparison.
    const cachedPriorities = new Map();
    const priorityOf = url => {
      let priority = cachedPriorities.get(url);
      if (priority === undefined) {
        priority = this.calculatePriority(url);
        cachedPriorities.set(url, priority);
      }
      return priority;
    };
    this.visitQueue.sort((a, b) => {
      if (a.depth !== b.depth)
        return a.depth - b.depth;
      return priorityOf(b.url) - priorityOf(a.url);
    });
  }

  /**
   * Remove and return the first queue entry that is still unvisited and
   * still permitted; entries that fail either check are discarded.
   *
   * @returns {object|null} Queue entry (`url`, `depth`, optional `parent`),
   *   or null when the queue is exhausted.
   */
  getNextTarget() {
    while (this.visitQueue.length > 0) {
      const target = this.visitQueue.shift();
      if (!this.visited.has(target.url) && this.shouldVisitUrl(target.url, target.depth))
        return target;
    }
    return null;
  }

  /**
   * Pick the interesting clickable element with the highest priority.
   * On ties the element that appears first wins.
   *
   * @param {object} observation Page observation providing `clickableElements`.
   * @returns {object|null} The chosen element, or null when none qualifies.
   */
  findBestClickTarget(observation) {
    let best = null;
    let bestPriority = -Infinity;
    for (const element of observation.clickableElements ?? []) {
      if (!this.isInterestingElement(element))
        continue;
      const priority = this.calculateElementPriority(element);
      if (best === null || priority > bestPriority) {
        best = element;
        bestPriority = priority;
      }
    }
    return best;
  }

  /**
   * Decide whether an element is worth clicking.
   *
   * @param {object} element Element descriptor with `text` and `tag`.
   * @returns {boolean} True for buttons, links and inputs whose text is
   *   longer than two characters and matches none of the avoid patterns.
   */
  isInterestingElement(element) {
    const text = String(element.text ?? '').toLowerCase();
    const tag = String(element.tag ?? '').toLowerCase();
    // Avoid navigation elements we might get stuck in. "back" and "ad" are
    // matched as whole words so that "Feedback", "Read more" or "Download"
    // are not rejected by accident.
    const avoidPatterns = [
      /\bback\b/i, /previous/i, /close/i, /cancel/i, /logout/i, /sign.?out/i,
      /advertisement/i, /\bads?\b/i, /sponsor/i, /cookie/i, /privacy/i
    ];
    if (avoidPatterns.some(pattern => pattern.test(text)))
      return false;
    // Prefer buttons and links with meaningful text
    return ['button', 'a', 'input'].includes(tag) && text.length > 2;
  }

  /**
   * Build the context object handed to the scorer.
   *
   * @returns {object} Goal, visited/successful URL sets and current depth.
   */
  buildScoringContext() {
    return {
      goal: this.config.goalDescription,
      visitedUrls: this.visited,
      successfulUrls: this.successfulUrls,
      currentDepth: this.currentDepth
    };
  }

  /**
   * Calculate priority for clicking an element using ML-based scoring.
   *
   * @param {object} element Element descriptor passed through to the scorer.
   * @returns {number} Scorer result multiplied by 10 (presumably a 0-1 score
   *   mapped to 0-10; the scorer's range is not visible here).
   */
  calculateElementPriority(element) {
    const score = this.mlScorer.scoreElement(element, this.buildScoringContext());
    return score * 10;
  }

  /**
   * Calculate priority for visiting a URL using ML-based scoring.
   *
   * @param {string} url Candidate URL.
   * @returns {number} Scorer result multiplied by 10 (presumably a 0-1 score
   *   mapped to 0-10; the scorer's range is not visible here).
   */
  calculatePriority(url) {
    const score = this.mlScorer.scoreUrl(url, this.buildScoringContext());
    return score * 10;
  }

  /**
   * Check whether a URL may be visited at the given depth.
   *
   * @param {string} url Absolute URL to test.
   * @param {number} depth Depth the URL would be visited at.
   * @returns {boolean} False when the depth or page budget is exceeded, the
   *   scheme cannot be navigated to, the host is outside `allowedDomains`,
   *   an exclude pattern matches, or the path ends in a non-HTML extension.
   */
  shouldVisitUrl(url, depth) {
    if (depth > this.config.maxDepth)
      return false;
    if (this.pagesVisited >= this.config.maxPages)
      return false;
    let parsed;
    try {
      parsed = new URL(url);
    } catch {
      parsed = undefined;
    }
    // Links such as mailto: or javascript: are not pages.
    if (parsed && /^(?:javascript|mailto|tel|sms|data|blob):$/i.test(parsed.protocol))
      return false;
    // Check allowed domains
    const allowedDomains = this.config.allowedDomains;
    if (allowedDomains && allowedDomains.length > 0) {
      if (!parsed)
        return false;
      const host = parsed.hostname;
      const allowed = allowedDomains.some(domain => {
        // URL lower-cases hostnames, so compare case-insensitively.
        const wanted = String(domain).toLowerCase();
        return host === wanted || host.endsWith('.' + wanted);
      });
      if (!allowed)
        return false;
    }
    // Check exclude patterns
    if (this.config.excludePatterns) {
      const excluded = this.config.excludePatterns.some(pattern => {
        // A /g or /y pattern remembers its last match position between calls.
        pattern.lastIndex = 0;
        return pattern.test(url);
      });
      if (excluded)
        return false;
    }
    // Exclude common file types and non-HTML resources. The extension is
    // looked up on the path so "?query" and "#fragment" cannot hide it.
    const fileExtensionPattern = /\.(pdf|doc|docx|xls|xlsx|ppt|pptx|zip|rar|exe|dmg|mp4|mp3|jpg|jpeg|png|gif|svg|css|js|json|xml)$/i;
    if (fileExtensionPattern.test(parsed ? parsed.pathname : url))
      return false;
    return true;
  }

  /**
   * Check if a URL is already queued.
   *
   * @param {string} url URL to look up.
   * @returns {boolean} True when a queue entry has exactly this URL.
   */
  isQueued(url) {
    return this.visitQueue.some(item => item.url === url);
  }

  /**
   * Get the crawl depth of a URL.
   *
   * @param {string} url URL to look up.
   * @returns {number} Depth recorded when the URL was queued; otherwise the
   *   depth of a matching queue entry; otherwise `currentDepth`.
   */
  getCurrentDepth(url) {
    const recorded = this.urlDepths.get(url);
    if (recorded !== undefined)
      return recorded;
    const queueItem = this.visitQueue.find(item => item.url === url);
    return queueItem ? queueItem.depth : this.currentDepth;
  }

  /**
   * Resolve an href against a base URL.
   *
   * @param {string} href Link target, absolute or relative.
   * @param {string} baseUrl URL of the page the link was found on.
   * @returns {string} Absolute URL, or `href` unchanged if it cannot be parsed.
   */
  normalizeUrl(href, baseUrl) {
    try {
      return new URL(href, baseUrl).href;
    } catch {
      return href;
    }
  }

  /**
   * Check if crawling should stop before the observation is processed.
   *
   * @param {object} observation Page observation.
   * @returns {boolean} True when the page budget is used up, or when the
   *   queue is empty and the page offers neither links nor clickable elements.
   */
  shouldFinish(observation) {
    if (this.pagesVisited >= this.config.maxPages)
      return true;
    // Links on this page have not been queued yet, so an empty queue alone
    // does not mean there is nothing left to visit.
    const hasLinks = (observation.links ?? []).length > 0;
    const hasClickables = (observation.clickableElements ?? []).length > 0;
    return this.visitQueue.length === 0 && !hasLinks && !hasClickables;
  }

  /**
   * Get the reason for finishing.
   *
   * @returns {string} Human-readable explanation.
   */
  getFinishReason() {
    if (this.pagesVisited >= this.config.maxPages)
      return `Reached maximum page limit (${this.config.maxPages})`;
    if (this.visitQueue.length === 0)
      return 'No more URLs to visit in the queue';
    return 'Crawling complete';
  }

  /**
   * Get crawling statistics.
   *
   * @returns {object} Counters, the visited URL list and configured limits.
   */
  getStats() {
    return {
      pagesVisited: this.pagesVisited,
      queueSize: this.visitQueue.length,
      visitedUrls: Array.from(this.visited),
      currentDepth: this.currentDepth,
      maxDepth: this.config.maxDepth,
      maxPages: this.config.maxPages
    };
  }
}