@darbotlabs/darbot-browser-mcp 0.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80):
  1. package/LICENSE +1 -1
  2. package/README.md +222 -161
  3. package/cli.js +1 -1
  4. package/config.d.ts +77 -1
  5. package/index.d.ts +1 -1
  6. package/index.js +1 -1
  7. package/lib/ai/context.js +150 -0
  8. package/lib/ai/guardrails.js +382 -0
  9. package/lib/ai/integration.js +397 -0
  10. package/lib/ai/intent.js +237 -0
  11. package/lib/ai/manualPromise.js +111 -0
  12. package/lib/ai/memory.js +273 -0
  13. package/lib/ai/ml-scorer.js +265 -0
  14. package/lib/ai/orchestrator-tools.js +292 -0
  15. package/lib/ai/orchestrator.js +473 -0
  16. package/lib/ai/planner.js +300 -0
  17. package/lib/ai/reporter.js +493 -0
  18. package/lib/ai/workflow.js +407 -0
  19. package/lib/auth/apiKeyAuth.js +46 -0
  20. package/lib/auth/entraAuth.js +110 -0
  21. package/lib/auth/entraJwtVerifier.js +117 -0
  22. package/lib/auth/index.js +210 -0
  23. package/lib/auth/managedIdentityAuth.js +175 -0
  24. package/lib/auth/mcpOAuthProvider.js +186 -0
  25. package/lib/auth/tunnelAuth.js +120 -0
  26. package/lib/browserContextFactory.js +1 -1
  27. package/lib/browserServer.js +1 -1
  28. package/lib/cdpRelay.js +2 -2
  29. package/lib/common.js +68 -0
  30. package/lib/config.js +62 -3
  31. package/lib/connection.js +1 -1
  32. package/lib/context.js +1 -1
  33. package/lib/fileUtils.js +1 -1
  34. package/lib/guardrails.js +382 -0
  35. package/lib/health.js +178 -0
  36. package/lib/httpServer.js +1 -1
  37. package/lib/index.js +1 -1
  38. package/lib/javascript.js +1 -1
  39. package/lib/manualPromise.js +1 -1
  40. package/lib/memory.js +273 -0
  41. package/lib/openapi.js +373 -0
  42. package/lib/orchestrator.js +473 -0
  43. package/lib/package.js +1 -1
  44. package/lib/pageSnapshot.js +17 -2
  45. package/lib/planner.js +302 -0
  46. package/lib/program.js +17 -5
  47. package/lib/reporter.js +493 -0
  48. package/lib/resources/resource.js +1 -1
  49. package/lib/server.js +5 -3
  50. package/lib/tab.js +1 -1
  51. package/lib/tools/ai-native.js +298 -0
  52. package/lib/tools/autonomous.js +147 -0
  53. package/lib/tools/clock.js +183 -0
  54. package/lib/tools/common.js +1 -1
  55. package/lib/tools/console.js +1 -1
  56. package/lib/tools/diagnostics.js +132 -0
  57. package/lib/tools/dialogs.js +1 -1
  58. package/lib/tools/emulation.js +155 -0
  59. package/lib/tools/files.js +1 -1
  60. package/lib/tools/install.js +1 -1
  61. package/lib/tools/keyboard.js +1 -1
  62. package/lib/tools/navigate.js +1 -1
  63. package/lib/tools/network.js +1 -1
  64. package/lib/tools/pageSnapshot.js +58 -0
  65. package/lib/tools/pdf.js +1 -1
  66. package/lib/tools/profiles.js +76 -25
  67. package/lib/tools/screenshot.js +1 -1
  68. package/lib/tools/scroll.js +93 -0
  69. package/lib/tools/snapshot.js +1 -1
  70. package/lib/tools/storage.js +328 -0
  71. package/lib/tools/tab.js +16 -0
  72. package/lib/tools/tabs.js +1 -1
  73. package/lib/tools/testing.js +1 -1
  74. package/lib/tools/tool.js +1 -1
  75. package/lib/tools/utils.js +1 -1
  76. package/lib/tools/vision.js +1 -1
  77. package/lib/tools/wait.js +1 -1
  78. package/lib/tools.js +22 -1
  79. package/lib/transport.js +251 -31
  80. package/package.json +28 -22
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Copyright (c) DarbotLabs.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
/**
 * A promise that is settled from the outside through its own `resolve()` and
 * `reject()` methods rather than through an executor callback.
 *
 * `Symbol.species` is overridden to return `Promise`, so promises derived via
 * `then`/`catch`/`finally` are plain `Promise` instances.
 */
export class ManualPromise extends Promise {
    _resolve;
    _reject;
    _isDone = false;

    constructor() {
        // `this` is not available until super() returns, so the settle
        // callbacks are parked in a local holder and copied over afterwards.
        const settle = {};
        super((fulfill, fail) => {
            settle.fulfill = fulfill;
            settle.fail = fail;
        });
        this._resolve = settle.fulfill;
        this._reject = settle.fail;
    }

    /** @returns {boolean} true once `resolve()` or `reject()` has been called. */
    isDone() {
        return this._isDone;
    }

    /** Marks the promise as done and fulfills it with `t`. */
    resolve(t) {
        this._isDone = true;
        this._resolve(t);
    }

    /** Marks the promise as done and rejects it with `e`. */
    reject(e) {
        this._isDone = true;
        this._reject(e);
    }

    static get [Symbol.species]() {
        return Promise;
    }

    get [Symbol.toStringTag]() {
        return 'ManualPromise';
    }
}
49
/**
 * Lets callers race arbitrary promises against the lifetime of a scope.
 *
 * Every in-flight race registers a ManualPromise together with the stack
 * frames captured at the call site. `reject()` and `close()` settle all
 * registered promises, which makes the corresponding races reject (or, for
 * `safeRace`, resolve with the supplied default value).
 */
export class LongStandingScope {
    _terminateError;
    _closeError;
    _terminatePromises = new Map();
    _isClosed = false;

    /** Terminates the scope: pending races are settled with `error` itself. */
    reject(error) {
        this._isClosed = true;
        this._terminateError = error;
        this._terminatePromises.forEach((_frames, pending) => {
            pending.resolve(error);
        });
    }

    /**
     * Closes the scope: each pending race is settled with a clone of `error`
     * whose stack is built from the frames captured when that race started.
     */
    close(error) {
        this._isClosed = true;
        this._closeError = error;
        this._terminatePromises.forEach((frames, pending) => {
            pending.resolve(cloneError(error, frames));
        });
    }

    /** @returns {boolean} true after `reject()` or `close()` has been called. */
    isClosed() {
        return this._isClosed;
    }

    /** Races `promise` against every scope in `scopes`. */
    static async raceMultiple(scopes, promise) {
        const contenders = scopes.map(scope => scope.race(promise));
        return Promise.race(contenders);
    }

    /** Races one promise, or an array of promises, against this scope. */
    async race(promise) {
        let candidates;
        if (Array.isArray(promise))
            candidates = promise;
        else
            candidates = [promise];
        return this._race(candidates, false);
    }

    /** Like `race`, but yields `defaultValue` instead of rejecting when the scope ends. */
    async safeRace(promise, defaultValue) {
        return this._race([promise], true, defaultValue);
    }

    async _race(promises, safe, defaultValue) {
        const terminator = new ManualPromise();
        const frames = captureRawStack();
        // A scope that already ended settles the terminator right away.
        if (this._terminateError)
            terminator.resolve(this._terminateError);
        if (this._closeError)
            terminator.resolve(cloneError(this._closeError, frames));
        this._terminatePromises.set(terminator, frames);
        const onTerminated = terminator.then(e => {
            if (safe)
                return defaultValue;
            return Promise.reject(e);
        });
        try {
            return await Promise.race([onTerminated, ...promises]);
        }
        finally {
            // Always unregister so settled races do not accumulate in the map.
            this._terminatePromises.delete(terminator);
        }
    }
}
97
/**
 * Builds a plain `Error` that copies the name and message of `error` and whose
 * stack is a `name:message` header line followed by the given `frames`.
 */
function cloneError(error, frames) {
    const { name, message } = error;
    const header = name + ':' + message;
    return Object.assign(new Error(), {
        name,
        message,
        stack: [header, ...frames].join('\n'),
    });
}
104
/**
 * Captures the current stack as an array of lines, temporarily raising
 * `Error.stackTraceLimit` to 50 and restoring the previous value afterwards.
 */
function captureRawStack() {
    const previousLimit = Error.stackTraceLimit;
    Error.stackTraceLimit = 50;
    const { stack } = new Error();
    Error.stackTraceLimit = previousLimit;
    return (stack || '').split('\n');
}
@@ -0,0 +1,273 @@
1
+ /**
2
+ * Copyright (c) DarbotLabs.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+ import crypto from 'node:crypto';
17
+ import fs from 'node:fs';
18
+ import path from 'node:path';
19
+ import debug from 'debug';
20
+ const log = debug('darbot:memory');
21
/**
 * Local file-based memory storage implementation.
 *
 * Each state is persisted as `<stateHash>.json` inside `storagePath`. Only
 * `*.json` entries of that directory are treated as states; other files and
 * sub-directories are ignored by every operation, including `clear()`.
 */
export class LocalMemoryStorage {
    storagePath;
    maxStates;

    /**
     * @param {{storagePath?: string, maxStates?: number}} [config]
     *   `storagePath` defaults to `<cwd>/.darbot/memory`, `maxStates` to 1000.
     *   The storage directory is created immediately.
     */
    constructor(config = {}) {
        this.storagePath = config.storagePath || path.join(process.cwd(), '.darbot', 'memory');
        this.maxStates = config.maxStates || 1000;
        this.ensureStorageDirectory();
    }

    /** Creates the storage directory (and missing parents) if it does not exist. */
    ensureStorageDirectory() {
        if (!fs.existsSync(this.storagePath))
            fs.mkdirSync(this.storagePath, { recursive: true });
    }

    /** @returns {string} path of the JSON file backing `stateHash`. */
    getStatePath(stateHash) {
        return path.join(this.storagePath, `${stateHash}.json`);
    }

    /**
     * Persists `state` under its `stateHash`, then prunes the oldest states if
     * the store exceeds `maxStates`.
     * @throws rethrows any write or cleanup error after logging it.
     */
    async storeState(state) {
        try {
            const statePath = this.getStatePath(state.stateHash);
            await fs.promises.writeFile(statePath, JSON.stringify(state, null, 2));
            log('Stored state:', state.stateHash, state.url);
            // Clean up old states if we exceed the limit
            await this.cleanupOldStates();
        }
        catch (error) {
            log('Error storing state:', error);
            throw error;
        }
    }

    /**
     * Loads a state by hash.
     * @returns the parsed state, or `null` when it is missing or unreadable.
     */
    async getState(stateHash) {
        try {
            // Read directly instead of checking existence first: this avoids a
            // blocking call and the window between the check and the read.
            const data = await fs.promises.readFile(this.getStatePath(stateHash), 'utf-8');
            return JSON.parse(data);
        }
        catch (error) {
            // A missing file is the normal "not found" case and is not logged.
            if (error?.code !== 'ENOENT')
                log('Error reading state:', error);
            return null;
        }
    }

    /** @returns {Promise<boolean>} whether a file exists for `stateHash`. */
    async hasState(stateHash) {
        const statePath = this.getStatePath(stateHash);
        return fs.existsSync(statePath);
    }

    /**
     * Reads every `*.json` state file, skipping (and logging) unreadable ones.
     * @returns states sorted by ascending `timestamp`; `[]` if the directory
     *   cannot be listed.
     */
    async getAllStates() {
        try {
            const files = await fs.promises.readdir(this.storagePath);
            const states = [];
            for (const file of files) {
                if (!file.endsWith('.json'))
                    continue;
                const filePath = path.join(this.storagePath, file);
                try {
                    const data = await fs.promises.readFile(filePath, 'utf-8');
                    states.push(JSON.parse(data));
                }
                catch (error) {
                    log('Error reading state file:', file, error);
                }
            }
            return states.sort((a, b) => a.timestamp - b.timestamp);
        }
        catch (error) {
            log('Error reading states:', error);
            return [];
        }
    }

    /**
     * Collects links found on stored states whose URL does not belong to a
     * state flagged as visited. States without a `links` array contribute
     * nothing instead of raising.
     * @returns {Promise<string[]>} de-duplicated links in discovery order.
     */
    async getUnvisitedLinks() {
        const states = await this.getAllStates();
        const visited = new Set(states.filter(s => s.visited).map(s => s.url));
        const unvisited = new Set();
        for (const state of states) {
            for (const link of state.links ?? []) {
                if (!visited.has(link))
                    unvisited.add(link);
            }
        }
        return Array.from(unvisited);
    }

    /**
     * Deletes all stored states. Only `*.json` entries are removed so that
     * unrelated files or sub-directories in the storage directory neither get
     * deleted nor make the operation fail.
     * @throws rethrows any listing or unlink error after logging it.
     */
    async clear() {
        try {
            const files = await fs.promises.readdir(this.storagePath);
            const stateFiles = files.filter(file => file.endsWith('.json'));
            await Promise.all(stateFiles.map(file => fs.promises.unlink(path.join(this.storagePath, file))));
            log('Cleared memory storage');
        }
        catch (error) {
            log('Error clearing storage:', error);
            throw error;
        }
    }

    /** Removes the oldest states (by timestamp) until at most `maxStates` remain. */
    async cleanupOldStates() {
        const states = await this.getAllStates();
        if (states.length <= this.maxStates)
            return;
        // Remove oldest states
        const toRemove = states.slice(0, states.length - this.maxStates);
        // Best effort: a file that already vanished must not fail the store.
        await Promise.all(toRemove.map(state => fs.promises.unlink(this.getStatePath(state.stateHash)).catch(() => { })));
        log(`Cleaned up ${toRemove.length} old states`);
    }
}
125
/**
 * Darbot Memory MCP connector (placeholder for future implementation).
 *
 * Construction fails with an Error because the connector does not exist yet.
 * Failing at construction time lets a caller that wraps `new
 * DarbotMemoryStorage()` in try/catch switch to another storage up front,
 * instead of obtaining an object whose every method rejects.
 */
export class DarbotMemoryStorage {
    /**
     * @param {object} [config] currently unused; kept for the future connector.
     * @throws {Error} always, until the connector is implemented.
     */
    constructor(config = {}) {
        // TODO: Implement darbot-memory-mcp integration
        log('Darbot Memory MCP connector not yet implemented, falling back to local storage');
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }

    /** @throws {Error} always (not implemented). */
    async storeState(state) {
        // TODO: Send to darbot-memory-mcp server
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }

    /** @throws {Error} always (not implemented). */
    async getState(stateHash) {
        // TODO: Query darbot-memory-mcp server
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }

    /** @throws {Error} always (not implemented). */
    async hasState(stateHash) {
        // TODO: Check darbot-memory-mcp server
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }

    /** @throws {Error} always (not implemented). */
    async getAllStates() {
        // TODO: Fetch from darbot-memory-mcp server
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }

    /** @throws {Error} always (not implemented). */
    async getUnvisitedLinks() {
        // TODO: Query darbot-memory-mcp server
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }

    /** @throws {Error} always (not implemented). */
    async clear() {
        // TODO: Clear darbot-memory-mcp storage
        throw new Error('Darbot Memory MCP connector not yet implemented');
    }
}
158
/**
 * Memory manager with optional darbot-memory-mcp integration.
 *
 * While `config.enabled` is falsy every operation is a no-op and no storage
 * backend is constructed, so a disabled manager does not touch the file
 * system. The backend is created on construction when enabled, or on first
 * use if the shared config object is switched to enabled afterwards.
 */
export class MemoryManager {
    storage;
    config;

    /**
     * @param {{enabled: boolean, connector?: string, storagePath?: string, maxStates?: number}} [config]
     *   `connector` may be 'darbot-memory-mcp'; any other value, including
     *   'local' and undefined, selects local file storage.
     */
    constructor(config = { enabled: true }) {
        this.config = config;
        // LocalMemoryStorage creates its directory in the constructor, so it
        // is only instantiated when memory is actually enabled.
        this.storage = config.enabled ? MemoryManager.#createStorage(config) : null;
    }

    /** Instantiates the backend selected by `config.connector`. */
    static #createStorage(config) {
        const localOptions = {
            storagePath: config.storagePath,
            maxStates: config.maxStates
        };
        if (config.connector === 'darbot-memory-mcp') {
            try {
                return new DarbotMemoryStorage();
            }
            catch (error) {
                log('Failed to initialize darbot-memory-mcp connector, falling back to local storage:', error);
                return new LocalMemoryStorage(localOptions);
            }
        }
        return new LocalMemoryStorage(localOptions);
    }

    /** Returns the backend, creating it if the manager started out disabled. */
    #backend() {
        this.storage ??= MemoryManager.#createStorage(this.config);
        return this.storage;
    }

    /**
     * Generate a hash for the current page state.
     * @returns {string} first 16 hex characters of the SHA-256 of `domSnapshot`.
     */
    static stateHash(domSnapshot) {
        return crypto.createHash('sha256').update(domSnapshot).digest('hex').substring(0, 16);
    }

    /**
     * Store a crawl state with an optional screenshot.
     *
     * When `screenshot` is truthy it is written to
     * `<cwd>/.darbot/screenshots/<stateHash>.png` and that path is recorded
     * on the state. The state is always stored with `visited: true`.
     * @returns {Promise<string>} the state hash, or '' when memory is disabled.
     */
    async storeState(url, title, domSnapshot, screenshot, links = []) {
        if (!this.config.enabled)
            return '';
        const stateHash = MemoryManager.stateHash(domSnapshot);
        let screenshotPath;
        // Save screenshot if provided
        if (screenshot) {
            const screenshotDir = path.join(process.cwd(), '.darbot', 'screenshots');
            // Recursive mkdir succeeds when the directory already exists.
            await fs.promises.mkdir(screenshotDir, { recursive: true });
            screenshotPath = path.join(screenshotDir, `${stateHash}.png`);
            await fs.promises.writeFile(screenshotPath, screenshot);
        }
        const state = {
            url,
            title,
            stateHash,
            timestamp: Date.now(),
            screenshot: screenshotPath,
            links,
            visited: true
        };
        await this.#backend().storeState(state);
        return stateHash;
    }

    /**
     * Check if we've seen this state before (always false when disabled).
     */
    async hasState(domSnapshot) {
        if (!this.config.enabled)
            return false;
        const stateHash = MemoryManager.stateHash(domSnapshot);
        return await this.#backend().hasState(stateHash);
    }

    /**
     * Get a stored state by hash (always null when disabled).
     */
    async getState(stateHash) {
        if (!this.config.enabled)
            return null;
        return await this.#backend().getState(stateHash);
    }

    /**
     * Get all stored states (always [] when disabled).
     */
    async getAllStates() {
        if (!this.config.enabled)
            return [];
        return await this.#backend().getAllStates();
    }

    /**
     * Get unvisited links for BFS crawling (always [] when disabled).
     */
    async getUnvisitedLinks() {
        if (!this.config.enabled)
            return [];
        return await this.#backend().getUnvisitedLinks();
    }

    /**
     * Clear all stored states (no-op when disabled).
     */
    async clear() {
        if (!this.config.enabled)
            return;
        await this.#backend().clear();
    }

    /**
     * Check if memory is enabled.
     */
    get enabled() {
        return this.config.enabled;
    }
}
@@ -0,0 +1,265 @@
1
+ /**
2
+ * Copyright (c) DarbotLabs.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+ /**
17
+ * ML-based scoring system for intelligent URL and element prioritization
18
+ *
19
+ * This replaces basic heuristics with feature-based scoring that learns from
20
+ * crawl patterns and goal descriptions to make better decisions.
21
+ */
22
+ import debug from 'debug';
23
+ const log = debug('darbot:ml-scorer');
24
/**
 * ML-inspired scoring system using feature engineering and weighted scoring.
 *
 * Features are extracted from a URL or element, multiplied by fixed weights,
 * summed, and passed through a sigmoid so every score lies in (0, 1).
 * Features that have no entry in the weight table contribute nothing.
 */
export class MLBasedScorer {
    // Constant lookup tables, built once instead of on every call.
    static #STOP_WORDS = new Set([
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with', 'by', 'from', 'up', 'about', 'into', 'through', 'during'
    ]);
    static #CONTENT_PATTERNS = [
        /article/i, /post/i, /blog/i, /news/i, /story/i,
        /product/i, /item/i, /detail/i, /content/i, /page/i,
        /documentation/i, /docs/i, /guide/i, /tutorial/i
    ];
    static #NAV_PATTERNS = [
        /category/i, /section/i, /menu/i, /nav/i,
        /index/i, /list/i, /archive/i, /browse/i
    ];
    static #UTILITY_PATTERNS = [
        /login/i, /signin/i, /register/i, /signup/i,
        /logout/i, /signout/i, /profile/i, /account/i,
        /terms/i, /privacy/i, /legal/i, /cookie/i,
        /contact/i, /about/i, /help/i, /faq/i
    ];

    weights;
    goalKeywords;
    learnedPatterns = new Map();

    /**
     * @param {string} [goal] free-text goal; its words longer than three
     *   characters that are not stop words become the goal keywords.
     */
    constructor(goal) {
        // Initialize feature weights (these could be learned from data)
        this.weights = new Map([
            // URL structure weights
            ['urlDepth', -0.3], // Prefer shallower URLs
            ['urlLength', -0.1], // Prefer shorter URLs
            ['pathSegments', 0.2], // More segments = more specific
            ['queryParams', -0.2], // Too many params = dynamic/session pages
            // Content weights
            ['textLength', 0.3], // More text = more content
            ['hasKeywords', 2.0], // Strong signal for goal relevance
            ['semanticRelevance', 1.5], // Goal-based relevance
            // Context weights
            ['parentScore', 0.4], // Good parents suggest good children
            ['visitedSiblings', -0.2], // Avoid repetitive sibling pages
            ['domDepth', -0.1], // Prefer more accessible elements
            // Pattern weights
            ['contentPattern', 1.0], // Content pages
            ['navPattern', 0.5], // Navigation pages
            ['utilityPattern', -0.5], // Utility pages (login, etc.)
        ]);
        // Extract keywords from goal. The goal is split on \W+ exactly like
        // scored text, so punctuation attached to a goal word ("pricing,")
        // cannot prevent it from matching.
        this.goalKeywords = new Set();
        if (goal) {
            for (const word of goal.toLowerCase().split(/\W+/)) {
                if (word.length > 3 && !this.isStopWord(word))
                    this.goalKeywords.add(word);
            }
        }
        log('Initialized ML scorer with goal keywords:', Array.from(this.goalKeywords));
    }

    /**
     * Score a URL for crawling priority.
     * @param {string} url
     * @param {{currentDepth?: number, parentUrl?: string, visitedUrls?: Iterable<string>}} [context]
     * @returns {number} score in (0, 1); 0.5 when `url` cannot be parsed.
     */
    scoreUrl(url, context) {
        const features = this.extractUrlFeatures(url, context);
        return this.calculateScore(features);
    }

    /**
     * Score an element for interaction priority.
     * @param {{text?: string, selector?: string, tag?: string}} element
     * @param {object} [context] not read by the current element features.
     * @returns {number} score in (0, 1).
     */
    scoreElement(element, context) {
        const features = this.extractElementFeatures(element, context);
        return this.calculateScore(features);
    }

    /**
     * Update learned patterns based on a crawl outcome: +0.1 for a success,
     * -0.05 for a failure, keyed by the URL's pattern.
     * `features` is accepted for interface compatibility but not used.
     */
    learn(url, success, features) {
        // Extract pattern from URL
        const pattern = this.extractPattern(url);
        // Update pattern score based on success
        const currentScore = this.learnedPatterns.get(pattern) || 0;
        const delta = success ? 0.1 : -0.05;
        this.learnedPatterns.set(pattern, currentScore + delta);
        log('Updated pattern score:', pattern, '->', this.learnedPatterns.get(pattern));
    }

    /**
     * Extract features from URL.
     *
     * Missing context fields default to depth 0, no parent and no visited
     * URLs. If `url` cannot be parsed, the error is logged and the features
     * gathered so far (none) are returned.
     * @returns {Map<string, number>}
     */
    extractUrlFeatures(url, context) {
        const features = new Map();
        const { currentDepth = 0, parentUrl, visitedUrls = [] } = context ?? {};
        try {
            const urlObj = new URL(url);
            const pathname = urlObj.pathname;
            // URL structure features
            features.set('urlDepth', currentDepth);
            features.set('urlLength', url.length / 100); // Normalize
            features.set('hasNumbers', /\d/.test(pathname) ? 1 : 0);
            features.set('hasHyphens', /-/.test(pathname) ? 1 : 0);
            features.set('pathSegments', pathname.split('/').filter(Boolean).length);
            // Count entries by iteration: URLSearchParams.size does not exist
            // on older Node.js releases and would yield undefined there.
            features.set('queryParams', Array.from(urlObj.searchParams.keys()).length);
            // Pattern matching
            features.set('contentPattern', this.matchesContentPattern(url) ? 1 : 0);
            features.set('navPattern', this.matchesNavPattern(url) ? 1 : 0);
            features.set('utilityPattern', this.matchesUtilityPattern(url) ? 1 : 0);
            // Goal relevance
            const relevance = this.calculateSemanticRelevance(url);
            features.set('semanticRelevance', relevance);
            features.set('hasKeywords', relevance > 0.5 ? 1 : 0);
            // Context features
            const parentPattern = parentUrl ? this.extractPattern(parentUrl) : '';
            features.set('parentScore', this.learnedPatterns.get(parentPattern) || 0);
            // Visited sibling counting
            const urlPattern = this.extractPattern(url);
            let visitedSiblings = 0;
            for (const visitedUrl of visitedUrls) {
                if (this.extractPattern(visitedUrl) === urlPattern)
                    visitedSiblings++;
            }
            features.set('visitedSiblings', visitedSiblings);
        }
        catch (error) {
            log('Error extracting URL features:', error);
        }
        return features;
    }

    /**
     * Extract features from element. A missing `text` or `selector` is
     * treated as an empty string.
     * @returns {Map<string, number>}
     */
    extractElementFeatures(element, context) {
        const features = new Map();
        const rawText = element.text ?? '';
        const selector = element.selector ?? '';
        const text = rawText.toLowerCase();
        // Text features
        features.set('textLength', Math.min(rawText.length / 50, 1));
        features.set('hasKeywords', this.containsKeywords(text) ? 1 : 0);
        // Semantic relevance
        features.set('semanticRelevance', this.calculateSemanticRelevance(text));
        // DOM features
        const selectorDepth = selector.split('>').length;
        features.set('domDepth', selectorDepth);
        // Element type features
        const isActionButton = ['button', 'submit'].includes(element.tag);
        features.set('isActionButton', isActionButton ? 1 : 0);
        // Pattern matching
        features.set('contentPattern', this.matchesContentPattern(text) ? 1 : 0);
        features.set('navPattern', this.matchesNavPattern(text) ? 1 : 0);
        features.set('utilityPattern', this.matchesUtilityPattern(text) ? 1 : 0);
        return features;
    }

    /**
     * Calculate final score from features.
     * @param {Map<string, number>} features
     * @returns {number} sigmoid of the weighted feature sum.
     */
    calculateScore(features) {
        let score = 0;
        for (const [feature, value] of features.entries()) {
            // A non-finite feature would turn the whole score into NaN.
            if (!Number.isFinite(value))
                continue;
            const weight = this.weights.get(feature) || 0;
            score += weight * value;
        }
        // Apply sigmoid to bound score between 0 and 1
        return 1 / (1 + Math.exp(-score));
    }

    /**
     * Calculate semantic relevance to goal: the number of words in `text`
     * that are goal keywords, divided by the keyword count and capped at 1.
     * @returns {number} 0.5 (neutral) when no goal keywords exist.
     */
    calculateSemanticRelevance(text) {
        if (this.goalKeywords.size === 0)
            return 0.5; // Neutral if no goal specified
        const words = text.toLowerCase().split(/\W+/);
        let matches = 0;
        for (const word of words) {
            if (this.goalKeywords.has(word))
                matches++;
        }
        // Normalize by goal keyword count
        return Math.min(matches / this.goalKeywords.size, 1.0);
    }

    /**
     * Check if text contains goal keywords.
     */
    containsKeywords(text) {
        if (this.goalKeywords.size === 0)
            return false;
        const words = text.toLowerCase().split(/\W+/);
        return words.some(word => this.goalKeywords.has(word));
    }

    /**
     * Extract pattern from URL: hostname plus path with identifiers replaced
     * by `*`. Returns the input unchanged when it is not a parseable URL.
     */
    extractPattern(url) {
        try {
            const urlObj = new URL(url);
            // Pattern = domain + path structure (without specific IDs/numbers).
            // UUID-like segments are collapsed first: replacing digit runs
            // beforehand would break them up so they no longer match.
            const pathPattern = urlObj.pathname
                .replace(/\/[a-f0-9-]{36}/gi, '/*')
                .replace(/\d+/g, '*');
            return `${urlObj.hostname}${pathPattern}`;
        }
        catch {
            return url;
        }
    }

    /**
     * Check if URL/text matches content patterns.
     */
    matchesContentPattern(text) {
        return MLBasedScorer.#CONTENT_PATTERNS.some(p => p.test(text));
    }

    /**
     * Check if URL/text matches navigation patterns.
     */
    matchesNavPattern(text) {
        return MLBasedScorer.#NAV_PATTERNS.some(p => p.test(text));
    }

    /**
     * Check if URL/text matches utility patterns (usually low priority).
     */
    matchesUtilityPattern(text) {
        return MLBasedScorer.#UTILITY_PATTERNS.some(p => p.test(text));
    }

    /**
     * Check if word is a stop word (case-insensitive).
     */
    isStopWord(word) {
        return MLBasedScorer.#STOP_WORDS.has(word.toLowerCase());
    }

    /**
     * Get a copy of the learned patterns for debugging/export.
     */
    getLearnedPatterns() {
        return new Map(this.learnedPatterns);
    }

    /**
     * Export scoring statistics: goal keywords, number of learned patterns
     * and their average score (0 when nothing has been learned).
     */
    getStatistics() {
        const scores = Array.from(this.learnedPatterns.values());
        const avgScore = scores.length > 0
            ? scores.reduce((a, b) => a + b, 0) / scores.length
            : 0;
        return {
            goalKeywords: Array.from(this.goalKeywords),
            learnedPatterns: this.learnedPatterns.size,
            averagePatternScore: avgScore
        };
    }
}