musubi-sdd 2.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,265 @@
1
+ /**
2
+ * @fileoverview Browser Agent - Natural Language Browser Automation
3
+ * @module agents/browser
4
+ * @description Provides browser automation through natural language commands,
5
+ * screenshot capture and comparison, and E2E test generation.
6
+ */
7
+
8
+ const { chromium, firefox, webkit } = require('playwright');
9
+ const NLParser = require('./nl-parser');
10
+ const ActionExecutor = require('./action-executor');
11
+ const ContextManager = require('./context-manager');
12
+ const ScreenshotCapture = require('./screenshot');
13
+ const AIComparator = require('./ai-comparator');
14
+ const TestGenerator = require('./test-generator');
15
+
16
+ /**
17
+ * @typedef {Object} BrowserAgentOptions
18
+ * @property {boolean} [headless=true] - Run browser in headless mode
19
+ * @property {'chromium'|'firefox'|'webkit'} [browser='chromium'] - Browser type
20
+ * @property {string} [outputDir='./screenshots'] - Screenshot output directory
21
+ * @property {string} [visionModel='gpt-4-vision-preview'] - Vision AI model
22
+ * @property {number} [timeout=30000] - Default timeout in milliseconds
23
+ * @property {number} [threshold=0.95] - Screenshot comparison threshold
24
+ */
25
+
26
/**
 * Browser Agent for natural language browser automation.
 *
 * Facade that wires together the natural-language parser, the action
 * executor, the browser context manager, screenshot capture, AI screenshot
 * comparison and Playwright test generation.
 */
class BrowserAgent {
  /**
   * Create a new BrowserAgent instance.
   * @param {BrowserAgentOptions} [options] - Configuration options
   */
  constructor(options = {}) {
    this.options = {
      headless: options.headless !== false,
      browser: options.browser || 'chromium',
      outputDir: options.outputDir || './screenshots',
      visionModel: options.visionModel || 'gpt-4-vision-preview',
      // Numeric options fall back only when unset: `||` would silently
      // replace a deliberate `0` with the default.
      timeout: options.timeout != null ? options.timeout : 30000,
      threshold: options.threshold != null ? options.threshold : 0.95,
    };

    this.parser = new NLParser();
    this.executor = new ActionExecutor();
    this.contextManager = new ContextManager();
    this.screenshot = new ScreenshotCapture(this.options.outputDir);
    this.comparator = new AIComparator({
      model: this.options.visionModel,
      threshold: this.options.threshold,
    });
    this.testGenerator = new TestGenerator();

    this.browser = null;
    this.isLaunched = false;
  }

  /**
   * Launch the browser and initialize the context manager with it.
   * Does nothing when the agent is already launched.
   *
   * If context initialization fails, the freshly launched browser is closed
   * again so that no browser process is orphaned, and the initialization
   * error is rethrown.
   * @returns {Promise<void>}
   */
  async launch() {
    if (this.isLaunched) {
      return;
    }

    const browserType = this.getBrowserType();
    this.browser = await browserType.launch({
      headless: this.options.headless,
    });

    try {
      await this.contextManager.initialize(this.browser);
    } catch (error) {
      const orphan = this.browser;
      this.browser = null;
      try {
        await orphan.close();
      } catch (closeError) {
        // Deliberately ignored: the initialization failure is the error the
        // caller needs to see; a secondary close failure would only mask it.
      }
      throw error;
    }

    this.isLaunched = true;
  }

  /**
   * Get the Playwright browser type selected by `options.browser`.
   * Any value other than 'firefox' or 'webkit' resolves to chromium.
   * @returns {import('playwright').BrowserType}
   * @private
   */
  getBrowserType() {
    switch (this.options.browser) {
      case 'firefox':
        return firefox;
      case 'webkit':
        return webkit;
      case 'chromium':
      default:
        return chromium;
    }
  }

  /**
   * Execute a natural language command.
   *
   * Launches the browser on first use, parses the command into actions and
   * runs them in order, stopping at the first action that reports failure.
   * @param {string} command - Natural language command
   * @returns {Promise<import('./action-executor').ActionResult>} Result
   *   object; every shape carries `success` and the original `command`.
   */
  async execute(command) {
    if (!this.isLaunched) {
      await this.launch();
    }

    // Parse natural language to actions
    const parseResult = this.parser.parse(command);

    if (!parseResult.success) {
      return {
        success: false,
        error: parseResult.error,
        command,
      };
    }

    // Get current page context
    const page = await this.contextManager.getOrCreatePage();

    // Execute each action, stopping at the first failure
    const results = [];
    for (const action of parseResult.actions) {
      const result = await this.executor.execute(action, {
        page,
        screenshot: this.screenshot,
        timeout: this.options.timeout,
      });

      results.push(result);

      if (!result.success) {
        return {
          success: false,
          error: result.error,
          // Included for parity with the parse-failure and success shapes.
          command,
          action,
          results,
        };
      }
    }

    return {
      success: true,
      command,
      actions: parseResult.actions,
      results,
    };
  }

  /**
   * Execute multiple commands in sequence, stopping at the first failure.
   * @param {string[]} commands - Array of natural language commands
   * @returns {Promise<Object>} On success `{ success, totalCommands, results }`;
   *   on failure `{ success, error, completedCommands, results }` where
   *   `completedCommands` counts the commands that succeeded before the
   *   failing one.
   */
  async executeSequence(commands) {
    const results = [];

    for (const command of commands) {
      const result = await this.execute(command);
      results.push(result);

      if (!result.success) {
        return {
          success: false,
          error: result.error,
          completedCommands: results.length - 1,
          results,
        };
      }
    }

    return {
      success: true,
      totalCommands: commands.length,
      results,
    };
  }

  /**
   * Take a screenshot of the current page.
   * @param {Object} [options] - Screenshot options
   * @param {string} [options.name] - Screenshot name
   * @param {boolean} [options.fullPage=false] - Capture full page
   * @returns {Promise<string>} Screenshot file path
   * @throws {Error} When the browser has not been launched yet
   */
  async takeScreenshot(options = {}) {
    if (!this.isLaunched) {
      throw new Error('Browser not launched. Call launch() first.');
    }

    const page = await this.contextManager.getOrCreatePage();
    return this.screenshot.capture(page, options);
  }

  /**
   * Compare two screenshots using AI.
   * @param {string} expected - Path to expected screenshot
   * @param {string} actual - Path to actual screenshot
   * @param {Object} [options] - Comparison options
   * @param {number} [options.threshold] - Similarity threshold; defaults to
   *   the agent-level threshold when unset (an explicit `0` is honored)
   * @param {string} [options.description] - What to verify
   * @returns {Promise<import('./ai-comparator').ComparisonResult>}
   */
  async compare(expected, actual, options = {}) {
    return this.comparator.compare(expected, actual, {
      threshold: options.threshold != null ? options.threshold : this.options.threshold,
      description: options.description,
    });
  }

  /**
   * Generate Playwright test code from the recorded action history.
   * @param {Object} [options] - Generation options
   * @param {string} [options.name] - Test name
   * @param {string} [options.output] - Output file path
   * @returns {Promise<string>} Generated test code
   */
  async generateTest(options = {}) {
    const history = this.contextManager.getActionHistory();
    return this.testGenerator.generateTest(history, options);
  }

  /**
   * Get the current page, launching the browser first if necessary.
   * @returns {Promise<import('playwright').Page>}
   */
  async getPage() {
    if (!this.isLaunched) {
      await this.launch();
    }
    return this.contextManager.getOrCreatePage();
  }

  /**
   * Get the action history kept by the context manager.
   * @returns {Array<import('./action-executor').Action>}
   */
  getActionHistory() {
    return this.contextManager.getActionHistory();
  }

  /**
   * Clear the action history kept by the context manager.
   */
  clearHistory() {
    this.contextManager.clearHistory();
  }

  /**
   * Close the browser, if one is open.
   *
   * Agent state is reset even when closing the browser throws, so the agent
   * never keeps pointing at a browser it failed to close; the close error
   * still propagates to the caller.
   * @returns {Promise<void>}
   */
  async close() {
    if (!this.browser) {
      return;
    }

    const browser = this.browser;
    try {
      await browser.close();
    } finally {
      this.browser = null;
      this.isLaunched = false;
      this.contextManager.reset();
    }
  }
}
258
+
259
// The agent class is the module's main export; the collaborating classes are
// attached to it as named properties so callers can reach them from the same
// require() call.
module.exports = BrowserAgent;
Object.assign(module.exports, {
  NLParser,
  ActionExecutor,
  ContextManager,
  ScreenshotCapture,
  AIComparator,
  TestGenerator,
});
@@ -0,0 +1,408 @@
1
+ /**
2
+ * @fileoverview Natural Language Parser for Browser Commands
3
+ * @module agents/browser/nl-parser
4
+ */
5
+
6
+ /**
7
+ * @typedef {Object} Action
8
+ * @property {string} type - Action type
9
+ * @property {string} [url] - URL for navigate actions
10
+ * @property {string} [selector] - CSS selector for element actions
11
+ * @property {string} [value] - Value for fill/select actions
12
+ * @property {number} [delay] - Delay in milliseconds for wait actions
13
+ * @property {string} [name] - Name for screenshot actions
14
+ * @property {boolean} [fullPage] - Full page screenshot flag
15
+ */
16
+
17
+ /**
18
+ * @typedef {Object} ParseResult
19
+ * @property {boolean} success - Whether parsing succeeded
20
+ * @property {Action[]} actions - Parsed actions
21
+ * @property {string} [error] - Error message if failed
22
+ */
23
+
24
/**
 * Action patterns for Japanese and English.
 *
 * Each entry lists the trigger `patterns` for one action type plus the
 * auxiliary pattern used to pull that action's argument out of the text.
 * Quoted arguments may use "…", '…' (fill only) or the Japanese brackets
 * 「…」, 『…』 and ｢…｣.
 */
const ACTION_PATTERNS = {
  navigate: {
    patterns: [
      /(?:に)?移動|開く|アクセス/,
      /(?:go to|navigate|open|visit)/i,
    ],
    urlPattern: /(https?:\/\/[^\s]+)/,
  },
  click: {
    patterns: [
      /クリック|押す|タップ|選択/,
      /click|press|tap|select/i,
    ],
  },
  fill: {
    patterns: [
      /(?:に|を)?入力|記入|タイプ/,
      /fill|type|enter|input/i,
    ],
    valuePattern: /[「『｢]([^」』｣]+)[」』｣]|"([^"]+)"|'([^']+)'/,
  },
  select: {
    patterns: [
      /ドロップダウン.*選択|選択.*オプション/,
      /select.*dropdown|choose.*option/i,
    ],
  },
  wait: {
    patterns: [
      /秒?待つ|待機/,
      /wait|pause|delay/i,
    ],
    // Group 1 is the amount (decimals allowed), group 2 the unit. Capturing
    // the unit lets parseWait decide seconds vs. milliseconds from the
    // matched span instead of from unrelated words elsewhere in the command.
    durationPattern: /(\d+(?:\.\d+)?)\s*(ミリ秒|秒|milliseconds?|msecs?|ms|seconds?|secs?)/i,
  },
  screenshot: {
    patterns: [
      /スクリーンショット|画面.*(?:保存|撮|キャプチャ)/,
      /screenshot|capture|save.*screen/i,
    ],
    namePattern: /[「『｢]([^」』｣]+)[」』｣]|"([^"]+)"|として\s*(\S+)/,
  },
  assert: {
    patterns: [
      /(?:が)?表示|確認|検証|存在/,
      /(?:is )?visible|assert|verify|check|exists?/i,
    ],
    textPattern: /[「『｢]([^」』｣]+)[」』｣]|"([^"]+)"/,
  },
};

/**
 * Common element selector patterns: maps a spoken element name to the
 * selector list tried for it.
 */
const ELEMENT_PATTERNS = {
  // Japanese element names
  'ログインボタン': 'button:has-text("ログイン"), [data-testid="login-button"], button[type="submit"]',
  '送信ボタン': 'button:has-text("送信"), [data-testid="submit-button"], button[type="submit"]',
  'メール': 'input[type="email"], input[name="email"], [data-testid="email-input"]',
  'パスワード': 'input[type="password"], [data-testid="password-input"]',
  '検索': 'input[type="search"], [data-testid="search-input"], input[name="q"]',
  // English element names
  'login button': 'button:has-text("Login"), [data-testid="login-button"], button[type="submit"]',
  'submit button': 'button:has-text("Submit"), [data-testid="submit-button"], button[type="submit"]',
  'email': 'input[type="email"], input[name="email"], [data-testid="email-input"]',
  'password': 'input[type="password"], [data-testid="password-input"]',
  'search': 'input[type="search"], [data-testid="search-input"], input[name="q"]',
};

/** Delay used by wait actions that do not state a duration. */
const DEFAULT_WAIT_MS = 1000;

/** Separators between the individual actions of a compound command. */
const COMMAND_SEPARATORS = /[,、]|\s+(?:そして|して|and|then)\s+/i;

/**
 * Quoted spans that must survive command splitting intact. A single quote
 * only opens a span at the start of the text or after a non-word character,
 * so apostrophes in words such as "don't" are not mistaken for quotes.
 */
const QUOTED_SEGMENT = /"[^"]*"|[「『｢][^」』｣]*[」』｣]|(^|[^\w'])'[^']*'(?!\w)/g;

/** Delimiter of the placeholders that stand in for quoted spans. */
const MASK = '\u0000';

/** Japanese navigate keyword (with optional particle) glued onto a URL. */
const GLUED_NAVIGATE_SUFFIX = /(?:に|を|へ)?(?:移動|開く|アクセス).*$/;

/** Closing quotes and sentence punctuation picked up at the end of a URL. */
const TRAILING_URL_NOISE = /["'」』｣>.,;:!?]+$/;

/** Element keyword followed by its label, either quoted or a single bare word. */
const LABELLED_ELEMENT = /(?:の)?(?:ボタン|リンク|入力欄?|フィールド|テキスト|button|link|input|field|text)\s*(?:"([^"]+)"|'([^']+)'|[「『｢]([^」』｣]+)[」』｣]|([^\s」』｣]+))/i;

/**
 * Natural Language Parser for browser commands.
 *
 * Turns a Japanese or English command such as
 * `open https://example.com, then click login button` into a list of action
 * objects. Every action carries its `type` and the `raw` text it came from.
 */
class NLParser {
  constructor() {
    this.actionPatterns = ACTION_PATTERNS;
    // Copied so that addElementPattern() customizes this parser only instead
    // of mutating the table shared by every instance.
    this.elementPatterns = Object.assign({}, ELEMENT_PATTERNS);
  }

  /**
   * Parse a natural language command into actions. Never throws: failures
   * are reported through `success: false` and `error`.
   * @param {string} command - Natural language command
   * @returns {ParseResult}
   */
  parse(command) {
    if (typeof command !== 'string') {
      return {
        success: false,
        actions: [],
        error: 'Command must be a string',
      };
    }

    try {
      const normalizedCommand = this.normalizeCommand(command);
      const actions = this.extractActions(normalizedCommand);

      if (actions.length === 0) {
        return {
          success: false,
          actions: [],
          error: `Could not understand command: "${command}"`,
        };
      }

      return {
        success: true,
        actions,
      };
    } catch (error) {
      return {
        success: false,
        actions: [],
        error: error.message,
      };
    }
  }

  /**
   * Normalize a command for parsing: trims it, collapses whitespace runs and
   * converts Japanese punctuation (、。) to its ASCII counterpart.
   * @param {string} command
   * @returns {string}
   */
  normalizeCommand(command) {
    return command
      .trim()
      .replace(/\s+/g, ' ')
      .replace(/、/g, ',')
      .replace(/。/g, '.');
  }

  /**
   * Extract actions from a normalized command. Parts that match no action
   * type are skipped.
   * @param {string} command
   * @returns {Action[]}
   */
  extractActions(command) {
    const actions = [];

    for (const part of this.splitCommand(command)) {
      const action = this.parseAction(part);
      if (action) {
        actions.push(action);
      }
    }

    return actions;
  }

  /**
   * Split a command into individual action parts on commas and conjunctions
   * (そして / して / and / then). Separators inside quoted text are left
   * alone, so `type "Hello, world" into search` stays a single part.
   * @param {string} command
   * @returns {string[]} Trimmed, non-empty parts
   */
  splitCommand(command) {
    // Swap quoted spans for placeholders so the separator regex cannot see
    // into them, split, then put the original spans back.
    const held = [];
    const masked = command.replace(QUOTED_SEGMENT, (segment, lead) => {
      const prefix = lead || '';
      held.push(segment.slice(prefix.length));
      return `${prefix}${MASK}${held.length - 1}${MASK}`;
    });
    const restore = part =>
      part.replace(/\u0000(\d+)\u0000/g, (_, index) => held[Number(index)]);

    return masked
      .split(COMMAND_SEPARATORS)
      .map(part => restore(part).trim())
      .filter(Boolean);
  }

  /**
   * Parse a single action from text. Action types are tried in a fixed
   * order (navigate, screenshot, wait, fill, select, click, assert) and the
   * first whose trigger pattern matches wins.
   * @param {string} text
   * @returns {Action|null} The action, or null when nothing matches
   */
  parseAction(text) {
    if (this.matchesPattern(text, this.actionPatterns.navigate.patterns)) {
      return this.parseNavigate(text);
    }

    if (this.matchesPattern(text, this.actionPatterns.screenshot.patterns)) {
      return this.parseScreenshot(text);
    }

    if (this.matchesPattern(text, this.actionPatterns.wait.patterns)) {
      return this.parseWait(text);
    }

    if (this.matchesPattern(text, this.actionPatterns.fill.patterns)) {
      return this.parseFill(text);
    }

    if (this.matchesPattern(text, this.actionPatterns.select.patterns)) {
      return this.parseSelect(text);
    }

    if (this.matchesPattern(text, this.actionPatterns.click.patterns)) {
      return this.parseClick(text);
    }

    if (this.matchesPattern(text, this.actionPatterns.assert.patterns)) {
      return this.parseAssert(text);
    }

    return null;
  }

  /**
   * Check if text matches any of the patterns.
   * @param {string} text
   * @param {RegExp[]} patterns
   * @returns {boolean}
   */
  matchesPattern(text, patterns) {
    return patterns.some(pattern => pattern.test(text));
  }

  /**
   * Parse navigate action. The URL is cleaned of text that merely touches
   * it: a Japanese navigate keyword written without a space
   * (`https://example.comに移動`), a closing quote, or sentence punctuation.
   * @param {string} text
   * @returns {Action} Navigate action; `url` is 'about:blank' when the text
   *   contains no http(s) URL
   */
  parseNavigate(text) {
    const urlMatch = text.match(this.actionPatterns.navigate.urlPattern);
    const url = urlMatch
      ? urlMatch[1].replace(GLUED_NAVIGATE_SUFFIX, '').replace(TRAILING_URL_NOISE, '')
      : null;

    return {
      type: 'navigate',
      url: url || 'about:blank',
      raw: text,
    };
  }

  /**
   * Parse click action.
   * @param {string} text
   * @returns {Action}
   */
  parseClick(text) {
    const selector = this.extractSelector(text);

    return {
      type: 'click',
      selector,
      raw: text,
    };
  }

  /**
   * Parse fill action. The value is the first quoted span in the text, or
   * an empty string when nothing is quoted.
   * @param {string} text
   * @returns {Action}
   */
  parseFill(text) {
    const selector = this.extractSelector(text);
    const valueMatch = text.match(this.actionPatterns.fill.valuePattern);
    const value = valueMatch ? (valueMatch[1] || valueMatch[2] || valueMatch[3]) : '';

    return {
      type: 'fill',
      selector,
      value,
      raw: text,
    };
  }

  /**
   * Parse select action. Reuses the fill value pattern to read the option.
   * @param {string} text
   * @returns {Action}
   */
  parseSelect(text) {
    const selector = this.extractSelector(text);
    const valueMatch = text.match(this.actionPatterns.fill.valuePattern);
    const value = valueMatch ? (valueMatch[1] || valueMatch[2] || valueMatch[3]) : '';

    return {
      type: 'select',
      selector,
      value,
      raw: text,
    };
  }

  /**
   * Parse wait action.
   * @param {string} text
   * @returns {Action} Wait action whose `delay` is in milliseconds; defaults
   *   to one second when the text states no duration
   */
  parseWait(text) {
    let delay = DEFAULT_WAIT_MS;
    const durationMatch = text.match(this.actionPatterns.wait.durationPattern);

    if (durationMatch) {
      const amount = Number.parseFloat(durationMatch[1]);
      // Decide the unit from the matched span only; scanning the whole text
      // would mistake the "ms" in words like "items" for milliseconds.
      const unit = durationMatch[2].toLowerCase();
      const isMilliseconds = unit === 'ミリ秒' || unit.charAt(0) === 'm';
      delay = Math.round(isMilliseconds ? amount : amount * 1000);
    }

    return {
      type: 'wait',
      delay,
      raw: text,
    };
  }

  /**
   * Parse screenshot action.
   * @param {string} text
   * @returns {Action} Screenshot action; `name` is undefined when the text
   *   gives none, `fullPage` is true when the text asks for the full page
   */
  parseScreenshot(text) {
    const nameMatch = text.match(this.actionPatterns.screenshot.namePattern);
    const name = nameMatch ? (nameMatch[1] || nameMatch[2] || nameMatch[3]) : undefined;
    const fullPage = /全体|full\s*page/i.test(text);

    return {
      type: 'screenshot',
      name,
      fullPage,
      raw: text,
    };
  }

  /**
   * Parse assert action. Quoted text becomes both the expected text and a
   * text selector; otherwise the selector is extracted from the wording and
   * `expectedText` is null.
   * @param {string} text
   * @returns {Action}
   */
  parseAssert(text) {
    const textMatch = text.match(this.actionPatterns.assert.textPattern);
    const expectedText = textMatch ? (textMatch[1] || textMatch[2]) : null;
    const selector = expectedText ? `text="${expectedText}"` : this.extractSelector(text);

    return {
      type: 'assert',
      selector,
      expectedText,
      raw: text,
    };
  }

  /**
   * Extract a selector from text. Tried in order: a known element name, a
   * literal CSS selector (#id, .class, [attr]), a data-testid mention, and
   * an element keyword followed by a label.
   * @param {string} text
   * @returns {string} The selector, or '*' when nothing can be derived
   */
  extractSelector(text) {
    // Check for known element patterns
    const lowered = text.toLowerCase();
    for (const [name, selector] of Object.entries(this.elementPatterns)) {
      if (lowered.includes(name.toLowerCase())) {
        return selector;
      }
    }

    // Check for CSS selector in the text (match only the selector part)
    const selectorMatch = text.match(/([#\.][a-zA-Z][\w\-]*|\[[^\]]+\])/);
    if (selectorMatch) {
      return selectorMatch[0];
    }

    // Check for data-testid
    const testIdMatch = text.match(/data-testid[=:]["']?([^"'\s]+)/i);
    if (testIdMatch) {
      return `[data-testid="${testIdMatch[1]}"]`;
    }

    // Try to extract element description. Quotes around the label are
    // dropped so `button "Save"` yields text="Save", not text=""Save"".
    const elementMatch = text.match(LABELLED_ELEMENT);
    if (elementMatch) {
      const label = (elementMatch[1] || elementMatch[2] || elementMatch[3] || elementMatch[4] || '')
        .replace(/^["'「『｢]+|["'」』｣]+$/g, '');
      if (label) {
        return `text="${label}"`;
      }
    }

    // Default to a generic selector
    return '*';
  }

  /**
   * Add a custom element pattern to this parser instance.
   * @param {string} name - Element name
   * @param {string} selector - CSS selector
   */
  addElementPattern(name, selector) {
    this.elementPatterns[name] = selector;
  }

  /**
   * Get all supported action types.
   * @returns {string[]}
   */
  getSupportedActions() {
    return Object.keys(this.actionPatterns);
  }
}
407
+
408
+ module.exports = NLParser;