camel-ai 0.2.76a0__py3-none-any.whl → 0.2.76a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (34)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +8 -1
  3. camel/environments/tic_tac_toe.py +1 -1
  4. camel/memories/__init__.py +2 -1
  5. camel/memories/agent_memories.py +3 -1
  6. camel/memories/blocks/chat_history_block.py +17 -2
  7. camel/models/base_model.py +30 -0
  8. camel/societies/workforce/single_agent_worker.py +44 -38
  9. camel/societies/workforce/workforce.py +10 -1
  10. camel/storages/object_storages/google_cloud.py +1 -1
  11. camel/toolkits/__init__.py +9 -2
  12. camel/toolkits/aci_toolkit.py +45 -0
  13. camel/toolkits/context_summarizer_toolkit.py +683 -0
  14. camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +194 -34
  15. camel/toolkits/hybrid_browser_toolkit/config_loader.py +4 -0
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +67 -2
  17. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +62 -45
  18. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +489 -60
  19. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +5 -2
  20. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +72 -12
  21. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +2 -14
  22. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +1 -0
  23. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +228 -62
  24. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +4 -4
  25. camel/toolkits/markitdown_toolkit.py +27 -1
  26. camel/toolkits/note_taking_toolkit.py +18 -8
  27. camel/toolkits/slack_toolkit.py +50 -1
  28. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  29. camel/toolkits/wechat_official_toolkit.py +483 -0
  30. camel/utils/context_utils.py +395 -0
  31. {camel_ai-0.2.76a0.dist-info → camel_ai-0.2.76a2.dist-info}/METADATA +84 -6
  32. {camel_ai-0.2.76a0.dist-info → camel_ai-0.2.76a2.dist-info}/RECORD +34 -30
  33. {camel_ai-0.2.76a0.dist-info → camel_ai-0.2.76a2.dist-info}/WHEEL +0 -0
  34. {camel_ai-0.2.76a0.dist-info → camel_ai-0.2.76a2.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,11 @@
1
- import { Page, Browser, BrowserContext, chromium, ConsoleMessage } from 'playwright';
1
+ import { Page, Browser, BrowserContext, chromium, ConsoleMessage, Frame } from 'playwright';
2
2
  import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
3
  import { ConfigLoader, StealthConfig } from './config-loader';
4
4
 
5
5
  export class HybridBrowserSession {
6
6
  private browser: Browser | null = null;
7
7
  private context: BrowserContext | null = null;
8
+ private contextOwnedByUs: boolean = false;
8
9
  private pages: Map<string, Page> = new Map();
9
10
  private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
10
11
  private currentTabId: string | null = null;
@@ -50,8 +51,8 @@ export class HybridBrowserSession {
50
51
  const browserConfig = this.configLoader.getBrowserConfig();
51
52
  const stealthConfig = this.configLoader.getStealthConfig();
52
53
 
53
- // Check if CDP connection is requested
54
- if (browserConfig.connectOverCdp && browserConfig.cdpUrl) {
54
+ // Check if CDP URL is provided
55
+ if (browserConfig.cdpUrl) {
55
56
  // Connect to existing browser via CDP
56
57
  this.browser = await chromium.connectOverCDP(browserConfig.cdpUrl);
57
58
 
@@ -59,6 +60,7 @@ export class HybridBrowserSession {
59
60
  const contexts = this.browser.contexts();
60
61
  if (contexts.length > 0) {
61
62
  this.context = contexts[0];
63
+ this.contextOwnedByUs = false;
62
64
 
63
65
  // Apply stealth headers to existing context if configured
64
66
  // Note: userAgent cannot be changed on an existing context
@@ -86,31 +88,66 @@ export class HybridBrowserSession {
86
88
  }
87
89
 
88
90
  this.context = await this.browser.newContext(contextOptions);
91
+ this.contextOwnedByUs = true;
92
+ this.browser = this.context.browser();
89
93
  }
90
94
 
91
95
  const pages = this.context.pages();
92
- if (pages.length > 0) {
93
- // Map existing pages - for CDP, find ONE available blank page
94
- let availablePageFound = false;
95
- for (const page of pages) {
96
- const pageUrl = page.url();
97
- if (this.isBlankPageUrl(pageUrl)) {
96
+ console.log(`[CDP] cdpKeepCurrentPage: ${browserConfig.cdpKeepCurrentPage}, pages count: ${pages.length}`);
97
+ if (browserConfig.cdpKeepCurrentPage) {
98
+ // Use existing page without creating new ones
99
+ if (pages.length > 0) {
100
+ // Find first non-closed page
101
+ let validPage: Page | null = null;
102
+ for (const page of pages) {
103
+ if (!page.isClosed()) {
104
+ validPage = page;
105
+ break;
106
+ }
107
+ }
108
+
109
+ if (validPage) {
98
110
  const tabId = this.generateTabId();
99
- this.registerNewPage(tabId, page);
111
+ this.registerNewPage(tabId, validPage);
100
112
  this.currentTabId = tabId;
101
- availablePageFound = true;
102
- console.log(`[CDP] Registered blank page as initial tab: ${tabId}, URL: ${pageUrl}`);
103
- break; // Only register ONE page initially
113
+ console.log(`[CDP] cdpKeepCurrentPage mode: using existing page as initial tab: ${tabId}, URL: ${validPage.url()}`);
114
+ } else {
115
+ throw new Error('No active pages available in CDP mode with cdpKeepCurrentPage=true (all pages are closed)');
104
116
  }
105
- }
106
-
107
- // If no available blank pages found in CDP mode, we cannot create new ones
108
- if (!availablePageFound) {
109
- throw new Error('No available blank tabs found in CDP mode. The frontend should have pre-created blank tabs.');
117
+ } else {
118
+ throw new Error('No pages available in CDP mode with cdpKeepCurrentPage=true');
110
119
  }
111
120
  } else {
112
- // In CDP mode, newPage is not supported
113
- throw new Error('No pages available in CDP mode and newPage() is not supported. Ensure the frontend has pre-created blank tabs.');
121
+ // Look for blank pages or create new ones
122
+ if (pages.length > 0) {
123
+ // Find one available blank page
124
+ let availablePageFound = false;
125
+ for (const page of pages) {
126
+ const pageUrl = page.url();
127
+ if (this.isBlankPageUrl(pageUrl)) {
128
+ const tabId = this.generateTabId();
129
+ this.registerNewPage(tabId, page);
130
+ this.currentTabId = tabId;
131
+ availablePageFound = true;
132
+ console.log(`[CDP] Registered blank page as initial tab: ${tabId}, URL: ${pageUrl}`);
133
+ break;
134
+ }
135
+ }
136
+
137
+ if (!availablePageFound) {
138
+ console.log('[CDP] No blank pages found, creating new page');
139
+ const newPage = await this.context.newPage();
140
+ const tabId = this.generateTabId();
141
+ this.registerNewPage(tabId, newPage);
142
+ this.currentTabId = tabId;
143
+ }
144
+ } else {
145
+ console.log('[CDP] No existing pages, creating initial page');
146
+ const newPage = await this.context.newPage();
147
+ const tabId = this.generateTabId();
148
+ this.registerNewPage(tabId, newPage);
149
+ this.currentTabId = tabId;
150
+ }
114
151
  }
115
152
  } else {
116
153
  // Original launch logic
@@ -137,7 +174,8 @@ export class HybridBrowserSession {
137
174
  browserConfig.userDataDir,
138
175
  launchOptions
139
176
  );
140
-
177
+ this.contextOwnedByUs = true;
178
+ this.browser = this.context.browser();
141
179
  const pages = this.context.pages();
142
180
  if (pages.length > 0) {
143
181
  const initialTabId = this.generateTabId();
@@ -161,6 +199,7 @@ export class HybridBrowserSession {
161
199
  }
162
200
 
163
201
  this.context = await this.browser.newContext(contextOptions);
202
+ this.contextOwnedByUs = true;
164
203
 
165
204
  const initialPage = await this.context.newPage();
166
205
  const initialTabId = this.generateTabId();
@@ -199,12 +238,57 @@ export class HybridBrowserSession {
199
238
 
200
239
  async getCurrentPage(): Promise<Page> {
201
240
  if (!this.currentTabId || !this.pages.has(this.currentTabId)) {
202
- // In CDP mode, we cannot create new pages
203
241
  const browserConfig = this.configLoader.getBrowserConfig();
204
- if (browserConfig.connectOverCdp) {
205
- throw new Error('No active page available in CDP mode; frontend must pre-create blank tabs.');
242
+
243
+ // In CDP keep-current-page mode, find existing page
244
+ if (browserConfig.cdpKeepCurrentPage && browserConfig.cdpUrl && this.context) {
245
+ const allPages = this.context.pages();
246
+ console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Looking for existing page, found ${allPages.length} pages`);
247
+
248
+ if (allPages.length > 0) {
249
+ // Try to find a page that's not already tracked
250
+ for (const page of allPages) {
251
+ const isTracked = Array.from(this.pages.values()).includes(page);
252
+ if (!isTracked && !page.isClosed()) {
253
+ const tabId = this.generateTabId();
254
+ this.registerNewPage(tabId, page);
255
+ this.currentTabId = tabId;
256
+ console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Found and registered untracked page: ${tabId}`);
257
+ return page;
258
+ }
259
+ }
260
+
261
+ // If all pages are tracked, use the first available one
262
+ const firstPage = allPages[0];
263
+ if (!firstPage.isClosed()) {
264
+ // Find the tab ID for this page
265
+ for (const [tabId, page] of this.pages.entries()) {
266
+ if (page === firstPage) {
267
+ this.currentTabId = tabId;
268
+ console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Using existing tracked page: ${tabId}`);
269
+ return page;
270
+ }
271
+ }
272
+ }
273
+ }
274
+
275
+ throw new Error('No active page available in CDP mode with cdpKeepCurrentPage=true');
276
+ }
277
+
278
+ // Normal mode: create new page
279
+ if (this.context) {
280
+ console.log('[getCurrentPage] No active page, creating new page');
281
+ const newPage = await this.context.newPage();
282
+ const tabId = this.generateTabId();
283
+ this.registerNewPage(tabId, newPage);
284
+ this.currentTabId = tabId;
285
+
286
+ newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
287
+ newPage.setDefaultTimeout(browserConfig.navigationTimeout);
288
+
289
+ return newPage;
206
290
  }
207
- throw new Error('No active page available');
291
+ throw new Error('No browser context available');
208
292
  }
209
293
  return this.pages.get(this.currentTabId)!;
210
294
  }
@@ -396,7 +480,7 @@ export class HybridBrowserSession {
396
480
  /**
397
481
  * Enhanced click implementation with new tab detection and scroll fix
398
482
  */
399
- private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string }> {
483
+ private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string; diffSnapshot?: string }> {
400
484
 
401
485
  try {
402
486
  // Ensure we have the latest snapshot and mapping
@@ -413,6 +497,17 @@ export class HybridBrowserSession {
413
497
  return { success: false, error: `Element with ref ${ref} not found` };
414
498
  }
415
499
 
500
+ const role = await element.getAttribute('role');
501
+ const elementTagName = await element.evaluate(el => el.tagName.toLowerCase());
502
+ const isCombobox = role === 'combobox' || elementTagName === 'combobox';
503
+ const isTextbox = role === 'textbox' || elementTagName === 'input' || elementTagName === 'textarea';
504
+ const shouldCheckDiff = isCombobox || isTextbox;
505
+
506
+ let snapshotBefore: string | null = null;
507
+ if (shouldCheckDiff) {
508
+ snapshotBefore = await (page as any)._snapshotForAI();
509
+ }
510
+
416
511
  // Check element properties
417
512
  const browserConfig = this.configLoader.getBrowserConfig();
418
513
  const target = await element.getAttribute(browserConfig.targetAttribute);
@@ -482,13 +577,17 @@ export class HybridBrowserSession {
482
577
  }
483
578
  } else {
484
579
  // Add options to prevent scrolling issues
485
- try {
486
- // First try normal click
487
- const browserConfig = this.configLoader.getBrowserConfig();
488
- await element.click({ timeout: browserConfig.clickTimeout });
489
- } catch (clickError) {
490
- // If normal click fails due to scrolling, try force click
491
- await element.click({ force: browserConfig.forceClick });
580
+ const browserConfig = this.configLoader.getBrowserConfig();
581
+ await element.click({ force: browserConfig.forceClick });
582
+
583
+ if (shouldCheckDiff && snapshotBefore) {
584
+ await page.waitForTimeout(300);
585
+ const snapshotAfter = await (page as any)._snapshotForAI();
586
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);
587
+
588
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
589
+ return { success: true, method: 'playwright-aria-ref', diffSnapshot };
590
+ }
492
591
  }
493
592
 
494
593
  return { success: true, method: 'playwright-aria-ref' };
@@ -500,11 +599,46 @@ export class HybridBrowserSession {
500
599
  }
501
600
  }
502
601
 
602
+ /**
603
+ * Extract diff between two snapshots, returning only new elements of specified types
604
+ */
605
+ private getSnapshotDiff(snapshotBefore: string, snapshotAfter: string, targetRoles: string[]): string {
606
+ const refsBefore = new Set<string>();
607
+ const refPattern = /\[ref=([^\]]+)\]/g;
608
+ let match;
609
+ while ((match = refPattern.exec(snapshotBefore)) !== null) {
610
+ refsBefore.add(match[1]);
611
+ }
612
+
613
+ const lines = snapshotAfter.split('\n');
614
+ const newElements: string[] = [];
615
+
616
+ for (const line of lines) {
617
+ const refMatch = line.match(/\[ref=([^\]]+)\]/);
618
+ if (refMatch && !refsBefore.has(refMatch[1])) {
619
+ const hasTargetRole = targetRoles.some(role => {
620
+ const rolePattern = new RegExp(`\\b${role}\\b`, 'i');
621
+ return rolePattern.test(line);
622
+ });
623
+
624
+ if (hasTargetRole) {
625
+ newElements.push(line.trim());
626
+ }
627
+ }
628
+ }
629
+
630
+ if (newElements.length > 0) {
631
+ return newElements.join('\n');
632
+ } else {
633
+ return '';
634
+ }
635
+ }
636
+
503
637
  /**
504
638
  * Simplified type implementation using Playwright's aria-ref selector
505
639
  * Supports both single and multiple input operations
506
640
  */
507
- private async performType(page: Page, ref: string | undefined, text: string | undefined, inputs?: Array<{ ref: string; text: string }>): Promise<{ success: boolean; error?: string; details?: Record<string, any> }> {
641
+ private async performType(page: Page, ref: string | undefined, text: string | undefined, inputs?: Array<{ ref: string; text: string }>): Promise<{ success: boolean; error?: string; details?: Record<string, any>; diffSnapshot?: string }> {
508
642
  try {
509
643
  // Ensure we have the latest snapshot
510
644
  await (page as any)._snapshotForAI();
@@ -514,22 +648,11 @@ export class HybridBrowserSession {
514
648
  const results: Record<string, { success: boolean; error?: string }> = {};
515
649
 
516
650
  for (const input of inputs) {
517
- const selector = `aria-ref=${input.ref}`;
518
- const element = await page.locator(selector).first();
519
-
520
- const exists = await element.count() > 0;
521
- if (!exists) {
522
- results[input.ref] = { success: false, error: `Element with ref ${input.ref} not found` };
523
- continue;
524
- }
525
-
526
- try {
527
- // Type text using Playwright's built-in fill method
528
- await element.fill(input.text);
529
- results[input.ref] = { success: true };
530
- } catch (error) {
531
- results[input.ref] = { success: false, error: `Type failed: ${error}` };
532
- }
651
+ const singleResult = await this.performType(page, input.ref, input.text);
652
+ results[input.ref] = {
653
+ success: singleResult.success,
654
+ error: singleResult.error
655
+ };
533
656
  }
534
657
 
535
658
  // Check if all inputs were successful
@@ -556,10 +679,292 @@ export class HybridBrowserSession {
556
679
  return { success: false, error: `Element with ref ${ref} not found` };
557
680
  }
558
681
 
559
- // Type text using Playwright's built-in fill method
560
- await element.fill(text);
682
+ // Get element attributes to check if it's readonly or a special input type
683
+ let originalPlaceholder: string | null = null;
684
+ let isReadonly = false;
685
+ let elementType: string | null = null;
686
+ let isCombobox = false;
687
+ let isTextbox = false;
688
+ let shouldCheckDiff = false;
689
+
690
+ try {
691
+ // Get element info in one evaluation to minimize interactions
692
+ const elementInfo = await element.evaluate((el: any) => {
693
+ return {
694
+ placeholder: el.placeholder || null,
695
+ readonly: el.readOnly || el.hasAttribute('readonly'),
696
+ type: el.type || null,
697
+ tagName: el.tagName.toLowerCase(),
698
+ disabled: el.disabled || false,
699
+ role: el.getAttribute('role'),
700
+ ariaHaspopup: el.getAttribute('aria-haspopup')
701
+ };
702
+ });
703
+
704
+ originalPlaceholder = elementInfo.placeholder;
705
+ isReadonly = elementInfo.readonly;
706
+ elementType = elementInfo.type;
707
+ isCombobox = elementInfo.role === 'combobox' ||
708
+ elementInfo.tagName === 'combobox' ||
709
+ elementInfo.ariaHaspopup === 'listbox';
710
+ isTextbox = elementInfo.role === 'textbox' ||
711
+ elementInfo.tagName === 'input' ||
712
+ elementInfo.tagName === 'textarea';
713
+ shouldCheckDiff = isCombobox || isTextbox;
714
+
715
+ } catch (e) {
716
+ console.log(`Warning: Failed to get element attributes: ${e}`);
717
+ }
561
718
 
562
- return { success: true };
719
+ // Get snapshot before action to record existing elements
720
+ const snapshotBefore = await (page as any)._snapshotForAI();
721
+ const existingRefs = new Set<string>();
722
+ const refPattern = /\[ref=([^\]]+)\]/g;
723
+ let match;
724
+ while ((match = refPattern.exec(snapshotBefore)) !== null) {
725
+ existingRefs.add(match[1]);
726
+ }
727
+ console.log(`Found ${existingRefs.size} total elements before action`);
728
+
729
+ // If element is readonly or a date/time input, skip fill attempt and go directly to click
730
+ if (isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '')) {
731
+ console.log(`Element ref=${ref} is readonly or date/time input, skipping direct fill attempt`);
732
+
733
+ // Click with force option to avoid scrolling
734
+ try {
735
+ await element.click({ force: true });
736
+ console.log(`Clicked readonly/special element ref=${ref} to trigger dynamic content`);
737
+ // Wait for potential dynamic content to appear
738
+ await page.waitForTimeout(500);
739
+ } catch (clickError) {
740
+ console.log(`Warning: Failed to click element: ${clickError}`);
741
+ }
742
+ } else {
743
+ // For normal inputs, click first then try to fill
744
+ try {
745
+ await element.click({ force: true });
746
+ console.log(`Clicked element ref=${ref} before typing`);
747
+ } catch (clickError) {
748
+ console.log(`Warning: Failed to click element before typing: ${clickError}`);
749
+ }
750
+
751
+ // Try to fill the element directly
752
+ try {
753
+ // Use force option to avoid scrolling during fill
754
+ await element.fill(text, { timeout: 3000, force: true });
755
+
756
+ // If this element might show dropdown, wait and check for new elements
757
+ if (shouldCheckDiff) {
758
+ await page.waitForTimeout(300);
759
+ const snapshotAfter = await (page as any)._snapshotForAI();
760
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);
761
+
762
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
763
+ return { success: true, diffSnapshot };
764
+ }
765
+ }
766
+
767
+ return { success: true };
768
+ } catch (fillError: any) {
769
+ // Log the error for debugging
770
+ console.log(`Fill error for ref ${ref}: ${fillError.message}`);
771
+
772
+ // Check for various error messages that indicate the element is not fillable
773
+ const errorMessage = fillError.message.toLowerCase();
774
+ if (errorMessage.includes('not an <input>') ||
775
+ errorMessage.includes('not have a role allowing') ||
776
+ errorMessage.includes('element is not') ||
777
+ errorMessage.includes('cannot type') ||
778
+ errorMessage.includes('readonly') ||
779
+ errorMessage.includes('not editable') ||
780
+ errorMessage.includes('timeout') ||
781
+ errorMessage.includes('timeouterror')) {
782
+
783
+ // Click the element again to trigger dynamic content (like date pickers)
784
+ try {
785
+ await element.click({ force: true });
786
+ console.log(`Clicked element ref=${ref} again to trigger dynamic content`);
787
+ // Wait for potential dynamic content to appear
788
+ await page.waitForTimeout(500);
789
+ } catch (clickError) {
790
+ console.log(`Warning: Failed to click element to trigger dynamic content: ${clickError}`);
791
+ }
792
+
793
+ // Step 1: Try to find input elements within the clicked element
794
+ const inputSelector = `input:visible, textarea:visible, [contenteditable="true"]:visible, [role="textbox"]:visible`;
795
+ const inputElement = await element.locator(inputSelector).first();
796
+
797
+ const inputExists = await inputElement.count() > 0;
798
+ if (inputExists) {
799
+ console.log(`Found input element within ref ${ref}, attempting to fill`);
800
+ try {
801
+ await inputElement.fill(text, { force: true });
802
+
803
+ // If element might show dropdown, check for new elements
804
+ if (shouldCheckDiff) {
805
+ await page.waitForTimeout(300);
806
+ const snapshotFinal = await (page as any)._snapshotForAI();
807
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
808
+
809
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
810
+ return { success: true, diffSnapshot };
811
+ }
812
+ }
813
+
814
+ return { success: true };
815
+ } catch (innerError) {
816
+ console.log(`Failed to fill child element: ${innerError}`);
817
+ }
818
+ }
819
+
820
+ // Step 2: Look for new elements that appeared after the action
821
+ console.log(`Looking for new elements that appeared after action...`);
822
+
823
+ // Get snapshot after action to find new elements
824
+ const snapshotAfter = await (page as any)._snapshotForAI();
825
+ const newRefs = new Set<string>();
826
+ const afterRefPattern = /\[ref=([^\]]+)\]/g;
827
+ let afterMatch;
828
+ while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
829
+ const refId = afterMatch[1];
830
+ if (!existingRefs.has(refId)) {
831
+ newRefs.add(refId);
832
+ }
833
+ }
834
+
835
+ console.log(`Found ${newRefs.size} new elements after action`);
836
+
837
+ // If we have a placeholder, try to find new input elements with that placeholder
838
+ if (originalPlaceholder && newRefs.size > 0) {
839
+ console.log(`Looking for new input elements with placeholder: ${originalPlaceholder}`);
840
+
841
+ // Try each new ref to see if it's an input with our placeholder
842
+ for (const newRef of newRefs) {
843
+ try {
844
+ const newElement = await page.locator(`aria-ref=${newRef}`).first();
845
+ const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
846
+
847
+ if (tagName === 'input' || tagName === 'textarea') {
848
+ const placeholder = await newElement.getAttribute('placeholder').catch(() => null);
849
+ if (placeholder === originalPlaceholder) {
850
+ console.log(`Found new input element with matching placeholder: ref=${newRef}`);
851
+
852
+ // Check if it's visible and fillable
853
+ const elementInfo = await newElement.evaluate((el: any) => {
854
+ return {
855
+ tagName: el.tagName,
856
+ id: el.id,
857
+ className: el.className,
858
+ placeholder: el.placeholder,
859
+ isVisible: el.offsetParent !== null,
860
+ isReadonly: el.readOnly || el.getAttribute('readonly') !== null
861
+ };
862
+ });
863
+ console.log(`New element details:`, JSON.stringify(elementInfo));
864
+
865
+ // Try to fill it with force to avoid scrolling
866
+ await newElement.fill(text, { force: true });
867
+
868
+ // If element might show dropdown, check for new elements
869
+ if (shouldCheckDiff) {
870
+ await page.waitForTimeout(300);
871
+ const snapshotFinal = await (page as any)._snapshotForAI();
872
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
873
+
874
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
875
+ return { success: true, diffSnapshot };
876
+ }
877
+ }
878
+
879
+ return { success: true };
880
+ }
881
+ }
882
+ } catch (e) {
883
+ // Ignore errors for non-input elements
884
+ }
885
+ }
886
+ }
887
+
888
+ console.log(`No suitable input element found for ref ${ref}`);
889
+ }
890
+ // Re-throw the original error if we couldn't find an input element
891
+ throw fillError;
892
+ }
893
+ }
894
+
895
+ // If we skipped the fill attempt (readonly elements), look for new elements directly
896
+ if (isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '')) {
897
+ // Look for new elements that appeared after clicking
898
+ console.log(`Looking for new elements that appeared after clicking readonly element...`);
899
+
900
+ // Get snapshot after action to find new elements
901
+ const snapshotAfter = await (page as any)._snapshotForAI();
902
+ const newRefs = new Set<string>();
903
+ const afterRefPattern = /\[ref=([^\]]+)\]/g;
904
+ let afterMatch;
905
+ while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
906
+ const refId = afterMatch[1];
907
+ if (!existingRefs.has(refId)) {
908
+ newRefs.add(refId);
909
+ }
910
+ }
911
+
912
+ console.log(`Found ${newRefs.size} new elements after clicking readonly element`);
913
+
914
+ // If we have a placeholder, try to find new input elements with that placeholder
915
+ if (originalPlaceholder && newRefs.size > 0) {
916
+ console.log(`Looking for new input elements with placeholder: ${originalPlaceholder}`);
917
+
918
+ // Try each new ref to see if it's an input with our placeholder
919
+ for (const newRef of newRefs) {
920
+ try {
921
+ const newElement = await page.locator(`aria-ref=${newRef}`).first();
922
+ const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
923
+
924
+ if (tagName === 'input' || tagName === 'textarea') {
925
+ const placeholder = await newElement.getAttribute('placeholder').catch(() => null);
926
+ if (placeholder === originalPlaceholder) {
927
+ console.log(`Found new input element with matching placeholder: ref=${newRef}`);
928
+
929
+ // Check if it's visible and fillable
930
+ const elementInfo = await newElement.evaluate((el: any) => {
931
+ return {
932
+ tagName: el.tagName,
933
+ id: el.id,
934
+ className: el.className,
935
+ placeholder: el.placeholder,
936
+ isVisible: el.offsetParent !== null,
937
+ isReadonly: el.readOnly || el.getAttribute('readonly') !== null
938
+ };
939
+ });
940
+ console.log(`New element details:`, JSON.stringify(elementInfo));
941
+
942
+ // Try to fill it with force to avoid scrolling
943
+ await newElement.fill(text, { force: true });
944
+
945
+ // If element might show dropdown, check for new elements
946
+ if (shouldCheckDiff) {
947
+ await page.waitForTimeout(300);
948
+ const snapshotFinal = await (page as any)._snapshotForAI();
949
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotFinal, ['option', 'menuitem']);
950
+
951
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
952
+ return { success: true, diffSnapshot };
953
+ }
954
+ }
955
+
956
+ return { success: true };
957
+ }
958
+ }
959
+ } catch (e) {
960
+ // Ignore errors for non-input elements
961
+ }
962
+ }
963
+ }
964
+
965
+ console.log(`No suitable input element found for readonly ref ${ref}`);
966
+ return { success: false, error: `Element ref=${ref} is readonly and no suitable input was found` };
967
+ }
563
968
  }
564
969
 
565
970
  return { success: false, error: 'No valid input provided' };
@@ -718,6 +1123,11 @@ export class HybridBrowserSession {
718
1123
  // Capture new tab ID if present
719
1124
  newTabId = clickResult.newTabId;
720
1125
 
1126
+ // Capture diff snapshot if present
1127
+ if (clickResult.diffSnapshot) {
1128
+ actionDetails = { diffSnapshot: clickResult.diffSnapshot };
1129
+ }
1130
+
721
1131
  actionExecutionTime = Date.now() - clickStart;
722
1132
  break;
723
1133
  }
@@ -740,6 +1150,14 @@ export class HybridBrowserSession {
740
1150
  actionDetails = typeResult.details;
741
1151
  }
742
1152
 
1153
+ // Capture diff snapshot if present
1154
+ if (typeResult.diffSnapshot) {
1155
+ if (!actionDetails) {
1156
+ actionDetails = {};
1157
+ }
1158
+ actionDetails.diffSnapshot = typeResult.diffSnapshot;
1159
+ }
1160
+
743
1161
  actionExecutionTime = Date.now() - typeStart;
744
1162
  break;
745
1163
  }
@@ -992,7 +1410,7 @@ export class HybridBrowserSession {
992
1410
  let newTabId: string | null = null;
993
1411
 
994
1412
  const browserConfig = this.configLoader.getBrowserConfig();
995
- if (browserConfig.connectOverCdp) {
1413
+ if (browserConfig.cdpUrl) {
996
1414
  // CDP mode: find an available blank tab
997
1415
  const allPages = this.context.pages();
998
1416
  for (const page of allPages) {
@@ -1008,7 +1426,10 @@ export class HybridBrowserSession {
1008
1426
  }
1009
1427
 
1010
1428
  if (!newPage || !newTabId) {
1011
- throw new Error('No available blank tabs in CDP mode. Frontend should create more blank tabs when half are used.');
1429
+ console.log('[CDP] No available blank tabs, creating new page');
1430
+ newPage = await this.context.newPage();
1431
+ newTabId = this.generateTabId();
1432
+ this.registerNewPage(newTabId, newPage);
1012
1433
  }
1013
1434
  } else {
1014
1435
  // Non-CDP mode: create new page as usual
@@ -1207,17 +1628,25 @@ export class HybridBrowserSession {
1207
1628
  this.pages.clear();
1208
1629
  this.currentTabId = null;
1209
1630
 
1210
- if (this.context) {
1631
+ // Handle context cleanup separately for CDP mode
1632
+ if (!browserConfig.cdpUrl && this.context && this.contextOwnedByUs) {
1633
+ // For non-CDP mode, close context here
1211
1634
  await this.context.close();
1212
1635
  this.context = null;
1636
+ this.contextOwnedByUs = false;
1213
1637
  }
1214
1638
 
1215
1639
  if (this.browser) {
1216
- if (browserConfig.connectOverCdp) {
1217
- // For CDP connections, just disconnect without closing the browser
1218
- await this.browser.close();
1640
+ if (browserConfig.cdpUrl) {
1641
+ // In CDP mode: tear down only our context, then disconnect
1642
+ if (this.context && this.contextOwnedByUs) {
1643
+ await this.context.close().catch(() => {});
1644
+ this.context = null;
1645
+ this.contextOwnedByUs = false;
1646
+ }
1647
+ await this.browser.close(); // disconnect
1219
1648
  } else {
1220
- // For launched browsers, close completely
1649
+ // Local launch: close everything
1221
1650
  await this.browser.close();
1222
1651
  }
1223
1652
  this.browser = null;