puppeteer-ruby 0.45.6 → 0.50.0.alpha5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -3
  3. data/AGENTS.md +169 -0
  4. data/CLAUDE/README.md +41 -0
  5. data/CLAUDE/architecture.md +253 -0
  6. data/CLAUDE/cdp_protocol.md +230 -0
  7. data/CLAUDE/concurrency.md +216 -0
  8. data/CLAUDE/porting_puppeteer.md +575 -0
  9. data/CLAUDE/rbs_type_checking.md +101 -0
  10. data/CLAUDE/spec_migration_plans.md +1041 -0
  11. data/CLAUDE/testing.md +278 -0
  12. data/CLAUDE.md +242 -0
  13. data/README.md +8 -0
  14. data/Rakefile +7 -0
  15. data/Steepfile +28 -0
  16. data/docs/api_coverage.md +105 -56
  17. data/lib/puppeteer/aria_query_handler.rb +3 -2
  18. data/lib/puppeteer/async_utils.rb +214 -0
  19. data/lib/puppeteer/browser.rb +98 -56
  20. data/lib/puppeteer/browser_connector.rb +18 -3
  21. data/lib/puppeteer/browser_context.rb +196 -3
  22. data/lib/puppeteer/browser_runner.rb +18 -10
  23. data/lib/puppeteer/cdp_session.rb +67 -23
  24. data/lib/puppeteer/chrome_target_manager.rb +65 -40
  25. data/lib/puppeteer/connection.rb +55 -36
  26. data/lib/puppeteer/console_message.rb +9 -1
  27. data/lib/puppeteer/console_patch.rb +47 -0
  28. data/lib/puppeteer/css_coverage.rb +5 -3
  29. data/lib/puppeteer/custom_query_handler.rb +80 -33
  30. data/lib/puppeteer/define_async_method.rb +31 -37
  31. data/lib/puppeteer/dialog.rb +47 -14
  32. data/lib/puppeteer/element_handle.rb +231 -62
  33. data/lib/puppeteer/emulation_manager.rb +1 -1
  34. data/lib/puppeteer/env.rb +1 -1
  35. data/lib/puppeteer/errors.rb +25 -2
  36. data/lib/puppeteer/event_callbackable.rb +15 -0
  37. data/lib/puppeteer/events.rb +4 -0
  38. data/lib/puppeteer/execution_context.rb +148 -3
  39. data/lib/puppeteer/file_chooser.rb +6 -0
  40. data/lib/puppeteer/frame.rb +162 -91
  41. data/lib/puppeteer/frame_manager.rb +69 -48
  42. data/lib/puppeteer/http_request.rb +114 -38
  43. data/lib/puppeteer/http_response.rb +24 -7
  44. data/lib/puppeteer/isolated_world.rb +64 -41
  45. data/lib/puppeteer/js_coverage.rb +5 -3
  46. data/lib/puppeteer/js_handle.rb +58 -16
  47. data/lib/puppeteer/keyboard.rb +30 -17
  48. data/lib/puppeteer/launcher/browser_options.rb +3 -1
  49. data/lib/puppeteer/launcher/chrome.rb +8 -5
  50. data/lib/puppeteer/launcher/launch_options.rb +7 -2
  51. data/lib/puppeteer/launcher.rb +4 -8
  52. data/lib/puppeteer/lifecycle_watcher.rb +38 -22
  53. data/lib/puppeteer/mouse.rb +273 -64
  54. data/lib/puppeteer/network_event_manager.rb +7 -0
  55. data/lib/puppeteer/network_manager.rb +393 -112
  56. data/lib/puppeteer/page/screenshot_task_queue.rb +14 -4
  57. data/lib/puppeteer/page.rb +568 -226
  58. data/lib/puppeteer/puppeteer.rb +171 -64
  59. data/lib/puppeteer/query_handler_manager.rb +112 -16
  60. data/lib/puppeteer/reactor_runner.rb +247 -0
  61. data/lib/puppeteer/remote_object.rb +127 -47
  62. data/lib/puppeteer/target.rb +74 -27
  63. data/lib/puppeteer/task_manager.rb +3 -1
  64. data/lib/puppeteer/timeout_helper.rb +6 -10
  65. data/lib/puppeteer/touch_handle.rb +39 -0
  66. data/lib/puppeteer/touch_screen.rb +72 -22
  67. data/lib/puppeteer/tracing.rb +3 -3
  68. data/lib/puppeteer/version.rb +1 -1
  69. data/lib/puppeteer/wait_task.rb +264 -101
  70. data/lib/puppeteer/web_socket.rb +2 -2
  71. data/lib/puppeteer/web_socket_transport.rb +91 -27
  72. data/lib/puppeteer/web_worker.rb +175 -0
  73. data/lib/puppeteer.rb +20 -4
  74. data/puppeteer-ruby.gemspec +15 -11
  75. data/sig/_external.rbs +8 -0
  76. data/sig/_supplementary.rbs +314 -0
  77. data/sig/puppeteer/browser.rbs +166 -0
  78. data/sig/puppeteer/cdp_session.rbs +64 -0
  79. data/sig/puppeteer/dialog.rbs +41 -0
  80. data/sig/puppeteer/element_handle.rbs +305 -0
  81. data/sig/puppeteer/execution_context.rbs +87 -0
  82. data/sig/puppeteer/frame.rbs +226 -0
  83. data/sig/puppeteer/http_request.rbs +214 -0
  84. data/sig/puppeteer/http_response.rbs +89 -0
  85. data/sig/puppeteer/js_handle.rbs +64 -0
  86. data/sig/puppeteer/keyboard.rbs +40 -0
  87. data/sig/puppeteer/mouse.rbs +113 -0
  88. data/sig/puppeteer/page.rbs +515 -0
  89. data/sig/puppeteer/puppeteer.rbs +98 -0
  90. data/sig/puppeteer/remote_object.rbs +78 -0
  91. data/sig/puppeteer/touch_handle.rbs +21 -0
  92. data/sig/puppeteer/touch_screen.rbs +35 -0
  93. data/sig/puppeteer/web_worker.rbs +83 -0
  94. metadata +116 -45
  95. data/CHANGELOG.md +0 -397
  96. data/lib/puppeteer/concurrent_ruby_utils.rb +0 -81
  97. data/lib/puppeteer/firefox_target_manager.rb +0 -157
  98. data/lib/puppeteer/launcher/firefox.rb +0 -453
@@ -0,0 +1,230 @@
1
+ # Chrome DevTools Protocol (CDP)
2
+
3
+ This document covers CDP usage in puppeteer-ruby.
4
+
5
+ ## Overview
6
+
7
+ The Chrome DevTools Protocol (CDP) is the low-level protocol used to communicate with Chrome/Chromium browsers. All browser automation in puppeteer-ruby is done through CDP.
8
+
9
+ ## CDP Domains
10
+
11
+ CDP is organized into domains, each handling specific functionality:
12
+
13
+ | Domain | Purpose | Key Commands |
14
+ |--------|---------|--------------|
15
+ | `Page` | Page lifecycle | `navigate`, `reload`, `setDocumentContent` |
16
+ | `Runtime` | JavaScript execution | `evaluate`, `callFunctionOn` |
17
+ | `DOM` | DOM manipulation | `querySelector`, `getContentQuads` |
18
+ | `Input` | User input simulation | `dispatchKeyEvent`, `dispatchMouseEvent` |
19
+ | `Network` | Network control | `enable`, `setRequestInterception` |
20
+ | `Emulation` | Device emulation | `setDeviceMetricsOverride` |
21
+ | `Target` | Target management | `createTarget`, `attachToTarget` |
22
+
23
+ ## Sending CDP Commands
24
+
25
+ ### Basic Pattern
26
+
27
+ ```ruby
28
+ # Via CDPSession
29
+ result = @client.send_message('Page.navigate', url: 'https://example.com')
30
+
31
+ # Result is a hash with response data
32
+ # { "frameId" => "...", "loaderId" => "..." }
33
+ ```
34
+
35
+ ### With Timeout
36
+
37
+ ```ruby
38
+ # send_message has built-in timeout handling
39
+ result = @client.send_message('Page.captureScreenshot', format: 'png')
40
+ ```
41
+
42
+ ### Error Handling
43
+
44
+ ```ruby
45
+ begin
46
+ @client.send_message('Page.navigate', url: 'invalid-url')
47
+ rescue Puppeteer::CDPSession::Error => e
48
+ # Handle CDP error
49
+ puts "CDP Error: #{e.message}"
50
+ end
51
+ ```
52
+
53
+ ## Subscribing to Events
54
+
55
+ ### Basic Event Subscription
56
+
57
+ ```ruby
58
+ # Subscribe to event (persistent)
59
+ @client.on('Network.requestWillBeSent') do |event|
60
+ puts "Request: #{event['request']['url']}"
61
+ end
62
+
63
+ # Subscribe once (auto-removes after first event)
64
+ @client.once('Page.loadEventFired') do |event|
65
+ puts "Page loaded!"
66
+ end
67
+ ```
68
+
69
+ ### Enabling Domains
70
+
71
+ Some CDP domains require explicit enabling before events are sent:
72
+
73
+ ```ruby
74
+ # Enable the Network domain
75
+ @client.send_message('Network.enable')
76
+
77
+ # Now Network events will be received
78
+ @client.on('Network.requestWillBeSent') { |e| ... }
79
+ @client.on('Network.responseReceived') { |e| ... }
80
+ ```
81
+
82
+ ### Common Domains That Need Enabling
83
+
84
+ - `Network.enable` - Network events
85
+ - `Page.enable` - Page lifecycle events
86
+ - `Runtime.enable` - Runtime events (console, exceptions)
87
+ - `DOM.enable` - DOM events
88
+
89
+ ## Common CDP Patterns
90
+
91
+ ### JavaScript Evaluation
92
+
93
+ ```ruby
94
+ # Evaluate expression
95
+ result = @client.send_message('Runtime.evaluate',
96
+ expression: 'document.title',
97
+ returnByValue: true
98
+ )
99
+ # result['result']['value'] => "Page Title"
100
+
101
+ # Call function on object
102
+ result = @client.send_message('Runtime.callFunctionOn',
103
+ functionDeclaration: '(a, b) => a + b',
104
+ arguments: [{ value: 1 }, { value: 2 }],
105
+ executionContextId: context_id,
106
+ returnByValue: true
107
+ )
108
+ # result['result']['value'] => 3
109
+ ```
110
+
111
+ ### DOM Queries
112
+
113
+ ```ruby
114
+ # Get document node
115
+ doc = @client.send_message('DOM.getDocument')
116
+ root_id = doc['root']['nodeId']
117
+
118
+ # Query selector
119
+ result = @client.send_message('DOM.querySelector',
120
+ nodeId: root_id,
121
+ selector: 'button'
122
+ )
123
+ button_node_id = result['nodeId']
124
+ ```
125
+
126
+ ### Input Simulation
127
+
128
+ ```ruby
129
+ # Mouse click
130
+ @client.send_message('Input.dispatchMouseEvent',
131
+ type: 'mousePressed',
132
+ x: 100,
133
+ y: 200,
134
+ button: 'left',
135
+ clickCount: 1
136
+ )
137
+ @client.send_message('Input.dispatchMouseEvent',
138
+ type: 'mouseReleased',
139
+ x: 100,
140
+ y: 200,
141
+ button: 'left',
142
+ clickCount: 1
143
+ )
144
+
145
+ # Key press
146
+ @client.send_message('Input.dispatchKeyEvent',
147
+ type: 'keyDown',
148
+ key: 'Enter',
149
+ code: 'Enter'
150
+ )
151
+ @client.send_message('Input.dispatchKeyEvent',
152
+ type: 'keyUp',
153
+ key: 'Enter',
154
+ code: 'Enter'
155
+ )
156
+ ```
157
+
158
+ ### Screenshots
159
+
160
+ ```ruby
161
+ result = @client.send_message('Page.captureScreenshot',
162
+ format: 'png',
163
+ clip: {
164
+ x: 0,
165
+ y: 0,
166
+ width: 800,
167
+ height: 600,
168
+ scale: 1
169
+ }
170
+ )
171
+ image_data = Base64.decode64(result['data'])
172
+ ```
173
+
174
+ ## CDP Session Management
175
+
176
+ ### Session Hierarchy
177
+
178
+ ```
179
+ Browser
180
+ └── Connection (WebSocket to browser DevTools)
181
+ ├── Browser-level CDPSession
182
+ └── Target-level CDPSessions (one per page/worker)
183
+ ```
184
+
185
+ ### Creating Target Sessions
186
+
187
+ ```ruby
188
+ # Find the target for the page, then open a dedicated CDP session on it
189
+ target = browser.wait_for_target { |t| t.url.include?('example.com') }
190
+ session = target.create_cdp_session
191
+
192
+ # Use session for that specific page
193
+ session.send_message('Page.enable')
194
+ ```
195
+
196
+ ## CDP Events Reference
197
+
198
+ ### Page Events
199
+
200
+ | Event | When Fired |
201
+ |-------|------------|
202
+ | `Page.loadEventFired` | Window load event |
203
+ | `Page.domContentEventFired` | DOMContentLoaded event |
204
+ | `Page.frameNavigated` | Frame navigation complete |
205
+ | `Page.frameAttached` | New frame attached |
206
+ | `Page.frameDetached` | Frame removed |
207
+
208
+ ### Network Events
209
+
210
+ | Event | When Fired |
211
+ |-------|------------|
212
+ | `Network.requestWillBeSent` | Request about to be sent |
213
+ | `Network.responseReceived` | Response headers received |
214
+ | `Network.loadingFinished` | Response body loaded |
215
+ | `Network.loadingFailed` | Request failed |
216
+
217
+ ### Runtime Events
218
+
219
+ | Event | When Fired |
220
+ |-------|------------|
221
+ | `Runtime.consoleAPICalled` | console.log/warn/error |
222
+ | `Runtime.exceptionThrown` | Uncaught exception |
223
+ | `Runtime.executionContextCreated` | New JS context |
224
+ | `Runtime.executionContextDestroyed` | Context destroyed |
225
+
226
+ ## Resources
227
+
228
+ - [Chrome DevTools Protocol Documentation](https://chromedevtools.github.io/devtools-protocol/)
229
+ - [CDP Protocol Viewer](https://chromedevtools.github.io/devtools-protocol/tot/)
230
+ - [Puppeteer CDP Usage](https://github.com/puppeteer/puppeteer/tree/main/packages/puppeteer-core/src/cdp)
@@ -0,0 +1,216 @@
1
+ # Concurrency Model
2
+
3
+ This document explains the concurrency architecture in puppeteer-ruby using `socketry/async`.
4
+
5
+ ## Current State: socketry/async
6
+
7
+ puppeteer-ruby uses Fiber-based concurrency built on the `async` gem (from the `socketry/async` project, version 2.35.1+).
8
+
9
+ ### Key Components
10
+
11
+ | Component | Purpose |
12
+ |-----------|---------|
13
+ | `Async::Promise` | Promise that can be resolved/rejected later |
14
+ | `Puppeteer::AsyncUtils` | Utility module for async operations |
15
+ | `Puppeteer::ReactorRunner` | Dedicated Async reactor thread for sync API |
16
+
17
+ ### AsyncUtils Module
18
+
19
+ Located in `lib/puppeteer/async_utils.rb`:
20
+
21
+ ```ruby
22
+ module Puppeteer::AsyncUtils
23
+ # Wait for all promises to complete (like Promise.all)
24
+ def await_promise_all(*tasks)
25
+ # ...
26
+ end
27
+
28
+ # Wait for first promise to complete (like Promise.race)
29
+ def await_promise_race(*tasks)
30
+ # ...
31
+ end
32
+
33
+ # Timeout wrapper
34
+ def async_timeout(timeout_ms, &block)
35
+ # ...
36
+ end
37
+
38
+ # Sleep helper that works in Async context
39
+ def sleep_seconds(seconds)
40
+ # ...
41
+ end
42
+ end
43
+ ```
44
+
45
+ ### ReactorRunner
46
+
47
+ `ReactorRunner` manages a dedicated Async reactor thread and provides a bridge between synchronous API calls and the Async context:
48
+
49
+ ```ruby
50
+ # From lib/puppeteer/reactor_runner.rb
51
+
52
+ # ReactorRunner runs Async operations in a dedicated thread
53
+ runner = Puppeteer::ReactorRunner.new
54
+
55
+ # Wrap an object to proxy calls through the reactor
56
+ browser = runner.wrap(actual_browser)
57
+
58
+ # Calls are executed in the Async reactor context
59
+ browser.new_page # Runs in reactor thread
60
+ ```
61
+
62
+ Key features:
63
+ - Dedicated thread running Async reactor
64
+ - Proxies method calls into reactor context
65
+ - Handles result unwrapping and error propagation
66
+ - `wait_until_idle` for graceful shutdown
67
+
68
+ ### Threading Model
69
+
70
+ ```
71
+ Main Thread Reactor Thread (Async)
72
+ │ │
73
+ ├── sync method call ─────────►│
74
+ │ │ (execute in Async context)
75
+ │ ◄────────────────────────── result
76
+ │ │
77
+ ├── browser.close ────────────►│
78
+ │ │ wait_until_idle
79
+ │ ◄────────────────────────── cleanup complete
80
+ │ │
81
+ ```
82
+
83
+ ### Promise Patterns
84
+
85
+ #### Creating and Resolving Promises
86
+
87
+ ```ruby
88
+ # Create a promise
89
+ promise = Async::Promise.new
90
+
91
+ # Resolve with value
92
+ promise.resolve(result)
93
+
94
+ # Reject with error
95
+ promise.reject(error)
96
+
97
+ # Wait for result
98
+ result = promise.wait
99
+ ```
100
+
101
+ #### Waiting for Events
102
+
103
+ ```ruby
104
+ # Common pattern for waiting on events
105
+ promise = Async::Promise.new.tap do |p|
106
+ page.once('load') { p.resolve(true) }
107
+ end
108
+
109
+ # Later, wait for the event
110
+ promise.wait
111
+ ```
112
+
113
+ #### Running Multiple Operations
114
+
115
+ ```ruby
116
+ # Wait for all (like Promise.all)
117
+ Puppeteer::AsyncUtils.await_promise_all(
118
+ page.async_goto('https://example1.com'),
119
+ page.async_goto('https://example2.com'),
120
+ )
121
+
122
+ # Wait for any (like Promise.race)
123
+ Puppeteer::AsyncUtils.await_promise_race(
124
+ timeout_promise,
125
+ navigation_promise,
126
+ )
127
+ ```
128
+
129
+ #### Timeout Handling
130
+
131
+ ```ruby
132
+ # With timeout (milliseconds)
133
+ Puppeteer::AsyncUtils.async_timeout(5000) do
134
+ slow_operation
135
+ end
136
+ # Raises Async::TimeoutError if exceeded
137
+ ```
138
+
139
+ ### Async Method Pattern
140
+
141
+ ```ruby
142
+ # Define synchronous method
143
+ def wait_for_selector(selector, timeout: nil)
144
+ # Implementation...
145
+ end
146
+
147
+ # Generate async version that returns Async task
148
+ define_async_method :async_wait_for_selector
149
+ ```
150
+
151
+ Usage:
152
+
153
+ ```ruby
154
+ # Synchronous (blocks)
155
+ element = page.wait_for_selector('button')
156
+
157
+ # Asynchronous (returns Async task)
158
+ task = page.async_wait_for_selector('button')
159
+ # Do other work...
160
+ element = task.wait
161
+ ```
162
+
163
+ ## Key Implementation Files
164
+
165
+ | File | Description |
166
+ |------|-------------|
167
+ | `lib/puppeteer/async_utils.rb` | Core async utility functions |
168
+ | `lib/puppeteer/reactor_runner.rb` | Reactor thread management |
169
+ | `lib/puppeteer/define_async_method.rb` | Async method generation |
170
+ | `lib/puppeteer/connection.rb` | WebSocket with async messaging |
171
+ | `lib/puppeteer/lifecycle_watcher.rb` | Navigation wait logic |
172
+
173
+ ## Guidelines for New Code
174
+
175
+ ### Do
176
+
177
+ - Use `Async::Promise` for deferred results
178
+ - Use `AsyncUtils` methods for combining promises
179
+ - Keep synchronous and async logic separate
180
+ - Handle `Async::TimeoutError` for timeout operations
181
+ - Use `Mutex` for shared state between threads
182
+
183
+ ### Don't
184
+
185
+ - Block the reactor thread with synchronous I/O
186
+ - Create nested `Async` blocks unnecessarily
187
+ - Ignore promise rejections
188
+ - Use `sleep` directly (use `AsyncUtils.sleep_seconds` instead)
189
+
190
+ ### Example: Async-Ready Code
191
+
192
+ ```ruby
193
+ class MyComponent
194
+ def perform_operation(timeout: 30000)
195
+ promise = Async::Promise.new
196
+
197
+ listener_id = @emitter.add_event_listener('complete') do |result|
198
+ promise.resolve(result) unless promise.resolved?
199
+ end
200
+
201
+ begin
202
+ Puppeteer::AsyncUtils.async_timeout(timeout) do
203
+ promise.wait
204
+ end
205
+ ensure
206
+ @emitter.remove_event_listener(listener_id)
207
+ end
208
+ end
209
+ end
210
+ ```
211
+
212
+ ## Resources
213
+
214
+ - [socketry/async documentation](https://github.com/socketry/async)
215
+ - [Async::Promise](https://github.com/socketry/async)
216
+ - [Ruby Fiber scheduler](https://docs.ruby-lang.org/en/master/Fiber/Scheduler.html)