spidy 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b6a7d70df09642e17d34cc85e1973914b8b7e151c34670526cb4d6b2d3589227
4
- data.tar.gz: 012b7def5510c16d68676bada533d452315244fc5d47d7f26a9bd71068a3f9a3
3
+ metadata.gz: 743eadfde1aa8f5e9dbfde067b1c92e38014f274bd59502ca64d845d622c3e53
4
+ data.tar.gz: 3b89159ea679762e361214ecf9ece14642ff1aeb48b978084bfd35c71e3ad8ea
5
5
  SHA512:
6
- metadata.gz: 8b6682cd3d1499b115cdfba3964cab62ae65f3a7943fb87f53d8613c2a6553b4f2a4a0728f2af07990dc90ae03c07df4d33914739ab4df462a37c2b04f5efdc5
7
- data.tar.gz: a2680dd41fb1a6dead95ecd20742560d749f5c3f27367baac7d3f1294d4c6ee7de946cb173e1f2750169e6a341915197e375d1e56ca0faf7c5be4491f5a55ae9
6
+ metadata.gz: bad3fd94c682d94a2d92130759178ccea388701cbcbf2ef0e125db8d558af349e81e6c2f623d78277dfffcb89f82d60d7c06d32fef158990cdd3ec19118dc63f
7
+ data.tar.gz: 24217cbb1c12ebbc4fee9f18e3a3583f2194e33c7dfac99a270cde46f93367a6a397375b08905bb04de39a12f12226f13795c794355ca87ba6f155f8da45b5c6
data/.rubocop.yml CHANGED
@@ -1,34 +1,79 @@
1
- inherit_from: .rubocop_todo.yml
1
+ plugins:
2
+ - rubocop-performance
3
+ - rubocop-rspec
4
+
2
5
  AllCops:
3
- TargetRubyVersion: 3.0.2
6
+ TargetRubyVersion: 3.4.2
4
7
  NewCops: enable
5
8
  DisplayCopNames: true
9
+ Exclude:
10
+ - 'vendor/**/*'
11
+ - 'bin/**/*'
12
+ - 'tmp/**/*'
13
+
14
+ Gemspec/RequiredRubyVersion:
15
+ Enabled: false
16
+
17
+ Style/FrozenStringLiteralComment:
18
+ EnforcedStyle: never
6
19
 
20
+ # Style
7
21
  Style/ClassAndModuleChildren:
8
22
  Enabled: false
9
23
 
10
24
  Style/SignalException:
11
25
  EnforcedStyle: semantic
12
26
 
27
+ Style/Documentation:
28
+ Enabled: false
29
+
30
+ Style/StringLiterals:
31
+ EnforcedStyle: single_quotes
32
+ ConsistentQuotesInMultiline: true
33
+
34
+ # Naming
13
35
  Naming/MethodParameterName:
14
36
  AllowedNames:
15
37
  - as
38
+ - id
39
+ - io
40
+ - ip
41
+ - of
42
+ - on
43
+ - to
44
+ - up
16
45
 
46
+ # Metrics
17
47
  Metrics/AbcSize:
18
- Max: 21
48
+ Max: 25
19
49
  Exclude:
50
+ - 'lib/spidy/connector/lightpanda.rb'
20
51
 
21
52
  Metrics/MethodLength:
22
- Max: 15
53
+ Max: 20
54
+ Exclude:
55
+ - 'lib/spidy/connector/lightpanda.rb'
56
+
57
+ Metrics/ClassLength:
58
+ Max: 150
59
+ Exclude:
60
+ - 'lib/spidy/connector/lightpanda.rb'
23
61
 
24
- Metrics/LineLength:
62
+ Layout/LineLength:
25
63
  Max: 130
26
64
 
27
65
  Metrics/BlockLength:
28
66
  Max: 120
29
-
30
- SignalException:
31
- EnforcedStyle: semantic
67
+ Exclude:
68
+ - 'spec/**/*'
69
+ - 'example/**/*'
32
70
 
33
71
  Layout/EmptyLineAfterGuardClause:
34
72
  Enabled: false
73
+
74
+ # RSpec
75
+ RSpec/ExampleLength:
76
+ Max: 15
77
+
78
+ RSpec/MultipleExpectations:
79
+ Max: 5
data/Gemfile CHANGED
@@ -1,10 +1,24 @@
1
- # frozen_string_literal: true
2
-
3
1
  source 'https://rubygems.org'
4
2
 
5
3
  # Specify your gem's dependencies in crawler.gemspec
6
4
  gemspec
7
5
 
8
- gem 'webrick'
9
- gem 'rackup'
10
6
  gem 'irb'
7
+ gem 'rackup'
8
+ gem 'webrick'
9
+
10
+ gem 'capybara_discoball'
11
+ gem 'ffaker'
12
+ gem 'rake', '~> 13.0'
13
+ gem 'rspec', '~> 3.0'
14
+ gem 'rspec-command'
15
+ gem 'sinatra'
16
+
17
+ group :development do
18
+ gem 'ferrum'
19
+
20
+ gem 'rubocop', require: false
21
+ gem 'rubocop-performance', require: false
22
+ gem 'rubocop-rake', require: false
23
+ gem 'rubocop-rspec', require: false
24
+ end
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- spidy (0.3.12)
4
+ spidy (1.0.0)
5
5
  activesupport (~> 7.1)
6
6
  mechanize
7
7
  socksify
@@ -24,6 +24,7 @@ GEM
24
24
  tzinfo (~> 2.0, >= 2.0.5)
25
25
  addressable (2.8.7)
26
26
  public_suffix (>= 2.0.2, < 7.0)
27
+ ast (2.4.3)
27
28
  base64 (0.2.0)
28
29
  benchmark (0.4.0)
29
30
  bigdecimal (3.1.9)
@@ -41,20 +42,29 @@ GEM
41
42
  concurrent-ruby (1.3.5)
42
43
  connection_pool (2.5.0)
43
44
  date (3.4.1)
44
- diff-lcs (1.6.0)
45
+ diff-lcs (1.6.1)
45
46
  domain_name (0.6.20240107)
46
47
  drb (2.2.1)
48
+ ferrum (0.16)
49
+ addressable (~> 2.5)
50
+ base64 (~> 0.2)
51
+ concurrent-ruby (~> 1.1)
52
+ webrick (~> 1.7)
53
+ websocket-driver (~> 0.7)
47
54
  ffaker (2.24.0)
48
55
  http-cookie (1.0.8)
49
56
  domain_name (~> 0.5)
50
57
  i18n (1.14.7)
51
58
  concurrent-ruby (~> 1.0)
52
59
  io-console (0.8.0)
53
- irb (1.15.1)
60
+ irb (1.15.2)
54
61
  pp (>= 0.6.0)
55
62
  rdoc (>= 4.0.0)
56
63
  reline (>= 0.4.2)
57
- logger (1.6.6)
64
+ json (2.10.2)
65
+ language_server-protocol (3.17.0.4)
66
+ lint_roller (1.1.0)
67
+ logger (1.7.0)
58
68
  matrix (0.4.2)
59
69
  mechanize (2.14.0)
60
70
  addressable (~> 2.8)
@@ -69,10 +79,10 @@ GEM
69
79
  rubyntlm (~> 0.6, >= 0.6.3)
70
80
  webrick (~> 1.7)
71
81
  webrobots (~> 0.1.2)
72
- mime-types (3.6.1)
82
+ mime-types (3.6.2)
73
83
  logger
74
84
  mime-types-data (~> 3.2015)
75
- mime-types-data (3.2025.0318)
85
+ mime-types-data (3.2025.0408)
76
86
  mini_mime (1.1.5)
77
87
  minitest (5.25.5)
78
88
  mixlib-shellout (2.4.4)
@@ -82,31 +92,36 @@ GEM
82
92
  net-http-persistent (4.0.5)
83
93
  connection_pool (~> 2.2)
84
94
  nkf (0.2.0)
85
- nokogiri (1.18.5-aarch64-linux-gnu)
95
+ nokogiri (1.18.7-aarch64-linux-gnu)
86
96
  racc (~> 1.4)
87
- nokogiri (1.18.5-aarch64-linux-musl)
97
+ nokogiri (1.18.7-aarch64-linux-musl)
88
98
  racc (~> 1.4)
89
- nokogiri (1.18.5-arm-linux-gnu)
99
+ nokogiri (1.18.7-arm-linux-gnu)
90
100
  racc (~> 1.4)
91
- nokogiri (1.18.5-arm-linux-musl)
101
+ nokogiri (1.18.7-arm-linux-musl)
92
102
  racc (~> 1.4)
93
- nokogiri (1.18.5-arm64-darwin)
103
+ nokogiri (1.18.7-arm64-darwin)
94
104
  racc (~> 1.4)
95
- nokogiri (1.18.5-x86_64-darwin)
105
+ nokogiri (1.18.7-x86_64-darwin)
96
106
  racc (~> 1.4)
97
- nokogiri (1.18.5-x86_64-linux-gnu)
107
+ nokogiri (1.18.7-x86_64-linux-gnu)
98
108
  racc (~> 1.4)
99
- nokogiri (1.18.5-x86_64-linux-musl)
109
+ nokogiri (1.18.7-x86_64-linux-musl)
100
110
  racc (~> 1.4)
111
+ parallel (1.27.0)
112
+ parser (3.3.8.0)
113
+ ast (~> 2.4.1)
114
+ racc
101
115
  pp (0.6.2)
102
116
  prettyprint
103
117
  prettyprint (0.2.0)
118
+ prism (1.4.0)
104
119
  psych (5.2.3)
105
120
  date
106
121
  stringio
107
122
  public_suffix (6.0.1)
108
123
  racc (1.8.1)
109
- rack (3.1.12)
124
+ rack (3.1.13)
110
125
  rack-protection (4.1.1)
111
126
  base64 (>= 0.1.0)
112
127
  logger (>= 1.6.0)
@@ -118,11 +133,12 @@ GEM
118
133
  rack (>= 1.3)
119
134
  rackup (2.2.1)
120
135
  rack (>= 3)
136
+ rainbow (3.1.1)
121
137
  rake (13.2.1)
122
- rdoc (6.12.0)
138
+ rdoc (6.13.1)
123
139
  psych (>= 4.0.0)
124
140
  regexp_parser (2.10.0)
125
- reline (0.6.0)
141
+ reline (0.6.1)
126
142
  io-console (~> 0.5)
127
143
  rspec (3.13.0)
128
144
  rspec-core (~> 3.13.0)
@@ -144,6 +160,31 @@ GEM
144
160
  diff-lcs (>= 1.2.0, < 2.0)
145
161
  rspec-support (~> 3.13.0)
146
162
  rspec-support (3.13.2)
163
+ rubocop (1.75.2)
164
+ json (~> 2.3)
165
+ language_server-protocol (~> 3.17.0.2)
166
+ lint_roller (~> 1.1.0)
167
+ parallel (~> 1.10)
168
+ parser (>= 3.3.0.2)
169
+ rainbow (>= 2.2.2, < 4.0)
170
+ regexp_parser (>= 2.9.3, < 3.0)
171
+ rubocop-ast (>= 1.44.0, < 2.0)
172
+ ruby-progressbar (~> 1.7)
173
+ unicode-display_width (>= 2.4.0, < 4.0)
174
+ rubocop-ast (1.44.1)
175
+ parser (>= 3.3.7.2)
176
+ prism (~> 1.4)
177
+ rubocop-performance (1.25.0)
178
+ lint_roller (~> 1.1)
179
+ rubocop (>= 1.75.0, < 2.0)
180
+ rubocop-ast (>= 1.38.0, < 2.0)
181
+ rubocop-rake (0.7.1)
182
+ lint_roller (~> 1.1)
183
+ rubocop (>= 1.72.1)
184
+ rubocop-rspec (3.5.0)
185
+ lint_roller (~> 1.1)
186
+ rubocop (~> 1.72, >= 1.72.1)
187
+ ruby-progressbar (1.13.0)
147
188
  ruby2_keywords (0.0.5)
148
189
  rubyntlm (0.6.5)
149
190
  base64
@@ -156,13 +197,20 @@ GEM
156
197
  rack-session (>= 2.0.0, < 3)
157
198
  tilt (~> 2.0)
158
199
  socksify (1.7.1)
159
- stringio (3.1.5)
200
+ stringio (3.1.6)
160
201
  tilt (2.6.0)
161
202
  tor (0.1.7)
162
203
  tzinfo (2.0.6)
163
204
  concurrent-ruby (~> 1.0)
205
+ unicode-display_width (3.1.4)
206
+ unicode-emoji (~> 4.0, >= 4.0.4)
207
+ unicode-emoji (4.0.4)
164
208
  webrick (1.9.1)
165
209
  webrobots (0.1.2)
210
+ websocket-driver (0.7.7)
211
+ base64
212
+ websocket-extensions (>= 0.1.0)
213
+ websocket-extensions (0.1.5)
166
214
  xpath (3.2.0)
167
215
  nokogiri (~> 1.8)
168
216
 
@@ -177,14 +225,18 @@ PLATFORMS
177
225
  x86_64-linux-musl
178
226
 
179
227
  DEPENDENCIES
180
- bundler (~> 2.0)
181
228
  capybara_discoball
229
+ ferrum
182
230
  ffaker
183
231
  irb
184
232
  rackup
185
233
  rake (~> 13.0)
186
234
  rspec (~> 3.0)
187
235
  rspec-command
236
+ rubocop
237
+ rubocop-performance
238
+ rubocop-rake
239
+ rubocop-rspec
188
240
  sinatra
189
241
  spidy!
190
242
  webrick
data/README.md CHANGED
@@ -20,14 +20,73 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- ### When used from the command line
23
+ ### Connectors
24
24
 
25
- website.rb
25
+ Spidy supports different connectors for fetching web pages:
26
+
27
+ 1. **HTML Connector (Mechanize)**: Default connector for regular HTTP requests and HTML parsing
28
+ 2. **JSON Connector**: For parsing JSON APIs
29
+ 3. **XML Connector**: For parsing XML responses
30
+ 4. **Lightpanda Connector**: For JavaScript-rendered websites (uses Playwright)
31
+
32
+ #### Lightpanda Connector for JavaScript-Rendered Websites
33
+
34
+ The Lightpanda connector lets you process JavaScript-rendered websites by connecting to a running Lightpanda CDP (Chrome DevTools Protocol) server.
35
+
36
+ ##### Prerequisites
37
+
38
+ 1. Install the Playwright Ruby client:
39
+
40
+ ```bash
41
+ $ gem install playwright-ruby-client
42
+ ```
43
+
44
+ 2. Start a Lightpanda CDP server in a separate terminal:
45
+
46
+ ```bash
47
+ $ lightpanda serve --host 127.0.0.1 --port 9222
48
+ ```
49
+
50
+ ##### Usage
51
+
52
+ ```ruby
53
+ # Define a scraper with Lightpanda support
54
+ scraper = Spidy.define do
55
+ # Use the :lightpanda connector for JavaScript-rendered sites
56
+ spider(as: :lightpanda) do |yielder, connector, url|
57
+ connector.call(url) do |page|
58
+ # Process the JavaScript-rendered page
59
+ # page is a Nokogiri-like object
60
+ yielder.call(page)
61
+ end
62
+ end
63
+
64
+ define(as: :html) do
65
+ let(:title, 'title')
66
+ # Extract content from JS-rendered page...
67
+ end
68
+ end
69
+ ```
70
+
71
+ ##### Configuration
72
+
73
+ You can customize the Lightpanda CDP server connection using environment variables:
74
+
75
+ ```bash
76
+ # Set custom host and port
77
+ $ LIGHTPANDA_HOST=192.168.1.100 LIGHTPANDA_PORT=9333 ruby your_script.rb
78
+ ```
79
+
80
+ Check `example/playwright_example.rb` for a complete example.
81
+
82
+ ### Command Line Usage
83
+
84
+ Create a definition file (e.g., website.rb):
26
85
  ```rb
27
- Spidy.defin do
86
+ Spidy.define do
28
87
  spider(as: :html) do |yielder, connector, url|
29
88
  connector.call(url) do |html|
30
- # html as nokogiri object ( mechanize )
89
+ # html is a Nokogiri object (from Mechanize)
31
90
  yielder.call(url)
32
91
  end
33
92
  end
@@ -37,41 +96,61 @@ Spidy.defin do
37
96
  end
38
97
  end
39
98
  ```
99
+
100
+ Use it from the command line:
40
101
  ```bash
41
102
  echo 'http://example.com' | spidy each website.rb > urls
42
103
  cat urls | spidy call website.rb > website.json
43
- # shorthands
104
+ # shorthand
44
105
  echo 'http://example.com' | spidy each website.rb | spidy call website.rb | jq .
45
106
  ```
46
107
 
47
- ### When development console
108
+ ### Development Console
109
+
110
+ Start an interactive console with your definition:
48
111
  ```bash
49
112
  spidy console website.rb
50
113
  ```
51
114
 
52
- ### reload source code
115
+ Reload your source code during development:
53
116
  ```
54
117
  irb(#<Spidy::Console>)> reload!
55
118
  ```
56
119
 
120
+ Example console usage:
57
121
  ```rb
58
122
  each('http://example.com') { |url| break url }
59
- call('http://example.com') { |html| break html } # html as nokogiri object ( mechanize )
123
+ call('http://example.com') { |html| break html } # html is a Nokogiri object (from Mechanize)
60
124
  ```
61
125
 
62
- ### When used from the ruby code
126
+ ### Ruby Code Usage
127
+
128
+ Create and use a scraper in your Ruby code:
63
129
  ```rb
64
- a = Spidy.define do
65
- # Implementing spiders and scrapers
130
+ scraper = Spidy.define do
131
+ # Implement spiders and scrapers
132
+ spider(as: :html) do |yielder, connector, url|
133
+ connector.call(url) do |page|
134
+ yielder.call(page)
135
+ end
136
+ end
137
+
138
+ define(as: :html) do
139
+ let(:title, 'title')
140
+ let(:links) { |doc| doc.css('a').map { |a| a['href'] } }
141
+ end
66
142
  end
67
143
 
68
- a.each(url) do |url|
69
- # Loop for the number of retrieved URLs
144
+ # Extract URLs from a site
145
+ scraper.each(url) do |page_url|
146
+ # Process each URL found
147
+ puts page_url
70
148
  end
71
149
 
72
- a.call(url) do |object|
73
- # The scrape result is passed as a defined object
74
- end
150
+ # Extract structured data from a site
151
+ result = scraper.call(url)
152
+ puts "Title: #{result[:title]}"
153
+ puts "Found #{result[:links].size} links"
75
154
  ```
76
155
 
77
156
  ## Development
data/Rakefile CHANGED
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'bundler/gem_tasks'
4
2
  require 'rspec/core/rake_task'
5
3
 
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This script checks which Ferrum API methods are available
4
+ begin
5
+ require 'ferrum'
6
+ puts 'Ferrum gem is loaded!'
7
+
8
+ # Check Ferrum version
9
+ puts "Ferrum version: #{begin
10
+ Ferrum::VERSION
11
+ rescue StandardError
12
+ 'unknown'
13
+ end}"
14
+
15
+ # Try to create a browser instance
16
+ puts "\nTrying to create a browser instance..."
17
+
18
+ # Find Chrome executable path
19
+ def find_chrome_path
20
+ # Common locations on macOS
21
+ macos_paths = [
22
+ '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
23
+ '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
24
+ '/Applications/Chromium.app/Contents/MacOS/Chromium',
25
+ '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge'
26
+ ]
27
+
28
+ # Check macOS paths
29
+ macos_paths.each do |path|
30
+ return path if File.exist?(path)
31
+ end
32
+
33
+ # Try to locate Chrome using 'which' command
34
+ %w[google-chrome chromium chromium-browser].each do |browser|
35
+ path = `which #{browser} 2>/dev/null`.strip
36
+ return path if path != '' && File.exist?(path)
37
+ end
38
+
39
+ nil
40
+ end
41
+
42
+ # Get Chrome path
43
+ chrome_path = ENV['CHROME_PATH'] || find_chrome_path
44
+ if chrome_path
45
+ puts "Using Chrome executable: #{chrome_path}"
46
+ else
47
+ puts 'No Chrome executable found. Using default.'
48
+ end
49
+
50
+ # Create browser with options
51
+ options = {
52
+ headless: true,
53
+ window_size: [1280, 800]
54
+ }
55
+
56
+ # Add Chrome path if available
57
+ options[:browser_path] = chrome_path if chrome_path
58
+
59
+ browser = Ferrum::Browser.new(options)
60
+ puts 'Browser instance created successfully!'
61
+
62
+ # Check available methods on browser
63
+ puts "\nAvailable methods on browser object:"
64
+ browser_methods = (browser.methods - Object.methods).sort
65
+ puts browser_methods.join(', ')
66
+
67
+ # Check if headers method exists
68
+ puts "\nDoes browser respond to 'headers='? #{browser.respond_to?(:headers=)}"
69
+
70
+ # Check available methods on browser.network
71
+ if browser.respond_to?(:network)
72
+ puts "\nAvailable methods on browser.network object:"
73
+ network_methods = (browser.network.methods - Object.methods).sort
74
+ puts network_methods.join(', ')
75
+
76
+ # Check if wait_for_idle method exists and what parameters it accepts
77
+ if browser.network.respond_to?(:wait_for_idle)
78
+ puts "\nExamine wait_for_idle method:"
79
+ begin
80
+ # Try with timeout parameter
81
+ browser.network.wait_for_idle(timeout: 1)
82
+ puts 'wait_for_idle accepts timeout parameter'
83
+ rescue ArgumentError => e
84
+ puts "wait_for_idle does not accept timeout parameter: #{e.message}"
85
+ rescue StandardError => e
86
+ puts "Error calling wait_for_idle with timeout: #{e.message}"
87
+ end
88
+ else
89
+ puts "\nwait_for_idle method not available on network object"
90
+ end
91
+ else
92
+ puts "\nnetwork method not available on browser object"
93
+ end
94
+
95
+ # Test goto method
96
+ puts "\nTesting navigation with goto method:"
97
+ begin
98
+ browser.goto('https://example.com')
99
+ puts 'Navigation successful!'
100
+ puts "Page title: #{browser.title}"
101
+ rescue StandardError => e
102
+ puts "Error during navigation: #{e.message}"
103
+ end
104
+
105
+ # Clean up
106
+ browser.quit
107
+ puts "\nBrowser closed successfully"
108
+ rescue LoadError => e
109
+ puts "Error: Ferrum gem is not installed: #{e.message}"
110
+ puts 'Install it with: gem install ferrum'
111
+ rescue StandardError => e
112
+ puts "Error: #{e.message}"
113
+ puts e.backtrace.join("\n")
114
+ end
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Script to check if Lightpanda is running and start it if needed
4
+ require 'net/http'
5
+
6
+ def lightpanda_running?(host = '127.0.0.1', port = 9222)
7
+ uri = URI("http://#{host}:#{port}/json/version")
8
+ response = Net::HTTP.get_response(uri)
9
+ response.is_a?(Net::HTTPSuccess)
10
+ rescue StandardError
11
+ false
12
+ end
13
+
14
+ def start_lightpanda(host = '127.0.0.1', port = 9222)
15
+ puts 'Starting Lightpanda...'
16
+
17
+ # Build the command to start Lightpanda in the background
18
+ cmd = "/Users/aileron/bin/lightpanda serve --host #{host} --port #{port} > /tmp/lightpanda.log 2>&1 &"
19
+
20
+ # Execute the command
21
+ result = system(cmd)
22
+
23
+ if result
24
+ puts "Lightpanda started! Service should be available at http://#{host}:#{port}"
25
+
26
+ # Wait for it to be ready
27
+ 10.times do
28
+ if lightpanda_running?(host, port)
29
+ puts 'Lightpanda is now running and accepting connections!'
30
+ return true
31
+ end
32
+ puts 'Waiting for Lightpanda to start...'
33
+ sleep 1
34
+ end
35
+
36
+ puts "Lightpanda might have started but isn't responding yet."
37
+ puts 'Check /tmp/lightpanda.log for details.'
38
+ else
39
+ puts 'Failed to start Lightpanda. Make sure the path is correct: /Users/aileron/bin/lightpanda'
40
+ end
41
+ false
42
+ end
43
+
44
+ # Main script
45
+ host = '127.0.0.1'
46
+ port = 9222
47
+
48
+ if lightpanda_running?(host, port)
49
+ puts "✅ Lightpanda is already running at http://#{host}:#{port}"
50
+ else
51
+ puts "❌ Lightpanda is not running at http://#{host}:#{port}"
52
+
53
+ if ARGV.include?('--start') || ARGV.include?('-s')
54
+ start_lightpanda(host, port)
55
+ else
56
+ puts 'Run this script with --start or -s option to start Lightpanda automatically:'
57
+ puts " #{$PROGRAM_NAME} --start"
58
+ end
59
+ end
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Test connecting to existing Chrome instance at 127.0.0.1:9222
4
+
5
+ begin
6
+ require 'ferrum'
7
+ puts 'Successfully loaded Ferrum'
8
+ rescue LoadError => e
9
+ puts "Ferrum is not installed: #{e.message}"
10
+ puts "Run 'gem install ferrum' to install it"
11
+ exit 1
12
+ end
13
+
14
+ puts 'Testing connection to Chrome at 127.0.0.1:9222'
15
+ puts '=============================================='
16
+
17
+ begin
18
+ # Connect to the remote Chrome instance
19
+ # Note: We're setting process: false to prevent launching a new browser
20
+ browser = Ferrum::Browser.new(
21
+ url: 'http://127.0.0.1:9222',
22
+ process: false
23
+ )
24
+
25
+ # Access a test URL
26
+ url = 'https://example.com'
27
+ puts "Accessing: #{url}"
28
+ browser.goto(url)
29
+
30
+ # Get page title
31
+ title = browser.title
32
+ puts "Page title: #{title}"
33
+
34
+ # Clean up browser connection (but don't close Chrome)
35
+ browser.quit
36
+
37
+ puts "\nSuccess! Connected to Chrome at 127.0.0.1:9222"
38
+ rescue StandardError => e
39
+ puts "Error: #{e.message}"
40
+ puts e.backtrace.join("\n")
41
+
42
+ puts "\nTroubleshooting tips:"
43
+ puts '1. Make sure Chrome is running with remote debugging enabled'
44
+ puts '2. Verify the command: /Users/aileron/bin/lightpanda serve --host 127.0.0.1 --port 9222'
45
+ puts '3. Check if you can access http://127.0.0.1:9222/json/version in your browser'
46
+ end
47
+
48
+ puts "\nTest completed"