lightpanda 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "concurrent-ruby"
5
+
6
+ require_relative "client/web_socket"
7
+ require_relative "client/subscriber"
8
+
9
module Lightpanda
  # Sends JSON-encoded commands over a WebSocket and routes what comes back:
  # messages carrying an "id" complete the matching pending command, messages
  # carrying a "method" are dispatched to the event subscriber.
  class Client
    attr_reader :ws_url, :options

    # @param ws_url [String] endpoint handed to WebSocket.new
    # @param options [#timeout] options object; `timeout` bounds synchronous commands
    def initialize(ws_url, options)
      @ws_url = ws_url
      @options = options
      @ws = WebSocket.new(ws_url, options)
      @command_id = 0
      @pendings = Concurrent::Hash.new
      @subscriber = Subscriber.new
      @mutex = Mutex.new

      start_message_thread
    end

    # Sends a command and, unless `async`, waits for its reply.
    #
    # @param method [String] protocol method name
    # @param params [Hash] command parameters
    # @param async [Boolean] when true, send and return immediately
    # @param session_id [String, nil] added to the message as `sessionId` when given
    # @return [true] for async commands
    # @return [Object] the reply's "result" for synchronous commands
    # @raise [TimeoutError] when no reply arrives within `options.timeout`
    # @raise [NodeNotFoundError, NoExecutionContextError, BrowserError] when the reply carries an "error"
    def command(method, params = {}, async: false, session_id: nil)
      message = build_message(method, params, session_id: session_id)

      if async
        @ws.send_message(JSON.generate(message))
        return true
      end

      # Register the pending slot before sending so a fast reply cannot be missed.
      pending = Concurrent::IVar.new
      @pendings[message[:id]] = pending

      @ws.send_message(JSON.generate(message))

      # A nil value after waiting is treated as a timeout.
      response = pending.value!(@options.timeout)
      raise TimeoutError, "Command #{method} timed out after #{@options.timeout}s" if response.nil?

      handle_error(response) if response["error"]

      response["result"]
    ensure
      # `message` is nil only if build_message itself raised.
      @pendings.delete(message[:id]) if message
    end

    # Registers the given block as a handler for `event`.
    def on(event, &)
      @subscriber.subscribe(event, &)
    end

    # Removes a handler for `event` (delegates to the subscriber).
    def off(event, block = nil)
      @subscriber.unsubscribe(event, block)
    end

    # Stops the reader thread, closes the socket and drops all handlers and
    # pending commands.
    def close
      @running = false
      @message_thread&.kill
      @ws&.close
      @subscriber.clear
      @pendings.clear
    end

    def closed?
      @ws.closed?
    end

    private

    # Builds the outgoing message hash with a fresh id.
    def build_message(method, params, session_id: nil)
      id = next_command_id
      message = { id: id, method: method, params: params }
      message[:sessionId] = session_id if session_id

      message
    end

    # Increments and returns the command counter under the mutex.
    def next_command_id
      @mutex.synchronize { @command_id += 1 }
    end

    # Starts the background thread that pops messages from the socket queue
    # and hands them to handle_message while the client is running.
    def start_message_thread
      @running = true

      @message_thread = Thread.new do
        Thread.current.abort_on_exception = true

        while @running && !@ws.closed?
          message = @ws.messages.pop
          next unless message

          handle_message(message)
        end
      end
    end

    # Replies complete their pending command; events go to subscribers.
    # Messages with neither an "id" nor a "method" are ignored.
    def handle_message(message)
      if message["id"]
        pending = @pendings[message["id"]]
        pending&.set(message)
      elsif message["method"]
        @subscriber.dispatch(message["method"], message["params"])
      end
    end

    # Maps an error reply to the matching exception class based on its message.
    #
    # @raise [NodeNotFoundError, NoExecutionContextError, BrowserError]
    def handle_error(response)
      error = response["error"]
      message = error["message"]

      case message
      when /No node with given id found/i
        raise NodeNotFoundError, message
      when /Cannot find context with specified id/i, /Execution context was destroyed/i
        raise NoExecutionContextError, message
      else
        raise BrowserError, error
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Lightpanda
  # Settings holder with a single read/write attribute.
  class Configuration
    # Starts out nil; how the value is consumed is not visible in this file.
    attr_accessor :binary_path

    def initialize
      self.binary_path = nil
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module Lightpanda
  # Cookie helpers that translate to Network.* commands on the given browser.
  class Cookies
    attr_reader :browser

    # @param browser [#command] object that executes protocol commands
    def initialize(browser)
      @browser = browser
    end

    # @return [Array] the "cookies" entry of Network.getAllCookies, or [] when absent
    def all
      reply = browser.command("Network.getAllCookies")
      cookies = reply["cookies"]

      cookies || []
    end

    # @return [Hash, nil] the first cookie whose "name" equals `name`
    def get(name)
      all.find { _1["name"] == name }
    end

    # Issues Network.setCookie. `domain` and `expires` are sent only when given;
    # `expires` is converted with #to_i.
    def set(name:, value:, domain: nil, path: "/", secure: false, http_only: false, expires: nil)
      payload = { name: name, value: value, path: path, secure: secure, httpOnly: http_only }
      payload[:domain] = domain if domain
      payload[:expires] = expires.to_i if expires

      browser.command("Network.setCookie", **payload)
    end

    # Issues Network.deleteCookies for `name`; `domain` is sent only when given.
    def remove(name:, domain: nil, path: "/")
      payload = { name: name, path: path }
      payload[:domain] = domain if domain

      browser.command("Network.deleteCookies", **payload)
    end

    # Issues Network.clearBrowserCookies.
    def clear
      browser.command("Network.clearBrowserCookies")
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Lightpanda
  # Root of the gem's exception hierarchy; every error below inherits from it.
  class Error < StandardError
  end

  # Process and binary related failures.
  class ProcessTimeoutError < Error
  end

  class BinaryNotFoundError < Error
  end

  class BinaryError < Error
  end

  class UnsupportedPlatformError < Error
  end

  # Connection and command level failures.
  class DeadBrowserError < Error
  end

  class TimeoutError < Error
  end

  # Wraps an error payload; the exception message is the payload's "message" entry.
  class BrowserError < Error
    attr_reader :response

    # @param response [Hash] error payload, kept available via #response
    def initialize(response)
      @response = response
      super(response["message"])
    end
  end

  # Built from an evaluation reply containing "exceptionDetails".
  class JavaScriptError < Error
    attr_reader :class_name, :message

    # The message is the exception's "description", falling back to the
    # details' "text" when no description is present.
    def initialize(response)
      @class_name = response.dig("exceptionDetails", "exception", "className")

      description = response.dig("exceptionDetails", "exception", "description")
      @message = description || response.dig("exceptionDetails", "text")

      super(@message)
    end
  end

  class NodeNotFoundError < Error
  end

  class NoExecutionContextError < Error
  end

  class NoSuchPageError < Error
  end

  class StatusError < Error
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Lightpanda
  # Records request/response traffic reported through the browser's
  # Network.requestWillBeSent and Network.responseReceived events.
  class Network
    attr_reader :browser

    # @param browser [#command, #on] object that executes commands and registers event handlers
    def initialize(browser)
      @browser = browser
      @traffic = []
      @enabled = false
      @subscribed = false
    end

    # Sends Network.enable and starts recording. No-op when already enabled.
    def enable
      return if @enabled

      browser.command("Network.enable")
      subscribe
      @enabled = true
    end

    # Sends Network.disable. No-op when not enabled. Event handlers stay
    # registered so a later #enable does not add them a second time.
    def disable
      return unless @enabled

      browser.command("Network.disable")
      @enabled = false
    end

    # @return [Array<Hash>] shallow copy of the recorded entries
    def traffic
      @traffic.dup
    end

    # Forgets all recorded entries.
    def clear
      @traffic.clear
    end

    # Polls every 0.1s until at most `connections` recorded requests are still
    # without a response.
    #
    # @param timeout [Numeric] maximum seconds to wait
    # @param connections [Integer] number of unanswered requests tolerated
    # @return [Boolean] true once idle, false when `timeout` elapsed first
    def wait_for_idle(timeout: 5, connections: 0) # rubocop:disable Naming/PredicateMethod
      # Monotonic clock: wall-clock adjustments must not stretch or cut the wait.
      deadline = monotonic_now + timeout

      while monotonic_now < deadline
        unanswered = @traffic.count { |entry| entry[:response].nil? }
        return true if unanswered <= connections

        sleep 0.1
      end

      false
    end

    private

    # `::Process` is spelled out because `Process` alone resolves to
    # Lightpanda::Process inside this namespace.
    def monotonic_now
      ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
    end

    # Registers the event handlers exactly once per instance; repeated
    # enable/disable cycles would otherwise record every request several times.
    def subscribe
      return if @subscribed

      browser.on("Network.requestWillBeSent") do |params|
        @traffic << {
          request_id: params["requestId"],
          url: params.dig("request", "url"),
          method: params.dig("request", "method"),
          timestamp: params["timestamp"],
          response: nil,
        }
      end

      browser.on("Network.responseReceived") do |params|
        request = @traffic.find { |entry| entry[:request_id] == params["requestId"] }

        # Responses for requests that were never recorded are ignored.
        next unless request

        request[:response] = {
          status: params.dig("response", "status"),
          headers: params.dig("response", "headers"),
          mime_type: params.dig("response", "mimeType"),
        }
      end

      @subscribed = true
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Lightpanda
  # Value holder for connection and process settings, with defaults.
  class Options
    # The two timeouts can be overridden through environment variables,
    # read once when this file is loaded.
    DEFAULT_TIMEOUT = ENV.fetch("LIGHTPANDA_DEFAULT_TIMEOUT", 5).to_i
    DEFAULT_PROCESS_TIMEOUT = ENV.fetch("LIGHTPANDA_PROCESS_TIMEOUT", 10).to_i
    DEFAULT_HOST = "127.0.0.1"
    DEFAULT_PORT = 9222
    DEFAULT_WINDOW_SIZE = [1024, 768].freeze

    attr_accessor :host, :port, :timeout, :process_timeout, :window_size, :browser_path, :headless
    attr_writer :ws_url

    # @param options [Hash] symbol-keyed overrides; missing keys fall back to the defaults
    def initialize(options = {})
      @host = options.fetch(:host) { DEFAULT_HOST }
      @port = options.fetch(:port) { DEFAULT_PORT }
      @timeout = options.fetch(:timeout) { DEFAULT_TIMEOUT }
      @process_timeout = options.fetch(:process_timeout) { DEFAULT_PROCESS_TIMEOUT }
      @window_size = options.fetch(:window_size) { DEFAULT_WINDOW_SIZE }
      @headless = options.fetch(:headless) { true }
      @browser_path = options[:browser_path]
      @ws_url = options[:ws_url]
    end

    # @return [String] the explicit URL when one was set, otherwise one built from host and port
    def ws_url
      return @ws_url if @ws_url

      "ws://#{host}:#{port}/"
    end

    # @return [Boolean] whether an explicit URL was set (anything but nil)
    def ws_url?
      !@ws_url.nil?
    end

    # @return [Hash] all settings keyed by symbol; :ws_url holds the effective URL
    def to_h
      %i[host port timeout process_timeout window_size browser_path headless ws_url].to_h do |key|
        [key, public_send(key)]
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
module Lightpanda
  # Spawns the Lightpanda binary in `serve` mode and waits until its output
  # announces the listening address.
  class Process
    # Captures "ip:port" from the server's "server running ... address=" log line.
    READY_PATTERN = /server running.*address=(\d+\.\d+\.\d+\.\d+:\d+)/

    attr_reader :pid, :ws_url

    # @param options [#browser_path, #host, #port, #process_timeout]
    def initialize(options)
      @options = options
      @pid = nil
      @ws_url = nil
      @stdout_r = nil
      @stdout_w = nil
      @stderr_r = nil
      @stderr_w = nil
    end

    # Spawns the binary and blocks until it reports readiness.
    #
    # @raise [BinaryNotFoundError] when no binary path can be determined
    # @raise [ProcessTimeoutError] when readiness is not reported
    def start
      binary_path = @options.browser_path || Binary.find_or_download

      raise BinaryNotFoundError, "Lightpanda binary not found" unless binary_path

      @stdout_r, @stdout_w = IO.pipe
      @stderr_r, @stderr_w = IO.pipe

      @pid = spawn_process(binary_path)

      # Close the parent's copies of the write ends so the read ends see EOF
      # once the child goes away.
      @stdout_w.close
      @stderr_w.close

      # NOTE(review): nothing visible here keeps reading the pipes after
      # readiness; confirm the child cannot block on a full pipe.
      wait_for_ready
    end

    # Sends TERM, reaps the child and closes the pipes. No-op without a pid.
    def stop
      return unless @pid

      begin
        ::Process.kill("TERM", @pid)
        ::Process.wait(@pid)
      rescue Errno::ESRCH, Errno::ECHILD
        # Process already dead
      end

      cleanup_pipes
      @pid = nil
    end

    # @return [Boolean] whether signal 0 can be delivered to the recorded pid
    def alive?
      return false unless @pid

      ::Process.kill(0, @pid)
      true
    rescue Errno::ESRCH, Errno::EPERM
      false
    end

    private

    def spawn_process(binary_path)
      args = build_args

      ::Process.spawn(
        binary_path, *args,
        out: @stdout_w,
        err: @stderr_w,
        pgroup: true
      )
    end

    def build_args
      [
        "serve",
        "--host",
        @options.host.to_s,
        "--port",
        @options.port.to_s,
        "--log_level",
        "info",
      ]
    end

    # `::Process` is spelled out because `Process` alone is this class.
    def monotonic_now
      ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
    end

    # Reads the child's output until READY_PATTERN matches, then stores the
    # WebSocket URL and returns nil. Pipes that reach EOF are no longer
    # watched; once none are left the wait fails immediately instead of
    # spinning until the timeout.
    #
    # @raise [ProcessTimeoutError] on timeout or when all output streams closed
    def wait_for_ready
      deadline = monotonic_now + @options.process_timeout
      output = +""
      readers = [@stdout_r, @stderr_r]

      until readers.empty? || monotonic_now >= deadline
        ready, = IO.select(readers, nil, nil, 0.1)
        next unless ready

        ready.each { |io| readers.delete(io) if drain(io, output) == :eof }

        if (match = output.match(READY_PATTERN))
          @ws_url = "ws://#{match[1]}/"
          return
        end
      end

      reason =
        if readers.empty?
          "Lightpanda closed its output streams before reporting readiness."
        else
          "Lightpanda failed to start within #{@options.process_timeout} seconds."
        end

      stop

      raise ProcessTimeoutError, "#{reason}\nOutput: #{output}"
    end

    # Appends whatever is currently readable on `io` to `output`.
    #
    # @return [Symbol] :data after a read, :retry when nothing was available, :eof when the pipe is closed
    def drain(io, output)
      output << io.read_nonblock(1024)
      :data
    rescue IO::WaitReadable
      :retry
    rescue EOFError
      :eof
    end

    def cleanup_pipes
      [@stdout_r, @stdout_w, @stderr_r, @stderr_w].each do |pipe|
        pipe.close if pipe && !pipe.closed?
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Lightpanda
4
- VERSION = "0.0.1"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/lightpanda.rb CHANGED
@@ -1,8 +1,28 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "lightpanda/version"
4
+ require_relative "lightpanda/errors"
5
+ require_relative "lightpanda/configuration"
6
+ require_relative "lightpanda/options"
7
+ require_relative "lightpanda/binary"
8
+ require_relative "lightpanda/process"
9
+ require_relative "lightpanda/client"
10
+ require_relative "lightpanda/network"
11
+ require_relative "lightpanda/cookies"
12
+ require_relative "lightpanda/browser"
4
13
 
5
14
  module Lightpanda
6
- class Error < StandardError; end
7
- # Your code goes here...
15
# Shortcut constructor: forwards every keyword argument to Browser.new.
def self.new(**)
  Browser.new(**)
end

# Lazily created, memoized Configuration instance.
def self.configuration
  @configuration ||= Configuration.new
end

# Yields the shared configuration to the given block.
def self.configure
  yield(configuration)
end
8
28
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lightpanda
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marco Roth
@@ -29,39 +29,56 @@ dependencies:
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.1'
32
+ version: '1.3'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '1.1'
39
+ version: '1.3'
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: websocket-driver
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.7'
46
+ version: '0.8'
47
47
  type: :runtime
48
48
  prerelease: false
49
49
  version_requirements: !ruby/object:Gem::Requirement
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '0.7'
54
- description: High-level Ruby API to control Lightpanda browser. Lightpanda is a fast,
55
- lightweight headless browser built for web automation, AI agents, and scraping.
53
+ version: '0.8'
54
+ description: High-level Ruby API to control the Lightpanda browser. Lightpanda is
55
+ a fast, lightweight headless browser built for web automation, AI agents, and scraping.
56
56
  This gem provides CDP-based browser control similar to Ferrum.
57
57
  email:
58
58
  - marco.roth@intergga.ch
59
- executables: []
59
+ executables:
60
+ - lightpanda
60
61
  extensions: []
61
62
  extra_rdoc_files: []
62
63
  files:
64
+ - LICENSE.txt
63
65
  - README.md
66
+ - exe/lightpanda
64
67
  - lib/lightpanda.rb
68
+ - lib/lightpanda/binary.rb
69
+ - lib/lightpanda/browser.rb
70
+ - lib/lightpanda/capybara.rb
71
+ - lib/lightpanda/capybara/driver.rb
72
+ - lib/lightpanda/capybara/node.rb
73
+ - lib/lightpanda/client.rb
74
+ - lib/lightpanda/client/subscriber.rb
75
+ - lib/lightpanda/client/web_socket.rb
76
+ - lib/lightpanda/configuration.rb
77
+ - lib/lightpanda/cookies.rb
78
+ - lib/lightpanda/errors.rb
79
+ - lib/lightpanda/network.rb
80
+ - lib/lightpanda/options.rb
81
+ - lib/lightpanda/process.rb
65
82
  - lib/lightpanda/version.rb
66
83
  homepage: https://github.com/marcoroth/lightpanda-ruby
67
84
  licenses:
@@ -78,14 +95,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
78
95
  requirements:
79
96
  - - ">="
80
97
  - !ruby/object:Gem::Version
81
- version: 3.1.0
98
+ version: '3.2'
82
99
  required_rubygems_version: !ruby/object:Gem::Requirement
83
100
  requirements:
84
101
  - - ">="
85
102
  - !ruby/object:Gem::Version
86
103
  version: '0'
87
104
  requirements: []
88
- rubygems_version: 3.6.9
105
+ rubygems_version: 4.0.3
89
106
  specification_version: 4
90
107
  summary: Ruby client for the Lightpanda headless browser via Chrome DevTools Protocol.
91
108
  test_files: []