bidi2pdf 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -1
- data/CHANGELOG.md +63 -8
- data/README.md +28 -0
- data/docker/Dockerfile +1 -1
- data/docker/Dockerfile.chromedriver +9 -2
- data/docker/Dockerfile.slim +2 -2
- data/lib/bidi2pdf/bidi/browser_console_logger.rb +92 -0
- data/lib/bidi2pdf/bidi/browser_tab.rb +431 -41
- data/lib/bidi2pdf/bidi/client.rb +85 -23
- data/lib/bidi2pdf/bidi/command_manager.rb +46 -60
- data/lib/bidi2pdf/bidi/commands/base.rb +39 -1
- data/lib/bidi2pdf/bidi/commands/browser_remove_user_context.rb +27 -0
- data/lib/bidi2pdf/bidi/commands/browsing_context_print.rb +4 -0
- data/lib/bidi2pdf/bidi/commands/print_parameters_validator.rb +5 -0
- data/lib/bidi2pdf/bidi/commands.rb +1 -0
- data/lib/bidi2pdf/bidi/connection_manager.rb +3 -9
- data/lib/bidi2pdf/bidi/event_manager.rb +2 -2
- data/lib/bidi2pdf/bidi/interceptor.rb +1 -1
- data/lib/bidi2pdf/bidi/js_logger_helper.rb +16 -0
- data/lib/bidi2pdf/bidi/logger_events.rb +25 -45
- data/lib/bidi2pdf/bidi/navigation_failed_events.rb +41 -0
- data/lib/bidi2pdf/bidi/network_event.rb +15 -0
- data/lib/bidi2pdf/bidi/network_event_formatters/network_event_console_formatter.rb +4 -3
- data/lib/bidi2pdf/bidi/network_events.rb +27 -17
- data/lib/bidi2pdf/bidi/session.rb +123 -13
- data/lib/bidi2pdf/bidi/user_context.rb +62 -0
- data/lib/bidi2pdf/bidi/web_socket_dispatcher.rb +7 -7
- data/lib/bidi2pdf/chromedriver_manager.rb +48 -21
- data/lib/bidi2pdf/cli.rb +10 -2
- data/lib/bidi2pdf/dsl.rb +33 -0
- data/lib/bidi2pdf/launcher.rb +30 -0
- data/lib/bidi2pdf/notifications/event.rb +52 -0
- data/lib/bidi2pdf/notifications/instrumenter.rb +65 -0
- data/lib/bidi2pdf/notifications/logging_subscriber.rb +136 -0
- data/lib/bidi2pdf/notifications.rb +78 -0
- data/lib/bidi2pdf/session_runner.rb +35 -3
- data/lib/bidi2pdf/test_helpers/matchers/contains_pdf_text.rb +50 -0
- data/lib/bidi2pdf/test_helpers/matchers/have_pdf_page_count.rb +50 -0
- data/lib/bidi2pdf/test_helpers/matchers/match_pdf_text.rb +45 -0
- data/lib/bidi2pdf/test_helpers/pdf_reader_utils.rb +89 -0
- data/lib/bidi2pdf/test_helpers/pdf_text_sanitizer.rb +232 -0
- data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb +87 -0
- data/lib/bidi2pdf/test_helpers.rb +13 -0
- data/lib/bidi2pdf/verbose_logger.rb +79 -0
- data/lib/bidi2pdf/version.rb +1 -1
- data/lib/bidi2pdf.rb +131 -10
- data/sig/bidi2pdf/bidi/client.rbs +1 -1
- metadata +67 -4
- data/lib/bidi2pdf/utils.rb +0 -15
@@ -5,13 +5,17 @@ require "securerandom"
|
|
5
5
|
|
6
6
|
module Bidi2pdf
|
7
7
|
class ChromedriverManager
|
8
|
-
|
8
|
+
include Chromedriver::Binary::Platform
|
9
9
|
|
10
|
-
|
10
|
+
attr_reader :port, :pid, :started, :headless, :chrome_args, :shutdown_mutex
|
11
|
+
|
12
|
+
# Prepares a manager for a Chromedriver process that has not been started yet.
#
# @param port [Integer] port for Chromedriver; defaults to 0
# @param headless [Boolean] passed on to the Bidi session built by #session
# @param chrome_args [Array<String>] passed on to the Bidi session built by #session
def initialize(port: 0, headless: true, chrome_args: Bidi::Session::DEFAULT_CHROME_ARGS)
  # Launch configuration supplied by the caller.
  @port = port
  @headless = headless
  @chrome_args = chrome_args

  # Runtime state: nothing is running and no session exists yet.
  @started = false
  @session = nil

  # Guards #stop, which runs its whole body inside shutdown_mutex.synchronize.
  @shutdown_mutex ||= Mutex.new
end
|
16
20
|
|
17
21
|
def start
|
@@ -23,11 +27,7 @@ module Bidi2pdf
|
|
23
27
|
cmd = build_cmd
|
24
28
|
Bidi2pdf.logger.info "Starting Chromedriver with command: #{cmd}"
|
25
29
|
|
26
|
-
|
27
|
-
@pid = Process.spawn(cmd, out: w, err: w)
|
28
|
-
w.close # close writer in parent
|
29
|
-
|
30
|
-
parse_port_from_output(r)
|
30
|
+
spawn_process(cmd)
|
31
31
|
|
32
32
|
Bidi2pdf.logger.info "Started Chromedriver on port #{@port}, PID #{@pid}"
|
33
33
|
wait_until_chromedriver_ready
|
@@ -40,7 +40,7 @@ module Bidi2pdf
|
|
40
40
|
# Lazily creates the Bidi session for this manager and memoizes it.
#
# @return [Bidi::Session, nil] the session, or nil while the manager is not started
def session
  return nil unless @started
  return @session if @session

  @session = Bidi::Session.new(session_url: session_url, headless: @headless, chrome_args: @chrome_args)
end
|
45
45
|
|
46
46
|
def session_url
|
@@ -50,27 +50,54 @@ module Bidi2pdf
|
|
50
50
|
end
|
51
51
|
|
52
52
|
# Shuts Chromedriver down while holding the shutdown mutex.
#
# When a PID is recorded, the session is closed, the process tree is
# logged, the process is asked to terminate, leftover children are
# inspected, and - only if the process is still alive - it is killed.
# The PID and the started flag are reset in every case, so a second call
# finds no PID and does nothing.
#
# @param timeout [Numeric] forwarded to #kill_chromedriver
# @return [Object, nil] result of #kill_chromedriver, or nil when no kill was needed
def stop(timeout: 5)
  shutdown_mutex.synchronize do
    if @pid
      @started = false

      close_session
      debug_show_all_children

      # Terminate first, then look at the children that were captured beforehand.
      detect_zombie_processes(term_chromedriver)

      kill_chromedriver(timeout: timeout) if process_alive?
    end
  ensure
    @pid = nil
    @started = false
  end
end
|
71
74
|
|
72
75
|
private
|
73
76
|
|
77
|
+
# Starts Chromedriver as a child process and hands its combined
# stdout/stderr stream to #parse_port_from_output.
#
# The child's PID is stored in @pid. The parent's copy of the pipe's write
# end is always closed; if spawning fails, the read end is closed as well so
# no file descriptor leaks.
#
# @param cmd [String] command line used to start Chromedriver
# @return [Object] whatever #parse_port_from_output returns
# @raise [SystemCallError] when the command cannot be spawned
def spawn_process(cmd)
  reader, writer = IO.pipe

  options = {
    out: writer,
    err: writer,
    close_others: true,
    chdir: Dir.tmpdir
  }

  # Run the child in its own process group; the stop path signals the
  # group via a negative PID (Process.kill("TERM", -@pid)).
  if platform == "win"
    options[:new_pgroup] = true
  else
    options[:pgroup] = true
  end

  env = {}

  begin
    @pid = Process.spawn(env, cmd, **options)
  rescue StandardError
    # Nobody will ever read from the pipe when the spawn failed.
    reader.close
    raise
  ensure
    writer.close # the parent never writes; only the child holds the write end
  end

  parse_port_from_output(reader)
end
|
100
|
+
|
74
101
|
def detect_zombie_processes(old_childprocesses)
|
75
102
|
Bidi2pdf.logger.debug "Old child processes for #{@pid}: #{old_childprocesses.map(&:pid).join(", ")}"
|
76
103
|
|
@@ -99,7 +126,7 @@ module Bidi2pdf
|
|
99
126
|
Bidi2pdf::ProcessTree.new(@pid).traverse do |process, level|
|
100
127
|
indent = " " * level
|
101
128
|
prefix = level.zero? ? "" : "└─ "
|
102
|
-
Bidi2pdf.logger.
|
129
|
+
Bidi2pdf.logger.debug2 "#{indent}#{prefix}PID #{process.pid} (#{process.name})"
|
103
130
|
end
|
104
131
|
end
|
105
132
|
|
@@ -114,7 +141,7 @@ module Bidi2pdf
|
|
114
141
|
Bidi2pdf::ProcessTree.new(@pid).children(@pid).tap do |_child_processes|
|
115
142
|
Bidi2pdf.logger.info "Stopping Chromedriver (PID #{@pid})"
|
116
143
|
|
117
|
-
Process.kill("TERM",
|
144
|
+
Process.kill("TERM", -@pid) # a negative PID means: kill the whole (Linux) process group
|
118
145
|
end
|
119
146
|
rescue Errno::ESRCH
|
120
147
|
Bidi2pdf.logger.debug "Process already gone"
|
@@ -159,7 +186,7 @@ module Bidi2pdf
|
|
159
186
|
def parse_port_from_output(io, timeout: 5)
|
160
187
|
Thread.new do
|
161
188
|
io.each_line do |line|
|
162
|
-
Bidi2pdf.logger.
|
189
|
+
Bidi2pdf.logger.debug1 line.chomp
|
163
190
|
|
164
191
|
next unless line =~ /ChromeDriver was started successfully on port (\d+)/
|
165
192
|
|
data/lib/bidi2pdf/cli.rb
CHANGED
@@ -51,6 +51,11 @@ module Bidi2pdf
|
|
51
51
|
option :log_level,
|
52
52
|
type: :string,
|
53
53
|
default: "info", enum: %w[debug info warn error fatal unknown], desc: "Set log level"
|
54
|
+
# Verbosity names ordered by their numeric value in VERBOSITY_LEVELS.
# The list is computed once and reused for both the default and the enum.
verbosity_levels = Bidi2pdf::VerboseLogger::VERBOSITY_LEVELS.keys.sort_by { |k| Bidi2pdf::VerboseLogger::VERBOSITY_LEVELS[k] }
option :verbosity,
       type: :string,
       default: verbosity_levels.first,
       enum: verbosity_levels.map(&:to_s),
       desc: "Set debug verbosity level",
       aliases: "-v"
|
54
59
|
option :log_network_traffic, type: :boolean, default: false, desc: "Log network traffic", aliases: "-n"
|
55
60
|
option :network_log_format,
|
56
61
|
type: :string,
|
@@ -216,12 +221,12 @@ module Bidi2pdf
|
|
216
221
|
end
|
217
222
|
end
|
218
223
|
|
219
|
-
# rubocop:enable Metrics/AbcSize
|
220
|
-
|
221
224
|
def configure
|
222
225
|
Bidi2pdf.configure do |config|
|
223
226
|
config.logger.level = log_level
|
224
227
|
|
228
|
+
config.logger.verbosity = merged_options[:verbosity]
|
229
|
+
|
225
230
|
config.network_events_logger.level = Logger::INFO if merged_options[:log_network_traffic]
|
226
231
|
|
227
232
|
config.default_timeout = merged_options[:default_timeout]
|
@@ -232,6 +237,8 @@ module Bidi2pdf
|
|
232
237
|
end
|
233
238
|
end
|
234
239
|
|
240
|
+
# rubocop: enable Metrics/MethodLength
|
241
|
+
|
235
242
|
def log_level
|
236
243
|
case merged_options[:log_level]
|
237
244
|
when "debug" then Logger::DEBUG
|
@@ -266,3 +273,4 @@ module Bidi2pdf
|
|
266
273
|
end
|
267
274
|
end
|
268
275
|
end
|
276
|
+
# rubocop:enable Metrics/AbcSize
|
data/lib/bidi2pdf/dsl.rb
CHANGED
@@ -4,7 +4,39 @@ require "bidi2pdf"
|
|
4
4
|
|
5
5
|
module Bidi2pdf
|
6
6
|
module DSL
|
7
|
+
# Provides a DSL for managing browser sessions and tabs
|
8
|
+
# using the Bidi2pdf library. This module includes a method to create and manage
|
9
|
+
# browser tabs within a controlled session.
|
10
|
+
|
7
11
|
# rubocop: disable Metrics/AbcSize
|
12
|
+
#
|
13
|
+
# Executes a block of code within the context of a browser tab.
|
14
|
+
#
|
15
|
+
# This method handles the setup and teardown of a browser session, user context,
|
16
|
+
# browser window, and tab. It ensures that resources are properly cleaned up
|
17
|
+
# after the block is executed.
|
18
|
+
#
|
19
|
+
# @param [String, nil] remote_browser_url The URL of a remote browser to connect to.
|
20
|
+
# If provided, the session will connect to this browser in headless mode.
|
21
|
+
# @param [Integer] port The port to use for the local browser session. Defaults to 0 (chooses a random port).
|
22
|
+
# @param [Boolean] headless Whether to run the browser in headless mode. Defaults to true.
|
23
|
+
# @param [Array<String>] chrome_args Additional arguments to pass to the Chrome browser.
|
24
|
+
# Defaults to the `DEFAULT_CHROME_ARGS` from the `Bidi2pdf::Bidi::Session` class.
|
25
|
+
#
|
26
|
+
# @yield [tab] The browser tab created within the session.
|
27
|
+
# @yieldparam [Object] tab The browser tab object.
|
28
|
+
#
|
29
|
+
# @example Using a local browser session
|
30
|
+
# Bidi2pdf::DSL.with_tab(port: 9222, headless: false) do |tab|
|
31
|
+
# # Perform actions with the tab
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# @example Using a remote browser session
|
35
|
+
# Bidi2pdf::DSL.with_tab(remote_browser_url: "http://remote-browser:9222/session") do |tab|
|
36
|
+
# # Perform actions with the tab
|
37
|
+
# end
|
38
|
+
#
|
39
|
+
# @return [void]
|
8
40
|
def self.with_tab(remote_browser_url: nil, port: 0, headless: true, chrome_args: Bidi2pdf::Bidi::Session::DEFAULT_CHROME_ARGS.dup)
|
9
41
|
manager = nil
|
10
42
|
session = nil
|
@@ -35,6 +67,7 @@ module Bidi2pdf
|
|
35
67
|
ensure
|
36
68
|
tab&.close
|
37
69
|
window&.close
|
70
|
+
context&.close
|
38
71
|
session&.close
|
39
72
|
manager&.stop
|
40
73
|
end
|
data/lib/bidi2pdf/launcher.rb
CHANGED
@@ -5,6 +5,36 @@ require_relative "session_runner"
|
|
5
5
|
require_relative "bidi/session"
|
6
6
|
|
7
7
|
module Bidi2pdf
|
8
|
+
# Represents a launcher for managing browser sessions and executing tasks
|
9
|
+
# using the Bidi2pdf library. This class handles the setup and teardown
|
10
|
+
# of browser sessions, as well as the execution of tasks within those sessions.
|
11
|
+
#
|
12
|
+
# @example Launching a session
|
13
|
+
# launcher = Bidi2pdf::Launcher.new(
|
14
|
+
# url: "http://example.com",
|
15
|
+
# inputfile: "input.pdf",
|
16
|
+
# output: "output.pdf",
|
17
|
+
# cookies: [],
|
18
|
+
# headers: {},
|
19
|
+
# auth: nil,
|
20
|
+
# headless: true
|
21
|
+
# )
|
22
|
+
# launcher.launch
|
23
|
+
# launcher.stop
|
24
|
+
#
|
25
|
+
# @param [String] url The URL to navigate to in the browser session.
|
26
|
+
# @param [String] inputfile The path to the input file to be processed.
|
27
|
+
# @param [String] output The path to the output file to be generated.
|
28
|
+
# @param [Array<Hash>] cookies An array of cookies to set in the browser session.
|
29
|
+
# @param [Hash] headers A hash of HTTP headers to include in the browser session.
|
30
|
+
# @param [Hash, nil] auth Authentication credentials (e.g., username and password).
|
31
|
+
# @param [Boolean] headless Whether to run the browser in headless mode. Defaults to true.
|
32
|
+
# @param [Integer] port The port to use for the browser session. Defaults to 0.
|
33
|
+
# @param [Boolean] wait_window_loaded Whether to wait for the window to fully load. Defaults to false.
|
34
|
+
# @param [Boolean] wait_network_idle Whether to wait for the network to become idle. Defaults to false.
|
35
|
+
# @param [Hash] print_options Options for printing the page. Defaults to an empty hash.
|
36
|
+
# @param [String, nil] remote_browser_url The URL of a remote browser to connect to. Defaults to nil.
|
37
|
+
# @param [Symbol] network_log_format The format for network logs. Defaults to :console.
|
8
38
|
class Launcher
|
9
39
|
# rubocop:disable Metrics/ParameterLists
|
10
40
|
def initialize(url:, inputfile:, output:, cookies:, headers:, auth:, headless: true, port: 0, wait_window_loaded: false,
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
  # rubocop: disable Lint/RescueException
  module Notifications
    # One instrumented occurrence: a name, a payload, the id of the
    # instrumenter that produced it, and start/end timestamps that are kept
    # internally in milliseconds.
    class Event
      attr_reader :name, :transaction_id
      attr_accessor :payload

      # @param name [String] event name
      # @param start [#to_f, nil] start time in seconds, or nil when not measured yet
      # @param ending [#to_f, nil] end time in seconds, or nil when not measured yet
      # @param transaction_id [String] id of the instrumenter
      # @param payload [Hash] data attached to the event
      def initialize(name, start, ending, transaction_id, payload)
        @name = name
        @transaction_id = transaction_id
        @payload = payload
        @time = to_milliseconds(start)
        @end = to_milliseconds(ending)
      end

      # Measures the given block. An exception raised by the block is noted in
      # the payload (:exception and :exception_object) and then re-raised; the
      # end timestamp is taken either way.
      #
      # @yieldparam payload [Hash] the event payload
      # @return [Object, nil] the block's result, nil without a block
      def record # :nodoc:
        start!

        begin
          yield(payload) if block_given?
        rescue Exception => e
          payload[:exception] = [e.class.name, e.message]
          payload[:exception_object] = e
          raise
        ensure
          finish!
        end
      end

      # Stamps the start with the monotonic clock (milliseconds).
      def start!
        @time = now
      end

      # Stamps the end with the monotonic clock (milliseconds).
      def finish!
        @end = now
      end

      # @return [Float] elapsed milliseconds between start and end
      def duration
        @end - @time
      end

      # @return [Float, nil] start timestamp in seconds, nil when unset
      def time
        return unless @time

        @time / 1000.0
      end

      # @return [Float, nil] end timestamp in seconds, nil when unset
      def end
        return unless @end

        @end / 1000.0
      end

      private

      # Seconds -> milliseconds; a missing value is passed through untouched.
      def to_milliseconds(moment)
        moment ? moment.to_f * 1_000.0 : moment
      end

      def now
        Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
      end
    end
  end
end
|
51
|
+
|
52
|
+
# rubocop: enable Lint/RescueException
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "securerandom"
|
4
|
+
|
5
|
+
module Bidi2pdf
  # This module provides a way to instrument events in the Bidi2pdf library.
  # It is heavily inspired by ActiveSupport::Notifications and is meant to be
  # used in a similar way.
  # In a Rails environment, ActiveSupport::Notifications should be used instead,
  # via configuration: Bidi2pdf.notification_service = ActiveSupport::Notifications

  # rubocop: disable Lint/RescueException, Lint/SuppressedException
  module Notifications
    # Wraps a block in an Event, measures it, and delivers the finished event
    # to every subscriber whose pattern matches the event name.
    class Instrumenter
      attr_reader :id

      def initialize
        @id = SecureRandom.uuid
      end

      # Records the block as an event and notifies the matching subscribers,
      # even when the block raised.
      #
      # @param name [String] event name
      # @param payload [Hash] event payload, yielded to the block
      # @return [Object] the block's result
      # @raise [Bidi2pdf::NotificationsError] when any subscriber raised; its
      #   cause is the first subscriber exception
      # @raise [Exception] the block's own exception when no subscriber failed
      def notify(name, payload, &)
        event = create_event(name, payload)
        block_error = nil

        outcome =
          begin
            event.record(&)
          rescue Exception => e
            # Held back so the subscribers still get to see the event.
            block_error = e
            nil
          end

        failures = notify_subscribers(name, event)

        raise Bidi2pdf::NotificationsError.new(failures), cause: failures.first if failures.any?
        raise block_error if block_error

        outcome
      end

      private

      def create_event(name, payload)
        Event.new(name, nil, nil, @id, payload)
      end

      # rubocop:disable Style/CaseEquality
      # Calls every subscriber registered under a matching pattern and
      # returns the exceptions they raised (empty when all succeeded).
      def notify_subscribers(name, event)
        Notifications.subscribers.each_with_object([]) do |(pattern, handlers), failures|
          next unless pattern === name

          handlers.each do |handler|
            handler.call(event)
          rescue Exception => e
            failures << e
          end
        end
      end

      # rubocop:enable Style/CaseEquality
    end
  end
end
|
64
|
+
|
65
|
+
# rubocop: enable Lint/RescueException, Lint/SuppressedException
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
  module Notifications
    # Log-writing handlers for Bidi2pdf notification events, kept in a module
    # for reuse within ActiveSupport::LogSubscriber.
    #
    # The including object must respond to +logger+. Some handlers call
    # +debug1+, +debug2+ and +debug2?+ on it.
    # NOTE(review): those methods are not part of Ruby's stdlib Logger;
    # presumably Bidi2pdf::VerboseLogger provides them - confirm before
    # injecting a plain Logger.
    module LoggingSubscriberActions
      # Logs responses that carry an error or that nobody handled.
      def handle_response(event)
        payload = event.payload

        if payload[:error]
          logger.error "Received error: #{payload[:error].inspect} for cmd: #{payload[:id] || "-"}"
        elsif !payload[:handled]
          # Uses the injected logger like every other handler in this module.
          logger.warn "Unknown response: #{payload[:data].inspect}"
        end
      end

      # Logs an outgoing command; the full (redacted) payload only at debug1.
      def send_cmd(event)
        logger.debug "Sending command: #{event.payload[:cmd].method_name} id: ##{event.payload[:cmd_payload][:id]}"

        logger.debug1 do
          payload = redact_sensitive_fields(event.payload[:cmd_payload])
          "Sending command: #{payload.inspect} (#{event.duration.round(1)}ms)"
        end
      end

      # Logs a command whose send-and-wait cycle ended with an exception.
      def send_cmd_and_wait(event)
        return unless event.payload[:exception]

        payload = redact_sensitive_fields(event.payload[:cmd]&.params || {})
        logger.error "Error sending command: #{payload} (#{event.duration.round(1)}ms) - #{event.payload[:exception].inspect}"
      end

      # Logs a failed attempt to close the session.
      def session_close(event)
        return unless event.payload[:error]

        logger.error "Session close error: #{event.payload[:error].inspect}, attempt: #{event.payload[:attempt]}, retry: #{event.payload[:retry]}"
      end

      # rubocop: disable Metrics/AbcSize
      # Logs single network events at debug2; responseStarted is skipped.
      def network_event_received(event)
        return unless logger.debug2?

        msg = case event.payload[:method]
              when "network.beforeRequestSent"
                "Request url '#{event.payload[:url]}' started"

              when "network.responseStarted"
                nil
              when "network.responseCompleted"
                "Request url '#{event.payload[:url]}' completed"
              when "network.fetchError"
                "Request url '#{event.payload[:url]}' error."
              else
                "Unknown network event: #{event.payload[:method]} for url '#{event.payload[:url]}'"
              end

        logger.debug2 msg if msg
      end

      # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      # Logs a summary (request count, bytes, status codes) once the network is idle.
      # Requests without a status code are counted as "pending".
      def network_idle(event)
        return unless logger.info?

        requests = event.payload[:requests]
        transferred = requests.sum { |request| request.bytes_received || 0 }
        status_counts = requests
                        .map { |request| request.http_status_code || 0 }
                        .tally
                        .map { |code, count| "#{code.zero? ? "pending" : code}: #{count}" }
                        .join(", ")

        logger.info "Network was idle after #{event.duration.round(1)}ms, #{requests.size} requests, " \
                    "transferred #{transferred} bytes (status codes: #{status_counts})"
      end

      # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

      def page_loaded(event)
        logger.info "Page loaded: #{event.duration.round(1)}ms"
      end

      def print(event)
        logger.info "Page printed: #{event.duration.round(1)}ms"
      end

      private

      # Returns a copy of +obj+ in which the value of every hash key listed in
      # +sensitive_keys+ (compared case-insensitively) is replaced by
      # "[REDACTED]". Hashes and arrays are walked recursively; anything else
      # is returned unchanged.
      def redact_sensitive_fields(obj, sensitive_keys = %w[value token password authorization username])
        case obj
        when Hash
          obj.to_h do |key, value|
            redacted = if sensitive_keys.include?(key.to_s.downcase)
                         "[REDACTED]"
                       else
                         redact_sensitive_fields(value, sensitive_keys)
                       end
            [key, redacted]
          end
        when Array
          obj.map { |item| redact_sensitive_fields(item, sensitive_keys) }
        else
          obj
        end
      end
    end

    # Subscribes the handlers above to Bidi2pdf.notification_service and
    # writes their output to the given logger.
    class LoggingSubscriber
      include LoggingSubscriberActions

      # Event name => name of the handler method that logs it.
      EVENT_HANDLERS = {
        "handle_response.bidi2pdf" => :handle_response,
        "send_cmd.bidi2pdf" => :send_cmd,
        "send_cmd_and_wait.bidi2pdf" => :send_cmd_and_wait,
        "session_close.bidi2pdf" => :session_close,
        "network_idle.bidi2pdf" => :network_idle,
        "page_loaded.bidi2pdf" => :page_loaded,
        "network_event_received.bidi2pdf" => :network_event_received,
        "print.bidi2pdf" => :print
      }.freeze

      attr_accessor :logger

      # Registers one handler per entry of EVENT_HANDLERS. Does nothing when
      # no notification service is configured.
      #
      # @param logger [Logger] destination for the log lines
      def initialize(logger: Logger.new($stdout))
        @logger = logger
        @subscriptions = {}

        service = Bidi2pdf.notification_service
        return unless service

        EVENT_HANDLERS.each do |event_name, handler|
          # Keep what #subscribe returns so #unsubscribe can remove exactly this handler.
          @subscriptions[event_name] = service.subscribe(event_name, &method(handler))
        end
      end

      # Removes only the handlers registered by this instance; other
      # subscribers of the same events stay in place.
      # NOTE(review): relies on the service accepting (pattern, subscription)
      # like Bidi2pdf::Notifications.unsubscribe - confirm for other services.
      def unsubscribe
        service = Bidi2pdf.notification_service
        return unless service

        @subscriptions.each do |event_name, subscription|
          service.unsubscribe(event_name, subscription)
        end
        @subscriptions.clear
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "chromedriver_manager"
|
4
|
+
require_relative "session_runner"
|
5
|
+
require_relative "bidi/session"
|
6
|
+
require_relative "notifications/event"
|
7
|
+
require_relative "notifications/instrumenter"
|
8
|
+
|
9
|
+
require "securerandom"
|
10
|
+
|
11
|
+
module Bidi2pdf
  # This module provides a way to instrument events in the Bidi2pdf library.
  # It is heavily inspired by ActiveSupport::Notifications and is meant to be
  # used in a similar way.
  # In a Rails environment, ActiveSupport::Notifications should be used instead,
  # via configuration: config.notification_service = ActiveSupport::Notifications

  module Notifications
    # Adds an accessor to Thread; the current thread's instrumenter is stored there.
    Thread.attr_accessor :bidi2pdf_notification_instrumenter

    # Pattern (String or Regexp) => list of subscriber blocks.
    @subscribers = Concurrent::Hash.new { |registry, pattern| registry[pattern] = [] }

    class << self
      attr_reader :subscribers

      # Runs the block with a copy of +payload+. When somebody listens for
      # +name+ the call goes through the instrumenter; otherwise the block is
      # simply yielded to.
      #
      # @return [Object, nil] the block's result, nil without a block
      def instrument(name, payload = {})
        payload = payload.dup

        return notify(name, payload) { yield payload if block_given? } if listening?(name)

        yield payload if block_given?
      end

      # Registers +block+ for +event_pattern+.
      #
      # @return [Proc] the registered block, for use with #unsubscribe
      # @raise [ArgumentError] when the pattern is neither String nor Regexp
      def subscribe(event_pattern, &block)
        @subscribers[normalize_pattern(event_pattern)] << block

        block
      end

      # Removes +block+ from +event_pattern+, or every subscriber of that
      # pattern when no block is given.
      def unsubscribe(event_pattern, block = nil)
        handlers = @subscribers[normalize_pattern(event_pattern)]

        block ? handlers.delete(block) : handlers.clear
      end

      # rubocop: disable Style/CaseEquality
      # @return [Boolean] true when a matching pattern has at least one subscriber
      def listening?(name)
        @subscribers.any? { |pattern, handlers| pattern === name && handlers.any? }
      end

      # rubocop: enable Style/CaseEquality

      private

      # One Instrumenter per thread, created on first use.
      def bidi2pdf_notification_instrumenter
        Thread.current.bidi2pdf_notification_instrumenter ||= Instrumenter.new
      end

      def notify(name, payload, &)
        bidi2pdf_notification_instrumenter.notify(name, payload, &)
      end

      def normalize_pattern(pat)
        return pat if pat.is_a?(String) || pat.is_a?(Regexp)

        raise ArgumentError, "Pattern must be String or Regexp"
      end
    end
  end
end
|
@@ -1,6 +1,38 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bidi2pdf
|
4
|
+
# Represents a runner for managing browser sessions and executing tasks
|
5
|
+
# using the Bidi2pdf library. This class handles the setup, configuration,
|
6
|
+
# and execution of browser-related workflows, including navigation, cookie
|
7
|
+
# management, and printing.
|
8
|
+
#
|
9
|
+
# @example Running a session
|
10
|
+
# session_runner = Bidi2pdf::SessionRunner.new(
|
11
|
+
# session: session,
|
12
|
+
# url: "http://example.com",
|
13
|
+
# inputfile: "input.html",
|
14
|
+
# output: "output.pdf",
|
15
|
+
# cookies: { "key" => "value" },
|
16
|
+
# headers: { "Authorization" => "Bearer token" },
|
17
|
+
# auth: { username: "user", password: "pass" },
|
18
|
+
# wait_window_loaded: true,
|
19
|
+
# wait_network_idle: true,
|
20
|
+
# print_options: { landscape: true },
|
21
|
+
# network_log_format: :json
|
22
|
+
# )
|
23
|
+
# session_runner.run
|
24
|
+
#
|
25
|
+
# @param [Object] session The browser session object to use.
|
26
|
+
# @param [String, nil] url The URL to navigate to in the browser session.
|
27
|
+
# @param [String, nil] inputfile The path to the input file to be processed if no URL is provided.
|
28
|
+
# @param [String, nil] output The path to the output file to be generated.
|
29
|
+
# @param [Hash] cookies A hash of cookies to set in the browser session. Defaults to an empty hash.
|
30
|
+
# @param [Hash] headers A hash of HTTP headers to include in the browser session. Defaults to an empty hash.
|
31
|
+
# @param [Hash, nil] auth Authentication credentials (e.g., username and password). Defaults to an empty hash.
|
32
|
+
# @param [Boolean] wait_window_loaded Whether to wait for the window to fully load. Defaults to false.
|
33
|
+
# @param [Boolean] wait_network_idle Whether to wait for the network to become idle. Defaults to false.
|
34
|
+
# @param [Hash] print_options Options for printing the page. Defaults to an empty hash.
|
35
|
+
# @param [Symbol] network_log_format The format for network logs. Defaults to :console.
|
4
36
|
class SessionRunner
|
5
37
|
# rubocop: disable Metrics/ParameterLists
|
6
38
|
def initialize(session:, url:, inputfile:, output:, cookies: {}, headers: {}, auth: {}, wait_window_loaded: false,
|
@@ -39,6 +71,7 @@ module Bidi2pdf
|
|
39
71
|
|
40
72
|
@window = window
|
41
73
|
@tab = tab
|
74
|
+
@user_context = user_context
|
42
75
|
|
43
76
|
add_cookies(tab)
|
44
77
|
|
@@ -99,15 +132,14 @@ module Bidi2pdf
|
|
99
132
|
|
100
133
|
if @wait_window_loaded
|
101
134
|
Bidi2pdf.logger.info "Waiting for window to be loaded"
|
102
|
-
@tab.
|
103
|
-
new Promise(resolve => { const check = () => window.loaded ? resolve('done') : setTimeout(check, 100); check(); });
|
104
|
-
EOF_SCRIPT
|
135
|
+
@tab.wait_until_page_loaded
|
105
136
|
end
|
106
137
|
|
107
138
|
@tab.print(@output, print_options: @print_options)
|
108
139
|
ensure
|
109
140
|
@tab.close
|
110
141
|
@window.close
|
142
|
+
@user_context.close
|
111
143
|
end
|
112
144
|
|
113
145
|
# rubocop: enable Metrics/AbcSize
|