bidi2pdf 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +88 -3
- data/README.md +146 -7
- data/docker/Dockerfile.chromedriver +23 -5
- data/docker/entrypoint.sh +41 -0
- data/lib/bidi2pdf/bidi/auth_interceptor.rb +3 -0
- data/lib/bidi2pdf/bidi/browser_tab.rb +60 -19
- data/lib/bidi2pdf/bidi/client.rb +7 -5
- data/lib/bidi2pdf/bidi/commands/cdp_get_session.rb +21 -0
- data/lib/bidi2pdf/bidi/commands/page_print.rb +101 -0
- data/lib/bidi2pdf/bidi/commands/print_parameters_validator.rb +4 -1
- data/lib/bidi2pdf/bidi/commands.rb +2 -0
- data/lib/bidi2pdf/bidi/connection_manager.rb +3 -0
- data/lib/bidi2pdf/bidi/event_manager.rb +34 -4
- data/lib/bidi2pdf/bidi/interceptor.rb +12 -2
- data/lib/bidi2pdf/bidi/session.rb +35 -2
- data/lib/bidi2pdf/bidi/web_socket_dispatcher.rb +5 -5
- data/lib/bidi2pdf/chromedriver_manager.rb +25 -11
- data/lib/bidi2pdf/cli.rb +9 -2
- data/lib/bidi2pdf/test_helpers/configuration.rb +67 -0
- data/lib/bidi2pdf/test_helpers/images/extractor.rb +99 -0
- data/lib/bidi2pdf/test_helpers/images/image_similarity_checker.rb +50 -0
- data/lib/bidi2pdf/test_helpers/images/tiff_helper.rb +204 -0
- data/lib/bidi2pdf/test_helpers/images.rb +12 -0
- data/lib/bidi2pdf/test_helpers/matchers/contains_pdf_image.rb +29 -0
- data/lib/bidi2pdf/test_helpers/pdf_file_helper.rb +39 -0
- data/lib/bidi2pdf/test_helpers/spec_paths_helper.rb +60 -0
- data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb +0 -6
- data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_test_helper.rb +103 -0
- data/lib/bidi2pdf/test_helpers/testcontainers/shared_docker_network.rb +21 -0
- data/lib/bidi2pdf/test_helpers/testcontainers/testcontainers_refinement.rb +53 -0
- data/lib/bidi2pdf/test_helpers/testcontainers.rb +17 -0
- data/lib/bidi2pdf/test_helpers.rb +7 -0
- data/lib/bidi2pdf/version.rb +1 -1
- data/sig/bidi2pdf/bidi/event_manager.rbs +19 -13
- metadata +55 -10
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
|
4
|
+
module Bidi
|
5
|
+
module Commands
|
6
|
+
# Command object for the vendor-prefixed "goog:cdp.getSession" method.
# Its payload carries only the context it was constructed with.
class CdpGetSession
  include Base

  # @param context [Object] value sent under the :context key
  #   (presumably a browsing-context id; confirm against the caller)
  def initialize(context:)
    @context = context
  end

  # @return [String] the protocol method name
  def method_name = "goog:cdp.getSession"

  # @return [Hash] the command parameters
  def params
    { context: @context }
  end
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "print_parameters_validator"
|
4
|
+
|
5
|
+
module Bidi2pdf
|
6
|
+
module Bidi
|
7
|
+
module Commands
|
8
|
+
# Command object that prints a page through "goog:cdp.sendCommand" by
# wrapping the CDP method Page.printToPDF.
#
# Lengths in the print options (margins, page width/height) are converted
# with cm_to_inch before being sent.
class PagePrint
  include Base

  # @param cdp_session [Object] value sent as the command's :session
  # @param print_options [Hash, nil] print settings; nil falls back to
  #   { background: true }
  # @raise [ArgumentError] whatever PrintParametersValidator.validate! raises
  #   for invalid options
  def initialize(cdp_session:, print_options:)
    @cdp_session = cdp_session
    # Shallow copy: the :page entry may be replaced below, and that rewrite
    # must not leak into the hash owned by the caller.
    @print_options = (print_options || { background: true }).dup

    PrintParametersValidator.validate!(@print_options)

    page = @print_options[:page]
    @print_options[:page] = Bidi2pdf.translate_paper_format(page[:format]) if page&.key?(:format)
  end

  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

  # Builds the goog:cdp.sendCommand payload. Entries whose value is nil are
  # dropped from the inner params hash so they are not sent at all.
  #
  # @return [Hash] payload with :method, :session and :params
  # @raise [ArgumentError] if :pageRanges contains an unsupported entry
  def params
    {
      # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
      method: "Page.printToPDF",
      session: @cdp_session,
      params: {
        "printBackground" => @print_options[:background],

        "marginTop" => cm_to_inch(@print_options.dig(:margin, :top) || 0),
        "marginBottom" => cm_to_inch(@print_options.dig(:margin, :bottom) || 0),
        "marginLeft" => cm_to_inch(@print_options.dig(:margin, :left) || 0),
        "marginRight" => cm_to_inch(@print_options.dig(:margin, :right) || 0),
        "landscape" => (@print_options[:orientation] || "portrait").to_sym == :landscape,

        "paperWidth" => cm_to_inch(@print_options.dig(:page, :width)),
        "paperHeight" => cm_to_inch(@print_options.dig(:page, :height)),
        "pageRanges" => page_ranges_to_string(@print_options[:pageRanges]),
        "scale" => @print_options[:scale] || 1.0,

        "displayHeaderFooter" => @print_options[:display_header_footer],
        "headerTemplate" => @print_options[:header_template] || "",
        "footerTemplate" => @print_options[:footer_template] || "",

        "preferCSSPageSize" => @print_options.fetch(:prefer_css_page_size, true),

        "generateTaggedPDF" => @print_options.fetch(:generate_tagged_pdf, false),
        "generateDocumentOutline" => @print_options.fetch(:generate_document_outline, false),

        transferMode: "ReturnAsBase64"
      }.compact
    }
  end

  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

  # @return [String] the protocol method name
  def method_name
    "goog:cdp.sendCommand"
  end

  private

  # rubocop:disable Naming/MethodParameterName

  # Converts centimetres to inches; nil stays nil so the entry can be
  # removed by the compact call in #params.
  def cm_to_inch(cm)
    return nil if cm.nil?

    cm.to_f / 2.54
  end

  # rubocop:enable Naming/MethodParameterName

  # rubocop:disable Metrics/CyclomaticComplexity

  # Turns an array such as [3, "1-2", 3] into "1-2,3": entries are
  # de-duplicated, ordered by their numeric start and joined with commas.
  #
  # @param input [Array<Integer, String>, nil]
  # @return [String, nil] nil when input is nil or empty
  # @raise [ArgumentError] for malformed strings or unsupported entry types
  def page_ranges_to_string(input)
    return nil if input.nil? || input.empty?

    segments = input.map do |entry|
      case entry
      when Integer
        entry.to_s
      when String
        raise ArgumentError, "Invalid page entry: #{entry.inspect}" unless entry.match?(/\A\d+(-\d+)?\z/)

        entry
      else
        raise ArgumentError, "Unsupported page entry type: #{entry.class}"
      end
    end

    # dedupe, sort by numeric start, and join
    segments
      .uniq
      .sort_by { |seg| seg.split("-", 2).first.to_i }
      .join(",")
  end

  # rubocop:enable Metrics/CyclomaticComplexity
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -37,6 +37,7 @@ module Bidi2pdf
|
|
37
37
|
@params = params
|
38
38
|
end
|
39
39
|
|
40
|
+
# rubocop:disable Naming/PredicateMethod
|
40
41
|
def validate!
|
41
42
|
raise ArgumentError, "params must be a Hash" unless @params.is_a?(Hash)
|
42
43
|
|
@@ -51,6 +52,8 @@ module Bidi2pdf
|
|
51
52
|
true
|
52
53
|
end
|
53
54
|
|
55
|
+
# rubocop:enable Naming/PredicateMethod
|
56
|
+
|
54
57
|
private
|
55
58
|
|
56
59
|
def validate_boolean(key)
|
@@ -79,7 +82,7 @@ module Bidi2pdf
|
|
79
82
|
# Checks the optional :pageRanges entry of @params.
# Passes silently when the key is absent or the value is an Array made up
# solely of Integers and Strings.
#
# @raise [ArgumentError] for any other value
def validate_page_ranges
  return unless @params.key?(:pageRanges)

  ranges = @params[:pageRanges]
  return if ranges.is_a?(Array) && ranges.all? { |entry| [Integer, String].any? { |type| entry.is_a?(type) } }

  raise ArgumentError, ":pageRanges must be an array of integers or strings"
end
|
@@ -18,6 +18,8 @@ module Bidi2pdf
|
|
18
18
|
require_relative "commands/browsing_context_close"
|
19
19
|
require_relative "commands/browsing_context_navigate"
|
20
20
|
require_relative "commands/browsing_context_print"
|
21
|
+
require_relative "commands/cdp_get_session"
|
22
|
+
require_relative "commands/page_print"
|
21
23
|
require_relative "commands/session_subscribe"
|
22
24
|
require_relative "commands/session_end"
|
23
25
|
require_relative "commands/cancel_auth"
|
@@ -17,6 +17,7 @@ module Bidi2pdf
|
|
17
17
|
@connection_latch.count_down
|
18
18
|
end
|
19
19
|
|
20
|
+
# rubocop:disable Naming/PredicateMethod
|
20
21
|
def wait_until_open(timeout:)
|
21
22
|
return true if @connected
|
22
23
|
|
@@ -26,6 +27,8 @@ module Bidi2pdf
|
|
26
27
|
|
27
28
|
true
|
28
29
|
end
|
30
|
+
|
31
|
+
# rubocop:enable Naming/PredicateMethod
|
29
32
|
end
|
30
33
|
end
|
31
34
|
end
|
@@ -3,6 +3,27 @@
|
|
3
3
|
module Bidi2pdf
|
4
4
|
module Bidi
|
5
5
|
class EventManager
|
6
|
+
# Handle for a registered callback.
#
# Two listeners are equal (==, eql?, hash) when their ids match, regardless
# of the wrapped block, so a handle can be used to find and remove a
# registration later.
Listener = Struct.new(:block, :id, :source_location) do
  # @param block [#call] the callback to wrap
  # @param id [String] identity of the listener; a random UUID by default
  # @raise [ArgumentError] if block cannot be called (e.g. nil because the
  #   registering method was invoked without a block)
  def initialize(block, id = SecureRandom.uuid)
    raise ArgumentError, "Listener requires a callable block" unless block.respond_to?(:call)

    super(block, id)
    # Remember where the callback was defined, when the callable exposes it.
    self.source_location = block.source_location if block.respond_to?(:source_location)
  end

  # Invokes the wrapped block with the given arguments.
  def call(*args)
    block.call(*args)
  end

  # Identity comparison based on id only.
  def ==(other)
    other.is_a?(Listener) && id == other.id
  end

  alias_method :eql?, :==

  # Consistent with ==: derived from id only.
  def hash
    id.hash
  end
end
|
26
|
+
|
6
27
|
attr_reader :type
|
7
28
|
|
8
29
|
def initialize(type)
|
@@ -11,12 +32,21 @@ module Bidi2pdf
|
|
11
32
|
end
|
12
33
|
|
13
34
|
# Registers one listener, wrapping the given block, under every event name.
#
# @param event_names [Array<String, Symbol>] names are converted with to_sym
# @return [Listener] the handle that was stored for each name
def on(*event_names, &block)
  listener = Listener.new(block)

  event_names.each do |event_name|
    @listeners[event_name.to_sym] << listener
    log_msg("Adding #{event_name} listener", listener)
  end

  listener
end
|
18
42
|
|
19
|
-
def off(event_name,
|
43
|
+
# Removes a previously returned listener handle from one event.
#
# @param event_name [String, Symbol] converted with to_sym
# @param listener [Listener] the handle to remove
# @return [Listener, nil] result of Array#delete on the event's listeners
# @raise [ArgumentError] if listener is not a Listener
def off(event_name, listener)
  unless listener.is_a?(Listener)
    raise ArgumentError, "Listener not registered"
  end

  log_msg("Removing #{event_name} listener", listener)

  registered = @listeners[event_name.to_sym]
  registered.delete(listener)
end
|
20
50
|
|
21
51
|
def dispatch(event_name, *args)
|
22
52
|
listeners = @listeners[event_name.to_sym] || []
|
@@ -27,14 +27,24 @@ module Bidi2pdf
|
|
27
27
|
client.send_cmd_and_wait(cmd) do |response|
|
28
28
|
@interceptor_id = response["result"]["intercept"]
|
29
29
|
|
30
|
-
Bidi2pdf.logger.
|
30
|
+
Bidi2pdf.logger.debug2 "Interceptor added: #{@interceptor_id}"
|
31
31
|
|
32
|
-
client.on_event(*self.class.events, &method(:handle_event))
|
32
|
+
@handle_event_listener = client.on_event(*self.class.events, &method(:handle_event))
|
33
33
|
|
34
34
|
self
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
+
# Detaches this interceptor's event handler from the client.
# Does nothing when no handler is currently registered.
#
# @param client [#remove_event_listener] the client the handler was
#   registered with
# @return [nil]
def unregister_with_client(client:)
  return unless @handle_event_listener

  # remove_event_listener takes a single event name, so the handler is
  # removed once per subscribed event instead of splatting all names.
  self.class.events.each do |event_name|
    client.remove_event_listener(event_name, @handle_event_listener)
  end

  Bidi2pdf.logger.debug2 "Interceptor removed: #{@interceptor_id}"

  @handle_event_listener = nil
end
|
47
|
+
|
38
48
|
# rubocop: disable Metrics/AbcSize
|
39
49
|
def handle_event(response)
|
40
50
|
event_response = response["params"]
|
@@ -32,7 +32,38 @@ module Bidi2pdf
|
|
32
32
|
SUBSCRIBE_EVENTS = %w[script].freeze
|
33
33
|
|
34
34
|
# Default Chrome arguments for the session.
|
35
|
-
DEFAULT_CHROME_ARGS =
|
35
|
+
DEFAULT_CHROME_ARGS = [
  "--allow-pre-commit-input", # Allow pre-commit input for form fields
  "--disable-dev-shm-usage", # Disable /dev/shm usage; use /tmp instead
  "--disable-gpu", # Disable GPU hardware acceleration; force software rendering
  "--disable-popup-blocking", # Allow all pop-ups; bypass built-in popup blocker
  "--disable-hang-monitor", # Disable “Page Unresponsive” / “Aw, Snap!” dialogs on hangs
  "--disable-background-networking", # Turn off speculative/periodic network requests (DNS prefetch, Safe Browsing updates, etc.)
  "--disable-background-timer-throttling", # Prevent JS timers from being throttled in background tabs
  "--disable-client-side-phishing-detection", # Disable built-in phishing checks; rely only on server-side detection
  "--disable-component-extensions-with-background-pages", # Block component extensions that run persistent background pages (PDF viewer, Translate, etc.)
  "--disable-crash-reporter", # Disable crash-report uploads and UI
  "--disable-default-apps", # Stop installation of Chrome’s default apps on a fresh profile
  "--disable-infobars", # Suppress “Chrome is being controlled by automated test software” infobar (and similar)
  "--disable-ipc-flooding-protection", # Turn off defenses against too-many IPC messages from renderers
  "--disable-prompt-on-repost", # Skip “Confirm Form Resubmission” dialogs on page reloads after POST
  "--disable-renderer-backgrounding", # Keep background tab renderers at full priority
  "--disable-search-engine-choice-screen", # Skip first-run search engine selection UI
  "--disable-sync", # Turn off all Google account sync (bookmarks, passwords, etc.)
  "--enable-automation", # Expose WebDriver hooks (navigator.webdriver) for automation frameworks
  "--export-tagged-pdf", # When printing to PDF, include tagged structure for accessibility
  "--force-color-profile=srgb", # Force rendering to use the sRGB color profile
  "--generate-pdf-document-outline", # Auto-generate PDF bookmarks/outlines from HTML headings, not supported by chrome/chromium https://issues.chromium.org/issues/41387522#comment48
  "--metrics-recording-only", # Collect UMA metrics locally but never upload them
  "--no-first-run", # Skip the “Welcome” or “What’s New” screens on fresh profiles
  "--password-store=basic", # Use Chrome’s basic (in-profile) password storage vs. OS vault
  "--use-mock-keychain", # On macOS, use a fake keychain for testing (don’t touch the real one)
  "--disable-backgrounding-occluded-windows", # Prevent fully-occluded windows from being treated as background
  "--disable-breakpad", # Disable the Breakpad crash-reporting library entirely
  "--enable-features=PdfOopif", # Enable out-of-process iframe (OOPIF) architecture for PDF rendering
  "--disable-features=Translate,AcceptCHFrame,MediaRouter,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold,IsolateSandboxedIframes",
  # Every entry is handed over as one argument. A combined
  # "--disable-extensions about:blank" string would not be recognised as the
  # --disable-extensions switch, so the switch and the start page are kept
  # as separate entries.
  "--disable-extensions", # Do not load any extensions
  "about:blank"
].freeze
|
36
67
|
|
37
68
|
# @return [URI] The URI of the session.
|
38
69
|
attr_reader :session_uri
|
@@ -165,6 +196,7 @@ module Bidi2pdf
|
|
165
196
|
Bidi2pdf.logger.info "Subscribing to events"
|
166
197
|
|
167
198
|
Bidi::Client.new(websocket_url).tap do |event_client|
|
199
|
+
@event_socket = event_client
|
168
200
|
event_client.start
|
169
201
|
event_client.wait_until_open
|
170
202
|
|
@@ -208,7 +240,7 @@ module Bidi2pdf
|
|
208
240
|
# @return [Hash] The session request payload.
|
209
241
|
def session_request
|
210
242
|
session_chrome_args = chrome_args.dup
|
211
|
-
session_chrome_args << "--headless" if @headless
|
243
|
+
session_chrome_args << "--headless=new" if @headless
|
212
244
|
|
213
245
|
{
|
214
246
|
"capabilities" => {
|
@@ -320,6 +352,7 @@ module Bidi2pdf
|
|
320
352
|
# Cleans up resources associated with the session.
#
# Closes the command client and the event socket, then drops every cached
# reference. The event socket is closed even when closing the client
# raises, and the references are cleared in either case; the original
# error is still propagated to the caller.
#
# @return [nil]
def cleanup
  begin
    @client&.close
  ensure
    @event_socket&.close
  end

  nil
ensure
  @client = @event_socket = @websocket_url = @browser = nil
end
|
325
358
|
end
|
@@ -24,7 +24,7 @@ module Bidi2pdf
|
|
24
24
|
|
25
25
|
# Registers the given block for :message events on socket_events.
def on_message(&block)
  socket_events.on(:message, &block)
end
|
26
26
|
|
27
|
-
def on_event(
|
27
|
+
# Registers the given block for the named events on session_events and
# returns whatever session_events.on returns.
def on_event(*event_names, &block)
  session_events.on(*event_names, &block)
end
|
28
28
|
|
29
29
|
# Registers the given block for :open events on socket_events.
def on_open(&block)
  socket_events.on(:open, &block)
end
|
30
30
|
|
@@ -34,13 +34,13 @@ module Bidi2pdf
|
|
34
34
|
|
35
35
|
# Removes a :message registration from socket_events.
def remove_message_listener(block)
  socket_events.off(:message, block)
end
|
36
36
|
|
37
|
-
def remove_event_listener(name,
|
37
|
+
# Removes the listener registered for one named event on session_events.
def remove_event_listener(name, listener)
  session_events.off(name, listener)
end
|
38
38
|
|
39
|
-
def remove_open_listener(
|
39
|
+
# Removes an :open registration from socket_events.
def remove_open_listener(listener)
  socket_events.off(:open, listener)
end
|
40
40
|
|
41
|
-
def remove_close_listener(
|
41
|
+
# Removes a :close registration from socket_events.
def remove_close_listener(listener)
  socket_events.off(:close, listener)
end
|
42
42
|
|
43
|
-
def remove_error_listener(
|
43
|
+
# Removes an :error registration from socket_events.
def remove_error_listener(listener)
  socket_events.off(:error, listener)
end
|
44
44
|
|
45
45
|
private
|
46
46
|
|
@@ -8,6 +8,7 @@ module Bidi2pdf
|
|
8
8
|
include Chromedriver::Binary::Platform
|
9
9
|
|
10
10
|
attr_reader :port, :pid, :started, :headless, :chrome_args, :shutdown_mutex
|
11
|
+
attr_accessor :reader_thread
|
11
12
|
|
12
13
|
def initialize(port: 0, headless: true, chrome_args: Bidi::Session::DEFAULT_CHROME_ARGS)
|
13
14
|
@port = port
|
@@ -49,10 +50,20 @@ module Bidi2pdf
|
|
49
50
|
"http://localhost:#{@port}/session"
|
50
51
|
end
|
51
52
|
|
53
|
+
# rubocop: disable Metrics/AbcSize
|
52
54
|
def stop(timeout: 5)
|
53
55
|
shutdown_mutex.synchronize do
|
54
56
|
return unless @pid
|
55
57
|
|
58
|
+
if reader_thread&.alive?
|
59
|
+
begin
|
60
|
+
reader_thread.kill
|
61
|
+
reader_thread.join
|
62
|
+
rescue StandardError => e
|
63
|
+
Bidi2pdf.logger.error "Error killing reader thread: #{e.message}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
56
67
|
@started = false
|
57
68
|
|
58
69
|
close_session
|
@@ -72,6 +83,8 @@ module Bidi2pdf
|
|
72
83
|
end
|
73
84
|
end
|
74
85
|
|
86
|
+
# rubocop: enable Metrics/AbcSize
|
87
|
+
|
75
88
|
private
|
76
89
|
|
77
90
|
def spawn_process(cmd)
|
@@ -184,25 +197,26 @@ module Bidi2pdf
|
|
184
197
|
|
185
198
|
# rubocop: disable Metrics/AbcSize
|
186
199
|
# Reads chromedriver's output on a background thread (stored in
# reader_thread), logging every line. Once the start-up banner with the
# port appears, the port is stored in @port unless one is already set, and
# the waiting caller is released. The thread closes io when reading ends.
#
# @param io [IO] stream to read line by line
# @param timeout [Numeric] seconds to wait for the banner
# @return [nil] when the banner was seen in time
# @raise [RuntimeError] when no banner arrived within timeout
def parse_port_from_output(io, timeout: 5)
  port_event = Concurrent::Event.new

  self.reader_thread = Thread.new do
    io.each_line do |line|
      Bidi2pdf.logger.info "[chromedriver] #{line.chomp}"

      banner = /ChromeDriver was started successfully on port (\d+)/.match(line)
      next unless banner

      @port = banner[1].to_i if @port.nil? || @port.zero?
      port_event.set
    end
  rescue IOError
    # reader closed
  ensure
    io.close unless io.closed?
  end

  raise "Chromedriver did not report a usable port in #{timeout}s" unless port_event.wait(timeout)
end
|
207
221
|
|
208
222
|
# rubocop: enable Metrics/AbcSize
|
data/lib/bidi2pdf/cli.rb
CHANGED
@@ -74,6 +74,8 @@ module Bidi2pdf
|
|
74
74
|
option :page_ranges, type: :array, desc: "Page ranges to print (e.g., 1-2 4 6)"
|
75
75
|
option :scale, type: :numeric, default: 1.0, desc: "Scale between 0.1 and 2.0"
|
76
76
|
option :shrink_to_fit, type: :boolean, default: true, desc: "Shrink content to fit page"
|
77
|
+
option :generate_tagged_pdf, type: :boolean, default: false, desc: "Generate tagged PDF"
|
78
|
+
option :generate_document_outline, type: :boolean, default: false, desc: "Generate document outline"
|
77
79
|
|
78
80
|
class << self
|
79
81
|
def exit_on_failure?
|
@@ -150,7 +152,7 @@ module Bidi2pdf
|
|
150
152
|
raise Thor::Error, "Invalid print option: #{e.message}"
|
151
153
|
end
|
152
154
|
|
153
|
-
# rubocop:disable Metrics/CyclomaticComplexity
|
155
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
154
156
|
def print_options
|
155
157
|
opts = {}
|
156
158
|
|
@@ -186,10 +188,15 @@ module Bidi2pdf
|
|
186
188
|
assign_if_provided(page, :height, :page_height)
|
187
189
|
opts[:page] = page unless page.empty?
|
188
190
|
|
191
|
+
assign_if_provided(opts, :generate_tagged_pdf)
|
192
|
+
assign_if_provided(opts, :generate_document_outline)
|
193
|
+
|
194
|
+
opts[:cmd_type] = :cdp if opts[:generate_tagged_pdf] || opts[:generate_document_outline]
|
195
|
+
|
189
196
|
opts.empty? ? nil : opts
|
190
197
|
end
|
191
198
|
|
192
|
-
# rubocop:enable Metrics/CyclomaticComplexity
|
199
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
193
200
|
|
194
201
|
def option_provided?(key)
|
195
202
|
ARGV.include?("--#{key.to_s.tr("_", "-")}") || ARGV.include?("--#{key}")
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
|
4
|
+
module TestHelpers
|
5
|
+
# Directory layout and naming used by the test helpers.
#
# Defaults are derived from a project root: Rails.root when available,
# otherwise Bundler.root, otherwise the current working directory.
class Configuration
  # @!attribute [rw] spec_dir
  #   @return [Pathname] the directory where specs are located
  attr_accessor :spec_dir

  # @!attribute [rw] tmp_dir
  #   @return [Pathname] the directory for temporary files
  attr_accessor :tmp_dir

  # @!attribute [rw] prefix
  #   @return [String] the prefix for temporary files
  attr_accessor :prefix

  # @!attribute [rw] docker_dir
  #   @return [Pathname] the directory for Docker files
  attr_accessor :docker_dir

  # @!attribute [rw] fixture_dir
  #   @return [Pathname] the directory for fixture files
  attr_accessor :fixture_dir

  def initialize
    project_root = detect_project_root

    @spec_dir = project_root.join("spec").expand_path
    @docker_dir = project_root.join("docker")
    @fixture_dir = project_root.join("spec", "fixtures")
    @tmp_dir = project_root.join("tmp")
    @prefix = "tmp_"
  end

  private

  # First usable root wins; Dir.pwd is the last resort.
  def detect_project_root
    Pathname.new(rails_root || bundler_root || Dir.pwd)
  end

  # Rails.root may be nil (e.g. before an application is defined); in that
  # case the next candidate is tried instead of failing on Pathname.new(nil).
  def rails_root
    Rails.root if defined?(Rails) && Rails.respond_to?(:root)
  end

  # Bundler.root raises when no Gemfile can be located; treat that as
  # "no Bundler root" so the working directory is used.
  def bundler_root
    Bundler.root if defined?(Bundler) && Bundler.respond_to?(:root)
  rescue Bundler::GemfileNotFound
    nil
  end
end
|
42
|
+
|
43
|
+
class << self
  # Replaces the configuration object for TestHelpers.
  # @param config [Configuration] the configuration object to set
  def configuration=(config)
    @configuration = config
  end

  # Returns the current configuration object, creating it on first use.
  # @return [Configuration] the configuration object
  def configuration
    @configuration ||= Configuration.new
  end

  # Yields the configuration object so callers can adjust it in place.
  # @yieldparam [Configuration] configuration the configuration object to modify
  def configure
    yield(configuration)
  end
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Registers the TestHelpers configuration object with RSpec as a custom setting.
|
63
|
+
RSpec.configure do |config|
|
64
|
+
# Adds the :bidi2pdf_test_helpers_config setting; its default is the object returned by TestHelpers.configuration.
|
65
|
+
config.add_setting :bidi2pdf_test_helpers_config, default: TestHelpers.configuration
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
|
4
|
+
module TestHelpers
|
5
|
+
module Images
|
6
|
+
require "vips"
|
7
|
+
require "zlib"
|
8
|
+
|
9
|
+
# Pulls embedded images out of a PDF and exposes them as Vips images.
#
# Pages and images are addressed with 1-based numbers. Extraction runs
# lazily on first access and is memoized.
class Extractor
  include PDFReaderUtils
  include TIFFHelper

  attr_reader :pages, :logger

  # @param pdf_data [Object] passed to pdf_reader_for (presumably provided
  #   by PDFReaderUtils; confirm accepted input types there)
  # @param logger [#warn, #error] receives extraction diagnostics
  def initialize(pdf_data, logger: Bidi2pdf.logger)
    reader = pdf_reader_for pdf_data
    @pages = reader.pages
    @logger = logger
  end

  # @return [Array] every successfully decoded image of the document, in
  #   page order; images that failed to decode are left out
  def all_images
    extracted_images.flat_map { |entry| entry[:images] }.compact
  end

  # @param page_number [Integer] 1-based page number
  # @param image_number [Integer] 1-based position on that page
  # @return [Object, nil] the image, or nil when the position does not
  #   exist (including numbers below 1) or the image failed to decode
  def image_on_page(page_number, image_number)
    # Guard against 0/negative numbers: they would otherwise index from
    # the end of the array and return an unrelated image.
    return nil if image_number < 1

    images_on_page(page_number)[image_number - 1]
  end

  # @param page_number [Integer] 1-based page number
  # @return [Array] images of that page; an entry is nil when that image
  #   could not be decoded, so positions stay stable
  def images_on_page(page_number)
    extracted_images.find { |entry| entry[:page] == page_number }&.dig(:images) || []
  end

  private

  # One { page:, images: } hash per page, built once.
  def extracted_images
    @extracted_images ||= @pages.each_with_index.map do |page, index|
      { page: index + 1, images: extract_images(page) }
    end
  end

  # Collects the images referenced by a page or form XObject, descending
  # into nested forms. Always returns an Array (empty when there are no
  # XObjects); XObjects that are neither images nor forms are ignored.
  def extract_images(page)
    page.xobjects.each_value.flat_map do |stream|
      case stream.hash[:Subtype]
      when :Image
        [process_image_stream(stream)]
      when :Form
        extract_images(PDF::Reader::FormXObject.new(page, stream))
      else
        []
      end
    end
  end

  # Decodes one image stream; nil when no data could be obtained or Vips
  # rejected it.
  def process_image_stream(stream)
    filter = Array(stream.hash[:Filter]).first
    raw = extract_raw_image_data(stream, filter)

    return nil if raw.nil? || raw.empty?

    create_vips_image(raw, filter)
  end

  # Produces a byte string for the stream depending on its first filter.
  # Any error is logged and reported as nil.
  def extract_raw_image_data(stream, filter)
    case filter
    when :DCTDecode, :JPXDecode then stream.data
    when :CCITTFaxDecode then tiff_header_for_CCITT(stream.hash, stream.data)
    when :LZWDecode, :RunLengthDecode, :FlateDecode then handle_compressed_image(stream)
    else
      logger.warn("Unsupported image filter '#{filter}'. Attempting to process raw data.")
      stream.data
    end
  rescue StandardError => e
    logger.error("Error extracting raw image data with filter '#{filter}': #{e.message}")
    nil # Return nil to indicate failure
  end

  # Prefixes the unfiltered stream data with the header built by
  # tiff_header (presumably from TIFFHelper; confirm there).
  def handle_compressed_image(stream)
    hash = stream.hash
    data = stream.unfiltered_data

    header = tiff_header(hash, data)

    header + data
  end

  # Builds a Vips image from the raw bytes; failures are logged and
  # reported as nil.
  def create_vips_image(raw, filter)
    Vips::Image.new_from_buffer(raw, "", disc: true)
  rescue Vips::Error => e
    logger.error("Error creating Vips image from buffer (filter: #{filter}): #{e.message}")
    nil # Return nil if Vips fails
  end
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
|
4
|
+
module TestHelpers
|
5
|
+
module Images
|
6
|
+
require "dhash-vips"
|
7
|
+
|
8
|
+
# Compares two images through their DHashVips::IDHash fingerprints.
# Each image may be given as a Vips::Image or as something accepted by
# Vips::Image.new_from_file.
class ImageSimilarityChecker
  def initialize(expected_image, image_to_check)
    @expected_image, @image_to_check = [expected_image, image_to_check].map do |source|
      source.is_a?(Vips::Image) ? source : Vips::Image.new_from_file(source)
    end
  end

  # Fingerprint distance between the two images (computed once).
  def distance
    @distance ||= DHashVips::IDHash.distance(expected_fingerprint, actual_fingerprint)
  end

  # True when the distance is strictly below the tolerance.
  def similar?(tolerance: 20)
    distance < tolerance
  end

  def very_similar?
    similar?(tolerance: 20)
  end

  def slightly_similar?
    similar?(tolerance: 25)
  end

  def different?
    !slightly_similar?
  end

  def expected_fingerprint
    @expected_fingerprint ||= fingerprint(@expected_image)
  end

  def actual_fingerprint
    @actual_fingerprint ||= fingerprint(@image_to_check)
  end

  # Images with a side shorter than 32 pixels are scaled up first so that
  # their shortest side becomes 32.
  def fingerprint(image)
    shortest_side = [image.width, image.height].min
    image = image.resize(32.0 / shortest_side) if shortest_side < 32

    DHashVips::IDHash.fingerprint(image)
  end
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|