bidi2pdf 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +88 -3
  3. data/README.md +146 -7
  4. data/docker/Dockerfile.chromedriver +23 -5
  5. data/docker/entrypoint.sh +41 -0
  6. data/lib/bidi2pdf/bidi/auth_interceptor.rb +3 -0
  7. data/lib/bidi2pdf/bidi/browser_tab.rb +60 -19
  8. data/lib/bidi2pdf/bidi/client.rb +7 -5
  9. data/lib/bidi2pdf/bidi/commands/cdp_get_session.rb +21 -0
  10. data/lib/bidi2pdf/bidi/commands/page_print.rb +101 -0
  11. data/lib/bidi2pdf/bidi/commands/print_parameters_validator.rb +4 -1
  12. data/lib/bidi2pdf/bidi/commands.rb +2 -0
  13. data/lib/bidi2pdf/bidi/connection_manager.rb +3 -0
  14. data/lib/bidi2pdf/bidi/event_manager.rb +34 -4
  15. data/lib/bidi2pdf/bidi/interceptor.rb +12 -2
  16. data/lib/bidi2pdf/bidi/session.rb +35 -2
  17. data/lib/bidi2pdf/bidi/web_socket_dispatcher.rb +5 -5
  18. data/lib/bidi2pdf/chromedriver_manager.rb +25 -11
  19. data/lib/bidi2pdf/cli.rb +9 -2
  20. data/lib/bidi2pdf/test_helpers/configuration.rb +67 -0
  21. data/lib/bidi2pdf/test_helpers/images/extractor.rb +99 -0
  22. data/lib/bidi2pdf/test_helpers/images/image_similarity_checker.rb +50 -0
  23. data/lib/bidi2pdf/test_helpers/images/tiff_helper.rb +204 -0
  24. data/lib/bidi2pdf/test_helpers/images.rb +12 -0
  25. data/lib/bidi2pdf/test_helpers/matchers/contains_pdf_image.rb +29 -0
  26. data/lib/bidi2pdf/test_helpers/pdf_file_helper.rb +39 -0
  27. data/lib/bidi2pdf/test_helpers/spec_paths_helper.rb +60 -0
  28. data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb +0 -6
  29. data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_test_helper.rb +103 -0
  30. data/lib/bidi2pdf/test_helpers/testcontainers/shared_docker_network.rb +21 -0
  31. data/lib/bidi2pdf/test_helpers/testcontainers/testcontainers_refinement.rb +53 -0
  32. data/lib/bidi2pdf/test_helpers/testcontainers.rb +17 -0
  33. data/lib/bidi2pdf/test_helpers.rb +7 -0
  34. data/lib/bidi2pdf/version.rb +1 -1
  35. data/sig/bidi2pdf/bidi/event_manager.rbs +19 -13
  36. metadata +55 -10
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bidi2pdf
4
+ module Bidi
5
+ module Commands
6
+ class CdpGetSession
7
+ include Base
8
+
9
+ def initialize(context:)
10
+ @context = context
11
+ end
12
+
13
+ def params = { context: @context }
14
+
15
+ def method_name
16
+ "goog:cdp.getSession"
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "print_parameters_validator"
4
+
5
+ module Bidi2pdf
6
+ module Bidi
7
+ module Commands
8
+ class PagePrint
9
+ include Base
10
+
11
+ def initialize(cdp_session:, print_options:)
12
+ @cdp_session = cdp_session
13
+ @print_options = print_options || { background: true }
14
+
15
+ PrintParametersValidator.validate!(@print_options)
16
+
17
+ return unless @print_options[:page]&.key?(:format)
18
+
19
+ @print_options[:page] = Bidi2pdf.translate_paper_format @print_options[:page][:format]
20
+ end
21
+
22
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
23
+ def params
24
+ {
25
+ # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
26
+ method: "Page.printToPDF",
27
+ session: @cdp_session,
28
+ params: {
29
+ "printBackground" => @print_options[:background],
30
+
31
+ "marginTop" => cm_to_inch(@print_options.dig(:margin, :top) || 0),
32
+ "marginBottom" => cm_to_inch(@print_options.dig(:margin, :bottom) || 0),
33
+ "marginLeft" => cm_to_inch(@print_options.dig(:margin, :left) || 0),
34
+ "marginRight" => cm_to_inch(@print_options.dig(:margin, :right) || 0),
35
+ "landscape" => (@print_options[:orientation] || "portrait").to_sym == :landscape,
36
+
37
+ "paperWidth" => cm_to_inch(@print_options.dig(:page, :width)),
38
+ "paperHeight" => cm_to_inch(@print_options.dig(:page, :height)),
39
+ "pageRanges" => page_ranges_to_string(@print_options[:pageRanges]),
40
+ "scale" => @print_options[:scale] || 1.0,
41
+
42
+ "displayHeaderFooter" => @print_options[:display_header_footer],
43
+ "headerTemplate" => @print_options[:header_template] || "",
44
+ "footerTemplate" => @print_options[:footer_template] || "",
45
+
46
+ "preferCSSPageSize" => @print_options.fetch(:prefer_css_page_size, true),
47
+
48
+ "generateTaggedPDF" => @print_options.fetch(:generate_tagged_pdf, false),
49
+ "generateDocumentOutline" => @print_options.fetch(:generate_document_outline, false),
50
+
51
+ transferMode: "ReturnAsBase64"
52
+ }.compact
53
+ }
54
+ end
55
+
56
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
57
+
58
+ def method_name
59
+ "goog:cdp.sendCommand"
60
+ end
61
+
62
+ private
63
+
64
+ # rubocop:disable Naming/MethodParameterName
65
+ def cm_to_inch(cm)
66
+ return nil if cm.nil?
67
+
68
+ cm.to_f / 2.54
69
+ end
70
+
71
+ # rubocop:enable Naming/MethodParameterName
72
+
73
+ # rubocop:disable Metrics/CyclomaticComplexity
74
+ def page_ranges_to_string(input)
75
+ return nil if input.nil? || input.empty?
76
+
77
+ segments = input.map do |entry|
78
+ case entry
79
+ when Integer
80
+ entry.to_s
81
+ when String
82
+ raise ArgumentError, "Invalid page entry: #{entry.inspect}" unless entry =~ /\A\d+(-\d+)?\z/
83
+
84
+ entry
85
+ else
86
+ raise ArgumentError, "Unsupported page entry type: #{entry.class}"
87
+ end
88
+ end
89
+
90
+ # dedupe, sort by numeric start, and join
91
+ segments
92
+ .uniq
93
+ .sort_by { |seg| seg.split("-", 2).first.to_i }
94
+ .join(",")
95
+ end
96
+
97
+ # rubocop:enable Metrics/CyclomaticComplexity
98
+ end
99
+ end
100
+ end
101
+ end
@@ -37,6 +37,7 @@ module Bidi2pdf
37
37
  @params = params
38
38
  end
39
39
 
40
+ # rubocop:disable Naming/PredicateMethod
40
41
  def validate!
41
42
  raise ArgumentError, "params must be a Hash" unless @params.is_a?(Hash)
42
43
 
@@ -51,6 +52,8 @@ module Bidi2pdf
51
52
  true
52
53
  end
53
54
 
55
+ # rubocop:enable Naming/PredicateMethod
56
+
54
57
  private
55
58
 
56
59
  def validate_boolean(key)
@@ -79,7 +82,7 @@ module Bidi2pdf
79
82
  def validate_page_ranges
80
83
  return unless @params.key?(:pageRanges)
81
84
  unless @params[:pageRanges].is_a?(Array) &&
82
- @params[:pageRanges].all? { |v| v.is_a?(Integer) || v.is_a?(String) }
85
+ @params[:pageRanges].all? { |v| v.is_a?(Integer) || v.is_a?(String) }
83
86
  raise ArgumentError, ":pageRanges must be an array of integers or strings"
84
87
  end
85
88
  end
@@ -18,6 +18,8 @@ module Bidi2pdf
18
18
  require_relative "commands/browsing_context_close"
19
19
  require_relative "commands/browsing_context_navigate"
20
20
  require_relative "commands/browsing_context_print"
21
+ require_relative "commands/cdp_get_session"
22
+ require_relative "commands/page_print"
21
23
  require_relative "commands/session_subscribe"
22
24
  require_relative "commands/session_end"
23
25
  require_relative "commands/cancel_auth"
@@ -17,6 +17,7 @@ module Bidi2pdf
17
17
  @connection_latch.count_down
18
18
  end
19
19
 
20
+ # rubocop:disable Naming/PredicateMethod
20
21
  def wait_until_open(timeout:)
21
22
  return true if @connected
22
23
 
@@ -26,6 +27,8 @@ module Bidi2pdf
26
27
 
27
28
  true
28
29
  end
30
+
31
+ # rubocop:enable Naming/PredicateMethod
29
32
  end
30
33
  end
31
34
  end
@@ -3,6 +3,27 @@
3
3
  module Bidi2pdf
4
4
  module Bidi
5
5
  class EventManager
6
+ Listener = Struct.new(:block, :id, :source_location) do
7
+ def initialize(block, id = SecureRandom.uuid)
8
+ super
9
+ self.source_location = block.source_location
10
+ end
11
+
12
+ def call(*args)
13
+ block.call(*args)
14
+ end
15
+
16
+ def ==(other)
17
+ other.is_a?(Listener) && id == other.id
18
+ end
19
+
20
+ alias_method :eql?, :==
21
+
22
+ def hash
23
+ id.hash
24
+ end
25
+ end
26
+
6
27
  attr_reader :type
7
28
 
8
29
  def initialize(type)
@@ -11,12 +32,21 @@ module Bidi2pdf
11
32
  end
12
33
 
13
34
  def on(*event_names, &block)
14
- event_names.each { |event_name| @listeners[event_name.to_sym] << block }
15
-
16
- block
35
+ Listener.new(block).tap do |listener|
36
+ event_names.each do |event_name|
37
+ @listeners[event_name.to_sym] << listener
38
+ log_msg("Adding #{event_name} listener", listener)
39
+ end
40
+ end
17
41
  end
18
42
 
19
- def off(event_name, block) = @listeners[event_name.to_sym].delete(block)
43
+ def off(event_name, listener)
44
+ raise ArgumentError, "Listener not registered" unless listener.is_a?(Listener)
45
+
46
+ log_msg("Removing #{event_name} listener", listener)
47
+
48
+ @listeners[event_name.to_sym].delete(listener)
49
+ end
20
50
 
21
51
  def dispatch(event_name, *args)
22
52
  listeners = @listeners[event_name.to_sym] || []
@@ -27,14 +27,24 @@ module Bidi2pdf
27
27
  client.send_cmd_and_wait(cmd) do |response|
28
28
  @interceptor_id = response["result"]["intercept"]
29
29
 
30
- Bidi2pdf.logger.debug "Interceptor added: #{@interceptor_id}"
30
+ Bidi2pdf.logger.debug2 "Interceptor added: #{@interceptor_id}"
31
31
 
32
- client.on_event(*self.class.events, &method(:handle_event))
32
+ @handle_event_listener = client.on_event(*self.class.events, &method(:handle_event))
33
33
 
34
34
  self
35
35
  end
36
36
  end
37
37
 
38
+ def unregister_with_client(client:)
39
+ return unless @handle_event_listener
40
+
41
+ client.remove_event_listener(*self.class.events, @handle_event_listener)
42
+
43
+ Bidi2pdf.logger.debug2 "Interceptor removed: #{@interceptor_id}"
44
+
45
+ @handle_event_listener = nil
46
+ end
47
+
38
48
  # rubocop: disable Metrics/AbcSize
39
49
  def handle_event(response)
40
50
  event_response = response["params"]
@@ -32,7 +32,38 @@ module Bidi2pdf
32
32
  SUBSCRIBE_EVENTS = %w[script].freeze
33
33
 
34
34
  # Default Chrome arguments for the session.
35
- DEFAULT_CHROME_ARGS = %w[--disable-gpu --disable-popup-blocking --disable-hang-monitor].freeze
35
+ DEFAULT_CHROME_ARGS = [
36
+ "--allow-pre-commit-input", # Allow pre-commit input for form fields
37
+ "--disable-dev-shm-usage", # Disable /dev/shm usage; use /tmp instead
38
+ "--disable-gpu", # Disable GPU hardware acceleration; force software rendering
39
+ "--disable-popup-blocking", # Allow all pop-ups; bypass built-in popup blocker
40
+ "--disable-hang-monitor", # Disable “Page Unresponsive” / “Aw, Snap!” dialogs on hangs
41
+ "--disable-background-networking", # Turn off speculative/periodic network requests (DNS prefetch, Safe Browsing updates, etc.)
42
+ "--disable-background-timer-throttling", # Prevent JS timers from being throttled in background tabs
43
+ "--disable-client-side-phishing-detection", # Disable built-in phishing checks; rely only on server-side detection
44
+ "--disable-component-extensions-with-background-pages", # Block component extensions that run persistent background pages (PDF viewer, Translate, etc.)
45
+ "--disable-crash-reporter", # Disable crash-report uploads and UI
46
+ "--disable-default-apps", # Stop installation of Chrome’s default apps on a fresh profile
47
+ "--disable-infobars", # Suppress “Chrome is being controlled by automated test software” infobar (and similar)
48
+ "--disable-ipc-flooding-protection", # Turn off defenses against too-many IPC messages from renderers
49
+ "--disable-prompt-on-repost", # Skip “Confirm Form Resubmission” dialogs on page reloads after POST
50
+ "--disable-renderer-backgrounding", # Keep background tab renderers at full priority
51
+ "--disable-search-engine-choice-screen", # Skip first-run search engine selection UI
52
+ "--disable-sync", # Turn off all Google account sync (bookmarks, passwords, etc.)
53
+ "--enable-automation", # Expose WebDriver hooks (navigator.webdriver) for automation frameworks
54
+ "--export-tagged-pdf", # When printing to PDF, include tagged structure for accessibility
55
+ "--force-color-profile=srgb", # Force rendering to use the sRGB color profile
56
+ "--generate-pdf-document-outline", # Auto-generate PDF bookmarks/outlines from HTML headings, not supported by chrome/chromium https://issues.chromium.org/issues/41387522#comment48
57
+ "--metrics-recording-only", # Collect UMA metrics locally but never upload them
58
+ "--no-first-run", # Skip the “Welcome” or “What’s New” screens on fresh profiles
59
+ "--password-store=basic", # Use Chrome’s basic (in-profile) password storage vs. OS vault
60
+ "--use-mock-keychain", # On macOS, use a fake keychain for testing (don’t touch the real one)
61
+ "--disable-backgrounding-occluded-windows", # Prevent fully-occluded windows from being treated as background
62
+ "--disable-breakpad", # Disable the Breakpad crash-reporting library entirely
63
+ "--enable-features=PdfOopif", # Enable out-of-process iframe (OOPIF) architecture for PDF rendering
64
+ "--disable-features=Translate,AcceptCHFrame,MediaRouter,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold,IsolateSandboxedIframes",
65
+ "--disable-extensions about:blank"
66
+ ].freeze
36
67
 
37
68
  # @return [URI] The URI of the session.
38
69
  attr_reader :session_uri
@@ -165,6 +196,7 @@ module Bidi2pdf
165
196
  Bidi2pdf.logger.info "Subscribing to events"
166
197
 
167
198
  Bidi::Client.new(websocket_url).tap do |event_client|
199
+ @event_socket = event_client
168
200
  event_client.start
169
201
  event_client.wait_until_open
170
202
 
@@ -208,7 +240,7 @@ module Bidi2pdf
208
240
  # @return [Hash] The session request payload.
209
241
  def session_request
210
242
  session_chrome_args = chrome_args.dup
211
- session_chrome_args << "--headless" if @headless
243
+ session_chrome_args << "--headless=new" if @headless
212
244
 
213
245
  {
214
246
  "capabilities" => {
@@ -320,6 +352,7 @@ module Bidi2pdf
320
352
  # Cleans up resources associated with the session.
321
353
  def cleanup
322
354
  @client&.close
355
+ @event_socket&.close
323
356
  @client = @websocket_url = @browser = nil
324
357
  end
325
358
  end
@@ -24,7 +24,7 @@ module Bidi2pdf
24
24
 
25
25
  def on_message(&) = socket_events.on(:message, &)
26
26
 
27
- def on_event(name, &) = session_events.on(name, &)
27
+ def on_event(*event_names, &) = session_events.on(*event_names, &)
28
28
 
29
29
  def on_open(&) = socket_events.on(:open, &)
30
30
 
@@ -34,13 +34,13 @@ module Bidi2pdf
34
34
 
35
35
  def remove_message_listener(block) = socket_events.off(:message, block)
36
36
 
37
- def remove_event_listener(name, block) = session_events.off(name, block)
37
+ def remove_event_listener(name, listener) = session_events.off(name, listener)
38
38
 
39
- def remove_open_listener(block) = socket_events.off(:open, block)
39
+ def remove_open_listener(listener) = socket_events.off(:open, listener)
40
40
 
41
- def remove_close_listener(block) = socket_events.off(:close, block)
41
+ def remove_close_listener(listener) = socket_events.off(:close, listener)
42
42
 
43
- def remove_error_listener(block) = socket_events.off(:error, block)
43
+ def remove_error_listener(listener) = socket_events.off(:error, listener)
44
44
 
45
45
  private
46
46
 
@@ -8,6 +8,7 @@ module Bidi2pdf
8
8
  include Chromedriver::Binary::Platform
9
9
 
10
10
  attr_reader :port, :pid, :started, :headless, :chrome_args, :shutdown_mutex
11
+ attr_accessor :reader_thread
11
12
 
12
13
  def initialize(port: 0, headless: true, chrome_args: Bidi::Session::DEFAULT_CHROME_ARGS)
13
14
  @port = port
@@ -49,10 +50,20 @@ module Bidi2pdf
49
50
  "http://localhost:#{@port}/session"
50
51
  end
51
52
 
53
+ # rubocop: disable Metrics/AbcSize
52
54
  def stop(timeout: 5)
53
55
  shutdown_mutex.synchronize do
54
56
  return unless @pid
55
57
 
58
+ if reader_thread&.alive?
59
+ begin
60
+ reader_thread.kill
61
+ reader_thread.join
62
+ rescue StandardError => e
63
+ Bidi2pdf.logger.error "Error killing reader thread: #{e.message}"
64
+ end
65
+ end
66
+
56
67
  @started = false
57
68
 
58
69
  close_session
@@ -72,6 +83,8 @@ module Bidi2pdf
72
83
  end
73
84
  end
74
85
 
86
+ # rubocop: enable Metrics/AbcSize
87
+
75
88
  private
76
89
 
77
90
  def spawn_process(cmd)
@@ -184,25 +197,26 @@ module Bidi2pdf
184
197
 
185
198
  # rubocop: disable Metrics/AbcSize
186
199
  def parse_port_from_output(io, timeout: 5)
187
- Thread.new do
188
- io.each_line do |line|
189
- Bidi2pdf.logger.debug1 line.chomp
200
+ port_event = Concurrent::Event.new
190
201
 
191
- next unless line =~ /ChromeDriver was started successfully on port (\d+)/
192
-
193
- Bidi2pdf.logger.debug "Found port: #{::Regexp.last_match(1).to_i} setup port: #{@port}"
194
-
195
- @port = ::Regexp.last_match(1).to_i if @port.nil? || @port.zero?
202
+ self.reader_thread = Thread.new do
203
+ io.each_line do |line|
204
+ Bidi2pdf.logger.info "[chromedriver] #{line.chomp}"
196
205
 
197
- break
206
+ if line =~ /ChromeDriver was started successfully on port (\d+)/
207
+ @port = ::Regexp.last_match(1).to_i if @port.nil? || @port.zero?
208
+ port_event.set
209
+ end
198
210
  end
199
211
  rescue IOError
200
212
  # reader closed
201
213
  ensure
202
214
  io.close unless io.closed?
203
- end.join(timeout)
215
+ end
216
+
217
+ return if port_event.wait(timeout)
204
218
 
205
- raise "Chromedriver did not report a usable port in #{timeout}s" if @port.nil?
219
+ raise "Chromedriver did not report a usable port in #{timeout}s"
206
220
  end
207
221
 
208
222
  # rubocop: enable Metrics/AbcSize
data/lib/bidi2pdf/cli.rb CHANGED
@@ -74,6 +74,8 @@ module Bidi2pdf
74
74
  option :page_ranges, type: :array, desc: "Page ranges to print (e.g., 1-2 4 6)"
75
75
  option :scale, type: :numeric, default: 1.0, desc: "Scale between 0.1 and 2.0"
76
76
  option :shrink_to_fit, type: :boolean, default: true, desc: "Shrink content to fit page"
77
+ option :generate_tagged_pdf, type: :boolean, default: false, desc: "Generate tagged PDF"
78
+ option :generate_document_outline, type: :boolean, default: false, desc: "Generate document outline"
77
79
 
78
80
  class << self
79
81
  def exit_on_failure?
@@ -150,7 +152,7 @@ module Bidi2pdf
150
152
  raise Thor::Error, "Invalid print option: #{e.message}"
151
153
  end
152
154
 
153
- # rubocop:disable Metrics/CyclomaticComplexity
155
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
154
156
  def print_options
155
157
  opts = {}
156
158
 
@@ -186,10 +188,15 @@ module Bidi2pdf
186
188
  assign_if_provided(page, :height, :page_height)
187
189
  opts[:page] = page unless page.empty?
188
190
 
191
+ assign_if_provided(opts, :generate_tagged_pdf)
192
+ assign_if_provided(opts, :generate_document_outline)
193
+
194
+ opts[:cmd_type] = :cdp if opts[:generate_tagged_pdf] || opts[:generate_document_outline]
195
+
189
196
  opts.empty? ? nil : opts
190
197
  end
191
198
 
192
- # rubocop:enable Metrics/CyclomaticComplexity
199
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
193
200
 
194
201
  def option_provided?(key)
195
202
  ARGV.include?("--#{key.to_s.tr("_", "-")}") || ARGV.include?("--#{key}")
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bidi2pdf
4
+ module TestHelpers
5
+ class Configuration
6
+ # @!attribute [rw] spec_dir
7
+ # @return [Pathname] the directory where specs are located
8
+ attr_accessor :spec_dir
9
+
10
+ # @!attribute [rw] tmp_dir
11
+ # @return [String] the directory for temporary files
12
+ attr_accessor :tmp_dir
13
+
14
+ # @!attribute [rw] prefix
15
+ # @return [String] the prefix for temporary files
16
+ attr_accessor :prefix
17
+
18
+ # @!attribute [rw] docker_dir
19
+ # @return [String] the directory for Docker files
20
+ attr_accessor :docker_dir
21
+
22
+ # @!attribute [rw] fixture_dir
23
+ # @return [String] the directory for fixture files
24
+ attr_accessor :fixture_dir
25
+
26
+ def initialize
27
+ project_root = if defined?(Rails) && Rails.respond_to?(:root)
28
+ Pathname.new(Rails.root)
29
+ elsif defined?(Bundler) && Bundler.respond_to?(:root)
30
+ Pathname.new(Bundler.root)
31
+ else
32
+ Pathname.new(Dir.pwd)
33
+ end
34
+
35
+ @spec_dir = project_root.join("spec").expand_path
36
+ @docker_dir = project_root.join("docker")
37
+ @fixture_dir = project_root.join("spec", "fixtures")
38
+ @tmp_dir = project_root.join("tmp")
39
+ @prefix = "tmp_"
40
+ end
41
+ end
42
+
43
+ class << self
44
+ # Retrieves the current configuration object for TestHelpers.
45
+ # @return [Configuration] the configuration object
46
+ def configuration
47
+ @configuration ||= Configuration.new
48
+ end
49
+
50
+ # Sets the configuration object for TestHelpers.
51
+ # @param [Configuration] config the configuration object to set
52
+ attr_writer :configuration
53
+
54
+ # Allows configuration of TestHelpers by yielding the configuration object.
55
+ # @yieldparam [Configuration] configuration the configuration object to modify
56
+ def configure
57
+ yield(configuration)
58
+ end
59
+ end
60
+ end
61
+
62
+ # Configures RSpec to include and extend SpecPathsHelper for examples with the `:pdf` metadata.
63
+ RSpec.configure do |config|
64
+ # Adds a custom RSpec setting for TestHelpers configuration.
65
+ config.add_setting :bidi2pdf_test_helpers_config, default: TestHelpers.configuration
66
+ end
67
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bidi2pdf
4
+ module TestHelpers
5
+ module Images
6
+ require "vips"
7
+ require "zlib"
8
+
9
+ class Extractor
10
+ include PDFReaderUtils
11
+ include TIFFHelper
12
+
13
+ attr_reader :pages, :logger
14
+
15
+ def initialize(pdf_data, logger: Bidi2pdf.logger)
16
+ reader = pdf_reader_for pdf_data
17
+ @pages = reader.pages
18
+ @logger = logger
19
+ end
20
+
21
+ def all_images
22
+ extracted_images.map { |images| images[:images] }.flatten
23
+ end
24
+
25
+ def image_on_page(page_number, image_number)
26
+ images = images_on_page(page_number)
27
+ return nil if images.empty? || image_number > images.size
28
+
29
+ images[image_number - 1]
30
+ end
31
+
32
+ def images_on_page(page_number)
33
+ extracted_images.find { |images| images[:page] == page_number }&.dig(:images) || []
34
+ end
35
+
36
+ private
37
+
38
+ def extracted_images
39
+ @extracted_images ||= @pages.each_with_index.with_object([]) do |(page, index), result|
40
+ result << { page: index + 1, images: extract_images(page) }
41
+ end
42
+ end
43
+
44
+ def extract_images(page)
45
+ xobjects = page.xobjects
46
+ return if xobjects.empty?
47
+
48
+ xobjects.each_value.map do |stream|
49
+ case stream.hash[:Subtype]
50
+ when :Image
51
+ process_image_stream(stream)
52
+ when :Form
53
+ extract_images(PDF::Reader::FormXObject.new(page, stream))
54
+ end
55
+ end.flatten
56
+ end
57
+
58
+ def process_image_stream(stream)
59
+ filter = Array(stream.hash[:Filter]).first
60
+ raw = extract_raw_image_data(stream, filter)
61
+
62
+ return nil if raw.nil? || raw.empty?
63
+
64
+ create_vips_image(raw, filter)
65
+ end
66
+
67
+ def extract_raw_image_data(stream, filter)
68
+ case filter
69
+ when :DCTDecode, :JPXDecode then stream.data
70
+ when :CCITTFaxDecode then tiff_header_for_CCITT(stream.hash, stream.data)
71
+ when :LZWDecode, :RunLengthDecode, :FlateDecode then handle_compressed_image(stream)
72
+ else
73
+ Bidi2pdf.logger.warn("Unsupported image filter '#{filter}'. Attempting to process raw data.")
74
+ stream.data
75
+ end
76
+ rescue StandardError => e
77
+ Bidi2pdf.logger.error("Error extracting raw image data with filter '#{filter}': #{e.message}")
78
+ nil # Return nil to indicate failure
79
+ end
80
+
81
+ def handle_compressed_image(stream)
82
+ hash = stream.hash
83
+ data = stream.unfiltered_data
84
+
85
+ header = tiff_header(hash, data)
86
+
87
+ header + data
88
+ end
89
+
90
+ def create_vips_image(raw, filter)
91
+ Vips::Image.new_from_buffer(raw, "", disc: true)
92
+ rescue Vips::Error => e
93
+ Bidi2pdf.logger.error("Error creating Vips image from buffer (filter: #{filter}): #{e.message}")
94
+ nil # Return nil if Vips fails
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bidi2pdf
4
+ module TestHelpers
5
+ module Images
6
+ require "dhash-vips"
7
+
8
+ class ImageSimilarityChecker
9
+ def initialize(expected_image, image_to_check)
10
+ @expected_image = expected_image.is_a?(Vips::Image) ? expected_image : Vips::Image.new_from_file(expected_image)
11
+ @image_to_check = image_to_check.is_a?(Vips::Image) ? image_to_check : Vips::Image.new_from_file(image_to_check)
12
+ end
13
+
14
+ def similar?(tolerance: 20)
15
+ distance < tolerance
16
+ end
17
+
18
+ def very_similar?
19
+ similar? tolerance: 20
20
+ end
21
+
22
+ def slightly_similar?
23
+ similar? tolerance: 25
24
+ end
25
+
26
+ def different?
27
+ !slightly_similar?
28
+ end
29
+
30
+ def expected_fingerprint
31
+ @expected_fingerprint ||= fingerprint @expected_image
32
+ end
33
+
34
+ def actual_fingerprint
35
+ @actual_fingerprint ||= fingerprint @image_to_check
36
+ end
37
+
38
+ def distance
39
+ @distance ||= DHashVips::IDHash.distance(expected_fingerprint, actual_fingerprint)
40
+ end
41
+
42
+ def fingerprint(image)
43
+ image = image.resize(32.0 / [image.width, image.height].min) if image.width < 32 || image.height < 32
44
+
45
+ DHashVips::IDHash.fingerprint image
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end