bidi2pdf 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +64 -8
  3. data/README.md +14 -0
  4. data/docker/Dockerfile.chromedriver +30 -5
  5. data/docker/entrypoint.sh +41 -0
  6. data/lib/bidi2pdf/bidi/browser_tab.rb +59 -8
  7. data/lib/bidi2pdf/bidi/client.rb +7 -5
  8. data/lib/bidi2pdf/bidi/command_manager.rb +14 -26
  9. data/lib/bidi2pdf/bidi/connection_manager.rb +3 -9
  10. data/lib/bidi2pdf/bidi/event_manager.rb +35 -5
  11. data/lib/bidi2pdf/bidi/interceptor.rb +12 -2
  12. data/lib/bidi2pdf/bidi/navigation_failed_events.rb +41 -0
  13. data/lib/bidi2pdf/bidi/session.rb +6 -1
  14. data/lib/bidi2pdf/bidi/web_socket_dispatcher.rb +5 -5
  15. data/lib/bidi2pdf/chromedriver_manager.rb +25 -11
  16. data/lib/bidi2pdf/notifications.rb +1 -1
  17. data/lib/bidi2pdf/test_helpers/matchers/contains_pdf_text.rb +50 -0
  18. data/lib/bidi2pdf/test_helpers/matchers/have_pdf_page_count.rb +50 -0
  19. data/lib/bidi2pdf/test_helpers/matchers/match_pdf_text.rb +45 -0
  20. data/lib/bidi2pdf/test_helpers/pdf_reader_utils.rb +89 -0
  21. data/lib/bidi2pdf/test_helpers/pdf_text_sanitizer.rb +232 -0
  22. data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb +81 -0
  23. data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_test_helper.rb +103 -0
  24. data/lib/bidi2pdf/test_helpers/testcontainers/shared_docker_network.rb +21 -0
  25. data/lib/bidi2pdf/test_helpers/testcontainers/testcontainers_refinement.rb +53 -0
  26. data/lib/bidi2pdf/test_helpers/testcontainers.rb +17 -0
  27. data/lib/bidi2pdf/test_helpers.rb +13 -0
  28. data/lib/bidi2pdf/version.rb +1 -1
  29. data/lib/bidi2pdf.rb +32 -3
  30. data/sig/bidi2pdf/bidi/event_manager.rbs +19 -13
  31. metadata +35 -6
@@ -117,7 +117,10 @@ module Bidi2pdf
117
117
 
118
118
  # Retrieves the status of the session.
119
119
  def status
120
- send_cmd(Bidi2pdf::Bidi::Commands::SessionStatus.new) { |resp| Bidi2pdf.logger.info "Session status: #{resp.inspect}" }
120
+ send_cmd(Bidi2pdf::Bidi::Commands::SessionStatus.new) do |resp|
121
+ Bidi2pdf.logger.info "Session status: #{resp["result"].inspect}"
122
+ resp["result"]
123
+ end
121
124
  end
122
125
 
123
126
  # Checks if the session has started.
@@ -162,6 +165,7 @@ module Bidi2pdf
162
165
  Bidi2pdf.logger.info "Subscribing to events"
163
166
 
164
167
  Bidi::Client.new(websocket_url).tap do |event_client|
168
+ @event_socket = event_client
165
169
  event_client.start
166
170
  event_client.wait_until_open
167
171
 
@@ -317,6 +321,7 @@ module Bidi2pdf
317
321
  # Cleans up resources associated with the session.
318
322
  def cleanup
319
323
  @client&.close
324
+ @event_socket&.close
320
325
  @client = @websocket_url = @browser = nil
321
326
  end
322
327
  end
@@ -24,7 +24,7 @@ module Bidi2pdf
24
24
 
25
25
  def on_message(&) = socket_events.on(:message, &)
26
26
 
27
- def on_event(name, &) = session_events.on(name, &)
27
+ def on_event(*event_names, &) = session_events.on(*event_names, &)
28
28
 
29
29
  def on_open(&) = socket_events.on(:open, &)
30
30
 
@@ -34,13 +34,13 @@ module Bidi2pdf
34
34
 
35
35
  def remove_message_listener(block) = socket_events.off(:message, block)
36
36
 
37
- def remove_event_listener(name, block) = session_events.off(name, block)
37
+ def remove_event_listener(name, listener) = session_events.off(name, listener)
38
38
 
39
- def remove_open_listener(block) = socket_events.off(:open, block)
39
+ def remove_open_listener(listener) = socket_events.off(:open, listener)
40
40
 
41
- def remove_close_listener(block) = socket_events.off(:close, block)
41
+ def remove_close_listener(listener) = socket_events.off(:close, listener)
42
42
 
43
- def remove_error_listener(block) = socket_events.off(:error, block)
43
+ def remove_error_listener(listener) = socket_events.off(:error, listener)
44
44
 
45
45
  private
46
46
 
@@ -8,6 +8,7 @@ module Bidi2pdf
8
8
  include Chromedriver::Binary::Platform
9
9
 
10
10
  attr_reader :port, :pid, :started, :headless, :chrome_args, :shutdown_mutex
11
+ attr_accessor :reader_thread
11
12
 
12
13
  def initialize(port: 0, headless: true, chrome_args: Bidi::Session::DEFAULT_CHROME_ARGS)
13
14
  @port = port
@@ -49,10 +50,20 @@ module Bidi2pdf
49
50
  "http://localhost:#{@port}/session"
50
51
  end
51
52
 
53
+ # rubocop: disable Metrics/AbcSize
52
54
  def stop(timeout: 5)
53
55
  shutdown_mutex.synchronize do
54
56
  return unless @pid
55
57
 
58
+ if reader_thread&.alive?
59
+ begin
60
+ reader_thread.kill
61
+ reader_thread.join
62
+ rescue StandardError => e
63
+ Bidi2pdf.logger.error "Error killing reader thread: #{e.message}"
64
+ end
65
+ end
66
+
56
67
  @started = false
57
68
 
58
69
  close_session
@@ -72,6 +83,8 @@ module Bidi2pdf
72
83
  end
73
84
  end
74
85
 
86
+ # rubocop: enable Metrics/AbcSize
87
+
75
88
  private
76
89
 
77
90
  def spawn_process(cmd)
@@ -184,25 +197,26 @@ module Bidi2pdf
184
197
 
185
198
  # rubocop: disable Metrics/AbcSize
186
199
  def parse_port_from_output(io, timeout: 5)
187
- Thread.new do
188
- io.each_line do |line|
189
- Bidi2pdf.logger.debug1 line.chomp
200
+ port_event = Concurrent::Event.new
190
201
 
191
- next unless line =~ /ChromeDriver was started successfully on port (\d+)/
192
-
193
- Bidi2pdf.logger.debug "Found port: #{::Regexp.last_match(1).to_i} setup port: #{@port}"
194
-
195
- @port = ::Regexp.last_match(1).to_i if @port.nil? || @port.zero?
202
+ self.reader_thread = Thread.new do
203
+ io.each_line do |line|
204
+ Bidi2pdf.logger.info "[chromedriver] #{line.chomp}"
196
205
 
197
- break
206
+ if line =~ /ChromeDriver was started successfully on port (\d+)/
207
+ @port = ::Regexp.last_match(1).to_i if @port.nil? || @port.zero?
208
+ port_event.set
209
+ end
198
210
  end
199
211
  rescue IOError
200
212
  # reader closed
201
213
  ensure
202
214
  io.close unless io.closed?
203
- end.join(timeout)
215
+ end
216
+
217
+ return if port_event.wait(timeout)
204
218
 
205
- raise "Chromedriver did not report a usable port in #{timeout}s" if @port.nil?
219
+ raise "Chromedriver did not report a usable port in #{timeout}s"
206
220
  end
207
221
 
208
222
  # rubocop: enable Metrics/AbcSize
@@ -18,7 +18,7 @@ module Bidi2pdf
18
18
  module Notifications
19
19
  Thread.attr_accessor :bidi2pdf_notification_instrumenter
20
20
 
21
- @subscribers = Hash.new { |h, k| h[k] = [] }
21
+ @subscribers = Concurrent::Hash.new { |h, k| h[k] = [] }
22
22
 
23
23
  class << self
24
24
  attr_reader :subscribers
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../pdf_text_sanitizer"
4
+
5
+ # Custom RSpec matcher for checking whether a PDF document contains specific text.
6
+ #
7
+ # This matcher allows you to assert that a certain string or regular expression
8
+ # is present in the sanitized text of a PDF document.
9
+ #
10
+ # It supports chaining with `.at_page(n)` to limit the search to a specific page.
11
+ #
12
+ # ## Examples
13
+ #
14
+ # expect(pdf_data).to contains_pdf_text("Total: 123.45")
15
+ # expect(pdf_data).to contains_pdf_text(/Invoice #\d+/).at_page(2)
16
+ #
17
+ # @param expected [String, Regexp] The text or pattern to match inside the PDF.
18
+ #
19
+ # @return [Boolean] true if the expected content is found (on the given page if specified)
20
RSpec::Matchers.define :contains_pdf_text do |expected|
  # Optional chain: remembers the page the search should be limited to.
  chain(:at_page) { |page_number| @page_number = page_number }

  # Delegates the actual lookup to PDFTextSanitizer.contains?.
  match do |actual|
    Bidi2pdf::TestHelpers::PDFTextSanitizer.contains?(actual, expected, @page_number)
  end

  # Reports either that the requested page is beyond the end of the document,
  # or dumps the sanitized text of every page next to the expectation.
  failure_message do |actual|
    pages = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean_pages(actual)

    if @page_number && @page_number > pages.size
      "Document does not contain page #{@page_number}"
    else
      page_dump = pages.each_with_index.map { |text, i| "Page #{i + 1}:\n#{text}" }.join("\n\n")

      <<~MSG
        PDF text did not contain expected content.

        --- Expected (#{expected.inspect}) ---
        On page #{@page_number || "any"}:

        --- Actual ---
        #{page_dump}
      MSG
    end
  end

  description do
    base = "contain #{expected.inspect} in PDF"
    @page_number ? "#{base} on page #{@page_number}" : base
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pdf-reader"
4
+ require "base64"
5
+
6
+ # RSpec matcher to assert the number of pages in a PDF document.
7
+ #
8
+ # This matcher is useful for verifying the structural integrity of generated or uploaded PDFs,
9
+ # especially in tests for reporting, invoice generation, or document exports.
10
+ #
11
+ # It supports a variety of input types:
12
+ # - Raw PDF data as a `String`
13
+ # - File paths (`String`)
14
+ # - `StringIO` or `File` objects
15
+ # - Even Base64-encoded strings, if your `pdf_reader_for` method handles it
16
+ #
17
+ # ## Example
18
+ #
19
+ # expect(pdf_data).to have_pdf_page_count(5)
20
+ # expect(StringIO.new(pdf_data)).to have_pdf_page_count(3)
21
+ #
22
+ # If the PDF is malformed, the matcher will gracefully fail and show the error message.
23
+ #
24
+ # @param expected_count [Integer] The number of pages the PDF is expected to contain.
25
+ # @return [RSpec::Matchers::Matcher] The matcher object for use in specs.
26
+ #
27
+ # @note This matcher depends on `Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_reader_for`
28
+ # to extract the page count. Make sure it supports all your intended input formats.
29
RSpec::Matchers.define :have_pdf_page_count do |expected_count|
  # Reads the page count through PDFReaderUtils.pdf_reader_for. A malformed PDF
  # is not re-raised: its message is stored and the match simply fails.
  match do |pdf_data|
    @actual_count = Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_reader_for(pdf_data).page_count
    @actual_count == expected_count
  rescue PDF::Reader::MalformedPDFError => e
    @error_message = e.message
    false
  end

  # Prefers the parse error (if one was recorded) over the plain count mismatch.
  failure_message do |_pdf_data|
    parse_failure = "Expected a valid PDF with #{expected_count} pages, but encountered an error: #{@error_message}"
    count_mismatch = "Expected PDF to have #{expected_count} pages, but it has #{@actual_count} pages"

    @error_message ? parse_failure : count_mismatch
  end

  description { "have #{expected_count} PDF pages" }
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../pdf_text_sanitizer"
4
+
5
+ # Custom RSpec matcher to compare the **sanitized text content** of two PDF files.
6
+ #
7
+ # This matcher is useful for comparing PDF documents where formatting and metadata may differ,
8
+ # but the actual visible text content should be the same. It uses `PDFTextSanitizer` internally
9
+ # to normalize and clean the text before comparison.
10
+ #
11
+ # ## Example
12
+ #
13
+ # expect(actual_pdf).to match_pdf_text(expected_pdf)
14
+ #
15
+ # If the texts don’t match, it prints a diff-friendly message showing cleaned text content.
16
+ #
17
+ # @param expected [String, StringIO, File] The expected PDF content (can be a file path, StringIO, or raw string).
18
+ # @return [RSpec::Matchers::Matcher] An RSpec matcher to compare against an actual PDF.
19
+ #
20
+ # @note Ensure `PDFTextSanitizer.match?` and `PDFTextSanitizer.clean_pages` are implemented
21
+ # to handle your specific PDF processing logic.
22
RSpec::Matchers.define :match_pdf_text do |expected|
  # Both documents are compared through PDFTextSanitizer.match?.
  match { |actual| Bidi2pdf::TestHelpers::PDFTextSanitizer.match?(actual, expected) }

  # Shows the sanitized text of both documents, one page per line.
  failure_message do |actual|
    sanitizer = Bidi2pdf::TestHelpers::PDFTextSanitizer
    actual_text = sanitizer.clean_pages(actual).join("\n")
    expected_text = sanitizer.clean_pages(expected).join("\n")

    <<~MSG
      PDF text did not match.

      --- Expected ---
      #{expected_text}

      --- Actual ---
      #{actual_text}
    MSG
  end

  description { "match sanitized PDF text content" }
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
module Bidi2pdf
  module TestHelpers
    # Helpers that turn the various ways a PDF can be handed to a spec
    # (raw bytes, Base64 text, file path, IO object) into a PDF::Reader
    # and extract per-page text from it.
    module PDFReaderUtils
      # Leading bytes of raw PDF data.
      PDF_MAGIC = "%PDF-"

      # Every Base64 encoding of data that begins with "%PDF" starts with these characters.
      BASE64_PDF_PREFIX = "JVBER"

      class << self
        # Extracts text content from a PDF document.
        #
        # @param pdf_data [String, StringIO, File] The PDF data in one of the following formats:
        #   * Base64-encoded PDF string
        #   * Raw PDF data beginning with "%PDF-"
        #   * StringIO or File object containing PDF data
        #   * Path to a PDF file as String
        # @return [Array<String>] one string per page; when the data cannot be parsed
        #   as a PDF, a single-element array holding the original input
        # @return [Object] the input itself, untouched, when it is not a String, StringIO or File
        # @example Extract text from a PDF file
        #   text_content = pdf_text('path/to/document.pdf')
        #
        # @example Extract text from Base64-encoded string
        #   text_content = pdf_text(base64_encoded_pdf_data)
        def pdf_text(pdf_data)
          case pdf_data
          when String, StringIO, File
            pdf_reader_for(pdf_data).pages.map(&:text)
          else
            pdf_data
          end
        rescue PDF::Reader::MalformedPDFError
          [pdf_data]
        end

        # Converts the input PDF data into an IO object and initializes a PDF::Reader.
        #
        # @param pdf_data [String, StringIO, File] The PDF data to be read.
        # @return [PDF::Reader] A PDF::Reader instance for the given data.
        # @raise [PDF::Reader::MalformedPDFError] If the PDF data is invalid.
        def pdf_reader_for(pdf_data)
          PDF::Reader.new(convert_data_to_io(pdf_data))
        end

        # Converts various input formats into an IO object for PDF::Reader.
        #
        # Non-String inputs (StringIO, File, ...) are returned untouched; previously
        # they hit `start_with?` and raised NoMethodError. Strings are interpreted,
        # in order, as Base64 text, raw PDF bytes, a path to an existing file, and
        # finally as opaque data.
        #
        # @param pdf_data [String, StringIO, File] The PDF data to be converted.
        # @return [IO, StringIO] An IO-like object containing the PDF data.
        def convert_data_to_io(pdf_data)
          return pdf_data unless pdf_data.is_a?(String)

          if pdf_data.start_with?(BASE64_PDF_PREFIX)
            # unpack1("m") performs the same lenient decoding as Base64.decode64,
            # without needing the base64 library to be loaded.
            StringIO.new(pdf_data.unpack1("m"))
          elsif readable_file?(pdf_data)
            # Read eagerly so no file handle is left open behind the reader.
            StringIO.new(File.binread(pdf_data))
          else
            StringIO.new(pdf_data)
          end
        end

        private

        # True when the string names an existing file rather than carrying PDF bytes.
        # Strings with NUL bytes are rejected up front because File.exist? raises on them.
        def readable_file?(candidate)
          return false if candidate.start_with?(PDF_MAGIC) || candidate.include?("\0")

          File.exist?(candidate)
        end
      end

      # Instance-level delegators, mixed into any class that includes PDFReaderUtils.
      module InstanceMethods
        def pdf_text(pdf_data)
          PDFReaderUtils.pdf_text(pdf_data)
        end

        def pdf_reader_for(pdf_data)
          PDFReaderUtils.pdf_reader_for(pdf_data)
        end

        def convert_data_to_io(pdf_data)
          PDFReaderUtils.convert_data_to_io(pdf_data)
        end
      end

      def self.included(base)
        base.include(InstanceMethods)
      end
    end
  end
end
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "unicode_utils"
4
+ require "diff/lcs"
5
+ require "diff/lcs/hunk"
6
+
7
module Bidi2pdf
  module TestHelpers
    # rubocop: disable Metrics/ModuleLength
    # Provides utilities for sanitizing and comparing PDF text content.
    # This module includes methods for cleaning text, comparing PDF content,
    # and reporting differences between actual and expected PDF outputs.
    #
    # The sanitization process includes normalizing whitespace, replacing
    # typographic ligatures, and handling other common text formatting issues.
    #
    # @example Cleaning text
    #   sanitized_text = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean("Some text")
    #
    # @example Comparing PDF content
    #   match = Bidi2pdf::TestHelpers::PDFTextSanitizer.match?(actual_pdf, expected_pdf)
    module PDFTextSanitizer
      class << self
        # Cleans the given text: applies NFKD normalization, replaces the fi/fl
        # ligatures, joins words hyphenated across a line break, maps ellipsis and
        # dashes to ASCII, and collapses every whitespace run into a single space.
        #
        # @param [String] text The text to clean.
        # @return [String] The cleaned text.
        def clean(text)
          # NOTE(review): the two quote substitutions below only match the ASCII
          # quote characters, so they currently change nothing. They were presumably
          # meant for typographic quotes - confirm before changing, because the
          # expected strings passed to `contains?` are not sanitized.
          UnicodeUtils.nfkd(text)
                      .gsub("\uFB01", "fi")
                      .gsub("\uFB02", "fl")
                      .gsub("-\n", "")
                      .gsub(/["]/, '"')
                      .gsub(/[']/, "'")
                      .gsub("…", "...")
                      .gsub("—", "--")
                      .gsub("–", "-")
                      .gsub(/\s+/, " ")
                      .strip
        end

        # Extracts the page texts of a PDF (via PDFReaderUtils.pdf_text) and applies
        # `clean` to each page. An array of page texts is accepted as well.
        #
        # @param [Object] actual_pdf_thingy The PDF object to clean.
        # @return [Array<String>] An array of cleaned page texts.
        def clean_pages(actual_pdf_thingy)
          Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_text(actual_pdf_thingy).map { |text| clean(text) }
        end

        # Cleans the given text and removes all whitespace for comparison purposes.
        #
        # @param [String] text The text to clean and normalize.
        # @return [String] The cleaned text without whitespace.
        def normalize(text)
          clean(text).gsub(/\s+/, "")
        end

        # Checks if the given PDF contains the expected text or pattern.
        #
        # @param [Object] actual_pdf_thingy The PDF object to search.
        # @param [String, Regexp] expected The expected text or pattern.
        # @param [Integer, nil] page_number The 1-based page to search (optional).
        # @return [Boolean] `true` if the expected text is found, `false` otherwise
        #   (including when `page_number` is outside `1..page count`).
        def contains?(actual_pdf_thingy, expected, page_number = nil)
          cleaned_pages = clean_pages(actual_pdf_thingy)

          # Search all pages when no page was requested.
          return cleaned_pages.any? { |page| match_expected?(page, expected) } unless page_number

          # Reject 0 and negative numbers too: they would otherwise wrap around
          # and silently search a page counted from the end of the document.
          return false unless page_number.between?(1, cleaned_pages.size)

          match_expected?(cleaned_pages[page_number - 1], expected)
        end

        # Matches the given text against the expected text or pattern.
        #
        # @param [String, nil] text The text to match.
        # @param [String, Regexp] expected The expected text or pattern.
        # @return [Boolean] `true` if the text matches, `false` otherwise.
        def match_expected?(text, expected)
          return false unless text

          expected.is_a?(Regexp) ? text.match?(expected) : text.include?(expected.to_s)
        end

        # Compares the content of two PDF objects for equality, page by page,
        # ignoring all whitespace. On a mismatch the differences are printed.
        #
        # @param [Object] actual_pdf_thingy The actual PDF object.
        # @param [Object] expected_pdf_thingy The expected PDF object.
        # @return [Boolean] `true` if the content matches, `false` otherwise.
        def match?(actual_pdf_thingy, expected_pdf_thingy)
          cleaned_actual = clean_pages(actual_pdf_thingy)
          cleaned_expected = clean_pages(expected_pdf_thingy)

          # Compare without whitespace for equality check
          same = cleaned_actual.map { |text| normalize(text) } == cleaned_expected.map { |text| normalize(text) }
          report_content_mismatch(cleaned_actual, cleaned_expected) unless same

          same
        end

        # Reports differences between actual and expected PDF content on stdout.
        #
        # @param [Array<String>] actual The actual PDF content.
        # @param [Array<String>] expected The expected PDF content.
        # @return [void]
        def report_content_mismatch(actual, expected)
          puts "--- PDF content mismatch ---"
          print_differences(actual, expected)
        end

        # Prints detailed differences between actual and expected PDF content.
        # Pages present on only one side are compared against "(missing page)".
        #
        # @param [Array<String>] actual The actual PDF content.
        # @param [Array<String>] expected The expected PDF content.
        # @return [void]
        def print_differences(actual, expected)
          page_total = [actual.length, expected.length].max

          page_total.times do |page_idx|
            actual_page = actual[page_idx] || "(missing page)"
            expected_page = expected[page_idx] || "(missing page)"

            print_differences_for_page(actual_page, expected_page, page_idx)
          end
        end

        # Prints the differences between actual and expected content for a specific page.
        # Pages that are equal once whitespace is ignored produce no output.
        #
        # @param [String] actual_page The actual page content.
        # @param [String] expected_page The expected page content.
        # @param [Integer] page_idx The zero-based index of the page being compared.
        # @return [void]
        def print_differences_for_page(actual_page, expected_page, page_idx)
          return if normalize(actual_page.to_s) == normalize(expected_page.to_s)

          puts "\nPage #{page_idx + 1} differences (ignoring whitespace):"

          # Create diffs between the two pages, then format and display them
          diffs = Diff::LCS.sdiff(expected_page, actual_page)
          puts format_diff_output(diffs, expected_page, actual_page)
        end

        # Formats the output of differences for display.
        #
        # @param [Array<Diff::LCS::ContextChange>] diffs The list of differences.
        # @param [String] expected The expected text.
        # @param [String] actual The actual text.
        # @return [String] The formatted differences, five lines per changed run.
        def format_diff_output(diffs, expected, actual)
          group_changed_diffs(diffs)
            .flat_map { |change| format_change(expected, actual, change) }
            .join("\n")
        end

        private

        # Groups contiguous “real” diffs (added/removed/changed) into blocks,
        # splitting whenever you hit an unchanged (“=”) diff.
        def group_changed_diffs(diffs)
          diffs
            .chunk { |diff| diff.action != "=" }
            .filter_map { |changed, run| { diffs: run } if changed }
        end

        # Renders one changed run, anchored at the position of its first diff.
        def format_change(expected, actual, change)
          pos = change[:diffs].first.old_position
          snippets = extract_snippets(expected, actual, change, pos)

          build_output(snippets, pos)
        end

        # Collects up to 50 characters from each side, starting at the first diff
        # of the run, plus the start offset of up to 20 characters of leading context.
        def extract_snippets(expected, actual, change, pos)
          {
            context_start: [0, pos - 20].max,
            context: expected,
            expected_snip: expected[pos, 50],
            actual_snip: actual[change[:diffs].first.new_position, 50]
          }
        end

        # Builds the final lines of output for one changed run.
        def build_output(snip_data, pos)
          start = snip_data[:context_start]
          ctx = snip_data[:context]

          [
            " Context: ...#{ctx[start...pos]}",
            " Expected: #{snip_data[:expected_snip]}...",
            " Actual: #{snip_data[:actual_snip]}...",
            " Expected (no spaces): #{normalize(snip_data[:expected_snip])}...",
            " Actual (no spaces): #{normalize(snip_data[:actual_snip])}..."
          ]
        end
      end
    end
    # rubocop:enable Metrics/ModuleLength
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module Bidi2pdf
  module TestHelpers
    module Testcontainers
      # Testcontainers container definition for a chromedriver image. It can
      # either start the given image or build the image from a local Dockerfile
      # first (see #start_local_image).
      class ChromedriverContainer < ::Testcontainers::DockerContainer
        DEFAULT_CHROMEDRIVER_PORT = 3000
        DEFAULT_IMAGE = "dieters877565/chromedriver"

        # Docker read timeout (seconds) used while the local image is being built.
        BUILD_READ_TIMEOUT = 60 * 10

        attr_reader :docker_file, :build_dir

        # @param image [String] image name to run (and to tag a local build with)
        # @param options [Hash] options forwarded to the superclass; `:docker_file`
        #   and `:build_dir` are consumed here, `:build_dir` defaulting to `:working_dir`
        def initialize(image = DEFAULT_IMAGE, **options)
          @docker_file = options.delete(:docker_file) || "Dockerfile"
          @build_dir = options.delete(:build_dir) || options[:working_dir]

          super

          # Unless a wait strategy was configured, wait for chromedriver's startup log line.
          @wait_for ||= add_wait_for(:logs, /ChromeDriver was started successfully on port/)
        end

        # Exposes the chromedriver port, then starts the container via the superclass.
        def start
          with_exposed_ports(port)
          super
        end

        # @return [Integer] the container-side chromedriver port
        def port
          DEFAULT_CHROMEDRIVER_PORT
        end

        # Builds the image from `build_dir` / `docker_file`, tags it with `image`
        # and echoes the build log to stdout with timestamps.
        #
        # The global Docker read timeout is raised for the duration of the build
        # and restored afterwards, also when the build fails.
        #
        # @return [Object] whatever Docker::Image.build_from_dir returns
        def build_local_image
          old_timeout = Docker.options[:read_timeout]
          Docker.options[:read_timeout] = BUILD_READ_TIMEOUT

          Docker::Image.build_from_dir(build_dir, { "t" => image, "dockerfile" => docker_file }) do |lines|
            lines.split("\n").each { |line| log_build_line(line) }
          end
        ensure
          Docker.options[:read_timeout] = old_timeout
        end

        # rubocop: disable Metrics/AbcSize
        # Builds the local image, then creates and starts a container from it and
        # runs the configured wait strategy.
        #
        # @return [ChromedriverContainer] self
        # @raise [::Testcontainers::NotFoundError] when Docker reports a missing resource
        # @raise [::Testcontainers::ConnectionError] when the Docker socket cannot be reached
        def start_local_image
          build_local_image

          with_exposed_ports(port)

          @_container ||= Docker::Container.create(_container_create_options)
          @_container.start

          @_id = @_container.id
          json = @_container.json
          @name = json["Name"]
          @_created_at = json["Created"]

          @wait_for&.call(self)

          self
        rescue Docker::Error::NotFoundError => e
          # The leading :: matters: a bare `Testcontainers` resolves to
          # Bidi2pdf::TestHelpers::Testcontainers inside this namespace.
          raise ::Testcontainers::NotFoundError, e.message
        rescue Excon::Error::Socket => e
          raise ::Testcontainers::ConnectionError, e.message
        end

        # rubocop: enable Metrics/AbcSize

        # @param protocol [String] URL scheme to use
        # @return [String] session URL built from the container host and the mapped chromedriver port
        def session_url(protocol: "http")
          "#{protocol}://#{host}:#{mapped_port(port)}/session"
        end

        private

        # Prints the "stream" part of one JSON build-log line, prefixed with a timestamp.
        # Blank, non-JSON or stream-less lines are skipped instead of aborting the build.
        def log_build_line(line)
          log = JSON.parse(line)
          return unless log.is_a?(Hash)

          trimmed_stream = log["stream"].to_s.strip
          return if trimmed_stream.empty?

          timestamp = Time.now.strftime("[%Y-%m-%dT%H:%M:%S.%6N]")
          $stdout.write "#{timestamp} #{trimmed_stream}\n"
        rescue JSON::ParserError
          nil
        end
      end
    end
  end
end