bidi2pdf 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -1
- data/CHANGELOG.md +63 -8
- data/README.md +28 -0
- data/docker/Dockerfile +1 -1
- data/docker/Dockerfile.chromedriver +9 -2
- data/docker/Dockerfile.slim +2 -2
- data/lib/bidi2pdf/bidi/browser_console_logger.rb +92 -0
- data/lib/bidi2pdf/bidi/browser_tab.rb +431 -41
- data/lib/bidi2pdf/bidi/client.rb +85 -23
- data/lib/bidi2pdf/bidi/command_manager.rb +46 -60
- data/lib/bidi2pdf/bidi/commands/base.rb +39 -1
- data/lib/bidi2pdf/bidi/commands/browser_remove_user_context.rb +27 -0
- data/lib/bidi2pdf/bidi/commands/browsing_context_print.rb +4 -0
- data/lib/bidi2pdf/bidi/commands/print_parameters_validator.rb +5 -0
- data/lib/bidi2pdf/bidi/commands.rb +1 -0
- data/lib/bidi2pdf/bidi/connection_manager.rb +3 -9
- data/lib/bidi2pdf/bidi/event_manager.rb +2 -2
- data/lib/bidi2pdf/bidi/interceptor.rb +1 -1
- data/lib/bidi2pdf/bidi/js_logger_helper.rb +16 -0
- data/lib/bidi2pdf/bidi/logger_events.rb +25 -45
- data/lib/bidi2pdf/bidi/navigation_failed_events.rb +41 -0
- data/lib/bidi2pdf/bidi/network_event.rb +15 -0
- data/lib/bidi2pdf/bidi/network_event_formatters/network_event_console_formatter.rb +4 -3
- data/lib/bidi2pdf/bidi/network_events.rb +27 -17
- data/lib/bidi2pdf/bidi/session.rb +123 -13
- data/lib/bidi2pdf/bidi/user_context.rb +62 -0
- data/lib/bidi2pdf/bidi/web_socket_dispatcher.rb +7 -7
- data/lib/bidi2pdf/chromedriver_manager.rb +48 -21
- data/lib/bidi2pdf/cli.rb +10 -2
- data/lib/bidi2pdf/dsl.rb +33 -0
- data/lib/bidi2pdf/launcher.rb +30 -0
- data/lib/bidi2pdf/notifications/event.rb +52 -0
- data/lib/bidi2pdf/notifications/instrumenter.rb +65 -0
- data/lib/bidi2pdf/notifications/logging_subscriber.rb +136 -0
- data/lib/bidi2pdf/notifications.rb +78 -0
- data/lib/bidi2pdf/session_runner.rb +35 -3
- data/lib/bidi2pdf/test_helpers/matchers/contains_pdf_text.rb +50 -0
- data/lib/bidi2pdf/test_helpers/matchers/have_pdf_page_count.rb +50 -0
- data/lib/bidi2pdf/test_helpers/matchers/match_pdf_text.rb +45 -0
- data/lib/bidi2pdf/test_helpers/pdf_reader_utils.rb +89 -0
- data/lib/bidi2pdf/test_helpers/pdf_text_sanitizer.rb +232 -0
- data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb +87 -0
- data/lib/bidi2pdf/test_helpers.rb +13 -0
- data/lib/bidi2pdf/verbose_logger.rb +79 -0
- data/lib/bidi2pdf/version.rb +1 -1
- data/lib/bidi2pdf.rb +131 -10
- data/sig/bidi2pdf/bidi/client.rbs +1 -1
- metadata +67 -4
- data/lib/bidi2pdf/utils.rb +0 -15
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../pdf_text_sanitizer"
|
4
|
+
|
5
|
+
# Custom RSpec matcher for checking whether a PDF document contains specific text.
|
6
|
+
#
|
7
|
+
# This matcher allows you to assert that a certain string or regular expression
|
8
|
+
# is present in the sanitized text of a PDF document.
|
9
|
+
#
|
10
|
+
# It supports chaining with `.at_page(n)` to limit the search to a specific page.
|
11
|
+
#
|
12
|
+
# ## Examples
|
13
|
+
#
|
14
|
+
# expect(pdf_data).to contains_pdf_text("Total: 123.45")
|
15
|
+
# expect(pdf_data).to contains_pdf_text(/Invoice #\d+/).at_page(2)
|
16
|
+
#
|
17
|
+
# @param expected [String, Regexp] The text or pattern to match inside the PDF.
|
18
|
+
#
|
19
|
+
# @return [Boolean] true if the expected content is found (on the given page if specified)
|
20
|
+
RSpec::Matchers.define :contains_pdf_text do |expected|
  # Optional chain: restrict the search to a single page (1-based, as used by
  # the "Page N" labels in the failure message).
  chain :at_page do |page_number|
    @page_number = page_number
  end

  # Delegates the actual search to PDFTextSanitizer.contains?.
  match do |actual|
    Bidi2pdf::TestHelpers::PDFTextSanitizer.contains?(actual, expected, @page_number)
  end

  # Builds the failure text. When a page was requested that lies outside
  # 1..page_count, a short "no such page" message is produced instead of
  # dumping the whole document.
  failure_message do |actual|
    pages = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean_pages(actual)

    if @page_number && !@page_number.between?(1, pages.size)
      "Document does not contain page #{@page_number}"
    else
      <<~MSG
        PDF text did not contain expected content.

        --- Expected (#{expected.inspect}) ---
        On page #{@page_number || "any"}:

        --- Actual ---
        #{pages.each_with_index.map { |text, i| "Page #{i + 1}:\n#{text}" }.join("\n\n")}
      MSG
    end
  end

  # Human-readable description, mentioning the page only when one was chained.
  description do
    page_suffix = @page_number ? " on page #{@page_number}" : ""
    "contain #{expected.inspect} in PDF#{page_suffix}"
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pdf-reader"
|
4
|
+
require "base64"
|
5
|
+
|
6
|
+
# RSpec matcher to assert the number of pages in a PDF document.
|
7
|
+
#
|
8
|
+
# This matcher is useful for verifying the structural integrity of generated or uploaded PDFs,
|
9
|
+
# especially in tests for reporting, invoice generation, or document exports.
|
10
|
+
#
|
11
|
+
# It supports a variety of input types:
|
12
|
+
# - Raw PDF data as a `String`
|
13
|
+
# - File paths (`String`)
|
14
|
+
# - `StringIO` or `File` objects
|
15
|
+
# - Even Base64-encoded strings, if your `pdf_reader_for` method handles it
|
16
|
+
#
|
17
|
+
# ## Example
|
18
|
+
#
|
19
|
+
# expect(pdf_data).to have_pdf_page_count(5)
|
20
|
+
# expect(StringIO.new(pdf_data)).to have_pdf_page_count(3)
|
21
|
+
#
|
22
|
+
# If the PDF is malformed, the matcher will gracefully fail and show the error message.
|
23
|
+
#
|
24
|
+
# @param expected_count [Integer] The number of pages the PDF is expected to contain.
|
25
|
+
# @return [RSpec::Matchers::Matcher] The matcher object for use in specs.
|
26
|
+
#
|
27
|
+
# @note This matcher depends on `Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_reader_for`
|
28
|
+
# to extract the page count. Make sure it supports all your intended input formats.
|
29
|
+
RSpec::Matchers.define :have_pdf_page_count do |expected_count|
  # Reads the page count through PDFReaderUtils.pdf_reader_for and remembers it
  # for the failure message. A malformed PDF does not raise out of the matcher:
  # the parser's message is stored and the match simply fails.
  match do |pdf_data|
    begin
      @actual_count = Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_reader_for(pdf_data).page_count
      @actual_count == expected_count
    rescue PDF::Reader::MalformedPDFError => e
      @error_message = e.message
      false
    end
  end

  # Reports either the parse error captured during matching or the count mismatch.
  failure_message do |_pdf_data|
    next "Expected a valid PDF with #{expected_count} pages, but encountered an error: #{@error_message}" if @error_message

    "Expected PDF to have #{expected_count} pages, but it has #{@actual_count} pages"
  end

  description { "have #{expected_count} PDF pages" }
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../pdf_text_sanitizer"
|
4
|
+
|
5
|
+
# Custom RSpec matcher to compare the **sanitized text content** of two PDF files.
|
6
|
+
#
|
7
|
+
# This matcher is useful for comparing PDF documents where formatting and metadata may differ,
|
8
|
+
# but the actual visible text content should be the same. It uses `PDFTextSanitizer` internally
|
9
|
+
# to normalize and clean the text before comparison.
|
10
|
+
#
|
11
|
+
# ## Example
|
12
|
+
#
|
13
|
+
# expect(actual_pdf).to match_pdf_text(expected_pdf)
|
14
|
+
#
|
15
|
+
# If the texts don’t match, it prints a diff-friendly message showing cleaned text content.
|
16
|
+
#
|
17
|
+
# @param expected [String, StringIO, File] The expected PDF content (can be a file path, StringIO, or raw string).
|
18
|
+
# @return [RSpec::Matchers::Matcher] An RSpec matcher to compare against an actual PDF.
|
19
|
+
#
|
20
|
+
# @note Ensure `PDFTextSanitizer.match?` and `PDFTextSanitizer.clean_pages` are implemented
|
21
|
+
# to handle your specific PDF processing logic.
|
22
|
+
RSpec::Matchers.define :match_pdf_text do |expected|
  # The comparison itself lives in PDFTextSanitizer.match?.
  match { |actual| Bidi2pdf::TestHelpers::PDFTextSanitizer.match?(actual, expected) }

  # Shows the cleaned page texts of both documents, one page per line block,
  # so the two sides can be compared by eye.
  failure_message do |actual|
    actual_text, expected_text = [actual, expected].map do |pdf|
      Bidi2pdf::TestHelpers::PDFTextSanitizer.clean_pages(pdf).join("\n")
    end

    <<~MSG
      PDF text did not match.

      --- Expected ---
      #{expected_text}

      --- Actual ---
      #{actual_text}
    MSG
  end

  description { "match sanitized PDF text content" }
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
  module TestHelpers
    # Helpers that turn the various "PDF-like" inputs used in specs (raw bytes,
    # Base64 text, file paths, IO objects) into a PDF::Reader or into page texts.
    #
    # The helpers are available as module functions
    # (`PDFReaderUtils.pdf_text(...)`) and, when the module is included, as
    # instance methods that forward to the module functions.
    module PDFReaderUtils
      # Prefix shared by the Base64 encoding of any data starting with "%PDF".
      BASE64_PDF_PREFIX = "JVBER"
      # Magic bytes at the start of a raw PDF document.
      RAW_PDF_PREFIX = "%PDF-"

      class << self
        # Extracts the text of every page of a PDF document.
        #
        # @param pdf_data [String, StringIO, File] The PDF in one of these forms:
        #   * Base64-encoded PDF string
        #   * Raw PDF data beginning with "%PDF-"
        #   * StringIO or File containing PDF data
        #   * Path to a PDF file as String
        # @return [Array<String>] one string per page; when the data cannot be
        #   parsed as a PDF, a one-element array holding the original input
        # @return [Object] the input itself, untouched, when it is not a
        #   String, StringIO or File (e.g. an already extracted page array)
        # @example Extract text from a PDF file
        #   text_content = pdf_text('path/to/document.pdf')
        #
        # @example Extract text from Base64-encoded string
        #   text_content = pdf_text(base64_encoded_pdf_data)
        def pdf_text(pdf_data)
          return pdf_data unless pdf_data.is_a?(String) || pdf_data.is_a?(StringIO) || pdf_data.is_a?(File)

          pdf_reader_for(pdf_data).pages.map(&:text)
        rescue PDF::Reader::MalformedPDFError
          [pdf_data]
        end

        # Converts the input PDF data into an IO object and initializes a PDF::Reader.
        #
        # @param pdf_data [String, StringIO, File] The PDF data to be read.
        # @return [PDF::Reader] A PDF::Reader instance for the given data.
        # @raise [PDF::Reader::MalformedPDFError] If the PDF data is invalid.
        def pdf_reader_for(pdf_data)
          PDF::Reader.new(convert_data_to_io(pdf_data))
        end

        # Converts various input formats into an IO-like object for PDF::Reader.
        #
        # IO-like inputs (anything answering to `read` and `seek`, such as
        # StringIO or File) are returned unchanged. Strings are inspected in
        # this order: Base64-encoded PDF, raw PDF bytes, path of an existing
        # file, and finally arbitrary data wrapped as-is.
        #
        # @param pdf_data [String, StringIO, File] The PDF data to be converted.
        # @return [StringIO, File] An IO-like object positioned on the PDF data.
        # @raise [ArgumentError] If the input is neither a String nor IO-like.
        def convert_data_to_io(pdf_data)
          return pdf_data if pdf_data.respond_to?(:read) && pdf_data.respond_to?(:seek)

          unless pdf_data.is_a?(String)
            raise ArgumentError, "Unsupported PDF input: #{pdf_data.class} (expected String, StringIO or File)"
          end

          if pdf_data.start_with?(BASE64_PDF_PREFIX)
            # unpack1("m") is the lenient Base64 decoder (what Base64.decode64
            # does internally); using it avoids depending on the base64 library
            # having been loaded by another file.
            StringIO.new(pdf_data.unpack1("m"))
          elsif !pdf_data.start_with?(RAW_PDF_PREFIX) && existing_file?(pdf_data)
            # Read the file eagerly so no file handle is left open behind the reader.
            StringIO.new(File.binread(pdf_data))
          else
            StringIO.new(pdf_data)
          end
        end

        private

        # True when the string names an existing file. Strings containing a NUL
        # byte are binary data, not paths, and File.exist? would raise on them.
        def existing_file?(candidate)
          !candidate.include?("\0") && File.exist?(candidate)
        end
      end

      # Instance-level counterparts that forward to the module functions.
      module InstanceMethods
        def pdf_text(pdf_data)
          PDFReaderUtils.pdf_text(pdf_data)
        end

        def pdf_reader_for(pdf_data)
          PDFReaderUtils.pdf_reader_for(pdf_data)
        end

        def convert_data_to_io(pdf_data)
          PDFReaderUtils.convert_data_to_io(pdf_data)
        end
      end

      # Including PDFReaderUtils makes the forwarding instance methods available.
      def self.included(base)
        base.include(InstanceMethods)
      end
    end
  end
end
|
@@ -0,0 +1,232 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "unicode_utils"
|
4
|
+
require "diff/lcs"
|
5
|
+
require "diff/lcs/hunk"
|
6
|
+
|
7
|
+
module Bidi2pdf
  module TestHelpers
    # rubocop: disable Metrics/ModuleLength
    # Provides utilities for sanitizing and comparing PDF text content.
    # This module includes methods for cleaning text, comparing PDF content,
    # and reporting differences between actual and expected PDF outputs.
    #
    # The sanitization process includes normalizing whitespace, replacing
    # typographic ligatures, quotes and dashes, and joining hyphenated line breaks.
    #
    # @example Cleaning text
    #   sanitized_text = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean("Some text")
    #
    # @example Comparing PDF content
    #   match = Bidi2pdf::TestHelpers::PDFTextSanitizer.match?(actual_pdf, expected_pdf)
    module PDFTextSanitizer
      class << self
        # Cleans the given text: applies NFKD normalization, replaces common
        # typographic characters with plain equivalents, joins words hyphenated
        # across a line break and collapses whitespace.
        #
        # @param [String] text The text to clean.
        # @return [String] The cleaned text.
        def clean(text)
          text = UnicodeUtils.nfkd(text)

          text.gsub("\uFB01", "fi")
              .gsub("\uFB02", "fl")
              .gsub("-\n", "")
              .gsub(/[\u201C\u201D]/, '"') # curly double quotes -> straight
              .gsub(/[\u2018\u2019]/, "'") # curly single quotes -> straight
              .gsub("…", "...")
              .gsub("—", "--")
              .gsub("–", "-")
              .gsub(/\s+/, " ") # Replace all whitespace sequences with a single space
              .strip
        end

        # Extracts the page texts via PDFReaderUtils.pdf_text and applies
        # `clean` to each page.
        #
        # @param [Object] actual_pdf_thingy Anything PDFReaderUtils.pdf_text
        #   accepts (including an already extracted array of page texts).
        # @return [Array<String>] An array of cleaned page texts.
        def clean_pages(actual_pdf_thingy)
          Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_text(actual_pdf_thingy).map { |text| clean(text) }
        end

        # Cleans the given text and removes all whitespace for comparison purposes.
        #
        # @param [String] text The text to clean and normalize.
        # @return [String] The cleaned text without whitespace.
        def normalize(text)
          clean(text).gsub(/\s+/, "")
        end

        # Checks if the given PDF contains the expected text or pattern.
        #
        # @param [Object] actual_pdf_thingy The PDF object to search.
        # @param [String, Regexp] expected The expected text or pattern.
        # @param [Integer, nil] page_number The 1-based page to search; all
        #   pages are searched when nil.
        # @return [Boolean] `true` if the expected text is found, `false`
        #   otherwise (including when the requested page does not exist).
        def contains?(actual_pdf_thingy, expected, page_number = nil)
          cleaned_pages = clean_pages(actual_pdf_thingy)

          return cleaned_pages.any? { |page| match_expected?(page, expected) } unless page_number

          # Reject 0 and negative numbers too: Array#[] would otherwise count
          # from the end and silently search the wrong page.
          return false unless page_number.between?(1, cleaned_pages.size)

          match_expected?(cleaned_pages[page_number - 1], expected)
        end

        # Matches the given text against the expected text or pattern.
        #
        # @param [String, nil] text The text to match.
        # @param [String, Regexp] expected The expected text or pattern.
        # @return [Boolean] `true` if the text matches, `false` otherwise.
        def match_expected?(text, expected)
          return false unless text

          expected.is_a?(Regexp) ? text.match?(expected) : text.include?(expected.to_s)
        end

        # Compares the content of two PDF objects page by page, ignoring all
        # whitespace. On a mismatch the differences are printed to stdout.
        #
        # @param [Object] actual_pdf_thingy The actual PDF object.
        # @param [Object] expected_pdf_thingy The expected PDF object.
        # @return [Boolean] `true` if the content matches, `false` otherwise.
        def match?(actual_pdf_thingy, expected_pdf_thingy)
          cleaned_actual = clean_pages(actual_pdf_thingy)
          cleaned_expected = clean_pages(expected_pdf_thingy)

          # Compare without whitespace for equality check
          same = cleaned_actual.map { |text| normalize(text) } == cleaned_expected.map { |text| normalize(text) }

          report_content_mismatch(cleaned_actual, cleaned_expected) unless same
          same
        end

        # Reports differences between actual and expected PDF content on stdout.
        #
        # @param [Array<String>] actual The actual PDF content.
        # @param [Array<String>] expected The expected PDF content.
        # @return [void]
        def report_content_mismatch(actual, expected)
          puts "--- PDF content mismatch ---"
          print_differences(actual, expected)
        end

        # Prints detailed differences between actual and expected PDF content,
        # page by page. A page present on only one side is compared against the
        # placeholder text "(missing page)".
        #
        # @param [Array<String>] actual The actual PDF content.
        # @param [Array<String>] expected The expected PDF content.
        # @return [void]
        def print_differences(actual, expected)
          max_pages = [actual.length, expected.length].max

          (0...max_pages).each do |page_idx|
            actual_page = actual[page_idx] || "(missing page)"
            expected_page = expected[page_idx] || "(missing page)"

            print_differences_for_page(actual_page, expected_page, page_idx)
          end
        end

        # Prints the differences between actual and expected content for a specific page.
        # Pages that are equal once whitespace is ignored produce no output.
        #
        # @param [String] actual_page The actual page content.
        # @param [String] expected_page The expected page content.
        # @param [Integer] page_idx The zero-based index of the page being compared.
        # @return [void]
        def print_differences_for_page(actual_page, expected_page, page_idx)
          return if normalize(actual_page.to_s) == normalize(expected_page.to_s)

          puts "\nPage #{page_idx + 1} differences (ignoring whitespace):"

          # Create diffs between the two pages
          diffs = Diff::LCS.sdiff(expected_page, actual_page)

          # Format and display the differences
          puts format_diff_output(diffs, expected_page, actual_page)
        end

        # Formats the output of differences for display.
        #
        # @param [Array<Diff::LCS::ContextChange>] diffs The list of differences.
        # @param [String] expected The expected text.
        # @param [String] actual The actual text.
        # @return [String] The formatted differences.
        def format_diff_output(diffs, expected, actual)
          group_changed_diffs(diffs)
            .flat_map { |change| format_change(expected, actual, change) }
            .join("\n")
        end

        private

        # Groups contiguous “real” diffs (added/removed/changed) into blocks,
        # splitting whenever you hit an unchanged (“=”) diff.
        def group_changed_diffs(diffs)
          diffs
            .chunk_while { |_prev, curr| curr.action != "=" }
            .map { |chunk| chunk.reject { |elem| elem.action == "=" } }
            .select(&:any?)
            .map { |chunk| { diffs: chunk } }
        end

        # Renders one group of changes as output lines, anchored at the
        # position of the group's first diff in the expected text.
        def format_change(expected, actual, change)
          pos = change[:diffs].first.old_position
          snippets = extract_snippets(expected, actual, change, pos)

          build_output(snippets, pos)
        end

        # Collects up to 20 characters of leading context plus 50-character
        # excerpts of both texts starting at the change.
        def extract_snippets(expected, actual, change, pos)
          {
            context_start: [0, pos - 20].max,
            context: expected,
            expected_snip: expected[pos, 50],
            actual_snip: actual[change[:diffs].first.new_position, 50]
          }
        end

        # Builds the final lines of output for one change.
        def build_output(snip_data, pos)
          start = snip_data[:context_start]
          ctx = snip_data[:context]
          # String#[] yields nil for an out-of-range start; treat that as empty.
          expected_snip = snip_data[:expected_snip].to_s
          actual_snip = snip_data[:actual_snip].to_s

          [
            "  Context: ...#{ctx[start...pos]}",
            "  Expected: #{expected_snip}...",
            "  Actual:   #{actual_snip}...",
            "  Expected (no spaces): #{normalize(expected_snip)}...",
            "  Actual (no spaces):   #{normalize(actual_snip)}..."
          ]
        end
      end
    end
    # rubocop:enable Metrics/ModuleLength
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# testcontainers is an optional dependency: warn instead of failing when it is
# not installed, so the rest of the test helpers can still be loaded.
begin
  require "testcontainers"
rescue LoadError
  warn "Missing testcontainers. Add it to your Gemfile if you're using Bidi2pdf test helpers."
end
|
8
|
+
|
9
|
+
module Bidi2pdf
  module TestHelpers
    module Testcontainers
      # Testcontainers wrapper around a ChromeDriver image. It can either start
      # a published image (`start`) or build the image from a local Dockerfile
      # first (`start_local_image`).
      #
      # NOTE: inside this namespace a bare `Testcontainers` refers to
      # Bidi2pdf::TestHelpers::Testcontainers, so the gem's own constants must
      # be written with a leading `::`.
      class ChromedriverContainer < ::Testcontainers::DockerContainer
        DEFAULT_CHROMEDRIVER_PORT = 3000
        DEFAULT_IMAGE = "dieters877565/chromedriver"
        # Read timeout (seconds) applied to the Docker client while building an image.
        BUILD_READ_TIMEOUT = 60 * 10

        attr_reader :docker_file, :build_dir

        # @param image [String] image name/tag to run (and to tag a local build with)
        # @param options [Hash] forwarded to the parent container after
        #   `:docker_file` (default "Dockerfile") and `:build_dir` (default: the
        #   `:working_dir` option) have been taken out
        def initialize(image = DEFAULT_IMAGE, **options)
          @docker_file = options.delete(:docker_file) || "Dockerfile"
          @build_dir = options.delete(:build_dir) || options[:working_dir]

          super

          # Unless a wait strategy is already set, wait for ChromeDriver's start-up log line.
          @wait_for ||= add_wait_for(:logs, /ChromeDriver was started successfully on port/)
        end

        # Exposes the ChromeDriver port and starts the container via the parent class.
        def start
          with_exposed_ports(port)
          super
        end

        # @return [Integer] the port ChromeDriver listens on inside the container
        def port
          DEFAULT_CHROMEDRIVER_PORT
        end

        # rubocop: disable Metrics/AbcSize

        # Builds the image from `build_dir` / `docker_file`, tags it with `image`
        # and echoes the build's "stream" output to stdout with a timestamp.
        # The Docker read timeout is raised for the duration of the build and
        # restored afterwards, even when the build fails.
        #
        # @return [Object] whatever Docker::Image.build_from_dir returns
        def build_local_image
          previous_timeout = Docker.options[:read_timeout]
          Docker.options[:read_timeout] = BUILD_READ_TIMEOUT

          Docker::Image.build_from_dir(build_dir, { "t" => image, "dockerfile" => docker_file }) do |lines|
            lines.split("\n").each do |line|
              next unless (log = JSON.parse(line)) && log.key?("stream")
              next unless log["stream"] && !(trimmed_stream = log["stream"].strip).empty?

              timestamp = Time.now.strftime("[%Y-%m-%dT%H:%M:%S.%6N]")
              $stdout.write "#{timestamp} #{trimmed_stream}\n"
            end
          end
        ensure
          Docker.options[:read_timeout] = previous_timeout
        end

        # rubocop: enable Metrics/AbcSize

        # rubocop: disable Metrics/AbcSize

        # Builds the local image, then creates and starts a container from it,
        # records its id/name/creation time and applies the wait strategy.
        #
        # @return [ChromedriverContainer] self
        # @raise [::Testcontainers::NotFoundError] when Docker reports a missing resource
        # @raise [::Testcontainers::ConnectionError] when the Docker socket cannot be reached
        def start_local_image
          build_local_image

          with_exposed_ports(port)

          @_container ||= Docker::Container.create(_container_create_options)
          @_container.start

          @_id = @_container.id
          json = @_container.json
          @name = json["Name"]
          @_created_at = json["Created"]

          @wait_for&.call(self)

          self
        rescue Docker::Error::NotFoundError => e
          raise ::Testcontainers::NotFoundError, e.message
        rescue Excon::Error::Socket => e
          raise ::Testcontainers::ConnectionError, e.message
        end

        # rubocop: enable Metrics/AbcSize

        # @param protocol [String] URL scheme to use
        # @return [String] URL of the "/session" endpoint on the mapped host port
        def session_url(protocol: "http")
          "#{protocol}://#{host}:#{mapped_port(port)}/session"
        end
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
%w[pdf-reader diff-lcs unicode_utils].each do |dep|
|
4
|
+
require dep
|
5
|
+
rescue LoadError
|
6
|
+
warn "Missing #{dep}. Add it to your Gemfile if you're using Bidi2pdf test helpers."
|
7
|
+
end
|
8
|
+
|
9
|
+
require "bidi2pdf/test_helpers/pdf_text_sanitizer"
|
10
|
+
require "bidi2pdf/test_helpers/pdf_reader_utils"
|
11
|
+
require "bidi2pdf/test_helpers/matchers/match_pdf_text"
|
12
|
+
require "bidi2pdf/test_helpers/matchers/contains_pdf_text"
|
13
|
+
require "bidi2pdf/test_helpers/matchers/have_pdf_page_count"
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "delegate" # SimpleDelegator lives in the stdlib "delegate" library

module Bidi2pdf
  # Logger decorator that adds three graded debug channels (debug1..debug3) on
  # top of a wrapped logger. Every other call is delegated to the wrapped
  # logger unchanged.
  #
  # A debugN message is forwarded to the wrapped logger's `debug` only when the
  # current verbosity is at least N.
  class VerboseLogger < SimpleDelegator
    VERBOSITY_LEVELS = {
      none: 0,
      low: 1,
      medium: 2,
      high: 3
    }.freeze

    attr_reader :logger, :verbosity

    # @param logger [Object] the logger to wrap (must respond to `debug`)
    # @param verbosity [Symbol, String, Numeric, nil] initial verbosity, see #verbosity=
    def initialize(logger, verbosity = :low)
      super(logger)
      @logger = logger
      self.verbosity = verbosity
    end

    # Sets the verbosity.
    #
    # Numeric values are truncated to an integer and clamped to the known
    # range. Names (Symbol/String) are looked up in VERBOSITY_LEVELS. Unknown
    # names and values that cannot be turned into a name (e.g. nil) fall back
    # to the lowest level.
    #
    # @param verbosity [Symbol, String, Numeric, nil]
    def verbosity=(verbosity)
      lowest, highest = VERBOSITY_LEVELS.values.minmax

      @verbosity =
        if verbosity.is_a?(Numeric)
          verbosity.to_i.clamp(lowest, highest)
        elsif verbosity.respond_to?(:to_sym)
          VERBOSITY_LEVELS.fetch(verbosity.to_sym, lowest)
        else
          lowest
        end
    end

    # @return [Symbol] the name of the current verbosity level
    def verbosity_sym
      VERBOSITY_LEVELS.key(verbosity)
    end

    # Logs at debug level, prefixed with "[D1]", when verbosity is at least :low.
    def debug1(progname = nil, &)
      return unless debug1?

      logger.debug("[D1] #{progname}", &)
    end

    # @return [Boolean] whether debug1 messages are emitted
    def debug1?
      verbosity >= 1
    end

    # Sets the verbosity to :low (debug1 only), mirroring Logger#debug!.
    def debug1!
      @verbosity = VERBOSITY_LEVELS[:low]
    end

    # Logs at debug level, prefixed with "[D2]", when verbosity is at least :medium.
    def debug2(progname = nil, &)
      return unless debug2?

      logger.debug("[D2] #{progname}", &)
    end

    # @return [Boolean] whether debug2 messages are emitted
    def debug2?
      verbosity >= 2
    end

    # Sets the verbosity to :medium (debug1 and debug2).
    def debug2!
      @verbosity = VERBOSITY_LEVELS[:medium]
    end

    # Logs at debug level, prefixed with "[D3]", when verbosity is :high.
    def debug3(progname = nil, &)
      return unless debug3?

      logger.debug("[D3] #{progname}", &)
    end

    # @return [Boolean] whether debug3 messages are emitted
    def debug3?
      verbosity >= 3
    end

    # Sets the verbosity to :high (all debug channels).
    def debug3!
      @verbosity = VERBOSITY_LEVELS[:high]
    end
  end
end
|
data/lib/bidi2pdf/version.rb
CHANGED