bidi2pdf 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -8
- data/README.md +14 -0
- data/docker/Dockerfile.chromedriver +8 -1
- data/lib/bidi2pdf/bidi/browser_tab.rb +40 -0
- data/lib/bidi2pdf/bidi/command_manager.rb +14 -26
- data/lib/bidi2pdf/bidi/connection_manager.rb +3 -9
- data/lib/bidi2pdf/bidi/event_manager.rb +1 -1
- data/lib/bidi2pdf/bidi/navigation_failed_events.rb +41 -0
- data/lib/bidi2pdf/bidi/session.rb +4 -1
- data/lib/bidi2pdf/notifications.rb +1 -1
- data/lib/bidi2pdf/test_helpers/matchers/contains_pdf_text.rb +50 -0
- data/lib/bidi2pdf/test_helpers/matchers/have_pdf_page_count.rb +50 -0
- data/lib/bidi2pdf/test_helpers/matchers/match_pdf_text.rb +45 -0
- data/lib/bidi2pdf/test_helpers/pdf_reader_utils.rb +89 -0
- data/lib/bidi2pdf/test_helpers/pdf_text_sanitizer.rb +232 -0
- data/lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb +87 -0
- data/lib/bidi2pdf/test_helpers.rb +13 -0
- data/lib/bidi2pdf/version.rb +1 -1
- data/lib/bidi2pdf.rb +32 -3
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d71c88a5941411b13770993de9b38f6321c263765ffce1a9bbd347fb960855ac
|
4
|
+
data.tar.gz: aa64333d4dc4de54f6e1b627287a5d11661634f3244a8e3a213428c243f155f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d598fe002552f46e53f803f46577adceeeb087b377a40b486d5d2ef7bf713463f429aa26b2687fb7d0b865d73aacf3262be71e17db154794ac82e1e4a245986
|
7
|
+
data.tar.gz: 3b7cb02b0e857e551c720a665ac31d3669a9a27e8c9e3e5c1cdc497517b8fbcd3e917d6b0735113e3b956b23ded042b44c72bfff637cfdbf2431642bd98aaa2b
|
data/CHANGELOG.md
CHANGED
@@ -7,8 +7,37 @@ All notable changes to this project will be documented in this file.
|
|
7
7
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
8
8
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
9
9
|
|
10
|
+
[unreleased]: https://github.com/dieter-medium/bidi2pdf/compare/v0.1.8..HEAD
|
11
|
+
|
10
12
|
<!-- generated by git-cliff end -->
|
11
13
|
|
14
|
+
## [0.1.8] - 2025-04-22
|
15
|
+
|
16
|
+
### 🎨 Refactored
|
17
|
+
|
18
|
+
- Modularize ChromedriverContainer implementation by @dieter-medium
|
19
|
+
- Replace method calls for clarity and consistency by @dieter-medium
|
20
|
+
- Namespace PDFTextSanitizer under Bidi2pdf::TestHelpers by @dieter-medium
|
21
|
+
- Refactor command management with concurrent queues by @dieter-medium
|
22
|
+
|
23
|
+
### 🐛 Fixed
|
24
|
+
|
25
|
+
- Update CHANGELOG links to correct Markdown syntax by @dieter-medium
|
26
|
+
|
27
|
+
### 📝 Docs
|
28
|
+
|
29
|
+
- Add Rails integration section to README by @dieter-medium
|
30
|
+
|
31
|
+
### 🚀 Added
|
32
|
+
|
33
|
+
- Update Chromedriver container setup and default image by @dieter-medium
|
34
|
+
- Add workflow for pushing Chromedriver Docker image by @dieter-medium
|
35
|
+
- Return session status and add test coverage by @dieter-medium
|
36
|
+
- Integrate concurrent-ruby for thread safety improvements by @dieter-medium
|
37
|
+
- Add specific navigation error classes for better handling by @dieter-medium
|
38
|
+
- Enhance navigation error handling in BrowserTab by @dieter-medium
|
39
|
+
- Add test helpers and matchers for PDF validation by @dieter-medium
|
40
|
+
|
12
41
|
## [0.1.7] - 2025-04-17
|
13
42
|
|
14
43
|
### 🎨 Refactored
|
@@ -143,12 +172,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
143
172
|
|
144
173
|
- Initial release
|
145
174
|
|
146
|
-
[unreleased]: https://github.com/dieter-medium/bidi2pdf/compare/v0.1.7..HEAD
|
147
|
-
|
148
|
-
[unreleased]: https://github.com/dieter-medium/bidi2pdf/compare/v0.1.6..v0.1.7
|
149
|
-
|
150
|
-
[0.1.6]: https://github.com/dieter-medium/bidi2pdf/compare/v0.1.5..v0.1.6
|
151
|
-
|
152
|
-
[0.1.5]: https://github.com/dieter-medium/bidi2pdf/compare/v0.1.4..v0.1.5
|
153
175
|
|
154
|
-
[
|
176
|
+
- [unreleased](https://github.com/dieter-medium/bidi2pdf/compare/v0.1.8..HEAD)
|
177
|
+
- [0.1.8](https://github.com/dieter-medium/bidi2pdf/compare/v0.1.7..v0.1.8)
|
178
|
+
- [0.1.7](https://github.com/dieter-medium/bidi2pdf/compare/v0.1.6..v0.1.7)
|
179
|
+
- [0.1.6](https://github.com/dieter-medium/bidi2pdf/compare/v0.1.5..v0.1.6)
|
180
|
+
- [0.1.5](https://github.com/dieter-medium/bidi2pdf/compare/v0.1.4..v0.1.5)
|
181
|
+
- [0.1.4](https://github.com/dieter-medium/bidi2pdf/compare/v0.1.3..v0.1.4)
|
data/README.md
CHANGED
@@ -257,6 +257,20 @@ docker compose -f docker/docker-compose.yml down
|
|
257
257
|
|
258
258
|
---
|
259
259
|
|
260
|
+
## 🚂 Rails Integration
|
261
|
+
|
262
|
+
Rails integration is available as an additional gem:
|
263
|
+
|
264
|
+
```ruby
|
265
|
+
# In your Gemfile
|
266
|
+
gem 'bidi2pdf-rails'
|
267
|
+
```
|
268
|
+
|
269
|
+
For full documentation and usage examples,
|
270
|
+
visit: [https://github.com/dieter-medium/bidi2pdf-rails](https://github.com/dieter-medium/bidi2pdf-rails)
|
271
|
+
|
272
|
+
---
|
273
|
+
|
260
274
|
## 🛠 Development
|
261
275
|
|
262
276
|
```bash
|
@@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|
7
7
|
# Install dependencies
|
8
8
|
RUN apt-get update && apt-get upgrade -y && \
|
9
9
|
apt-get install -y --no-install-recommends\
|
10
|
-
chromium \
|
10
|
+
chromium chromium-driver\
|
11
11
|
libglib2.0-0 \
|
12
12
|
libnss3 \
|
13
13
|
libxss1 \
|
@@ -26,6 +26,13 @@ RUN groupadd -r appuser && useradd -r -g appuser -m -d /home/appuser appuser
|
|
26
26
|
COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh
|
27
27
|
RUN chmod +x /usr/local/bin/entrypoint.sh
|
28
28
|
|
29
|
+
# ARM compatibility workaround:
|
30
|
+
# On ARM architectures (such as Apple Silicon), downloading chromedriver via automated scripts may fail or cause ELF binary errors,
|
31
|
+
# such as "rosetta error: failed to open elf at /lib64/ld-linux-x86-64.so.2".
|
32
|
+
# To avoid these issues, we directly install 'chromium-driver' via the package manager and explicitly create a symlink in the expected location.
|
33
|
+
|
34
|
+
RUN mkdir -p /home/appuser/.webdrivers && ln -s /usr/bin/chromedriver /home/appuser/.webdrivers/chromedriver
|
35
|
+
|
29
36
|
# Set working directory
|
30
37
|
WORKDIR /app
|
31
38
|
|
@@ -4,6 +4,7 @@ require "base64"
|
|
4
4
|
|
5
5
|
require_relative "network_events"
|
6
6
|
require_relative "logger_events"
|
7
|
+
require_relative "navigation_failed_events"
|
7
8
|
require_relative "auth_interceptor"
|
8
9
|
require_relative "add_headers_interceptor"
|
9
10
|
require_relative "js_logger_helper"
|
@@ -32,6 +33,11 @@ require_relative "js_logger_helper"
|
|
32
33
|
# @param [String] user_context_id The ID of the user context.
|
33
34
|
module Bidi2pdf
|
34
35
|
module Bidi
|
36
|
+
# Represents a browser tab for managing interactions and communication
|
37
|
+
# using the Bidi2pdf library. This class provides methods for creating
|
38
|
+
# browser tabs, managing cookies, navigating to URLs, executing scripts,
|
39
|
+
# handling network events, and general tab lifecycle management.
|
40
|
+
#
|
35
41
|
class BrowserTab
|
36
42
|
include JsLoggerHelper
|
37
43
|
|
@@ -56,6 +62,9 @@ module Bidi2pdf
|
|
56
62
|
# @return [LoggerEvents] The logger events handler.
|
57
63
|
attr_reader :logger_events
|
58
64
|
|
65
|
+
# @return [NavigationFailedEvents] The navigation failed events handler.
|
66
|
+
attr_reader :navigation_failed_events
|
67
|
+
|
59
68
|
# Initializes a new browser tab.
|
60
69
|
#
|
61
70
|
# @param [Object] client The WebSocket client for communication.
|
@@ -68,6 +77,7 @@ module Bidi2pdf
|
|
68
77
|
@tabs = []
|
69
78
|
@network_events = NetworkEvents.new browsing_context_id
|
70
79
|
@logger_events = LoggerEvents.new browsing_context_id
|
80
|
+
@navigation_failed_events = NavigationFailedEvents.new browsing_context_id
|
71
81
|
@open = true
|
72
82
|
end
|
73
83
|
|
@@ -154,8 +164,21 @@ module Bidi2pdf
|
|
154
164
|
|
155
165
|
# Navigates the browser tab to a specified URL.
|
156
166
|
#
|
167
|
+
# This method registers necessary event listeners and sends a navigation
|
168
|
+
# command to the browser tab, instructing it to load the specified URL.
|
169
|
+
# It validates that the URL is properly formatted before attempting navigation.
|
170
|
+
#
|
157
171
|
# @param [String] url The URL to navigate to.
|
172
|
+
# @raise [NavigationError] If the URL is invalid or improperly formatted.
|
173
|
+
# @example
|
174
|
+
# browser_tab.navigate_to("https://example.com")
|
158
175
|
def navigate_to(url)
|
176
|
+
begin
|
177
|
+
URI.parse(url)
|
178
|
+
rescue URI::InvalidURIError => e
|
179
|
+
raise NavigationError, "Invalid URL: #{url} - #{e.message}"
|
180
|
+
end
|
181
|
+
|
159
182
|
Bidi2pdf.notification_service.instrument("navigate_to.bidi2pdf", url: url) do
|
160
183
|
navigate_with_listeners url
|
161
184
|
end
|
@@ -389,6 +412,18 @@ module Bidi2pdf
|
|
389
412
|
client.send_cmd_and_wait(cmd) do |response|
|
390
413
|
Bidi2pdf.logger.debug "Navigated to page url: #{url} response: #{response}"
|
391
414
|
end
|
415
|
+
rescue Bidi2pdf::CmdError => e
|
416
|
+
msg = e.response["message"]
|
417
|
+
case msg
|
418
|
+
when /^net::ERR_INVALID_AUTH_CREDENTIALS/
|
419
|
+
raise NavigationAuthError.new(url, msg)
|
420
|
+
when /^net::ERR_NAME_NOT_RESOLVED/
|
421
|
+
raise NavigationDNSError.new(url, msg)
|
422
|
+
when /^net::/
|
423
|
+
raise NavigationError, "Connection error: #{url} #{msg}"
|
424
|
+
else
|
425
|
+
raise e
|
426
|
+
end
|
392
427
|
end
|
393
428
|
|
394
429
|
def register_event_listeners
|
@@ -401,6 +436,8 @@ module Bidi2pdf
|
|
401
436
|
|
402
437
|
client.on_event("log.entryAdded",
|
403
438
|
&logger_events.method(:handle_event))
|
439
|
+
|
440
|
+
client.on_event("browsingContext.navigationFailed", &navigation_failed_events.method(:handle_event))
|
404
441
|
end
|
405
442
|
|
406
443
|
def handle_injection_exception(response, url, exception_class)
|
@@ -536,6 +573,9 @@ module Bidi2pdf
|
|
536
573
|
|
537
574
|
client.remove_event_listener "network.responseStarted", "network.responseCompleted", "network.fetchError",
|
538
575
|
&network_events.method(:handle_event)
|
576
|
+
|
577
|
+
client.remove_event_listener("log.entryAdded",
|
578
|
+
&logger_events.method(:handle_event))
|
539
579
|
end
|
540
580
|
|
541
581
|
# Closes all tabs associated with the browser tab.
|
@@ -5,11 +5,10 @@ module Bidi2pdf
|
|
5
5
|
class CommandManager
|
6
6
|
class << self
|
7
7
|
def initialize_counter
|
8
|
-
@id = 0
|
9
|
-
@id_mutex = Mutex.new
|
8
|
+
@id = Concurrent::AtomicFixnum.new(0)
|
10
9
|
end
|
11
10
|
|
12
|
-
def next_id = @
|
11
|
+
def next_id = @id.increment
|
13
12
|
end
|
14
13
|
|
15
14
|
initialize_counter
|
@@ -17,19 +16,14 @@ module Bidi2pdf
|
|
17
16
|
def initialize(socket)
|
18
17
|
@socket = socket
|
19
18
|
|
20
|
-
@pending_responses =
|
21
|
-
@initiated_cmds = {}
|
19
|
+
@pending_responses = Concurrent::Hash.new
|
22
20
|
end
|
23
21
|
|
24
|
-
def send_cmd(cmd,
|
22
|
+
def send_cmd(cmd, result_queue: nil)
|
25
23
|
id = next_id
|
26
24
|
|
27
25
|
Bidi2pdf.notification_service.instrument("send_cmd.bidi2pdf", id: id, cmd: cmd) do |instrumentation_payload|
|
28
|
-
|
29
|
-
init_queue_for id
|
30
|
-
else
|
31
|
-
@initiated_cmds[id] = true
|
32
|
-
end
|
26
|
+
init_queue_for id, result_queue
|
33
27
|
|
34
28
|
payload = cmd.as_payload(id)
|
35
29
|
|
@@ -42,17 +36,20 @@ module Bidi2pdf
|
|
42
36
|
end
|
43
37
|
|
44
38
|
def send_cmd_and_wait(cmd, timeout: Bidi2pdf.default_timeout, &block)
|
39
|
+
result_queue = Thread::Queue.new
|
40
|
+
|
45
41
|
Bidi2pdf.notification_service.instrument("send_cmd_and_wait.bidi2pdf", cmd: cmd, timeout: timeout) do |instrumentation_payload|
|
46
|
-
id = send_cmd(cmd,
|
42
|
+
id = send_cmd(cmd, result_queue: result_queue)
|
47
43
|
|
48
44
|
instrumentation_payload[:id] = id
|
49
45
|
|
50
|
-
response =
|
46
|
+
response = result_queue.pop(timeout: timeout)
|
51
47
|
|
52
48
|
instrumentation_payload[:response] = response
|
53
49
|
|
54
50
|
raise CmdTimeoutError, "Timeout waiting for response to command ID #{id}" if response.nil?
|
55
|
-
|
51
|
+
|
52
|
+
raise Bidi2pdf::CmdError.new(cmd, response) if response["error"]
|
56
53
|
|
57
54
|
block ? block.call(response) : response
|
58
55
|
ensure
|
@@ -60,14 +57,6 @@ module Bidi2pdf
|
|
60
57
|
end
|
61
58
|
end
|
62
59
|
|
63
|
-
def pop_response(id, timeout:)
|
64
|
-
raise CmdResponseNotStoredError, "No response stored for command ID #{id} or already popped or this command was not send" unless @pending_responses.key?(id)
|
65
|
-
|
66
|
-
@pending_responses[id].pop(timeout: timeout)
|
67
|
-
ensure
|
68
|
-
@pending_responses.delete(id)
|
69
|
-
end
|
70
|
-
|
71
60
|
def handle_response(data)
|
72
61
|
Bidi2pdf.notification_service.instrument("handle_response.bidi2pdf", data: data) do |instrumentation_payload|
|
73
62
|
instrumentation_payload[:error] = data["error"] if data["error"]
|
@@ -78,9 +67,6 @@ module Bidi2pdf
|
|
78
67
|
|
79
68
|
if @pending_responses.key?(id)
|
80
69
|
@pending_responses[id]&.push(data)
|
81
|
-
return true
|
82
|
-
elsif @initiated_cmds.key?(id)
|
83
|
-
@initiated_cmds.delete(id)
|
84
70
|
|
85
71
|
return true
|
86
72
|
end
|
@@ -89,12 +75,14 @@ module Bidi2pdf
|
|
89
75
|
instrumentation_payload[:handled] = false
|
90
76
|
|
91
77
|
false
|
78
|
+
ensure
|
79
|
+
@pending_responses.delete id
|
92
80
|
end
|
93
81
|
end
|
94
82
|
|
95
83
|
private
|
96
84
|
|
97
|
-
def init_queue_for(id) = @pending_responses[id] =
|
85
|
+
def init_queue_for(id, result_queue) = @pending_responses[id] = result_queue
|
98
86
|
|
99
87
|
def next_id = self.class.next_id
|
100
88
|
end
|
@@ -6,7 +6,7 @@ module Bidi2pdf
|
|
6
6
|
def initialize(logger:)
|
7
7
|
@logger = logger
|
8
8
|
@connected = false
|
9
|
-
@
|
9
|
+
@connection_latch = Concurrent::CountDownLatch.new(1)
|
10
10
|
end
|
11
11
|
|
12
12
|
def mark_connected
|
@@ -14,7 +14,7 @@ module Bidi2pdf
|
|
14
14
|
|
15
15
|
@connected = true
|
16
16
|
@logger.debug "WebSocket connection is open"
|
17
|
-
@
|
17
|
+
@connection_latch.count_down
|
18
18
|
end
|
19
19
|
|
20
20
|
def wait_until_open(timeout:)
|
@@ -22,13 +22,7 @@ module Bidi2pdf
|
|
22
22
|
|
23
23
|
@logger.debug "Waiting for WebSocket connection to open"
|
24
24
|
|
25
|
-
|
26
|
-
Timeout.timeout(timeout) do
|
27
|
-
@connection_queue.pop
|
28
|
-
end
|
29
|
-
rescue Timeout::Error
|
30
|
-
raise Bidi2pdf::WebsocketError, "WebSocket connection did not open in time #{timeout} sec."
|
31
|
-
end
|
25
|
+
raise Bidi2pdf::WebsocketError, "WebSocket connection did not open in time #{timeout} sec." unless @connection_latch.wait(timeout)
|
32
26
|
|
33
27
|
true
|
34
28
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "browser_console_logger"
|
4
|
+
|
5
|
+
module Bidi2pdf
|
6
|
+
module Bidi
|
7
|
+
class NavigationFailedEvents
|
8
|
+
attr_reader :context_id, :browser_console_logger
|
9
|
+
|
10
|
+
def initialize(context_id)
|
11
|
+
@context_id = context_id
|
12
|
+
end
|
13
|
+
|
14
|
+
# Receives a raw event and dispatches it when it belongs to this handler's
# browsing context.
#
# Events whose params["context"] equals context_id are forwarded to
# handle_response; every other event is only reported at debug2 level.
#
# @param [Hash] data The raw event; read via its "params" and "method" keys.
def handle_event(data)
  event = data["params"]
  method = data["method"]

  if event["context"] == context_id
    handle_response(method, event)
  else
    # This class handles navigation failures, not log entries, so the message
    # names the right event family to keep debug output searchable.
    Bidi2pdf.logger.debug2 "Ignoring navigation failed event: #{method}, context_id: #{context_id}, params: #{event}"
  end
end
|
24
|
+
|
25
|
+
def handle_response(_method, event)
|
26
|
+
url = event["url"]
|
27
|
+
navigation = event["navigation"]
|
28
|
+
timestamp = event["timestamp"]
|
29
|
+
|
30
|
+
Bidi2pdf.notification_service.instrument("navigation_failed_received.bidi2pdf",
|
31
|
+
{
|
32
|
+
url: url,
|
33
|
+
timestamp: timestamp,
|
34
|
+
navigation: navigation
|
35
|
+
})
|
36
|
+
|
37
|
+
Bidi2pdf.logger.error "Navigation failed for URL: #{url}, Navigation: #{navigation}"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -117,7 +117,10 @@ module Bidi2pdf
|
|
117
117
|
|
118
118
|
# Retrieves the status of the session.
|
119
119
|
def status
|
120
|
-
send_cmd(Bidi2pdf::Bidi::Commands::SessionStatus.new)
|
120
|
+
send_cmd(Bidi2pdf::Bidi::Commands::SessionStatus.new) do |resp|
|
121
|
+
Bidi2pdf.logger.info "Session status: #{resp["result"].inspect}"
|
122
|
+
resp["result"]
|
123
|
+
end
|
121
124
|
end
|
122
125
|
|
123
126
|
# Checks if the session has started.
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../pdf_text_sanitizer"
|
4
|
+
|
5
|
+
# Custom RSpec matcher for checking whether a PDF document contains specific text.
|
6
|
+
#
|
7
|
+
# This matcher allows you to assert that a certain string or regular expression
|
8
|
+
# is present in the sanitized text of a PDF document.
|
9
|
+
#
|
10
|
+
# It supports chaining with `.at_page(n)` to limit the search to a specific page.
|
11
|
+
#
|
12
|
+
# ## Examples
|
13
|
+
#
|
14
|
+
# expect(pdf_data).to contains_pdf_text("Total: 123.45")
|
15
|
+
# expect(pdf_data).to contains_pdf_text(/Invoice #\d+/).at_page(2)
|
16
|
+
#
|
17
|
+
# @param expected [String, Regexp] The text or pattern to match inside the PDF.
|
18
|
+
#
|
19
|
+
# @return [Boolean] true if the expected content is found (on the given page if specified)
|
20
|
+
RSpec::Matchers.define :contains_pdf_text do |expected|
|
21
|
+
chain :at_page do |page_number|
|
22
|
+
@page_number = page_number
|
23
|
+
end
|
24
|
+
|
25
|
+
match do |actual|
|
26
|
+
Bidi2pdf::TestHelpers::PDFTextSanitizer.contains?(actual, expected, @page_number)
|
27
|
+
end
|
28
|
+
|
29
|
+
failure_message do |actual|
|
30
|
+
pages = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean_pages(actual)
|
31
|
+
|
32
|
+
return "Document does not contain page #{@page_number}" if @page_number && !(@page_number && @page_number <= pages.size)
|
33
|
+
|
34
|
+
<<~MSG
|
35
|
+
PDF text did not contain expected content.
|
36
|
+
|
37
|
+
--- Expected (#{expected.inspect}) ---
|
38
|
+
On page #{@page_number || "any"}:
|
39
|
+
|
40
|
+
--- Actual ---
|
41
|
+
#{pages.each_with_index.map { |text, i| "Page #{i + 1}:\n#{text}" }.join("\n\n")}
|
42
|
+
MSG
|
43
|
+
end
|
44
|
+
|
45
|
+
description do
|
46
|
+
desc = "contain #{expected.inspect} in PDF"
|
47
|
+
desc += " on page #{@page_number}" if @page_number
|
48
|
+
desc
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pdf-reader"
|
4
|
+
require "base64"
|
5
|
+
|
6
|
+
# RSpec matcher to assert the number of pages in a PDF document.
|
7
|
+
#
|
8
|
+
# This matcher is useful for verifying the structural integrity of generated or uploaded PDFs,
|
9
|
+
# especially in tests for reporting, invoice generation, or document exports.
|
10
|
+
#
|
11
|
+
# It supports a variety of input types:
|
12
|
+
# - Raw PDF data as a `String`
|
13
|
+
# - File paths (`String`)
|
14
|
+
# - `StringIO` or `File` objects
|
15
|
+
# - Even Base64-encoded strings, if your `pdf_reader_for` method handles it
|
16
|
+
#
|
17
|
+
# ## Example
|
18
|
+
#
|
19
|
+
# expect(pdf_data).to have_pdf_page_count(5)
|
20
|
+
# expect(StringIO.new(pdf_data)).to have_pdf_page_count(3)
|
21
|
+
#
|
22
|
+
# If the PDF is malformed, the matcher will gracefully fail and show the error message.
|
23
|
+
#
|
24
|
+
# @param expected_count [Integer] The number of pages the PDF is expected to contain.
|
25
|
+
# @return [RSpec::Matchers::Matcher] The matcher object for use in specs.
|
26
|
+
#
|
27
|
+
# @note This matcher depends on `Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_reader_for`
|
28
|
+
# to extract the page count. Make sure it supports all your intended input formats.
|
29
|
+
RSpec::Matchers.define :have_pdf_page_count do |expected_count|
|
30
|
+
match do |pdf_data|
|
31
|
+
reader = Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_reader_for(pdf_data)
|
32
|
+
@actual_count = reader.page_count
|
33
|
+
@actual_count == expected_count
|
34
|
+
rescue PDF::Reader::MalformedPDFError => e
|
35
|
+
@error_message = e.message
|
36
|
+
false
|
37
|
+
end
|
38
|
+
|
39
|
+
failure_message do |_pdf_data|
|
40
|
+
if @error_message
|
41
|
+
"Expected a valid PDF with #{expected_count} pages, but encountered an error: #{@error_message}"
|
42
|
+
else
|
43
|
+
"Expected PDF to have #{expected_count} pages, but it has #{@actual_count} pages"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
description do
|
48
|
+
"have #{expected_count} PDF pages"
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../pdf_text_sanitizer"
|
4
|
+
|
5
|
+
# Custom RSpec matcher to compare the **sanitized text content** of two PDF files.
|
6
|
+
#
|
7
|
+
# This matcher is useful for comparing PDF documents where formatting and metadata may differ,
|
8
|
+
# but the actual visible text content should be the same. It uses `PDFTextSanitizer` internally
|
9
|
+
# to normalize and clean the text before comparison.
|
10
|
+
#
|
11
|
+
# ## Example
|
12
|
+
#
|
13
|
+
# expect(actual_pdf).to match_pdf_text(expected_pdf)
|
14
|
+
#
|
15
|
+
# If the texts don’t match, it prints a diff-friendly message showing cleaned text content.
|
16
|
+
#
|
17
|
+
# @param expected [String, StringIO, File] The expected PDF content (can be a file path, StringIO, or raw string).
|
18
|
+
# @return [RSpec::Matchers::Matcher] An RSpec matcher to compare against an actual PDF.
|
19
|
+
#
|
20
|
+
# @note Ensure `PDFTextSanitizer.match?` and `PDFTextSanitizer.clean_pages` are implemented
|
21
|
+
# to handle your specific PDF processing logic.
|
22
|
+
RSpec::Matchers.define :match_pdf_text do |expected|
|
23
|
+
match do |actual|
|
24
|
+
Bidi2pdf::TestHelpers::PDFTextSanitizer.match?(actual, expected)
|
25
|
+
end
|
26
|
+
|
27
|
+
failure_message do |actual|
|
28
|
+
cleaned_actual = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean_pages(actual)
|
29
|
+
cleaned_expected = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean_pages(expected)
|
30
|
+
|
31
|
+
<<~MSG
|
32
|
+
PDF text did not match.
|
33
|
+
|
34
|
+
--- Expected ---
|
35
|
+
#{cleaned_expected.join("\n")}
|
36
|
+
|
37
|
+
--- Actual ---
|
38
|
+
#{cleaned_actual.join("\n")}
|
39
|
+
MSG
|
40
|
+
end
|
41
|
+
|
42
|
+
description do
|
43
|
+
"match sanitized PDF text content"
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bidi2pdf
|
4
|
+
module TestHelpers
|
5
|
+
module PDFReaderUtils
|
6
|
+
class << self
|
7
|
+
# Extracts text content from a PDF document.
|
8
|
+
#
|
9
|
+
# This method accepts various PDF input formats and attempts to extract text content
|
10
|
+
# from all pages. If extraction fails due to malformed PDF data, it returns the original input wrapped in a one-element array.
|
11
|
+
#
|
12
|
+
# @param pdf_data [String, StringIO, File] The PDF data in one of the following formats:
|
13
|
+
# * Base64-encoded PDF string
|
14
|
+
# * Raw PDF data beginning with "%PDF-"
|
15
|
+
# * StringIO object containing PDF data
|
16
|
+
# * Path to a PDF file as String
|
17
|
+
# * Raw PDF data as String
|
18
|
+
# @return [Array<String>] An array of strings, with each string representing the text content of a page
|
19
|
+
# @return [Object] The original input, unchanged, if it is not a String, StringIO, or File
|
20
|
+
# @example Extract text from a PDF file
|
21
|
+
# text_content = pdf_text('path/to/document.pdf')
|
22
|
+
#
|
23
|
+
# @example Extract text from Base64-encoded string
|
24
|
+
# text_content = pdf_text(base64_encoded_pdf_data)
|
25
|
+
def pdf_text(pdf_data)
|
26
|
+
return pdf_data unless pdf_data.is_a?(String) || pdf_data.is_a?(StringIO) || pdf_data.is_a?(File)
|
27
|
+
|
28
|
+
begin
|
29
|
+
reader = pdf_reader_for pdf_data
|
30
|
+
reader.pages.map(&:text)
|
31
|
+
rescue PDF::Reader::MalformedPDFError
|
32
|
+
[pdf_data]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Converts the input PDF data into an IO object and initializes a PDF::Reader.
|
37
|
+
#
|
38
|
+
# @param pdf_data [String, StringIO, File] The PDF data to be read.
|
39
|
+
# @return [PDF::Reader] A PDF::Reader instance for the given data.
|
40
|
+
# @raise [PDF::Reader::MalformedPDFError] If the PDF data is invalid.
|
41
|
+
def pdf_reader_for(pdf_data)
|
42
|
+
io = convert_data_to_io(pdf_data)
|
43
|
+
PDF::Reader.new(io)
|
44
|
+
end
|
45
|
+
|
46
|
+
# rubocop: disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
47
|
+
# Converts various input formats into an IO object for PDF::Reader.
|
48
|
+
#
|
49
|
+
# @param pdf_data [String, StringIO, File] The PDF data to be converted.
|
50
|
+
# @return [IO] An IO object containing the PDF data.
|
51
|
+
# Turns the supported PDF inputs into something PDF::Reader can read from.
#
# Non-String inputs (StringIO, File, ...) are returned untouched. Strings are
# classified in this order:
#   1. starts with "JVBER" (the Base64 encoding of a "%PDF" header) -> decoded
#   2. starts with "%PDF-"                                          -> raw PDF bytes
#   3. names an existing file                                       -> opened in binary mode
#   4. anything else                                                -> wrapped as-is
#
# @param pdf_data [String, StringIO, File] The PDF data to be converted.
# @return [IO, StringIO] A readable object containing the PDF data.
def convert_data_to_io(pdf_data)
  # String-only predicates such as start_with? must not be sent to StringIO or
  # File objects, so IO-like inputs are handed back before any prefix check.
  return pdf_data unless pdf_data.is_a?(String)

  # rubocop:disable Lint/DuplicateBranch
  if pdf_data.start_with?("JVBER")
    StringIO.new(Base64.decode64(pdf_data))
  elsif pdf_data.start_with?("%PDF-")
    StringIO.new(pdf_data)
  elsif File.exist?(pdf_data)
    File.open(pdf_data, "rb")
  else
    StringIO.new(pdf_data)
  end
  # rubocop:enable Lint/DuplicateBranch
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# rubocop: enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
69
|
+
|
70
|
+
module InstanceMethods
|
71
|
+
def pdf_text(pdf_data)
|
72
|
+
PDFReaderUtils.pdf_text(pdf_data)
|
73
|
+
end
|
74
|
+
|
75
|
+
def pdf_reader_for(pdf_data)
|
76
|
+
PDFReaderUtils.pdf_reader_for(pdf_data)
|
77
|
+
end
|
78
|
+
|
79
|
+
def convert_data_to_io(pdf_data)
|
80
|
+
PDFReaderUtils.convert_data_to_io(pdf_data)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def self.included(base)
|
85
|
+
base.include(InstanceMethods)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,232 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "unicode_utils"
|
4
|
+
require "diff/lcs"
|
5
|
+
require "diff/lcs/hunk"
|
6
|
+
|
7
|
+
module Bidi2pdf
|
8
|
+
module TestHelpers
|
9
|
+
# rubocop: disable Metrics/ModuleLength
|
10
|
+
# Provides utilities for sanitizing and comparing PDF text content.
|
11
|
+
# This module includes methods for cleaning text, comparing PDF content,
|
12
|
+
# and reporting differences between actual and expected PDF outputs.
|
13
|
+
#
|
14
|
+
# The sanitization process includes normalizing whitespace, replacing
|
15
|
+
# typographic ligatures, and handling other common text formatting issues.
|
16
|
+
#
|
17
|
+
# @example Cleaning text
|
18
|
+
# sanitized_text = Bidi2pdf::TestHelpers::PDFTextSanitizer.clean("Some text")
|
19
|
+
#
|
20
|
+
# @example Comparing PDF content
|
21
|
+
# match = Bidi2pdf::TestHelpers::PDFTextSanitizer.match?(actual_pdf, expected_pdf)
|
22
|
+
module PDFTextSanitizer
|
23
|
+
class << self
|
24
|
+
# Cleans the given text by replacing common typographic ligatures,
|
25
|
+
# normalizing whitespace, and removing unnecessary characters.
|
26
|
+
#
|
27
|
+
# @param [String] text The text to clean.
|
28
|
+
# @return [String] The cleaned text.
|
29
|
+
# Sanitizes extracted PDF text for comparison.
#
# The text is NFKD-normalized, then ligatures, typographic quotes, ellipses
# and dashes are replaced by ASCII equivalents, hyphenated line breaks are
# joined, and every run of whitespace is collapsed to a single space.
#
# @param [String] text The text to clean.
# @return [String] The cleaned text, stripped of leading/trailing whitespace.
def clean(text)
  text = UnicodeUtils.nfkd(text)

  text.gsub("\uFB01", "fi")
      .gsub("\uFB02", "fl")
      .gsub("-\n", "")
      # Typographic double quotes (U+201C, U+201D) -> ASCII double quote.
      # The character class must list the curly forms; matching '"' itself
      # would replace the ASCII quote with itself and change nothing.
      .gsub(/[\u201C\u201D]/, '"')
      # Typographic single quotes / apostrophes (U+2018, U+2019) -> ASCII apostrophe.
      .gsub(/[\u2018\u2019]/, "'")
      .gsub("…", "...")
      .gsub("—", "--")
      .gsub("–", "-")
      .gsub(/\s+/, " ") # Replace all whitespace sequences with a single space
      .strip
end
|
43
|
+
|
44
|
+
# Cleans an array of PDF page texts by applying the `clean` method
|
45
|
+
# to each page's content.
|
46
|
+
#
|
47
|
+
# @param [Object] actual_pdf_thingy The PDF object to clean.
|
48
|
+
# @return [Array<String>] An array of cleaned page texts.
|
49
|
+
def clean_pages(actual_pdf_thingy)
|
50
|
+
Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_text(actual_pdf_thingy).map { |text| clean(text) }
|
51
|
+
end
|
52
|
+
|
53
|
+
# Cleans the given text and removes all whitespace for comparison purposes.
|
54
|
+
#
|
55
|
+
# @param [String] text The text to clean and normalize.
|
56
|
+
# @return [String] The cleaned text without whitespace.
|
57
|
+
def normalize(text)
|
58
|
+
clean(text).gsub(/\s+/, "")
|
59
|
+
end
|
60
|
+
|
61
|
+
# Checks if the given PDF contains the expected text or pattern.
|
62
|
+
#
|
63
|
+
# @param [Object] actual_pdf_thingy The PDF object to search.
|
64
|
+
# @param [String, Regexp] expected The expected text or pattern.
|
65
|
+
# @param [Integer, nil] page_number The specific page to search (optional).
|
66
|
+
# @return [Boolean] `true` if the expected text is found, `false` otherwise.
|
67
|
+
# Checks whether the PDF contains the expected text or pattern.
#
# Without a page number every cleaned page is searched. With a page number
# only that page is searched; page numbers are 1-based, and any number
# outside 1..page_count yields false.
#
# @param [Object] actual_pdf_thingy The PDF object to search.
# @param [String, Regexp] expected The expected text or pattern.
# @param [Integer, nil] page_number The specific page to search (optional).
# @return [Boolean] `true` if the expected text is found, `false` otherwise.
def contains?(actual_pdf_thingy, expected, page_number = nil)
  # clean_pages extracts the page texts itself, so one pass is enough.
  cleaned_pages = clean_pages(actual_pdf_thingy)

  # Search all pages
  return cleaned_pages.any? { |page| match_expected?(page, expected) } unless page_number

  # Reject 0 and negative numbers as well as numbers past the last page;
  # otherwise `page_number - 1` would index the array from its end.
  return false unless page_number.between?(1, cleaned_pages.size)

  match_expected?(cleaned_pages[page_number - 1], expected)
end
|
82
|
+
|
83
|
+
# Matches the given text against the expected text or pattern.
|
84
|
+
#
|
85
|
+
# @param [String] text The text to match.
|
86
|
+
# @param [String, Regexp] expected The expected text or pattern.
|
87
|
+
# @return [Boolean] `true` if the text matches, `false` otherwise.
|
88
|
+
def match_expected?(text, expected)
|
89
|
+
return false unless text
|
90
|
+
|
91
|
+
expected.is_a?(Regexp) ? text.match?(expected) : text.include?(expected.to_s)
|
92
|
+
end
|
93
|
+
|
94
|
+
# Compares the content of two PDF objects for equality.
|
95
|
+
#
|
96
|
+
# @param [Object] actual_pdf_thingy The actual PDF object.
|
97
|
+
# @param [Object] expected_pdf_thingy The expected PDF object.
|
98
|
+
# @return [Boolean] `true` if the content matches, `false` otherwise.
|
99
|
+
def match?(actual_pdf_thingy, expected_pdf_thingy)
|
100
|
+
actual = Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_text actual_pdf_thingy
|
101
|
+
expected = Bidi2pdf::TestHelpers::PDFReaderUtils.pdf_text expected_pdf_thingy
|
102
|
+
|
103
|
+
cleaned_actual = clean_pages(actual)
|
104
|
+
cleaned_expected = clean_pages(expected)
|
105
|
+
|
106
|
+
# Compare without whitespace for equality check
|
107
|
+
actual_for_comparison = cleaned_actual.map { |text| normalize(text) }
|
108
|
+
expected_for_comparison = cleaned_expected.map { |text| normalize(text) }
|
109
|
+
|
110
|
+
if actual_for_comparison == expected_for_comparison
|
111
|
+
true
|
112
|
+
else
|
113
|
+
report_content_mismatch(cleaned_actual, cleaned_expected)
|
114
|
+
false
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# Reports differences between actual and expected PDF content.
|
119
|
+
#
|
120
|
+
# @param [Array<String>] actual The actual PDF content.
|
121
|
+
# @param [Array<String>] expected The expected PDF content.
|
122
|
+
# @return [void]
|
123
|
+
def report_content_mismatch(actual, expected)
|
124
|
+
puts "--- PDF content mismatch ---"
|
125
|
+
print_differences(actual, expected)
|
126
|
+
end
|
127
|
+
|
128
|
+
# Walks both page lists in parallel and prints the differences of each
# page pair. If one list is shorter, the placeholder "(missing page)"
# stands in for the absent page.
#
# @param [Array<String>] actual The actual PDF content.
# @param [Array<String>] expected The expected PDF content.
# @return [void]
def print_differences(actual, expected)
  page_total = [actual.length, expected.length].max

  (0...page_total).each do |index|
    print_differences_for_page(
      actual[index] || "(missing page)",
      expected[index] || "(missing page)",
      index
    )
  end
end
|
143
|
+
|
144
|
+
# Prints a diff report for a single page pair.
#
# Nothing is printed when the two pages are equal after normalize has been
# applied to both. Otherwise a heading with the one-based page number is
# written, followed by the formatted Diff::LCS.sdiff result computed on the
# pages as passed in.
#
# @param [String] actual_page The actual page content.
# @param [String] expected_page The expected page content.
# @param [Integer] page_idx The zero-based index of the page being compared.
# @return [void]
def print_differences_for_page(actual_page, expected_page, page_idx)
  # Pages that only differ in ways normalize removes are treated as equal.
  return if normalize(actual_page.to_s) == normalize(expected_page.to_s)

  puts "\nPage #{page_idx + 1} differences (ignoring whitespace):"

  page_diffs = Diff::LCS.sdiff(expected_page, actual_page)
  puts format_diff_output(page_diffs, expected_page, actual_page)
end
|
167
|
+
|
168
|
+
# Renders a list of diffs as printable text.
#
# The diffs are grouped into blocks of changes by group_changed_diffs; each
# block is turned into output lines by format_change, and all lines are
# joined with newlines.
#
# @param [Array<Diff::LCS::ContextChange>] diffs The list of differences.
# @param [String] expected The expected text.
# @param [String] actual The actual text.
# @return [String] The formatted differences (empty when there are no changes).
def format_diff_output(diffs, expected, actual)
  report_lines = group_changed_diffs(diffs).flat_map do |change|
    format_change(expected, actual, change)
  end

  report_lines.join("\n")
end
|
186
|
+
|
187
|
+
private
|
188
|
+
|
189
|
+
# Collects runs of consecutive non-"=" diffs into blocks.
#
# A new run starts at every diff whose action is "="; the "=" entries
# themselves are dropped, and runs that end up empty are discarded.
#
# @param [Enumerable] diffs Objects responding to `action`.
# @return [Array<Hash>] One `{ diffs: [...] }` hash per block of changes.
def group_changed_diffs(diffs)
  runs = diffs.slice_when { |_before, after| after.action == "=" }

  runs.each_with_object([]) do |run, blocks|
    changed = run.reject { |entry| entry.action == "=" }
    blocks << { diffs: changed } unless changed.empty?
  end
end
|
198
|
+
|
199
|
+
# Produces the output lines for one block of changes, anchored at the
# old_position of the block's first diff.
def format_change(expected, actual, change)
  anchor = change[:diffs].first.old_position

  build_output(extract_snippets(expected, actual, change, anchor), anchor)
end
|
205
|
+
|
206
|
+
# Gathers the pieces needed to report one change:
# - :context_start  index up to 20 characters before pos (never negative)
# - :context        the full expected text
# - :expected_snip  up to 50 characters of expected, starting at pos
# - :actual_snip    up to 50 characters of actual, starting at the
#                   new_position of the block's first diff
def extract_snippets(expected, actual, change, pos)
  leading_diff = change[:diffs].first
  window_start = pos > 20 ? pos - 20 : 0

  {
    context_start: window_start,
    context: expected,
    expected_snip: expected[pos, 50],
    actual_snip: actual[leading_diff.new_position, 50]
  }
end
|
214
|
+
|
215
|
+
# Assembles the report lines for one change: the expected text leading up
# to pos, the expected and actual snippets, and both snippets again after
# normalize has been applied.
#
# @return [Array<String>] Five lines of output.
def build_output(snip_data, pos)
  expected_snip = snip_data[:expected_snip]
  actual_snip = snip_data[:actual_snip]
  leading_context = snip_data[:context][snip_data[:context_start]...pos]

  [
    " Context: ...#{leading_context}",
    " Expected: #{expected_snip}...",
    " Actual: #{actual_snip}...",
    " Expected (no spaces): #{normalize(expected_snip)}...",
    " Actual (no spaces): #{normalize(actual_snip)}..."
  ]
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
# rubocop:enable Metrics/ModuleLength
|
231
|
+
end
|
232
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# testcontainers is optional: it is only needed when the ChromeDriver
# container helper is used, so a missing gem produces a hint instead of
# aborting the load here.
begin
  require "testcontainers"
rescue LoadError
  # There is no loop variable in this file, so the gem name is spelled out.
  # Interpolating an undefined `dep` here would raise NameError instead of
  # printing the hint.
  warn "Missing testcontainers. Add it to your Gemfile if you're using Bidi2pdf test helpers."
end
|
8
|
+
|
9
|
+
module Bidi2pdf
  module TestHelpers
    module Testcontainers
      # Testcontainers wrapper for a ChromeDriver Docker image.
      #
      # The container exposes the ChromeDriver port and, unless a wait
      # strategy is already set, waits for ChromeDriver's startup log line.
      # It can also build the image from a local Dockerfile before starting.
      class ChromedriverContainer < ::Testcontainers::DockerContainer
        DEFAULT_CHROMEDRIVER_PORT = 3000
        DEFAULT_IMAGE = "dieters877565/chromedriver"

        attr_reader :docker_file, :build_dir

        # @param [String] image Image name to run (also used as the tag for a local build).
        # @param [Hash] options Forwarded to the parent class after `:docker_file`
        #   and `:build_dir` have been removed. `:build_dir` falls back to
        #   `:working_dir`, which stays in the options.
        def initialize(image = DEFAULT_IMAGE, **options)
          @docker_file = options.delete(:docker_file) || "Dockerfile"
          @build_dir = options.delete(:build_dir) || options[:working_dir]

          super

          # Only install the log-based wait strategy when none is set yet.
          @wait_for ||= add_wait_for(:logs, /ChromeDriver was started successfully on port/)
        end

        # Exposes the ChromeDriver port, then delegates to the parent's start.
        def start
          with_exposed_ports(port)
          super
        end

        # @return [Integer] The ChromeDriver port inside the container.
        def port
          DEFAULT_CHROMEDRIVER_PORT
        end

        # rubocop: disable Metrics/AbcSize

        # Builds the image from `build_dir` using `docker_file`, tags it with
        # `image`, and echoes non-empty "stream" entries of the build log to
        # stdout with a timestamp.
        #
        # The Docker read timeout is raised to ten minutes for the build and is
        # restored afterwards, including when the build raises.
        #
        # @return [Object] The result of Docker::Image.build_from_dir.
        def build_local_image
          old_timeout = Docker.options[:read_timeout]
          Docker.options[:read_timeout] = 60 * 10

          begin
            Docker::Image.build_from_dir(build_dir, { "t" => image, "dockerfile" => docker_file }) do |lines|
              lines.split("\n").each do |line|
                next unless (log = JSON.parse(line)) && log.key?("stream")
                next unless log["stream"] && !(trimmed_stream = log["stream"].strip).empty?

                timestamp = Time.now.strftime("[%Y-%m-%dT%H:%M:%S.%6N]")
                $stdout.write "#{timestamp} #{trimmed_stream}\n"
              end
            end
          ensure
            # Docker.options is global state; never leave the raised timeout
            # behind when the build fails.
            Docker.options[:read_timeout] = old_timeout
          end
        end

        # rubocop: enable Metrics/AbcSize

        # rubocop: disable Metrics/AbcSize

        # Builds the local image, then creates and starts a container from it
        # and runs the configured wait strategy.
        #
        # @return [ChromedriverContainer] self
        # @raise [::Testcontainers::NotFoundError] when Docker reports a not-found error.
        # @raise [::Testcontainers::ConnectionError] on a socket error talking to Docker.
        def start_local_image
          build_local_image

          with_exposed_ports(port)

          @_container ||= Docker::Container.create(_container_create_options)
          @_container.start

          @_id = @_container.id
          json = @_container.json
          @name = json["Name"]
          @_created_at = json["Created"]

          @wait_for&.call(self)

          self
        rescue Docker::Error::NotFoundError => e
          # The leading :: is required: a bare `Testcontainers` resolves to
          # Bidi2pdf::TestHelpers::Testcontainers, which defines no error classes.
          raise ::Testcontainers::NotFoundError, e.message
        rescue Excon::Error::Socket => e
          raise ::Testcontainers::ConnectionError, e.message
        end

        # rubocop: enable Metrics/AbcSize

        # @param [String] protocol URL scheme to use.
        # @return [String] URL of the `/session` endpoint on the container's
        #   host and mapped ChromeDriver port.
        def session_url(protocol: "http")
          "#{protocol}://#{host}:#{mapped_port(port)}/session"
        end
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# The test helpers rely on optional gems. Each one is required on its own
# so that a missing gem only produces a warning naming that gem.
%w[pdf-reader diff-lcs unicode_utils].each do |dep|
  begin
    require dep
  rescue LoadError
    warn "Missing #{dep}. Add it to your Gemfile if you're using Bidi2pdf test helpers."
  end
end
|
8
|
+
|
9
|
+
require "bidi2pdf/test_helpers/pdf_text_sanitizer"
|
10
|
+
require "bidi2pdf/test_helpers/pdf_reader_utils"
|
11
|
+
require "bidi2pdf/test_helpers/matchers/match_pdf_text"
|
12
|
+
require "bidi2pdf/test_helpers/matchers/contains_pdf_text"
|
13
|
+
require "bidi2pdf/test_helpers/matchers/have_pdf_page_count"
|
data/lib/bidi2pdf/version.rb
CHANGED
data/lib/bidi2pdf.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "concurrent-ruby"
|
4
|
+
require "logger"
|
5
|
+
|
3
6
|
require_relative "bidi2pdf/process_tree"
|
4
7
|
require_relative "bidi2pdf/launcher"
|
5
8
|
require_relative "bidi2pdf/bidi/session"
|
@@ -8,8 +11,6 @@ require_relative "bidi2pdf/notifications"
|
|
8
11
|
require_relative "bidi2pdf/notifications/logging_subscriber"
|
9
12
|
require_relative "bidi2pdf/verbose_logger"
|
10
13
|
|
11
|
-
require "logger"
|
12
|
-
|
13
14
|
module Bidi2pdf
|
14
15
|
PAPER_FORMATS_CM = {
|
15
16
|
letter: { width: 21.59, height: 27.94 },
|
@@ -33,7 +34,16 @@ module Bidi2pdf
|
|
33
34
|
|
34
35
|
# Specialization of WebsocketError; parent class of CmdError and
# CmdResponseNotStoredError.
class ClientError < WebsocketError; end
|
35
36
|
|
36
|
-
class CmdError < ClientError
|
37
|
+
# Error carrying a command together with the error response received for it.
# The message combines the response's "error" entry with the inspected command.
class CmdError < ClientError
  attr_reader :cmd, :response

  # @param [Object] cmd The command the response belongs to.
  # @param [Hash] response The response; its "error" entry is put into the message.
  def initialize(cmd, response)
    error_detail = response["error"]
    super("Error response: #{error_detail} #{cmd.inspect}")

    @cmd = cmd
    @response = response
  end
end
|
37
47
|
|
38
48
|
# ClientError subclass.
# NOTE(review): presumably raised when a command's response was not stored — confirm at the raise site.
class CmdResponseNotStoredError < ClientError; end
|
39
49
|
|
@@ -55,6 +65,25 @@ module Bidi2pdf
|
|
55
65
|
end
|
56
66
|
end
|
57
67
|
|
68
|
+
# Common parent of the navigation errors (auth, timeout, not found, DNS),
# so callers can rescue all of them at once.
class NavigationError < Error; end
|
69
|
+
|
70
|
+
# Navigation error for authentication failures; keeps the URL that was
# being navigated to.
class NavigationAuthError < NavigationError
  attr_reader :url

  # @param [String] url The URL whose navigation failed.
  # @param [String, nil] message Optional detail appended to the error message.
  def initialize(url, message = nil)
    description = "Navigation to #{url} failed due to authentication error. #{message}"
    super(description)

    @url = url
  end
end
|
78
|
+
|
79
|
+
# NavigationError subclass.
# NOTE(review): presumably raised when a navigation times out — confirm at the raise site.
class NavigationTimeoutError < NavigationError; end
|
80
|
+
|
81
|
+
# NavigationError subclass.
# NOTE(review): presumably raised when the navigation target cannot be found — confirm at the raise site.
class NavigationNotFoundError < NavigationError; end
|
82
|
+
|
83
|
+
# NavigationError subclass.
# NOTE(review): presumably raised when the target host cannot be resolved — confirm at the raise site.
class NavigationDNSError < NavigationError; end
|
84
|
+
|
85
|
+
# Global configuration for Bidi2pdf
|
86
|
+
|
58
87
|
class << self
|
59
88
|
attr_accessor :default_timeout, :enable_default_logging_subscriber
|
60
89
|
attr_reader :logging_subscriber, :logger, :network_events_logger, :browser_console_logger, :notification_service
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bidi2pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dieter S.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-04-
|
11
|
+
date: 2025-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|
@@ -38,6 +38,26 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: concurrent-ruby
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.3.1
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '1.0'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 1.3.1
|
41
61
|
- !ruby/object:Gem::Dependency
|
42
62
|
name: json
|
43
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -379,6 +399,7 @@ files:
|
|
379
399
|
- lib/bidi2pdf/bidi/interceptor.rb
|
380
400
|
- lib/bidi2pdf/bidi/js_logger_helper.rb
|
381
401
|
- lib/bidi2pdf/bidi/logger_events.rb
|
402
|
+
- lib/bidi2pdf/bidi/navigation_failed_events.rb
|
382
403
|
- lib/bidi2pdf/bidi/network_event.rb
|
383
404
|
- lib/bidi2pdf/bidi/network_event_formatters.rb
|
384
405
|
- lib/bidi2pdf/bidi/network_event_formatters/network_event_console_formatter.rb
|
@@ -398,6 +419,13 @@ files:
|
|
398
419
|
- lib/bidi2pdf/notifications/logging_subscriber.rb
|
399
420
|
- lib/bidi2pdf/process_tree.rb
|
400
421
|
- lib/bidi2pdf/session_runner.rb
|
422
|
+
- lib/bidi2pdf/test_helpers.rb
|
423
|
+
- lib/bidi2pdf/test_helpers/matchers/contains_pdf_text.rb
|
424
|
+
- lib/bidi2pdf/test_helpers/matchers/have_pdf_page_count.rb
|
425
|
+
- lib/bidi2pdf/test_helpers/matchers/match_pdf_text.rb
|
426
|
+
- lib/bidi2pdf/test_helpers/pdf_reader_utils.rb
|
427
|
+
- lib/bidi2pdf/test_helpers/pdf_text_sanitizer.rb
|
428
|
+
- lib/bidi2pdf/test_helpers/testcontainers/chromedriver_container.rb
|
401
429
|
- lib/bidi2pdf/verbose_logger.rb
|
402
430
|
- lib/bidi2pdf/version.rb
|
403
431
|
- sig/bidi2pdf.rbs
|