bidi2pdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.idea/.gitignore +8 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +50 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/CHANGELOG.md +5 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +119 -0
  10. data/Rakefile +22 -0
  11. data/docker/Dockerfile +35 -0
  12. data/docker/docker-compose.yml +1 -0
  13. data/exe/bidi2pdf +7 -0
  14. data/lib/bidi2pdf/bidi/add_headers_interceptor.rb +42 -0
  15. data/lib/bidi2pdf/bidi/auth_interceptor.rb +67 -0
  16. data/lib/bidi2pdf/bidi/browser.rb +15 -0
  17. data/lib/bidi2pdf/bidi/browser_tab.rb +180 -0
  18. data/lib/bidi2pdf/bidi/client.rb +224 -0
  19. data/lib/bidi2pdf/bidi/event_manager.rb +84 -0
  20. data/lib/bidi2pdf/bidi/network_event.rb +54 -0
  21. data/lib/bidi2pdf/bidi/network_events.rb +82 -0
  22. data/lib/bidi2pdf/bidi/print_parameters_validator.rb +114 -0
  23. data/lib/bidi2pdf/bidi/session.rb +135 -0
  24. data/lib/bidi2pdf/bidi/user_context.rb +75 -0
  25. data/lib/bidi2pdf/bidi/web_socket_dispatcher.rb +70 -0
  26. data/lib/bidi2pdf/chromedriver_manager.rb +160 -0
  27. data/lib/bidi2pdf/cli.rb +118 -0
  28. data/lib/bidi2pdf/launcher.rb +46 -0
  29. data/lib/bidi2pdf/session_runner.rb +123 -0
  30. data/lib/bidi2pdf/utils.rb +15 -0
  31. data/lib/bidi2pdf/version.rb +5 -0
  32. data/lib/bidi2pdf.rb +25 -0
  33. data/sig/bidi2pdf/chrome/chromedriver_downloader.rbs +11 -0
  34. data/sig/bidi2pdf/chrome/downloader_helper.rbs +9 -0
  35. data/sig/bidi2pdf/chrome/finder.rbs +27 -0
  36. data/sig/bidi2pdf/chrome/platform.rbs +13 -0
  37. data/sig/bidi2pdf/chrome/version_resolver.rbs +19 -0
  38. data/sig/bidi2pdf.rbs +4 -0
  39. data/tmp/.keep +0 -0
  40. metadata +327 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7d773a323c63404e1c8dee00d180e6f774f5b45c33e59bc759b8186d3398aeed
4
+ data.tar.gz: 9b3452ac73398cc6037ca3d39b792afafbd2dfd47e55e3fe0c69783627b7fc88
5
+ SHA512:
6
+ metadata.gz: e656e92c5a4aa8a82ab3d0dde12fdf3cd4a8743ab8dd47a77d229452e5ab0127f40ed5ee84c02c190bb35aed5de7fe14696d30b22de0bc9966cdb9fd94eaf9b9
7
+ data.tar.gz: 57b04e9c98c91e12ccfb432d292f13186f9a92776a81847736719843dffeed1d07adab9d32dd858709aa9803b8ac476c38cbfc9a87bbed5fc6ce88efec599ecd
data/.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,50 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ TargetRubyVersion: 3.0
4
+
5
+ Style/StringLiterals:
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/StringLiteralsInInterpolation:
9
+ EnforcedStyle: double_quotes
10
+
11
+ Style/Documentation:
12
+ Enabled: false
13
+
14
+ RSpec/SubjectStub:
15
+ Enabled: false
16
+
17
+ RSpec/ExampleLength:
18
+ Enabled: false
19
+
20
+ Layout/MultilineMethodCallIndentation:
21
+ Enabled: false
22
+
23
+ Layout/FirstHashElementIndentation:
24
+ EnforcedStyle: consistent
25
+
26
+ Layout/FirstArrayElementIndentation:
27
+ EnforcedStyle: consistent
28
+
29
+ RSpec/MultipleMemoizedHelpers:
30
+ Max: 10
31
+
32
+ Metrics/MethodLength:
33
+ Enabled: false
34
+
35
+ Metrics/ClassLength:
36
+ Enabled: false
37
+
38
+ Metrics/ParameterLists:
39
+ Max: 10
40
+
41
+
42
+ Gemspec/DevelopmentDependencies:
43
+ EnforcedStyle: gemspec
44
+
45
+ RSpec/InstanceVariable:
46
+ Enabled: false
47
+
48
+ plugins:
49
+ - rubocop-rake
50
+ - rubocop-rspec
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ bidi2pdf
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-3.3.4
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2025-03-26
4
+
5
+ - Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 fastjack
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,119 @@
1
+ # Bidi2pdf
2
+
3
+ Bidi2pdf is a Ruby gem that generates high-quality PDFs from web pages using Chrome's BiDi (BiDirectional) protocol. It
4
+ offers precise control over PDF generation with support for modern web technologies.
5
+
6
+ ## Features
7
+
8
+ - **Simple CLI** - Generate PDFs with a single command
9
+ - **Rich Configuration** - Customize with cookies, headers, and authentication
10
+ - **Waiting Conditions** - Wait for window loaded or network idle
11
+ - **Headless Support** - Run without a visible browser
12
+ - **Docker Ready** - Easy containerization
13
+ - **Modern Architecture** - Uses Chrome's BiDi protocol for better control
14
+
15
+ ## Installation
16
+
17
+ Add to your application's Gemfile:
18
+
19
+ ```ruby
20
+ gem 'bidi2pdf'
21
+ ```
22
+
23
+ Or install manually:
24
+
25
+ ```bash
26
+ $ gem install bidi2pdf
27
+ ```
28
+
29
+ ### Dependencies
30
+
31
+ - **Ruby**: 3.3 or higher
32
+ - **Bidi2pdf** automatically manages ChromeDriver binaries through
33
+ the [chromedriver-binary](https://github.com/dieter-medium/chromedriver-binary) gem, which:
34
+ Downloads and installs the ChromeDriver version matching your installed Chrome/Chromium browser
35
+ Eliminates the need to manually install or update ChromeDriver
36
+ Ensures compatibility between Chrome and ChromeDriver versions
37
+
38
+ ## Usage
39
+
40
+ ### Basic Command Line Usage
41
+
42
+ ```bash
43
+ bidi2pdf render --url https://example.com --output example.pdf
44
+ ```
45
+
46
+ ### Advanced Options
47
+
48
+ ```bash
49
+ bidi2pdf render \
50
+ --url https://example.com \
51
+ --output example.pdf \
52
+ --cookie session=abc123 \
53
+ --header X-API-KEY=token \
54
+ --auth admin:password \
55
+ --wait_network_idle \
56
+ --wait_window_loaded \
57
+ --log_level debug
58
+ ```
59
+
60
+ ### Ruby API
61
+
62
+ ```ruby
63
+ require 'bidi2pdf'
64
+
65
+ launcher = Bidi2pdf::Launcher.new(
66
+ url: 'https://example.com',
67
+ output: 'example.pdf', # nil for base64 encoded string as result of launcher.launch
68
+ cookies: { 'session' => 'abc123' },
69
+ headers: { 'X-API-KEY' => 'token' },
70
+ auth: { username: 'admin', password: 'password' },
71
+ wait_window_loaded: true,
72
+ wait_network_idle: true
73
+ )
74
+
75
+ launcher.launch
76
+
77
+ # see Bidi2pdf::SessionRunner for more options
78
+ ```
79
+
80
+ ## Docker Support
81
+
82
+ Build and run with Docker:
83
+
84
+ ```bash
85
+ # Build gem and Docker image
86
+ rake build
87
+ docker build -t bidi2pdf -f docker/Dockerfile .
88
+
89
+ # Generate PDF using Docker
90
+ docker run -it --rm -v ./output:/reports bidi2pdf \
91
+ bidi2pdf render --url=https://example.com --output /reports/example.pdf
92
+ ```
93
+
94
+ ## Configuration Options
95
+
96
+ | Option | Description |
97
+ |------------------------|---------------------------------------------------------------------------------------------------------------------|
98
+ | `--url` | The URL to render (required) |
99
+ | `--output` | Output PDF filename (default: output.pdf) |
100
+ | `--cookie` | Cookies in name=value format |
101
+ | `--header` | HTTP headers in name=value format |
102
+ | `--auth` | Basic auth credentials (user:pass) |
103
+ | `--headless` | Run Chrome in headless mode (default: true) |
104
+ | `--port` | Port for ChromeDriver (0 = auto) |
105
+ | `--wait_window_loaded` | Wait for the window to be fully loaded. You need to set a variable `window.loaded`. See ./spec/fixtures/sample.html |
106
+ | `--wait_network_idle` | Wait for network to be idle |
107
+ | `--log_level` | Log level (debug, info, warn, error, fatal) |
108
+
109
+ ## Development
110
+
111
+ After checking out the repo:
112
+
113
+ 1. Run `bin/setup` to install dependencies
114
+ 2. Run `rake spec` to run the tests
115
+ 3. Run `bin/console` for an interactive prompt
116
+
117
+ ## License
118
+
119
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,22 @@
1
# frozen_string_literal: true

# Rake entry point: gem packaging tasks, the RSpec and RuboCop tasks, and a
# coverage wrapper around the spec task.

require "bundler/gem_tasks"
require "rspec/core/rake_task"
require "rubocop/rake_task"
require "chromedriver/binary"

RSpec::Core::RakeTask.new(:spec)
RuboCop::RakeTask.new

# Running plain `rake` executes the specs and then the linter.
task default: %i[spec rubocop]

# Loads the Rakefile found under "chromedriver/" on the load path
# (presumably the one provided by the chromedriver-binary gem -- confirm).
load "chromedriver/Rakefile"

desc "Run tests with coverage"
task :coverage do
  # The flag is only set here; whatever reads COVERAGE lives outside this file.
  ENV["COVERAGE"] = "true"
  Rake::Task["spec"].execute
  puts "Coverage report generated in coverage/ directory"
end
data/docker/Dockerfile ADDED
@@ -0,0 +1,35 @@
1
+ FROM ruby:3.3
2
+
3
+ # Install dependencies
4
+ RUN apt-get update && \
5
+ apt-get install -y \
6
+ chromium \
7
+ libglib2.0-0 \
8
+ libnss3 \
9
+ libxss1 \
10
+ libasound2 \
11
+ libatk-bridge2.0-0 \
12
+ libgtk-3-0 \
13
+ libdrm2 \
14
+ curl \
15
+ unzip \
16
+ xvfb \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ # Create a non-root user
20
+ RUN groupadd -r appuser && useradd -r -g appuser -m -d /home/appuser appuser
21
+
22
+
23
+ # Set working directory
24
+ WORKDIR /app
25
+
26
+ # Copy your gem into container
27
+ COPY ./pkg/bidi2pdf-*.gem ./
28
+
29
+ RUN gem install ./bidi2pdf-*.gem && \
30
+ chown -R appuser:appuser /app
31
+
32
+ # Switch to non-root user
33
+ USER appuser
34
+
35
+ CMD ["/usr/bin/bash"]
@@ -0,0 +1 @@
1
+ name: bidi2pdf
data/exe/bidi2pdf ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bidi2pdf"
5
+ require "bidi2pdf/cli"
6
+
7
+ Bidi2pdf::CLI.start(ARGV)
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Bidi2pdf
  module Bidi
    # Event handler that resumes blocked requests belonging to one intercept,
    # sending a fixed list of headers along with the continue command.
    class AddHeadersInterceptor
      attr_reader :id, :headers

      # @param id [Object] intercept id looked up in the event's "intercepts" list
      # @param headers [Enumerable] entries readable through +[:name]+ and +[:value]+
      # @param client [#send_cmd] object used to send the continue command
      def initialize(id, headers, client)
        @id = id
        @client = client
        @headers = headers.map { |entry| string_header(entry[:name], entry[:value]) }
      end

      # Sends "network.continueRequest" for events that list this intercept
      # and are flagged as blocked; every other event is ignored.
      #
      # @param response [Hash] event message whose "params" entry holds the event data
      # @return [Object, nil] the result of +client.send_cmd+, or nil when the event is ignored
      def handle_event(response)
        params = response["params"]
        ours = params["intercepts"]&.include?(id)

        return unless ours && params["isBlocked"]

        network_id = params["request"]["request"]

        Bidi2pdf.logger.debug "Interceptor #{id} handle event: #{network_id}"

        payload = { request: network_id, headers: headers }
        client.send_cmd "network.continueRequest", payload
      end

      private

      attr_reader :client

      # Wraps a name/value pair in the string-typed header structure stored in +headers+.
      def string_header(name, value)
        { name: name, value: { type: "string", value: value } }
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
module Bidi2pdf
  module Bidi
    # Event handler that answers blocked, intercepted requests with a
    # username/password pair via "network.continueWithAuth".
    #
    # The request ids already answered are remembered in +network_ids+. When
    # the same request id shows up again, the request is cancelled instead of
    # being answered a second time.
    class AuthInterceptor
      attr_reader :id, :username, :password, :network_ids

      # @param id [Object] intercept id looked up in the event's "intercepts" list
      # @param username [String] user name sent as credentials
      # @param password [String] password sent as credentials
      # @param client [#send_cmd] object used to send commands
      def initialize(id, username, password, client)
        @id = id
        @client = client
        @username = username
        @password = password
        @network_ids = []
      end

      # rubocop:disable Metrics/AbcSize

      # Handles one event. Events that do not list this intercept or are not
      # flagged as blocked are ignored.
      #
      # @param response [Hash] event message whose "params" entry holds the event data
      # @return [Object, nil] the result of the "provideCredentials" command, or nil
      #   when the event was ignored or the request was cancelled
      def handle_event(response)
        event_response = response["params"]

        return unless event_response["intercepts"]&.include?(id) && event_response["isBlocked"]

        navigation_id = event_response["navigation"]
        network_id = event_response["request"]["request"]
        url = event_response["request"]["url"]

        # Stop here once the request has been cancelled: sending credentials
        # for a request that was just cancelled would contradict the cancel
        # and would also re-register the request id as "already answered".
        return if handle_bad_credentials(navigation_id, network_id, url)

        network_ids << network_id

        Bidi2pdf.logger.debug "Auth-Interceptor #{id} handle event: #{navigation_id}/#{network_id}/#{url}"

        client.send_cmd("network.continueWithAuth", {
          request: network_id,
          action: "provideCredentials",
          credentials: {
            type: "password",
            username: username,
            password: password
          }
        })
      end

      # rubocop:enable Metrics/AbcSize

      private

      # Cancels a request whose id was already answered once.
      #
      # @return [Boolean] true when the request was cancelled, false when the
      #   request id has not been seen before
      def handle_bad_credentials(navigation_id, network_id, url)
        return false unless network_ids.include?(network_id)

        network_ids.delete(network_id)

        Bidi2pdf.logger.debug "Auth-Interceptor #{id} already handled event: #{navigation_id}/#{network_id}/#{url}"

        # rubocop: disable Layout/LineLength
        Bidi2pdf.logger.error "It seems that the same request is being intercepted multiple times. Check your credentials or the URL you are trying to access. If you are using a proxy, make sure it is configured correctly."
        # rubocop: enable Layout/LineLength

        client.send_cmd("network.continueWithAuth", {
          request: network_id,
          action: "cancel"
        })

        true
      end

      attr_reader :client
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "user_context"
4
+
5
module Bidi2pdf
  module Bidi
    # Thin wrapper around a client that hands out user contexts.
    class Browser
      # @param client [Object] client passed on to every UserContext created here
      def initialize(client)
        @client = client
      end

      # Builds a new UserContext bound to this browser's client.
      #
      # @return [UserContext] a fresh instance on every call
      def create_user_context
        UserContext.new(@client)
      end
    end
  end
end
@@ -0,0 +1,180 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+
5
+ require_relative "network_events"
6
+ require_relative "print_parameters_validator"
7
+
8
module Bidi2pdf
  module Bidi
    # One browsing context (tab) addressed through a client.
    #
    # A tab can create further tabs in the same user context, set cookies,
    # register header/auth interceptors, navigate, evaluate scripts, print the
    # page to PDF and finally close itself together with the tabs it created.
    class BrowserTab
      # Network events forwarded to +network_events+ while a page is open.
      NETWORK_EVENT_NAMES = %w[
        network.responseStarted
        network.responseCompleted
        network.fetchError
      ].freeze

      attr_reader :client, :browsing_context_id, :user_context_id, :tabs, :network_events, :open

      # @param client [Object] client used to send commands and (un)register listeners
      # @param browsing_context_id [String] id of the browsing context this tab wraps
      # @param user_context_id [String] id of the user context new tabs are created in
      def initialize(client, browsing_context_id, user_context_id)
        @client = client
        @browsing_context_id = browsing_context_id
        @user_context_id = user_context_id
        @tabs = []
        @network_events = NetworkEvents.new browsing_context_id
        @open = true
      end

      # Creates a new tab in the same user context and remembers it in +tabs+
      # so that #close can close it later.
      #
      # @return [Object] whatever +client.send_cmd_and_wait+ returns for the block
      #   (the block itself evaluates to the new BrowserTab)
      def create_browser_tab
        client.send_cmd_and_wait("browsingContext.create", {
          type: "tab",
          userContext: @user_context_id
        }) do |response|
          tab_browsing_context_id = response["result"]["context"]

          BrowserTab.new(client, tab_browsing_context_id, user_context_id).tap do |tab|
            tabs << tab
            Bidi2pdf.logger.debug "Created new browser tab: #{tab.inspect}"
          end
        end
      end

      # Stores a cookie for this browsing context.
      #
      # @param name [String] cookie name
      # @param value [String] cookie value, sent as a string-typed value
      # @param domain [String] cookie domain
      # @param path [String] cookie path
      # @param secure [Boolean] value of the cookie's secure flag
      # @param http_only [Boolean] value of the cookie's httpOnly flag
      # @param same_site [String] value of the cookie's sameSite attribute
      # @param ttl [Integer] lifetime in seconds, added to the current epoch time
      def set_cookie(
        name:,
        value:,
        domain:,
        path: "/",
        secure: true,
        http_only: false,
        same_site: "strict",
        ttl: 30
      )
        expiry = Time.now.to_i + ttl
        client.send_cmd_and_wait("storage.setCookie", {
          cookie: {
            name: name,
            value: {
              type: "string",
              value: value
            },
            domain: domain,
            path: path,
            secure: secure,
            httpOnly: http_only,
            sameSite: same_site,
            expiry: expiry
          },
          partition: {
            type: "context",
            context: browsing_context_id
          }
        }) do |response|
          Bidi2pdf.logger.debug "Cookie set: #{response.inspect}"
        end
      end

      # Registers a headers interceptor for this browsing context on the client.
      def add_headers(
        headers:,
        url_patterns:
      )
        client.add_headers_interceptor(
          context: browsing_context_id,
          url_patterns: url_patterns,
          headers: headers
        )
      end

      # Registers an auth interceptor for this browsing context on the client.
      def basic_auth(username:, password:, url_patterns:)
        client.add_auth_interceptor(
          context: browsing_context_id,
          url_patterns: url_patterns,
          username: username,
          password: password
        )
      end

      # Subscribes the network event tracker and navigates to +url+, asking
      # the navigate command to wait for "complete".
      #
      # @param url [String] address to navigate to
      def open_page(url)
        client.on_event(*NETWORK_EVENT_NAMES, &network_event_handler)

        client.send_cmd_and_wait("browsingContext.navigate", {
          url: url,
          context: browsing_context_id,
          wait: "complete"
        }) do |response|
          Bidi2pdf.logger.debug "Navigated to page url: #{url} response: #{response}"
        end
      end

      # Evaluates +script+ in this browsing context with awaitPromise enabled.
      #
      # @param script [String] expression to evaluate
      # @return [Object] whatever +client.send_cmd_and_wait+ returns for the block
      #   (the block evaluates to the response's "result" entry)
      def execute_script(script)
        client.send_cmd_and_wait("script.evaluate", {
          expression: script,
          target: {
            context: browsing_context_id
          },
          awaitPromise: true
        }) do |response|
          Bidi2pdf.logger.debug "Script Result: #{response.inspect}"

          response["result"]
        end
      end

      # Delegates to +network_events.wait_until_all_finished+ with the same arguments.
      def wait_until_all_finished(timeout: 10, poll_interval: 0.1)
        network_events.wait_until_all_finished(timeout: timeout, poll_interval: poll_interval)
      end

      # Closes the tabs created by this tab, unregisters the network listener
      # and closes the browsing context. Does nothing when already closed.
      def close
        return unless open

        close_tabs
        remove_event_listeners
        close_context

        @open = false
      end

      # rubocop:disable Metrics/AbcSize

      # Prints the page to PDF.
      #
      # @param outputfile [String, nil] path the decoded PDF is written to; when
      #   nil nothing is written and the Base64 data is returned instead
      # @param print_options [Hash] options checked by PrintParametersValidator
      #   and sent with the print command
      # @return [String, Object] the Base64 PDF data when +outputfile+ is nil,
      #   otherwise whatever +client.send_cmd_and_wait+ returns
      def print(outputfile, print_options: { background: true })
        PrintParametersValidator.validate!(print_options)

        cmd_params = (print_options || {}).merge(context: browsing_context_id)

        client.send_cmd_and_wait("browsingContext.print", cmd_params) do |response|
          if response["result"]
            pdf_base64 = response["result"]["data"]

            if outputfile
              File.binwrite(outputfile, Base64.decode64(pdf_base64))
              Bidi2pdf.logger.info "PDF saved as '#{outputfile}'."
            else
              Bidi2pdf.logger.info "PDF generated successfully."
            end

            # Non-local return: leaves #print itself with the Base64 payload.
            return pdf_base64 unless outputfile
          else
            Bidi2pdf.logger.error "Error printing: #{response}"
          end
        end
      end

      # rubocop:enable Metrics/AbcSize

      private

      # The single Proc registered with and removed from the client.
      # Method#to_proc builds a new Proc on every call, so converting the
      # method separately for registration and removal hands the client two
      # different objects. Memoizing guarantees both calls see the same one.
      # NOTE(review): whether the event manager matches listeners by identity
      # is not visible here -- confirm.
      def network_event_handler
        @network_event_handler ||= network_events.method(:handle_event).to_proc
      end

      def close_context
        client.send_cmd_and_wait("browsingContext.close", { context: browsing_context_id }) do |response|
          Bidi2pdf.logger.debug "Browsing context closed: #{response}"
        end
      end

      def remove_event_listeners
        Bidi2pdf.logger.debug "Network events: #{network_events.all_events.map(&:to_s)}"

        client.remove_event_listener(*NETWORK_EVENT_NAMES, &network_event_handler)
      end

      def close_tabs
        tabs.each do |tab|
          tab.close
          Bidi2pdf.logger.debug "Closed tab: #{tab.browsing_context_id}"
        end
      end
    end
  end
end