gkhtmltopdf 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +19 -0
- data/Dockerfile +16 -0
- data/LICENSE +21 -0
- data/README.md +106 -0
- data/Rakefile +4 -0
- data/TODO.md +18 -0
- data/exe/gkhtmltopdf +88 -0
- data/lib/gkhtmltopdf/converter.rb +174 -0
- data/lib/gkhtmltopdf/version.rb +5 -0
- data/lib/gkhtmltopdf.rb +13 -0
- data/spec/fixtures/test.html +58 -0
- data/spec/gkhtmltopdf/converter_spec.rb +46 -0
- data/spec/gkhtmltopdf_spec.rb +25 -0
- data/spec/spec_helper.rb +107 -0
- metadata +126 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: bbc9b80ee14f3f1a0eae169fbd5b1b48a3a14f79bccf8c23cb445720ccf256ac
|
|
4
|
+
data.tar.gz: 84596639fd0dfc5a82a5d9e16373d7b00652688f747a6c8e6fd71a9eb9280038
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 587c445948f7c8552a51ad81bf98e94fb6d76d8699faa59cfbf4138633585b9a3b64cd2ad0a307723908473d7f3258434346771bc6f4921b88628f07da101b81
|
|
7
|
+
data.tar.gz: 8d5edab4799522f1121a0fc2801fff8d5133a0cd58f4a98398f65b90b40dcabda48de29ebe442341e562b2ed27c045b7d312d01a78432bb72ef1e561057a56c2
|
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--require spec_helper
|
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.2.10
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All noteworthy changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## 0.9.0 / 2026-03-13
|
|
6
|
+
|
|
7
|
+
- Achieved 100% test coverage.🎉
|
|
8
|
+
- Fixed GitHub URL.
|
|
9
|
+
|
|
10
|
+
## 0.8.0 / 2026-03-10
|
|
11
|
+
|
|
12
|
+
- Added parallel processing support with automatic free port checking.
|
|
13
|
+
- Added print options.
|
|
14
|
+
- Added Dockerfile (Debian compatibility verified).
|
|
15
|
+
- Added error message display when PATH errors occur.
|
|
16
|
+
|
|
17
|
+
## 0.1.0 / 2026-01-09
|
|
18
|
+
|
|
19
|
+
- Initial release.
|
data/Dockerfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
FROM ruby:3.2-slim
|
|
2
|
+
|
|
3
|
+
RUN apt-get update
|
|
4
|
+
RUN apt-get install -y git wget xz-utils build-essential libyaml-dev
|
|
5
|
+
|
|
6
|
+
# Install Firefox
|
|
7
|
+
RUN apt-get install -y firefox-esr
|
|
8
|
+
|
|
9
|
+
# Install Geckodriver
|
|
10
|
+
RUN wget "https://github.com/mozilla/geckodriver/releases/download/v0.36.0/geckodriver-v0.36.0-linux64.tar.gz" -O geckodriver.tar.gz
|
|
11
|
+
RUN tar -xzf geckodriver.tar.gz -C /usr/local/bin
|
|
12
|
+
|
|
13
|
+
COPY . /app
|
|
14
|
+
WORKDIR /app
|
|
15
|
+
RUN bundle install
|
|
16
|
+
CMD ["bundle", "exec", "rspec"]
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kazuki Sakane
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Gkhtmltopdf
|
|
2
|
+
|
|
3
|
+
Gkhtmltopdf stands for Gecko HTML to PDF converter.
|
|
4
|
+
|
|
5
|
+
Developed as an alternative to wkhtmltopdf.
|
|
6
|
+
This gem converts HTML to PDF using Firefox's Geckodriver.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## How to
|
|
11
|
+
|
|
12
|
+
### 1. Install
|
|
13
|
+
|
|
14
|
+
1. [Firefox](https://www.firefox.com)
|
|
15
|
+
- for Ubuntu
|
|
16
|
+
```bash
|
|
17
|
+
$ apt install -y firefox
|
|
18
|
+
```
|
|
19
|
+
- for Debian
|
|
20
|
+
```bash
|
|
21
|
+
$ apt install -y firefox-esr
|
|
22
|
+
```
|
|
23
|
+
2. [geckodriver](https://github.com/mozilla/geckodriver)
|
|
24
|
+
- for Linux (Ubuntu / Debian)
|
|
25
|
+
```bash
|
|
26
|
+
$ wget "https://github.com/mozilla/geckodriver/releases/download/v0.36.0/geckodriver-v0.36.0-linux64.tar.gz" -O /tmp/geckodriver.tar.gz
|
|
27
|
+
$ tar -xzf /tmp/geckodriver.tar.gz -C /usr/local/bin
|
|
28
|
+
```
|
|
29
|
+
3. gem install
|
|
30
|
+
- bundler
|
|
31
|
+
```bash
|
|
32
|
+
$ bundle add gkhtmltopdf
|
|
33
|
+
```
|
|
34
|
+
- other
|
|
35
|
+
```bash
|
|
36
|
+
$ gem install gkhtmltopdf
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
### 2. Using
|
|
42
|
+
|
|
43
|
+
#### ruby
|
|
44
|
+
|
|
45
|
+
> **⚠️ Security Warning for Web Frameworks (e.g., Ruby on Rails):**
|
|
46
|
+
> If you are accepting URLs from untrusted users, you must implement strict SSRF protection. Do not pass user-input URLs directly without network-level isolation. Please read the [SSRF](#what-is-ssrf) section below for details.
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
# over network
|
|
50
|
+
Gkhtmltopdf.convert('https://example.com', 'example_com.pdf')
|
|
51
|
+
# local file
|
|
52
|
+
Gkhtmltopdf.convert('file:///foo/bar/test.html', 'local.pdf')
|
|
53
|
+
# with option (print background)
|
|
54
|
+
Gkhtmltopdf.convert('https://f6a.net/oss/', 'with_bg.pdf', print_options: {background: true})
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
#### shell
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# over network
|
|
61
|
+
$ gkhtmltopdf https://example.com/ example_com.pdf
|
|
62
|
+
# local file
|
|
63
|
+
$ gkhtmltopdf /foo/bar/test.html local.pdf
|
|
64
|
+
# with option (print background)
|
|
65
|
+
$ gkhtmltopdf https://f6a.net/oss/ with_bg.pdf --background
|
|
66
|
+
# other option
|
|
67
|
+
$ gkhtmltopdf --help
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## FAQ
|
|
73
|
+
|
|
74
|
+
### Why is a blank (white) PDF generated?
|
|
75
|
+
|
|
76
|
+
Due to the W3C WebDriver specification, Geckodriver does not throw an error if the target URL returns an HTTP error status (such as `404 Not Found` or `500 Internal Server Error`). If the browser successfully renders an error page, that error page will simply be converted into a PDF.
|
|
77
|
+
If you need to verify the status of a URL or branch your logic based on HTTP status codes, please perform a pre-flight check using an HTTP client (e.g., `Net::HTTP` or `Faraday`) before passing the URL to this gem.
|
|
78
|
+
|
|
79
|
+
### What is SSRF?
|
|
80
|
+
|
|
81
|
+
SSRF is Server-Side Request Forgery.
|
|
82
|
+
|
|
83
|
+
This gem passes the provided URL directly to Headless Firefox.
|
|
84
|
+
If you integrate this gem into a web service that accepts arbitrary URLs from untrusted users, it may be vulnerable to SSRF and DNS Rebinding attacks.
|
|
85
|
+
Attackers could potentially generate PDFs of internal network resources (e.g., `localhost`, `192.168.0.1`, `169.254.169.254` for cloud metadata).
|
|
86
|
+
|
|
87
|
+
**Recommendation:** Do not rely solely on application-level URL validation. If you process untrusted URLs, strongly consider using network-level isolation (such as Docker container networking restrictions, iptables, or an egress proxy) to block access to private/internal IP ranges.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Acknowledgments & Third-Party Licenses
|
|
92
|
+
|
|
93
|
+
This gem acts as a wrapper and communicates with the following external open-source tools.
|
|
94
|
+
|
|
95
|
+
We are deeply grateful to their developers:
|
|
96
|
+
|
|
97
|
+
* Firefox: Licensed under the [MPL-2.0](https://www.mozilla.org/en-US/MPL/2.0/).
|
|
98
|
+
* Geckodriver: Licensed under the [MPL-2.0](https://github.com/mozilla/geckodriver/blob/master/LICENSE).
|
|
99
|
+
|
|
100
|
+
_Note: Gkhtmltopdf does not bundle these binaries. When you install and use Firefox and Geckodriver in your environment, please refer to their respective licenses._
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE).
|
data/Rakefile
ADDED
data/TODO.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# TODO
|
|
2
|
+
|
|
3
|
+
## 完了
|
|
4
|
+
|
|
5
|
+
- [x] 並列処理対応(自動空きportチェック)
|
|
6
|
+
- [x] 印刷オプションの追加
|
|
7
|
+
- [x] Dockerfileの追加(Debian動作確認)
|
|
8
|
+
- [x] PATHエラー時の表示追加
|
|
9
|
+
- [x] 入力値検証
|
|
10
|
+
- [x] テストカバレッジ100%
|
|
11
|
+
|
|
12
|
+
## 未了
|
|
13
|
+
|
|
14
|
+
- [ ] RubyGemsで公開
|
|
15
|
+
- [ ] UA設定機能
|
|
16
|
+
- [ ] ポート範囲設定
|
|
17
|
+
- [ ] 複数ファイル&URLの直列実行による高速化
|
|
18
|
+
- [ ] configファイルからオプションを設定
|
data/exe/gkhtmltopdf
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'optparse'
|
|
5
|
+
require 'gkhtmltopdf'
|
|
6
|
+
|
|
7
|
+
options = {
|
|
8
|
+
print_options: {},
|
|
9
|
+
firefox_prefs: {}
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
parser = OptionParser.new do |opts|
|
|
13
|
+
opts.banner = "Usage: gkhtmltopdf [options] <URL_OR_FILE> <OUTPUT_PDF>"
|
|
14
|
+
|
|
15
|
+
opts.on("-O", "--orientation [PORTRAIT|LANDSCAPE]", "default: portrait") do |v|
|
|
16
|
+
# The switch takes an optional argument, so v is nil when -O/--orientation is given without a value; keep the default then.
options[:print_options][:orientation] = v.downcase if v
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
opts.on("--background", "Print background") do
|
|
20
|
+
options[:print_options][:background] = true
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
opts.on("--margin-top [CM]", Float, "margin top (cm)") do |v|
|
|
24
|
+
options[:print_options][:margin] ||= {}
|
|
25
|
+
options[:print_options][:margin][:top] = v
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
opts.on("--margin-bottom [CM]", Float, "margin bottom (cm)") do |v|
|
|
29
|
+
options[:print_options][:margin] ||= {}
|
|
30
|
+
options[:print_options][:margin][:bottom] = v
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
opts.on("--firefox-path [PATH]", "Firefox custom PATH") do |v|
|
|
34
|
+
options[:firefox_path] = v
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
opts.on("--geckodriver-path [PATH]", "geckodriver custom PATH") do |v|
|
|
38
|
+
options[:geckodriver_path] = v
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
opts.on("-v", "--version", "display version") do
|
|
42
|
+
puts "Gkhtmltopdf version #{Gkhtmltopdf::VERSION}"
|
|
43
|
+
exit
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
opts.on("-h", "--help", "display help") do
|
|
47
|
+
puts opts
|
|
48
|
+
exit
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
begin
|
|
53
|
+
parser.parse!
|
|
54
|
+
|
|
55
|
+
if ARGV.length != 2
|
|
56
|
+
puts parser.help
|
|
57
|
+
exit 1
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
input_url = ARGV[0]
|
|
61
|
+
output_path = ARGV[1]
|
|
62
|
+
|
|
63
|
+
unless input_url.start_with?('http://', 'https://', 'file://')
|
|
64
|
+
input_url = "file://#{File.absolute_path(input_url)}"
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
puts "Converting: #{input_url} -> #{output_path}"
|
|
68
|
+
|
|
69
|
+
init_options = {}
|
|
70
|
+
init_options[:firefox_path] = options.delete(:firefox_path) if options[:firefox_path]
|
|
71
|
+
init_options[:geckodriver_path] = options.delete(:geckodriver_path) if options[:geckodriver_path]
|
|
72
|
+
|
|
73
|
+
converter = Gkhtmltopdf::Converter.new(**init_options)
|
|
74
|
+
converter.convert(input_url, output_path, print_options: options[:print_options])
|
|
75
|
+
|
|
76
|
+
puts "✅ Completed PDF generation!"
|
|
77
|
+
|
|
78
|
+
rescue Gkhtmltopdf::Error => e
|
|
79
|
+
warn "\e[31m[Error] #{e.message}\e[0m"
|
|
80
|
+
exit 1
|
|
81
|
+
rescue OptionParser::InvalidOption => e
|
|
82
|
+
warn "\e[31m[Error] #{e.message}\e[0m"
|
|
83
|
+
puts parser.help
|
|
84
|
+
exit 1
|
|
85
|
+
rescue StandardError => e
|
|
86
|
+
warn "\e[31m[Error] #{e.class}: #{e.message}\e[0m"
|
|
87
|
+
exit 1
|
|
88
|
+
end
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
require 'net/http'
|
|
2
|
+
require 'json'
|
|
3
|
+
require 'base64'
|
|
4
|
+
require 'uri'
|
|
5
|
+
require 'socket'
|
|
6
|
+
|
|
7
|
+
module Gkhtmltopdf
|
|
8
|
+
class Converter
|
|
9
|
+
def initialize(geckodriver_path: nil, firefox_path: nil, port: nil)
|
|
10
|
+
@geckodriver_path = resolve_geckodriver_path!(geckodriver_path)
|
|
11
|
+
@firefox_path = resolve_firefox_path!(firefox_path)
|
|
12
|
+
@port = port || get_free_port
|
|
13
|
+
@base_url = "http://127.0.0.1:#{@port}"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def convert(url, output_path, print_options: {})
|
|
17
|
+
validate_url_scheme!(url)
|
|
18
|
+
|
|
19
|
+
# Pass argv elements separately so no shell is involved: paths containing spaces or shell metacharacters are handled safely.
pid = spawn(@geckodriver_path, '--port', @port.to_s, out: File::NULL, err: File::NULL)
|
|
20
|
+
wait_for_server
|
|
21
|
+
|
|
22
|
+
session_id = nil
|
|
23
|
+
begin
|
|
24
|
+
session_id = create_session
|
|
25
|
+
navigate(session_id, url)
|
|
26
|
+
|
|
27
|
+
pdf_base64 = print_pdf(session_id, print_options)
|
|
28
|
+
File.binwrite(output_path, Base64.decode64(pdf_base64))
|
|
29
|
+
ensure
|
|
30
|
+
delete_session(session_id) if session_id
|
|
31
|
+
begin
|
|
32
|
+
Process.kill('TERM', pid)
|
|
33
|
+
Process.wait(pid)
|
|
34
|
+
rescue Errno::ESRCH, Errno::ECHILD
|
|
35
|
+
# nothing to do if the process is already terminated
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
def get_free_port
|
|
43
|
+
server = TCPServer.new('127.0.0.1', 0)
|
|
44
|
+
port = server.addr[1]
|
|
45
|
+
server.close
|
|
46
|
+
port
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def resolve_geckodriver_path!(provided_path)
|
|
50
|
+
path = provided_path || find_default_geckodriver
|
|
51
|
+
unless path
|
|
52
|
+
raise Error, "Geckodriver is not found. Please ensure Geckodriver is installed and either in your PATH or specify the path during initialization."
|
|
53
|
+
end
|
|
54
|
+
path
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def resolve_firefox_path!(provided_path)
|
|
58
|
+
path = provided_path || find_default_firefox
|
|
59
|
+
unless path
|
|
60
|
+
raise Error, "Firefox is not found. Please ensure Firefox is installed and either in your PATH or specify the path during initialization."
|
|
61
|
+
end
|
|
62
|
+
path
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def executable_exists?(cmd)
|
|
66
|
+
exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
|
|
67
|
+
ENV['PATH'].to_s.split(File::PATH_SEPARATOR).each do |path|
|
|
68
|
+
exts.each do |ext|
|
|
69
|
+
exe = File.join(path, "#{cmd}#{ext}")
|
|
70
|
+
return true if File.executable?(exe) && !File.directory?(exe)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
false
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def find_default_geckodriver
|
|
77
|
+
return 'geckodriver' if executable_exists?('geckodriver')
|
|
78
|
+
nil
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def find_default_firefox
|
|
82
|
+
return 'firefox' if executable_exists?('firefox')
|
|
83
|
+
|
|
84
|
+
common_paths = [
|
|
85
|
+
'/Applications/Firefox.app/Contents/MacOS/firefox',
|
|
86
|
+
'C:/Program Files/Mozilla Firefox/firefox.exe',
|
|
87
|
+
'C:/Program Files (x86)/Mozilla Firefox/firefox.exe'
|
|
88
|
+
]
|
|
89
|
+
common_paths.find { |path| File.executable?(path) && !File.directory?(path) }
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def wait_for_server
|
|
93
|
+
10.times do
|
|
94
|
+
begin
|
|
95
|
+
Net::HTTP.get(URI("#{@base_url}/status"))
|
|
96
|
+
return
|
|
97
|
+
rescue Errno::ECONNREFUSED
|
|
98
|
+
sleep 0.2
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
raise Error, "Failed to launch geckodriver (port #{@port})"
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def post(path, payload)
|
|
105
|
+
uri = URI("#{@base_url}#{path}")
|
|
106
|
+
req = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json')
|
|
107
|
+
req.body = payload.to_json
|
|
108
|
+
res = Net::HTTP.start(uri.hostname, uri.port) { |http| http.request(req) }
|
|
109
|
+
|
|
110
|
+
begin
|
|
111
|
+
JSON.parse(res.body)
|
|
112
|
+
rescue JSON::ParserError
|
|
113
|
+
raise Error, "Invalid geckodriver response (Status: #{res.code}): #{res.body}"
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def create_session
|
|
118
|
+
firefox_options = { args: ["-headless"] }
|
|
119
|
+
firefox_options[:binary] = @firefox_path if @firefox_path != 'firefox'
|
|
120
|
+
|
|
121
|
+
payload = {
|
|
122
|
+
capabilities: {
|
|
123
|
+
alwaysMatch: {
|
|
124
|
+
browserName: "firefox",
|
|
125
|
+
"moz:firefoxOptions": firefox_options
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
response = post("/session", payload)
|
|
131
|
+
value = response["value"]
|
|
132
|
+
raise Error, "Failed to launch Firefox: #{value}" if value["error"]
|
|
133
|
+
|
|
134
|
+
value["sessionId"]
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def navigate(session_id, url)
|
|
138
|
+
post("/session/#{session_id}/url", { url: url })
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def print_pdf(session_id, user_options)
|
|
142
|
+
default_options = {
|
|
143
|
+
background: false,
|
|
144
|
+
shrinkToFit: true,
|
|
145
|
+
orientation: "portrait",
|
|
146
|
+
page: { width: 21.0, height: 29.7 },
|
|
147
|
+
margin: { top: 1.0, bottom: 1.0, left: 1.0, right: 1.0 }
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
# Merge nested hashes (page, margin) one level deep so a partial user hash such as { margin: { top: 2.0 } } keeps the remaining defaults.
payload = default_options.merge(user_options) { |_key, default, given| default.is_a?(Hash) && given.is_a?(Hash) ? default.merge(given) : given }
|
|
151
|
+
|
|
152
|
+
response = post("/session/#{session_id}/print", payload)
|
|
153
|
+
value = response["value"]
|
|
154
|
+
raise Error, "Failed to generate PDF: #{value}" if value["error"]
|
|
155
|
+
|
|
156
|
+
value
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def delete_session(session_id)
|
|
160
|
+
uri = URI("#{@base_url}/session/#{session_id}")
|
|
161
|
+
req = Net::HTTP::Delete.new(uri)
|
|
162
|
+
Net::HTTP.start(uri.hostname, uri.port) { |http| http.request(req) }
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def validate_url_scheme!(url_string)
|
|
166
|
+
parsed_url = URI.parse(url_string)
|
|
167
|
+
allowed_schemes = ['http', 'https', 'file']
|
|
168
|
+
raise Error, 'URL scheme is nil' if parsed_url.scheme.nil?
|
|
169
|
+
unless allowed_schemes.include?(parsed_url.scheme)
|
|
170
|
+
raise Error, "Invalid URL scheme: #{parsed_url.scheme}"
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
end
|
data/lib/gkhtmltopdf.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "gkhtmltopdf/version"
|
|
4
|
+
require_relative "gkhtmltopdf/converter"
|
|
5
|
+
|
|
6
|
+
module Gkhtmltopdf
|
|
7
|
+
class Error < StandardError; end
|
|
8
|
+
|
|
9
|
+
def self.convert(url, output_path, geckodriver_path: nil, firefox_path: nil, port: nil, print_options: {})
|
|
10
|
+
converter = Converter.new(geckodriver_path: geckodriver_path, firefox_path: firefox_path, port: port)
|
|
11
|
+
converter.convert(url, output_path, print_options: print_options)
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Test HTML</title>
|
|
7
|
+
<style>
|
|
8
|
+
table {
|
|
9
|
+
width: 100%;
|
|
10
|
+
border-collapse: collapse;
|
|
11
|
+
}
|
|
12
|
+
table caption {
|
|
13
|
+
font-size: 1.25rem;
|
|
14
|
+
border-bottom: 1px solid #333333;
|
|
15
|
+
}
|
|
16
|
+
table th, table td {
|
|
17
|
+
border-bottom: 1px solid #999999;
|
|
18
|
+
padding: 0.25rem 0.5rem;
|
|
19
|
+
}
|
|
20
|
+
hr {
|
|
21
|
+
border-width: 1px 0 0;
|
|
22
|
+
margin: 1.25rem 0;
|
|
23
|
+
}
|
|
24
|
+
</style>
|
|
25
|
+
</head>
|
|
26
|
+
<body>
|
|
27
|
+
<h1>TEST</h1>
|
|
28
|
+
<hr>
|
|
29
|
+
<table>
|
|
30
|
+
<caption>LANG</caption>
|
|
31
|
+
<tbody>
|
|
32
|
+
<tr><td>ENG</td><td>HELLO</td></tr>
|
|
33
|
+
<tr><td>FRA</td><td>BONJOUR</td></tr>
|
|
34
|
+
<tr><td>ESP</td><td>HOLA</td></tr>
|
|
35
|
+
<tr><td>ITA</td><td>CIAO</td></tr>
|
|
36
|
+
<tr><td>DEU</td><td>HALLO</td></tr>
|
|
37
|
+
<tr><td>POR</td><td>OLÁ</td></tr>
|
|
38
|
+
<tr><td>RUS</td><td>ПРИВЕТ</td></tr>
|
|
39
|
+
<tr><td>ARA</td><td>مرحبا</td></tr>
|
|
40
|
+
<tr><td>CHN</td><td>你好</td></tr>
|
|
41
|
+
<tr><td>JPN</td><td>こんにちは</td></tr>
|
|
42
|
+
<tr><td>KOR</td><td>안녕하세요</td></tr>
|
|
43
|
+
</tbody>
|
|
44
|
+
</table>
|
|
45
|
+
<hr>
|
|
46
|
+
<table>
|
|
47
|
+
<caption>EMOJI</caption>
|
|
48
|
+
<tbody>
|
|
49
|
+
<tr><td>SMILE</td><td>😀</td></tr>
|
|
50
|
+
<tr><td>LAUGH</td><td>😂</td></tr>
|
|
51
|
+
<tr><td>WINK</td><td>😉</td></tr>
|
|
52
|
+
<tr><td>HEART</td><td>❤️</td></tr>
|
|
53
|
+
<tr><td>THUMBS UP</td><td>👍</td></tr>
|
|
54
|
+
<tr><td>CLAP</td><td>👏</td></tr>
|
|
55
|
+
<tr><td>CRY</td><td>😭</td></tr>
|
|
56
|
+
</tbody>
|
|
57
|
+
</table>
|
|
58
|
+
</html>
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'gkhtmltopdf'
|
|
3
|
+
|
|
4
|
+
RSpec.describe Gkhtmltopdf::Converter do
|
|
5
|
+
let(:converter) { Gkhtmltopdf::Converter.allocate }
|
|
6
|
+
describe '#resolve_geckodriver_path!' do
|
|
7
|
+
subject { converter.send(:resolve_geckodriver_path!, nil) }
|
|
8
|
+
context 'geckodriver is not available' do
|
|
9
|
+
before { allow(File).to receive(:executable?).and_return(false) }
|
|
10
|
+
it 'raises an error' do
|
|
11
|
+
expect { subject }.to raise_error(Gkhtmltopdf::Error, /\AGeckodriver is not found./)
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
describe '#resolve_firefox_path!' do
|
|
17
|
+
subject { converter.send(:resolve_firefox_path!, nil) }
|
|
18
|
+
context 'firefox is not available' do
|
|
19
|
+
before { allow(File).to receive(:executable?).and_return(false) }
|
|
20
|
+
it 'raises an error' do
|
|
21
|
+
expect { subject }.to raise_error(Gkhtmltopdf::Error, /\AFirefox is not found./)
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
describe '#wait_for_server' do
|
|
26
|
+
subject { converter.send(:wait_for_server) }
|
|
27
|
+
context 'fail launch geckodriver' do
|
|
28
|
+
before { allow(Net::HTTP).to receive(:get).and_raise(Errno::ECONNREFUSED, 'Dummy error') }
|
|
29
|
+
it 'raises an error' do
|
|
30
|
+
expect { subject }.to raise_error(Gkhtmltopdf::Error, /\AFailed to launch geckodriver \(port \)\Z/)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
describe '#post' do
|
|
35
|
+
let(:converter) { Gkhtmltopdf::Converter.new }
|
|
36
|
+
subject { converter.send(:post, '/dummy', {test: :value}) }
|
|
37
|
+
context 'Invalid json response from geckodriver' do
|
|
38
|
+
before {
|
|
39
|
+
allow(Net::HTTP).to receive(:start).and_return(Struct.new(:code, :body).new('200', 'invalid_json: 0123'))
|
|
40
|
+
}
|
|
41
|
+
it 'raises an error' do
|
|
42
|
+
expect { subject }.to raise_error(Gkhtmltopdf::Error, 'Invalid geckodriver response (Status: 200): invalid_json: 0123')
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'gkhtmltopdf'
|
|
3
|
+
require 'tmpdir'
|
|
4
|
+
require 'base64'
|
|
5
|
+
|
|
6
|
+
RSpec.describe Gkhtmltopdf do
|
|
7
|
+
describe '.convert' do
|
|
8
|
+
let(:url) { 'https://f6a.net/oss/' }
|
|
9
|
+
let(:output) { File.join(Dir.mktmpdir, 'output.pdf') }
|
|
10
|
+
let(:hash) { {} }
|
|
11
|
+
|
|
12
|
+
subject { Gkhtmltopdf.convert(url, output, **hash) }
|
|
13
|
+
|
|
14
|
+
it 'successful conversion' do
|
|
15
|
+
expect { subject }.not_to raise_error
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
context 'invalid URL' do
|
|
19
|
+
let(:url) { 'ftp://example.com' }
|
|
20
|
+
it 'raises an error' do
|
|
21
|
+
expect { subject }.to raise_error(Gkhtmltopdf::Error, 'Invalid URL scheme: ftp')
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
require 'simplecov'
|
|
2
|
+
SimpleCov.start do
|
|
3
|
+
coverage_dir 'coverage'
|
|
4
|
+
add_filter '/spec/'
|
|
5
|
+
formatter SimpleCov::Formatter::MultiFormatter.new(
|
|
6
|
+
[
|
|
7
|
+
SimpleCov::Formatter::HTMLFormatter,
|
|
8
|
+
SimpleCov::Formatter::SimpleFormatter
|
|
9
|
+
]
|
|
10
|
+
)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
|
14
|
+
# The generated `.rspec` file contains `--require spec_helper` which will cause
|
|
15
|
+
# this file to always be loaded, without a need to explicitly require it in any
|
|
16
|
+
# files.
|
|
17
|
+
#
|
|
18
|
+
# Given that it is always loaded, you are encouraged to keep this file as
|
|
19
|
+
# light-weight as possible. Requiring heavyweight dependencies from this file
|
|
20
|
+
# will add to the boot time of your test suite on EVERY test run, even for an
|
|
21
|
+
# individual file that may not need all of that loaded. Instead, consider making
|
|
22
|
+
# a separate helper file that requires the additional dependencies and performs
|
|
23
|
+
# the additional setup, and require it from the spec files that actually need
|
|
24
|
+
# it.
|
|
25
|
+
#
|
|
26
|
+
# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
|
27
|
+
RSpec.configure do |config|
|
|
28
|
+
# rspec-expectations config goes here. You can use an alternate
|
|
29
|
+
# assertion/expectation library such as wrong or the stdlib/minitest
|
|
30
|
+
# assertions if you prefer.
|
|
31
|
+
config.expect_with :rspec do |expectations|
|
|
32
|
+
# This option will default to `true` in RSpec 4. It makes the `description`
|
|
33
|
+
# and `failure_message` of custom matchers include text for helper methods
|
|
34
|
+
# defined using `chain`, e.g.:
|
|
35
|
+
# be_bigger_than(2).and_smaller_than(4).description
|
|
36
|
+
# # => "be bigger than 2 and smaller than 4"
|
|
37
|
+
# ...rather than:
|
|
38
|
+
# # => "be bigger than 2"
|
|
39
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# rspec-mocks config goes here. You can use an alternate test double
|
|
43
|
+
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
|
44
|
+
config.mock_with :rspec do |mocks|
|
|
45
|
+
# Prevents you from mocking or stubbing a method that does not exist on
|
|
46
|
+
# a real object. This is generally recommended, and will default to
|
|
47
|
+
# `true` in RSpec 4.
|
|
48
|
+
mocks.verify_partial_doubles = true
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
|
|
52
|
+
# have no way to turn it off -- the option exists only for backwards
|
|
53
|
+
# compatibility in RSpec 3). It causes shared context metadata to be
|
|
54
|
+
# inherited by the metadata hash of host groups and examples, rather than
|
|
55
|
+
# triggering implicit auto-inclusion in groups with matching metadata.
|
|
56
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
|
57
|
+
|
|
58
|
+
# The settings below are suggested to provide a good initial experience
|
|
59
|
+
# with RSpec, but feel free to customize to your heart's content.
|
|
60
|
+
# # This allows you to limit a spec run to individual examples or groups
|
|
61
|
+
# # you care about by tagging them with `:focus` metadata. When nothing
|
|
62
|
+
# # is tagged with `:focus`, all examples get run. RSpec also provides
|
|
63
|
+
# # aliases for `it`, `describe`, and `context` that include `:focus`
|
|
64
|
+
# # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
|
|
65
|
+
# config.filter_run_when_matching :focus
|
|
66
|
+
#
|
|
67
|
+
# # Allows RSpec to persist some state between runs in order to support
|
|
68
|
+
# # the `--only-failures` and `--next-failure` CLI options. We recommend
|
|
69
|
+
# # you configure your source control system to ignore this file.
|
|
70
|
+
# config.example_status_persistence_file_path = "spec/examples.txt"
|
|
71
|
+
#
|
|
72
|
+
# # Limits the available syntax to the non-monkey patched syntax that is
|
|
73
|
+
# # recommended. For more details, see:
|
|
74
|
+
# # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/
|
|
75
|
+
# config.disable_monkey_patching!
|
|
76
|
+
#
|
|
77
|
+
# # This setting enables warnings. It's recommended, but in some cases may
|
|
78
|
+
# # be too noisy due to issues in dependencies.
|
|
79
|
+
# config.warnings = true
|
|
80
|
+
#
|
|
81
|
+
# # Many RSpec users commonly either run the entire suite or an individual
|
|
82
|
+
# # file, and it's useful to allow more verbose output when running an
|
|
83
|
+
# # individual spec file.
|
|
84
|
+
# if config.files_to_run.one?
|
|
85
|
+
# # Use the documentation formatter for detailed output,
|
|
86
|
+
# # unless a formatter has already been configured
|
|
87
|
+
# # (e.g. via a command-line flag).
|
|
88
|
+
# config.default_formatter = "doc"
|
|
89
|
+
# end
|
|
90
|
+
#
|
|
91
|
+
# # Print the 10 slowest examples and example groups at the
|
|
92
|
+
# # end of the spec run, to help surface which specs are running
|
|
93
|
+
# # particularly slow.
|
|
94
|
+
# config.profile_examples = 10
|
|
95
|
+
#
|
|
96
|
+
# # Run specs in random order to surface order dependencies. If you find an
|
|
97
|
+
# # order dependency and want to debug it, you can fix the order by providing
|
|
98
|
+
# # the seed, which is printed after each run.
|
|
99
|
+
# # --seed 1234
|
|
100
|
+
# config.order = :random
|
|
101
|
+
#
|
|
102
|
+
# # Seed global randomization in this process using the `--seed` CLI option.
|
|
103
|
+
# # Setting this allows you to use `--seed` to deterministically reproduce
|
|
104
|
+
# # test failures related to randomization by passing the same `--seed` value
|
|
105
|
+
# # as the one that triggered the failure.
|
|
106
|
+
# Kernel.srand config.seed
|
|
107
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: gkhtmltopdf
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.9.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Kazuki Sakane
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-03-12 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: base64
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0.2'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0.2'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: irb
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: simplecov
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0.22'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0.22'
|
|
69
|
+
description: |
|
|
70
|
+
Developed as an alternative to wkhtmltopdf.
|
|
71
|
+
This gem converts HTML to PDF using Firefox's Geckodriver.
|
|
72
|
+
email:
|
|
73
|
+
- sakane@f6a.net
|
|
74
|
+
executables:
|
|
75
|
+
- gkhtmltopdf
|
|
76
|
+
extensions: []
|
|
77
|
+
extra_rdoc_files: []
|
|
78
|
+
files:
|
|
79
|
+
- ".rspec"
|
|
80
|
+
- ".ruby-version"
|
|
81
|
+
- CHANGELOG.md
|
|
82
|
+
- Dockerfile
|
|
83
|
+
- LICENSE
|
|
84
|
+
- README.md
|
|
85
|
+
- Rakefile
|
|
86
|
+
- TODO.md
|
|
87
|
+
- exe/gkhtmltopdf
|
|
88
|
+
- lib/gkhtmltopdf.rb
|
|
89
|
+
- lib/gkhtmltopdf/converter.rb
|
|
90
|
+
- lib/gkhtmltopdf/version.rb
|
|
91
|
+
- spec/fixtures/test.html
|
|
92
|
+
- spec/gkhtmltopdf/converter_spec.rb
|
|
93
|
+
- spec/gkhtmltopdf_spec.rb
|
|
94
|
+
- spec/spec_helper.rb
|
|
95
|
+
homepage: https://f6a.net/oss/
|
|
96
|
+
licenses:
|
|
97
|
+
- MIT
|
|
98
|
+
metadata:
|
|
99
|
+
homepage_uri: https://f6a.net/oss/
|
|
100
|
+
source_code_uri: https://github.com/fantasia-tech/gkhtmltopdf-rb
|
|
101
|
+
post_install_message: "=====================================================================\nGkhtmltopdf
|
|
102
|
+
has been installed successfully. \U0001F389\n\n⚠️ Caution\nRequired: To run this
|
|
103
|
+
gem, you need to have `firefox` and `geckodriver` installed and added to your PATH.\n\ncheck
|
|
104
|
+
[readme.md](https://github.com/fantasia-tech/gkhtmltopdf-rb/blob/main/README.md)
|
|
105
|
+
for more details.\n=====================================================================\n"
|
|
106
|
+
rdoc_options: []
|
|
107
|
+
require_paths:
|
|
108
|
+
- lib
|
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
110
|
+
requirements:
|
|
111
|
+
- - ">="
|
|
112
|
+
- !ruby/object:Gem::Version
|
|
113
|
+
version: 3.2.0
|
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
115
|
+
requirements:
|
|
116
|
+
- - ">="
|
|
117
|
+
- !ruby/object:Gem::Version
|
|
118
|
+
version: '0'
|
|
119
|
+
requirements:
|
|
120
|
+
- Firefox
|
|
121
|
+
- Geckodriver
|
|
122
|
+
rubygems_version: 3.4.19
|
|
123
|
+
signing_key:
|
|
124
|
+
specification_version: 4
|
|
125
|
+
summary: Gkhtmltopdf stands for Gecko HTML to PDF converter.
|
|
126
|
+
test_files: []
|