pdf2htmlex 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b41b15ee0a9766fab2d08d3f77ccfe92c10b811c883347e57964b9b6e79c9b09
4
+ data.tar.gz: f846a1c3abbf90d5cc01d4f05960af3da32d9d2c8566a37fa52d96996c4f7ee9
5
+ SHA512:
6
+ metadata.gz: 38cde8a2d5ba49fdb2fec7ab39843a62bfe3c3f96eb7c85dc71fe962f7832fac276c66daa514b74301169d4eb44674f752940203f1992fe5fe05fb5b01f96a54
7
+ data.tar.gz: 7c23bf805cebc8c9241ca1030bdcd025eed22909b0cf94438bae40a850fafa2bd362fdc31f67a281e28724794d61e29940f63eef93fd0d711c0417143efca338
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --require spec_helper
data/CHANGELOG.md ADDED
File without changes
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in pdf2htmlex.gemspec
6
+ gemspec
7
+
8
+ gem 'rake', '~> 12.0'
data/Gemfile.lock ADDED
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pdf2htmlex (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.4.4)
10
+ rake (12.3.3)
11
+ rspec (3.10.0)
12
+ rspec-core (~> 3.10.0)
13
+ rspec-expectations (~> 3.10.0)
14
+ rspec-mocks (~> 3.10.0)
15
+ rspec-core (3.10.1)
16
+ rspec-support (~> 3.10.0)
17
+ rspec-expectations (3.10.1)
18
+ diff-lcs (>= 1.2.0, < 2.0)
19
+ rspec-support (~> 3.10.0)
20
+ rspec-mocks (3.10.2)
21
+ diff-lcs (>= 1.2.0, < 2.0)
22
+ rspec-support (~> 3.10.0)
23
+ rspec-support (3.10.2)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ pdf2htmlex!
30
+ rake (~> 12.0)
31
+ rspec (~> 3.2)
32
+
33
+ BUNDLED WITH
34
+ 2.1.4
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Marcos G. Zimmermann
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # Ruby wrapper for pdf2htmlEX
2
+
3
+ `pdf2htmlEX` converts PDF to HTML while retaining text, format and style as much as possible by making use of HTML5, JavaScript and modern CSS features. Even difficult content like PDFs with embedded fonts, multicolumn documents, scientific papers with complicated figures and mathematical formulas will mostly be represented correctly. Fallback mode generates HTML pages which do not require any JavaScript to view them correctly at the expense of a larger file size.
4
+
5
+ ## Installation
6
+
7
+ You will need `pdf2htmlEX` installed. If you are using Mac OS X, I recommend installing pdf2htmlEX with Homebrew by running the following:
8
+
9
+ $ brew install pdf2htmlex
10
+
11
+ If you are using Debian, you can install the pdf2htmlex package like so:
12
+
13
+ $ apt install pdf2htmlex
14
+
15
+ If you have a Docker environment set up, you can run pdf2htmlEX through Docker instead:
16
+
17
+ $ alias pdf2htmlex="docker run -ti --rm -v ~/pdf:/tmp/pdf iapain/pdf2htmlex pdf2htmlEX"
18
+
19
+ After that just add this line to your application's Gemfile:
20
+
21
+ ```ruby
22
+ gem 'pdf2htmlex'
23
+ ```
24
+
25
+ And then execute:
26
+
27
+ $ bundle install
28
+
29
+ Or install it yourself as:
30
+
31
+ $ gem install pdf2htmlex
32
+
33
+ ## Usage
34
+
35
+ Configuration:
36
+
37
+ ```ruby
38
+ require 'pdf2htmlex'
39
+
40
+ Pdf2htmlex.config.executable = '/usr/local/bin/pdf2htmlEX' # Default value: 'pdf2htmlex'
41
+ ```
42
+
43
+ To convert a file, call `Pdf2htmlex.convert` with the PDF path as the first argument, an optional output filename as the second argument, and any of the options listed below as keyword arguments. Example:
44
+
45
+ ```ruby
46
+ 2.7.0 (main):0 > output = Pdf2htmlex.convert('./example/demo1.pdf')
47
+ => #<Pathname:/var/folders/rn/1xx0_xsd089fldtvtnq1tk7m0000gn/T/d20210409-96258-1pmrw5z/demo1.html>
48
+ 2.7.0 (main):0 > output = Pdf2htmlex.convert('./example/demo1.pdf', 'sample.html')
49
+ => #<Pathname:/var/folders/rn/1xx0_xsd089fldtvtnq1tk7m0000gn/T/d20210409-96258-14l7p6u/sample.html>
50
+ 2.7.0 (main):0 > output = Pdf2htmlex.convert('./example/demo1.pdf', 'demo.html', dest_dir: '/tmp')
51
+ => #<Pathname:/tmp/demo.html>
52
+ ```
53
+
54
+ All options:
55
+ ```ruby
56
+ first_page: '--first-page', # first page to convert (default: 1)
57
+ last_page: '--last-page', # last page to convert (default: 2147483647)
58
+ zoom: '--zoom', # zoom ratio
59
+ fit_width: '--fit-width', # fit width to <fp> pixels
60
+ fit_height: '--fit-height', # fit height to <fp> pixels
61
+ use_cropbox: '--use-cropbox', # use CropBox instead of MediaBox (default: 1)
62
+ hdpi: '--hdpi', # horizontal resolution for graphics in DPI (default: 144)
63
+ vdpi: '--vdpi', # vertical resolution for graphics in DPI (default: 144)
64
+ embed: '--embed', # specify which elements should be embedded into output
65
+ embed_css: '--embed-css', # embed CSS files into output (default: 1)
66
+ embed_font: '--embed-font', # embed font files into output (default: 1)
67
+ embed_image: '--embed-image', # embed image files into output (default: 1)
68
+ embed_javascript: '--embed-javascript', # embed JavaScript files into output (default: 1)
69
+ embed_outline: '--embed-outline', # embed outlines into output (default: 1)
70
+ split_pages: '--split-pages', # split pages into separate files (default: 0)
71
+ dest_dir: '--dest-dir', # specify destination directory (default: ".")
72
+ css_filename: '--css-filename', # filename of the generated css file (default: "")
73
+ page_filename: '--page-filename', # filename template for split pages (default: "")
74
+ outline_filename: '--outline-filename', # filename of the generated outline file (default: "")
75
+ process_nontext: '--process-nontext', # render graphics in addition to text (default: 1)
76
+ process_outline: '--process-outline', # show outline in HTML (default: 1)
77
+ process_annotation: '--process-annotation', # show annotation in HTML (default: 0)
78
+ process_form: '--process-form', # include text fields and radio buttons (default: 0)
79
+ printing: '--printing', # enable printing support (default: 1)
80
+ fallback: '--fallback', # output in fallback mode (default: 0)
81
+ tmp_file_size_limit: '--tmp-file-size-limit', # Maximum size (in KB) used by temporary files, -1 for no limit. (default: -1)
82
+ embed_external_font: '--embed-external-font', # embed local match for external fonts (default: 1)
83
+ font_format: '--font-format', # suffix for embedded font files (ttf,otf,woff,svg) (default: "woff")
84
+ decompose_ligature: '--decompose-ligature', # decompose ligatures, such as ﬁ -> fi (default: 0)
85
+ auto_hint: '--auto-hint', # use fontforge autohint on fonts without hints (default: 0)
86
+ external_hint_tool: '--external-hint-tool', # external tool for hinting fonts (overrides --auto-hint) (default: "")
87
+ stretch_narrow_glyph: '--stretch-narrow-glyph', # stretch narrow glyphs instead of padding them (default: 0)
88
+ squeeze_wide_glyph: '--squeeze-wide-glyph', # shrink wide glyphs instead of truncating them (default: 1)
89
+ override_fstype: '--override-fstype', # clear the fstype bits in TTF/OTF fonts (default: 0)
90
+ process_type3: '--process-type3', # convert Type 3 fonts for web (experimental) (default: 0)
91
+ heps: '--heps', # horizontal threshold for merging text, in pixels (default: 1)
92
+ veps: '--veps', # vertical threshold for merging text, in pixels (default: 1)
93
+ space_threshold: '--space-threshold', # word break threshold (threshold * em) (default: 0.125)
94
+ font_size_multiplier: '--font-size-multiplier', # a value greater than 1 increases the rendering accuracy (default: 4)
95
+ space_as_offset: '--space-as-offset', # treat space characters as offsets (default: 0)
96
+ tounicode: '--tounicode', # how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore) (default: 0)
97
+ optimize_text: '--optimize-text', # try to reduce the number of HTML elements used for text (default: 0)
98
+ correct_text_visibility: '--correct-text-visibility', # try to detect texts covered by other graphics and properly arrange them (default: 0)
99
+ bg_format: '--bg-format', # specify background image format (default: "png")
100
+ svg_node_count_limit: '--svg-node-count-limit', # if node count in a svg background image exceeds this limit, fall back this page to bitmap background; negative value means no limit. (default: -1)
101
+ svg_embed_bitmap: '--svg-embed-bitmap', # 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible. (default: 1)
102
+ owner_password: '--owner-password', # owner password (for encrypted files)
103
+ user_password: '--user-password', # user password (for encrypted files)
104
+ no_drm: '--no-drm', # override document DRM settings (default: 0)
105
+ clean_tmp: '--clean-tmp', # remove temporary files after conversion (default: 1)
106
+ tmp_dir: '--tmp-dir', # specify the location of temporary directory. (default: "/var/folders/rn/1xx0_xsd089fldtvtnq1tk7m0000gn/T/")
107
+ data_dir: '--data-dir', # specify data directory (default: "/usr/local/Cellar/pdf2htmlex/0.14.6_24/share/pdf2htmlEX")
108
+ debug: '--debug', # print debugging information (default: 0)
109
+ proof: '--proof', # texts are drawn on both text layer and background for proof. (default: 0)
110
+ ```
111
+
112
+ ## Development
113
+
114
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
115
+
116
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
117
+
118
+ ## Contributing
119
+
120
+ Bug reports and pull requests are welcome on GitHub at https://github.com/marcosgz/pdf2htmlex.
121
+
122
+
123
+ ## License
124
+
125
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Define the `spec` task; without it the default task below has nothing to
# depend on and a bare `rake` aborts with an unknown-task error.
RSpec::Core::RakeTask.new(:spec)

# Running `rake` with no arguments runs the spec suite.
task default: :spec
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'pdf2htmlex'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/example/demo1.pdf ADDED
Binary file
data/lib/pdf2htmlex.rb ADDED
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ # @see https://www.rubydoc.info/stdlib/open3/Open3.popen3
4
+ require 'open3'
5
+ # @see https://www.rubydoc.info/stdlib/pathname/Pathname
6
+ require 'pathname'
7
+ # @see https://ruby-doc.com/stdlib/libdoc/tmpdir/rdoc/Dir.html
8
+ require 'tmpdir'
9
+
10
+ require_relative 'pdf2htmlex/version'
11
+ require_relative 'pdf2htmlex/config'
12
+
13
+ # Ruby wrapper for the pdf2htmlEX tool
14
+ #
15
+ # `pdf2htmlEX` converts PDF to HTML while retaining text, format and style as much as
16
+ # possible by making use of HTML5, JavaScript and modern CSS features. Even difficult
17
+ # content like PDFs with embedded fonts, multicolumn documents, scientific papers with
18
+ # complicated figures and mathematical formulas will mostly be represented correctly.
19
+ # Fallback mode generates HTML pages which do not require any JavaScript to view them
20
+ # correctly at the expense of a larger file size.
21
+ #
22
+ # @see https://github.com/coolwanglu/pdf2htmlEX
23
module Pdf2htmlex
  # Raised by {#convert} when the pdf2htmlEX process exits with a non-zero status.
  class Error < StandardError; end

  extend self

  # Convert a PDF file to HTML by invoking the configured pdf2htmlEX executable.
  #
  # Every option value is turned into a command-line string before the
  # process is spawned: +true+/+false+ become "1"/"0", a Proc is called and
  # its result converted, and anything else is converted with +to_s+.
  # Options whose value is +nil+, or whose key is not present in CMD_OPTIONS,
  # are skipped.
  #
  # @param input_pdf [String, Pathname] The path of the PDF file
  # @param html_filename [String, Pathname, nil] The output HTML filename; when
  #   omitted the name is derived from +input_pdf+
  # @param options [Hash] List of pdf2htmlEX options
  # @option options [Integer] :first_page (1) First page to convert
  # @option options [Integer] :last_page (2147483647) Last page to convert
  # @option options [Float] :zoom Zoom ratio
  # @option options [Float] :fit_width Fit width to <fp> pixels
  # @option options [Float] :fit_height Fit height to <fp> pixels
  # @option options [Boolean, Integer] :use_cropbox (true) Use CropBox instead of MediaBox
  # @option options [Float] :hdpi (144) Horizontal resolution for graphics in DPI
  # @option options [Float] :vdpi (144) Vertical resolution for graphics in DPI
  # @option options [String] :embed Specify which elements should be embedded into output
  # @option options [Boolean, Integer] :embed_css (true) Embed CSS files into output
  # @option options [Boolean, Integer] :embed_font (true) Embed font files into output
  # @option options [Boolean, Integer] :embed_image (true) Embed image files into output
  # @option options [Boolean, Integer] :embed_javascript (true) Embed JavaScript files into output
  # @option options [Boolean, Integer] :embed_outline (true) Embed outlines into output
  # @option options [Boolean, Integer] :split_pages (false) Split pages into separate files
  # @option options [String] :dest_dir Destination directory. When omitted, this wrapper
  #   creates a new temporary directory with Dir.mktmpdir and passes it to pdf2htmlEX
  #   (the tool's own default is ".")
  # @option options [String] :css_filename ("") Filename of the generated css file
  # @option options [String] :page_filename ("") Filename template for split pages
  # @option options [String] :outline_filename ("") Filename of the generated outline file
  # @option options [Boolean, Integer] :process_nontext (true) Render graphics in addition to text
  # @option options [Boolean, Integer] :process_outline (true) Show outline in HTML
  # @option options [Boolean, Integer] :process_annotation (false) Show annotation in HTML
  # @option options [Boolean, Integer] :process_form (false) Include text fields and radio buttons
  # @option options [Boolean, Integer] :printing (true) Enable printing support
  # @option options [Boolean, Integer] :fallback (false) Output in fallback mode
  # @option options [Integer] :tmp_file_size_limit (-1) Maximum size (in KB) used by temporary files, -1 for no limit
  # @option options [Boolean, Integer] :embed_external_font (true) Embed local match for external fonts
  # @option options [String] :font_format ("woff") Suffix for embedded font files (ttf,otf,woff,svg)
  # @option options [Boolean, Integer] :decompose_ligature (false) Decompose ligatures, such as ﬁ -> fi
  # @option options [Boolean, Integer] :auto_hint (false) Use fontforge autohint on fonts without hints
  # @option options [String] :external_hint_tool ("") External tool for hinting fonts (overrides --auto-hint)
  # @option options [Boolean, Integer] :stretch_narrow_glyph (false) Stretch narrow glyphs instead of padding them
  # @option options [Boolean, Integer] :squeeze_wide_glyph (true) Shrink wide glyphs instead of truncating them
  # @option options [Boolean, Integer] :override_fstype (false) Clear the fstype bits in TTF/OTF fonts
  # @option options [Boolean, Integer] :process_type3 (false) Convert Type 3 fonts for web (experimental)
  # @option options [Float] :heps (1) Horizontal threshold for merging text, in pixels
  # @option options [Float] :veps (1) Vertical threshold for merging text, in pixels
  # @option options [Float] :space_threshold (0.125) Word break threshold (threshold * em)
  # @option options [Integer] :font_size_multiplier (4) A value greater than 1 increases the rendering accuracy
  # @option options [Boolean, Integer] :space_as_offset (false) Treat space characters as offsets
  # @option options [Integer] :tounicode (0) How to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)
  # @option options [Boolean, Integer] :optimize_text (false) Try to reduce the number of HTML elements used for text
  # @option options [Boolean, Integer] :correct_text_visibility (false) Try to detect texts covered by other graphics and properly arrange them
  # @option options [String] :bg_format ("png") Specify background image format
  # @option options [Integer] :svg_node_count_limit (-1) If node count in a svg background image exceeds this limit, fall back this page to bitmap background; negative value means no limit
  # @option options [Boolean, Integer] :svg_embed_bitmap (true) True: embed bitmaps in svg background; False: dump bitmaps to external files if possible
  # @option options [String] :owner_password Owner password (for encrypted files)
  # @option options [String] :user_password User password (for encrypted files)
  # @option options [Boolean, Integer] :no_drm (false) Override document DRM settings
  # @option options [Boolean, Integer] :clean_tmp (true) Remove temporary files after conversion
  # @option options [String] :tmp_dir Location of the temporary directory (the pdf2htmlEX default is machine-specific)
  # @option options [String] :data_dir Data directory (the pdf2htmlEX default is installation-specific)
  # @option options [Boolean, Integer] :debug (false) Print debugging information
  # @option options [Boolean, Integer] :proof (false) Texts are drawn on both text layer and background for proof
  # @raise [Pdf2htmlex::Error] when pdf2htmlEX exits with a non-zero status
  # @return [Pathname] The path computed for the generated HTML file
  def convert(input_pdf, html_filename = nil, **options)
    options[:dest_dir] ||= make_tempdir

    output = output_html(options[:dest_dir], html_filename || input_pdf)
    command = normalize_options(options)
    # Process arguments must be Strings; a Pathname input would otherwise
    # make the spawn fail with a TypeError.
    command << input_pdf.to_s
    command << output.basename.to_s

    run_command(command)

    output
  end

  private

  # Create a fresh temporary directory to receive the generated files.
  # @return [String] path of the new directory
  def make_tempdir
    Dir.mktmpdir
  end

  # Generate a unique HTML filename within destination directory
  # @param base_dir [String, Pathname] The directory path
  # @param target_filename [String, Pathname] A file name or path; its base name,
  #   with a trailing .pdf/.htm/.html extension removed, names the output file
  # @return [Pathname] a path inside +base_dir+ that did not exist when checked
  def output_html(base_dir, target_filename)
    basename = File.basename(target_filename).sub(%r{\.(pdf|htm|html)$}i, '')
    filename = File.join(base_dir, format('%<fname>s.html', fname: basename))
    n = 0
    # Append an increasing numeric suffix until the name is not taken.
    while File.exist?(filename)
      n += 1
      filename = File.join(base_dir, format('%<fname>s.%<fnum>d.html', fname: basename, fnum: n))
    end
    Pathname.new(filename)
  end

  # Translate the options hash into a flat list of command-line arguments.
  # @param opts [Hash] option name => value
  # @return [Array<String>] alternating flag / value strings
  def normalize_options(opts)
    opts.each_with_object([]) do |(key, value), args|
      next if value.nil?
      next unless CMD_OPTIONS.key?(key)

      args << CMD_OPTIONS[key] << cast_value(value)
    end
  end

  # Convert an option value to the string form passed on the command line.
  # @param value [Object] raw option value; a Proc is called to obtain it
  # @return [String] "1"/"0" for booleans, otherwise the value's +to_s+
  def cast_value(value)
    case value
    when Proc then cast_value(value.call)
    when true then '1'
    when false then '0'
    else
      # Integers, Floats and Pathnames are not accepted as process
      # arguments, so everything is stringified here.
      value.to_s
    end
  end

  # Run the configured executable with the given arguments.
  # @param command [Array<#to_s>] arguments that follow the executable
  # @param input [String, nil] data written to the process' standard input
  # @raise [Pdf2htmlex::Error] when the process does not exit successfully
  # @return [String] the captured standard output
  def run_command(command, input = nil)
    opts = { binmode: true, stdin_data: input }

    # Build a new argv instead of unshifting into the caller's array.
    argv = [config.executable, *command].map(&:to_s)

    output, error, status = Open3.capture3(*argv, opts)

    unless status.success?
      raise Error, "pdf2htmlEX failed with: #{error}\nCommand: #{argv.join(' ')}"
    end

    output
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Pdf2htmlex
  # Maps each supported option name to the pdf2htmlEX command-line flag it
  # is passed as. The trailing comments describe the flag; defaults shown
  # are the ones listed for the tool, written as 1/0 for on/off.
  # Frozen so the shared lookup table cannot be modified by accident.
  CMD_OPTIONS = {
    first_page: '--first-page', # first page to convert (default: 1)
    last_page: '--last-page', # last page to convert (default: 2147483647)
    zoom: '--zoom', # zoom ratio
    fit_width: '--fit-width', # fit width to <fp> pixels
    fit_height: '--fit-height', # fit height to <fp> pixels
    use_cropbox: '--use-cropbox', # use CropBox instead of MediaBox (default: 1)
    hdpi: '--hdpi', # horizontal resolution for graphics in DPI (default: 144)
    vdpi: '--vdpi', # vertical resolution for graphics in DPI (default: 144)
    embed: '--embed', # specify which elements should be embedded into output
    embed_css: '--embed-css', # embed CSS files into output (default: 1)
    embed_font: '--embed-font', # embed font files into output (default: 1)
    embed_image: '--embed-image', # embed image files into output (default: 1)
    embed_javascript: '--embed-javascript', # embed JavaScript files into output (default: 1)
    embed_outline: '--embed-outline', # embed outlines into output (default: 1)
    split_pages: '--split-pages', # split pages into separate files (default: 0)
    dest_dir: '--dest-dir', # specify destination directory (default: ".")
    css_filename: '--css-filename', # filename of the generated css file (default: "")
    page_filename: '--page-filename', # filename template for split pages (default: "")
    outline_filename: '--outline-filename', # filename of the generated outline file (default: "")
    process_nontext: '--process-nontext', # render graphics in addition to text (default: 1)
    process_outline: '--process-outline', # show outline in HTML (default: 1)
    process_annotation: '--process-annotation', # show annotation in HTML (default: 0)
    process_form: '--process-form', # include text fields and radio buttons (default: 0)
    printing: '--printing', # enable printing support (default: 1)
    fallback: '--fallback', # output in fallback mode (default: 0)
    tmp_file_size_limit: '--tmp-file-size-limit', # maximum size (in KB) used by temporary files, -1 for no limit (default: -1)
    embed_external_font: '--embed-external-font', # embed local match for external fonts (default: 1)
    font_format: '--font-format', # suffix for embedded font files (ttf,otf,woff,svg) (default: "woff")
    decompose_ligature: '--decompose-ligature', # decompose ligatures, such as ﬁ -> fi (default: 0)
    auto_hint: '--auto-hint', # use fontforge autohint on fonts without hints (default: 0)
    external_hint_tool: '--external-hint-tool', # external tool for hinting fonts (overrides --auto-hint) (default: "")
    stretch_narrow_glyph: '--stretch-narrow-glyph', # stretch narrow glyphs instead of padding them (default: 0)
    squeeze_wide_glyph: '--squeeze-wide-glyph', # shrink wide glyphs instead of truncating them (default: 1)
    override_fstype: '--override-fstype', # clear the fstype bits in TTF/OTF fonts (default: 0)
    process_type3: '--process-type3', # convert Type 3 fonts for web (experimental) (default: 0)
    heps: '--heps', # horizontal threshold for merging text, in pixels (default: 1)
    veps: '--veps', # vertical threshold for merging text, in pixels (default: 1)
    space_threshold: '--space-threshold', # word break threshold (threshold * em) (default: 0.125)
    font_size_multiplier: '--font-size-multiplier', # a value greater than 1 increases the rendering accuracy (default: 4)
    space_as_offset: '--space-as-offset', # treat space characters as offsets (default: 0)
    tounicode: '--tounicode', # how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore) (default: 0)
    optimize_text: '--optimize-text', # try to reduce the number of HTML elements used for text (default: 0)
    correct_text_visibility: '--correct-text-visibility', # try to detect texts covered by other graphics and properly arrange them (default: 0)
    bg_format: '--bg-format', # specify background image format (default: "png")
    svg_node_count_limit: '--svg-node-count-limit', # if node count in a svg background image exceeds this limit, fall back this page to bitmap background; negative value means no limit (default: -1)
    svg_embed_bitmap: '--svg-embed-bitmap', # 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible (default: 1)
    owner_password: '--owner-password', # owner password (for encrypted files)
    user_password: '--user-password', # user password (for encrypted files)
    no_drm: '--no-drm', # override document DRM settings (default: 0)
    clean_tmp: '--clean-tmp', # remove temporary files after conversion (default: 1)
    tmp_dir: '--tmp-dir', # specify the location of temporary directory (default is machine-specific)
    data_dir: '--data-dir', # specify data directory (default is installation-specific)
    debug: '--debug', # print debugging information (default: 0)
    proof: '--proof' # texts are drawn on both text layer and background for proof (default: 0)
  }.freeze

  # Holds the settings of the wrapper; currently only the executable to run.
  Config = Struct.new(:executable)

  # Lazily built, memoized configuration object.
  #
  # NOTE(review): the default executable name is all lowercase while the
  # tool is referred to as `pdf2htmlEX` elsewhere; on a case-sensitive file
  # system the default may not resolve — confirm, or set
  # `config.executable` explicitly.
  #
  # @return [Config] the configuration, defaulting executable to 'pdf2htmlex'
  def config
    @config ||= Config.new('pdf2htmlex')
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Pdf2htmlex
  # Version string of this gem release.
  VERSION = '0.1.0'
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/pdf2htmlex/version'
4
+
5
# Gem specification: identity, metadata links, packaged files and
# dependencies of the pdf2htmlex gem.
Gem::Specification.new do |spec|
  spec.name          = 'pdf2htmlex'
  spec.version       = Pdf2htmlex::VERSION
  spec.authors       = ['Marcos G. Zimmermann']
  spec.email         = ['mgzmaster@gmail.com']

  spec.summary       = 'Ruby wrapper for the pdf2htmlEX that convert PDF files to HTML'
  spec.description   = 'pdf2htmlEX helps to convert PDF files into HTML. This simple library uses the pdf2htmlEX tool under the hood.'
  spec.homepage      = 'https://github.com/marcosgz/pdf2htmlex'
  spec.license       = 'MIT'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')

  spec.metadata['allowed_push_host'] = 'https://rubygems.org'

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = 'https://github.com/marcosgz/pdf2htmlex'
  # Point at the CHANGELOG.md shipped in the repository; the previous value
  # had the repository name and the blob path segments swapped.
  spec.metadata['changelog_uri'] = 'https://github.com/marcosgz/pdf2htmlex/blob/main/CHANGELOG.md'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']
  # External (non-gem) requirement: the pdf2htmlEX tool itself.
  spec.requirements << 'pdf2htmlEX'

  spec.add_development_dependency 'rspec', '~> 3.2'
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf2htmlex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Marcos G. Zimmermann
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-04-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ description: pdf2htmlEX helps to convert PDF files into HTML. This simple library
28
+ uses the pdf2htmlEX tool under the hood.
29
+ email:
30
+ - mgzmaster@gmail.com
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - ".gitignore"
36
+ - ".rspec"
37
+ - CHANGELOG.md
38
+ - Gemfile
39
+ - Gemfile.lock
40
+ - LICENSE.txt
41
+ - README.md
42
+ - Rakefile
43
+ - bin/console
44
+ - bin/setup
45
+ - example/demo1.pdf
46
+ - lib/pdf2htmlex.rb
47
+ - lib/pdf2htmlex/config.rb
48
+ - lib/pdf2htmlex/version.rb
49
+ - pdf2htmlex.gemspec
50
+ homepage: https://github.com/marcosgz/pdf2htmlex
51
+ licenses:
52
+ - MIT
53
+ metadata:
54
+ allowed_push_host: https://rubygems.org
55
+ homepage_uri: https://github.com/marcosgz/pdf2htmlex
56
+ source_code_uri: https://github.com/marcosgz/pdf2htmlex
57
+ changelog_uri: https://github.com/marcosgz/blob/main/pdf2htmlex
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.3.0
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements:
73
+ - pdf2htmlEX
74
+ rubygems_version: 3.1.2
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Ruby wrapper for the pdf2htmlEX that convert PDF files to HTML
78
+ test_files: []