hexapdf 0.12.1 → 0.14.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +130 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/content/parser.rb +1 -1
- data/lib/hexapdf/dictionary.rb +9 -6
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/subsetter.rb +12 -3
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/true_type/table/post.rb +15 -10
- data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
- data/lib/hexapdf/font_loader/from_file.rb +18 -8
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/importer.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +96 -4
- data/lib/hexapdf/pdf_array.rb +12 -5
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +34 -8
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +4 -3
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
- data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +15 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +37 -13
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -1
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
- data/test/hexapdf/font/true_type/test_subsetter.rb +5 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
- data/test/hexapdf/font_loader/test_from_file.rb +7 -0
- data/test/hexapdf/layout/test_style.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +8 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +171 -53
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
- data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +26 -1
- data/test/hexapdf/type/test_page.rb +45 -7
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4010e277168cec5c8cc5d584ec324064461e63756d18b538cd335235fe04e6d
|
4
|
+
data.tar.gz: 2b7a71463082a32605adee682c81cdde6b0eb48d360ca66249b08884f82e571b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5748273dc4dc532cd365598e25c4a9cc5872011d2eb638c2986050aeed0a68d2dc5769fda075eb60cbcb76fccbfb1a5b52c3c58581cb6e969978c17d770013e6
|
7
|
+
data.tar.gz: 0ab3abf80967804486fa1f50f186b508fd792acfbd8c47646fa7d0c5b0245161e2833620142b2f05a1ee73b01145016dca7bf7781d579284160c9d2dd2c78d0c
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,133 @@
|
|
1
|
+
## 0.14.1 - 2021-01-21
|
2
|
+
|
3
|
+
### Changed
|
4
|
+
|
5
|
+
* Validation message when checking for allowed values to include the invalid
|
6
|
+
object
|
7
|
+
* [HexaPDF::FontLoader::FromFile] to allow (re)using an existing font object
|
8
|
+
* [HexaPDF::Importer] internals to avoid problems with retained memory
|
9
|
+
|
10
|
+
### Fixed
|
11
|
+
|
12
|
+
* Parsing of invalid PDF files where whitespace is missing after the integer
|
13
|
+
value of an indirect object
|
14
|
+
* [HexaPDF::Dictionary] so that adding new key-value pairs during validation is
|
15
|
+
possible
|
16
|
+
|
17
|
+
|
18
|
+
## 0.14.0 - 2020-12-30
|
19
|
+
|
20
|
+
### Added
|
21
|
+
|
22
|
+
* Support for creating AcroForm multiline text fields and their appearances
|
23
|
+
* Support for creating AcroForm comb text fields and their appearances
|
24
|
+
* Support for creating AcroForm password fields and their appearances
|
25
|
+
* Support for creating AcroForm file select fields and their appearances
|
26
|
+
* Support for creating AcroForm list box appearances
|
27
|
+
* [HexaPDF::Type::AcroForm::ChoiceField#list_box_top_index] and its setter
|
28
|
+
method
|
29
|
+
* [HexaPDF::Type::AcroForm::ChoiceField#update_widgets] to create appearances if
|
30
|
+
they don't exist
|
31
|
+
* Methods for caching data to [HexaPDF::Object]
|
32
|
+
* Support for splitting by page size to CLI command `hexapdf split`
|
33
|
+
|
34
|
+
### Changed
|
35
|
+
|
36
|
+
* [HexaPDF::Utils::ObjectHash#oids] to be public instead of private
|
37
|
+
* Cross-reference table parsing to handle invalidly numbered main sections
|
38
|
+
* [HexaPDF::Document#cache] and [HexaPDF::Object#cache] to allow updating
|
39
|
+
values for existing keys
|
40
|
+
* Appearance creation methods of AcroForm objects to allow forcing the creation
|
41
|
+
of new appearances
|
42
|
+
* [HexaPDF::Type::AcroForm::AppearanceGenerator#create_text_appearances] to
|
43
|
+
re-use existing form objects
|
44
|
+
* AcroForm field creation methods to allow specifying often used field
|
45
|
+
properties
|
46
|
+
|
47
|
+
### Fixed
|
48
|
+
|
49
|
+
* Missing usage of `:sort` flag for AcroForm choice fields
|
50
|
+
* Setting the `/I` field for AcroForm list boxes with multiple selection
|
51
|
+
* [HexaPDF::Layout::TextLayouter::SimpleLineWrapping] to remove glue items
|
52
|
+
(whitespace) before a hard line break
|
53
|
+
* Infinite loop when reconstructing the cross-reference table
|
54
|
+
* [HexaPDF::Type::AcroForm::ChoiceField] to support export values for option
|
55
|
+
items
|
56
|
+
* AcroForm text field appearance creation to only create a new appearance if the
|
57
|
+
field's value has changed
|
58
|
+
* AcroForm choice field appearance creation to only create a new appearance if
|
59
|
+
the involved dictionary fields' values have changed
|
60
|
+
* [HexaPDF::Type::AcroForm::ChoiceField#list_box_top_index=] to raise an error
|
61
|
+
if no option items are set
|
62
|
+
* [HexaPDF::PDFArray#to_ary] to return an array with preprocessed values
|
63
|
+
* [HexaPDF::Type::Form#contents=] to clear cached values to avoid returning e.g.
|
64
|
+
an invalid canvas object later
|
65
|
+
* [HexaPDF::Type::AcroForm::ButtonField#update_widgets] to create appearances if
|
66
|
+
they don't exist
|
67
|
+
|
68
|
+
|
69
|
+
## 0.13.0 - 2020-11-15
|
70
|
+
|
71
|
+
### Added
|
72
|
+
|
73
|
+
* Cross-reference table reconstruction for damaged PDFs, controllable via the
|
74
|
+
new 'parser.try_xref_reconstruction' option
|
75
|
+
* Two new `hexapdf inspect` commands for showing page objects and page content
|
76
|
+
streams by page number
|
77
|
+
* Flag `--check` to the CLI command `hexapdf info` for checking a file for
|
78
|
+
parse and validation errors
|
79
|
+
* [HexaPDF::Type::AcroForm::Field#embedded_widget?] for checking if a widget is
|
80
|
+
embedded in the field object
|
81
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] for deleting a widget
|
82
|
+
* [HexaPDF::PDFArray#delete] for deleting an object from a PDF array
|
83
|
+
* [HexaPDF::Type::Page#ancestor_nodes] for retrieving all ancestor page tree
|
84
|
+
nodes of a page
|
85
|
+
* [HexaPDF::Type::PageTreeNode#move_page] for moving a page to another index
|
86
|
+
|
87
|
+
### Changed
|
88
|
+
|
89
|
+
* **Breaking change**: Overhauled document/object validation interfaces and
|
90
|
+
internals to be more similar and to allow for reporting of multiple validation
|
91
|
+
problems
|
92
|
+
* Validation of TrueType fonts to ignore missing fields if the font name
|
93
|
+
suggests that the font is one of the standard 14 PDF fonts
|
94
|
+
* Option `-p` of CLI command `hexapdf image2pdf` to also allow lowercase page
|
95
|
+
size names
|
96
|
+
|
97
|
+
### Fixed
|
98
|
+
|
99
|
+
* Reporting of cross-reference section entry parsing error
|
100
|
+
* PDF version used by default for dictionary fields
|
101
|
+
* Error in CLI command `hexapdf inspect` when parsing an invalid object number
|
102
|
+
* Output of error messages in CLI command `hexapdf inspect` to go to `$stderr`
|
103
|
+
* Bug in [HexaPDF::Type::AcroForm::TextField] validation due to missing nil
|
104
|
+
handling
|
105
|
+
|
106
|
+
|
107
|
+
## 0.12.3 - 2020-08-22
|
108
|
+
|
109
|
+
### Changed
|
110
|
+
|
111
|
+
* Allow any object responding to `#to_sym` when setting a radio button value
|
112
|
+
|
113
|
+
### Fixed
|
114
|
+
|
115
|
+
* Error in the AcroForm appearance generator for text fields when the font is
|
116
|
+
not found in the default resources
|
117
|
+
* Parsing of long numbers when reading a file from IO
|
118
|
+
* Usage of unsupported method for Ruby 2.4 so that all tests pass again
|
119
|
+
|
120
|
+
|
121
|
+
## 0.12.2 - 2020-08-17
|
122
|
+
|
123
|
+
### Fixed
|
124
|
+
|
125
|
+
- Wrong origin for page canvases when bottom left corner of media box doesn't
|
126
|
+
coincide with origin of coordinate system
|
127
|
+
- Wrong origin for Form XObject canvas when bottom left corner of bounding box
|
128
|
+
doesn't coincide with origin of coordinate system
|
129
|
+
|
130
|
+
|
1
131
|
## 0.12.1 - 2020-08-16
|
2
132
|
|
3
133
|
### Added
|
data/examples/019-acro_form.rb
CHANGED
@@ -42,10 +42,47 @@ rb = form.create_radio_button("Radio")
|
|
42
42
|
end
|
43
43
|
rb.field_value = :button0
|
44
44
|
|
45
|
-
canvas.text("Text
|
46
|
-
|
47
|
-
|
48
|
-
tx.
|
45
|
+
canvas.text("Text fields", at: [50, 450])
|
46
|
+
|
47
|
+
canvas.text("Single line", at: [70, 420])
|
48
|
+
tx = form.create_text_field("Single Line", font_size: 16)
|
49
|
+
widget = tx.create_widget(page, Rect: [200, 415, 500, 435])
|
49
50
|
tx.field_value = "A sample test string!"
|
50
51
|
|
52
|
+
canvas.text("Multiline", at: [70, 390])
|
53
|
+
tx = form.create_multiline_text_field("Multiline", font_size: 0, align: :right)
|
54
|
+
widget = tx.create_widget(page, Rect: [200, 325, 500, 405])
|
55
|
+
widget.border_style(color: 0, width: 1)
|
56
|
+
tx.field_value = "A sample test string! " * 30 + "\nNew line\n\nAnother line"
|
57
|
+
|
58
|
+
canvas.text("Password", at: [70, 300])
|
59
|
+
tx = form.create_password_field("Password", font_size: 16)
|
60
|
+
widget = tx.create_widget(page, Rect: [200, 295, 500, 315])
|
61
|
+
|
62
|
+
canvas.text("File select", at: [70, 270])
|
63
|
+
tx = form.create_file_select_field("File Select", font_size: 16)
|
64
|
+
widget = tx.create_widget(page, Rect: [200, 265, 500, 285])
|
65
|
+
tx.field_value = "path/to/file.pdf"
|
66
|
+
|
67
|
+
canvas.text("Comb", at: [70, 240])
|
68
|
+
tx = form.create_comb_text_field("Comb field", max_chars: 10, font_size: 16, align: :center)
|
69
|
+
widget = tx.create_widget(page, Rect: [200, 220, 500, 255])
|
70
|
+
widget.border_style(color: [30, 128, 0], width: 1)
|
71
|
+
tx.field_value = 'Hello'
|
72
|
+
|
73
|
+
canvas.text("Combo Box", at: [50, 170])
|
74
|
+
cb = form.create_combo_box("Combo Box", font_size: 12, editable: true,
|
75
|
+
option_items: ['Value 1', 'Another value', 'Choose me!'])
|
76
|
+
widget = cb.create_widget(page, Rect: [200, 150, 500, 185])
|
77
|
+
widget.border_style(width: 1)
|
78
|
+
cb.field_value = 'Another value'
|
79
|
+
|
80
|
+
canvas.text("List Box", at: [50, 120])
|
81
|
+
lb = form.create_list_box("List Box", font_size: 15, align: :center, multi_select: true,
|
82
|
+
option_items: 1.upto(7).map {|i| "Value #{i}" })
|
83
|
+
widget = lb.create_widget(page, Rect: [200, 50, 500, 135])
|
84
|
+
widget.border_style(width: 1)
|
85
|
+
lb.list_box_top_index = 1
|
86
|
+
lb.field_value = ['Value 6', 'Value 2']
|
87
|
+
|
51
88
|
doc.write('acro_form.pdf', optimize: true)
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -100,6 +100,7 @@ module HexaPDF
|
|
100
100
|
def pdf_options(password)
|
101
101
|
hash = {decryption_opts: {password: password}, config: {}}
|
102
102
|
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
|
103
|
+
hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
|
103
104
|
hash[:config]['parser.on_correctable_error'] =
|
104
105
|
if command_parser.strict
|
105
106
|
proc { true }
|
@@ -277,14 +278,15 @@ module HexaPDF
|
|
277
278
|
#
|
278
279
|
# See: #define_encryption_options
|
279
280
|
def apply_encryption_options(doc)
|
280
|
-
|
281
|
+
case @out_options.encryption
|
282
|
+
when :add
|
281
283
|
doc.encrypt(algorithm: @out_options.enc_algorithm,
|
282
284
|
key_length: @out_options.enc_key_length,
|
283
285
|
force_v4: @out_options.enc_force_v4,
|
284
286
|
permissions: @out_options.enc_permissions,
|
285
287
|
owner_password: @out_options.enc_owner_pwd,
|
286
288
|
user_password: @out_options.enc_user_pwd)
|
287
|
-
|
289
|
+
when :remove
|
288
290
|
doc.encrypt(name: nil)
|
289
291
|
end
|
290
292
|
end
|
data/lib/hexapdf/cli/image2pdf.rb
CHANGED
@@ -64,7 +64,8 @@ module HexaPDF
|
|
64
64
|
orientation = :landscape
|
65
65
|
page_size.delete_suffix!('-landscape')
|
66
66
|
end
|
67
|
-
|
67
|
+
page_size = page_size.capitalize.to_sym
|
68
|
+
HexaPDF::Type::Page.media_box(page_size, orientation: orientation)
|
68
69
|
end
|
69
70
|
end
|
70
71
|
options.on("--[no-]auto-rotate", "Automatically rotate pages based on image dimesions. " \
|
data/lib/hexapdf/cli/info.rb
CHANGED
@@ -55,13 +55,21 @@ module HexaPDF
|
|
55
55
|
long_desc(<<~EOF)
|
56
56
|
This command extracts information from the Info dictionary of a PDF file as well
|
57
57
|
as some other useful information like the used PDF version and encryption information.
|
58
|
+
|
59
|
+
If the --check option is specified, the PDF file will also be checked for parse and
|
60
|
+
validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
|
61
|
+
file by correcting the errors.
|
58
62
|
EOF
|
63
|
+
options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |check|
|
64
|
+
@check_file = check
|
65
|
+
end
|
59
66
|
options.on("--password PASSWORD", "-p", String,
|
60
67
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
61
68
|
@password = (pwd == '-' ? read_password : pwd)
|
62
69
|
end
|
63
70
|
@password = nil
|
64
71
|
@auto_decrypt = true
|
72
|
+
@check_file = false
|
65
73
|
end
|
66
74
|
|
67
75
|
def execute(file) #:nodoc:
|
@@ -79,8 +87,30 @@ module HexaPDF
|
|
79
87
|
options = pdf_options(@password)
|
80
88
|
options[:config]['document.auto_decrypt'] = @auto_decrypt
|
81
89
|
HexaPDF::Document.open(file, **options) do |doc|
|
90
|
+
if @check_file
|
91
|
+
indirect_object = nil
|
92
|
+
validation_block = lambda do |msg, correctable, object|
|
93
|
+
object = indirect_object unless object.indirect? || object.type == :XXTrailer
|
94
|
+
object_type = if object.type == :XXTrailer
|
95
|
+
'trailer'
|
96
|
+
elsif !object.type.to_s.start_with?("XX")
|
97
|
+
"object type #{object.type} (#{object.oid},#{object.gen})"
|
98
|
+
else
|
99
|
+
"object (#{object.oid},#{object.gen})"
|
100
|
+
end
|
101
|
+
object_type = "sub-object of #{object_type}" if object == indirect_object
|
102
|
+
puts "WARNING: Validation error for #{object_type}: #{msg} " \
|
103
|
+
"#{correctable ? '(correctable)' : ''}"
|
104
|
+
end
|
105
|
+
doc.trailer.validate(auto_correct: true, &validation_block)
|
106
|
+
doc.each(only_current: false, only_loaded: false) do |obj|
|
107
|
+
indirect_object = obj
|
108
|
+
obj.validate(auto_correct: true, &validation_block)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
82
112
|
output_line("File name", file)
|
83
|
-
output_line("File size", File.stat(file).size.to_s
|
113
|
+
output_line("File size", File.stat(file).size.to_s << " bytes")
|
84
114
|
@auto_decrypt && INFO_KEYS.each do |name|
|
85
115
|
next unless doc.trailer.info.key?(name)
|
86
116
|
output_line(name.to_s, doc.trailer.info[name].to_s)
|
@@ -110,10 +140,29 @@ module HexaPDF
|
|
110
140
|
else
|
111
141
|
raise
|
112
142
|
end
|
143
|
+
rescue HexaPDF::MalformedPDFError => e
|
144
|
+
$stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
|
145
|
+
$stderr.puts " #{e}"
|
146
|
+
end
|
147
|
+
|
148
|
+
# Use custom options if we are checking the PDF file for errors.
|
149
|
+
def pdf_options(password)
|
150
|
+
if @check_file
|
151
|
+
options = {decryption_opts: {password: password}, config: {}}
|
152
|
+
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = false
|
153
|
+
options[:config]['parser.try_xref_reconstruction'] = true
|
154
|
+
options[:config]['parser.on_correctable_error'] = lambda do |_, msg, pos|
|
155
|
+
puts "WARNING: Parse error at position #{pos}: #{msg}"
|
156
|
+
false
|
157
|
+
end
|
158
|
+
options
|
159
|
+
else
|
160
|
+
super
|
161
|
+
end
|
113
162
|
end
|
114
163
|
|
115
164
|
def output_line(header, text) #:nodoc:
|
116
|
-
puts((header
|
165
|
+
puts(("#{header}:").ljust(COLUMN_WIDTH) << text)
|
117
166
|
end
|
118
167
|
|
119
168
|
end
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
@@ -122,22 +122,22 @@ module HexaPDF
|
|
122
122
|
case command
|
123
123
|
when /^\d+(,\d+)?$/, 'o', 'object'
|
124
124
|
arg = (command.start_with?('o') ? data.shift : command)
|
125
|
-
obj = pdf_object_from_string_reference(arg) rescue puts($!.message)
|
126
|
-
if obj
|
125
|
+
obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
|
126
|
+
if obj&.data&.stream && command_parser.verbosity_info?
|
127
127
|
$stderr.puts("Note: Object also has stream data")
|
128
128
|
end
|
129
129
|
serialize(obj.value, recursive: false) if obj
|
130
130
|
|
131
131
|
when 'r', 'recursive'
|
132
132
|
obj = if (obj = data.shift)
|
133
|
-
pdf_object_from_string_reference(obj) rescue puts($!.message)
|
133
|
+
pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
|
134
134
|
else
|
135
135
|
@doc.trailer
|
136
136
|
end
|
137
137
|
serialize(obj.value, recursive: true) if obj
|
138
138
|
|
139
139
|
when 's', 'stream', 'raw', 'raw-stream'
|
140
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message)) &&
|
140
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
|
141
141
|
obj.kind_of?(HexaPDF::Stream)
|
142
142
|
source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
|
143
143
|
while source.alive? && (stream_data = source.resume)
|
@@ -148,7 +148,7 @@ module HexaPDF
|
|
148
148
|
end
|
149
149
|
|
150
150
|
when 'x', 'xref'
|
151
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message))
|
151
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
|
152
152
|
@doc.revisions.reverse_each do |rev|
|
153
153
|
if (xref = rev.xref(obj))
|
154
154
|
puts xref
|
@@ -178,6 +178,26 @@ module HexaPDF
|
|
178
178
|
puts str
|
179
179
|
end
|
180
180
|
|
181
|
+
when 'po', 'ps'
|
182
|
+
page_number_str = data.shift
|
183
|
+
unless page_number_str
|
184
|
+
$stderr.puts("Error: Missing PAGE argument to #{command}")
|
185
|
+
next
|
186
|
+
end
|
187
|
+
page_number = parse_pages_specification(page_number_str, @doc.pages.count).first&.first
|
188
|
+
unless page_number
|
189
|
+
$stderr.puts("Error: Invalid page number #{page_number_str}")
|
190
|
+
next
|
191
|
+
end
|
192
|
+
page = @doc.pages[page_number]
|
193
|
+
if command.start_with?('ps')
|
194
|
+
$stdout.write(page.contents)
|
195
|
+
else
|
196
|
+
puts "#{page.oid} #{page.gen} obj"
|
197
|
+
serialize(page.value, recursive: false)
|
198
|
+
puts "endobj"
|
199
|
+
end
|
200
|
+
|
181
201
|
when 'pc', 'page-count'
|
182
202
|
puts @doc.pages.count
|
183
203
|
|
@@ -217,9 +237,9 @@ module HexaPDF
|
|
217
237
|
if str.nil?
|
218
238
|
raise "Error: Missing argument object identifier OID[,GEN]"
|
219
239
|
elsif !str.match?(/^\d+(,\d+)?$/)
|
220
|
-
raise "Error: Invalid argument: Must be of form OID[,GEN]"
|
240
|
+
raise "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
|
221
241
|
elsif !(obj = @doc.object(pdf_reference_from_string(str)))
|
222
|
-
raise "Error: No object with the given object identifier found"
|
242
|
+
raise "Error: No object with the given object identifier '#{str}' found"
|
223
243
|
else
|
224
244
|
obj
|
225
245
|
end
|
@@ -240,7 +260,7 @@ module HexaPDF
|
|
240
260
|
puts "<<"
|
241
261
|
(recursive ? val.sort : val).each do |k, v|
|
242
262
|
next if v.nil? || (v.respond_to?(:null?) && v.null?)
|
243
|
-
print ' ' * (indent + 1)
|
263
|
+
print '%s%s ' % [' ' * (indent + 1), @serializer.serialize_symbol(k)]
|
244
264
|
serialize(v, recursive: recursive, seen: seen, indent: indent + 1)
|
245
265
|
puts
|
246
266
|
end
|
@@ -283,6 +303,8 @@ module HexaPDF
|
|
283
303
|
["c[atalog]", "Print the catalog dictionary"],
|
284
304
|
["t[railer]", "Print the trailer dictionary"],
|
285
305
|
["p[ages] [RANGE]", "Print information about pages"],
|
306
|
+
["po PAGE", "Print the page object"],
|
307
|
+
["ps PAGE", "Print the content stream of the page"],
|
286
308
|
["pc | page-count", "Print the number of pages"],
|
287
309
|
["search REGEXP", "Print objects matching the pattern"],
|
288
310
|
["h[elp]", "Show the help"],
|
data/lib/hexapdf/cli/merge.rb
CHANGED
@@ -122,7 +122,7 @@ module HexaPDF
|
|
122
122
|
|
123
123
|
# Assemble pages
|
124
124
|
target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
|
125
|
-
page_tree = target.add(Type: :Pages)
|
125
|
+
page_tree = target.add({Type: :Pages})
|
126
126
|
import_pages(page_tree)
|
127
127
|
target.catalog[:Pages] = page_tree
|
128
128
|
remove_unused_pages(target)
|
data/lib/hexapdf/cli/split.rb
CHANGED
@@ -44,16 +44,28 @@ module HexaPDF
|
|
44
44
|
|
45
45
|
def initialize #:nodoc:
|
46
46
|
super('split', takes_commands: false)
|
47
|
-
short_desc("Split a PDF file
|
47
|
+
short_desc("Split a PDF file")
|
48
48
|
long_desc(<<~EOF)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
49
|
+
The default strategy is to split a PDF into individual pages, i.e. splitting is done by
|
50
|
+
page number. It is also possible to split by page size where pages with the same page size
|
51
|
+
get put into the same output PDF.
|
52
|
+
|
53
|
+
If no OUTPUT_SPEC is specified, the resulting PDF files are named <PDF>_0001.pdf,
|
54
|
+
<PDF>_0002.pdf, ... when splitting by page number and <PDF>_A4.pdf, <PDF>_Letter.pdf, ...
|
55
|
+
when splitting by page size.
|
56
|
+
|
57
|
+
To specify a custom name, provide the OUTPUT_SPEC argument. It can contain a printf-style
|
58
|
+
format definition like '%04d' to specify the place where the page number should be
|
59
|
+
inserted. In case of splitting by page size, the place of the format defintion is replaced
|
60
|
+
with the name of the page size, e.g. A4 or Letter.
|
53
61
|
|
54
62
|
The optimization and encryption options are applied to each created output file.
|
55
63
|
EOF
|
56
64
|
|
65
|
+
options.on("--strategy STRATEGY", "-s", [:page_number, :page_size], "Defines how the PDF " \
|
66
|
+
"file should be split: page_number or page_size (default: page_number)") do |s|
|
67
|
+
@strategy = s
|
68
|
+
end
|
57
69
|
options.on("--password PASSWORD", "-p", String,
|
58
70
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
59
71
|
@password = (pwd == '-' ? read_password : pwd)
|
@@ -62,23 +74,71 @@ module HexaPDF
|
|
62
74
|
define_encryption_options
|
63
75
|
|
64
76
|
@password = nil
|
77
|
+
@strategy = :page_number
|
65
78
|
end
|
66
79
|
|
67
80
|
def execute(pdf, output_spec = pdf.sub(/\.pdf$/i, '_%04d.pdf')) #:nodoc:
|
68
|
-
output_spec = output_spec.sub('%', '%<page>')
|
69
81
|
with_document(pdf, password: @password) do |doc|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
out.pages.add(out.import(page))
|
75
|
-
apply_encryption_options(out)
|
76
|
-
apply_optimization_options(out)
|
77
|
-
write_document(out, output_file)
|
82
|
+
if @strategy == :page_number
|
83
|
+
split_by_page_number(doc, output_spec)
|
84
|
+
else
|
85
|
+
split_by_page_size(doc, output_spec)
|
78
86
|
end
|
79
87
|
end
|
80
88
|
end
|
81
89
|
|
90
|
+
private
|
91
|
+
|
92
|
+
# Splits the document into individual pages.
|
93
|
+
def split_by_page_number(doc, output_spec)
|
94
|
+
doc.pages.each_with_index do |page, index|
|
95
|
+
output_file = sprintf(output_spec, index + 1)
|
96
|
+
maybe_raise_on_existing_file(output_file)
|
97
|
+
out = HexaPDF::Document.new
|
98
|
+
out.pages.add(out.import(page))
|
99
|
+
apply_encryption_options(out)
|
100
|
+
apply_optimization_options(out)
|
101
|
+
write_document(out, output_file)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# Splits the document into files based on the page sizes.
|
106
|
+
def split_by_page_size(doc, output_spec)
|
107
|
+
output_spec = output_spec.sub(/%.*?[a-zA-Z]/, '%s')
|
108
|
+
out_files = Hash.new do |hash, key|
|
109
|
+
output_file = sprintf(output_spec, key)
|
110
|
+
maybe_raise_on_existing_file(output_file)
|
111
|
+
out = HexaPDF::Document.new
|
112
|
+
out.config['output_file'] = output_file
|
113
|
+
hash[key] = out
|
114
|
+
end
|
115
|
+
|
116
|
+
doc.pages.each do |page|
|
117
|
+
out = out_files[page_size_name(page.box(:media).value)]
|
118
|
+
out.pages.add(out.import(page))
|
119
|
+
end
|
120
|
+
|
121
|
+
out_files.each_value do |out|
|
122
|
+
apply_encryption_options(out)
|
123
|
+
apply_optimization_options(out)
|
124
|
+
write_document(out, out.config['output_file'])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# Tries to retrieve a page size name based on the media box. If this is not possible, the
|
129
|
+
# returned page size name consists of width x height.
|
130
|
+
def page_size_name(media_box)
|
131
|
+
@page_name_cache ||= {}
|
132
|
+
return @page_name_cache[media_box] if @page_name_cache.key?(media_box)
|
133
|
+
|
134
|
+
paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, box|
|
135
|
+
box.each_with_index.all? {|entry, index| (entry - media_box[index]).abs < 5 }
|
136
|
+
end
|
137
|
+
|
138
|
+
@page_name_cache[media_box] =
|
139
|
+
paper_size ? paper_size[0] : "%.0fx%.0f" % media_box.values_at(2, 3)
|
140
|
+
end
|
141
|
+
|
82
142
|
end
|
83
143
|
|
84
144
|
end
|