hexapdf 0.12.3 → 0.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +132 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +12 -6
- data/lib/hexapdf/dictionary_fields.rb +2 -10
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/true_type/table/post.rb +15 -10
- data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
- data/lib/hexapdf/font_loader/from_file.rb +18 -8
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/importer.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +107 -7
- data/lib/hexapdf/pdf_array.rb +15 -5
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +37 -10
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +22 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
- data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +6 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +21 -12
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +5 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
- data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
- data/test/hexapdf/font_loader/test_from_file.rb +7 -0
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +8 -1
- data/test/hexapdf/test_dictionary_fields.rb +9 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +205 -51
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_serializer.rb +7 -0
- data/test/hexapdf/test_tokenizer.rb +28 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
- data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +8 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/type/test_resources.rb +6 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c43d8e9e117db1717ddfee73a54e4384743b8aa35863ab5bd19ffe57b8ce5674
|
|
4
|
+
data.tar.gz: 1020c8a3de8fcdf201500c1c0d22dfb99ed27daebac7baac92748f8127efc992
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e19eea4e88077afb7e8532fa6fe9ab2a03ffc5588749b72277462a971ebcec877ee72868d0ab698744117d46566be98e65c10225649d3bd1b4cd6e64e9625767
|
|
7
|
+
data.tar.gz: 6626a9feba0af0b46f293c1069a0d53b458a0dc29d08b82253f14f9bb98a878b914042faccc433b73f2f0e35d4da47c58a1bdebd2f3dee2fefb24c076a4e6bb3
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,135 @@
|
|
|
1
|
+
## 0.14.3 - 2021-02-16
|
|
2
|
+
|
|
3
|
+
### Fixed
|
|
4
|
+
|
|
5
|
+
* Bug in [HexaPDF::Font::TrueType::Subsetter#use_glyph] which led to corrupt
|
|
6
|
+
text output
|
|
7
|
+
* [HexaPDF::Serializer] to handle an infinite recursion problem
|
|
8
|
+
* Cross-reference table reconstruction to avoid an O(n^2) performance problem
|
|
9
|
+
* [HexaPDF::Type::Resources] validation to handle an invalid `/ProcSet` entry
|
|
10
|
+
containing a single value instead of an array
|
|
11
|
+
* Processing of invalid PDF files missing a required value in appearance streams
|
|
12
|
+
* Processing of invalid empty arrays that should be rectangles by converting
|
|
13
|
+
them to PDF null objects
|
|
14
|
+
* Processing of invalid PDF files containing indirect objects with offset 0
|
|
15
|
+
* Processing of invalid PDF files containing a space/CR or space/LF combination
|
|
16
|
+
after the 'stream' keyword
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
## 0.14.2 - 2021-01-22
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
|
|
23
|
+
* [HexaPDF::Font::TrueType::Subsetter#use_glyph] to really avoid using subset
|
|
24
|
+
glyph ID 41 (`)`)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
## 0.14.1 - 2021-01-21
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
|
|
31
|
+
* Validation message when checking for allowed values to include the invalid
|
|
32
|
+
object
|
|
33
|
+
* [HexaPDF::FontLoader::FromFile] to allow (re)using an existing font object
|
|
34
|
+
* [HexaPDF::Importer] internals to avoid problems with retained memory
|
|
35
|
+
|
|
36
|
+
### Fixed
|
|
37
|
+
|
|
38
|
+
* Parsing of invalid PDF files where whitespace is missing after the integer
|
|
39
|
+
value of an indirect object
|
|
40
|
+
* [HexaPDF::Dictionary] so that adding new key-value pairs during validation is
|
|
41
|
+
possible
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
## 0.14.0 - 2020-12-30
|
|
45
|
+
|
|
46
|
+
### Added
|
|
47
|
+
|
|
48
|
+
* Support for creating AcroForm multiline text fields and their appearances
|
|
49
|
+
* Support for creating AcroForm comb text fields and their appearances
|
|
50
|
+
* Support for creating AcroForm password fields and their appearances
|
|
51
|
+
* Support for creating AcroForm file select fields and their appearances
|
|
52
|
+
* Support for creating AcroForm list box appearances
|
|
53
|
+
* [HexaPDF::Type::AcroForm::ChoiceField#list_box_top_index] and its setter
|
|
54
|
+
method
|
|
55
|
+
* [HexaPDF::Type::AcroForm::ChoiceField#update_widgets] to create appearances if
|
|
56
|
+
they don't exist
|
|
57
|
+
* Methods for caching data to [HexaPDF::Object]
|
|
58
|
+
* Support for splitting by page size to CLI command `hexapdf split`
|
|
59
|
+
|
|
60
|
+
### Changed
|
|
61
|
+
|
|
62
|
+
* [HexaPDF::Utils::ObjectHash#oids] to be public instead of private
|
|
63
|
+
* Cross-reference table parsing to handle invalidly numbered main sections
|
|
64
|
+
* [HexaPDF::Document#cache] and [HexaPDF::Object#cache] to allow updating
|
|
65
|
+
values for existing keys
|
|
66
|
+
* Appearance creation methods of AcroForm objects to allow forcing the creation
|
|
67
|
+
of new appearances
|
|
68
|
+
* [HexaPDF::Type::AcroForm::AppearanceGenerator#create_text_appearances] to
|
|
69
|
+
re-use existing form objects
|
|
70
|
+
* AcroForm field creation methods to allow specifying often used field
|
|
71
|
+
properties
|
|
72
|
+
|
|
73
|
+
### Fixed
|
|
74
|
+
|
|
75
|
+
* Missing usage of `:sort` flag for AcroForm choice fields
|
|
76
|
+
* Setting the `/I` field for AcroForm list boxes with multiple selection
|
|
77
|
+
* [HexaPDF::Layout::TextLayouter::SimpleLineWrapping] to remove glue items
|
|
78
|
+
(whitespace) before a hard line break
|
|
79
|
+
* Infinite loop when reconstructing the cross-reference table
|
|
80
|
+
* [HexaPDF::Type::AcroForm::ChoiceField] to support export values for option
|
|
81
|
+
items
|
|
82
|
+
* AcroForm text field appearance creation to only create a new appearance if the
|
|
83
|
+
field's value has changed
|
|
84
|
+
* AcroForm choice field appearance creation to only create a new appearance if
|
|
85
|
+
the involved dictionary fields' values have changed
|
|
86
|
+
* [HexaPDF::Type::AcroForm::ChoiceField#list_box_top_index=] to raise an error
|
|
87
|
+
if no option items are set
|
|
88
|
+
* [HexaPDF::PDFArray#to_ary] to return an array with preprocessed values
|
|
89
|
+
* [HexaPDF::Type::Form#contents=] to clear cached values to avoid returning e.g.
|
|
90
|
+
an invalid canvas object later
|
|
91
|
+
* [HexaPDF::Type::AcroForm::ButtonField#update_widgets] to create appearances if
|
|
92
|
+
they don't exist
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
## 0.13.0 - 2020-11-15
|
|
96
|
+
|
|
97
|
+
### Added
|
|
98
|
+
|
|
99
|
+
* Cross-reference table reconstruction for damaged PDFs, controllable via the
|
|
100
|
+
new 'parser.try_xref_reconstruction' option
|
|
101
|
+
* Two new `hexapdf inspect` commands for showing page objects and page content
|
|
102
|
+
streams by page number
|
|
103
|
+
* Flag `--check` to the CLI command `hexapdf info` for checking a file for
|
|
104
|
+
parse and validation errors
|
|
105
|
+
* [HexaPDF::Type::AcroForm::Field#embedded_widget?] for checking if a widget is
|
|
106
|
+
embedded in the field object
|
|
107
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] for deleting a widget
|
|
108
|
+
* [HexaPDF::PDFArray#delete] for deleting an object from a PDF array
|
|
109
|
+
* [HexaPDF::Type::Page#ancestor_nodes] for retrieving all ancestor page tree
|
|
110
|
+
nodes of a page
|
|
111
|
+
* [HexaPDF::Type::PageTreeNode#move_page] for moving a page to another index
|
|
112
|
+
|
|
113
|
+
### Changed
|
|
114
|
+
|
|
115
|
+
* **Breaking change**: Overhauled document/object validation interfaces and
|
|
116
|
+
internals to be more similar and to allow for reporting of multiple validation
|
|
117
|
+
problems
|
|
118
|
+
* Validation of TrueType fonts to ignore missing fields if the font name
|
|
119
|
+
suggests that the font is one of the standard 14 PDF fonts
|
|
120
|
+
* Option `-p` of CLI command `hexapdf image2pdf` to also allow lowercase page
|
|
121
|
+
size names
|
|
122
|
+
|
|
123
|
+
### Fixed
|
|
124
|
+
|
|
125
|
+
* Reporting of cross-reference section entry parsing error
|
|
126
|
+
* PDF version used by default for dictionary fields
|
|
127
|
+
* Error in CLI command `hexapdf inspect` when parsing an invalid object number
|
|
128
|
+
* Output of error messages in CLI command `hexapdf inspect` to go to `$stderr`
|
|
129
|
+
* Bug in [HexaPDF::Type::AcroForm::TextField] validation due to missing nil
|
|
130
|
+
handling
|
|
131
|
+
|
|
132
|
+
|
|
1
133
|
## 0.12.3 - 2020-08-22
|
|
2
134
|
|
|
3
135
|
### Changed
|
data/examples/019-acro_form.rb
CHANGED
|
@@ -42,10 +42,47 @@ rb = form.create_radio_button("Radio")
|
|
|
42
42
|
end
|
|
43
43
|
rb.field_value = :button0
|
|
44
44
|
|
|
45
|
-
canvas.text("Text
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
tx.
|
|
45
|
+
canvas.text("Text fields", at: [50, 450])
|
|
46
|
+
|
|
47
|
+
canvas.text("Single line", at: [70, 420])
|
|
48
|
+
tx = form.create_text_field("Single Line", font_size: 16)
|
|
49
|
+
widget = tx.create_widget(page, Rect: [200, 415, 500, 435])
|
|
49
50
|
tx.field_value = "A sample test string!"
|
|
50
51
|
|
|
52
|
+
canvas.text("Multiline", at: [70, 390])
|
|
53
|
+
tx = form.create_multiline_text_field("Multiline", font_size: 0, align: :right)
|
|
54
|
+
widget = tx.create_widget(page, Rect: [200, 325, 500, 405])
|
|
55
|
+
widget.border_style(color: 0, width: 1)
|
|
56
|
+
tx.field_value = "A sample test string! " * 30 + "\nNew line\n\nAnother line"
|
|
57
|
+
|
|
58
|
+
canvas.text("Password", at: [70, 300])
|
|
59
|
+
tx = form.create_password_field("Password", font_size: 16)
|
|
60
|
+
widget = tx.create_widget(page, Rect: [200, 295, 500, 315])
|
|
61
|
+
|
|
62
|
+
canvas.text("File select", at: [70, 270])
|
|
63
|
+
tx = form.create_file_select_field("File Select", font_size: 16)
|
|
64
|
+
widget = tx.create_widget(page, Rect: [200, 265, 500, 285])
|
|
65
|
+
tx.field_value = "path/to/file.pdf"
|
|
66
|
+
|
|
67
|
+
canvas.text("Comb", at: [70, 240])
|
|
68
|
+
tx = form.create_comb_text_field("Comb field", max_chars: 10, font_size: 16, align: :center)
|
|
69
|
+
widget = tx.create_widget(page, Rect: [200, 220, 500, 255])
|
|
70
|
+
widget.border_style(color: [30, 128, 0], width: 1)
|
|
71
|
+
tx.field_value = 'Hello'
|
|
72
|
+
|
|
73
|
+
canvas.text("Combo Box", at: [50, 170])
|
|
74
|
+
cb = form.create_combo_box("Combo Box", font_size: 12, editable: true,
|
|
75
|
+
option_items: ['Value 1', 'Another value', 'Choose me!'])
|
|
76
|
+
widget = cb.create_widget(page, Rect: [200, 150, 500, 185])
|
|
77
|
+
widget.border_style(width: 1)
|
|
78
|
+
cb.field_value = 'Another value'
|
|
79
|
+
|
|
80
|
+
canvas.text("List Box", at: [50, 120])
|
|
81
|
+
lb = form.create_list_box("List Box", font_size: 15, align: :center, multi_select: true,
|
|
82
|
+
option_items: 1.upto(7).map {|i| "Value #{i}" })
|
|
83
|
+
widget = lb.create_widget(page, Rect: [200, 50, 500, 135])
|
|
84
|
+
widget.border_style(width: 1)
|
|
85
|
+
lb.list_box_top_index = 1
|
|
86
|
+
lb.field_value = ['Value 6', 'Value 2']
|
|
87
|
+
|
|
51
88
|
doc.write('acro_form.pdf', optimize: true)
|
data/lib/hexapdf/cli/command.rb
CHANGED
|
@@ -100,6 +100,7 @@ module HexaPDF
|
|
|
100
100
|
def pdf_options(password)
|
|
101
101
|
hash = {decryption_opts: {password: password}, config: {}}
|
|
102
102
|
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
|
|
103
|
+
hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
|
|
103
104
|
hash[:config]['parser.on_correctable_error'] =
|
|
104
105
|
if command_parser.strict
|
|
105
106
|
proc { true }
|
|
@@ -277,14 +278,15 @@ module HexaPDF
|
|
|
277
278
|
#
|
|
278
279
|
# See: #define_encryption_options
|
|
279
280
|
def apply_encryption_options(doc)
|
|
280
|
-
|
|
281
|
+
case @out_options.encryption
|
|
282
|
+
when :add
|
|
281
283
|
doc.encrypt(algorithm: @out_options.enc_algorithm,
|
|
282
284
|
key_length: @out_options.enc_key_length,
|
|
283
285
|
force_v4: @out_options.enc_force_v4,
|
|
284
286
|
permissions: @out_options.enc_permissions,
|
|
285
287
|
owner_password: @out_options.enc_owner_pwd,
|
|
286
288
|
user_password: @out_options.enc_user_pwd)
|
|
287
|
-
|
|
289
|
+
when :remove
|
|
288
290
|
doc.encrypt(name: nil)
|
|
289
291
|
end
|
|
290
292
|
end
|
|
@@ -64,7 +64,8 @@ module HexaPDF
|
|
|
64
64
|
orientation = :landscape
|
|
65
65
|
page_size.delete_suffix!('-landscape')
|
|
66
66
|
end
|
|
67
|
-
|
|
67
|
+
page_size = page_size.capitalize.to_sym
|
|
68
|
+
HexaPDF::Type::Page.media_box(page_size, orientation: orientation)
|
|
68
69
|
end
|
|
69
70
|
end
|
|
70
71
|
options.on("--[no-]auto-rotate", "Automatically rotate pages based on image dimesions. " \
|
data/lib/hexapdf/cli/info.rb
CHANGED
|
@@ -55,13 +55,21 @@ module HexaPDF
|
|
|
55
55
|
long_desc(<<~EOF)
|
|
56
56
|
This command extracts information from the Info dictionary of a PDF file as well
|
|
57
57
|
as some other useful information like the used PDF version and encryption information.
|
|
58
|
+
|
|
59
|
+
If the --check option is specified, the PDF file will also be checked for parse and
|
|
60
|
+
validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
|
|
61
|
+
file by correcting the errors.
|
|
58
62
|
EOF
|
|
63
|
+
options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |check|
|
|
64
|
+
@check_file = check
|
|
65
|
+
end
|
|
59
66
|
options.on("--password PASSWORD", "-p", String,
|
|
60
67
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
61
68
|
@password = (pwd == '-' ? read_password : pwd)
|
|
62
69
|
end
|
|
63
70
|
@password = nil
|
|
64
71
|
@auto_decrypt = true
|
|
72
|
+
@check_file = false
|
|
65
73
|
end
|
|
66
74
|
|
|
67
75
|
def execute(file) #:nodoc:
|
|
@@ -79,8 +87,30 @@ module HexaPDF
|
|
|
79
87
|
options = pdf_options(@password)
|
|
80
88
|
options[:config]['document.auto_decrypt'] = @auto_decrypt
|
|
81
89
|
HexaPDF::Document.open(file, **options) do |doc|
|
|
90
|
+
if @check_file
|
|
91
|
+
indirect_object = nil
|
|
92
|
+
validation_block = lambda do |msg, correctable, object|
|
|
93
|
+
object = indirect_object unless object.indirect? || object.type == :XXTrailer
|
|
94
|
+
object_type = if object.type == :XXTrailer
|
|
95
|
+
'trailer'
|
|
96
|
+
elsif !object.type.to_s.start_with?("XX")
|
|
97
|
+
"object type #{object.type} (#{object.oid},#{object.gen})"
|
|
98
|
+
else
|
|
99
|
+
"object (#{object.oid},#{object.gen})"
|
|
100
|
+
end
|
|
101
|
+
object_type = "sub-object of #{object_type}" if object == indirect_object
|
|
102
|
+
puts "WARNING: Validation error for #{object_type}: #{msg} " \
|
|
103
|
+
"#{correctable ? '(correctable)' : ''}"
|
|
104
|
+
end
|
|
105
|
+
doc.trailer.validate(auto_correct: true, &validation_block)
|
|
106
|
+
doc.each(only_current: false, only_loaded: false) do |obj|
|
|
107
|
+
indirect_object = obj
|
|
108
|
+
obj.validate(auto_correct: true, &validation_block)
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
82
112
|
output_line("File name", file)
|
|
83
|
-
output_line("File size", File.stat(file).size.to_s
|
|
113
|
+
output_line("File size", File.stat(file).size.to_s << " bytes")
|
|
84
114
|
@auto_decrypt && INFO_KEYS.each do |name|
|
|
85
115
|
next unless doc.trailer.info.key?(name)
|
|
86
116
|
output_line(name.to_s, doc.trailer.info[name].to_s)
|
|
@@ -110,10 +140,29 @@ module HexaPDF
|
|
|
110
140
|
else
|
|
111
141
|
raise
|
|
112
142
|
end
|
|
143
|
+
rescue HexaPDF::MalformedPDFError => e
|
|
144
|
+
$stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
|
|
145
|
+
$stderr.puts " #{e}"
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Use custom options if we are checking the PDF file for errors.
|
|
149
|
+
def pdf_options(password)
|
|
150
|
+
if @check_file
|
|
151
|
+
options = {decryption_opts: {password: password}, config: {}}
|
|
152
|
+
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = false
|
|
153
|
+
options[:config]['parser.try_xref_reconstruction'] = true
|
|
154
|
+
options[:config]['parser.on_correctable_error'] = lambda do |_, msg, pos|
|
|
155
|
+
puts "WARNING: Parse error at position #{pos}: #{msg}"
|
|
156
|
+
false
|
|
157
|
+
end
|
|
158
|
+
options
|
|
159
|
+
else
|
|
160
|
+
super
|
|
161
|
+
end
|
|
113
162
|
end
|
|
114
163
|
|
|
115
164
|
def output_line(header, text) #:nodoc:
|
|
116
|
-
puts((header
|
|
165
|
+
puts(("#{header}:").ljust(COLUMN_WIDTH) << text)
|
|
117
166
|
end
|
|
118
167
|
|
|
119
168
|
end
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
|
@@ -122,22 +122,22 @@ module HexaPDF
|
|
|
122
122
|
case command
|
|
123
123
|
when /^\d+(,\d+)?$/, 'o', 'object'
|
|
124
124
|
arg = (command.start_with?('o') ? data.shift : command)
|
|
125
|
-
obj = pdf_object_from_string_reference(arg) rescue puts($!.message)
|
|
126
|
-
if obj
|
|
125
|
+
obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
|
|
126
|
+
if obj&.data&.stream && command_parser.verbosity_info?
|
|
127
127
|
$stderr.puts("Note: Object also has stream data")
|
|
128
128
|
end
|
|
129
129
|
serialize(obj.value, recursive: false) if obj
|
|
130
130
|
|
|
131
131
|
when 'r', 'recursive'
|
|
132
132
|
obj = if (obj = data.shift)
|
|
133
|
-
pdf_object_from_string_reference(obj) rescue puts($!.message)
|
|
133
|
+
pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
|
|
134
134
|
else
|
|
135
135
|
@doc.trailer
|
|
136
136
|
end
|
|
137
137
|
serialize(obj.value, recursive: true) if obj
|
|
138
138
|
|
|
139
139
|
when 's', 'stream', 'raw', 'raw-stream'
|
|
140
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message)) &&
|
|
140
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
|
|
141
141
|
obj.kind_of?(HexaPDF::Stream)
|
|
142
142
|
source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
|
|
143
143
|
while source.alive? && (stream_data = source.resume)
|
|
@@ -148,7 +148,7 @@ module HexaPDF
|
|
|
148
148
|
end
|
|
149
149
|
|
|
150
150
|
when 'x', 'xref'
|
|
151
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message))
|
|
151
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
|
|
152
152
|
@doc.revisions.reverse_each do |rev|
|
|
153
153
|
if (xref = rev.xref(obj))
|
|
154
154
|
puts xref
|
|
@@ -178,6 +178,26 @@ module HexaPDF
|
|
|
178
178
|
puts str
|
|
179
179
|
end
|
|
180
180
|
|
|
181
|
+
when 'po', 'ps'
|
|
182
|
+
page_number_str = data.shift
|
|
183
|
+
unless page_number_str
|
|
184
|
+
$stderr.puts("Error: Missing PAGE argument to #{command}")
|
|
185
|
+
next
|
|
186
|
+
end
|
|
187
|
+
page_number = parse_pages_specification(page_number_str, @doc.pages.count).first&.first
|
|
188
|
+
unless page_number
|
|
189
|
+
$stderr.puts("Error: Invalid page number #{page_number_str}")
|
|
190
|
+
next
|
|
191
|
+
end
|
|
192
|
+
page = @doc.pages[page_number]
|
|
193
|
+
if command.start_with?('ps')
|
|
194
|
+
$stdout.write(page.contents)
|
|
195
|
+
else
|
|
196
|
+
puts "#{page.oid} #{page.gen} obj"
|
|
197
|
+
serialize(page.value, recursive: false)
|
|
198
|
+
puts "endobj"
|
|
199
|
+
end
|
|
200
|
+
|
|
181
201
|
when 'pc', 'page-count'
|
|
182
202
|
puts @doc.pages.count
|
|
183
203
|
|
|
@@ -217,9 +237,9 @@ module HexaPDF
|
|
|
217
237
|
if str.nil?
|
|
218
238
|
raise "Error: Missing argument object identifier OID[,GEN]"
|
|
219
239
|
elsif !str.match?(/^\d+(,\d+)?$/)
|
|
220
|
-
raise "Error: Invalid argument: Must be of form OID[,GEN]"
|
|
240
|
+
raise "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
|
|
221
241
|
elsif !(obj = @doc.object(pdf_reference_from_string(str)))
|
|
222
|
-
raise "Error: No object with the given object identifier found"
|
|
242
|
+
raise "Error: No object with the given object identifier '#{str}' found"
|
|
223
243
|
else
|
|
224
244
|
obj
|
|
225
245
|
end
|
|
@@ -240,7 +260,7 @@ module HexaPDF
|
|
|
240
260
|
puts "<<"
|
|
241
261
|
(recursive ? val.sort : val).each do |k, v|
|
|
242
262
|
next if v.nil? || (v.respond_to?(:null?) && v.null?)
|
|
243
|
-
print ' ' * (indent + 1)
|
|
263
|
+
print '%s%s ' % [' ' * (indent + 1), @serializer.serialize_symbol(k)]
|
|
244
264
|
serialize(v, recursive: recursive, seen: seen, indent: indent + 1)
|
|
245
265
|
puts
|
|
246
266
|
end
|
|
@@ -283,6 +303,8 @@ module HexaPDF
|
|
|
283
303
|
["c[atalog]", "Print the catalog dictionary"],
|
|
284
304
|
["t[railer]", "Print the trailer dictionary"],
|
|
285
305
|
["p[ages] [RANGE]", "Print information about pages"],
|
|
306
|
+
["po PAGE", "Print the page object"],
|
|
307
|
+
["ps PAGE", "Print the content stream of the page"],
|
|
286
308
|
["pc | page-count", "Print the number of pages"],
|
|
287
309
|
["search REGEXP", "Print objects matching the pattern"],
|
|
288
310
|
["h[elp]", "Show the help"],
|
data/lib/hexapdf/cli/merge.rb
CHANGED
|
@@ -122,7 +122,7 @@ module HexaPDF
|
|
|
122
122
|
|
|
123
123
|
# Assemble pages
|
|
124
124
|
target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
|
|
125
|
-
page_tree = target.add(Type: :Pages)
|
|
125
|
+
page_tree = target.add({Type: :Pages})
|
|
126
126
|
import_pages(page_tree)
|
|
127
127
|
target.catalog[:Pages] = page_tree
|
|
128
128
|
remove_unused_pages(target)
|
data/lib/hexapdf/cli/split.rb
CHANGED
|
@@ -44,16 +44,28 @@ module HexaPDF
|
|
|
44
44
|
|
|
45
45
|
def initialize #:nodoc:
|
|
46
46
|
super('split', takes_commands: false)
|
|
47
|
-
short_desc("Split a PDF file
|
|
47
|
+
short_desc("Split a PDF file")
|
|
48
48
|
long_desc(<<~EOF)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
49
|
+
The default strategy is to split a PDF into individual pages, i.e. splitting is done by
|
|
50
|
+
page number. It is also possible to split by page size where pages with the same page size
|
|
51
|
+
get put into the same output PDF.
|
|
52
|
+
|
|
53
|
+
If no OUTPUT_SPEC is specified, the resulting PDF files are named <PDF>_0001.pdf,
|
|
54
|
+
<PDF>_0002.pdf, ... when splitting by page number and <PDF>_A4.pdf, <PDF>_Letter.pdf, ...
|
|
55
|
+
when splitting by page size.
|
|
56
|
+
|
|
57
|
+
To specify a custom name, provide the OUTPUT_SPEC argument. It can contain a printf-style
|
|
58
|
+
format definition like '%04d' to specify the place where the page number should be
|
|
59
|
+
inserted. In case of splitting by page size, the place of the format defintion is replaced
|
|
60
|
+
with the name of the page size, e.g. A4 or Letter.
|
|
53
61
|
|
|
54
62
|
The optimization and encryption options are applied to each created output file.
|
|
55
63
|
EOF
|
|
56
64
|
|
|
65
|
+
options.on("--strategy STRATEGY", "-s", [:page_number, :page_size], "Defines how the PDF " \
|
|
66
|
+
"file should be split: page_number or page_size (default: page_number)") do |s|
|
|
67
|
+
@strategy = s
|
|
68
|
+
end
|
|
57
69
|
options.on("--password PASSWORD", "-p", String,
|
|
58
70
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
59
71
|
@password = (pwd == '-' ? read_password : pwd)
|
|
@@ -62,23 +74,71 @@ module HexaPDF
|
|
|
62
74
|
define_encryption_options
|
|
63
75
|
|
|
64
76
|
@password = nil
|
|
77
|
+
@strategy = :page_number
|
|
65
78
|
end
|
|
66
79
|
|
|
67
80
|
def execute(pdf, output_spec = pdf.sub(/\.pdf$/i, '_%04d.pdf')) #:nodoc:
|
|
68
|
-
output_spec = output_spec.sub('%', '%<page>')
|
|
69
81
|
with_document(pdf, password: @password) do |doc|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
out.pages.add(out.import(page))
|
|
75
|
-
apply_encryption_options(out)
|
|
76
|
-
apply_optimization_options(out)
|
|
77
|
-
write_document(out, output_file)
|
|
82
|
+
if @strategy == :page_number
|
|
83
|
+
split_by_page_number(doc, output_spec)
|
|
84
|
+
else
|
|
85
|
+
split_by_page_size(doc, output_spec)
|
|
78
86
|
end
|
|
79
87
|
end
|
|
80
88
|
end
|
|
81
89
|
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
# Splits the document into individual pages.
|
|
93
|
+
def split_by_page_number(doc, output_spec)
|
|
94
|
+
doc.pages.each_with_index do |page, index|
|
|
95
|
+
output_file = sprintf(output_spec, index + 1)
|
|
96
|
+
maybe_raise_on_existing_file(output_file)
|
|
97
|
+
out = HexaPDF::Document.new
|
|
98
|
+
out.pages.add(out.import(page))
|
|
99
|
+
apply_encryption_options(out)
|
|
100
|
+
apply_optimization_options(out)
|
|
101
|
+
write_document(out, output_file)
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Splits the document into files based on the page sizes.
|
|
106
|
+
def split_by_page_size(doc, output_spec)
|
|
107
|
+
output_spec = output_spec.sub(/%.*?[a-zA-Z]/, '%s')
|
|
108
|
+
out_files = Hash.new do |hash, key|
|
|
109
|
+
output_file = sprintf(output_spec, key)
|
|
110
|
+
maybe_raise_on_existing_file(output_file)
|
|
111
|
+
out = HexaPDF::Document.new
|
|
112
|
+
out.config['output_file'] = output_file
|
|
113
|
+
hash[key] = out
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
doc.pages.each do |page|
|
|
117
|
+
out = out_files[page_size_name(page.box(:media).value)]
|
|
118
|
+
out.pages.add(out.import(page))
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
out_files.each_value do |out|
|
|
122
|
+
apply_encryption_options(out)
|
|
123
|
+
apply_optimization_options(out)
|
|
124
|
+
write_document(out, out.config['output_file'])
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Tries to retrieve a page size name based on the media box. If this is not possible, the
|
|
129
|
+
# returned page size name consists of width x height.
|
|
130
|
+
def page_size_name(media_box)
|
|
131
|
+
@page_name_cache ||= {}
|
|
132
|
+
return @page_name_cache[media_box] if @page_name_cache.key?(media_box)
|
|
133
|
+
|
|
134
|
+
paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, box|
|
|
135
|
+
box.each_with_index.all? {|entry, index| (entry - media_box[index]).abs < 5 }
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
@page_name_cache[media_box] =
|
|
139
|
+
paper_size ? paper_size[0] : "%.0fx%.0f" % media_box.values_at(2, 3)
|
|
140
|
+
end
|
|
141
|
+
|
|
82
142
|
end
|
|
83
143
|
|
|
84
144
|
end
|