hexapdf 0.12.3 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +31 -12
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +30 -25
- data/lib/hexapdf/parser.rb +65 -3
- data/lib/hexapdf/pdf_array.rb +9 -2
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +6 -4
- data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +3 -1
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +1 -1
- data/lib/hexapdf/type/form.rb +2 -1
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +16 -7
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +3 -4
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +4 -4
- data/test/hexapdf/test_object.rb +44 -26
- data/test/hexapdf/test_parser.rb +115 -55
- data/test/hexapdf/test_pdf_array.rb +7 -0
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +4 -4
- data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +1 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- metadata +5 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77d9895ece62ee8b8df5afb5a44035868c6b33eb7b43a7cb5b85bd730bee56bc
|
4
|
+
data.tar.gz: 16b61502ef5c35df588c6a2fd53e1099b80f584276c07ec7a3c23343705ccb42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 490fca7cfa535ebfab2af613dacf3ff9e9a6b0b2b76c865ceeb98a45d1cc7b668772dbd3a46ea00a5b82eb1374f36cb42b604754bbb1353e103cde726bc7e886
|
7
|
+
data.tar.gz: 1f85edaa9b2214218bb586d8c5409c9f741893fa0effdb60c64d294be231893ed96f1efa9db286d9a98f094caeb3fb272d318782dc7977adb8b252425d72cfb4
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1
|
+
## 0.13.0 - 2020-11-15
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
* Cross-reference table reconstruction for damaged PDFs, controllable via the
|
6
|
+
new 'parser.try_xref_reconstruction' option
|
7
|
+
* Two new `hexapdf inspect` commands for showing page objects and page content
|
8
|
+
streams by page number
|
9
|
+
* Flag `--check` to the CLI command `hexapdf info` for checking a file for
|
10
|
+
parse and validation errors
|
11
|
+
* [HexaPDF::Type::AcroForm::Field#embedded_widget?] for checking if a widget is
|
12
|
+
embedded in the field object
|
13
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] for deleting a widget
|
14
|
+
* [HexaPDF::PDFArray#delete] for deleting an object from a PDF array
|
15
|
+
* [HexaPDF::Type::Page#ancestor_nodes] for retrieving all ancestor page tree
|
16
|
+
nodes of a page
|
17
|
+
* [HexaPDF::Type::PageTreeNode#move_page] for moving a page to another index
|
18
|
+
|
19
|
+
### Changed
|
20
|
+
|
21
|
+
* **Breaking change**: Overhauled document/object validation interfaces and
|
22
|
+
internals to be more similar and to allow for reporting of multiple validation
|
23
|
+
problems
|
24
|
+
* Validation of TrueType fonts to ignore missing fields if the font name
|
25
|
+
suggests that the font is one of the standard 14 PDF fonts
|
26
|
+
* Option `-p` of CLI command `hexapdf image2pdf` to also allow lowercase page
|
27
|
+
size names
|
28
|
+
|
29
|
+
### Fixed
|
30
|
+
|
31
|
+
* Reporting of cross-reference section entry parsing error
|
32
|
+
* PDF version used by default for dictionary fields
|
33
|
+
* Error in CLI command `hexapdf inspect` when parsing an invalid object number
|
34
|
+
* Output of error messages in CLI command `hexapdf inspect` to go to `$stderr`
|
35
|
+
* Bug in [HexaPDF::Type::AcroForm::TextField] validation due to missing nil
|
36
|
+
handling
|
37
|
+
|
38
|
+
|
1
39
|
## 0.12.3 - 2020-08-22
|
2
40
|
|
3
41
|
### Changed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -100,6 +100,7 @@ module HexaPDF
|
|
100
100
|
def pdf_options(password)
|
101
101
|
hash = {decryption_opts: {password: password}, config: {}}
|
102
102
|
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
|
103
|
+
hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
|
103
104
|
hash[:config]['parser.on_correctable_error'] =
|
104
105
|
if command_parser.strict
|
105
106
|
proc { true }
|
@@ -277,14 +278,15 @@ module HexaPDF
|
|
277
278
|
#
|
278
279
|
# See: #define_encryption_options
|
279
280
|
def apply_encryption_options(doc)
|
280
|
-
|
281
|
+
case @out_options.encryption
|
282
|
+
when :add
|
281
283
|
doc.encrypt(algorithm: @out_options.enc_algorithm,
|
282
284
|
key_length: @out_options.enc_key_length,
|
283
285
|
force_v4: @out_options.enc_force_v4,
|
284
286
|
permissions: @out_options.enc_permissions,
|
285
287
|
owner_password: @out_options.enc_owner_pwd,
|
286
288
|
user_password: @out_options.enc_user_pwd)
|
287
|
-
|
289
|
+
when :remove
|
288
290
|
doc.encrypt(name: nil)
|
289
291
|
end
|
290
292
|
end
|
@@ -64,7 +64,8 @@ module HexaPDF
|
|
64
64
|
orientation = :landscape
|
65
65
|
page_size.delete_suffix!('-landscape')
|
66
66
|
end
|
67
|
-
|
67
|
+
page_size = page_size.capitalize.to_sym
|
68
|
+
HexaPDF::Type::Page.media_box(page_size, orientation: orientation)
|
68
69
|
end
|
69
70
|
end
|
70
71
|
options.on("--[no-]auto-rotate", "Automatically rotate pages based on image dimesions. " \
|
data/lib/hexapdf/cli/info.rb
CHANGED
@@ -55,13 +55,21 @@ module HexaPDF
|
|
55
55
|
long_desc(<<~EOF)
|
56
56
|
This command extracts information from the Info dictionary of a PDF file as well
|
57
57
|
as some other useful information like the used PDF version and encryption information.
|
58
|
+
|
59
|
+
If the --check option is specified, the PDF file will also be checked for parse and
|
60
|
+
validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
|
61
|
+
file by correcting the errors.
|
58
62
|
EOF
|
63
|
+
options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |check|
|
64
|
+
@check_file = check
|
65
|
+
end
|
59
66
|
options.on("--password PASSWORD", "-p", String,
|
60
67
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
61
68
|
@password = (pwd == '-' ? read_password : pwd)
|
62
69
|
end
|
63
70
|
@password = nil
|
64
71
|
@auto_decrypt = true
|
72
|
+
@check_file = false
|
65
73
|
end
|
66
74
|
|
67
75
|
def execute(file) #:nodoc:
|
@@ -79,8 +87,30 @@ module HexaPDF
|
|
79
87
|
options = pdf_options(@password)
|
80
88
|
options[:config]['document.auto_decrypt'] = @auto_decrypt
|
81
89
|
HexaPDF::Document.open(file, **options) do |doc|
|
90
|
+
if @check_file
|
91
|
+
indirect_object = nil
|
92
|
+
validation_block = lambda do |msg, correctable, object|
|
93
|
+
object = indirect_object unless object.indirect? || object.type == :XXTrailer
|
94
|
+
object_type = if object.type == :XXTrailer
|
95
|
+
'trailer'
|
96
|
+
elsif !object.type.to_s.start_with?("XX")
|
97
|
+
"object type #{object.type} (#{object.oid},#{object.gen})"
|
98
|
+
else
|
99
|
+
"object (#{object.oid},#{object.gen})"
|
100
|
+
end
|
101
|
+
object_type = "sub-object of #{object_type}" if object == indirect_object
|
102
|
+
puts "WARNING: Validation error for #{object_type}: #{msg} " \
|
103
|
+
"#{correctable ? '(correctable)' : ''}"
|
104
|
+
end
|
105
|
+
doc.trailer.validate(auto_correct: true, &validation_block)
|
106
|
+
doc.each(only_current: false, only_loaded: false) do |obj|
|
107
|
+
indirect_object = obj
|
108
|
+
obj.validate(auto_correct: true, &validation_block)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
82
112
|
output_line("File name", file)
|
83
|
-
output_line("File size", File.stat(file).size.to_s
|
113
|
+
output_line("File size", File.stat(file).size.to_s << " bytes")
|
84
114
|
@auto_decrypt && INFO_KEYS.each do |name|
|
85
115
|
next unless doc.trailer.info.key?(name)
|
86
116
|
output_line(name.to_s, doc.trailer.info[name].to_s)
|
@@ -110,10 +140,29 @@ module HexaPDF
|
|
110
140
|
else
|
111
141
|
raise
|
112
142
|
end
|
143
|
+
rescue HexaPDF::MalformedPDFError => e
|
144
|
+
$stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
|
145
|
+
$stderr.puts " #{e}"
|
146
|
+
end
|
147
|
+
|
148
|
+
# Use custom options if we are checking the PDF file for errors.
|
149
|
+
def pdf_options(password)
|
150
|
+
if @check_file
|
151
|
+
options = {decryption_opts: {password: password}, config: {}}
|
152
|
+
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = false
|
153
|
+
options[:config]['parser.try_xref_reconstruction'] = true
|
154
|
+
options[:config]['parser.on_correctable_error'] = lambda do |_, msg, pos|
|
155
|
+
puts "WARNING: Parse error at position #{pos}: #{msg}"
|
156
|
+
false
|
157
|
+
end
|
158
|
+
options
|
159
|
+
else
|
160
|
+
super
|
161
|
+
end
|
113
162
|
end
|
114
163
|
|
115
164
|
def output_line(header, text) #:nodoc:
|
116
|
-
puts((header
|
165
|
+
puts(("#{header}:").ljust(COLUMN_WIDTH) << text)
|
117
166
|
end
|
118
167
|
|
119
168
|
end
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
@@ -122,22 +122,22 @@ module HexaPDF
|
|
122
122
|
case command
|
123
123
|
when /^\d+(,\d+)?$/, 'o', 'object'
|
124
124
|
arg = (command.start_with?('o') ? data.shift : command)
|
125
|
-
obj = pdf_object_from_string_reference(arg) rescue puts($!.message)
|
126
|
-
if obj
|
125
|
+
obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
|
126
|
+
if obj&.data&.stream && command_parser.verbosity_info?
|
127
127
|
$stderr.puts("Note: Object also has stream data")
|
128
128
|
end
|
129
129
|
serialize(obj.value, recursive: false) if obj
|
130
130
|
|
131
131
|
when 'r', 'recursive'
|
132
132
|
obj = if (obj = data.shift)
|
133
|
-
pdf_object_from_string_reference(obj) rescue puts($!.message)
|
133
|
+
pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
|
134
134
|
else
|
135
135
|
@doc.trailer
|
136
136
|
end
|
137
137
|
serialize(obj.value, recursive: true) if obj
|
138
138
|
|
139
139
|
when 's', 'stream', 'raw', 'raw-stream'
|
140
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message)) &&
|
140
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
|
141
141
|
obj.kind_of?(HexaPDF::Stream)
|
142
142
|
source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
|
143
143
|
while source.alive? && (stream_data = source.resume)
|
@@ -148,7 +148,7 @@ module HexaPDF
|
|
148
148
|
end
|
149
149
|
|
150
150
|
when 'x', 'xref'
|
151
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message))
|
151
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
|
152
152
|
@doc.revisions.reverse_each do |rev|
|
153
153
|
if (xref = rev.xref(obj))
|
154
154
|
puts xref
|
@@ -178,6 +178,26 @@ module HexaPDF
|
|
178
178
|
puts str
|
179
179
|
end
|
180
180
|
|
181
|
+
when 'po', 'ps'
|
182
|
+
page_number_str = data.shift
|
183
|
+
unless page_number_str
|
184
|
+
$stderr.puts("Error: Missing PAGE argument to #{command}")
|
185
|
+
next
|
186
|
+
end
|
187
|
+
page_number = parse_pages_specification(page_number_str, @doc.pages.count).first&.first
|
188
|
+
unless page_number
|
189
|
+
$stderr.puts("Error: Invalid page number #{page_number_str}")
|
190
|
+
next
|
191
|
+
end
|
192
|
+
page = @doc.pages[page_number]
|
193
|
+
if command.start_with?('ps')
|
194
|
+
$stdout.write(page.contents)
|
195
|
+
else
|
196
|
+
puts "#{page.oid} #{page.gen} obj"
|
197
|
+
serialize(page.value, recursive: false)
|
198
|
+
puts "endobj"
|
199
|
+
end
|
200
|
+
|
181
201
|
when 'pc', 'page-count'
|
182
202
|
puts @doc.pages.count
|
183
203
|
|
@@ -217,9 +237,9 @@ module HexaPDF
|
|
217
237
|
if str.nil?
|
218
238
|
raise "Error: Missing argument object identifier OID[,GEN]"
|
219
239
|
elsif !str.match?(/^\d+(,\d+)?$/)
|
220
|
-
raise "Error: Invalid argument: Must be of form OID[,GEN]"
|
240
|
+
raise "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
|
221
241
|
elsif !(obj = @doc.object(pdf_reference_from_string(str)))
|
222
|
-
raise "Error: No object with the given object identifier found"
|
242
|
+
raise "Error: No object with the given object identifier '#{str}' found"
|
223
243
|
else
|
224
244
|
obj
|
225
245
|
end
|
@@ -240,7 +260,7 @@ module HexaPDF
|
|
240
260
|
puts "<<"
|
241
261
|
(recursive ? val.sort : val).each do |k, v|
|
242
262
|
next if v.nil? || (v.respond_to?(:null?) && v.null?)
|
243
|
-
print ' ' * (indent + 1)
|
263
|
+
print '%s%s ' % [' ' * (indent + 1), @serializer.serialize_symbol(k)]
|
244
264
|
serialize(v, recursive: recursive, seen: seen, indent: indent + 1)
|
245
265
|
puts
|
246
266
|
end
|
@@ -283,6 +303,8 @@ module HexaPDF
|
|
283
303
|
["c[atalog]", "Print the catalog dictionary"],
|
284
304
|
["t[railer]", "Print the trailer dictionary"],
|
285
305
|
["p[ages] [RANGE]", "Print information about pages"],
|
306
|
+
["po PAGE", "Print the page object"],
|
307
|
+
["ps PAGE", "Print the content stream of the page"],
|
286
308
|
["pc | page-count", "Print the number of pages"],
|
287
309
|
["search REGEXP", "Print objects matching the pattern"],
|
288
310
|
["h[elp]", "Show the help"],
|
data/lib/hexapdf/cli/merge.rb
CHANGED
@@ -122,7 +122,7 @@ module HexaPDF
|
|
122
122
|
|
123
123
|
# Assemble pages
|
124
124
|
target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
|
125
|
-
page_tree = target.add(Type: :Pages)
|
125
|
+
page_tree = target.add({Type: :Pages})
|
126
126
|
import_pages(page_tree)
|
127
127
|
target.catalog[:Pages] = page_tree
|
128
128
|
remove_unused_pages(target)
|
@@ -334,6 +334,20 @@ module HexaPDF
|
|
334
334
|
# The value needs to be an object that responds to \#call(document, message, position) and
|
335
335
|
# returns +true+ if an error should be raised.
|
336
336
|
#
|
337
|
+
# parser.try_xref_reconstruction::
|
338
|
+
# A boolean specifying whether non-recoverable parsing errors should lead to reconstructing the
|
339
|
+
# main cross-reference table.
|
340
|
+
#
|
341
|
+
# The reconstructed cross-reference table might make damaged files usable but there is no way
|
342
|
+
# to ensure that the reconstructed file is equal to the undamaged original file (though
|
343
|
+
# generally it works out).
|
344
|
+
#
|
345
|
+
# There is also the possibility that reconstructing doesn't work because the algorithm has to
|
346
|
+
# assume that the PDF was written in a certain way (which is recommended by the PDF
|
347
|
+
# specification).
|
348
|
+
#
|
349
|
+
# Defaults to +true+.
|
350
|
+
#
|
337
351
|
# sorted_tree.max_leaf_node_size::
|
338
352
|
# The maximum number of nodes that should be in a leaf node of a node tree.
|
339
353
|
#
|
@@ -412,6 +426,7 @@ module HexaPDF
|
|
412
426
|
'page.default_media_box' => :A4,
|
413
427
|
'page.default_media_orientation' => :portrait,
|
414
428
|
'parser.on_correctable_error' => proc { false },
|
429
|
+
'parser.try_xref_reconstruction' => true,
|
415
430
|
'sorted_tree.max_leaf_node_size' => 64,
|
416
431
|
'style.layers_map' => {
|
417
432
|
link: 'HexaPDF::Layout::Style::LinkLayer',
|
@@ -45,7 +45,7 @@ module HexaPDF
|
|
45
45
|
# all either in clockwise or counterclockwise direction and optionally inclined in respect to
|
46
46
|
# the x-axis.
|
47
47
|
#
|
48
|
-
# See: ELL - https://
|
48
|
+
# See: ELL - https://spaceroots.org/documents/ellipse/elliptical-arc.pdf
|
49
49
|
class Arc
|
50
50
|
|
51
51
|
include HexaPDF::Utils::MathHelpers
|
@@ -202,8 +202,8 @@ module HexaPDF
|
|
202
202
|
p2x_prime, p2y_prime = derivative_evaluate(eta2)
|
203
203
|
|
204
204
|
result << [p2x, p2y,
|
205
|
-
p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
|
206
|
-
|
205
|
+
{p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
|
206
|
+
p2: [p2x - alpha * p2x_prime, p2y - alpha * p2y_prime]}]
|
207
207
|
end
|
208
208
|
|
209
209
|
result
|
data/lib/hexapdf/dictionary.rb
CHANGED
@@ -97,7 +97,7 @@ module HexaPDF
|
|
97
97
|
#
|
98
98
|
# version:: Specifies the minimum version of the PDF specification needed for this value.
|
99
99
|
def self.define_field(name, type:, required: false, default: nil, indirect: nil,
|
100
|
-
allowed_values: nil, version: '1.
|
100
|
+
allowed_values: nil, version: '1.0')
|
101
101
|
@fields ||= {}
|
102
102
|
@fields[name] = Field.new(type, required: required, default: default, indirect: indirect,
|
103
103
|
allowed_values: allowed_values, version: version)
|
@@ -163,7 +163,7 @@ module HexaPDF
|
|
163
163
|
value[name] = field.default
|
164
164
|
end
|
165
165
|
value[name] = data = document.deref(data) if data.kind_of?(HexaPDF::Reference)
|
166
|
-
if data.
|
166
|
+
if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
|
167
167
|
data = data.value
|
168
168
|
end
|
169
169
|
if (result = field&.convert(data, document))
|
@@ -182,7 +182,7 @@ module HexaPDF
|
|
182
182
|
raise ArgumentError, "Only Symbol (Name) keys are allowed to be used in PDF dictionaries"
|
183
183
|
end
|
184
184
|
|
185
|
-
if value[name].
|
185
|
+
if value[name].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
|
186
186
|
!data.kind_of?(HexaPDF::Reference)
|
187
187
|
value[name].value = data
|
188
188
|
else
|
@@ -273,7 +273,7 @@ module HexaPDF
|
|
273
273
|
# Check that required fields are set
|
274
274
|
if field.required? && obj.nil?
|
275
275
|
yield("Required field #{name} is not set", field.default?)
|
276
|
-
self[name] = obj = field.default
|
276
|
+
self[name] = obj = field.default if field.default?
|
277
277
|
end
|
278
278
|
|
279
279
|
# Check if the document version is set high enough
|
@@ -151,17 +151,9 @@ module HexaPDF
|
|
151
151
|
# Returns a duplicated default value, automatically taking unduplicatable classes into
|
152
152
|
# account.
|
153
153
|
def default
|
154
|
-
|
154
|
+
@default.dup
|
155
155
|
end
|
156
156
|
|
157
|
-
# Returns +true+ if the default value can safely be duplicated with #dup.
|
158
|
-
def duplicatable_default?
|
159
|
-
@duplicatable_default ||= HexaPDF::Object::NOT_DUPLICATABLE_CLASSES.none? do |klass|
|
160
|
-
@default.kind_of?(klass)
|
161
|
-
end
|
162
|
-
end
|
163
|
-
private :duplicatable_default?
|
164
|
-
|
165
157
|
# Returns +true+ if the given object is valid for this field.
|
166
158
|
def valid_object?(obj)
|
167
159
|
type.any? {|t| obj.kind_of?(t) } ||
|
data/lib/hexapdf/document.rb
CHANGED
@@ -69,15 +69,35 @@ module HexaPDF
|
|
69
69
|
|
70
70
|
autoload(:Composer, 'hexapdf/composer')
|
71
71
|
|
72
|
+
# == HexaPDF::Document
|
73
|
+
#
|
72
74
|
# Represents one PDF document.
|
73
75
|
#
|
74
76
|
# A PDF document consists of (indirect) objects, so the main job of this class is to provide
|
75
77
|
# methods for working with these objects. However, since a PDF document may also be
|
76
78
|
# incrementally updated and can therefore contain one or more revisions, there are also methods
|
77
|
-
#
|
79
|
+
# for working with these revisions.
|
78
80
|
#
|
79
81
|
# Note: This class provides everything to work on PDF documents on a low-level basis. This means
|
80
|
-
# that there are no convenience methods for higher PDF functionality
|
82
|
+
# that there are no convenience methods for higher PDF functionality. Those can be found in the
|
83
|
+
# objects linked from here, like #catalog.
|
84
|
+
#
|
85
|
+
# == Known Messages
|
86
|
+
#
|
87
|
+
# The document object provides a basic message dispatch system via #register_listener and
|
88
|
+
# #dispatch_message.
|
89
|
+
#
|
90
|
+
# Following are the messages that are used by HexaPDF itself:
|
91
|
+
#
|
92
|
+
# :complete_objects::
|
93
|
+
# This message is called before the first step of writing a document. Listeners should
|
94
|
+
# complete PDF objects that are missing some information.
|
95
|
+
#
|
96
|
+
# For example, the font system uses this message to complete the font objects with
|
97
|
+
# information that is only available once all the used glyphs are known.
|
98
|
+
#
|
99
|
+
# :before_write::
|
100
|
+
# This message is called before a document is actually serialized and written.
|
81
101
|
class Document
|
82
102
|
|
83
103
|
autoload(:Pages, 'hexapdf/document/pages')
|
@@ -400,11 +420,11 @@ module HexaPDF
|
|
400
420
|
# object in the PDF document. The block may either accept only the object or the object and the
|
401
421
|
# revision it is in.
|
402
422
|
#
|
403
|
-
# By default, only the current version of each object is returned which implies that each
|
404
|
-
#
|
405
|
-
#
|
423
|
+
# By default, only the current version of each object is returned which implies that each object
|
424
|
+
# number is yielded exactly once. If the +only_current+ option is +false+, all stored objects
|
425
|
+
# from newest to oldest are returned, not only the current version of each object.
|
406
426
|
#
|
407
|
-
# The +
|
427
|
+
# The +only_current+ option can make a difference because the document can contain multiple
|
408
428
|
# revisions:
|
409
429
|
#
|
410
430
|
# * Multiple revisions may contain objects with the same object and generation numbers, e.g.
|
@@ -442,6 +462,9 @@ module HexaPDF
|
|
442
462
|
end
|
443
463
|
|
444
464
|
# Dispatches the message +name+ with the given arguments to all registered listeners.
|
465
|
+
#
|
466
|
+
# See the main Document documentation for an overview of messages that are used by HexaPDF
|
467
|
+
# itself.
|
445
468
|
def dispatch_message(name, *args)
|
446
469
|
@listeners[name]&.each {|obj| obj.call(*args) }
|
447
470
|
end
|
@@ -594,13 +617,9 @@ module HexaPDF
|
|
594
617
|
# If a block is given, it is called on validation problems.
|
595
618
|
#
|
596
619
|
# See HexaPDF::Object#validate for more information.
|
597
|
-
def validate(auto_correct: true, only_loaded: false) #:yield:
|
598
|
-
cur_obj = trailer
|
599
|
-
block = (block_given? ? lambda {|msg, correctable| yield(cur_obj, msg, correctable) } : nil)
|
600
|
-
|
620
|
+
def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
|
601
621
|
result = trailer.validate(auto_correct: auto_correct, &block)
|
602
622
|
each(only_current: false, only_loaded: only_loaded) do |obj|
|
603
|
-
cur_obj = obj
|
604
623
|
result &&= obj.validate(auto_correct: auto_correct, &block)
|
605
624
|
end
|
606
625
|
result
|
@@ -643,7 +662,7 @@ module HexaPDF
|
|
643
662
|
end
|
644
663
|
|
645
664
|
if validate
|
646
|
-
self.validate(auto_correct: true) do |
|
665
|
+
self.validate(auto_correct: true) do |msg, correctable, obj|
|
647
666
|
next if correctable
|
648
667
|
raise HexaPDF::Error, "Validation error for (#{obj.oid},#{obj.gen}): #{msg}"
|
649
668
|
end
|