hexapdf 0.12.3 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +31 -12
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +30 -25
- data/lib/hexapdf/parser.rb +65 -3
- data/lib/hexapdf/pdf_array.rb +9 -2
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +6 -4
- data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +3 -1
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +1 -1
- data/lib/hexapdf/type/form.rb +2 -1
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +16 -7
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +3 -4
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +4 -4
- data/test/hexapdf/test_object.rb +44 -26
- data/test/hexapdf/test_parser.rb +115 -55
- data/test/hexapdf/test_pdf_array.rb +7 -0
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +4 -4
- data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +1 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- metadata +5 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77d9895ece62ee8b8df5afb5a44035868c6b33eb7b43a7cb5b85bd730bee56bc
|
4
|
+
data.tar.gz: 16b61502ef5c35df588c6a2fd53e1099b80f584276c07ec7a3c23343705ccb42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 490fca7cfa535ebfab2af613dacf3ff9e9a6b0b2b76c865ceeb98a45d1cc7b668772dbd3a46ea00a5b82eb1374f36cb42b604754bbb1353e103cde726bc7e886
|
7
|
+
data.tar.gz: 1f85edaa9b2214218bb586d8c5409c9f741893fa0effdb60c64d294be231893ed96f1efa9db286d9a98f094caeb3fb272d318782dc7977adb8b252425d72cfb4
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1
|
+
## 0.13.0 - 2020-11-15
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
* Cross-reference table reconstruction for damaged PDFs, controllable via the
|
6
|
+
new 'parser.try_xref_reconstruction' option
|
7
|
+
* Two new `hexapdf inspect` commands for showing page objects and page content
|
8
|
+
streams by page number
|
9
|
+
* Flag `--check` to the CLI command `hexapdf info` for checking a file for
|
10
|
+
parse and validation errors
|
11
|
+
* [HexaPDF::Type::AcroForm::Field#embedded_widget?] for checking if a widget is
|
12
|
+
embedded in the field object
|
13
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] for deleting a widget
|
14
|
+
* [HexaPDF::PDFArray#delete] for deleting an object from a PDF array
|
15
|
+
* [HexaPDF::Type::Page#ancestor_nodes] for retrieving all ancestor page tree
|
16
|
+
nodes of a page
|
17
|
+
* [HexaPDF::Type::PageTreeNode#move_page] for moving a page to another index
|
18
|
+
|
19
|
+
### Changed
|
20
|
+
|
21
|
+
* **Breaking change**: Overhauled document/object validation interfaces and
|
22
|
+
internals to be more similar and to allow for reporting of multiple validation
|
23
|
+
problems
|
24
|
+
* Validation of TrueType fonts to ignore missing fields if the font name
|
25
|
+
suggests that the font is one of the standard 14 PDF fonts
|
26
|
+
* Option `-p` of CLI command `hexapdf image2pdf` to also allow lowercase page
|
27
|
+
size names
|
28
|
+
|
29
|
+
### Fixed
|
30
|
+
|
31
|
+
* Reporting of cross-reference section entry parsing error
|
32
|
+
* PDF version used by default for dictionary fields
|
33
|
+
* Error in CLI command `hexapdf inspect` when parsing an invalid object number
|
34
|
+
* Output of error messages in CLI command `hexapdf inspect` to go to `$stderr`
|
35
|
+
* Bug in [HexaPDF::Type::AcroForm::TextField] validation due to missing nil
|
36
|
+
handling
|
37
|
+
|
38
|
+
|
1
39
|
## 0.12.3 - 2020-08-22
|
2
40
|
|
3
41
|
### Changed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -100,6 +100,7 @@ module HexaPDF
|
|
100
100
|
def pdf_options(password)
|
101
101
|
hash = {decryption_opts: {password: password}, config: {}}
|
102
102
|
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
|
103
|
+
hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
|
103
104
|
hash[:config]['parser.on_correctable_error'] =
|
104
105
|
if command_parser.strict
|
105
106
|
proc { true }
|
@@ -277,14 +278,15 @@ module HexaPDF
|
|
277
278
|
#
|
278
279
|
# See: #define_encryption_options
|
279
280
|
def apply_encryption_options(doc)
|
280
|
-
|
281
|
+
case @out_options.encryption
|
282
|
+
when :add
|
281
283
|
doc.encrypt(algorithm: @out_options.enc_algorithm,
|
282
284
|
key_length: @out_options.enc_key_length,
|
283
285
|
force_v4: @out_options.enc_force_v4,
|
284
286
|
permissions: @out_options.enc_permissions,
|
285
287
|
owner_password: @out_options.enc_owner_pwd,
|
286
288
|
user_password: @out_options.enc_user_pwd)
|
287
|
-
|
289
|
+
when :remove
|
288
290
|
doc.encrypt(name: nil)
|
289
291
|
end
|
290
292
|
end
|
@@ -64,7 +64,8 @@ module HexaPDF
|
|
64
64
|
orientation = :landscape
|
65
65
|
page_size.delete_suffix!('-landscape')
|
66
66
|
end
|
67
|
-
|
67
|
+
page_size = page_size.capitalize.to_sym
|
68
|
+
HexaPDF::Type::Page.media_box(page_size, orientation: orientation)
|
68
69
|
end
|
69
70
|
end
|
70
71
|
options.on("--[no-]auto-rotate", "Automatically rotate pages based on image dimesions. " \
|
data/lib/hexapdf/cli/info.rb
CHANGED
@@ -55,13 +55,21 @@ module HexaPDF
|
|
55
55
|
long_desc(<<~EOF)
|
56
56
|
This command extracts information from the Info dictionary of a PDF file as well
|
57
57
|
as some other useful information like the used PDF version and encryption information.
|
58
|
+
|
59
|
+
If the --check option is specified, the PDF file will also be checked for parse and
|
60
|
+
validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
|
61
|
+
file by correcting the errors.
|
58
62
|
EOF
|
63
|
+
options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |check|
|
64
|
+
@check_file = check
|
65
|
+
end
|
59
66
|
options.on("--password PASSWORD", "-p", String,
|
60
67
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
61
68
|
@password = (pwd == '-' ? read_password : pwd)
|
62
69
|
end
|
63
70
|
@password = nil
|
64
71
|
@auto_decrypt = true
|
72
|
+
@check_file = false
|
65
73
|
end
|
66
74
|
|
67
75
|
def execute(file) #:nodoc:
|
@@ -79,8 +87,30 @@ module HexaPDF
|
|
79
87
|
options = pdf_options(@password)
|
80
88
|
options[:config]['document.auto_decrypt'] = @auto_decrypt
|
81
89
|
HexaPDF::Document.open(file, **options) do |doc|
|
90
|
+
if @check_file
|
91
|
+
indirect_object = nil
|
92
|
+
validation_block = lambda do |msg, correctable, object|
|
93
|
+
object = indirect_object unless object.indirect? || object.type == :XXTrailer
|
94
|
+
object_type = if object.type == :XXTrailer
|
95
|
+
'trailer'
|
96
|
+
elsif !object.type.to_s.start_with?("XX")
|
97
|
+
"object type #{object.type} (#{object.oid},#{object.gen})"
|
98
|
+
else
|
99
|
+
"object (#{object.oid},#{object.gen})"
|
100
|
+
end
|
101
|
+
object_type = "sub-object of #{object_type}" if object == indirect_object
|
102
|
+
puts "WARNING: Validation error for #{object_type}: #{msg} " \
|
103
|
+
"#{correctable ? '(correctable)' : ''}"
|
104
|
+
end
|
105
|
+
doc.trailer.validate(auto_correct: true, &validation_block)
|
106
|
+
doc.each(only_current: false, only_loaded: false) do |obj|
|
107
|
+
indirect_object = obj
|
108
|
+
obj.validate(auto_correct: true, &validation_block)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
82
112
|
output_line("File name", file)
|
83
|
-
output_line("File size", File.stat(file).size.to_s
|
113
|
+
output_line("File size", File.stat(file).size.to_s << " bytes")
|
84
114
|
@auto_decrypt && INFO_KEYS.each do |name|
|
85
115
|
next unless doc.trailer.info.key?(name)
|
86
116
|
output_line(name.to_s, doc.trailer.info[name].to_s)
|
@@ -110,10 +140,29 @@ module HexaPDF
|
|
110
140
|
else
|
111
141
|
raise
|
112
142
|
end
|
143
|
+
rescue HexaPDF::MalformedPDFError => e
|
144
|
+
$stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
|
145
|
+
$stderr.puts " #{e}"
|
146
|
+
end
|
147
|
+
|
148
|
+
# Use custom options if we are checking the PDF file for errors.
|
149
|
+
def pdf_options(password)
|
150
|
+
if @check_file
|
151
|
+
options = {decryption_opts: {password: password}, config: {}}
|
152
|
+
HexaPDF::GlobalConfiguration['filter.predictor.strict'] = false
|
153
|
+
options[:config]['parser.try_xref_reconstruction'] = true
|
154
|
+
options[:config]['parser.on_correctable_error'] = lambda do |_, msg, pos|
|
155
|
+
puts "WARNING: Parse error at position #{pos}: #{msg}"
|
156
|
+
false
|
157
|
+
end
|
158
|
+
options
|
159
|
+
else
|
160
|
+
super
|
161
|
+
end
|
113
162
|
end
|
114
163
|
|
115
164
|
def output_line(header, text) #:nodoc:
|
116
|
-
puts((header
|
165
|
+
puts(("#{header}:").ljust(COLUMN_WIDTH) << text)
|
117
166
|
end
|
118
167
|
|
119
168
|
end
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
@@ -122,22 +122,22 @@ module HexaPDF
|
|
122
122
|
case command
|
123
123
|
when /^\d+(,\d+)?$/, 'o', 'object'
|
124
124
|
arg = (command.start_with?('o') ? data.shift : command)
|
125
|
-
obj = pdf_object_from_string_reference(arg) rescue puts($!.message)
|
126
|
-
if obj
|
125
|
+
obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
|
126
|
+
if obj&.data&.stream && command_parser.verbosity_info?
|
127
127
|
$stderr.puts("Note: Object also has stream data")
|
128
128
|
end
|
129
129
|
serialize(obj.value, recursive: false) if obj
|
130
130
|
|
131
131
|
when 'r', 'recursive'
|
132
132
|
obj = if (obj = data.shift)
|
133
|
-
pdf_object_from_string_reference(obj) rescue puts($!.message)
|
133
|
+
pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
|
134
134
|
else
|
135
135
|
@doc.trailer
|
136
136
|
end
|
137
137
|
serialize(obj.value, recursive: true) if obj
|
138
138
|
|
139
139
|
when 's', 'stream', 'raw', 'raw-stream'
|
140
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message)) &&
|
140
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
|
141
141
|
obj.kind_of?(HexaPDF::Stream)
|
142
142
|
source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
|
143
143
|
while source.alive? && (stream_data = source.resume)
|
@@ -148,7 +148,7 @@ module HexaPDF
|
|
148
148
|
end
|
149
149
|
|
150
150
|
when 'x', 'xref'
|
151
|
-
if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message))
|
151
|
+
if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
|
152
152
|
@doc.revisions.reverse_each do |rev|
|
153
153
|
if (xref = rev.xref(obj))
|
154
154
|
puts xref
|
@@ -178,6 +178,26 @@ module HexaPDF
|
|
178
178
|
puts str
|
179
179
|
end
|
180
180
|
|
181
|
+
when 'po', 'ps'
|
182
|
+
page_number_str = data.shift
|
183
|
+
unless page_number_str
|
184
|
+
$stderr.puts("Error: Missing PAGE argument to #{command}")
|
185
|
+
next
|
186
|
+
end
|
187
|
+
page_number = parse_pages_specification(page_number_str, @doc.pages.count).first&.first
|
188
|
+
unless page_number
|
189
|
+
$stderr.puts("Error: Invalid page number #{page_number_str}")
|
190
|
+
next
|
191
|
+
end
|
192
|
+
page = @doc.pages[page_number]
|
193
|
+
if command.start_with?('ps')
|
194
|
+
$stdout.write(page.contents)
|
195
|
+
else
|
196
|
+
puts "#{page.oid} #{page.gen} obj"
|
197
|
+
serialize(page.value, recursive: false)
|
198
|
+
puts "endobj"
|
199
|
+
end
|
200
|
+
|
181
201
|
when 'pc', 'page-count'
|
182
202
|
puts @doc.pages.count
|
183
203
|
|
@@ -217,9 +237,9 @@ module HexaPDF
|
|
217
237
|
if str.nil?
|
218
238
|
raise "Error: Missing argument object identifier OID[,GEN]"
|
219
239
|
elsif !str.match?(/^\d+(,\d+)?$/)
|
220
|
-
raise "Error: Invalid argument: Must be of form OID[,GEN]"
|
240
|
+
raise "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
|
221
241
|
elsif !(obj = @doc.object(pdf_reference_from_string(str)))
|
222
|
-
raise "Error: No object with the given object identifier found"
|
242
|
+
raise "Error: No object with the given object identifier '#{str}' found"
|
223
243
|
else
|
224
244
|
obj
|
225
245
|
end
|
@@ -240,7 +260,7 @@ module HexaPDF
|
|
240
260
|
puts "<<"
|
241
261
|
(recursive ? val.sort : val).each do |k, v|
|
242
262
|
next if v.nil? || (v.respond_to?(:null?) && v.null?)
|
243
|
-
print ' ' * (indent + 1)
|
263
|
+
print '%s%s ' % [' ' * (indent + 1), @serializer.serialize_symbol(k)]
|
244
264
|
serialize(v, recursive: recursive, seen: seen, indent: indent + 1)
|
245
265
|
puts
|
246
266
|
end
|
@@ -283,6 +303,8 @@ module HexaPDF
|
|
283
303
|
["c[atalog]", "Print the catalog dictionary"],
|
284
304
|
["t[railer]", "Print the trailer dictionary"],
|
285
305
|
["p[ages] [RANGE]", "Print information about pages"],
|
306
|
+
["po PAGE", "Print the page object"],
|
307
|
+
["ps PAGE", "Print the content stream of the page"],
|
286
308
|
["pc | page-count", "Print the number of pages"],
|
287
309
|
["search REGEXP", "Print objects matching the pattern"],
|
288
310
|
["h[elp]", "Show the help"],
|
data/lib/hexapdf/cli/merge.rb
CHANGED
@@ -122,7 +122,7 @@ module HexaPDF
|
|
122
122
|
|
123
123
|
# Assemble pages
|
124
124
|
target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
|
125
|
-
page_tree = target.add(Type: :Pages)
|
125
|
+
page_tree = target.add({Type: :Pages})
|
126
126
|
import_pages(page_tree)
|
127
127
|
target.catalog[:Pages] = page_tree
|
128
128
|
remove_unused_pages(target)
|
@@ -334,6 +334,20 @@ module HexaPDF
|
|
334
334
|
# The value needs to be an object that responds to \#call(document, message, position) and
|
335
335
|
# returns +true+ if an error should be raised.
|
336
336
|
#
|
337
|
+
# parser.try_xref_reconstruction::
|
338
|
+
# A boolean specifying whether non-recoverable parsing errors should lead to reconstructing the
|
339
|
+
# main cross-reference table.
|
340
|
+
#
|
341
|
+
# The reconstructed cross-reference table might make damaged files usable but there is no way
|
342
|
+
# to ensure that the reconstructed file is equal to the undamaged original file (though
|
343
|
+
# generally it works out).
|
344
|
+
#
|
345
|
+
# There is also the possibility that reconstructing doesn't work because the algorithm has to
|
346
|
+
# assume that the PDF was written in a certain way (which is recommended by the PDF
|
347
|
+
# specification).
|
348
|
+
#
|
349
|
+
# Defaults to +true+.
|
350
|
+
#
|
337
351
|
# sorted_tree.max_leaf_node_size::
|
338
352
|
# The maximum number of nodes that should be in a leaf node of a node tree.
|
339
353
|
#
|
@@ -412,6 +426,7 @@ module HexaPDF
|
|
412
426
|
'page.default_media_box' => :A4,
|
413
427
|
'page.default_media_orientation' => :portrait,
|
414
428
|
'parser.on_correctable_error' => proc { false },
|
429
|
+
'parser.try_xref_reconstruction' => true,
|
415
430
|
'sorted_tree.max_leaf_node_size' => 64,
|
416
431
|
'style.layers_map' => {
|
417
432
|
link: 'HexaPDF::Layout::Style::LinkLayer',
|
@@ -45,7 +45,7 @@ module HexaPDF
|
|
45
45
|
# all either in clockwise or counterclockwise direction and optionally inclined in respect to
|
46
46
|
# the x-axis.
|
47
47
|
#
|
48
|
-
# See: ELL - https://
|
48
|
+
# See: ELL - https://spaceroots.org/documents/ellipse/elliptical-arc.pdf
|
49
49
|
class Arc
|
50
50
|
|
51
51
|
include HexaPDF::Utils::MathHelpers
|
@@ -202,8 +202,8 @@ module HexaPDF
|
|
202
202
|
p2x_prime, p2y_prime = derivative_evaluate(eta2)
|
203
203
|
|
204
204
|
result << [p2x, p2y,
|
205
|
-
p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
|
206
|
-
|
205
|
+
{p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
|
206
|
+
p2: [p2x - alpha * p2x_prime, p2y - alpha * p2y_prime]}]
|
207
207
|
end
|
208
208
|
|
209
209
|
result
|
data/lib/hexapdf/dictionary.rb
CHANGED
@@ -97,7 +97,7 @@ module HexaPDF
|
|
97
97
|
#
|
98
98
|
# version:: Specifies the minimum version of the PDF specification needed for this value.
|
99
99
|
def self.define_field(name, type:, required: false, default: nil, indirect: nil,
|
100
|
-
allowed_values: nil, version: '1.
|
100
|
+
allowed_values: nil, version: '1.0')
|
101
101
|
@fields ||= {}
|
102
102
|
@fields[name] = Field.new(type, required: required, default: default, indirect: indirect,
|
103
103
|
allowed_values: allowed_values, version: version)
|
@@ -163,7 +163,7 @@ module HexaPDF
|
|
163
163
|
value[name] = field.default
|
164
164
|
end
|
165
165
|
value[name] = data = document.deref(data) if data.kind_of?(HexaPDF::Reference)
|
166
|
-
if data.
|
166
|
+
if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
|
167
167
|
data = data.value
|
168
168
|
end
|
169
169
|
if (result = field&.convert(data, document))
|
@@ -182,7 +182,7 @@ module HexaPDF
|
|
182
182
|
raise ArgumentError, "Only Symbol (Name) keys are allowed to be used in PDF dictionaries"
|
183
183
|
end
|
184
184
|
|
185
|
-
if value[name].
|
185
|
+
if value[name].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
|
186
186
|
!data.kind_of?(HexaPDF::Reference)
|
187
187
|
value[name].value = data
|
188
188
|
else
|
@@ -273,7 +273,7 @@ module HexaPDF
|
|
273
273
|
# Check that required fields are set
|
274
274
|
if field.required? && obj.nil?
|
275
275
|
yield("Required field #{name} is not set", field.default?)
|
276
|
-
self[name] = obj = field.default
|
276
|
+
self[name] = obj = field.default if field.default?
|
277
277
|
end
|
278
278
|
|
279
279
|
# Check if the document version is set high enough
|
@@ -151,17 +151,9 @@ module HexaPDF
|
|
151
151
|
# Returns a duplicated default value, automatically taking unduplicatable classes into
|
152
152
|
# account.
|
153
153
|
def default
|
154
|
-
|
154
|
+
@default.dup
|
155
155
|
end
|
156
156
|
|
157
|
-
# Returns +true+ if the default value can safely be duplicated with #dup.
|
158
|
-
def duplicatable_default?
|
159
|
-
@duplicatable_default ||= HexaPDF::Object::NOT_DUPLICATABLE_CLASSES.none? do |klass|
|
160
|
-
@default.kind_of?(klass)
|
161
|
-
end
|
162
|
-
end
|
163
|
-
private :duplicatable_default?
|
164
|
-
|
165
157
|
# Returns +true+ if the given object is valid for this field.
|
166
158
|
def valid_object?(obj)
|
167
159
|
type.any? {|t| obj.kind_of?(t) } ||
|
data/lib/hexapdf/document.rb
CHANGED
@@ -69,15 +69,35 @@ module HexaPDF
|
|
69
69
|
|
70
70
|
autoload(:Composer, 'hexapdf/composer')
|
71
71
|
|
72
|
+
# == HexaPDF::Document
|
73
|
+
#
|
72
74
|
# Represents one PDF document.
|
73
75
|
#
|
74
76
|
# A PDF document consists of (indirect) objects, so the main job of this class is to provide
|
75
77
|
# methods for working with these objects. However, since a PDF document may also be
|
76
78
|
# incrementally updated and can therefore contain one or more revisions, there are also methods
|
77
|
-
#
|
79
|
+
# for working with these revisions.
|
78
80
|
#
|
79
81
|
# Note: This class provides everything to work on PDF documents on a low-level basis. This means
|
80
|
-
# that there are no convenience methods for higher PDF functionality
|
82
|
+
# that there are no convenience methods for higher PDF functionality. Those can be found in the
|
83
|
+
# objects linked from here, like #catalog.
|
84
|
+
#
|
85
|
+
# == Known Messages
|
86
|
+
#
|
87
|
+
# The document object provides a basic message dispatch system via #register_listener and
|
88
|
+
# #dispatch_message.
|
89
|
+
#
|
90
|
+
# Following are the messages that are used by HexaPDF itself:
|
91
|
+
#
|
92
|
+
# :complete_objects::
|
93
|
+
# This message is called before the first step of writing a document. Listeners should
|
94
|
+
# complete PDF objects that are missing some information.
|
95
|
+
#
|
96
|
+
# For example, the font system uses this message to complete the font objects with
|
97
|
+
# information that is only available once all the used glyphs are known.
|
98
|
+
#
|
99
|
+
# :before_write::
|
100
|
+
# This message is called before a document is actually serialized and written.
|
81
101
|
class Document
|
82
102
|
|
83
103
|
autoload(:Pages, 'hexapdf/document/pages')
|
@@ -400,11 +420,11 @@ module HexaPDF
|
|
400
420
|
# object in the PDF document. The block may either accept only the object or the object and the
|
401
421
|
# revision it is in.
|
402
422
|
#
|
403
|
-
# By default, only the current version of each object is returned which implies that each
|
404
|
-
#
|
405
|
-
#
|
423
|
+
# By default, only the current version of each object is returned which implies that each object
|
424
|
+
# number is yielded exactly once. If the +only_current+ option is +false+, all stored objects
|
425
|
+
# from newest to oldest are returned, not only the current version of each object.
|
406
426
|
#
|
407
|
-
# The +
|
427
|
+
# The +only_current+ option can make a difference because the document can contain multiple
|
408
428
|
# revisions:
|
409
429
|
#
|
410
430
|
# * Multiple revisions may contain objects with the same object and generation numbers, e.g.
|
@@ -442,6 +462,9 @@ module HexaPDF
|
|
442
462
|
end
|
443
463
|
|
444
464
|
# Dispatches the message +name+ with the given arguments to all registered listeners.
|
465
|
+
#
|
466
|
+
# See the main Document documentation for an overview of messages that are used by HexaPDF
|
467
|
+
# itself.
|
445
468
|
def dispatch_message(name, *args)
|
446
469
|
@listeners[name]&.each {|obj| obj.call(*args) }
|
447
470
|
end
|
@@ -594,13 +617,9 @@ module HexaPDF
|
|
594
617
|
# If a block is given, it is called on validation problems.
|
595
618
|
#
|
596
619
|
# See HexaPDF::Object#validate for more information.
|
597
|
-
def validate(auto_correct: true, only_loaded: false) #:yield:
|
598
|
-
cur_obj = trailer
|
599
|
-
block = (block_given? ? lambda {|msg, correctable| yield(cur_obj, msg, correctable) } : nil)
|
600
|
-
|
620
|
+
def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
|
601
621
|
result = trailer.validate(auto_correct: auto_correct, &block)
|
602
622
|
each(only_current: false, only_loaded: only_loaded) do |obj|
|
603
|
-
cur_obj = obj
|
604
623
|
result &&= obj.validate(auto_correct: auto_correct, &block)
|
605
624
|
end
|
606
625
|
result
|
@@ -643,7 +662,7 @@ module HexaPDF
|
|
643
662
|
end
|
644
663
|
|
645
664
|
if validate
|
646
|
-
self.validate(auto_correct: true) do |
|
665
|
+
self.validate(auto_correct: true) do |msg, correctable, obj|
|
647
666
|
next if correctable
|
648
667
|
raise HexaPDF::Error, "Validation error for (#{obj.oid},#{obj.gen}): #{msg}"
|
649
668
|
end
|