hexapdf 0.15.2 → 0.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/lib/hexapdf/cli/command.rb +1 -1
- data/lib/hexapdf/encryption/standard_security_handler.rb +6 -2
- data/lib/hexapdf/parser.rb +10 -7
- data/lib/hexapdf/tokenizer.rb +5 -4
- data/lib/hexapdf/type/annotation.rb +7 -2
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -2
- data/test/hexapdf/encryption/test_standard_security_handler.rb +6 -5
- data/test/hexapdf/test_parser.rb +13 -1
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_annotation.rb +5 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bcffb10babdbf723a478ea40721d9515222adf8d12ba9bd799f7b95fa66bc408
|
4
|
+
data.tar.gz: d5929900ab1b010a39964edc366ea223ef8a2ee9bcd1e9a5873874b4d5a8ecc5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d12bbd49204c28675d399477ce0249140bf6ead3fe9332541128802f0edc3ebb2b187752b464b9acd3b71ca4ce6cb5cba33caf19282bd5a15020cec3c6e20297
|
7
|
+
data.tar.gz: 8df3586c8069db615bf317f22b28069f4bfd1395d31c285bb5c4a63b1b46ec60b088082a5d461adafd590718df6307734361e1e61fe3210009d158fc95c558be
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
## 0.15.6 - 2021-07-16
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Handling of indirect objects with invalid values which are now treated as null
|
6
|
+
objects
|
7
|
+
|
8
|
+
|
9
|
+
## 0.15.5 - 2021-07-06
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
|
13
|
+
* Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
|
14
|
+
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
* Handling of invalid cross-reference stream entries that ends with the sequence
|
19
|
+
`\r\r`
|
20
|
+
|
21
|
+
|
22
|
+
## 0.15.4 - 2021-05-27
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
|
26
|
+
* [HexaPDF::Type::Annotation#appearance] to handle cases where there is
|
27
|
+
no valid appearance stream
|
28
|
+
|
29
|
+
|
30
|
+
## 0.15.3 - 2021-05-01
|
31
|
+
|
32
|
+
### Fixed
|
33
|
+
|
34
|
+
* Handling of general (not document-level), unencrypted metadata streams
|
35
|
+
|
36
|
+
|
1
37
|
## 0.15.2 - 2021-05-01
|
2
38
|
|
3
39
|
### Fixed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -50,7 +50,7 @@ module HexaPDF
|
|
50
50
|
module Extensions #:nodoc:
|
51
51
|
def help_banner #:nodoc:
|
52
52
|
"hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
|
53
|
-
"Copyright (c) 2014-
|
53
|
+
"Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
|
54
54
|
"#{format(usage, indent: 7)}\n\n"
|
55
55
|
end
|
56
56
|
end
|
data/lib/hexapdf/encryption/standard_security_handler.rb
CHANGED
@@ -241,7 +241,7 @@ module HexaPDF
|
|
241
241
|
end
|
242
242
|
|
243
243
|
def decrypt(obj) #:nodoc:
|
244
|
-
if obj.type == :Metadata && obj ==
|
244
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
245
245
|
obj
|
246
246
|
else
|
247
247
|
super
|
@@ -249,7 +249,11 @@ module HexaPDF
|
|
249
249
|
end
|
250
250
|
|
251
251
|
def encrypt_stream(obj) #:nodoc
|
252
|
-
obj ==
|
252
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
253
|
+
obj.stream_encoder
|
254
|
+
else
|
255
|
+
super
|
256
|
+
end
|
253
257
|
end
|
254
258
|
|
255
259
|
private
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -125,11 +125,14 @@ module HexaPDF
|
|
125
125
|
begin
|
126
126
|
object = @tokenizer.next_object
|
127
127
|
rescue MalformedPDFError
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
131
|
+
object = tok.to_i
|
132
|
+
@tokenizer.pos -= 6
|
133
|
+
else
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
135
|
+
end
|
133
136
|
end
|
134
137
|
end
|
135
138
|
|
@@ -263,9 +266,9 @@ module HexaPDF
|
|
263
266
|
|
264
267
|
@tokenizer.skip_whitespace
|
265
268
|
start.upto(start + number_of_entries - 1) do |oid|
|
266
|
-
pos, gen, type = @tokenizer.next_xref_entry do |
|
269
|
+
pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
|
267
270
|
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
268
|
-
force: !
|
271
|
+
force: !recoverable)
|
269
272
|
end
|
270
273
|
if xref.entry?(oid)
|
271
274
|
next
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -225,13 +225,14 @@ module HexaPDF
|
|
225
225
|
# Reads the cross-reference subsection entry at the current position and advances the scan
|
226
226
|
# pointer.
|
227
227
|
#
|
228
|
-
# If a
|
228
|
+
# If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
|
229
|
+
# problem is recoverable.
|
229
230
|
#
|
230
231
|
# See: PDF1.7 7.5.4
|
231
|
-
def next_xref_entry #:yield:
|
232
|
+
def next_xref_entry #:yield: recoverable
|
232
233
|
prepare_string_scanner(20)
|
233
|
-
|
234
|
-
yield(@ss
|
234
|
+
if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
|
235
|
+
yield(@ss[4])
|
235
236
|
end
|
236
237
|
[@ss[1].to_i, @ss[2].to_i, @ss[3]]
|
237
238
|
end
|
data/lib/hexapdf/type/annotation.rb
CHANGED
@@ -138,8 +138,13 @@ module HexaPDF
|
|
138
138
|
if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
|
139
139
|
entry = entry[self[:AS]]
|
140
140
|
end
|
141
|
-
|
142
|
-
|
141
|
+
return unless entry.kind_of?(HexaPDF::Stream)
|
142
|
+
|
143
|
+
if entry.type == :XObject && entry[:Subtype] == :Form
|
144
|
+
entry
|
145
|
+
elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
|
146
|
+
(entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
|
147
|
+
document.wrap(entry, type: :XObject, subtype: :Form)
|
143
148
|
end
|
144
149
|
end
|
145
150
|
alias appearance? appearance
|
data/lib/hexapdf/version.rb
CHANGED
data/test/hexapdf/common_tokenizer_tests.rb
CHANGED
@@ -210,8 +210,12 @@ module CommonTokenizerTests
|
|
210
210
|
|
211
211
|
it "next_xref_entry: fails on invalidly formatted entries" do
|
212
212
|
create_tokenizer("0000000001 00001 g \n")
|
213
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
213
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
|
214
214
|
create_tokenizer("0000000001 00001 n\n")
|
215
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
215
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
|
+
create_tokenizer("0000000001 00001 n\r")
|
217
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
218
|
+
create_tokenizer("0000000001 00001 n\r\r")
|
219
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
220
|
end
|
217
221
|
end
|
data/test/hexapdf/encryption/test_standard_security_handler.rb
CHANGED
@@ -296,11 +296,11 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
|
|
296
296
|
describe "handling of metadata streams" do
|
297
297
|
before do
|
298
298
|
@doc = HexaPDF::Document.new
|
299
|
-
@doc.encrypt(encrypt_metadata: false)
|
300
299
|
@output = StringIO.new(''.b)
|
301
300
|
end
|
302
301
|
|
303
|
-
it "doesn't decrypt or encrypt
|
302
|
+
it "doesn't decrypt or encrypt a metadata stream if /EncryptMetadata is false" do
|
303
|
+
@doc.encrypt(encrypt_metadata: false)
|
304
304
|
@doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
|
305
305
|
@doc.write(@output)
|
306
306
|
assert_match(/stream\nHELLODATA\nendstream/, @output.string)
|
@@ -309,13 +309,14 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
|
|
309
309
|
assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
|
310
310
|
end
|
311
311
|
|
312
|
-
it "doesn't modify decryption/encryption for
|
313
|
-
@doc.
|
312
|
+
it "doesn't modify decryption/encryption for metadata streams if /V is not 4 or 5" do
|
313
|
+
@doc.encrypt(encrypt_metadata: false, algorithm: :arc4)
|
314
|
+
@doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
|
314
315
|
@doc.write(@output)
|
315
316
|
refute_match(/stream\nHELLODATA\nendstream/, @output.string)
|
316
317
|
|
317
318
|
doc = HexaPDF::Document.new(io: @output)
|
318
|
-
assert_equal('HELLODATA', doc.catalog[:
|
319
|
+
assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
|
319
320
|
end
|
320
321
|
end
|
321
322
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
|
|
107
107
|
assert_equal(749, object)
|
108
108
|
end
|
109
109
|
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
112
|
+
object, * = @parser.parse_indirect_object
|
113
|
+
assert_nil(object)
|
114
|
+
end
|
115
|
+
|
110
116
|
it "recovers from an invalid stream length value" do
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
|
|
185
191
|
it "fails for numbers followed by endobj without space" do
|
186
192
|
create_parser("1 0 obj 749endobj")
|
187
193
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
188
|
-
assert_match(/
|
194
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
195
|
+
end
|
196
|
+
|
197
|
+
it "fails for invalid values" do
|
198
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
199
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
200
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
189
201
|
end
|
190
202
|
|
191
203
|
it "fails if the stream length value is invalid" do
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
40
40
|
219
|
41
41
|
%%EOF
|
42
42
|
3 0 obj
|
43
|
-
<</Producer(HexaPDF version 0.15.
|
43
|
+
<</Producer(HexaPDF version 0.15.6)>>
|
44
44
|
endobj
|
45
45
|
xref
|
46
46
|
3 1
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
72
72
|
141
|
73
73
|
%%EOF
|
74
74
|
6 0 obj
|
75
|
-
<</Producer(HexaPDF version 0.15.
|
75
|
+
<</Producer(HexaPDF version 0.15.6)>>
|
76
76
|
endobj
|
77
77
|
2 0 obj
|
78
78
|
<</Length 10>>stream
|
data/test/hexapdf/type/test_annotation.rb
CHANGED
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
|
|
51
51
|
|
52
52
|
stream = @doc.wrap({}, stream: '')
|
53
53
|
@annot[:AP][:N] = stream
|
54
|
+
assert_nil(@annot.appearance)
|
55
|
+
|
56
|
+
stream[:BBox] = [1, 2, 3, 4]
|
54
57
|
appearance = @annot.appearance
|
55
58
|
assert_same(stream.data, appearance.data)
|
56
59
|
assert_equal(:Form, appearance[:Subtype])
|
57
60
|
|
58
|
-
@annot[:AP][:N] = {X:
|
61
|
+
@annot[:AP][:N] = {X: {}}
|
59
62
|
assert_nil(@annot.appearance)
|
60
63
|
|
61
64
|
@annot[:AS] = :X
|
65
|
+
@annot[:AP][:N][:X] = stream
|
62
66
|
assert_same(stream.data, @annot.appearance.data)
|
63
67
|
|
64
68
|
@annot[:AP][:D] = {X: stream}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|