hexapdf 0.15.2 → 0.15.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/lib/hexapdf/cli/command.rb +1 -1
- data/lib/hexapdf/encryption/standard_security_handler.rb +6 -2
- data/lib/hexapdf/parser.rb +10 -7
- data/lib/hexapdf/tokenizer.rb +5 -4
- data/lib/hexapdf/type/annotation.rb +7 -2
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -2
- data/test/hexapdf/encryption/test_standard_security_handler.rb +6 -5
- data/test/hexapdf/test_parser.rb +13 -1
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_annotation.rb +5 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bcffb10babdbf723a478ea40721d9515222adf8d12ba9bd799f7b95fa66bc408
|
4
|
+
data.tar.gz: d5929900ab1b010a39964edc366ea223ef8a2ee9bcd1e9a5873874b4d5a8ecc5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d12bbd49204c28675d399477ce0249140bf6ead3fe9332541128802f0edc3ebb2b187752b464b9acd3b71ca4ce6cb5cba33caf19282bd5a15020cec3c6e20297
|
7
|
+
data.tar.gz: 8df3586c8069db615bf317f22b28069f4bfd1395d31c285bb5c4a63b1b46ec60b088082a5d461adafd590718df6307734361e1e61fe3210009d158fc95c558be
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
## 0.15.6 - 2021-07-16
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Handling of indirect objects with invalid values, which are now treated as null
|
6
|
+
objects
|
7
|
+
|
8
|
+
|
9
|
+
## 0.15.5 - 2021-07-06
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
|
13
|
+
* Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
|
14
|
+
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
* Handling of invalid cross-reference stream entries that end with the sequence
|
19
|
+
`\r\r`
|
20
|
+
|
21
|
+
|
22
|
+
## 0.15.4 - 2021-05-27
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
|
26
|
+
* [HexaPDF::Type::Annotation#appearance] to handle cases where there is
|
27
|
+
no valid appearance stream
|
28
|
+
|
29
|
+
|
30
|
+
## 0.15.3 - 2021-05-01
|
31
|
+
|
32
|
+
### Fixed
|
33
|
+
|
34
|
+
* Handling of general (not document-level), unencrypted metadata streams
|
35
|
+
|
36
|
+
|
1
37
|
## 0.15.2 - 2021-05-01
|
2
38
|
|
3
39
|
### Fixed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -50,7 +50,7 @@ module HexaPDF
|
|
50
50
|
module Extensions #:nodoc:
|
51
51
|
def help_banner #:nodoc:
|
52
52
|
"hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
|
53
|
-
"Copyright (c) 2014-
|
53
|
+
"Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
|
54
54
|
"#{format(usage, indent: 7)}\n\n"
|
55
55
|
end
|
56
56
|
end
|
@@ -241,7 +241,7 @@ module HexaPDF
|
|
241
241
|
end
|
242
242
|
|
243
243
|
def decrypt(obj) #:nodoc:
|
244
|
-
if obj.type == :Metadata && obj ==
|
244
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
245
245
|
obj
|
246
246
|
else
|
247
247
|
super
|
@@ -249,7 +249,11 @@ module HexaPDF
|
|
249
249
|
end
|
250
250
|
|
251
251
|
def encrypt_stream(obj) #:nodoc
|
252
|
-
obj ==
|
252
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
253
|
+
obj.stream_encoder
|
254
|
+
else
|
255
|
+
super
|
256
|
+
end
|
253
257
|
end
|
254
258
|
|
255
259
|
private
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -125,11 +125,14 @@ module HexaPDF
|
|
125
125
|
begin
|
126
126
|
object = @tokenizer.next_object
|
127
127
|
rescue MalformedPDFError
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
131
|
+
object = tok.to_i
|
132
|
+
@tokenizer.pos -= 6
|
133
|
+
else
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
135
|
+
end
|
133
136
|
end
|
134
137
|
end
|
135
138
|
|
@@ -263,9 +266,9 @@ module HexaPDF
|
|
263
266
|
|
264
267
|
@tokenizer.skip_whitespace
|
265
268
|
start.upto(start + number_of_entries - 1) do |oid|
|
266
|
-
pos, gen, type = @tokenizer.next_xref_entry do |
|
269
|
+
pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
|
267
270
|
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
268
|
-
force: !
|
271
|
+
force: !recoverable)
|
269
272
|
end
|
270
273
|
if xref.entry?(oid)
|
271
274
|
next
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -225,13 +225,14 @@ module HexaPDF
|
|
225
225
|
# Reads the cross-reference subsection entry at the current position and advances the scan
|
226
226
|
# pointer.
|
227
227
|
#
|
228
|
-
# If a
|
228
|
+
# If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
|
229
|
+
# problem is recoverable.
|
229
230
|
#
|
230
231
|
# See: PDF1.7 7.5.4
|
231
|
-
def next_xref_entry #:yield:
|
232
|
+
def next_xref_entry #:yield: recoverable
|
232
233
|
prepare_string_scanner(20)
|
233
|
-
|
234
|
-
yield(@ss
|
234
|
+
if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
|
235
|
+
yield(@ss[4])
|
235
236
|
end
|
236
237
|
[@ss[1].to_i, @ss[2].to_i, @ss[3]]
|
237
238
|
end
|
@@ -138,8 +138,13 @@ module HexaPDF
|
|
138
138
|
if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
|
139
139
|
entry = entry[self[:AS]]
|
140
140
|
end
|
141
|
-
|
142
|
-
|
141
|
+
return unless entry.kind_of?(HexaPDF::Stream)
|
142
|
+
|
143
|
+
if entry.type == :XObject && entry[:Subtype] == :Form
|
144
|
+
entry
|
145
|
+
elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
|
146
|
+
(entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
|
147
|
+
document.wrap(entry, type: :XObject, subtype: :Form)
|
143
148
|
end
|
144
149
|
end
|
145
150
|
alias appearance? appearance
|
data/lib/hexapdf/version.rb
CHANGED
@@ -210,8 +210,12 @@ module CommonTokenizerTests
|
|
210
210
|
|
211
211
|
it "next_xref_entry: fails on invalidly formatted entries" do
|
212
212
|
create_tokenizer("0000000001 00001 g \n")
|
213
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
213
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
|
214
214
|
create_tokenizer("0000000001 00001 n\n")
|
215
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
215
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
|
+
create_tokenizer("0000000001 00001 n\r")
|
217
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
218
|
+
create_tokenizer("0000000001 00001 n\r\r")
|
219
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
220
|
end
|
217
221
|
end
|
@@ -296,11 +296,11 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
|
|
296
296
|
describe "handling of metadata streams" do
|
297
297
|
before do
|
298
298
|
@doc = HexaPDF::Document.new
|
299
|
-
@doc.encrypt(encrypt_metadata: false)
|
300
299
|
@output = StringIO.new(''.b)
|
301
300
|
end
|
302
301
|
|
303
|
-
it "doesn't decrypt or encrypt
|
302
|
+
it "doesn't decrypt or encrypt a metadata stream if /EncryptMetadata is false" do
|
303
|
+
@doc.encrypt(encrypt_metadata: false)
|
304
304
|
@doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
|
305
305
|
@doc.write(@output)
|
306
306
|
assert_match(/stream\nHELLODATA\nendstream/, @output.string)
|
@@ -309,13 +309,14 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
|
|
309
309
|
assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
|
310
310
|
end
|
311
311
|
|
312
|
-
it "doesn't modify decryption/encryption for
|
313
|
-
@doc.
|
312
|
+
it "doesn't modify decryption/encryption for metadata streams if /V is not 4 or 5" do
|
313
|
+
@doc.encrypt(encrypt_metadata: false, algorithm: :arc4)
|
314
|
+
@doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
|
314
315
|
@doc.write(@output)
|
315
316
|
refute_match(/stream\nHELLODATA\nendstream/, @output.string)
|
316
317
|
|
317
318
|
doc = HexaPDF::Document.new(io: @output)
|
318
|
-
assert_equal('HELLODATA', doc.catalog[:
|
319
|
+
assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
|
319
320
|
end
|
320
321
|
end
|
321
322
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
|
|
107
107
|
assert_equal(749, object)
|
108
108
|
end
|
109
109
|
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
112
|
+
object, * = @parser.parse_indirect_object
|
113
|
+
assert_nil(object)
|
114
|
+
end
|
115
|
+
|
110
116
|
it "recovers from an invalid stream length value" do
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
|
|
185
191
|
it "fails for numbers followed by endobj without space" do
|
186
192
|
create_parser("1 0 obj 749endobj")
|
187
193
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
188
|
-
assert_match(/
|
194
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
195
|
+
end
|
196
|
+
|
197
|
+
it "fails for invalid values" do
|
198
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
199
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
200
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
189
201
|
end
|
190
202
|
|
191
203
|
it "fails if the stream length value is invalid" do
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
40
40
|
219
|
41
41
|
%%EOF
|
42
42
|
3 0 obj
|
43
|
-
<</Producer(HexaPDF version 0.15.
|
43
|
+
<</Producer(HexaPDF version 0.15.6)>>
|
44
44
|
endobj
|
45
45
|
xref
|
46
46
|
3 1
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
72
72
|
141
|
73
73
|
%%EOF
|
74
74
|
6 0 obj
|
75
|
-
<</Producer(HexaPDF version 0.15.
|
75
|
+
<</Producer(HexaPDF version 0.15.6)>>
|
76
76
|
endobj
|
77
77
|
2 0 obj
|
78
78
|
<</Length 10>>stream
|
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
|
|
51
51
|
|
52
52
|
stream = @doc.wrap({}, stream: '')
|
53
53
|
@annot[:AP][:N] = stream
|
54
|
+
assert_nil(@annot.appearance)
|
55
|
+
|
56
|
+
stream[:BBox] = [1, 2, 3, 4]
|
54
57
|
appearance = @annot.appearance
|
55
58
|
assert_same(stream.data, appearance.data)
|
56
59
|
assert_equal(:Form, appearance[:Subtype])
|
57
60
|
|
58
|
-
@annot[:AP][:N] = {X:
|
61
|
+
@annot[:AP][:N] = {X: {}}
|
59
62
|
assert_nil(@annot.appearance)
|
60
63
|
|
61
64
|
@annot[:AS] = :X
|
65
|
+
@annot[:AP][:N][:X] = stream
|
62
66
|
assert_same(stream.data, @annot.appearance.data)
|
63
67
|
|
64
68
|
@annot[:AP][:D] = {X: stream}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|