hexapdf 0.15.2 → 0.15.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a6ef1bdd17664ef0b9474b931d31ad69c57c77928ce2b69a3bbd5dadda0dce6
4
- data.tar.gz: 11b87436d19cc5498fd6a77f0b6c410e717809264153964668be0f5199b9354d
3
+ metadata.gz: bcffb10babdbf723a478ea40721d9515222adf8d12ba9bd799f7b95fa66bc408
4
+ data.tar.gz: d5929900ab1b010a39964edc366ea223ef8a2ee9bcd1e9a5873874b4d5a8ecc5
5
5
  SHA512:
6
- metadata.gz: 525a55832758b5eecd1a7f2daf5f220e1afa7ff8e88ca2d65998e658585f290ff2018450e50423f2331b7f195865eab8b1c62562ecdbf3671b46d4da770aed12
7
- data.tar.gz: 0b0e18c7f79f0e2a54080fefad1dd4d94e15157f72e5360a3ebd827fc0cc2037ae6e06302155426e7f0900e97ee0cee678e069bd8ef05a9333d684c50e1343a5
6
+ metadata.gz: d12bbd49204c28675d399477ce0249140bf6ead3fe9332541128802f0edc3ebb2b187752b464b9acd3b71ca4ce6cb5cba33caf19282bd5a15020cec3c6e20297
7
+ data.tar.gz: 8df3586c8069db615bf317f22b28069f4bfd1395d31c285bb5c4a63b1b46ec60b088082a5d461adafd590718df6307734361e1e61fe3210009d158fc95c558be
data/CHANGELOG.md CHANGED
@@ -1,3 +1,39 @@
1
+ ## 0.15.6 - 2021-07-16
2
+
3
+ ### Fixed
4
+
5
+ * Handling of indirect objects with invalid values, which are now treated as null
6
+ objects
7
+
8
+
9
+ ## 0.15.5 - 2021-07-06
10
+
11
+ ### Changed
12
+
13
+ * Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
14
+
15
+
16
+ ### Fixed
17
+
18
+ * Handling of invalid cross-reference stream entries that end with the sequence
19
+ `\r\r`
20
+
21
+
22
+ ## 0.15.4 - 2021-05-27
23
+
24
+ ### Fixed
25
+
26
+ * [HexaPDF::Type::Annotation#appearance] to handle cases where there is
27
+ no valid appearance stream
28
+
29
+
30
+ ## 0.15.3 - 2021-05-01
31
+
32
+ ### Fixed
33
+
34
+ * Handling of general (not document-level), unencrypted metadata streams
35
+
36
+
1
37
  ## 0.15.2 - 2021-05-01
2
38
 
3
39
  ### Fixed
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  module Extensions #:nodoc:
51
51
  def help_banner #:nodoc:
52
52
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
53
- "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n" \
53
+ "Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
54
54
  "#{format(usage, indent: 7)}\n\n"
55
55
  end
56
56
  end
@@ -241,7 +241,7 @@ module HexaPDF
241
241
  end
242
242
 
243
243
  def decrypt(obj) #:nodoc:
244
- if obj.type == :Metadata && obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata]
244
+ if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
245
245
  obj
246
246
  else
247
247
  super
@@ -249,7 +249,11 @@ module HexaPDF
249
249
  end
250
250
 
251
251
  def encrypt_stream(obj) #:nodoc
252
- obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata] ? obj.stream_encoder : super
252
+ if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
253
+ obj.stream_encoder
254
+ else
255
+ super
256
+ end
253
257
  end
254
258
 
255
259
  private
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -263,9 +266,9 @@ module HexaPDF
263
266
 
264
267
  @tokenizer.skip_whitespace
265
268
  start.upto(start + number_of_entries - 1) do |oid|
266
- pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
269
+ pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
267
270
  maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
268
- force: !matched_size)
271
+ force: !recoverable)
269
272
  end
270
273
  if xref.entry?(oid)
271
274
  next
@@ -225,13 +225,14 @@ module HexaPDF
225
225
  # Reads the cross-reference subsection entry at the current position and advances the scan
226
226
  # pointer.
227
227
  #
228
- # If a possible problem is detected, yields to caller.
228
+ # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
229
+ # problem is recoverable.
229
230
  #
230
231
  # See: PDF1.7 7.5.4
231
- def next_xref_entry #:yield: matched_size
232
+ def next_xref_entry #:yield: recoverable
232
233
  prepare_string_scanner(20)
233
- unless @ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|\r|\n)/) && @ss.matched_size == 20
234
- yield(@ss.matched_size)
234
+ if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
235
+ yield(@ss[4])
235
236
  end
236
237
  [@ss[1].to_i, @ss[2].to_i, @ss[3]]
237
238
  end
@@ -138,8 +138,13 @@ module HexaPDF
138
138
  if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
139
139
  entry = entry[self[:AS]]
140
140
  end
141
- if entry.kind_of?(HexaPDF::Stream)
142
- entry[:Subtype] == :Form ? entry : document.wrap(entry, type: :XObject, subtype: :Form)
141
+ return unless entry.kind_of?(HexaPDF::Stream)
142
+
143
+ if entry.type == :XObject && entry[:Subtype] == :Form
144
+ entry
145
+ elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
146
+ (entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
147
+ document.wrap(entry, type: :XObject, subtype: :Form)
143
148
  end
144
149
  end
145
150
  alias appearance? appearance
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.2'
40
+ VERSION = '0.15.6'
41
41
 
42
42
  end
@@ -210,8 +210,12 @@ module CommonTokenizerTests
210
210
 
211
211
  it "next_xref_entry: fails on invalidly formatted entries" do
212
212
  create_tokenizer("0000000001 00001 g \n")
213
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
213
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
214
214
  create_tokenizer("0000000001 00001 n\n")
215
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
215
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
+ create_tokenizer("0000000001 00001 n\r")
217
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
218
+ create_tokenizer("0000000001 00001 n\r\r")
219
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
220
  end
217
221
  end
@@ -296,11 +296,11 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
296
296
  describe "handling of metadata streams" do
297
297
  before do
298
298
  @doc = HexaPDF::Document.new
299
- @doc.encrypt(encrypt_metadata: false)
300
299
  @output = StringIO.new(''.b)
301
300
  end
302
301
 
303
- it "doesn't decrypt or encrypt the document level metadata stream if /EncryptMetadata is false" do
302
+ it "doesn't decrypt or encrypt a metadata stream if /EncryptMetadata is false" do
303
+ @doc.encrypt(encrypt_metadata: false)
304
304
  @doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
305
305
  @doc.write(@output)
306
306
  assert_match(/stream\nHELLODATA\nendstream/, @output.string)
@@ -309,13 +309,14 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
309
309
  assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
310
310
  end
311
311
 
312
- it "doesn't modify decryption/encryption for arbitrary metadata streams" do
313
- @doc.catalog[:Anything] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
312
+ it "doesn't modify decryption/encryption for metadata streams if /V is not 4 or 5" do
313
+ @doc.encrypt(encrypt_metadata: false, algorithm: :arc4)
314
+ @doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
314
315
  @doc.write(@output)
315
316
  refute_match(/stream\nHELLODATA\nendstream/, @output.string)
316
317
 
317
318
  doc = HexaPDF::Document.new(io: @output)
318
- assert_equal('HELLODATA', doc.catalog[:Anything].stream)
319
+ assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
319
320
  end
320
321
  end
321
322
  end
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
110
116
  it "recovers from an invalid stream length value" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
185
191
  it "fails for numbers followed by endobj without space" do
186
192
  create_parser("1 0 obj 749endobj")
187
193
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
194
+ assert_match(/Missing whitespace after number/, exp.message)
195
+ end
196
+
197
+ it "fails for invalid values" do
198
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
199
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
200
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
201
  end
190
202
 
191
203
  it "fails if the stream length value is invalid" do
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.2)>>
43
+ <</Producer(HexaPDF version 0.15.6)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.2)>>
75
+ <</Producer(HexaPDF version 0.15.6)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
51
51
 
52
52
  stream = @doc.wrap({}, stream: '')
53
53
  @annot[:AP][:N] = stream
54
+ assert_nil(@annot.appearance)
55
+
56
+ stream[:BBox] = [1, 2, 3, 4]
54
57
  appearance = @annot.appearance
55
58
  assert_same(stream.data, appearance.data)
56
59
  assert_equal(:Form, appearance[:Subtype])
57
60
 
58
- @annot[:AP][:N] = {X: stream}
61
+ @annot[:AP][:N] = {X: {}}
59
62
  assert_nil(@annot.appearance)
60
63
 
61
64
  @annot[:AS] = :X
65
+ @annot[:AP][:N][:X] = stream
62
66
  assert_same(stream.data, @annot.appearance.data)
63
67
 
64
68
  @annot[:AP][:D] = {X: stream}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.2
4
+ version: 0.15.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-01 00:00:00.000000000 Z
11
+ date: 2021-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse