hexapdf 0.15.2 → 0.15.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a6ef1bdd17664ef0b9474b931d31ad69c57c77928ce2b69a3bbd5dadda0dce6
4
- data.tar.gz: 11b87436d19cc5498fd6a77f0b6c410e717809264153964668be0f5199b9354d
3
+ metadata.gz: bcffb10babdbf723a478ea40721d9515222adf8d12ba9bd799f7b95fa66bc408
4
+ data.tar.gz: d5929900ab1b010a39964edc366ea223ef8a2ee9bcd1e9a5873874b4d5a8ecc5
5
5
  SHA512:
6
- metadata.gz: 525a55832758b5eecd1a7f2daf5f220e1afa7ff8e88ca2d65998e658585f290ff2018450e50423f2331b7f195865eab8b1c62562ecdbf3671b46d4da770aed12
7
- data.tar.gz: 0b0e18c7f79f0e2a54080fefad1dd4d94e15157f72e5360a3ebd827fc0cc2037ae6e06302155426e7f0900e97ee0cee678e069bd8ef05a9333d684c50e1343a5
6
+ metadata.gz: d12bbd49204c28675d399477ce0249140bf6ead3fe9332541128802f0edc3ebb2b187752b464b9acd3b71ca4ce6cb5cba33caf19282bd5a15020cec3c6e20297
7
+ data.tar.gz: 8df3586c8069db615bf317f22b28069f4bfd1395d31c285bb5c4a63b1b46ec60b088082a5d461adafd590718df6307734361e1e61fe3210009d158fc95c558be
data/CHANGELOG.md CHANGED
@@ -1,3 +1,39 @@
1
+ ## 0.15.6 - 2021-07-16
2
+
3
+ ### Fixed
4
+
5
+ * Handling of indirect objects with invalid values, which are now treated as null
6
+ objects
7
+
8
+
9
+ ## 0.15.5 - 2021-07-06
10
+
11
+ ### Changed
12
+
13
+ * Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
14
+
15
+
16
+ ### Fixed
17
+
18
+ * Handling of invalid cross-reference stream entries that end with the sequence
19
+ `\r\r`
20
+
21
+
22
+ ## 0.15.4 - 2021-05-27
23
+
24
+ ### Fixed
25
+
26
+ * [HexaPDF::Type::Annotation#appearance] to handle cases where there is
27
+ no valid appearance stream
28
+
29
+
30
+ ## 0.15.3 - 2021-05-01
31
+
32
+ ### Fixed
33
+
34
+ * Handling of general (not document-level), unencrypted metadata streams
35
+
36
+
1
37
  ## 0.15.2 - 2021-05-01
2
38
 
3
39
  ### Fixed
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  module Extensions #:nodoc:
51
51
  def help_banner #:nodoc:
52
52
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
53
- "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n" \
53
+ "Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
54
54
  "#{format(usage, indent: 7)}\n\n"
55
55
  end
56
56
  end
@@ -241,7 +241,7 @@ module HexaPDF
241
241
  end
242
242
 
243
243
  def decrypt(obj) #:nodoc:
244
- if obj.type == :Metadata && obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata]
244
+ if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
245
245
  obj
246
246
  else
247
247
  super
@@ -249,7 +249,11 @@ module HexaPDF
249
249
  end
250
250
 
251
251
  def encrypt_stream(obj) #:nodoc
252
- obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata] ? obj.stream_encoder : super
252
+ if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
253
+ obj.stream_encoder
254
+ else
255
+ super
256
+ end
253
257
  end
254
258
 
255
259
  private
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -263,9 +266,9 @@ module HexaPDF
263
266
 
264
267
  @tokenizer.skip_whitespace
265
268
  start.upto(start + number_of_entries - 1) do |oid|
266
- pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
269
+ pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
267
270
  maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
268
- force: !matched_size)
271
+ force: !recoverable)
269
272
  end
270
273
  if xref.entry?(oid)
271
274
  next
@@ -225,13 +225,14 @@ module HexaPDF
225
225
  # Reads the cross-reference subsection entry at the current position and advances the scan
226
226
  # pointer.
227
227
  #
228
- # If a possible problem is detected, yields to caller.
228
+ # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
229
+ # problem is recoverable.
229
230
  #
230
231
  # See: PDF1.7 7.5.4
231
- def next_xref_entry #:yield: matched_size
232
+ def next_xref_entry #:yield: recoverable
232
233
  prepare_string_scanner(20)
233
- unless @ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|\r|\n)/) && @ss.matched_size == 20
234
- yield(@ss.matched_size)
234
+ if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
235
+ yield(@ss[4])
235
236
  end
236
237
  [@ss[1].to_i, @ss[2].to_i, @ss[3]]
237
238
  end
@@ -138,8 +138,13 @@ module HexaPDF
138
138
  if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
139
139
  entry = entry[self[:AS]]
140
140
  end
141
- if entry.kind_of?(HexaPDF::Stream)
142
- entry[:Subtype] == :Form ? entry : document.wrap(entry, type: :XObject, subtype: :Form)
141
+ return unless entry.kind_of?(HexaPDF::Stream)
142
+
143
+ if entry.type == :XObject && entry[:Subtype] == :Form
144
+ entry
145
+ elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
146
+ (entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
147
+ document.wrap(entry, type: :XObject, subtype: :Form)
143
148
  end
144
149
  end
145
150
  alias appearance? appearance
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.2'
40
+ VERSION = '0.15.6'
41
41
 
42
42
  end
@@ -210,8 +210,12 @@ module CommonTokenizerTests
210
210
 
211
211
  it "next_xref_entry: fails on invalidly formatted entries" do
212
212
  create_tokenizer("0000000001 00001 g \n")
213
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
213
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
214
214
  create_tokenizer("0000000001 00001 n\n")
215
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
215
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
+ create_tokenizer("0000000001 00001 n\r")
217
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
218
+ create_tokenizer("0000000001 00001 n\r\r")
219
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
220
  end
217
221
  end
@@ -296,11 +296,11 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
296
296
  describe "handling of metadata streams" do
297
297
  before do
298
298
  @doc = HexaPDF::Document.new
299
- @doc.encrypt(encrypt_metadata: false)
300
299
  @output = StringIO.new(''.b)
301
300
  end
302
301
 
303
- it "doesn't decrypt or encrypt the document level metadata stream if /EncryptMetadata is false" do
302
+ it "doesn't decrypt or encrypt a metadata stream if /EncryptMetadata is false" do
303
+ @doc.encrypt(encrypt_metadata: false)
304
304
  @doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
305
305
  @doc.write(@output)
306
306
  assert_match(/stream\nHELLODATA\nendstream/, @output.string)
@@ -309,13 +309,14 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
309
309
  assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
310
310
  end
311
311
 
312
- it "doesn't modify decryption/encryption for arbitrary metadata streams" do
313
- @doc.catalog[:Anything] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
312
+ it "doesn't modify decryption/encryption for metadata streams if /V is not 4 or 5" do
313
+ @doc.encrypt(encrypt_metadata: false, algorithm: :arc4)
314
+ @doc.catalog[:Metadata] = @doc.wrap({Type: :Metadata, Subtype: :XML}, stream: "HELLODATA")
314
315
  @doc.write(@output)
315
316
  refute_match(/stream\nHELLODATA\nendstream/, @output.string)
316
317
 
317
318
  doc = HexaPDF::Document.new(io: @output)
318
- assert_equal('HELLODATA', doc.catalog[:Anything].stream)
319
+ assert_equal('HELLODATA', doc.catalog[:Metadata].stream)
319
320
  end
320
321
  end
321
322
  end
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
110
116
  it "recovers from an invalid stream length value" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
185
191
  it "fails for numbers followed by endobj without space" do
186
192
  create_parser("1 0 obj 749endobj")
187
193
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
194
+ assert_match(/Missing whitespace after number/, exp.message)
195
+ end
196
+
197
+ it "fails for invalid values" do
198
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
199
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
200
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
201
  end
190
202
 
191
203
  it "fails if the stream length value is invalid" do
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.2)>>
43
+ <</Producer(HexaPDF version 0.15.6)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.2)>>
75
+ <</Producer(HexaPDF version 0.15.6)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
51
51
 
52
52
  stream = @doc.wrap({}, stream: '')
53
53
  @annot[:AP][:N] = stream
54
+ assert_nil(@annot.appearance)
55
+
56
+ stream[:BBox] = [1, 2, 3, 4]
54
57
  appearance = @annot.appearance
55
58
  assert_same(stream.data, appearance.data)
56
59
  assert_equal(:Form, appearance[:Subtype])
57
60
 
58
- @annot[:AP][:N] = {X: stream}
61
+ @annot[:AP][:N] = {X: {}}
59
62
  assert_nil(@annot.appearance)
60
63
 
61
64
  @annot[:AS] = :X
65
+ @annot[:AP][:N][:X] = stream
62
66
  assert_same(stream.data, @annot.appearance.data)
63
67
 
64
68
  @annot[:AP][:D] = {X: stream}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.2
4
+ version: 0.15.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-01 00:00:00.000000000 Z
11
+ date: 2021-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse