hexapdf 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2889ba1d03e2c351efd694b1583063023fff97c0da636ff5103f88538255735c
4
- data.tar.gz: 6fb4727db05900e8fccba2ad4e093d1092e17305e5b5616ded97a76cf835673c
3
+ metadata.gz: 61b0fb56c6522f2af82eb8ffb10570c45bb11460cf4c048c1bdfe8d9daf71afe
4
+ data.tar.gz: 91cb053019c367825ac0799a84e4ddad837fe283a6ab2bc6df16ee9ed9f2456d
5
5
  SHA512:
6
- metadata.gz: 00be8ed2c306a88e5bfc0eada97a7e6bf802ec269e832bb21b3521d4077b18ecad11946ddc6f8a6d575820e66339059e59ba2c4cdd2b74d6c7d6defd0f2f5256
7
- data.tar.gz: 94c6a8178ead2a986921b72b07ef5dc388a5fa6a67945573eec921db30c9940d241d3f47591c8fbbf9bdcf313df0dda536f0fc78e0e946e27dfa3bc13dad9a28
6
+ metadata.gz: 9a71ee1e9307f0ef67c9dec108c7f68db45166a62f9b6ec60915ce2c089cf0e9ec5bfcd8d74e8b31b63238a09c820a0798689a84e5ea0b1577e2492e5a1d425e
7
+ data.tar.gz: b20043cead03f7fc7fe527fdbcb3674ab2d1da06b546bac9c1549b6eb6d143232453132709d93ae008d78a83bff36cf85fd0dbc0938da848e7847a1830e6011e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
1
+ ## 1.0.1 - 2024-11-04
2
+
3
+ ### Changed
4
+
5
+ * Informational output on errors when running CLI commands to provide more
6
+ details
7
+
8
+ ### Fixed
9
+
10
+ * Parsing of indirect objects whose value is an indirect reference
11
+ * Writing of the initial cross-reference section to ensure a single subsection
12
+ * [HexaPDF::Utils::SortedTreeNode] to wrap all /Kids entries with the correct
13
+ type class
14
+
15
+
1
16
  ## 1.0.0 - 2024-10-26
2
17
 
3
18
  ### Added
data/lib/hexapdf/cli.rb CHANGED
@@ -64,8 +64,21 @@ module HexaPDF
64
64
  rescue StandardError => e
65
65
  $stderr.puts "Problem encountered: #{e.message}"
66
66
  unless e.kind_of?(HexaPDF::Error)
67
+ $stderr.puts "Backtrace (last 10 lines):"
68
+ $stderr.puts e.backtrace[0, 10]
69
+ $stderr.puts
67
70
  $stderr.puts "--> The problem might indicate a faulty PDF or a bug in HexaPDF."
68
- $stderr.puts "--> Please report this at https://github.com/gettalong/hexapdf/issues - thanks!"
71
+ $stderr.puts "--> Please report this at"
72
+ $stderr.puts "-->"
73
+ $stderr.puts "--> https://github.com/gettalong/hexapdf/issues"
74
+ $stderr.puts "-->"
75
+ $stderr.puts "--> and include the information above as well as the output of running"
76
+ $stderr.puts "--> the following command on the input PDF:"
77
+ $stderr.puts "-->"
78
+ $stderr.puts "--> hexapdf info --check INPUT.PDF"
79
+ $stderr.puts "-->"
80
+ $stderr.puts "--> If possible, please also provide the input PDF."
81
+ $stderr.puts "--> Thanks!"
69
82
  end
70
83
  exit(1)
71
84
  end
@@ -116,7 +116,11 @@ module HexaPDF
116
116
  "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
117
117
  end
118
118
 
119
- @document.wrap(obj, oid: oid, gen: gen, stream: stream)
119
+ if obj.kind_of?(Reference)
120
+ @document.deref(obj)
121
+ else
122
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
123
+ end
120
124
  rescue HexaPDF::MalformedPDFError
121
125
  reconstructed_revision.object(xref_entry) ||
122
126
  @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
@@ -174,6 +174,7 @@ module HexaPDF
174
174
  elsif node.key?(:Kids)
175
175
  index = find_in_intermediate_node(node[:Kids], key)
176
176
  node = node[:Kids][index]
177
+ node = document.wrap(node, type: self.class) if node
177
178
  break unless node && key >= node[:Limits][0] && key <= node[:Limits][1]
178
179
  else
179
180
  break
@@ -194,7 +195,7 @@ module HexaPDF
194
195
  container_name = leaf_node_container_name
195
196
  stack = [self]
196
197
  until stack.empty?
197
- node = stack.pop
198
+ node = document.wrap(stack.pop, type: self.class)
198
199
  if node.key?(container_name)
199
200
  data = node[container_name]
200
201
  index = 0
@@ -217,7 +218,7 @@ module HexaPDF
217
218
  def path_to_key(node, key, stack)
218
219
  return unless node.key?(:Kids)
219
220
  index = find_in_intermediate_node(node[:Kids], key)
220
- stack << node[:Kids][index]
221
+ stack << document.wrap(node[:Kids][index], type: self.class)
221
222
  path_to_key(stack.last, key, stack)
222
223
  end
223
224
 
@@ -307,6 +308,15 @@ module HexaPDF
307
308
  super
308
309
  container_name = leaf_node_container_name
309
310
 
311
+ if key?(:Kids)
312
+ self[:Kids].each do |kid|
313
+ unless kid.indirect?
314
+ yield("Children of sorted tree nodes must be indirect", true)
315
+ document.add(kid)
316
+ end
317
+ end
318
+ end
319
+
310
320
  # All keys of the container must be lexically ordered strings and the container must be
311
321
  # correctly formatted
312
322
  if key?(container_name)
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '1.0.0'
40
+ VERSION = '1.0.1'
41
41
 
42
42
  end
@@ -149,6 +149,7 @@ module HexaPDF
149
149
  obj_to_stm = object_streams.each_with_object({}) {|stm, m| m.update(stm.write_objects(rev)) }
150
150
 
151
151
  xref_section = XRefSection.new
152
+ xref_section.mark_as_initial_section! unless previous_xref_pos
152
153
  xref_section.add_free_entry(0, 65535) if previous_xref_pos.nil?
153
154
  rev.each do |obj|
154
155
  if obj.null?
@@ -111,6 +111,13 @@ module HexaPDF
111
111
  # used.
112
112
  private :'[]='
113
113
 
114
+ # Marks this XRefSection object as being the first cross-reference section in a PDF file.
115
+ #
116
+ # This has the consequence that only a single subsection is created.
117
+ def mark_as_initial_section!
118
+ @initial_section = true
119
+ end
120
+
114
121
  # Adds an in-use entry to the cross-reference section.
115
122
  #
116
123
  # See: ::in_use_entry
@@ -147,15 +154,24 @@ module HexaPDF
147
154
  # If this section contains no objects, a single empty array is yielded (corresponding to a
148
155
  # subsection with zero elements).
149
156
  #
150
- # The subsections are dynamically generated based on the object numbers in this section.
157
+ # The subsections are dynamically generated based on the object numbers in this section. In case
158
+ # the section was marked as the initial section (see #mark_as_initial_section!) only a single
159
+ # subsection is yielded.
151
160
  def each_subsection
152
161
  return to_enum(__method__) unless block_given?
153
162
 
154
163
  temp = []
155
164
  oids.sort.each do |oid|
156
- if !temp.empty? && temp[-1].oid + 1 != oid
157
- yield(temp)
158
- temp = []
165
+ expected_next_oid = !temp.empty? && temp[-1].oid + 1
166
+ if expected_next_oid && expected_next_oid != oid
167
+ if @initial_section
168
+ expected_next_oid.upto(oid - 1) do |free_oid|
169
+ temp << self.class.free_entry(free_oid, 0)
170
+ end
171
+ else
172
+ yield(temp)
173
+ temp = []
174
+ end
159
175
  end
160
176
  temp << self[oid]
161
177
  end
@@ -33,18 +33,23 @@ describe HexaPDF::Parser do
33
33
  endstream
34
34
  endobj
35
35
 
36
+ 5 0 obj
37
+ 1 0 R
38
+ endobj
39
+
36
40
  xref
37
41
  0 4
38
42
  0000000000 65535 f
39
43
  0000000010 00000 n
40
44
  0000000029 00000 n
41
45
  0000000000 65535 f
42
- 3 1
46
+ 3 2
43
47
  0000000556 00000 n
48
+ 0000000308 00000 n
44
49
  trailer
45
50
  << /Test (now) >>
46
51
  startxref
47
- 308
52
+ 330
48
53
  %%EOF
49
54
  EOF
50
55
  end
@@ -305,6 +310,11 @@ describe HexaPDF::Parser do
305
310
  assert_equal(0, obj.gen)
306
311
  end
307
312
 
313
+ it "handles the case of the value of an indirect object being an indirect reference" do
314
+ obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(5, 0, 308))
315
+ assert_equal(1, obj.oid)
316
+ end
317
+
308
318
  describe "with strict parsing" do
309
319
  it "raises an error if an indirect object has an offset of 0" do
310
320
  @document.config['parser.on_correctable_error'] = proc { true }
@@ -343,13 +353,13 @@ describe HexaPDF::Parser do
343
353
 
344
354
  describe "startxref_offset" do
345
355
  it "caches the offset value" do
346
- assert_equal(308, @parser.startxref_offset)
347
- @parser.instance_eval { @io }.string.sub!(/308\n/, "309\n")
348
- assert_equal(308, @parser.startxref_offset)
356
+ assert_equal(330, @parser.startxref_offset)
357
+ @parser.instance_eval { @io }.string.sub!(/330\n/, "309\n")
358
+ assert_equal(330, @parser.startxref_offset)
349
359
  end
350
360
 
351
361
  it "returns the correct offset" do
352
- assert_equal(308, @parser.startxref_offset)
362
+ assert_equal(330, @parser.startxref_offset)
353
363
  end
354
364
 
355
365
  it "ignores garbage at the end of the file" do
@@ -53,8 +53,8 @@ describe HexaPDF::Writer do
53
53
  EOF
54
54
 
55
55
  xref_stream = case HexaPDF::VERSION.length
56
- when 5 then "x\xDAcbdlc``b`\xB0\x04\x93\x93\x19\x18\x00\f\x0F\x01["
57
- when 6 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x18\x18\x00\f\e\x01["
56
+ when 5 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x19\x18\x00\f\x1E\x01\\"
57
+ when 6 then "x\xDAcbd\xEC```b`\xB0\x04\x93\x93\x18\x18\x00\f*\x01\\"
58
58
  else fail
59
59
  end
60
60
  @compressed_input_io = StringIO.new(<<~EOF.force_encoding(Encoding::BINARY))
@@ -69,8 +69,8 @@ describe HexaPDF::Writer do
69
69
  20
70
70
  endobj
71
71
  3 0 obj
72
- <</Size 6/Type/XRef/W[1 1 2]/Index[0 4 5 1]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 31>>stream
73
- x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x88he`\x00\x00B4\x04\x1E
72
+ <</Size 6/Type/XRef/W[1 1 2]/Index[0 6]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 36>>stream
73
+ x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x90\xF8_\f\x14c\x14bd\x04\x00lk\a
74
74
  endstream
75
75
  endobj
76
76
  startxref
@@ -90,7 +90,7 @@ describe HexaPDF::Writer do
90
90
  endstream
91
91
  endobj
92
92
  startxref
93
- #{442 + HexaPDF::VERSION.length}
93
+ #{443 + HexaPDF::VERSION.length}
94
94
  %%EOF
95
95
  EOF
96
96
  end
@@ -57,5 +57,20 @@ describe HexaPDF::XRefSection do
57
57
  @xref_section.add_in_use_entry(20, 0, 0)
58
58
  assert_subsections([[1, 2], [10, 11], [20]])
59
59
  end
60
+
61
+ it "yields a single subsection if the section was marked as the initial one" do
62
+ @xref_section.mark_as_initial_section!
63
+ @xref_section.add_in_use_entry(6, 0, 0)
64
+ @xref_section.add_in_use_entry(7, 0, 0)
65
+ @xref_section.add_in_use_entry(9, 0, 0)
66
+ @xref_section.add_in_use_entry(1, 0, 0)
67
+ @xref_section.add_in_use_entry(2, 0, 0)
68
+ result = @xref_section.each_subsection.map {|s| s.map {|e| [e.oid, e.type] }}
69
+ assert_equal([[[1, :in_use], [2, :in_use],
70
+ [3, :free], [4, :free], [5, :free],
71
+ [6, :in_use], [7, :in_use],
72
+ [8, :free],
73
+ [9, :in_use]]], result)
74
+ end
60
75
  end
61
76
  end
@@ -12,10 +12,12 @@ describe HexaPDF::Utils::SortedTreeNode do
12
12
  end
13
13
 
14
14
  def add_multilevel_entries
15
- @kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', 1, 'f', 1]}, type: HexaPDF::NameTreeNode)
15
+ item = @doc.add(1)
16
+ @item_ref = HexaPDF::Reference.new(item.oid, item.gen)
17
+ @kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', @item_ref, 'f', 1]}, type: HexaPDF::NameTreeNode)
16
18
  @kid12 = @doc.add({Limits: ['i', 'm'], Names: ['i', 1, 'm', 1]}, type: HexaPDF::NameTreeNode)
17
19
  ref = HexaPDF::Reference.new(@kid11.oid, @kid11.gen)
18
- @kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]}, type: HexaPDF::NameTreeNode)
20
+ @kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]})
19
21
  @kid21 = @doc.add({Limits: ['o', 'q'], Names: ['o', 1, 'q', 1]}, type: HexaPDF::NameTreeNode)
20
22
  @kid221 = @doc.add({Limits: ['s', 'u'], Names: ['s', 1, 'u', 1]}, type: HexaPDF::NameTreeNode)
21
23
  @kid22 = @doc.add({Limits: ['s', 'u'], Kids: [@kid221]}, type: HexaPDF::NameTreeNode)
@@ -75,7 +77,7 @@ describe HexaPDF::Utils::SortedTreeNode do
75
77
  @root.add_entry('v', 1)
76
78
  assert_equal(['a', 'm'], @kid1[:Limits].value)
77
79
  assert_equal(['a', 'f'], @kid11[:Limits].value)
78
- assert_equal(['a', 1, 'c', 1, 'e', 1, 'f', 1], @kid11[:Names].value)
80
+ assert_equal(['a', 1, 'c', @item_ref, 'e', 1, 'f', 1], @kid11[:Names].value)
79
81
  assert_equal(['g', 'm'], @kid12[:Limits].value)
80
82
  assert_equal(['g', 1, 'i', 1, 'j', 1, 'm', 1], @kid12[:Names].value)
81
83
  assert_equal(['n', 'v'], @kid2[:Limits].value)
@@ -203,13 +205,12 @@ describe HexaPDF::Utils::SortedTreeNode do
203
205
  end
204
206
 
205
207
  it "checks that all kid objects are indirect objects" do
206
- @root[:Kids][0] = ref = HexaPDF::Reference.new(@kid1.oid, @kid1.gen)
207
208
  assert(@root.validate)
208
209
 
209
- @root[:Kids][0] = ref
210
+ @root[:Kids][0] = @kid1
210
211
  @kid1.oid = 0
211
212
  assert(@root.validate do |message, c|
212
- assert_match(/must be an indirect object/, message)
213
+ assert_match(/children.*must be indirect/i, message)
213
214
  assert(c)
214
215
  end)
215
216
  assert(@kid1.indirect?)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-26 00:00:00.000000000 Z
11
+ date: 2024-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse