hexapdf 1.0.0 → 1.0.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2889ba1d03e2c351efd694b1583063023fff97c0da636ff5103f88538255735c
4
- data.tar.gz: 6fb4727db05900e8fccba2ad4e093d1092e17305e5b5616ded97a76cf835673c
3
+ metadata.gz: 61b0fb56c6522f2af82eb8ffb10570c45bb11460cf4c048c1bdfe8d9daf71afe
4
+ data.tar.gz: 91cb053019c367825ac0799a84e4ddad837fe283a6ab2bc6df16ee9ed9f2456d
5
5
  SHA512:
6
- metadata.gz: 00be8ed2c306a88e5bfc0eada97a7e6bf802ec269e832bb21b3521d4077b18ecad11946ddc6f8a6d575820e66339059e59ba2c4cdd2b74d6c7d6defd0f2f5256
7
- data.tar.gz: 94c6a8178ead2a986921b72b07ef5dc388a5fa6a67945573eec921db30c9940d241d3f47591c8fbbf9bdcf313df0dda536f0fc78e0e946e27dfa3bc13dad9a28
6
+ metadata.gz: 9a71ee1e9307f0ef67c9dec108c7f68db45166a62f9b6ec60915ce2c089cf0e9ec5bfcd8d74e8b31b63238a09c820a0798689a84e5ea0b1577e2492e5a1d425e
7
+ data.tar.gz: b20043cead03f7fc7fe527fdbcb3674ab2d1da06b546bac9c1549b6eb6d143232453132709d93ae008d78a83bff36cf85fd0dbc0938da848e7847a1830e6011e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
1
+ ## 1.0.1 - 2024-11-04
2
+
3
+ ### Changed
4
+
5
+ * Informational output on errors when running CLI commands to provide more
6
+ details
7
+
8
+ ### Fixed
9
+
10
+ * Parsing of indirect objects the value of which is an indirect reference
11
+ * Writing of the initial cross-reference section to ensure a single subsection
12
+ * [HexaPDF::Utils::SortedTreeNode] to wrap all /Kids entries with the correct
13
+ type class
14
+
15
+
1
16
  ## 1.0.0 - 2024-10-26
2
17
 
3
18
  ### Added
data/lib/hexapdf/cli.rb CHANGED
@@ -64,8 +64,21 @@ module HexaPDF
64
64
  rescue StandardError => e
65
65
  $stderr.puts "Problem encountered: #{e.message}"
66
66
  unless e.kind_of?(HexaPDF::Error)
67
+ $stderr.puts "Backtrace (last 10 lines):"
68
+ $stderr.puts e.backtrace[0, 10]
69
+ $stderr.puts
67
70
  $stderr.puts "--> The problem might indicate a faulty PDF or a bug in HexaPDF."
68
- $stderr.puts "--> Please report this at https://github.com/gettalong/hexapdf/issues - thanks!"
71
+ $stderr.puts "--> Please report this at"
72
+ $stderr.puts "-->"
73
+ $stderr.puts "--> https://github.com/gettalong/hexapdf/issues"
74
+ $stderr.puts "-->"
75
+ $stderr.puts "--> and include the information above as well as the output of running"
76
+ $stderr.puts "--> the following command on the input PDF:"
77
+ $stderr.puts "-->"
78
+ $stderr.puts "--> hexapdf info --check INPUT.PDF"
79
+ $stderr.puts "-->"
80
+ $stderr.puts "--> If possible, please also provide the input PDF."
81
+ $stderr.puts "--> Thanks!"
69
82
  end
70
83
  exit(1)
71
84
  end
@@ -116,7 +116,11 @@ module HexaPDF
116
116
  "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
117
117
  end
118
118
 
119
- @document.wrap(obj, oid: oid, gen: gen, stream: stream)
119
+ if obj.kind_of?(Reference)
120
+ @document.deref(obj)
121
+ else
122
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
123
+ end
120
124
  rescue HexaPDF::MalformedPDFError
121
125
  reconstructed_revision.object(xref_entry) ||
122
126
  @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
@@ -174,6 +174,7 @@ module HexaPDF
174
174
  elsif node.key?(:Kids)
175
175
  index = find_in_intermediate_node(node[:Kids], key)
176
176
  node = node[:Kids][index]
177
+ node = document.wrap(node, type: self.class) if node
177
178
  break unless node && key >= node[:Limits][0] && key <= node[:Limits][1]
178
179
  else
179
180
  break
@@ -194,7 +195,7 @@ module HexaPDF
194
195
  container_name = leaf_node_container_name
195
196
  stack = [self]
196
197
  until stack.empty?
197
- node = stack.pop
198
+ node = document.wrap(stack.pop, type: self.class)
198
199
  if node.key?(container_name)
199
200
  data = node[container_name]
200
201
  index = 0
@@ -217,7 +218,7 @@ module HexaPDF
217
218
  def path_to_key(node, key, stack)
218
219
  return unless node.key?(:Kids)
219
220
  index = find_in_intermediate_node(node[:Kids], key)
220
- stack << node[:Kids][index]
221
+ stack << document.wrap(node[:Kids][index], type: self.class)
221
222
  path_to_key(stack.last, key, stack)
222
223
  end
223
224
 
@@ -307,6 +308,15 @@ module HexaPDF
307
308
  super
308
309
  container_name = leaf_node_container_name
309
310
 
311
+ if key?(:Kids)
312
+ self[:Kids].each do |kid|
313
+ unless kid.indirect?
314
+ yield("Children of sorted tree nodes must be indirect", true)
315
+ document.add(kid)
316
+ end
317
+ end
318
+ end
319
+
310
320
  # All keys of the container must be lexically ordered strings and the container must be
311
321
  # correctly formatted
312
322
  if key?(container_name)
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '1.0.0'
40
+ VERSION = '1.0.1'
41
41
 
42
42
  end
@@ -149,6 +149,7 @@ module HexaPDF
149
149
  obj_to_stm = object_streams.each_with_object({}) {|stm, m| m.update(stm.write_objects(rev)) }
150
150
 
151
151
  xref_section = XRefSection.new
152
+ xref_section.mark_as_initial_section! unless previous_xref_pos
152
153
  xref_section.add_free_entry(0, 65535) if previous_xref_pos.nil?
153
154
  rev.each do |obj|
154
155
  if obj.null?
@@ -111,6 +111,13 @@ module HexaPDF
111
111
  # used.
112
112
  private :'[]='
113
113
 
114
+ # Marks this XRefSection object as being the first cross-reference section in a PDF file.
115
+ #
116
+ # This has the consequence that only a single sub-section is created.
117
+ def mark_as_initial_section!
118
+ @initial_section = true
119
+ end
120
+
114
121
  # Adds an in-use entry to the cross-reference section.
115
122
  #
116
123
  # See: ::in_use_entry
@@ -147,15 +154,24 @@ module HexaPDF
147
154
  # If this section contains no objects, a single empty array is yielded (corresponding to a
148
155
  # subsection with zero elements).
149
156
  #
150
- # The subsections are dynamically generated based on the object numbers in this section.
157
+ # The subsections are dynamically generated based on the object numbers in this section. In case
158
+ # the section was marked as the initial section (see #mark_as_initial_section!) only a single
159
+ # subsection is yielded.
151
160
  def each_subsection
152
161
  return to_enum(__method__) unless block_given?
153
162
 
154
163
  temp = []
155
164
  oids.sort.each do |oid|
156
- if !temp.empty? && temp[-1].oid + 1 != oid
157
- yield(temp)
158
- temp = []
165
+ expected_next_oid = !temp.empty? && temp[-1].oid + 1
166
+ if expected_next_oid && expected_next_oid != oid
167
+ if @initial_section
168
+ expected_next_oid.upto(oid - 1) do |free_oid|
169
+ temp << self.class.free_entry(free_oid, 0)
170
+ end
171
+ else
172
+ yield(temp)
173
+ temp = []
174
+ end
159
175
  end
160
176
  temp << self[oid]
161
177
  end
@@ -33,18 +33,23 @@ describe HexaPDF::Parser do
33
33
  endstream
34
34
  endobj
35
35
 
36
+ 5 0 obj
37
+ 1 0 R
38
+ endobj
39
+
36
40
  xref
37
41
  0 4
38
42
  0000000000 65535 f
39
43
  0000000010 00000 n
40
44
  0000000029 00000 n
41
45
  0000000000 65535 f
42
- 3 1
46
+ 3 2
43
47
  0000000556 00000 n
48
+ 0000000308 00000 n
44
49
  trailer
45
50
  << /Test (now) >>
46
51
  startxref
47
- 308
52
+ 330
48
53
  %%EOF
49
54
  EOF
50
55
  end
@@ -305,6 +310,11 @@ describe HexaPDF::Parser do
305
310
  assert_equal(0, obj.gen)
306
311
  end
307
312
 
313
+ it "handles the case of the value of an indirect object being an indirect reference" do
314
+ obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(5, 0, 308))
315
+ assert_equal(1, obj.oid)
316
+ end
317
+
308
318
  describe "with strict parsing" do
309
319
  it "raises an error if an indirect object has an offset of 0" do
310
320
  @document.config['parser.on_correctable_error'] = proc { true }
@@ -343,13 +353,13 @@ describe HexaPDF::Parser do
343
353
 
344
354
  describe "startxref_offset" do
345
355
  it "caches the offset value" do
346
- assert_equal(308, @parser.startxref_offset)
347
- @parser.instance_eval { @io }.string.sub!(/308\n/, "309\n")
348
- assert_equal(308, @parser.startxref_offset)
356
+ assert_equal(330, @parser.startxref_offset)
357
+ @parser.instance_eval { @io }.string.sub!(/330\n/, "309\n")
358
+ assert_equal(330, @parser.startxref_offset)
349
359
  end
350
360
 
351
361
  it "returns the correct offset" do
352
- assert_equal(308, @parser.startxref_offset)
362
+ assert_equal(330, @parser.startxref_offset)
353
363
  end
354
364
 
355
365
  it "ignores garbage at the end of the file" do
@@ -53,8 +53,8 @@ describe HexaPDF::Writer do
53
53
  EOF
54
54
 
55
55
  xref_stream = case HexaPDF::VERSION.length
56
- when 5 then "x\xDAcbdlc``b`\xB0\x04\x93\x93\x19\x18\x00\f\x0F\x01["
57
- when 6 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x18\x18\x00\f\e\x01["
56
+ when 5 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x19\x18\x00\f\x1E\x01\\"
57
+ when 6 then "x\xDAcbd\xEC```b`\xB0\x04\x93\x93\x18\x18\x00\f*\x01\\"
58
58
  else fail
59
59
  end
60
60
  @compressed_input_io = StringIO.new(<<~EOF.force_encoding(Encoding::BINARY))
@@ -69,8 +69,8 @@ describe HexaPDF::Writer do
69
69
  20
70
70
  endobj
71
71
  3 0 obj
72
- <</Size 6/Type/XRef/W[1 1 2]/Index[0 4 5 1]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 31>>stream
73
- x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x88he`\x00\x00B4\x04\x1E
72
+ <</Size 6/Type/XRef/W[1 1 2]/Index[0 6]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 36>>stream
73
+ x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x90\xF8_\f\x14c\x14bd\x04\x00lk\a
74
74
  endstream
75
75
  endobj
76
76
  startxref
@@ -90,7 +90,7 @@ describe HexaPDF::Writer do
90
90
  endstream
91
91
  endobj
92
92
  startxref
93
- #{442 + HexaPDF::VERSION.length}
93
+ #{443 + HexaPDF::VERSION.length}
94
94
  %%EOF
95
95
  EOF
96
96
  end
@@ -57,5 +57,20 @@ describe HexaPDF::XRefSection do
57
57
  @xref_section.add_in_use_entry(20, 0, 0)
58
58
  assert_subsections([[1, 2], [10, 11], [20]])
59
59
  end
60
+
61
+ it "yields a single subsection if the section was marked as the initial one" do
62
+ @xref_section.mark_as_initial_section!
63
+ @xref_section.add_in_use_entry(6, 0, 0)
64
+ @xref_section.add_in_use_entry(7, 0, 0)
65
+ @xref_section.add_in_use_entry(9, 0, 0)
66
+ @xref_section.add_in_use_entry(1, 0, 0)
67
+ @xref_section.add_in_use_entry(2, 0, 0)
68
+ result = @xref_section.each_subsection.map {|s| s.map {|e| [e.oid, e.type] }}
69
+ assert_equal([[[1, :in_use], [2, :in_use],
70
+ [3, :free], [4, :free], [5, :free],
71
+ [6, :in_use], [7, :in_use],
72
+ [8, :free],
73
+ [9, :in_use]]], result)
74
+ end
60
75
  end
61
76
  end
@@ -12,10 +12,12 @@ describe HexaPDF::Utils::SortedTreeNode do
12
12
  end
13
13
 
14
14
  def add_multilevel_entries
15
- @kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', 1, 'f', 1]}, type: HexaPDF::NameTreeNode)
15
+ item = @doc.add(1)
16
+ @item_ref = HexaPDF::Reference.new(item.oid, item.gen)
17
+ @kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', @item_ref, 'f', 1]}, type: HexaPDF::NameTreeNode)
16
18
  @kid12 = @doc.add({Limits: ['i', 'm'], Names: ['i', 1, 'm', 1]}, type: HexaPDF::NameTreeNode)
17
19
  ref = HexaPDF::Reference.new(@kid11.oid, @kid11.gen)
18
- @kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]}, type: HexaPDF::NameTreeNode)
20
+ @kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]})
19
21
  @kid21 = @doc.add({Limits: ['o', 'q'], Names: ['o', 1, 'q', 1]}, type: HexaPDF::NameTreeNode)
20
22
  @kid221 = @doc.add({Limits: ['s', 'u'], Names: ['s', 1, 'u', 1]}, type: HexaPDF::NameTreeNode)
21
23
  @kid22 = @doc.add({Limits: ['s', 'u'], Kids: [@kid221]}, type: HexaPDF::NameTreeNode)
@@ -75,7 +77,7 @@ describe HexaPDF::Utils::SortedTreeNode do
75
77
  @root.add_entry('v', 1)
76
78
  assert_equal(['a', 'm'], @kid1[:Limits].value)
77
79
  assert_equal(['a', 'f'], @kid11[:Limits].value)
78
- assert_equal(['a', 1, 'c', 1, 'e', 1, 'f', 1], @kid11[:Names].value)
80
+ assert_equal(['a', 1, 'c', @item_ref, 'e', 1, 'f', 1], @kid11[:Names].value)
79
81
  assert_equal(['g', 'm'], @kid12[:Limits].value)
80
82
  assert_equal(['g', 1, 'i', 1, 'j', 1, 'm', 1], @kid12[:Names].value)
81
83
  assert_equal(['n', 'v'], @kid2[:Limits].value)
@@ -203,13 +205,12 @@ describe HexaPDF::Utils::SortedTreeNode do
203
205
  end
204
206
 
205
207
  it "checks that all kid objects are indirect objects" do
206
- @root[:Kids][0] = ref = HexaPDF::Reference.new(@kid1.oid, @kid1.gen)
207
208
  assert(@root.validate)
208
209
 
209
- @root[:Kids][0] = ref
210
+ @root[:Kids][0] = @kid1
210
211
  @kid1.oid = 0
211
212
  assert(@root.validate do |message, c|
212
- assert_match(/must be an indirect object/, message)
213
+ assert_match(/children.*must be indirect/i, message)
213
214
  assert(c)
214
215
  end)
215
216
  assert(@kid1.indirect?)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-26 00:00:00.000000000 Z
11
+ date: 2024-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse