hexapdf 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/lib/hexapdf/cli.rb +14 -1
- data/lib/hexapdf/parser.rb +5 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +12 -2
- data/lib/hexapdf/version.rb +1 -1
- data/lib/hexapdf/writer.rb +1 -0
- data/lib/hexapdf/xref_section.rb +20 -4
- data/test/hexapdf/test_parser.rb +16 -6
- data/test/hexapdf/test_writer.rb +5 -5
- data/test/hexapdf/test_xref_section.rb +15 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +7 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61b0fb56c6522f2af82eb8ffb10570c45bb11460cf4c048c1bdfe8d9daf71afe
|
4
|
+
data.tar.gz: 91cb053019c367825ac0799a84e4ddad837fe283a6ab2bc6df16ee9ed9f2456d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a71ee1e9307f0ef67c9dec108c7f68db45166a62f9b6ec60915ce2c089cf0e9ec5bfcd8d74e8b31b63238a09c820a0798689a84e5ea0b1577e2492e5a1d425e
|
7
|
+
data.tar.gz: b20043cead03f7fc7fe527fdbcb3674ab2d1da06b546bac9c1549b6eb6d143232453132709d93ae008d78a83bff36cf85fd0dbc0938da848e7847a1830e6011e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## 1.0.1 - 2024-11-04
|
2
|
+
|
3
|
+
### Changed
|
4
|
+
|
5
|
+
* Informational output on errors when running CLI commands to provide more
|
6
|
+
details
|
7
|
+
|
8
|
+
### Fixed
|
9
|
+
|
10
|
+
* Parsing of indirect objects the value of which is an indirect reference
|
11
|
+
* Writing of the initial cross-reference section to ensure a single subsection
|
12
|
+
* [HexaPDF::Utils::SortedTreeNode] to wrap all /Kids entries with the correct
|
13
|
+
type class
|
14
|
+
|
15
|
+
|
1
16
|
## 1.0.0 - 2024-10-26
|
2
17
|
|
3
18
|
### Added
|
data/lib/hexapdf/cli.rb
CHANGED
@@ -64,8 +64,21 @@ module HexaPDF
|
|
64
64
|
rescue StandardError => e
|
65
65
|
$stderr.puts "Problem encountered: #{e.message}"
|
66
66
|
unless e.kind_of?(HexaPDF::Error)
|
67
|
+
$stderr.puts "Backtrace (last 10 lines):"
|
68
|
+
$stderr.puts e.backtrace[0, 10]
|
69
|
+
$stderr.puts
|
67
70
|
$stderr.puts "--> The problem might indicate a faulty PDF or a bug in HexaPDF."
|
68
|
-
$stderr.puts "--> Please report this at
|
71
|
+
$stderr.puts "--> Please report this at"
|
72
|
+
$stderr.puts "-->"
|
73
|
+
$stderr.puts "--> https://github.com/gettalong/hexapdf/issues"
|
74
|
+
$stderr.puts "-->"
|
75
|
+
$stderr.puts "--> and include the information above as well as the output of running"
|
76
|
+
$stderr.puts "--> the following command on the input PDF:"
|
77
|
+
$stderr.puts "-->"
|
78
|
+
$stderr.puts "--> hexapdf info --check INPUT.PDF"
|
79
|
+
$stderr.puts "-->"
|
80
|
+
$stderr.puts "--> If possible, please also provide the input PDF."
|
81
|
+
$stderr.puts "--> Thanks!"
|
69
82
|
end
|
70
83
|
exit(1)
|
71
84
|
end
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -116,7 +116,11 @@ module HexaPDF
|
|
116
116
|
"the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
|
117
117
|
end
|
118
118
|
|
119
|
-
|
119
|
+
if obj.kind_of?(Reference)
|
120
|
+
@document.deref(obj)
|
121
|
+
else
|
122
|
+
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
123
|
+
end
|
120
124
|
rescue HexaPDF::MalformedPDFError
|
121
125
|
reconstructed_revision.object(xref_entry) ||
|
122
126
|
@document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
|
@@ -174,6 +174,7 @@ module HexaPDF
|
|
174
174
|
elsif node.key?(:Kids)
|
175
175
|
index = find_in_intermediate_node(node[:Kids], key)
|
176
176
|
node = node[:Kids][index]
|
177
|
+
node = document.wrap(node, type: self.class) if node
|
177
178
|
break unless node && key >= node[:Limits][0] && key <= node[:Limits][1]
|
178
179
|
else
|
179
180
|
break
|
@@ -194,7 +195,7 @@ module HexaPDF
|
|
194
195
|
container_name = leaf_node_container_name
|
195
196
|
stack = [self]
|
196
197
|
until stack.empty?
|
197
|
-
node = stack.pop
|
198
|
+
node = document.wrap(stack.pop, type: self.class)
|
198
199
|
if node.key?(container_name)
|
199
200
|
data = node[container_name]
|
200
201
|
index = 0
|
@@ -217,7 +218,7 @@ module HexaPDF
|
|
217
218
|
def path_to_key(node, key, stack)
|
218
219
|
return unless node.key?(:Kids)
|
219
220
|
index = find_in_intermediate_node(node[:Kids], key)
|
220
|
-
stack << node[:Kids][index]
|
221
|
+
stack << document.wrap(node[:Kids][index], type: self.class)
|
221
222
|
path_to_key(stack.last, key, stack)
|
222
223
|
end
|
223
224
|
|
@@ -307,6 +308,15 @@ module HexaPDF
|
|
307
308
|
super
|
308
309
|
container_name = leaf_node_container_name
|
309
310
|
|
311
|
+
if key?(:Kids)
|
312
|
+
self[:Kids].each do |kid|
|
313
|
+
unless kid.indirect?
|
314
|
+
yield("Children of sorted tree nodes must be indirect", true)
|
315
|
+
document.add(kid)
|
316
|
+
end
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
310
320
|
# All keys of the container must be lexically ordered strings and the container must be
|
311
321
|
# correctly formatted
|
312
322
|
if key?(container_name)
|
data/lib/hexapdf/version.rb
CHANGED
data/lib/hexapdf/writer.rb
CHANGED
@@ -149,6 +149,7 @@ module HexaPDF
|
|
149
149
|
obj_to_stm = object_streams.each_with_object({}) {|stm, m| m.update(stm.write_objects(rev)) }
|
150
150
|
|
151
151
|
xref_section = XRefSection.new
|
152
|
+
xref_section.mark_as_initial_section! unless previous_xref_pos
|
152
153
|
xref_section.add_free_entry(0, 65535) if previous_xref_pos.nil?
|
153
154
|
rev.each do |obj|
|
154
155
|
if obj.null?
|
data/lib/hexapdf/xref_section.rb
CHANGED
@@ -111,6 +111,13 @@ module HexaPDF
|
|
111
111
|
# used.
|
112
112
|
private :'[]='
|
113
113
|
|
114
|
+
# Marks this XRefSection object as being the first cross-reference section in a PDF file.
|
115
|
+
#
|
116
|
+
# This has the consequence that only a single sub-section is created.
|
117
|
+
def mark_as_initial_section!
|
118
|
+
@initial_section = true
|
119
|
+
end
|
120
|
+
|
114
121
|
# Adds an in-use entry to the cross-reference section.
|
115
122
|
#
|
116
123
|
# See: ::in_use_entry
|
@@ -147,15 +154,24 @@ module HexaPDF
|
|
147
154
|
# If this section contains no objects, a single empty array is yielded (corresponding to a
|
148
155
|
# subsection with zero elements).
|
149
156
|
#
|
150
|
-
# The subsections are dynamically generated based on the object numbers in this section.
|
157
|
+
# The subsections are dynamically generated based on the object numbers in this section. In case
|
158
|
+
# the section was marked as the initial section (see #mark_as_initial_section!) only a single
|
159
|
+
# subsection is yielded.
|
151
160
|
def each_subsection
|
152
161
|
return to_enum(__method__) unless block_given?
|
153
162
|
|
154
163
|
temp = []
|
155
164
|
oids.sort.each do |oid|
|
156
|
-
|
157
|
-
|
158
|
-
|
165
|
+
expected_next_oid = !temp.empty? && temp[-1].oid + 1
|
166
|
+
if expected_next_oid && expected_next_oid != oid
|
167
|
+
if @initial_section
|
168
|
+
expected_next_oid.upto(oid - 1) do |free_oid|
|
169
|
+
temp << self.class.free_entry(free_oid, 0)
|
170
|
+
end
|
171
|
+
else
|
172
|
+
yield(temp)
|
173
|
+
temp = []
|
174
|
+
end
|
159
175
|
end
|
160
176
|
temp << self[oid]
|
161
177
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -33,18 +33,23 @@ describe HexaPDF::Parser do
|
|
33
33
|
endstream
|
34
34
|
endobj
|
35
35
|
|
36
|
+
5 0 obj
|
37
|
+
1 0 R
|
38
|
+
endobj
|
39
|
+
|
36
40
|
xref
|
37
41
|
0 4
|
38
42
|
0000000000 65535 f
|
39
43
|
0000000010 00000 n
|
40
44
|
0000000029 00000 n
|
41
45
|
0000000000 65535 f
|
42
|
-
3 1
|
46
|
+
3 2
|
43
47
|
0000000556 00000 n
|
48
|
+
0000000308 00000 n
|
44
49
|
trailer
|
45
50
|
<< /Test (now) >>
|
46
51
|
startxref
|
47
|
-
|
52
|
+
330
|
48
53
|
%%EOF
|
49
54
|
EOF
|
50
55
|
end
|
@@ -305,6 +310,11 @@ describe HexaPDF::Parser do
|
|
305
310
|
assert_equal(0, obj.gen)
|
306
311
|
end
|
307
312
|
|
313
|
+
it "handles the case of the value of an indirect object being an indirect reference" do
|
314
|
+
obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(5, 0, 308))
|
315
|
+
assert_equal(1, obj.oid)
|
316
|
+
end
|
317
|
+
|
308
318
|
describe "with strict parsing" do
|
309
319
|
it "raises an error if an indirect object has an offset of 0" do
|
310
320
|
@document.config['parser.on_correctable_error'] = proc { true }
|
@@ -343,13 +353,13 @@ describe HexaPDF::Parser do
|
|
343
353
|
|
344
354
|
describe "startxref_offset" do
|
345
355
|
it "caches the offset value" do
|
346
|
-
assert_equal(
|
347
|
-
@parser.instance_eval { @io }.string.sub!(/
|
348
|
-
assert_equal(
|
356
|
+
assert_equal(330, @parser.startxref_offset)
|
357
|
+
@parser.instance_eval { @io }.string.sub!(/330\n/, "309\n")
|
358
|
+
assert_equal(330, @parser.startxref_offset)
|
349
359
|
end
|
350
360
|
|
351
361
|
it "returns the correct offset" do
|
352
|
-
assert_equal(
|
362
|
+
assert_equal(330, @parser.startxref_offset)
|
353
363
|
end
|
354
364
|
|
355
365
|
it "ignores garbage at the end of the file" do
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -53,8 +53,8 @@ describe HexaPDF::Writer do
|
|
53
53
|
EOF
|
54
54
|
|
55
55
|
xref_stream = case HexaPDF::VERSION.length
|
56
|
-
when 5 then "x\
|
57
|
-
when 6 then "x\
|
56
|
+
when 5 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x19\x18\x00\f\x1E\x01\\"
|
57
|
+
when 6 then "x\xDAcbd\xEC```b`\xB0\x04\x93\x93\x18\x18\x00\f*\x01\\"
|
58
58
|
else fail
|
59
59
|
end
|
60
60
|
@compressed_input_io = StringIO.new(<<~EOF.force_encoding(Encoding::BINARY))
|
@@ -69,8 +69,8 @@ describe HexaPDF::Writer do
|
|
69
69
|
20
|
70
70
|
endobj
|
71
71
|
3 0 obj
|
72
|
-
<</Size 6/Type/XRef/W[1 1 2]/Index[0
|
73
|
-
x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\
|
72
|
+
<</Size 6/Type/XRef/W[1 1 2]/Index[0 6]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 36>>stream
|
73
|
+
x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x90\xF8_\f\x14c\x14bd\x04\x00lk\a
|
74
74
|
endstream
|
75
75
|
endobj
|
76
76
|
startxref
|
@@ -90,7 +90,7 @@ describe HexaPDF::Writer do
|
|
90
90
|
endstream
|
91
91
|
endobj
|
92
92
|
startxref
|
93
|
-
#{
|
93
|
+
#{443 + HexaPDF::VERSION.length}
|
94
94
|
%%EOF
|
95
95
|
EOF
|
96
96
|
end
|
@@ -57,5 +57,20 @@ describe HexaPDF::XRefSection do
|
|
57
57
|
@xref_section.add_in_use_entry(20, 0, 0)
|
58
58
|
assert_subsections([[1, 2], [10, 11], [20]])
|
59
59
|
end
|
60
|
+
|
61
|
+
it "yields a single subsection if the section was marked as the initial one" do
|
62
|
+
@xref_section.mark_as_initial_section!
|
63
|
+
@xref_section.add_in_use_entry(6, 0, 0)
|
64
|
+
@xref_section.add_in_use_entry(7, 0, 0)
|
65
|
+
@xref_section.add_in_use_entry(9, 0, 0)
|
66
|
+
@xref_section.add_in_use_entry(1, 0, 0)
|
67
|
+
@xref_section.add_in_use_entry(2, 0, 0)
|
68
|
+
result = @xref_section.each_subsection.map {|s| s.map {|e| [e.oid, e.type] }}
|
69
|
+
assert_equal([[[1, :in_use], [2, :in_use],
|
70
|
+
[3, :free], [4, :free], [5, :free],
|
71
|
+
[6, :in_use], [7, :in_use],
|
72
|
+
[8, :free],
|
73
|
+
[9, :in_use]]], result)
|
74
|
+
end
|
60
75
|
end
|
61
76
|
end
|
@@ -12,10 +12,12 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def add_multilevel_entries
|
15
|
-
|
15
|
+
item = @doc.add(1)
|
16
|
+
@item_ref = HexaPDF::Reference.new(item.oid, item.gen)
|
17
|
+
@kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', @item_ref, 'f', 1]}, type: HexaPDF::NameTreeNode)
|
16
18
|
@kid12 = @doc.add({Limits: ['i', 'm'], Names: ['i', 1, 'm', 1]}, type: HexaPDF::NameTreeNode)
|
17
19
|
ref = HexaPDF::Reference.new(@kid11.oid, @kid11.gen)
|
18
|
-
@kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]}, type: HexaPDF::NameTreeNode)
|
20
|
+
@kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]})
|
19
21
|
@kid21 = @doc.add({Limits: ['o', 'q'], Names: ['o', 1, 'q', 1]}, type: HexaPDF::NameTreeNode)
|
20
22
|
@kid221 = @doc.add({Limits: ['s', 'u'], Names: ['s', 1, 'u', 1]}, type: HexaPDF::NameTreeNode)
|
21
23
|
@kid22 = @doc.add({Limits: ['s', 'u'], Kids: [@kid221]}, type: HexaPDF::NameTreeNode)
|
@@ -75,7 +77,7 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
75
77
|
@root.add_entry('v', 1)
|
76
78
|
assert_equal(['a', 'm'], @kid1[:Limits].value)
|
77
79
|
assert_equal(['a', 'f'], @kid11[:Limits].value)
|
78
|
-
assert_equal(['a', 1, 'c',
|
80
|
+
assert_equal(['a', 1, 'c', @item_ref, 'e', 1, 'f', 1], @kid11[:Names].value)
|
79
81
|
assert_equal(['g', 'm'], @kid12[:Limits].value)
|
80
82
|
assert_equal(['g', 1, 'i', 1, 'j', 1, 'm', 1], @kid12[:Names].value)
|
81
83
|
assert_equal(['n', 'v'], @kid2[:Limits].value)
|
@@ -203,13 +205,12 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
203
205
|
end
|
204
206
|
|
205
207
|
it "checks that all kid objects are indirect objects" do
|
206
|
-
@root[:Kids][0] = ref = HexaPDF::Reference.new(@kid1.oid, @kid1.gen)
|
207
208
|
assert(@root.validate)
|
208
209
|
|
209
|
-
@root[:Kids][0] =
|
210
|
+
@root[:Kids][0] = @kid1
|
210
211
|
@kid1.oid = 0
|
211
212
|
assert(@root.validate do |message, c|
|
212
|
-
assert_match(/must be
|
213
|
+
assert_match(/children.*must be indirect/i, message)
|
213
214
|
assert(c)
|
214
215
|
end)
|
215
216
|
assert(@kid1.indirect?)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-26 00:00:00.000000000 Z
|
11
|
+
date: 2024-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|