hexapdf 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/lib/hexapdf/cli.rb +14 -1
- data/lib/hexapdf/parser.rb +5 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +12 -2
- data/lib/hexapdf/version.rb +1 -1
- data/lib/hexapdf/writer.rb +1 -0
- data/lib/hexapdf/xref_section.rb +20 -4
- data/test/hexapdf/test_parser.rb +16 -6
- data/test/hexapdf/test_writer.rb +5 -5
- data/test/hexapdf/test_xref_section.rb +15 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +7 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61b0fb56c6522f2af82eb8ffb10570c45bb11460cf4c048c1bdfe8d9daf71afe
|
4
|
+
data.tar.gz: 91cb053019c367825ac0799a84e4ddad837fe283a6ab2bc6df16ee9ed9f2456d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a71ee1e9307f0ef67c9dec108c7f68db45166a62f9b6ec60915ce2c089cf0e9ec5bfcd8d74e8b31b63238a09c820a0798689a84e5ea0b1577e2492e5a1d425e
|
7
|
+
data.tar.gz: b20043cead03f7fc7fe527fdbcb3674ab2d1da06b546bac9c1549b6eb6d143232453132709d93ae008d78a83bff36cf85fd0dbc0938da848e7847a1830e6011e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
## 1.0.1 - 2024-11-04
|
2
|
+
|
3
|
+
### Changed
|
4
|
+
|
5
|
+
* Informational output on errors when running CLI commands to provide more
|
6
|
+
details
|
7
|
+
|
8
|
+
### Fixed
|
9
|
+
|
10
|
+
* Parsing of indirect objects the value of which is an indirect reference
|
11
|
+
* Writing of the initial cross-reference section to ensure a single subsection
|
12
|
+
* [HexaPDF::Utils::SortedTreeNode] to wrap all /Kids entries with the correct
|
13
|
+
type class
|
14
|
+
|
15
|
+
|
1
16
|
## 1.0.0 - 2024-10-26
|
2
17
|
|
3
18
|
### Added
|
data/lib/hexapdf/cli.rb
CHANGED
@@ -64,8 +64,21 @@ module HexaPDF
|
|
64
64
|
rescue StandardError => e
|
65
65
|
$stderr.puts "Problem encountered: #{e.message}"
|
66
66
|
unless e.kind_of?(HexaPDF::Error)
|
67
|
+
$stderr.puts "Backtrace (last 10 lines):"
|
68
|
+
$stderr.puts e.backtrace[0, 10]
|
69
|
+
$stderr.puts
|
67
70
|
$stderr.puts "--> The problem might indicate a faulty PDF or a bug in HexaPDF."
|
68
|
-
$stderr.puts "--> Please report this at
|
71
|
+
$stderr.puts "--> Please report this at"
|
72
|
+
$stderr.puts "-->"
|
73
|
+
$stderr.puts "--> https://github.com/gettalong/hexapdf/issues"
|
74
|
+
$stderr.puts "-->"
|
75
|
+
$stderr.puts "--> and include the information above as well as the output of running"
|
76
|
+
$stderr.puts "--> the following command on the input PDF:"
|
77
|
+
$stderr.puts "-->"
|
78
|
+
$stderr.puts "--> hexapdf info --check INPUT.PDF"
|
79
|
+
$stderr.puts "-->"
|
80
|
+
$stderr.puts "--> If possible, please also provide the input PDF."
|
81
|
+
$stderr.puts "--> Thanks!"
|
69
82
|
end
|
70
83
|
exit(1)
|
71
84
|
end
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -116,7 +116,11 @@ module HexaPDF
|
|
116
116
|
"the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
|
117
117
|
end
|
118
118
|
|
119
|
-
|
119
|
+
if obj.kind_of?(Reference)
|
120
|
+
@document.deref(obj)
|
121
|
+
else
|
122
|
+
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
123
|
+
end
|
120
124
|
rescue HexaPDF::MalformedPDFError
|
121
125
|
reconstructed_revision.object(xref_entry) ||
|
122
126
|
@document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
|
data/lib/hexapdf/utils/sorted_tree_node.rb
CHANGED
@@ -174,6 +174,7 @@ module HexaPDF
|
|
174
174
|
elsif node.key?(:Kids)
|
175
175
|
index = find_in_intermediate_node(node[:Kids], key)
|
176
176
|
node = node[:Kids][index]
|
177
|
+
node = document.wrap(node, type: self.class) if node
|
177
178
|
break unless node && key >= node[:Limits][0] && key <= node[:Limits][1]
|
178
179
|
else
|
179
180
|
break
|
@@ -194,7 +195,7 @@ module HexaPDF
|
|
194
195
|
container_name = leaf_node_container_name
|
195
196
|
stack = [self]
|
196
197
|
until stack.empty?
|
197
|
-
node = stack.pop
|
198
|
+
node = document.wrap(stack.pop, type: self.class)
|
198
199
|
if node.key?(container_name)
|
199
200
|
data = node[container_name]
|
200
201
|
index = 0
|
@@ -217,7 +218,7 @@ module HexaPDF
|
|
217
218
|
def path_to_key(node, key, stack)
|
218
219
|
return unless node.key?(:Kids)
|
219
220
|
index = find_in_intermediate_node(node[:Kids], key)
|
220
|
-
stack << node[:Kids][index]
|
221
|
+
stack << document.wrap(node[:Kids][index], type: self.class)
|
221
222
|
path_to_key(stack.last, key, stack)
|
222
223
|
end
|
223
224
|
|
@@ -307,6 +308,15 @@ module HexaPDF
|
|
307
308
|
super
|
308
309
|
container_name = leaf_node_container_name
|
309
310
|
|
311
|
+
if key?(:Kids)
|
312
|
+
self[:Kids].each do |kid|
|
313
|
+
unless kid.indirect?
|
314
|
+
yield("Children of sorted tree nodes must be indirect", true)
|
315
|
+
document.add(kid)
|
316
|
+
end
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
310
320
|
# All keys of the container must be lexically ordered strings and the container must be
|
311
321
|
# correctly formatted
|
312
322
|
if key?(container_name)
|
data/lib/hexapdf/version.rb
CHANGED
data/lib/hexapdf/writer.rb
CHANGED
@@ -149,6 +149,7 @@ module HexaPDF
|
|
149
149
|
obj_to_stm = object_streams.each_with_object({}) {|stm, m| m.update(stm.write_objects(rev)) }
|
150
150
|
|
151
151
|
xref_section = XRefSection.new
|
152
|
+
xref_section.mark_as_initial_section! unless previous_xref_pos
|
152
153
|
xref_section.add_free_entry(0, 65535) if previous_xref_pos.nil?
|
153
154
|
rev.each do |obj|
|
154
155
|
if obj.null?
|
data/lib/hexapdf/xref_section.rb
CHANGED
@@ -111,6 +111,13 @@ module HexaPDF
|
|
111
111
|
# used.
|
112
112
|
private :'[]='
|
113
113
|
|
114
|
+
# Marks this XRefSection object as being the first cross-reference section in a PDF file.
|
115
|
+
#
|
116
|
+
# This has the consequence that only a single sub-section is created.
|
117
|
+
def mark_as_initial_section!
|
118
|
+
@initial_section = true
|
119
|
+
end
|
120
|
+
|
114
121
|
# Adds an in-use entry to the cross-reference section.
|
115
122
|
#
|
116
123
|
# See: ::in_use_entry
|
@@ -147,15 +154,24 @@ module HexaPDF
|
|
147
154
|
# If this section contains no objects, a single empty array is yielded (corresponding to a
|
148
155
|
# subsection with zero elements).
|
149
156
|
#
|
150
|
-
# The subsections are dynamically generated based on the object numbers in this section.
|
157
|
+
# The subsections are dynamically generated based on the object numbers in this section. In case
|
158
|
+
# the section was marked as the initial section (see #mark_as_initial_section!) only a single
|
159
|
+
# subsection is yielded.
|
151
160
|
def each_subsection
|
152
161
|
return to_enum(__method__) unless block_given?
|
153
162
|
|
154
163
|
temp = []
|
155
164
|
oids.sort.each do |oid|
|
156
|
-
|
157
|
-
|
158
|
-
|
165
|
+
expected_next_oid = !temp.empty? && temp[-1].oid + 1
|
166
|
+
if expected_next_oid && expected_next_oid != oid
|
167
|
+
if @initial_section
|
168
|
+
expected_next_oid.upto(oid - 1) do |free_oid|
|
169
|
+
temp << self.class.free_entry(free_oid, 0)
|
170
|
+
end
|
171
|
+
else
|
172
|
+
yield(temp)
|
173
|
+
temp = []
|
174
|
+
end
|
159
175
|
end
|
160
176
|
temp << self[oid]
|
161
177
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -33,18 +33,23 @@ describe HexaPDF::Parser do
|
|
33
33
|
endstream
|
34
34
|
endobj
|
35
35
|
|
36
|
+
5 0 obj
|
37
|
+
1 0 R
|
38
|
+
endobj
|
39
|
+
|
36
40
|
xref
|
37
41
|
0 4
|
38
42
|
0000000000 65535 f
|
39
43
|
0000000010 00000 n
|
40
44
|
0000000029 00000 n
|
41
45
|
0000000000 65535 f
|
42
|
-
3 1
|
46
|
+
3 2
|
43
47
|
0000000556 00000 n
|
48
|
+
0000000308 00000 n
|
44
49
|
trailer
|
45
50
|
<< /Test (now) >>
|
46
51
|
startxref
|
47
|
-
|
52
|
+
330
|
48
53
|
%%EOF
|
49
54
|
EOF
|
50
55
|
end
|
@@ -305,6 +310,11 @@ describe HexaPDF::Parser do
|
|
305
310
|
assert_equal(0, obj.gen)
|
306
311
|
end
|
307
312
|
|
313
|
+
it "handles the case of the value of an indirect object being an indirect reference" do
|
314
|
+
obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(5, 0, 308))
|
315
|
+
assert_equal(1, obj.oid)
|
316
|
+
end
|
317
|
+
|
308
318
|
describe "with strict parsing" do
|
309
319
|
it "raises an error if an indirect object has an offset of 0" do
|
310
320
|
@document.config['parser.on_correctable_error'] = proc { true }
|
@@ -343,13 +353,13 @@ describe HexaPDF::Parser do
|
|
343
353
|
|
344
354
|
describe "startxref_offset" do
|
345
355
|
it "caches the offset value" do
|
346
|
-
assert_equal(
|
347
|
-
@parser.instance_eval { @io }.string.sub!(/
|
348
|
-
assert_equal(
|
356
|
+
assert_equal(330, @parser.startxref_offset)
|
357
|
+
@parser.instance_eval { @io }.string.sub!(/330\n/, "309\n")
|
358
|
+
assert_equal(330, @parser.startxref_offset)
|
349
359
|
end
|
350
360
|
|
351
361
|
it "returns the correct offset" do
|
352
|
-
assert_equal(
|
362
|
+
assert_equal(330, @parser.startxref_offset)
|
353
363
|
end
|
354
364
|
|
355
365
|
it "ignores garbage at the end of the file" do
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -53,8 +53,8 @@ describe HexaPDF::Writer do
|
|
53
53
|
EOF
|
54
54
|
|
55
55
|
xref_stream = case HexaPDF::VERSION.length
|
56
|
-
when 5 then "x\
|
57
|
-
when 6 then "x\
|
56
|
+
when 5 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x19\x18\x00\f\x1E\x01\\"
|
57
|
+
when 6 then "x\xDAcbd\xEC```b`\xB0\x04\x93\x93\x18\x18\x00\f*\x01\\"
|
58
58
|
else fail
|
59
59
|
end
|
60
60
|
@compressed_input_io = StringIO.new(<<~EOF.force_encoding(Encoding::BINARY))
|
@@ -69,8 +69,8 @@ describe HexaPDF::Writer do
|
|
69
69
|
20
|
70
70
|
endobj
|
71
71
|
3 0 obj
|
72
|
-
<</Size 6/Type/XRef/W[1 1 2]/Index[0
|
73
|
-
x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\
|
72
|
+
<</Size 6/Type/XRef/W[1 1 2]/Index[0 6]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 36>>stream
|
73
|
+
x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x90\xF8_\f\x14c\x14bd\x04\x00lk\a
|
74
74
|
endstream
|
75
75
|
endobj
|
76
76
|
startxref
|
@@ -90,7 +90,7 @@ describe HexaPDF::Writer do
|
|
90
90
|
endstream
|
91
91
|
endobj
|
92
92
|
startxref
|
93
|
-
#{
|
93
|
+
#{443 + HexaPDF::VERSION.length}
|
94
94
|
%%EOF
|
95
95
|
EOF
|
96
96
|
end
|
data/test/hexapdf/test_xref_section.rb
CHANGED
@@ -57,5 +57,20 @@ describe HexaPDF::XRefSection do
|
|
57
57
|
@xref_section.add_in_use_entry(20, 0, 0)
|
58
58
|
assert_subsections([[1, 2], [10, 11], [20]])
|
59
59
|
end
|
60
|
+
|
61
|
+
it "yields a single subsection if the section was marked as the initial one" do
|
62
|
+
@xref_section.mark_as_initial_section!
|
63
|
+
@xref_section.add_in_use_entry(6, 0, 0)
|
64
|
+
@xref_section.add_in_use_entry(7, 0, 0)
|
65
|
+
@xref_section.add_in_use_entry(9, 0, 0)
|
66
|
+
@xref_section.add_in_use_entry(1, 0, 0)
|
67
|
+
@xref_section.add_in_use_entry(2, 0, 0)
|
68
|
+
result = @xref_section.each_subsection.map {|s| s.map {|e| [e.oid, e.type] }}
|
69
|
+
assert_equal([[[1, :in_use], [2, :in_use],
|
70
|
+
[3, :free], [4, :free], [5, :free],
|
71
|
+
[6, :in_use], [7, :in_use],
|
72
|
+
[8, :free],
|
73
|
+
[9, :in_use]]], result)
|
74
|
+
end
|
60
75
|
end
|
61
76
|
end
|
data/test/hexapdf/utils/test_sorted_tree_node.rb
CHANGED
@@ -12,10 +12,12 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def add_multilevel_entries
|
15
|
-
|
15
|
+
item = @doc.add(1)
|
16
|
+
@item_ref = HexaPDF::Reference.new(item.oid, item.gen)
|
17
|
+
@kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', @item_ref, 'f', 1]}, type: HexaPDF::NameTreeNode)
|
16
18
|
@kid12 = @doc.add({Limits: ['i', 'm'], Names: ['i', 1, 'm', 1]}, type: HexaPDF::NameTreeNode)
|
17
19
|
ref = HexaPDF::Reference.new(@kid11.oid, @kid11.gen)
|
18
|
-
@kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]}
|
20
|
+
@kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]})
|
19
21
|
@kid21 = @doc.add({Limits: ['o', 'q'], Names: ['o', 1, 'q', 1]}, type: HexaPDF::NameTreeNode)
|
20
22
|
@kid221 = @doc.add({Limits: ['s', 'u'], Names: ['s', 1, 'u', 1]}, type: HexaPDF::NameTreeNode)
|
21
23
|
@kid22 = @doc.add({Limits: ['s', 'u'], Kids: [@kid221]}, type: HexaPDF::NameTreeNode)
|
@@ -75,7 +77,7 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
75
77
|
@root.add_entry('v', 1)
|
76
78
|
assert_equal(['a', 'm'], @kid1[:Limits].value)
|
77
79
|
assert_equal(['a', 'f'], @kid11[:Limits].value)
|
78
|
-
assert_equal(['a', 1, 'c',
|
80
|
+
assert_equal(['a', 1, 'c', @item_ref, 'e', 1, 'f', 1], @kid11[:Names].value)
|
79
81
|
assert_equal(['g', 'm'], @kid12[:Limits].value)
|
80
82
|
assert_equal(['g', 1, 'i', 1, 'j', 1, 'm', 1], @kid12[:Names].value)
|
81
83
|
assert_equal(['n', 'v'], @kid2[:Limits].value)
|
@@ -203,13 +205,12 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
203
205
|
end
|
204
206
|
|
205
207
|
it "checks that all kid objects are indirect objects" do
|
206
|
-
@root[:Kids][0] = ref = HexaPDF::Reference.new(@kid1.oid, @kid1.gen)
|
207
208
|
assert(@root.validate)
|
208
209
|
|
209
|
-
@root[:Kids][0] =
|
210
|
+
@root[:Kids][0] = @kid1
|
210
211
|
@kid1.oid = 0
|
211
212
|
assert(@root.validate do |message, c|
|
212
|
-
assert_match(/must be
|
213
|
+
assert_match(/children.*must be indirect/i, message)
|
213
214
|
assert(c)
|
214
215
|
end)
|
215
216
|
assert(@kid1.indirect?)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-26 00:00:00.000000000 Z
|
11
|
+
date: 2024-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|