hexapdf 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -2
- data/lib/hexapdf/cli/form.rb +1 -1
- data/lib/hexapdf/cli/images.rb +5 -1
- data/lib/hexapdf/cli.rb +3 -0
- data/lib/hexapdf/document.rb +1 -1
- data/lib/hexapdf/font/encoding/base.rb +27 -0
- data/lib/hexapdf/font/type1_wrapper.rb +1 -3
- data/lib/hexapdf/layout/table_box.rb +2 -2
- data/lib/hexapdf/serializer.rb +7 -7
- data/lib/hexapdf/type/font_type1.rb +12 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +4 -1
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/font/encoding/test_base.rb +20 -0
- data/test/hexapdf/test_document.rb +1 -0
- data/test/hexapdf/test_serializer.rb +2 -1
- data/test/hexapdf/type/annotations/test_widget.rb +8 -0
- data/test/hexapdf/type/test_font_type1.rb +14 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +11 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b88ce85ee9bc603011b9f5a278829d588da10f53614c0b84a57e2d7fa38f52dc
|
4
|
+
data.tar.gz: ec2c8739ed69038e1297435550371bf329e516e9ef970fa0456502b15720d07b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 103edc366ef9f48ddd6579f7137b3ab23b4266dc2df0a77ee5b89cb4256419a00727776158a7c7570a0b10075e4f062506d524ec71ee801c00fdd9e4726c8232
|
7
|
+
data.tar.gz: f1f4a1af54445b2e7c3c9fc1adfa81fb9fad84f32461f58378fc550bd5aec16e16b276dadc7516ca5b0b6212394886d06f2cc2a3506dc2b2745b5a1f4c8136d1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
## 1.4.1 - 2025-09-23
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
* [HexaPDF::Font::Encoding::Base#to_compact_array] for creating a compact array
|
6
|
+
representation of the encoding
|
7
|
+
|
8
|
+
### Changed
|
9
|
+
|
10
|
+
- CLI to handle missing file errors better
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
* Serialization of strings that need to be UTF-16 encoded when using encryption
|
15
|
+
* [HexaPDF::Document#write_to_string] to pass on arguments to `#write`
|
16
|
+
* [HexaPDF::Type::FontType1] validation to handle PDFs with an invalid value of
|
17
|
+
/SymbolEncoding for the /Encoding key
|
18
|
+
* [HexaPDF::Type::FontType1] validation to handle PDFs with an invalid value of
|
19
|
+
/StandardEncoding for the /Encoding key
|
20
|
+
* CLI command `hexapdf form` to ignore widgets that don't belong to any field
|
21
|
+
* Validation of invalid sorted tree root nodes with odd number of direct entries
|
22
|
+
|
23
|
+
|
1
24
|
## 1.4.0 - 2025-08-03
|
2
25
|
|
3
26
|
### Added
|
@@ -24,8 +47,8 @@
|
|
24
47
|
of a table cell
|
25
48
|
* [HexaPDF::Layout::Style::Quad#set] to allow setting a subset of values using a
|
26
49
|
hash
|
27
|
-
* CLI command `
|
28
|
-
* CLI command `
|
50
|
+
* CLI command `hexapdf form` to show the names of radio button widgets
|
51
|
+
* CLI command `hexapdf form` to show position and size of widgets in easier to
|
29
52
|
understand form
|
30
53
|
* Default signing handler to not set /DigestMethod entry on signature reference
|
31
54
|
dictionary anymore
|
data/lib/hexapdf/cli/form.rb
CHANGED
@@ -290,7 +290,7 @@ module HexaPDF
|
|
290
290
|
page.each_annotation do |annotation|
|
291
291
|
next unless annotation[:Subtype] == :Widget
|
292
292
|
field = annotation.form_field
|
293
|
-
next if field.concrete_field_type == :push_button
|
293
|
+
next if !field.concrete_field_type || field.concrete_field_type == :push_button
|
294
294
|
if with_seen || !seen[field.full_field_name]
|
295
295
|
yield(page, page_index, field, annotation)
|
296
296
|
seen[field.full_field_name] = true
|
data/lib/hexapdf/cli/images.rb
CHANGED
@@ -132,7 +132,7 @@ module HexaPDF
|
|
132
132
|
printf("%5s %5s %9s %6s %6s %5s %4s %3s %5s %5s %6s %5s %8s\n",
|
133
133
|
"index", "page", "oid", "width", "height", "color", "comp", "bpc",
|
134
134
|
"x-ppi", "y-ppi", "size", "type", "writable")
|
135
|
-
puts("-" *
|
135
|
+
puts("-" * 84)
|
136
136
|
each_image(doc) do |image, index, pindex, (x_ppi, y_ppi)|
|
137
137
|
info = image.info
|
138
138
|
size = human_readable_file_size(image[:Length] + image[:SMask]&.[](:Length).to_i)
|
@@ -155,6 +155,10 @@ module HexaPDF
|
|
155
155
|
puts "Extracting #{path}..." if command_parser.verbosity_info?
|
156
156
|
image.write(path)
|
157
157
|
done << index
|
158
|
+
if info.color_space == :cmyk && info.type == :jpeg
|
159
|
+
$stderr.puts "Note (image #{path}): JPEG uses CMYK colorspace and may " \
|
160
|
+
"need color post-processing"
|
161
|
+
end
|
158
162
|
elsif command_parser.verbosity_warning?
|
159
163
|
$stderr.puts "Warning (image #{index}): PDF image format not supported for writing"
|
160
164
|
end
|
data/lib/hexapdf/cli.rb
CHANGED
@@ -61,6 +61,9 @@ module HexaPDF
|
|
61
61
|
# Runs the CLI application.
|
62
62
|
def self.run(args = ARGV)
|
63
63
|
Application.new.parse(args)
|
64
|
+
rescue Errno::ENOENT => e
|
65
|
+
path = e.message.scan(/(?<= - ).*?$/).first
|
66
|
+
$stderr.puts "Problem encountered: No such file - #{path}"
|
64
67
|
rescue StandardError => e
|
65
68
|
$stderr.puts "Problem encountered: #{e.message}"
|
66
69
|
unless e.kind_of?(HexaPDF::Error)
|
data/lib/hexapdf/document.rb
CHANGED
data/lib/hexapdf/font/encoding/base.rb
CHANGED
@@ -81,6 +81,33 @@ module HexaPDF
|
|
81
81
|
@code_to_name.key(name)
|
82
82
|
end
|
83
83
|
|
84
|
+
# Returns the encoding in a compact array form.
|
85
|
+
#
|
86
|
+
# If the optional +base_encoding+ argument is specified, all codes that have the same value
|
87
|
+
# in the base encoding are ignored.
|
88
|
+
#
|
89
|
+
# The returned array is of the form:
|
90
|
+
#
|
91
|
+
# code1 name1 name2 ... code2 name3 name4 ...
|
92
|
+
#
|
93
|
+
# This means that name1 is associated with code1, name2 with code1 + 1 and so on.
|
94
|
+
#
|
95
|
+
# See: PDF 2.0 s9.6.5.1
|
96
|
+
def to_compact_array(base_encoding: nil)
|
97
|
+
result = []
|
98
|
+
last_code = -3
|
99
|
+
@code_to_name.sort.each do |code, name|
|
100
|
+
next if base_encoding&.name(code) == name
|
101
|
+
if last_code + 1 == code
|
102
|
+
result << name
|
103
|
+
else
|
104
|
+
result << code << name
|
105
|
+
end
|
106
|
+
last_code = code
|
107
|
+
end
|
108
|
+
result
|
109
|
+
end
|
110
|
+
|
84
111
|
end
|
85
112
|
|
86
113
|
end
|
data/lib/hexapdf/font/type1_wrapper.rb
CHANGED
@@ -279,9 +279,7 @@ module HexaPDF
|
|
279
279
|
if VALID_ENCODING_NAMES.include?(@encoding.encoding_name)
|
280
280
|
dict[:Encoding] = @encoding.encoding_name
|
281
281
|
elsif @encoding != @wrapped_font.encoding
|
282
|
-
|
283
|
-
(min..max).each {|code| differences << @encoding.name(code) }
|
284
|
-
dict[:Encoding] = {Differences: differences}
|
282
|
+
dict[:Encoding] = {Differences: @encoding.to_compact_array}
|
285
283
|
end
|
286
284
|
end
|
287
285
|
|
data/lib/hexapdf/layout/table_box.rb
CHANGED
@@ -131,8 +131,8 @@ module HexaPDF
|
|
131
131
|
# fixed height (only if the actual content is smaller or equal than it):
|
132
132
|
#
|
133
133
|
# #>pdf-composer
|
134
|
-
# cells = [[{content: layout.text('A'),
|
135
|
-
# [{content: layout.text('C'),
|
134
|
+
# cells = [[{content: layout.text('A'), min_height: 5}, layout.text('B')],
|
135
|
+
# [{content: layout.text('C'), min_height: 40}, layout.text('D')]]
|
136
136
|
# composer.table(cells)
|
137
137
|
#
|
138
138
|
# The cells can be styled using a callable object for more complex styling:
|
data/lib/hexapdf/serializer.rb
CHANGED
@@ -276,16 +276,16 @@ module HexaPDF
|
|
276
276
|
#
|
277
277
|
# See: PDF2.0 s7.3.4
|
278
278
|
def serialize_string(obj)
|
279
|
+
if obj.encoding != Encoding::BINARY && obj.match?(/[^ -~\t\r\n]/)
|
280
|
+
utf16_encoded = true
|
281
|
+
obj = "\xFE\xFF".b << obj.encode(Encoding::UTF_16BE).force_encoding(Encoding::BINARY)
|
282
|
+
end
|
279
283
|
obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
|
280
284
|
encrypter.encrypt_string(obj, @object)
|
281
|
-
elsif
|
282
|
-
|
283
|
-
"\xFE\xFF".b << obj.encode(Encoding::UTF_16BE).force_encoding(Encoding::BINARY)
|
284
|
-
else
|
285
|
-
obj.b
|
286
|
-
end
|
285
|
+
elsif utf16_encoded
|
286
|
+
obj
|
287
287
|
else
|
288
|
-
obj.
|
288
|
+
obj.b
|
289
289
|
end
|
290
290
|
obj.gsub!(/[()\\\r]/n, STRING_ESCAPE_MAP)
|
291
291
|
"(#{obj})"
|
data/lib/hexapdf/type/font_type1.rb
CHANGED
@@ -183,7 +183,18 @@ module HexaPDF
|
|
183
183
|
|
184
184
|
encoding = self[:Encoding]
|
185
185
|
if encoding.kind_of?(Symbol) && !PREDEFINED_ENCODING.include?(encoding)
|
186
|
-
|
186
|
+
correctable = (self[:BaseFont] == :Symbol && encoding == :SymbolEncoding) ||
|
187
|
+
(!symbolic? && encoding == :StandardEncoding)
|
188
|
+
yield("The /Encoding value '#{encoding}' is invalid", correctable)
|
189
|
+
if correctable
|
190
|
+
if encoding == :SymbolEncoding
|
191
|
+
delete(:Encoding)
|
192
|
+
else
|
193
|
+
diffs = HexaPDF::Font::Encoding.for_name(:StandardEncoding).
|
194
|
+
to_compact_array(base_encoding: HexaPDF::Font::Encoding.for_name(:WinAnsiEncoding))
|
195
|
+
self[:Encoding] = {BaseEncoding: :WinAnsiEncoding, Differences: diffs}
|
196
|
+
end
|
197
|
+
end
|
187
198
|
end
|
188
199
|
end
|
189
200
|
|
data/lib/hexapdf/utils/sorted_tree_node.rb
CHANGED
@@ -322,7 +322,10 @@ module HexaPDF
|
|
322
322
|
if key?(container_name)
|
323
323
|
container = self[container_name]
|
324
324
|
if container.length.odd?
|
325
|
-
|
325
|
+
root_node = !key?(:Limits)
|
326
|
+
yield("Sorted tree #{root_node ? 'root' : 'leaf'} node contains odd number of entries",
|
327
|
+
root_node)
|
328
|
+
container.value.clear if root_node
|
326
329
|
return
|
327
330
|
end
|
328
331
|
index = 0
|
data/lib/hexapdf/version.rb
CHANGED
data/test/hexapdf/font/encoding/test_base.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'test_helper'
|
4
|
+
require 'hexapdf/font/encoding'
|
4
5
|
require 'hexapdf/font/encoding/base'
|
5
6
|
|
6
7
|
describe HexaPDF::Font::Encoding::Base do
|
@@ -42,4 +43,23 @@ describe HexaPDF::Font::Encoding::Base do
|
|
42
43
|
assert_nil(@base.code(:Unknown))
|
43
44
|
end
|
44
45
|
end
|
46
|
+
|
47
|
+
describe "to_compact_array" do
|
48
|
+
before do
|
49
|
+
@base.code_to_name[66] = :B
|
50
|
+
@base.code_to_name[67] = :C
|
51
|
+
@base.code_to_name[20] = :space
|
52
|
+
@base.code_to_name[28] = :D
|
53
|
+
@base.code_to_name[29] = :E
|
54
|
+
end
|
55
|
+
|
56
|
+
it "returns the difference array" do
|
57
|
+
assert_equal([20, :space, 28, :D, :E, 65, :A, :B, :C], @base.to_compact_array)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "ignores the codes that are the same in the base encoding" do
|
61
|
+
std_encoding = HexaPDF::Font::Encoding.for_name(:StandardEncoding)
|
62
|
+
assert_equal([20, :space, 28, :D, :E, ], @base.to_compact_array(base_encoding: std_encoding))
|
63
|
+
end
|
64
|
+
end
|
45
65
|
end
|
data/test/hexapdf/test_serializer.rb
CHANGED
@@ -181,7 +181,8 @@ describe HexaPDF::Serializer do
|
|
181
181
|
|
182
182
|
it "encrypts strings in indirect PDF objects" do
|
183
183
|
assert_serialized("(enc:1:test)", HexaPDF::Object.new("test", oid: 1))
|
184
|
-
assert_serialized("<</x[(enc:1
|
184
|
+
assert_serialized("<</x[(enc:1:\xFE\xFF\x00t\x00e\x00s\x00t\x00\xF6)]>>".b,
|
185
|
+
HexaPDF::Object.new({x: ["testö"]}, oid: 1))
|
185
186
|
end
|
186
187
|
|
187
188
|
it "doesn't encrypt strings in direct PDF objects" do
|
data/test/hexapdf/type/annotations/test_widget.rb
CHANGED
@@ -52,6 +52,14 @@ describe HexaPDF::Type::Annotations::Widget do
|
|
52
52
|
assert_kind_of(HexaPDF::Type::AcroForm::TextField, result)
|
53
53
|
refute_same(@widget.data, result.data)
|
54
54
|
end
|
55
|
+
|
56
|
+
it "works when the type of the field is defined higher up in the field hierarchy" do
|
57
|
+
@widget[:Parent] = {T: 'parent', Kids: [@widget]}
|
58
|
+
@widget[:Parent][:Parent] = {FT: :Tx, Kids: [@widget[:Parent]]}
|
59
|
+
result = @widget.form_field
|
60
|
+
assert_kind_of(HexaPDF::Type::AcroForm::TextField, result)
|
61
|
+
refute_same(@widget.data, result.data)
|
62
|
+
end
|
55
63
|
end
|
56
64
|
|
57
65
|
describe "background_color" do
|
data/test/hexapdf/type/test_font_type1.rb
CHANGED
@@ -143,5 +143,19 @@ describe HexaPDF::Type::FontType1 do
|
|
143
143
|
@font[:Encoding] = :Other
|
144
144
|
refute(@font.validate)
|
145
145
|
end
|
146
|
+
|
147
|
+
it "works around certain invalid PDFs with a /SymbolEncoding value for /Encoding" do
|
148
|
+
@font[:Encoding] = :SymbolEncoding
|
149
|
+
@font[:BaseFont] = :Symbol
|
150
|
+
assert(@font.validate)
|
151
|
+
refute(@font.key?(:Encoding))
|
152
|
+
end
|
153
|
+
|
154
|
+
it "works around certain invalid PDFs with a /StandardEncoding value for /Encoding" do
|
155
|
+
@font[:Encoding] = :StandardEncoding
|
156
|
+
assert(@font.validate)
|
157
|
+
assert(:WinAnsiEncoding, @font[:Encoding][:BaseEncoding])
|
158
|
+
assert_equal([39, :quoteright, 96, :quoteleft], @font[:Encoding][:Differences][0, 4])
|
159
|
+
end
|
146
160
|
end
|
147
161
|
end
|
data/test/hexapdf/utils/test_sorted_tree_node.rb
CHANGED
@@ -219,11 +219,21 @@ describe HexaPDF::Utils::SortedTreeNode do
|
|
219
219
|
it "checks that leaf node containers have an even number of entries" do
|
220
220
|
@kid11[:Names].delete_at(0)
|
221
221
|
refute(@kid11.validate do |message, c|
|
222
|
-
assert_match(/odd number/, message)
|
222
|
+
assert_match(/leaf.*odd number/, message)
|
223
223
|
refute(c)
|
224
224
|
end)
|
225
225
|
end
|
226
226
|
|
227
|
+
it "corrects a root node container with an odd number of entries" do
|
228
|
+
@root.value.clear
|
229
|
+
@root[:Names] = ['Test']
|
230
|
+
assert(@root.validate do |message, c|
|
231
|
+
assert_match(/root.*odd number/, message)
|
232
|
+
assert(c)
|
233
|
+
end)
|
234
|
+
assert(@root[:Names].empty?)
|
235
|
+
end
|
236
|
+
|
227
237
|
it "checks that the keys are of the correct type" do
|
228
238
|
@kid11[:Names][2] = 5
|
229
239
|
refute(@kid11.validate do |message, c|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-08-03 00:00:00.000000000 Z
|
10
|
+
date: 2025-09-23 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: cmdparse
|