hexapdf 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82d0430964f9f4c6925af5bb076a2e641908c3a4cb6796acc64dbe1303bc3407
4
- data.tar.gz: 689a86b637a86331ca203d7d7ac90a9fb3c50c275f07b21d8109013faa509f07
3
+ metadata.gz: b88ce85ee9bc603011b9f5a278829d588da10f53614c0b84a57e2d7fa38f52dc
4
+ data.tar.gz: ec2c8739ed69038e1297435550371bf329e516e9ef970fa0456502b15720d07b
5
5
  SHA512:
6
- metadata.gz: 4cfe8038379e5dc7f3bebeb38b44525676b934cb2b2355ac7575fa7a4466a6c4ec9ab9e0a80a184aebab32b2aa87e06dff735c10915abebe11541ada95d8129c
7
- data.tar.gz: 62562b4bae557ad3dac03cb51913a8d1d6796d6cad9ce0626bc901cc15afeb301e42d061c345a1ebcb30e09a018dbe4b7c26fc4c567423c262f67607d04b95c9
6
+ metadata.gz: 103edc366ef9f48ddd6579f7137b3ab23b4266dc2df0a77ee5b89cb4256419a00727776158a7c7570a0b10075e4f062506d524ec71ee801c00fdd9e4726c8232
7
+ data.tar.gz: f1f4a1af54445b2e7c3c9fc1adfa81fb9fad84f32461f58378fc550bd5aec16e16b276dadc7516ca5b0b6212394886d06f2cc2a3506dc2b2745b5a1f4c8136d1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,26 @@
1
+ ## 1.4.1 - 2025-09-23
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Font::Encoding::Base#to_compact_array] for creating a compact array
6
+ representation of the encoding
7
+
8
+ ### Changed
9
+
10
+ * CLI to handle missing file errors better
11
+
12
+ ### Fixed
13
+
14
+ * Serialization of strings that need to be UTF-16 encoded when using encryption
15
+ * [HexaPDF::Document#write_to_string] to pass on arguments to `#write`
16
+ * [HexaPDF::Type::FontType1] validation to handle PDFs with an invalid value of
17
+ /SymbolEncoding for the /Encoding key
18
+ * [HexaPDF::Type::FontType1] validation to handle PDFs with an invalid value of
19
+ /StandardEncoding for the /Encoding key
20
+ * CLI command `hexapdf form` to ignore widgets that don't belong to any field
21
+ * Validation of invalid sorted tree root nodes with an odd number of direct entries
22
+
23
+
1
24
  ## 1.4.0 - 2025-08-03
2
25
 
3
26
  ### Added
@@ -24,8 +47,8 @@
24
47
  of a table cell
25
48
  * [HexaPDF::Layout::Style::Quad#set] to allow setting a subset of values using a
26
49
  hash
27
- * CLI command `hp form` to show the names of radio button widgets
28
- * CLI command `hp form` to show position and size of widgets in easier to
50
+ * CLI command `hexapdf form` to show the names of radio button widgets
51
+ * CLI command `hexapdf form` to show position and size of widgets in easier to
29
52
  understand form
30
53
  * Default signing handler to not set /DigestMethod entry on signature reference
31
54
  dictionary anymore
@@ -290,7 +290,7 @@ module HexaPDF
290
290
  page.each_annotation do |annotation|
291
291
  next unless annotation[:Subtype] == :Widget
292
292
  field = annotation.form_field
293
- next if field.concrete_field_type == :push_button
293
+ next if !field.concrete_field_type || field.concrete_field_type == :push_button
294
294
  if with_seen || !seen[field.full_field_name]
295
295
  yield(page, page_index, field, annotation)
296
296
  seen[field.full_field_name] = true
@@ -132,7 +132,7 @@ module HexaPDF
132
132
  printf("%5s %5s %9s %6s %6s %5s %4s %3s %5s %5s %6s %5s %8s\n",
133
133
  "index", "page", "oid", "width", "height", "color", "comp", "bpc",
134
134
  "x-ppi", "y-ppi", "size", "type", "writable")
135
- puts("-" * 77)
135
+ puts("-" * 84)
136
136
  each_image(doc) do |image, index, pindex, (x_ppi, y_ppi)|
137
137
  info = image.info
138
138
  size = human_readable_file_size(image[:Length] + image[:SMask]&.[](:Length).to_i)
@@ -155,6 +155,10 @@ module HexaPDF
155
155
  puts "Extracting #{path}..." if command_parser.verbosity_info?
156
156
  image.write(path)
157
157
  done << index
158
+ if info.color_space == :cmyk && info.type == :jpeg
159
+ $stderr.puts "Note (image #{path}): JPEG uses CMYK colorspace and may " \
160
+ "need color post-processing"
161
+ end
158
162
  elsif command_parser.verbosity_warning?
159
163
  $stderr.puts "Warning (image #{index}): PDF image format not supported for writing"
160
164
  end
data/lib/hexapdf/cli.rb CHANGED
@@ -61,6 +61,9 @@ module HexaPDF
61
61
  # Runs the CLI application.
62
62
  def self.run(args = ARGV)
63
63
  Application.new.parse(args)
64
+ rescue Errno::ENOENT => e
65
+ path = e.message.scan(/(?<= - ).*?$/).first
66
+ $stderr.puts "Problem encountered: No such file - #{path}"
64
67
  rescue StandardError => e
65
68
  $stderr.puts "Problem encountered: #{e.message}"
66
69
  unless e.kind_of?(HexaPDF::Error)
@@ -823,7 +823,7 @@ module HexaPDF
823
823
  # See #write for further information and details on the available arguments.
824
824
  def write_to_string(**args)
825
825
  io = StringIO.new(''.b)
826
- write(io)
826
+ write(io, **args)
827
827
  io.string
828
828
  end
829
829
 
@@ -81,6 +81,33 @@ module HexaPDF
81
81
  @code_to_name.key(name)
82
82
  end
83
83
 
84
+ # Returns the encoding in a compact array form.
85
+ #
86
+ # If the optional +base_encoding+ argument is specified, all codes that have the same value
87
+ # in the base encoding are ignored.
88
+ #
89
+ # The returned array is of the form:
90
+ #
91
+ # code1 name1 name2 ... code2 name3 name4 ...
92
+ #
93
+ # This means that name1 is associated with code1, name2 with code1 + 1 and so on.
94
+ #
95
+ # See: PDF 2.0 s9.6.5.1
96
+ def to_compact_array(base_encoding: nil)
97
+ result = []
98
+ last_code = -3 # sentinel; assumes codes are >= 0, so the first kept entry always emits its code
99
+ @code_to_name.sort.each do |code, name|
100
+ next if base_encoding&.name(code) == name # skipped codes leave last_code untouched, so the next kept entry starts a new run
101
+ if last_code + 1 == code
102
+ result << name # continues the current run; the code is implied by position
103
+ else
104
+ result << code << name # starts a new run with an explicit code
105
+ end
106
+ last_code = code
107
+ end
108
+ result
109
+ end
110
+
84
111
  end
85
112
 
86
113
  end
@@ -279,9 +279,7 @@ module HexaPDF
279
279
  if VALID_ENCODING_NAMES.include?(@encoding.encoding_name)
280
280
  dict[:Encoding] = @encoding.encoding_name
281
281
  elsif @encoding != @wrapped_font.encoding
282
- differences = [min]
283
- (min..max).each {|code| differences << @encoding.name(code) }
284
- dict[:Encoding] = {Differences: differences}
282
+ dict[:Encoding] = {Differences: @encoding.to_compact_array}
285
283
  end
286
284
  end
287
285
 
@@ -131,8 +131,8 @@ module HexaPDF
131
131
  # fixed height (only if the actual content is smaller or equal than it):
132
132
  #
133
133
  # #>pdf-composer
134
- # cells = [[{content: layout.text('A'), height: 5}, layout.text('B')],
135
- # [{content: layout.text('C'), height: 40}, layout.text('D')]]
134
+ # cells = [[{content: layout.text('A'), min_height: 5}, layout.text('B')],
135
+ # [{content: layout.text('C'), min_height: 40}, layout.text('D')]]
136
136
  # composer.table(cells)
137
137
  #
138
138
  # The cells can be styled using a callable object for more complex styling:
@@ -276,16 +276,16 @@ module HexaPDF
276
276
  #
277
277
  # See: PDF2.0 s7.3.4
278
278
  def serialize_string(obj)
279
+ if obj.encoding != Encoding::BINARY && obj.match?(/[^ -~\t\r\n]/)
280
+ utf16_encoded = true
281
+ obj = "\xFE\xFF".b << obj.encode(Encoding::UTF_16BE).force_encoding(Encoding::BINARY)
282
+ end
279
283
  obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
280
284
  encrypter.encrypt_string(obj, @object)
281
- elsif obj.encoding != Encoding::BINARY
282
- if obj.match?(/[^ -~\t\r\n]/)
283
- "\xFE\xFF".b << obj.encode(Encoding::UTF_16BE).force_encoding(Encoding::BINARY)
284
- else
285
- obj.b
286
- end
285
+ elsif utf16_encoded
286
+ obj
287
287
  else
288
- obj.dup
288
+ obj.b
289
289
  end
290
290
  obj.gsub!(/[()\\\r]/n, STRING_ESCAPE_MAP)
291
291
  "(#{obj})"
@@ -183,7 +183,18 @@ module HexaPDF
183
183
 
184
184
  encoding = self[:Encoding]
185
185
  if encoding.kind_of?(Symbol) && !PREDEFINED_ENCODING.include?(encoding)
186
- yield("The /Encoding value '#{encoding}' is invalid", false)
186
+ correctable = (self[:BaseFont] == :Symbol && encoding == :SymbolEncoding) ||
187
+ (!symbolic? && encoding == :StandardEncoding)
188
+ yield("The /Encoding value '#{encoding}' is invalid", correctable)
189
+ if correctable
190
+ if encoding == :SymbolEncoding
191
+ delete(:Encoding)
192
+ else
193
+ diffs = HexaPDF::Font::Encoding.for_name(:StandardEncoding).
194
+ to_compact_array(base_encoding: HexaPDF::Font::Encoding.for_name(:WinAnsiEncoding))
195
+ self[:Encoding] = {BaseEncoding: :WinAnsiEncoding, Differences: diffs}
196
+ end
197
+ end
187
198
  end
188
199
  end
189
200
 
@@ -322,7 +322,10 @@ module HexaPDF
322
322
  if key?(container_name)
323
323
  container = self[container_name]
324
324
  if container.length.odd?
325
- yield("Sorted tree leaf node contains odd number of entries", false)
325
+ root_node = !key?(:Limits)
326
+ yield("Sorted tree #{root_node ? 'root' : 'leaf'} node contains odd number of entries",
327
+ root_node)
328
+ container.value.clear if root_node
326
329
  return
327
330
  end
328
331
  index = 0
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '1.4.0'
40
+ VERSION = '1.4.1'
41
41
 
42
42
  end
@@ -1,6 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  require 'test_helper'
4
+ require 'hexapdf/font/encoding'
4
5
  require 'hexapdf/font/encoding/base'
5
6
 
6
7
  describe HexaPDF::Font::Encoding::Base do
@@ -42,4 +43,23 @@ describe HexaPDF::Font::Encoding::Base do
42
43
  assert_nil(@base.code(:Unknown))
43
44
  end
44
45
  end
46
+
47
+ describe "to_compact_array" do
48
+ before do
49
+ @base.code_to_name[66] = :B
50
+ @base.code_to_name[67] = :C
51
+ @base.code_to_name[20] = :space
52
+ @base.code_to_name[28] = :D
53
+ @base.code_to_name[29] = :E
54
+ end
55
+
56
+ it "returns the difference array" do
57
+ assert_equal([20, :space, 28, :D, :E, 65, :A, :B, :C], @base.to_compact_array)
58
+ end
59
+
60
+ it "ignores the codes that are the same in the base encoding" do
61
+ std_encoding = HexaPDF::Font::Encoding.for_name(:StandardEncoding)
62
+ assert_equal([20, :space, 28, :D, :E, ], @base.to_compact_array(base_encoding: std_encoding))
63
+ end
64
+ end
45
65
  end
@@ -611,5 +611,6 @@ describe HexaPDF::Document do
611
611
  assert_equal(Encoding::ASCII_8BIT, str.encoding)
612
612
  doc = HexaPDF::Document.new(io: StringIO.new(str))
613
613
  assert_equal(:test, doc.trailer.info[:test])
614
+ assert_nil(doc.trailer.info[:ModDate])
614
615
  end
615
616
  end
@@ -181,7 +181,8 @@ describe HexaPDF::Serializer do
181
181
 
182
182
  it "encrypts strings in indirect PDF objects" do
183
183
  assert_serialized("(enc:1:test)", HexaPDF::Object.new("test", oid: 1))
184
- assert_serialized("<</x[(enc:1:test)]>>", HexaPDF::Object.new({x: ["test"]}, oid: 1))
184
+ assert_serialized("<</x[(enc:1:\xFE\xFF\x00t\x00e\x00s\x00t\x00\xF6)]>>".b,
185
+ HexaPDF::Object.new({x: ["testö"]}, oid: 1))
185
186
  end
186
187
 
187
188
  it "doesn't encrypt strings in direct PDF objects" do
@@ -52,6 +52,14 @@ describe HexaPDF::Type::Annotations::Widget do
52
52
  assert_kind_of(HexaPDF::Type::AcroForm::TextField, result)
53
53
  refute_same(@widget.data, result.data)
54
54
  end
55
+
56
+ it "works when the type of the field is defined higher up in the field hierarchy" do
57
+ @widget[:Parent] = {T: 'parent', Kids: [@widget]}
58
+ @widget[:Parent][:Parent] = {FT: :Tx, Kids: [@widget[:Parent]]}
59
+ result = @widget.form_field
60
+ assert_kind_of(HexaPDF::Type::AcroForm::TextField, result)
61
+ refute_same(@widget.data, result.data)
62
+ end
55
63
  end
56
64
 
57
65
  describe "background_color" do
@@ -143,5 +143,19 @@ describe HexaPDF::Type::FontType1 do
143
143
  @font[:Encoding] = :Other
144
144
  refute(@font.validate)
145
145
  end
146
+
147
+ it "works around certain invalid PDFs with a /SymbolEncoding value for /Encoding" do
148
+ @font[:Encoding] = :SymbolEncoding
149
+ @font[:BaseFont] = :Symbol
150
+ assert(@font.validate)
151
+ refute(@font.key?(:Encoding))
152
+ end
153
+
154
+ it "works around certain invalid PDFs with a /StandardEncoding value for /Encoding" do
155
+ @font[:Encoding] = :StandardEncoding
156
+ assert(@font.validate)
157
+ assert(:WinAnsiEncoding, @font[:Encoding][:BaseEncoding])
158
+ assert_equal([39, :quoteright, 96, :quoteleft], @font[:Encoding][:Differences][0, 4])
159
+ end
146
160
  end
147
161
  end
@@ -219,11 +219,21 @@ describe HexaPDF::Utils::SortedTreeNode do
219
219
  it "checks that leaf node containers have an even number of entries" do
220
220
  @kid11[:Names].delete_at(0)
221
221
  refute(@kid11.validate do |message, c|
222
- assert_match(/odd number/, message)
222
+ assert_match(/leaf.*odd number/, message)
223
223
  refute(c)
224
224
  end)
225
225
  end
226
226
 
227
+ it "corrects a root node container with an odd number of entries" do
228
+ @root.value.clear
229
+ @root[:Names] = ['Test']
230
+ assert(@root.validate do |message, c|
231
+ assert_match(/root.*odd number/, message)
232
+ assert(c)
233
+ end)
234
+ assert(@root[:Names].empty?)
235
+ end
236
+
227
237
  it "checks that the keys are of the correct type" do
228
238
  @kid11[:Names][2] = 5
229
239
  refute(@kid11.validate do |message, c|
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-08-03 00:00:00.000000000 Z
10
+ date: 2025-09-23 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: cmdparse