hexapdf 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49952c8667073a4bf4c1508684fa03cbffacab9d54b2821f4d69aa83bb8332ec
4
- data.tar.gz: db57322db66eb9c35b618fdf23e70119943874c8f4dab9a5fb01d965a90e91bf
3
+ metadata.gz: dd14e53954c77d18ef283523c31784467530575521b283cac8dd2ab377789f3f
4
+ data.tar.gz: e874c630dfcd1e9bdb29278ddb1ce7c80b8a5d7bbde9e739ebe4ab07ff9e9919
5
5
  SHA512:
6
- metadata.gz: cdb87d3a9f26165bfa2a901d4b91fc720e24b4b5839ace8163d18c74f6cab145ac2ab9095dcad8bb30515f8e1a1437eb2d6b3e86609259f059f6c909d24b87ff
7
- data.tar.gz: c087e748a9750156bb6ab3fcf3dfe98b11b4daff9161e290d9acb1cb0953494e0778ec4b81486bda680a11eeb21c321e8c2029266716efce9906d9da2cef2d87
6
+ metadata.gz: ad7641bf6bee9469f6753efb95580f461faacfc1a18d03bf2e787e4bb67779a1bc61f6b31a8f0a878b4a104d889c905a65999229f1a77ef74d9d9c4b4e508112
7
+ data.tar.gz: 23bb0bcbbf234df4861dbd235d3b3cc47f6f1bc3c6173271d62d2fb6380955f667c48b995a84e677a448b2bf2b3657e585afa66179210023520ccce654e84365
@@ -203,6 +203,12 @@ module HexaPDF
203
203
  #
204
204
  # In nearly all cases this option should not be changed from its default setting!
205
205
  #
206
+ # document.on_invalid_string::
207
+ # A callable object that takes the invalid UTF-16BE encoded string and returns a valid UTF-8
208
+ # encoded string.
209
+ #
210
+ # The default is to remove all invalid characters.
211
+ #
206
212
  # encryption.aes::
207
213
  # The class that should be used for AES encryption. If the value is a String, it should
208
214
  # contain the name of a constant to such a class.
@@ -380,6 +386,9 @@ module HexaPDF
380
386
  end,
381
387
  'acro_form.text_field.default_width' => 100,
382
388
  'document.auto_decrypt' => true,
389
+ 'document.on_invalid_string' => proc do |str|
390
+ str.encode(Encoding::UTF_8, invalid: :replace, replace: '')
391
+ end,
383
392
  'encryption.aes' => 'HexaPDF::Encryption::FastAES',
384
393
  'encryption.arc4' => 'HexaPDF::Encryption::FastARC4',
385
394
  'encryption.filter_map' => {
@@ -233,11 +233,16 @@ module HexaPDF
233
233
 
234
234
  # Converts the string into UTF-8 encoding, assuming it is a binary string. Otherwise +nil+ is
235
235
  # returned.
236
- def self.convert(str, _type, _document)
236
+ def self.convert(str, _type, document)
237
237
  return unless str.kind_of?(String) && str.encoding == Encoding::BINARY
238
238
 
239
239
  if str.getbyte(0) == 254 && str.getbyte(1) == 255
240
- str[2..-1].force_encoding(Encoding::UTF_16BE).encode(Encoding::UTF_8)
240
+ str = str[2..-1].force_encoding(Encoding::UTF_16BE)
241
+ if str.valid_encoding?
242
+ str.encode!(Encoding::UTF_8)
243
+ else
244
+ document.configuration['document.on_invalid_string'].call(str)
245
+ end
241
246
  else
242
247
  Utils::PDFDocEncoding.convert_to_utf8(str)
243
248
  end
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.17.0'
40
+ VERSION = '0.17.1'
41
41
 
42
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.17.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-21 00:00:00.000000000 Z
11
+ date: 2021-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse