pdf-reader 2.15.0 → 2.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1be615eb6abc5557e61ba53958c7211fac0f1528e75dc54eff27ffb5554d7c80
4
- data.tar.gz: 875221f31dc119cd0f7ae3cc0246b3bbb70f6127c0047ec924c8030e9186b55b
3
+ metadata.gz: 680d773fb89a823854ce986d7e35c5313df55087d0b4e8bfc3c70c51d97a8130
4
+ data.tar.gz: 7484cc4e28a01b9a74b869c2dede32a47bfd2519cded7ff6fc421c99d546a406
5
5
  SHA512:
6
- metadata.gz: 4b4501ca72d06b5a569fdcc77f384131fbd85342f8da7a084a02210ec7a3821e8b9f1cad88685262d0cc4e993f7b0031bed5d510c353c7d8fb5fe28f97a2ea83
7
- data.tar.gz: a4fe329f2d8ae7cc295cb17d573963ddab6c0cde52d6524ad182f4651dab8ba90215bcb1ecf60c7fcf248135aed152b50a1d34afa03b270b93c5a172ac4048b3
6
+ metadata.gz: e9b2ad3cfb37fb76f731d646bf5097e0334d88b3f7e5ba39abfe9530e51a7dbcfcf08a75d81b6cc1e17f2b18015a25f3ef9ef0d9b75b0d5763eb38470263ee49
7
+ data.tar.gz: 7acdc84e89045708ac4c983deefa2bb1a6246f1a57d8d9a809efeb3b1477d69921238067b797f78d2b7a33eff9bd54c03e139fd4bed10102f365a4d48e8899a0
data/CHANGELOG CHANGED
@@ -1,5 +1,9 @@
1
- v2.15.0 (13th August 2025)
1
+ v2.15.1 (28th December 2025)
2
+ - Add ruby 4.0 to the CI matrix (https://github.com/yob/pdf-reader/pull/575)
3
+ - Avoid raising an error when ToUnicode points to the wrong object type (https://github.com/yob/pdf-reader/pull/573)
4
+ - Skip invalid UTF-16 surrogate pairs in CMaps (https://github.com/yob/pdf-reader/pull/574)
2
5
 
6
+ v2.15.0 (13th August 2025)
3
7
  - Overhaul sorbet types, moving from an external RBI file to inline comments in RBS syntax
4
8
  - multiple PRs, but mainly https://github.com/yob/pdf-reader/pull/562
5
9
  - See https://railsatscale.com/2025-04-23-rbs-support-for-sorbet/
@@ -46,6 +46,10 @@ class PDF::Reader
46
46
  "def" => :noop
47
47
  } #: Hash[String, Symbol]
48
48
 
49
+ # Indicates the start of a UTF-16 surrogate pair, see
50
+ # https://en.wikipedia.org/wiki/Universal_Character_Set_characters
51
+ HIGH_SURROGATE_RANGE = (0xD800..0xDBFF) #: Range[Integer]
52
+
49
53
  #: Hash[Integer, Array[Integer]]
50
54
  attr_reader :map
51
55
 
@@ -124,15 +128,16 @@ class PDF::Reader
124
128
  end
125
129
  result = []
126
130
  while unpacked_string.any? do
127
- if unpacked_string.size >= 2 &&
128
- unpacked_string.first.to_i >= 0xD800 &&
129
- unpacked_string.first.to_i <= 0xDBFF
131
+ if unpacked_string.size >= 2 && HIGH_SURROGATE_RANGE.include?(unpacked_string.first.to_i)
130
132
  # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
131
133
  # let's convert to UTF-32. (the high surrogate is between 0xD800-0xDBFF, the
132
134
  # low surrogate is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
133
135
  point_one = unpacked_string.shift.to_i
134
136
  point_two = unpacked_string.shift.to_i
135
137
  result << (point_one - 0xD800) * 0x400 + (point_two - 0xDC00) + 0x10000
138
+ elsif unpacked_string.size == 1 && HIGH_SURROGATE_RANGE.include?(unpacked_string.first.to_i)
139
+ # a high surrogate with no low surrogate following it. Skip it
140
+ unpacked_string.shift
136
141
  else
137
142
  result << unpacked_string.shift
138
143
  end
@@ -235,9 +235,17 @@ class PDF::Reader
235
235
 
236
236
  if obj[:ToUnicode]
237
237
  # ToUnicode is optional for Type1 and Type3
238
- stream = @ohash.deref_stream(obj[:ToUnicode])
239
- if stream
240
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
238
+ begin
239
+ stream = @ohash.deref_stream(obj[:ToUnicode])
240
+ if stream
241
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
242
+ end
243
+ rescue PDF::Reader::MalformedPDFError => e
244
+ # If ToUnicode exists but isn't a stream, let's pretend it didn't exist. We
245
+ # can't do anything with it unless it's a stream with a CMap
246
+ unless e.message == "expected object to be a Stream or nil"
247
+ raise e
248
+ end
241
249
  end
242
250
  end
243
251
  end
data/rbi/pdf-reader.rbi CHANGED
@@ -282,6 +282,7 @@ class PDF::Reader::CMap
282
282
  end
283
283
 
284
284
  PDF::Reader::CMap::CMAP_KEYWORDS = T.let(T.unsafe(nil), Hash)
285
+ PDF::Reader::CMap::HIGH_SURROGATE_RANGE = T.let(T.unsafe(nil), Range)
285
286
 
286
287
  class PDF::Reader::CidWidths
287
288
  extend ::Forwardable
@@ -537,7 +538,9 @@ class PDF::Reader::Font
537
538
  sig { params(obj: T::Hash[::Symbol, T.untyped]).returns(::PDF::Reader::Encoding) }
538
539
  def build_encoding(obj); end
539
540
 
540
- sig { returns(T.untyped) }
541
+ sig do
542
+ returns(T.any(::PDF::Reader::WidthCalculator::BuiltIn, ::PDF::Reader::WidthCalculator::Composite, ::PDF::Reader::WidthCalculator::TrueType, ::PDF::Reader::WidthCalculator::TypeOneOrThree, ::PDF::Reader::WidthCalculator::TypeZero))
543
+ end
541
544
  def build_width_calculator; end
542
545
 
543
546
  sig { params(font_name: T.nilable(T.any(::String, ::Symbol))).returns(::PDF::Reader::Encoding) }
@@ -573,6 +576,8 @@ class PDF::Reader::Font
573
576
  def unpack_string_to_array_of_ints(unpack_me, unpack_arg); end
574
577
  end
575
578
 
579
+ PDF::Reader::Font::WidthCalculator = T.type_alias { T.any(::PDF::Reader::WidthCalculator::BuiltIn, ::PDF::Reader::WidthCalculator::Composite, ::PDF::Reader::WidthCalculator::TrueType, ::PDF::Reader::WidthCalculator::TypeOneOrThree, ::PDF::Reader::WidthCalculator::TypeZero) }
580
+
576
581
  class PDF::Reader::FontDescriptor
577
582
  sig { params(ohash: ::PDF::Reader::ObjectHash, fd_hash: T::Hash[T.untyped, T.untyped]).void }
578
583
  def initialize(ohash, fd_hash); end
@@ -950,7 +955,9 @@ class PDF::Reader::ObjectHash
950
955
  sig { returns(::Float) }
951
956
  def pdf_version; end
952
957
 
953
- sig { returns(T.untyped) }
958
+ sig do
959
+ returns(T.any(::PDF::Reader::AesV2SecurityHandler, ::PDF::Reader::AesV3SecurityHandler, ::PDF::Reader::NullSecurityHandler, ::PDF::Reader::Rc4SecurityHandler, ::PDF::Reader::UnimplementedSecurityHandler))
960
+ end
954
961
  def sec_handler; end
955
962
 
956
963
  sig { returns(T::Boolean) }
@@ -1037,6 +1044,8 @@ class PDF::Reader::ObjectHash
1037
1044
  def xref; end
1038
1045
  end
1039
1046
 
1047
+ PDF::Reader::ObjectHash::SecurityHandler = T.type_alias { T.any(::PDF::Reader::AesV2SecurityHandler, ::PDF::Reader::AesV3SecurityHandler, ::PDF::Reader::NullSecurityHandler, ::PDF::Reader::Rc4SecurityHandler, ::PDF::Reader::UnimplementedSecurityHandler) }
1048
+
1040
1049
  class PDF::Reader::ObjectStream
1041
1050
  sig { params(stream: ::PDF::Reader::Stream).void }
1042
1051
  def initialize(stream); end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.15.0
4
+ version: 2.15.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
@@ -307,9 +307,9 @@ licenses:
307
307
  - MIT
308
308
  metadata:
309
309
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
310
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.15.0/CHANGELOG
311
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.15.0
312
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.15.0
310
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.15.1/CHANGELOG
311
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.15.1
312
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.15.1
313
313
  rdoc_options:
314
314
  - "--title"
315
315
  - PDF::Reader Documentation