pdf-reader 2.15.0 → 2.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +5 -1
- data/lib/pdf/reader/cmap.rb +8 -3
- data/lib/pdf/reader/font.rb +11 -3
- data/rbi/pdf-reader.rbi +11 -2
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 680d773fb89a823854ce986d7e35c5313df55087d0b4e8bfc3c70c51d97a8130
|
|
4
|
+
data.tar.gz: 7484cc4e28a01b9a74b869c2dede32a47bfd2519cded7ff6fc421c99d546a406
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e9b2ad3cfb37fb76f731d646bf5097e0334d88b3f7e5ba39abfe9530e51a7dbcfcf08a75d81b6cc1e17f2b18015a25f3ef9ef0d9b75b0d5763eb38470263ee49
|
|
7
|
+
data.tar.gz: 7acdc84e89045708ac4c983deefa2bb1a6246f1a57d8d9a809efeb3b1477d69921238067b797f78d2b7a33eff9bd54c03e139fd4bed10102f365a4d48e8899a0
|
data/CHANGELOG
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
|
-
v2.15.
|
|
1
|
+
v2.15.1 (28th December 2025)
|
|
2
|
+
- Add ruby 4.0 to the CI matrix (https://github.com/yob/pdf-reader/pull/575)
|
|
3
|
+
- Avoid raising an error when ToUnicode points to the wrong object type (https://github.com/yob/pdf-reader/pull/573)
|
|
4
|
+
- Skip invalid UTF-16 surrogate pairs in CMaps (https://github.com/yob/pdf-reader/pull/574)
|
|
2
5
|
|
|
6
|
+
v2.15.0 (13th August 2025)
|
|
3
7
|
- Overhaul sorbet types, moving from an external RBI file to inline comments in RBS syntax
|
|
4
8
|
- multiple PRs, but mainly https://github.com/yob/pdf-reader/pull/562
|
|
5
9
|
- See https://railsatscale.com/2025-04-23-rbs-support-for-sorbet/
|
data/lib/pdf/reader/cmap.rb
CHANGED
|
@@ -46,6 +46,10 @@ class PDF::Reader
|
|
|
46
46
|
"def" => :noop
|
|
47
47
|
} #: Hash[String, Symbol]
|
|
48
48
|
|
|
49
|
+
# Indicates the start of a UTF-16 surrogate pair, see
|
|
50
|
+
# https://en.wikipedia.org/wiki/Universal_Character_Set_characters
|
|
51
|
+
HIGH_SURROGATE_RANGE = (0xD800..0xDBFF) #: Range[Integer]
|
|
52
|
+
|
|
49
53
|
#: Hash[Integer, Array[Integer]]
|
|
50
54
|
attr_reader :map
|
|
51
55
|
|
|
@@ -124,15 +128,16 @@ class PDF::Reader
|
|
|
124
128
|
end
|
|
125
129
|
result = []
|
|
126
130
|
while unpacked_string.any? do
|
|
127
|
-
if unpacked_string.size >= 2 &&
|
|
128
|
-
unpacked_string.first.to_i >= 0xD800 &&
|
|
129
|
-
unpacked_string.first.to_i <= 0xDBFF
|
|
131
|
+
if unpacked_string.size >= 2 && HIGH_SURROGATE_RANGE.include?(unpacked_string.first.to_i)
|
|
130
132
|
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
|
131
133
|
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
|
132
134
|
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
|
133
135
|
point_one = unpacked_string.shift.to_i
|
|
134
136
|
point_two = unpacked_string.shift.to_i
|
|
135
137
|
result << (point_one - 0xD800) * 0x400 + (point_two - 0xDC00) + 0x10000
|
|
138
|
+
elsif unpacked_string.size == 1 && HIGH_SURROGATE_RANGE.include?(unpacked_string.first.to_i)
|
|
139
|
+
# the start of a surrogate pair but the pair is missing. Skip it
|
|
140
|
+
unpacked_string.shift
|
|
136
141
|
else
|
|
137
142
|
result << unpacked_string.shift
|
|
138
143
|
end
|
data/lib/pdf/reader/font.rb
CHANGED
|
@@ -235,9 +235,17 @@ class PDF::Reader
|
|
|
235
235
|
|
|
236
236
|
if obj[:ToUnicode]
|
|
237
237
|
# ToUnicode is optional for Type1 and Type3
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
238
|
+
begin
|
|
239
|
+
stream = @ohash.deref_stream(obj[:ToUnicode])
|
|
240
|
+
if stream
|
|
241
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
|
242
|
+
end
|
|
243
|
+
rescue PDF::Reader::MalformedPDFError => e
|
|
244
|
+
# If ToUnicode exists but isn't a stream, lets pretend it didn't exist. We
|
|
245
|
+
# can't do anything with it unless it's a stream with a CMap
|
|
246
|
+
unless e.message == "expected object to be a Stream or nil"
|
|
247
|
+
raise e
|
|
248
|
+
end
|
|
241
249
|
end
|
|
242
250
|
end
|
|
243
251
|
end
|
data/rbi/pdf-reader.rbi
CHANGED
|
@@ -282,6 +282,7 @@ class PDF::Reader::CMap
|
|
|
282
282
|
end
|
|
283
283
|
|
|
284
284
|
PDF::Reader::CMap::CMAP_KEYWORDS = T.let(T.unsafe(nil), Hash)
|
|
285
|
+
PDF::Reader::CMap::HIGH_SURROGATE_RANGE = T.let(T.unsafe(nil), Range)
|
|
285
286
|
|
|
286
287
|
class PDF::Reader::CidWidths
|
|
287
288
|
extend ::Forwardable
|
|
@@ -537,7 +538,9 @@ class PDF::Reader::Font
|
|
|
537
538
|
sig { params(obj: T::Hash[::Symbol, T.untyped]).returns(::PDF::Reader::Encoding) }
|
|
538
539
|
def build_encoding(obj); end
|
|
539
540
|
|
|
540
|
-
sig
|
|
541
|
+
sig do
|
|
542
|
+
returns(T.any(::PDF::Reader::WidthCalculator::BuiltIn, ::PDF::Reader::WidthCalculator::Composite, ::PDF::Reader::WidthCalculator::TrueType, ::PDF::Reader::WidthCalculator::TypeOneOrThree, ::PDF::Reader::WidthCalculator::TypeZero))
|
|
543
|
+
end
|
|
541
544
|
def build_width_calculator; end
|
|
542
545
|
|
|
543
546
|
sig { params(font_name: T.nilable(T.any(::String, ::Symbol))).returns(::PDF::Reader::Encoding) }
|
|
@@ -573,6 +576,8 @@ class PDF::Reader::Font
|
|
|
573
576
|
def unpack_string_to_array_of_ints(unpack_me, unpack_arg); end
|
|
574
577
|
end
|
|
575
578
|
|
|
579
|
+
PDF::Reader::Font::WidthCalculator = T.type_alias { T.any(::PDF::Reader::WidthCalculator::BuiltIn, ::PDF::Reader::WidthCalculator::Composite, ::PDF::Reader::WidthCalculator::TrueType, ::PDF::Reader::WidthCalculator::TypeOneOrThree, ::PDF::Reader::WidthCalculator::TypeZero) }
|
|
580
|
+
|
|
576
581
|
class PDF::Reader::FontDescriptor
|
|
577
582
|
sig { params(ohash: ::PDF::Reader::ObjectHash, fd_hash: T::Hash[T.untyped, T.untyped]).void }
|
|
578
583
|
def initialize(ohash, fd_hash); end
|
|
@@ -950,7 +955,9 @@ class PDF::Reader::ObjectHash
|
|
|
950
955
|
sig { returns(::Float) }
|
|
951
956
|
def pdf_version; end
|
|
952
957
|
|
|
953
|
-
sig
|
|
958
|
+
sig do
|
|
959
|
+
returns(T.any(::PDF::Reader::AesV2SecurityHandler, ::PDF::Reader::AesV3SecurityHandler, ::PDF::Reader::NullSecurityHandler, ::PDF::Reader::Rc4SecurityHandler, ::PDF::Reader::UnimplementedSecurityHandler))
|
|
960
|
+
end
|
|
954
961
|
def sec_handler; end
|
|
955
962
|
|
|
956
963
|
sig { returns(T::Boolean) }
|
|
@@ -1037,6 +1044,8 @@ class PDF::Reader::ObjectHash
|
|
|
1037
1044
|
def xref; end
|
|
1038
1045
|
end
|
|
1039
1046
|
|
|
1047
|
+
PDF::Reader::ObjectHash::SecurityHandler = T.type_alias { T.any(::PDF::Reader::AesV2SecurityHandler, ::PDF::Reader::AesV3SecurityHandler, ::PDF::Reader::NullSecurityHandler, ::PDF::Reader::Rc4SecurityHandler, ::PDF::Reader::UnimplementedSecurityHandler) }
|
|
1048
|
+
|
|
1040
1049
|
class PDF::Reader::ObjectStream
|
|
1041
1050
|
sig { params(stream: ::PDF::Reader::Stream).void }
|
|
1042
1051
|
def initialize(stream); end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pdf-reader
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.15.
|
|
4
|
+
version: 2.15.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- James Healy
|
|
@@ -307,9 +307,9 @@ licenses:
|
|
|
307
307
|
- MIT
|
|
308
308
|
metadata:
|
|
309
309
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
|
310
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.15.
|
|
311
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.15.
|
|
312
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.15.
|
|
310
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.15.1/CHANGELOG
|
|
311
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.15.1
|
|
312
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.15.1
|
|
313
313
|
rdoc_options:
|
|
314
314
|
- "--title"
|
|
315
315
|
- PDF::Reader Documentation
|