pdf-reader 2.2.0 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/lib/pdf/reader/cmap.rb +13 -12
- data/lib/pdf/reader/orientation_detector.rb +2 -2
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1d87a1e4cc6989cb579c5c720ebe8277ab8099a2a6d7044a5c6f843cfabe2a7
|
4
|
+
data.tar.gz: '02693bdcc7d21572494ffa3f7e4a7e7ecaa558601951590f6c348c97c892ead3'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae3845f040bff4089ba8e4b2df1e22c10ddea1019475e4525b89fdf3889ffd904f98c72162cfe451ff7cfbe2f697e9462d5b2efe4a4144fdfa34568343c51f2c
|
7
|
+
data.tar.gz: 26755a0cc78cd490e7013f548ed8b46999629995109986f4ee474fff430fd77913888e6172512630118a2b99ef14546a3e94a38681ff66ac9b80482f7504351b
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
v2.2.1 (27th July 2019)
|
2
|
+
- Improve utf8 text extraction from CMaps that contain surrogate pair ligatures
|
3
|
+
|
1
4
|
v2.2.0 (18th December 2018)
|
2
5
|
- Support additional XRef Stream variants (thanks Stefan Wienert)
|
3
6
|
- Add frozen_strings pragma to reduce object allocations on ruby 2.3+
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -98,23 +98,24 @@ class PDF::Reader
|
|
98
98
|
|
99
99
|
def str_to_int(str)
|
100
100
|
return nil if str.nil? || str.size == 0
|
101
|
-
unpacked_string = if str.
|
101
|
+
unpacked_string = if str.bytesize == 1 # UTF-8
|
102
102
|
str.unpack("C*")
|
103
103
|
else # UTF-16
|
104
104
|
str.unpack("n*")
|
105
105
|
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
106
|
+
result = []
|
107
|
+
while unpacked_string.any? do
|
108
|
+
if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
|
109
|
+
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
110
|
+
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
111
|
+
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
112
|
+
points = [unpacked_string.shift, unpacked_string.shift]
|
113
|
+
result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
|
114
|
+
else
|
115
|
+
result << unpacked_string.shift
|
116
|
+
end
|
117
117
|
end
|
118
|
+
result
|
118
119
|
end
|
119
120
|
|
120
121
|
def process_bfchar_instructions(instructions)
|
data/lib/pdf/reader/orientation_detector.rb
CHANGED
@@ -25,9 +25,9 @@ class PDF::Reader
|
|
25
25
|
width = urx.to_i - llx.to_i
|
26
26
|
height = ury.to_i - lly.to_i
|
27
27
|
if width > height
|
28
|
-
|
28
|
+
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
29
|
else
|
30
|
-
|
30
|
+
(rotation % 180).zero? ? 'portrait' : 'landscape'
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -167,7 +167,7 @@ dependencies:
|
|
167
167
|
description: The PDF::Reader library implements a PDF parser conforming as much as
|
168
168
|
possible to the PDF specification from Adobe
|
169
169
|
email:
|
170
|
-
-
|
170
|
+
- james@yob.id.au
|
171
171
|
executables:
|
172
172
|
- pdf_object
|
173
173
|
- pdf_text
|
@@ -295,8 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
295
295
|
- !ruby/object:Gem::Version
|
296
296
|
version: '0'
|
297
297
|
requirements: []
|
298
|
-
|
299
|
-
rubygems_version: 2.7.6
|
298
|
+
rubygems_version: 3.0.1
|
300
299
|
signing_key:
|
301
300
|
specification_version: 4
|
302
301
|
summary: A library for accessing the content of PDF files
|