pdf-reader 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/lib/pdf/reader/cmap.rb +13 -12
- data/lib/pdf/reader/orientation_detector.rb +2 -2
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1d87a1e4cc6989cb579c5c720ebe8277ab8099a2a6d7044a5c6f843cfabe2a7
|
4
|
+
data.tar.gz: '02693bdcc7d21572494ffa3f7e4a7e7ecaa558601951590f6c348c97c892ead3'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae3845f040bff4089ba8e4b2df1e22c10ddea1019475e4525b89fdf3889ffd904f98c72162cfe451ff7cfbe2f697e9462d5b2efe4a4144fdfa34568343c51f2c
|
7
|
+
data.tar.gz: 26755a0cc78cd490e7013f548ed8b46999629995109986f4ee474fff430fd77913888e6172512630118a2b99ef14546a3e94a38681ff66ac9b80482f7504351b
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
v2.2.1 (27th July 2019)
|
2
|
+
- Improve utf8 text extraction from CMaps that contain surrogate pair ligatures
|
3
|
+
|
1
4
|
v2.2.0 (18th December 2018)
|
2
5
|
- Support additional XRef Stream variants (thanks Stefan Wienert)
|
3
6
|
- Add frozen_strings pragma to reduce object allocations on ruby 2.3+
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -98,23 +98,24 @@ class PDF::Reader
|
|
98
98
|
|
99
99
|
def str_to_int(str)
|
100
100
|
return nil if str.nil? || str.size == 0
|
101
|
-
unpacked_string = if str.
|
101
|
+
unpacked_string = if str.bytesize == 1 # UTF-8
|
102
102
|
str.unpack("C*")
|
103
103
|
else # UTF-16
|
104
104
|
str.unpack("n*")
|
105
105
|
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
106
|
+
result = []
|
107
|
+
while unpacked_string.any? do
|
108
|
+
if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
|
109
|
+
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
110
|
+
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
111
|
+
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
112
|
+
points = [unpacked_string.shift, unpacked_string.shift]
|
113
|
+
result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
|
114
|
+
else
|
115
|
+
result << unpacked_string.shift
|
116
|
+
end
|
117
117
|
end
|
118
|
+
result
|
118
119
|
end
|
119
120
|
|
120
121
|
def process_bfchar_instructions(instructions)
|
@@ -25,9 +25,9 @@ class PDF::Reader
|
|
25
25
|
width = urx.to_i - llx.to_i
|
26
26
|
height = ury.to_i - lly.to_i
|
27
27
|
if width > height
|
28
|
-
|
28
|
+
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
29
|
else
|
30
|
-
|
30
|
+
(rotation % 180).zero? ? 'portrait' : 'landscape'
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -167,7 +167,7 @@ dependencies:
|
|
167
167
|
description: The PDF::Reader library implements a PDF parser conforming as much as
|
168
168
|
possible to the PDF specification from Adobe
|
169
169
|
email:
|
170
|
-
-
|
170
|
+
- james@yob.id.au
|
171
171
|
executables:
|
172
172
|
- pdf_object
|
173
173
|
- pdf_text
|
@@ -295,8 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
295
295
|
- !ruby/object:Gem::Version
|
296
296
|
version: '0'
|
297
297
|
requirements: []
|
298
|
-
|
299
|
-
rubygems_version: 2.7.6
|
298
|
+
rubygems_version: 3.0.1
|
300
299
|
signing_key:
|
301
300
|
specification_version: 4
|
302
301
|
summary: A library for accessing the content of PDF files
|