pdf-reader 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfc4ed13692a51d8b78fc181d67fcf8b5e00fb1679dbca36137961f63365edaf
4
- data.tar.gz: de5556fabc41642746fd242a2623c92c9424c56da2d845507c49624c312b646b
3
+ metadata.gz: e1d87a1e4cc6989cb579c5c720ebe8277ab8099a2a6d7044a5c6f843cfabe2a7
4
+ data.tar.gz: '02693bdcc7d21572494ffa3f7e4a7e7ecaa558601951590f6c348c97c892ead3'
5
5
  SHA512:
6
- metadata.gz: 4074d5dd87f1ad9286f4022ad46a4160f44c6afed2341f9115029770770ae80b248ace9a8d5df0e444046bed662f9aa5a9334822b23222abec9574523d9e7c36
7
- data.tar.gz: a69837921f7581d2aeb9226d0791b4b0dd5925a9f83e9cb4cee4dbaf43af33e6a7a570292650a14006ffc9d1759f2ea4ef268381e5aa63fc6da5c1a6d38f46a7
6
+ metadata.gz: ae3845f040bff4089ba8e4b2df1e22c10ddea1019475e4525b89fdf3889ffd904f98c72162cfe451ff7cfbe2f697e9462d5b2efe4a4144fdfa34568343c51f2c
7
+ data.tar.gz: 26755a0cc78cd490e7013f548ed8b46999629995109986f4ee474fff430fd77913888e6172512630118a2b99ef14546a3e94a38681ff66ac9b80482f7504351b
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ v2.2.1 (27th July 2019)
2
+ - Improve utf8 text extraction from CMaps that contain surrogate pair ligatures
3
+
1
4
  v2.2.0 (18th December 2018)
2
5
  - Support additional XRef Stream variants (thanks Stefan Wienert)
3
6
  - Add frozen_strings pragma to reduce object allocations on ruby 2.3+
@@ -98,23 +98,24 @@ class PDF::Reader
98
98
 
99
99
  def str_to_int(str)
100
100
  return nil if str.nil? || str.size == 0
101
- unpacked_string = if str.size == 1 # UTF-8
101
+ unpacked_string = if str.bytesize == 1 # UTF-8
102
102
  str.unpack("C*")
103
103
  else # UTF-16
104
104
  str.unpack("n*")
105
105
  end
106
- if unpacked_string.size == 1
107
- unpacked_string
108
- elsif unpacked_string.size == 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
109
- # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
110
- # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
111
- # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
112
- [(unpacked_string[0] - 0xD800) * 0x400 + (unpacked_string[1] - 0xDC00) + 0x10000]
113
- else
114
- # it is a bad idea to just return the first 16 bits, as this doesn't allow
115
- # for ligatures for example fi (U+0066 U+0069)
116
- unpacked_string
106
+ result = []
107
+ while unpacked_string.any? do
108
+ if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
109
+ # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
110
+ # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
111
+ # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
112
+ points = [unpacked_string.shift, unpacked_string.shift]
113
+ result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
114
+ else
115
+ result << unpacked_string.shift
116
+ end
117
117
  end
118
+ result
118
119
  end
119
120
 
120
121
  def process_bfchar_instructions(instructions)
@@ -25,9 +25,9 @@ class PDF::Reader
25
25
  width = urx.to_i - llx.to_i
26
26
  height = ury.to_i - lly.to_i
27
27
  if width > height
28
- [0,180].include?(rotation) ? 'landscape' : 'portrait'
28
+ (rotation % 180).zero? ? 'landscape' : 'portrait'
29
29
  else
30
- [0,180].include?(rotation) ? 'portrait' : 'landscape'
30
+ (rotation % 180).zero? ? 'portrait' : 'landscape'
31
31
  end
32
32
  end
33
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-18 00:00:00.000000000 Z
11
+ date: 2019-07-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -167,7 +167,7 @@ dependencies:
167
167
  description: The PDF::Reader library implements a PDF parser conforming as much as
168
168
  possible to the PDF specification from Adobe
169
169
  email:
170
- - jimmy@deefa.com
170
+ - james@yob.id.au
171
171
  executables:
172
172
  - pdf_object
173
173
  - pdf_text
@@ -295,8 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
295
295
  - !ruby/object:Gem::Version
296
296
  version: '0'
297
297
  requirements: []
298
- rubyforge_project:
299
- rubygems_version: 2.7.6
298
+ rubygems_version: 3.0.1
300
299
  signing_key:
301
300
  specification_version: 4
302
301
  summary: A library for accessing the content of PDF files