pdf-reader 2.4.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +11 -0
- data/lib/pdf/reader/filter/flate.rb +25 -14
- data/lib/pdf/reader/object_hash.rb +3 -1
- data/lib/pdf/reader/page_layout.rb +1 -0
- data/lib/pdf/reader/page_state.rb +7 -13
- data/lib/pdf/reader/page_text_receiver.rb +18 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +7 -15
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
|
4
|
+
data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
|
7
|
+
data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
v2.5.0 (6th June 2021)
|
2
|
+
- bump minimum ruby version to 2.0
|
3
|
+
- Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
|
4
|
+
- Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
|
5
|
+
- Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
|
6
|
+
- Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
|
7
|
+
|
8
|
+
v2.4.2 (28th January 2021)
|
9
|
+
- relax ASCII85 dependency to allow 1.x
|
10
|
+
- improved support for decompressing objects with slightly malformed zlib data
|
11
|
+
|
1
12
|
v.2.4.1 (24th September 2020)
|
2
13
|
- Re-vendor font metrics from Adobe to clarify their license
|
3
14
|
|
@@ -9,6 +9,7 @@ class PDF::Reader
|
|
9
9
|
# implementation of the Flate (zlib) stream filter
|
10
10
|
class Flate
|
11
11
|
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
12
|
+
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
12
13
|
|
13
14
|
def initialize(options = {})
|
14
15
|
@options = options
|
@@ -17,24 +18,34 @@ class PDF::Reader
|
|
17
18
|
################################################################################
|
18
19
|
# Decode the specified data with the Zlib compression algorithm
|
19
20
|
def filter(data)
|
20
|
-
deflated =
|
21
|
+
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
22
|
+
|
23
|
+
if deflated.nil?
|
24
|
+
raise MalformedPDFError,
|
25
|
+
"Error while inflating a compressed stream (no suitable inflation algorithm found)"
|
26
|
+
end
|
27
|
+
Depredict.new(@options).filter(deflated)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def zlib_inflate(data)
|
21
33
|
begin
|
22
|
-
|
23
|
-
rescue Zlib::DataError
|
34
|
+
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
+
rescue Zlib::DataError
|
24
36
|
# by default, Ruby's Zlib assumes the data it's inflating
|
25
37
|
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
26
|
-
# fails,
|
27
|
-
#
|
28
|
-
#
|
29
|
-
# See
|
30
|
-
# - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
|
31
|
-
deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
|
38
|
+
# fails, swallow the exception and attempt to inflate the data as a raw
|
39
|
+
# RFC1951 stream.
|
32
40
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
41
|
+
|
42
|
+
begin
|
43
|
+
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
+
rescue StandardError
|
45
|
+
# swallow this one too, so we can try some other fallback options
|
46
|
+
end
|
47
|
+
|
48
|
+
nil
|
38
49
|
end
|
39
50
|
end
|
40
51
|
end
|
@@ -331,7 +331,9 @@ class PDF::Reader
|
|
331
331
|
def decrypt(ref, obj)
|
332
332
|
case obj
|
333
333
|
when PDF::Reader::Stream then
|
334
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
335
337
|
obj
|
336
338
|
when Hash then
|
337
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -30,15 +30,7 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
|
34
|
-
state[:ctm] = identity_matrix
|
35
|
-
else
|
36
|
-
rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
|
37
|
-
rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
|
38
|
-
state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
|
39
|
-
rotate_sin * -1, rotate_cos,
|
40
|
-
0, 0)
|
41
|
-
end
|
33
|
+
state[:ctm] = identity_matrix
|
42
34
|
end
|
43
35
|
|
44
36
|
#####################################################
|
@@ -330,11 +322,13 @@ class PDF::Reader
|
|
330
322
|
th = state[:h_scaling]
|
331
323
|
# optimise the common path to reduce Float allocations
|
332
324
|
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
333
|
-
|
334
|
-
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
335
329
|
else
|
336
|
-
|
337
|
-
tx =
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
338
332
|
end
|
339
333
|
|
340
334
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
@@ -41,6 +41,7 @@ module PDF
|
|
41
41
|
# starting a new page
|
42
42
|
def page=(page)
|
43
43
|
@state = PageState.new(page)
|
44
|
+
@page = page
|
44
45
|
@content = []
|
45
46
|
@characters = []
|
46
47
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
@@ -104,6 +105,8 @@ module PDF
|
|
104
105
|
glyphs.each_with_index do |glyph_code, index|
|
105
106
|
# paint the current glyph
|
106
107
|
newx, newy = @state.trm_transform(0,0)
|
108
|
+
newx, newy = apply_rotation(newx, newy)
|
109
|
+
|
107
110
|
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
108
111
|
|
109
112
|
# apply to glyph displacement for the current glyph so the next
|
@@ -118,6 +121,21 @@ module PDF
|
|
118
121
|
end
|
119
122
|
end
|
120
123
|
|
124
|
+
def apply_rotation(x, y)
|
125
|
+
if @page.rotate == 90
|
126
|
+
tmp = x
|
127
|
+
x = y
|
128
|
+
y = tmp * -1
|
129
|
+
elsif @page.rotate == 180
|
130
|
+
y *= -1
|
131
|
+
elsif @page.rotate == 270
|
132
|
+
tmp = x
|
133
|
+
x = y * -1
|
134
|
+
y = tmp * -1
|
135
|
+
end
|
136
|
+
return x, y
|
137
|
+
end
|
138
|
+
|
121
139
|
end
|
122
140
|
end
|
123
141
|
end
|
@@ -37,23 +37,15 @@ class PDF::Reader
|
|
37
37
|
def glyph_width(code_point)
|
38
38
|
return 0 if code_point.nil? || code_point < 0
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
names = @font.encoding.int_to_name(code_point)
|
41
|
+
metrics = names.map { |name|
|
42
|
+
@metrics.char_metrics[name.to_s]
|
43
|
+
}.compact.first
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
}.compact.first
|
47
|
-
end
|
48
|
-
|
49
|
-
if m
|
50
|
-
m[:wx]
|
51
|
-
elsif @font.widths[code_point - 1]
|
52
|
-
@font.widths[code_point - 1]
|
53
|
-
elsif control_character?(code_point)
|
54
|
-
0
|
45
|
+
if metrics
|
46
|
+
metrics[:wx]
|
55
47
|
else
|
56
|
-
0
|
48
|
+
@font.widths[code_point - 1] || 0
|
57
49
|
end
|
58
50
|
end
|
59
51
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.0
|
103
|
+
version: '1.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.0
|
110
|
+
version: '1.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: ruby-rc4
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -277,10 +277,10 @@ licenses:
|
|
277
277
|
- MIT
|
278
278
|
metadata:
|
279
279
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
280
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.
|
281
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.
|
282
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.
|
283
|
-
post_install_message:
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
283
|
+
post_install_message:
|
284
284
|
rdoc_options:
|
285
285
|
- "--title"
|
286
286
|
- PDF::Reader Documentation
|
@@ -293,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
293
293
|
requirements:
|
294
294
|
- - ">="
|
295
295
|
- !ruby/object:Gem::Version
|
296
|
-
version:
|
296
|
+
version: '2.0'
|
297
297
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
298
298
|
requirements:
|
299
299
|
- - ">="
|
300
300
|
- !ruby/object:Gem::Version
|
301
301
|
version: '0'
|
302
302
|
requirements: []
|
303
|
-
rubygems_version: 3.
|
304
|
-
signing_key:
|
303
|
+
rubygems_version: 3.2.3
|
304
|
+
signing_key:
|
305
305
|
specification_version: 4
|
306
306
|
summary: A library for accessing the content of PDF files
|
307
307
|
test_files: []
|