pdf-reader 2.4.2 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +7 -0
- data/lib/pdf/reader/filter/flate.rb +2 -2
- data/lib/pdf/reader/object_hash.rb +3 -1
- data/lib/pdf/reader/page_layout.rb +1 -0
- data/lib/pdf/reader/page_state.rb +7 -13
- data/lib/pdf/reader/page_text_receiver.rb +18 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +7 -15
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
|
4
|
+
data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
|
7
|
+
data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
v2.5.0 (6th June 2021)
|
2
|
+
- bump minimum ruby version to 2.0
|
3
|
+
- Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
|
4
|
+
- Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
|
5
|
+
- Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
|
6
|
+
- Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
|
7
|
+
|
1
8
|
v2.4.2 (28th January 2021)
|
2
9
|
- relax ASCII85 dependency to allow 1.x
|
3
10
|
- improved support for decompressing objects with slightly malformed zlib data
|
@@ -32,7 +32,7 @@ class PDF::Reader
|
|
32
32
|
def zlib_inflate(data)
|
33
33
|
begin
|
34
34
|
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
-
rescue Zlib::DataError
|
35
|
+
rescue Zlib::DataError
|
36
36
|
# by default, Ruby's Zlib assumes the data it's inflating
|
37
37
|
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
38
|
# fails, swallow the exception and attempt to inflate the data as a raw
|
@@ -41,7 +41,7 @@ class PDF::Reader
|
|
41
41
|
|
42
42
|
begin
|
43
43
|
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
-
rescue StandardError
|
44
|
+
rescue StandardError
|
45
45
|
# swallow this one too, so we can try some other fallback options
|
46
46
|
end
|
47
47
|
|
@@ -331,7 +331,9 @@ class PDF::Reader
|
|
331
331
|
def decrypt(ref, obj)
|
332
332
|
case obj
|
333
333
|
when PDF::Reader::Stream then
|
334
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
335
337
|
obj
|
336
338
|
when Hash then
|
337
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -30,15 +30,7 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
|
34
|
-
state[:ctm] = identity_matrix
|
35
|
-
else
|
36
|
-
rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
|
37
|
-
rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
|
38
|
-
state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
|
39
|
-
rotate_sin * -1, rotate_cos,
|
40
|
-
0, 0)
|
41
|
-
end
|
33
|
+
state[:ctm] = identity_matrix
|
42
34
|
end
|
43
35
|
|
44
36
|
#####################################################
|
@@ -330,11 +322,13 @@ class PDF::Reader
|
|
330
322
|
th = state[:h_scaling]
|
331
323
|
# optimise the common path to reduce Float allocations
|
332
324
|
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
333
|
-
|
334
|
-
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
335
329
|
else
|
336
|
-
|
337
|
-
tx =
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
338
332
|
end
|
339
333
|
|
340
334
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
@@ -41,6 +41,7 @@ module PDF
|
|
41
41
|
# starting a new page
|
42
42
|
def page=(page)
|
43
43
|
@state = PageState.new(page)
|
44
|
+
@page = page
|
44
45
|
@content = []
|
45
46
|
@characters = []
|
46
47
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
@@ -104,6 +105,8 @@ module PDF
|
|
104
105
|
glyphs.each_with_index do |glyph_code, index|
|
105
106
|
# paint the current glyph
|
106
107
|
newx, newy = @state.trm_transform(0,0)
|
108
|
+
newx, newy = apply_rotation(newx, newy)
|
109
|
+
|
107
110
|
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
108
111
|
|
109
112
|
# apply to glyph displacement for the current glyph so the next
|
@@ -118,6 +121,21 @@ module PDF
|
|
118
121
|
end
|
119
122
|
end
|
120
123
|
|
124
|
+
def apply_rotation(x, y)
|
125
|
+
if @page.rotate == 90
|
126
|
+
tmp = x
|
127
|
+
x = y
|
128
|
+
y = tmp * -1
|
129
|
+
elsif @page.rotate == 180
|
130
|
+
y *= -1
|
131
|
+
elsif @page.rotate == 270
|
132
|
+
tmp = x
|
133
|
+
x = y * -1
|
134
|
+
y = tmp * -1
|
135
|
+
end
|
136
|
+
return x, y
|
137
|
+
end
|
138
|
+
|
121
139
|
end
|
122
140
|
end
|
123
141
|
end
|
@@ -37,23 +37,15 @@ class PDF::Reader
|
|
37
37
|
def glyph_width(code_point)
|
38
38
|
return 0 if code_point.nil? || code_point < 0
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
names = @font.encoding.int_to_name(code_point)
|
41
|
+
metrics = names.map { |name|
|
42
|
+
@metrics.char_metrics[name.to_s]
|
43
|
+
}.compact.first
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
}.compact.first
|
47
|
-
end
|
48
|
-
|
49
|
-
if m
|
50
|
-
m[:wx]
|
51
|
-
elsif @font.widths[code_point - 1]
|
52
|
-
@font.widths[code_point - 1]
|
53
|
-
elsif control_character?(code_point)
|
54
|
-
0
|
45
|
+
if metrics
|
46
|
+
metrics[:wx]
|
55
47
|
else
|
56
|
-
0
|
48
|
+
@font.widths[code_point - 1] || 0
|
57
49
|
end
|
58
50
|
end
|
59
51
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.2
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -277,9 +277,9 @@ licenses:
|
|
277
277
|
- MIT
|
278
278
|
metadata:
|
279
279
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
280
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.2/CHANGELOG
|
281
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.2
|
282
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.2
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
283
283
|
post_install_message:
|
284
284
|
rdoc_options:
|
285
285
|
- "--title"
|
@@ -293,7 +293,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
293
293
|
requirements:
|
294
294
|
- - ">="
|
295
295
|
- !ruby/object:Gem::Version
|
296
|
-
version:
|
296
|
+
version: '2.0'
|
297
297
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
298
298
|
requirements:
|
299
299
|
- - ">="
|