pdf-reader 2.4.2 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +7 -0
- data/lib/pdf/reader/filter/flate.rb +2 -2
- data/lib/pdf/reader/object_hash.rb +3 -1
- data/lib/pdf/reader/page_layout.rb +1 -0
- data/lib/pdf/reader/page_state.rb +7 -13
- data/lib/pdf/reader/page_text_receiver.rb +18 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +7 -15
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
|
4
|
+
data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
|
7
|
+
data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
v2.5.0 (6th June 2021)
|
2
|
+
- bump minimum ruby version to 2.0
|
3
|
+
- Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
|
4
|
+
- Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
|
5
|
+
- Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
|
6
|
+
- Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
|
7
|
+
|
1
8
|
v2.4.2 (28th January 2021)
|
2
9
|
- relax ASCII85 dependency to allow 1.x
|
3
10
|
- improved support for decompressing objects with slightly malformed zlib data
|
@@ -32,7 +32,7 @@ class PDF::Reader
|
|
32
32
|
def zlib_inflate(data)
|
33
33
|
begin
|
34
34
|
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
-
rescue Zlib::DataError
|
35
|
+
rescue Zlib::DataError
|
36
36
|
# by default, Ruby's Zlib assumes the data it's inflating
|
37
37
|
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
38
|
# fails, swallow the exception and attempt to inflate the data as a raw
|
@@ -41,7 +41,7 @@ class PDF::Reader
|
|
41
41
|
|
42
42
|
begin
|
43
43
|
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
-
rescue StandardError
|
44
|
+
rescue StandardError
|
45
45
|
# swallow this one too, so we can try some other fallback options
|
46
46
|
end
|
47
47
|
|
@@ -331,7 +331,9 @@ class PDF::Reader
|
|
331
331
|
def decrypt(ref, obj)
|
332
332
|
case obj
|
333
333
|
when PDF::Reader::Stream then
|
334
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
335
337
|
obj
|
336
338
|
when Hash then
|
337
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -30,15 +30,7 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
|
34
|
-
state[:ctm] = identity_matrix
|
35
|
-
else
|
36
|
-
rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
|
37
|
-
rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
|
38
|
-
state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
|
39
|
-
rotate_sin * -1, rotate_cos,
|
40
|
-
0, 0)
|
41
|
-
end
|
33
|
+
state[:ctm] = identity_matrix
|
42
34
|
end
|
43
35
|
|
44
36
|
#####################################################
|
@@ -330,11 +322,13 @@ class PDF::Reader
|
|
330
322
|
th = state[:h_scaling]
|
331
323
|
# optimise the common path to reduce Float allocations
|
332
324
|
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
333
|
-
|
334
|
-
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
335
329
|
else
|
336
|
-
|
337
|
-
tx =
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
338
332
|
end
|
339
333
|
|
340
334
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
@@ -41,6 +41,7 @@ module PDF
|
|
41
41
|
# starting a new page
|
42
42
|
def page=(page)
|
43
43
|
@state = PageState.new(page)
|
44
|
+
@page = page
|
44
45
|
@content = []
|
45
46
|
@characters = []
|
46
47
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
@@ -104,6 +105,8 @@ module PDF
|
|
104
105
|
glyphs.each_with_index do |glyph_code, index|
|
105
106
|
# paint the current glyph
|
106
107
|
newx, newy = @state.trm_transform(0,0)
|
108
|
+
newx, newy = apply_rotation(newx, newy)
|
109
|
+
|
107
110
|
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
108
111
|
|
109
112
|
# apply to glyph displacement for the current glyph so the next
|
@@ -118,6 +121,21 @@ module PDF
|
|
118
121
|
end
|
119
122
|
end
|
120
123
|
|
124
|
+
def apply_rotation(x, y)
|
125
|
+
if @page.rotate == 90
|
126
|
+
tmp = x
|
127
|
+
x = y
|
128
|
+
y = tmp * -1
|
129
|
+
elsif @page.rotate == 180
|
130
|
+
y *= -1
|
131
|
+
elsif @page.rotate == 270
|
132
|
+
tmp = x
|
133
|
+
x = y * -1
|
134
|
+
y = tmp * -1
|
135
|
+
end
|
136
|
+
return x, y
|
137
|
+
end
|
138
|
+
|
121
139
|
end
|
122
140
|
end
|
123
141
|
end
|
@@ -37,23 +37,15 @@ class PDF::Reader
|
|
37
37
|
def glyph_width(code_point)
|
38
38
|
return 0 if code_point.nil? || code_point < 0
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
names = @font.encoding.int_to_name(code_point)
|
41
|
+
metrics = names.map { |name|
|
42
|
+
@metrics.char_metrics[name.to_s]
|
43
|
+
}.compact.first
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
}.compact.first
|
47
|
-
end
|
48
|
-
|
49
|
-
if m
|
50
|
-
m[:wx]
|
51
|
-
elsif @font.widths[code_point - 1]
|
52
|
-
@font.widths[code_point - 1]
|
53
|
-
elsif control_character?(code_point)
|
54
|
-
0
|
45
|
+
if metrics
|
46
|
+
metrics[:wx]
|
55
47
|
else
|
56
|
-
0
|
48
|
+
@font.widths[code_point - 1] || 0
|
57
49
|
end
|
58
50
|
end
|
59
51
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -277,9 +277,9 @@ licenses:
|
|
277
277
|
- MIT
|
278
278
|
metadata:
|
279
279
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
280
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.
|
281
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.
|
282
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
283
283
|
post_install_message:
|
284
284
|
rdoc_options:
|
285
285
|
- "--title"
|
@@ -293,7 +293,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
293
293
|
requirements:
|
294
294
|
- - ">="
|
295
295
|
- !ruby/object:Gem::Version
|
296
|
-
version:
|
296
|
+
version: '2.0'
|
297
297
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
298
298
|
requirements:
|
299
299
|
- - ">="
|