pdf-reader 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +14 -0
- data/README.md +1 -1
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/filter/flate.rb +28 -16
- data/lib/pdf/reader/font.rb +3 -1
- data/lib/pdf/reader/object_hash.rb +3 -1
- data/lib/pdf/reader/orientation_detector.rb +2 -2
- data/lib/pdf/reader/page.rb +28 -0
- data/lib/pdf/reader/page_layout.rb +7 -4
- data/lib/pdf/reader/page_state.rb +7 -5
- data/lib/pdf/reader/page_text_receiver.rb +22 -1
- data/lib/pdf/reader/width_calculator/built_in.rb +7 -15
- metadata +18 -17
data/lib/pdf/reader/font.rb
CHANGED
@@ -131,7 +131,9 @@ class PDF::Reader
|
|
131
131
|
if obj[:ToUnicode]
|
132
132
|
# ToUnicode is optional for Type1 and Type3
|
133
133
|
stream = @ohash.object(obj[:ToUnicode])
|
134
|
-
|
134
|
+
if stream.is_a?(PDF::Reader::Stream)
|
135
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
|
+
end
|
135
137
|
end
|
136
138
|
end
|
137
139
|
|
data/lib/pdf/reader/object_hash.rb
CHANGED
@@ -331,7 +331,9 @@ class PDF::Reader
|
|
331
331
|
def decrypt(ref, obj)
|
332
332
|
case obj
|
333
333
|
when PDF::Reader::Stream then
|
334
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
335
337
|
obj
|
336
338
|
when Hash then
|
337
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
data/lib/pdf/reader/orientation_detector.rb
CHANGED
@@ -22,8 +22,8 @@ class PDF::Reader
|
|
22
22
|
def detect_orientation
|
23
23
|
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
24
|
rotation = @attributes[:Rotate].to_i
|
25
|
-
width = urx.to_i - llx.to_i
|
26
|
-
height = ury.to_i - lly.to_i
|
25
|
+
width = (urx.to_i - llx.to_i).abs
|
26
|
+
height = (ury.to_i - lly.to_i).abs
|
27
27
|
if width > height
|
28
28
|
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
29
|
else
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -124,6 +124,34 @@ module PDF
|
|
124
124
|
}.join(" ")
|
125
125
|
end
|
126
126
|
|
127
|
+
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
128
|
+
#
|
129
|
+
def rotate
|
130
|
+
value = attributes[:Rotate].to_i
|
131
|
+
case value
|
132
|
+
when 0, 90, 180, 270
|
133
|
+
value
|
134
|
+
else
|
135
|
+
0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# returns the "boxes" that define the page object.
|
140
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
|
+
#
|
142
|
+
def boxes
|
143
|
+
mediabox = attributes[:MediaBox]
|
144
|
+
cropbox = attributes[:Cropbox] || mediabox
|
145
|
+
|
146
|
+
{
|
147
|
+
MediaBox: objects.deref!(mediabox),
|
148
|
+
CropBox: objects.deref!(cropbox),
|
149
|
+
BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
|
150
|
+
TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
|
151
|
+
ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
127
155
|
private
|
128
156
|
|
129
157
|
def root
|
data/lib/pdf/reader/page_layout.rb
CHANGED
@@ -21,18 +21,21 @@ class PDF::Reader
|
|
21
21
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
22
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
23
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
24
|
-
@page_width = mediabox[2] - mediabox[0]
|
25
|
-
@page_height = mediabox[3] - mediabox[1]
|
26
|
-
@x_offset = @runs.map(&:x).sort.first
|
24
|
+
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
+
@page_height = (mediabox[3] - mediabox[1]).abs
|
26
|
+
@x_offset = @runs.map(&:x).sort.first || 0
|
27
|
+
lowest_y = @runs.map(&:y).sort.first || 0
|
28
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
27
29
|
end
|
28
30
|
|
29
31
|
def to_s
|
30
32
|
return "" if @runs.empty?
|
33
|
+
return "" if row_count == 0
|
31
34
|
|
32
35
|
page = row_count.times.map { |i| " " * col_count }
|
33
36
|
@runs.each do |run|
|
34
37
|
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
35
|
-
y_pos = row_count - (run.y / row_multiplier).round
|
38
|
+
y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
|
36
39
|
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
37
40
|
local_string_insert(page[y_pos-1], run.text, x_pos)
|
38
41
|
end
|
data/lib/pdf/reader/page_state.rb
CHANGED
@@ -30,7 +30,7 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
state[:ctm]
|
33
|
+
state[:ctm] = identity_matrix
|
34
34
|
end
|
35
35
|
|
36
36
|
#####################################################
|
@@ -322,11 +322,13 @@ class PDF::Reader
|
|
322
322
|
th = state[:h_scaling]
|
323
323
|
# optimise the common path to reduce Float allocations
|
324
324
|
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
325
|
-
|
326
|
-
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
327
329
|
else
|
328
|
-
|
329
|
-
tx =
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
330
332
|
end
|
331
333
|
|
332
334
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
data/lib/pdf/reader/page_text_receiver.rb
CHANGED
@@ -41,13 +41,17 @@ module PDF
|
|
41
41
|
# starting a new page
|
42
42
|
def page=(page)
|
43
43
|
@state = PageState.new(page)
|
44
|
+
@page = page
|
44
45
|
@content = []
|
45
46
|
@characters = []
|
46
47
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
48
|
+
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
49
|
+
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
50
|
+
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
47
51
|
end
|
48
52
|
|
49
53
|
def content
|
50
|
-
PageLayout.new(@characters, @mediabox).to_s
|
54
|
+
PageLayout.new(@characters, @device_mediabox).to_s
|
51
55
|
end
|
52
56
|
|
53
57
|
#####################################################
|
@@ -101,6 +105,8 @@ module PDF
|
|
101
105
|
glyphs.each_with_index do |glyph_code, index|
|
102
106
|
# paint the current glyph
|
103
107
|
newx, newy = @state.trm_transform(0,0)
|
108
|
+
newx, newy = apply_rotation(newx, newy)
|
109
|
+
|
104
110
|
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
105
111
|
|
106
112
|
# apply to glyph displacment for the current glyph so the next
|
@@ -115,6 +121,21 @@ module PDF
|
|
115
121
|
end
|
116
122
|
end
|
117
123
|
|
124
|
+
def apply_rotation(x, y)
|
125
|
+
if @page.rotate == 90
|
126
|
+
tmp = x
|
127
|
+
x = y
|
128
|
+
y = tmp * -1
|
129
|
+
elsif @page.rotate == 180
|
130
|
+
y *= -1
|
131
|
+
elsif @page.rotate == 270
|
132
|
+
tmp = x
|
133
|
+
x = y * -1
|
134
|
+
y = tmp * -1
|
135
|
+
end
|
136
|
+
return x, y
|
137
|
+
end
|
138
|
+
|
118
139
|
end
|
119
140
|
end
|
120
141
|
end
|
data/lib/pdf/reader/width_calculator/built_in.rb
CHANGED
@@ -37,23 +37,15 @@ class PDF::Reader
|
|
37
37
|
def glyph_width(code_point)
|
38
38
|
return 0 if code_point.nil? || code_point < 0
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
names = @font.encoding.int_to_name(code_point)
|
41
|
+
metrics = names.map { |name|
|
42
|
+
@metrics.char_metrics[name.to_s]
|
43
|
+
}.compact.first
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
}.compact.first
|
47
|
-
end
|
48
|
-
|
49
|
-
if m
|
50
|
-
m[:wx]
|
51
|
-
elsif @font.widths[code_point - 1]
|
52
|
-
@font.widths[code_point - 1]
|
53
|
-
elsif control_character?(code_point)
|
54
|
-
0
|
45
|
+
if metrics
|
46
|
+
metrics[:wx]
|
55
47
|
else
|
56
|
-
0
|
48
|
+
@font.widths[code_point - 1] || 0
|
57
49
|
end
|
58
50
|
end
|
59
51
|
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ">="
|
17
|
+
- - "<"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '13.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ">="
|
24
|
+
- - "<"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '13.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.2'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: pry
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.0
|
103
|
+
version: '1.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.0
|
110
|
+
version: '1.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: ruby-rc4
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -208,6 +208,7 @@ files:
|
|
208
208
|
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
209
209
|
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
210
210
|
- lib/pdf/reader/afm/Helvetica.afm
|
211
|
+
- lib/pdf/reader/afm/MustRead.html
|
211
212
|
- lib/pdf/reader/afm/Symbol.afm
|
212
213
|
- lib/pdf/reader/afm/Times-Bold.afm
|
213
214
|
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
@@ -276,10 +277,10 @@ licenses:
|
|
276
277
|
- MIT
|
277
278
|
metadata:
|
278
279
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
279
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.0/CHANGELOG
|
280
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.0
|
281
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.0
|
282
|
-
post_install_message:
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
283
|
+
post_install_message:
|
283
284
|
rdoc_options:
|
284
285
|
- "--title"
|
285
286
|
- PDF::Reader Documentation
|
@@ -292,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
292
293
|
requirements:
|
293
294
|
- - ">="
|
294
295
|
- !ruby/object:Gem::Version
|
295
|
-
version:
|
296
|
+
version: '2.0'
|
296
297
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
297
298
|
requirements:
|
298
299
|
- - ">="
|
299
300
|
- !ruby/object:Gem::Version
|
300
301
|
version: '0'
|
301
302
|
requirements: []
|
302
|
-
rubygems_version: 3.
|
303
|
-
signing_key:
|
303
|
+
rubygems_version: 3.2.3
|
304
|
+
signing_key:
|
304
305
|
specification_version: 4
|
305
306
|
summary: A library for accessing the content of PDF files
|
306
307
|
test_files: []
|