pdf-reader 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +14 -0
- data/README.md +1 -1
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/filter/flate.rb +28 -16
- data/lib/pdf/reader/font.rb +3 -1
- data/lib/pdf/reader/object_hash.rb +3 -1
- data/lib/pdf/reader/orientation_detector.rb +2 -2
- data/lib/pdf/reader/page.rb +28 -0
- data/lib/pdf/reader/page_layout.rb +7 -4
- data/lib/pdf/reader/page_state.rb +7 -5
- data/lib/pdf/reader/page_text_receiver.rb +22 -1
- data/lib/pdf/reader/width_calculator/built_in.rb +7 -15
- metadata +18 -17
data/lib/pdf/reader/font.rb
CHANGED
@@ -131,7 +131,9 @@ class PDF::Reader
|
|
131
131
|
if obj[:ToUnicode]
|
132
132
|
# ToUnicode is optional for Type1 and Type3
|
133
133
|
stream = @ohash.object(obj[:ToUnicode])
|
134
|
-
|
134
|
+
if stream.is_a?(PDF::Reader::Stream)
|
135
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
|
+
end
|
135
137
|
end
|
136
138
|
end
|
137
139
|
|
@@ -331,7 +331,9 @@ class PDF::Reader
|
|
331
331
|
def decrypt(ref, obj)
|
332
332
|
case obj
|
333
333
|
when PDF::Reader::Stream then
|
334
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
335
337
|
obj
|
336
338
|
when Hash then
|
337
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -22,8 +22,8 @@ class PDF::Reader
|
|
22
22
|
def detect_orientation
|
23
23
|
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
24
|
rotation = @attributes[:Rotate].to_i
|
25
|
-
width = urx.to_i - llx.to_i
|
26
|
-
height = ury.to_i - lly.to_i
|
25
|
+
width = (urx.to_i - llx.to_i).abs
|
26
|
+
height = (ury.to_i - lly.to_i).abs
|
27
27
|
if width > height
|
28
28
|
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
29
|
else
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -124,6 +124,34 @@ module PDF
|
|
124
124
|
}.join(" ")
|
125
125
|
end
|
126
126
|
|
127
|
+
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
128
|
+
#
|
129
|
+
def rotate
|
130
|
+
value = attributes[:Rotate].to_i
|
131
|
+
case value
|
132
|
+
when 0, 90, 180, 270
|
133
|
+
value
|
134
|
+
else
|
135
|
+
0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# returns the "boxes" that define the page object.
|
140
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
|
+
#
|
142
|
+
def boxes
|
143
|
+
mediabox = attributes[:MediaBox]
|
144
|
+
cropbox = attributes[:Cropbox] || mediabox
|
145
|
+
|
146
|
+
{
|
147
|
+
MediaBox: objects.deref!(mediabox),
|
148
|
+
CropBox: objects.deref!(cropbox),
|
149
|
+
BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
|
150
|
+
TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
|
151
|
+
ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
127
155
|
private
|
128
156
|
|
129
157
|
def root
|
@@ -21,18 +21,21 @@ class PDF::Reader
|
|
21
21
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
22
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
23
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
24
|
-
@page_width = mediabox[2] - mediabox[0]
|
25
|
-
@page_height = mediabox[3] - mediabox[1]
|
26
|
-
@x_offset = @runs.map(&:x).sort.first
|
24
|
+
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
+
@page_height = (mediabox[3] - mediabox[1]).abs
|
26
|
+
@x_offset = @runs.map(&:x).sort.first || 0
|
27
|
+
lowest_y = @runs.map(&:y).sort.first || 0
|
28
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
27
29
|
end
|
28
30
|
|
29
31
|
def to_s
|
30
32
|
return "" if @runs.empty?
|
33
|
+
return "" if row_count == 0
|
31
34
|
|
32
35
|
page = row_count.times.map { |i| " " * col_count }
|
33
36
|
@runs.each do |run|
|
34
37
|
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
35
|
-
y_pos = row_count - (run.y / row_multiplier).round
|
38
|
+
y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
|
36
39
|
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
37
40
|
local_string_insert(page[y_pos-1], run.text, x_pos)
|
38
41
|
end
|
@@ -30,7 +30,7 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
state[:ctm]
|
33
|
+
state[:ctm] = identity_matrix
|
34
34
|
end
|
35
35
|
|
36
36
|
#####################################################
|
@@ -322,11 +322,13 @@ class PDF::Reader
|
|
322
322
|
th = state[:h_scaling]
|
323
323
|
# optimise the common path to reduce Float allocations
|
324
324
|
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
325
|
-
|
326
|
-
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
327
329
|
else
|
328
|
-
|
329
|
-
tx =
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
330
332
|
end
|
331
333
|
|
332
334
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
@@ -41,13 +41,17 @@ module PDF
|
|
41
41
|
# starting a new page
|
42
42
|
def page=(page)
|
43
43
|
@state = PageState.new(page)
|
44
|
+
@page = page
|
44
45
|
@content = []
|
45
46
|
@characters = []
|
46
47
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
48
|
+
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
49
|
+
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
50
|
+
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
47
51
|
end
|
48
52
|
|
49
53
|
def content
|
50
|
-
PageLayout.new(@characters, @
|
54
|
+
PageLayout.new(@characters, @device_mediabox).to_s
|
51
55
|
end
|
52
56
|
|
53
57
|
#####################################################
|
@@ -101,6 +105,8 @@ module PDF
|
|
101
105
|
glyphs.each_with_index do |glyph_code, index|
|
102
106
|
# paint the current glyph
|
103
107
|
newx, newy = @state.trm_transform(0,0)
|
108
|
+
newx, newy = apply_rotation(newx, newy)
|
109
|
+
|
104
110
|
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
105
111
|
|
106
112
|
# apply to glyph displacment for the current glyph so the next
|
@@ -115,6 +121,21 @@ module PDF
|
|
115
121
|
end
|
116
122
|
end
|
117
123
|
|
124
|
+
def apply_rotation(x, y)
|
125
|
+
if @page.rotate == 90
|
126
|
+
tmp = x
|
127
|
+
x = y
|
128
|
+
y = tmp * -1
|
129
|
+
elsif @page.rotate == 180
|
130
|
+
y *= -1
|
131
|
+
elsif @page.rotate == 270
|
132
|
+
tmp = x
|
133
|
+
x = y * -1
|
134
|
+
y = tmp * -1
|
135
|
+
end
|
136
|
+
return x, y
|
137
|
+
end
|
138
|
+
|
118
139
|
end
|
119
140
|
end
|
120
141
|
end
|
@@ -37,23 +37,15 @@ class PDF::Reader
|
|
37
37
|
def glyph_width(code_point)
|
38
38
|
return 0 if code_point.nil? || code_point < 0
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
names = @font.encoding.int_to_name(code_point)
|
41
|
+
metrics = names.map { |name|
|
42
|
+
@metrics.char_metrics[name.to_s]
|
43
|
+
}.compact.first
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
}.compact.first
|
47
|
-
end
|
48
|
-
|
49
|
-
if m
|
50
|
-
m[:wx]
|
51
|
-
elsif @font.widths[code_point - 1]
|
52
|
-
@font.widths[code_point - 1]
|
53
|
-
elsif control_character?(code_point)
|
54
|
-
0
|
45
|
+
if metrics
|
46
|
+
metrics[:wx]
|
55
47
|
else
|
56
|
-
0
|
48
|
+
@font.widths[code_point - 1] || 0
|
57
49
|
end
|
58
50
|
end
|
59
51
|
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "<"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '13.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "<"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '13.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.2'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: pry
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.0
|
103
|
+
version: '1.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.0
|
110
|
+
version: '1.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: ruby-rc4
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -208,6 +208,7 @@ files:
|
|
208
208
|
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
209
209
|
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
210
210
|
- lib/pdf/reader/afm/Helvetica.afm
|
211
|
+
- lib/pdf/reader/afm/MustRead.html
|
211
212
|
- lib/pdf/reader/afm/Symbol.afm
|
212
213
|
- lib/pdf/reader/afm/Times-Bold.afm
|
213
214
|
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
@@ -276,10 +277,10 @@ licenses:
|
|
276
277
|
- MIT
|
277
278
|
metadata:
|
278
279
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
279
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.0/CHANGELOG
|
280
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.0
|
281
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.0
|
282
|
-
post_install_message:
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
283
|
+
post_install_message:
|
283
284
|
rdoc_options:
|
284
285
|
- "--title"
|
285
286
|
- PDF::Reader Documentation
|
@@ -292,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
292
293
|
requirements:
|
293
294
|
- - ">="
|
294
295
|
- !ruby/object:Gem::Version
|
295
|
-
version:
|
296
|
+
version: '2.0'
|
296
297
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
297
298
|
requirements:
|
298
299
|
- - ">="
|
299
300
|
- !ruby/object:Gem::Version
|
300
301
|
version: '0'
|
301
302
|
requirements: []
|
302
|
-
rubygems_version: 3.
|
303
|
-
signing_key:
|
303
|
+
rubygems_version: 3.2.3
|
304
|
+
signing_key:
|
304
305
|
specification_version: 4
|
305
306
|
summary: A library for accessing the content of PDF files
|
306
307
|
test_files: []
|