pdf-reader 2.4.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/README.md +1 -1
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/filter/flate.rb +5 -4
- data/lib/pdf/reader/font.rb +3 -1
- data/lib/pdf/reader/orientation_detector.rb +2 -2
- data/lib/pdf/reader/page.rb +28 -0
- data/lib/pdf/reader/page_layout.rb +6 -4
- data/lib/pdf/reader/page_state.rb +9 -1
- data/lib/pdf/reader/page_text_receiver.rb +4 -1
- metadata +11 -10
data/lib/pdf/reader/font.rb
CHANGED
@@ -131,7 +131,9 @@ class PDF::Reader
|
|
131
131
|
if obj[:ToUnicode]
|
132
132
|
# ToUnicode is optional for Type1 and Type3
|
133
133
|
stream = @ohash.object(obj[:ToUnicode])
|
134
|
-
|
134
|
+
if stream.is_a?(PDF::Reader::Stream)
|
135
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
|
+
end
|
135
137
|
end
|
136
138
|
end
|
137
139
|
|
data/lib/pdf/reader/orientation_detector.rb
CHANGED
@@ -22,8 +22,8 @@ class PDF::Reader
|
|
22
22
|
def detect_orientation
|
23
23
|
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
24
|
rotation = @attributes[:Rotate].to_i
|
25
|
-
width = urx.to_i - llx.to_i
|
26
|
-
height = ury.to_i - lly.to_i
|
25
|
+
width = (urx.to_i - llx.to_i).abs
|
26
|
+
height = (ury.to_i - lly.to_i).abs
|
27
27
|
if width > height
|
28
28
|
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
29
|
else
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -124,6 +124,34 @@ module PDF
|
|
124
124
|
}.join(" ")
|
125
125
|
end
|
126
126
|
|
127
|
+
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
128
|
+
#
|
129
|
+
def rotate
|
130
|
+
value = attributes[:Rotate].to_i
|
131
|
+
case value
|
132
|
+
when 0, 90, 180, 270
|
133
|
+
value
|
134
|
+
else
|
135
|
+
0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# returns the "boxes" that define the page object.
|
140
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
|
+
#
|
142
|
+
def boxes
|
143
|
+
mediabox = attributes[:MediaBox]
|
144
|
+
cropbox = attributes[:Cropbox] || mediabox
|
145
|
+
|
146
|
+
{
|
147
|
+
MediaBox: objects.deref!(mediabox),
|
148
|
+
CropBox: objects.deref!(cropbox),
|
149
|
+
BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
|
150
|
+
TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
|
151
|
+
ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
127
155
|
private
|
128
156
|
|
129
157
|
def root
|
data/lib/pdf/reader/page_layout.rb
CHANGED
@@ -21,9 +21,11 @@ class PDF::Reader
|
|
21
21
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
22
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
23
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
24
|
-
@page_width = mediabox[2] - mediabox[0]
|
25
|
-
@page_height = mediabox[3] - mediabox[1]
|
26
|
-
@x_offset = @runs.map(&:x).sort.first
|
24
|
+
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
+
@page_height = (mediabox[3] - mediabox[1]).abs
|
26
|
+
@x_offset = @runs.map(&:x).sort.first || 0
|
27
|
+
lowest_y = @runs.map(&:y).sort.first || 0
|
28
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
27
29
|
end
|
28
30
|
|
29
31
|
def to_s
|
@@ -32,7 +34,7 @@ class PDF::Reader
|
|
32
34
|
page = row_count.times.map { |i| " " * col_count }
|
33
35
|
@runs.each do |run|
|
34
36
|
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
35
|
-
y_pos = row_count - (run.y / row_multiplier).round
|
37
|
+
y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
|
36
38
|
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
37
39
|
local_string_insert(page[y_pos-1], run.text, x_pos)
|
38
40
|
end
|
data/lib/pdf/reader/page_state.rb
CHANGED
@@ -30,7 +30,15 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
|
33
|
+
if page.rotate == 0
|
34
|
+
state[:ctm] = identity_matrix
|
35
|
+
else
|
36
|
+
rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
|
37
|
+
rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
|
38
|
+
state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
|
39
|
+
rotate_sin * -1, rotate_cos,
|
40
|
+
0, 0)
|
41
|
+
end
|
34
42
|
end
|
35
43
|
|
36
44
|
#####################################################
|
data/lib/pdf/reader/page_text_receiver.rb
CHANGED
@@ -44,10 +44,13 @@ module PDF
|
|
44
44
|
@content = []
|
45
45
|
@characters = []
|
46
46
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
47
|
+
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
48
|
+
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
49
|
+
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
47
50
|
end
|
48
51
|
|
49
52
|
def content
|
50
|
-
PageLayout.new(@characters, @mediabox).to_s
|
53
|
+
PageLayout.new(@characters, @device_mediabox).to_s
|
51
54
|
end
|
52
55
|
|
53
56
|
#####################################################
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ">="
|
17
|
+
- - "<"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '13.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ">="
|
24
|
+
- - "<"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '13.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.2'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: pry
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -208,6 +208,7 @@ files:
|
|
208
208
|
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
209
209
|
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
210
210
|
- lib/pdf/reader/afm/Helvetica.afm
|
211
|
+
- lib/pdf/reader/afm/MustRead.html
|
211
212
|
- lib/pdf/reader/afm/Symbol.afm
|
212
213
|
- lib/pdf/reader/afm/Times-Bold.afm
|
213
214
|
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
@@ -276,9 +277,9 @@ licenses:
|
|
276
277
|
- MIT
|
277
278
|
metadata:
|
278
279
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
279
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.0/CHANGELOG
|
280
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.0
|
281
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.0
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.1/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.1
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.1
|
282
283
|
post_install_message:
|
283
284
|
rdoc_options:
|
284
285
|
- "--title"
|