pdf-reader 2.4.2 → 2.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +44 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +63 -21
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +5 -3
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +11 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +10 -8
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +44 -0
- data/lib/pdf/reader/font_descriptor.rb +1 -0
- data/lib/pdf/reader/form_xobject.rb +1 -0
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +8 -3
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +73 -11
- data/lib/pdf/reader/page_layout.rb +37 -37
- data/lib/pdf/reader/page_state.rb +18 -23
- data/lib/pdf/reader/page_text_receiver.rb +68 -6
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +15 -7
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/resource_methods.rb +5 -0
- data/lib/pdf/reader/standard_security_handler.rb +1 -0
- data/lib/pdf/reader/standard_security_handler_v5.rb +1 -0
- data/lib/pdf/reader/stream.rb +1 -0
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +8 -15
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +7 -1
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +29 -6
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1763 -0
- metadata +12 -7
- data/lib/pdf/reader/orientation_detector.rb +0 -34
data/lib/pdf/reader/font.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -42,6 +43,7 @@ class PDF::Reader
|
|
42
43
|
@tounicode = nil
|
43
44
|
|
44
45
|
extract_base_info(obj)
|
46
|
+
extract_type3_info(obj)
|
45
47
|
extract_descriptor(obj)
|
46
48
|
extract_descendants(obj)
|
47
49
|
@width_calc = build_width_calculator
|
@@ -72,8 +74,44 @@ class PDF::Reader
|
|
72
74
|
@cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
|
73
75
|
end
|
74
76
|
|
77
|
+
# In most cases glyph width is converted into text space with a simple divide by 1000.
|
78
|
+
#
|
79
|
+
# However, Type3 fonts provide their own FontMatrix that's used for the transformation.
|
80
|
+
#
|
81
|
+
def glyph_width_in_text_space(code_point)
|
82
|
+
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
|
+
|
84
|
+
if @subtype == :Type3
|
85
|
+
x1, y1 = font_matrix_transform(0,0)
|
86
|
+
x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
|
+
(x2 - x1).abs.round(2)
|
88
|
+
else
|
89
|
+
glyph_width_in_glyph_space / 1000.0
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
75
93
|
private
|
76
94
|
|
95
|
+
# Only valid for Type3 fonts
|
96
|
+
def font_matrix_transform(x, y)
|
97
|
+
return x, y if @font_matrix.nil?
|
98
|
+
|
99
|
+
matrix = TransformationMatrix.new(
|
100
|
+
@font_matrix[0], @font_matrix[1],
|
101
|
+
@font_matrix[2], @font_matrix[3],
|
102
|
+
@font_matrix[4], @font_matrix[5],
|
103
|
+
)
|
104
|
+
|
105
|
+
if x == 0 && y == 0
|
106
|
+
[matrix.e, matrix.f]
|
107
|
+
else
|
108
|
+
[
|
109
|
+
(matrix.a * x) + (matrix.c * y) + (matrix.e),
|
110
|
+
(matrix.b * x) + (matrix.d * y) + (matrix.f)
|
111
|
+
]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
77
115
|
def default_encoding(font_name)
|
78
116
|
case font_name.to_s
|
79
117
|
when "Symbol" then
|
@@ -137,6 +175,12 @@ class PDF::Reader
|
|
137
175
|
end
|
138
176
|
end
|
139
177
|
|
178
|
+
def extract_type3_info(obj)
|
179
|
+
if @subtype == :Type3
|
180
|
+
@font_matrix = @ohash.object(obj[:FontMatrix]) || [ 0.001, 0, 0, 0.001, 0, 0 ]
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
140
184
|
def extract_descriptor(obj)
|
141
185
|
if obj[:FontDescriptor]
|
142
186
|
# create a font descriptor object if we can, in other words, unless this is
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -103,19 +104,25 @@ class PDF::Reader
|
|
103
104
|
|
104
105
|
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
105
106
|
# a text file supplied by Adobe at:
|
106
|
-
#
|
107
|
+
# https://github.com/adobe-type-tools/agl-aglfn
|
107
108
|
def load_adobe_glyph_mapping
|
108
109
|
keyed_by_name = {}
|
109
110
|
keyed_by_codepoint = {}
|
110
111
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
112
|
+
paths = [
|
113
|
+
File.dirname(__FILE__) + "/glyphlist.txt",
|
114
|
+
File.dirname(__FILE__) + "/glyphlist-zapfdingbats.txt",
|
115
|
+
]
|
116
|
+
paths.each do |path|
|
117
|
+
File.open(path, "r:BINARY") do |f|
|
118
|
+
f.each do |l|
|
119
|
+
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
120
|
+
if name && code
|
121
|
+
cp = "0x#{code}".hex
|
122
|
+
keyed_by_name[name.to_sym] = cp
|
123
|
+
keyed_by_codepoint[cp] ||= []
|
124
|
+
keyed_by_codepoint[cp] << name.to_sym
|
125
|
+
end
|
119
126
|
end
|
120
127
|
end
|
121
128
|
end
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# -----------------------------------------------------------
|
2
|
+
# Copyright 2002-2019 Adobe (http://www.adobe.com/).
|
3
|
+
#
|
4
|
+
# Redistribution and use in source and binary forms, with or
|
5
|
+
# without modification, are permitted provided that the
|
6
|
+
# following conditions are met:
|
7
|
+
#
|
8
|
+
# Redistributions of source code must retain the above
|
9
|
+
# copyright notice, this list of conditions and the following
|
10
|
+
# disclaimer.
|
11
|
+
#
|
12
|
+
# Redistributions in binary form must reproduce the above
|
13
|
+
# copyright notice, this list of conditions and the following
|
14
|
+
# disclaimer in the documentation and/or other materials
|
15
|
+
# provided with the distribution.
|
16
|
+
#
|
17
|
+
# Neither the name of Adobe nor the names of its contributors
|
18
|
+
# may be used to endorse or promote products derived from this
|
19
|
+
# software without specific prior written permission.
|
20
|
+
#
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
22
|
+
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
23
|
+
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
24
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
25
|
+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
26
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
28
|
+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
29
|
+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
30
|
+
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
31
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
32
|
+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
33
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
34
|
+
# -----------------------------------------------------------
|
35
|
+
# Name: ITC Zapf Dingbats Glyph List
|
36
|
+
# Table version: 2.0
|
37
|
+
# Date: September 20, 2002
|
38
|
+
# URL: https://github.com/adobe-type-tools/agl-aglfn
|
39
|
+
#
|
40
|
+
# Format: two semicolon-delimited fields:
|
41
|
+
# (1) glyph name--upper/lowercase letters and digits
|
42
|
+
# (2) Unicode scalar value--four uppercase hexadecimal digits
|
43
|
+
#
|
44
|
+
a100;275E
|
45
|
+
a101;2761
|
46
|
+
a102;2762
|
47
|
+
a103;2763
|
48
|
+
a104;2764
|
49
|
+
a105;2710
|
50
|
+
a106;2765
|
51
|
+
a107;2766
|
52
|
+
a108;2767
|
53
|
+
a109;2660
|
54
|
+
a10;2721
|
55
|
+
a110;2665
|
56
|
+
a111;2666
|
57
|
+
a112;2663
|
58
|
+
a117;2709
|
59
|
+
a118;2708
|
60
|
+
a119;2707
|
61
|
+
a11;261B
|
62
|
+
a120;2460
|
63
|
+
a121;2461
|
64
|
+
a122;2462
|
65
|
+
a123;2463
|
66
|
+
a124;2464
|
67
|
+
a125;2465
|
68
|
+
a126;2466
|
69
|
+
a127;2467
|
70
|
+
a128;2468
|
71
|
+
a129;2469
|
72
|
+
a12;261E
|
73
|
+
a130;2776
|
74
|
+
a131;2777
|
75
|
+
a132;2778
|
76
|
+
a133;2779
|
77
|
+
a134;277A
|
78
|
+
a135;277B
|
79
|
+
a136;277C
|
80
|
+
a137;277D
|
81
|
+
a138;277E
|
82
|
+
a139;277F
|
83
|
+
a13;270C
|
84
|
+
a140;2780
|
85
|
+
a141;2781
|
86
|
+
a142;2782
|
87
|
+
a143;2783
|
88
|
+
a144;2784
|
89
|
+
a145;2785
|
90
|
+
a146;2786
|
91
|
+
a147;2787
|
92
|
+
a148;2788
|
93
|
+
a149;2789
|
94
|
+
a14;270D
|
95
|
+
a150;278A
|
96
|
+
a151;278B
|
97
|
+
a152;278C
|
98
|
+
a153;278D
|
99
|
+
a154;278E
|
100
|
+
a155;278F
|
101
|
+
a156;2790
|
102
|
+
a157;2791
|
103
|
+
a158;2792
|
104
|
+
a159;2793
|
105
|
+
a15;270E
|
106
|
+
a160;2794
|
107
|
+
a161;2192
|
108
|
+
a162;27A3
|
109
|
+
a163;2194
|
110
|
+
a164;2195
|
111
|
+
a165;2799
|
112
|
+
a166;279B
|
113
|
+
a167;279C
|
114
|
+
a168;279D
|
115
|
+
a169;279E
|
116
|
+
a16;270F
|
117
|
+
a170;279F
|
118
|
+
a171;27A0
|
119
|
+
a172;27A1
|
120
|
+
a173;27A2
|
121
|
+
a174;27A4
|
122
|
+
a175;27A5
|
123
|
+
a176;27A6
|
124
|
+
a177;27A7
|
125
|
+
a178;27A8
|
126
|
+
a179;27A9
|
127
|
+
a17;2711
|
128
|
+
a180;27AB
|
129
|
+
a181;27AD
|
130
|
+
a182;27AF
|
131
|
+
a183;27B2
|
132
|
+
a184;27B3
|
133
|
+
a185;27B5
|
134
|
+
a186;27B8
|
135
|
+
a187;27BA
|
136
|
+
a188;27BB
|
137
|
+
a189;27BC
|
138
|
+
a18;2712
|
139
|
+
a190;27BD
|
140
|
+
a191;27BE
|
141
|
+
a192;279A
|
142
|
+
a193;27AA
|
143
|
+
a194;27B6
|
144
|
+
a195;27B9
|
145
|
+
a196;2798
|
146
|
+
a197;27B4
|
147
|
+
a198;27B7
|
148
|
+
a199;27AC
|
149
|
+
a19;2713
|
150
|
+
a1;2701
|
151
|
+
a200;27AE
|
152
|
+
a201;27B1
|
153
|
+
a202;2703
|
154
|
+
a203;2750
|
155
|
+
a204;2752
|
156
|
+
a205;276E
|
157
|
+
a206;2770
|
158
|
+
a20;2714
|
159
|
+
a21;2715
|
160
|
+
a22;2716
|
161
|
+
a23;2717
|
162
|
+
a24;2718
|
163
|
+
a25;2719
|
164
|
+
a26;271A
|
165
|
+
a27;271B
|
166
|
+
a28;271C
|
167
|
+
a29;2722
|
168
|
+
a2;2702
|
169
|
+
a30;2723
|
170
|
+
a31;2724
|
171
|
+
a32;2725
|
172
|
+
a33;2726
|
173
|
+
a34;2727
|
174
|
+
a35;2605
|
175
|
+
a36;2729
|
176
|
+
a37;272A
|
177
|
+
a38;272B
|
178
|
+
a39;272C
|
179
|
+
a3;2704
|
180
|
+
a40;272D
|
181
|
+
a41;272E
|
182
|
+
a42;272F
|
183
|
+
a43;2730
|
184
|
+
a44;2731
|
185
|
+
a45;2732
|
186
|
+
a46;2733
|
187
|
+
a47;2734
|
188
|
+
a48;2735
|
189
|
+
a49;2736
|
190
|
+
a4;260E
|
191
|
+
a50;2737
|
192
|
+
a51;2738
|
193
|
+
a52;2739
|
194
|
+
a53;273A
|
195
|
+
a54;273B
|
196
|
+
a55;273C
|
197
|
+
a56;273D
|
198
|
+
a57;273E
|
199
|
+
a58;273F
|
200
|
+
a59;2740
|
201
|
+
a5;2706
|
202
|
+
a60;2741
|
203
|
+
a61;2742
|
204
|
+
a62;2743
|
205
|
+
a63;2744
|
206
|
+
a64;2745
|
207
|
+
a65;2746
|
208
|
+
a66;2747
|
209
|
+
a67;2748
|
210
|
+
a68;2749
|
211
|
+
a69;274A
|
212
|
+
a6;271D
|
213
|
+
a70;274B
|
214
|
+
a71;25CF
|
215
|
+
a72;274D
|
216
|
+
a73;25A0
|
217
|
+
a74;274F
|
218
|
+
a75;2751
|
219
|
+
a76;25B2
|
220
|
+
a77;25BC
|
221
|
+
a78;25C6
|
222
|
+
a79;2756
|
223
|
+
a7;271E
|
224
|
+
a81;25D7
|
225
|
+
a82;2758
|
226
|
+
a83;2759
|
227
|
+
a84;275A
|
228
|
+
a85;276F
|
229
|
+
a86;2771
|
230
|
+
a87;2772
|
231
|
+
a88;2773
|
232
|
+
a89;2768
|
233
|
+
a8;271F
|
234
|
+
a90;2769
|
235
|
+
a91;276C
|
236
|
+
a92;276D
|
237
|
+
a93;276A
|
238
|
+
a94;276B
|
239
|
+
a95;2774
|
240
|
+
a96;2775
|
241
|
+
a97;275B
|
242
|
+
a98;275C
|
243
|
+
a99;275D
|
244
|
+
a9;2720
|
245
|
+
# END
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -35,9 +36,9 @@ module PDF
|
|
35
36
|
|
36
37
|
def read
|
37
38
|
bits_left_in_chunk = @bits_in_chunk
|
38
|
-
chunk =
|
39
|
+
chunk = -1
|
39
40
|
while bits_left_in_chunk > 0 and @current_pos < @data.size
|
40
|
-
chunk = 0 if chunk
|
41
|
+
chunk = 0 if chunk < 0
|
41
42
|
codepoint = @data[@current_pos, 1].unpack("C*")[0]
|
42
43
|
current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
|
43
44
|
dif = bits_left_in_chunk - @bits_left_in_byte
|
@@ -83,6 +84,7 @@ module PDF
|
|
83
84
|
#
|
84
85
|
def self.decode(data)
|
85
86
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
87
|
+
string_table = StringTable.new
|
86
88
|
result = "".dup
|
87
89
|
until (code = stream.read) == CODE_EOD
|
88
90
|
if code == CODE_CLEAR_TABLE
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -331,11 +332,15 @@ class PDF::Reader
|
|
331
332
|
def decrypt(ref, obj)
|
332
333
|
case obj
|
333
334
|
when PDF::Reader::Stream then
|
334
|
-
|
335
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
336
|
+
# Therefore we shouldn't try to decrypt it.
|
337
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
335
338
|
obj
|
336
339
|
when Hash then
|
337
|
-
arr = obj.map { |key,val| [key, decrypt(ref, val)] }
|
338
|
-
|
340
|
+
arr = obj.map { |key,val| [key, decrypt(ref, val)] }
|
341
|
+
arr.each_with_object({}) { |(k,v), accum|
|
342
|
+
accum[k] = v
|
343
|
+
}
|
339
344
|
when Array then
|
340
345
|
obj.collect { |item| decrypt(ref, item) }
|
341
346
|
when String
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# typed: true
|
1
2
|
# coding: utf-8
|
3
|
+
# frozen_string_literal: true
|
2
4
|
|
3
5
|
class PDF::Reader
|
4
6
|
# remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
|
@@ -38,7 +40,8 @@ class PDF::Reader
|
|
38
40
|
|
39
41
|
def self.detect_intersection(sweep_line_status, event_point)
|
40
42
|
sweep_line_status.each do |open_text_run|
|
41
|
-
if
|
43
|
+
if open_text_run.text == event_point.run.text &&
|
44
|
+
event_point.x >= open_text_run.x &&
|
42
45
|
event_point.x <= open_text_run.endx &&
|
43
46
|
open_text_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
|
44
47
|
return true
|
@@ -51,10 +54,14 @@ class PDF::Reader
|
|
51
54
|
# Utility class used to avoid modifying the underlying TextRun objects while we're
|
52
55
|
# looking for duplicates
|
53
56
|
class EventPoint
|
54
|
-
attr_reader :x, :run
|
55
57
|
|
56
|
-
|
57
|
-
|
58
|
+
attr_reader :x
|
59
|
+
|
60
|
+
attr_reader :run
|
61
|
+
|
62
|
+
def initialize(x, run)
|
63
|
+
@x = x
|
64
|
+
@run = run
|
58
65
|
end
|
59
66
|
|
60
67
|
def start?
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -68,22 +69,56 @@ module PDF
|
|
68
69
|
@attributes
|
69
70
|
end
|
70
71
|
|
72
|
+
def height
|
73
|
+
rect = Rectangle.new(*attributes[:MediaBox])
|
74
|
+
rect.apply_rotation(rotate) if rotate > 0
|
75
|
+
rect.height
|
76
|
+
end
|
77
|
+
|
78
|
+
def width
|
79
|
+
rect = Rectangle.new(*attributes[:MediaBox])
|
80
|
+
rect.apply_rotation(rotate) if rotate > 0
|
81
|
+
rect.width
|
82
|
+
end
|
83
|
+
|
84
|
+
def origin
|
85
|
+
rect = Rectangle.new(*attributes[:MediaBox])
|
86
|
+
rect.apply_rotation(rotate) if rotate > 0
|
87
|
+
|
88
|
+
rect.bottom_left
|
89
|
+
end
|
90
|
+
|
71
91
|
# Convenience method to identify the page's orientation.
|
72
92
|
#
|
73
93
|
def orientation
|
74
|
-
|
94
|
+
if height > width
|
95
|
+
"portrait"
|
96
|
+
else
|
97
|
+
"landscape"
|
98
|
+
end
|
75
99
|
end
|
76
100
|
|
77
101
|
# returns the plain text content of this page encoded as UTF-8. Any
|
78
102
|
# characters that can't be translated will be returned as a ▯
|
79
103
|
#
|
80
|
-
def text
|
104
|
+
def text(opts = {})
|
81
105
|
receiver = PageTextReceiver.new
|
82
106
|
walk(receiver)
|
83
|
-
receiver.
|
107
|
+
runs = receiver.runs(opts)
|
108
|
+
|
109
|
+
# rectangles[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
|
110
|
+
mediabox = rectangles[:MediaBox] || Rectangle.new(0, 0, 0, 0)
|
111
|
+
|
112
|
+
PageLayout.new(runs, mediabox).to_s
|
84
113
|
end
|
85
114
|
alias :to_s :text
|
86
115
|
|
116
|
+
def runs(opts = {})
|
117
|
+
receiver = PageTextReceiver.new
|
118
|
+
walk(receiver)
|
119
|
+
receiver.runs(opts)
|
120
|
+
end
|
121
|
+
|
87
122
|
# processes the raw content stream for this page in sequential order and
|
88
123
|
# passes callbacks to the receiver objects.
|
89
124
|
#
|
@@ -139,23 +174,50 @@ module PDF
|
|
139
174
|
# returns the "boxes" that define the page object.
|
140
175
|
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
176
|
#
|
177
|
+
# DEPRECATED. Recommend using Page#rectangles instead
|
178
|
+
#
|
142
179
|
def boxes
|
143
|
-
|
144
|
-
|
180
|
+
# In ruby 2.4+ we could use Hash#transform_values
|
181
|
+
Hash[rectangles.map{ |k,rect| [k,rect.to_a] } ]
|
182
|
+
end
|
183
|
+
|
184
|
+
# returns the "boxes" that define the page object.
|
185
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
186
|
+
#
|
187
|
+
def rectangles
|
188
|
+
mediabox = objects.deref!(attributes[:MediaBox])
|
189
|
+
cropbox = objects.deref!(attributes[:Cropbox]) || mediabox
|
190
|
+
bleedbox = objects.deref!(attributes[:BleedBox]) || cropbox
|
191
|
+
trimbox = objects.deref!(attributes[:TrimBox]) || cropbox
|
192
|
+
artbox = objects.deref!(attributes[:ArtBox]) || cropbox
|
193
|
+
|
194
|
+
mediarect = Rectangle.new(*mediabox)
|
195
|
+
croprect = Rectangle.new(*cropbox)
|
196
|
+
bleedrect = Rectangle.new(*bleedbox)
|
197
|
+
trimrect = Rectangle.new(*trimbox)
|
198
|
+
artrect = Rectangle.new(*artbox)
|
199
|
+
|
200
|
+
if rotate > 0
|
201
|
+
mediarect.apply_rotation(rotate)
|
202
|
+
croprect.apply_rotation(rotate)
|
203
|
+
bleedrect.apply_rotation(rotate)
|
204
|
+
trimrect.apply_rotation(rotate)
|
205
|
+
artrect.apply_rotation(rotate)
|
206
|
+
end
|
145
207
|
|
146
208
|
{
|
147
|
-
MediaBox:
|
148
|
-
CropBox:
|
149
|
-
BleedBox:
|
150
|
-
TrimBox:
|
151
|
-
ArtBox:
|
209
|
+
MediaBox: mediarect,
|
210
|
+
CropBox: croprect,
|
211
|
+
BleedBox: bleedrect,
|
212
|
+
TrimBox: trimrect,
|
213
|
+
ArtBox: artrect,
|
152
214
|
}
|
153
215
|
end
|
154
216
|
|
155
217
|
private
|
156
218
|
|
157
219
|
def root
|
158
|
-
|
220
|
+
@root ||= objects.deref(@objects.trailer[:Root])
|
159
221
|
end
|
160
222
|
|
161
223
|
# Returns the resources that accompany this page. Includes
|