pdf-reader 2.6.0 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +30 -1
- data/Rakefile +1 -1
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +36 -33
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +65 -50
- data/lib/pdf/reader/encoding.rb +2 -1
- data/lib/pdf/reader/error.rb +16 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +8 -6
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +11 -11
- data/lib/pdf/reader/font.rb +72 -16
- data/lib/pdf/reader/font_descriptor.rb +19 -17
- data/lib/pdf/reader/form_xobject.rb +15 -5
- data/lib/pdf/reader/glyph_hash.rb +1 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +252 -44
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +99 -19
- data/lib/pdf/reader/page_layout.rb +28 -32
- data/lib/pdf/reader/page_state.rb +12 -11
- data/lib/pdf/reader/page_text_receiver.rb +57 -10
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +26 -8
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +2 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +21 -3
- data/lib/pdf/reader/zero_width_runs_filter.rb +2 -0
- data/lib/pdf/reader.rb +46 -15
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1978 -0
- metadata +22 -13
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/overlapping_runs_filter'
|
@@ -16,16 +17,15 @@ class PDF::Reader
|
|
16
17
|
DEFAULT_FONT_SIZE = 12
|
17
18
|
|
18
19
|
def initialize(runs, mediabox)
|
19
|
-
|
20
|
+
# mediabox is a 4-element array for now, but it'd be nice to switch to a
|
21
|
+
# PDF::Reader::Rectangle at some point
|
22
|
+
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
20
23
|
|
21
|
-
|
22
|
-
runs =
|
23
|
-
@runs = merge_runs(runs)
|
24
|
+
@mediabox = process_mediabox(mediabox)
|
25
|
+
@runs = runs
|
24
26
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
25
27
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
26
28
|
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
27
|
-
@page_width = (mediabox[2] - mediabox[0]).abs
|
28
|
-
@page_height = (mediabox[3] - mediabox[1]).abs
|
29
29
|
@x_offset = @runs.map(&:x).sort.first || 0
|
30
30
|
lowest_y = @runs.map(&:y).sort.first || 0
|
31
31
|
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
@@ -48,6 +48,14 @@ class PDF::Reader
|
|
48
48
|
|
49
49
|
private
|
50
50
|
|
51
|
+
def page_width
|
52
|
+
@mediabox.width
|
53
|
+
end
|
54
|
+
|
55
|
+
def page_height
|
56
|
+
@mediabox.height
|
57
|
+
end
|
58
|
+
|
51
59
|
# given an array of strings, return a new array with empty rows from the
|
52
60
|
# beginning and end removed.
|
53
61
|
#
|
@@ -66,19 +74,19 @@ class PDF::Reader
|
|
66
74
|
end
|
67
75
|
|
68
76
|
def row_count
|
69
|
-
@row_count ||= (
|
77
|
+
@row_count ||= (page_height / @mean_font_size).floor
|
70
78
|
end
|
71
79
|
|
72
80
|
def col_count
|
73
|
-
@col_count ||= ((
|
81
|
+
@col_count ||= ((page_width / @median_glyph_width) * 1.05).floor
|
74
82
|
end
|
75
83
|
|
76
84
|
def row_multiplier
|
77
|
-
@row_multiplier ||=
|
85
|
+
@row_multiplier ||= page_height.to_f / row_count.to_f
|
78
86
|
end
|
79
87
|
|
80
88
|
def col_multiplier
|
81
|
-
@col_multiplier ||=
|
89
|
+
@col_multiplier ||= page_width.to_f / col_count.to_f
|
82
90
|
end
|
83
91
|
|
84
92
|
def mean(collection)
|
@@ -97,32 +105,20 @@ class PDF::Reader
|
|
97
105
|
end
|
98
106
|
end
|
99
107
|
|
100
|
-
|
101
|
-
|
102
|
-
def merge_runs(runs)
|
103
|
-
runs.group_by { |char|
|
104
|
-
char.y.to_i
|
105
|
-
}.map { |y, chars|
|
106
|
-
group_chars_into_runs(chars.sort)
|
107
|
-
}.flatten.sort
|
108
|
+
def local_string_insert(haystack, needle, index)
|
109
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
108
110
|
end
|
109
111
|
|
110
|
-
def
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
runs << head
|
119
|
-
end
|
112
|
+
def process_mediabox(mediabox)
|
113
|
+
if mediabox.is_a?(Array)
|
114
|
+
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
115
|
+
" please use a Rectangle instead"
|
116
|
+
$stderr.puts msg
|
117
|
+
PDF::Reader::Rectangle.from_array(mediabox)
|
118
|
+
else
|
119
|
+
mediabox
|
120
120
|
end
|
121
|
-
runs
|
122
121
|
end
|
123
122
|
|
124
|
-
def local_string_insert(haystack, needle, index)
|
125
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
126
|
-
end
|
127
123
|
end
|
128
124
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/transformation_matrix'
|
@@ -312,7 +313,7 @@ class PDF::Reader
|
|
312
313
|
# may need to be added
|
313
314
|
#
|
314
315
|
def process_glyph_displacement(w0, tj, word_boundary)
|
315
|
-
fs =
|
316
|
+
fs = state[:text_font_size]
|
316
317
|
tc = state[:char_spacing]
|
317
318
|
if word_boundary
|
318
319
|
tw = state[:word_spacing]
|
@@ -330,16 +331,16 @@ class PDF::Reader
|
|
330
331
|
# apply horizontal scaling to spacing values but not font size
|
331
332
|
tx = ((w0 * fs) + tc + tw) * th
|
332
333
|
end
|
333
|
-
|
334
|
-
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
335
|
-
# ctm[0] here, but this gets my tests green and I'm out of
|
336
|
-
# ideas for now
|
337
334
|
# TODO: support ty > 0
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
335
|
+
ty = 0
|
336
|
+
temp = TransformationMatrix.new(1, 0,
|
337
|
+
0, 1,
|
338
|
+
tx, ty)
|
339
|
+
@text_matrix = temp.multiply!(
|
340
|
+
@text_matrix.a, @text_matrix.b,
|
341
|
+
@text_matrix.c, @text_matrix.d,
|
342
|
+
@text_matrix.e, @text_matrix.f
|
343
|
+
)
|
343
344
|
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
344
345
|
end
|
345
346
|
|
@@ -383,7 +384,7 @@ class PDF::Reader
|
|
383
384
|
#
|
384
385
|
def build_fonts(raw_fonts)
|
385
386
|
wrapped_fonts = raw_fonts.map { |label, font|
|
386
|
-
[label, PDF::Reader::Font.new(@objects, @objects.
|
387
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
|
387
388
|
}
|
388
389
|
|
389
390
|
::Hash[wrapped_fonts]
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'forwardable'
|
@@ -44,14 +45,34 @@ module PDF
|
|
44
45
|
@page = page
|
45
46
|
@content = []
|
46
47
|
@characters = []
|
47
|
-
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
48
|
-
device_bl = apply_rotation(*@state.ctm_transform(@mediabox[0], @mediabox[1]))
|
49
|
-
device_tr = apply_rotation(*@state.ctm_transform(@mediabox[2], @mediabox[3]))
|
50
|
-
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
51
48
|
end
|
52
49
|
|
50
|
+
def runs(opts = {})
|
51
|
+
runs = @characters
|
52
|
+
|
53
|
+
if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
|
54
|
+
runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
|
55
|
+
end
|
56
|
+
|
57
|
+
if opts.fetch(:skip_zero_width, true)
|
58
|
+
runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
|
59
|
+
end
|
60
|
+
|
61
|
+
if opts.fetch(:skip_overlapping, true)
|
62
|
+
runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
|
63
|
+
end
|
64
|
+
|
65
|
+
if opts.fetch(:merge, true)
|
66
|
+
runs = merge_runs(runs)
|
67
|
+
end
|
68
|
+
|
69
|
+
runs
|
70
|
+
end
|
71
|
+
|
72
|
+
# deprecated
|
53
73
|
def content
|
54
|
-
|
74
|
+
mediabox = @page.rectangles[:MediaBox]
|
75
|
+
PageLayout.new(runs, mediabox).to_s
|
55
76
|
end
|
56
77
|
|
57
78
|
#####################################################
|
@@ -66,8 +87,10 @@ module PDF
|
|
66
87
|
params.each do |arg|
|
67
88
|
if arg.is_a?(String)
|
68
89
|
internal_show_text(arg)
|
69
|
-
|
90
|
+
elsif arg.is_a?(Numeric)
|
70
91
|
@state.process_glyph_displacement(0, arg, false)
|
92
|
+
else
|
93
|
+
# skip it
|
71
94
|
end
|
72
95
|
end
|
73
96
|
end
|
@@ -98,6 +121,7 @@ module PDF
|
|
98
121
|
private
|
99
122
|
|
100
123
|
def internal_show_text(string)
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(string, "string", String)
|
101
125
|
if @state.current_font.nil?
|
102
126
|
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
103
127
|
end
|
@@ -111,7 +135,7 @@ module PDF
|
|
111
135
|
|
112
136
|
# apply to glyph displacement for the current glyph so the next
|
113
137
|
# glyph will appear in the correct position
|
114
|
-
glyph_width = @state.current_font.
|
138
|
+
glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
|
115
139
|
th = 1
|
116
140
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
117
141
|
unless utf8_chars == SPACE
|
@@ -128,14 +152,37 @@ module PDF
|
|
128
152
|
y = tmp * -1
|
129
153
|
elsif @page.rotate == 180
|
130
154
|
y *= -1
|
155
|
+
x *= -1
|
131
156
|
elsif @page.rotate == 270
|
132
|
-
tmp =
|
133
|
-
|
134
|
-
|
157
|
+
tmp = y
|
158
|
+
y = x
|
159
|
+
x = tmp * -1
|
135
160
|
end
|
136
161
|
return x, y
|
137
162
|
end
|
138
163
|
|
164
|
+
# take a collection of TextRun objects and merge any that are in close
|
165
|
+
# proximity
|
166
|
+
def merge_runs(runs)
|
167
|
+
runs.group_by { |char|
|
168
|
+
char.y.to_i
|
169
|
+
}.map { |y, chars|
|
170
|
+
group_chars_into_runs(chars.sort)
|
171
|
+
}.flatten.sort
|
172
|
+
end
|
173
|
+
|
174
|
+
def group_chars_into_runs(chars)
|
175
|
+
chars.each_with_object([]) do |char, runs|
|
176
|
+
if runs.empty?
|
177
|
+
runs << char
|
178
|
+
elsif runs.last.mergable?(char)
|
179
|
+
runs[-1] = runs.last + char
|
180
|
+
else
|
181
|
+
runs << char
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
139
186
|
end
|
140
187
|
end
|
141
188
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -79,8 +80,8 @@ class PDF::Reader
|
|
79
80
|
token
|
80
81
|
elsif operators.has_key? token
|
81
82
|
Token.new(token)
|
82
|
-
elsif token.
|
83
|
-
token
|
83
|
+
elsif token.frozen?
|
84
|
+
token
|
84
85
|
elsif token =~ /\d*\.\d/
|
85
86
|
token.to_f
|
86
87
|
else
|
@@ -95,14 +96,20 @@ class PDF::Reader
|
|
95
96
|
# id - the object ID to return
|
96
97
|
# gen - the object revision number to return
|
97
98
|
def object(id, gen)
|
98
|
-
|
99
|
+
idCheck = parse_token
|
100
|
+
|
101
|
+
# Sometimes the xref table is corrupt and points to an offset slightly too early in the file.
|
102
|
+
# check the next token, maybe we can find the start of the object we're looking for
|
103
|
+
if idCheck != id
|
104
|
+
Error.assert_equal(parse_token, id)
|
105
|
+
end
|
99
106
|
Error.assert_equal(parse_token, gen)
|
100
107
|
Error.str_assert(parse_token, "obj")
|
101
108
|
|
102
109
|
obj = parse_token
|
103
110
|
post_obj = parse_token
|
104
111
|
|
105
|
-
if post_obj == "stream"
|
112
|
+
if obj.is_a?(Hash) && post_obj == "stream"
|
106
113
|
stream(obj)
|
107
114
|
else
|
108
115
|
obj
|
@@ -120,7 +127,7 @@ class PDF::Reader
|
|
120
127
|
key = parse_token
|
121
128
|
break if key.kind_of?(Token) and key == ">>"
|
122
129
|
raise MalformedPDFError, "unterminated dict" if @buffer.empty?
|
123
|
-
|
130
|
+
PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
|
124
131
|
|
125
132
|
value = parse_token
|
126
133
|
value.kind_of?(Token) and Error.str_assert_not(value, ">>")
|
@@ -166,7 +173,9 @@ class PDF::Reader
|
|
166
173
|
|
167
174
|
# add a missing digit if required, as required by the spec
|
168
175
|
str << "0" unless str.size % 2 == 0
|
169
|
-
str.
|
176
|
+
str.chars.each_slice(2).map { |nibbles|
|
177
|
+
nibbles.join("").hex.chr
|
178
|
+
}.join.force_encoding("binary")
|
170
179
|
end
|
171
180
|
################################################################################
|
172
181
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
@@ -206,14 +215,23 @@ class PDF::Reader
|
|
206
215
|
def stream(dict)
|
207
216
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
208
217
|
if @objects
|
209
|
-
length = @objects.
|
218
|
+
length = @objects.deref_integer(dict[:Length])
|
219
|
+
if dict[:Filter]
|
220
|
+
dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
|
221
|
+
end
|
210
222
|
else
|
211
223
|
length = dict[:Length] || 0
|
212
224
|
end
|
225
|
+
|
226
|
+
PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
|
227
|
+
|
213
228
|
data = @buffer.read(length, :skip_eol => true)
|
214
229
|
|
215
230
|
Error.str_assert(parse_token, "endstream")
|
216
|
-
|
231
|
+
|
232
|
+
# We used to assert that the stream had the correct closing token, but it doesn't *really*
|
233
|
+
# matter if it's missing, and other readers seem to handle its absence just fine
|
234
|
+
# Error.str_assert(parse_token, "endobj")
|
217
235
|
|
218
236
|
PDF::Reader::Stream.new(dict, data)
|
219
237
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs are all about positioning content on a page, so there's lots of need to
|
9
|
+
# work with a set of X,Y coordinates.
|
10
|
+
#
|
11
|
+
class Point
|
12
|
+
|
13
|
+
attr_reader :x, :y
|
14
|
+
|
15
|
+
def initialize(x, y)
|
16
|
+
@x, @y = x, y
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
|
11
|
+
# a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
|
12
|
+
#
|
13
|
+
class Rc4SecurityHandler
|
14
|
+
|
15
|
+
def initialize(key)
|
16
|
+
@encrypt_key = key
|
17
|
+
end
|
18
|
+
|
19
|
+
##7.6.2 General Encryption Algorithm
|
20
|
+
#
|
21
|
+
# Algorithm 1: Encryption of data using the RC4 algorithm
|
22
|
+
#
|
23
|
+
# version <=3 or (version == 4 and CFM == V2)
|
24
|
+
#
|
25
|
+
# buf - a string to decrypt
|
26
|
+
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
|
+
#
|
28
|
+
def decrypt( buf, ref )
|
29
|
+
objKey = @encrypt_key.dup
|
30
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
31
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
32
|
+
length = objKey.length < 16 ? objKey.length : 16
|
33
|
+
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
34
|
+
rc4.decrypt(buf)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs represent rectangles all over the place. They're 4 element arrays, like this:
|
9
|
+
#
|
10
|
+
# [A, B, C, D]
|
11
|
+
#
|
12
|
+
# Four element arrays are yucky to work with though, so here's a class that's better.
|
13
|
+
# Initialize it with the 4 elements, and get utility functions (width, height, etc)
|
14
|
+
# for free.
|
15
|
+
#
|
16
|
+
# By convention the first two elements are x1, y1, the co-ords for the bottom left corner
|
17
|
+
# of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
|
18
|
+
# corner of the rectangle. It's valid for the alternative corners to be used though, so
|
19
|
+
# we don't assume which is which.
|
20
|
+
#
|
21
|
+
class Rectangle
|
22
|
+
|
23
|
+
attr_reader :bottom_left, :bottom_right, :top_left, :top_right
|
24
|
+
|
25
|
+
def initialize(x1, y1, x2, y2)
|
26
|
+
set_corners(x1, y1, x2, y2)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def ==(other)
|
43
|
+
to_a == other.to_a
|
44
|
+
end
|
45
|
+
|
46
|
+
def height
|
47
|
+
top_right.y - bottom_right.y
|
48
|
+
end
|
49
|
+
|
50
|
+
def width
|
51
|
+
bottom_right.x - bottom_left.x
|
52
|
+
end
|
53
|
+
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
59
|
+
# A pdf-style 4-number array
|
60
|
+
def to_a
|
61
|
+
[
|
62
|
+
bottom_left.x,
|
63
|
+
bottom_left.y,
|
64
|
+
top_right.x,
|
65
|
+
top_right.y,
|
66
|
+
]
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_rotation(degrees)
|
70
|
+
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
|
+
|
72
|
+
if degrees == 90
|
73
|
+
new_x1 = bottom_left.x
|
74
|
+
new_y1 = bottom_left.y - width
|
75
|
+
new_x2 = bottom_left.x + height
|
76
|
+
new_y2 = bottom_left.y
|
77
|
+
elsif degrees == 180
|
78
|
+
new_x1 = bottom_left.x - width
|
79
|
+
new_y1 = bottom_left.y - height
|
80
|
+
new_x2 = bottom_left.x
|
81
|
+
new_y2 = bottom_left.y
|
82
|
+
elsif degrees == 270
|
83
|
+
new_x1 = bottom_left.x - height
|
84
|
+
new_y1 = bottom_left.y
|
85
|
+
new_x2 = bottom_left.x
|
86
|
+
new_y2 = bottom_left.y + width
|
87
|
+
end
|
88
|
+
set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def set_corners(x1, y1, x2, y2)
|
94
|
+
@bottom_left = PDF::Reader::Point.new(
|
95
|
+
[x1, x2].min,
|
96
|
+
[y1, y2].min,
|
97
|
+
)
|
98
|
+
@bottom_right = PDF::Reader::Point.new(
|
99
|
+
[x1, x2].max,
|
100
|
+
[y1, y2].min,
|
101
|
+
)
|
102
|
+
@top_left = PDF::Reader::Point.new(
|
103
|
+
[x1, x2].min,
|
104
|
+
[y1, y2].max,
|
105
|
+
)
|
106
|
+
@top_right = PDF::Reader::Point.new(
|
107
|
+
[x1, x2].max,
|
108
|
+
[y1, y2].max,
|
109
|
+
)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -6,7 +7,13 @@ module PDF
|
|
6
7
|
|
7
8
|
# helper class for common resource methods used by Page and FormXObject
|
8
9
|
#
|
9
|
-
|
10
|
+
class Resources
|
11
|
+
|
12
|
+
def initialize(objects, resources)
|
13
|
+
@objects = objects
|
14
|
+
@resources = resources
|
15
|
+
end
|
16
|
+
|
10
17
|
# Returns a Hash of color spaces that are available to this page
|
11
18
|
#
|
12
19
|
# NOTE: this method de-serialises objects from the underlying PDF
|
@@ -14,7 +21,7 @@ module PDF
|
|
14
21
|
# of calling it over and over.
|
15
22
|
#
|
16
23
|
def color_spaces
|
17
|
-
@objects.
|
24
|
+
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
18
25
|
end
|
19
26
|
|
20
27
|
# Returns a Hash of fonts that are available to this page
|
@@ -24,7 +31,7 @@ module PDF
|
|
24
31
|
# of calling it over and over.
|
25
32
|
#
|
26
33
|
def fonts
|
27
|
-
@objects.
|
34
|
+
@objects.deref_hash!(@resources[:Font]) || {}
|
28
35
|
end
|
29
36
|
|
30
37
|
# Returns a Hash of external graphic states that are available to this
|
@@ -35,7 +42,7 @@ module PDF
|
|
35
42
|
# of calling it over and over.
|
36
43
|
#
|
37
44
|
def graphic_states
|
38
|
-
@objects.
|
45
|
+
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
39
46
|
end
|
40
47
|
|
41
48
|
# Returns a Hash of patterns that are available to this page
|
@@ -45,7 +52,7 @@ module PDF
|
|
45
52
|
# of calling it over and over.
|
46
53
|
#
|
47
54
|
def patterns
|
48
|
-
@objects.
|
55
|
+
@objects.deref_hash!(@resources[:Pattern]) || {}
|
49
56
|
end
|
50
57
|
|
51
58
|
# Returns an Array of procedure sets that are available to this page
|
@@ -55,7 +62,7 @@ module PDF
|
|
55
62
|
# of calling it over and over.
|
56
63
|
#
|
57
64
|
def procedure_sets
|
58
|
-
@objects.
|
65
|
+
@objects.deref_array!(@resources[:ProcSet]) || []
|
59
66
|
end
|
60
67
|
|
61
68
|
# Returns a Hash of properties sets that are available to this page
|
@@ -65,7 +72,7 @@ module PDF
|
|
65
72
|
# of calling it over and over.
|
66
73
|
#
|
67
74
|
def properties
|
68
|
-
@objects.
|
75
|
+
@objects.deref_hash!(@resources[:Properties]) || {}
|
69
76
|
end
|
70
77
|
|
71
78
|
# Returns a Hash of shadings that are available to this page
|
@@ -75,7 +82,7 @@ module PDF
|
|
75
82
|
# of calling it over and over.
|
76
83
|
#
|
77
84
|
def shadings
|
78
|
-
@objects.
|
85
|
+
@objects.deref_hash!(@resources[:Shading]) || {}
|
79
86
|
end
|
80
87
|
|
81
88
|
# Returns a Hash of XObjects that are available to this page
|
@@ -85,7 +92,7 @@ module PDF
|
|
85
92
|
# of calling it over and over.
|
86
93
|
#
|
87
94
|
def xobjects
|
88
|
-
@objects.
|
95
|
+
@objects.deref_hash!(@resources[:XObject]) || {}
|
89
96
|
end
|
90
97
|
|
91
98
|
end
|