pdf-reader 2.14.1 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +15 -0
- data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +35 -17
- data/lib/pdf/reader/cid_widths.rb +7 -1
- data/lib/pdf/reader/cmap.rb +14 -3
- data/lib/pdf/reader/encoding.rb +37 -12
- data/lib/pdf/reader/error.rb +6 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +4 -0
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +90 -22
- data/lib/pdf/reader/font_descriptor.rb +76 -23
- data/lib/pdf/reader/form_xobject.rb +11 -0
- data/lib/pdf/reader/glyph_hash.rb +34 -9
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +17 -6
- data/lib/pdf/reader/no_text_filter.rb +1 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +116 -9
- data/lib/pdf/reader/object_stream.rb +19 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
- data/lib/pdf/reader/page.rb +41 -7
- data/lib/pdf/reader/page_layout.rb +25 -8
- data/lib/pdf/reader/page_state.rb +5 -2
- data/lib/pdf/reader/page_text_receiver.rb +6 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +51 -10
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +10 -1
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +9 -0
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +5 -2
- data/lib/pdf/reader/text_run.rb +28 -1
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +33 -2
- data/lib/pdf/reader/type_check.rb +10 -3
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
- data/lib/pdf/reader/width_calculator/composite.rb +5 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
- data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
- data/lib/pdf/reader/xref.rb +28 -7
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +18 -2
- data/rbi/pdf-reader.rbi +1502 -1594
- metadata +17 -11
data/lib/pdf/reader/page.rb
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
|
+
require 'set'
|
6
|
+
|
5
7
|
module PDF
|
6
8
|
class Reader
|
7
9
|
|
@@ -17,14 +19,17 @@ module PDF
|
|
17
19
|
extend Forwardable
|
18
20
|
|
19
21
|
# lowlevel hash-like access to all objects in the underlying PDF
|
22
|
+
#: PDF::Reader::ObjectHash
|
20
23
|
attr_reader :objects
|
21
24
|
|
22
25
|
# the raw PDF object that defines this page
|
26
|
+
#: Hash[Symbol, untyped]
|
23
27
|
attr_reader :page_object
|
24
28
|
|
25
29
|
# a Hash-like object for storing cached data. Generally this is scoped to
|
26
30
|
# the current document and is used to avoid repeating expensive
|
27
31
|
# operations
|
32
|
+
#: PDF::Reader::ObjectCache | Hash[untyped, untyped]
|
28
33
|
attr_reader :cache
|
29
34
|
|
30
35
|
def_delegators :resources, :color_spaces
|
@@ -41,10 +46,16 @@ module PDF
|
|
41
46
|
# * objects - an ObjectHash instance that wraps a PDF file
|
42
47
|
# * pagenum - an int specifying the page number to expose. 1 indexed.
|
43
48
|
#
|
49
|
+
#: (PDF::Reader::ObjectHash, Integer, ?Hash[Symbol, untyped]) -> void
|
44
50
|
def initialize(objects, pagenum, options = {})
|
45
|
-
@objects
|
46
|
-
@
|
47
|
-
@
|
51
|
+
@objects = objects
|
52
|
+
@pagenum = pagenum
|
53
|
+
@page_ref = objects.page_references[pagenum - 1] #: (Reference | Hash[Symbol, untyped])?
|
54
|
+
@page_object = objects.deref_hash(@page_ref) || {} #: Hash[Symbol, untyped]
|
55
|
+
@cache = options[:cache] || {} #: PDF::Reader::ObjectCache | Hash[untyped, untyped]
|
56
|
+
@attributes = nil #: Hash[Symbol, untyped] | nil
|
57
|
+
@root = nil #: Hash[Symbol, untyped] | nil
|
58
|
+
@resources = nil #: PDF::Reader::Resources | nil
|
48
59
|
|
49
60
|
if @page_object.empty?
|
50
61
|
raise InvalidPageError, "Invalid page: #{pagenum}"
|
@@ -53,12 +64,14 @@ module PDF
|
|
53
64
|
|
54
65
|
# return the number of this page within the full document
|
55
66
|
#
|
67
|
+
#: () -> Integer
|
56
68
|
def number
|
57
69
|
@pagenum
|
58
70
|
end
|
59
71
|
|
60
72
|
# return a friendly string representation of this page
|
61
73
|
#
|
74
|
+
#: () -> String
|
62
75
|
def inspect
|
63
76
|
"<PDF::Reader::Page page: #{@pagenum}>"
|
64
77
|
end
|
@@ -66,6 +79,7 @@ module PDF
|
|
66
79
|
# Returns the attributes that accompany this page, including
|
67
80
|
# attributes inherited from parents.
|
68
81
|
#
|
82
|
+
#: () -> Hash[Symbol, untyped]
|
69
83
|
def attributes
|
70
84
|
@attributes ||= {}.tap { |hash|
|
71
85
|
page_with_ancestors.reverse.each do |obj|
|
@@ -78,18 +92,21 @@ module PDF
|
|
78
92
|
@attributes
|
79
93
|
end
|
80
94
|
|
95
|
+
#: () -> Numeric
|
81
96
|
def height
|
82
97
|
rect = Rectangle.new(*attributes[:MediaBox])
|
83
98
|
rect.apply_rotation(rotate) if rotate > 0
|
84
99
|
rect.height
|
85
100
|
end
|
86
101
|
|
102
|
+
#: () -> Numeric
|
87
103
|
def width
|
88
104
|
rect = Rectangle.new(*attributes[:MediaBox])
|
89
105
|
rect.apply_rotation(rotate) if rotate > 0
|
90
106
|
rect.width
|
91
107
|
end
|
92
108
|
|
109
|
+
#: () -> Array[Numeric]
|
93
110
|
def origin
|
94
111
|
rect = Rectangle.new(*attributes[:MediaBox])
|
95
112
|
rect.apply_rotation(rotate) if rotate > 0
|
@@ -99,6 +116,7 @@ module PDF
|
|
99
116
|
|
100
117
|
# Convenience method to identify the page's orientation.
|
101
118
|
#
|
119
|
+
#: () -> String
|
102
120
|
def orientation
|
103
121
|
if height > width
|
104
122
|
"portrait"
|
@@ -110,6 +128,7 @@ module PDF
|
|
110
128
|
# returns the plain text content of this page encoded as UTF-8. Any
|
111
129
|
# characters that can't be translated will be returned as a ▯
|
112
130
|
#
|
131
|
+
#: (?Hash[Symbol, untyped]) -> String
|
113
132
|
def text(opts = {})
|
114
133
|
receiver = PageTextReceiver.new
|
115
134
|
walk(receiver)
|
@@ -122,6 +141,7 @@ module PDF
|
|
122
141
|
end
|
123
142
|
alias :to_s :text
|
124
143
|
|
144
|
+
#: (?Hash[Symbol, untyped]) -> Array[PDF::Reader::TextRun]
|
125
145
|
def runs(opts = {})
|
126
146
|
receiver = PageTextReceiver.new
|
127
147
|
walk(receiver)
|
@@ -151,6 +171,7 @@ module PDF
|
|
151
171
|
# a set of instructions and associated resources. Calling walk() executes
|
152
172
|
# the program in the correct order and calls out to your implementation.
|
153
173
|
#
|
174
|
+
#: (*untyped) -> untyped
|
154
175
|
def walk(*receivers)
|
155
176
|
receivers = receivers.map { |receiver|
|
156
177
|
ValidatingReceiver.new(receiver)
|
@@ -162,6 +183,7 @@ module PDF
|
|
162
183
|
# returns the raw content stream for this page. This is plumbing, nothing to
|
163
184
|
# see here unless you're a PDF nerd like me.
|
164
185
|
#
|
186
|
+
#: () -> String
|
165
187
|
def raw_content
|
166
188
|
contents = objects.deref_stream_or_array(@page_object[:Contents])
|
167
189
|
[contents].flatten.compact.map { |obj|
|
@@ -173,6 +195,7 @@ module PDF
|
|
173
195
|
|
174
196
|
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
175
197
|
#
|
198
|
+
#: () -> Integer
|
176
199
|
def rotate
|
177
200
|
value = attributes[:Rotate].to_i
|
178
201
|
case value
|
@@ -188,6 +211,7 @@ module PDF
|
|
188
211
|
#
|
189
212
|
# DEPRECATED. Recommend using Page#rectangles instead
|
190
213
|
#
|
214
|
+
#: () -> Hash[Symbol, Array[Numeric]]
|
191
215
|
def boxes
|
192
216
|
# In ruby 2.4+ we could use Hash#transform_values
|
193
217
|
Hash[rectangles.map{ |k,rect| [k,rect.to_a] } ]
|
@@ -196,6 +220,7 @@ module PDF
|
|
196
220
|
# returns the "boxes" that define the page object.
|
197
221
|
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
198
222
|
#
|
223
|
+
#: () -> Hash[Symbol, PDF::Reader::Rectangle]
|
199
224
|
def rectangles
|
200
225
|
# attributes[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
|
201
226
|
mediabox = objects.deref_array_of_numbers(attributes[:MediaBox]) || []
|
@@ -233,6 +258,7 @@ module PDF
|
|
233
258
|
|
234
259
|
private
|
235
260
|
|
261
|
+
#: () -> Hash[Symbol, untyped]
|
236
262
|
def root
|
237
263
|
@root ||= objects.deref_hash(@objects.trailer[:Root]) || {}
|
238
264
|
end
|
@@ -240,10 +266,12 @@ module PDF
|
|
240
266
|
# Returns the resources that accompany this page. Includes
|
241
267
|
# resources inherited from parents.
|
242
268
|
#
|
269
|
+
#: () -> PDF::Reader::Resources
|
243
270
|
def resources
|
244
271
|
@resources ||= Resources.new(@objects, @objects.deref_hash(attributes[:Resources]) || {})
|
245
272
|
end
|
246
273
|
|
274
|
+
#: (Array[untyped], String) -> void
|
247
275
|
def content_stream(receivers, instructions)
|
248
276
|
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
249
277
|
parser = Parser.new(buffer, @objects)
|
@@ -265,6 +293,7 @@ module PDF
|
|
265
293
|
#
|
266
294
|
# The silly style here is because sorbet won't let me use splat arguments
|
267
295
|
#
|
296
|
+
#: (Array[Object], Symbol, ?Array[untyped]) -> void
|
268
297
|
def callback(receivers, name, params=[])
|
269
298
|
receivers.each do |receiver|
|
270
299
|
if receiver.respond_to?(name)
|
@@ -286,25 +315,30 @@ module PDF
|
|
286
315
|
end
|
287
316
|
end
|
288
317
|
|
318
|
+
#: () -> untyped
|
289
319
|
def page_with_ancestors
|
290
|
-
[ @page_object ] + ancestors
|
320
|
+
[ @page_object ] + ancestors(@page_object[:Parent], Set[@page_ref.hash])
|
291
321
|
end
|
292
322
|
|
293
|
-
|
323
|
+
#: (?untyped, ?Set[Integer]) -> untyped
|
324
|
+
def ancestors(origin = @page_object[:Parent], seen = Set.new)
|
294
325
|
if origin.nil?
|
295
326
|
[]
|
327
|
+
elsif seen.include?(origin.hash)
|
328
|
+
raise PDF::Reader::MalformedPDFError.new("loop found in ancestor path")
|
296
329
|
else
|
297
330
|
obj = objects.deref_hash(origin)
|
298
331
|
if obj.nil?
|
299
|
-
raise MalformedPDFError, "parent
|
332
|
+
raise MalformedPDFError, "parent must not be nil"
|
300
333
|
end
|
301
|
-
[ select_inheritable(obj) ] + ancestors(obj[:Parent])
|
334
|
+
[ select_inheritable(obj) ] + ancestors(obj[:Parent], seen.add(origin.hash))
|
302
335
|
end
|
303
336
|
end
|
304
337
|
|
305
338
|
# select the elements from a Pages dictionary that can be inherited by
|
306
339
|
# child Page dictionaries.
|
307
340
|
#
|
341
|
+
#: (Hash[Symbol, untyped]) -> Hash[Symbol, untyped]
|
308
342
|
def select_inheritable(obj)
|
309
343
|
::Hash[obj.select { |key, value|
|
310
344
|
[:Resources, :MediaBox, :CropBox, :Rotate, :Parent].include?(key)
|
@@ -14,23 +14,29 @@ class PDF::Reader
|
|
14
14
|
# page to be rendered as described by the page's MediaBox attribute
|
15
15
|
class PageLayout
|
16
16
|
|
17
|
-
DEFAULT_FONT_SIZE = 12
|
17
|
+
DEFAULT_FONT_SIZE = 12 #: Numeric
|
18
18
|
|
19
|
+
#: (Array[PDF::Reader::TextRun], Array[Numeric] | PDF::Reader::Rectangle) -> void
|
19
20
|
def initialize(runs, mediabox)
|
20
21
|
# mediabox is a 4-element array for now, but it'd be nice to switch to a
|
21
22
|
# PDF::Reader::Rectangle at some point
|
22
23
|
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
23
24
|
|
24
|
-
@mediabox = process_mediabox(mediabox)
|
25
|
-
@runs = runs
|
26
|
-
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
25
|
+
@mediabox = process_mediabox(mediabox) #: PDF::Reader::Rectangle
|
26
|
+
@runs = runs #: Array[PDF::Reader::TextRun]
|
27
|
+
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE #: Numeric
|
27
28
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
28
|
-
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
29
|
-
@x_offset = @runs.map(&:x).sort.first || 0
|
30
|
-
lowest_y = @runs.map(&:y).sort.first || 0
|
31
|
-
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
29
|
+
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0 #: Numeric
|
30
|
+
@x_offset = @runs.map(&:x).sort.first || 0 #: Numeric
|
31
|
+
lowest_y = @runs.map(&:y).sort.first || 0 #: Numeric
|
32
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y #: Numeric
|
33
|
+
@row_count = nil #: Numeric | nil
|
34
|
+
@col_count = nil #: Numeric | nil
|
35
|
+
@row_multiplier = nil #: Numeric | nil
|
36
|
+
@col_multiplier = nil #: Numeric | nil
|
32
37
|
end
|
33
38
|
|
39
|
+
#: () -> String
|
34
40
|
def to_s
|
35
41
|
return "" if @runs.empty?
|
36
42
|
return "" if row_count == 0
|
@@ -48,10 +54,12 @@ class PDF::Reader
|
|
48
54
|
|
49
55
|
private
|
50
56
|
|
57
|
+
#: () -> Numeric
|
51
58
|
def page_width
|
52
59
|
@mediabox.width
|
53
60
|
end
|
54
61
|
|
62
|
+
#: () -> Numeric
|
55
63
|
def page_height
|
56
64
|
@mediabox.height
|
57
65
|
end
|
@@ -62,6 +70,7 @@ class PDF::Reader
|
|
62
70
|
# interesting_rows([ "", "one", "two", "" ])
|
63
71
|
# => [ "one", "two" ]
|
64
72
|
#
|
73
|
+
#: (untyped) -> untyped
|
65
74
|
def interesting_rows(rows)
|
66
75
|
line_lengths = rows.map { |l| l.strip.length }
|
67
76
|
|
@@ -73,22 +82,27 @@ class PDF::Reader
|
|
73
82
|
rows[first_line_with_text, interesting_line_count].map
|
74
83
|
end
|
75
84
|
|
85
|
+
#: () -> untyped
|
76
86
|
def row_count
|
77
87
|
@row_count ||= (page_height / @mean_font_size).floor
|
78
88
|
end
|
79
89
|
|
90
|
+
#: () -> untyped
|
80
91
|
def col_count
|
81
92
|
@col_count ||= ((page_width / @median_glyph_width) * 1.05).floor
|
82
93
|
end
|
83
94
|
|
95
|
+
#: () -> untyped
|
84
96
|
def row_multiplier
|
85
97
|
@row_multiplier ||= page_height.to_f / row_count.to_f
|
86
98
|
end
|
87
99
|
|
100
|
+
#: () -> untyped
|
88
101
|
def col_multiplier
|
89
102
|
@col_multiplier ||= page_width.to_f / col_count.to_f
|
90
103
|
end
|
91
104
|
|
105
|
+
#: (untyped) -> untyped
|
92
106
|
def mean(collection)
|
93
107
|
if collection.size == 0
|
94
108
|
0
|
@@ -97,6 +111,7 @@ class PDF::Reader
|
|
97
111
|
end
|
98
112
|
end
|
99
113
|
|
114
|
+
#: (untyped) -> untyped
|
100
115
|
def median(collection)
|
101
116
|
if collection.size == 0
|
102
117
|
0
|
@@ -105,10 +120,12 @@ class PDF::Reader
|
|
105
120
|
end
|
106
121
|
end
|
107
122
|
|
123
|
+
#: (untyped, untyped, untyped) -> untyped
|
108
124
|
def local_string_insert(haystack, needle, index)
|
109
125
|
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
110
126
|
end
|
111
127
|
|
128
|
+
#: (untyped) -> untyped
|
112
129
|
def process_mediabox(mediabox)
|
113
130
|
if mediabox.is_a?(Array)
|
114
131
|
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
@@ -20,9 +20,10 @@ class PDF::Reader
|
|
20
20
|
:text_mode => 0,
|
21
21
|
:text_rise => 0,
|
22
22
|
:text_knockout => 0
|
23
|
-
}
|
23
|
+
} #: Hash[Symbol, Numeric | nil]
|
24
24
|
|
25
25
|
# starting a new page
|
26
|
+
#: (untyped) -> void
|
26
27
|
def initialize(page)
|
27
28
|
@page = page
|
28
29
|
@cache = page.cache
|
@@ -48,12 +49,14 @@ class PDF::Reader
|
|
48
49
|
# Any changes that are subsequently made to the state can then by reversed
|
49
50
|
# by calling restore_graphics_state.
|
50
51
|
#
|
52
|
+
#: () -> untyped
|
51
53
|
def save_graphics_state
|
52
54
|
@stack.push clone_state
|
53
55
|
end
|
54
56
|
|
55
57
|
# Restore the state to the previous value on the stack.
|
56
58
|
#
|
59
|
+
#: () -> untyped
|
57
60
|
def restore_graphics_state
|
58
61
|
@stack.pop
|
59
62
|
end
|
@@ -118,7 +121,7 @@ class PDF::Reader
|
|
118
121
|
@font_size ||= begin
|
119
122
|
_, zero = trm_transform(0,0)
|
120
123
|
_, one = trm_transform(1,1)
|
121
|
-
(zero - one).abs
|
124
|
+
(zero - one).abs.round(10)
|
122
125
|
end
|
123
126
|
end
|
124
127
|
|
@@ -14,9 +14,13 @@ module PDF
|
|
14
14
|
class PageTextReceiver
|
15
15
|
extend Forwardable
|
16
16
|
|
17
|
-
SPACE = " "
|
17
|
+
SPACE = " " #: String
|
18
18
|
|
19
|
-
|
19
|
+
#: untyped
|
20
|
+
attr_reader :state
|
21
|
+
|
22
|
+
#: untyped
|
23
|
+
attr_reader :options
|
20
24
|
|
21
25
|
########## BEGIN FORWARDERS ##########
|
22
26
|
# Graphics State Operators
|
@@ -180,7 +180,7 @@ class PDF::Reader
|
|
180
180
|
'y' => :append_curved_segment_final_point_replicated,
|
181
181
|
'\'' => :move_to_next_line_and_show_text,
|
182
182
|
'"' => :set_spacing_next_line_show_text,
|
183
|
-
}
|
183
|
+
} #: Hash[String, Symbol]
|
184
184
|
end
|
185
185
|
################################################################################
|
186
186
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -33,7 +33,7 @@ class PDF::Reader
|
|
33
33
|
# them into useable ruby objects (hash's, arrays, true, false, etc)
|
34
34
|
class Parser
|
35
35
|
|
36
|
-
TOKEN_STRATEGY = proc { |parser, token| Token.new(token) }
|
36
|
+
TOKEN_STRATEGY = proc { |parser, token| Token.new(token) } #: Proc
|
37
37
|
|
38
38
|
STRATEGIES = {
|
39
39
|
"/" => proc { |parser, token| parser.send(:pdf_name) },
|
@@ -55,13 +55,14 @@ class PDF::Reader
|
|
55
55
|
"]" => TOKEN_STRATEGY,
|
56
56
|
">" => TOKEN_STRATEGY,
|
57
57
|
")" => TOKEN_STRATEGY
|
58
|
-
}
|
58
|
+
} #: Hash[String?, Proc]
|
59
59
|
|
60
60
|
################################################################################
|
61
61
|
# Create a new parser around a PDF::Reader::Buffer object
|
62
62
|
#
|
63
63
|
# buffer - a PDF::Reader::Buffer object that contains PDF data
|
64
64
|
# objects - a PDF::Reader::ObjectHash object that can return objects from the PDF file
|
65
|
+
#: (PDF::Reader::Buffer, ?PDF::Reader::ObjectHash?) -> void
|
65
66
|
def initialize(buffer, objects=nil)
|
66
67
|
@buffer = buffer
|
67
68
|
@objects = objects
|
@@ -71,11 +72,24 @@ class PDF::Reader
|
|
71
72
|
# object
|
72
73
|
#
|
73
74
|
# operators - a hash of supported operators to read from the underlying buffer.
|
75
|
+
#: (?Hash[String | PDF::Reader::Token, Symbol]) -> (
|
76
|
+
#| PDF::Reader::Reference |
|
77
|
+
#| PDF::Reader::Token |
|
78
|
+
#| Numeric |
|
79
|
+
#| String |
|
80
|
+
#| Symbol |
|
81
|
+
#| Array[untyped] |
|
82
|
+
#| Hash[untyped, untyped] |
|
83
|
+
#| nil
|
84
|
+
#| )
|
74
85
|
def parse_token(operators={})
|
75
86
|
token = @buffer.token
|
76
87
|
|
77
|
-
if
|
78
|
-
|
88
|
+
if token.nil?
|
89
|
+
nil
|
90
|
+
elsif token.is_a?(String) && STRATEGIES.has_key?(token)
|
91
|
+
proc = STRATEGIES[token]
|
92
|
+
proc.call(self, token) if proc
|
79
93
|
elsif token.is_a? PDF::Reader::Reference
|
80
94
|
token
|
81
95
|
elsif operators.has_key? token
|
@@ -95,6 +109,17 @@ class PDF::Reader
|
|
95
109
|
#
|
96
110
|
# id - the object ID to return
|
97
111
|
# gen - the object revision number to return
|
112
|
+
#: (Integer, Integer) -> (
|
113
|
+
#| PDF::Reader::Reference |
|
114
|
+
#| PDF::Reader::Token |
|
115
|
+
#| PDF::Reader::Stream |
|
116
|
+
#| Numeric |
|
117
|
+
#| String |
|
118
|
+
#| Symbol |
|
119
|
+
#| Array[untyped] |
|
120
|
+
#| Hash[untyped, untyped] |
|
121
|
+
#| nil
|
122
|
+
#| )
|
98
123
|
def object(id, gen)
|
99
124
|
idCheck = parse_token
|
100
125
|
|
@@ -120,6 +145,7 @@ class PDF::Reader
|
|
120
145
|
|
121
146
|
################################################################################
|
122
147
|
# reads a PDF dict from the buffer and converts it to a Ruby Hash.
|
148
|
+
#: () -> Hash[Symbol, untyped]
|
123
149
|
def dictionary
|
124
150
|
dict = {}
|
125
151
|
|
@@ -138,15 +164,25 @@ class PDF::Reader
|
|
138
164
|
end
|
139
165
|
################################################################################
|
140
166
|
# reads a PDF name from the buffer and converts it to a Ruby Symbol
|
167
|
+
#: () -> Symbol
|
141
168
|
def pdf_name
|
142
169
|
tok = @buffer.token
|
143
|
-
|
144
|
-
|
170
|
+
|
171
|
+
if tok.is_a?(String)
|
172
|
+
tok = tok.dup.gsub(/#([A-Fa-f0-9]{2})/) do |match|
|
173
|
+
res = match[1, 2]
|
174
|
+
res ? res.hex.chr : ""
|
175
|
+
end
|
176
|
+
tok.to_sym
|
177
|
+
elsif tok.is_a?(PDF::Reader::Reference)
|
178
|
+
raise MalformedPDFError, "unexpected reference"
|
179
|
+
else
|
180
|
+
raise MalformedPDFError, "unexpected nil PDF Name"
|
145
181
|
end
|
146
|
-
tok.to_sym
|
147
182
|
end
|
148
183
|
################################################################################
|
149
184
|
# reads a PDF array from the buffer and converts it to a Ruby Array.
|
185
|
+
#: () -> Array[untyped]
|
150
186
|
def array
|
151
187
|
a = []
|
152
188
|
|
@@ -161,6 +197,7 @@ class PDF::Reader
|
|
161
197
|
end
|
162
198
|
################################################################################
|
163
199
|
# Reads a PDF hex string from the buffer and converts it to a Ruby String
|
200
|
+
#: () -> String
|
164
201
|
def hex_string
|
165
202
|
str = "".dup
|
166
203
|
|
@@ -177,8 +214,11 @@ class PDF::Reader
|
|
177
214
|
end
|
178
215
|
################################################################################
|
179
216
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
217
|
+
#: () -> String
|
180
218
|
def string
|
181
219
|
str = @buffer.token
|
220
|
+
raise MalformedPDFError, "unexpected reference" if str.is_a?(PDF::Reader::Reference)
|
221
|
+
raise MalformedPDFError, "unexpected nil PDF String" if str.nil?
|
182
222
|
return "".dup.force_encoding("binary") if str == ")"
|
183
223
|
Error.assert_equal(parse_token, ")")
|
184
224
|
|
@@ -206,10 +246,11 @@ class PDF::Reader
|
|
206
246
|
"\\\n" => "",
|
207
247
|
"\\\r" => "",
|
208
248
|
"\\\r\n" => "",
|
209
|
-
}
|
249
|
+
} #: Hash[String, String]
|
210
250
|
|
211
251
|
################################################################################
|
212
252
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
253
|
+
#: (Hash[Symbol, untyped]) -> PDF::Reader::Stream
|
213
254
|
def stream(dict)
|
214
255
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
215
256
|
if @objects
|
@@ -231,7 +272,7 @@ class PDF::Reader
|
|
231
272
|
# matter if it's missing, and other readers seems to handle its absence just fine
|
232
273
|
# Error.str_assert(parse_token, "endobj")
|
233
274
|
|
234
|
-
PDF::Reader::Stream.new(dict, data)
|
275
|
+
PDF::Reader::Stream.new(dict, data || "")
|
235
276
|
end
|
236
277
|
################################################################################
|
237
278
|
end
|
data/lib/pdf/reader/point.rb
CHANGED
@@ -10,12 +10,19 @@ module PDF
|
|
10
10
|
#
|
11
11
|
class Point
|
12
12
|
|
13
|
-
|
13
|
+
#: Numeric
|
14
|
+
attr_reader :x
|
14
15
|
|
16
|
+
#: Numeric
|
17
|
+
attr_reader :y
|
18
|
+
|
19
|
+
#: (Numeric, Numeric) -> void
|
15
20
|
def initialize(x, y)
|
16
|
-
@x
|
21
|
+
@x = x
|
22
|
+
@y = y
|
17
23
|
end
|
18
24
|
|
25
|
+
#: (PDF::Reader::Point) -> bool
|
19
26
|
def ==(other)
|
20
27
|
other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
|
21
28
|
end
|
@@ -8,16 +8,12 @@ class PDF::Reader
|
|
8
8
|
#
|
9
9
|
class PrintReceiver
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@callbacks = []
|
15
|
-
end
|
16
|
-
|
11
|
+
#: (untyped) -> bool
|
17
12
|
def respond_to?(meth)
|
18
13
|
true
|
19
14
|
end
|
20
15
|
|
16
|
+
#: (Symbol, *untyped) -> void
|
21
17
|
def method_missing(methodname, *args)
|
22
18
|
puts "#{methodname} => #{args.inspect}"
|
23
19
|
end
|
@@ -12,6 +12,7 @@ class PDF::Reader
|
|
12
12
|
#
|
13
13
|
class Rc4SecurityHandler
|
14
14
|
|
15
|
+
#: (String) -> void
|
15
16
|
def initialize(key)
|
16
17
|
@encrypt_key = key
|
17
18
|
end
|
@@ -25,6 +26,7 @@ class PDF::Reader
|
|
25
26
|
# buf - a string to decrypt
|
26
27
|
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
28
|
#
|
29
|
+
#: (String, PDF::Reader::Reference) -> String
|
28
30
|
def decrypt( buf, ref )
|
29
31
|
objKey = @encrypt_key.dup
|
30
32
|
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
data/lib/pdf/reader/rectangle.rb
CHANGED
@@ -20,12 +20,28 @@ module PDF
|
|
20
20
|
#
|
21
21
|
class Rectangle
|
22
22
|
|
23
|
-
|
23
|
+
#: PDF::Reader::Point
|
24
|
+
attr_reader :bottom_left
|
24
25
|
|
26
|
+
#: PDF::Reader::Point
|
27
|
+
attr_reader :bottom_right
|
28
|
+
|
29
|
+
#: PDF::Reader::Point
|
30
|
+
attr_reader :top_left
|
31
|
+
|
32
|
+
#: PDF::Reader::Point
|
33
|
+
attr_reader :top_right
|
34
|
+
|
35
|
+
#: (Numeric, Numeric, Numeric, Numeric) -> void
|
25
36
|
def initialize(x1, y1, x2, y2)
|
37
|
+
@bottom_left = Point.new(0,0) #: PDF::Reader::Point
|
38
|
+
@bottom_right = Point.new(0,0) #: PDF::Reader::Point
|
39
|
+
@top_left = Point.new(0,0) #: PDF::Reader::Point
|
40
|
+
@top_right = Point.new(0,0) #: PDF::Reader::Point
|
26
41
|
set_corners(x1, y1, x2, y2)
|
27
42
|
end
|
28
43
|
|
44
|
+
#: (Array[Numeric]) -> PDF::Reader::Rectangle
|
29
45
|
def self.from_array(arr)
|
30
46
|
if arr.size != 4
|
31
47
|
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
@@ -39,24 +55,29 @@ module PDF
|
|
39
55
|
)
|
40
56
|
end
|
41
57
|
|
58
|
+
#: (PDF::Reader::Rectangle) -> bool
|
42
59
|
def ==(other)
|
43
60
|
to_a == other.to_a
|
44
61
|
end
|
45
62
|
|
63
|
+
#: () -> Numeric
|
46
64
|
def height
|
47
65
|
top_right.y - bottom_right.y
|
48
66
|
end
|
49
67
|
|
68
|
+
#: () -> Numeric
|
50
69
|
def width
|
51
70
|
bottom_right.x - bottom_left.x
|
52
71
|
end
|
53
72
|
|
73
|
+
#: (PDF::Reader::Point) -> bool
|
54
74
|
def contains?(point)
|
55
75
|
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
76
|
point.y >= bottom_left.y && point.y <= top_right.y
|
57
77
|
end
|
58
78
|
|
59
79
|
# A pdf-style 4-number array
|
80
|
+
#: () -> Array[Numeric]
|
60
81
|
def to_a
|
61
82
|
[
|
62
83
|
bottom_left.x,
|
@@ -66,6 +87,7 @@ module PDF
|
|
66
87
|
]
|
67
88
|
end
|
68
89
|
|
90
|
+
#: (Integer) -> void
|
69
91
|
def apply_rotation(degrees)
|
70
92
|
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
93
|
|
@@ -90,6 +112,7 @@ module PDF
|
|
90
112
|
|
91
113
|
private
|
92
114
|
|
115
|
+
#: (Numeric, Numeric, Numeric, Numeric) -> void
|
93
116
|
def set_corners(x1, y1, x2, y2)
|
94
117
|
@bottom_left = PDF::Reader::Point.new(
|
95
118
|
[x1, x2].min,
|