pdf-reader 1.4.1 → 2.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -3
- data/{README.rdoc → README.md} +40 -23
- data/Rakefile +2 -2
- data/bin/pdf_object +4 -1
- data/lib/pdf/reader.rb +7 -112
- data/lib/pdf/reader/buffer.rb +2 -1
- data/lib/pdf/reader/cmap.rb +26 -24
- data/lib/pdf/reader/encoding.rb +4 -5
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +1 -5
- data/lib/pdf/reader/font.rb +1 -11
- data/lib/pdf/reader/glyph_hash.rb +6 -2
- data/lib/pdf/reader/lzw.rb +1 -1
- data/lib/pdf/reader/object_hash.rb +35 -16
- data/lib/pdf/reader/page_layout.rb +6 -17
- data/lib/pdf/reader/pages_strategy.rb +1 -304
- data/lib/pdf/reader/parser.rb +6 -4
- data/lib/pdf/reader/standard_security_handler.rb +18 -14
- data/lib/pdf/reader/text_run.rb +3 -9
- metadata +14 -47
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -265
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -147,7 +147,7 @@ class PDF::Reader
|
|
147
147
|
ret = [
|
148
148
|
@mapping[glyph_code.to_i] || glyph_code.to_i
|
149
149
|
].pack("U*")
|
150
|
-
ret.force_encoding("UTF-8")
|
150
|
+
ret.force_encoding("UTF-8")
|
151
151
|
ret
|
152
152
|
end
|
153
153
|
|
@@ -158,13 +158,13 @@ class PDF::Reader
|
|
158
158
|
def little_boxes(times)
|
159
159
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
160
160
|
ret = codepoints.pack("U*")
|
161
|
-
ret.force_encoding("UTF-8")
|
161
|
+
ret.force_encoding("UTF-8")
|
162
162
|
ret
|
163
163
|
end
|
164
164
|
|
165
165
|
def convert_to_utf8(str)
|
166
166
|
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
167
|
-
ret.force_encoding("UTF-8")
|
167
|
+
ret.force_encoding("UTF-8")
|
168
168
|
ret
|
169
169
|
end
|
170
170
|
|
@@ -207,8 +207,7 @@ class PDF::Reader
|
|
207
207
|
end
|
208
208
|
|
209
209
|
def load_mapping(file)
|
210
|
-
|
211
|
-
File.open(file, mode) do |f|
|
210
|
+
File.open(file, "r:BINARY") do |f|
|
212
211
|
f.each do |l|
|
213
212
|
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
214
213
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -46,6 +46,7 @@ class PDF::Reader
|
|
46
46
|
when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
|
47
47
|
when :DCTDecode then PDF::Reader::Filter::Null.new(options)
|
48
48
|
when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
|
49
|
+
when :Fl then PDF::Reader::Filter::Flate.new(options)
|
49
50
|
when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
|
50
51
|
when :JPXDecode then PDF::Reader::Filter::Null.new(options)
|
51
52
|
when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -36,11 +36,7 @@ class PDF::Reader
|
|
36
36
|
attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
|
37
37
|
:cid_widths, :cid_default_width
|
38
38
|
|
39
|
-
def initialize(ohash = nil, obj = nil)
|
40
|
-
if ohash.nil? || obj.nil?
|
41
|
-
$stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
|
42
|
-
return
|
43
|
-
end
|
39
|
+
def initialize(ohash, obj)
|
44
40
|
@ohash = ohash
|
45
41
|
@tounicode = nil
|
46
42
|
|
@@ -52,12 +48,6 @@ class PDF::Reader
|
|
52
48
|
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
53
49
|
end
|
54
50
|
|
55
|
-
def basefont=(font)
|
56
|
-
$stderr.puts "Font#basefont= is deprecated and will be removed in the 2.0 release"
|
57
|
-
@encoding ||= default_encoding(font)
|
58
|
-
@basefont = font
|
59
|
-
end
|
60
|
-
|
61
51
|
def to_utf8(params)
|
62
52
|
if @tounicode
|
63
53
|
to_utf8_via_cmap(params)
|
data/lib/pdf/reader/glyph_hash.rb
CHANGED
@@ -48,6 +48,9 @@ class PDF::Reader
|
|
48
48
|
# h.name_to_unicode(:Euro)
|
49
49
|
# => 8364
|
50
50
|
#
|
51
|
+
# h.name_to_unicode(:X4A)
|
52
|
+
# => 74
|
53
|
+
#
|
51
54
|
# h.name_to_unicode(:G30)
|
52
55
|
# => 48
|
53
56
|
#
|
@@ -62,6 +65,8 @@ class PDF::Reader
|
|
62
65
|
|
63
66
|
if @by_name.has_key?(name)
|
64
67
|
@by_name[name]
|
68
|
+
elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
|
69
|
+
"0x#{str[1,4]}".hex
|
65
70
|
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
66
71
|
"0x#{str[3,4]}".hex
|
67
72
|
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
@@ -102,8 +107,7 @@ class PDF::Reader
|
|
102
107
|
keyed_by_name = {}
|
103
108
|
keyed_by_codepoint = {}
|
104
109
|
|
105
|
-
|
106
|
-
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
110
|
+
File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
|
107
111
|
f.each do |l|
|
108
112
|
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
109
113
|
if name && code
|
data/lib/pdf/reader/lzw.rb
CHANGED
data/lib/pdf/reader/object_hash.rb
CHANGED
@@ -102,21 +102,7 @@ class PDF::Reader
|
|
102
102
|
# a PDF::Reader::Reference, the key is returned unchanged.
|
103
103
|
#
|
104
104
|
def deref!(key)
|
105
|
-
case object = deref(key)
|
106
|
-
when Hash
|
107
|
-
{}.tap { |hash|
|
108
|
-
object.each do |k, value|
|
109
|
-
hash[k] = deref!(value)
|
110
|
-
end
|
111
|
-
}
|
112
|
-
when PDF::Reader::Stream
|
113
|
-
object.hash = deref!(object.hash)
|
114
|
-
object
|
115
|
-
when Array
|
116
|
-
object.map { |value| deref!(value) }
|
117
|
-
else
|
118
|
-
object
|
119
|
-
end
|
105
|
+
deref_internal!(key, {})
|
120
106
|
end
|
121
107
|
|
122
108
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
@@ -266,6 +252,39 @@ class PDF::Reader
|
|
266
252
|
|
267
253
|
private
|
268
254
|
|
255
|
+
# Private implementation of deref!, which exists to ensure the `seen` argument
|
256
|
+
# isn't publicly available. It's used to avoid endless loops in the recursion, and
|
257
|
+
# doesn't need to be part of the public API.
|
258
|
+
#
|
259
|
+
def deref_internal!(key, seen)
|
260
|
+
seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
|
261
|
+
|
262
|
+
return seen[seen_key] if seen.key?(seen_key)
|
263
|
+
|
264
|
+
case object = deref(key)
|
265
|
+
when Hash
|
266
|
+
seen[seen_key] ||= {}
|
267
|
+
object.each do |k, value|
|
268
|
+
seen[seen_key][k] = deref_internal!(value, seen)
|
269
|
+
end
|
270
|
+
seen[seen_key]
|
271
|
+
when PDF::Reader::Stream
|
272
|
+
seen[seen_key] ||= PDF::Reader::Stream.new({}, object.data)
|
273
|
+
object.hash.each do |k,value|
|
274
|
+
seen[seen_key].hash[k] = deref_internal!(value, seen)
|
275
|
+
end
|
276
|
+
seen[seen_key]
|
277
|
+
when Array
|
278
|
+
seen[seen_key] ||= []
|
279
|
+
object.each do |value|
|
280
|
+
seen[seen_key] << deref_internal!(value, seen)
|
281
|
+
end
|
282
|
+
seen[seen_key]
|
283
|
+
else
|
284
|
+
object
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
269
288
|
def build_security_handler(opts = {})
|
270
289
|
return nil if trailer[:Encrypt].nil?
|
271
290
|
|
@@ -316,7 +335,7 @@ class PDF::Reader
|
|
316
335
|
|
317
336
|
if obj[:Type] == :Page
|
318
337
|
ref
|
319
|
-
elsif obj[:Type] == :Pages
|
338
|
+
elsif obj[:Kids]
|
320
339
|
deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
|
321
340
|
end
|
322
341
|
end
|
data/lib/pdf/reader/page_layout.rb
CHANGED
@@ -8,17 +8,19 @@ class PDF::Reader
|
|
8
8
|
# media box should be a 4 number array that describes the dimensions of the
|
9
9
|
# page to be rendered as described by the page's MediaBox attribute
|
10
10
|
class PageLayout
|
11
|
+
|
12
|
+
DEFAULT_FONT_SIZE = 12
|
13
|
+
|
11
14
|
def initialize(runs, mediabox)
|
12
15
|
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
13
16
|
|
14
17
|
@runs = merge_runs(runs)
|
15
|
-
@mean_font_size = mean(@runs.map(&:font_size)) || 0
|
18
|
+
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
19
|
+
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
16
20
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
17
21
|
@page_width = mediabox[2] - mediabox[0]
|
18
22
|
@page_height = mediabox[3] - mediabox[1]
|
19
23
|
@x_offset = @runs.map(&:x).sort.first
|
20
|
-
@current_platform_is_rbx_19 = RUBY_DESCRIPTION =~ /\Arubinius 2.0.0/ &&
|
21
|
-
RUBY_VERSION >= "1.9.0"
|
22
24
|
end
|
23
25
|
|
24
26
|
def to_s
|
@@ -110,21 +112,8 @@ class PDF::Reader
|
|
110
112
|
runs
|
111
113
|
end
|
112
114
|
|
113
|
-
# This is a simple alternative to String#[]=. We can't use the string
|
114
|
-
# method as it's buggy on rubinius 2.0rc1 (in 1.9 mode)
|
115
|
-
#
|
116
|
-
# See my bug report at https://github.com/rubinius/rubinius/issues/1985
|
117
115
|
def local_string_insert(haystack, needle, index)
|
118
|
-
if @current_platform_is_rbx_19
|
119
|
-
char_count = needle.length
|
120
|
-
haystack.replace(
|
121
|
-
(haystack[0,index] || "") +
|
122
|
-
needle +
|
123
|
-
(haystack[index+char_count,500] || "")
|
124
|
-
)
|
125
|
-
else
|
126
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
127
|
-
end
|
116
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
128
117
|
end
|
129
118
|
end
|
130
119
|
end
|
data/lib/pdf/reader/pages_strategy.rb
CHANGED
@@ -27,42 +27,8 @@
|
|
27
27
|
|
28
28
|
class PDF::Reader
|
29
29
|
################################################################################
|
30
|
-
# Walks the pages of the PDF file and calls the appropriate callback methods when
|
31
|
-
# something of interest is found.
|
32
|
-
#
|
33
|
-
# The callback methods should exist on the receiver object passed into the constructor.
|
34
|
-
# Whenever some content is found that will trigger a callback, the receiver is checked
|
35
|
-
# to see if the callback is defined.
|
36
|
-
#
|
37
|
-
# If it is defined it will be called. If not, processing will continue.
|
38
|
-
#
|
39
|
-
# = Available Callbacks
|
40
|
-
# The following callbacks are available and should be methods defined on your receiver class. Only
|
41
|
-
# implement the ones you need - the rest will be ignored.
|
42
|
-
#
|
43
|
-
# Some callbacks will include parameters which will be passed in as an array. For callbacks
|
44
|
-
# that supply no paramters, or where you don't need them, the *params argument can be left off.
|
45
|
-
# Some example callback method definitions are:
|
46
|
-
#
|
47
|
-
# def begin_document
|
48
|
-
# def end_page
|
49
|
-
# def show_text(string, *params)
|
50
|
-
# def fill_stroke(*params)
|
51
|
-
#
|
52
|
-
# You should be able to infer the basic command the callback is reporting based on the name. For
|
53
|
-
# further experimentation, define the callback with just a *params parameter, then print out the
|
54
|
-
# contents of the array using something like:
|
55
|
-
#
|
56
|
-
# puts params.inspect
|
57
|
-
#
|
58
30
|
# == Text Callbacks
|
59
31
|
#
|
60
|
-
# All text passed into these callbacks will be encoded as UTF-8. Depending on where (and when) the
|
61
|
-
# PDF was generated, there's a good chance the text is NOT stored as UTF-8 internally so be
|
62
|
-
# careful when doing a comparison on strings returned from PDF::Reader (when doing unit tests for
|
63
|
-
# example). The string may not be byte-by-byte identical with the string that was originally
|
64
|
-
# written to the PDF.
|
65
|
-
#
|
66
32
|
# - end_text_object
|
67
33
|
# - move_to_start_of_next_line
|
68
34
|
# - set_character_spacing
|
@@ -80,14 +46,6 @@ class PDF::Reader
|
|
80
46
|
# - move_to_next_line_and_show_text
|
81
47
|
# - set_spacing_next_line_show_text
|
82
48
|
#
|
83
|
-
# If the :raw_text option was passed to the PDF::Reader class the following callbacks
|
84
|
-
# may also appear:
|
85
|
-
#
|
86
|
-
# - show_text_raw
|
87
|
-
# - show_text_with_positioning_raw
|
88
|
-
# - move_to_next_line_and_show_text_raw
|
89
|
-
# - set_spacing_next_line_show_text_raw
|
90
|
-
#
|
91
49
|
# == Graphics Callbacks
|
92
50
|
# - close_fill_stroke
|
93
51
|
# - fill_stroke
|
@@ -145,42 +103,7 @@ class PDF::Reader
|
|
145
103
|
# - set_clipping_path_with_even_odd
|
146
104
|
# - append_curved_segment_final_point_replicated
|
147
105
|
#
|
148
|
-
|
149
|
-
# - begin_compatibility_section
|
150
|
-
# - end_compatibility_section,
|
151
|
-
# - begin_document
|
152
|
-
# - end_document
|
153
|
-
# - begin_page_container
|
154
|
-
# - end_page_container
|
155
|
-
# - begin_page
|
156
|
-
# - end_page
|
157
|
-
# - metadata
|
158
|
-
# - xml_metadata
|
159
|
-
# - page_count
|
160
|
-
# - begin_form_xobject
|
161
|
-
# - end_form_xobject
|
162
|
-
#
|
163
|
-
# == Resource Callbacks
|
164
|
-
#
|
165
|
-
# Each page can contain (or inherit) a range of resources required for the page,
|
166
|
-
# including things like fonts and images. The following callbacks may appear
|
167
|
-
# after begin_page if the relevant resources exist on a page:
|
168
|
-
#
|
169
|
-
# - resource_procset
|
170
|
-
# - resource_xobject
|
171
|
-
# - resource_extgstate
|
172
|
-
# - resource_colorspace
|
173
|
-
# - resource_pattern
|
174
|
-
# - resource_font
|
175
|
-
#
|
176
|
-
# In most cases, these callbacks associate a name with each resource, allowing it
|
177
|
-
# to be referred to by name in the page content. For example, an XObject can hold an image.
|
178
|
-
# If it gets mapped to the name "IM1", then it can be placed on the page using
|
179
|
-
# invoke_xobject "IM1".
|
180
|
-
#
|
181
|
-
# DEPRECATED: this class was deprecated in version 0.11.0 and will
|
182
|
-
# eventually be removed
|
183
|
-
class PagesStrategy< AbstractStrategy # :nodoc:
|
106
|
+
class PagesStrategy # :nodoc:
|
184
107
|
OPERATORS = {
|
185
108
|
'b' => :close_fill_stroke,
|
186
109
|
'B' => :fill_stroke,
|
@@ -256,232 +179,6 @@ class PDF::Reader
|
|
256
179
|
'\'' => :move_to_next_line_and_show_text,
|
257
180
|
'"' => :set_spacing_next_line_show_text,
|
258
181
|
}
|
259
|
-
def self.to_sym
|
260
|
-
:pages
|
261
|
-
end
|
262
|
-
################################################################################
|
263
|
-
# Begin processing the document
|
264
|
-
def process
|
265
|
-
return false unless options[:pages]
|
266
|
-
|
267
|
-
callback(:begin_document, [root])
|
268
|
-
walk_pages(@ohash.object(root[:Pages]))
|
269
|
-
callback(:end_document)
|
270
|
-
end
|
271
|
-
private
|
272
|
-
################################################################################
|
273
|
-
def params_to_utf8(params, font)
|
274
|
-
if params.is_a?(String)
|
275
|
-
font.to_utf8(params)
|
276
|
-
elsif params.is_a?(Array)
|
277
|
-
params.map { |i| params_to_utf8(i, font)}
|
278
|
-
else
|
279
|
-
params
|
280
|
-
end
|
281
|
-
end
|
282
|
-
################################################################################
|
283
|
-
# Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
|
284
|
-
# its content
|
285
|
-
def walk_pages(page)
|
286
|
-
|
287
|
-
# extract page content
|
288
|
-
if page[:Type] == :Pages
|
289
|
-
callback(:begin_page_container, [page])
|
290
|
-
res = @ohash.object(page[:Resources])
|
291
|
-
resources.push res if res
|
292
|
-
@ohash.object(page[:Kids]).each {|child| walk_pages(@ohash.object(child))}
|
293
|
-
resources.pop if res
|
294
|
-
callback(:end_page_container)
|
295
|
-
elsif page[:Type] == :Page
|
296
|
-
callback(:begin_page, [page])
|
297
|
-
res = @ohash.object(page[:Resources])
|
298
|
-
resources.push res if res
|
299
|
-
walk_resources(current_resources)
|
300
|
-
|
301
|
-
if @ohash.object(page[:Contents]).kind_of?(Array)
|
302
|
-
contents = @ohash.object(page[:Contents])
|
303
|
-
else
|
304
|
-
contents = [page[:Contents]]
|
305
|
-
end
|
306
|
-
|
307
|
-
fonts = font_hash_from_resources(current_resources)
|
308
|
-
|
309
|
-
if page.has_key?(:Contents) and page[:Contents]
|
310
|
-
direct_contents = contents.map { |content| @ohash.object(content) }
|
311
|
-
content_stream(direct_contents, fonts)
|
312
|
-
end
|
313
|
-
|
314
|
-
resources.pop if res
|
315
|
-
callback(:end_page)
|
316
|
-
end
|
317
|
-
end
|
318
|
-
################################################################################
|
319
|
-
# Retreive the XObject for the supplied label and if it's a Form, walk it
|
320
|
-
# like a regular page content stream.
|
321
|
-
#
|
322
|
-
def walk_xobject_form(label)
|
323
|
-
xobjects = @ohash.object(current_resources[:XObject]) || {}
|
324
|
-
xobject = @ohash.object(xobjects[label])
|
325
|
-
|
326
|
-
if xobject && xobject.hash[:Subtype] == :Form
|
327
|
-
callback(:begin_form_xobject)
|
328
|
-
xobj_resources = @ohash.object(xobject.hash[:Resources])
|
329
|
-
if xobj_resources
|
330
|
-
resources.push xobj_resources
|
331
|
-
walk_resources(xobj_resources)
|
332
|
-
end
|
333
|
-
fonts = font_hash_from_resources(xobj_resources)
|
334
|
-
content_stream(xobject, fonts)
|
335
|
-
callback(:end_form_xobject)
|
336
|
-
resources.pop if xobj_resources
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
################################################################################
|
341
|
-
# Return a merged hash of all resources that are current. Pages, page and xobject
|
342
|
-
#
|
343
|
-
def current_resources
|
344
|
-
hash = {}
|
345
|
-
resources.each do |res|
|
346
|
-
hash.merge!(res)
|
347
|
-
end
|
348
|
-
hash
|
349
|
-
end
|
350
|
-
################################################################################
|
351
|
-
# Reads a PDF content stream and calls all the appropriate callback methods for the operators
|
352
|
-
# it contains
|
353
|
-
#
|
354
|
-
def content_stream(instructions, fonts = {})
|
355
|
-
instructions = [instructions] unless instructions.kind_of?(Array)
|
356
|
-
instructions = instructions.map { |ins|
|
357
|
-
ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
|
358
|
-
}.join
|
359
|
-
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
360
|
-
parser = Parser.new(buffer, @ohash)
|
361
|
-
current_font = nil
|
362
|
-
params = []
|
363
|
-
|
364
|
-
while (token = parser.parse_token(OPERATORS))
|
365
|
-
if token.kind_of?(Token) and OPERATORS.has_key?(token)
|
366
|
-
if OPERATORS[token] == :set_text_font_and_size
|
367
|
-
current_font = params.first
|
368
|
-
if fonts[current_font].nil?
|
369
|
-
raise MalformedPDFError, "Unknown font #{current_font}"
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
# handle special cases in response to certain operators
|
374
|
-
if OPERATORS[token].to_s.include?("show_text")
|
375
|
-
# convert any text to utf-8, but output the raw string if the user wants it
|
376
|
-
if options[:raw_text]
|
377
|
-
callback("#{OPERATORS[token]}_raw".to_sym, params)
|
378
|
-
end
|
379
|
-
params = params_to_utf8(params, fonts[current_font])
|
380
|
-
elsif token == "ID"
|
381
|
-
# inline image data, first convert the current params into a more familiar hash
|
382
|
-
map = {}
|
383
|
-
params.each_slice(2) do |key, value|
|
384
|
-
map[key] = value
|
385
|
-
end
|
386
|
-
params = [map, buffer.token]
|
387
|
-
end
|
388
|
-
|
389
|
-
callback(OPERATORS[token], params)
|
390
|
-
|
391
|
-
if OPERATORS[token] == :invoke_xobject
|
392
|
-
xobject_label = params.first
|
393
|
-
params.clear
|
394
|
-
walk_xobject_form(xobject_label)
|
395
|
-
else
|
396
|
-
params.clear
|
397
|
-
end
|
398
|
-
else
|
399
|
-
params << token
|
400
|
-
end
|
401
|
-
end
|
402
|
-
rescue EOFError
|
403
|
-
raise MalformedPDFError, "End Of File while processing a content stream"
|
404
|
-
end
|
405
|
-
################################################################################
|
406
|
-
def walk_resources(resources)
|
407
|
-
return unless resources.respond_to?(:[])
|
408
|
-
|
409
|
-
resources = resolve_references(resources)
|
410
|
-
|
411
|
-
# extract any procset information
|
412
|
-
if resources[:ProcSet]
|
413
|
-
callback(:resource_procset, resources[:ProcSet])
|
414
|
-
end
|
415
|
-
|
416
|
-
# extract any xobject information
|
417
|
-
if resources[:XObject]
|
418
|
-
@ohash.object(resources[:XObject]).each do |name, val|
|
419
|
-
callback(:resource_xobject, [name, @ohash.object(val)])
|
420
|
-
end
|
421
|
-
end
|
422
|
-
|
423
|
-
# extract any extgstate information
|
424
|
-
if resources[:ExtGState]
|
425
|
-
@ohash.object(resources[:ExtGState]).each do |name, val|
|
426
|
-
callback(:resource_extgstate, [name, @ohash.object(val)])
|
427
|
-
end
|
428
|
-
end
|
429
|
-
|
430
|
-
# extract any colorspace information
|
431
|
-
if resources[:ColorSpace]
|
432
|
-
@ohash.object(resources[:ColorSpace]).each do |name, val|
|
433
|
-
callback(:resource_colorspace, [name, @ohash.object(val)])
|
434
|
-
end
|
435
|
-
end
|
436
|
-
|
437
|
-
# extract any pattern information
|
438
|
-
if resources[:Pattern]
|
439
|
-
@ohash.object(resources[:Pattern]).each do |name, val|
|
440
|
-
callback(:resource_pattern, [name, @ohash.object(val)])
|
441
|
-
end
|
442
|
-
end
|
443
|
-
|
444
|
-
# extract any font information
|
445
|
-
if resources[:Font]
|
446
|
-
fonts = font_hash_from_resources(resources)
|
447
|
-
fonts.each do |label, font|
|
448
|
-
callback(:resource_font, [label, font])
|
449
|
-
end
|
450
|
-
end
|
451
|
-
end
|
452
|
-
################################################################################
|
453
|
-
# Convert any PDF::Reader::Resource objects into a real object
|
454
|
-
def resolve_references(obj)
|
455
|
-
case obj
|
456
|
-
when PDF::Reader::Stream then
|
457
|
-
obj.hash = resolve_references(obj.hash)
|
458
|
-
obj
|
459
|
-
when PDF::Reader::Reference then
|
460
|
-
resolve_references(@ohash.object(obj))
|
461
|
-
when Hash then
|
462
|
-
arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
|
463
|
-
Hash[*arr]
|
464
|
-
when Array then
|
465
|
-
obj.collect { |item| resolve_references(item) }
|
466
|
-
else
|
467
|
-
obj
|
468
|
-
end
|
469
|
-
end
|
470
|
-
################################################################################
|
471
|
-
################################################################################
|
472
|
-
def font_hash_from_resources(resources)
|
473
|
-
return {} unless resources.respond_to?(:[])
|
474
|
-
|
475
|
-
fonts = {}
|
476
|
-
resources = @ohash.object(resources[:Font]) || {}
|
477
|
-
resources.each do |label, desc|
|
478
|
-
fonts[label] = PDF::Reader::Font.new(@ohash, @ohash.object(desc))
|
479
|
-
end
|
480
|
-
fonts
|
481
|
-
end
|
482
|
-
def resources
|
483
|
-
@resources ||= []
|
484
|
-
end
|
485
182
|
end
|
486
183
|
################################################################################
|
487
184
|
end
|