pdf-reader 1.4.1 → 2.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -3
- data/{README.rdoc → README.md} +40 -23
- data/Rakefile +2 -2
- data/bin/pdf_object +4 -1
- data/lib/pdf/reader.rb +7 -112
- data/lib/pdf/reader/buffer.rb +2 -1
- data/lib/pdf/reader/cmap.rb +26 -24
- data/lib/pdf/reader/encoding.rb +4 -5
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +1 -5
- data/lib/pdf/reader/font.rb +1 -11
- data/lib/pdf/reader/glyph_hash.rb +6 -2
- data/lib/pdf/reader/lzw.rb +1 -1
- data/lib/pdf/reader/object_hash.rb +35 -16
- data/lib/pdf/reader/page_layout.rb +6 -17
- data/lib/pdf/reader/pages_strategy.rb +1 -304
- data/lib/pdf/reader/parser.rb +6 -4
- data/lib/pdf/reader/standard_security_handler.rb +18 -14
- data/lib/pdf/reader/text_run.rb +3 -9
- metadata +14 -47
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -265
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -147,7 +147,7 @@ class PDF::Reader
|
|
147
147
|
ret = [
|
148
148
|
@mapping[glyph_code.to_i] || glyph_code.to_i
|
149
149
|
].pack("U*")
|
150
|
-
ret.force_encoding("UTF-8")
|
150
|
+
ret.force_encoding("UTF-8")
|
151
151
|
ret
|
152
152
|
end
|
153
153
|
|
@@ -158,13 +158,13 @@ class PDF::Reader
|
|
158
158
|
def little_boxes(times)
|
159
159
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
160
160
|
ret = codepoints.pack("U*")
|
161
|
-
ret.force_encoding("UTF-8")
|
161
|
+
ret.force_encoding("UTF-8")
|
162
162
|
ret
|
163
163
|
end
|
164
164
|
|
165
165
|
def convert_to_utf8(str)
|
166
166
|
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
167
|
-
ret.force_encoding("UTF-8")
|
167
|
+
ret.force_encoding("UTF-8")
|
168
168
|
ret
|
169
169
|
end
|
170
170
|
|
@@ -207,8 +207,7 @@ class PDF::Reader
|
|
207
207
|
end
|
208
208
|
|
209
209
|
def load_mapping(file)
|
210
|
-
|
211
|
-
File.open(file, mode) do |f|
|
210
|
+
File.open(file, "r:BINARY") do |f|
|
212
211
|
f.each do |l|
|
213
212
|
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
214
213
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -46,6 +46,7 @@ class PDF::Reader
|
|
46
46
|
when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
|
47
47
|
when :DCTDecode then PDF::Reader::Filter::Null.new(options)
|
48
48
|
when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
|
49
|
+
when :Fl then PDF::Reader::Filter::Flate.new(options)
|
49
50
|
when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
|
50
51
|
when :JPXDecode then PDF::Reader::Filter::Null.new(options)
|
51
52
|
when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -36,11 +36,7 @@ class PDF::Reader
|
|
36
36
|
attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
|
37
37
|
:cid_widths, :cid_default_width
|
38
38
|
|
39
|
-
def initialize(ohash = nil, obj = nil)
|
40
|
-
if ohash.nil? || obj.nil?
|
41
|
-
$stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
|
42
|
-
return
|
43
|
-
end
|
39
|
+
def initialize(ohash, obj)
|
44
40
|
@ohash = ohash
|
45
41
|
@tounicode = nil
|
46
42
|
|
@@ -52,12 +48,6 @@ class PDF::Reader
|
|
52
48
|
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
53
49
|
end
|
54
50
|
|
55
|
-
def basefont=(font)
|
56
|
-
$stderr.puts "Font#basefont= is deprecated and will be removed in the 2.0 release"
|
57
|
-
@encoding ||= default_encoding(font)
|
58
|
-
@basefont = font
|
59
|
-
end
|
60
|
-
|
61
51
|
def to_utf8(params)
|
62
52
|
if @tounicode
|
63
53
|
to_utf8_via_cmap(params)
|
data/lib/pdf/reader/glyph_hash.rb
CHANGED
@@ -48,6 +48,9 @@ class PDF::Reader
|
|
48
48
|
# h.name_to_unicode(:Euro)
|
49
49
|
# => 8364
|
50
50
|
#
|
51
|
+
# h.name_to_unicode(:X4A)
|
52
|
+
# => 74
|
53
|
+
#
|
51
54
|
# h.name_to_unicode(:G30)
|
52
55
|
# => 48
|
53
56
|
#
|
@@ -62,6 +65,8 @@ class PDF::Reader
|
|
62
65
|
|
63
66
|
if @by_name.has_key?(name)
|
64
67
|
@by_name[name]
|
68
|
+
elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
|
69
|
+
"0x#{str[1,4]}".hex
|
65
70
|
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
66
71
|
"0x#{str[3,4]}".hex
|
67
72
|
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
@@ -102,8 +107,7 @@ class PDF::Reader
|
|
102
107
|
keyed_by_name = {}
|
103
108
|
keyed_by_codepoint = {}
|
104
109
|
|
105
|
-
|
106
|
-
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
110
|
+
File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
|
107
111
|
f.each do |l|
|
108
112
|
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
109
113
|
if name && code
|
data/lib/pdf/reader/lzw.rb
CHANGED
data/lib/pdf/reader/object_hash.rb
CHANGED
@@ -102,21 +102,7 @@ class PDF::Reader
|
|
102
102
|
# a PDF::Reader::Reference, the key is returned unchanged.
|
103
103
|
#
|
104
104
|
def deref!(key)
|
105
|
-
case object = deref(key)
106
|
-
when Hash
|
107
|
-
{}.tap { |hash|
|
108
|
-
object.each do |k, value|
|
109
|
-
hash[k] = deref!(value)
|
110
|
-
end
|
111
|
-
}
|
112
|
-
when PDF::Reader::Stream
|
113
|
-
object.hash = deref!(object.hash)
|
114
|
-
object
|
115
|
-
when Array
|
116
|
-
object.map { |value| deref!(value) }
|
117
|
-
else
|
118
|
-
object
|
119
|
-
end
|
105
|
+
deref_internal!(key, {})
|
120
106
|
end
|
121
107
|
|
122
108
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
@@ -266,6 +252,39 @@ class PDF::Reader
|
|
266
252
|
|
267
253
|
private
|
268
254
|
|
255
|
+
# Private implementation of deref!, which exists to ensure the `seen` argument
|
256
|
+
# isn't publicly available. It's used to avoid endless loops in the recursion, and
|
257
|
+
# doesn't need to be part of the public API.
|
258
|
+
#
|
259
|
+
def deref_internal!(key, seen)
|
260
|
+
seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
|
261
|
+
|
262
|
+
return seen[seen_key] if seen.key?(seen_key)
|
263
|
+
|
264
|
+
case object = deref(key)
|
265
|
+
when Hash
|
266
|
+
seen[seen_key] ||= {}
|
267
|
+
object.each do |k, value|
|
268
|
+
seen[seen_key][k] = deref_internal!(value, seen)
|
269
|
+
end
|
270
|
+
seen[seen_key]
|
271
|
+
when PDF::Reader::Stream
|
272
|
+
seen[seen_key] ||= PDF::Reader::Stream.new({}, object.data)
|
273
|
+
object.hash.each do |k,value|
|
274
|
+
seen[seen_key].hash[k] = deref_internal!(value, seen)
|
275
|
+
end
|
276
|
+
seen[seen_key]
|
277
|
+
when Array
|
278
|
+
seen[seen_key] ||= []
|
279
|
+
object.each do |value|
|
280
|
+
seen[seen_key] << deref_internal!(value, seen)
|
281
|
+
end
|
282
|
+
seen[seen_key]
|
283
|
+
else
|
284
|
+
object
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
269
288
|
def build_security_handler(opts = {})
|
270
289
|
return nil if trailer[:Encrypt].nil?
|
271
290
|
|
@@ -316,7 +335,7 @@ class PDF::Reader
|
|
316
335
|
|
317
336
|
if obj[:Type] == :Page
|
318
337
|
ref
|
319
|
-
elsif obj[:Type] == :Pages
|
338
|
+
elsif obj[:Kids]
|
320
339
|
deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
|
321
340
|
end
|
322
341
|
end
|
data/lib/pdf/reader/page_layout.rb
CHANGED
@@ -8,17 +8,19 @@ class PDF::Reader
|
|
8
8
|
# media box should be a 4 number array that describes the dimensions of the
|
9
9
|
# page to be rendered as described by the page's MediaBox attribute
|
10
10
|
class PageLayout
|
11
|
+
|
12
|
+
DEFAULT_FONT_SIZE = 12
|
13
|
+
|
11
14
|
def initialize(runs, mediabox)
|
12
15
|
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
13
16
|
|
14
17
|
@runs = merge_runs(runs)
|
15
|
-
@mean_font_size = mean(@runs.map(&:font_size)) || 0
|
18
|
+
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
19
|
+
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
16
20
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
17
21
|
@page_width = mediabox[2] - mediabox[0]
|
18
22
|
@page_height = mediabox[3] - mediabox[1]
|
19
23
|
@x_offset = @runs.map(&:x).sort.first
|
20
|
-
@current_platform_is_rbx_19 = RUBY_DESCRIPTION =~ /\Arubinius 2.0.0/ &&
|
21
|
-
RUBY_VERSION >= "1.9.0"
|
22
24
|
end
|
23
25
|
|
24
26
|
def to_s
|
@@ -110,21 +112,8 @@ class PDF::Reader
|
|
110
112
|
runs
|
111
113
|
end
|
112
114
|
|
113
|
-
# This is a simple alternative to String#[]=. We can't use the string
|
114
|
-
# method as it's buggy on rubinius 2.0rc1 (in 1.9 mode)
|
115
|
-
#
|
116
|
-
# See my bug report at https://github.com/rubinius/rubinius/issues/1985
|
117
115
|
def local_string_insert(haystack, needle, index)
|
118
|
-
if @current_platform_is_rbx_19
119
|
-
char_count = needle.length
|
120
|
-
haystack.replace(
|
121
|
-
(haystack[0,index] || "") +
|
122
|
-
needle +
|
123
|
-
(haystack[index+char_count,500] || "")
|
124
|
-
)
|
125
|
-
else
|
126
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
127
|
-
end
|
116
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
128
117
|
end
|
129
118
|
end
|
130
119
|
end
|
data/lib/pdf/reader/pages_strategy.rb
CHANGED
@@ -27,42 +27,8 @@
|
|
27
27
|
|
28
28
|
class PDF::Reader
|
29
29
|
################################################################################
|
30
|
-
# Walks the pages of the PDF file and calls the appropriate callback methods when
|
31
|
-
# something of interest is found.
|
32
|
-
#
|
33
|
-
# The callback methods should exist on the receiver object passed into the constructor.
|
34
|
-
# Whenever some content is found that will trigger a callback, the receiver is checked
|
35
|
-
# to see if the callback is defined.
|
36
|
-
#
|
37
|
-
# If it is defined it will be called. If not, processing will continue.
|
38
|
-
#
|
39
|
-
# = Available Callbacks
|
40
|
-
# The following callbacks are available and should be methods defined on your receiver class. Only
|
41
|
-
# implement the ones you need - the rest will be ignored.
|
42
|
-
#
|
43
|
-
# Some callbacks will include parameters which will be passed in as an array. For callbacks
|
44
|
-
# that supply no paramters, or where you don't need them, the *params argument can be left off.
|
45
|
-
# Some example callback method definitions are:
|
46
|
-
#
|
47
|
-
# def begin_document
|
48
|
-
# def end_page
|
49
|
-
# def show_text(string, *params)
|
50
|
-
# def fill_stroke(*params)
|
51
|
-
#
|
52
|
-
# You should be able to infer the basic command the callback is reporting based on the name. For
|
53
|
-
# further experimentation, define the callback with just a *params parameter, then print out the
|
54
|
-
# contents of the array using something like:
|
55
|
-
#
|
56
|
-
# puts params.inspect
|
57
|
-
#
|
58
30
|
# == Text Callbacks
|
59
31
|
#
|
60
|
-
# All text passed into these callbacks will be encoded as UTF-8. Depending on where (and when) the
|
61
|
-
# PDF was generated, there's a good chance the text is NOT stored as UTF-8 internally so be
|
62
|
-
# careful when doing a comparison on strings returned from PDF::Reader (when doing unit tests for
|
63
|
-
# example). The string may not be byte-by-byte identical with the string that was originally
|
64
|
-
# written to the PDF.
|
65
|
-
#
|
66
32
|
# - end_text_object
|
67
33
|
# - move_to_start_of_next_line
|
68
34
|
# - set_character_spacing
|
@@ -80,14 +46,6 @@ class PDF::Reader
|
|
80
46
|
# - move_to_next_line_and_show_text
|
81
47
|
# - set_spacing_next_line_show_text
|
82
48
|
#
|
83
|
-
# If the :raw_text option was passed to the PDF::Reader class the following callbacks
|
84
|
-
# may also appear:
|
85
|
-
#
|
86
|
-
# - show_text_raw
|
87
|
-
# - show_text_with_positioning_raw
|
88
|
-
# - move_to_next_line_and_show_text_raw
|
89
|
-
# - set_spacing_next_line_show_text_raw
|
90
|
-
#
|
91
49
|
# == Graphics Callbacks
|
92
50
|
# - close_fill_stroke
|
93
51
|
# - fill_stroke
|
@@ -145,42 +103,7 @@ class PDF::Reader
|
|
145
103
|
# - set_clipping_path_with_even_odd
|
146
104
|
# - append_curved_segment_final_point_replicated
|
147
105
|
#
|
148
|
-
|
149
|
-
# - begin_compatibility_section
|
150
|
-
# - end_compatibility_section,
|
151
|
-
# - begin_document
|
152
|
-
# - end_document
|
153
|
-
# - begin_page_container
|
154
|
-
# - end_page_container
|
155
|
-
# - begin_page
|
156
|
-
# - end_page
|
157
|
-
# - metadata
|
158
|
-
# - xml_metadata
|
159
|
-
# - page_count
|
160
|
-
# - begin_form_xobject
|
161
|
-
# - end_form_xobject
|
162
|
-
#
|
163
|
-
# == Resource Callbacks
|
164
|
-
#
|
165
|
-
# Each page can contain (or inherit) a range of resources required for the page,
|
166
|
-
# including things like fonts and images. The following callbacks may appear
|
167
|
-
# after begin_page if the relevant resources exist on a page:
|
168
|
-
#
|
169
|
-
# - resource_procset
|
170
|
-
# - resource_xobject
|
171
|
-
# - resource_extgstate
|
172
|
-
# - resource_colorspace
|
173
|
-
# - resource_pattern
|
174
|
-
# - resource_font
|
175
|
-
#
|
176
|
-
# In most cases, these callbacks associate a name with each resource, allowing it
|
177
|
-
# to be referred to by name in the page content. For example, an XObject can hold an image.
|
178
|
-
# If it gets mapped to the name "IM1", then it can be placed on the page using
|
179
|
-
# invoke_xobject "IM1".
|
180
|
-
#
|
181
|
-
# DEPRECATED: this class was deprecated in version 0.11.0 and will
|
182
|
-
# eventually be removed
|
183
|
-
class PagesStrategy< AbstractStrategy # :nodoc:
|
106
|
+
class PagesStrategy # :nodoc:
|
184
107
|
OPERATORS = {
|
185
108
|
'b' => :close_fill_stroke,
|
186
109
|
'B' => :fill_stroke,
|
@@ -256,232 +179,6 @@ class PDF::Reader
|
|
256
179
|
'\'' => :move_to_next_line_and_show_text,
|
257
180
|
'"' => :set_spacing_next_line_show_text,
|
258
181
|
}
|
259
|
-
def self.to_sym
|
260
|
-
:pages
|
261
|
-
end
|
262
|
-
################################################################################
|
263
|
-
# Begin processing the document
|
264
|
-
def process
|
265
|
-
return false unless options[:pages]
|
266
|
-
|
267
|
-
callback(:begin_document, [root])
|
268
|
-
walk_pages(@ohash.object(root[:Pages]))
|
269
|
-
callback(:end_document)
|
270
|
-
end
|
271
|
-
private
|
272
|
-
################################################################################
|
273
|
-
def params_to_utf8(params, font)
|
274
|
-
if params.is_a?(String)
|
275
|
-
font.to_utf8(params)
|
276
|
-
elsif params.is_a?(Array)
|
277
|
-
params.map { |i| params_to_utf8(i, font)}
|
278
|
-
else
|
279
|
-
params
|
280
|
-
end
|
281
|
-
end
|
282
|
-
################################################################################
|
283
|
-
# Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
|
284
|
-
# its content
|
285
|
-
def walk_pages(page)
|
286
|
-
|
287
|
-
# extract page content
|
288
|
-
if page[:Type] == :Pages
|
289
|
-
callback(:begin_page_container, [page])
|
290
|
-
res = @ohash.object(page[:Resources])
|
291
|
-
resources.push res if res
|
292
|
-
@ohash.object(page[:Kids]).each {|child| walk_pages(@ohash.object(child))}
|
293
|
-
resources.pop if res
|
294
|
-
callback(:end_page_container)
|
295
|
-
elsif page[:Type] == :Page
|
296
|
-
callback(:begin_page, [page])
|
297
|
-
res = @ohash.object(page[:Resources])
|
298
|
-
resources.push res if res
|
299
|
-
walk_resources(current_resources)
|
300
|
-
|
301
|
-
if @ohash.object(page[:Contents]).kind_of?(Array)
|
302
|
-
contents = @ohash.object(page[:Contents])
|
303
|
-
else
|
304
|
-
contents = [page[:Contents]]
|
305
|
-
end
|
306
|
-
|
307
|
-
fonts = font_hash_from_resources(current_resources)
|
308
|
-
|
309
|
-
if page.has_key?(:Contents) and page[:Contents]
|
310
|
-
direct_contents = contents.map { |content| @ohash.object(content) }
|
311
|
-
content_stream(direct_contents, fonts)
|
312
|
-
end
|
313
|
-
|
314
|
-
resources.pop if res
|
315
|
-
callback(:end_page)
|
316
|
-
end
|
317
|
-
end
|
318
|
-
################################################################################
|
319
|
-
# Retreive the XObject for the supplied label and if it's a Form, walk it
|
320
|
-
# like a regular page content stream.
|
321
|
-
#
|
322
|
-
def walk_xobject_form(label)
|
323
|
-
xobjects = @ohash.object(current_resources[:XObject]) || {}
|
324
|
-
xobject = @ohash.object(xobjects[label])
|
325
|
-
|
326
|
-
if xobject && xobject.hash[:Subtype] == :Form
|
327
|
-
callback(:begin_form_xobject)
|
328
|
-
xobj_resources = @ohash.object(xobject.hash[:Resources])
|
329
|
-
if xobj_resources
|
330
|
-
resources.push xobj_resources
|
331
|
-
walk_resources(xobj_resources)
|
332
|
-
end
|
333
|
-
fonts = font_hash_from_resources(xobj_resources)
|
334
|
-
content_stream(xobject, fonts)
|
335
|
-
callback(:end_form_xobject)
|
336
|
-
resources.pop if xobj_resources
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
################################################################################
|
341
|
-
# Return a merged hash of all resources that are current. Pages, page and xobject
|
342
|
-
#
|
343
|
-
def current_resources
|
344
|
-
hash = {}
|
345
|
-
resources.each do |res|
|
346
|
-
hash.merge!(res)
|
347
|
-
end
|
348
|
-
hash
|
349
|
-
end
|
350
|
-
################################################################################
|
351
|
-
# Reads a PDF content stream and calls all the appropriate callback methods for the operators
|
352
|
-
# it contains
|
353
|
-
#
|
354
|
-
def content_stream(instructions, fonts = {})
|
355
|
-
instructions = [instructions] unless instructions.kind_of?(Array)
|
356
|
-
instructions = instructions.map { |ins|
|
357
|
-
ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
|
358
|
-
}.join
|
359
|
-
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
360
|
-
parser = Parser.new(buffer, @ohash)
|
361
|
-
current_font = nil
|
362
|
-
params = []
|
363
|
-
|
364
|
-
while (token = parser.parse_token(OPERATORS))
|
365
|
-
if token.kind_of?(Token) and OPERATORS.has_key?(token)
|
366
|
-
if OPERATORS[token] == :set_text_font_and_size
|
367
|
-
current_font = params.first
|
368
|
-
if fonts[current_font].nil?
|
369
|
-
raise MalformedPDFError, "Unknown font #{current_font}"
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
# handle special cases in response to certain operators
|
374
|
-
if OPERATORS[token].to_s.include?("show_text")
|
375
|
-
# convert any text to utf-8, but output the raw string if the user wants it
|
376
|
-
if options[:raw_text]
|
377
|
-
callback("#{OPERATORS[token]}_raw".to_sym, params)
|
378
|
-
end
|
379
|
-
params = params_to_utf8(params, fonts[current_font])
|
380
|
-
elsif token == "ID"
|
381
|
-
# inline image data, first convert the current params into a more familiar hash
|
382
|
-
map = {}
|
383
|
-
params.each_slice(2) do |key, value|
|
384
|
-
map[key] = value
|
385
|
-
end
|
386
|
-
params = [map, buffer.token]
|
387
|
-
end
|
388
|
-
|
389
|
-
callback(OPERATORS[token], params)
|
390
|
-
|
391
|
-
if OPERATORS[token] == :invoke_xobject
|
392
|
-
xobject_label = params.first
|
393
|
-
params.clear
|
394
|
-
walk_xobject_form(xobject_label)
|
395
|
-
else
|
396
|
-
params.clear
|
397
|
-
end
|
398
|
-
else
|
399
|
-
params << token
|
400
|
-
end
|
401
|
-
end
|
402
|
-
rescue EOFError
|
403
|
-
raise MalformedPDFError, "End Of File while processing a content stream"
|
404
|
-
end
|
405
|
-
################################################################################
|
406
|
-
def walk_resources(resources)
|
407
|
-
return unless resources.respond_to?(:[])
|
408
|
-
|
409
|
-
resources = resolve_references(resources)
|
410
|
-
|
411
|
-
# extract any procset information
|
412
|
-
if resources[:ProcSet]
|
413
|
-
callback(:resource_procset, resources[:ProcSet])
|
414
|
-
end
|
415
|
-
|
416
|
-
# extract any xobject information
|
417
|
-
if resources[:XObject]
|
418
|
-
@ohash.object(resources[:XObject]).each do |name, val|
|
419
|
-
callback(:resource_xobject, [name, @ohash.object(val)])
|
420
|
-
end
|
421
|
-
end
|
422
|
-
|
423
|
-
# extract any extgstate information
|
424
|
-
if resources[:ExtGState]
|
425
|
-
@ohash.object(resources[:ExtGState]).each do |name, val|
|
426
|
-
callback(:resource_extgstate, [name, @ohash.object(val)])
|
427
|
-
end
|
428
|
-
end
|
429
|
-
|
430
|
-
# extract any colorspace information
|
431
|
-
if resources[:ColorSpace]
|
432
|
-
@ohash.object(resources[:ColorSpace]).each do |name, val|
|
433
|
-
callback(:resource_colorspace, [name, @ohash.object(val)])
|
434
|
-
end
|
435
|
-
end
|
436
|
-
|
437
|
-
# extract any pattern information
|
438
|
-
if resources[:Pattern]
|
439
|
-
@ohash.object(resources[:Pattern]).each do |name, val|
|
440
|
-
callback(:resource_pattern, [name, @ohash.object(val)])
|
441
|
-
end
|
442
|
-
end
|
443
|
-
|
444
|
-
# extract any font information
|
445
|
-
if resources[:Font]
|
446
|
-
fonts = font_hash_from_resources(resources)
|
447
|
-
fonts.each do |label, font|
|
448
|
-
callback(:resource_font, [label, font])
|
449
|
-
end
|
450
|
-
end
|
451
|
-
end
|
452
|
-
################################################################################
|
453
|
-
# Convert any PDF::Reader::Resource objects into a real object
|
454
|
-
def resolve_references(obj)
|
455
|
-
case obj
|
456
|
-
when PDF::Reader::Stream then
|
457
|
-
obj.hash = resolve_references(obj.hash)
|
458
|
-
obj
|
459
|
-
when PDF::Reader::Reference then
|
460
|
-
resolve_references(@ohash.object(obj))
|
461
|
-
when Hash then
|
462
|
-
arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
|
463
|
-
Hash[*arr]
|
464
|
-
when Array then
|
465
|
-
obj.collect { |item| resolve_references(item) }
|
466
|
-
else
|
467
|
-
obj
|
468
|
-
end
|
469
|
-
end
|
470
|
-
################################################################################
|
471
|
-
################################################################################
|
472
|
-
def font_hash_from_resources(resources)
|
473
|
-
return {} unless resources.respond_to?(:[])
|
474
|
-
|
475
|
-
fonts = {}
|
476
|
-
resources = @ohash.object(resources[:Font]) || {}
|
477
|
-
resources.each do |label, desc|
|
478
|
-
fonts[label] = PDF::Reader::Font.new(@ohash, @ohash.object(desc))
|
479
|
-
end
|
480
|
-
fonts
|
481
|
-
end
|
482
|
-
def resources
|
483
|
-
@resources ||= []
|
484
|
-
end
|
485
182
|
end
|
486
183
|
################################################################################
|
487
184
|
end
|