pdf-reader 1.1.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -1,13 +1,23 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require 'matrix'
|
4
4
|
require 'forwardable'
|
5
|
+
require 'pdf/reader/page_layout'
|
5
6
|
|
6
7
|
module PDF
|
7
8
|
class Reader
|
9
|
+
|
10
|
+
# Builds a UTF-8 string of all the text on a single page by processing all
|
11
|
+
# the operators in a content stream.
|
12
|
+
#
|
8
13
|
class PageTextReceiver
|
9
14
|
extend Forwardable
|
10
15
|
|
16
|
+
SPACE = " "
|
17
|
+
|
18
|
+
attr_reader :state, :options
|
19
|
+
|
20
|
+
########## BEGIN FORWARDERS ##########
|
11
21
|
# Graphics State Operators
|
12
22
|
def_delegators :@state, :save_graphics_state, :restore_graphics_state
|
13
23
|
|
@@ -26,41 +36,40 @@ module PDF
|
|
26
36
|
# Text Positioning Operators
|
27
37
|
def_delegators :@state, :move_text_position, :move_text_position_and_set_leading
|
28
38
|
def_delegators :@state, :set_text_matrix_and_text_line_matrix, :move_to_start_of_next_line
|
39
|
+
########## END FORWARDERS ##########
|
29
40
|
|
30
41
|
# starting a new page
|
31
42
|
def page=(page)
|
32
43
|
@state = PageState.new(page)
|
33
|
-
@
|
44
|
+
@page = page
|
45
|
+
@content = []
|
46
|
+
@characters = []
|
47
|
+
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
48
|
+
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
49
|
+
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
50
|
+
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
34
51
|
end
|
35
52
|
|
36
53
|
def content
|
37
|
-
|
38
|
-
keys.map { |key|
|
39
|
-
@content[key]
|
40
|
-
}.join("\n")
|
54
|
+
PageLayout.new(@characters, @device_mediabox).to_s
|
41
55
|
end
|
42
56
|
|
43
57
|
#####################################################
|
44
58
|
# Text Showing Operators
|
45
59
|
#####################################################
|
46
|
-
|
47
60
|
# record text that is drawn on the page
|
48
|
-
def show_text(string) # Tj
|
49
|
-
|
50
|
-
newx, newy = @state.trm_transform(0,0)
|
51
|
-
@content[newy] ||= ""
|
52
|
-
@content[newy] << @state.current_font.to_utf8(string)
|
61
|
+
def show_text(string) # Tj (AWAY)
|
62
|
+
internal_show_text(string)
|
53
63
|
end
|
54
64
|
|
55
|
-
def show_text_with_positioning(params) # TJ
|
56
|
-
params.each
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
show_text(" ") if arg > 1000
|
65
|
+
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
|
66
|
+
params.each do |arg|
|
67
|
+
if arg.is_a?(String)
|
68
|
+
internal_show_text(arg)
|
69
|
+
else
|
70
|
+
@state.process_glyph_displacement(0, arg, false)
|
62
71
|
end
|
63
|
-
|
72
|
+
end
|
64
73
|
end
|
65
74
|
|
66
75
|
def move_to_next_line_and_show_text(str) # '
|
@@ -86,6 +95,47 @@ module PDF
|
|
86
95
|
end
|
87
96
|
end
|
88
97
|
|
98
|
+
private
|
99
|
+
|
100
|
+
def internal_show_text(string)
|
101
|
+
if @state.current_font.nil?
|
102
|
+
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
103
|
+
end
|
104
|
+
glyphs = @state.current_font.unpack(string)
|
105
|
+
glyphs.each_with_index do |glyph_code, index|
|
106
|
+
# paint the current glyph
|
107
|
+
newx, newy = @state.trm_transform(0,0)
|
108
|
+
newx, newy = apply_rotation(newx, newy)
|
109
|
+
|
110
|
+
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
111
|
+
|
112
|
+
# apply the glyph displacement for the current glyph so the next
|
113
|
+
# glyph will appear in the correct position
|
114
|
+
glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
|
115
|
+
th = 1
|
116
|
+
scaled_glyph_width = glyph_width * @state.font_size * th
|
117
|
+
unless utf8_chars == SPACE
|
118
|
+
@characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
|
119
|
+
end
|
120
|
+
@state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def apply_rotation(x, y)
|
125
|
+
if @page.rotate == 90
|
126
|
+
tmp = x
|
127
|
+
x = y
|
128
|
+
y = tmp * -1
|
129
|
+
elsif @page.rotate == 180
|
130
|
+
y *= -1
|
131
|
+
elsif @page.rotate == 270
|
132
|
+
tmp = x
|
133
|
+
x = y * -1
|
134
|
+
y = tmp * -1
|
135
|
+
end
|
136
|
+
return x, y
|
137
|
+
end
|
138
|
+
|
89
139
|
end
|
90
140
|
end
|
91
141
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
@@ -25,41 +28,8 @@
|
|
25
28
|
|
26
29
|
class PDF::Reader
|
27
30
|
################################################################################
|
28
|
-
# Walks the pages of the PDF file and calls the appropriate callback methods when
|
29
|
-
# something of interest is found.
|
30
|
-
#
|
31
|
-
# The callback methods should exist on the receiver object passed into the constructor. Whenever
|
32
|
-
# some content is found that will trigger a callback, the receiver is checked to see if the callback
|
33
|
-
# is defined.
|
34
|
-
#
|
35
|
-
# If it is defined it will be called. If not, processing will continue.
|
36
|
-
#
|
37
|
-
# = Available Callbacks
|
38
|
-
# The following callbacks are available and should be methods defined on your receiver class. Only
|
39
|
-
# implement the ones you need - the rest will be ignored.
|
40
|
-
#
|
41
|
-
# Some callbacks will include parameters which will be passed in as an array. For callbacks that supply no
|
42
|
-
# parameters, or where you don't need them, the *params argument can be left off. Some example callback
|
43
|
-
# method definitions are:
|
44
|
-
#
|
45
|
-
# def begin_document
|
46
|
-
# def end_page
|
47
|
-
# def show_text(string, *params)
|
48
|
-
# def fill_stroke(*params)
|
49
|
-
#
|
50
|
-
# You should be able to infer the basic command the callback is reporting based on the name. For
|
51
|
-
# further experimentation, define the callback with just a *params parameter, then print out the
|
52
|
-
# contents of the array using something like:
|
53
|
-
#
|
54
|
-
# puts params.inspect
|
55
|
-
#
|
56
31
|
# == Text Callbacks
|
57
32
|
#
|
58
|
-
# All text passed into these callbacks will be encoded as UTF-8. Depending on where (and when) the
|
59
|
-
# PDF was generated, there's a good chance the text is NOT stored as UTF-8 internally so be careful
|
60
|
-
# when doing a comparison on strings returned from PDF::Reader (when doing unit tests for example). The
|
61
|
-
# string may not be byte-by-byte identical with the string that was originally written to the PDF.
|
62
|
-
#
|
63
33
|
# - end_text_object
|
64
34
|
# - move_to_start_of_next_line
|
65
35
|
# - set_character_spacing
|
@@ -77,14 +47,6 @@ class PDF::Reader
|
|
77
47
|
# - move_to_next_line_and_show_text
|
78
48
|
# - set_spacing_next_line_show_text
|
79
49
|
#
|
80
|
-
# If the :raw_text option was passed to the PDF::Reader class the following callbacks
|
81
|
-
# may also appear:
|
82
|
-
#
|
83
|
-
# - show_text_raw
|
84
|
-
# - show_text_with_positioning_raw
|
85
|
-
# - move_to_next_line_and_show_text_raw
|
86
|
-
# - set_spacing_next_line_show_text_raw
|
87
|
-
#
|
88
50
|
# == Graphics Callbacks
|
89
51
|
# - close_fill_stroke
|
90
52
|
# - fill_stroke
|
@@ -142,42 +104,7 @@ class PDF::Reader
|
|
142
104
|
# - set_clipping_path_with_even_odd
|
143
105
|
# - append_curved_segment_final_point_replicated
|
144
106
|
#
|
145
|
-
|
146
|
-
# - begin_compatibility_section
|
147
|
-
# - end_compatibility_section,
|
148
|
-
# - begin_document
|
149
|
-
# - end_document
|
150
|
-
# - begin_page_container
|
151
|
-
# - end_page_container
|
152
|
-
# - begin_page
|
153
|
-
# - end_page
|
154
|
-
# - metadata
|
155
|
-
# - xml_metadata
|
156
|
-
# - page_count
|
157
|
-
# - begin_form_xobject
|
158
|
-
# - end_form_xobject
|
159
|
-
#
|
160
|
-
# == Resource Callbacks
|
161
|
-
#
|
162
|
-
# Each page can contain (or inherit) a range of resources required for the page,
|
163
|
-
# including things like fonts and images. The following callbacks may appear
|
164
|
-
# after begin_page if the relevant resources exist on a page:
|
165
|
-
#
|
166
|
-
# - resource_procset
|
167
|
-
# - resource_xobject
|
168
|
-
# - resource_extgstate
|
169
|
-
# - resource_colorspace
|
170
|
-
# - resource_pattern
|
171
|
-
# - resource_font
|
172
|
-
#
|
173
|
-
# In most cases, these callbacks associate a name with each resource, allowing it
|
174
|
-
# to be referred to by name in the page content. For example, an XObject can hold an image.
|
175
|
-
# If it gets mapped to the name "IM1", then it can be placed on the page using
|
176
|
-
# invoke_xobject "IM1".
|
177
|
-
#
|
178
|
-
# DEPRECATED: this class was deprecated in version 0.11.0 and will
|
179
|
-
# eventually be removed
|
180
|
-
class PagesStrategy< AbstractStrategy # :nodoc:
|
107
|
+
class PagesStrategy # :nodoc:
|
181
108
|
OPERATORS = {
|
182
109
|
'b' => :close_fill_stroke,
|
183
110
|
'B' => :fill_stroke,
|
@@ -253,222 +180,6 @@ class PDF::Reader
|
|
253
180
|
'\'' => :move_to_next_line_and_show_text,
|
254
181
|
'"' => :set_spacing_next_line_show_text,
|
255
182
|
}
|
256
|
-
def self.to_sym
|
257
|
-
:pages
|
258
|
-
end
|
259
|
-
################################################################################
|
260
|
-
# Begin processing the document
|
261
|
-
def process
|
262
|
-
return false unless options[:pages]
|
263
|
-
|
264
|
-
callback(:begin_document, [root])
|
265
|
-
walk_pages(@ohash.object(root[:Pages]))
|
266
|
-
callback(:end_document)
|
267
|
-
end
|
268
|
-
private
|
269
|
-
################################################################################
|
270
|
-
# Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
|
271
|
-
# its content
|
272
|
-
def walk_pages (page)
|
273
|
-
|
274
|
-
# extract page content
|
275
|
-
if page[:Type] == :Pages
|
276
|
-
callback(:begin_page_container, [page])
|
277
|
-
res = @ohash.object(page[:Resources])
|
278
|
-
resources.push res if res
|
279
|
-
@ohash.object(page[:Kids]).each {|child| walk_pages(@ohash.object(child))}
|
280
|
-
resources.pop if res
|
281
|
-
callback(:end_page_container)
|
282
|
-
elsif page[:Type] == :Page
|
283
|
-
callback(:begin_page, [page])
|
284
|
-
res = @ohash.object(page[:Resources])
|
285
|
-
resources.push res if res
|
286
|
-
walk_resources(current_resources)
|
287
|
-
|
288
|
-
if @ohash.object(page[:Contents]).kind_of?(Array)
|
289
|
-
contents = @ohash.object(page[:Contents])
|
290
|
-
else
|
291
|
-
contents = [page[:Contents]]
|
292
|
-
end
|
293
|
-
|
294
|
-
fonts = font_hash_from_resources(current_resources)
|
295
|
-
|
296
|
-
if page.has_key?(:Contents) and page[:Contents]
|
297
|
-
direct_contents = contents.map { |content| @ohash.object(content) }
|
298
|
-
content_stream(direct_contents, fonts)
|
299
|
-
end
|
300
|
-
|
301
|
-
resources.pop if res
|
302
|
-
callback(:end_page)
|
303
|
-
end
|
304
|
-
end
|
305
|
-
################################################################################
|
306
|
-
# Retrieve the XObject for the supplied label and if it's a Form, walk it
|
307
|
-
# like a regular page content stream.
|
308
|
-
#
|
309
|
-
def walk_xobject_form(label)
|
310
|
-
xobjects = @ohash.object(current_resources[:XObject]) || {}
|
311
|
-
xobject = @ohash.object(xobjects[label])
|
312
|
-
|
313
|
-
if xobject && xobject.hash[:Subtype] == :Form
|
314
|
-
callback(:begin_form_xobject)
|
315
|
-
xobj_resources = @ohash.object(xobject.hash[:Resources])
|
316
|
-
if xobj_resources
|
317
|
-
resources.push xobj_resources
|
318
|
-
walk_resources(xobj_resources)
|
319
|
-
end
|
320
|
-
fonts = font_hash_from_resources(xobj_resources)
|
321
|
-
content_stream(xobject, fonts)
|
322
|
-
callback(:end_form_xobject)
|
323
|
-
resources.pop if xobj_resources
|
324
|
-
end
|
325
|
-
end
|
326
|
-
|
327
|
-
################################################################################
|
328
|
-
# Return a merged hash of all resources that are current. Pages, page and xobject
|
329
|
-
#
|
330
|
-
def current_resources
|
331
|
-
hash = {}
|
332
|
-
resources.each do |res|
|
333
|
-
hash.merge!(res)
|
334
|
-
end
|
335
|
-
hash
|
336
|
-
end
|
337
|
-
################################################################################
|
338
|
-
# Reads a PDF content stream and calls all the appropriate callback methods for the operators
|
339
|
-
# it contains
|
340
|
-
#
|
341
|
-
def content_stream (instructions, fonts = {})
|
342
|
-
instructions = [instructions] unless instructions.kind_of?(Array)
|
343
|
-
instructions = instructions.map { |ins|
|
344
|
-
ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
|
345
|
-
}.join
|
346
|
-
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
347
|
-
parser = Parser.new(buffer, @ohash)
|
348
|
-
current_font = nil
|
349
|
-
params = []
|
350
|
-
|
351
|
-
while (token = parser.parse_token(OPERATORS))
|
352
|
-
if token.kind_of?(Token) and OPERATORS.has_key?(token)
|
353
|
-
if OPERATORS[token] == :set_text_font_and_size
|
354
|
-
current_font = params.first
|
355
|
-
if fonts[current_font].nil?
|
356
|
-
raise MalformedPDFError, "Unknown font #{current_font}"
|
357
|
-
end
|
358
|
-
end
|
359
|
-
|
360
|
-
# handle special cases in response to certain operators
|
361
|
-
if OPERATORS[token].to_s.include?("show_text")
|
362
|
-
# convert any text to utf-8, but output the raw string if the user wants it
|
363
|
-
if options[:raw_text]
|
364
|
-
callback("#{OPERATORS[token]}_raw".to_sym, params)
|
365
|
-
end
|
366
|
-
params = fonts[current_font].to_utf8(params)
|
367
|
-
elsif token == "ID"
|
368
|
-
# inline image data, first convert the current params into a more familiar hash
|
369
|
-
map = {}
|
370
|
-
params.each_slice(2) do |key, value|
|
371
|
-
map[key] = value
|
372
|
-
end
|
373
|
-
params = [map, buffer.token]
|
374
|
-
end
|
375
|
-
|
376
|
-
callback(OPERATORS[token], params)
|
377
|
-
|
378
|
-
if OPERATORS[token] == :invoke_xobject
|
379
|
-
xobject_label = params.first
|
380
|
-
params.clear
|
381
|
-
walk_xobject_form(xobject_label)
|
382
|
-
else
|
383
|
-
params.clear
|
384
|
-
end
|
385
|
-
else
|
386
|
-
params << token
|
387
|
-
end
|
388
|
-
end
|
389
|
-
rescue EOFError => e
|
390
|
-
raise MalformedPDFError, "End Of File while processing a content stream"
|
391
|
-
end
|
392
|
-
################################################################################
|
393
|
-
def walk_resources(resources)
|
394
|
-
return unless resources.respond_to?(:[])
|
395
|
-
|
396
|
-
resources = resolve_references(resources)
|
397
|
-
|
398
|
-
# extract any procset information
|
399
|
-
if resources[:ProcSet]
|
400
|
-
callback(:resource_procset, resources[:ProcSet])
|
401
|
-
end
|
402
|
-
|
403
|
-
# extract any xobject information
|
404
|
-
if resources[:XObject]
|
405
|
-
@ohash.object(resources[:XObject]).each do |name, val|
|
406
|
-
callback(:resource_xobject, [name, @ohash.object(val)])
|
407
|
-
end
|
408
|
-
end
|
409
|
-
|
410
|
-
# extract any extgstate information
|
411
|
-
if resources[:ExtGState]
|
412
|
-
@ohash.object(resources[:ExtGState]).each do |name, val|
|
413
|
-
callback(:resource_extgstate, [name, @ohash.object(val)])
|
414
|
-
end
|
415
|
-
end
|
416
|
-
|
417
|
-
# extract any colorspace information
|
418
|
-
if resources[:ColorSpace]
|
419
|
-
@ohash.object(resources[:ColorSpace]).each do |name, val|
|
420
|
-
callback(:resource_colorspace, [name, @ohash.object(val)])
|
421
|
-
end
|
422
|
-
end
|
423
|
-
|
424
|
-
# extract any pattern information
|
425
|
-
if resources[:Pattern]
|
426
|
-
@ohash.object(resources[:Pattern]).each do |name, val|
|
427
|
-
callback(:resource_pattern, [name, @ohash.object(val)])
|
428
|
-
end
|
429
|
-
end
|
430
|
-
|
431
|
-
# extract any font information
|
432
|
-
if resources[:Font]
|
433
|
-
fonts = font_hash_from_resources(resources)
|
434
|
-
fonts.each do |label, font|
|
435
|
-
callback(:resource_font, [label, font])
|
436
|
-
end
|
437
|
-
end
|
438
|
-
end
|
439
|
-
################################################################################
|
440
|
-
# Convert any PDF::Reader::Resource objects into a real object
|
441
|
-
def resolve_references(obj)
|
442
|
-
case obj
|
443
|
-
when PDF::Reader::Stream then
|
444
|
-
obj.hash = resolve_references(obj.hash)
|
445
|
-
obj
|
446
|
-
when PDF::Reader::Reference then
|
447
|
-
resolve_references(@ohash.object(obj))
|
448
|
-
when Hash then
|
449
|
-
arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
|
450
|
-
Hash[*arr]
|
451
|
-
when Array then
|
452
|
-
obj.collect { |item| resolve_references(item) }
|
453
|
-
else
|
454
|
-
obj
|
455
|
-
end
|
456
|
-
end
|
457
|
-
################################################################################
|
458
|
-
################################################################################
|
459
|
-
def font_hash_from_resources(resources)
|
460
|
-
return {} unless resources.respond_to?(:[])
|
461
|
-
|
462
|
-
fonts = {}
|
463
|
-
resources = @ohash.object(resources[:Font]) || {}
|
464
|
-
resources.each do |label, desc|
|
465
|
-
fonts[label] = PDF::Reader::Font.new(@ohash, @ohash.object(desc))
|
466
|
-
end
|
467
|
-
fonts
|
468
|
-
end
|
469
|
-
def resources
|
470
|
-
@resources ||= []
|
471
|
-
end
|
472
183
|
end
|
473
184
|
################################################################################
|
474
185
|
end
|