pdf-reader 2.2.0 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +26 -0
- data/README.md +2 -2
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_text +1 -1
- data/lib/pdf/reader.rb +1 -2
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/buffer.rb +1 -1
- data/lib/pdf/reader/cmap.rb +21 -12
- data/lib/pdf/reader/encoding.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +27 -15
- data/lib/pdf/reader/font.rb +10 -2
- data/lib/pdf/reader/object_hash.rb +21 -10
- data/lib/pdf/reader/orientation_detector.rb +4 -4
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +28 -0
- data/lib/pdf/reader/page_layout.rb +9 -5
- data/lib/pdf/reader/page_state.rb +9 -1
- data/lib/pdf/reader/page_text_receiver.rb +4 -1
- data/lib/pdf/reader/text_run.rb +24 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +17 -1
- data/lib/pdf/reader/xref.rb +7 -4
- metadata +22 -18
- data/lib/pdf/hash.rb +0 -20
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -96,25 +96,34 @@ class PDF::Reader
|
|
96
96
|
Parser.new(buffer)
|
97
97
|
end
|
98
98
|
|
99
|
+
# The following includes some manual decoding of UTF-16BE strings into unicode codepoints. In
|
100
|
+
# theory we could replace all the UTF-16 code with something based on Ruby's encoding support:
|
101
|
+
#
|
102
|
+
# str.dup.force_encoding("utf-16be").encode!("utf-8").unpack("U*")
|
103
|
+
#
|
104
|
+
# However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
|
105
|
+
# exception when we try converting broken UTF-16 to UTF-8
|
106
|
+
#
|
99
107
|
def str_to_int(str)
|
100
108
|
return nil if str.nil? || str.size == 0
|
101
|
-
unpacked_string = if str.
|
109
|
+
unpacked_string = if str.bytesize == 1 # UTF-8
|
102
110
|
str.unpack("C*")
|
103
111
|
else # UTF-16
|
104
112
|
str.unpack("n*")
|
105
113
|
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
114
|
+
result = []
|
115
|
+
while unpacked_string.any? do
|
116
|
+
if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
|
117
|
+
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
118
|
+
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
119
|
+
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
120
|
+
points = [unpacked_string.shift, unpacked_string.shift]
|
121
|
+
result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
|
122
|
+
else
|
123
|
+
result << unpacked_string.shift
|
124
|
+
end
|
117
125
|
end
|
126
|
+
result
|
118
127
|
end
|
119
128
|
|
120
129
|
def process_bfchar_instructions(instructions)
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -40,20 +40,22 @@ class PDF::Reader
|
|
40
40
|
@mapping = default_mapping # maps from character codes to Unicode codepoints
|
41
41
|
@string_cache = {} # maps from character codes to UTF-8 strings.
|
42
42
|
|
43
|
-
if enc.kind_of?(Hash)
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
enc = enc.to_sym
|
43
|
+
@enc_name = if enc.kind_of?(Hash)
|
44
|
+
enc[:Encoding] || enc[:BaseEncoding]
|
45
|
+
elsif enc && enc.respond_to?(:to_sym)
|
46
|
+
enc.to_sym
|
48
47
|
else
|
49
|
-
|
48
|
+
:StandardEncoding
|
50
49
|
end
|
51
50
|
|
52
|
-
@
|
53
|
-
@
|
54
|
-
@map_file = get_mapping_file(enc)
|
51
|
+
@unpack = get_unpack(@enc_name)
|
52
|
+
@map_file = get_mapping_file(@enc_name)
|
55
53
|
|
56
54
|
load_mapping(@map_file) if @map_file
|
55
|
+
|
56
|
+
if enc.is_a?(Hash) && enc[:Differences]
|
57
|
+
self.differences = enc[:Differences]
|
58
|
+
end
|
57
59
|
end
|
58
60
|
|
59
61
|
# set the differences table for this encoding. should be an array in the following format:
|
@@ -8,6 +8,9 @@ class PDF::Reader
|
|
8
8
|
module Filter # :nodoc:
|
9
9
|
# implementation of the Flate (zlib) stream filter
|
10
10
|
class Flate
|
11
|
+
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
12
|
+
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
|
+
|
11
14
|
def initialize(options = {})
|
12
15
|
@options = options
|
13
16
|
end
|
@@ -15,25 +18,34 @@ class PDF::Reader
|
|
15
18
|
################################################################################
|
16
19
|
# Decode the specified data with the Zlib compression algorithm
|
17
20
|
def filter(data)
|
18
|
-
deflated =
|
21
|
+
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
22
|
+
|
23
|
+
if deflated.nil?
|
24
|
+
raise MalformedPDFError,
|
25
|
+
"Error while inflating a compressed stream (no suitable inflation algorithm found)"
|
26
|
+
end
|
27
|
+
Depredict.new(@options).filter(deflated)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def zlib_inflate(data)
|
19
33
|
begin
|
20
|
-
|
34
|
+
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
21
35
|
rescue Zlib::DataError => e
|
22
36
|
# by default, Ruby's Zlib assumes the data it's inflating
|
23
|
-
# is RFC1951 deflated data, wrapped in a
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# See
|
28
|
-
# - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
|
29
|
-
# - http://www.gzip.org/zlib/zlib_faq.html#faq38
|
30
|
-
deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
|
37
|
+
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
|
+
# fails, swallow the exception and attempt to inflate the data as a raw
|
39
|
+
# RFC1951 stream.
|
31
40
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
41
|
+
|
42
|
+
begin
|
43
|
+
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
+
rescue StandardError => e
|
45
|
+
# swallow this one too, so we can try some other fallback options
|
46
|
+
end
|
47
|
+
|
48
|
+
nil
|
37
49
|
end
|
38
50
|
end
|
39
51
|
end
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -97,7 +97,13 @@ class PDF::Reader
|
|
97
97
|
elsif @subtype == :Type3
|
98
98
|
PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
|
99
99
|
elsif @subtype == :TrueType
|
100
|
-
|
100
|
+
if @font_descriptor
|
101
|
+
PDF::Reader::WidthCalculator::TrueType.new(self)
|
102
|
+
else
|
103
|
+
# A TrueType font that isn't embedded. Most readers look for a version on the
|
104
|
+
# local system and fallback to a substitute. For now, we go straight to a substitute
|
105
|
+
PDF::Reader::WidthCalculator::BuiltIn.new(self)
|
106
|
+
end
|
101
107
|
elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
|
102
108
|
PDF::Reader::WidthCalculator::Composite.new(self)
|
103
109
|
else
|
@@ -125,7 +131,9 @@ class PDF::Reader
|
|
125
131
|
if obj[:ToUnicode]
|
126
132
|
# ToUnicode is optional for Type1 and Type3
|
127
133
|
stream = @ohash.object(obj[:ToUnicode])
|
128
|
-
|
134
|
+
if stream.is_a?(PDF::Reader::Stream)
|
135
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
|
+
end
|
129
137
|
end
|
130
138
|
end
|
131
139
|
|
@@ -78,16 +78,7 @@ class PDF::Reader
|
|
78
78
|
key = PDF::Reader::Reference.new(key.to_i, 0)
|
79
79
|
end
|
80
80
|
|
81
|
-
|
82
|
-
@cache[key]
|
83
|
-
elsif xref[key].is_a?(Integer)
|
84
|
-
buf = new_buffer(xref[key])
|
85
|
-
@cache[key] = decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
86
|
-
elsif xref[key].is_a?(PDF::Reader::Reference)
|
87
|
-
container_key = xref[key]
|
88
|
-
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
|
89
|
-
@cache[key] = object_streams[container_key][key.id]
|
90
|
-
end
|
81
|
+
@cache[key] ||= fetch_object(key) || fetch_object_stream(key)
|
91
82
|
rescue InvalidObjectError
|
92
83
|
return default
|
93
84
|
end
|
@@ -254,6 +245,26 @@ class PDF::Reader
|
|
254
245
|
|
255
246
|
private
|
256
247
|
|
248
|
+
# parse a traditional object from the PDF, starting from the byte offset indicated
|
249
|
+
# in the xref table
|
250
|
+
#
|
251
|
+
def fetch_object(key)
|
252
|
+
if xref[key].is_a?(Integer)
|
253
|
+
buf = new_buffer(xref[key])
|
254
|
+
decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# parse a object that's embedded in an object stream in the PDF
|
259
|
+
#
|
260
|
+
def fetch_object_stream(key)
|
261
|
+
if xref[key].is_a?(PDF::Reader::Reference)
|
262
|
+
container_key = xref[key]
|
263
|
+
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
|
264
|
+
object_streams[container_key][key.id]
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
257
268
|
# Private implementation of deref!, which exists to ensure the `seen` argument
|
258
269
|
# isn't publicly available. It's used to avoid endless loops in the recursion, and
|
259
270
|
# doesn't need to be part of the public API.
|
@@ -22,12 +22,12 @@ class PDF::Reader
|
|
22
22
|
def detect_orientation
|
23
23
|
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
24
|
rotation = @attributes[:Rotate].to_i
|
25
|
-
width = urx.to_i - llx.to_i
|
26
|
-
height = ury.to_i - lly.to_i
|
25
|
+
width = (urx.to_i - llx.to_i).abs
|
26
|
+
height = (ury.to_i - lly.to_i).abs
|
27
27
|
if width > height
|
28
|
-
|
28
|
+
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
29
|
else
|
30
|
-
|
30
|
+
(rotation % 180).zero? ? 'portrait' : 'landscape'
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
class PDF::Reader
|
4
|
+
# remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
|
5
|
+
# uses slightly offset overlapping characters to achieve a fake 'bold' effect.
|
6
|
+
class OverlappingRunsFilter
|
7
|
+
|
8
|
+
# This should be between 0 and 1. If TextRun B obscures this much of TextRun A (and they
|
9
|
+
# have identical characters) then one will be discarded
|
10
|
+
OVERLAPPING_THRESHOLD = 0.5
|
11
|
+
|
12
|
+
def self.exclude_redundant_runs(runs)
|
13
|
+
sweep_line_status = Array.new
|
14
|
+
event_point_schedule = Array.new
|
15
|
+
to_exclude = []
|
16
|
+
|
17
|
+
runs.each do |run|
|
18
|
+
event_point_schedule << EventPoint.new(run.x, run)
|
19
|
+
event_point_schedule << EventPoint.new(run.endx, run)
|
20
|
+
end
|
21
|
+
|
22
|
+
event_point_schedule.sort! { |a,b| a.x <=> b.x }
|
23
|
+
|
24
|
+
event_point_schedule.each do |event_point|
|
25
|
+
run = event_point.run
|
26
|
+
|
27
|
+
if event_point.start?
|
28
|
+
if detect_intersection(sweep_line_status, event_point)
|
29
|
+
to_exclude << run
|
30
|
+
end
|
31
|
+
sweep_line_status.push(run)
|
32
|
+
else
|
33
|
+
sweep_line_status.delete(run)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
runs - to_exclude
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.detect_intersection(sweep_line_status, event_point)
|
40
|
+
sweep_line_status.each do |open_text_run|
|
41
|
+
if event_point.x >= open_text_run.x &&
|
42
|
+
event_point.x <= open_text_run.endx &&
|
43
|
+
open_text_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
|
44
|
+
return true
|
45
|
+
end
|
46
|
+
end
|
47
|
+
return false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Utility class used to avoid modifying the underlying TextRun objects while we're
|
52
|
+
# looking for duplicates
|
53
|
+
class EventPoint
|
54
|
+
attr_reader :x, :run
|
55
|
+
|
56
|
+
def initialize x, run
|
57
|
+
@x, @run = x, run
|
58
|
+
end
|
59
|
+
|
60
|
+
def start?
|
61
|
+
@x == @run.x
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -124,6 +124,34 @@ module PDF
|
|
124
124
|
}.join(" ")
|
125
125
|
end
|
126
126
|
|
127
|
+
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
128
|
+
#
|
129
|
+
def rotate
|
130
|
+
value = attributes[:Rotate].to_i
|
131
|
+
case value
|
132
|
+
when 0, 90, 180, 270
|
133
|
+
value
|
134
|
+
else
|
135
|
+
0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# returns the "boxes" that define the page object.
|
140
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
|
+
#
|
142
|
+
def boxes
|
143
|
+
mediabox = attributes[:MediaBox]
|
144
|
+
cropbox = attributes[:Cropbox] || mediabox
|
145
|
+
|
146
|
+
{
|
147
|
+
MediaBox: objects.deref!(mediabox),
|
148
|
+
CropBox: objects.deref!(cropbox),
|
149
|
+
BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
|
150
|
+
TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
|
151
|
+
ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
127
155
|
private
|
128
156
|
|
129
157
|
def root
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
+
require 'pdf/reader/overlapping_runs_filter'
|
5
|
+
|
4
6
|
class PDF::Reader
|
5
7
|
|
6
8
|
# Takes a collection of TextRun objects and renders them into a single
|
@@ -15,13 +17,15 @@ class PDF::Reader
|
|
15
17
|
def initialize(runs, mediabox)
|
16
18
|
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
17
19
|
|
18
|
-
@runs = merge_runs(runs)
|
20
|
+
@runs = merge_runs(OverlappingRunsFilter.exclude_redundant_runs(runs))
|
19
21
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
20
22
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
21
23
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
22
|
-
@page_width = mediabox[2] - mediabox[0]
|
23
|
-
@page_height = mediabox[3] - mediabox[1]
|
24
|
-
@x_offset = @runs.map(&:x).sort.first
|
24
|
+
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
+
@page_height = (mediabox[3] - mediabox[1]).abs
|
26
|
+
@x_offset = @runs.map(&:x).sort.first || 0
|
27
|
+
lowest_y = @runs.map(&:y).sort.first || 0
|
28
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
25
29
|
end
|
26
30
|
|
27
31
|
def to_s
|
@@ -30,7 +34,7 @@ class PDF::Reader
|
|
30
34
|
page = row_count.times.map { |i| " " * col_count }
|
31
35
|
@runs.each do |run|
|
32
36
|
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
33
|
-
y_pos = row_count - (run.y / row_multiplier).round
|
37
|
+
y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
|
34
38
|
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
35
39
|
local_string_insert(page[y_pos-1], run.text, x_pos)
|
36
40
|
end
|
@@ -30,7 +30,15 @@ class PDF::Reader
|
|
30
30
|
@xobject_stack = [page.xobjects]
|
31
31
|
@cs_stack = [page.color_spaces]
|
32
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
|
33
|
+
if page.rotate == 0
|
34
|
+
state[:ctm] = identity_matrix
|
35
|
+
else
|
36
|
+
rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
|
37
|
+
rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
|
38
|
+
state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
|
39
|
+
rotate_sin * -1, rotate_cos,
|
40
|
+
0, 0)
|
41
|
+
end
|
34
42
|
end
|
35
43
|
|
36
44
|
#####################################################
|
@@ -44,10 +44,13 @@ module PDF
|
|
44
44
|
@content = []
|
45
45
|
@characters = []
|
46
46
|
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
47
|
+
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
48
|
+
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
49
|
+
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
47
50
|
end
|
48
51
|
|
49
52
|
def content
|
50
|
-
PageLayout.new(@characters, @
|
53
|
+
PageLayout.new(@characters, @device_mediabox).to_s
|
51
54
|
end
|
52
55
|
|
53
56
|
#####################################################
|
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -38,6 +38,10 @@ class PDF::Reader
|
|
38
38
|
@endx ||= x + width
|
39
39
|
end
|
40
40
|
|
41
|
+
def endy
|
42
|
+
@endy ||= y + font_size
|
43
|
+
end
|
44
|
+
|
41
45
|
def mean_character_width
|
42
46
|
@width / character_count
|
43
47
|
end
|
@@ -60,8 +64,28 @@ class PDF::Reader
|
|
60
64
|
"#{text} w:#{width} f:#{font_size} @#{x},#{y}"
|
61
65
|
end
|
62
66
|
|
67
|
+
def intersect?(other_run)
|
68
|
+
x <= other_run.endx && endx >= other_run.x &&
|
69
|
+
endy >= other_run.y && y <= other_run.endy
|
70
|
+
end
|
71
|
+
|
72
|
+
# return what percentage of this text run is overlapped by another run
|
73
|
+
def intersection_area_percent(other_run)
|
74
|
+
return 0 unless intersect?(other_run)
|
75
|
+
|
76
|
+
dx = [endx, other_run.endx].min - [x, other_run.x].max
|
77
|
+
dy = [endy, other_run.endy].min - [y, other_run.y].max
|
78
|
+
intersection_area = dx*dy
|
79
|
+
|
80
|
+
intersection_area.to_f / area
|
81
|
+
end
|
82
|
+
|
63
83
|
private
|
64
84
|
|
85
|
+
def area
|
86
|
+
(endx - x) * (endy - y)
|
87
|
+
end
|
88
|
+
|
65
89
|
def mergable_range
|
66
90
|
@mergable_range ||= Range.new(endx - 3, endx + font_size)
|
67
91
|
end
|
@@ -12,11 +12,20 @@ class PDF::Reader
|
|
12
12
|
# see Section 9.6.2.2, PDF 32000-1:2008, pp 256
|
13
13
|
class BuiltIn
|
14
14
|
|
15
|
+
BUILTINS = [
|
16
|
+
:Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
|
17
|
+
:Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
|
18
|
+
:Symbol,
|
19
|
+
:"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
|
20
|
+
:ZapfDingbats
|
21
|
+
]
|
22
|
+
|
15
23
|
def initialize(font)
|
16
24
|
@font = font
|
17
25
|
@@all_metrics ||= PDF::Reader::SynchronizedCache.new
|
18
26
|
|
19
|
-
|
27
|
+
basefont = extract_basefont(font.basefont)
|
28
|
+
metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
|
20
29
|
|
21
30
|
if File.file?(metrics_path)
|
22
31
|
@metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
|
@@ -54,6 +63,13 @@ class PDF::Reader
|
|
54
63
|
@font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
55
64
|
end
|
56
65
|
|
66
|
+
def extract_basefont(font_name)
|
67
|
+
if BUILTINS.include?(font_name)
|
68
|
+
font_name
|
69
|
+
else
|
70
|
+
"Times-Roman"
|
71
|
+
end
|
72
|
+
end
|
57
73
|
end
|
58
74
|
end
|
59
75
|
end
|