pdf-reader 1.3.1 → 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/lib/pdf/reader/encoding.rb +9 -9
- data/lib/pdf/reader/glyph_hash.rb +37 -11
- data/lib/pdf/reader/width_calculator/built_in.rb +5 -3
- metadata +69 -69
data/CHANGELOG
CHANGED
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -76,7 +76,7 @@ class PDF::Reader
|
|
76
76
|
byte = val.to_i
|
77
77
|
else
|
78
78
|
@differences[byte] = val
|
79
|
-
@mapping[byte] =
|
79
|
+
@mapping[byte] = glyphlist.name_to_unicode(val)
|
80
80
|
byte += 1
|
81
81
|
end
|
82
82
|
end
|
@@ -116,15 +116,15 @@ class PDF::Reader
|
|
116
116
|
# int_to_name(65)
|
117
117
|
# => :A
|
118
118
|
#
|
119
|
-
# TODO: this needs to be expanded to return the appropriate name for standard
|
120
|
-
# glyph codes in the encoding. 65 to :A, etc. At the moment it only
|
121
|
-
# handles glyphs in the difference table
|
122
|
-
#
|
123
119
|
def int_to_name(glyph_code)
|
124
120
|
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
125
|
-
|
121
|
+
[]
|
122
|
+
elsif differences[glyph_code]
|
123
|
+
[differences[glyph_code]]
|
124
|
+
elsif @mapping[glyph_code]
|
125
|
+
glyphlist.unicode_to_name(@mapping[glyph_code])
|
126
126
|
else
|
127
|
-
|
127
|
+
[]
|
128
128
|
end
|
129
129
|
end
|
130
130
|
|
@@ -189,8 +189,8 @@ class PDF::Reader
|
|
189
189
|
@mapping.size > 0
|
190
190
|
end
|
191
191
|
|
192
|
-
def
|
193
|
-
@
|
192
|
+
def glyphlist
|
193
|
+
@glyphlist ||= PDF::Reader::GlyphHash.new
|
194
194
|
end
|
195
195
|
|
196
196
|
def load_mapping(file)
|
@@ -32,7 +32,9 @@ class PDF::Reader
|
|
32
32
|
class GlyphHash # :nodoc:
|
33
33
|
def initialize
|
34
34
|
# only parse the glyph list once, and cache the results (for performance)
|
35
|
-
|
35
|
+
adobe = @@cache ||= load_adobe_glyph_mapping
|
36
|
+
@by_name = adobe.first
|
37
|
+
@by_codepoint = adobe.last
|
36
38
|
end
|
37
39
|
|
38
40
|
# attempt to convert a PDF Name to a unicode codepoint. Returns nil
|
@@ -40,26 +42,26 @@ class PDF::Reader
|
|
40
42
|
#
|
41
43
|
# h = GlyphHash.new
|
42
44
|
#
|
43
|
-
# h
|
45
|
+
# h.name_to_unicode(:A)
|
44
46
|
# => 65
|
45
47
|
#
|
46
|
-
# h
|
48
|
+
# h.name_to_unicode(:Euro)
|
47
49
|
# => 8364
|
48
50
|
#
|
49
|
-
# h
|
51
|
+
# h.name_to_unicode(:G30)
|
50
52
|
# => 48
|
51
53
|
#
|
52
|
-
# h
|
54
|
+
# h.name_to_unicode(:34)
|
53
55
|
# => 34
|
54
56
|
#
|
55
|
-
def
|
57
|
+
def name_to_unicode(name)
|
56
58
|
return nil unless name.is_a?(Symbol)
|
57
59
|
|
58
60
|
name = name.to_s.gsub('_', '').intern
|
59
61
|
str = name.to_s
|
60
62
|
|
61
|
-
if @
|
62
|
-
@
|
63
|
+
if @by_name.has_key?(name)
|
64
|
+
@by_name[name]
|
63
65
|
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
64
66
|
"0x#{str[3,4]}".hex
|
65
67
|
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
@@ -73,23 +75,47 @@ class PDF::Reader
|
|
73
75
|
end
|
74
76
|
end
|
75
77
|
|
78
|
+
# attempt to convert a Unicode code point to the equivalent PDF Name. Returns nil
|
79
|
+
# if no conversion is possible.
|
80
|
+
#
|
81
|
+
# h = GlyphHash.new
|
82
|
+
#
|
83
|
+
# h.unicode_to_name(65)
|
84
|
+
# => :A
|
85
|
+
#
|
86
|
+
# h.unicode_to_name(8364)
|
87
|
+
# => :Euro
|
88
|
+
#
|
89
|
+
# h.unicode_to_name(34)
|
90
|
+
# => :34
|
91
|
+
#
|
92
|
+
def unicode_to_name(codepoint)
|
93
|
+
@by_codepoint[codepoint.to_i]
|
94
|
+
end
|
95
|
+
|
76
96
|
private
|
77
97
|
|
78
98
|
# returns a pair of hashes: one that maps glyph names to unicode codepoints, and one that maps codepoints to glyph names. The mapping is based on
|
79
99
|
# a text file supplied by Adobe at:
|
80
100
|
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
81
101
|
def load_adobe_glyph_mapping
|
82
|
-
|
102
|
+
keyed_by_name = {}
|
103
|
+
keyed_by_codepoint = {}
|
83
104
|
|
84
105
|
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
85
106
|
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
86
107
|
f.each do |l|
|
87
108
|
m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
88
|
-
|
109
|
+
if name && code
|
110
|
+
cp = "0x#{code}".hex
|
111
|
+
keyed_by_name[name.to_sym] = cp
|
112
|
+
keyed_by_codepoint[cp] ||= []
|
113
|
+
keyed_by_codepoint[cp] << name.to_sym
|
114
|
+
end
|
89
115
|
end
|
90
116
|
end
|
91
117
|
|
92
|
-
|
118
|
+
[keyed_by_name.freeze, keyed_by_codepoint.freeze]
|
93
119
|
end
|
94
120
|
|
95
121
|
end
|
@@ -39,8 +39,10 @@ class PDF::Reader
|
|
39
39
|
|
40
40
|
m = @metrics.metrics_for(code_point)
|
41
41
|
if m.nil?
|
42
|
-
|
43
|
-
m =
|
42
|
+
names = @font.encoding.int_to_name(code_point)
|
43
|
+
m = names.map { |name|
|
44
|
+
@metrics.metrics_for_name(name)
|
45
|
+
}.compact.first
|
44
46
|
end
|
45
47
|
|
46
48
|
if m
|
@@ -48,7 +50,7 @@ class PDF::Reader
|
|
48
50
|
elsif @font.widths[code_point - 1]
|
49
51
|
@font.widths[code_point - 1]
|
50
52
|
else
|
51
|
-
raise ArgumentError, "Unknown glyph width for #{
|
53
|
+
raise ArgumentError, "Unknown glyph width for #{code_point}"
|
52
54
|
end
|
53
55
|
end
|
54
56
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -219,88 +219,88 @@ extra_rdoc_files:
|
|
219
219
|
- CHANGELOG
|
220
220
|
- MIT-LICENSE
|
221
221
|
files:
|
222
|
-
- examples/extract_fonts.rb
|
223
|
-
- examples/hash.rb
|
224
|
-
- examples/extract_bates.rb
|
225
222
|
- examples/metadata.rb
|
226
|
-
- examples/
|
223
|
+
- examples/callbacks.rb
|
227
224
|
- examples/rspec.rb
|
228
225
|
- examples/page_count.rb
|
229
|
-
- examples/callbacks.rb
|
230
|
-
- examples/text.rb
|
231
226
|
- examples/version.rb
|
232
|
-
-
|
233
|
-
-
|
234
|
-
-
|
235
|
-
-
|
236
|
-
-
|
237
|
-
- lib/pdf
|
238
|
-
- lib/pdf/reader/
|
239
|
-
- lib/pdf/reader/
|
240
|
-
- lib/pdf/reader/
|
241
|
-
- lib/pdf/reader/
|
242
|
-
- lib/pdf/reader/
|
243
|
-
- lib/pdf/reader/page_layout.rb
|
244
|
-
- lib/pdf/reader/font.rb
|
245
|
-
- lib/pdf/reader/print_receiver.rb
|
246
|
-
- lib/pdf/reader/lzw.rb
|
227
|
+
- examples/hash.rb
|
228
|
+
- examples/extract_fonts.rb
|
229
|
+
- examples/text.rb
|
230
|
+
- examples/extract_bates.rb
|
231
|
+
- examples/extract_images.rb
|
232
|
+
- lib/pdf-reader.rb
|
233
|
+
- lib/pdf/reader/error.rb
|
234
|
+
- lib/pdf/reader/filter.rb
|
235
|
+
- lib/pdf/reader/reference.rb
|
236
|
+
- lib/pdf/reader/form_xobject.rb
|
237
|
+
- lib/pdf/reader/metadata_strategy.rb
|
247
238
|
- lib/pdf/reader/buffer.rb
|
248
|
-
- lib/pdf/reader/
|
249
|
-
- lib/pdf/reader/object_stream.rb
|
250
|
-
- lib/pdf/reader/cmap.rb
|
251
|
-
- lib/pdf/reader/text_receiver.rb
|
252
|
-
- lib/pdf/reader/register_receiver.rb
|
253
|
-
- lib/pdf/reader/cid_widths.rb
|
254
|
-
- lib/pdf/reader/page_text_receiver.rb
|
239
|
+
- lib/pdf/reader/parser.rb
|
255
240
|
- lib/pdf/reader/encodings/mac_roman.txt
|
256
|
-
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
257
|
-
- lib/pdf/reader/encodings/symbol.txt
|
258
|
-
- lib/pdf/reader/encodings/win_ansi.txt
|
259
|
-
- lib/pdf/reader/encodings/mac_expert.txt
|
260
241
|
- lib/pdf/reader/encodings/standard.txt
|
242
|
+
- lib/pdf/reader/encodings/symbol.txt
|
261
243
|
- lib/pdf/reader/encodings/pdf_doc.txt
|
262
|
-
- lib/pdf/reader/
|
263
|
-
- lib/pdf/reader/
|
264
|
-
- lib/pdf/reader/
|
265
|
-
- lib/pdf/reader/
|
266
|
-
- lib/pdf/reader/
|
267
|
-
- lib/pdf/reader/
|
268
|
-
- lib/pdf/reader/
|
269
|
-
- lib/pdf/reader/
|
270
|
-
- lib/pdf/reader/
|
271
|
-
- lib/pdf/reader/
|
244
|
+
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
245
|
+
- lib/pdf/reader/encodings/mac_expert.txt
|
246
|
+
- lib/pdf/reader/encodings/win_ansi.txt
|
247
|
+
- lib/pdf/reader/cid_widths.rb
|
248
|
+
- lib/pdf/reader/stream.rb
|
249
|
+
- lib/pdf/reader/object_stream.rb
|
250
|
+
- lib/pdf/reader/cmap.rb
|
251
|
+
- lib/pdf/reader/font_descriptor.rb
|
252
|
+
- lib/pdf/reader/standard_security_handler.rb
|
253
|
+
- lib/pdf/reader/page.rb
|
254
|
+
- lib/pdf/reader/token.rb
|
255
|
+
- lib/pdf/reader/transformation_matrix.rb
|
256
|
+
- lib/pdf/reader/font.rb
|
257
|
+
- lib/pdf/reader/abstract_strategy.rb
|
258
|
+
- lib/pdf/reader/object_cache.rb
|
259
|
+
- lib/pdf/reader/register_receiver.rb
|
260
|
+
- lib/pdf/reader/text_receiver.rb
|
261
|
+
- lib/pdf/reader/synchronized_cache.rb
|
262
|
+
- lib/pdf/reader/resource_methods.rb
|
263
|
+
- lib/pdf/reader/page_state.rb
|
264
|
+
- lib/pdf/reader/pages_strategy.rb
|
272
265
|
- lib/pdf/reader/glyphlist.txt
|
266
|
+
- lib/pdf/reader/print_receiver.rb
|
267
|
+
- lib/pdf/reader/glyph_hash.rb
|
268
|
+
- lib/pdf/reader/text_run.rb
|
269
|
+
- lib/pdf/reader/object_hash.rb
|
270
|
+
- lib/pdf/reader/page_layout.rb
|
271
|
+
- lib/pdf/reader/afm/Times-Bold.afm
|
273
272
|
- lib/pdf/reader/afm/Courier-BoldOblique.afm
|
274
|
-
- lib/pdf/reader/afm/Symbol.afm
|
275
273
|
- lib/pdf/reader/afm/Times-Italic.afm
|
276
|
-
- lib/pdf/reader/afm/Courier-Oblique.afm
|
277
|
-
- lib/pdf/reader/afm/Helvetica-Bold.afm
|
278
|
-
- lib/pdf/reader/afm/Courier-Bold.afm
|
279
|
-
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
280
|
-
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
281
274
|
- lib/pdf/reader/afm/Helvetica.afm
|
282
|
-
- lib/pdf/reader/afm/
|
283
|
-
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
284
|
-
- lib/pdf/reader/afm/Times-Bold.afm
|
275
|
+
- lib/pdf/reader/afm/Courier-Bold.afm
|
285
276
|
- lib/pdf/reader/afm/Times-Roman.afm
|
277
|
+
- lib/pdf/reader/afm/Helvetica-Bold.afm
|
278
|
+
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
279
|
+
- lib/pdf/reader/afm/Courier-Oblique.afm
|
280
|
+
- lib/pdf/reader/afm/ZapfDingbats.afm
|
281
|
+
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
286
282
|
- lib/pdf/reader/afm/Courier.afm
|
287
|
-
- lib/pdf/reader/
|
288
|
-
- lib/pdf/reader/
|
289
|
-
- lib/pdf/reader/
|
290
|
-
- lib/pdf/reader/error.rb
|
291
|
-
- lib/pdf/reader/glyph_hash.rb
|
283
|
+
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
284
|
+
- lib/pdf/reader/afm/Symbol.afm
|
285
|
+
- lib/pdf/reader/encoding.rb
|
292
286
|
- lib/pdf/reader/width_calculator.rb
|
293
|
-
- lib/pdf/reader/
|
294
|
-
- lib/pdf/reader/
|
295
|
-
- lib/pdf/reader/
|
296
|
-
- lib/pdf/reader/
|
297
|
-
- lib/pdf/reader/
|
298
|
-
- lib/pdf/reader/
|
299
|
-
- lib/pdf/reader/
|
300
|
-
- lib/pdf/reader/
|
301
|
-
- lib/pdf/reader/
|
302
|
-
- lib/pdf/reader/
|
303
|
-
- lib/pdf
|
287
|
+
- lib/pdf/reader/xref.rb
|
288
|
+
- lib/pdf/reader/filter/flate.rb
|
289
|
+
- lib/pdf/reader/filter/depredict.rb
|
290
|
+
- lib/pdf/reader/filter/null.rb
|
291
|
+
- lib/pdf/reader/filter/ascii_hex.rb
|
292
|
+
- lib/pdf/reader/filter/ascii85.rb
|
293
|
+
- lib/pdf/reader/filter/run_length.rb
|
294
|
+
- lib/pdf/reader/filter/lzw.rb
|
295
|
+
- lib/pdf/reader/width_calculator/built_in.rb
|
296
|
+
- lib/pdf/reader/width_calculator/true_type.rb
|
297
|
+
- lib/pdf/reader/width_calculator/composite.rb
|
298
|
+
- lib/pdf/reader/width_calculator/type_zero.rb
|
299
|
+
- lib/pdf/reader/width_calculator/type_one_or_three.rb
|
300
|
+
- lib/pdf/reader/page_text_receiver.rb
|
301
|
+
- lib/pdf/reader/lzw.rb
|
302
|
+
- lib/pdf/hash.rb
|
303
|
+
- lib/pdf/reader.rb
|
304
304
|
- Rakefile
|
305
305
|
- README.rdoc
|
306
306
|
- TODO
|