pdf-reader 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/lib/pdf/reader/encoding.rb +9 -9
- data/lib/pdf/reader/glyph_hash.rb +37 -11
- data/lib/pdf/reader/width_calculator/built_in.rb +5 -3
- metadata +69 -69
data/CHANGELOG
CHANGED
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -76,7 +76,7 @@ class PDF::Reader
|
|
76
76
|
byte = val.to_i
|
77
77
|
else
|
78
78
|
@differences[byte] = val
|
79
|
-
@mapping[byte] =
|
79
|
+
@mapping[byte] = glyphlist.name_to_unicode(val)
|
80
80
|
byte += 1
|
81
81
|
end
|
82
82
|
end
|
@@ -116,15 +116,15 @@ class PDF::Reader
|
|
116
116
|
# int_to_name(65)
|
117
117
|
# => :A
|
118
118
|
#
|
119
|
-
# TODO: this needs to be expanded to return the appropriate name for standard
|
120
|
-
# glyph codes in the encoding. 65 to :A, etc. At the moment it only
|
121
|
-
# handles glyphs in the difference table
|
122
|
-
#
|
123
119
|
def int_to_name(glyph_code)
|
124
120
|
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
125
|
-
|
121
|
+
[]
|
122
|
+
elsif differences[glyph_code]
|
123
|
+
[differences[glyph_code]]
|
124
|
+
elsif @mapping[glyph_code]
|
125
|
+
glyphlist.unicode_to_name(@mapping[glyph_code])
|
126
126
|
else
|
127
|
-
|
127
|
+
[]
|
128
128
|
end
|
129
129
|
end
|
130
130
|
|
@@ -189,8 +189,8 @@ class PDF::Reader
|
|
189
189
|
@mapping.size > 0
|
190
190
|
end
|
191
191
|
|
192
|
-
def
|
193
|
-
@
|
192
|
+
def glyphlist
|
193
|
+
@glyphlist ||= PDF::Reader::GlyphHash.new
|
194
194
|
end
|
195
195
|
|
196
196
|
def load_mapping(file)
|
@@ -32,7 +32,9 @@ class PDF::Reader
|
|
32
32
|
class GlyphHash # :nodoc:
|
33
33
|
def initialize
|
34
34
|
# only parse the glyph list once, and cache the results (for performance)
|
35
|
-
|
35
|
+
adobe = @@cache ||= load_adobe_glyph_mapping
|
36
|
+
@by_name = adobe.first
|
37
|
+
@by_codepoint = adobe.last
|
36
38
|
end
|
37
39
|
|
38
40
|
# attempt to convert a PDF Name to a unicode codepoint. Returns nil
|
@@ -40,26 +42,26 @@ class PDF::Reader
|
|
40
42
|
#
|
41
43
|
# h = GlyphHash.new
|
42
44
|
#
|
43
|
-
# h
|
45
|
+
# h.name_to_unicode(:A)
|
44
46
|
# => 65
|
45
47
|
#
|
46
|
-
# h
|
48
|
+
# h.name_to_unicode(:Euro)
|
47
49
|
# => 8364
|
48
50
|
#
|
49
|
-
# h
|
51
|
+
# h.name_to_unicode(:G30)
|
50
52
|
# => 48
|
51
53
|
#
|
52
|
-
# h
|
54
|
+
# h.name_to_unicode(:34)
|
53
55
|
# => 34
|
54
56
|
#
|
55
|
-
def
|
57
|
+
def name_to_unicode(name)
|
56
58
|
return nil unless name.is_a?(Symbol)
|
57
59
|
|
58
60
|
name = name.to_s.gsub('_', '').intern
|
59
61
|
str = name.to_s
|
60
62
|
|
61
|
-
if @
|
62
|
-
@
|
63
|
+
if @by_name.has_key?(name)
|
64
|
+
@by_name[name]
|
63
65
|
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
64
66
|
"0x#{str[3,4]}".hex
|
65
67
|
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
@@ -73,23 +75,47 @@ class PDF::Reader
|
|
73
75
|
end
|
74
76
|
end
|
75
77
|
|
78
|
+
# attempt to convert a Unicode code point to the equivalent PDF Name. Returns nil
|
79
|
+
# if no conversion is possible.
|
80
|
+
#
|
81
|
+
# h = GlyphHash.new
|
82
|
+
#
|
83
|
+
# h.unicode_to_name(65)
|
84
|
+
# => :A
|
85
|
+
#
|
86
|
+
# h.unicode_to_name(8364)
|
87
|
+
# => :Euro
|
88
|
+
#
|
89
|
+
# h.unicode_to_name(34)
|
90
|
+
# => :34
|
91
|
+
#
|
92
|
+
def unicode_to_name(codepoint)
|
93
|
+
@by_codepoint[codepoint.to_i]
|
94
|
+
end
|
95
|
+
|
76
96
|
private
|
77
97
|
|
78
98
|
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
79
99
|
# a text file supplied by Adobe at:
|
80
100
|
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
81
101
|
def load_adobe_glyph_mapping
|
82
|
-
|
102
|
+
keyed_by_name = {}
|
103
|
+
keyed_by_codepoint = {}
|
83
104
|
|
84
105
|
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
85
106
|
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
86
107
|
f.each do |l|
|
87
108
|
m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
88
|
-
|
109
|
+
if name && code
|
110
|
+
cp = "0x#{code}".hex
|
111
|
+
keyed_by_name[name.to_sym] = cp
|
112
|
+
keyed_by_codepoint[cp] ||= []
|
113
|
+
keyed_by_codepoint[cp] << name.to_sym
|
114
|
+
end
|
89
115
|
end
|
90
116
|
end
|
91
117
|
|
92
|
-
|
118
|
+
[keyed_by_name.freeze, keyed_by_codepoint.freeze]
|
93
119
|
end
|
94
120
|
|
95
121
|
end
|
@@ -39,8 +39,10 @@ class PDF::Reader
|
|
39
39
|
|
40
40
|
m = @metrics.metrics_for(code_point)
|
41
41
|
if m.nil?
|
42
|
-
|
43
|
-
m =
|
42
|
+
names = @font.encoding.int_to_name(code_point)
|
43
|
+
m = names.map { |name|
|
44
|
+
@metrics.metrics_for_name(name)
|
45
|
+
}.compact.first
|
44
46
|
end
|
45
47
|
|
46
48
|
if m
|
@@ -48,7 +50,7 @@ class PDF::Reader
|
|
48
50
|
elsif @font.widths[code_point - 1]
|
49
51
|
@font.widths[code_point - 1]
|
50
52
|
else
|
51
|
-
raise ArgumentError, "Unknown glyph width for #{
|
53
|
+
raise ArgumentError, "Unknown glyph width for #{code_point}"
|
52
54
|
end
|
53
55
|
end
|
54
56
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -219,88 +219,88 @@ extra_rdoc_files:
|
|
219
219
|
- CHANGELOG
|
220
220
|
- MIT-LICENSE
|
221
221
|
files:
|
222
|
-
- examples/extract_fonts.rb
|
223
|
-
- examples/hash.rb
|
224
|
-
- examples/extract_bates.rb
|
225
222
|
- examples/metadata.rb
|
226
|
-
- examples/
|
223
|
+
- examples/callbacks.rb
|
227
224
|
- examples/rspec.rb
|
228
225
|
- examples/page_count.rb
|
229
|
-
- examples/callbacks.rb
|
230
|
-
- examples/text.rb
|
231
226
|
- examples/version.rb
|
232
|
-
-
|
233
|
-
-
|
234
|
-
-
|
235
|
-
-
|
236
|
-
-
|
237
|
-
- lib/pdf
|
238
|
-
- lib/pdf/reader/
|
239
|
-
- lib/pdf/reader/
|
240
|
-
- lib/pdf/reader/
|
241
|
-
- lib/pdf/reader/
|
242
|
-
- lib/pdf/reader/
|
243
|
-
- lib/pdf/reader/page_layout.rb
|
244
|
-
- lib/pdf/reader/font.rb
|
245
|
-
- lib/pdf/reader/print_receiver.rb
|
246
|
-
- lib/pdf/reader/lzw.rb
|
227
|
+
- examples/hash.rb
|
228
|
+
- examples/extract_fonts.rb
|
229
|
+
- examples/text.rb
|
230
|
+
- examples/extract_bates.rb
|
231
|
+
- examples/extract_images.rb
|
232
|
+
- lib/pdf-reader.rb
|
233
|
+
- lib/pdf/reader/error.rb
|
234
|
+
- lib/pdf/reader/filter.rb
|
235
|
+
- lib/pdf/reader/reference.rb
|
236
|
+
- lib/pdf/reader/form_xobject.rb
|
237
|
+
- lib/pdf/reader/metadata_strategy.rb
|
247
238
|
- lib/pdf/reader/buffer.rb
|
248
|
-
- lib/pdf/reader/
|
249
|
-
- lib/pdf/reader/object_stream.rb
|
250
|
-
- lib/pdf/reader/cmap.rb
|
251
|
-
- lib/pdf/reader/text_receiver.rb
|
252
|
-
- lib/pdf/reader/register_receiver.rb
|
253
|
-
- lib/pdf/reader/cid_widths.rb
|
254
|
-
- lib/pdf/reader/page_text_receiver.rb
|
239
|
+
- lib/pdf/reader/parser.rb
|
255
240
|
- lib/pdf/reader/encodings/mac_roman.txt
|
256
|
-
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
257
|
-
- lib/pdf/reader/encodings/symbol.txt
|
258
|
-
- lib/pdf/reader/encodings/win_ansi.txt
|
259
|
-
- lib/pdf/reader/encodings/mac_expert.txt
|
260
241
|
- lib/pdf/reader/encodings/standard.txt
|
242
|
+
- lib/pdf/reader/encodings/symbol.txt
|
261
243
|
- lib/pdf/reader/encodings/pdf_doc.txt
|
262
|
-
- lib/pdf/reader/
|
263
|
-
- lib/pdf/reader/
|
264
|
-
- lib/pdf/reader/
|
265
|
-
- lib/pdf/reader/
|
266
|
-
- lib/pdf/reader/
|
267
|
-
- lib/pdf/reader/
|
268
|
-
- lib/pdf/reader/
|
269
|
-
- lib/pdf/reader/
|
270
|
-
- lib/pdf/reader/
|
271
|
-
- lib/pdf/reader/
|
244
|
+
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
245
|
+
- lib/pdf/reader/encodings/mac_expert.txt
|
246
|
+
- lib/pdf/reader/encodings/win_ansi.txt
|
247
|
+
- lib/pdf/reader/cid_widths.rb
|
248
|
+
- lib/pdf/reader/stream.rb
|
249
|
+
- lib/pdf/reader/object_stream.rb
|
250
|
+
- lib/pdf/reader/cmap.rb
|
251
|
+
- lib/pdf/reader/font_descriptor.rb
|
252
|
+
- lib/pdf/reader/standard_security_handler.rb
|
253
|
+
- lib/pdf/reader/page.rb
|
254
|
+
- lib/pdf/reader/token.rb
|
255
|
+
- lib/pdf/reader/transformation_matrix.rb
|
256
|
+
- lib/pdf/reader/font.rb
|
257
|
+
- lib/pdf/reader/abstract_strategy.rb
|
258
|
+
- lib/pdf/reader/object_cache.rb
|
259
|
+
- lib/pdf/reader/register_receiver.rb
|
260
|
+
- lib/pdf/reader/text_receiver.rb
|
261
|
+
- lib/pdf/reader/synchronized_cache.rb
|
262
|
+
- lib/pdf/reader/resource_methods.rb
|
263
|
+
- lib/pdf/reader/page_state.rb
|
264
|
+
- lib/pdf/reader/pages_strategy.rb
|
272
265
|
- lib/pdf/reader/glyphlist.txt
|
266
|
+
- lib/pdf/reader/print_receiver.rb
|
267
|
+
- lib/pdf/reader/glyph_hash.rb
|
268
|
+
- lib/pdf/reader/text_run.rb
|
269
|
+
- lib/pdf/reader/object_hash.rb
|
270
|
+
- lib/pdf/reader/page_layout.rb
|
271
|
+
- lib/pdf/reader/afm/Times-Bold.afm
|
273
272
|
- lib/pdf/reader/afm/Courier-BoldOblique.afm
|
274
|
-
- lib/pdf/reader/afm/Symbol.afm
|
275
273
|
- lib/pdf/reader/afm/Times-Italic.afm
|
276
|
-
- lib/pdf/reader/afm/Courier-Oblique.afm
|
277
|
-
- lib/pdf/reader/afm/Helvetica-Bold.afm
|
278
|
-
- lib/pdf/reader/afm/Courier-Bold.afm
|
279
|
-
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
280
|
-
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
281
274
|
- lib/pdf/reader/afm/Helvetica.afm
|
282
|
-
- lib/pdf/reader/afm/
|
283
|
-
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
284
|
-
- lib/pdf/reader/afm/Times-Bold.afm
|
275
|
+
- lib/pdf/reader/afm/Courier-Bold.afm
|
285
276
|
- lib/pdf/reader/afm/Times-Roman.afm
|
277
|
+
- lib/pdf/reader/afm/Helvetica-Bold.afm
|
278
|
+
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
279
|
+
- lib/pdf/reader/afm/Courier-Oblique.afm
|
280
|
+
- lib/pdf/reader/afm/ZapfDingbats.afm
|
281
|
+
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
286
282
|
- lib/pdf/reader/afm/Courier.afm
|
287
|
-
- lib/pdf/reader/
|
288
|
-
- lib/pdf/reader/
|
289
|
-
- lib/pdf/reader/
|
290
|
-
- lib/pdf/reader/error.rb
|
291
|
-
- lib/pdf/reader/glyph_hash.rb
|
283
|
+
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
284
|
+
- lib/pdf/reader/afm/Symbol.afm
|
285
|
+
- lib/pdf/reader/encoding.rb
|
292
286
|
- lib/pdf/reader/width_calculator.rb
|
293
|
-
- lib/pdf/reader/
|
294
|
-
- lib/pdf/reader/
|
295
|
-
- lib/pdf/reader/
|
296
|
-
- lib/pdf/reader/
|
297
|
-
- lib/pdf/reader/
|
298
|
-
- lib/pdf/reader/
|
299
|
-
- lib/pdf/reader/
|
300
|
-
- lib/pdf/reader/
|
301
|
-
- lib/pdf/reader/
|
302
|
-
- lib/pdf/reader/
|
303
|
-
- lib/pdf
|
287
|
+
- lib/pdf/reader/xref.rb
|
288
|
+
- lib/pdf/reader/filter/flate.rb
|
289
|
+
- lib/pdf/reader/filter/depredict.rb
|
290
|
+
- lib/pdf/reader/filter/null.rb
|
291
|
+
- lib/pdf/reader/filter/ascii_hex.rb
|
292
|
+
- lib/pdf/reader/filter/ascii85.rb
|
293
|
+
- lib/pdf/reader/filter/run_length.rb
|
294
|
+
- lib/pdf/reader/filter/lzw.rb
|
295
|
+
- lib/pdf/reader/width_calculator/built_in.rb
|
296
|
+
- lib/pdf/reader/width_calculator/true_type.rb
|
297
|
+
- lib/pdf/reader/width_calculator/composite.rb
|
298
|
+
- lib/pdf/reader/width_calculator/type_zero.rb
|
299
|
+
- lib/pdf/reader/width_calculator/type_one_or_three.rb
|
300
|
+
- lib/pdf/reader/page_text_receiver.rb
|
301
|
+
- lib/pdf/reader/lzw.rb
|
302
|
+
- lib/pdf/hash.rb
|
303
|
+
- lib/pdf/reader.rb
|
304
304
|
- Rakefile
|
305
305
|
- README.rdoc
|
306
306
|
- TODO
|