pdf-reader 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ v1.3.2 (26th February 2013)
2
+ - various bug fixes
3
+
1
4
  v1.3.1 (12th February 2013)
2
5
  - various bug fixes
3
6
 
@@ -76,7 +76,7 @@ class PDF::Reader
76
76
  byte = val.to_i
77
77
  else
78
78
  @differences[byte] = val
79
- @mapping[byte] = names_to_unicode[val]
79
+ @mapping[byte] = glyphlist.name_to_unicode(val)
80
80
  byte += 1
81
81
  end
82
82
  end
@@ -116,15 +116,15 @@ class PDF::Reader
116
116
  # int_to_name(65)
117
117
  # => :A
118
118
  #
119
- # TODO: this needs to be expanded to return the appropriate name for standard
120
- # glyph codes in the encoding. 65 to :A, etc. At the moment it only
121
- # handles glyphs in the difference table
122
- #
123
119
  def int_to_name(glyph_code)
124
120
  if @enc_name == "Identity-H" || @enc_name == "Identity-V"
125
- nil
121
+ []
122
+ elsif differences[glyph_code]
123
+ [differences[glyph_code]]
124
+ elsif @mapping[glyph_code]
125
+ glyphlist.unicode_to_name(@mapping[glyph_code])
126
126
  else
127
- @differences[glyph_code]
127
+ []
128
128
  end
129
129
  end
130
130
 
@@ -189,8 +189,8 @@ class PDF::Reader
189
189
  @mapping.size > 0
190
190
  end
191
191
 
192
- def names_to_unicode
193
- @names_to_unicode ||= PDF::Reader::GlyphHash.new
192
+ def glyphlist
193
+ @glyphlist ||= PDF::Reader::GlyphHash.new
194
194
  end
195
195
 
196
196
  def load_mapping(file)
@@ -32,7 +32,9 @@ class PDF::Reader
32
32
  class GlyphHash # :nodoc:
33
33
  def initialize
34
34
  # only parse the glyph list once, and cache the results (for performance)
35
- @adobe = @@cache ||= load_adobe_glyph_mapping
35
+ adobe = @@cache ||= load_adobe_glyph_mapping
36
+ @by_name = adobe.first
37
+ @by_codepoint = adobe.last
36
38
  end
37
39
 
38
40
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -40,26 +42,26 @@ class PDF::Reader
40
42
  #
41
43
  # h = GlyphHash.new
42
44
  #
43
- # h[:A]
45
+ # h.name_to_unicode(:A)
44
46
  # => 65
45
47
  #
46
- # h[:Euro]
48
+ # h.name_to_unicode(:Euro)
47
49
  # => 8364
48
50
  #
49
- # h[:G30]
51
+ # h.name_to_unicode(:G30)
50
52
  # => 48
51
53
  #
52
- # h[:34]
54
+ # h.name_to_unicode(:34)
53
55
  # => 34
54
56
  #
55
- def [](name)
57
+ def name_to_unicode(name)
56
58
  return nil unless name.is_a?(Symbol)
57
59
 
58
60
  name = name.to_s.gsub('_', '').intern
59
61
  str = name.to_s
60
62
 
61
- if @adobe.has_key?(name)
62
- @adobe[name]
63
+ if @by_name.has_key?(name)
64
+ @by_name[name]
63
65
  elsif str.match(/\Auni[A-F\d]{4}\Z/)
64
66
  "0x#{str[3,4]}".hex
65
67
  elsif str.match(/\Au[A-F\d]{4,6}\Z/)
@@ -73,23 +75,47 @@ class PDF::Reader
73
75
  end
74
76
  end
75
77
 
78
+ # attempt to convert a Unicode code point to the equivalent PDF Name. Returns nil
79
+ # if no conversion is possible.
80
+ #
81
+ # h = GlyphHash.new
82
+ #
83
+ # h.unicode_to_name(65)
84
+ # => :A
85
+ #
86
+ # h.unicode_to_name(8364)
87
+ # => :Euro
88
+ #
89
+ # h.unicode_to_name(34)
90
+ # => :34
91
+ #
92
+ def unicode_to_name(codepoint)
93
+ @by_codepoint[codepoint.to_i]
94
+ end
95
+
76
96
  private
77
97
 
78
98
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
79
99
  # a text file supplied by Adobe at:
80
100
  # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
81
101
  def load_adobe_glyph_mapping
82
- glyphs = {}
102
+ keyed_by_name = {}
103
+ keyed_by_codepoint = {}
83
104
 
84
105
  RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
85
106
  File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
86
107
  f.each do |l|
87
108
  m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
88
- glyphs[name.to_sym] = "0x#{code}".hex if name
109
+ if name && code
110
+ cp = "0x#{code}".hex
111
+ keyed_by_name[name.to_sym] = cp
112
+ keyed_by_codepoint[cp] ||= []
113
+ keyed_by_codepoint[cp] << name.to_sym
114
+ end
89
115
  end
90
116
  end
91
117
 
92
- glyphs.freeze
118
+ [keyed_by_name.freeze, keyed_by_codepoint.freeze]
93
119
  end
94
120
 
95
121
  end
@@ -39,8 +39,10 @@ class PDF::Reader
39
39
 
40
40
  m = @metrics.metrics_for(code_point)
41
41
  if m.nil?
42
- name = @font.encoding.int_to_name(code_point)
43
- m = @metrics.metrics_for_name(name)
42
+ names = @font.encoding.int_to_name(code_point)
43
+ m = names.map { |name|
44
+ @metrics.metrics_for_name(name)
45
+ }.compact.first
44
46
  end
45
47
 
46
48
  if m
@@ -48,7 +50,7 @@ class PDF::Reader
48
50
  elsif @font.widths[code_point - 1]
49
51
  @font.widths[code_point - 1]
50
52
  else
51
- raise ArgumentError, "Unknown glyph width for #{codepoint}"
53
+ raise ArgumentError, "Unknown glyph width for #{code_point}"
52
54
  end
53
55
  end
54
56
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-12 00:00:00.000000000 Z
12
+ date: 2013-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -219,88 +219,88 @@ extra_rdoc_files:
219
219
  - CHANGELOG
220
220
  - MIT-LICENSE
221
221
  files:
222
- - examples/extract_fonts.rb
223
- - examples/hash.rb
224
- - examples/extract_bates.rb
225
222
  - examples/metadata.rb
226
- - examples/extract_images.rb
223
+ - examples/callbacks.rb
227
224
  - examples/rspec.rb
228
225
  - examples/page_count.rb
229
- - examples/callbacks.rb
230
- - examples/text.rb
231
226
  - examples/version.rb
232
- - lib/pdf/hash.rb
233
- - lib/pdf/reader.rb
234
- - lib/pdf/reader/width_calculator/built_in.rb
235
- - lib/pdf/reader/width_calculator/type_zero.rb
236
- - lib/pdf/reader/width_calculator/true_type.rb
237
- - lib/pdf/reader/width_calculator/composite.rb
238
- - lib/pdf/reader/width_calculator/type_one_or_three.rb
239
- - lib/pdf/reader/xref.rb
240
- - lib/pdf/reader/page.rb
241
- - lib/pdf/reader/transformation_matrix.rb
242
- - lib/pdf/reader/encoding.rb
243
- - lib/pdf/reader/page_layout.rb
244
- - lib/pdf/reader/font.rb
245
- - lib/pdf/reader/print_receiver.rb
246
- - lib/pdf/reader/lzw.rb
227
+ - examples/hash.rb
228
+ - examples/extract_fonts.rb
229
+ - examples/text.rb
230
+ - examples/extract_bates.rb
231
+ - examples/extract_images.rb
232
+ - lib/pdf-reader.rb
233
+ - lib/pdf/reader/error.rb
234
+ - lib/pdf/reader/filter.rb
235
+ - lib/pdf/reader/reference.rb
236
+ - lib/pdf/reader/form_xobject.rb
237
+ - lib/pdf/reader/metadata_strategy.rb
247
238
  - lib/pdf/reader/buffer.rb
248
- - lib/pdf/reader/synchronized_cache.rb
249
- - lib/pdf/reader/object_stream.rb
250
- - lib/pdf/reader/cmap.rb
251
- - lib/pdf/reader/text_receiver.rb
252
- - lib/pdf/reader/register_receiver.rb
253
- - lib/pdf/reader/cid_widths.rb
254
- - lib/pdf/reader/page_text_receiver.rb
239
+ - lib/pdf/reader/parser.rb
255
240
  - lib/pdf/reader/encodings/mac_roman.txt
256
- - lib/pdf/reader/encodings/zapf_dingbats.txt
257
- - lib/pdf/reader/encodings/symbol.txt
258
- - lib/pdf/reader/encodings/win_ansi.txt
259
- - lib/pdf/reader/encodings/mac_expert.txt
260
241
  - lib/pdf/reader/encodings/standard.txt
242
+ - lib/pdf/reader/encodings/symbol.txt
261
243
  - lib/pdf/reader/encodings/pdf_doc.txt
262
- - lib/pdf/reader/filter.rb
263
- - lib/pdf/reader/filter/null.rb
264
- - lib/pdf/reader/filter/flate.rb
265
- - lib/pdf/reader/filter/lzw.rb
266
- - lib/pdf/reader/filter/ascii85.rb
267
- - lib/pdf/reader/filter/ascii_hex.rb
268
- - lib/pdf/reader/filter/run_length.rb
269
- - lib/pdf/reader/filter/depredict.rb
270
- - lib/pdf/reader/object_hash.rb
271
- - lib/pdf/reader/reference.rb
244
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
245
+ - lib/pdf/reader/encodings/mac_expert.txt
246
+ - lib/pdf/reader/encodings/win_ansi.txt
247
+ - lib/pdf/reader/cid_widths.rb
248
+ - lib/pdf/reader/stream.rb
249
+ - lib/pdf/reader/object_stream.rb
250
+ - lib/pdf/reader/cmap.rb
251
+ - lib/pdf/reader/font_descriptor.rb
252
+ - lib/pdf/reader/standard_security_handler.rb
253
+ - lib/pdf/reader/page.rb
254
+ - lib/pdf/reader/token.rb
255
+ - lib/pdf/reader/transformation_matrix.rb
256
+ - lib/pdf/reader/font.rb
257
+ - lib/pdf/reader/abstract_strategy.rb
258
+ - lib/pdf/reader/object_cache.rb
259
+ - lib/pdf/reader/register_receiver.rb
260
+ - lib/pdf/reader/text_receiver.rb
261
+ - lib/pdf/reader/synchronized_cache.rb
262
+ - lib/pdf/reader/resource_methods.rb
263
+ - lib/pdf/reader/page_state.rb
264
+ - lib/pdf/reader/pages_strategy.rb
272
265
  - lib/pdf/reader/glyphlist.txt
266
+ - lib/pdf/reader/print_receiver.rb
267
+ - lib/pdf/reader/glyph_hash.rb
268
+ - lib/pdf/reader/text_run.rb
269
+ - lib/pdf/reader/object_hash.rb
270
+ - lib/pdf/reader/page_layout.rb
271
+ - lib/pdf/reader/afm/Times-Bold.afm
273
272
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
274
- - lib/pdf/reader/afm/Symbol.afm
275
273
  - lib/pdf/reader/afm/Times-Italic.afm
276
- - lib/pdf/reader/afm/Courier-Oblique.afm
277
- - lib/pdf/reader/afm/Helvetica-Bold.afm
278
- - lib/pdf/reader/afm/Courier-Bold.afm
279
- - lib/pdf/reader/afm/Times-BoldItalic.afm
280
- - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
281
274
  - lib/pdf/reader/afm/Helvetica.afm
282
- - lib/pdf/reader/afm/ZapfDingbats.afm
283
- - lib/pdf/reader/afm/Helvetica-Oblique.afm
284
- - lib/pdf/reader/afm/Times-Bold.afm
275
+ - lib/pdf/reader/afm/Courier-Bold.afm
285
276
  - lib/pdf/reader/afm/Times-Roman.afm
277
+ - lib/pdf/reader/afm/Helvetica-Bold.afm
278
+ - lib/pdf/reader/afm/Helvetica-Oblique.afm
279
+ - lib/pdf/reader/afm/Courier-Oblique.afm
280
+ - lib/pdf/reader/afm/ZapfDingbats.afm
281
+ - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
286
282
  - lib/pdf/reader/afm/Courier.afm
287
- - lib/pdf/reader/token.rb
288
- - lib/pdf/reader/parser.rb
289
- - lib/pdf/reader/page_state.rb
290
- - lib/pdf/reader/error.rb
291
- - lib/pdf/reader/glyph_hash.rb
283
+ - lib/pdf/reader/afm/Times-BoldItalic.afm
284
+ - lib/pdf/reader/afm/Symbol.afm
285
+ - lib/pdf/reader/encoding.rb
292
286
  - lib/pdf/reader/width_calculator.rb
293
- - lib/pdf/reader/resource_methods.rb
294
- - lib/pdf/reader/standard_security_handler.rb
295
- - lib/pdf/reader/text_run.rb
296
- - lib/pdf/reader/form_xobject.rb
297
- - lib/pdf/reader/stream.rb
298
- - lib/pdf/reader/pages_strategy.rb
299
- - lib/pdf/reader/abstract_strategy.rb
300
- - lib/pdf/reader/metadata_strategy.rb
301
- - lib/pdf/reader/object_cache.rb
302
- - lib/pdf/reader/font_descriptor.rb
303
- - lib/pdf-reader.rb
287
+ - lib/pdf/reader/xref.rb
288
+ - lib/pdf/reader/filter/flate.rb
289
+ - lib/pdf/reader/filter/depredict.rb
290
+ - lib/pdf/reader/filter/null.rb
291
+ - lib/pdf/reader/filter/ascii_hex.rb
292
+ - lib/pdf/reader/filter/ascii85.rb
293
+ - lib/pdf/reader/filter/run_length.rb
294
+ - lib/pdf/reader/filter/lzw.rb
295
+ - lib/pdf/reader/width_calculator/built_in.rb
296
+ - lib/pdf/reader/width_calculator/true_type.rb
297
+ - lib/pdf/reader/width_calculator/composite.rb
298
+ - lib/pdf/reader/width_calculator/type_zero.rb
299
+ - lib/pdf/reader/width_calculator/type_one_or_three.rb
300
+ - lib/pdf/reader/page_text_receiver.rb
301
+ - lib/pdf/reader/lzw.rb
302
+ - lib/pdf/hash.rb
303
+ - lib/pdf/reader.rb
304
304
  - Rakefile
305
305
  - README.rdoc
306
306
  - TODO