pdf-reader 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ v1.3.2 (26th February 2013)
2
+ - various bug fixes
3
+
1
4
  v1.3.1 (12th February 2013)
2
5
  - various bug fixes
3
6
 
@@ -76,7 +76,7 @@ class PDF::Reader
76
76
  byte = val.to_i
77
77
  else
78
78
  @differences[byte] = val
79
- @mapping[byte] = names_to_unicode[val]
79
+ @mapping[byte] = glyphlist.name_to_unicode(val)
80
80
  byte += 1
81
81
  end
82
82
  end
@@ -116,15 +116,15 @@ class PDF::Reader
116
116
  # int_to_name(65)
117
117
  # => :A
118
118
  #
119
- # TODO: this needs to be expanded to return the appropriate name for standard
120
- # glyph codes in the encoding. 65 to :A, etc. At the moment it only
121
- # handles glyphs in the difference table
122
- #
123
119
  def int_to_name(glyph_code)
124
120
  if @enc_name == "Identity-H" || @enc_name == "Identity-V"
125
- nil
121
+ []
122
+ elsif differences[glyph_code]
123
+ [differences[glyph_code]]
124
+ elsif @mapping[glyph_code]
125
+ glyphlist.unicode_to_name(@mapping[glyph_code])
126
126
  else
127
- @differences[glyph_code]
127
+ []
128
128
  end
129
129
  end
130
130
 
@@ -189,8 +189,8 @@ class PDF::Reader
189
189
  @mapping.size > 0
190
190
  end
191
191
 
192
- def names_to_unicode
193
- @names_to_unicode ||= PDF::Reader::GlyphHash.new
192
+ def glyphlist
193
+ @glyphlist ||= PDF::Reader::GlyphHash.new
194
194
  end
195
195
 
196
196
  def load_mapping(file)
@@ -32,7 +32,9 @@ class PDF::Reader
32
32
  class GlyphHash # :nodoc:
33
33
  def initialize
34
34
  # only parse the glyph list once, and cache the results (for performance)
35
- @adobe = @@cache ||= load_adobe_glyph_mapping
35
+ adobe = @@cache ||= load_adobe_glyph_mapping
36
+ @by_name = adobe.first
37
+ @by_codepoint = adobe.last
36
38
  end
37
39
 
38
40
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -40,26 +42,26 @@ class PDF::Reader
40
42
  #
41
43
  # h = GlyphHash.new
42
44
  #
43
- # h[:A]
45
+ # h.name_to_unicode(:A)
44
46
  # => 65
45
47
  #
46
- # h[:Euro]
48
+ # h.name_to_unicode(:Euro)
47
49
  # => 8364
48
50
  #
49
- # h[:G30]
51
+ # h.name_to_unicode(:G30)
50
52
  # => 48
51
53
  #
52
- # h[:34]
54
+ # h.name_to_unicode(:34)
53
55
  # => 34
54
56
  #
55
- def [](name)
57
+ def name_to_unicode(name)
56
58
  return nil unless name.is_a?(Symbol)
57
59
 
58
60
  name = name.to_s.gsub('_', '').intern
59
61
  str = name.to_s
60
62
 
61
- if @adobe.has_key?(name)
62
- @adobe[name]
63
+ if @by_name.has_key?(name)
64
+ @by_name[name]
63
65
  elsif str.match(/\Auni[A-F\d]{4}\Z/)
64
66
  "0x#{str[3,4]}".hex
65
67
  elsif str.match(/\Au[A-F\d]{4,6}\Z/)
@@ -73,23 +75,47 @@ class PDF::Reader
73
75
  end
74
76
  end
75
77
 
78
+ # attempt to convert a Unicode code point to the equivalent PDF Name. Returns nil
79
+ # if no conversion is possible.
80
+ #
81
+ # h = GlyphHash.new
82
+ #
83
+ # h.unicode_to_name(65)
84
+ # => :A
85
+ #
86
+ # h.unicode_to_name(8364)
87
+ # => :Euro
88
+ #
89
+ # h.unicode_to_name(34)
90
+ # => :34
91
+ #
92
+ def unicode_to_name(codepoint)
93
+ @by_codepoint[codepoint.to_i]
94
+ end
95
+
76
96
  private
77
97
 
78
98
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
79
99
  # a text file supplied by Adobe at:
80
100
  # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
81
101
  def load_adobe_glyph_mapping
82
- glyphs = {}
102
+ keyed_by_name = {}
103
+ keyed_by_codepoint = {}
83
104
 
84
105
  RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
85
106
  File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
86
107
  f.each do |l|
87
108
  m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
88
- glyphs[name.to_sym] = "0x#{code}".hex if name
109
+ if name && code
110
+ cp = "0x#{code}".hex
111
+ keyed_by_name[name.to_sym] = cp
112
+ keyed_by_codepoint[cp] ||= []
113
+ keyed_by_codepoint[cp] << name.to_sym
114
+ end
89
115
  end
90
116
  end
91
117
 
92
- glyphs.freeze
118
+ [keyed_by_name.freeze, keyed_by_codepoint.freeze]
93
119
  end
94
120
 
95
121
  end
@@ -39,8 +39,10 @@ class PDF::Reader
39
39
 
40
40
  m = @metrics.metrics_for(code_point)
41
41
  if m.nil?
42
- name = @font.encoding.int_to_name(code_point)
43
- m = @metrics.metrics_for_name(name)
42
+ names = @font.encoding.int_to_name(code_point)
43
+ m = names.map { |name|
44
+ @metrics.metrics_for_name(name)
45
+ }.compact.first
44
46
  end
45
47
 
46
48
  if m
@@ -48,7 +50,7 @@ class PDF::Reader
48
50
  elsif @font.widths[code_point - 1]
49
51
  @font.widths[code_point - 1]
50
52
  else
51
- raise ArgumentError, "Unknown glyph width for #{codepoint}"
53
+ raise ArgumentError, "Unknown glyph width for #{code_point}"
52
54
  end
53
55
  end
54
56
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-12 00:00:00.000000000 Z
12
+ date: 2013-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -219,88 +219,88 @@ extra_rdoc_files:
219
219
  - CHANGELOG
220
220
  - MIT-LICENSE
221
221
  files:
222
- - examples/extract_fonts.rb
223
- - examples/hash.rb
224
- - examples/extract_bates.rb
225
222
  - examples/metadata.rb
226
- - examples/extract_images.rb
223
+ - examples/callbacks.rb
227
224
  - examples/rspec.rb
228
225
  - examples/page_count.rb
229
- - examples/callbacks.rb
230
- - examples/text.rb
231
226
  - examples/version.rb
232
- - lib/pdf/hash.rb
233
- - lib/pdf/reader.rb
234
- - lib/pdf/reader/width_calculator/built_in.rb
235
- - lib/pdf/reader/width_calculator/type_zero.rb
236
- - lib/pdf/reader/width_calculator/true_type.rb
237
- - lib/pdf/reader/width_calculator/composite.rb
238
- - lib/pdf/reader/width_calculator/type_one_or_three.rb
239
- - lib/pdf/reader/xref.rb
240
- - lib/pdf/reader/page.rb
241
- - lib/pdf/reader/transformation_matrix.rb
242
- - lib/pdf/reader/encoding.rb
243
- - lib/pdf/reader/page_layout.rb
244
- - lib/pdf/reader/font.rb
245
- - lib/pdf/reader/print_receiver.rb
246
- - lib/pdf/reader/lzw.rb
227
+ - examples/hash.rb
228
+ - examples/extract_fonts.rb
229
+ - examples/text.rb
230
+ - examples/extract_bates.rb
231
+ - examples/extract_images.rb
232
+ - lib/pdf-reader.rb
233
+ - lib/pdf/reader/error.rb
234
+ - lib/pdf/reader/filter.rb
235
+ - lib/pdf/reader/reference.rb
236
+ - lib/pdf/reader/form_xobject.rb
237
+ - lib/pdf/reader/metadata_strategy.rb
247
238
  - lib/pdf/reader/buffer.rb
248
- - lib/pdf/reader/synchronized_cache.rb
249
- - lib/pdf/reader/object_stream.rb
250
- - lib/pdf/reader/cmap.rb
251
- - lib/pdf/reader/text_receiver.rb
252
- - lib/pdf/reader/register_receiver.rb
253
- - lib/pdf/reader/cid_widths.rb
254
- - lib/pdf/reader/page_text_receiver.rb
239
+ - lib/pdf/reader/parser.rb
255
240
  - lib/pdf/reader/encodings/mac_roman.txt
256
- - lib/pdf/reader/encodings/zapf_dingbats.txt
257
- - lib/pdf/reader/encodings/symbol.txt
258
- - lib/pdf/reader/encodings/win_ansi.txt
259
- - lib/pdf/reader/encodings/mac_expert.txt
260
241
  - lib/pdf/reader/encodings/standard.txt
242
+ - lib/pdf/reader/encodings/symbol.txt
261
243
  - lib/pdf/reader/encodings/pdf_doc.txt
262
- - lib/pdf/reader/filter.rb
263
- - lib/pdf/reader/filter/null.rb
264
- - lib/pdf/reader/filter/flate.rb
265
- - lib/pdf/reader/filter/lzw.rb
266
- - lib/pdf/reader/filter/ascii85.rb
267
- - lib/pdf/reader/filter/ascii_hex.rb
268
- - lib/pdf/reader/filter/run_length.rb
269
- - lib/pdf/reader/filter/depredict.rb
270
- - lib/pdf/reader/object_hash.rb
271
- - lib/pdf/reader/reference.rb
244
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
245
+ - lib/pdf/reader/encodings/mac_expert.txt
246
+ - lib/pdf/reader/encodings/win_ansi.txt
247
+ - lib/pdf/reader/cid_widths.rb
248
+ - lib/pdf/reader/stream.rb
249
+ - lib/pdf/reader/object_stream.rb
250
+ - lib/pdf/reader/cmap.rb
251
+ - lib/pdf/reader/font_descriptor.rb
252
+ - lib/pdf/reader/standard_security_handler.rb
253
+ - lib/pdf/reader/page.rb
254
+ - lib/pdf/reader/token.rb
255
+ - lib/pdf/reader/transformation_matrix.rb
256
+ - lib/pdf/reader/font.rb
257
+ - lib/pdf/reader/abstract_strategy.rb
258
+ - lib/pdf/reader/object_cache.rb
259
+ - lib/pdf/reader/register_receiver.rb
260
+ - lib/pdf/reader/text_receiver.rb
261
+ - lib/pdf/reader/synchronized_cache.rb
262
+ - lib/pdf/reader/resource_methods.rb
263
+ - lib/pdf/reader/page_state.rb
264
+ - lib/pdf/reader/pages_strategy.rb
272
265
  - lib/pdf/reader/glyphlist.txt
266
+ - lib/pdf/reader/print_receiver.rb
267
+ - lib/pdf/reader/glyph_hash.rb
268
+ - lib/pdf/reader/text_run.rb
269
+ - lib/pdf/reader/object_hash.rb
270
+ - lib/pdf/reader/page_layout.rb
271
+ - lib/pdf/reader/afm/Times-Bold.afm
273
272
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
274
- - lib/pdf/reader/afm/Symbol.afm
275
273
  - lib/pdf/reader/afm/Times-Italic.afm
276
- - lib/pdf/reader/afm/Courier-Oblique.afm
277
- - lib/pdf/reader/afm/Helvetica-Bold.afm
278
- - lib/pdf/reader/afm/Courier-Bold.afm
279
- - lib/pdf/reader/afm/Times-BoldItalic.afm
280
- - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
281
274
  - lib/pdf/reader/afm/Helvetica.afm
282
- - lib/pdf/reader/afm/ZapfDingbats.afm
283
- - lib/pdf/reader/afm/Helvetica-Oblique.afm
284
- - lib/pdf/reader/afm/Times-Bold.afm
275
+ - lib/pdf/reader/afm/Courier-Bold.afm
285
276
  - lib/pdf/reader/afm/Times-Roman.afm
277
+ - lib/pdf/reader/afm/Helvetica-Bold.afm
278
+ - lib/pdf/reader/afm/Helvetica-Oblique.afm
279
+ - lib/pdf/reader/afm/Courier-Oblique.afm
280
+ - lib/pdf/reader/afm/ZapfDingbats.afm
281
+ - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
286
282
  - lib/pdf/reader/afm/Courier.afm
287
- - lib/pdf/reader/token.rb
288
- - lib/pdf/reader/parser.rb
289
- - lib/pdf/reader/page_state.rb
290
- - lib/pdf/reader/error.rb
291
- - lib/pdf/reader/glyph_hash.rb
283
+ - lib/pdf/reader/afm/Times-BoldItalic.afm
284
+ - lib/pdf/reader/afm/Symbol.afm
285
+ - lib/pdf/reader/encoding.rb
292
286
  - lib/pdf/reader/width_calculator.rb
293
- - lib/pdf/reader/resource_methods.rb
294
- - lib/pdf/reader/standard_security_handler.rb
295
- - lib/pdf/reader/text_run.rb
296
- - lib/pdf/reader/form_xobject.rb
297
- - lib/pdf/reader/stream.rb
298
- - lib/pdf/reader/pages_strategy.rb
299
- - lib/pdf/reader/abstract_strategy.rb
300
- - lib/pdf/reader/metadata_strategy.rb
301
- - lib/pdf/reader/object_cache.rb
302
- - lib/pdf/reader/font_descriptor.rb
303
- - lib/pdf-reader.rb
287
+ - lib/pdf/reader/xref.rb
288
+ - lib/pdf/reader/filter/flate.rb
289
+ - lib/pdf/reader/filter/depredict.rb
290
+ - lib/pdf/reader/filter/null.rb
291
+ - lib/pdf/reader/filter/ascii_hex.rb
292
+ - lib/pdf/reader/filter/ascii85.rb
293
+ - lib/pdf/reader/filter/run_length.rb
294
+ - lib/pdf/reader/filter/lzw.rb
295
+ - lib/pdf/reader/width_calculator/built_in.rb
296
+ - lib/pdf/reader/width_calculator/true_type.rb
297
+ - lib/pdf/reader/width_calculator/composite.rb
298
+ - lib/pdf/reader/width_calculator/type_zero.rb
299
+ - lib/pdf/reader/width_calculator/type_one_or_three.rb
300
+ - lib/pdf/reader/page_text_receiver.rb
301
+ - lib/pdf/reader/lzw.rb
302
+ - lib/pdf/hash.rb
303
+ - lib/pdf/reader.rb
304
304
  - Rakefile
305
305
  - README.rdoc
306
306
  - TODO