pdf-reader 1.1.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -1,3 +1,6 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  ################################################################################
2
5
  #
3
6
  # Copyright (C) 2008 James Healy (jimmy@deefa.com)
@@ -23,41 +26,29 @@
23
26
  #
24
27
  ################################################################################
25
28
 
29
+ require 'pdf/reader/width_calculator'
30
+
26
31
  class PDF::Reader
32
+ # Represents a single font PDF object and provides some useful methods
33
+ # for extracting info. Mainly used for converting text to UTF-8.
34
+ #
27
35
  class Font
28
- attr_accessor :label, :subtype, :encoding, :descendantfonts, :tounicode
29
- attr_reader :widths, :first_char, :ascent, :descent, :missing_width, :bbox
30
- attr_reader :basefont
31
-
32
- def initialize(ohash = nil, obj = nil)
33
- if ohash.nil? || obj.nil?
34
- $stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
35
- return
36
- end
36
+ attr_accessor :subtype, :encoding, :descendantfonts, :tounicode
37
+ attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
38
+ :cid_widths, :cid_default_width
39
+
40
+ def initialize(ohash, obj)
37
41
  @ohash = ohash
38
42
  @tounicode = nil
39
43
 
40
44
  extract_base_info(obj)
41
45
  extract_descriptor(obj)
42
46
  extract_descendants(obj)
47
+ @width_calc = build_width_calculator
43
48
 
44
49
  @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
45
50
  end
46
51
 
47
- def basefont=(font)
48
- # setup a default encoding for the selected font. It can always be overridden
49
- # with encoding= if required
50
- case font
51
- when "Symbol" then
52
- @encoding = PDF::Reader::Encoding.new("SymbolEncoding")
53
- when "ZapfDingbats" then
54
- @encoding = PDF::Reader::Encoding.new("ZapfDingbatsEncoding")
55
- else
56
- @encoding = nil
57
- end
58
- @basefont = font
59
- end
60
-
61
52
  def to_utf8(params)
62
53
  if @tounicode
63
54
  to_utf8_via_cmap(params)
@@ -66,39 +57,102 @@ class PDF::Reader
66
57
  end
67
58
  end
68
59
 
69
- def glyph_width(c)
70
- @missing_width ||= 0
71
- @widths ||= []
72
- @widths.fetch(c - @first_char, @missing_width)
60
+ def unpack(data)
61
+ data.unpack(encoding.unpack)
62
+ end
63
+
64
+ # looks up the specified codepoint and returns a value that is in (pdf)
65
+ # glyph space, which is 1000 glyph units = 1 text space unit
66
+ def glyph_width(code_point)
67
+ if code_point.is_a?(String)
68
+ code_point = code_point.unpack(encoding.unpack).first
69
+ end
70
+
71
+ @cached_widths ||= {}
72
+ @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
73
  end
74
74
 
75
75
  private
76
76
 
77
+ def default_encoding(font_name)
78
+ case font_name.to_s
79
+ when "Symbol" then
80
+ PDF::Reader::Encoding.new(:SymbolEncoding)
81
+ when "ZapfDingbats" then
82
+ PDF::Reader::Encoding.new(:ZapfDingbatsEncoding)
83
+ else
84
+ PDF::Reader::Encoding.new(:StandardEncoding)
85
+ end
86
+ end
87
+
88
+ def build_width_calculator
89
+ if @subtype == :Type0
90
+ PDF::Reader::WidthCalculator::TypeZero.new(self)
91
+ elsif @subtype == :Type1
92
+ if @font_descriptor.nil?
93
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
94
+ else
95
+ PDF::Reader::WidthCalculator::TypeOneOrThree .new(self)
96
+ end
97
+ elsif @subtype == :Type3
98
+ PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
99
+ elsif @subtype == :TrueType
100
+ if @font_descriptor
101
+ PDF::Reader::WidthCalculator::TrueType.new(self)
102
+ else
103
+ # A TrueType font that isn't embedded. Most readers look for a version on the
104
+ # local system and fall back to a substitute. For now, we go straight to a substitute
105
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
106
+ end
107
+ elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
108
+ PDF::Reader::WidthCalculator::Composite.new(self)
109
+ else
110
+ PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
111
+ end
112
+ end
113
+
77
114
  def extract_base_info(obj)
78
115
  @subtype = @ohash.object(obj[:Subtype])
79
116
  @basefont = @ohash.object(obj[:BaseFont])
80
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
117
+ if @ohash.object(obj[:Encoding])
118
+ @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
119
+ else
120
+ @encoding = default_encoding(@basefont)
121
+ end
81
122
  @widths = @ohash.object(obj[:Widths]) || []
82
123
  @first_char = @ohash.object(obj[:FirstChar])
124
+ @last_char = @ohash.object(obj[:LastChar])
125
+
126
+ # CID Fonts are not required to have a W or DW entry, if they don't exist,
127
+ # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
128
+ @cid_widths = @ohash.object(obj[:W]) || []
129
+ @cid_default_width = @ohash.object(obj[:DW]) || 1000
130
+
83
131
  if obj[:ToUnicode]
132
+ # ToUnicode is optional for Type1 and Type3
84
133
  stream = @ohash.object(obj[:ToUnicode])
85
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
134
+ if stream.is_a?(PDF::Reader::Stream)
135
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
+ end
86
137
  end
87
138
  end
88
139
 
89
140
  def extract_descriptor(obj)
90
- return unless obj[:FontDescriptor]
91
-
92
- fd = @ohash.object(obj[:FontDescriptor])
93
- @ascent = @ohash.object(fd[:Ascent])
94
- @descent = @ohash.object(fd[:Descent])
95
- @missing_width = @ohash.object(fd[:MissingWidth])
96
- @bbox = @ohash.object(fd[:FontBBox])
141
+ if obj[:FontDescriptor]
142
+ # create a font descriptor object if we can, in other words, unless this is
143
+ # a CID Font
144
+ fd = @ohash.object(obj[:FontDescriptor])
145
+ @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
146
+ else
147
+ @font_descriptor = nil
148
+ end
97
149
  end
98
150
 
99
151
  def extract_descendants(obj)
100
152
  return unless obj[:DescendantFonts]
101
-
153
+ # per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
154
+ # A one-element array specifying the CIDFont dictionary that is the
155
+ # descendant of this Type 0 font.
102
156
  descendants = @ohash.object(obj[:DescendantFonts])
103
157
  @descendantfonts = descendants.map { |desc|
104
158
  PDF::Reader::Font.new(@ohash, @ohash.object(desc))
@@ -106,11 +160,16 @@ class PDF::Reader
106
160
  end
107
161
 
108
162
  def to_utf8_via_cmap(params)
109
- if params.class == String
163
+ case params
164
+ when Integer
165
+ [
166
+ @tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
167
+ ].flatten.pack("U*")
168
+ when String
110
169
  params.unpack(encoding.unpack).map { |c|
111
170
  @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
112
- }.pack("U*")
113
- elsif params.class == Array
171
+ }.flatten.pack("U*")
172
+ when Array
114
173
  params.collect { |param| to_utf8_via_cmap(param) }
115
174
  else
116
175
  params
@@ -118,11 +177,16 @@ class PDF::Reader
118
177
  end
119
178
 
120
179
  def to_utf8_via_encoding(params)
121
- raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
180
+ if encoding.kind_of?(String)
181
+ raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
182
+ end
122
183
 
123
- if params.class == String
184
+ case params
185
+ when Integer
186
+ encoding.int_to_utf8_string(params)
187
+ when String
124
188
  encoding.to_utf8(params)
125
- elsif params.class == Array
189
+ when Array
126
190
  params.collect { |param| to_utf8_via_encoding(param) }
127
191
  else
128
192
  params
@@ -0,0 +1,80 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'ttfunk'
5
+
6
+ class PDF::Reader
7
+
8
+ # Font descriptors are outlined in Section 9.8, PDF 32000-1:2008, pp 281-288
9
+ class FontDescriptor
10
+
11
+ attr_reader :font_name, :font_family, :font_stretch, :font_weight,
12
+ :font_bounding_box, :cap_height, :ascent, :descent, :leading,
13
+ :avg_width, :max_width, :missing_width, :italic_angle, :stem_v,
14
+ :x_height, :font_flags
15
+
16
+ def initialize(ohash, fd_hash)
17
+ @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
+ @descent = ohash.object(fd_hash[:Descent]) || 0
19
+ @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
+ @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
+ @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
+ @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
+ @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
+ @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
+ @font_name = ohash.object(fd_hash[:FontName]).to_s
26
+ @leading = ohash.object(fd_hash[:Leading]) || 0
27
+ @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
+ @stem_v = ohash.object(fd_hash[:StemV])
29
+ @x_height = ohash.object(fd_hash[:XHeight])
30
+ @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
+ @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
+ @font_family = ohash.object(fd_hash[:FontFamily])
33
+
34
+ # A FontDescriptor may have an embedded font program in FontFile
35
+ # (Type 1 Font Program), FontFile2 (TrueType font program), or
36
+ # FontFile3 (Other font program as defined by Subtype entry)
37
+ # Subtype entries:
38
+ # 1) Type1C: Type 1 Font Program in Compact Font Format
39
+ # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
+ # 3) OpenType: OpenType Font Program
41
+ # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
+ @font_program_stream = ohash.object(fd_hash[:FontFile2])
43
+ #TODO handle FontFile and FontFile3
44
+
45
+ @is_ttf = true if @font_program_stream
46
+ end
47
+
48
+ def glyph_width(char_code)
49
+ if @is_ttf
50
+ if ttf_program_stream.cmap.unicode.length > 0
51
+ glyph_id = ttf_program_stream.cmap.unicode.first[char_code]
52
+ else
53
+ glyph_id = char_code
54
+ end
55
+ char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
56
+ if char_metric
57
+ return char_metric.advance_width
58
+ end
59
+ end
60
+ end
61
+
62
+ # PDF states that a glyph is 1000 units wide, true type doesn't enforce
63
+ # any behavior, but uses units/em to define how wide the 'M' is (the widest letter)
64
+ def glyph_to_pdf_scale_factor
65
+ if @is_ttf
66
+ @glyph_to_pdf_sf ||= (1.0 / ttf_program_stream.header.units_per_em) * 1000.0
67
+ else
68
+ @glyph_to_pdf_sf ||= 1.0
69
+ end
70
+ @glyph_to_pdf_sf
71
+ end
72
+
73
+ private
74
+
75
+ def ttf_program_stream
76
+ @ttf_program_stream ||= TTFunk::File.new(@font_program_stream.unfiltered_data)
77
+ end
78
+ end
79
+
80
+ end
@@ -1,4 +1,7 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'digest/md5'
2
5
 
3
6
  module PDF
4
7
  class Reader
@@ -15,9 +18,10 @@ module PDF
15
18
 
16
19
  attr_reader :xobject
17
20
 
18
- def initialize(page, xobject)
21
+ def initialize(page, xobject, options = {})
19
22
  @page = page
20
23
  @objects = page.objects
24
+ @cache = options[:cache] || {}
21
25
  @xobject = @objects.deref(xobject)
22
26
  end
23
27
 
@@ -65,12 +69,30 @@ module PDF
65
69
  end
66
70
  end
67
71
 
72
+ def content_stream_md5
73
+ @content_stream_md5 ||= Digest::MD5.hexdigest(raw_content)
74
+ end
75
+
76
+ def cached_tokens_key
77
+ @cached_tokens_key ||= "tokens-#{content_stream_md5}"
78
+ end
79
+
80
+ def tokens
81
+ @cache[cached_tokens_key] ||= begin
82
+ buffer = Buffer.new(StringIO.new(raw_content), :content_stream => true)
83
+ parser = Parser.new(buffer, @objects)
84
+ result = []
85
+ while (token = parser.parse_token(PagesStrategy::OPERATORS))
86
+ result << token
87
+ end
88
+ result
89
+ end
90
+ end
91
+
68
92
  def content_stream(receivers, instructions)
69
- buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
70
- parser = Parser.new(buffer, @objects)
71
93
  params = []
72
94
 
73
- while (token = parser.parse_token(PagesStrategy::OPERATORS))
95
+ tokens.each do |token|
74
96
  if token.kind_of?(Token) and PagesStrategy::OPERATORS.has_key?(token)
75
97
  callback(receivers, PagesStrategy::OPERATORS[token], params)
76
98
  params.clear
@@ -1,3 +1,6 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  ################################################################################
2
5
  #
3
6
  # Copyright (C) 2011 James Healy (jimmy@deefa.com)
@@ -24,9 +27,15 @@
24
27
  ################################################################################
25
28
 
26
29
  class PDF::Reader
30
+ # A Hash-like object that can convert glyph names into a unicode codepoint.
31
+ # The mapping is read from a data file on disk the first time it's needed.
32
+ #
27
33
  class GlyphHash # :nodoc:
28
34
  def initialize
29
- @adobe = load_adobe_glyph_mapping
35
+ # only parse the glyph list once, and cache the results (for performance)
36
+ adobe = @@cache ||= load_adobe_glyph_mapping
37
+ @by_name = adobe.first
38
+ @by_codepoint = adobe.last
30
39
  end
31
40
 
32
41
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -34,55 +43,84 @@ class PDF::Reader
34
43
  #
35
44
  # h = GlyphHash.new
36
45
  #
37
- # h[:A]
46
+ # h.name_to_unicode(:A)
38
47
  # => 65
39
48
  #
40
- # h[:Euro]
49
+ # h.name_to_unicode(:Euro)
41
50
  # => 8364
42
51
  #
43
- # h[:G30]
52
+ # h.name_to_unicode(:X4A)
53
+ # => 74
54
+ #
55
+ # h.name_to_unicode(:G30)
44
56
  # => 48
45
57
  #
46
- # h[:34]
58
+ # h.name_to_unicode(:34)
59
+ # => 34
47
60
  #
48
- def [](name)
61
+ def name_to_unicode(name)
49
62
  return nil unless name.is_a?(Symbol)
50
63
 
51
64
  name = name.to_s.gsub('_', '').intern
52
65
  str = name.to_s
53
66
 
54
- if @adobe.has_key?(name)
55
- @adobe[name]
67
+ if @by_name.has_key?(name)
68
+ @by_name[name]
69
+ elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
70
+ "0x#{str[1,4]}".hex
56
71
  elsif str.match(/\Auni[A-F\d]{4}\Z/)
57
72
  "0x#{str[3,4]}".hex
58
73
  elsif str.match(/\Au[A-F\d]{4,6}\Z/)
59
74
  "0x#{str[1,6]}".hex
60
- elsif str.match(/\A[A-Za-z]\d{1,4}\Z/)
61
- str[1,4].to_i
62
- elsif str.match(/\A[A-Za-z]{2}\d{2,4}\Z/)
63
- str[2,4].to_i
75
+ elsif str.match(/\A[A-Za-z]\d{1,5}\Z/)
76
+ str[1,5].to_i
77
+ elsif str.match(/\A[A-Za-z]{2}\d{2,5}\Z/)
78
+ str[2,5].to_i
64
79
  else
65
80
  nil
66
81
  end
67
82
  end
68
83
 
84
+ # attempt to convert a Unicode code point to the equivalent PDF Name. Returns nil
85
+ # if no conversion is possible.
86
+ #
87
+ # h = GlyphHash.new
88
+ #
89
+ # h.unicode_to_name(65)
90
+ # => [:A]
91
+ #
92
+ # h.unicode_to_name(8364)
93
+ # => [:Euro]
94
+ #
95
+ # h.unicode_to_name(34)
96
+ # => [:34]
97
+ #
98
+ def unicode_to_name(codepoint)
99
+ @by_codepoint[codepoint.to_i] || []
100
+ end
101
+
69
102
  private
70
103
 
71
104
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
72
105
  # a text file supplied by Adobe at:
73
106
  # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
74
107
  def load_adobe_glyph_mapping
75
- glyphs = {}
108
+ keyed_by_name = {}
109
+ keyed_by_codepoint = {}
76
110
 
77
- RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
78
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
111
+ File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
79
112
  f.each do |l|
80
- m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
81
- glyphs[name.to_sym] = "0x#{code}".hex if name
113
+ _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
114
+ if name && code
115
+ cp = "0x#{code}".hex
116
+ keyed_by_name[name.to_sym] = cp
117
+ keyed_by_codepoint[cp] ||= []
118
+ keyed_by_codepoint[cp] << name.to_sym
119
+ end
82
120
  end
83
121
  end
84
122
 
85
- glyphs
123
+ [keyed_by_name.freeze, keyed_by_codepoint.freeze]
86
124
  end
87
125
 
88
126
  end