pdf-reader 1.1.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -1,3 +1,6 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  ################################################################################
2
5
  #
3
6
  # Copyright (C) 2008 James Healy (jimmy@deefa.com)
@@ -23,41 +26,29 @@
23
26
  #
24
27
  ################################################################################
25
28
 
29
+ require 'pdf/reader/width_calculator'
30
+
26
31
  class PDF::Reader
32
+ # Represents a single font PDF object and provides some useful methods
33
+ # for extracting info. Mainly used for converting text to UTF-8.
34
+ #
27
35
  class Font
28
- attr_accessor :label, :subtype, :encoding, :descendantfonts, :tounicode
29
- attr_reader :widths, :first_char, :ascent, :descent, :missing_width, :bbox
30
- attr_reader :basefont
31
-
32
- def initialize(ohash = nil, obj = nil)
33
- if ohash.nil? || obj.nil?
34
- $stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
35
- return
36
- end
36
+ attr_accessor :subtype, :encoding, :descendantfonts, :tounicode
37
+ attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
38
+ :cid_widths, :cid_default_width
39
+
40
+ def initialize(ohash, obj)
37
41
  @ohash = ohash
38
42
  @tounicode = nil
39
43
 
40
44
  extract_base_info(obj)
41
45
  extract_descriptor(obj)
42
46
  extract_descendants(obj)
47
+ @width_calc = build_width_calculator
43
48
 
44
49
  @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
45
50
  end
46
51
 
47
- def basefont=(font)
48
- # setup a default encoding for the selected font. It can always be overridden
49
- # with encoding= if required
50
- case font
51
- when "Symbol" then
52
- @encoding = PDF::Reader::Encoding.new("SymbolEncoding")
53
- when "ZapfDingbats" then
54
- @encoding = PDF::Reader::Encoding.new("ZapfDingbatsEncoding")
55
- else
56
- @encoding = nil
57
- end
58
- @basefont = font
59
- end
60
-
61
52
  def to_utf8(params)
62
53
  if @tounicode
63
54
  to_utf8_via_cmap(params)
@@ -66,39 +57,102 @@ class PDF::Reader
66
57
  end
67
58
  end
68
59
 
69
- def glyph_width(c)
70
- @missing_width ||= 0
71
- @widths ||= []
72
- @widths.fetch(c - @first_char, @missing_width)
60
+ def unpack(data)
61
+ data.unpack(encoding.unpack)
62
+ end
63
+
64
+ # looks up the specified codepoint and returns a value that is in (pdf)
65
+ # glyph space, which is 1000 glyph units = 1 text space unit
66
+ def glyph_width(code_point)
67
+ if code_point.is_a?(String)
68
+ code_point = code_point.unpack(encoding.unpack).first
69
+ end
70
+
71
+ @cached_widths ||= {}
72
+ @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
73
  end
74
74
 
75
75
  private
76
76
 
77
+ def default_encoding(font_name)
78
+ case font_name.to_s
79
+ when "Symbol" then
80
+ PDF::Reader::Encoding.new(:SymbolEncoding)
81
+ when "ZapfDingbats" then
82
+ PDF::Reader::Encoding.new(:ZapfDingbatsEncoding)
83
+ else
84
+ PDF::Reader::Encoding.new(:StandardEncoding)
85
+ end
86
+ end
87
+
88
+ def build_width_calculator
89
+ if @subtype == :Type0
90
+ PDF::Reader::WidthCalculator::TypeZero.new(self)
91
+ elsif @subtype == :Type1
92
+ if @font_descriptor.nil?
93
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
94
+ else
95
+ PDF::Reader::WidthCalculator::TypeOneOrThree .new(self)
96
+ end
97
+ elsif @subtype == :Type3
98
+ PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
99
+ elsif @subtype == :TrueType
100
+ if @font_descriptor
101
+ PDF::Reader::WidthCalculator::TrueType.new(self)
102
+ else
103
+ # A TrueType font that isn't embedded. Most readers look for a version on the
104
+ # local system and fall back to a substitute. For now, we go straight to a substitute
105
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
106
+ end
107
+ elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
108
+ PDF::Reader::WidthCalculator::Composite.new(self)
109
+ else
110
+ PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
111
+ end
112
+ end
113
+
77
114
  def extract_base_info(obj)
78
115
  @subtype = @ohash.object(obj[:Subtype])
79
116
  @basefont = @ohash.object(obj[:BaseFont])
80
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
117
+ if @ohash.object(obj[:Encoding])
118
+ @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
119
+ else
120
+ @encoding = default_encoding(@basefont)
121
+ end
81
122
  @widths = @ohash.object(obj[:Widths]) || []
82
123
  @first_char = @ohash.object(obj[:FirstChar])
124
+ @last_char = @ohash.object(obj[:LastChar])
125
+
126
+ # CID Fonts are not required to have a W or DW entry. If they don't exist,
127
+ # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
128
+ @cid_widths = @ohash.object(obj[:W]) || []
129
+ @cid_default_width = @ohash.object(obj[:DW]) || 1000
130
+
83
131
  if obj[:ToUnicode]
132
+ # ToUnicode is optional for Type1 and Type3
84
133
  stream = @ohash.object(obj[:ToUnicode])
85
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
134
+ if stream.is_a?(PDF::Reader::Stream)
135
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
+ end
86
137
  end
87
138
  end
88
139
 
89
140
  def extract_descriptor(obj)
90
- return unless obj[:FontDescriptor]
91
-
92
- fd = @ohash.object(obj[:FontDescriptor])
93
- @ascent = @ohash.object(fd[:Ascent])
94
- @descent = @ohash.object(fd[:Descent])
95
- @missing_width = @ohash.object(fd[:MissingWidth])
96
- @bbox = @ohash.object(fd[:FontBBox])
141
+ if obj[:FontDescriptor]
142
+ # create a font descriptor object if we can, in other words, unless this is
143
+ # a CID Font
144
+ fd = @ohash.object(obj[:FontDescriptor])
145
+ @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
146
+ else
147
+ @font_descriptor = nil
148
+ end
97
149
  end
98
150
 
99
151
  def extract_descendants(obj)
100
152
  return unless obj[:DescendantFonts]
101
-
153
+ # per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
154
+ # A one-element array specifying the CIDFont dictionary that is the
155
+ # descendant of this Type 0 font.
102
156
  descendants = @ohash.object(obj[:DescendantFonts])
103
157
  @descendantfonts = descendants.map { |desc|
104
158
  PDF::Reader::Font.new(@ohash, @ohash.object(desc))
@@ -106,11 +160,16 @@ class PDF::Reader
106
160
  end
107
161
 
108
162
  def to_utf8_via_cmap(params)
109
- if params.class == String
163
+ case params
164
+ when Integer
165
+ [
166
+ @tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
167
+ ].flatten.pack("U*")
168
+ when String
110
169
  params.unpack(encoding.unpack).map { |c|
111
170
  @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
112
- }.pack("U*")
113
- elsif params.class == Array
171
+ }.flatten.pack("U*")
172
+ when Array
114
173
  params.collect { |param| to_utf8_via_cmap(param) }
115
174
  else
116
175
  params
@@ -118,11 +177,16 @@ class PDF::Reader
118
177
  end
119
178
 
120
179
  def to_utf8_via_encoding(params)
121
- raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
180
+ if encoding.kind_of?(String)
181
+ raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
182
+ end
122
183
 
123
- if params.class == String
184
+ case params
185
+ when Integer
186
+ encoding.int_to_utf8_string(params)
187
+ when String
124
188
  encoding.to_utf8(params)
125
- elsif params.class == Array
189
+ when Array
126
190
  params.collect { |param| to_utf8_via_encoding(param) }
127
191
  else
128
192
  params
@@ -0,0 +1,80 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'ttfunk'
5
+
6
+ class PDF::Reader
7
+
8
+ # Font descriptors are outlined in Section 9.8, PDF 32000-1:2008, pp 281-288
9
+ class FontDescriptor
10
+
11
+ attr_reader :font_name, :font_family, :font_stretch, :font_weight,
12
+ :font_bounding_box, :cap_height, :ascent, :descent, :leading,
13
+ :avg_width, :max_width, :missing_width, :italic_angle, :stem_v,
14
+ :x_height, :font_flags
15
+
16
+ def initialize(ohash, fd_hash)
17
+ @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
+ @descent = ohash.object(fd_hash[:Descent]) || 0
19
+ @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
+ @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
+ @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
+ @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
+ @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
+ @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
+ @font_name = ohash.object(fd_hash[:FontName]).to_s
26
+ @leading = ohash.object(fd_hash[:Leading]) || 0
27
+ @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
+ @stem_v = ohash.object(fd_hash[:StemV])
29
+ @x_height = ohash.object(fd_hash[:XHeight])
30
+ @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
+ @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
+ @font_family = ohash.object(fd_hash[:FontFamily])
33
+
34
+ # A FontDescriptor may have an embedded font program in FontFile
35
+ # (Type 1 Font Program), FontFile2 (TrueType font program), or
36
+ # FontFile3 (Other font program as defined by Subtype entry)
37
+ # Subtype entries:
38
+ # 1) Type1C: Type 1 Font Program in Compact Font Format
39
+ # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
+ # 3) OpenType: OpenType Font Program
41
+ # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
+ @font_program_stream = ohash.object(fd_hash[:FontFile2])
43
+ #TODO handle FontFile and FontFile3
44
+
45
+ @is_ttf = true if @font_program_stream
46
+ end
47
+
48
+ def glyph_width(char_code)
49
+ if @is_ttf
50
+ if ttf_program_stream.cmap.unicode.length > 0
51
+ glyph_id = ttf_program_stream.cmap.unicode.first[char_code]
52
+ else
53
+ glyph_id = char_code
54
+ end
55
+ char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
56
+ if char_metric
57
+ return char_metric.advance_width
58
+ end
59
+ end
60
+ end
61
+
62
+ # PDF states that a glyph is 1000 units wide, true type doesn't enforce
63
+ # any behavior, but uses units/em to define how wide the 'M' is (the widest letter)
64
+ def glyph_to_pdf_scale_factor
65
+ if @is_ttf
66
+ @glyph_to_pdf_sf ||= (1.0 / ttf_program_stream.header.units_per_em) * 1000.0
67
+ else
68
+ @glyph_to_pdf_sf ||= 1.0
69
+ end
70
+ @glyph_to_pdf_sf
71
+ end
72
+
73
+ private
74
+
75
+ def ttf_program_stream
76
+ @ttf_program_stream ||= TTFunk::File.new(@font_program_stream.unfiltered_data)
77
+ end
78
+ end
79
+
80
+ end
@@ -1,4 +1,7 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'digest/md5'
2
5
 
3
6
  module PDF
4
7
  class Reader
@@ -15,9 +18,10 @@ module PDF
15
18
 
16
19
  attr_reader :xobject
17
20
 
18
- def initialize(page, xobject)
21
+ def initialize(page, xobject, options = {})
19
22
  @page = page
20
23
  @objects = page.objects
24
+ @cache = options[:cache] || {}
21
25
  @xobject = @objects.deref(xobject)
22
26
  end
23
27
 
@@ -65,12 +69,30 @@ module PDF
65
69
  end
66
70
  end
67
71
 
72
+ def content_stream_md5
73
+ @content_stream_md5 ||= Digest::MD5.hexdigest(raw_content)
74
+ end
75
+
76
+ def cached_tokens_key
77
+ @cached_tokens_key ||= "tokens-#{content_stream_md5}"
78
+ end
79
+
80
+ def tokens
81
+ @cache[cached_tokens_key] ||= begin
82
+ buffer = Buffer.new(StringIO.new(raw_content), :content_stream => true)
83
+ parser = Parser.new(buffer, @objects)
84
+ result = []
85
+ while (token = parser.parse_token(PagesStrategy::OPERATORS))
86
+ result << token
87
+ end
88
+ result
89
+ end
90
+ end
91
+
68
92
  def content_stream(receivers, instructions)
69
- buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
70
- parser = Parser.new(buffer, @objects)
71
93
  params = []
72
94
 
73
- while (token = parser.parse_token(PagesStrategy::OPERATORS))
95
+ tokens.each do |token|
74
96
  if token.kind_of?(Token) and PagesStrategy::OPERATORS.has_key?(token)
75
97
  callback(receivers, PagesStrategy::OPERATORS[token], params)
76
98
  params.clear
@@ -1,3 +1,6 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  ################################################################################
2
5
  #
3
6
  # Copyright (C) 2011 James Healy (jimmy@deefa.com)
@@ -24,9 +27,15 @@
24
27
  ################################################################################
25
28
 
26
29
  class PDF::Reader
30
+ # A Hash-like object that can convert glyph names into a unicode codepoint.
31
+ # The mapping is read from a data file on disk the first time it's needed.
32
+ #
27
33
  class GlyphHash # :nodoc:
28
34
  def initialize
29
- @adobe = load_adobe_glyph_mapping
35
+ # only parse the glyph list once, and cache the results (for performance)
36
+ adobe = @@cache ||= load_adobe_glyph_mapping
37
+ @by_name = adobe.first
38
+ @by_codepoint = adobe.last
30
39
  end
31
40
 
32
41
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -34,55 +43,84 @@ class PDF::Reader
34
43
  #
35
44
  # h = GlyphHash.new
36
45
  #
37
- # h[:A]
46
+ # h.name_to_unicode(:A)
38
47
  # => 65
39
48
  #
40
- # h[:Euro]
49
+ # h.name_to_unicode(:Euro)
41
50
  # => 8364
42
51
  #
43
- # h[:G30]
52
+ # h.name_to_unicode(:X4A)
53
+ # => 74
54
+ #
55
+ # h.name_to_unicode(:G30)
44
56
  # => 48
45
57
  #
46
- # h[:34]
58
+ # h.name_to_unicode(:34)
59
+ # => 34
47
60
  #
48
- def [](name)
61
+ def name_to_unicode(name)
49
62
  return nil unless name.is_a?(Symbol)
50
63
 
51
64
  name = name.to_s.gsub('_', '').intern
52
65
  str = name.to_s
53
66
 
54
- if @adobe.has_key?(name)
55
- @adobe[name]
67
+ if @by_name.has_key?(name)
68
+ @by_name[name]
69
+ elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
70
+ "0x#{str[1,4]}".hex
56
71
  elsif str.match(/\Auni[A-F\d]{4}\Z/)
57
72
  "0x#{str[3,4]}".hex
58
73
  elsif str.match(/\Au[A-F\d]{4,6}\Z/)
59
74
  "0x#{str[1,6]}".hex
60
- elsif str.match(/\A[A-Za-z]\d{1,4}\Z/)
61
- str[1,4].to_i
62
- elsif str.match(/\A[A-Za-z]{2}\d{2,4}\Z/)
63
- str[2,4].to_i
75
+ elsif str.match(/\A[A-Za-z]\d{1,5}\Z/)
76
+ str[1,5].to_i
77
+ elsif str.match(/\A[A-Za-z]{2}\d{2,5}\Z/)
78
+ str[2,5].to_i
64
79
  else
65
80
  nil
66
81
  end
67
82
  end
68
83
 
84
+ # attempt to convert a Unicode code point to the equivalent PDF Names. Returns an
85
+ # empty array if no conversion is possible.
86
+ #
87
+ # h = GlyphHash.new
88
+ #
89
+ # h.unicode_to_name(65)
90
+ # => [:A]
91
+ #
92
+ # h.unicode_to_name(8364)
93
+ # => [:Euro]
94
+ #
95
+ # h.unicode_to_name(34)
96
+ # => [:34]
97
+ #
98
+ def unicode_to_name(codepoint)
99
+ @by_codepoint[codepoint.to_i] || []
100
+ end
101
+
69
102
  private
70
103
 
71
104
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
72
105
  # a text file supplied by Adobe at:
73
106
  # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
74
107
  def load_adobe_glyph_mapping
75
- glyphs = {}
108
+ keyed_by_name = {}
109
+ keyed_by_codepoint = {}
76
110
 
77
- RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
78
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
111
+ File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
79
112
  f.each do |l|
80
- m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
81
- glyphs[name.to_sym] = "0x#{code}".hex if name
113
+ _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
114
+ if name && code
115
+ cp = "0x#{code}".hex
116
+ keyed_by_name[name.to_sym] = cp
117
+ keyed_by_codepoint[cp] ||= []
118
+ keyed_by_codepoint[cp] << name.to_sym
119
+ end
82
120
  end
83
121
  end
84
122
 
85
- glyphs
123
+ [keyed_by_name.freeze, keyed_by_codepoint.freeze]
86
124
  end
87
125
 
88
126
  end