swivel 0.0.146 → 0.0.149

Sign up to get free protection for your applications and to get access to all the features.
Files changed (183) hide show
  1. data/README +4 -1
  2. data/Rakefile +1 -1
  3. data/lib/swivel.rb +1 -1
  4. data/lib/swivel2/benchmarking.rb +1 -0
  5. data/lib/swivel2/config.rb +45 -0
  6. data/lib/swivel2/connection.rb +89 -0
  7. data/lib/swivel2/formats.rb +11 -0
  8. data/lib/swivel2/logging.rb +1 -0
  9. data/lib/swivel2/performance.rb +21 -0
  10. data/lib/swivel2/response.rb +5 -0
  11. data/lib/swivel2/swivelrc.default +5 -0
  12. data/vendor/activeresource-2.0.2-/CHANGELOG +223 -0
  13. data/vendor/activeresource-2.0.2-/README +165 -0
  14. data/vendor/activeresource-2.0.2-/Rakefile +133 -0
  15. data/vendor/activeresource-2.0.2-/lib/active_resource.rb +47 -0
  16. data/vendor/activeresource-2.0.2-/lib/active_resource/base.rb +872 -0
  17. data/vendor/activeresource-2.0.2-/lib/active_resource/connection.rb +172 -0
  18. data/vendor/activeresource-2.0.2-/lib/active_resource/custom_methods.rb +105 -0
  19. data/vendor/activeresource-2.0.2-/lib/active_resource/formats.rb +14 -0
  20. data/vendor/activeresource-2.0.2-/lib/active_resource/formats/json_format.rb +23 -0
  21. data/vendor/activeresource-2.0.2-/lib/active_resource/formats/xml_format.rb +34 -0
  22. data/vendor/activeresource-2.0.2-/lib/active_resource/http_mock.rb +147 -0
  23. data/vendor/activeresource-2.0.2-/lib/active_resource/validations.rb +288 -0
  24. data/vendor/activeresource-2.0.2-/lib/active_resource/version.rb +9 -0
  25. data/vendor/activeresource-2.0.2-/lib/activeresource.rb +1 -0
  26. data/vendor/activeresource-2.0.2-/test/abstract_unit.rb +10 -0
  27. data/vendor/activeresource-2.0.2-/test/authorization_test.rb +82 -0
  28. data/vendor/activeresource-2.0.2-/test/base/custom_methods_test.rb +96 -0
  29. data/vendor/activeresource-2.0.2-/test/base/equality_test.rb +43 -0
  30. data/vendor/activeresource-2.0.2-/test/base/load_test.rb +111 -0
  31. data/vendor/activeresource-2.0.2-/test/base_errors_test.rb +48 -0
  32. data/vendor/activeresource-2.0.2-/test/base_test.rb +454 -0
  33. data/vendor/activeresource-2.0.2-/test/connection_test.rb +170 -0
  34. data/vendor/activeresource-2.0.2-/test/fixtures/beast.rb +14 -0
  35. data/vendor/activeresource-2.0.2-/test/fixtures/person.rb +3 -0
  36. data/vendor/activeresource-2.0.2-/test/fixtures/street_address.rb +4 -0
  37. data/vendor/activeresource-2.0.2-/test/format_test.rb +42 -0
  38. data/vendor/activeresource-2.0.2-/test/setter_trap.rb +27 -0
  39. data/vendor/activesupport-2.0.2-/CHANGELOG +986 -0
  40. data/vendor/activesupport-2.0.2-/README +43 -0
  41. data/vendor/activesupport-2.0.2-/lib/active_support.rb +49 -0
  42. data/vendor/activesupport-2.0.2-/lib/active_support/basic_object.rb +5 -0
  43. data/vendor/activesupport-2.0.2-/lib/active_support/buffered_logger.rb +107 -0
  44. data/vendor/activesupport-2.0.2-/lib/active_support/clean_logger.rb +127 -0
  45. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext.rb +4 -0
  46. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/array.rb +13 -0
  47. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/array/access.rb +28 -0
  48. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/array/conversions.rb +94 -0
  49. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/array/extract_options.rb +19 -0
  50. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/array/grouping.rb +68 -0
  51. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/array/random_access.rb +12 -0
  52. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/bigdecimal.rb +2 -0
  53. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/bigdecimal/conversions.rb +6 -0
  54. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/blank.rb +50 -0
  55. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/cgi.rb +5 -0
  56. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/cgi/escape_skipping_slashes.rb +14 -0
  57. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/class.rb +4 -0
  58. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/class/attribute_accessors.rb +48 -0
  59. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/class/delegating_attributes.rb +40 -0
  60. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/class/inheritable_attributes.rb +140 -0
  61. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/class/removal.rb +24 -0
  62. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date.rb +10 -0
  63. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date/behavior.rb +13 -0
  64. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date/calculations.rb +188 -0
  65. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date/conversions.rb +98 -0
  66. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date_time.rb +10 -0
  67. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date_time/calculations.rb +77 -0
  68. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/date_time/conversions.rb +74 -0
  69. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/duplicable.rb +37 -0
  70. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/enumerable.rb +63 -0
  71. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/exception.rb +33 -0
  72. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/file.rb +21 -0
  73. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/float.rb +5 -0
  74. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/float/rounding.rb +24 -0
  75. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash.rb +13 -0
  76. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/conversions.rb +242 -0
  77. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/diff.rb +19 -0
  78. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/except.rb +24 -0
  79. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/indifferent_access.rb +102 -0
  80. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/keys.rb +54 -0
  81. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/reverse_merge.rb +25 -0
  82. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/hash/slice.rb +28 -0
  83. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/integer.rb +7 -0
  84. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/integer/even_odd.rb +24 -0
  85. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/integer/inflections.rb +21 -0
  86. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/kernel.rb +5 -0
  87. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/kernel/agnostics.rb +11 -0
  88. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/kernel/daemonizing.rb +15 -0
  89. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/kernel/debugger.rb +13 -0
  90. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/kernel/reporting.rb +51 -0
  91. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/kernel/requires.rb +24 -0
  92. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/load_error.rb +38 -0
  93. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/logger.rb +16 -0
  94. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module.rb +8 -0
  95. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/aliasing.rb +70 -0
  96. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/attr_accessor_with_default.rb +31 -0
  97. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/attr_internal.rb +31 -0
  98. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/attribute_accessors.rb +48 -0
  99. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/delegation.rb +62 -0
  100. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/inclusion.rb +11 -0
  101. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/introspection.rb +35 -0
  102. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/module/loading.rb +13 -0
  103. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/name_error.rb +17 -0
  104. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/numeric.rb +7 -0
  105. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/numeric/bytes.rb +44 -0
  106. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/numeric/time.rb +91 -0
  107. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/object.rb +4 -0
  108. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/object/conversions.rb +14 -0
  109. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/object/extending.rb +58 -0
  110. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/object/instance_variables.rb +22 -0
  111. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/object/misc.rb +59 -0
  112. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/pathname.rb +7 -0
  113. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/pathname/clean_within.rb +14 -0
  114. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/proc.rb +12 -0
  115. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/range.rb +11 -0
  116. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/range/blockless_step.rb +22 -0
  117. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/range/conversions.rb +23 -0
  118. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/range/include_range.rb +22 -0
  119. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/range/overlaps.rb +12 -0
  120. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string.rb +23 -0
  121. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/access.rb +58 -0
  122. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/conversions.rb +28 -0
  123. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/inflections.rb +153 -0
  124. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/iterators.rb +17 -0
  125. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/starts_ends_with.rb +27 -0
  126. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/unicode.rb +42 -0
  127. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/string/xchar.rb +11 -0
  128. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/symbol.rb +14 -0
  129. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/test.rb +1 -0
  130. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/test/unit/assertions.rb +62 -0
  131. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/time.rb +19 -0
  132. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/time/behavior.rb +13 -0
  133. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/time/calculations.rb +224 -0
  134. data/vendor/activesupport-2.0.2-/lib/active_support/core_ext/time/conversions.rb +94 -0
  135. data/vendor/activesupport-2.0.2-/lib/active_support/dependencies.rb +540 -0
  136. data/vendor/activesupport-2.0.2-/lib/active_support/deprecation.rb +204 -0
  137. data/vendor/activesupport-2.0.2-/lib/active_support/duration.rb +96 -0
  138. data/vendor/activesupport-2.0.2-/lib/active_support/inflections.rb +53 -0
  139. data/vendor/activesupport-2.0.2-/lib/active_support/inflector.rb +282 -0
  140. data/vendor/activesupport-2.0.2-/lib/active_support/json.rb +31 -0
  141. data/vendor/activesupport-2.0.2-/lib/active_support/json/decoding.rb +60 -0
  142. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/date.rb +5 -0
  143. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/date_time.rb +5 -0
  144. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/enumerable.rb +12 -0
  145. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/false_class.rb +5 -0
  146. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/hash.rb +50 -0
  147. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/nil_class.rb +5 -0
  148. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/numeric.rb +5 -0
  149. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/object.rb +6 -0
  150. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/regexp.rb +5 -0
  151. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/string.rb +30 -0
  152. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/symbol.rb +5 -0
  153. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/time.rb +5 -0
  154. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoders/true_class.rb +5 -0
  155. data/vendor/activesupport-2.0.2-/lib/active_support/json/encoding.rb +38 -0
  156. data/vendor/activesupport-2.0.2-/lib/active_support/json/variable.rb +10 -0
  157. data/vendor/activesupport-2.0.2-/lib/active_support/multibyte.rb +9 -0
  158. data/vendor/activesupport-2.0.2-/lib/active_support/multibyte/chars.rb +141 -0
  159. data/vendor/activesupport-2.0.2-/lib/active_support/multibyte/generators/generate_tables.rb +149 -0
  160. data/vendor/activesupport-2.0.2-/lib/active_support/multibyte/handlers/passthru_handler.rb +9 -0
  161. data/vendor/activesupport-2.0.2-/lib/active_support/multibyte/handlers/utf8_handler.rb +564 -0
  162. data/vendor/activesupport-2.0.2-/lib/active_support/multibyte/handlers/utf8_handler_proc.rb +43 -0
  163. data/vendor/activesupport-2.0.2-/lib/active_support/option_merger.rb +25 -0
  164. data/vendor/activesupport-2.0.2-/lib/active_support/ordered_options.rb +49 -0
  165. data/vendor/activesupport-2.0.2-/lib/active_support/test_case.rb +5 -0
  166. data/vendor/activesupport-2.0.2-/lib/active_support/testing.rb +1 -0
  167. data/vendor/activesupport-2.0.2-/lib/active_support/testing/default.rb +12 -0
  168. data/vendor/activesupport-2.0.2-/lib/active_support/values/time_zone.rb +181 -0
  169. data/vendor/activesupport-2.0.2-/lib/active_support/values/unicode_tables.dat +0 -0
  170. data/vendor/activesupport-2.0.2-/lib/active_support/vendor.rb +14 -0
  171. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/blankslate.rb +113 -0
  172. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder.rb +13 -0
  173. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder/blankslate.rb +20 -0
  174. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder/css.rb +250 -0
  175. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder/xchar.rb +115 -0
  176. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder/xmlbase.rb +139 -0
  177. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder/xmlevents.rb +63 -0
  178. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/builder-2.1.2/builder/xmlmarkup.rb +328 -0
  179. data/vendor/activesupport-2.0.2-/lib/active_support/vendor/xml-simple-1.0.11/xmlsimple.rb +1021 -0
  180. data/vendor/activesupport-2.0.2-/lib/active_support/version.rb +9 -0
  181. data/vendor/activesupport-2.0.2-/lib/active_support/whiny_nil.rb +38 -0
  182. data/vendor/activesupport-2.0.2-/lib/activesupport.rb +1 -0
  183. metadata +222 -2
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env ruby
2
+ begin
3
+ require File.dirname(__FILE__) + '/../../../active_support'
4
+ rescue IOError
5
+ end
6
+ require 'open-uri'
7
+ require 'tmpdir'
8
+
9
+ module ActiveSupport::Multibyte::Handlers #:nodoc:
10
+ class UnicodeDatabase #:nodoc:
11
+ def self.load
12
+ [Hash.new(Codepoint.new),[],{},{}]
13
+ end
14
+ end
15
+
16
+ class UnicodeTableGenerator #:nodoc:
17
+ BASE_URI = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::UNICODE_VERSION}/ucd/"
18
+ SOURCES = {
19
+ :codepoints => BASE_URI + 'UnicodeData.txt',
20
+ :composition_exclusion => BASE_URI + 'CompositionExclusions.txt',
21
+ :grapheme_break_property => BASE_URI + 'auxiliary/GraphemeBreakProperty.txt',
22
+ :cp1252 => 'http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT'
23
+ }
24
+
25
+ def initialize
26
+ @ucd = UnicodeDatabase.new
27
+
28
+ default = Codepoint.new
29
+ default.combining_class = 0
30
+ default.uppercase_mapping = 0
31
+ default.lowercase_mapping = 0
32
+ @ucd.codepoints = Hash.new(default)
33
+
34
+ @ucd.composition_exclusion = []
35
+ @ucd.composition_map = {}
36
+ @ucd.boundary = {}
37
+ @ucd.cp1252 = {}
38
+ end
39
+
40
+ def parse_codepoints(line)
41
+ codepoint = Codepoint.new
42
+ raise "Could not parse input." unless line =~ /^
43
+ ([0-9A-F]+); # code
44
+ ([^;]+); # name
45
+ ([A-Z]+); # general category
46
+ ([0-9]+); # canonical combining class
47
+ ([A-Z]+); # bidi class
48
+ (<([A-Z]*)>)? # decomposition type
49
+ ((\ ?[0-9A-F]+)*); # decompomposition mapping
50
+ ([0-9]*); # decimal digit
51
+ ([0-9]*); # digit
52
+ ([^;]*); # numeric
53
+ ([YN]*); # bidi mirrored
54
+ ([^;]*); # unicode 1.0 name
55
+ ([^;]*); # iso comment
56
+ ([0-9A-F]*); # simple uppercase mapping
57
+ ([0-9A-F]*); # simple lowercase mapping
58
+ ([0-9A-F]*)$/ix # simple titlecase mapping
59
+ codepoint.code = $1.hex
60
+ #codepoint.name = $2
61
+ #codepoint.category = $3
62
+ codepoint.combining_class = Integer($4)
63
+ #codepoint.bidi_class = $5
64
+ codepoint.decomp_type = $7
65
+ codepoint.decomp_mapping = ($8=='') ? nil : $8.split.collect { |element| element.hex }
66
+ #codepoint.bidi_mirrored = ($13=='Y') ? true : false
67
+ codepoint.uppercase_mapping = ($16=='') ? 0 : $16.hex
68
+ codepoint.lowercase_mapping = ($17=='') ? 0 : $17.hex
69
+ #codepoint.titlecase_mapping = ($18=='') ? nil : $18.hex
70
+ @ucd.codepoints[codepoint.code] = codepoint
71
+ end
72
+
73
+ def parse_grapheme_break_property(line)
74
+ if line =~ /^([0-9A-F\.]+)\s*;\s*([\w]+)\s*#/
75
+ type = $2.downcase.intern
76
+ @ucd.boundary[type] ||= []
77
+ if $1.include? '..'
78
+ parts = $1.split '..'
79
+ @ucd.boundary[type] << (parts[0].hex..parts[1].hex)
80
+ else
81
+ @ucd.boundary[type] << $1.hex
82
+ end
83
+ end
84
+ end
85
+
86
+ def parse_composition_exclusion(line)
87
+ if line =~ /^([0-9A-F]+)/i
88
+ @ucd.composition_exclusion << $1.hex
89
+ end
90
+ end
91
+
92
+ def parse_cp1252(line)
93
+ if line =~ /^([0-9A-Fx]+)\s([0-9A-Fx]+)/i
94
+ @ucd.cp1252[$1.hex] = $2.hex
95
+ end
96
+ end
97
+
98
+ def create_composition_map
99
+ @ucd.codepoints.each do |_, cp|
100
+ if !cp.nil? and cp.combining_class == 0 and cp.decomp_type.nil? and !cp.decomp_mapping.nil? and cp.decomp_mapping.length == 2 and @ucd[cp.decomp_mapping[0]].combining_class == 0 and !@ucd.composition_exclusion.include?(cp.code)
101
+ @ucd.composition_map[cp.decomp_mapping[0]] ||= {}
102
+ @ucd.composition_map[cp.decomp_mapping[0]][cp.decomp_mapping[1]] = cp.code
103
+ end
104
+ end
105
+ end
106
+
107
+ def normalize_boundary_map
108
+ @ucd.boundary.each do |k,v|
109
+ if [:lf, :cr].include? k
110
+ @ucd.boundary[k] = v[0]
111
+ end
112
+ end
113
+ end
114
+
115
+ def parse
116
+ SOURCES.each do |type, url|
117
+ filename = File.join(Dir.tmpdir, "#{url.split('/').last}")
118
+ unless File.exist?(filename)
119
+ $stderr.puts "Downloading #{url.split('/').last}"
120
+ File.open(filename, 'wb') do |target|
121
+ open(url) do |source|
122
+ source.each_line { |line| target.write line }
123
+ end
124
+ end
125
+ end
126
+ File.open(filename) do |file|
127
+ file.each_line { |line| send "parse_#{type}".intern, line }
128
+ end
129
+ end
130
+ create_composition_map
131
+ normalize_boundary_map
132
+ end
133
+
134
+ def dump_to(filename)
135
+ File.open(filename, 'wb') do |f|
136
+ f.write Marshal.dump([@ucd.codepoints, @ucd.composition_exclusion, @ucd.composition_map, @ucd.boundary, @ucd.cp1252])
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ if __FILE__ == $0
143
+ filename = ActiveSupport::Multibyte::Handlers::UnicodeDatabase.filename
144
+ generator = ActiveSupport::Multibyte::Handlers::UnicodeTableGenerator.new
145
+ generator.parse
146
+ print "Writing to: #{filename}"
147
+ generator.dump_to filename
148
+ puts " (#{File.size(filename)} bytes)"
149
+ end
@@ -0,0 +1,9 @@
1
+ # Chars uses this handler when $KCODE is not set to 'UTF8'. Because this handler doesn't define any methods, all calls
2
+ # will be forwarded to String.
3
+ class ActiveSupport::Multibyte::Handlers::PassthruHandler #:nodoc:
4
+
5
+ # Return the original byteoffset
6
+ def self.translate_offset(string, byte_offset) #:nodoc:
7
+ byte_offset
8
+ end
9
+ end
@@ -0,0 +1,564 @@
1
+ # Contains all the handlers and helper classes
2
+ module ActiveSupport::Multibyte::Handlers #:nodoc:
3
+ class EncodingError < ArgumentError #:nodoc:
4
+ end
5
+
6
+ class Codepoint #:nodoc:
7
+ attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
8
+ end
9
+
10
+ class UnicodeDatabase #:nodoc:
11
+ attr_writer :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
12
+
13
+ # self-expiring methods that lazily load the Unicode database and then return the value.
14
+ [:codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252].each do |attr_name|
15
+ class_eval(<<-EOS, __FILE__, __LINE__)
16
+ def #{attr_name}
17
+ load
18
+ @#{attr_name}
19
+ end
20
+ EOS
21
+ end
22
+
23
+ # Shortcut to ucd.codepoints[]
24
+ def [](index); codepoints[index]; end
25
+
26
+ # Returns the directory in which the data files are stored
27
+ def self.dirname
28
+ File.dirname(__FILE__) + '/../../values/'
29
+ end
30
+
31
+ # Returns the filename for the data file for this version
32
+ def self.filename
33
+ File.expand_path File.join(dirname, "unicode_tables.dat")
34
+ end
35
+
36
+ # Loads the unicode database and returns all the internal objects of UnicodeDatabase
37
+ # Once the values have been loaded, define attr_reader methods for the instance variables.
38
+ def load
39
+ begin
40
+ @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
41
+ rescue Exception => e
42
+ raise IOError.new("Couldn't load the unicode tables for UTF8Handler (#{e.message}), handler is unusable")
43
+ end
44
+ @codepoints ||= Hash.new(Codepoint.new)
45
+ @composition_exclusion ||= []
46
+ @composition_map ||= {}
47
+ @boundary ||= {}
48
+ @cp1252 ||= {}
49
+
50
+ # Redefine the === method so we can write shorter rules for grapheme cluster breaks
51
+ @boundary.each do |k,_|
52
+ @boundary[k].instance_eval do
53
+ def ===(other)
54
+ detect { |i| i === other } ? true : false
55
+ end
56
+ end if @boundary[k].kind_of?(Array)
57
+ end
58
+
59
+ # define attr_reader methods for the instance variables
60
+ class << self
61
+ attr_reader :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
62
+ end
63
+ end
64
+ end
65
+
66
+ # UTF8Handler implements Unicode aware operations for strings, these operations will be used by the Chars
67
+ # proxy when $KCODE is set to 'UTF8'.
68
+ class UTF8Handler
69
+ # Hangul character boundaries and properties
70
+ HANGUL_SBASE = 0xAC00
71
+ HANGUL_LBASE = 0x1100
72
+ HANGUL_VBASE = 0x1161
73
+ HANGUL_TBASE = 0x11A7
74
+ HANGUL_LCOUNT = 19
75
+ HANGUL_VCOUNT = 21
76
+ HANGUL_TCOUNT = 28
77
+ HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
78
+ HANGUL_SCOUNT = 11172
79
+ HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
80
+ HANGUL_JAMO_FIRST = 0x1100
81
+ HANGUL_JAMO_LAST = 0x11FF
82
+
83
+ # All the unicode whitespace
84
+ UNICODE_WHITESPACE = [
85
+ (0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
86
+ 0x0020, # White_Space # Zs SPACE
87
+ 0x0085, # White_Space # Cc <control-0085>
88
+ 0x00A0, # White_Space # Zs NO-BREAK SPACE
89
+ 0x1680, # White_Space # Zs OGHAM SPACE MARK
90
+ 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
91
+ (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
92
+ 0x2028, # White_Space # Zl LINE SEPARATOR
93
+ 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
94
+ 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
95
+ 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
96
+ 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
97
+ ].flatten.freeze
98
+
99
+ # BOM (byte order mark) can also be seen as whitespace, it's a non-rendering character used to distinguish
100
+ # between little and big endian. This is not an issue in utf-8, so it must be ignored.
101
+ UNICODE_LEADERS_AND_TRAILERS = UNICODE_WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
102
+
103
+ # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
104
+ UTF8_PAT = /\A(?:
105
+ [\x00-\x7f] |
106
+ [\xc2-\xdf] [\x80-\xbf] |
107
+ \xe0 [\xa0-\xbf] [\x80-\xbf] |
108
+ [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
109
+ \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
110
+ [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
111
+ \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
112
+ )*\z/xn
113
+
114
+ # Returns a regular expression pattern that matches the passed Unicode codepoints
115
+ def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
116
+ array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
117
+ end
118
+ UNICODE_TRAILERS_PAT = /(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+\Z/
119
+ UNICODE_LEADERS_PAT = /\A(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+/
120
+
121
+ class << self
122
+
123
+ # ///
124
+ # /// BEGIN String method overrides
125
+ # ///
126
+
127
+ # Inserts the passed string at specified codepoint offsets
128
+ def insert(str, offset, fragment)
129
+ str.replace(
130
+ u_unpack(str).insert(
131
+ offset,
132
+ u_unpack(fragment)
133
+ ).flatten.pack('U*')
134
+ )
135
+ end
136
+
137
+ # Returns the position of the passed argument in the string, counting in codepoints
138
+ def index(str, *args)
139
+ bidx = str.index(*args)
140
+ bidx ? (u_unpack(str.slice(0...bidx)).size) : nil
141
+ end
142
+
143
+ # Works just like the indexed replace method on string, except instead of byte offsets you specify
144
+ # character offsets.
145
+ #
146
+ # Example:
147
+ #
148
+ # s = "Müller"
149
+ # s.chars[2] = "e" # Replace character with offset 2
150
+ # s
151
+ # #=> "Müeler"
152
+ #
153
+ # s = "Müller"
154
+ # s.chars[1, 2] = "ö" # Replace 2 characters at character offset 1
155
+ # s
156
+ # #=> "Möler"
157
+ def []=(str, *args)
158
+ replace_by = args.pop
159
+ # Indexed replace with regular expressions already works
160
+ return str[*args] = replace_by if args.first.is_a?(Regexp)
161
+ result = u_unpack(str)
162
+ if args[0].is_a?(Fixnum)
163
+ raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
164
+ min = args[0]
165
+ max = args[1].nil? ? min : (min + args[1] - 1)
166
+ range = Range.new(min, max)
167
+ replace_by = [replace_by].pack('U') if replace_by.is_a?(Fixnum)
168
+ elsif args.first.is_a?(Range)
169
+ raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
170
+ range = args[0]
171
+ else
172
+ needle = args[0].to_s
173
+ min = index(str, needle)
174
+ max = min + length(needle) - 1
175
+ range = Range.new(min, max)
176
+ end
177
+ result[range] = u_unpack(replace_by)
178
+ str.replace(result.pack('U*'))
179
+ end
180
+
181
+ # Works just like String#rjust, only integer specifies characters instead of bytes.
182
+ #
183
+ # Example:
184
+ #
185
+ # "¾ cup".chars.rjust(8).to_s
186
+ # #=> " ¾ cup"
187
+ #
188
+ # "¾ cup".chars.rjust(8, " ").to_s # Use non-breaking whitespace
189
+ # #=> "   ¾ cup"
190
+ def rjust(str, integer, padstr=' ')
191
+ justify(str, integer, :right, padstr)
192
+ end
193
+
194
+ # Works just like String#ljust, only integer specifies characters instead of bytes.
195
+ #
196
+ # Example:
197
+ #
198
+ # "¾ cup".chars.ljust(8).to_s
199
+ # #=> "¾ cup "
200
+ #
201
+ # "¾ cup".chars.ljust(8, " ").to_s # Use non-breaking whitespace
202
+ # #=> "¾ cup   "
203
+ def ljust(str, integer, padstr=' ')
204
+ justify(str, integer, :left, padstr)
205
+ end
206
+
207
+ # Works just like String#center, only integer specifies characters instead of bytes.
208
+ #
209
+ # Example:
210
+ #
211
+ # "¾ cup".chars.center(8).to_s
212
+ # #=> " ¾ cup "
213
+ #
214
+ # "¾ cup".chars.center(8, " ").to_s # Use non-breaking whitespace
215
+ # #=> " ¾ cup  "
216
+ def center(str, integer, padstr=' ')
217
+ justify(str, integer, :center, padstr)
218
+ end
219
+
220
+ # Does Unicode-aware rstrip
221
+ def rstrip(str)
222
+ str.gsub(UNICODE_TRAILERS_PAT, '')
223
+ end
224
+
225
+ # Does Unicode-aware lstrip
226
+ def lstrip(str)
227
+ str.gsub(UNICODE_LEADERS_PAT, '')
228
+ end
229
+
230
+ # Removes leading and trailing whitespace
231
+ def strip(str)
232
+ str.gsub(UNICODE_LEADERS_PAT, '').gsub(UNICODE_TRAILERS_PAT, '')
233
+ end
234
+
235
+ # Returns the number of codepoints in the string
236
+ def size(str)
237
+ u_unpack(str).size
238
+ end
239
+ alias_method :length, :size
240
+
241
+ # Reverses codepoints in the string.
242
+ def reverse(str)
243
+ u_unpack(str).reverse.pack('U*')
244
+ end
245
+
246
+ # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
247
+ # character.
248
+ def slice(str, *args)
249
+ if args.size > 2
250
+ raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
251
+ elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
252
+ raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
253
+ elsif (args.size == 2 && !args[1].is_a?(Numeric))
254
+ raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
255
+ elsif args[0].kind_of? Range
256
+ cps = u_unpack(str).slice(*args)
257
+ cps.nil? ? nil : cps.pack('U*')
258
+ elsif args[0].kind_of? Regexp
259
+ str.slice(*args)
260
+ elsif args.size == 1 && args[0].kind_of?(Numeric)
261
+ u_unpack(str)[args[0]]
262
+ else
263
+ u_unpack(str).slice(*args).pack('U*')
264
+ end
265
+ end
266
+ alias_method :[], :slice
267
+
268
+ # Convert characters in the string to uppercase
269
+ def upcase(str); to_case :uppercase_mapping, str; end
270
+
271
+ # Convert characters in the string to lowercase
272
+ def downcase(str); to_case :lowercase_mapping, str; end
273
+
274
+ # Returns a copy of +str+ with the first character converted to uppercase and the remainder to lowercase
275
+ def capitalize(str)
276
+ upcase(slice(str, 0..0)) + downcase(slice(str, 1..-1) || '')
277
+ end
278
+
279
+ # ///
280
+ # /// Extra String methods for unicode operations
281
+ # ///
282
+
283
+ # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
284
+ # passing strings to databases and validations.
285
+ #
286
+ # * <tt>str</tt> - The string to perform normalization on.
287
+ # * <tt>form</tt> - The form you want to normalize in. Should be one of the following: :c, :kc, :d or :kd.
288
+ def normalize(str, form=ActiveSupport::Multibyte::DEFAULT_NORMALIZATION_FORM)
289
+ # See http://www.unicode.org/reports/tr15, Table 1
290
+ codepoints = u_unpack(str)
291
+ case form
292
+ when :d
293
+ reorder_characters(decompose_codepoints(:canonical, codepoints))
294
+ when :c
295
+ compose_codepoints reorder_characters(decompose_codepoints(:canonical, codepoints))
296
+ when :kd
297
+ reorder_characters(decompose_codepoints(:compatability, codepoints))
298
+ when :kc
299
+ compose_codepoints reorder_characters(decompose_codepoints(:compatability, codepoints))
300
+ else
301
+ raise ArgumentError, "#{form} is not a valid normalization variant", caller
302
+ end.pack('U*')
303
+ end
304
+
305
# Performs canonical decomposition on the characters of +str+ and returns the
# result as a new string. No reordering of the decomposed codepoints is done here.
def decompose(str)
  codepoints = u_unpack(str)
  decompose_codepoints(:canonical, codepoints).pack('U*')
end
309
+
310
# Performs composition on the characters of +str+ and returns the result as a
# new string.
#
# The codepoints are composed first and packed afterwards. Without the explicit
# parentheses, <tt>compose_codepoints u_unpack(str).pack('U*')</tt> hands the
# already re-packed String to +compose_codepoints+, which expects an Array of
# codepoints, so nothing would ever be composed.
def compose(str)
  compose_codepoints(u_unpack(str)).pack('U*')
end
314
+
315
+ # ///
316
+ # /// BEGIN Helper methods for unicode operation
317
+ # ///
318
+
319
# Translates an offset in bytes into an offset in characters, for instance one
# received from a regular expression match.
#
# Returns nil for a nil offset and 0 for an empty string. Raises EncodingError
# when no prefix of +str+ starting at +byte_offset+ unpacks as UTF-8.
def translate_offset(str, byte_offset)
  return nil if byte_offset.nil?
  return 0 if str == ''
  prefix = str[0..byte_offset]
  loop do
    begin
      return prefix.unpack('U*').length - 1
    rescue ArgumentError
      # The cut landed inside a character: widen the prefix by one and try again.
      byte_offset += 1
      prefix = str[0..byte_offset]
      # Stop retrying at the end of the string
      unless byte_offset < prefix.length
        raise EncodingError.new('malformed UTF-8 character')
      end
    end
  end
end
339
+
340
# Checks if the string is valid UTF8: true when +str+ unpacks cleanly as UTF-8
# codepoints, false when unpacking raises ArgumentError.
def consumes?(str)
  # Unpack is a little bit faster than regular expressions
  str.unpack('U*')
  true
rescue ArgumentError
  false
end
350
+
351
# Returns the number of grapheme clusters in the string, i.e. the number of groups
# produced by +g_unpack+. This method is very likely to be moved or renamed in
# future versions.
def g_length(str)
  clusters = g_unpack(str)
  clusters.size
end
356
+
357
# Replaces all the non-utf-8 bytes by their iso-8859-1 or cp1252 equivalent resulting in a valid utf-8 string.
#
# Pieces of the string that match UTF8_PAT are kept untouched; every other piece is
# treated as a single byte and re-encoded according to its value.
def tidy_bytes(str)
  str.split(//u).map do |c|
    if UTF8_PAT.match(c)
      c
    else
      n = c.unpack('C')[0]
      if n < 128
        # plain ASCII byte
        n.chr
      elsif n < 160
        # 128..159: use the cp1252 table, falling back to the byte value itself
        [UCD.cp1252[n] || n].pack('U')
      elsif n < 192
        # 160..191: two-byte sequence with lead byte 0xC2
        "\xC2" + n.chr
      else
        # 192..255: two-byte sequence with lead byte 0xC3
        "\xC3" + (n-64).chr
      end
    end
  end.join
end
370
+
371
+ protected
372
+
373
# Detects whether the codepoint belongs to at least one of the given character
# classes, looked up by name in UCD.boundary. Primarily used by the grapheme
# cluster support. Always returns true or false.
def in_char_class?(codepoint, classes)
  matching_class = classes.find { |name| UCD.boundary[name] === codepoint }
  matching_class ? true : false
end
378
+
379
# Unpacks the string at codepoint boundaries and returns the codepoints as an
# Array of integers. Raises EncodingError when +str+ is not well-formed UTF-8.
def u_unpack(str)
  str.unpack('U*')
rescue ArgumentError
  raise EncodingError.new('malformed UTF-8 character')
end
387
+
388
# Unpacks the string at grapheme boundaries instead of codepoint boundaries.
# Returns an Array of Arrays, each inner Array holding the codepoints of one
# grapheme cluster.
def g_unpack(str)
  codepoints = u_unpack(str)
  clusters = []
  cluster_start = 0
  # idx is the position of a potential boundary between codepoints[idx-1] and
  # codepoints[idx]; at the very end +current+ is nil, which matches no rule.
  1.upto(codepoints.length) do |idx|
    previous = codepoints[idx - 1]
    current = codepoints[idx]
    joined =
      (previous == UCD.boundary[:cr] && current == UCD.boundary[:lf]) ||                   # CR X LF
      (UCD.boundary[:l] === previous && in_char_class?(current, [:l,:v,:lv,:lvt])) ||      # L X (L|V|LV|LVT)
      (in_char_class?(previous, [:lv,:v]) && in_char_class?(current, [:v,:t])) ||          # (LV|V) X (V|T)
      (in_char_class?(previous, [:lvt,:t]) && UCD.boundary[:t] === current) ||             # (LVT|T) X (T)
      (UCD.boundary[:extend] === current)                                                  # X Extend
    unless joined
      # No rule glues the two codepoints together: close the current cluster.
      clusters << codepoints[cluster_start..idx - 1]
      cluster_start = idx
    end
  end
  clusters
end
418
+
419
# Reverse operation of g_unpack: flattens the grapheme-cluster groups back into a
# single flat Array of codepoints. Note that the result is an Array, not a String.
def g_pack(unpacked)
  codepoints = unpacked.flatten
  codepoints
end
423
+
424
# Justifies a string in a certain way. Valid values for <tt>way</tt> are <tt>:right</tt>, <tt>:left</tt> and
# <tt>:center</tt>. Is primarily used as a helper method by <tt>rjust</tt>, <tt>ljust</tt> and <tt>center</tt>.
#
# +integer+ is the target width in characters. Returns a padded copy of +str+, or
# nil when +way+ is none of the three supported values. Raises ArgumentError when
# +padstr+ is empty.
def justify(str, integer, way, padstr=' ')
  raise ArgumentError, "zero width padding" if padstr.length == 0
  padsize = integer - size(str)
  padsize = 0 unless padsize > 0
  case way
  when :right
    padded = str.dup
    padded.insert(0, padding(padsize, padstr))
  when :left
    padded = str.dup
    padded.insert(-1, padding(padsize, padstr))
  when :center
    # With an odd amount of padding the extra character goes to the right.
    half = padsize / 2.0
    lpad = padding(half.floor, padstr)
    rpad = padding(half.ceil, padstr)
    padded = str.dup
    padded.insert(0, lpad).insert(-1, rpad)
  end
end
441
+
442
# Generates a padding string of exactly +padsize+ characters by repeating +padstr+
# and cutting off the excess. Returns an empty string when +padsize+ is zero.
def padding(padsize, padstr=' ')
  return '' if padsize == 0
  # One repetition more than strictly needed, so the slice below is always long enough.
  repeats = (padsize / size(padstr)) + 1
  slice(padstr * repeats, 0, padsize)
end
450
+
451
# Converts characters to a different case. +way+ names the mapping attribute to
# read from each codepoint's UCD entry (callers in this file pass
# :uppercase_mapping or :lowercase_mapping). Codepoints without an entry, or whose
# mapping is not positive, are kept as they are.
def to_case(way, str)
  mapped = u_unpack(str).map do |codepoint|
    entry = UCD[codepoint]
    if entry.nil?
      codepoint
    else
      target = entry.send(way)
      target > 0 ? target : codepoint
    end
  end
  mapped.pack('U*')
end
463
+
464
# Re-orders codepoints so the string becomes canonical: adjacent codepoints are
# swapped when the first has a higher combining class than the second and the
# second has a non-zero combining class. The array is modified in place and
# returned.
def reorder_characters(codepoints)
  last = codepoints.length - 1
  index = 0
  while index < last
    first = UCD[codepoints[index]]
    second = UCD[codepoints[index + 1]]
    if first.combining_class > second.combining_class && second.combining_class > 0
      codepoints[index], codepoints[index + 1] = second.code, first.code
      # Step back so the swapped codepoint is compared with its new left neighbour.
      index += (index > 0 ? -1 : 1)
    else
      index += 1
    end
  end
  codepoints
end
479
+
480
# Decomposes composed characters to the decomposed form and returns a new Array
# of codepoints.
#
# * <tt>type</tt> - :canonical or :compatability (spelling as used throughout this file).
# * <tt>codepoints</tt> - Array of codepoints to decompose.
def decompose_codepoints(type, codepoints)
  decomposed = []
  codepoints.each do |cp|
    if HANGUL_SBASE <= cp && cp < HANGUL_SLAST
      # Hangul syllables are decomposed arithmetically into their jamo.
      sindex = cp - HANGUL_SBASE
      tindex = sindex % HANGUL_TCOUNT
      jamo = [
        HANGUL_LBASE + sindex / HANGUL_NCOUNT,
        HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
      ]
      jamo << (HANGUL_TBASE + tindex) unless tindex == 0
      decomposed.concat(jamo)
    elsif (mapping = UCD[cp].decomp_mapping) && (!UCD[cp].decomp_type || type == :compatability)
      # Mappings that carry a decomp_type are only followed for compatibility
      # decomposition; the mapping is decomposed recursively.
      decomposed.concat(decompose_codepoints(type, mapping.dup))
    else
      decomposed << cp
    end
  end
  decomposed
end
500
+
501
# Composes decomposed characters to the composed form. The +codepoints+ Array is
# modified in place and returned.
#
# Two strategies are used, chosen by the current starter character: Hangul jamo
# are combined arithmetically, everything else is looked up in
# UCD.composition_map.
def compose_codepoints(codepoints)
  pos = 0
  last_index = codepoints.length - 1
  starter_pos = 0
  starter_char = codepoints[0]
  previous_combining_class = -1
  while pos < last_index
    pos += 1
    lindex = starter_char - HANGUL_LBASE
    if 0 <= lindex && lindex < HANGUL_LCOUNT
      # -- Hangul
      vindex = begin
        codepoints[starter_pos + 1] - HANGUL_VBASE
      rescue StandardError
        # e.g. there is no codepoint after the starter
        -1
      end
      if 0 <= vindex && vindex < HANGUL_VCOUNT
        tindex = begin
          codepoints[starter_pos + 2] - HANGUL_TBASE
        rescue StandardError
          -1
        end
        if 0 <= tindex && tindex < HANGUL_TCOUNT
          # L + V + T collapse into one syllable: the array shrinks by two
          syllable_end = starter_pos + 2
          last_index -= 2
        else
          # L + V only: the array shrinks by one
          tindex = 0
          syllable_end = starter_pos + 1
          last_index -= 1
        end
        codepoints[starter_pos..syllable_end] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
      end
      starter_pos += 1
      starter_char = codepoints[starter_pos]
    else
      # -- Other characters
      current_char = codepoints[pos]
      current = UCD[current_char]
      composition = nil
      # A composition is only looked up when the combining class rises.
      if current.combining_class > previous_combining_class
        ref = UCD.composition_map[starter_char]
        composition = ref[current_char] if ref
      end
      if composition.nil?
        previous_combining_class = current.combining_class
      else
        # Fold the current character into the starter and drop it from the array.
        codepoints[starter_pos] = composition
        starter_char = composition
        codepoints.delete_at(pos)
        last_index -= 1
        pos -= 1
        previous_combining_class = -1
      end
      if current.combining_class == 0
        starter_pos = pos
        starter_char = codepoints[pos]
      end
    end
  end
  codepoints
end
559
+
560
+ # UniCode Database
561
+ UCD = UnicodeDatabase.new
562
+ end
563
+ end
564
+ end