bioinform 0.1.17 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -1,476 +0,0 @@
1
- # encoding: utf-8
2
- require_relative '../core_ext/string/access'
3
- require_relative '../core_ext/string/behavior'
4
-
5
- module ActiveSupport #:nodoc:
6
- module Multibyte #:nodoc:
7
- # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
8
- # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
9
- # encoding safe manner. All the normal String methods are also implemented on the proxy.
10
- #
11
- # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
12
- # which would normally return a String object now return a Chars object so methods can be chained.
13
- #
14
- # "The Perfect String ".mb_chars.downcase.strip.normalize # => "the perfect string"
15
- #
16
- # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
17
- # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
18
- #
19
- # bad.explicit_checking_method "T".mb_chars.downcase.to_s
20
- #
21
- # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
22
- # encodings you can write your own multibyte string handler and configure it through
23
- # ActiveSupport::Multibyte.proxy_class.
24
- #
25
- # class CharsForUTF32
26
- # def size
27
- # @wrapped_string.size / 4
28
- # end
29
- #
30
- # def self.accepts?(string)
31
- # string.length % 4 == 0
32
- # end
33
- # end
34
- #
35
- # ActiveSupport::Multibyte.proxy_class = CharsForUTF32
36
- class Chars
37
- attr_reader :wrapped_string
38
- alias to_s wrapped_string
39
- alias to_str wrapped_string
40
-
41
- if RUBY_VERSION >= "1.9"
42
- # Creates a new Chars instance by wrapping _string_.
43
- def initialize(string)
44
- @wrapped_string = string
45
- @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
46
- end
47
- else
48
- def initialize(string) #:nodoc:
49
- @wrapped_string = string
50
- end
51
- end
52
-
53
- # Forward all undefined methods to the wrapped string.
54
- def method_missing(method, *args, &block)
55
- if method.to_s =~ /!$/
56
- @wrapped_string.__send__(method, *args, &block)
57
- self
58
- else
59
- result = @wrapped_string.__send__(method, *args, &block)
60
- result.kind_of?(String) ? chars(result) : result
61
- end
62
- end
63
-
64
- # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
65
- # only if the optional second parameter evaluates to +true+.
66
- def respond_to?(method, include_private=false)
67
- super || @wrapped_string.respond_to?(method, include_private)
68
- end
69
-
70
- # Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
71
- def acts_like_string?
72
- true
73
- end
74
-
75
- # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
76
- def self.consumes?(string)
77
- # Unpack is a little bit faster than regular expressions.
78
- string.unpack('U*')
79
- true
80
- rescue ArgumentError
81
- false
82
- end
83
-
84
- include Comparable
85
-
86
- # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
87
- # equal or after the object on the right side of the operation. It accepts any object
88
- # that implements +to_s+:
89
- #
90
- # 'é'.mb_chars <=> 'ü'.mb_chars # => -1
91
- #
92
- # See <tt>String#<=></tt> for more details.
93
- def <=>(other)
94
- @wrapped_string <=> other.to_s
95
- end
96
-
97
- if RUBY_VERSION < "1.9"
98
- # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
99
- # +false+ otherwise.
100
- def self.wants?(string)
101
- $KCODE == 'UTF8' && consumes?(string)
102
- end
103
-
104
- # Returns a new Chars object containing the _other_ object concatenated to the string.
105
- #
106
- # Example:
107
- # ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
108
- def +(other)
109
- chars(@wrapped_string + other)
110
- end
111
-
112
- # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
113
- #
114
- # Example:
115
- # 'Café périferôl'.mb_chars =~ /ô/ # => 12
116
- def =~(other)
117
- translate_offset(@wrapped_string =~ other)
118
- end
119
-
120
- # Inserts the passed string at specified codepoint offsets.
121
- #
122
- # Example:
123
- # 'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
124
- def insert(offset, fragment)
125
- unpacked = Unicode.u_unpack(@wrapped_string)
126
- unless offset > unpacked.length
127
- @wrapped_string.replace(
128
- Unicode.u_unpack(@wrapped_string).insert(offset, *Unicode.u_unpack(fragment)).pack('U*')
129
- )
130
- else
131
- raise IndexError, "index #{offset} out of string"
132
- end
133
- self
134
- end
135
-
136
- # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
137
- #
138
- # Example:
139
- # 'Café'.mb_chars.include?('é') # => true
140
- def include?(other)
141
- # We have to redefine this method because Enumerable defines it.
142
- @wrapped_string.include?(other)
143
- end
144
-
145
- # Returns the position of _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
146
- #
147
- # Example:
148
- # 'Café périferôl'.mb_chars.index('ô') # => 12
149
- # 'Café périferôl'.mb_chars.index(/\w/u) # => 0
150
- def index(needle, offset=0)
151
- wrapped_offset = first(offset).wrapped_string.length
152
- index = @wrapped_string.index(needle, wrapped_offset)
153
- index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
154
- end
155
-
156
- # Returns the position _needle_ in the string, counting in
157
- # codepoints, searching backward from _offset_ or the end of the
158
- # string. Returns +nil+ if _needle_ isn't found.
159
- #
160
- # Example:
161
- # 'Café périferôl'.mb_chars.rindex('é') # => 6
162
- # 'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
163
- def rindex(needle, offset=nil)
164
- offset ||= length
165
- wrapped_offset = first(offset).wrapped_string.length
166
- index = @wrapped_string.rindex(needle, wrapped_offset)
167
- index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
168
- end
169
-
170
- # Returns the number of codepoints in the string
171
- def size
172
- Unicode.u_unpack(@wrapped_string).size
173
- end
174
- alias_method :length, :size
175
-
176
- # Strips entire range of Unicode whitespace from the right of the string.
177
- def rstrip
178
- chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
179
- end
180
-
181
- # Strips entire range of Unicode whitespace from the left of the string.
182
- def lstrip
183
- chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
184
- end
185
-
186
- # Strips entire range of Unicode whitespace from the right and left of the string.
187
- def strip
188
- rstrip.lstrip
189
- end
190
-
191
- # Returns the codepoint of the first character in the string.
192
- #
193
- # Example:
194
- # 'こんにちは'.mb_chars.ord # => 12371
195
- def ord
196
- Unicode.u_unpack(@wrapped_string)[0]
197
- end
198
-
199
- # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
200
- #
201
- # Example:
202
- #
203
- # "¾ cup".mb_chars.rjust(8).to_s
204
- # # => " ¾ cup"
205
- #
206
- # "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
207
- # # => "   ¾ cup"
208
- def rjust(integer, padstr=' ')
209
- justify(integer, :right, padstr)
210
- end
211
-
212
- # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
213
- #
214
- # Example:
215
- #
216
- # "¾ cup".mb_chars.ljust(8).to_s
217
- # # => "¾ cup "
218
- #
219
- # "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
220
- # # => "¾ cup   "
221
- def ljust(integer, padstr=' ')
222
- justify(integer, :left, padstr)
223
- end
224
-
225
- # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
226
- #
227
- # Example:
228
- #
229
- # "¾ cup".mb_chars.center(8).to_s
230
- # # => " ¾ cup "
231
- #
232
- # "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
233
- # # => " ¾ cup  "
234
- def center(integer, padstr=' ')
235
- justify(integer, :center, padstr)
236
- end
237
-
238
- else
239
- def =~(other)
240
- @wrapped_string =~ other
241
- end
242
- end
243
-
244
- # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
245
- # instances instead of String. This makes chaining methods easier.
246
- #
247
- # Example:
248
- # 'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
249
- def split(*args)
250
- @wrapped_string.split(*args).map { |i| i.mb_chars }
251
- end
252
-
253
- # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
254
- #
255
- # Example:
256
- #
257
- # s = "Müller"
258
- # s.mb_chars[2] = "e" # Replace character with offset 2
259
- # s
260
- # # => "Müeler"
261
- #
262
- # s = "Müller"
263
- # s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
264
- # s
265
- # # => "Möler"
266
- def []=(*args)
267
- replace_by = args.pop
268
- # Indexed replace with regular expressions already works
269
- if args.first.is_a?(Regexp)
270
- @wrapped_string[*args] = replace_by
271
- else
272
- result = Unicode.u_unpack(@wrapped_string)
273
- case args.first
274
- when Fixnum
275
- raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
276
- min = args[0]
277
- max = args[1].nil? ? min : (min + args[1] - 1)
278
- range = Range.new(min, max)
279
- replace_by = [replace_by].pack('U') if replace_by.is_a?(Fixnum)
280
- when Range
281
- raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
282
- range = args[0]
283
- else
284
- needle = args[0].to_s
285
- min = index(needle)
286
- max = min + Unicode.u_unpack(needle).length - 1
287
- range = Range.new(min, max)
288
- end
289
- result[range] = Unicode.u_unpack(replace_by)
290
- @wrapped_string.replace(result.pack('U*'))
291
- end
292
- end
293
-
294
- # Reverses all characters in the string.
295
- #
296
- # Example:
297
- # 'Café'.mb_chars.reverse.to_s # => 'éfaC'
298
- def reverse
299
- chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
300
- end
301
-
302
- # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
303
- # character.
304
- #
305
- # Example:
306
- # 'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
307
- def slice(*args)
308
- if args.size > 2
309
- raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
310
- elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
311
- raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
312
- elsif (args.size == 2 && !args[1].is_a?(Numeric))
313
- raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
314
- elsif args[0].kind_of? Range
315
- cps = Unicode.u_unpack(@wrapped_string).slice(*args)
316
- result = cps.nil? ? nil : cps.pack('U*')
317
- elsif args[0].kind_of? Regexp
318
- result = @wrapped_string.slice(*args)
319
- elsif args.size == 1 && args[0].kind_of?(Numeric)
320
- character = Unicode.u_unpack(@wrapped_string)[args[0]]
321
- result = character && [character].pack('U')
322
- else
323
- cps = Unicode.u_unpack(@wrapped_string).slice(*args)
324
- result = cps && cps.pack('U*')
325
- end
326
- result && chars(result)
327
- end
328
- alias_method :[], :slice
329
-
330
- # Limit the byte size of the string to a number of bytes without breaking characters. Usable
331
- # when the storage for a string is limited for some reason.
332
- #
333
- # Example:
334
- # 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
335
- def limit(limit)
336
- slice(0...translate_offset(limit))
337
- end
338
-
339
- # Convert characters in the string to uppercase.
340
- #
341
- # Example:
342
- # 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
343
- def upcase
344
- chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
345
- end
346
-
347
- # Convert characters in the string to lowercase.
348
- #
349
- # Example:
350
- # 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
351
- def downcase
352
- chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping)
353
- end
354
-
355
- # Converts the first character to uppercase and the remainder to lowercase.
356
- #
357
- # Example:
358
- # 'über'.mb_chars.capitalize.to_s # => "Über"
359
- def capitalize
360
- (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase
361
- end
362
-
363
- # Capitalizes the first letter of every word, when possible.
364
- #
365
- # Example:
366
- # "ÉL QUE SE ENTERÓ".mb_chars.titleize # => "Él Que Se Enteró"
367
- # "日本語".mb_chars.titleize # => "日本語"
368
- def titleize
369
- chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping })
370
- end
371
- alias_method :titlecase, :titleize
372
-
373
- # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
374
- # passing strings to databases and validations.
375
- #
376
- # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
377
- # <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
378
- # ActiveSupport::Multibyte::Unicode.default_normalization_form
379
- def normalize(form = nil)
380
- chars(Unicode.normalize(@wrapped_string, form))
381
- end
382
-
383
- # Performs canonical decomposition on all the characters.
384
- #
385
- # Example:
386
- # 'é'.length # => 2
387
- # 'é'.mb_chars.decompose.to_s.length # => 3
388
- def decompose
389
- chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
390
- end
391
-
392
- # Performs composition on all the characters.
393
- #
394
- # Example:
395
- # 'é'.length # => 3
396
- # 'é'.mb_chars.compose.to_s.length # => 2
397
- def compose
398
- chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
399
- end
400
-
401
- # Returns the number of grapheme clusters in the string.
402
- #
403
- # Example:
404
- # 'क्षि'.mb_chars.length # => 4
405
- # 'क्षि'.mb_chars.g_length # => 3
406
- def g_length
407
- Unicode.g_unpack(@wrapped_string).length
408
- end
409
-
410
- # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
411
- #
412
- # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
413
- def tidy_bytes(force = false)
414
- chars(Unicode.tidy_bytes(@wrapped_string, force))
415
- end
416
-
417
- %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
418
- # Only define a corresponding bang method for methods defined in the proxy; On 1.9 the proxy will
419
- # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
420
- if public_method_defined?(method)
421
- define_method("#{method}!") do |*args|
422
- @wrapped_string = send(args.nil? ? method : method, *args).to_s
423
- self
424
- end
425
- end
426
- end
427
-
428
- protected
429
-
430
- def translate_offset(byte_offset) #:nodoc:
431
- return nil if byte_offset.nil?
432
- return 0 if @wrapped_string == ''
433
-
434
- if @wrapped_string.respond_to?(:force_encoding)
435
- @wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT)
436
- end
437
-
438
- begin
439
- @wrapped_string[0...byte_offset].unpack('U*').length
440
- rescue ArgumentError
441
- byte_offset -= 1
442
- retry
443
- end
444
- end
445
-
446
- def justify(integer, way, padstr=' ') #:nodoc:
447
- raise ArgumentError, "zero width padding" if padstr.length == 0
448
- padsize = integer - size
449
- padsize = padsize > 0 ? padsize : 0
450
- case way
451
- when :right
452
- result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
453
- when :left
454
- result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
455
- when :center
456
- lpad = padding((padsize / 2.0).floor, padstr)
457
- rpad = padding((padsize / 2.0).ceil, padstr)
458
- result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
459
- end
460
- chars(result)
461
- end
462
-
463
- def padding(padsize, padstr=' ') #:nodoc:
464
- if padsize != 0
465
- chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
466
- else
467
- ''
468
- end
469
- end
470
-
471
- def chars(string) #:nodoc:
472
- self.class.new(string)
473
- end
474
- end
475
- end
476
- end