bioinform 0.1.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -1,99 +0,0 @@
1
- require_relative "../../multibyte"
2
-
3
- class String
4
- unless '1.9'.respond_to?(:force_encoding)
5
- # Returns the character at the +position+ treating the string as an array (where 0 is the first character).
6
- #
7
- # Examples:
8
- # "hello".at(0) # => "h"
9
- # "hello".at(4) # => "o"
10
- # "hello".at(10) # => ERROR if < 1.9, nil in 1.9
11
- def at(position)
12
- mb_chars[position, 1].to_s
13
- end
14
-
15
- # Returns the remaining of the string from the +position+ treating the string as an array (where 0 is the first character).
16
- #
17
- # Examples:
18
- # "hello".from(0) # => "hello"
19
- # "hello".from(2) # => "llo"
20
- # "hello".from(10) # => "" if < 1.9, nil in 1.9
21
- def from(position)
22
- mb_chars[position..-1].to_s
23
- end
24
-
25
- # Returns the beginning of the string up to the +position+ treating the string as an array (where 0 is the first character).
26
- #
27
- # Examples:
28
- # "hello".to(0) # => "h"
29
- # "hello".to(2) # => "hel"
30
- # "hello".to(10) # => "hello"
31
- def to(position)
32
- mb_chars[0..position].to_s
33
- end
34
-
35
- # Returns the first character of the string or the first +limit+ characters.
36
- #
37
- # Examples:
38
- # "hello".first # => "h"
39
- # "hello".first(2) # => "he"
40
- # "hello".first(10) # => "hello"
41
- def first(limit = 1)
42
- if limit == 0
43
- ''
44
- elsif limit >= size
45
- self
46
- else
47
- mb_chars[0...limit].to_s
48
- end
49
- end
50
-
51
- # Returns the last character of the string or the last +limit+ characters.
52
- #
53
- # Examples:
54
- # "hello".last # => "o"
55
- # "hello".last(2) # => "lo"
56
- # "hello".last(10) # => "hello"
57
- def last(limit = 1)
58
- if limit == 0
59
- ''
60
- elsif limit >= size
61
- self
62
- else
63
- mb_chars[(-limit)..-1].to_s
64
- end
65
- end
66
- else
67
- def at(position)
68
- self[position]
69
- end
70
-
71
- def from(position)
72
- self[position..-1]
73
- end
74
-
75
- def to(position)
76
- self[0..position]
77
- end
78
-
79
- def first(limit = 1)
80
- if limit == 0
81
- ''
82
- elsif limit >= size
83
- self
84
- else
85
- to(limit - 1)
86
- end
87
- end
88
-
89
- def last(limit = 1)
90
- if limit == 0
91
- ''
92
- elsif limit >= size
93
- self
94
- else
95
- from(-limit)
96
- end
97
- end
98
- end
99
- end
@@ -1,6 +0,0 @@
1
- class String
2
- # Enable more predictable duck-typing on String-like classes. See <tt>Object#acts_like?</tt>.
3
- def acts_like_string?
4
- true
5
- end
6
- end
@@ -1,49 +0,0 @@
1
- require_relative 'multibyte'
2
-
3
- class String
4
- # Returns the string, first removing all whitespace on both ends of
5
- # the string, and then changing remaining consecutive whitespace
6
- # groups into one space each.
7
- #
8
- # Examples:
9
- # %{ Multi-line
10
- # string }.squish # => "Multi-line string"
11
- # " foo bar \n \t boo".squish # => "foo bar boo"
12
- def squish
13
- dup.squish!
14
- end
15
-
16
- # Performs a destructive squish. See String#squish.
17
- def squish!
18
- strip!
19
- gsub!(/\s+/, ' ')
20
- self
21
- end
22
-
23
- # Truncates a given +text+ after a given <tt>length</tt> if +text+ is longer than <tt>length</tt>:
24
- #
25
- # "Once upon a time in a world far far away".truncate(27)
26
- # # => "Once upon a time in a wo..."
27
- #
28
- # Pass a <tt>:separator</tt> to truncate +text+ at a natural break:
29
- #
30
- # "Once upon a time in a world far far away".truncate(27, :separator => ' ')
31
- # # => "Once upon a time in a..."
32
- #
33
- # The last characters will be replaced with the <tt>:omission</tt> string (defaults to "...")
34
- # for a total length not exceeding <tt>:length</tt>:
35
- #
36
- # "And they found that many people were sleeping better.".truncate(25, :omission => "... (continued)")
37
- # # => "And they f... (continued)"
38
- def truncate(length, options = {})
39
- text = self.dup
40
- options[:omission] ||= "..."
41
-
42
- length_with_room_for_omission = length - options[:omission].mb_chars.length
43
- chars = text.mb_chars
44
- stop = options[:separator] ?
45
- (chars.rindex(options[:separator].mb_chars, length_with_room_for_omission) || length_with_room_for_omission) : length_with_room_for_omission
46
-
47
- (chars.length > length ? chars[0...stop] + options[:omission] : text).to_s
48
- end
49
- end
@@ -1,72 +0,0 @@
1
- # encoding: utf-8
2
- require_relative '../../multibyte'
3
-
4
- class String
5
- if RUBY_VERSION >= "1.9"
6
- # == Multibyte proxy
7
- #
8
- # +mb_chars+ is a multibyte safe proxy for string methods.
9
- #
10
- # In Ruby 1.8 and older it creates and returns an instance of the ActiveSupport::Multibyte::Chars class which
11
- # encapsulates the original string. A Unicode safe version of all the String methods are defined on this proxy
12
- # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string.
13
- #
14
- # name = 'Claus Müller'
15
- # name.reverse # => "rell??M sualC"
16
- # name.length # => 13
17
- #
18
- # name.mb_chars.reverse.to_s # => "rellüM sualC"
19
- # name.mb_chars.length # => 12
20
- #
21
- # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware. This means that
22
- # it becomes easy to run one version of your code on multiple Ruby versions.
23
- #
24
- # == Method chaining
25
- #
26
- # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
27
- # method chaining on the result of any of these methods.
28
- #
29
- # name.mb_chars.reverse.length # => 12
30
- #
31
- # == Interoperability and configuration
32
- #
33
- # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
34
- # String and Char work like expected. The bang! methods change the internal string representation in the Chars
35
- # object. Interoperability problems can be resolved easily with a +to_s+ call.
36
- #
37
- # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars. For
38
- # information about how to change the default Multibyte behavior see ActiveSupport::Multibyte.
39
- def mb_chars
40
- if ActiveSupport::Multibyte.proxy_class.consumes?(self)
41
- ActiveSupport::Multibyte.proxy_class.new(self)
42
- else
43
- self
44
- end
45
- end
46
-
47
- def is_utf8?
48
- case encoding
49
- when Encoding::UTF_8
50
- valid_encoding?
51
- when Encoding::ASCII_8BIT, Encoding::US_ASCII
52
- dup.force_encoding(Encoding::UTF_8).valid_encoding?
53
- else
54
- false
55
- end
56
- end
57
- else
58
- def mb_chars
59
- if ActiveSupport::Multibyte.proxy_class.wants?(self)
60
- ActiveSupport::Multibyte.proxy_class.new(self)
61
- else
62
- self
63
- end
64
- end
65
-
66
- # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
67
- # them), returns false otherwise.
68
- def is_utf8?
69
- ActiveSupport::Multibyte::Chars.consumes?(self)
70
- end
71
- end
72
- end
@@ -1,181 +0,0 @@
1
- require_relative 'core_ext/hash/keys'
2
-
3
- # This class has dubious semantics and we only have it so that
4
- # people can write <tt>params[:key]</tt> instead of <tt>params['key']</tt>
5
- # and they get the same value for both keys.
6
- unless defined? ActiveSupport::HashWithIndifferentAccess
7
-
8
- module ActiveSupport
9
- class HashWithIndifferentAccess < Hash
10
-
11
- # Always returns true, so that <tt>Array#extract_options!</tt> finds members of this class.
12
- def extractable_options?
13
- true
14
- end
15
-
16
- def with_indifferent_access
17
- dup
18
- end
19
-
20
- def nested_under_indifferent_access
21
- self
22
- end
23
-
24
- def initialize(constructor = {})
25
- if constructor.is_a?(Hash)
26
- super()
27
- update(constructor)
28
- else
29
- super(constructor)
30
- end
31
- end
32
-
33
- def default(key = nil)
34
- if key.is_a?(Symbol) && include?(key = key.to_s)
35
- self[key]
36
- else
37
- super
38
- end
39
- end
40
-
41
- def self.new_from_hash_copying_default(hash)
42
- new(hash).tap do |new_hash|
43
- new_hash.default = hash.default
44
- end
45
- end
46
-
47
- alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
48
- alias_method :regular_update, :update unless method_defined?(:regular_update)
49
-
50
- # Assigns a new value to the hash:
51
- #
52
- # hash = HashWithIndifferentAccess.new
53
- # hash[:key] = "value"
54
- #
55
- def []=(key, value)
56
- regular_writer(convert_key(key), convert_value(value))
57
- end
58
-
59
- alias_method :store, :[]=
60
-
61
- # Updates the instantized hash with values from the second:
62
- #
63
- # hash_1 = HashWithIndifferentAccess.new
64
- # hash_1[:key] = "value"
65
- #
66
- # hash_2 = HashWithIndifferentAccess.new
67
- # hash_2[:key] = "New Value!"
68
- #
69
- # hash_1.update(hash_2) # => {"key"=>"New Value!"}
70
- #
71
- def update(other_hash)
72
- if other_hash.is_a? HashWithIndifferentAccess
73
- super(other_hash)
74
- else
75
- other_hash.each_pair { |key, value| regular_writer(convert_key(key), convert_value(value)) }
76
- self
77
- end
78
- end
79
-
80
- alias_method :merge!, :update
81
-
82
- # Checks the hash for a key matching the argument passed in:
83
- #
84
- # hash = HashWithIndifferentAccess.new
85
- # hash["key"] = "value"
86
- # hash.key? :key # => true
87
- # hash.key? "key" # => true
88
- #
89
- def key?(key)
90
- super(convert_key(key))
91
- end
92
-
93
- alias_method :include?, :key?
94
- alias_method :has_key?, :key?
95
- alias_method :member?, :key?
96
-
97
- # Same as <tt>Hash#fetch</tt> where the key passed as argument can be
98
- # either a string or a symbol:
99
- #
100
- # counters = HashWithIndifferentAccess.new
101
- # counters[:foo] = 1
102
- #
103
- # counters.fetch("foo") # => 1
104
- # counters.fetch(:bar, 0) # => 0
105
- # counters.fetch(:bar) {|key| 0} # => 0
106
- # counters.fetch(:zoo) # => KeyError: key not found: "zoo"
107
- #
108
- def fetch(key, *extras)
109
- super(convert_key(key), *extras)
110
- end
111
-
112
- # Returns an array of the values at the specified indices:
113
- #
114
- # hash = HashWithIndifferentAccess.new
115
- # hash[:a] = "x"
116
- # hash[:b] = "y"
117
- # hash.values_at("a", "b") # => ["x", "y"]
118
- #
119
- def values_at(*indices)
120
- indices.collect {|key| self[convert_key(key)]}
121
- end
122
-
123
- # Returns an exact copy of the hash.
124
- def dup
125
- self.class.new(self).tap do |new_hash|
126
- new_hash.default = default
127
- end
128
- end
129
-
130
- # Merges the instantized and the specified hashes together, giving precedence to the values from the second hash.
131
- # Does not overwrite the existing hash.
132
- def merge(hash)
133
- self.dup.update(hash)
134
- end
135
-
136
- # Performs the opposite of merge, with the keys and values from the first hash taking precedence over the second.
137
- # This overloaded definition prevents returning a regular hash, if reverse_merge is called on a <tt>HashWithDifferentAccess</tt>.
138
- def reverse_merge(other_hash)
139
- super self.class.new_from_hash_copying_default(other_hash)
140
- end
141
-
142
- def reverse_merge!(other_hash)
143
- replace(reverse_merge( other_hash ))
144
- end
145
-
146
- # Removes a specified key from the hash.
147
- def delete(key)
148
- super(convert_key(key))
149
- end
150
-
151
- def stringify_keys!; self end
152
- def stringify_keys; dup end
153
- undef :symbolize_keys!
154
- def symbolize_keys; to_hash.symbolize_keys end
155
- def to_options!; self end
156
-
157
- # Convert to a Hash with String keys.
158
- def to_hash
159
- Hash.new(default).merge!(self)
160
- end
161
-
162
- protected
163
- def convert_key(key)
164
- key.kind_of?(Symbol) ? key.to_s : key
165
- end
166
-
167
- def convert_value(value)
168
- if value.is_a? Hash
169
- value.nested_under_indifferent_access
170
- elsif value.is_a?(Array)
171
- value.dup.replace(value.map { |e| convert_value(e) })
172
- else
173
- value
174
- end
175
- end
176
- end
177
- end
178
-
179
- HashWithIndifferentAccess = ActiveSupport::HashWithIndifferentAccess
180
-
181
- end
@@ -1,44 +0,0 @@
1
- # encoding: utf-8
2
- require_relative 'core_ext/module/attribute_accessors'
3
-
4
- module ActiveSupport #:nodoc:
5
- module Multibyte
6
- autoload :EncodingError, 'bioinform/support/third_part/active_support/multibyte/exceptions'
7
- autoload :Chars, 'bioinform/support/third_part/active_support/multibyte/chars'
8
- autoload :Unicode, 'bioinform/support/third_part/active_support/multibyte/unicode'
9
-
10
- # The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
11
- # class so you can support other encodings. See the ActiveSupport::Multibyte::Chars implementation for
12
- # an example how to do this.
13
- #
14
- # Example:
15
- # ActiveSupport::Multibyte.proxy_class = CharsForUTF32
16
- def self.proxy_class=(klass)
17
- @proxy_class = klass
18
- end
19
-
20
- # Returns the current proxy class
21
- def self.proxy_class
22
- @proxy_class ||= ActiveSupport::Multibyte::Chars
23
- end
24
-
25
- # Regular expressions that describe valid byte sequences for a character
26
- VALID_CHARACTER = {
27
- # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
28
- 'UTF-8' => /\A(?:
29
- [\x00-\x7f] |
30
- [\xc2-\xdf] [\x80-\xbf] |
31
- \xe0 [\xa0-\xbf] [\x80-\xbf] |
32
- [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
33
- \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
34
- [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
35
- \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
36
- # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
37
- 'Shift_JIS' => /\A(?:
38
- [\x00-\x7e\xa1-\xdf] |
39
- [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
40
- }
41
- end
42
- end
43
-
44
- require_relative 'multibyte/utils'