bioinform 0.1.12 → 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +16 -16
  4. data/LICENSE +21 -21
  5. data/README.md +35 -35
  6. data/Rakefile +4 -4
  7. data/TODO.txt +37 -37
  8. data/bin/merge_into_collection +3 -3
  9. data/bin/pcm2pwm +3 -3
  10. data/bin/split_motifs +3 -3
  11. data/bioinform.gemspec +19 -19
  12. data/lib/bioinform/cli/convert_motif.rb +107 -107
  13. data/lib/bioinform/cli/merge_into_collection.rb +79 -79
  14. data/lib/bioinform/cli/pcm2pwm.rb +46 -46
  15. data/lib/bioinform/cli/split_motifs.rb +46 -46
  16. data/lib/bioinform/cli.rb +29 -29
  17. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
  18. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
  19. data/lib/bioinform/data_models/collection.rb +74 -74
  20. data/lib/bioinform/data_models/motif.rb +55 -55
  21. data/lib/bioinform/data_models/pcm.rb +23 -23
  22. data/lib/bioinform/data_models/pm.rb +169 -169
  23. data/lib/bioinform/data_models/ppm.rb +9 -9
  24. data/lib/bioinform/data_models/pwm.rb +55 -55
  25. data/lib/bioinform/data_models.rb +10 -10
  26. data/lib/bioinform/formatters/raw_formatter.rb +40 -40
  27. data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
  28. data/lib/bioinform/formatters.rb +1 -1
  29. data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
  30. data/lib/bioinform/parsers/parser.rb +87 -87
  31. data/lib/bioinform/parsers/splittable_parser.rb +56 -56
  32. data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
  33. data/lib/bioinform/parsers/string_parser.rb +71 -71
  34. data/lib/bioinform/parsers/trivial_parser.rb +33 -33
  35. data/lib/bioinform/parsers/yaml_parser.rb +34 -34
  36. data/lib/bioinform/parsers.rb +6 -6
  37. data/lib/bioinform/support/array_product.rb +5 -5
  38. data/lib/bioinform/support/array_zip.rb +5 -5
  39. data/lib/bioinform/support/collect_hash.rb +6 -6
  40. data/lib/bioinform/support/deep_dup.rb +4 -4
  41. data/lib/bioinform/support/delete_many.rb +13 -13
  42. data/lib/bioinform/support/inverf.rb +12 -12
  43. data/lib/bioinform/support/multiline_squish.rb +5 -5
  44. data/lib/bioinform/support/parameters.rb +27 -27
  45. data/lib/bioinform/support/partial_sums.rb +15 -15
  46. data/lib/bioinform/support/same_by.rb +12 -12
  47. data/lib/bioinform/support/strip_doc.rb +8 -8
  48. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
  49. data/lib/bioinform/support.rb +17 -17
  50. data/lib/bioinform/version.rb +3 -3
  51. data/lib/bioinform.rb +10 -10
  52. data/spec/cli/cli_spec.rb +13 -13
  53. data/spec/cli/convert_motif_spec.rb +106 -106
  54. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
  55. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
  56. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
  57. data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
  58. data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
  59. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
  60. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
  61. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
  62. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
  63. data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
  64. data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
  65. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
  66. data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
  67. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
  68. data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
  69. data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
  70. data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
  71. data/spec/cli/data/split_motifs/collection.yaml +188 -188
  72. data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
  73. data/spec/cli/merge_into_collection_spec.rb +99 -99
  74. data/spec/cli/pcm2pwm_spec.rb +79 -79
  75. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
  76. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
  77. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
  78. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
  79. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  80. data/spec/cli/split_motifs_spec.rb +76 -76
  81. data/spec/data_models/collection_spec.rb +97 -97
  82. data/spec/data_models/motif_spec.rb +223 -223
  83. data/spec/data_models/pcm_spec.rb +55 -55
  84. data/spec/data_models/pm_spec.rb +359 -359
  85. data/spec/data_models/ppm_spec.rb +7 -7
  86. data/spec/data_models/pwm_spec.rb +82 -82
  87. data/spec/fabricators/collection_fabricator.rb +7 -7
  88. data/spec/fabricators/motif_fabricator.rb +32 -32
  89. data/spec/fabricators/motif_formats_fabricator.rb +124 -124
  90. data/spec/fabricators/pcm_fabricator.rb +24 -24
  91. data/spec/fabricators/pm_fabricator.rb +51 -51
  92. data/spec/fabricators/ppm_fabricator.rb +13 -13
  93. data/spec/fabricators/pwm_fabricator.rb +16 -16
  94. data/spec/parsers/parser_spec.rb +152 -152
  95. data/spec/parsers/string_fantom_parser_spec.rb +69 -69
  96. data/spec/parsers/string_parser_spec.rb +76 -76
  97. data/spec/parsers/trivial_parser_spec.rb +63 -63
  98. data/spec/parsers/yaml_parser_spec.rb +50 -50
  99. data/spec/spec_helper.rb +10 -10
  100. data/spec/spec_helper_source.rb +59 -59
  101. data/spec/support/advanced_scan_spec.rb +31 -31
  102. data/spec/support/array_product_spec.rb +14 -14
  103. data/spec/support/array_zip_spec.rb +14 -14
  104. data/spec/support/collect_hash_spec.rb +14 -14
  105. data/spec/support/delete_many_spec.rb +43 -43
  106. data/spec/support/inverf_spec.rb +18 -18
  107. data/spec/support/multiline_squish_spec.rb +24 -24
  108. data/spec/support/partial_sums_spec.rb +30 -30
  109. data/spec/support/same_by_spec.rb +35 -35
  110. metadata +3 -3
@@ -1,56 +1,56 @@
1
- require 'ostruct'
2
- require_relative '../support/third_part/active_support/core_ext/object/try'
3
- require_relative '../support/parameters'
4
- module Bioinform
5
- class Motif
6
- include Parameters
7
- make_parameters :pcm, :pwm, :ppm, :name, :original_data_model
8
-
9
- # 0)Motif.new()
10
- # 1)Motif.new(pcm: ..., pwm: ..., name: ...,threshold: ...)
11
- # 2)Motif.new(my_pcm)
12
- # 3)Motif.new(pm: my_pcm, threshold: ...)
13
- # 2) and 3) cases will automatically choose data model
14
- #### What if pm already is a Motif
15
- def initialize(parameters = {})
16
- case parameters
17
- when PM
18
- pm = parameters
19
- motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
20
- self.original_data_model = motif_type
21
- set_parameters(motif_type => pm)
22
- when Hash
23
- if parameters.has_key?(:pm) && parameters[:pm].is_a?(PM)
24
- pm = parameters.delete(:pm)
25
- motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
26
- self.original_data_model = motif_type
27
- set_parameters(motif_type => pm)
28
- end
29
- set_parameters(parameters)
30
- else
31
- raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
32
- end
33
- end
34
-
35
- def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
36
- #def pcm; parameters.pcm; end
37
- def pwm; parameters.pwm || pcm.try(:to_pwm); end
38
- def ppm; parameters.ppm || pcm.try(:to_ppm); end
39
- #def pcm=(pcm); parameters.pcm = pcm; end
40
- #def pwm=(pwm); parameters.pwm = pwm; end
41
- #def ppm=(ppm); parameters.ppm = ppm; end
42
- def name; parameters.name || pm.name; end
43
-
44
- def method_missing(meth, *args)
45
- parameters.__send__(meth, *args)
46
- end
47
-
48
- def ==(other)
49
- parameters == other.parameters
50
- end
51
-
52
- def to_s
53
- parameters.to_s
54
- end
55
- end
1
+ require 'ostruct'
2
+ require_relative '../support/third_part/active_support/core_ext/object/try'
3
+ require_relative '../support/parameters'
4
+ module Bioinform
5
+ class Motif
6
+ include Parameters
7
+ make_parameters :pcm, :pwm, :ppm, :name, :original_data_model
8
+
9
+ # 0)Motif.new()
10
+ # 1)Motif.new(pcm: ..., pwm: ..., name: ...,threshold: ...)
11
+ # 2)Motif.new(my_pcm)
12
+ # 3)Motif.new(pm: my_pcm, threshold: ...)
13
+ # 2) and 3) cases will automatically choose data model
14
+ #### What if pm already is a Motif
15
+ def initialize(parameters = {})
16
+ case parameters
17
+ when PM
18
+ pm = parameters
19
+ motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
20
+ self.original_data_model = motif_type
21
+ set_parameters(motif_type => pm)
22
+ when Hash
23
+ if parameters.has_key?(:pm) && parameters[:pm].is_a?(PM)
24
+ pm = parameters.delete(:pm)
25
+ motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
26
+ self.original_data_model = motif_type
27
+ set_parameters(motif_type => pm)
28
+ end
29
+ set_parameters(parameters)
30
+ else
31
+ raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
32
+ end
33
+ end
34
+
35
+ def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
36
+ #def pcm; parameters.pcm; end
37
+ def pwm; parameters.pwm || pcm.try(:to_pwm); end
38
+ def ppm; parameters.ppm || pcm.try(:to_ppm); end
39
+ #def pcm=(pcm); parameters.pcm = pcm; end
40
+ #def pwm=(pwm); parameters.pwm = pwm; end
41
+ #def ppm=(ppm); parameters.ppm = ppm; end
42
+ def name; parameters.name || pm.name; end
43
+
44
+ def method_missing(meth, *args)
45
+ parameters.__send__(meth, *args)
46
+ end
47
+
48
+ def ==(other)
49
+ parameters == other.parameters
50
+ end
51
+
52
+ def to_s
53
+ parameters.to_s
54
+ end
55
+ end
56
56
  end
@@ -1,24 +1,24 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
3
- require_relative '../conversion_algorithms/pcm2ppm_converter'
4
- require_relative '../conversion_algorithms/pcm2pwm_converter'
5
-
6
- module Bioinform
7
- class PCM < PM
8
- def count
9
- matrix.first.inject(&:+)
10
- end
11
-
12
- def to_pcm
13
- self
14
- end
15
-
16
- def to_pwm(pseudocount = Math.log(count))
17
- ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
18
- end
19
-
20
- def to_ppm
21
- ConversionAlgorithms::PCM2PPMConverter.convert(self)
22
- end
23
- end
1
+ require_relative '../support'
2
+ require_relative '../data_models'
3
+ require_relative '../conversion_algorithms/pcm2ppm_converter'
4
+ require_relative '../conversion_algorithms/pcm2pwm_converter'
5
+
6
+ module Bioinform
7
+ class PCM < PM
8
+ def count
9
+ matrix.first.inject(&:+)
10
+ end
11
+
12
+ def to_pcm
13
+ self
14
+ end
15
+
16
+ def to_pwm(pseudocount = Math.log(count))
17
+ ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
18
+ end
19
+
20
+ def to_ppm
21
+ ConversionAlgorithms::PCM2PPMConverter.convert(self)
22
+ end
23
+ end
24
24
  end
@@ -1,170 +1,170 @@
1
- require 'ostruct'
2
- require_relative '../support'
3
- require_relative '../parsers'
4
- require_relative '../formatters'
5
-
6
- module Bioinform
7
- IndexByLetter = { 'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3,
8
- 'a' => 0, 'c' => 1, 'g' => 2, 't' => 3, a: 0, c: 1, g: 2, t: 3}
9
- LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
10
-
11
- class PM
12
- attr_accessor :matrix, :parameters
13
-
14
- include Parameters
15
- make_parameters :name, :background # , :tags
16
-
17
- # def mark(tag)
18
- # tags << tag
19
- # end
20
-
21
- # def tagged?(tag)
22
- # tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
23
- # end
24
-
25
- def self.choose_parser(input)
26
- [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
27
- self.new(input, parser) rescue nil
28
- end
29
- end
30
-
31
- def self.split_on_motifs(input)
32
- parser = choose_parser(input)
33
- raise ParsingError, "No parser can parse given input" unless parser
34
- parser.split_on_motifs(input, self)
35
- end
36
-
37
- def initialize(input, parser = nil)
38
- @parameters = OpenStruct.new
39
- parser ||= self.class.choose_parser(input)
40
- raise 'No one parser can process input' unless parser
41
- result = parser.new(input).parse
42
- @matrix = result.matrix
43
- self.name = result.name
44
- # self.tags = result.tags || []
45
- self.background = result.background || [1, 1, 1, 1]
46
- raise 'matrix not valid' unless valid?
47
- end
48
-
49
- def ==(other)
50
- @matrix == other.matrix && background == other.background && name == other.name
51
- rescue
52
- false
53
- end
54
-
55
- def self.valid_matrix?(matrix)
56
- matrix.is_a?(Array) &&
57
- ! matrix.empty? &&
58
- matrix.all?{|pos| pos.is_a?(Array)} &&
59
- matrix.all?{|pos| pos.size == 4} &&
60
- matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
61
- rescue
62
- false
63
- end
64
-
65
- def valid?
66
- self.class.valid_matrix?(@matrix)
67
- end
68
-
69
- def each_position
70
- if block_given?
71
- matrix.each{|pos| yield pos}
72
- else
73
- self.to_enum(:each_position)
74
- end
75
- end
76
-
77
- def length
78
- @matrix.length
79
- end
80
- alias_method :size, :length
81
-
82
- def to_s(options = {}, formatter = RawFormatter)
83
- formatter.new(self, options).to_s
84
- end
85
-
86
- def pretty_string(options = {})
87
- default_options = {with_name: true, letters_as_rows: false}
88
-
89
- return to_s(options) if options[:letters_as_rows]
90
-
91
- options = default_options.merge(options)
92
- header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
93
- matrix_rows = each_position.map do |position|
94
- position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
95
- end
96
-
97
- matrix_str = matrix_rows.join("\n")
98
-
99
- if options[:with_name] && name
100
- name + "\n" + header + matrix_str
101
- else
102
- header + matrix_str
103
- end
104
- end
105
-
106
- def to_hash
107
- hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
108
- [ letter, @matrix.map{|pos| pos[letter_index]} ]
109
- end
110
- hsh.with_indifferent_access
111
- end
112
-
113
- def self.zero_column
114
- [0, 0, 0, 0]
115
- end
116
-
117
- def reverse_complement!
118
- @matrix.reverse!.map!(&:reverse!)
119
- self
120
- end
121
- def left_augment!(n)
122
- n.times{ @matrix.unshift(self.class.zero_column) }
123
- self
124
- end
125
- def right_augment!(n)
126
- n.times{ @matrix.push(self.class.zero_column) }
127
- self
128
- end
129
-
130
- def discrete!(rate)
131
- @matrix.map!{|position| position.map{|element| (element * rate).ceil}}
132
- self
133
- end
134
-
135
- def vocabulary_volume
136
- background.inject(&:+) ** length
137
- end
138
-
139
- def probability
140
- sum = background.inject(0.0, &:+)
141
- background.map{|element| element.to_f / sum}
142
- end
143
-
144
- def reverse_complement
145
- dup.reverse_complement!
146
- end
147
- def left_augment(n)
148
- dup.left_augment!(n)
149
- end
150
- def right_augment(n)
151
- dup.right_augment!(n)
152
- end
153
- def discrete(rate)
154
- dup.discrete!(rate)
155
- end
156
- def dup
157
- deep_dup
158
- end
159
-
160
- def as_pcm
161
- PCM.new(get_parameters.merge(matrix: matrix))
162
- end
163
- def as_ppm
164
- PPM.new(get_parameters.merge(matrix: matrix))
165
- end
166
- def as_pwm
167
- PWM.new(get_parameters.merge(matrix: matrix))
168
- end
169
- end
1
+ require 'ostruct'
2
+ require_relative '../support'
3
+ require_relative '../parsers'
4
+ require_relative '../formatters'
5
+
6
+ module Bioinform
7
+ IndexByLetter = { 'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3,
8
+ 'a' => 0, 'c' => 1, 'g' => 2, 't' => 3, a: 0, c: 1, g: 2, t: 3}
9
+ LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
10
+
11
+ class PM
12
+ attr_accessor :matrix, :parameters
13
+
14
+ include Parameters
15
+ make_parameters :name, :background # , :tags
16
+
17
+ # def mark(tag)
18
+ # tags << tag
19
+ # end
20
+
21
+ # def tagged?(tag)
22
+ # tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
23
+ # end
24
+
25
+ def self.choose_parser(input)
26
+ [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
27
+ self.new(input, parser) rescue nil
28
+ end
29
+ end
30
+
31
+ def self.split_on_motifs(input)
32
+ parser = choose_parser(input)
33
+ raise ParsingError, "No parser can parse given input" unless parser
34
+ parser.split_on_motifs(input, self)
35
+ end
36
+
37
+ def initialize(input, parser = nil)
38
+ @parameters = OpenStruct.new
39
+ parser ||= self.class.choose_parser(input)
40
+ raise 'No one parser can process input' unless parser
41
+ result = parser.new(input).parse
42
+ @matrix = result.matrix
43
+ self.name = result.name
44
+ # self.tags = result.tags || []
45
+ self.background = result.background || [1, 1, 1, 1]
46
+ raise 'matrix not valid' unless valid?
47
+ end
48
+
49
+ def ==(other)
50
+ @matrix == other.matrix && background == other.background && name == other.name
51
+ rescue
52
+ false
53
+ end
54
+
55
+ def self.valid_matrix?(matrix)
56
+ matrix.is_a?(Array) &&
57
+ ! matrix.empty? &&
58
+ matrix.all?{|pos| pos.is_a?(Array)} &&
59
+ matrix.all?{|pos| pos.size == 4} &&
60
+ matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
61
+ rescue
62
+ false
63
+ end
64
+
65
+ def valid?
66
+ self.class.valid_matrix?(@matrix)
67
+ end
68
+
69
+ def each_position
70
+ if block_given?
71
+ matrix.each{|pos| yield pos}
72
+ else
73
+ self.to_enum(:each_position)
74
+ end
75
+ end
76
+
77
+ def length
78
+ @matrix.length
79
+ end
80
+ alias_method :size, :length
81
+
82
+ def to_s(options = {}, formatter = RawFormatter)
83
+ formatter.new(self, options).to_s
84
+ end
85
+
86
+ def pretty_string(options = {})
87
+ default_options = {with_name: true, letters_as_rows: false}
88
+
89
+ return to_s(options) if options[:letters_as_rows]
90
+
91
+ options = default_options.merge(options)
92
+ header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
93
+ matrix_rows = each_position.map do |position|
94
+ position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
95
+ end
96
+
97
+ matrix_str = matrix_rows.join("\n")
98
+
99
+ if options[:with_name] && name
100
+ name + "\n" + header + matrix_str
101
+ else
102
+ header + matrix_str
103
+ end
104
+ end
105
+
106
+ def to_hash
107
+ hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
108
+ [ letter, @matrix.map{|pos| pos[letter_index]} ]
109
+ end
110
+ hsh.with_indifferent_access
111
+ end
112
+
113
+ def self.zero_column
114
+ [0, 0, 0, 0]
115
+ end
116
+
117
+ def reverse_complement!
118
+ @matrix.reverse!.map!(&:reverse!)
119
+ self
120
+ end
121
+ def left_augment!(n)
122
+ n.times{ @matrix.unshift(self.class.zero_column) }
123
+ self
124
+ end
125
+ def right_augment!(n)
126
+ n.times{ @matrix.push(self.class.zero_column) }
127
+ self
128
+ end
129
+
130
+ def discrete!(rate)
131
+ @matrix.map!{|position| position.map{|element| (element * rate).ceil}}
132
+ self
133
+ end
134
+
135
+ def vocabulary_volume
136
+ background.inject(&:+) ** length
137
+ end
138
+
139
+ def probability
140
+ sum = background.inject(0.0, &:+)
141
+ background.map{|element| element.to_f / sum}
142
+ end
143
+
144
+ def reverse_complement
145
+ dup.reverse_complement!
146
+ end
147
+ def left_augment(n)
148
+ dup.left_augment!(n)
149
+ end
150
+ def right_augment(n)
151
+ dup.right_augment!(n)
152
+ end
153
+ def discrete(rate)
154
+ dup.discrete!(rate)
155
+ end
156
+ def dup
157
+ deep_dup
158
+ end
159
+
160
+ def as_pcm
161
+ PCM.new(get_parameters.merge(matrix: matrix))
162
+ end
163
+ def as_ppm
164
+ PPM.new(get_parameters.merge(matrix: matrix))
165
+ end
166
+ def as_pwm
167
+ PWM.new(get_parameters.merge(matrix: matrix))
168
+ end
169
+ end
170
170
  end
@@ -1,10 +1,10 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
3
-
4
- module Bioinform
5
- class PPM < PM
6
- def to_ppm
7
- self
8
- end
9
- end
1
+ require_relative '../support'
2
+ require_relative '../data_models'
3
+
4
+ module Bioinform
5
+ class PPM < PM
6
+ def to_ppm
7
+ self
8
+ end
9
+ end
10
10
  end
@@ -1,56 +1,56 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
3
- module Bioinform
4
- class PWM < PM
5
- def score_mean
6
- each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
7
- end
8
- def score_variance
9
- each_position.inject(0) do |variance, position|
10
- variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
11
- position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
12
- end
13
- end
14
-
15
- def threshold_gauss_estimation(pvalue)
16
- sigma = Math.sqrt(score_variance)
17
- n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
18
- score_mean + n_ * sigma
19
- end
20
-
21
- def score(word)
22
- raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
23
- #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
24
- (0...length).map do |pos|
25
- letter = word[pos]
26
- if IndexByLetter[letter]
27
- matrix[pos][IndexByLetter[letter]]
28
- elsif letter == 'N'
29
- matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
30
- else
31
- raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
32
- end
33
- end.inject(0, &:+).to_f
34
- end
35
-
36
- def to_pwm
37
- self
38
- end
39
-
40
- def best_score
41
- best_suffix(0)
42
- end
43
- def worst_score
44
- worst_suffix(0)
45
- end
46
-
47
- # best score of suffix s[i..l]
48
- def best_suffix(i)
49
- @matrix[i...length].map(&:max).inject(0.0, &:+)
50
- end
51
-
52
- def worst_suffix(i)
53
- @matrix[i...length].map(&:min).inject(0.0, &:+)
54
- end
55
- end
1
+ require_relative '../support'
2
+ require_relative '../data_models'
3
+ module Bioinform
4
+ class PWM < PM
5
+ def score_mean
6
+ each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
7
+ end
8
+ def score_variance
9
+ each_position.inject(0) do |variance, position|
10
+ variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
11
+ position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
12
+ end
13
+ end
14
+
15
+ def threshold_gauss_estimation(pvalue)
16
+ sigma = Math.sqrt(score_variance)
17
+ n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
18
+ score_mean + n_ * sigma
19
+ end
20
+
21
+ def score(word)
22
+ raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
23
+ #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
24
+ (0...length).map do |pos|
25
+ letter = word[pos]
26
+ if IndexByLetter[letter]
27
+ matrix[pos][IndexByLetter[letter]]
28
+ elsif letter == 'N'
29
+ matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
30
+ else
31
+ raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
32
+ end
33
+ end.inject(0, &:+).to_f
34
+ end
35
+
36
+ def to_pwm
37
+ self
38
+ end
39
+
40
+ def best_score
41
+ best_suffix(0)
42
+ end
43
+ def worst_score
44
+ worst_suffix(0)
45
+ end
46
+
47
+ # best score of suffix s[i..l]
48
+ def best_suffix(i)
49
+ @matrix[i...length].map(&:max).inject(0.0, &:+)
50
+ end
51
+
52
+ def worst_suffix(i)
53
+ @matrix[i...length].map(&:min).inject(0.0, &:+)
54
+ end
55
+ end
56
56
  end
@@ -1,11 +1,11 @@
1
- require_relative 'parsers'
2
-
3
- require_relative 'data_models/pm'
4
- require_relative 'data_models/pcm'
5
- require_relative 'data_models/ppm'
6
- require_relative 'data_models/pwm'
7
-
8
- require_relative 'data_models/collection'
9
-
10
- #require_relative 'bioinform/data_models/iupac_word'
1
+ require_relative 'parsers'
2
+
3
+ require_relative 'data_models/pm'
4
+ require_relative 'data_models/pcm'
5
+ require_relative 'data_models/ppm'
6
+ require_relative 'data_models/pwm'
7
+
8
+ require_relative 'data_models/collection'
9
+
10
+ #require_relative 'bioinform/data_models/iupac_word'
11
11
  #require_relative 'bioinform/data_models/iupac_wordset'