macroape 4.0.2 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.2
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-30 00:00:00.000000000 Z
11
+ date: 2014-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bioinform
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.1.10
19
+ version: 0.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.1.10
26
+ version: 0.2.0
27
27
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
28
28
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
29
29
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -67,12 +67,18 @@ files:
67
67
  - lib/macroape/cli/find_threshold.rb
68
68
  - lib/macroape/cli/preprocess_collection.rb
69
69
  - lib/macroape/cli/scan_collection.rb
70
- - lib/macroape/counting.rb
70
+ - lib/macroape/collection.rb
71
+ - lib/macroape/motif_with_thresholds.rb
71
72
  - lib/macroape/pwm_compare.rb
72
73
  - lib/macroape/pwm_compare_aligned.rb
74
+ - lib/macroape/pwm_counting.rb
75
+ - lib/macroape/support/inverf.rb
76
+ - lib/macroape/support/partial_sums.rb
73
77
  - lib/macroape/version.rb
74
78
  - macroape.gemspec
75
79
  - spec/count_distribution_spec.rb
80
+ - spec/inverf_spec.rb
81
+ - spec/partial_sums_spec.rb
76
82
  - spec/spec_helper.rb
77
83
  - test/align_motifs_test.rb
78
84
  - test/data/AHR_si.pwm
@@ -88,8 +94,6 @@ files:
88
94
  - test/data/SP1_f1.pwm
89
95
  - test/data/SP1_f1_revcomp.pcm
90
96
  - test/data/SP1_f1_revcomp.pwm
91
- - test/data/collection_pcm_without_thresholds.yaml
92
- - test/data/collection_without_thresholds.yaml
93
97
  - test/data/medium_motif.pwm
94
98
  - test/data/short_motif.pwm
95
99
  - test/data/test_collection.yaml
@@ -128,12 +132,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
132
  version: '0'
129
133
  requirements: []
130
134
  rubyforge_project:
131
- rubygems_version: 2.1.5
135
+ rubygems_version: 2.3.0
132
136
  signing_key:
133
137
  specification_version: 4
134
138
  summary: PWM comparison tool using MACROAPE approach
135
139
  test_files:
136
140
  - spec/count_distribution_spec.rb
141
+ - spec/inverf_spec.rb
142
+ - spec/partial_sums_spec.rb
137
143
  - spec/spec_helper.rb
138
144
  - test/align_motifs_test.rb
139
145
  - test/data/AHR_si.pwm
@@ -149,8 +155,6 @@ test_files:
149
155
  - test/data/SP1_f1.pwm
150
156
  - test/data/SP1_f1_revcomp.pcm
151
157
  - test/data/SP1_f1_revcomp.pwm
152
- - test/data/collection_pcm_without_thresholds.yaml
153
- - test/data/collection_without_thresholds.yaml
154
158
  - test/data/medium_motif.pwm
155
159
  - test/data/short_motif.pwm
156
160
  - test/data/test_collection.yaml
@@ -1,188 +0,0 @@
1
- --- !ruby/object:Bioinform::Collection
2
- container:
3
- - !ruby/object:Bioinform::Motif
4
- parameters: !ruby/object:OpenStruct
5
- table:
6
- :original_data_model: :pcm
7
- :pcm: !ruby/object:Bioinform::PCM
8
- parameters: !ruby/object:OpenStruct
9
- table:
10
- :name: GABPA_f1
11
- :background:
12
- - 1
13
- - 1
14
- - 1
15
- - 1
16
- modifiable: true
17
- matrix:
18
- - - 615.2572649050138
19
- - 697.0698715160123
20
- - 1261.1903440712872
21
- - 176.43506582414153
22
- - - 996.4929869323321
23
- - 805.1878697364007
24
- - 693.7695793644275
25
- - 254.5021102832924
26
- - - 1106.9888035794224
27
- - 508.19444415177276
28
- - 1029.8329748714536
29
- - 104.93632371380718
30
- - - 143.7121486195701
31
- - 2086.4279160661263
32
- - 518.37507049306
33
- - 1.4374111377025893
34
- - - 362.9541452731307
35
- - 2369.473894845734
36
- - 17.23702397004065
37
- - 0.2874822275405179
38
- - - 0.0
39
- - 0.0
40
- - 2749.952546316428
41
- - 0.0
42
- - - 0.0
43
- - 0.0
44
- - 2749.952546316428
45
- - 0.0
46
- - - 2748.2567506938462
47
- - 1.695795622582083
48
- - 0.0
49
- - 0.0
50
- - - 2726.6484322711017
51
- - 1.1499289101620715
52
- - 1.1499289101620715
53
- - 21.00425622500253
54
- - - 202.05697400573305
55
- - 28.799402471063658
56
- - 2518.808687612104
57
- - 0.2874822275405179
58
- - - 172.92889618879767
59
- - 521.1240363384483
60
- - 106.38197600987633
61
- - 1949.517637779338
62
- - - 398.1679460365911
63
- - 424.20938204069563
64
- - 1706.4024212088275
65
- - 221.17279703034018
66
- - - 764.2587933951809
67
- - 675.0883944902433
68
- - 1066.5413633225007
69
- - 244.06399510852864
70
- modifiable: true
71
- - !ruby/object:Bioinform::Motif
72
- parameters: !ruby/object:OpenStruct
73
- table:
74
- :original_data_model: :pcm
75
- :pcm: !ruby/object:Bioinform::PCM
76
- parameters: !ruby/object:OpenStruct
77
- table:
78
- :name: KLF4_f2
79
- :background:
80
- - 1
81
- - 1
82
- - 1
83
- - 1
84
- modifiable: true
85
- matrix:
86
- - - 1233.46088405354
87
- - 93.18173277811673
88
- - 1036.6014857092885
89
- - 1258.2948629970272
90
- - - 263.979242343185
91
- - 5.314520555872139
92
- - 3347.5949971525274
93
- - 4.650205486388122
94
- - - 76.7700780003465
95
- - 6.643150694840173
96
- - 3529.4896409394937
97
- - 8.636095903292224
98
- - - 57.86097393406657
99
- - 18.102585643439472
100
- - 3520.3342027139347
101
- - 25.24120324653207
102
- - - 518.1947904009378
103
- - 1545.9062946905135
104
- - 22.396758181071043
105
- - 1535.0411222654507
106
- - - 137.98151691820345
107
- - 9.300410972776241
108
- - 3456.320530770924
109
- - 17.936506876068467
110
- - - 115.27647661640499
111
- - 81.51802997128804
112
- - 1861.9425868567278
113
- - 1562.801872093553
114
- - - 227.8095486111286
115
- - 42.84555258785854
116
- - 3278.6396005325996
117
- - 72.244263806387
118
- - - 108.73384179997886
119
- - 134.47328134862394
120
- - 3162.880454846513
121
- - 215.45138754285665
122
- - - 238.49636899561344
123
- - 2225.9561104691043
124
- - 402.40727964384774
125
- - 754.6792064294074
126
- modifiable: true
127
- - !ruby/object:Bioinform::Motif
128
- parameters: !ruby/object:OpenStruct
129
- table:
130
- :original_data_model: :pcm
131
- :pcm: !ruby/object:Bioinform::PCM
132
- parameters: !ruby/object:OpenStruct
133
- table:
134
- :name: SP1_f1
135
- :background:
136
- - 1
137
- - 1
138
- - 1
139
- - 1
140
- modifiable: true
141
- matrix:
142
- - - 682.6436366358055
143
- - 443.1455214015781
144
- - 2075.655346294993
145
- - 287.211468117951
146
- - - 299.8883246804867
147
- - 103.74338315843572
148
- - 2613.8927022405364
149
- - 471.1315623708902
150
- - - 591.4892493324709
151
- - 42.631827541794564
152
- - 2845.1654083148564
153
- - 9.36948726124641
154
- - - 7.071084742361592
155
- - 45.29093411231232
156
- - 3432.8847704374107
157
- - 3.409183158303573
158
- - - 91.308984085713
159
- - 19.1536481364332
160
- - 3373.656949880137
161
- - 4.5363903481026
162
- - - 809.2082973387932
163
- - 2246.941954176211
164
- - 61.30766021687515
165
- - 371.19806071846244
166
- - - 120.56476435866055
167
- - 42.4349244403591
168
- - 3242.1560628684038
169
- - 83.50022078295852
170
- - - 13.72524477409959
171
- - 35.858220519297525
172
- - 3332.4066864946167
173
- - 106.66582066236779
174
- - - 558.1188080161639
175
- - 90.0084504200356
176
- - 2694.854973210736
177
- - 145.67374080342415
178
- - - 264.0088462230318
179
- - 254.7175868081866
180
- - 2796.88087480315
181
- - 173.0486646159857
182
- - - 519.46013914282
183
- - 1874.9349086474765
184
- - 654.5411208373813
185
- - 439.7198038226514
186
- modifiable: true
187
- parameters: !ruby/object:OpenStruct
188
- table: {}
@@ -1,188 +0,0 @@
1
- --- !ruby/object:Bioinform::Collection
2
- container:
3
- - !ruby/object:Bioinform::Motif
4
- parameters: !ruby/object:OpenStruct
5
- table:
6
- :original_data_model: :pwm
7
- :pwm: !ruby/object:Bioinform::PWM
8
- parameters: !ruby/object:OpenStruct
9
- table:
10
- :name: GABPA_f1
11
- :background:
12
- - 1
13
- - 1
14
- - 1
15
- - 1
16
- modifiable: true
17
- matrix:
18
- - - -0.1106670158341858
19
- - 0.013801606113892391
20
- - 0.6054596108973699
21
- - -1.3518085041421573
22
- - - 0.37030668921643345
23
- - 0.15761121480429963
24
- - 0.009069314183831202
25
- - -0.9888619717703562
26
- - - 0.47526546359546684
27
- - -0.3011678534572083
28
- - 0.4031522994412777
29
- - -1.8638752827041059
30
- - - -1.5544255540164373
31
- - 1.1082369687811506
32
- - -0.2814091552834454
33
- - -5.30708531823271
34
- - - -0.6362037835776368
35
- - 1.235338189985594
36
- - -3.5801322928552253
37
- - -5.717323067092849
38
- - - -5.852906870733575
39
- - -5.852906870733575
40
- - 1.3841383838057746
41
- - -5.852906870733575
42
- - - -5.852906870733575
43
- - -5.852906870733575
44
- - 1.3841383838057746
45
- - -5.852906870733575
46
- - - 1.3835219739184708
47
- - -5.2341956006430985
48
- - -5.852906870733575
49
- - -5.852906870733575
50
- - - 1.3756340514956562
51
- - -5.394962755562375
52
- - -5.394962755562375
53
- - -3.401117964959733
54
- - - -1.2176198315414444
55
- - -3.109079898175411
56
- - 1.2964067931472216
57
- - -5.717323067092849
58
- - - -1.3716559438167257
59
- - -0.2761401935045069
60
- - -1.8504445165866068
61
- - 1.0404320473626856
62
- - - -0.5440863133031895
63
- - -0.48103682561971345
64
- - 0.907381908447086
65
- - -1.1280642594012078
66
- - - 0.10557340209290218
67
- - -0.01814819455289191
68
- - 0.4381106695354074
69
- - -1.0304105539540915
70
- modifiable: true
71
- - !ruby/object:Bioinform::Motif
72
- parameters: !ruby/object:OpenStruct
73
- table:
74
- :original_data_model: :pwm
75
- :pwm: !ruby/object:Bioinform::PWM
76
- parameters: !ruby/object:OpenStruct
77
- table:
78
- :name: KLF4_f2
79
- :background:
80
- - 1
81
- - 1
82
- - 1
83
- - 1
84
- modifiable: true
85
- matrix:
86
- - - 0.30861857265872605
87
- - -2.254321000121579
88
- - 0.13505703522674192
89
- - 0.3285194224375633
90
- - - -1.227018967707036
91
- - -4.814127713368663
92
- - 1.3059890687390967
93
- - -4.908681463544344
94
- - - -2.443469374521196
95
- - -4.648238485031404
96
- - 1.3588686548279805
97
- - -4.441801801188402
98
- - - -2.7177827948276123
99
- - -3.8073538975356565
100
- - 1.356272809724262
101
- - -3.504104725510225
102
- - - -0.5563232977367343
103
- - 0.5340697765121405
104
- - -3.61417723090579
105
- - 0.5270259776377405
106
- - - -1.8687622060887386
107
- - -4.381483976582316
108
- - 1.337932245336098
109
- - -3.815629658877517
110
- - - -2.045671123823928
111
- - -2.384975142213679
112
- - 0.7198551207724355
113
- - 0.5449254135616948
114
- - - -1.373157530374372
115
- - -3.0063112097748217
116
- - 1.285188335493552
117
- - -2.5026044231773543
118
- - - -2.1030513122772208
119
- - -1.8941348100402244
120
- - 1.249265758393991
121
- - -1.4284210948906104
122
- - - -1.3277128628152939
123
- - 0.8982415633049462
124
- - -0.8080773665408135
125
- - -0.18161647647456935
126
- modifiable: true
127
- - !ruby/object:Bioinform::Motif
128
- parameters: !ruby/object:OpenStruct
129
- table:
130
- :original_data_model: :pwm
131
- :pwm: !ruby/object:Bioinform::PWM
132
- parameters: !ruby/object:OpenStruct
133
- table:
134
- :name: SP1_f1
135
- :background:
136
- - 1
137
- - 1
138
- - 1
139
- - 1
140
- modifiable: true
141
- matrix:
142
- - - -0.24435707885585292
143
- - -0.674823404693731
144
- - 0.8657012535789866
145
- - -1.1060188862599287
146
- - - -1.0631255752097797
147
- - -2.111925969423868
148
- - 1.0960627561110403
149
- - -0.6138563775211977
150
- - - -0.3872276234760535
151
- - -2.9739851913218045
152
- - 1.1807800242010378
153
- - -4.338927525031566
154
- - - -4.563896055436894
155
- - -2.9161633002532277
156
- - 1.3684371349982638
157
- - -5.077972423609655
158
- - - -2.2369752892820083
159
- - -3.7196436313301846
160
- - 1.3510439136452734
161
- - -4.889930670508233
162
- - - -0.07473964149330865
163
- - 0.944919654762011
164
- - -2.6246857648086044
165
- - -0.8510983487822436
166
- - - -1.9643526491643322
167
- - -2.978402770880115
168
- - 1.3113096718240573
169
- - -2.324334259499025
170
- - - -4.0155484139655835
171
- - -3.1384268078096667
172
- - 1.3387488589788057
173
- - -2.084673903537648
174
- - - -0.44509385828355363
175
- - -2.2510053061629702
176
- - 1.1265431574368685
177
- - -1.7780413702431372
178
- - - -1.1896356092245048
179
- - -1.2251832285630027
180
- - 1.1636760063747527
181
- - -1.6080243648157353
182
- - - -0.5166047365590571
183
- - 0.7641033353626657
184
- - -0.2862677570028208
185
- - -0.68254820978656
186
- modifiable: true
187
- parameters: !ruby/object:OpenStruct
188
- table: {}