macroape 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.2
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-30 00:00:00.000000000 Z
11
+ date: 2014-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bioinform
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.1.10
19
+ version: 0.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.1.10
26
+ version: 0.2.0
27
27
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
28
28
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
29
29
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -67,12 +67,18 @@ files:
67
67
  - lib/macroape/cli/find_threshold.rb
68
68
  - lib/macroape/cli/preprocess_collection.rb
69
69
  - lib/macroape/cli/scan_collection.rb
70
- - lib/macroape/counting.rb
70
+ - lib/macroape/collection.rb
71
+ - lib/macroape/motif_with_thresholds.rb
71
72
  - lib/macroape/pwm_compare.rb
72
73
  - lib/macroape/pwm_compare_aligned.rb
74
+ - lib/macroape/pwm_counting.rb
75
+ - lib/macroape/support/inverf.rb
76
+ - lib/macroape/support/partial_sums.rb
73
77
  - lib/macroape/version.rb
74
78
  - macroape.gemspec
75
79
  - spec/count_distribution_spec.rb
80
+ - spec/inverf_spec.rb
81
+ - spec/partial_sums_spec.rb
76
82
  - spec/spec_helper.rb
77
83
  - test/align_motifs_test.rb
78
84
  - test/data/AHR_si.pwm
@@ -88,8 +94,6 @@ files:
88
94
  - test/data/SP1_f1.pwm
89
95
  - test/data/SP1_f1_revcomp.pcm
90
96
  - test/data/SP1_f1_revcomp.pwm
91
- - test/data/collection_pcm_without_thresholds.yaml
92
- - test/data/collection_without_thresholds.yaml
93
97
  - test/data/medium_motif.pwm
94
98
  - test/data/short_motif.pwm
95
99
  - test/data/test_collection.yaml
@@ -128,12 +132,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
132
  version: '0'
129
133
  requirements: []
130
134
  rubyforge_project:
131
- rubygems_version: 2.1.5
135
+ rubygems_version: 2.3.0
132
136
  signing_key:
133
137
  specification_version: 4
134
138
  summary: PWM comparison tool using MACROAPE approach
135
139
  test_files:
136
140
  - spec/count_distribution_spec.rb
141
+ - spec/inverf_spec.rb
142
+ - spec/partial_sums_spec.rb
137
143
  - spec/spec_helper.rb
138
144
  - test/align_motifs_test.rb
139
145
  - test/data/AHR_si.pwm
@@ -149,8 +155,6 @@ test_files:
149
155
  - test/data/SP1_f1.pwm
150
156
  - test/data/SP1_f1_revcomp.pcm
151
157
  - test/data/SP1_f1_revcomp.pwm
152
- - test/data/collection_pcm_without_thresholds.yaml
153
- - test/data/collection_without_thresholds.yaml
154
158
  - test/data/medium_motif.pwm
155
159
  - test/data/short_motif.pwm
156
160
  - test/data/test_collection.yaml
@@ -1,188 +0,0 @@
1
- --- !ruby/object:Bioinform::Collection
2
- container:
3
- - !ruby/object:Bioinform::Motif
4
- parameters: !ruby/object:OpenStruct
5
- table:
6
- :original_data_model: :pcm
7
- :pcm: !ruby/object:Bioinform::PCM
8
- parameters: !ruby/object:OpenStruct
9
- table:
10
- :name: GABPA_f1
11
- :background:
12
- - 1
13
- - 1
14
- - 1
15
- - 1
16
- modifiable: true
17
- matrix:
18
- - - 615.2572649050138
19
- - 697.0698715160123
20
- - 1261.1903440712872
21
- - 176.43506582414153
22
- - - 996.4929869323321
23
- - 805.1878697364007
24
- - 693.7695793644275
25
- - 254.5021102832924
26
- - - 1106.9888035794224
27
- - 508.19444415177276
28
- - 1029.8329748714536
29
- - 104.93632371380718
30
- - - 143.7121486195701
31
- - 2086.4279160661263
32
- - 518.37507049306
33
- - 1.4374111377025893
34
- - - 362.9541452731307
35
- - 2369.473894845734
36
- - 17.23702397004065
37
- - 0.2874822275405179
38
- - - 0.0
39
- - 0.0
40
- - 2749.952546316428
41
- - 0.0
42
- - - 0.0
43
- - 0.0
44
- - 2749.952546316428
45
- - 0.0
46
- - - 2748.2567506938462
47
- - 1.695795622582083
48
- - 0.0
49
- - 0.0
50
- - - 2726.6484322711017
51
- - 1.1499289101620715
52
- - 1.1499289101620715
53
- - 21.00425622500253
54
- - - 202.05697400573305
55
- - 28.799402471063658
56
- - 2518.808687612104
57
- - 0.2874822275405179
58
- - - 172.92889618879767
59
- - 521.1240363384483
60
- - 106.38197600987633
61
- - 1949.517637779338
62
- - - 398.1679460365911
63
- - 424.20938204069563
64
- - 1706.4024212088275
65
- - 221.17279703034018
66
- - - 764.2587933951809
67
- - 675.0883944902433
68
- - 1066.5413633225007
69
- - 244.06399510852864
70
- modifiable: true
71
- - !ruby/object:Bioinform::Motif
72
- parameters: !ruby/object:OpenStruct
73
- table:
74
- :original_data_model: :pcm
75
- :pcm: !ruby/object:Bioinform::PCM
76
- parameters: !ruby/object:OpenStruct
77
- table:
78
- :name: KLF4_f2
79
- :background:
80
- - 1
81
- - 1
82
- - 1
83
- - 1
84
- modifiable: true
85
- matrix:
86
- - - 1233.46088405354
87
- - 93.18173277811673
88
- - 1036.6014857092885
89
- - 1258.2948629970272
90
- - - 263.979242343185
91
- - 5.314520555872139
92
- - 3347.5949971525274
93
- - 4.650205486388122
94
- - - 76.7700780003465
95
- - 6.643150694840173
96
- - 3529.4896409394937
97
- - 8.636095903292224
98
- - - 57.86097393406657
99
- - 18.102585643439472
100
- - 3520.3342027139347
101
- - 25.24120324653207
102
- - - 518.1947904009378
103
- - 1545.9062946905135
104
- - 22.396758181071043
105
- - 1535.0411222654507
106
- - - 137.98151691820345
107
- - 9.300410972776241
108
- - 3456.320530770924
109
- - 17.936506876068467
110
- - - 115.27647661640499
111
- - 81.51802997128804
112
- - 1861.9425868567278
113
- - 1562.801872093553
114
- - - 227.8095486111286
115
- - 42.84555258785854
116
- - 3278.6396005325996
117
- - 72.244263806387
118
- - - 108.73384179997886
119
- - 134.47328134862394
120
- - 3162.880454846513
121
- - 215.45138754285665
122
- - - 238.49636899561344
123
- - 2225.9561104691043
124
- - 402.40727964384774
125
- - 754.6792064294074
126
- modifiable: true
127
- - !ruby/object:Bioinform::Motif
128
- parameters: !ruby/object:OpenStruct
129
- table:
130
- :original_data_model: :pcm
131
- :pcm: !ruby/object:Bioinform::PCM
132
- parameters: !ruby/object:OpenStruct
133
- table:
134
- :name: SP1_f1
135
- :background:
136
- - 1
137
- - 1
138
- - 1
139
- - 1
140
- modifiable: true
141
- matrix:
142
- - - 682.6436366358055
143
- - 443.1455214015781
144
- - 2075.655346294993
145
- - 287.211468117951
146
- - - 299.8883246804867
147
- - 103.74338315843572
148
- - 2613.8927022405364
149
- - 471.1315623708902
150
- - - 591.4892493324709
151
- - 42.631827541794564
152
- - 2845.1654083148564
153
- - 9.36948726124641
154
- - - 7.071084742361592
155
- - 45.29093411231232
156
- - 3432.8847704374107
157
- - 3.409183158303573
158
- - - 91.308984085713
159
- - 19.1536481364332
160
- - 3373.656949880137
161
- - 4.5363903481026
162
- - - 809.2082973387932
163
- - 2246.941954176211
164
- - 61.30766021687515
165
- - 371.19806071846244
166
- - - 120.56476435866055
167
- - 42.4349244403591
168
- - 3242.1560628684038
169
- - 83.50022078295852
170
- - - 13.72524477409959
171
- - 35.858220519297525
172
- - 3332.4066864946167
173
- - 106.66582066236779
174
- - - 558.1188080161639
175
- - 90.0084504200356
176
- - 2694.854973210736
177
- - 145.67374080342415
178
- - - 264.0088462230318
179
- - 254.7175868081866
180
- - 2796.88087480315
181
- - 173.0486646159857
182
- - - 519.46013914282
183
- - 1874.9349086474765
184
- - 654.5411208373813
185
- - 439.7198038226514
186
- modifiable: true
187
- parameters: !ruby/object:OpenStruct
188
- table: {}
@@ -1,188 +0,0 @@
1
- --- !ruby/object:Bioinform::Collection
2
- container:
3
- - !ruby/object:Bioinform::Motif
4
- parameters: !ruby/object:OpenStruct
5
- table:
6
- :original_data_model: :pwm
7
- :pwm: !ruby/object:Bioinform::PWM
8
- parameters: !ruby/object:OpenStruct
9
- table:
10
- :name: GABPA_f1
11
- :background:
12
- - 1
13
- - 1
14
- - 1
15
- - 1
16
- modifiable: true
17
- matrix:
18
- - - -0.1106670158341858
19
- - 0.013801606113892391
20
- - 0.6054596108973699
21
- - -1.3518085041421573
22
- - - 0.37030668921643345
23
- - 0.15761121480429963
24
- - 0.009069314183831202
25
- - -0.9888619717703562
26
- - - 0.47526546359546684
27
- - -0.3011678534572083
28
- - 0.4031522994412777
29
- - -1.8638752827041059
30
- - - -1.5544255540164373
31
- - 1.1082369687811506
32
- - -0.2814091552834454
33
- - -5.30708531823271
34
- - - -0.6362037835776368
35
- - 1.235338189985594
36
- - -3.5801322928552253
37
- - -5.717323067092849
38
- - - -5.852906870733575
39
- - -5.852906870733575
40
- - 1.3841383838057746
41
- - -5.852906870733575
42
- - - -5.852906870733575
43
- - -5.852906870733575
44
- - 1.3841383838057746
45
- - -5.852906870733575
46
- - - 1.3835219739184708
47
- - -5.2341956006430985
48
- - -5.852906870733575
49
- - -5.852906870733575
50
- - - 1.3756340514956562
51
- - -5.394962755562375
52
- - -5.394962755562375
53
- - -3.401117964959733
54
- - - -1.2176198315414444
55
- - -3.109079898175411
56
- - 1.2964067931472216
57
- - -5.717323067092849
58
- - - -1.3716559438167257
59
- - -0.2761401935045069
60
- - -1.8504445165866068
61
- - 1.0404320473626856
62
- - - -0.5440863133031895
63
- - -0.48103682561971345
64
- - 0.907381908447086
65
- - -1.1280642594012078
66
- - - 0.10557340209290218
67
- - -0.01814819455289191
68
- - 0.4381106695354074
69
- - -1.0304105539540915
70
- modifiable: true
71
- - !ruby/object:Bioinform::Motif
72
- parameters: !ruby/object:OpenStruct
73
- table:
74
- :original_data_model: :pwm
75
- :pwm: !ruby/object:Bioinform::PWM
76
- parameters: !ruby/object:OpenStruct
77
- table:
78
- :name: KLF4_f2
79
- :background:
80
- - 1
81
- - 1
82
- - 1
83
- - 1
84
- modifiable: true
85
- matrix:
86
- - - 0.30861857265872605
87
- - -2.254321000121579
88
- - 0.13505703522674192
89
- - 0.3285194224375633
90
- - - -1.227018967707036
91
- - -4.814127713368663
92
- - 1.3059890687390967
93
- - -4.908681463544344
94
- - - -2.443469374521196
95
- - -4.648238485031404
96
- - 1.3588686548279805
97
- - -4.441801801188402
98
- - - -2.7177827948276123
99
- - -3.8073538975356565
100
- - 1.356272809724262
101
- - -3.504104725510225
102
- - - -0.5563232977367343
103
- - 0.5340697765121405
104
- - -3.61417723090579
105
- - 0.5270259776377405
106
- - - -1.8687622060887386
107
- - -4.381483976582316
108
- - 1.337932245336098
109
- - -3.815629658877517
110
- - - -2.045671123823928
111
- - -2.384975142213679
112
- - 0.7198551207724355
113
- - 0.5449254135616948
114
- - - -1.373157530374372
115
- - -3.0063112097748217
116
- - 1.285188335493552
117
- - -2.5026044231773543
118
- - - -2.1030513122772208
119
- - -1.8941348100402244
120
- - 1.249265758393991
121
- - -1.4284210948906104
122
- - - -1.3277128628152939
123
- - 0.8982415633049462
124
- - -0.8080773665408135
125
- - -0.18161647647456935
126
- modifiable: true
127
- - !ruby/object:Bioinform::Motif
128
- parameters: !ruby/object:OpenStruct
129
- table:
130
- :original_data_model: :pwm
131
- :pwm: !ruby/object:Bioinform::PWM
132
- parameters: !ruby/object:OpenStruct
133
- table:
134
- :name: SP1_f1
135
- :background:
136
- - 1
137
- - 1
138
- - 1
139
- - 1
140
- modifiable: true
141
- matrix:
142
- - - -0.24435707885585292
143
- - -0.674823404693731
144
- - 0.8657012535789866
145
- - -1.1060188862599287
146
- - - -1.0631255752097797
147
- - -2.111925969423868
148
- - 1.0960627561110403
149
- - -0.6138563775211977
150
- - - -0.3872276234760535
151
- - -2.9739851913218045
152
- - 1.1807800242010378
153
- - -4.338927525031566
154
- - - -4.563896055436894
155
- - -2.9161633002532277
156
- - 1.3684371349982638
157
- - -5.077972423609655
158
- - - -2.2369752892820083
159
- - -3.7196436313301846
160
- - 1.3510439136452734
161
- - -4.889930670508233
162
- - - -0.07473964149330865
163
- - 0.944919654762011
164
- - -2.6246857648086044
165
- - -0.8510983487822436
166
- - - -1.9643526491643322
167
- - -2.978402770880115
168
- - 1.3113096718240573
169
- - -2.324334259499025
170
- - - -4.0155484139655835
171
- - -3.1384268078096667
172
- - 1.3387488589788057
173
- - -2.084673903537648
174
- - - -0.44509385828355363
175
- - -2.2510053061629702
176
- - 1.1265431574368685
177
- - -1.7780413702431372
178
- - - -1.1896356092245048
179
- - -1.2251832285630027
180
- - 1.1636760063747527
181
- - -1.6080243648157353
182
- - - -0.5166047365590571
183
- - 0.7641033353626657
184
- - -0.2862677570028208
185
- - -0.68254820978656
186
- modifiable: true
187
- parameters: !ruby/object:OpenStruct
188
- table: {}