mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,246 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
- require 'spec_id/srf'
3
- require 'spec_id/precision/filter'
4
-
5
- require File.dirname(__FILE__) + '/../../spec_id_helper'
6
-
7
- require 'set'
8
- require 'set_from_hash'
9
-
10
- describe SpecID::Precision::Filter::Peps do
11
- it 'does basic top hit filtering with ties=true|false|:as_array' do
12
- hashes = [
13
- {:aaseq=> 'A', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 0
14
- {:aaseq=> 'B', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 1
15
- {:aaseq=> 'C', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 2
16
- {:aaseq=> 'D', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.2, :ppm => 25, :charge => 2}, # 3
17
- {:aaseq=> 'D', :first_scan => 2, :xcorr => 1.9, :deltacn => 0.1, :ppm => 25, :charge => 2}, # 4
18
- ]
19
- pep_klass = SRF::OUT::Pep
20
- @sequest_peps = hashes.map do |hash|
21
- hash[:prots] = []
22
- pep = pep_klass.new.set_from_hash(hash)
23
- end
24
- # no tie:
25
- options = {
26
- :per => [:first_scan, :charge],
27
- :by => [:xcorr, {:down => [:xcorr]}],
28
- :ties => false
29
- }
30
- peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
31
- peps.size.should == 2
32
- set_of_hash_xcorrs = [0,4].map {|i| hashes[i][:xcorr] }.to_set
33
- peps.map {|v| v.xcorr }.to_set.should == set_of_hash_xcorrs
34
-
35
- # with tie == true:
36
- options[:ties] = true
37
- peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
38
- peps.size.should == 3
39
- set_of_hash_xcorrs = [0,1,4].map {|i| hashes[i][:xcorr] }.to_set
40
- peps.map{|v| v.xcorr}.to_set.should == set_of_hash_xcorrs
41
-
42
- # with tie == :as_array
43
- options[:ties] = :as_array
44
- peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
45
- peps.size.should == 2
46
- peps.any? {|v| v.class == Array }.should be_true
47
- peps.select {|v| v.is_a? pep_klass }.first.should equal(@sequest_peps[4])
48
- end
49
- end
50
-
51
-
52
- describe 'filtering on a small bioworks file' do
53
- before(:each) do
54
- @file = Tfiles + '/bioworks_small.xml'
55
- @spec_id = SpecID.new(@file)
56
- end
57
-
58
- it 'filters with basic sequest filters' do
59
- opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
60
- ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
61
-
62
-
63
- ans[:params][:sequest].should == opts[:sequest]
64
- # FROZEN:
65
- ans[:pephits].size.should == 4
66
-
67
- ans[:pephits].each do |pephit|
68
- pephit.pass_filters?(opts[:sequest]).should be_true
69
- pephit.fail_filters?(opts[:sequest]).should be_false
70
- end
71
- before = @spec_id.peps.size
72
- ans[:pephits].each do |pephit|
73
- @spec_id.peps.delete(pephit)
74
- end
75
- @spec_id.peps.size.should == before - 4
76
- @spec_id.peps.each do |not_passing_pep|
77
- not_passing_pep.pass_filters?(opts[:sequest]).should_not be_true
78
- end
79
-
80
- ans[:pephits].map {|v| v.aaseq }.to_set.size == 4
81
- end
82
-
83
- it 'can exclude deltacnstar' do
84
- opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
85
- # make two hits have the deltacnstar deltacn of 1.1
86
- sorted = @spec_id.peps.sort_by {|pep| [pep.xcorr, pep.deltacn, 1.0/pep.ppm, pep.first_scan, pep.aaseq] }
87
- # for two of these indices:
88
- [286, 287].each do |index|
89
- sorted[index].deltacn = 1.1
90
- sorted[index].deltacn.should == 1.1
91
- end
92
- ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
93
-
94
- ans[:params][:sequest].should == opts[:sequest]
95
- # FROZEN:
96
- ans[:pephits].size.should == 2
97
- end
98
-
99
- end
100
-
101
- describe 'filtering on small bioworks file with inverse prots' do
102
- before(:each) do
103
- @regexp = /^INV_/o
104
- @file = Tfiles + '/bioworks_with_INV_small.xml'
105
- @spec_id = SpecID.new(@file)
106
- vals = [Validator::Decoy.new(:constraint => @regexp)]
107
- @opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar=> false}, :validators => vals}
108
- end
109
-
110
- it 'gets decoy precision' do
111
- ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
112
- peps = ans[:pephits]
113
- vals = ans[:pephits_precision]
114
- # FROZEN:
115
- peps.size.should == 150
116
- peps.hash_by(:aaseq).size.should == 74
117
- vals.first.should == 149.0/150
118
- end
119
-
120
- it 'gets cys precision with freq' do
121
- # this does a minimal test to see if this functions properly
122
- # (not for accuracy, which is done in validator_spec)
123
- ## WITH FASTA FILE:
124
- val1 = Validator::AAEst.new('C').set_frequency(Fasta.new(Tfiles + '/small.fasta').prots)
125
- @opts[:validators] << val1 # obviously this guy is not his
126
- ans1 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
127
- peps = ans1[:pephits]
128
- vals1 = ans1[:pephits_precision]
129
- # FROZEN:
130
- vals1.last.should be_close(0.84432189117806, 0.0000000001)
131
-
132
- ## WITH A CYSTEINE BACKGROUND:
133
- background_cys = 0.0172
134
- val3 = Validator::AAEst.new('C', :background => background_cys).set_frequency(Fasta.new(Tfiles + '/small.fasta').prots)
135
- @opts[:validators][1] = val3
136
- ans3 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
137
- peps = ans3[:pephits]
138
- vals3 = ans3[:pephits_precision]
139
- # FROZEN:
140
- vals3.last.should be_close(0.944734271368211, 0.00000000001)
141
- end
142
- end
143
-
144
- describe 'filtering on a real srf file' do
145
-
146
- spec_large do
147
- it 'does tmm with a toppred file on srf' do
148
- opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false}}
149
- dir = Tfiles_l + '/opd1_2runs_2mods/sequest33'
150
- tmm_file = dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
151
- fasta_file = dir + '/ecoli_K12_ncbi_20060321.fasta'
152
- sequest_file = dir + '/ecoli.params'
153
- srf_file = dir + '/020.srf'
154
- spec_id = SpecID.new(srf_file)
155
- # :tmm -> [transmembrane file,min_tm_seqs=1,expect_soluble=true,correct_wins=true,no_include_tm_peps=0.8, bkg=0] # a toppred.out file
156
-
157
- regexp = /FAKINGIT_OUT/
158
- opts[:decoy] = regexp
159
- decoy_val = Validator::Decoy.new(:constraint => regexp) # this is not real, just to test
160
- cys_val = Validator::AAEst.new('C').set_frequency(Fasta.new(fasta_file).prots)
161
- tmm_val = Validator::Transmem::Protein.new(tmm_file, :min_num_tms => 1, :soluble_fraction => true, :correct_wins => true, :no_include_tm_peps => false, :background => 0.0).set_false_to_total_ratio( Digestor.digest( Fasta.new(fasta_file), Sequest::Params.new(sequest_file) ) )
162
- opts[:validators] = [decoy_val, cys_val, tmm_val]
163
- ans = SpecID::Precision::Filter.new.filter_and_validate(spec_id, opts)
164
- peps = ans[:pephits]
165
- vals = ans[:pephits_precision]
166
-
167
- # frozen:
168
- vals[0].should == 1.0
169
- vals[1].should be_close(0.366612274427855, 0.00000001)
170
- #vals[2].should be_close(0.396396396396396, 0.00000001)
171
- # if the srf file is not 'filtered' by proper sequest vals, should give
172
- # this:
173
- #vals[2].should be_close(-0.204031426241371, 0.00000001)
174
- vals[2].should be_close(-0.199538771665843, 0.00000001)
175
- peps.size.should == 444
176
- end
177
- end
178
-
179
- # This is what I was doing before. I think I may have been forgetting to
180
- # remove the INV_ peptide from these counts!
181
- # or more likely, the peptide hits were pep+prot hits!
182
- # SpecID::Filterer.run_from_argv([@small_inv].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
183
- ### FROZEN:
184
- #assert_match(/pep_hits\s+151/, output)
185
- #assert_match(/uniq_aa_hits\s+75/, output)
186
- #assert_match(/prot_hits\s+13/, output)
187
-
188
- end
189
-
190
- describe SpecID::Precision::Filter::Peps do
191
-
192
- before(:all) do
193
- hashes = [
194
- {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
195
- {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
196
- {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
197
- {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
198
- {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
199
- {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
200
- ]
201
- @sequest_peps = hashes.map do |hash|
202
- pep = SRF::OUT::Pep.new.set_from_hash(hash)
203
- end
204
- #sp = GenericSpecID.new.set_from_hash({:peps => peps})
205
-
206
- end
207
-
208
- it 'filters sequest peptides' do
209
- args_and_expected = {
210
- #deltacnstar false
211
- [1.2, 1.2, 1.2, 0.1, 50, false] => 5, # "all passing"
212
- [1.6, 1.6, 1.6, 0.1, 50, false] => 0, # "xcorrs too high"
213
- [1.6, 1.0, 1.0, 0.1, 50, false] => 4, # "one xcorr too high"
214
- [1.0, 1.6, 1.0, 0.1, 50, false] => 2, # "one xcorr too high"
215
- [1.0, 1.0, 1.6, 0.1, 50, false] => 4, # "one xcorr too high"
216
- [1.2, 1.2, 1.2, 0.2, 50, false] => 0, # "high deltacn"
217
-
218
- ## includedeltcnstars :
219
- [1.2, 1.2, 1.2, 0.1, 50, true] => 6, # "all passing"
220
- [1.2, 1.2, 1.2, 0.2, 50, true] => 1, # "high deltacn"
221
- [1.0, 1.0, 1.6, 0.1, 50, true] => 5, # "one xcorr too high"
222
- ##
223
- [1.0, 1.0, 1.0, 0.05, 60, true] => 6, ## testing ppm filtering:
224
- [1.0, 1.0, 1.0, 0.05, 10, true] => 0,
225
- }
226
- args_and_expected.each do |args,exp|
227
- filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
228
- filt.filter(@sequest_peps).size.should == exp
229
- end
230
- end
231
-
232
- it 'can change the pep array permanently' do
233
- args_and_expected = {[1.2, 1.2, 1.2, 0.2, 50, true] => 1} # "high deltacn"
234
- array_to_change = @sequest_peps.dup
235
- array_to_change.size.should == @sequest_peps.size
236
- args_and_expected.each do |args,exp|
237
- filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
238
- filt.filter!(array_to_change)
239
- end
240
- array_to_change.size.should_not == @sequest_peps.size
241
- end
242
-
243
- end
244
-
245
-
246
-
@@ -1,44 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
-
3
- require 'spec_id/precision/prob'
4
- require 'spec_id'
5
- require 'spec_id/proph'
6
- require 'validator'
7
- require 'fasta'
8
- require 'spec_id/sequest/params'
9
-
10
-
11
- describe 'finding precision Proph::Prot::Pep objects' do
12
- before(:each) do
13
- @spec_id = GenericSpecID.new
14
- # actual sort order: 3, 0, 4, 1, 2
15
- peps = [
16
- # 0: canonical
17
- {:peptide_sequence => '0', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
18
- # 1: lower init prob
19
- {:peptide_sequence => '1', :initial_probability => 0.60, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
20
- # 2: lower nsp prob
21
- {:peptide_sequence => '2', :initial_probability => 0.63, :nsp_adjusted_probability => 0.52, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
22
- # extra instances! (best hit)
23
- {:peptide_sequence => '3', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 5, :is_contributing_evidence => true},
24
- # is nondegen = false
25
- {:peptide_sequence => '4', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => false, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},].map {|v| Proph::Prot::Pep.new(v) }
26
- @spec_id.peps = peps
27
- end
28
-
29
- it 'runs without any validator' do
30
- answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
31
- answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits", "pephits_precision", "probabilities"]
32
- answer[:aaseqs].should == %w(3 0 4 1 2)
33
- end
34
-
35
- it 'returns modified peptides if any modified peptides' do
36
- @spec_id.peps[1].mod_info = Sequest::PepXML::SearchHit::ModificationInfo.new(['MODIFIED', []])
37
- answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
38
- answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits", "pephits_precision", "probabilities"]
39
- end
40
-
41
- end
42
-
43
-
44
-
File without changes
@@ -1,98 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
-
3
- require 'spec_id/proph/pep_summary'
4
-
5
- ToCheck = {
6
- :spectrum_query => {:first => {:spectrum => "020.42.42.3", :start_scan=>42, :end_scan=>42, :precursor_neutral_mass=>1015.77285654469, :assumed_charge=>3, :index=>1 },
7
- :last => {:spectrum=>"020.344.344.3", :start_scan=>344, :end_scan=>344, :precursor_neutral_mass=>1447.6040333025, :assumed_charge=>3, :index=>18 },
8
- },
9
-
10
- :search_hit => {:first => {:hit_rank=>1, :peptide=>"GTGVSVTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"S", :protein=>"gi|49176370|ref|YP_026228.1|", :num_tot_proteins=>1, :num_matched_ions=>10, :tot_num_ions=>70, :calc_neutral_pep_mass=>1015.79382542, :massdiff=>-0.0209688753124055, :num_tol_term=>2, :num_missed_cleavages=>0, :is_rejected=>0, :xcorr=>1.06543827056885, :deltacn => 0.192325830459595, :deltacnstar=>0, :spscore=>77.8397979736328, :sprank=>3, :probability=>0.07881571, :fval=>0.1592, :ntt=>2, :nmc=> 0, :massd=>-0.021},
11
- :last => { :hit_rank=>1, :peptide=>"VAALRVPGGASLTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"K", :protein=>"gi|16129819|ref|NP_416380.1|", :num_tot_proteins=>1, :num_matched_ions=>16, :tot_num_ions=>78, :calc_neutral_pep_mass=>1447.58289842, :massdiff=> 0.0211348825000641, :num_tol_term=>2, :num_missed_cleavages=>1, :is_rejected=>0, :xcorr=>1.3090912103653, :deltacn => 0.259967535734177, :deltacnstar => 0, :spscore => 118.513412475586, :sprank => 4, :probability=>0.27738378, :fval=>1.3810, :ntt=>2, :nmc=>1, :massd=>0.021 },
12
- }
13
- }
14
-
15
-
16
- describe Proph::PepSummary, "reading a small .xml file" do
17
- before(:each) do
18
- file = Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml'
19
- @obj = Proph::PepSummary.new(file)
20
- end
21
-
22
- it 'should raise an error if not a peptide prophet file' do
23
- lambda { Proph::PepSummary.new(Tfiles + '/opd1/000.tpp_2.9.2.first10.xml')}.should raise_error(ArgumentError)
24
- end
25
-
26
- it 'has msms_run_summary objects with spectrum_queries' do
27
- @obj.msms_run_summaries.size.should == 1
28
- sqs = @obj.msms_run_summaries.first.spectrum_queries
29
- sqs.size.should == 18
30
-
31
- [:first, :last].each do |mth|
32
- ToCheck[:spectrum_query][mth].each do |k,v|
33
- if v.is_a? Float
34
- sqs.send(mth).send(k).should be_close(v, 0.0000000001)
35
- else
36
- sqs.send(mth).send(k).should == v
37
- end
38
- end
39
- ToCheck[:search_hit][mth].each do |k,v|
40
- if v.is_a? Float
41
- sqs.send(mth).search_results.first.search_hits.first.send(k).should be_close(v, 0.0000000001)
42
- else
43
- sqs.send(mth).search_results.first.search_hits.first.send(k).should == v
44
- end
45
- end
46
- end
47
- end
48
-
49
- it 'has pephits (which are descended from SearchHit)' do
50
- @obj.peps.size.should == 18
51
- [:hit_rank, :probability, :fval, :ntt, :nmc, :massd].each do |guy|
52
- @obj.peps.first.should respond_to(guy)
53
- end
54
-
55
- [:first, :last].each do |mth|
56
- ToCheck[:search_hit][mth].each do |k,v|
57
- if v.is_a? Float
58
- @obj.peps.send(mth).send(k).should be_close(v, 0.0000000001)
59
- else
60
- @obj.peps.send(mth).send(k).should == v
61
- end
62
- end
63
- end
64
-
65
- end
66
-
67
- end
68
-
69
- describe Proph::PepSummary, 'reading a large .xml file' do
70
- spec_large do
71
- before(:all) do
72
- file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods.xml'
73
- @obj = Proph::PepSummary.new(file)
74
- end
75
-
76
- it 'has peps of class Proph::PepSummary::Pep' do
77
- @obj.peps.first.class.to_s.should == 'Proph::PepSummary::Pep'
78
- @obj.peps.size.should == 1643
79
- end
80
-
81
- it 'contains peps that respond_to :aaseq' do
82
- @obj.peps.first.should respond_to(:aaseq)
83
- end
84
-
85
- it 'has prots (also callable from peps)' do
86
- (@obj.prots.size > 0).should be_true
87
- @obj.peps.all? {|v| v.prots.size > 0 }.should be_true
88
- peps_with_prots = @obj.peps.select {|v| v.prots.size > 1 }
89
- # frozen:
90
- peps_with_prots.first.prots.size.should == 3
91
- peps_with_prots.first.prots.first.name.should == "gi|16128676|ref|NP_415229.1|"
92
- peps_with_prots.first.prots.first.protein_descr.should == "RhsC protein in RhsC element [Escherichia coli K12]"
93
- peps_with_prots.first.prots.first.reference.should == "gi|16128676|ref|NP_415229.1| RhsC protein in RhsC element [Escherichia coli K12]"
94
- peps_with_prots.first.prots.last.protein_descr.should == "RhsA protein in RhsA element [Escherichia coli K12]"
95
- end
96
- end
97
- end
98
-
@@ -1,128 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
-
3
- require 'spec_id/proph/prot_summary'
4
-
5
- describe Proph::ProtSummary, "reading a -prot.xml file" do
6
- before(:each) do
7
- file = Tfiles + '/opd1/000_020_3prots-prot.xml'
8
- @obj = Proph::ProtSummary.new(file)
9
- end
10
-
11
- it 'extracts protein groups with probabilities' do
12
- @obj.prot_groups.size.should == 3
13
- @obj.prot_groups.first.probability.should == 1.0
14
- @obj.prot_groups[2].probability == 0.98
15
- end
16
-
17
- it 'extracts protein hit attributes' do
18
- prot = @obj.prot_groups[1].prots.first
19
- %w(protein_name n_indistinguishable_proteins probability percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids).zip(["gi|16132019|ref|NP_418618.1|", 1, 1.0, 13.0, "FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR".split('+'), "a", 2, 0.41]) do |name, val|
20
- prot.send(name).should == val
21
- end
22
- end
23
-
24
- it 'can detect -prot.xml version' do
25
- answer = ['1.9', '4']
26
- files = ['/yeast_gly_small-prot.xml', '/interact-opd1_mods_small-prot.xml'].map {|v| Tfiles + v}
27
- files.zip(answer) do |file,answ|
28
- Proph::ProtSummary.new.get_version(file).should == answ
29
- end
30
- end
31
-
32
- it 'has prots, peps, and prot_groups ' do
33
- @obj.peps.should_not be_nil
34
- @obj.prots.should_not be_nil
35
- @obj.prot_groups.should_not be_nil
36
- end
37
-
38
- end
39
-
40
- ####################################################
41
- # OTHER TESTS NOT IMPLEMENTED (do we need these??)
42
- ####################################################
43
-
44
- =begin
45
-
46
- require 'test/unit'
47
- require 'spec_id'
48
- require 'ms/scan'
49
-
50
- class ProphTest < Test::Unit::TestCase
51
-
52
- def initialize(arg)
53
- super(arg)
54
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
55
- @pepproph_xml = @tfiles + 'pepproph_small.xml'
56
- end
57
-
58
- def Xtest_filter_by_min_pep_prob
59
- obj = Proph::Pep::Parser.new
60
- new_file = "tfiles/tmp.xml"
61
- assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
62
- obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
63
- assert_no_match(/peptideprophet_result probability="0.[0-5]/, IO.read(new_file))
64
- assert_match(/<peptideprophet_result[^>]*probability="0.[6-9][^>]*>/, IO.read(new_file))
65
- File.unlink new_file
66
- end
67
-
68
- def Xtest_uniq_by_seqcharge
69
- cls = Proph::Pep
70
- p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
71
- p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
72
- p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
73
- p4 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
74
- p5 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
75
- un_peps = cls.uniq_by_seqcharge([p1,p2,p3,p4,p5])
76
- ## WHY ISn't that working? below!
77
- ##assert_equal([p1,p2,p4].to_set, un_peps.to_set)
78
- assert(equal_sets([p1,p2,p4], un_peps))
79
- end
80
-
81
- def Xequal_sets(arr1, arr2)
82
- c1 = arr1.dup
83
- c2 = arr2.dup
84
- arr1.each do |c|
85
- arr2.each do |d|
86
- if c == d
87
- c1.delete c
88
- c2.delete d
89
- end
90
- end
91
- end
92
- if (c1.size == c2.size) && (c1.size == 0)
93
- true
94
- else
95
- false
96
- end
97
- end
98
-
99
- def Xtest_arithmetic_avg_scan_by_parent_time
100
- i1 = 100015.0
101
- i2 = 30000.0
102
- i3 = 100.0
103
- t1 = 0.13
104
- t2 = 0.23
105
- t3 = 0.33
106
- p1 = MS::Scan.new(1,1, t1)
107
- p2 = MS::Scan.new(2,1, t2)
108
- p3 = MS::Scan.new(3,1, t3)
109
- s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
110
- s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
111
- s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
112
- scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
113
- tot_inten = i1 + i2 + i3
114
- tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
115
- {:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
116
- if k == :prec_mz # not sure why this is bugging out, but..
117
- assert_equal(v.to_s, scan.send(k).to_s)
118
- else
119
- assert_equal(v, scan.send(k))
120
- end
121
- end
122
-
123
- end
124
-
125
-
126
- end
127
-
128
- =end