mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,111 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+
3
+ require 'spec_id/precision/prob'
4
+ require 'spec_id'
5
+ require 'spec_id/proph'
6
+ require 'validator'
7
+ require 'fasta'
8
+ require 'spec_id/sequest/params'
9
+
10
+
11
+ describe 'finding precision Proph::Prot::Pep objects' do
12
+ before(:each) do
13
+ @spec_id = GenericSpecID.new
14
+ # actual sort order: 3, 0, 4, 1, 2
15
+ peps = [
16
+ # 0: canonical
17
+ {:peptide_sequence => '0', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
18
+ # 1: lower init prob
19
+ {:peptide_sequence => '1', :initial_probability => 0.60, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
20
+ # 2: lower nsp prob
21
+ {:peptide_sequence => '2', :initial_probability => 0.63, :nsp_adjusted_probability => 0.52, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},
22
+ # 3: extra instances! (best hit)
23
+ {:peptide_sequence => '3', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => true, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 5, :is_contributing_evidence => true},
24
+ # 4: is nondegen = false
25
+ {:peptide_sequence => '4', :initial_probability => 0.63, :nsp_adjusted_probability => 0.62, :weight => 1.0, :is_nondegenerate_evidence => false, :n_enzymatic_termini => 2, :n_sibling_peptides => 0.0, :n_instances => 1, :is_contributing_evidence => true},].map {|v| Proph::Prot::Pep.new(v) }
26
+ @spec_id.peps = peps
27
+ end
28
+
29
+ it 'runs without any validator' do
30
+ answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
31
+ answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits_precision", "probabilities"]
32
+ answer[:aaseqs].should == %w(3 0 4 1 2)
33
+ end
34
+
35
+ it 'returns modified peptides if any modified peptides' do
36
+ @spec_id.peps[1].mod_info = Sequest::PepXML::SearchHit::ModificationInfo.new(['MODIFIED', []])
37
+ answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
38
+ answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits_precision", "probabilities"]
39
+ end
40
+
41
+ end
42
+
43
+
44
+ =begin
45
+ it 'gets precision with all validators (including probability and decoy)' do
46
+ ## create some decoy peptides!
47
+ @spec_id.peps.sort_by {|pep| pep.probability }[100..-1].each_with_index do |pep,i|
48
+ if i % 3 == 0
49
+ pep.prots.each {|prot| prot.protein_name = 'DECOY_' + prot.protein_name }
50
+ end
51
+ end
52
+ # check which ones are ACTUALLY normal and decoy
53
+ (decoy, normal) = @spec_id.peps.partition do |pep|
54
+ pep.prots.all? {|prot| prot.protein_name =~ /^DECOY_/}
55
+ end
56
+ num_decoy = decoy.size
57
+ num_normal = normal.size
58
+ end
59
+
60
+ prob_spec_helper = File.expand_path( File.dirname(__FILE__) + '/prob_spec_helper' )
61
+
62
+ # this does a minimal test to see if this functions properly
63
+ # (not for accuracy, which is done in validator_spec)
64
+ ## WITH FASTA FILE:
65
+ base_dir = Tfiles_l + '/opd1_2runs_2mods/sequest'
66
+ fasta_file = base_dir + '/ecoli_K12_ncbi_20060321.fasta'
67
+ params_file = base_dir + '/ecoli.params'
68
+ bias_file = base_dir + '/ecoli_K12_ncbi_20060321.bias.fasta'
69
+ toppred_file = base_dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
70
+
71
+ fasta_file.should exist
72
+
73
+ prob = Validator::Probability.new
74
+ badaa_freq = Validator::AA.new('C', :frequency => 0.0115866200193321)
75
+ badaa_dig = Validator::AA.new('C')
76
+ bias = Validator::Bias.new(Fasta.new(bias_file))
77
+ transmem = Validator::Transmem::Protein.new(toppred_file)
78
+ decoy = Validator::Decoy.new(/^DECOY_/)
79
+
80
+ turn_on_digestion = true
81
+ if turn_on_digestion
82
+ # digestion based validators need this set!
83
+ digested_peps = Digestor.digest(Fasta.new(fasta_file), Sequest::Params.new(params_file))
84
+ [badaa_dig, bias, transmem].each do |val|
85
+ val.set_false_to_total_ratio(digested_peps)
86
+ end
87
+ end
88
+ transmem.transmem_status_hash = transmem.create_transmem_status_hash(@spec_id.peps)
89
+ val_list = [decoy, badaa_freq, badaa_dig, prob, bias, transmem]
90
+
91
+ opts = { :validators => val_list }
92
+
93
+
94
+ hash = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id, opts)
95
+ #puts "OUTPUT: "
96
+ #puts hash.to_yaml
97
+
98
+
99
+ # frozen
100
+ e_hash = ProbMSHelper::Answer2
101
+ # hash[:pephits_precision].size.should == e_hash[:pephits_precision].size
102
+ # other data types are tested above, just testing validators
103
+ hash[:pephits_precision].zip( e_hash[:pephits_precision] ) do |val_hash, val_hash_e|
104
+ val_hash[:values].size.should == num_normal
105
+ #val_hash[:validator].should == val_hash_e[:validator]
106
+ val_hash[:values].zip(val_hash_e[:values]) {|v,e| v.should be_close(e, 0.000000001)}
107
+ end
108
+ end
109
+ end
110
+
111
+ =end
File without changes
@@ -0,0 +1,143 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+
3
+ require 'spec_id/proph/pep_summary'
4
+
5
+ ToCheck = {
6
+ :spectrum_query => {:first => {:spectrum => "020.42.42.3", :start_scan=>42, :end_scan=>42, :precursor_neutral_mass=>1015.77285654469, :assumed_charge=>3, :index=>1 },
7
+ :last => {:spectrum=>"020.344.344.3", :start_scan=>344, :end_scan=>344, :precursor_neutral_mass=>1447.6040333025, :assumed_charge=>3, :index=>18 },
8
+ },
9
+
10
+ :search_hit => {:first => {:hit_rank=>1, :peptide=>"GTGVSVTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"S", :protein=>"gi|49176370|ref|YP_026228.1|", :num_tot_proteins=>1, :num_matched_ions=>10, :tot_num_ions=>70, :calc_neutral_pep_mass=>1015.79382542, :massdiff=>-0.0209688753124055, :num_tol_term=>2, :num_missed_cleavages=>0, :is_rejected=>0, :xcorr=>1.06543827056885, :deltacn => 0.192325830459595, :deltacnstar=>0, :spscore=>77.8397979736328, :sprank=>3, :probability=>0.07881571, :fval=>0.1592, :ntt=>2, :nmc=> 0, :massd=>-0.021},
11
+ :last => { :hit_rank=>1, :peptide=>"VAALRVPGGASLTR", :peptide_prev_aa=>"R", :peptide_next_aa=>"K", :protein=>"gi|16129819|ref|NP_416380.1|", :num_tot_proteins=>1, :num_matched_ions=>16, :tot_num_ions=>78, :calc_neutral_pep_mass=>1447.58289842, :massdiff=> 0.0211348825000641, :num_tol_term=>2, :num_missed_cleavages=>1, :is_rejected=>0, :xcorr=>1.3090912103653, :deltacn => 0.259967535734177, :deltacnstar => 0, :spscore => 118.513412475586, :sprank => 4, :probability=>0.27738378, :fval=>1.3810, :ntt=>2, :nmc=>1, :massd=>0.021 },
12
+ }
13
+ }
14
+
15
+
16
+ describe Proph::PepSummary, "reading a .xml file" do
17
+ before(:each) do
18
+ file = Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml'
19
+ @obj = Proph::PepSummary.new(file)
20
+ end
21
+
22
+ it 'should raise an error if not a peptide prophet file' do
23
+ lambda { Proph::PepSummary.new(Tfiles + '/opd1/000.tpp_2.9.2.first10.xml')}.should raise_error(ArgumentError)
24
+ end
25
+
26
+ it 'has spectrum queries' do
27
+ @obj.spectrum_queries.size.should == 18
28
+
29
+ [:first, :last].each do |mth|
30
+ ToCheck[:spectrum_query][mth].each do |k,v|
31
+ @obj.spectrum_queries.send(mth).send(k).should == v
32
+ end
33
+ ToCheck[:search_hit][mth].each do |k,v|
34
+ @obj.spectrum_queries.send(mth).search_results.first.search_hits.first.send(k).should == v
35
+ end
36
+ end
37
+ end
38
+
39
+ it 'has pephits (which are descended from SearchHit)' do
40
+ @obj.peps.size.should == 18
41
+ [:hit_rank, :probability, :fval, :ntt, :nmc, :massd].each do |guy|
42
+ @obj.peps.first.should respond_to(guy)
43
+ end
44
+
45
+ [:first, :last].each do |mth|
46
+ ToCheck[:search_hit][mth].each do |k,v|
47
+ @obj.peps.send(mth).send(k).should == v
48
+ end
49
+ end
50
+
51
+ end
52
+
53
+ end
54
+
55
+ ####################################################
56
+ # OTHER TESTS NOT IMPLEMENTED (do we need these??)
57
+ ####################################################
58
+
59
+ =begin
60
+
61
+ require 'test/unit'
62
+ require 'spec_id'
63
+ require 'ms/scan'
64
+
65
+ class ProphTest < Test::Unit::TestCase
66
+
67
+ def initialize(arg)
68
+ super(arg)
69
+ @tfiles = File.dirname(__FILE__) + '/tfiles/'
70
+ @pepproph_xml = @tfiles + 'pepproph_small.xml'
71
+ end
72
+
73
+ def Xtest_filter_by_min_pep_prob
74
+ obj = Proph::Pep::Parser.new
75
+ new_file = "tfiles/tmp.xml"
76
+ assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
77
+ obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
78
+ assert_no_match(/peptideprophet_result probability="0.[0-5]/, IO.read(new_file))
79
+ assert_match(/<peptideprophet_result[^>]*probability="0.[6-9][^>]*>/, IO.read(new_file))
80
+ File.unlink new_file
81
+ end
82
+
83
+ def Xtest_uniq_by_seqcharge
84
+ cls = Proph::Pep
85
+ p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
86
+ p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
87
+ p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
88
+ p4 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
89
+ p5 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
90
+ un_peps = cls.uniq_by_seqcharge([p1,p2,p3,p4,p5])
91
+ ## Why isn't the assertion below working?
92
+ ##assert_equal([p1,p2,p4].to_set, un_peps.to_set)
93
+ assert(equal_sets([p1,p2,p4], un_peps))
94
+ end
95
+
96
+ def Xequal_sets(arr1, arr2)
97
+ c1 = arr1.dup
98
+ c2 = arr2.dup
99
+ arr1.each do |c|
100
+ arr2.each do |d|
101
+ if c == d
102
+ c1.delete c
103
+ c2.delete d
104
+ end
105
+ end
106
+ end
107
+ if (c1.size == c2.size) && (c1.size == 0)
108
+ true
109
+ else
110
+ false
111
+ end
112
+ end
113
+
114
+ def Xtest_arithmetic_avg_scan_by_parent_time
115
+ i1 = 100015.0
116
+ i2 = 30000.0
117
+ i3 = 100.0
118
+ t1 = 0.13
119
+ t2 = 0.23
120
+ t3 = 0.33
121
+ p1 = MS::Scan.new(1,1, t1)
122
+ p2 = MS::Scan.new(2,1, t2)
123
+ p3 = MS::Scan.new(3,1, t3)
124
+ s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
125
+ s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
126
+ s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
127
+ scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
128
+ tot_inten = i1 + i2 + i3
129
+ tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
130
+ {:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
131
+ if k == :prec_mz # not sure why this is bugging out, but..
132
+ assert_equal(v.to_s, scan.send(k).to_s)
133
+ else
134
+ assert_equal(v, scan.send(k))
135
+ end
136
+ end
137
+
138
+ end
139
+
140
+
141
+ end
142
+
143
+ =end
@@ -1,10 +1,51 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
1
2
 
3
+ require 'spec_id/proph/prot_summary'
2
4
 
3
- require 'test/unit'
4
- require 'spec_id'
5
- require 'spec/scan'
5
+ describe Proph::ProtSummary, "reading a -prot.xml file" do
6
+ before(:each) do
7
+ file = Tfiles + '/opd1/000_020_3prots-prot.xml'
8
+ @obj = Proph::ProtSummary.new(file)
9
+ end
10
+
11
+ it 'extracts protein groups with probabilities' do
12
+ @obj.prot_groups.size.should == 3
13
+ @obj.prot_groups.first.probability.should == 1.0
14
+ @obj.prot_groups[2].probability == 0.98
15
+ end
16
+
17
+ it 'extracts protein hit attributes' do
18
+ prot = @obj.prot_groups[1].prots.first
19
+ %w(protein_name n_indistinguishable_proteins probability percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids).zip(["gi|16132019|ref|NP_418618.1|", 1, 1.0, 13.0, "FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR".split('+'), "a", 2, 0.41]) do |name, val|
20
+ prot.send(name).should == val
21
+ end
22
+ end
23
+
24
+ it 'can detect -prot.xml version' do
25
+ answer = ['1.9', '4']
26
+ files = ['/yeast_gly_small-prot.xml', '/interact-opd1_mods_small-prot.xml'].map {|v| Tfiles + v}
27
+ files.zip(answer) do |file,answ|
28
+ Proph::ProtSummary.new.get_version(file).should == answ
29
+ end
30
+ end
31
+
32
+ it 'has prots, peps, and prot_groups ' do
33
+ @obj.peps.should_not be_nil
34
+ @obj.prots.should_not be_nil
35
+ @obj.prot_groups.should_not be_nil
36
+ end
6
37
 
38
+ end
39
+
40
+ ####################################################
41
+ # OTHER TESTS NOT IMPLEMENTED (do we need these??)
42
+ ####################################################
7
43
 
44
+ =begin
45
+
46
+ require 'test/unit'
47
+ require 'spec_id'
48
+ require 'ms/scan'
8
49
 
9
50
  class ProphTest < Test::Unit::TestCase
10
51
 
@@ -14,29 +55,6 @@ class ProphTest < Test::Unit::TestCase
14
55
  @pepproph_xml = @tfiles + 'pepproph_small.xml'
15
56
  end
16
57
 
17
- def test_parse_protxml_file
18
- file = @tfiles + 'opd1/000_020_3prots-prot.xml'
19
- #obj = Proph::ProtSummary.new
20
- obj = Proph::ProtSummary.new(file)
21
- assert_equal(3, obj.prot_groups.size)
22
- assert_equal("1.00", obj.prot_groups.first.probability)
23
- assert_equal("0.98", obj.prot_groups[2].probability)
24
- assert_equal_xml_atts_to_obj('protein_name="gi|16132019|ref|NP_418618.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="13.0" unique_stripped_peptides="FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.41"', obj.prot_groups[1].prots.first)
25
- end
26
-
27
- def assert_equal_xml_atts_to_obj(string, obj, msg=nil)
28
- parts = string.split(/\s+/)
29
- parts.each do |part|
30
- pi = part.split('=')
31
- value = pi[1].sub(/^"/,'').sub(/"$/,'')
32
- if pi[0] == "probability"
33
- value = value.to_f
34
- end
35
- assert_equal(value, obj.send(pi[0].to_sym))
36
- end
37
- end
38
-
39
-
40
58
  def Xtest_filter_by_min_pep_prob
41
59
  obj = Proph::Pep::Parser.new
42
60
  new_file = "tfiles/tmp.xml"
@@ -85,12 +103,12 @@ class ProphTest < Test::Unit::TestCase
85
103
  t1 = 0.13
86
104
  t2 = 0.23
87
105
  t3 = 0.33
88
- p1 = Spec::Scan.new(1,1, t1)
89
- p2 = Spec::Scan.new(2,1, t2)
90
- p3 = Spec::Scan.new(3,1, t3)
91
- s1 = Spec::Scan.new(1,2,0.10, 300.2, i1, p1)
92
- s2 = Spec::Scan.new(2,2,0.20, 301.1, i2, p2)
93
- s3 = Spec::Scan.new(3,2,0.30, 302.0, i3, p3)
106
+ p1 = MS::Scan.new(1,1, t1)
107
+ p2 = MS::Scan.new(2,1, t2)
108
+ p3 = MS::Scan.new(3,1, t3)
109
+ s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
110
+ s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
111
+ s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
94
112
  scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
95
113
  tot_inten = i1 + i2 + i3
96
114
  tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
@@ -106,3 +124,5 @@ class ProphTest < Test::Unit::TestCase
106
124
 
107
125
 
108
126
  end
127
+
128
+ =end
@@ -1,3 +1,87 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require 'spec_id/protein_summary'
3
+
4
+ xdescribe ProteinSummary do
5
+
6
+ before(:all) do
7
+ @tf_proph = Tfiles_l + "/opd1/000_020-prot.xml"
8
+ @tf_summary = Tfiles_l + "/opd1/000_020-prot.summary.html"
9
+ @tf_bioworks_small = Tfiles + '/bioworks_small.xml'
10
+ @tf_bioworks_small_summary_html = Tfiles + '/bioworks_small.summary.html'
11
+ @tf_proph_cat_inv = Tfiles + '/opd1/opd1_cat_inv_small-prot.xml'
12
+ @tf_proph_cat_inv_summary_html = Tfiles + '/opd1/opd1_cat_inv_small-prot.summary.html'
13
+ @tf_proph_cat_inv_summary_png = Tfiles + '/opd1/opd1_cat_inv_small-prot.summary.png'
14
+ @tf_peptide_count = Tfiles + "/peptide_counts.tmp.txt"
15
+ @no_delete = false
16
+ end
17
+
18
+ spec_large do
19
+ it 'does basic summary on prophet file' do
20
+ runit "-c 5.0 #{@tf_proph}"
21
+ @tf_summary.should exist
22
+ string = IO.read(@tf_summary)
23
+ string.should =~ /gi\|16132176\|ref\|NP_418775\.1\|/
24
+ string.should =~ /16132176/
25
+ File.unlink(@tf_summary) unless @no_delete
26
+ end
27
+ end
28
+
29
+ it 'does basic summary on bioworks.xml file' do
30
+ runit "#{@tf_bioworks_small}"
31
+ @tf_bioworks_small_summary_html.should exist
32
+ File.unlink @tf_bioworks_small_summary_html unless @no_delete
33
+ # @TODO: need to freeze the output here
34
+ end
35
+
36
+
37
+ it 'calculates precision values with bioworks files' do
38
+ ## Could reimplement a separate file approach?
39
+ #reply = `#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
40
+ runit "#{@tf_bioworks_small} --precision"
41
+ IO.read(@tf_bioworks_small_summary_html).should =~ /# hits.*106/m
42
+ # should add more tests here...
43
+ @tf_bioworks_small_summary_html.should exist
44
+ File.unlink @tf_bioworks_small_summary_html unless @no_delete
45
+ end
46
+
47
+ it 'calculates precision values with prophet files' do
48
+ runit "#{@tf_proph_cat_inv} -f INV_ --prefix --precision"
49
+ html = IO.read(@tf_proph_cat_inv_summary_html)
50
+ html.should =~ /# hits/
51
+ html.should =~ /2.*0\.0000/m
52
+ html.should =~ /3.*0\.3333/m
53
+ html.should =~ /7.*0\.5714/m
54
+
55
+ File.unlink @tf_proph_cat_inv_summary_html unless @no_delete
56
+ File.unlink @tf_proph_cat_inv_summary_png unless @no_delete
57
+ end
58
+
59
+ spec_large do
60
+ it 'gives correct peptide counts' do
61
+ runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
62
+ @tf_peptide_count.should exist
63
+ file = IO.read(@tf_peptide_count)
64
+ file.should include("gi|16132176|ref|NP_418775.1|\t2")
65
+ file.should include("gi|16131996|ref|NP_418595.1|\t1")
66
+ file.should include("gi|16131692|ref|NP_418288.1|\t4")
67
+ File.unlink @tf_peptide_count unless @no_delete
68
+ end
69
+ end
70
+
71
+ def runit(string_or_args)
72
+ args = if string_or_args.is_a? String
73
+ string_or_args.split(/\s+/)
74
+ else
75
+ string_or_args
76
+ end
77
+ ProteinSummary.new.create_from_command_line_args(args)
78
+ end
79
+
80
+
81
+ end
82
+
83
+
84
+ =begin
1
85
 
2
86
  require 'test/unit'
3
87
  require 'spec_id/protein_summary'
@@ -102,3 +186,4 @@ class ProphProtSummaryTest < Test::Unit::TestCase
102
186
 
103
187
  end
104
188
 
189
+ =end
@@ -0,0 +1,68 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+ require 'spec_id/sequest/params'
3
+
4
+
5
+ describe "a sequest params object", :shared => true do
6
+ before(:each) do
7
+ @obj = Sequest::Params.new(@file)
8
+ end
9
+ it 'gives enzyme_specificity' do
10
+ ar = @obj.enzyme_specificity
11
+ ar.size.should == 3
12
+ ar.should == @enzyme_specificity
13
+ end
14
+ it 'returns static mods callable by key' do
15
+ @obj.add_Cterm_peptide.should == @add_Cterm_peptide
16
+ end
17
+ end
18
+
19
+
20
+ describe Sequest::Params, "with a bioworks 3.1 params" do
21
+ before(:all) do
22
+ @file = Tfiles + '/bioworks31.params'
23
+ @obj = Sequest::Params.new(@file)
24
+ @enzyme_specificity = [1, 'KR', '']
25
+ @add_Cterm_peptide = '0.0000'
26
+ end
27
+ it_should_behave_like 'a sequest params object'
28
+ end
29
+
30
+ describe Sequest::Params, "with a bioworks 3.2 params" do
31
+ before(:all) do
32
+ @file = Tfiles + '/bioworks32.params'
33
+ @obj = Sequest::Params.new(@file)
34
+ @enzyme_specificity = [1, 'KR', 'P']
35
+ @add_Cterm_peptide = '0.0000'
36
+ end
37
+ it_should_behave_like 'a sequest params object'
38
+ end
39
+
40
+ describe Sequest::Params, "with a bioworks 3.3 params" do
41
+ before(:all) do
42
+ @file = Tfiles + '/bioworks33.params'
43
+ @obj = Sequest::Params.new(@file)
44
+ @enzyme_specificity = [1, 'KR', '']
45
+ @add_Cterm_peptide = '0.0000'
46
+ end
47
+ it_should_behave_like 'a sequest params object'
48
+ end
49
+
50
+ describe Sequest::Params, "given a bioworks 3.2 params (from .srf file)" do
51
+ before(:all) do
52
+ @file = Tfiles + '/7MIX_STD_110802_1.sequest_params_fragment.srf'
53
+ @obj = Sequest::Params.new(@file)
54
+ @enzyme_specificity = [1, 'KR', 'P']
55
+ @add_Cterm_peptide = '0.0000'
56
+ end
57
+ it_should_behave_like 'a sequest params object'
58
+ end
59
+
60
+
61
+ describe Sequest::Params do
62
+ it '(private) can give a system independent basename' do
63
+ Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta").should == "hello.fasta"
64
+ Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta").should == "hello.fasta"
65
+ end
66
+
67
+ end
68
+