mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/test/tc_sequest.rb DELETED
@@ -1,336 +0,0 @@
1
-
2
-
3
-
4
- require 'spec_id'
5
- require 'spec_id/sequest'
6
- require 'test/unit'
7
- require 'spec/mzxml'
8
-
9
-
10
- NODELETE = false
11
-
12
- class SequestTest < Test::Unit::TestCase
13
-
14
- def initialize(arg)
15
- super(arg)
16
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
17
- @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
18
- @tf_params = @tfiles + "bioworks32.params"
19
- @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
20
- @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
21
- end
22
-
23
- def Xtest_set_from_bioworks
24
- if File.exist? @tfiles_l
25
- out_path = '.'
26
- pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(@tf_bioworks_xml, @tf_params, {:ms_path => @tf_mzxml_path, :out_path => out_path})
27
- pepxml_objs.each do |obj|
28
- assert(obj.spectrum_queries.size > 2)
29
- assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
30
- end
31
- else
32
- assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
33
- end
34
- end
35
-
36
- # assert_equal_by_pairs (really any old array)
37
- def aep(obj, arrs)
38
- arrs.each do |arr|
39
- assert_equal(arr[0], obj.send(arr[1]), "#{arr[1]}")
40
- end
41
- end
42
-
43
- #swap the first to guys first
44
- def aeps(obj, arrs)
45
- arrs.each do |arr|
46
- arr[0], arr[1] = arr[1], arr[0]
47
- end
48
- aep(obj, arrs)
49
- end
50
-
51
- ## turn this off if you are doing lots of tests
52
- def Xtest_set_from_bioworks ## new one for opd1
53
- if File.exist? @tfiles_l
54
- st = Time.new
55
- params = @tfiles + "opd1/sequest.3.2.params"
56
- bioworks_xml = @tfiles_l + "opd1/bioworks.000.oldparams.xml"
57
- mzxml_path = @tfiles + "opd1"
58
- out_path = @tfiles
59
- pepxml_version = 18
60
- pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => pepxml_version, :sample_enzyme => "trypsin"})
61
- puts "TOOK #{Time.new - st}secs"
62
- po = pepxml_objs.first
63
- assert_equal(pepxml_version, Sequest::PepXML.pepxml_version)
64
-
65
- # MSMSPipelineAnalysis
66
- pipe = po.msms_pipeline_analysis
67
- aep(pipe, [
68
- ['http://regis-web.systemsbiology.net/pepXML', :xmlns],
69
- ['http://www.w3.org/2001/XMLSchema-instance', :xmlns_xsi],
70
- ['http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd', :xsi_schema_location],
71
- ['000.xml', :summary_xml],
72
- ])
73
-
74
- # MSMSRunSummary
75
- rs = pipe.msms_run_summary
76
- assert_match(/test\/tfiles\/000/, rs.base_name)
77
- aep(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
78
-
79
- # SampleEnzyme
80
- se = rs.sample_enzyme
81
- aep(se, [ ['trypsin', :name], ['KR', :cut], ['P', :no_cut], ['C', :sense], ])
82
-
83
- # SearchSummary
84
- ss = rs.search_summary
85
- assert_match(/test\/tfiles\/000/, ss.base_name)
86
- assert_match(/1\.500/, ss.peptide_mass_tol)
87
- aeps(ss, [ # normal attributes
88
- [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
89
-
90
- # enzymatic_search_constraint
91
- [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
92
-
93
- # parameters
94
- [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
95
- ])
96
-
97
- # SearchDatabase
98
- sd = ss.search_database
99
- aeps(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
100
-
101
- # SpectrumQueries
102
- sq = rs.spectrum_queries
103
- spec = sq.first
104
- aeps(spec, [
105
- [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
106
- #[:precursor_neutral_mass, "1074.5920"], # out2summary
107
- [:precursor_neutral_mass, "1074.666926"], # mine
108
- [:assumed_charge, "1"], [:index, "1"],
109
- ])
110
- sh = spec.search_results.first.search_hits.first
111
- aeps(sh, [
112
- # normal attributes
113
- [:hit_rank, "1"],
114
- [:peptide, "SIYFRNFK"],
115
- [:peptide_prev_aa, "R"],
116
- [:peptide_next_aa, "G"],
117
- [:protein, "gi|16130084|ref|NP_416651.1|"],
118
- [:num_tot_proteins, "1"],
119
- [:num_matched_ions, "4"],
120
- [:tot_num_ions, "14"],
121
- #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
122
- [:calc_neutral_pep_mass, "1074.23261"], # mine
123
- #[:massdiff, "+0.400000"], # out2summary
124
- [:massdiff, "+0.434316000000081"], # mine
125
- [:num_tol_term, "2"], [:num_missed_cleavages, "1"], [:is_rejected, "0"],
126
-
127
- # search_score
128
- [:xcorr, "0.400"], [:deltacn, "0.023"], [:deltacnstar, "0"], [:spscore, "78.8"], [:sprank, "1"],
129
- ])
130
-
131
- spec = sq[1]
132
- aeps(spec, [
133
- [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
134
- [:precursor_neutral_mass, "663.206111"], # mine
135
- [:assumed_charge, "1"], [:index, "2"],
136
- ])
137
-
138
- sh = spec.search_results.first.search_hits.first
139
- aeps(sh, [
140
- # normal attributes
141
- [:hit_rank, "1"], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "10"],
142
- [:num_tol_term, "2"], [:num_missed_cleavages, "0"], [:is_rejected, "0"],
143
- #[:massdiff, "-0.600000"], # out2summary
144
- [:massdiff, "-0.556499000000031"], # mine
145
- #[:calc_neutral_pep_mass, "663.7920"], # out2summary
146
- [:calc_neutral_pep_mass, "663.76261"], # mine
147
-
148
- # search_score
149
- [:xcorr, "0.965"], [:deltacn, "0.132"], [:deltacnstar, "0"], [:spscore, "81.1"], [:sprank, "1"],
150
- ])
151
-
152
- spec = sq[9]
153
- aeps(spec, [
154
- [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, "2"], [:index, "10"],
155
- #[:precursor_neutral_mass, "691.0920"], # out2summary
156
- [:precursor_neutral_mass, "691.150992"], # mine
157
- ])
158
-
159
- sh = spec.search_results.first.search_hits.first
160
- aeps(sh, [
161
- # normal attributes
162
- [:hit_rank, "1"], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "8"], [:num_tol_term, "2"],
163
-
164
- #[:num_missed_cleavages, "0"], # out2summary misses this!
165
- [:num_missed_cleavages, "1"],
166
- [:is_rejected, "0"],
167
- #[:calc_neutral_pep_mass, "691.7920"], # out2summary
168
- [:calc_neutral_pep_mass, "691.82261"], # mine
169
- #[:massdiff, "-0.700000"], # out2summary
170
- [:massdiff, "-0.67161800000008"], # mine
171
-
172
- # search_score
173
- [:xcorr, "0.903"], [:deltacn, "0.333"], [:deltacnstar, "0"], [:spscore, "172.8"], [:sprank, "1"],
174
- ])
175
-
176
- ## IF ARE OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
177
- string = po.to_pepxml
178
-
179
- ans_lines = IO.read(@tfiles + "opd1/000.my_answer.100lines.xml").split("\n")
180
- string.split("\n").each_with_index do |line,i|
181
- base_name_re = /base_name=".*?\/test/o
182
- if i > 99 ; break end
183
- if i == 1
184
- assert_equal(ans_lines[i].sub(/date=".*?"/,''), line.sub(/date=".*?"/,''))
185
- elsif i == 2
186
- assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
187
- else
188
- assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
189
- #assert_equal(ans_lines[i], line)
190
- end
191
- end
192
- else
193
- assert_nil(puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})"))
194
- end
195
-
196
- #assert_match(/#{Regexp.escape("")}/, string)
197
-
198
- end
199
-
200
-
201
-
202
- def Xtest_calc_num_tol_term
203
- params = Sequest::Params.new(@tf_params)
204
- scall = Sequest::PepXML::SearchHit
205
- sym = :calc_num_tol_term
206
- assert_equal(2, scall.send(sym, params, "K.EPTIDR.E"))
207
- assert_equal(1, scall.send(sym, params, "K.PEPTIDR.E"))
208
- assert_equal(1, scall.send(sym, params, "F.EEPTIDR.E"))
209
- assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
210
- end
211
-
212
- def Xtest_calc_num_missed_cleavages
213
- params = Sequest::Params.new(@tf_params)
214
- scall = Sequest::PepXML::SearchHit
215
- sym = :calc_num_missed_cleavages
216
- assert_equal(0, scall.send(sym, params, "K.EPTIDR.E"))
217
- assert_equal(0, scall.send(sym, params, "K.PEPTIDR.E"))
218
- assert_equal(0, scall.send(sym, params, "F.EEPTIDR.E"))
219
- assert_equal(0, scall.send(sym, params, "F.PEPTIDW.R"))
220
- assert_equal(1, scall.send(sym, params, "F.PEPRTIDW.R"))
221
- assert_equal(1, scall.send(sym, params, "F.PEPKTIDW.R"))
222
- assert_equal(2, scall.send(sym, params, "F.PKEPRTIDW.R"))
223
- assert_equal(3, scall.send(sym, params, "F.PKEPRTIDKW.R"))
224
- assert_equal(3, scall.send(sym, params, "F.PKEPRAALKPEERPTIDKW.R"))
225
- assert_equal(1, scall.send(sym, params, "K.RTTIDR.E"))
226
- assert_equal(2, scall.send(sym, params, "K.RTTIKK.E"))
227
- end
228
-
229
-
230
- def Xtest_sys_ind_basename
231
- assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("C:\\Xcalibur\\database\\hello.fasta"))
232
- assert_equal("hello.fasta", Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
233
- end
234
-
235
- def Xtest_modifications
236
- obj = Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
237
- answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
238
- assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
239
-
240
- ## need more here
241
- end
242
-
243
- def Xtest_modification_info
244
- hash = {
245
- :mod_nterm_mass => 520.2,
246
- :modified_peptide => "MOD*IFI^E&D",
247
- :mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
248
- }
249
- answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
250
- string = Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
251
- assert_match(_re('<modification_info'), answ)
252
- assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
253
- assert_match(_re(" modified_peptide=\"MOD*IFI^E&amp;D\""), answ)
254
- assert_match(_re("<mod_aminoacid_mass"), answ)
255
- assert_match(_re(" position=\"3\""), answ)
256
- assert_match(_re(" mass=\"150.3\""), answ)
257
- assert_match(_re(" position=\"6\""), answ)
258
- assert_match(_re(" mass=\"345.2\""), answ)
259
- assert_match(_re("</modification_info>"), answ)
260
- end
261
-
262
- def _re(st)
263
- /#{Regexp.escape(st)}/
264
- end
265
-
266
- def test_modifications
267
- params = Sequest::Params.new(@tf_params)
268
- mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
269
- params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
270
- params.term_diff_search_options = "14.20000 12.33000"
271
- mod = Sequest::PepXML::Modifications.new(params, mod_string)
272
- ## no mods
273
- peptide = "PEPTIDE"
274
- assert_equal(nil, mod.modification_info(peptide))
275
- peptide = "]M*EC^S@IDM#M*EMSCM["
276
- modinfo = mod.modification_info(peptide)
277
- assert_equal(peptide, modinfo.modified_peptide)
278
- assert_in_delta(146.40054, modinfo.mod_nterm_mass, 0.000001)
279
- assert_in_delta(160.52994, modinfo.mod_cterm_mass, 0.000001)
280
- end
281
-
282
- # splits string on ' 'and matches the line found by find_line_regexp in
283
- # lines
284
- def match_modline_pieces(lines, find_line_regexp, string)
285
- pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
286
- lines.each do |line|
287
- if line =~ find_line_regexp
288
- pieces.each do |piece|
289
- assert_match(piece, line)
290
- end
291
- end
292
- end
293
- end
294
-
295
- def test_modifications_in_run
296
- if File.exist? @tfiles_l
297
- modfiles_sequest_dir = @tfiles_l + 'opd1_2runs_2mods/sequest/'
298
- modfiles_data_dir = @tfiles_l + 'opd1_2runs_2mods/data/'
299
- srgfile = modfiles_sequest_dir + 'tmp.srg'
300
- out_path = modfiles_sequest_dir + 'pepxml'
301
- modfiles = %w(020 040).map do |file|
302
- modfiles_sequest_dir + file + ".srf"
303
- end
304
- objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(srgfile), {:ms_data => modfiles_data_dir, :out_path => out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
305
- %w(020 040).each do |file|
306
- fn = out_path + '/' + file + '.xml'
307
- assert(File.exist?(fn), "file #{fn} exists")
308
- beginning = IO.read(fn)
309
- lines = beginning.split("\n")
310
- [
311
- [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
312
-
313
- [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
314
- [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
315
- [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
316
- [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
317
- ].each do |a,b|
318
- match_modline_pieces(lines, a, b)
319
- end
320
- [
321
- '<modification_info modified_peptide="Y#RLGGS#T#K">',
322
- '<mod_aminoacid_mass position="1" mass="243.1559"/>',
323
- '<mod_aminoacid_mass position="7" mass="167.0581"/>',
324
- '</modification_info>',
325
- '<mod_aminoacid_mass position="9" mass="181.085"/>'
326
- ].each do |line|
327
- assert_match(/#{Regexp.escape(line)}/, beginning, "a modification info for a peptide")
328
- end
329
- File.unlink(fn) unless NODELETE
330
- end
331
- else
332
- assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
333
- end
334
- end
335
- end
336
-
data/test/tc_spec.rb DELETED
@@ -1,78 +0,0 @@
1
-
2
- require 'test/unit'
3
- require 'spec/mzxml/parser'
4
- require 'benchmark'
5
-
6
- $SPEED_TEST = false
7
-
8
- class SpecTest < Test::Unit::TestCase
9
-
10
- def initialize(arg)
11
- super(arg)
12
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
13
- @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
14
- @tscans = @tfiles + "opd1/twenty_scans.mzXML"
15
- @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
16
- #@big_file = "/work/john/ISB_Proteomics_18Set/mzXML/sergei_digest_A_full_01.mzXML"
17
- @big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
18
- end
19
-
20
- def test_mzxml_path_precursor_mz_by_scan
21
- if File.exist? @tfiles_l
22
- hash = Spec::MzXML::Parser.new.precursor_mz_by_scan_for_path(@tf_mzxml_path, "*.mzXML")
23
- assert_equal(%w(000 020), hash.keys.sort)
24
- assert(hash["000"].size > 0)
25
- assert(hash["020"].size > 0)
26
- else
27
- assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
28
- end
29
- end
30
-
31
- def test_mzxml_precursor_mz_by_scan
32
- answ = {11=>"1122.119141", 6=>"390.947449", 12=>"444.804504", 7=>"1221.905518", 8=>"1322.036621", 14=>"446.796082", 15=>"1122.041260", 16=>"1421.951416", 18=>"358.676636", 2=>"391.045410", 20=>"1422.277100", 19=>"1460.548340", 3=>"446.009033", 10=>"1322.000732", 4=>"1222.033203"}
33
-
34
- loaded_xmlparser = false
35
- $".each do |lib| if lib =~ /xmlparser/ then loaded_xmlparser = true end end
36
-
37
- types = %w(regex rexml)
38
- if loaded_xmlparser
39
- types.push("xmlparser")
40
- else
41
- puts "'xmlparser' not loaded: SKIPPING 'xmlparser' testing'"
42
- end
43
- types.unshift(nil)
44
-
45
- types.each do |ty|
46
- arr = Spec::MzXML::Parser.new.precursor_mz_by_scan(@tscans, ty)
47
- assert_hash_equal_arr(answ, arr)
48
- # On my linux box these are the speed comparisons:
49
- # REXMLStreamParser ~ 28.5 sec
50
- # REGEX ~ 3.7 sec
51
- # REGEX without procs??
52
- # XMLParser ~0.85 sec
53
-
54
- # Speed test
55
- if $SPEED_TEST
56
- puts "PARSETYPE = #{ty ? ty : "DEFAULT"}: "
57
- puts Benchmark.measure {
58
- arr = Spec::MzXML::Parser.new.precursor_mz_by_scan(@big_file, ty ? ty : nil)
59
- }
60
- end
61
- end
62
- end
63
-
64
- def Xtest_mzxml_precursor_mz_and_inten_by_scan
65
- arr = Spec::MzXML.precursor_mz_and_inten_by_scan(@tscans)
66
- answ = {11=>["1122.119141", "1188303.000000"], 6=>["390.947449", "6191130.000000"], 12=>["444.804504", "716303.000000"], 7=>["1221.905518", "2245001.000000"], 8=>["1322.036621", "1946525.000000"], 14=>["446.796082", "1472386.000000"], 15=>["1122.041260", "1411827.000000"], 16=>["1421.951416", "1187501.000000"] , 18=>["358.676636", "826186.000000"], 2=>["391.045410", "6986078.000000"], 20=>["1422.277100", "709884.000000"], 19=>["1460.548340", "720317.000000"], 3=>["446.009033", "1531503.000000"], 10=>["1322.000732", "1475536.000000"], 4=>["1222.033203", "1520220.000000"]}
67
-
68
- assert_hash_equal_arr(answ, arr)
69
- end
70
-
71
- def assert_hash_equal_arr(hash,arr)
72
- hash.each do |k,v|
73
- assert_equal(hash[k], arr[k])
74
- end
75
- end
76
- end
77
-
78
-
data/test/tc_spec_id.rb DELETED
@@ -1,201 +0,0 @@
1
-
2
- require 'test/unit'
3
- require 'spec_id'
4
-
5
-
6
- class SpecIDTest < Test::Unit::TestCase
7
-
8
- def initialize(arg)
9
- super(arg)
10
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
12
- @bw = @tfiles + "bioworks_small.xml"
13
- @old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
14
- @prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
15
- @srf = @tfiles_l + '7MIX_STD_110802_1.srf'
16
- end
17
-
18
- def test_spec_id_creation
19
- sp = SpecID.new(@bw)
20
- assert_equal(106, sp.prots.size)
21
- end
22
-
23
- def test_classify_by_false_flag
24
- file = @tfiles + "bioworks_with_INV_small.xml"
25
- sp = SpecID.new(file)
26
- assert_equal(19, sp.prots.size)
27
- (tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
28
- assert_equal(4, fp.size, "num false pos")
29
- assert_equal(15, tp.size, "num true pos")
30
- end
31
-
32
-
33
- def test_precision
34
- require 'roc'
35
- file = @tfiles + "bioworks_with_INV_small.xml"
36
- # 4 INV and 15 non-inv for 19 total prots
37
- answ = %w( t t t t t t t t t t F t t t t F t F F )
38
- index = 0
39
- answ.collect! do |bool|
40
- bo = false
41
- if bool == 't'; bo = true end
42
- index += 1
43
- write_index = index
44
- ## in the bioworks_with_INV_small.xml, protein 8 and 9 have the same
45
- ## probability as protein 7
46
- if write_index == 8 || write_index == 9
47
- write_index = 7
48
- end
49
- [write_index, bo]
50
- end
51
- roc = ROC.new
52
- tp, fp = ROC.new.prep_list(answ)
53
- (exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
54
-
55
- sp = SpecID.new(file)
56
- assert_equal(19, sp.prots.size)
57
- tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
58
- (tps, ys) = roc.tps_and_ppv(tp, fp)
59
- assert_equal(exp_tp, tps)
60
- assert_equal(exp_fp, ys)
61
- (num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_", true)
62
- # @TODO: assert these guys for consistencies sake:
63
- assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
64
- # Consistency check only:
65
- assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
66
- end
67
-
68
- def assert_in_delta_arrays(one, two, delta, message=nil)
69
- one.each_with_index do |v,i|
70
- assert_in_delta(v, two[i], delta, message)
71
- end
72
- end
73
-
74
- def test_file_type
75
- assert_equal('bioworks', SpecID.file_type(@bw))
76
- assert_equal('protproph', SpecID.file_type(@prot_proph))
77
- assert_equal('srg', SpecID.file_type('whatever.srg'))
78
- ## WOULD BE NICE TO GET THIS WORKING, TOO
79
- # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
80
- if File.exist? @tfiles_l
81
- assert File.exist?(@srf), "file #{@srf} is there"
82
- assert_equal('srf', SpecID.file_type(@srf))
83
- else
84
- assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
85
- end
86
- end
87
-
88
- def test_non_standard_aa_removal
89
- hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
90
- cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
91
- hash.each do |k,v|
92
- assert_equal(v, cl.call(k))
93
- end
94
- end
95
-
96
-
97
-
98
- end
99
-
100
- class MyProt ; include SpecID::Prot ; end
101
- class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
102
-
103
-
104
- class TestOccamsRazor < Test::Unit::TestCase
105
-
106
- def test_small
107
-
108
- prots = (0..6).to_a.map do |n|
109
- prot = MyProt.new
110
- prot.reference = "ref_#{n}"
111
- prot
112
- end
113
-
114
- peps = (0..12).to_a.map {|v| MyPep.new }
115
-
116
- # 0 1 2 3 4 5 6 7 8 9 10 11 12
117
- aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
118
- xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
119
-
120
- peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
121
- pep.aaseq = aaseq
122
- pep.xcorr = xcorr
123
- end
124
-
125
- prots[0].peps = peps[0,4]
126
- prots[1].peps = [peps[2]] ## should be missing
127
-
128
- test_prots = prots[0,2]
129
- answ = SpecID.occams_razor(test_prots)
130
- answ.each do |an|
131
- assert( an[0].is_a?(SpecID::Prot), "prots are there")
132
- end
133
- first = answ.first
134
- assert_equal( prots[0], first[0])
135
- assert_equal_array_content( prots[0].peps, first[1])
136
-
137
- require 'pp'
138
- #pp answ
139
-
140
-
141
- prots[0].peps = peps[0,4]
142
- prots[1].peps = [peps[2]] ## should be missing
143
- prots[2].peps = [] ## should be missing
144
-
145
- answ = SpecID.occams_razor(test_prots, true)
146
- #pp answ
147
-
148
-
149
- #prots[2].peps = [peps[2]]
150
- #prots[2].peps.push( peps[3] ) ## should be there since it has 2
151
- #prots[3].peps = [peps[3]] ## should be missing
152
- end
153
-
154
- def assert_equal_array_content(exp1, ans, message='')
155
- exp1.each do |item|
156
- assert(ans.include?(item), "finding #{item}: #{message}")
157
- end
158
- end
159
- end
160
-
161
-
162
- require 'fasta'
163
-
164
- class TestProteinGroups < Test::Unit::TestCase
165
-
166
- def test_small
167
- prots = []
168
-
169
- aaseq = ('A'..'Z').to_a.join('')
170
- header = "prot1"
171
- prots << Fasta::Prot.new(header, aaseq)
172
-
173
- aaseq = ('A'..'Z').to_a.reverse.join('')
174
- header = "prot1_reverse"
175
- prots << Fasta::Prot.new(header, aaseq)
176
-
177
- aaseq = ('A'..'Z').to_a.join('')
178
- header = "prot1_identical"
179
- prots << Fasta::Prot.new(header, aaseq)
180
-
181
- aaseq = ('A'..'E').to_a.join('')
182
- header = "prot1_short"
183
- prots << Fasta::Prot.new(header, aaseq)
184
-
185
- aaseq = ('A'..'E').to_a.reverse.join('')
186
- header = "prot1_reverse_short"
187
- prots << Fasta::Prot.new(header, aaseq)
188
-
189
- fasta = Fasta.new(prots)
190
-
191
- pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
192
-
193
- arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
194
-
195
- exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
196
-
197
- assert_equal(exp, arr)
198
- end
199
-
200
- end
201
-
@@ -1,36 +0,0 @@
1
- require 'test/unit'
2
- require 'spec_id'
3
- require 'ostruct'
4
-
5
- class Bob
6
- include SpecIDXML
7
- def initialize(first, second)
8
- @first = first ; @second = second
9
- end
10
- end
11
-
12
-
13
- class SpecIDXMLTest < Test::Unit::TestCase
14
- include SpecIDXML
15
-
16
- def initialize(*args)
17
- super(*args)
18
- end
19
-
20
- def test_short_element_xml_from_instance_vars
21
- obj = Bob.new(1, 2)
22
- st = obj.short_element_xml_from_instance_vars("bob")
23
- assert_match(/second="2"/, st)
24
- assert_match(/first="1"/, st)
25
- assert_match(/^<bob /, st)
26
- assert_match(/>$/, st)
27
- end
28
-
29
- def test_escape_special_chars
30
- assert_equal("&amp;&gt;&lt;&quot;&apos;" , escape_special_chars("&><\"'"))
31
- assert_equal("PE&amp;PT&gt;I&lt;D&quot;E&apos;", escape_special_chars("PE&PT>I<D\"E'"))
32
- end
33
-
34
- end
35
-
36
-