mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,361 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
+ require 'spec_id'
4
+ require 'spec_id/srf'
5
+
6
+ # we use this to set the values of generic proteins below
7
+ require 'set_from_hash'
8
+
9
+
10
# Shared examples for compiling a protein list from a set of peptide hits.
#
# An including group must provide (e.g. in a before(:all) block):
#   @prots - an array of protein objects whose references are "prot_0",
#            "prot_1", ... (at least indices 0..4 are used here)
#   @meth  - a proc taking two args, an array of peptides and the kind of
#            list creation (:no_update, :update or :new), and returning the
#            resulting list of proteins
describe 'creating a list of proteins from peptides', :shared => true do
  before(:each) do
    # Each peptide references the proteins (taken from @prots) it maps to.
    hashes = [
      {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [@prots[0],@prots[1]]},
      {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [@prots[1],@prots[2]]},
      {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [@prots[3]]},
      {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [@prots[4]]},
      {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [@prots[0]]},
      {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => @prots[1,2]},
    ]

    @peps = hashes.map do |hash|
      SRF::OUT::Pep.new.set_from_hash(hash)
    end
  end

  it 'compiles protein lists from peps not touching peps attr (:no_update)' do
    prts = @meth.call(@peps, :no_update)
    exp = (0..4).map {|n| "prot_#{n}" }
    refs = prts.map {|v| v.reference }.sort
    refs.should == exp
    prts.each do |prt|
      prt.peps.should == []
    end
  end

  it 'compiles protein lists with updated peps attribute (:update)' do
    prts = @meth.call(@peps, :update)
    protein_match(prts, 'prot_0', %w(PEP0 PEP4))
    protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
    protein_match(prts, 'prot_2', %w(PEP1 PEP5))
    protein_match(prts, 'prot_3', %w(PEP2))
    protein_match(prts, 'prot_4', %w(PEP3))
    srt_ref = prts.map {|v| v.reference }.sort
    srt_ref.should == %w(prot_0 prot_1 prot_2 prot_3 prot_4) # just the right number of prots
    prot_0 = prts.find {|v| v.reference == 'prot_0' }
    # Compare against the protein the peptides were built with; comparing two
    # lookups from the same returned list would be trivially true.
    prot_0.__id__.should == @prots[0].__id__ # proteins are identical
  end

  it 'compiles protein lists of new proteins (:new)' do
    prts = @meth.call(@peps, :new)
    protein_match(prts, 'prot_0', %w(PEP0 PEP4))
    protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
    protein_match(prts, 'prot_2', %w(PEP1 PEP5))
    protein_match(prts, 'prot_3', %w(PEP2))
    protein_match(prts, 'prot_4', %w(PEP3))
    srt_ref = prts.map {|v| v.reference }.sort
    srt_ref.should == %w(prot_0 prot_1 prot_2 prot_3 prot_4) # just the right number of prots
    prot_0 = prts.find {|v| v.reference == 'prot_0' }
    # Compare object ids on both sides (an object never equals an Integer id,
    # so mixing the two can never fail).
    prot_0.__id__.should_not == @prots[0].__id__ # proteins are not identical
  end

  # Checks that, among prts, the protein whose reference is ref holds exactly
  # the peptides whose aaseqs are listed in pepseqs (order is ignored).
  def protein_match(prts, ref, pepseqs)
    prt = prts.find {|v| v.reference == ref }
    prt.should_not be_nil # fail clearly when the protein is absent
    prt.peps.map {|v| v.aaseq }.sort.should == pepseqs.sort
  end

end
86
+
87
# Runs the shared protein-list examples against SpecID::GenericProt objects.
describe SpecID, 'with generic proteins' do
  before(:all) do
    # eight proteins referenced as prot_0 .. prot_7, each with no peptides yet
    @prots = Array.new(8) do |idx|
      attrs = {:reference => "prot_#{idx}", :peps => []}
      SpecID::GenericProt.new.set_from_hash(attrs)
    end
    @meth = proc {|pep_list, list_kind| SpecID.protein_list(pep_list, list_kind) }
  end

  it_should_behave_like 'creating a list of proteins from peptides'
end
96
+
97
# Runs the shared protein-list examples against SRF::OUT::Prot objects.
describe SpecID, 'with array based proteins' do
  before(:all) do
    # eight proteins referenced as prot_0 .. prot_7, each with no peptides yet
    @prots = Array.new(8) do |idx|
      attrs = {:reference => "prot_#{idx}", :peps => []}
      SRF::OUT::Prot.new.set_from_hash(attrs)
    end
    @meth = proc {|pep_list, list_kind| SpecID.protein_list(pep_list, list_kind) }
  end

  it_should_behave_like 'creating a list of proteins from peptides'
end
106
+
107
# Marker module mixed into both TrueClass and FalseClass so that the specs
# can check `value.is_a?(Boolean)` for either true or false.
module Boolean
end

[TrueClass, FalseClass].each {|klass| klass.send(:include, Boolean) }
110
+
111
# Specs for building a SpecID object directly from result files on disk.
describe SpecID, 'being created' do
  it 'can be from small bioworks.xml' do
    sp = SpecID.new(Tfiles + '/bioworks_small.xml')
    sp.prots.size.should == 106
  end

  it 'can be from small -prot.xml (newer prophet versions)' do
    prot_xml = Tfiles + '/interact-opd1_mods_small-prot.xml'
    sp = SpecID.new(prot_xml)
    sp.is_a?(SpecID).should be_true
    sp.is_a?(Proph::ProtSummary).should be_true
    sp.prots.size.should == 20
    sp.peps.size.should == 31

    # every protein attribute must come back as the expected class
    prot_types = {
      :protein_name => String,
      :n_indistinguishable_proteins => Integer,
      :probability => Float,
      :percent_coverage => Float,
      :unique_stripped_peptides => Array,
      :group_sibling_id => String,
      :total_number_peptides => Integer,
      :pct_spectrum_ids => Float,
      :peps => Array,
    }
    sp.prots.each do |prot|
      prot_types.each {|attr, klass| prot.send(attr).is_a?(klass).should be_true }
    end

    # ... and likewise for every peptide attribute
    pep_types = {
      :aaseq => String,
      :peptide_sequence => String,
      :charge => Integer,
      :initial_probability => Float,
      :nsp_adjusted_probability => Float,
      :weight => Float,
      :is_nondegenerate_evidence => Boolean, # no Boolean class
      :n_enzymatic_termini => Integer,
      :n_sibling_peptides => Float,
      :n_sibling_peptides_bin => Integer,
      :n_instances => Integer,
      :is_contributing_evidence => Boolean,
      :calc_neutral_pep_mass => Float,
      :modification_info => Object,
      :mod_info => Object,
    }
    sp.peps.each do |pep|
      pep_types.each {|attr, klass| pep.send(attr).is_a?(klass).should be_true }
    end

    # protein lists of the peptides that map to more than one protein
    multi_prot_lists = sp.peps.select {|pep| pep.prots.size > 1 }.map {|pep| pep.prots }
    multi_prot_lists.each do |prt_ar|
      prt_ar.each do |prt|
        # nils are tolerated because this is a small file and their proteins
        # are not found
        (prt.nil? || prt.is_a?(SpecID::Prot)).should be_true
        (prt.nil? || prt.is_a?(Proph::Prot)).should be_true
      end
    end

    mod_objects = sp.peps.select {|pep| !pep.mod_info.nil? }.map {|pep| pep.mod_info }
    # frozen (presumably a regression value recorded for this fixture)
    mod_objects.size.should == 23
  end

  spec_large do
    it 'works on a large file' do
      file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods-prot.xml'
      #file = '/work/john/db_quest/verify_prophet/orbi/prophet_results/orbi_f00-prot.xml'
      start = Time.now
      sp = SpecID.new(file)
      puts "- Took #{Time.now - start} seconds to read"

      multi_prot_lists = sp.peps.select {|pep| pep.prots.size > 1 }.map {|pep| pep.prots }
      multi_prot_lists.each do |prt_ar|
        prt_ar.each do |prt|
          # unlike the small-file example, nil proteins are not accepted here
          prt.is_a?(SpecID::Prot).should be_true
          prt.is_a?(Proph::Prot).should be_true
        end
      end
    end
  end

  # NOTE(review): `it_should` is not defined in this file (presumably a
  # spec_helper marker for unfinished examples); the body only names the two
  # candidate fixtures and asserts nothing.
  it_should 'can be from -prot.xml (older prophet versions)' do
    prot_xml = Tfiles + '/4-03-03_small-prot.xml'
    prot_xml = Tfiles + '/yeast_gly_small-prot.xml'
  end
end
212
+
213
# Specs for the class-level helpers: file type detection and peptide
# sequence clean-up.
describe SpecID, 'class methods' do

  it 'determines filetype (small files)' do
    # expected file type (as a symbol) => file to inspect
    path_for_type = {
      :bioworks => Tfiles + "/bioworks_small.xml",
      :protproph => Tfiles + '/opd1/000_020_3prots-prot.xml',
      :pepproph => Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml',
      :srf => Tfiles + '/head_of_7MIX.srf',
      :srg => 'whatever.srg',
    }
    path_for_type.each_pair do |type, path|
      SpecID.file_type(path).should == type.to_s
    end
    ## WOULD BE NICE TO GET THIS WORKING, TOO
    # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
  end

  it 'can remove non-standard amino acids' do
    # raw sequence => sequence expected after clean-up
    cleaned_for = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
    cleaned_for.each_pair do |raw, cleaned|
      SpecID::Pep.remove_non_amino_acids(raw).should == cleaned
    end
  end

end
239
+
240
# Specs for SpecID.occams_razor, run against minimal protein/peptide classes
# that only mix in the SpecID modules.
describe SpecID, "determining the minimum set of proteins from pephits" do

  before(:all) do
    class MyProt
      include SpecID::Prot
    end

    class MyPep
      include SpecID::Pep
      attr_accessor :xcorr
    end
  end

  it 'can do occams razor on small set' do
    # seven proteins referenced as ref_0 .. ref_6
    prots = Array.new(7) do |idx|
      prot = MyProt.new
      prot.reference = "ref_#{idx}"
      prot
    end

    peps = Array.new(13) { MyPep.new }

    #            0   1   2   3   4   5   6   7   8   9   10  11    12
    aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
    xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]

    peps.each_with_index do |pep, idx|
      pep.aaseq = aaseqs[idx]
      pep.xcorr = xcorrs[idx]
    end

    prots[0].peps = peps[0,4]
    prots[1].peps = [peps[2]]  ## should be missing

    test_prots = prots[0,2]
    answ = SpecID.occams_razor(test_prots)
    # the first element of every answer entry is a protein
    answ.each {|entry| entry[0].is_a?(SpecID::Prot).should be_true }

    top_entry = answ.first
    top_entry[0].should == prots[0]
    equal_array_content( prots[0].peps, top_entry[1])

    require 'pp'
    #pp answ

    prots[0].peps = peps[0,4]
    prots[1].peps = [peps[2]]  ## should be missing
    prots[2].peps = []  ## should be missing

    # second call passes `true` as the extra argument; its result is not
    # asserted on yet
    answ = SpecID.occams_razor(test_prots, true)
    puts '- NEED MORE tests HERE!' if $specdoc
    #pp answ

    #prots[2].peps = [peps[2]]
    #prots[2].peps.push( peps[3] ) ## should be there since it has 2
    #prots[3].peps = [peps[3]]  ## should be missing
  end

  # Expects every item of exp1 to be included in ans (ans may hold additional
  # items). The message argument is accepted but not used.
  def equal_array_content(exp1, ans, message='')
    exp1.each {|expected_item| ans.should include(expected_item) }
  end

end
304
+
305
+
306
+ require 'fasta'
307
+
308
# Specs for looking up, per peptide sequence, the fasta proteins that
# contain it.
describe SpecID::Pep, "with a small fasta object" do
  before(:each) do
    full_alphabet = ('A'..'Z').to_a
    short_alphabet = ('A'..'E').to_a

    # [header, amino acid sequence] for each protein, in fasta order
    entries = [
      ["prot1",               full_alphabet.join('')],
      ["prot1_reverse",       full_alphabet.reverse.join('')],
      ["prot1_identical",     full_alphabet.join('')],
      ["prot1_short",         short_alphabet.join('')],
      ["prot1_reverse_short", short_alphabet.reverse.join('')],
    ]
    @prots = entries.map {|header, aaseq| Fasta::Prot.new(header, aaseq) }

    @fasta = Fasta.new(@prots)
  end

  it "can find protein groups from a fasta object" do
    pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
    arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, @fasta)

    p = @prots
    # one protein group per peptide sequence, in the order of pep_seqs
    exp = [
      [p[0], p[2], p[3]],  # ABCD
      [p[0], p[2]],        # DEFG
      [p[0], p[2], p[3]],  # ABCD
      [p[0], p[2]],        # DEFG
      [p[1], p[4]],        # EDCB
      [p[1]],              # FEDCB
      [p[0], p[2]],        # XYZ
      [],                  # RANDOM
      [],                  # AEABA
    ]

    arr.should == exp
  end
end
345
+
346
+
347
+ ###########################
348
+ # old tests
349
+ ###########################
350
+
351
+ =begin
352
+ def test_classify_by_false_flag
353
+ file = @tfiles + "bioworks_with_INV_small.xml"
354
+ sp = SpecID.new(file)
355
+ assert_equal(19, sp.prots.size)
356
+ (tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
357
+ assert_equal(4, fp.size, "num false pos")
358
+ assert_equal(15, tp.size, "num true pos")
359
+ end
360
+
361
+ =end
@@ -0,0 +1,33 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
+ require 'spec_id_xml'
4
+
5
# Specs for the SpecIDXML mixin, exercised through a throwaway class that
# holds two instance variables.
describe SpecIDXML, 'included with a simple object' do
  before(:all) do
    class Bob
      include SpecIDXML

      def initialize(first=nil, second=nil)
        @first = first
        @second = second
      end
    end
  end

  it 'creates short element xmls using an objects instance variables' do
    xml = Bob.new(1, 2).short_element_xml_from_instance_vars("bob")
    # the ordering is arbitrary: "<bob first=\"1\" second=\"2\"/>\n"
    [/second="2"/, /first="1"/, /^<bob /, />$/].each do |pattern|
      xml.should =~ pattern
    end
  end

  it 'escapes special characters' do
    obj = Bob.new
    # [raw string, expected escaped string]
    cases = [
      ["&><\"'", "&amp;&gt;&lt;&quot;&apos;"],
      ["PE&PT>I<D\"E'", "PE&amp;PT&gt;I&lt;D&quot;E&apos;"],
    ]
    cases.each do |raw, escaped|
      obj.escape_special_chars(raw).should == escaped
    end
  end

end
32
+
33
+