mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,361 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
+ require 'spec_id'
4
+ require 'spec_id/srf'
5
+
6
+ # we use this to set the values of generic proteins below
7
+ require 'set_from_hash'
8
+
9
+
10
+ describe 'creating a list of proteins from peptides', :shared => true do
11
+ before(:each) do
12
+ # EXPECTS @prots and a @meth proc that takes two args, an array of
13
+ # peptides and the details of the list creation
14
+
15
+ hashes = [
16
+ {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
17
+ {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
18
+ {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
19
+ {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
20
+ {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
21
+ {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
22
+ ]
23
+
24
+ @peps = hashes.map do |hash|
25
+ SRF::OUT::Pep.new.set_from_hash(hash)
26
+ end
27
+ end
28
+
29
+ it 'compiles protein lists from peps not touching peps attr (:no_update)' do
30
+
31
+ prts = @meth.call(@peps, :no_update)
32
+ exp = (0..4).map do |n|
33
+ "prot_" + n.to_s
34
+ end
35
+ refs = prts.map {|v| v.reference }.sort
36
+ refs.should == exp
37
+ prts.each do |prt|
38
+ prt.peps.should == []
39
+ end
40
+ end
41
+
42
+ it 'compiles protein lists with updated peps attribute (:update)' do
43
+
44
+ prts = @meth.call(@peps, :update)
45
+ prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
46
+ protein_match(prts, 'prot_0', %w(PEP0 PEP4))
47
+ protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
48
+ protein_match(prts, 'prot_2', %w(PEP1 PEP5))
49
+ protein_match(prts, 'prot_3', %w(PEP2))
50
+ protein_match(prts, 'prot_4', %w(PEP3))
51
+ srt_ref = prts.map {|v| v.reference}.sort
52
+ %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots
53
+ prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
54
+ prot_0_before.__id__.should == prot_0.__id__ # proteins are identical
55
+
56
+
57
+ prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first.__id__
58
+ end
59
+
60
+ it 'compiles protein lists of new proteins (:new)' do
61
+ prts = SpecID.protein_list(@peps, :new)
62
+ prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
63
+ protein_match(prts, 'prot_0', %w(PEP0 PEP4))
64
+ protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
65
+ protein_match(prts, 'prot_2', %w(PEP1 PEP5))
66
+ protein_match(prts, 'prot_3', %w(PEP2))
67
+ protein_match(prts, 'prot_4', %w(PEP3))
68
+ srt_ref = prts.map {|v| v.reference}.sort
69
+ #assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
70
+ %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots
71
+ prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
72
+ #assert_not_equal(prot_0_before, prot_0.__id__, "proteins are not identical")
73
+ prot_0_before.should_not == prot_0.__id__ # proteins are not identical
74
+ end
75
+
76
+ # checks that among prts, the protein with ref has peptides with pepseqs
77
+ # aaseqs
78
+ def protein_match(prts, ref, pepseqs)
79
+ prt = prts.select{|v| v.reference == ref }.first
80
+ sorted_prt_peps_aaseqs = prt.peps.map {|v| v.aaseq }.sort
81
+ sorted_pepseqs = pepseqs.sort
82
+ pepseqs.should == sorted_prt_peps_aaseqs
83
+ end
84
+
85
+ end
86
+
87
+ describe SpecID, 'with generic proteins' do
88
+ before(:all) do
89
+ @prots = (0..7).map do |n|
90
+ SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
91
+ end
92
+ @meth = proc {|peps, kind| SpecID.protein_list(peps, kind) }
93
+ end
94
+ it_should_behave_like 'creating a list of proteins from peptides'
95
+ end
96
+
97
+ describe SpecID, 'with array based proteins' do
98
+ before(:all) do
99
+ @prots = (0..7).map do |n|
100
+ SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
101
+ end
102
+ @meth = proc {|peps, kind| SpecID.protein_list(peps, kind) }
103
+ end
104
+ it_should_behave_like 'creating a list of proteins from peptides'
105
+ end
106
+
107
+ module Boolean ; end
108
+ class TrueClass ; include Boolean end
109
+ class FalseClass; include Boolean end
110
+
111
+ describe SpecID, 'being created' do
112
+ it 'can be from small bioworks.xml' do
113
+ sp = SpecID.new(Tfiles + '/bioworks_small.xml')
114
+ sp.prots.size.should == 106
115
+ end
116
+
117
+ it 'can be from small -prot.xml (newer prophet versions)' do
118
+ prot_xml = Tfiles + '/interact-opd1_mods_small-prot.xml'
119
+ sp = SpecID.new(prot_xml)
120
+ sp.is_a?(SpecID).should be_true
121
+ sp.is_a?(Proph::ProtSummary).should be_true
122
+ sp.prots.size.should == 20
123
+ sp.peps.size.should == 31
124
+ types = {
125
+ :protein_name => String,
126
+ :n_indistinguishable_proteins => Integer,
127
+ :probability => Float,
128
+ :percent_coverage => Float,
129
+ :unique_stripped_peptides => Array,
130
+ :group_sibling_id => String,
131
+ :total_number_peptides => Integer,
132
+ :pct_spectrum_ids => Float,
133
+ :peps => Array,
134
+ }
135
+ sp.prots.each do |prot|
136
+ types.each { |cl,tp| prot.send(cl).is_a?(tp).should be_true }
137
+ end
138
+ types = {
139
+ :aaseq => String,
140
+ :peptide_sequence => String,
141
+ :charge => Integer,
142
+ :initial_probability => Float,
143
+ :nsp_adjusted_probability => Float,
144
+ :weight => Float,
145
+ :is_nondegenerate_evidence => Boolean, # no Boolean class
146
+ :n_enzymatic_termini => Integer,
147
+ :n_sibling_peptides => Float,
148
+ :n_sibling_peptides_bin => Integer,
149
+ :n_instances => Integer,
150
+ :is_contributing_evidence => Boolean,
151
+ :calc_neutral_pep_mass => Float,
152
+ :modification_info => Object,
153
+ :mod_info => Object,
154
+ }
155
+ sp.peps.each do |pep|
156
+ types.each { |cl,tp| pep.send(cl).is_a?(tp).should be_true }
157
+ end
158
+ prot_ars = []
159
+ sp.peps.each do |pep|
160
+ if pep.prots.size > 1
161
+ prot_ars << pep.prots
162
+ end
163
+ end
164
+ prot_ars.each do |prt_ar|
165
+ prt_ar.each do |prt|
166
+ # nil is tolerated here because this is a small file, so some of the
167
+ # proteins a peptide refers to are not found
168
+ ((prt.is_a?(SpecID::Prot) == true) or prt.nil?).should be_true
169
+ ((prt.is_a?(Proph::Prot) == true) or prt.nil?).should be_true
170
+ end
171
+ end
172
+ mod_objects = []
173
+ sp.peps.each do |pep|
174
+ if !pep.mod_info.nil?
175
+ mod_objects << pep.mod_info
176
+ end
177
+ end
178
+ # frozen
179
+ mod_objects.size.should == 23
180
+ end
181
+
182
+ spec_large do
183
+ it 'works on a large file' do
184
+ file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods-prot.xml'
185
+ #file = '/work/john/db_quest/verify_prophet/orbi/prophet_results/orbi_f00-prot.xml'
186
+ start = Time.now
187
+ sp = SpecID.new(file)
188
+ puts "- Took #{Time.now - start} seconds to read"
189
+ prot_ars = []
190
+ sp.peps.each do |pep|
191
+ if pep.prots.size > 1
192
+ prot_ars << pep.prots
193
+ end
194
+ end
195
+ prot_ars.each do |prt_ar|
196
+ prt_ar.each do |prt|
197
+ # unlike the small-file example, nil is not tolerated here: every
198
+ # referenced protein must be a Prot
199
+ prt.is_a?(SpecID::Prot).should be_true
200
+ prt.is_a?(Proph::Prot).should be_true
201
+ end
202
+ end
203
+
204
+ end
205
+ end
206
+
207
+ it_should 'can be from -prot.xml (older prophet versions)' do
208
+ prot_xml = Tfiles + '/4-03-03_small-prot.xml'
209
+ prot_xml = Tfiles + '/yeast_gly_small-prot.xml'
210
+ end
211
+ end
212
+
213
+ describe SpecID, 'class methods' do
214
+
215
+ it 'determines filetype (small files)' do
216
+ files = {
217
+ :bioworks => Tfiles + "/bioworks_small.xml",
218
+ :protproph => Tfiles + '/opd1/000_020_3prots-prot.xml',
219
+ :pepproph => Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml',
220
+ :srf => Tfiles + '/head_of_7MIX.srf',
221
+ :srg => 'whatever.srg',
222
+ }
223
+ files.each do |key,val|
224
+ SpecID.file_type(val).should == key.to_s
225
+ end
226
+ ## WOULD BE NICE TO GET THIS WORKING, TOO
227
+ # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
228
+ end
229
+
230
+ it 'can remove non-standard amino acids' do
231
+ hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
232
+ cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
233
+ hash.each do |k,v|
234
+ cl.call(k).should == v
235
+ end
236
+ end
237
+
238
+ end
239
+
240
+ describe SpecID, "determining the minimum set of proteins from pephits" do
241
+
242
+ before(:all) do
243
+ class MyProt ; include SpecID::Prot ; end
244
+ class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
245
+ end
246
+
247
+ it 'can do occams razor on small set' do
248
+
249
+ prots = (0..6).to_a.map do |n|
250
+ prot = MyProt.new
251
+ prot.reference = "ref_#{n}"
252
+ prot
253
+ end
254
+
255
+ peps = (0..12).to_a.map {|v| MyPep.new }
256
+
257
+ # 0 1 2 3 4 5 6 7 8 9 10 11 12
258
+ aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
259
+ xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
260
+
261
+ peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
262
+ pep.aaseq = aaseq
263
+ pep.xcorr = xcorr
264
+ end
265
+
266
+ prots[0].peps = peps[0,4]
267
+ prots[1].peps = [peps[2]] ## should be missing
268
+
269
+ test_prots = prots[0,2]
270
+ answ = SpecID.occams_razor(test_prots)
271
+ answ.each do |an|
272
+ an[0].is_a?(SpecID::Prot).should be_true
273
+ end
274
+ first = answ.first
275
+ first[0].should == prots[0]
276
+ equal_array_content( prots[0].peps, first[1])
277
+
278
+ require 'pp'
279
+ #pp answ
280
+
281
+
282
+ prots[0].peps = peps[0,4]
283
+ prots[1].peps = [peps[2]] ## should be missing
284
+ prots[2].peps = [] ## should be missing
285
+
286
+ answ = SpecID.occams_razor(test_prots, true)
287
+ puts '- NEED MORE tests HERE!' if $specdoc
288
+ #pp answ
289
+
290
+
291
+ #prots[2].peps = [peps[2]]
292
+ #prots[2].peps.push( peps[3] ) ## should be there since it has 2
293
+ #prots[3].peps = [peps[3]] ## should be missing
294
+ end
295
+
296
+ def equal_array_content(exp1, ans, message='')
297
+ exp1.each do |item|
298
+ ans.should include(item)
299
+ end
300
+ end
301
+
302
+
303
+ end
304
+
305
+
306
+ require 'fasta'
307
+
308
+ describe SpecID::Pep, "with a small fasta object" do
309
+ before(:each) do
310
+ @prots = []
311
+
312
+ aaseq = ('A'..'Z').to_a.join('')
313
+ header = "prot1"
314
+ @prots << Fasta::Prot.new(header, aaseq)
315
+
316
+ aaseq = ('A'..'Z').to_a.reverse.join('')
317
+ header = "prot1_reverse"
318
+ @prots << Fasta::Prot.new(header, aaseq)
319
+
320
+ aaseq = ('A'..'Z').to_a.join('')
321
+ header = "prot1_identical"
322
+ @prots << Fasta::Prot.new(header, aaseq)
323
+
324
+ aaseq = ('A'..'E').to_a.join('')
325
+ header = "prot1_short"
326
+ @prots << Fasta::Prot.new(header, aaseq)
327
+
328
+ aaseq = ('A'..'E').to_a.reverse.join('')
329
+ header = "prot1_reverse_short"
330
+ @prots << Fasta::Prot.new(header, aaseq)
331
+
332
+ @fasta = Fasta.new(@prots)
333
+
334
+ end
335
+ it "can find protein groups from a fasta object" do
336
+ pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
337
+ arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, @fasta)
338
+
339
+ prots = @prots
340
+ exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
341
+
342
+ arr.should == exp
343
+ end
344
+ end
345
+
346
+
347
+ ###########################
348
+ # old tests
349
+ ###########################
350
+
351
+ =begin
352
+ def test_classify_by_false_flag
353
+ file = @tfiles + "bioworks_with_INV_small.xml"
354
+ sp = SpecID.new(file)
355
+ assert_equal(19, sp.prots.size)
356
+ (tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
357
+ assert_equal(4, fp.size, "num false pos")
358
+ assert_equal(15, tp.size, "num true pos")
359
+ end
360
+
361
+ =end
@@ -0,0 +1,33 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
+ require 'spec_id_xml'
4
+
5
+ describe SpecIDXML, 'included with a simple object' do
6
+ before(:all) do
7
+ class Bob
8
+ include SpecIDXML
9
+ def initialize(first=nil, second=nil)
10
+ @first = first ; @second = second
11
+ end
12
+ end
13
+ end
14
+
15
+ it 'creates short element xmls using an objects instance variables' do
16
+ obj = Bob.new(1, 2)
17
+ st = obj.short_element_xml_from_instance_vars("bob")
18
+ # the ordering is arbitrary: "<bob first=\"1\" second=\"2\"/>\n"
19
+ st.should =~ /second="2"/
20
+ st.should =~ /first="1"/
21
+ st.should =~ /^<bob /
22
+ st.should =~ />$/
23
+ end
24
+
25
+ it 'escapes special characters' do
26
+ obj = Bob.new
27
+ obj.escape_special_chars("&><\"'").should == "&amp;&gt;&lt;&quot;&apos;"
28
+ obj.escape_special_chars("PE&PT>I<D\"E'").should == "PE&amp;PT&gt;I&lt;D&quot;E&apos;"
29
+ end
30
+
31
+ end
32
+
33
+