mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,366 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- require 'spec_id'
4
- require 'spec_id/srf'
5
-
6
- # we use this to set the values of generic proteins below
7
- require 'set_from_hash'
8
-
9
-
10
- describe 'creating a list of proteins from peptides', :shared => true do
11
- before(:each) do
12
- # EXPECTS @prots and a @meth proc that takes two args, an array of
13
- # peptides and the details of the list creation
14
-
15
- hashes = [
16
- {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
17
- {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
18
- {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
19
- {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
20
- {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
21
- {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
22
- ]
23
-
24
- @peps = hashes.map do |hash|
25
- SRF::OUT::Pep.new.set_from_hash(hash)
26
- end
27
- end
28
-
29
- it 'compiles protein lists from peps not touching peps attr (:no_update)' do
30
-
31
- prts = @meth.call(@peps, :no_update)
32
- exp = (0..4).map do |n|
33
- "prot_" + n.to_s
34
- end
35
- refs = prts.map {|v| v.reference }.sort
36
- refs.should == exp
37
- prts.each do |prt|
38
- prt.peps.should == []
39
- end
40
- end
41
-
42
- it 'compiles protein lists with updated peps attribute (:update)' do
43
-
44
- prts = @meth.call(@peps, :update)
45
- prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
46
- protein_match(prts, 'prot_0', %w(PEP0 PEP4))
47
- protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
48
- protein_match(prts, 'prot_2', %w(PEP1 PEP5))
49
- protein_match(prts, 'prot_3', %w(PEP2))
50
- protein_match(prts, 'prot_4', %w(PEP3))
51
- srt_ref = prts.map {|v| v.reference}.sort
52
- %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots
53
- prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
54
- prot_0_before.__id__.should == prot_0.__id__ # proteins are identical
55
-
56
-
57
- prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first.__id__
58
- end
59
-
60
- it 'compiles protein lists of new proteins (:new)' do
61
- prts = SpecID.protein_list(@peps, :new)
62
- prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
63
- protein_match(prts, 'prot_0', %w(PEP0 PEP4))
64
- protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
65
- protein_match(prts, 'prot_2', %w(PEP1 PEP5))
66
- protein_match(prts, 'prot_3', %w(PEP2))
67
- protein_match(prts, 'prot_4', %w(PEP3))
68
- srt_ref = prts.map {|v| v.reference}.sort
69
- #assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
70
- %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots
71
- prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
72
- #assert_not_equal(prot_0_before, prot_0.__id__, "proteins are not identical")
73
- prot_0_before.should_not == prot_0.__id__ # proteins are not identical
74
- end
75
-
76
- # checks that among prts, the protein with ref has peptides with pepseqs
77
- # aaseqs
78
- def protein_match(prts, ref, pepseqs)
79
- prt = prts.select{|v| v.reference == ref }.first
80
- sorted_prt_peps_aaseqs = prt.peps.map {|v| v.aaseq }.sort
81
- sorted_pepseqs = pepseqs.sort
82
- pepseqs.should == sorted_prt_peps_aaseqs
83
- end
84
-
85
- end
86
-
87
- describe SpecID, 'with generic proteins' do
88
- include SpecID
89
- before(:all) do
90
- @prots = (0..7).map do |n|
91
- SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
92
- end
93
- @meth = proc {|peps, kind| SpecID.protein_list(peps, kind) }
94
- end
95
- it_should_behave_like 'creating a list of proteins from peptides'
96
- end
97
-
98
- describe SpecID, 'with array based proteins' do
99
- include SpecID
100
- before(:all) do
101
- @prots = (0..7).map do |n|
102
- SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
103
- end
104
- @meth = proc {|peps, kind| SpecID.protein_list(peps, kind) }
105
- end
106
- it_should_behave_like 'creating a list of proteins from peptides'
107
- end
108
-
109
- module Boolean ; end
110
- class TrueClass ; include Boolean end
111
- class FalseClass; include Boolean end
112
-
113
- describe SpecID, 'being created' do
114
- include SpecID
115
- it 'can be from small bioworks.xml' do
116
- sp = SpecID.new(Tfiles + '/bioworks_small.xml')
117
- sp.prots.size.should == 106
118
- end
119
-
120
- it 'can be from small -prot.xml (newer prophet versions)' do
121
- prot_xml = Tfiles + '/interact-opd1_mods_small-prot.xml'
122
- sp = SpecID.new(prot_xml)
123
- sp.is_a?(SpecID).should be_true
124
- sp.is_a?(Proph::ProtSummary).should be_true
125
- sp.prots.size.should == 20
126
- sp.peps.size.should == 31
127
- types = {
128
- :protein_name => String,
129
- :n_indistinguishable_proteins => Integer,
130
- :probability => Float,
131
- :percent_coverage => Float,
132
- :unique_stripped_peptides => Array,
133
- :group_sibling_id => String,
134
- :total_number_peptides => Integer,
135
- :pct_spectrum_ids => Float,
136
- :peps => Array,
137
- }
138
- sp.prots.each do |prot|
139
- types.each { |cl,tp| prot.send(cl).is_a?(tp).should be_true }
140
- end
141
- types = {
142
- :aaseq => String,
143
- :peptide_sequence => String,
144
- :charge => Integer,
145
- :initial_probability => Float,
146
- :nsp_adjusted_probability => Float,
147
- :weight => Float,
148
- :is_nondegenerate_evidence => Boolean, # no Boolean class
149
- :n_enzymatic_termini => Integer,
150
- :n_sibling_peptides => Float,
151
- :n_sibling_peptides_bin => Integer,
152
- :n_instances => Integer,
153
- :is_contributing_evidence => Boolean,
154
- :calc_neutral_pep_mass => Float,
155
- :modification_info => Object,
156
- :mod_info => Object,
157
- }
158
- sp.peps.each do |pep|
159
- types.each { |cl,tp| pep.send(cl).is_a?(tp).should be_true }
160
- end
161
- prot_ars = []
162
- sp.peps.each do |pep|
163
- if pep.prots.size > 1
164
- prot_ars << pep.prots
165
- end
166
- end
167
- prot_ars.each do |prt_ar|
168
- prt_ar.each do |prt|
169
- # the nils because this is a small file and their proteins are not
170
- # found
171
- ((prt.is_a?(SpecID::Prot) == true) or prt.nil?).should be_true
172
- ((prt.is_a?(Proph::Prot) == true) or prt.nil?).should be_true
173
- end
174
- end
175
- mod_objects = []
176
- sp.peps.each do |pep|
177
- if !pep.mod_info.nil?
178
- mod_objects << pep.mod_info
179
- end
180
- end
181
- # frozen
182
- mod_objects.size.should == 23
183
- end
184
-
185
- spec_large do
186
- it 'works on a large file' do
187
- file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods-prot.xml'
188
- #file = '/work/john/db_quest/verify_prophet/orbi/prophet_results/orbi_f00-prot.xml'
189
- start = Time.now
190
- sp = SpecID.new(file)
191
- puts "- Took #{Time.now - start} seconds to read"
192
- prot_ars = []
193
- sp.peps.each do |pep|
194
- if pep.prots.size > 1
195
- prot_ars << pep.prots
196
- end
197
- end
198
- prot_ars.each do |prt_ar|
199
- prt_ar.each do |prt|
200
- # the nils because this is a small file and their proteins are not
201
- # found
202
- prt.is_a?(SpecID::Prot).should be_true
203
- prt.is_a?(Proph::Prot).should be_true
204
- end
205
- end
206
-
207
- end
208
- end
209
-
210
- it_should 'can be from -prot.xml (older prophet versions)' do
211
- prot_xml = Tfiles + '/4-03-03_small-prot.xml'
212
- prot_xml = Tfiles + '/yeast_gly_small-prot.xml'
213
- end
214
- end
215
-
216
- describe SpecID, 'class methods' do
217
-
218
- it 'determines filetype (small files)' do
219
- files = {
220
- :bioworks => Tfiles + "/bioworks_small.xml",
221
- :protproph => Tfiles + '/opd1/000_020_3prots-prot.xml',
222
- :pepproph => Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml',
223
- :srf => Tfiles + '/head_of_7MIX.srf',
224
- :srg => 'whatever.srg',
225
- :sqt => Tfiles + '/small.sqt',
226
- :sqg => 'whatever.sqg',
227
- }
228
- files.each do |key,val|
229
- SpecID.file_type(val).should == key.to_s
230
- end
231
- ## WOULD BE NICE TO GET THIS WORKING, TOO
232
- # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
233
- end
234
-
235
- it 'can remove non-standard amino acids' do
236
- hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
237
- cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
238
- hash.each do |k,v|
239
- cl.call(k).should == v
240
- end
241
- end
242
-
243
- end
244
-
245
- describe SpecID, "determining the minimum set of proteins from pephits" do
246
-
247
- before(:all) do
248
- class MyProt ; include SpecID::Prot ; end
249
- class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
250
- end
251
-
252
- it 'can do occams razor on small set' do
253
-
254
- prots = (0..6).to_a.map do |n|
255
- prot = MyProt.new
256
- prot.reference = "ref_#{n}"
257
- prot
258
- end
259
-
260
- peps = (0..12).to_a.map {|v| MyPep.new }
261
-
262
- # 0 1 2 3 4 5 6 7 8 9 10 11 12
263
- aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
264
- xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
265
-
266
- peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
267
- pep.aaseq = aaseq
268
- pep.xcorr = xcorr
269
- end
270
-
271
- prots[0].peps = peps[0,4]
272
- prots[1].peps = [peps[2]] ## should be missing
273
-
274
- test_prots = prots[0,2]
275
- answ = SpecID.occams_razor(test_prots)
276
- answ.each do |an|
277
- an[0].is_a?(SpecID::Prot).should be_true
278
- end
279
- first = answ.first
280
- first[0].should == prots[0]
281
- equal_array_content( prots[0].peps, first[1])
282
-
283
- require 'pp'
284
- #pp answ
285
-
286
-
287
- prots[0].peps = peps[0,4]
288
- prots[1].peps = [peps[2]] ## should be missing
289
- prots[2].peps = [] ## should be missing
290
-
291
- answ = SpecID.occams_razor(test_prots, true)
292
- puts '- NEED MORE tests HERE!' if $specdoc
293
- #pp answ
294
-
295
-
296
- #prots[2].peps = [peps[2]]
297
- #prots[2].peps.push( peps[3] ) ## should be there since it has 2
298
- #prots[3].peps = [peps[3]] ## should be missing
299
- end
300
-
301
- def equal_array_content(exp1, ans, message='')
302
- exp1.each do |item|
303
- ans.should include(item)
304
- end
305
- end
306
-
307
-
308
- end
309
-
310
-
311
- require 'fasta'
312
-
313
- describe SpecID::Pep, "with a small fasta object" do
314
- before(:each) do
315
- @prots = []
316
-
317
- aaseq = ('A'..'Z').to_a.join('')
318
- header = "prot1"
319
- @prots << Fasta::Prot.new(header, aaseq)
320
-
321
- aaseq = ('A'..'Z').to_a.reverse.join('')
322
- header = "prot1_reverse"
323
- @prots << Fasta::Prot.new(header, aaseq)
324
-
325
- aaseq = ('A'..'Z').to_a.join('')
326
- header = "prot1_identical"
327
- @prots << Fasta::Prot.new(header, aaseq)
328
-
329
- aaseq = ('A'..'E').to_a.join('')
330
- header = "prot1_short"
331
- @prots << Fasta::Prot.new(header, aaseq)
332
-
333
- aaseq = ('A'..'E').to_a.reverse.join('')
334
- header = "prot1_reverse_short"
335
- @prots << Fasta::Prot.new(header, aaseq)
336
-
337
- @fasta = Fasta.new(@prots)
338
-
339
- end
340
- it "can find protein groups from a fasta object" do
341
- pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
342
- arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, @fasta)
343
-
344
- prots = @prots
345
- exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
346
-
347
- arr.should == exp
348
- end
349
- end
350
-
351
-
352
- ###########################
353
- # old tests
354
- ###########################
355
-
356
- =begin
357
- def test_classify_by_false_flag
358
- file = @tfiles + "bioworks_with_INV_small.xml"
359
- sp = SpecID.new(file)
360
- assert_equal(19, sp.prots.size)
361
- (tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
362
- assert_equal(4, fp.size, "num false pos")
363
- assert_equal(15, tp.size, "num true pos")
364
- end
365
-
366
- =end
@@ -1,33 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- require 'spec_id_xml'
4
-
5
- describe SpecIDXML, 'included with a simple object' do
6
- before(:all) do
7
- class Bob
8
- include SpecIDXML
9
- def initialize(first=nil, second=nil)
10
- @first = first ; @second = second
11
- end
12
- end
13
- end
14
-
15
- it 'creates short element xmls using an objects instance variables' do
16
- obj = Bob.new(1, 2)
17
- st = obj.short_element_xml_from_instance_vars("bob")
18
- # the ordering is arbitrary: "<bob first=\"1\" second=\"2\"/>\n"
19
- st.should =~ /second="2"/
20
- st.should =~ /first="1"/
21
- st.should =~ /^<bob /
22
- st.should =~ />$/
23
- end
24
-
25
- it 'escapes special characters' do
26
- obj = Bob.new
27
- obj.escape_special_chars("&><\"'").should == "&amp;&gt;&lt;&quot;&apos;"
28
- obj.escape_special_chars("PE&PT>I<D\"E'").should == "PE&amp;PT&gt;I&lt;D&quot;E&apos;"
29
- end
30
-
31
- end
32
-
33
-