mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,366 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- require 'spec_id'
4
- require 'spec_id/srf'
5
-
6
- # we use this to set the values of generic proteins below
7
- require 'set_from_hash'
8
-
9
-
10
- describe 'creating a list of proteins from peptides', :shared => true do
11
- before(:each) do
12
- # EXPECTS @prots and a @meth proc that takes two args, an array of
13
- # peptides and the details of the list creation
14
-
15
- hashes = [
16
- {:aaseq => 'PEP0', :xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => [prots[0],prots[1]]},
17
- {:aaseq => 'PEP1', :xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3, :prots => [prots[1],prots[2]]},
18
- {:aaseq => 'PEP2', :xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1, :prots => [prots[3]]},
19
- {:aaseq => 'PEP3', :xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2, :prots => [prots[4]]},
20
- {:aaseq => 'PEP4', :xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2, :prots => [prots[0]]},
21
- {:aaseq => 'PEP5', :xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2, :prots => prots[1,2]},
22
- ]
23
-
24
- @peps = hashes.map do |hash|
25
- SRF::OUT::Pep.new.set_from_hash(hash)
26
- end
27
- end
28
-
29
- it 'compiles protein lists from peps not touching peps attr (:no_update)' do
30
-
31
- prts = @meth.call(@peps, :no_update)
32
- exp = (0..4).map do |n|
33
- "prot_" + n.to_s
34
- end
35
- refs = prts.map {|v| v.reference }.sort
36
- refs.should == exp
37
- prts.each do |prt|
38
- prt.peps.should == []
39
- end
40
- end
41
-
42
- it 'compiles protein lists with updated peps attribute (:update)' do
43
-
44
- prts = @meth.call(@peps, :update)
45
- prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
46
- protein_match(prts, 'prot_0', %w(PEP0 PEP4))
47
- protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
48
- protein_match(prts, 'prot_2', %w(PEP1 PEP5))
49
- protein_match(prts, 'prot_3', %w(PEP2))
50
- protein_match(prts, 'prot_4', %w(PEP3))
51
- srt_ref = prts.map {|v| v.reference}.sort
52
- %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots
53
- prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
54
- prot_0_before.__id__.should == prot_0.__id__ # proteins are identical
55
-
56
-
57
- prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first.__id__
58
- end
59
-
60
- it 'compiles protein lists of new proteins (:new)' do
61
- prts = SpecID.protein_list(@peps, :new)
62
- prot_0_before = prts.select {|v| v.reference == 'prot_0'}.first
63
- protein_match(prts, 'prot_0', %w(PEP0 PEP4))
64
- protein_match(prts, 'prot_1', %w(PEP0 PEP1 PEP5))
65
- protein_match(prts, 'prot_2', %w(PEP1 PEP5))
66
- protein_match(prts, 'prot_3', %w(PEP2))
67
- protein_match(prts, 'prot_4', %w(PEP3))
68
- srt_ref = prts.map {|v| v.reference}.sort
69
- #assert_equal(%w(prot_0 prot_1 prot_2 prot_3 prot_4), srt_ref, "just the right number of prots")
70
- %w(prot_0 prot_1 prot_2 prot_3 prot_4).should == srt_ref # just the right number of prots
71
- prot_0 = prts.select {|v| v.reference == 'prot_0'}.first
72
- #assert_not_equal(prot_0_before, prot_0.__id__, "proteins are not identical")
73
- prot_0_before.should_not == prot_0.__id__ # proteins are not identical
74
- end
75
-
76
- # checks that among prts, the protein with ref has peptides with pepseqs
77
- # aaseqs
78
- def protein_match(prts, ref, pepseqs)
79
- prt = prts.select{|v| v.reference == ref }.first
80
- sorted_prt_peps_aaseqs = prt.peps.map {|v| v.aaseq }.sort
81
- sorted_pepseqs = pepseqs.sort
82
- pepseqs.should == sorted_prt_peps_aaseqs
83
- end
84
-
85
- end
86
-
87
- describe SpecID, 'with generic proteins' do
88
- include SpecID
89
- before(:all) do
90
- @prots = (0..7).map do |n|
91
- SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
92
- end
93
- @meth = proc {|peps, kind| SpecID.protein_list(peps, kind) }
94
- end
95
- it_should_behave_like 'creating a list of proteins from peptides'
96
- end
97
-
98
- describe SpecID, 'with array based proteins' do
99
- include SpecID
100
- before(:all) do
101
- @prots = (0..7).map do |n|
102
- SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
103
- end
104
- @meth = proc {|peps, kind| SpecID.protein_list(peps, kind) }
105
- end
106
- it_should_behave_like 'creating a list of proteins from peptides'
107
- end
108
-
109
- module Boolean ; end
110
- class TrueClass ; include Boolean end
111
- class FalseClass; include Boolean end
112
-
113
- describe SpecID, 'being created' do
114
- include SpecID
115
- it 'can be from small bioworks.xml' do
116
- sp = SpecID.new(Tfiles + '/bioworks_small.xml')
117
- sp.prots.size.should == 106
118
- end
119
-
120
- it 'can be from small -prot.xml (newer prophet versions)' do
121
- prot_xml = Tfiles + '/interact-opd1_mods_small-prot.xml'
122
- sp = SpecID.new(prot_xml)
123
- sp.is_a?(SpecID).should be_true
124
- sp.is_a?(Proph::ProtSummary).should be_true
125
- sp.prots.size.should == 20
126
- sp.peps.size.should == 31
127
- types = {
128
- :protein_name => String,
129
- :n_indistinguishable_proteins => Integer,
130
- :probability => Float,
131
- :percent_coverage => Float,
132
- :unique_stripped_peptides => Array,
133
- :group_sibling_id => String,
134
- :total_number_peptides => Integer,
135
- :pct_spectrum_ids => Float,
136
- :peps => Array,
137
- }
138
- sp.prots.each do |prot|
139
- types.each { |cl,tp| prot.send(cl).is_a?(tp).should be_true }
140
- end
141
- types = {
142
- :aaseq => String,
143
- :peptide_sequence => String,
144
- :charge => Integer,
145
- :initial_probability => Float,
146
- :nsp_adjusted_probability => Float,
147
- :weight => Float,
148
- :is_nondegenerate_evidence => Boolean, # no Boolean class
149
- :n_enzymatic_termini => Integer,
150
- :n_sibling_peptides => Float,
151
- :n_sibling_peptides_bin => Integer,
152
- :n_instances => Integer,
153
- :is_contributing_evidence => Boolean,
154
- :calc_neutral_pep_mass => Float,
155
- :modification_info => Object,
156
- :mod_info => Object,
157
- }
158
- sp.peps.each do |pep|
159
- types.each { |cl,tp| pep.send(cl).is_a?(tp).should be_true }
160
- end
161
- prot_ars = []
162
- sp.peps.each do |pep|
163
- if pep.prots.size > 1
164
- prot_ars << pep.prots
165
- end
166
- end
167
- prot_ars.each do |prt_ar|
168
- prt_ar.each do |prt|
169
- # the nils because this is a small file and their proteins are not
170
- # found
171
- ((prt.is_a?(SpecID::Prot) == true) or prt.nil?).should be_true
172
- ((prt.is_a?(Proph::Prot) == true) or prt.nil?).should be_true
173
- end
174
- end
175
- mod_objects = []
176
- sp.peps.each do |pep|
177
- if !pep.mod_info.nil?
178
- mod_objects << pep.mod_info
179
- end
180
- end
181
- # frozen
182
- mod_objects.size.should == 23
183
- end
184
-
185
- spec_large do
186
- it 'works on a large file' do
187
- file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods-prot.xml'
188
- #file = '/work/john/db_quest/verify_prophet/orbi/prophet_results/orbi_f00-prot.xml'
189
- start = Time.now
190
- sp = SpecID.new(file)
191
- puts "- Took #{Time.now - start} seconds to read"
192
- prot_ars = []
193
- sp.peps.each do |pep|
194
- if pep.prots.size > 1
195
- prot_ars << pep.prots
196
- end
197
- end
198
- prot_ars.each do |prt_ar|
199
- prt_ar.each do |prt|
200
- # the nils because this is a small file and their proteins are not
201
- # found
202
- prt.is_a?(SpecID::Prot).should be_true
203
- prt.is_a?(Proph::Prot).should be_true
204
- end
205
- end
206
-
207
- end
208
- end
209
-
210
- it_should 'can be from -prot.xml (older prophet versions)' do
211
- prot_xml = Tfiles + '/4-03-03_small-prot.xml'
212
- prot_xml = Tfiles + '/yeast_gly_small-prot.xml'
213
- end
214
- end
215
-
216
- describe SpecID, 'class methods' do
217
-
218
- it 'determines filetype (small files)' do
219
- files = {
220
- :bioworks => Tfiles + "/bioworks_small.xml",
221
- :protproph => Tfiles + '/opd1/000_020_3prots-prot.xml',
222
- :pepproph => Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml',
223
- :srf => Tfiles + '/head_of_7MIX.srf',
224
- :srg => 'whatever.srg',
225
- :sqt => Tfiles + '/small.sqt',
226
- :sqg => 'whatever.sqg',
227
- }
228
- files.each do |key,val|
229
- SpecID.file_type(val).should == key.to_s
230
- end
231
- ## WOULD BE NICE TO GET THIS WORKING, TOO
232
- # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
233
- end
234
-
235
- it 'can remove non-standard amino acids' do
236
- hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
237
- cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
238
- hash.each do |k,v|
239
- cl.call(k).should == v
240
- end
241
- end
242
-
243
- end
244
-
245
- describe SpecID, "determining the minimum set of proteins from pephits" do
246
-
247
- before(:all) do
248
- class MyProt ; include SpecID::Prot ; end
249
- class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
250
- end
251
-
252
- it 'can do occams razor on small set' do
253
-
254
- prots = (0..6).to_a.map do |n|
255
- prot = MyProt.new
256
- prot.reference = "ref_#{n}"
257
- prot
258
- end
259
-
260
- peps = (0..12).to_a.map {|v| MyPep.new }
261
-
262
- # 0 1 2 3 4 5 6 7 8 9 10 11 12
263
- aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
264
- xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
265
-
266
- peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
267
- pep.aaseq = aaseq
268
- pep.xcorr = xcorr
269
- end
270
-
271
- prots[0].peps = peps[0,4]
272
- prots[1].peps = [peps[2]] ## should be missing
273
-
274
- test_prots = prots[0,2]
275
- answ = SpecID.occams_razor(test_prots)
276
- answ.each do |an|
277
- an[0].is_a?(SpecID::Prot).should be_true
278
- end
279
- first = answ.first
280
- first[0].should == prots[0]
281
- equal_array_content( prots[0].peps, first[1])
282
-
283
- require 'pp'
284
- #pp answ
285
-
286
-
287
- prots[0].peps = peps[0,4]
288
- prots[1].peps = [peps[2]] ## should be missing
289
- prots[2].peps = [] ## should be missing
290
-
291
- answ = SpecID.occams_razor(test_prots, true)
292
- puts '- NEED MORE tests HERE!' if $specdoc
293
- #pp answ
294
-
295
-
296
- #prots[2].peps = [peps[2]]
297
- #prots[2].peps.push( peps[3] ) ## should be there since it has 2
298
- #prots[3].peps = [peps[3]] ## should be missing
299
- end
300
-
301
- def equal_array_content(exp1, ans, message='')
302
- exp1.each do |item|
303
- ans.should include(item)
304
- end
305
- end
306
-
307
-
308
- end
309
-
310
-
311
- require 'fasta'
312
-
313
- describe SpecID::Pep, "with a small fasta object" do
314
- before(:each) do
315
- @prots = []
316
-
317
- aaseq = ('A'..'Z').to_a.join('')
318
- header = "prot1"
319
- @prots << Fasta::Prot.new(header, aaseq)
320
-
321
- aaseq = ('A'..'Z').to_a.reverse.join('')
322
- header = "prot1_reverse"
323
- @prots << Fasta::Prot.new(header, aaseq)
324
-
325
- aaseq = ('A'..'Z').to_a.join('')
326
- header = "prot1_identical"
327
- @prots << Fasta::Prot.new(header, aaseq)
328
-
329
- aaseq = ('A'..'E').to_a.join('')
330
- header = "prot1_short"
331
- @prots << Fasta::Prot.new(header, aaseq)
332
-
333
- aaseq = ('A'..'E').to_a.reverse.join('')
334
- header = "prot1_reverse_short"
335
- @prots << Fasta::Prot.new(header, aaseq)
336
-
337
- @fasta = Fasta.new(@prots)
338
-
339
- end
340
- it "can find protein groups from a fasta object" do
341
- pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
342
- arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, @fasta)
343
-
344
- prots = @prots
345
- exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
346
-
347
- arr.should == exp
348
- end
349
- end
350
-
351
-
352
- ###########################
353
- # old tests
354
- ###########################
355
-
356
- =begin
357
- def test_classify_by_false_flag
358
- file = @tfiles + "bioworks_with_INV_small.xml"
359
- sp = SpecID.new(file)
360
- assert_equal(19, sp.prots.size)
361
- (tp, fp) = sp.classify_by_false_flag(:prots, "INV_", true, true)
362
- assert_equal(4, fp.size, "num false pos")
363
- assert_equal(15, tp.size, "num true pos")
364
- end
365
-
366
- =end
@@ -1,33 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- require 'spec_id_xml'
4
-
5
- describe SpecIDXML, 'included with a simple object' do
6
- before(:all) do
7
- class Bob
8
- include SpecIDXML
9
- def initialize(first=nil, second=nil)
10
- @first = first ; @second = second
11
- end
12
- end
13
- end
14
-
15
- it 'creates short element xmls using an objects instance variables' do
16
- obj = Bob.new(1, 2)
17
- st = obj.short_element_xml_from_instance_vars("bob")
18
- # the ordering is arbitrary: "<bob first=\"1\" second=\"2\"/>\n"
19
- st.should =~ /second="2"/
20
- st.should =~ /first="1"/
21
- st.should =~ /^<bob /
22
- st.should =~ />$/
23
- end
24
-
25
- it 'escapes special characters' do
26
- obj = Bob.new
27
- obj.escape_special_chars("&><\"'").should == "&amp;&gt;&lt;&quot;&apos;"
28
- obj.escape_special_chars("PE&PT>I<D\"E'").should == "PE&amp;PT&gt;I&lt;D&quot;E&apos;"
29
- end
30
-
31
- end
32
-
33
-