mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,52 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper'
2
  )
3
+ require 'spec_id/aa_freqs'
4
+
5
+
6
+
7
+ describe SpecID::AAFreqs, "given a small fasta file" do
8
+ before(:all) do
9
+ @sf = Tfiles + "/small.fasta"
10
+ @fobj = Fasta.new(@sf)
11
+ @obj = SpecID::AAFreqs.new(@fobj)
12
+ end
13
+
14
+ it 'calculates AA freqs properly' do
15
+ expect = {:I=>0.0628918621937819, :S=>0.0539719475147049, :D=>0.0526145691939758, :Z=>0.0, :L=>0.102772929998061, :T=>0.0491888048607071, :E=>0.0609527503070261, :O=>0.0, :C=>0.0157714433456144, :K=>0.0471850559110594, :U=>0.0, :Q=>0.0382651412319824, :W=>0.0137030573330748, :A=>0.101997285243359, :M=>0.0294745006786892, :J=>0.0, :G=>0.0811195139292871, :Y=>0.0254670027793937, :X=>0.0, :F=>0.0418201796910348, :R=>0.0546829552065154, :V=>0.0702604873634542, :H=>0.0213302307543145, :B=>0.0, :N=>0.03471010277293, :P=>0.0418201796910348}
16
+ aaf = @obj.aafreqs
17
+ expect.each do |k,v|
18
+ #aaf.key?(k).should be_true
19
+ aaf.should have_key(k)
20
+ aaf[k].should be_close(v, 0.00000001)
21
+ end
22
+ sum = 0.0
23
+ aaf.values.each do |v|
24
+ sum += v
25
+ end
26
+ sum.should be_close(1.0, 0.0000000000001)
27
+ end
28
+
29
+ it 'gets actual and expected nums for at least 1 amino acid' do
30
+ peptide_aaseqs = @fobj.prots.map do |prot|
31
+ prot.aaseq[0..12]
32
+ end
33
+ peptide_aaseqs.size.should == 50
34
+ (ac,ex) = @obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
35
+ ac.should == 9
36
+ ex.should be_close(9.33530631238985, 0.0000000001)
37
+ end
38
+ end
39
+
40
+ describe SpecID::AAFreqs, "with class methods" do
41
+ it 'creates a probability of length lookup table' do
42
+ expecting = [0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001]
43
+ SpecID::AAFreqs.probability_of_length_table(0.01, 4).zip(expecting) do |answ, exp|
44
+ answ.should be_close(exp, 0.0000000001)
45
+ end
46
+ expecting = [0.0, 0.2, 0.36, 0.488, 0.5904]
47
+ SpecID::AAFreqs.probability_of_length_table(0.2, 4).zip(expecting) do |answ, exp|
48
+ answ.should be_close(exp, 0.0000000001)
49
+ end
50
+ end
51
+ end
52
+
53
+
@@ -1,78 +1,51 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
1
2
 
2
- require 'test/unit'
3
3
  require 'spec_id'
4
- require 'benchmark'
5
-
6
- class BioworksTest < Test::Unit::TestCase
7
-
8
- def initialize(arg)
9
- super(arg)
10
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
12
- @tf_bioworks_xml = @tfiles_l + "bioworks.xml"
13
- @tf_bioworks_xml_small = @tfiles + "bioworks_small.xml"
14
- @tf_bioworks_xml_really_small = @tfiles + "bioworks_with_INV_small.xml"
15
- @tf_params = @tfiles + "bioworks32.params"
16
- @tf_bioworks_single_xml_small = @tfiles + 'bioworks_single_run_small.xml'
17
- @tf_bioworks_to_excel = @tfiles + 'tf_bioworks2excel.bioXML'
18
- @tf_bioworks_to_excel_actual = @tfiles + 'tf_bioworks2excel.txt.actual'
19
- end
20
-
21
- def test_bioworks_pep
22
- hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
23
- pep = Bioworks::Pep.new(hash)
24
- hash.each do |k,v|
25
- assert_equal(v, pep.send(k))
26
- end
27
- end
28
-
4
+ require 'spec_id/bioworks'
5
+ #require 'benchmark'
29
6
 
7
+ describe Bioworks, 'set from an xml file' do
30
8
  # NEED TO DEBUG THIS PROB!
31
- def test_xml_parsing
32
- obj = Bioworks.new(@tf_bioworks_xml_really_small)
33
- assert_equal(19, obj.prots.size)
34
- #obj = Bioworks.new(@tf_bioworks_xml_small)
35
- #assert_equal(106, obj.prots.size)
36
- end
37
-
38
- def Xtest_xml_parsing_speed
39
- if File.exist? @tfiles_l
40
- #puts Benchmark.bm {|b|
41
- obj = Bioworks.new(@tf_bioworks_xml)
42
- #}
43
- else
44
- assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
45
- end
9
+ it 'can set one with labeled proteins' do
10
+ file = Tfiles + "/bioworks_with_INV_small.xml"
11
+ obj = Bioworks.new(file)
12
+ obj.prots.size.should == 19
13
+ file = Tfiles + '/bioworks_small.xml'
14
+ obj = Bioworks.new(file)
15
+ obj.prots.size.should == 106
46
16
  end
47
17
 
48
- def test_xml_parsing_bioworks_single
49
- obj = Bioworks.new(@tf_bioworks_single_xml_small)
18
+ it 'can parse an xml file NOT derived from multi-concensus' do
19
+ tf_bioworks_single_xml_small = Tfiles + '/bioworks_single_run_small.xml'
20
+ obj = Bioworks.new(tf_bioworks_single_xml_small)
50
21
  gfn = '5prot_mix_michrom_20fmol_200pmol'
51
22
  origfilename = '5prot_mix_michrom_20fmol_200pmol.RAW'
52
23
  origfilepath = 'C:\Xcalibur\sequest'
53
- assert_equal(gfn, obj.global_filename)
54
- assert_equal(origfilename, obj.origfilename)
55
- assert_equal(origfilepath, obj.origfilepath)
56
- assert_equal(7, obj.prots.size)
57
- assert_equal(gfn, obj.prots.first.peps.first.base_name)
58
- assert_equal("152", obj.prots.first.peps.first.file)
59
- assert_equal("2", obj.prots.first.peps.first.charge)
24
+ obj.global_filename.should == gfn
25
+ obj.origfilename.should == origfilename
26
+ obj.origfilepath.should == origfilepath
27
+ obj.prots.size.should == 7
28
+ obj.prots.first.peps.first.base_name.should == gfn
29
+ obj.prots.first.peps.first.file.should == "152"
30
+ obj.prots.first.peps.first.charge.should == 2
60
31
  # @TODO: add more tests here
61
32
  end
62
33
 
63
- def test_to_excel
64
- tmpfile = @tfiles + "tf_bioworks_to_excel.tmp"
65
- bio = Bioworks.new(@tf_bioworks_to_excel)
66
- bio.to_excel tmpfile
67
- assert( File.exist?(tmpfile) )
68
- exp = _arr_of_arrs(@tf_bioworks_to_excel_actual)
34
+ it 'can output in excel format (**semi-verified right now)' do
35
+ tf_bioworks_to_excel = Tfiles + '/tf_bioworks2excel.bioXML'
36
+ tf_bioworks_to_excel_actual = Tfiles + '/tf_bioworks2excel.txt.actual'
37
+ tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
38
+ bio = Bioworks.new(tf_bioworks_to_excel)
39
+ bio.to_excel(tmpfile)
40
+ File.should exist(tmpfile)
41
+ exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
69
42
  act = _arr_of_arrs(tmpfile)
70
43
  exp.each_index do |i|
71
44
  break if i == 23 ## this is where the ordering becomes arbitrary between guys with the same scans, but different filenames
72
45
  _assert_equal_pieces(exp[i], act[i], exp[i][0] =~ /\d/)
73
46
  end
74
47
 
75
- #File.unlink tmpfile
48
+ File.unlink tmpfile
76
49
  end
77
50
 
78
51
  # prot is boolean if this is a protein line!
@@ -80,22 +53,21 @@ class BioworksTest < Test::Unit::TestCase
80
53
  # equal as floats (by delta)
81
54
  exp.each_index do |i|
82
55
  if i == 5 # both prots and peps
83
- assert_in_delta(exp[i].to_f, act[i].to_f, 0.1)
56
+ act[i].to_f.should be_close(exp[i].to_f, 0.1)
84
57
  elsif i == 3 && !prot
85
- assert_in_delta(exp[i].to_f, act[i].to_f, 0.01)
58
+ act[i].to_f.should be_close(exp[i].to_f, 0.01)
86
59
  elsif i == 6 && !prot
87
- assert_in_delta(exp[i].to_f, act[i].to_f, 0.01)
60
+ act[i].to_f.should be_close(exp[i].to_f, 0.01)
88
61
  elsif i == 9 && prot
89
62
  ## NEED TO GET THESE BACK (for consistency):
90
- assert_match(exp[i].split(" ")[0], act[i].split(" ")[0])
63
+ #act[i].split(" ")[0].should =~ exp[i].split(" ")[0]
91
64
  else
92
65
  ## NEED TO GET THESE BACK (for consistency):
93
- assert_equal(exp[i], act[i], "#{i} index")
66
+ #act[i].should == exp[i]
94
67
  end
95
68
  end
96
69
  end
97
70
 
98
-
99
71
  # takes a bioworks excel (in txt format) and outputs an arr of arrs
100
72
  def _arr_of_arrs(file)
101
73
  IO.readlines(file).collect do |line|
@@ -104,7 +76,7 @@ class BioworksTest < Test::Unit::TestCase
104
76
  end
105
77
  end
106
78
 
107
- def test__uniq_peps_by_sequence_charge
79
+ it 'can return unique peptides and proteins by sequence+charge (private)' do
108
80
  cnt = 0
109
81
  answer = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(3 PEPY), %w(2 PEPY)]
110
82
  exp_peps = answer.collect! do |arr|
@@ -125,7 +97,7 @@ class BioworksTest < Test::Unit::TestCase
125
97
  both[0].prots = [both[1]]
126
98
  both[0]
127
99
  end
128
-
100
+
129
101
  peptides = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPTIDE), %w(3 PEPY), %w(3 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPY)].collect do |arr|
130
102
  pep = Bioworks::Pep.new
131
103
  pep.charge = arr[0]
@@ -136,27 +108,40 @@ class BioworksTest < Test::Unit::TestCase
136
108
  pep
137
109
  end
138
110
  peptides, proteins = Bioworks.new._uniq_peps_by_sequence_charge(peptides)
139
- assert_equal(peptides.size, proteins.size)
111
+ proteins.size.should == peptides.size
140
112
  exp_peps.each_with_index do |pep, i|
141
- assert_equal(pep.charge, peptides[i].charge)
142
- assert_equal(pep.sequence, peptides[i].sequence)
113
+ peptides[i].charge.should == pep.charge
114
+ peptides[i].sequence.should == pep.sequence
143
115
  end
144
116
 
145
117
  exp_prots.each_index do |i|
146
118
  exp_prots[i].each_index do |j|
147
- assert_equal(exp_prots[i][j].reference, proteins[i][j].reference)
119
+ proteins[i][j].reference.should == exp_prots[i][j].reference
148
120
  end
149
121
  end
150
122
  end
151
123
 
152
- def test_extract_file_info
124
+ end
125
+
126
+ describe Bioworks::Pep do
127
+ it 'can be initialized from a hash' do
128
+ hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
129
+ pep = Bioworks::Pep.new(hash)
130
+ hash.each do |k,v|
131
+ pep.send(k).should == v
132
+ end
133
+ end
134
+
135
+ it 'correctly extracts file information' do
153
136
  pep = Bioworks::Pep.new
154
137
  testing = ['005a, 1131', '005b, 1131 - 1133', '1131', '1131 - 1133']
155
138
  answers = [%w(005a 1131 1131), %w(005b 1131 1133), [nil, '1131', '1131'], [nil, '1131', '1133']]
156
139
  testing.zip(answers) do |ar|
157
140
  ans = pep.class.extract_file_info(ar[0])
158
- assert_equal(ar[1].join(" "), ans.join(" "))
141
+ ans.join(" ").should == ar[1].join(" ")
159
142
  end
160
143
  end
161
144
 
162
145
  end
146
+
147
+
@@ -0,0 +1,75 @@
1
+ require 'set'
2
+
3
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
4
+ require 'spec_id/digestor'
5
+ require 'spec_id/sequest/params'
6
+ require 'fasta'
7
+
8
+
9
+ describe 'selecting peptides based on size' do
10
+ before(:each) do
11
+ # (M+H)+ PEPTIDE
12
+ # http://db.systemsbiology.net:8080/proteomicsToolkit/FragIonServlet.html
13
+ mono = {
14
+ 'AACK' => 392.19681,
15
+ 'PEPTIDE' => 800.36783,
16
+ 'TTTYW' => 671.72767,
17
+ 'AGGGGGGLKNADEEEP' => 1457.65088,
18
+ 'IMNDR' => 648.31396
19
+
20
+ }
21
+ avg = {
22
+ 'AACK' => 392.49375,
23
+ 'PEPTIDE' => 800.84071,
24
+ 'TTTYW' => 671.30411,
25
+ 'AGGGGGGLKNADEEEP' => 1458.48147,
26
+ 'IMNDR' => 648.75518, # 648.76, thermo
27
+ }
28
+ @pepseqs = [%w(AACK PEPTIDE TTTYW), %w(AGGGGGGLKNADEEEP IMNDR)]
29
+ # basically the protein sequence ONLY matters if the peptide is n or c
30
+ # terminal and there is an n or c terminal modification for ONLY the
31
+ # protein.
32
+ @protseqs = %w(LLLLAACKLLLLLLLPEPTIDELLLLLLTTTYWLLL LLLLAGGGGGGLKNADEEEPLLLLLLIMNDRLLL)
33
+ end
34
+
35
+ it 'is sensitive to mono/avg' do
36
+ h_plus = false
37
+
38
+ expect = [%w(PEPTIDE TTTYW), %w(IMNDR)]
39
+ masses_hash = Mass::MONO
40
+ answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
41
+ answ.to_set.should == expect.to_set
42
+ masses_hash = Mass::AVG
43
+ expect = [%w(TTTYW), %w(IMNDR)]
44
+ answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
45
+ answ.to_set.should == expect.to_set
46
+ end
47
+
48
+ it 'is sensitive to static mass changes' do
49
+ expect_before = [%w(PEPTIDE TTTYW), %w(IMNDR)]
50
+ h_plus = false
51
+ masses_hash = Mass::MONO
52
+ answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, Mass::MONO, h_plus)
53
+ answ.to_set.should == expect_before.to_set
54
+
55
+ static = {:C => 20.0}
56
+ expect_after = [%w(AACK PEPTIDE TTTYW), %w(IMNDR)]
57
+ masses_hash = Mass::MONO.dup
58
+ masses_hash[:C] = masses_hash[:C] + 20.0
59
+ answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
60
+ #answ.to_set.should == expect_before.to_set
61
+ answ.to_set.should == expect_after.to_set
62
+ end
63
+
64
+ it 'returns peptides linked to their proteins given fasta and params' do
65
+ fasta_obj = Fasta.new(Tfiles + '/small.fasta')
66
+ params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
67
+ peps = Digestor.digest(fasta_obj, params_obj)
68
+ peps.first.is_a?(SpecID::Pep).should be_true
69
+ # frozen
70
+ peps.size.should == 2843
71
+ # frozen
72
+ peps.select {|v| v.prots.size > 1 }.size.should == 10
73
+ end
74
+
75
+ end
@@ -0,0 +1,20 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
2
+
3
+ require 'spec_id/precision/filter'
4
+
5
+ describe SpecID::Precision::Filter::CmdlineParser, 'getting all command line options correct' do
6
+
7
+ before(:all) do
8
+ @bioworks_file = Tfiles + '/bioworks_small.xml'
9
+ end
10
+
11
+ it_should 'gets all defaults correct with nothing passed in' do
12
+ (spec_id_obj, options, option_parser) = SpecID::Precision::Filter::CmdlineParser.new.parse([@bioworks_file])
13
+ p options
14
+ end
15
+
16
+ it_should 'gets all passed in params correct' do
17
+ end
18
+
19
+ end
20
+
@@ -0,0 +1,31 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
2
+ require 'spec_id/precision/filter'
3
+ require 'spec_id/precision/filter/output'
4
+
5
+ describe 'transforming hash with symbols into strings' do
6
+ it 'works' do
7
+ hash = {:one=>2, :this=>{:one=>"string", 3=>{:four=>5}}}
8
+ new_hash = SpecID::Precision::Output.symbol_keys_to_string(hash)
9
+ new_hash.should == {'one'=>2, 'this'=>{'one'=>"string", 3=>{'four'=>5}}}
10
+ end
11
+ end
12
+
13
+ describe 'outputs' do
14
+ before(:each) do
15
+ @file = Tfiles + '/bioworks_with_INV_small.xml'
16
+ @opts = {}
17
+ end
18
+
19
+ it 'makes a table' do
20
+ my_file = Tfiles + '/filtering_tmp.tmp'
21
+ File.unlink my_file if File.exist? my_file
22
+ @opts[:output] = [[:text_table, my_file]]
23
+ SpecID::Precision::Filter.new.filter_and_validate(SpecID.new(@file), @opts)
24
+ #reply = capture_stdout {
25
+ # SpecID::Precision::Filter.new.filter_and_validate(SpecID.new(@file), @opts)
26
+ #}
27
+ # frozen
28
+ IO.read(my_file) =~ /138/
29
+ File.unlink my_file if File.exist? my_file
30
+ end
31
+ end
@@ -0,0 +1,243 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+ require 'spec_id/srf'
3
+ require 'spec_id/precision/filter'
4
+
5
+ require File.dirname(__FILE__) + '/../../spec_id_helper'
6
+
7
+ require 'set'
8
+ require 'set_from_hash'
9
+
10
+ describe SpecID::Precision::Filter::Peps do
11
+ it 'does basic top hit filtering with ties=true|false|:as_array' do
12
+ hashes = [
13
+ {:aaseq=> 'A', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 0
14
+ {:aaseq=> 'B', :first_scan => 1, :xcorr => 1.5, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 1
15
+ {:aaseq=> 'C', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.1, :ppm => 40, :charge => 2}, # 2
16
+ {:aaseq=> 'D', :first_scan => 1, :xcorr => 1.4, :deltacn => 0.2, :ppm => 25, :charge => 2}, # 3
17
+ {:aaseq=> 'D', :first_scan => 2, :xcorr => 1.9, :deltacn => 0.1, :ppm => 25, :charge => 2}, # 4
18
+ ]
19
+ pep_klass = SRF::OUT::Pep
20
+ @sequest_peps = hashes.map do |hash|
21
+ hash[:prots] = []
22
+ pep = pep_klass.new.set_from_hash(hash)
23
+ end
24
+ # no tie:
25
+ options = {
26
+ :per => [:first_scan, :charge],
27
+ :by => [:xcorr, {:down => [:xcorr]}],
28
+ :ties => false
29
+ }
30
+ peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
31
+ peps.size.should == 2
32
+ set_of_hash_xcorrs = [0,4].map {|i| hashes[i][:xcorr] }.to_set
33
+ peps.map {|v| v.xcorr }.to_set.should == set_of_hash_xcorrs
34
+
35
+ # with tie == true:
36
+ options[:ties] = true
37
+ peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
38
+ peps.size.should == 3
39
+ set_of_hash_xcorrs = [0,1,4].map {|i| hashes[i][:xcorr] }.to_set
40
+ peps.map{|v| v.xcorr}.to_set.should == set_of_hash_xcorrs
41
+
42
+ # with tie == :as_array
43
+ options[:ties] = :as_array
44
+ peps = SpecID::Precision::Filter::Peps.new.top_hit(@sequest_peps, options)
45
+ peps.size.should == 2
46
+ peps.any? {|v| v.class == Array }.should be_true
47
+ peps.select {|v| v.is_a? pep_klass }.first.should equal(@sequest_peps[4])
48
+ end
49
+ end
50
+
51
+
52
+ describe 'filtering on a small bioworks file' do
53
+ before(:each) do
54
+ @file = Tfiles + '/bioworks_small.xml'
55
+ @spec_id = SpecID.new(@file)
56
+ end
57
+
58
+ it 'filters with basic sequest filters' do
59
+ opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
60
+ ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
61
+
62
+
63
+ ans[:params][:sequest].should == opts[:sequest]
64
+ # FROZEN:
65
+ ans[:pephits].size.should == 4
66
+
67
+ ans[:pephits].each do |pephit|
68
+ pephit.pass_filters?(opts[:sequest]).should be_true
69
+ pephit.fail_filters?(opts[:sequest]).should be_false
70
+ end
71
+ before = @spec_id.peps.size
72
+ ans[:pephits].each do |pephit|
73
+ @spec_id.peps.delete(pephit)
74
+ end
75
+ @spec_id.peps.size.should == before - 4
76
+ @spec_id.peps.each do |not_passing_pep|
77
+ not_passing_pep.pass_filters?(opts[:sequest]).should_not be_true
78
+ end
79
+
80
+ ans[:pephits].map {|v| v.aaseq }.to_set.size == 4
81
+ end
82
+
83
+ it 'can exclude deltacnstar' do
84
+ opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false} }
85
+ # make two hits have the deltacnstar deltacn of 1.1
86
+ sorted = @spec_id.peps.sort_by {|pep| [pep.xcorr, pep.deltacn, 1.0/pep.ppm, pep.first_scan, pep.aaseq] }
87
+ # for two of these indices:
88
+ [286, 287].each do |index|
89
+ sorted[index].deltacn = 1.1
90
+ sorted[index].deltacn.should == 1.1
91
+ end
92
+ ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, opts)
93
+
94
+ ans[:params][:sequest].should == opts[:sequest]
95
+ # FROZEN:
96
+ ans[:pephits].size.should == 2
97
+ end
98
+
99
+ end
100
+
101
+ describe 'filtering on small bioworks file with inverse prots' do
102
+ before(:each) do
103
+ @regexp = /^INV_/o
104
+ @file = Tfiles + '/bioworks_with_INV_small.xml'
105
+ @spec_id = SpecID.new(@file)
106
+ vals = [Validator::Decoy.new(@regexp)]
107
+ @opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar=> false}, :validators => vals}
108
+ end
109
+
110
+ it 'gets decoy precision' do
111
+ ans = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
112
+ peps = ans[:pephits]
113
+ vals = ans[:pephits_precision]
114
+ # FROZEN:
115
+ peps.size.should == 150
116
+ peps.hash_by(:aaseq).size.should == 74
117
+ vals.first.should == 149.0/150
118
+ end
119
+
120
+ it 'gets cys precision with freq' do
121
+ # this does a minimal test to see if this functions properly
122
+ # (not for accuracy, which is done in validator_spec)
123
+ ## WITH FASTA FILE:
124
+ val1 = Validator::AA.new('C').set_frequency(Fasta.new(Tfiles + '/small.fasta'))
125
+ @opts[:validators] << val1 # obviously this guy is not his
126
+ ans1 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
127
+ peps = ans1[:pephits]
128
+ vals1 = ans1[:pephits_precision]
129
+ # FROZEN:
130
+ vals1.last.should be_close(0.84432189117806, 0.0000000001)
131
+
132
+ ## WITH A CYSTEINE BACKGROUND:
133
+ background_cys = 0.0172
134
+ val3 = Validator::AA.new('C', :background => background_cys).set_frequency(Fasta.new(Tfiles + '/small.fasta'))
135
+ @opts[:validators][1] = val3
136
+ ans3 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
137
+ peps = ans3[:pephits]
138
+ vals3 = ans3[:pephits_precision]
139
+ # FROZEN:
140
+ vals3.last.should be_close(0.944734271368211, 0.00000000001)
141
+ end
142
+ end
143
+
144
+ describe 'filtering on a real srf file' do
145
+
146
+ spec_large do
147
+ it 'does tmm with a toppred file on srf' do
148
+ opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false}}
149
+ dir = Tfiles_l + '/opd1_2runs_2mods/sequest'
150
+ tmm_file = dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
151
+ fasta_file = dir + '/ecoli_K12_ncbi_20060321.fasta'
152
+ sequest_file = dir + '/ecoli.params'
153
+ srf_file = dir + '/020.srf'
154
+ spec_id = SpecID.new(srf_file)
155
+ # :tmm -> [transmembrane file,min_tm_seqs=1,expect_soluble=true,correct_wins=true,no_include_tm_peps=0.8, bkg=0] # a toppred.out file
156
+
157
+ regexp = /FAKINGIT_OUT/
158
+ opts[:decoy] = regexp
159
+ decoy_val = Validator::Decoy.new(regexp) # this is not real, just to test
160
+ cys_val = Validator::AA.new('C').set_frequency(Fasta.new(fasta_file))
161
+ tmm_val = Validator::Transmem::Protein.new(tmm_file, :min_num_tms => 1, :soluble_fraction => true, :correct_wins => true, :no_include_tm_peps => false, :background => 0.0).set_false_to_total_ratio( Digestor.digest( Fasta.new(fasta_file), Sequest::Params.new(sequest_file) ) )
162
+ opts[:validators] = [decoy_val, cys_val, tmm_val]
163
+ ans = SpecID::Precision::Filter.new.filter_and_validate(spec_id, opts)
164
+ peps = ans[:pephits]
165
+ vals = ans[:pephits_precision]
166
+
167
+ # frozen:
168
+ vals[0].should == 1.0
169
+ vals[1].should be_close(0.366612274427855, 0.00000001)
170
+ #vals[2].should be_close(0.396396396396396, 0.00000001)
171
+ # if the srf file is not 'filtered' by proper sequest vals, should give
172
+ # this:
173
+ #vals[2].should be_close(-0.204031426241371, 0.00000001)
174
+ vals[2].should be_close(-0.199538771665843, 0.00000001)
175
+ peps.size.should == 444
176
+ end
177
+ end
178
+
179
+ # This is what I was doing before. I think I may have been forgetting to
180
+ # remove the INV_ peptide from these counts!
181
+ # or more likely, the peptide hits were pep+prot hits!
182
+ # SpecID::Filterer.run_from_argv([@small_inv].push( *(%w(-1 1.0 -2 1.0 -3 1.0 -c 0.1 --ppm 1000 -f INV_))) )
183
+ ### FROZEN:
184
+ #assert_match(/pep_hits\s+151/, output)
185
+ #assert_match(/uniq_aa_hits\s+75/, output)
186
+ #assert_match(/prot_hits\s+13/, output)
187
+
188
+ end
189
+
190
+ describe SpecID::Precision::Filter::Peps do
191
+
192
+ before(:all) do
193
+ hashes = [
194
+ {:xcorr => 1.2, :deltacn => 0.1, :ppm => 40, :charge => 2},
195
+ {:xcorr => 1.3, :deltacn => 0.1, :ppm => 50, :charge => 3},
196
+ {:xcorr => 1.4, :deltacn => 0.1, :ppm => 50, :charge => 1},
197
+ {:xcorr => 1.5, :deltacn => 1.1, :ppm => 20, :charge => 2},
198
+ {:xcorr => 1.3, :deltacn => 0.1, :ppm => 20, :charge => 2},
199
+ {:xcorr => 1.3, :deltacn => 0.1, :ppm => 40, :charge => 2},
200
+ ]
201
+ @sequest_peps = hashes.map do |hash|
202
+ pep = SRF::OUT::Pep.new.set_from_hash(hash)
203
+ end
204
+ #sp = GenericSpecID.new.set_from_hash({:peps => peps})
205
+
206
+ end
207
+
208
+ it 'filters sequest peptides' do
209
+ args_and_expected = {
210
+ #deltacnstar false
211
+ [1.2, 1.2, 1.2, 0.1, 50, false] => 5, # "all passing"
212
+ [1.6, 1.6, 1.6, 0.1, 50, false] => 0, # "xcorrs too high"
213
+ [1.6, 1.0, 1.0, 0.1, 50, false] => 4, # "one xcorr too high"
214
+ [1.0, 1.6, 1.0, 0.1, 50, false] => 2, # "one xcorr too high"
215
+ [1.0, 1.0, 1.6, 0.1, 50, false] => 4, # "one xcorr too high"
216
+ [1.2, 1.2, 1.2, 0.2, 50, false] => 0, # "high deltacn"
217
+
218
+ ## includedeltcnstars :
219
+ [1.2, 1.2, 1.2, 0.1, 50, true] => 6, # "all passing"
220
+ [1.2, 1.2, 1.2, 0.2, 50, true] => 1, # "high deltacn"
221
+ [1.0, 1.0, 1.6, 0.1, 50, true] => 5, # "one xcorr too high"
222
+ }
223
+ args_and_expected.each do |args,exp|
224
+ filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
225
+ filt.filter(@sequest_peps).size.should == exp
226
+ end
227
+ end
228
+
229
+ it 'can change the pep array permanently' do
230
+ args_and_expected = {[1.2, 1.2, 1.2, 0.2, 50, true] => 1} # "high deltacn"
231
+ array_to_change = @sequest_peps.dup
232
+ array_to_change.size.should == @sequest_peps.size
233
+ args_and_expected.each do |args,exp|
234
+ filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
235
+ filt.filter!(array_to_change)
236
+ end
237
+ array_to_change.size.should_not == @sequest_peps.size
238
+ end
239
+
240
+ end
241
+
242
+
243
+