mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,47 @@
1
#!/usr/bin/ruby -w
#
# Reads a toppred output file (first command-line argument), parses it with
# Transmem.read_toppred and prints the resulting structure to stdout as YAML.
# With no arguments the usage text is printed and the script exits.

require 'optparse'
require 'yaml'  # provides #to_yaml, used for the final output

# NOTE(review): nothing in this script loads the Transmem module used below;
# presumably it has to be required (or preloaded with -r) -- confirm which
# library file defines Transmem.read_toppred.

opt = {}
opt[:probability] = 1.0
opts = OptionParser.new do |op|
  op.banner = "USAGE: #{File.basename(__FILE__)} toppred.out"
  op.separator "Outputs toppred.yaml"
  op.separator "takes the highest probability structure"
  op.separator "for best structures of equal probability, takes first given"
  op.separator "Each line contains:"
  op.separator "<identifier>: String :"
  op.separator " num_found: Int"
  op.separator " num_certain_transmembrane_segments: Int"
  op.separator " num_putative_transmembrane_segments: Int"
  op.separator " best_structure_probability: Float"
  op.separator " transmembrane_segments:"
  op.separator " - probability: Float"
  op.separator " start: Int"
  op.separator " stop: Int"
  op.separator " aaseq: String"
  op.separator ""
  op.separator "OPTIONS:"
  # The PROB placeholder makes the switch take a mandatory value; without it
  # OptionParser treats the switch as a bare flag and the number given on the
  # command line would be left in ARGV and mistaken for the input file.
  # NOTE(review): opt[:probability] is stored but not used further down.
  op.on("-p", "--probability PROB", Float, "min structure prob threshold (default #{opt[:probability]})") {|v| opt[:probability] = v}
end

opts.parse!

# No input file given: show usage and stop.
if ARGV.size == 0
  puts opts
  exit
end

file = ARGV.shift

# File.open returns the value of its block, so the parsed result is captured
# in a variable that is visible to the output statement below (a variable first
# assigned inside the block would be local to the block).
hash = File.open(file) do |fh|
  Transmem.read_toppred(fh)
end

puts hash.to_yaml
44
+
45
+
46
+
47
+
@@ -202,7 +202,7 @@ chmod(0777, TPP_DATA_PATH.chomp('/'))
202
202
  mkpath TPP_VIS_PATH.chomp('/')
203
203
 
204
204
  ## VERY SPECIFIC to OUR SYSTEM
205
- soft_link('/project/marcotte/ms', TPP_DATA_PATH.chomp('/') + '/ms')
205
+ soft_link('/project/marcotte/marcotte/ms', TPP_DATA_PATH.chomp('/') + '/ms')
206
206
  system "sudo chown john:marcotte #{TPP_DATA_PATH.chomp('/')}"
207
207
  system "sudo chown john:marcotte #{TPP_VIS_PATH.chomp('/')}"
208
208
 
@@ -1,20 +1,17 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
1
2
 
2
- require 'test/unit'
3
3
  require 'align'
4
- require 'pp'
5
4
 
6
- class AlignTest < Test::Unit::TestCase
5
+ describe Align do
7
6
 
8
- def initialize(arg)
9
- super(arg)
10
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- @mz1 = @tfiles + '4-03-03_mzXML/000.mzXML.timeIndex'
12
- @mz2 = @tfiles + '4-03-03_mzXML/020.mzXML.timeIndex'
13
- @prt = @tfiles + '4-03-03_small-prot.xml'
14
- @pep = @tfiles + '4-03-03_small.xml'
7
+ before(:each) do
8
+ @mz1 = Tfiles + '4-03-03_mzXML/000.mzXML.timeIndex'
9
+ @mz2 = Tfiles + '4-03-03_mzXML/020.mzXML.timeIndex'
10
+ @prt = Tfiles + '4-03-03_small-prot.xml'
11
+ @pep = Tfiles + '4-03-03_small.xml'
15
12
  end
16
13
 
17
- def test_overlapping_peps_by_seqcharge
14
+ it_should 'finds overlapping peptides of same seq+charge' do
18
15
  s1 = 'DETTIVEGAGDAEAIQGR'
19
16
  c1 = '2'
20
17
  s2 = 'TDDVAGDGTTTATVLAQALVR'
@@ -35,28 +32,25 @@ class AlignTest < Test::Unit::TestCase
35
32
  has_seqcharges << false
36
33
  end
37
34
  end
38
- has_seqcharges.each do |c| assert c end
35
+ has_seqcharges.each { |c| c.should be_true }
39
36
  end
40
37
  end
41
38
 
42
39
  ### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
43
40
  # @TODO: CURRENT WORK!
44
- def test_overlapping_peps_by_seqcharge_with_filter
45
- assert true
46
- if false
47
- al = Align.new
48
- pep1 = al.peps_with_scans([@mz1], @prt, @pep, 0.0 ,0.0 ,0.0 )
49
- pep2 = al.peps_with_scans(@mz2, @prt, @pep, 0.0, 0.0, 0.0 )
50
- max_dups = nil
51
- outlier_cutoff = 0.0
52
- olap = al.overlapping_peps_by_seqcharge_with_filter([pep1, pep2], max_dups, outlier_cutoff)
53
- olap.each do |peps|
54
- p peps
55
- end
41
+ it_should 'should find overlapping peptides at a seqcharge with a filter' do
42
+ al = Align.new
43
+ pep1 = al.peps_with_scans([@mz1], @prt, @pep, 0.0 ,0.0 ,0.0 )
44
+ pep2 = al.peps_with_scans(@mz2, @prt, @pep, 0.0, 0.0, 0.0 )
45
+ max_dups = nil
46
+ outlier_cutoff = 0.0
47
+ olap = al.overlapping_peps_by_seqcharge_with_filter([pep1, pep2], max_dups, outlier_cutoff)
48
+ olap.each do |peps|
49
+ p peps
56
50
  end
57
51
  end
58
52
 
59
- def test_toss_outliers
53
+ it_should 'should toss outliers' do
60
54
 
61
55
  # Consistency/sanity checks right now (not accuracy)
62
56
  x = [-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,10,0 ,1,2,3,4,5,6,7,8,9]
@@ -65,7 +59,7 @@ class AlignTest < Test::Unit::TestCase
65
59
  expy2 = [-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9]
66
60
 
67
61
  pcls = Proph::Pep
68
- scls = Spec::Scan
62
+ scls = MS::Scan
69
63
 
70
64
  pep_groups = [x,y].collect do |arr|
71
65
  arr.collect do |val|
@@ -79,7 +73,7 @@ class AlignTest < Test::Unit::TestCase
79
73
  deviations = 3.2
80
74
  size_before = pep_groups.first.size
81
75
  al.toss_outliers(pep_groups, deviations)
82
- assert_equal(2, size_before - pep_groups.first.size)
76
+ (size_before - pep_groups.first.size).should == 2
83
77
  end
84
78
 
85
79
  end
@@ -1,12 +1,8 @@
1
-
2
- require 'test/unit'
3
- require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
4
2
  require 'fileutils'
5
3
 
6
- tmp = $VERBOSE
7
- $VERBOSE = 5
8
4
 
9
- $XML_SANITY_LINES = ['<sample_enzyme name="trypsin">', '<specificity cut="KR" no_cut="P" sense="C"/>', '<parameter name="diff_search_options" value="0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>']
5
+ $XML_SANITY_LINES = ['<sample_enzyme name="Trypsin">', '<specificity cut="KR" no_cut="P" sense="C"/>', '<parameter name="diff_search_options" value="0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>']
10
6
 
11
7
  $XML_SANITY_MATCHES = [/<spectrum_query spectrum="0\d0.\d+.\d+.[123]" start_scan="\d+" end_scan="\d+" precursor_neutral_mass="[\d\.]+" assumed_charge="[123]" index="\d+">/,
12
8
  / <search_hit hit_rank="\d" peptide="[\w\-\.]+" peptide_prev_aa="." peptide_next_aa="." protein=".*" num_tot_proteins="\d+" num_matched_ions="\d+" tot_num_ions="\d+" calc_neutral_pep_mass="[\d\.]+" massdiff="[\+\-][\d\.]+" num_tol_term="\d" num_missed_cleavages="\d" is_rejected="[01]">/,
@@ -18,78 +14,66 @@ $XML_SANITY_MATCHES = [/<spectrum_query spectrum="0\d0.\d+.\d+.[123]" start_scan
18
14
  ]
19
15
 
20
16
 
21
-
22
-
23
- class BioworksToPepXMLTest < Test::Unit::TestCase
24
-
25
- def initialize(arg)
26
- super(arg)
27
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
28
- @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
29
- @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
30
- @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
31
- @tf_params = @tfiles + "bioworks32.params"
32
- @no_delete = true
33
- @out_path = @tfiles + 'pepxml/'
34
- @cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S bioworks_to_pepxml.rb "
17
+ describe 'bioworks_to_pepxml.rb' do
18
+ before(:all) do
19
+ @tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
20
+ @tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
21
+ @tf_params = Tfiles + '/bioworks32.params'
22
+ @out_path = Tfiles + '/pepxml/'
23
+ @progname = 'bioworks_to_pepxml.rb'
24
+ @no_delete = false
35
25
  end
36
26
 
37
- def test_usage
38
- assert_match(/usage:/, `#{@cmd}`)
39
- end
27
+ it_should_behave_like "a cmdline program"
40
28
 
41
29
  def _basic(cmd, prc)
42
- puts "Performing: #{cmd}" if $VERBOSE
30
+ puts "Performing: #{cmd}" if $DEBUG
43
31
  reply = `#{cmd}`
44
- puts reply if $VERBOSE
32
+ puts reply if $DEBUG
45
33
  %w(000 020).each do |file|
46
34
  ffile = @out_path + file + ".xml"
47
35
  prc.call(ffile)
48
36
  end
49
37
  end
50
38
 
51
- def test_basic
52
- if File.exist? @tfiles_l
39
+ spec_large do
40
+ it 'works on a real bioworks.xml file' do
53
41
  cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
54
42
  ## FILES EXIST:
55
43
  prc = proc {|file|
56
- assert(File.exist?(file), "#{file} exists")
44
+ file.should exist
57
45
  beginning = IO.readlines(file)[0,50].join("\n")
58
46
  $XML_SANITY_LINES.each do |line|
59
- assert(beginning.include?(line), "xml includes line: #{line}")
47
+ beginning.should include(line)
48
+ #beginning.include?(line).should be_true
60
49
  end
61
50
  $XML_SANITY_MATCHES.each do |match|
62
- assert_match(match, beginning, "matches")
51
+ beginning.should =~ match
63
52
  end
64
53
  }
65
54
  _basic(cmd, prc)
66
55
  ## COPY MZXML:
67
56
  %w(000 020).each do |file|
68
57
  mzxml_file = File.join(@out_path, "#{file}.mzXML")
69
- assert(File.exist?( mzxml_file ), "file: #{mzxml_file} exists")
58
+ mzxml_file.should exist
70
59
  end
71
60
  ## CLEANUP:
72
61
  unless @no_delete then FileUtils.rm_rf(@out_path) end
73
- else
74
- assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
75
62
  end
76
63
  end
77
64
 
78
- def test_database
79
- if File.exist? @tfiles_l
65
+ spec_large do
66
+ it 'transforms database name when its proper to do so' do
80
67
  cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path}"
81
68
  db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
82
- assert_match(db_re, IO.read(@tf_params))
69
+ IO.read(@tf_params).should =~ db_re
83
70
  prc = proc {|file|
84
- assert(File.exist?(file))
85
- assert_no_match(db_re, IO.read(file))
71
+ file.should exist
72
+ IO.read(file).should_not =~ db_re
86
73
  }
87
74
  _basic(cmd, prc)
88
75
  unless @no_delete then FileUtils.rm_rf(@out_path) end
89
- else
90
- assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
91
76
  end
92
77
  end
93
78
  end
94
79
 
95
- $VERBOSE = tmp
@@ -0,0 +1,259 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ require 'fasta'
4
+
5
+
6
# Spec-only extensions to Fasta used to compare two sets of proteins.
class Fasta
  # Returns true when both fasta objects hold the same number of proteins and
  # each protein has the same sequence length as the protein at the same
  # position in the other set.
  #
  # other_fasta_obj_or_file:: anything accepted by Fasta.to_fasta
  def same_sized_proteins?(other_fasta_obj_or_file)
    other = Fasta.to_fasta(other_fasta_obj_or_file)
    # zip alone would silently ignore extra proteins in +other+ and would
    # pair against nil when +other+ is shorter, so compare the counts first.
    return false unless @prots.size == other.prots.size
    @prots.zip(other.prots).all? do |a,b|
      a.aaseq.size == b.aaseq.size
    end
  end

  # This is tough to say 'for sure'.  Right now, we consider the proteins
  # shuffled if they are all the same size and 2/3 or more of the proteins are
  # different than their counterpart (this is designed for small sets of
  # proteins where it is possible one of the proteins is equal to the other).
  #
  # other_fasta_obj_or_file:: anything accepted by Fasta.to_fasta
  def shuffled?(other_fasta_obj_or_file)
    other = Fasta.to_fasta(other_fasta_obj_or_file)
    return false unless same_sized_proteins?(other)
    (same, different) = @prots.zip(other.prots).partition do |prota, protb|
      prota == protb
    end
    fraction_different = different.size.to_f / (same.size + different.size)
    fraction_different >= 2.0/3
  end
end
31
+
32
# Shared behaviour for anything that can reverse/shuffle/concatenate a fasta
# file.  The including example group must define
# do_it(method, additional_opts={}), which runs the manipulation on @tmpfile
# and writes the result to @f.
describe "a manipulator of a fasta file", :shared => true do
  before(:all) do
    # Fixture fasta text and the outputs expected from it.
    # NOTE(review): written with explicit newlines on the assumption that the
    # original multi-line literals carried no leading whitespace -- confirm.
    @filestring = ">gi|P1\nAMKRGAN\n>gi|P2\nCRGATKKTAGRPMEK\n>gi|P3\nPEPTIDE\n"

    @rev_filestring = ">gi|P1\nNAGRKMA\n>gi|P2\nKEMPRGATKKTAGRC\n>gi|P3\nEDITPEP\n"

    @rev_pref_filestring = ">REV_gi|P1\nNAGRKMA\n>REV_gi|P2\nKEMPRGATKKTAGRC\n>REV_gi|P3\nEDITPEP\n"

    @rev_tryptic_filestring = ">gi|P1\nMAKRNAG\n>gi|P2\nCRTAGKKEMPRGATK\n>gi|P3\nEDITPEP\n"
  end


  # Each example starts from a freshly written input file (@tmpfile); @f is
  # the path the manipulation writes to.
  before(:each) do
    @tmpfile = Tfiles + "/littlefasta.trash.fasta"
    @f = Tfiles + "/trash.fasta"
    File.open(@tmpfile, "w") {|fh| fh.print @filestring }
  end

  # Remove both scratch files so examples do not see each other's output.
  after(:each) do
    File.unlink @tmpfile if File.exist? @tmpfile
    File.unlink @f if File.exist? @f
  end

  it 'reverses protein sequences' do
    reverse_the_file
    fastap(@f).to_s.should == @rev_filestring
  end

  def reverse_the_file
    do_it(:reverse)
  end

  it 'shuffles protein sequences' do
    shuffle_the_file
    Fasta.new(@f).shuffled?(Fasta.from_string(@filestring)).should be_true
  end

  def shuffle_the_file
    do_it(:shuffle)
  end

  it 'concatenates sequences' do
    concatenate_sequences
    lns = fastalns(@f)
    strlns(@filestring).should == lns[0..5] # first part equal
    strlns(@rev_pref_filestring).should == lns[6..-1] # second part equal
  end

  def concatenate_sequences
    do_it(:reverse, :cat => true, :prefix => 'REV_')
  end

  it 'makes prefixes' do
    make_prefixes
    #@shaker.reverse(@tmpfile, :out => @f, :prefix => 'SILLY_')
    fp = fastap(@f)
    fp.each do |prt|
      prt.header.should match(/^>SILLY_.+/)
    end
  end

  def make_prefixes
    do_it(:reverse, :prefix => 'SILLY_')
  end

  it 'makes fractions of proteins' do
    make_fractions_of_proteins(1.0/3)
    fastap(@f).size.should == 1
    fastap(@f).first.header.should =~ /^>[^M]/

    # this guy gets rounded up on the command line so that it fails there
    #make_fractions_of_proteins(2.0/3)
    #fastap(@f).size.should == 2
    #fastap(@f).each do |prt|
    #  prt.header.should =~ /^>[^M]/
    #end

    make_fractions_of_proteins(1.0)
    fastap(@f).size.should == 3
    fastap(@f).each do |prt|
      prt.header.should =~ /^>[^M]/
    end
  end

  def make_fractions_of_proteins(fraction)
    do_it(:shuffle, :fraction => fraction)
  end


  it 'makes fractions with labels (for > 1)' do
    make_fractions_of_proteins(1.1)
    fastap(@f).size.should == 4
    fastap(@f).any? do |prt|
      prt.header =~ /^>[^M]/
    end.should be_true


    make_fractions_of_proteins(2.6)
    fastap(@f).size.should == 8

    make_reverse_cat_fractions(2.0)
    fastap(@f).size.should == 9

    # original proteins first, then one labelled block per fraction
    fp = Fasta.new(@f)
    fp[0..2].each do |prt|
      prt.header.should =~ /^>/
    end
    fp[3..5].each do |prt|
      prt.header.should =~ /^>MINE_f0_/
    end
    fp[6..8].each do |prt|
      prt.header.should =~ /^>MINE_f1_/
    end
  end

  def make_reverse_cat_fractions(fraction, prefix='MINE_')
    do_it(:reverse, :fraction => fraction, :cat => true, :prefix => prefix)
  end

  def reverse_tryptic_peptides
    do_it(:reverse, :tryptic_peptides => true)
  end

  it 'reverses tryptic peptides' do
    reverse_tryptic_peptides
    Fasta.from_string(@rev_tryptic_filestring).should == Fasta.new(@f)
  end

  def shuffle_tryptic_peptides
    do_it(:shuffle, :tryptic_peptides => true)
  end

  it 'shuffles tryptic peptides (rerun on failure to recheck)' do
    shuffle_tryptic_peptides
    lns = fastap(@f).to_s.split("\n")
    lns[1][2..3].should == 'KR'
    lns[3][1..1].should == 'R'
    lns[3].size.should == 'CRGATKKTAGRPMEK'.size
    lns[3].should_not == 'CRGATKKTAGRPMEK' #sequence is randomised from original [remote chance of failure] rerun to make sure
  end

  # splits a string into its lines
  def strlns(str)
    str.split("\n")
  end

  # returns the lines of the file +fn+ (which must exist)
  def fastalns(fn)
    fn.should exist
    IO.read(fn).split("\n")
  end

  # returns the fasta object proteins of the file +fn+ (which must exist)
  def fastap(fn)
    # check the file actually being read, consistent with fastalns
    fn.should exist
    Fasta.new(fn).prots
  end

end
214
+
215
# Runs the shared fasta-manipulation examples by calling FastaShaker's
# methods directly on an instance.
describe FastaShaker, "by method call" do

  before(:all) { @shaker = FastaShaker.new }

  it_should_behave_like "a manipulator of a fasta file"

  # Invokes +method+ on the shaker with the temp input file; output always
  # goes to @f unless +additional_opts+ overrides :out.
  def do_it(method, additional_opts={})
    merged = { :out => @f }.merge(additional_opts)
    @shaker.send(method, @tmpfile, merged)
  end

end
229
+
230
+
231
# Runs the shared fasta-manipulation examples by shelling out to the
# fasta_shaker.rb program with long (--option) arguments.
describe FastaShaker, "by command line long args" do
  before(:all) do
    @progname = 'fasta_shaker.rb'
  end

  # NOTE(review): @cmd used in do_it is not set in this group; presumably the
  # "a cmdline program" shared behaviour derives it from @progname -- confirm.
  it_should_behave_like "a cmdline program"
  it_should_behave_like "a manipulator of a fasta file"

  # returns an array of the args
  #
  # Each key becomes a '--key' switch.  A value of true yields the bare
  # switch, any other value follows the switch, and a false/nil value omits
  # the switch altogether (otherwise a disabled flag would be switched on).
  def opts_to_cmd_args(hash)
    args = []
    hash.each do |k,v|
      next if v.nil? || v == false
      args.push('--' + k.to_s)
      args.push(v) unless v == true
    end
    args
  end

  # Builds and runs the command line: <cmd> <method> <input file> <options>.
  # Output always goes to @f unless +additional_opts+ overrides :out.
  def do_it(method, additional_opts={})
    opts = {:out => @f}
    opts.merge!(additional_opts)
    cmd = [@cmd, method, @tmpfile, *(opts_to_cmd_args(opts))].join(" ")
    #puts cmd
    system cmd
  end

end