mspire 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
opt = {}
|
7
|
+
opt[:probability] = 1.0
|
8
|
+
opts = OptionParser.new do |op|
|
9
|
+
op.banner = "USAGE: #{File.basename(__FILE__)} toppred.out"
|
10
|
+
op.separator "Outputs toppred.yaml"
|
11
|
+
op.separator "takes the highest probability structure"
|
12
|
+
op.separator "for best structures of equal probability, takes first given"
|
13
|
+
op.separator "Each line contains:"
|
14
|
+
op.separator "<identifier>: String :"
|
15
|
+
op.separator " num_found: Int"
|
16
|
+
op.separator " num_certain_transmembrane_segments: Int"
|
17
|
+
op.separator " num_putative_transmembrane_segments: Int"
|
18
|
+
op.separator " best_structure_probability: Float"
|
19
|
+
op.separator " transmembrane_segments:"
|
20
|
+
op.separator " - probability: Float"
|
21
|
+
op.separator " start: Int"
|
22
|
+
op.separator " stop: Int"
|
23
|
+
op.separator " aaseq: String"
|
24
|
+
op.separator ""
|
25
|
+
op.separator "OPTIONS:"
|
26
|
+
op.on("-p", "--probability", Float, "min structure prob threshold (default #{opt[:probability]})") {|v| opt[:probability] = v}
|
27
|
+
end
|
28
|
+
|
29
|
+
opts.parse!
|
30
|
+
|
31
|
+
|
32
|
+
if ARGV.size == 0
|
33
|
+
puts opts
|
34
|
+
exit
|
35
|
+
end
|
36
|
+
|
37
|
+
file = ARGV.shift
|
38
|
+
|
39
|
+
File.open(file) do |fh|
|
40
|
+
hash = Transmem.read_toppred(fh)
|
41
|
+
end
|
42
|
+
|
43
|
+
puts hash.to_yaml
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
data/script/tpp_installer.rb
CHANGED
@@ -202,7 +202,7 @@ chmod(0777, TPP_DATA_PATH.chomp('/'))
|
|
202
202
|
mkpath TPP_VIS_PATH.chomp('/')
|
203
203
|
|
204
204
|
## VERY SPECIFIC to OUR SYSTEM
|
205
|
-
soft_link('/project/marcotte/ms', TPP_DATA_PATH.chomp('/') + '/ms')
|
205
|
+
soft_link('/project/marcotte/marcotte/ms', TPP_DATA_PATH.chomp('/') + '/ms')
|
206
206
|
system "sudo chown john:marcotte #{TPP_DATA_PATH.chomp('/')}"
|
207
207
|
system "sudo chown john:marcotte #{TPP_VIS_PATH.chomp('/')}"
|
208
208
|
|
@@ -1,20 +1,17 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
1
2
|
|
2
|
-
require 'test/unit'
|
3
3
|
require 'align'
|
4
|
-
require 'pp'
|
5
4
|
|
6
|
-
|
5
|
+
describe Align do
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
@
|
11
|
-
@
|
12
|
-
@
|
13
|
-
@prt = @tfiles + '4-03-03_small-prot.xml'
|
14
|
-
@pep = @tfiles + '4-03-03_small.xml'
|
7
|
+
before(:each) do
|
8
|
+
@mz1 = Tfiles + '4-03-03_mzXML/000.mzXML.timeIndex'
|
9
|
+
@mz2 = Tfiles + '4-03-03_mzXML/020.mzXML.timeIndex'
|
10
|
+
@prt = Tfiles + '4-03-03_small-prot.xml'
|
11
|
+
@pep = Tfiles + '4-03-03_small.xml'
|
15
12
|
end
|
16
13
|
|
17
|
-
|
14
|
+
it_should 'finds overlapping peptides of same seq+charge' do
|
18
15
|
s1 = 'DETTIVEGAGDAEAIQGR'
|
19
16
|
c1 = '2'
|
20
17
|
s2 = 'TDDVAGDGTTTATVLAQALVR'
|
@@ -35,28 +32,25 @@ class AlignTest < Test::Unit::TestCase
|
|
35
32
|
has_seqcharges << false
|
36
33
|
end
|
37
34
|
end
|
38
|
-
has_seqcharges.each
|
35
|
+
has_seqcharges.each { |c| c.should be_true }
|
39
36
|
end
|
40
37
|
end
|
41
38
|
|
42
39
|
### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
43
40
|
# @TODO: CURRENT WORK!
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
olap.each do |peps|
|
54
|
-
p peps
|
55
|
-
end
|
41
|
+
it_should 'should find overlapping peptides at a seqcharge with a filter' do
|
42
|
+
al = Align.new
|
43
|
+
pep1 = al.peps_with_scans([@mz1], @prt, @pep, 0.0 ,0.0 ,0.0 )
|
44
|
+
pep2 = al.peps_with_scans(@mz2, @prt, @pep, 0.0, 0.0, 0.0 )
|
45
|
+
max_dups = nil
|
46
|
+
outlier_cutoff = 0.0
|
47
|
+
olap = al.overlapping_peps_by_seqcharge_with_filter([pep1, pep2], max_dups, outlier_cutoff)
|
48
|
+
olap.each do |peps|
|
49
|
+
p peps
|
56
50
|
end
|
57
51
|
end
|
58
52
|
|
59
|
-
|
53
|
+
it_should 'should toss outliers' do
|
60
54
|
|
61
55
|
# Consistency/sanity checks right now (not accuracy)
|
62
56
|
x = [-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,10,0 ,1,2,3,4,5,6,7,8,9]
|
@@ -65,7 +59,7 @@ class AlignTest < Test::Unit::TestCase
|
|
65
59
|
expy2 = [-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,1,2,3,4,5,6,7,8,9]
|
66
60
|
|
67
61
|
pcls = Proph::Pep
|
68
|
-
scls =
|
62
|
+
scls = MS::Scan
|
69
63
|
|
70
64
|
pep_groups = [x,y].collect do |arr|
|
71
65
|
arr.collect do |val|
|
@@ -79,7 +73,7 @@ class AlignTest < Test::Unit::TestCase
|
|
79
73
|
deviations = 3.2
|
80
74
|
size_before = pep_groups.first.size
|
81
75
|
al.toss_outliers(pep_groups, deviations)
|
82
|
-
|
76
|
+
(size_before - pep_groups.first.size).should == 2
|
83
77
|
end
|
84
78
|
|
85
79
|
end
|
@@ -1,12 +1,8 @@
|
|
1
|
-
|
2
|
-
require 'test/unit'
|
3
|
-
require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
4
2
|
require 'fileutils'
|
5
3
|
|
6
|
-
tmp = $VERBOSE
|
7
|
-
$VERBOSE = 5
|
8
4
|
|
9
|
-
$XML_SANITY_LINES = ['<sample_enzyme name="
|
5
|
+
$XML_SANITY_LINES = ['<sample_enzyme name="Trypsin">', '<specificity cut="KR" no_cut="P" sense="C"/>', '<parameter name="diff_search_options" value="0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>']
|
10
6
|
|
11
7
|
$XML_SANITY_MATCHES = [/<spectrum_query spectrum="0\d0.\d+.\d+.[123]" start_scan="\d+" end_scan="\d+" precursor_neutral_mass="[\d\.]+" assumed_charge="[123]" index="\d+">/,
|
12
8
|
/ <search_hit hit_rank="\d" peptide="[\w\-\.]+" peptide_prev_aa="." peptide_next_aa="." protein=".*" num_tot_proteins="\d+" num_matched_ions="\d+" tot_num_ions="\d+" calc_neutral_pep_mass="[\d\.]+" massdiff="[\+\-][\d\.]+" num_tol_term="\d" num_missed_cleavages="\d" is_rejected="[01]">/,
|
@@ -18,78 +14,66 @@ $XML_SANITY_MATCHES = [/<spectrum_query spectrum="0\d0.\d+.\d+.[123]" start_scan
|
|
18
14
|
]
|
19
15
|
|
20
16
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
@
|
28
|
-
@
|
29
|
-
@tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
|
30
|
-
@tf_bioworks_xml = @tfiles + "bioworks_small.xml"
|
31
|
-
@tf_params = @tfiles + "bioworks32.params"
|
32
|
-
@no_delete = true
|
33
|
-
@out_path = @tfiles + 'pepxml/'
|
34
|
-
@cmd = "ruby -I#{File.join(File.dirname(__FILE__), "..", "lib")} -S bioworks_to_pepxml.rb "
|
17
|
+
describe 'bioworks_to_pepxml.rb' do
|
18
|
+
before(:all) do
|
19
|
+
@tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
|
20
|
+
@tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
|
21
|
+
@tf_params = Tfiles + '/bioworks32.params'
|
22
|
+
@out_path = Tfiles + '/pepxml/'
|
23
|
+
@progname = 'bioworks_to_pepxml.rb'
|
24
|
+
@no_delete = false
|
35
25
|
end
|
36
26
|
|
37
|
-
|
38
|
-
assert_match(/usage:/, `#{@cmd}`)
|
39
|
-
end
|
27
|
+
it_should_behave_like "a cmdline program"
|
40
28
|
|
41
29
|
def _basic(cmd, prc)
|
42
|
-
puts "Performing: #{cmd}" if $
|
30
|
+
puts "Performing: #{cmd}" if $DEBUG
|
43
31
|
reply = `#{cmd}`
|
44
|
-
puts reply if $
|
32
|
+
puts reply if $DEBUG
|
45
33
|
%w(000 020).each do |file|
|
46
34
|
ffile = @out_path + file + ".xml"
|
47
35
|
prc.call(ffile)
|
48
36
|
end
|
49
37
|
end
|
50
38
|
|
51
|
-
|
52
|
-
|
39
|
+
spec_large do
|
40
|
+
it 'works on a real bioworks.xml file' do
|
53
41
|
cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path} -d /work/special/path --copy_mzxml"
|
54
42
|
## FILES EXIST:
|
55
43
|
prc = proc {|file|
|
56
|
-
|
44
|
+
file.should exist
|
57
45
|
beginning = IO.readlines(file)[0,50].join("\n")
|
58
46
|
$XML_SANITY_LINES.each do |line|
|
59
|
-
|
47
|
+
beginning.should include(line)
|
48
|
+
#beginning.include?(line).should be_true
|
60
49
|
end
|
61
50
|
$XML_SANITY_MATCHES.each do |match|
|
62
|
-
|
51
|
+
beginning.should =~ match
|
63
52
|
end
|
64
53
|
}
|
65
54
|
_basic(cmd, prc)
|
66
55
|
## COPY MZXML:
|
67
56
|
%w(000 020).each do |file|
|
68
57
|
mzxml_file = File.join(@out_path, "#{file}.mzXML")
|
69
|
-
|
58
|
+
mzxml_file.should exist
|
70
59
|
end
|
71
60
|
## CLEANUP:
|
72
61
|
unless @no_delete then FileUtils.rm_rf(@out_path) end
|
73
|
-
else
|
74
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
75
62
|
end
|
76
63
|
end
|
77
64
|
|
78
|
-
|
79
|
-
|
65
|
+
spec_large do
|
66
|
+
it 'transforms database name when its proper to do so' do
|
80
67
|
cmd = "#{@cmd} -p #{@tf_params} -o #{@out_path} #{@tf_bioworks_xml} -m #{@tf_mzxml_path}"
|
81
68
|
db_re = /C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta/
|
82
|
-
|
69
|
+
IO.read(@tf_params).should =~ db_re
|
83
70
|
prc = proc {|file|
|
84
|
-
|
85
|
-
|
71
|
+
file.should exist
|
72
|
+
IO.read(file).should_not =~ db_re
|
86
73
|
}
|
87
74
|
_basic(cmd, prc)
|
88
75
|
unless @no_delete then FileUtils.rm_rf(@out_path) end
|
89
|
-
else
|
90
|
-
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
91
76
|
end
|
92
77
|
end
|
93
78
|
end
|
94
79
|
|
95
|
-
$VERBOSE = tmp
|
@@ -0,0 +1,259 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
+
|
3
|
+
require 'fasta'
|
4
|
+
|
5
|
+
|
6
|
+
class Fasta
|
7
|
+
def same_sized_proteins?(other_fasta_obj_or_file)
|
8
|
+
other = Fasta.to_fasta(other_fasta_obj_or_file)
|
9
|
+
@prots.zip(other.prots).all? do |a,b|
|
10
|
+
a.aaseq.size == b.aaseq.size
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
# This is tough to say 'for sure' Right now, we consider the proteins
|
15
|
+
# shuffled if they are all the same size and 2/3 or more of the peptides are
|
16
|
+
# different than the other (this is designed for small sets of proteins
|
17
|
+
# where it is possible one of the peptides is equal to the other).
|
18
|
+
def shuffled?(other_fasta_obj_or_file)
|
19
|
+
other = Fasta.to_fasta(other_fasta_obj_or_file)
|
20
|
+
if !same_sized_proteins?(other)
|
21
|
+
false
|
22
|
+
else
|
23
|
+
(same, different) = @prots.zip(other.prots).partition do |prota, protb|
|
24
|
+
prota == protb
|
25
|
+
end
|
26
|
+
fraction_different = different.size.to_f / (same.size + different.size)
|
27
|
+
fraction_different >= 2.0/3
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "a manipulator of a fasta file", :shared => true do
|
33
|
+
before(:all) do
|
34
|
+
@filestring = ">gi|P1
|
35
|
+
AMKRGAN
|
36
|
+
>gi|P2
|
37
|
+
CRGATKKTAGRPMEK
|
38
|
+
>gi|P3
|
39
|
+
PEPTIDE
|
40
|
+
"
|
41
|
+
|
42
|
+
@rev_filestring = ">gi|P1
|
43
|
+
NAGRKMA
|
44
|
+
>gi|P2
|
45
|
+
KEMPRGATKKTAGRC
|
46
|
+
>gi|P3
|
47
|
+
EDITPEP
|
48
|
+
"
|
49
|
+
|
50
|
+
@rev_pref_filestring = ">REV_gi|P1
|
51
|
+
NAGRKMA
|
52
|
+
>REV_gi|P2
|
53
|
+
KEMPRGATKKTAGRC
|
54
|
+
>REV_gi|P3
|
55
|
+
EDITPEP
|
56
|
+
"
|
57
|
+
|
58
|
+
@rev_tryptic_filestring = ">gi|P1
|
59
|
+
MAKRNAG
|
60
|
+
>gi|P2
|
61
|
+
CRTAGKKEMPRGATK
|
62
|
+
>gi|P3
|
63
|
+
EDITPEP
|
64
|
+
"
|
65
|
+
end
|
66
|
+
|
67
|
+
|
68
|
+
before(:each) do
|
69
|
+
testdir = File.dirname(__FILE__)
|
70
|
+
@tmpfile = Tfiles + "/littlefasta.trash.fasta"
|
71
|
+
@f = Tfiles + "/trash.fasta"
|
72
|
+
File.open(@tmpfile, "w") {|fh| fh.print @filestring }
|
73
|
+
end
|
74
|
+
|
75
|
+
after(:each) do
|
76
|
+
File.unlink @tmpfile if File.exist? @tmpfile
|
77
|
+
File.unlink @f if File.exist? @f
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'reverses protein sequences' do
|
81
|
+
reverse_the_file
|
82
|
+
fastap(@f).to_s.should == @rev_filestring
|
83
|
+
end
|
84
|
+
|
85
|
+
def reverse_the_file
|
86
|
+
do_it(:reverse)
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'shuffles protein sequences' do
|
90
|
+
shuffle_the_file
|
91
|
+
Fasta.new(@f).shuffled?(Fasta.from_string(@filestring)).should be_true
|
92
|
+
end
|
93
|
+
|
94
|
+
def shuffle_the_file
|
95
|
+
do_it(:shuffle)
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'concatenates sequences' do
|
99
|
+
concatenate_sequences
|
100
|
+
lns = fastalns(@f)
|
101
|
+
strlns(@filestring).should == lns[0..5] # first part equal
|
102
|
+
strlns(@rev_pref_filestring).should == lns[6..-1] # "second part equal")
|
103
|
+
end
|
104
|
+
|
105
|
+
def concatenate_sequences
|
106
|
+
do_it(:reverse, :cat => true, :prefix => 'REV_')
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'makes prefixes' do
|
110
|
+
make_prefixes
|
111
|
+
#@shaker.reverse(@tmpfile, :out => @f, :prefix => 'SILLY_')
|
112
|
+
fp = fastap(@f)
|
113
|
+
fp.each do |prt|
|
114
|
+
prt.header.should match(/^>SILLY_.+/)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def make_prefixes
|
119
|
+
do_it(:reverse, :prefix => 'SILLY_')
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'makes fractions of proteins' do
|
123
|
+
make_fractions_of_proteins(1.0/3)
|
124
|
+
fastap(@f).size.should == 1
|
125
|
+
fastap(@f).first.header.should =~ /^>[^M]/
|
126
|
+
|
127
|
+
# this guy gets rounded up on the command line so that it fails there
|
128
|
+
#make_fractions_of_proteins(2.0/3)
|
129
|
+
#fastap(@f).size.should == 2
|
130
|
+
#fastap(@f).each do |prt|
|
131
|
+
# prt.header.should =~ /^>[^M]/
|
132
|
+
#end
|
133
|
+
|
134
|
+
make_fractions_of_proteins(1.0)
|
135
|
+
fastap(@f).size.should == 3
|
136
|
+
fastap(@f).each do |prt|
|
137
|
+
prt.header.should =~ /^>[^M]/
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
def make_fractions_of_proteins(fraction)
|
142
|
+
do_it(:shuffle, :fraction => fraction)
|
143
|
+
end
|
144
|
+
|
145
|
+
|
146
|
+
it 'makes fractions with labels (for > 1)' do
|
147
|
+
make_fractions_of_proteins(1.1)
|
148
|
+
fastap(@f).size.should == 4
|
149
|
+
fastap(@f).any? do |prt|
|
150
|
+
prt.header =~ /^>[^M]/
|
151
|
+
end.should be_true
|
152
|
+
|
153
|
+
|
154
|
+
make_fractions_of_proteins(2.6)
|
155
|
+
fastap(@f).size.should == 8
|
156
|
+
|
157
|
+
make_reverse_cat_fractions(2.0)
|
158
|
+
fastap(@f).size.should == 9
|
159
|
+
|
160
|
+
fp = Fasta.new(@f)
|
161
|
+
fp[0..2].each do |prt|
|
162
|
+
prt.header.should =~ /^>/
|
163
|
+
end
|
164
|
+
fp[3..5].each do |prt|
|
165
|
+
prt.header.should =~ /^>MINE_f0_/
|
166
|
+
end
|
167
|
+
fp[6..8].each do |prt|
|
168
|
+
prt.header.should =~ /^>MINE_f1_/
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def make_reverse_cat_fractions(fraction, prefix='MINE_')
|
173
|
+
do_it(:reverse, :fraction => fraction, :cat => true, :prefix => prefix)
|
174
|
+
end
|
175
|
+
|
176
|
+
def reverse_tryptic_peptides
|
177
|
+
do_it(:reverse, :tryptic_peptides => true)
|
178
|
+
end
|
179
|
+
|
180
|
+
it 'reverses tryptic peptides' do
|
181
|
+
reverse_tryptic_peptides
|
182
|
+
Fasta.from_string(@rev_tryptic_filestring).should == Fasta.new(@f)
|
183
|
+
end
|
184
|
+
|
185
|
+
def shuffle_tryptic_peptides
|
186
|
+
do_it(:shuffle, :tryptic_peptides => true)
|
187
|
+
end
|
188
|
+
|
189
|
+
it 'shuffles tryptic peptides (rerun on failure to recheck)' do
|
190
|
+
shuffle_tryptic_peptides
|
191
|
+
lns = fastap(@f).to_s.split("\n")
|
192
|
+
lns[1][2..3].should == 'KR'
|
193
|
+
lns[3][1..1].should == 'R'
|
194
|
+
lns[3].size.should == 'CRGATKKTAGRPMEK'.size
|
195
|
+
lns[3].should_not == 'CRGATKKTAGRPMEK' #sequence is randomised from original [remote chance of failure] rerun to make sure
|
196
|
+
end
|
197
|
+
|
198
|
+
def strlns(str)
|
199
|
+
str.split("\n")
|
200
|
+
end
|
201
|
+
|
202
|
+
def fastalns(fn)
|
203
|
+
fn.should exist
|
204
|
+
IO.read(fn).split("\n")
|
205
|
+
end
|
206
|
+
|
207
|
+
# returns the fasta object proteins
|
208
|
+
def fastap(fn)
|
209
|
+
@f.should exist
|
210
|
+
Fasta.new(fn).prots
|
211
|
+
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
describe FastaShaker, "by method call" do
|
216
|
+
|
217
|
+
before(:all) do
|
218
|
+
@shaker = FastaShaker.new
|
219
|
+
end
|
220
|
+
|
221
|
+
it_should_behave_like "a manipulator of a fasta file"
|
222
|
+
|
223
|
+
def do_it(method, additional_opts={})
|
224
|
+
opts = {:out => @f}
|
225
|
+
@shaker.send(method, @tmpfile, opts.merge(additional_opts))
|
226
|
+
end
|
227
|
+
|
228
|
+
end
|
229
|
+
|
230
|
+
|
231
|
+
describe FastaShaker, "by command line long args" do
|
232
|
+
before(:all) do
|
233
|
+
@progname = 'fasta_shaker.rb'
|
234
|
+
end
|
235
|
+
|
236
|
+
it_should_behave_like "a cmdline program"
|
237
|
+
it_should_behave_like "a manipulator of a fasta file"
|
238
|
+
|
239
|
+
# returns an array of the args
|
240
|
+
def opts_to_cmd_args(hash)
|
241
|
+
opts = []
|
242
|
+
hash.each do |k,v|
|
243
|
+
opts.push('--' + k.to_s)
|
244
|
+
unless (v == true) or (v == false)
|
245
|
+
opts.push(v)
|
246
|
+
end
|
247
|
+
end
|
248
|
+
opts
|
249
|
+
end
|
250
|
+
|
251
|
+
def do_it(method, additional_opts={})
|
252
|
+
opts = {:out => @f}
|
253
|
+
opts.merge!(additional_opts)
|
254
|
+
cmd = [@cmd, method, @tmpfile, *(opts_to_cmd_args(opts))].join(" ")
|
255
|
+
#puts cmd
|
256
|
+
system cmd
|
257
|
+
end
|
258
|
+
|
259
|
+
end
|