mspire 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/INSTALL
CHANGED
@@ -5,6 +5,7 @@ Prerequisites
|
|
5
5
|
Much of the package will work without any prerequisites at all. Some functionality may require additional Ruby packages or other converters. These are listed in current order of importance:
|
6
6
|
|
7
7
|
* [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
|
8
|
+
* [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
|
8
9
|
* ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
|
9
10
|
* [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
|
10
11
|
|
data/README
CHANGED
@@ -18,6 +18,31 @@ The project is currently focusing on the following:
|
|
18
18
|
* ProteinProphet
|
19
19
|
* Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
|
20
20
|
|
21
|
+
Features
|
22
|
+
--------
|
23
|
+
|
24
|
+
* mzXML (version 1 & 2) parsing
|
25
|
+
* mzData parsing
|
26
|
+
* bioworks .srf (binary files) reader
|
27
|
+
* bioworks to PeptideProphet input (pepXML files)
|
28
|
+
* lightweight APEX values parser
|
29
|
+
* histogram protein probabilities
|
30
|
+
* developed for Linux, should port easily to Windows or others
|
31
|
+
* protein summary views with custom false ID cutoff values
|
32
|
+
* conversion to OBI-Warp input files
|
33
|
+
|
34
|
+
Validation by:
|
35
|
+
* Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
|
36
|
+
* Amino acid (e.g., search for unblocked cysteines)
|
37
|
+
* Transmembrane prediction (Phobius or TopPred)
|
38
|
+
* Generic sample bias (e.g., low abundance/high abundance proteins)
|
39
|
+
* Defined sample
|
40
|
+
|
41
|
+
Working with:
|
42
|
+
* Bioworks (3.2-3.3.1)
|
43
|
+
* Peptide/Protein Prophet
|
44
|
+
* Easily extensible to others
|
45
|
+
|
21
46
|
Tutorials
|
22
47
|
---------
|
23
48
|
|
data/Rakefile
CHANGED
@@ -2,9 +2,9 @@ require 'rake'
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'rake/rdoctask'
|
4
4
|
require 'rake/gempackagetask'
|
5
|
-
require 'rake/testtask'
|
6
5
|
require 'rake/clean'
|
7
6
|
require 'fileutils'
|
7
|
+
require 'spec/rake/spectask'
|
8
8
|
|
9
9
|
###############################################
|
10
10
|
# GLOBAL
|
@@ -13,23 +13,25 @@ FL = FileList
|
|
13
13
|
|
14
14
|
NAME = "mspire"
|
15
15
|
|
16
|
-
|
17
|
-
|
16
|
+
$dependencies = %w(libjtp)
|
17
|
+
$tfiles_large = 'test_files_large'
|
18
|
+
changelog = "changelog.txt"
|
18
19
|
|
19
|
-
|
20
|
-
|
20
|
+
core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
|
21
|
+
big_dist_files = core_files + FL["test_files_large/**/*"]
|
21
22
|
|
22
|
-
dist_files =
|
23
|
+
dist_files = core_files
|
24
|
+
# dist_files = big_dist_files
|
23
25
|
|
24
26
|
###############################################
|
25
27
|
# ENVIRONMENT
|
26
28
|
###############################################
|
27
29
|
|
28
30
|
ENV["OS"] == "Windows_NT" ? WIN32 = true : WIN32 = false
|
29
|
-
gemcmd = "gem"
|
31
|
+
$gemcmd = "gem"
|
30
32
|
if WIN32
|
31
33
|
unless ENV["TERM"] == "cygwin"
|
32
|
-
gemcmd << ".cmd"
|
34
|
+
$gemcmd << ".cmd"
|
33
35
|
end
|
34
36
|
end
|
35
37
|
|
@@ -81,40 +83,123 @@ end
|
|
81
83
|
# TESTS
|
82
84
|
###############################################
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
86
|
+
namespace :spec do
|
87
|
+
task :autotest do
|
88
|
+
require './specs/rspec_autotest'
|
89
|
+
RspecAutotest.run
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
|
94
|
+
task :ensure_dependencies do
|
95
|
+
$dependencies.each do |dep|
|
96
|
+
unless `#{$gemcmd} list -l #{dep}`.include?(dep)
|
97
|
+
abort "ABORTING: install #{dep} before testing!"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
task :ensure_large_testfiles do
|
103
|
+
if !File.exist?($tfiles_large) and !ENV['SPEC_LARGE'].nil?
|
104
|
+
warn "Not running with large files since #{$tfiles_large} does not exist!"
|
105
|
+
warn "Removing SPEC_LARGE from ENV!"
|
106
|
+
ENV.delete('SPEC_LARGE')
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
task :ensure_gem_is_uninstalled do
|
111
|
+
reply = `#{$gemcmd} list -l #{NAME}`
|
87
112
|
if reply.include? NAME + " ("
|
88
113
|
puts "GOING to uninstall gem '#{NAME}' for testing"
|
89
114
|
if WIN32
|
90
|
-
%x( #{gemcmd} uninstall -x #{NAME} )
|
115
|
+
%x( #{$gemcmd} uninstall -x #{NAME} )
|
91
116
|
else
|
92
|
-
%x( sudo #{gemcmd} uninstall -x #{NAME} )
|
117
|
+
%x( sudo #{$gemcmd} uninstall -x #{NAME} )
|
93
118
|
end
|
94
119
|
end
|
95
|
-
# t.libs << "lib" ## done by default
|
96
|
-
t.test_files = FL["test/tc_*.rb"]
|
97
|
-
#t.verbose = true
|
98
120
|
end
|
99
121
|
|
122
|
+
desc "Run all specs"
|
123
|
+
Spec::Rake::SpecTask.new('spec') do |t|
|
124
|
+
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
125
|
+
Rake::Task[:ensure_dependencies].invoke
|
126
|
+
Rake::Task[:ensure_large_testfiles].invoke
|
127
|
+
t.libs = ['lib']
|
128
|
+
#t.ruby_opts = ['-I', 'lib']
|
129
|
+
t.spec_files = FileList['specs/**/*_spec.rb']
|
130
|
+
end
|
100
131
|
|
132
|
+
desc "Run all specs"
|
133
|
+
Spec::Rake::SpecTask.new('specl') do |t|
|
134
|
+
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
135
|
+
Rake::Task[:ensure_dependencies].invoke
|
136
|
+
Rake::Task[:ensure_large_testfiles].invoke
|
137
|
+
t.spec_files = FileList['specs/**/*_spec.rb']
|
138
|
+
t.libs = ['lib']
|
139
|
+
#t.ruby_opts = ['-I', 'lib']
|
140
|
+
t.spec_opts = ['--format', 'specdoc' ]
|
141
|
+
end
|
101
142
|
|
102
|
-
desc "Run
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
143
|
+
desc "Run all specs with RCov"
|
144
|
+
Spec::Rake::SpecTask.new('rcov') do |t|
|
145
|
+
Rake::Task[:ensure_gem_is_uninstalled].invoke
|
146
|
+
Rake::Task[:ensure_dependencies].invoke
|
147
|
+
Rake::Task[:ensure_large_testfiles].invoke
|
148
|
+
t.spec_files = FileList['specs/**/*_spec.rb']
|
149
|
+
t.rcov = true
|
150
|
+
t.libs = ['lib']
|
151
|
+
#t.ruby_opts = ['-I', 'lib']
|
152
|
+
t.rcov_opts = ['--exclude', 'specs']
|
153
|
+
end
|
108
154
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
155
|
+
task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_testfiles] do
|
156
|
+
# files that match a key word
|
157
|
+
files_to_run = ENV['SPEC'] || FileList['specs/**/*_spec.rb']
|
158
|
+
if ENV['SPECM']
|
159
|
+
files_to_run = files_to_run.select do |file|
|
160
|
+
file.include?(ENV['SPECM'])
|
161
|
+
end
|
162
|
+
end
|
163
|
+
files_to_run.each do |spc|
|
164
|
+
puts "------ SPEC=#{spc} ------"
|
165
|
+
system "ruby -I lib -S spec #{spc} --format specdoc"
|
114
166
|
end
|
115
|
-
#t.verbose = true
|
116
167
|
end
|
117
168
|
|
169
|
+
#Spec::Rake::SpecTask.new(:spec) do |t|
|
170
|
+
# uninstall_gem
|
171
|
+
# t.spec_files = FileList['spec/**/spec_*.rb']
|
172
|
+
# t.libs = FileList['lib']
|
173
|
+
# t.spec_opts = ['--format', 'specdoc']
|
174
|
+
#end
|
175
|
+
|
176
|
+
|
177
|
+
#desc "Run unit tests."
|
178
|
+
#Rake::TestTask.new do |t|
|
179
|
+
# uninstall_gem
|
180
|
+
# # t.libs << "lib" ## done by default
|
181
|
+
# t.test_files = FL["test/tc_*.rb"]
|
182
|
+
# #t.verbose = true
|
183
|
+
#end
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
#desc "Run unit tests individual on each test"
|
188
|
+
#task :test_ind do |t|
|
189
|
+
# reply = `#{$gemcmd} list -l #{NAME}`
|
190
|
+
# if reply.include? NAME + " ("
|
191
|
+
# %x( sudo #{$gemcmd} uninstall -x #{NAME} )
|
192
|
+
# end
|
193
|
+
#
|
194
|
+
# # t.libs << "lib" ## done by default
|
195
|
+
# test_files = FL["test/tc_*.rb"]
|
196
|
+
# test_files.each do |file|
|
197
|
+
# puts "TESTING: #{file.sub(/test\//,'')}"
|
198
|
+
# puts `ruby -I lib #{file}`
|
199
|
+
# end
|
200
|
+
# #t.verbose = true
|
201
|
+
#end
|
202
|
+
|
118
203
|
|
119
204
|
|
120
205
|
|
@@ -140,7 +225,7 @@ tm = Time.now
|
|
140
225
|
spec = Gem::Specification.new do |s|
|
141
226
|
s.platform = Gem::Platform::RUBY
|
142
227
|
s.name = NAME
|
143
|
-
s.version =
|
228
|
+
s.version = IO.readlines(changelog).grep(/##.*version/).pop.split(/\s+/).last.chomp
|
144
229
|
s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
|
145
230
|
s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
|
146
231
|
s.email = "jprince@icmb.utexas.edu"
|
@@ -149,17 +234,19 @@ spec = Gem::Specification.new do |s|
|
|
149
234
|
s.description = "mspire is for working with mass spectrometry proteomics data"
|
150
235
|
s.has_rdoc = true
|
151
236
|
s.authors = ["John Prince"]
|
152
|
-
s.files =
|
237
|
+
s.files = dist_files
|
153
238
|
s.rdoc_options = rdoc_options
|
154
239
|
s.extra_rdoc_files = rdoc_extra_includes
|
155
240
|
s.executables = FL["bin/*"].map {|file| File.basename(file) }
|
156
|
-
s.add_dependency('libjtp', '~> 0.
|
157
|
-
s.
|
241
|
+
s.add_dependency('libjtp', '~> 0.2.5')
|
242
|
+
s.add_dependency('axml')
|
243
|
+
s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
|
158
244
|
s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
|
159
245
|
s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
|
160
246
|
s.requirements << '"rake" is useful for development'
|
161
247
|
s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
|
162
|
-
s.test_files = FL["test/tc_*.rb"]
|
248
|
+
#s.test_files = FL["test/tc_*.rb"]
|
249
|
+
s.test_files = FL["specs/**/*_spec.rb"]
|
163
250
|
end
|
164
251
|
|
165
252
|
desc "Create packages."
|
@@ -180,20 +267,22 @@ end
|
|
180
267
|
# t.package_task
|
181
268
|
#end
|
182
269
|
|
183
|
-
|
184
|
-
|
270
|
+
task :remove_pkg do
|
271
|
+
FileUtils.rm_rf "pkg"
|
272
|
+
end
|
185
273
|
|
186
274
|
task :install => [:reinstall]
|
187
275
|
|
188
276
|
desc "uninstalls the package, packages a fresh one, and installs"
|
189
|
-
task :reinstall => [:clean, :package] do
|
190
|
-
reply = `#{gemcmd} list -l #{NAME}`
|
191
|
-
if reply.include?
|
192
|
-
%x( #{gemcmd} uninstall -x #{NAME} )
|
277
|
+
task :reinstall => [:remove_pkg, :clean, :package] do
|
278
|
+
reply = `#{$gemcmd} list -l #{NAME}`
|
279
|
+
if reply.include?(NAME + " (")
|
280
|
+
%x( #{$gemcmd} uninstall -x #{NAME} )
|
193
281
|
end
|
194
282
|
FileUtils.cd("pkg") do
|
195
|
-
%x( #{gemcmd} install #{NAME} )
|
283
|
+
%x( #{$gemcmd} install #{NAME}*.gem )
|
196
284
|
end
|
285
|
+
|
197
286
|
end
|
198
287
|
|
199
288
|
###############################################
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
2
|
|
3
|
-
|
3
|
+
require 'fasta'
|
4
4
|
require 'spec_id/aa_freqs'
|
5
5
|
|
6
6
|
if ARGV.size < 1
|
@@ -10,7 +10,7 @@ if ARGV.size < 1
|
|
10
10
|
end
|
11
11
|
|
12
12
|
ARGV.each do |file|
|
13
|
-
obj = SpecID::AAFreqs.new(file)
|
13
|
+
obj = SpecID::AAFreqs.new(Fasta.new(file))
|
14
14
|
puts file
|
15
15
|
obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
|
16
16
|
puts "#{k}: #{v}"
|
data/bin/bioworks_to_pepxml.rb
CHANGED
data/bin/fasta_shaker.rb
CHANGED
@@ -1,100 +1,5 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
-
# This is my second attempt at writing a simple interface for messing with
|
4
|
-
# fasta files. Acheiving simplicity (and power) is challenging. It usually
|
5
|
-
# only happens on the second (or sometimes more) try. Of course, in
|
6
|
-
# retrospect the simple solution seems sooo obvious. But its deceptive.
|
7
|
-
# It takes work to acheive simplicity for complex tasks. That's my thought
|
8
|
-
# for the day.
|
9
|
-
|
10
|
-
# fasta_shaker as in a salt shaker. Shake up your fasta proteins and let them
|
11
|
-
# season your dinner (hopefully a protein dinner). Mmmm. Don't they taste
|
12
|
-
# good all mixed up? If you want, you can think of it as a pepper shaker.
|
13
|
-
# I don't usually comment on my scripts (in my script, anyway), but this one
|
14
|
-
# came out so nice and clean that I feel like I have room to spare.
|
15
3
|
|
16
4
|
require 'fasta'
|
17
|
-
|
18
|
-
|
19
|
-
opt = {}
|
20
|
-
|
21
|
-
opts = OptionParser.new do |op|
|
22
|
-
prog = File.basename(__FILE__)
|
23
|
-
op.banner = "usage: #{prog} <method> [OPTIONS] <file>.fasta"
|
24
|
-
op.separator " <method> = reverse | shuffle"
|
25
|
-
op.on("-c", "--cat", "catenates the output to copy of original") {|v| opt[:cat] = v }
|
26
|
-
op.on("-o", "--out <string>", "name of output file (default is descriptive)") {|v| opt[:out] = v }
|
27
|
-
op.on("-p", "--prefix <string>", "give a header prefix to modified prots") {|v| opt[:prefix] = v }
|
28
|
-
op.on("-f", "--fraction <float>", "creates some fraction of proteins") {|v| opt[:fraction] = v }
|
29
|
-
op.separator " [if fraction > 1 then the tag 'f<frac#>_' prefixed to proteins"
|
30
|
-
op.separator " (after any given prefix) so that proteins are unique]"
|
31
|
-
op.on("--tryptic_peptides", "applies method to [KR][^P] peptides") {|v| opt[:tryptic_peptides] = v }
|
32
|
-
|
33
|
-
op.separator "EXAMPLES: "
|
34
|
-
op.separator " #{prog} reverse file.fasta -o protein_aa_sequence_reversed.fasta"
|
35
|
-
op.separator " #{prog} shuffle file.fasta -o protein_aa_sequence_shuffled.fasta"
|
36
|
-
op.separator " #{prog} shuffle file.fasta -c -p SH_ -o normal_cat_shuffled_with_prefix.fasta"
|
37
|
-
op.separator " #{prog} reverse file.fasta --tryptic_peptides tryptic_peptides_reversed.fasta"
|
38
|
-
end
|
39
|
-
|
40
|
-
opts.parse!
|
41
|
-
|
42
|
-
if ARGV.size < 2
|
43
|
-
puts opts
|
44
|
-
exit
|
45
|
-
end
|
46
|
-
|
47
|
-
(method, file) = ARGV
|
48
|
-
|
49
|
-
if opt[:cat] && !opt[:prefix]
|
50
|
-
puts "WARNING: concatenated proteins don't have unique headers"
|
51
|
-
puts "[you probably wanted to use the '--prefix' option!]"
|
52
|
-
end
|
53
|
-
|
54
|
-
# OUT filename:
|
55
|
-
unless opt[:out]
|
56
|
-
filebase = file.sub(/\..*$/,'')
|
57
|
-
parts = [filebase]
|
58
|
-
parts << 'cat' if opt[:cat]
|
59
|
-
parts << method
|
60
|
-
parts << 'prefix' << opt[:prefix] if opt[:prefix]
|
61
|
-
parts << 'fraction' << opt[:fraction] if opt[:fraction]
|
62
|
-
parts << 'tryptic_peptides' if opt[:tryptic_peptides]
|
63
|
-
opt[:out] = parts.join("_") << ".fasta"
|
64
|
-
end
|
65
|
-
|
66
|
-
## READ the file
|
67
|
-
fasta = Fasta.new.read_file(file)
|
68
|
-
|
69
|
-
## CAT (save an original copy)
|
70
|
-
fasta_orig = fasta.dup if opt[:cat]
|
71
|
-
|
72
|
-
## FRACTION the proteins
|
73
|
-
if f = opt[:fraction]
|
74
|
-
prefix = nil
|
75
|
-
f = f.to_f
|
76
|
-
if f > 1.0
|
77
|
-
prefix = proc {|cnt| "f#{cnt}_" }
|
78
|
-
end
|
79
|
-
fasta = fasta.fraction_of_prots(f, prefix)
|
80
|
-
end
|
81
|
-
|
82
|
-
## PREFIX the proteins
|
83
|
-
if pre = opt[:prefix]
|
84
|
-
fasta.header_prefix!(pre)
|
85
|
-
end
|
86
|
-
|
87
|
-
## MODIFY the proteins
|
88
|
-
fasta.aaseq!((method + '!').to_sym, opt[:tryptic_peptides])
|
89
|
-
|
90
|
-
## CAT (finish it up)
|
91
|
-
if opt[:cat]
|
92
|
-
fasta_orig << fasta
|
93
|
-
fasta = fasta_orig
|
94
|
-
end
|
95
|
-
|
96
|
-
## WRITE out the file
|
97
|
-
fasta.write_file(opt[:out])
|
98
|
-
|
99
|
-
|
100
|
-
|
5
|
+
FastaShaker.shake_from_argv(ARGV)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'ms/msrun'
|
4
4
|
require 'optparse'
|
5
5
|
require 'ostruct'
|
6
6
|
require 'lmat'
|
@@ -14,7 +14,8 @@ opt[:inc_mz] = 1.0
|
|
14
14
|
|
15
15
|
# get options:
|
16
16
|
opts = OptionParser.new do |op|
|
17
|
-
op.banner = "usage: #{File.basename(__FILE__)} [options]
|
17
|
+
op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
|
18
|
+
op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
|
18
19
|
op.separator ""
|
19
20
|
op.separator "(sums m/z values that round to the same bin)"
|
20
21
|
op.separator ""
|
@@ -32,10 +33,10 @@ if ARGV.size < 1
|
|
32
33
|
end
|
33
34
|
|
34
35
|
ARGV.each do |file|
|
35
|
-
|
36
|
-
|
37
|
-
(
|
38
|
-
times
|
36
|
+
msrun = MS::MSRun.new(file)
|
37
|
+
mslevel = 1
|
38
|
+
(start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
|
39
|
+
(times, spectra) = msrun.times_and_spectra(mslevel)
|
39
40
|
args = {
|
40
41
|
:start_mz => start_mz,
|
41
42
|
:end_mz => end_mz,
|
@@ -45,7 +46,7 @@ ARGV.each do |file|
|
|
45
46
|
:inc_tm => nil,
|
46
47
|
}
|
47
48
|
args.merge!(opt)
|
48
|
-
lmat = LMat.new.
|
49
|
+
lmat = LMat.new.from_times_and_spectra(times, spectra, args)
|
49
50
|
outfile = file.sub(/\.mzXML$/, opt[:newext])
|
50
51
|
if args[:ascii]
|
51
52
|
outfile << "a"
|
data/bin/raw_to_mzXML.rb
CHANGED
@@ -21,11 +21,11 @@ if ARGV.size == 0
|
|
21
21
|
exit
|
22
22
|
end
|
23
23
|
|
24
|
-
converter =
|
24
|
+
converter = MS::MzXML.find_mzxml_converter
|
25
25
|
if converter
|
26
26
|
$stderr.puts "using #{converter} to convert files"
|
27
27
|
else
|
28
|
-
puts "cannot find [#{
|
28
|
+
puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
|
29
29
|
puts ENV['PATH'].split(/[:;]/).join(", ")
|
30
30
|
abort
|
31
31
|
end
|
data/bin/srf_group.rb
CHANGED
data/bin/srf_to_sqt.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'spec_id/srf'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
|
7
|
+
opt = {}
|
8
|
+
opt['db-info'] = false
|
9
|
+
opt['db-path'] = nil
|
10
|
+
opt['filter'] = true
|
11
|
+
opts = OptionParser.new do |op|
|
12
|
+
op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
|
13
|
+
op.separator "outputs: <file>.sqt ..."
|
14
|
+
op.separator ""
|
15
|
+
op.separator "OPTIONS"
|
16
|
+
op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt['db-info'] = v }
|
17
|
+
op.on("-p", "--db-path <path_to_dir>", "if your database path has changed",
|
18
|
+
"and you want db-info, then give the",
|
19
|
+
"path to the new *directory*",
|
20
|
+
"e.g. /my/new/path") {|v| opt['db-path'] = v }
|
21
|
+
op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt['db-update'] = v }
|
22
|
+
op.on("-n", "--no-filter", "by default, pephit must be within",
|
23
|
+
"peptide_mass_tolerance (defined in params)",
|
24
|
+
"to be displayed. Turns this off.") {|v| opt['filter'] = false}
|
25
|
+
op.on("-r", "--round", "round floating point values reasonably") {|v| opt['round'] = v }
|
26
|
+
end
|
27
|
+
|
28
|
+
opts.parse!
|
29
|
+
|
30
|
+
if ARGV.size == 0
|
31
|
+
puts opts.to_s
|
32
|
+
exit
|
33
|
+
end
|
34
|
+
|
35
|
+
ARGV.each do |file|
|
36
|
+
abort "file #{file} must be named .srf" if file !~ /\.srf$/i
|
37
|
+
new_filename = file.sub(/\.srf$/i, '.sqt')
|
38
|
+
SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename, :db_info => opt['db-info'], :new_db_path => opt['db-path'], :update_db_path => opt['db-update'], :round => opt['round'])
|
39
|
+
end
|
40
|
+
|
data/changelog.txt
CHANGED
@@ -54,3 +54,71 @@ a prefix option
|
|
54
54
|
|
55
55
|
in protein_summary.rb added handling for proteins with no annotation. (either
|
56
56
|
display NA or use gi2annnot to grab them from NCBI)
|
57
|
+
|
58
|
+
## version 0.2.5
|
59
|
+
|
60
|
+
renamed prep_list in roc (potential breaks in code)
|
61
|
+
|
62
|
+
## version 0.2.6
|
63
|
+
|
64
|
+
1. Massive refactorization of filtering and validation. Validation objects are
|
65
|
+
created and then can be used to validate just about anything.
|
66
|
+
2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
|
67
|
+
(including readw broken output), and mzData (even Thermo's broken output).
|
68
|
+
4. Moved all tests to specs (rspec).
|
69
|
+
5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
|
70
|
+
2.X)
|
71
|
+
|
72
|
+
Bugfixes:
|
73
|
+
1. The search_summary 'base_name' in pepxml output was incorrect (this did not
|
74
|
+
appear to influence our analyses, however). Fixed.
|
75
|
+
2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
|
76
|
+
missed cleavages if the last amino acid was a cut point. Fixed.
|
77
|
+
|
78
|
+
## version 0.2.7
|
79
|
+
|
80
|
+
1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
|
81
|
+
Now, the sample enzyme is set explicitly from the params file and the option
|
82
|
+
is not available. This can give more accurate pepxml files than from
|
83
|
+
previous versions, depending on your enzyme.
|
84
|
+
|
85
|
+
## version 0.2.9
|
86
|
+
|
87
|
+
1. Added support for phobius transmembrane predictions
|
88
|
+
2. have filter_and_validate.rb working well (multiple validators allowed).
|
89
|
+
3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
|
90
|
+
4. Added a bias validator
|
91
|
+
|
92
|
+
## version 0.2.10
|
93
|
+
|
94
|
+
1. Fixed --hits_separate flag in spec_id/filter
|
95
|
+
|
96
|
+
## version 0.2.11
|
97
|
+
|
98
|
+
1. Added prob precision support and reorganized filter_and_validate libs
|
99
|
+
|
100
|
+
## version 0.2.12
|
101
|
+
|
102
|
+
1. Fixed bug in transmem for prob and others.
|
103
|
+
2. Can use axml (XMLParser based) or libxml depending on availability
|
104
|
+
|
105
|
+
## version 0.2.13
|
106
|
+
|
107
|
+
1. Fixed issue with --hits_separate
|
108
|
+
2. filter_and_validate.rb requires decoy validator if decoy proteins
|
109
|
+
(refactored code)
|
110
|
+
|
111
|
+
## version 0.2.14
|
112
|
+
|
113
|
+
1. Can read PeptideProphet files (should be able to read pepxml files, too)
|
114
|
+
2. API change: Some slight modifications to the Sequest::PepXML object
|
115
|
+
interfaces and implementations (using ArrayClass)
|
116
|
+
|
117
|
+
## version 0.2.15
|
118
|
+
|
119
|
+
1. can convert srf files to sqt files
|
120
|
+
|
121
|
+
## version 0.3.0
|
122
|
+
|
123
|
+
1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
|
124
|
+
2. SQT export is correct and works at least on 3.2 and 3.3.1.
|
data/lib/align/chams.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
|
2
|
-
require '
|
2
|
+
require 'ms/msrun'
|
3
3
|
|
4
4
|
module Align; end
|
5
5
|
class Align::CHAMS
|
@@ -8,7 +8,9 @@ class Align::CHAMS
|
|
8
8
|
# Scan1 Scan2 Edge_cost Path_cost Edge_direction
|
9
9
|
attr_accessor :avg_score, :time_mscans, :time_nscans, :mscans, :nscans, :edge_costs, :path_costs, :directions
|
10
10
|
|
11
|
-
|
11
|
+
# requires an object that will respond to [<scan_num>] to give time
|
12
|
+
# (seconds) for each file
|
13
|
+
def initialize(chams_file, time_by_scan_num1, time_by_scan_num2)
|
12
14
|
@time_mscans = []
|
13
15
|
@time_nscans = []
|
14
16
|
@mscans = []
|
@@ -17,13 +19,11 @@ class Align::CHAMS
|
|
17
19
|
@path_costs = []
|
18
20
|
@directions = []
|
19
21
|
read_chams_file(chams_file)
|
20
|
-
scans_by_num1 = Spec::MSRunIndex.new(timeIndex_file1).scans_by_num
|
21
|
-
scans_by_num2 = Spec::MSRunIndex.new(timeIndex_file2).scans_by_num
|
22
22
|
@mscans.each_with_index do |scan,i|
|
23
|
-
@time_mscans[i] =
|
23
|
+
@time_mscans[i] = time_by_scan_num1[scan]
|
24
24
|
end
|
25
25
|
@nscans.each_with_index do |scan,i|
|
26
|
-
@time_nscans[i] =
|
26
|
+
@time_nscans[i] = time_by_scan_num2[scan]
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
data/lib/align.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
|
2
|
-
require '
|
3
|
-
require '
|
2
|
+
#require 'ms/parser'
|
3
|
+
#require 'ms/parser/mzxml'
|
4
|
+
require 'ms/msrun'
|
4
5
|
require 'spec_id/proph'
|
5
6
|
require 'vec'
|
6
7
|
|
@@ -18,7 +19,7 @@ class Align
|
|
18
19
|
|
19
20
|
## Create scan indices on msrun name
|
20
21
|
if mztimes.class != Array ; mztimes = [mztimes] end
|
21
|
-
msrun_indices = mztimes.collect do |file|
|
22
|
+
msrun_indices = mztimes.collect do |file| MS::MSRunIndex.new(file) end
|
22
23
|
scanindex_by_basename_noext = {}
|
23
24
|
msrun_indices.each do |runindex|
|
24
25
|
scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num
|