protk 1.2.6.pre5 → 1.3.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +84 -45
- data/bin/add_retention_times.rb +9 -5
- data/bin/augustus_to_proteindb.rb +7 -11
- data/bin/interprophet.rb +28 -46
- data/bin/make_decoy.rb +16 -48
- data/bin/mascot_search.rb +57 -71
- data/bin/mascot_to_pepxml.rb +13 -26
- data/bin/msgfplus_search.rb +70 -107
- data/bin/omssa_search.rb +52 -109
- data/bin/peptide_prophet.rb +44 -119
- data/bin/pepxml_to_table.rb +24 -27
- data/bin/protein_prophet.rb +22 -82
- data/bin/protxml_to_gff.rb +22 -519
- data/bin/protxml_to_table.rb +2 -16
- data/bin/sixframe.rb +10 -32
- data/bin/tandem_search.rb +30 -403
- data/bin/tandem_to_pepxml.rb +43 -0
- data/bin/unimod_to_loc.rb +1 -1
- data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
- data/ext/decoymaker/extconf.rb +3 -0
- data/lib/protk/constants.rb +16 -2
- data/lib/protk/data/default_config.yml +2 -1
- data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
- data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
- data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
- data/lib/protk/data/tandem_params.xml +17 -54
- data/lib/protk/fastadb.rb +2 -2
- data/lib/protk/prophet_tool.rb +1 -1
- data/lib/protk/protxml_to_gff_tool.rb +474 -0
- data/lib/protk/search_tool.rb +58 -103
- data/lib/protk/setup_rakefile.rake +9 -5
- data/lib/protk/tandem_search_tool.rb +256 -0
- data/lib/protk/tool.rb +85 -104
- data/lib/protk.rb +1 -6
- metadata +24 -103
- data/bin/annotate_ids.rb +0 -59
- data/bin/asapratio.rb +0 -27
- data/bin/blastxml_to_table.rb +0 -119
- data/bin/correct_omssa_retention_times.rb +0 -27
- data/bin/feature_finder.rb +0 -95
- data/bin/file_convert.rb +0 -164
- data/bin/generate_omssa_loc.rb +0 -42
- data/bin/gffmerge.rb +0 -208
- data/bin/libra.rb +0 -70
- data/bin/toppas_pipeline.rb +0 -84
- data/bin/uniprot_annotation.rb +0 -141
- data/bin/xls_to_table.rb +0 -52
- data/bin/xpress.rb +0 -27
- data/ext/protk/decoymaker/extconf.rb +0 -3
- data/ext/protk/simplealign/extconf.rb +0 -3
- data/lib/protk/biotools_excel_converter.rb +0 -60
- data/lib/protk/eupathdb_gene_information_table.rb +0 -158
- data/lib/protk/gapped_aligner.rb +0 -264
- data/lib/protk/protein_annotator.rb +0 -646
- data/lib/protk/spreadsheet_extensions.rb +0 -79
- data/lib/protk/xtandem_defaults.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5aa7ddfc73e97717cd296b99ad7796420af4af80
|
4
|
+
data.tar.gz: 8872996c6ff14306573ca698736dac00e34c43fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b9162aaf5c6ee9c54e0f089e2499b47735eb12c325e35564b7b312d0553e5b09e7ea37114effd033b2b36f4646ad3080124cb31399288d5aac849fd8df894b8
|
7
|
+
data.tar.gz: 7f65f321c1f5909af31a059e843a96a8ea9c9522dac6423abad2d6987d733e766907630cb8a5a31b616cc26830d6e66219106fd66a239e1d19f69eabb6a90e26
|
data/README.md
CHANGED
@@ -7,20 +7,44 @@
|
|
7
7
|
***
|
8
8
|
## What is it?
|
9
9
|
|
10
|
-
Protk is a
|
10
|
+
Protk is a suite of tools for proteomics. It aims to present a simple and consistent command-line interface across otherwise disparate third party tools. The following analysis tasks are currently supported;
|
11
|
+
|
12
|
+
- Tandem MS search with X!Tandem, Mascot, OMSSA and MS-GF+
|
13
|
+
- Peptide and Protein inference with Peptide Prophet, iProphet and Protein Prophet
|
14
|
+
- Conversion of pepXML or protXML to tabular format
|
11
15
|
|
12
16
|
|
13
17
|
## Installation
|
14
18
|
|
15
|
-
|
19
|
+
Protk is a ruby gem and requires ruby 2.0 or higher with support for libxml2. To avoid installation problems we recommend using [rvm](https://rvm.io) to install ruby.
|
16
20
|
|
17
21
|
``` shell
|
18
22
|
gem install protk
|
19
23
|
```
|
20
24
|
|
21
|
-
## Configuration
|
22
25
|
|
23
|
-
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
Protk consists of a suite of small ruby programs. After installing the protk rubygem the following should be available for running in your shell. Help can be obtained on using any program by typing its name without any arguments. Note that many protk programs require third party tools to be installed, see [Configuration](#user-content-configuration) below for instructions on installing these.
|
30
|
+
|
31
|
+
- `tandem_search.rb` Run an X!Tandem search. Requires [X!Tandem](http://www.thegpm.org/TANDEM/)
|
32
|
+
- `mascot_search.rb` Run a Mascot search. Requires a [Mascot](http://www.matrixscience.com/server.html) server
|
33
|
+
- `msgfplus_search.rb` Run an MS-GF+ search. Requires [MS-GF+](https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355)
|
34
|
+
- `omssa_search.rb` Run an OMSSA search. Requires [OMSSA](ftp://ftp.ncbi.nih.gov/pub/lewisg/omssa/CURRENT/)
|
35
|
+
- `peptide_prophet.rb` Perform peptide inference based on search engine scores. Requires the [TPP](http://sourceforge.net/projects/sashimi/files/Trans-Proteomic%20Pipeline%20%28TPP%29/)
|
36
|
+
- `interprophet.rb` Perform peptide inference across multiple search engines. Requires the [TPP](http://sourceforge.net/projects/sashimi/files/Trans-Proteomic%20Pipeline%20%28TPP%29/)
|
37
|
+
- `protein_prophet.rb` Perform protein inference. Requires the [TPP](http://sourceforge.net/projects/sashimi/files/Trans-Proteomic%20Pipeline%20%28TPP%29/)
|
38
|
+
- `mascot_to_pepxml.rb` Convert raw mascot dat files to pepXML. Requires the [TPP](http://sourceforge.net/projects/sashimi/files/Trans-Proteomic%20Pipeline%20%28TPP%29/)
|
39
|
+
- `tandem_to_pepxml.rb` Convert raw X!Tandem outputs to pepXML. Requires the [TPP](http://sourceforge.net/projects/sashimi/files/Trans-Proteomic%20Pipeline%20%28TPP%29/)
|
40
|
+
- `pepxml_to_table.rb` Convert pepXML to tabular format
|
41
|
+
- `protxml_to_table.rb` Convert protXML to tabular format
|
42
|
+
- `make_decoy.rb` Generate semi-random decoy sequences
|
43
|
+
- `sixframe.rb` Generate six-frame translations of DNA sequences
|
44
|
+
- `protk_setup.rb` Install third party dependencies
|
45
|
+
- `manage_db.rb` Manage protein databases
|
46
|
+
|
47
|
+
## Configuration
|
24
48
|
|
25
49
|
Protk includes a setup tool to install various third party proteomics tools such as the TPP, OMSSA, MS-GF+, Proteowizard. If this tool is used it installs everything under `.protk/tools`. To perform such an installation use;
|
26
50
|
|
@@ -28,16 +52,12 @@ Protk includes a setup tool to install various third party proteomics tools such
|
|
28
52
|
protk_setup.rb tpp omssa blast msgfplus pwiz
|
29
53
|
```
|
30
54
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
Instead off using protk_setup.rb all it might be preferable to only install some of the protk tool dependencies. 'all' is just an alias for the following full target list, any of which can be omitted with the consequence that tools depending on that component will not be available.
|
35
|
-
|
55
|
+
By default protk will install tools and databases into `.protk` in your home directory. If this is not desirable you can change the protk root default by setting the environment variable `PROTK_INSTALL_DIR`. If you prefer to install the tools yourself protk will find them provided they are included in your `$PATH`. Those executables will be used as a fallback if nothing is available under the `.protk` installation directory.
|
36
56
|
|
37
57
|
|
38
58
|
## Sequence databases
|
39
59
|
|
40
|
-
|
60
|
+
Protk also includes a script called manage_db.rb to install specific sequence databases for use by the search engines if desired. Databases installed via manage_db.rb can be invoked using a shorthand name rather than a full path to a fasta file, and Protk also provides some automation for database upgrades. Protk comes with several predefined database configurations. For example, to install a database consisting of human entries from Swissprot plus known contaminants use the following commands;
|
41
61
|
|
42
62
|
```sh
|
43
63
|
manage_db.rb add --predefined crap
|
@@ -52,54 +72,73 @@ You should now be able to run database searches, specifying this database by usi
|
|
52
72
|
manage_db.rb add --ftp-source 'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt' --include-filters '/OS=Homo\ssapiens/' --id-regex 'sp\|.*\|(.*?)\s' --add-decoys --make-blast-index --archive-old sphuman
|
53
73
|
```
|
54
74
|
|
55
|
-
##
|
75
|
+
## Galaxy Integration
|
56
76
|
|
57
|
-
|
77
|
+
Many protk tools have equivalent galaxy wrappers available on the [galaxy toolshed](http://toolshed.g2.bx.psu.edu/). In order for these tools to work you will also need to make sure that protk, as well as the necessary third party dependencies are available to galaxy during tool execution. If you install protk using the default system ruby (without rvm) this will probably just work, however you will lose the ability to run specific versions of tools against specific versions of protk. The recommended method of installing protk for use with galaxy is as follows;
|
58
78
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
79
|
+
1. Ensure you have a working install of galaxy.
|
80
|
+
|
81
|
+
[Full instructions](https://wiki.galaxyproject.org/Admin/GetGalaxy) are available on the official Galaxy project wiki page. We assume you have galaxy installed in a directory called galaxy-dist.
|
82
|
+
|
83
|
+
2. Install rvm if you haven't already. See [here](https://rvm.io/) for more information.
|
84
|
+
|
85
|
+
```bash
|
86
|
+
curl -sSL https://get.rvm.io | bash -s stable
|
87
|
+
```
|
88
|
+
|
89
|
+
3. Install a compatible version of ruby using rvm. Ruby 2.0 or higher is required
|
90
|
+
|
91
|
+
```bash
|
92
|
+
rvm install 2.1
|
93
|
+
```
|
63
94
|
|
64
|
-
|
95
|
+
4. Install protk in an isolated gemset using rvm.
|
65
96
|
|
66
|
-
|
97
|
+
This sets up an isolated environment where only a specific version of protk is available. We name the environment according to the protk version (1.2.6 in this example).
|
67
98
|
|
68
|
-
|
99
|
+
```bash
|
100
|
+
rvm 2.1
|
101
|
+
rvm gemset create protk1.2.6
|
102
|
+
rvm use 2.1@protk1.2.6
|
103
|
+
gem install protk -v 1.2.6
|
104
|
+
```
|
69
105
|
|
70
|
-
|
106
|
+
5. Configure Galaxy's tool dependency directory.
|
71
107
|
|
72
|
-
|
108
|
+
Create a directory for galaxy tool dependencies. This must be outside the galaxy-dist directory. I usually create a directory called tool_dependencies alongside galaxy-dist.
|
109
|
+
Open the file `universe_wsgi.ini` in the galaxy-dist directory and set the configuration option `tool_dependency_dir` to point to the directory you just created, eg;
|
73
110
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
sh run.sh
|
78
|
-
```
|
111
|
+
```
|
112
|
+
tool_dependency_dir = ../tool_dependencies
|
113
|
+
```
|
79
114
|
|
115
|
+
6. Create a tool dependency that sets up protk in the environment created by rvm
|
80
116
|
|
81
|
-
|
117
|
+
In this example we create the environment for protk `1.2.6` as this was the version installed in step 4 above.
|
82
118
|
|
83
|
-
|
84
|
-
|
85
|
-
|
119
|
+
```bash
|
120
|
+
cd <tool_dependency_dir>
|
121
|
+
mkdir protk
|
122
|
+
cd protk
|
123
|
+
mkdir 1.2.6
|
124
|
+
ln -s 1.2.6 default
|
125
|
+
echo ". `rvm env --path 2.1@protk1.2.6`" > 1.2.6/env.sh
|
126
|
+
```
|
86
127
|
|
87
|
-
|
88
|
-
cd <tool_dependency_dir>
|
89
|
-
mkdir protkgem
|
90
|
-
cd protkgem
|
91
|
-
mkdir rvm193
|
92
|
-
ln -s rvm193 default
|
93
|
-
cd default
|
94
|
-
ln -s ~/.protk/galaxy/env.sh env.sh
|
95
|
-
```
|
128
|
+
7. Keep things up to date
|
96
129
|
|
97
|
-
|
98
|
-
- Create a file named `config.yml` inside your .protk directory
|
99
|
-
- Add the line `galaxy_root: /home/galaxy/galaxy-dist` to config.yml substituting the actual path to the root directory of your galaxy installation
|
130
|
+
When new versions of galaxy tools are released they may change the version of protk that is required. Check the release notes on the tool to see what is needed. For example, if upgrading to version 1.2.7 you would do the following;
|
100
131
|
|
101
|
-
|
102
|
-
|
103
|
-
|
132
|
+
```bash
|
133
|
+
rvm 2.1
|
134
|
+
rvm gemset create protk1.2.7
|
135
|
+
rvm use 2.1@protk1.2.7
|
136
|
+
gem install protk -v 1.2.7
|
137
|
+
cd <tool_dependency_dir>/protk/
|
138
|
+
mkdir 1.2.7
|
139
|
+
rvmenv=`rvm env --path 2.1@protk1.2.7`
|
140
|
+
echo ". $rvmenv" > 1.2.7/env.sh
|
141
|
+
ln -s 1.2.7 default
|
142
|
+
```
|
104
143
|
|
105
144
|
|
data/bin/add_retention_times.rb
CHANGED
@@ -17,14 +17,14 @@ include LibXML
|
|
17
17
|
#
|
18
18
|
genv=Constants.new
|
19
19
|
|
20
|
-
tool=Tool.new([:over_write])
|
20
|
+
tool=Tool.new([:over_write,:explicit_output])
|
21
21
|
tool.option_parser.banner = "Look up retention times in a raw file and \
|
22
22
|
add them to a pepxml file.\n\nUsage: add_retention_times.rb [options] file1.pep.xml file2.mgf"
|
23
23
|
|
24
24
|
exit unless tool.check_options
|
25
25
|
|
26
26
|
if ( ARGV[0].nil? || ARGV[1].nil? )
|
27
|
-
puts "You must supply an input file"
|
27
|
+
puts "You must supply an input pepxml file and an input mgf file"
|
28
28
|
puts tool.option_parser
|
29
29
|
exit
|
30
30
|
end
|
@@ -51,8 +51,6 @@ if not pepxml_doc.root.namespaces.default
|
|
51
51
|
pepxml_ns=nil
|
52
52
|
end
|
53
53
|
|
54
|
-
|
55
|
-
|
56
54
|
queries=pepxml_doc.find("//#{pepxml_ns_prefix}spectrum_query", pepxml_ns)
|
57
55
|
|
58
56
|
queries.each do |query|
|
@@ -86,4 +84,10 @@ queries.each do |query|
|
|
86
84
|
end
|
87
85
|
end
|
88
86
|
|
89
|
-
|
87
|
+
if tool.explicit_output.nil?
|
88
|
+
pepxml_doc.save(pepxml_file)
|
89
|
+
else
|
90
|
+
pepxml_doc.save(tool.explicit_output)
|
91
|
+
end
|
92
|
+
|
93
|
+
|
@@ -10,20 +10,16 @@ require 'protk/tool'
|
|
10
10
|
require 'bio'
|
11
11
|
|
12
12
|
tool=Tool.new([:explicit_output])
|
13
|
-
tool.option_parser.banner = "Create a protein database from Augustus gene prediction
|
13
|
+
tool.option_parser.banner = "Create a protein database from Augustus gene prediction \
|
14
|
+
output that is suitable for later processing by proteogenomics tools.\
|
15
|
+
\n\nUsage: augustus_to_proteindb.rb [options] augustus.gff3"
|
14
16
|
|
15
|
-
tool.
|
16
|
-
|
17
|
-
|
18
|
-
end
|
17
|
+
tool.add_value_option(:explicit_output,nil,['-o', '--output out', 'An explicitly named output file. \
|
18
|
+
The default is to write to standard output'])
|
19
|
+
tool.add_boolean_option(:add_transcript_info,false,['--info','Include CDS Coordinates'])
|
19
20
|
|
20
|
-
exit unless tool.check_options
|
21
21
|
|
22
|
-
|
23
|
-
puts "You must supply an input file"
|
24
|
-
puts tool.option_parser
|
25
|
-
exit
|
26
|
-
end
|
22
|
+
exit unless tool.check_options(true)
|
27
23
|
|
28
24
|
inname=ARGV.shift
|
29
25
|
|
data/bin/interprophet.rb
CHANGED
@@ -16,65 +16,49 @@ for_galaxy = GalaxyUtil.for_galaxy?
|
|
16
16
|
|
17
17
|
# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
|
18
18
|
#
|
19
|
-
prophet_tool=ProphetTool.new([
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
prophet_tool.options.no_nss=""
|
25
|
-
prophet_tool.option_parser.on( '--no-nss', 'Don\'t use NSS (Number of Sibling Searches) in Model' ) do
|
26
|
-
prophet_tool.options.no_nss="NONSS"
|
27
|
-
end
|
19
|
+
prophet_tool=ProphetTool.new([
|
20
|
+
:explicit_output,
|
21
|
+
:over_write,
|
22
|
+
:prefix])
|
28
23
|
|
29
|
-
prophet_tool.
|
30
|
-
|
31
|
-
prophet_tool.options.no_nrs="NONRS"
|
32
|
-
end
|
33
|
-
|
34
|
-
prophet_tool.options.no_nse=""
|
35
|
-
prophet_tool.option_parser.on('--no-nse', 'Don\'t use NSE (Number of Sibling Experiments) in Model' ) do
|
36
|
-
prophet_tool.options.no_nse="NONSE"
|
37
|
-
end
|
24
|
+
prophet_tool.option_parser.banner = "Run InterProphet on a set of pep.xml input files.\n\nUsage: interprophet.rb [options] file1.pep.xml file2.pep.xml ..."
|
25
|
+
@output_suffix="_iproph"
|
38
26
|
|
39
|
-
prophet_tool.options.no_nsi=""
|
40
|
-
prophet_tool.option_parser.on("--no-nsi",'Don\'t use NSE (Number of Sibling Ions) in Model' ) do
|
41
|
-
prophet_tool.options.no_nsi="NONSI"
|
42
|
-
end
|
43
27
|
|
44
|
-
prophet_tool.
|
45
|
-
prophet_tool.
|
46
|
-
|
47
|
-
|
28
|
+
prophet_tool.add_boolean_option(:no_nss,false,['--no-nss', 'Don\'t use NSS (Number of Sibling Searches) in Model'])
|
29
|
+
prophet_tool.add_boolean_option(:no_nrs,false,['--no-nrs', 'Don\'t use NRS (Number of Replicate Spectra) in Model'])
|
30
|
+
prophet_tool.add_boolean_option(:no_nse,false,['--no-nse', 'Don\'t use NSE (Number of Sibling Experiments) in Model'])
|
31
|
+
prophet_tool.add_boolean_option(:no_nsi,false,["--no-nsi",'Don\'t use NSE (Number of Sibling Ions) in Model'])
|
32
|
+
prophet_tool.add_boolean_option(:no_nsm,false,["--no-nsm",'Don\'t use NSE (Number of Sibling Modifications) in Model'])
|
33
|
+
prophet_tool.add_value_option(:min_prob,"",["--minprob mp","Minimum probability cutoff "])
|
48
34
|
|
49
|
-
prophet_tool.
|
50
|
-
prophet_tool.option_parser.on("--minprob mp","Minimum probability cutoff ") do |mp|
|
51
|
-
prophet_tool.options.min_prob=mp
|
52
|
-
end
|
35
|
+
exit unless prophet_tool.check_options(true)
|
53
36
|
|
54
|
-
exit unless prophet_tool.check_options
|
55
|
-
if ( ARGV[0].nil? )
|
56
|
-
puts "You must supply an input file"
|
57
|
-
puts prophet_tool.option_parser
|
58
|
-
exit
|
59
|
-
end
|
60
37
|
|
61
38
|
# Obtain a global environment object
|
62
39
|
genv=Constants.new
|
63
40
|
|
41
|
+
inputs = ARGV.collect {|file_name|
|
42
|
+
file_name.chomp
|
43
|
+
}
|
44
|
+
|
64
45
|
if ( prophet_tool.explicit_output != nil )
|
65
|
-
|
46
|
+
output_file=prophet_tool.explicit_output
|
66
47
|
else
|
67
|
-
output_file="
|
48
|
+
output_file=Tool.default_output_path(inputs[0],".pep.xml",prophet_tool.output_prefix,@output_suffix)
|
68
49
|
end
|
69
50
|
|
70
51
|
if ( !Pathname.new(output_file).exist? || prophet_tool.over_write )
|
71
52
|
|
72
|
-
cmd="InterProphetParser
|
73
|
-
cmd << " MINPROB=#{prophet_tool.min_prob}" if ( prophet_tool.min_prob !="" )
|
53
|
+
cmd="InterProphetParser "
|
74
54
|
|
75
|
-
|
76
|
-
|
77
|
-
|
55
|
+
cmd<<"NONSS " if prophet_tool.options.no_nss
|
56
|
+
cmd<<"NONRS " if prophet_tool.options.no_nrs
|
57
|
+
cmd<<"NONSE " if prophet_tool.options.no_nse
|
58
|
+
cmd<<"NONSI " if prophet_tool.options.no_nsi
|
59
|
+
cmd<<"NONSM " if prophet_tool.options.no_nsm
|
60
|
+
|
61
|
+
cmd << " MINPROB=#{prophet_tool.min_prob}" if ( prophet_tool.min_prob !="" )
|
78
62
|
|
79
63
|
if for_galaxy
|
80
64
|
inputs = inputs.collect {|ip| GalaxyUtil.stage_pepxml(ip) }
|
@@ -86,9 +70,7 @@ if ( !Pathname.new(output_file).exist? || prophet_tool.over_write )
|
|
86
70
|
|
87
71
|
# Run the analysis
|
88
72
|
#
|
89
|
-
|
90
|
-
job_params={:jobid=>"iprophet", :vmem=>"900mb", :queue => "lowmem"}
|
91
|
-
code = prophet_tool.run(cmd,genv,job_params,jobscript_path)
|
73
|
+
code = prophet_tool.run(cmd,genv)
|
92
74
|
throw "Command failed with exit code #{code}" unless code==0
|
93
75
|
|
94
76
|
else
|
data/bin/make_decoy.rb
CHANGED
@@ -22,40 +22,13 @@ include LibXML
|
|
22
22
|
tool=Tool.new([:explicit_output])
|
23
23
|
tool.option_parser.banner = "Create a decoy database from real protein sequences.\n\nUsage: make_decoy.rb [options] realdb.fasta"
|
24
24
|
|
25
|
-
tool.
|
26
|
-
tool.
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
tool.options.prefix_string="decoy_"
|
31
|
-
tool.option_parser.on('-P str','--prefix-string str','String to prepend to sequence ids') do |str|
|
32
|
-
tool.options.prefix_string=str
|
33
|
-
end
|
34
|
-
|
35
|
-
tool.options.reverse_only=false
|
36
|
-
tool.option_parser.on('--reverse-only','Just reverse sequences. Dont try to randomize') do
|
37
|
-
tool.options.reverse_only=true
|
38
|
-
end
|
39
|
-
|
40
|
-
tool.options.id_regex=".*?\\|(.*?)[ \\|]"
|
41
|
-
tool.option_parser.on('--id-regex regex','Regex for finding IDs. If reverse-only is used then this will be used to find ids and prepend with the decoy string. Default .*?\\|(.*?)[ \\|]') do regex
|
42
|
-
tool.options.id_regex=regex
|
43
|
-
end
|
44
|
-
|
45
|
-
tool.options.append=false
|
46
|
-
tool.option_parser.on('-A','--append','Append input sequences to the generated database') do
|
47
|
-
tool.options.append=true
|
48
|
-
end
|
49
|
-
|
50
|
-
|
51
|
-
exit unless tool.check_options
|
52
|
-
|
53
|
-
if ( ARGV[0].nil? )
|
54
|
-
puts "You must supply an input file"
|
55
|
-
puts tool.option_parser
|
56
|
-
exit
|
57
|
-
end
|
25
|
+
tool.add_value_option(:db_length,0,['-L len','--db-length len','Number of sequences to generate'])
|
26
|
+
tool.add_value_option(:prefix_string,"decoy_",['-P str','--prefix-string str','String to prepend to sequence ids'])
|
27
|
+
tool.add_boolean_option(:reverse_only,false,['--reverse-only','Just reverse sequences. Dont try to randomize. Ignores -L'])
|
28
|
+
tool.add_value_option(:id_regex,".*?\\|(.*?)[ \\|]",['--id-regex regex','Regex for finding IDs. If reverse-only is used then this will be used to find ids and prepend with the decoy string. Default .*?\\|(.*?)[ \\|]'])
|
29
|
+
tool.add_boolean_option(:append,false,['-A','--append','Append input sequences to the generated database'])
|
58
30
|
|
31
|
+
exit unless tool.check_options(true,[:explicit_output])
|
59
32
|
|
60
33
|
input_file=ARGV[0]
|
61
34
|
|
@@ -63,11 +36,9 @@ input_file=ARGV[0]
|
|
63
36
|
db_length=tool.db_length
|
64
37
|
if ( db_length==0) #If no db length was specified use the number of entries in the input file
|
65
38
|
db_length=Bio::FastaFormat.open(input_file).count
|
66
|
-
|
39
|
+
puts "Found #{db_length} entries in input file"
|
67
40
|
end
|
68
41
|
|
69
|
-
output_file="decoy_#{input_file}"
|
70
|
-
|
71
42
|
output_file = tool.explicit_output if tool.explicit_output!=nil
|
72
43
|
|
73
44
|
genv=Constants.new()
|
@@ -79,7 +50,8 @@ if (tool.reverse_only)
|
|
79
50
|
Bio::FastaFormat.open(input_file).each do |seq|
|
80
51
|
id=nil
|
81
52
|
begin
|
82
|
-
|
53
|
+
# require 'debugger';debugger
|
54
|
+
id=seq.definition.chomp.scan(/#{tool.id_regex}/)[0][0]
|
83
55
|
revdef=seq.definition.sub(id,"#{tool.prefix_string}#{id}")
|
84
56
|
decoys_out.write ">#{revdef}\n#{seq.aaseq}\n"
|
85
57
|
rescue
|
@@ -91,17 +63,13 @@ else
|
|
91
63
|
Randomize.make_decoys input_file, db_length, decoys_tmp_file, tool.prefix_string
|
92
64
|
end
|
93
65
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
66
|
+
if ( tool.append )
|
67
|
+
cmd ="awk 'FNR==1{print \"\"}1' #{input_file} #{decoys_tmp_file} > #{output_file};"
|
68
|
+
cmd << "rm #{decoys_tmp_file}"
|
69
|
+
else
|
70
|
+
cmd = "mv #{decoys_tmp_file} #{output_file}"
|
71
|
+
end
|
98
72
|
|
99
|
-
|
100
|
-
# Run the conversion
|
101
|
-
#
|
102
|
-
job_params= {:jobid => tool.jobid_from_filename(input_file) }
|
103
|
-
job_params[:queue]="lowmem"
|
104
|
-
job_params[:vmem]="900mb"
|
105
|
-
tool.run(cmd,genv,job_params)
|
73
|
+
tool.run(cmd,genv)
|
106
74
|
|
107
75
|
|
data/bin/mascot_search.rb
CHANGED
@@ -21,7 +21,7 @@ def login(mascot_cgi,username,password)
|
|
21
21
|
authdict[:action]="login"
|
22
22
|
authdict[:savecookie]="1"
|
23
23
|
|
24
|
-
|
24
|
+
$genv.log("Logging in to #{mascot_cgi}/login.pl",:info)
|
25
25
|
|
26
26
|
response = RestClient.post "#{mascot_cgi}/login.pl", authdict
|
27
27
|
|
@@ -40,7 +40,7 @@ def download_datfile(mascot_cgi,results_date,results_file,explicit_output,openur
|
|
40
40
|
output_path="#{results_file}"
|
41
41
|
end
|
42
42
|
|
43
|
-
|
43
|
+
$genv.log("Writing output to #{output_path}",:info)
|
44
44
|
|
45
45
|
require 'open-uri'
|
46
46
|
open("#{output_path}", 'wb') do |file|
|
@@ -58,6 +58,29 @@ def search_params_dictionary(search_tool,input_file)
|
|
58
58
|
var_mods="" if var_mods=="None"
|
59
59
|
fix_mods="" if fix_mods=="None"
|
60
60
|
|
61
|
+
shorthand_varmods=[]
|
62
|
+
shorthand_fixmods=[]
|
63
|
+
|
64
|
+
shorthand_varmods << ['Oxidation (M)'] if search_tool.methionine_oxidation
|
65
|
+
shorthand_varmods << ['Acetyl (Protein N-term)'] if search_tool.acetyl_nterm
|
66
|
+
shorthand_varmods << ['Deamidated (NQ)'] if search_tool.glyco
|
67
|
+
|
68
|
+
shorthand_fixmods << ['Carbamidomethyl (C)'] if search_tool.carbamidomethyl
|
69
|
+
|
70
|
+
if var_mods.length>0
|
71
|
+
var_mods=[var_mods,"#{shorthand_varmods.join(",")}"].join(",")
|
72
|
+
else
|
73
|
+
var_mods=shorthand_varmods.join(",")
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
if fix_mods.length>0
|
78
|
+
fix_mods=[fix_mods,"#{shorthand_fixmods.join(",")}"].join(",")
|
79
|
+
else
|
80
|
+
fix_mods=shorthand_fixmods.join(",")
|
81
|
+
end
|
82
|
+
|
83
|
+
|
61
84
|
postdict={}
|
62
85
|
postdict[:SEARCH]="MIS"
|
63
86
|
postdict[:CHARGE]=search_tool.allowed_charges
|
@@ -90,74 +113,36 @@ $genv=Constants.new
|
|
90
113
|
|
91
114
|
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
92
115
|
#
|
93
|
-
search_tool=SearchTool.new([
|
94
|
-
:
|
95
|
-
:
|
96
|
-
|
97
|
-
|
116
|
+
search_tool=SearchTool.new([
|
117
|
+
:explicit_output,
|
118
|
+
:over_write,
|
119
|
+
:database,
|
120
|
+
:enzyme,
|
121
|
+
:modifications,
|
122
|
+
:methionine_oxidation,
|
123
|
+
:carbamidomethyl,
|
124
|
+
:glyco,
|
125
|
+
:acetyl_nterm,
|
126
|
+
:instrument,
|
127
|
+
:mass_tolerance,
|
128
|
+
:mass_tolerance_units,
|
129
|
+
:precursor_search_type,
|
130
|
+
:missed_cleavages])
|
98
131
|
|
99
132
|
search_tool.option_parser.banner = "Run a Mascot msms search on a set of mgf input files.\n\nUsage: mascot_search.rb [options] msmsfile.mgf"
|
100
133
|
search_tool.options.output_suffix="_mascot"
|
101
134
|
|
102
|
-
search_tool.
|
135
|
+
search_tool.add_value_option(:mascot_server,"#{$genv.default_mascot_server}/mascot/cgi",['-S', '--server url', 'The url to the cgi directory of the mascot server'])
|
136
|
+
search_tool.add_value_option(:allowed_charges,"1+,2+,3+",['--allowed-charges ac', 'Allowed precursor ion charges.'])
|
137
|
+
search_tool.add_value_option(:email,"",['--email em', 'User email.'])
|
138
|
+
search_tool.add_value_option(:username,"",['--username un', 'Username.'])
|
139
|
+
search_tool.add_value_option(:httpproxy,nil,['--proxy url', 'The url to a proxy server'])
|
140
|
+
search_tool.add_value_option(:mascot_password,"",['--password psswd', 'Password to use when Mascot security is enabled'])
|
141
|
+
search_tool.add_boolean_option(:use_security,false,['--use-security', 'When Mascot security is enabled this is required'])
|
142
|
+
search_tool.add_value_option(:download_only,nil,['--download-only path', 'Specify a relative path to an existing results file on the server for download eg(20131113/F227185.dat)'])
|
143
|
+
search_tool.add_value_option(:timeout,200,['--timeout seconds', 'Timeout for sending data file to mascot in seconds'])
|
103
144
|
|
104
|
-
search_tool.
|
105
|
-
search_tool.option_parser.on( '--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+' ) do |ac|
|
106
|
-
search_tool.options.allowed_charges = ac
|
107
|
-
end
|
108
|
-
|
109
|
-
search_tool.options.email=""
|
110
|
-
search_tool.option_parser.on('--email em', 'User email.') do |em|
|
111
|
-
search_tool.options.email = em
|
112
|
-
end
|
113
|
-
|
114
|
-
search_tool.options.username=""
|
115
|
-
search_tool.option_parser.on('--username un', 'Username.') do |un|
|
116
|
-
search_tool.options.username = un
|
117
|
-
end
|
118
|
-
|
119
|
-
search_tool.options.mascot_server="www.matrixscience.com"
|
120
|
-
search_tool.option_parser.on( '-S', '--server url', 'The url to the cgi directory of the mascot server' ) do |url|
|
121
|
-
search_tool.options.mascot_server=url
|
122
|
-
end
|
123
|
-
|
124
|
-
search_tool.options.mascot_server=""
|
125
|
-
search_tool.option_parser.on('--username un', 'Username.') do |un|
|
126
|
-
search_tool.options.username = un
|
127
|
-
end
|
128
|
-
|
129
|
-
search_tool.options.httpproxy=nil
|
130
|
-
search_tool.option_parser.on( '--proxy url', 'The url to a proxy server' ) do |urll|
|
131
|
-
search_tool.options.httpproxy=urll
|
132
|
-
end
|
133
|
-
|
134
|
-
search_tool.options.mascot_password=""
|
135
|
-
search_tool.option_parser.on( '--password psswd', 'Password to use when Mascot security is enabled' ) do |psswd|
|
136
|
-
search_tool.options.mascot_password=psswd
|
137
|
-
end
|
138
|
-
|
139
|
-
search_tool.options.use_security=FALSE
|
140
|
-
search_tool.option_parser.on( '--use-security', 'When Mascot security is enabled this is required' ) do
|
141
|
-
search_tool.options.use_security=TRUE
|
142
|
-
end
|
143
|
-
|
144
|
-
search_tool.options.export_format="mascotdat"
|
145
|
-
search_tool.option_parser.on( '--export format', 'Save results in a specified format. Only mascotdat is currently supported' ) do |format|
|
146
|
-
search_tool.options.export_format=format
|
147
|
-
end
|
148
|
-
|
149
|
-
search_tool.options.download_only=nil
|
150
|
-
search_tool.option_parser.on( '--download-only path', 'Specify a path to an existing results file for download eg(20131113/F227185.dat)' ) do |path|
|
151
|
-
search_tool.options.download_only=path
|
152
|
-
end
|
153
|
-
|
154
|
-
|
155
|
-
search_tool.options.timeout=200
|
156
|
-
search_tool.option_parser.on( '--timeout seconds', 'Timeout for sending data file to mascot in seconds' ) do |seconds|
|
157
|
-
search_tool.options.timeout=seconds.to_i
|
158
|
-
end
|
159
|
-
|
160
|
-
exit unless search_tool.check_options
|
145
|
+
exit unless search_tool.check_options
|
161
146
|
|
162
147
|
if ( ARGV[0].nil? && search_tool.download_only.nil?)
|
163
148
|
puts "You must supply an input file"
|
@@ -214,19 +199,20 @@ else
|
|
214
199
|
puts error_result[0]
|
215
200
|
$genv.log("Mascot search failed with response #{search_response}",:warn)
|
216
201
|
throw "Mascot search failed with response #{search_response}"
|
217
|
-
|
202
|
+
else (search_tool.export_format=="mascotdat")
|
218
203
|
# Search for the location of the mascot data file in the response
|
219
204
|
results=/master_results_?2?\.pl\?file=\.*\/data\/(.*)\/(.+\.dat)/.match(search_response)
|
220
205
|
results_date=results[1]
|
221
206
|
results_file=results[2]
|
222
207
|
|
223
208
|
download_datfile mascot_cgi, results_date, results_file,search_tool.explicit_output,openurlcookie
|
224
|
-
else
|
225
|
-
results=/master_results_?2?\.pl\?file=(\.*\/data\/.*\/.+\.dat)/.match(search_response)
|
226
|
-
results_file = results[1]
|
227
|
-
export_results mascot_cgi,cookie,results_file,search_tool.export_format, openurlcookie
|
228
|
-
# export_results mascot_cgi,cookie,results_file,search_tool.export_format
|
229
209
|
end
|
210
|
+
# else
|
211
|
+
# results=/master_results_?2?\.pl\?file=(\.*\/data\/.*\/.+\.dat)/.match(search_response)
|
212
|
+
# results_file = results[1]
|
213
|
+
# export_results mascot_cgi,cookie,results_file,search_tool.export_format, openurlcookie
|
214
|
+
# # export_results mascot_cgi,cookie,results_file,search_tool.export_format
|
215
|
+
# end
|
230
216
|
end
|
231
217
|
|
232
218
|
|