protk 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/annotate_ids.rb +2 -2
- data/bin/feature_finder.rb +8 -2
- data/bin/file_convert.rb +8 -2
- data/bin/gffmerge.rb +15 -20
- data/bin/interprophet.rb +7 -3
- data/bin/make_decoy.rb +9 -2
- data/bin/mascot2xml.rb +87 -0
- data/bin/mascot_search.rb +126 -187
- data/bin/mascot_to_pepxml.rb +32 -3
- data/bin/msgfplus_search.rb +58 -12
- data/bin/omssa_search.rb +13 -2
- data/bin/peptide_prophet.rb +8 -2
- data/bin/pepxml_to_table.rb +8 -2
- data/bin/protein_prophet.rb +8 -2
- data/bin/protxml_to_table.rb +82 -0
- data/bin/repair_run_summary.rb +7 -1
- data/bin/sixframe.rb +48 -2
- data/bin/tandem_search.rb +11 -2
- data/bin/toppas_pipeline.rb +8 -2
- data/bin/uniprot_annotation.rb +8 -2
- data/bin/uniprot_mapper.rb +8 -2
- data/bin/xls_to_table.rb +8 -2
- data/lib/protk/constants.rb +2 -0
- data/lib/protk/data/pepxml_mascot_template.xml +29 -0
- data/lib/protk/mascot_util.rb +5 -0
- data/lib/protk/prophet_tool.rb +1 -3
- data/lib/protk/search_tool.rb +75 -86
- data/lib/protk/setup_rakefile.rake +12 -5
- data/lib/protk/tool.rb +26 -12
- metadata +23 -9
- data/bin/big_search.rb +0 -41
- data/bin/template_search.rb +0 -144
- data/lib/convert_util.rb +0 -27
- data/lib/pepxml.rb +0 -22
- data/lib/protk/big_search_rakefile.rake +0 -16
- data/lib/protk/big_search_tool.rb +0 -23
data/bin/annotate_ids.rb
CHANGED
@@ -15,7 +15,7 @@ require 'protk/protein_annotator'
|
|
15
15
|
|
16
16
|
# Setup specific command-line options for this tool. Other options are inherited from Tool
|
17
17
|
#
|
18
|
-
id_tool=ProphetTool.new(
|
18
|
+
id_tool=ProphetTool.new([:explicit_output,:over_write])
|
19
19
|
id_tool.option_parser.banner = "Run ID annotation on a prot.xml input file.\n\nUsage: annotate_ids.rb [options] file1.prot.xml"
|
20
20
|
id_tool.options.output_prefix="annotated_"
|
21
21
|
|
@@ -25,7 +25,7 @@ id_tool.option_parser.on( '-I', '--input-format format', 'Format of input file'
|
|
25
25
|
id_tool.options.input_format = format
|
26
26
|
end
|
27
27
|
|
28
|
-
id_tool.
|
28
|
+
exit unless id_tool.check_options
|
29
29
|
|
30
30
|
# Obtain a global environment object
|
31
31
|
genv=Constants.new
|
data/bin/feature_finder.rb
CHANGED
@@ -13,7 +13,7 @@ require 'libxml'
|
|
13
13
|
|
14
14
|
include LibXML
|
15
15
|
|
16
|
-
tool=Tool.new(
|
16
|
+
tool=Tool.new([:explicit_output, :background,:over_write,:prefix_suffix])
|
17
17
|
tool.option_parser.banner = "Find molecular features on a set of input files.\n\nUsage: feature_finder.rb [options] file1.mzML file2.mzML ..."
|
18
18
|
|
19
19
|
tool.options.intensity_type = "ref"
|
@@ -27,7 +27,13 @@ tool.option_parser.on( '--intensity-threshold thresh',"discard features below th
|
|
27
27
|
end
|
28
28
|
|
29
29
|
|
30
|
-
tool.
|
30
|
+
exit unless tool.check_options
|
31
|
+
|
32
|
+
if ( ARGV[0].nil? )
|
33
|
+
puts "You must supply an input file"
|
34
|
+
puts tool.option_parser
|
35
|
+
exit
|
36
|
+
end
|
31
37
|
|
32
38
|
# Obtain a global environment object
|
33
39
|
genv=Constants.new
|
data/bin/file_convert.rb
CHANGED
@@ -57,7 +57,7 @@ end
|
|
57
57
|
|
58
58
|
# Setup specific command-line options for this tool. Other options are inherited from Tool
|
59
59
|
#
|
60
|
-
convert_tool=Tool.new(
|
60
|
+
convert_tool=Tool.new([:explicit_output,:over_write,:maldi])
|
61
61
|
convert_tool.option_parser.banner = "Convert files between different formats.\n\nUsage: file_convert.rb [options] input_file output_file"
|
62
62
|
|
63
63
|
# Special case (usually tool specific options use capitals). Use lowercase l here to mimic the maldi option in the search_tool class
|
@@ -80,7 +80,13 @@ end
|
|
80
80
|
|
81
81
|
|
82
82
|
|
83
|
-
convert_tool.
|
83
|
+
exit unless convert_tool.check_options
|
84
|
+
|
85
|
+
if ( ARGV[0].nil? )
|
86
|
+
puts "You must supply an input file"
|
87
|
+
puts convert_tool.option_parser
|
88
|
+
exit
|
89
|
+
end
|
84
90
|
|
85
91
|
|
86
92
|
|
data/bin/gffmerge.rb
CHANGED
@@ -14,7 +14,7 @@ require 'bio'
|
|
14
14
|
|
15
15
|
include LibXML
|
16
16
|
|
17
|
-
tool=Tool.new(:explicit_output
|
17
|
+
tool=Tool.new([:explicit_output])
|
18
18
|
tool.option_parser.banner = "Create a gff containing peptide observations.\n\nUsage: gffmerge.rb "
|
19
19
|
|
20
20
|
|
@@ -43,21 +43,7 @@ tool.option_parser.on('--threshold prob','Peptide Probability Threshold (Default
|
|
43
43
|
tool.options.peptide_probability_threshold=thresh.to_f
|
44
44
|
end
|
45
45
|
|
46
|
-
|
47
|
-
begin
|
48
|
-
tool.option_parser.parse!
|
49
|
-
mandatory = [:protxml,:sixframe]
|
50
|
-
missing = mandatory.select{ |param| tool.send(param).nil? }
|
51
|
-
if not missing.empty?
|
52
|
-
puts "Missing options: #{missing.join(', ')}"
|
53
|
-
puts tool.option_parser
|
54
|
-
exit
|
55
|
-
end
|
56
|
-
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
57
|
-
puts $!.to_s
|
58
|
-
puts tool.option_parser
|
59
|
-
exit
|
60
|
-
end
|
46
|
+
exit unless tool.check_options [:protxml,:sixframe]
|
61
47
|
|
62
48
|
gff_out_file="merged.gff"
|
63
49
|
if ( tool.explicit_output != nil)
|
@@ -109,6 +95,9 @@ protein_count = 0
|
|
109
95
|
total_peptides = 0
|
110
96
|
for prot in proteins
|
111
97
|
prot_prob = prot['probability']
|
98
|
+
if ( prot_prob.to_f < tool.peptide_probability_threshold )
|
99
|
+
next
|
100
|
+
end
|
112
101
|
indis_proteins = prot.find('protxml:indistinguishable_protein','protxml:http://regis-web.systemsbiology.net/protXML')
|
113
102
|
prot_names = [prot['protein_name']]
|
114
103
|
for protein in indis_proteins
|
@@ -121,10 +110,10 @@ for prot in proteins
|
|
121
110
|
protein_count += 1
|
122
111
|
prot_qualifiers = {"source" => "OBSERVATION", "score" => prot_prob, "ID" => 'pr' + protein_count.to_s}
|
123
112
|
begin
|
124
|
-
|
113
|
+
puts "Looking up #{protein_name}"
|
125
114
|
orf = orf_lookup.get_by_id protein_name
|
126
115
|
if ( orf == nil)
|
127
|
-
|
116
|
+
puts "Failed lookup for #{protein_name}"
|
128
117
|
raise KeyError
|
129
118
|
end
|
130
119
|
|
@@ -132,7 +121,7 @@ for prot in proteins
|
|
132
121
|
position = orf.identifiers.description.split('|').collect { |pos| pos.to_i }
|
133
122
|
|
134
123
|
if ( position.length != 2 )
|
135
|
-
|
124
|
+
puts "Badly formatted entry #{orf}"
|
136
125
|
raise EncodingError
|
137
126
|
end
|
138
127
|
orf_name = orf.entry_id.scan(/lcl\|(.*)/)[0][0]
|
@@ -151,6 +140,9 @@ for prot in proteins
|
|
151
140
|
prot_seq = orf.aaseq.to_s
|
152
141
|
throw "Not amino_acids" if prot_seq != orf.seq.to_s
|
153
142
|
|
143
|
+
if ( strand=='-' )
|
144
|
+
prot_seq.reverse!
|
145
|
+
end
|
154
146
|
|
155
147
|
for peptide in peptides
|
156
148
|
pprob = peptide['nsp_adjusted_probability'].to_f
|
@@ -158,6 +150,9 @@ for prot in proteins
|
|
158
150
|
total_peptides += 1
|
159
151
|
pep_seq = peptide['peptide_sequence']
|
160
152
|
|
153
|
+
if ( strand=='-')
|
154
|
+
pep_seq.reverse!
|
155
|
+
end
|
161
156
|
|
162
157
|
start_indexes = [0]
|
163
158
|
prot_seq.scan /#{pep_seq}/ do |match|
|
@@ -178,7 +173,7 @@ for prot in proteins
|
|
178
173
|
strand=strand,frame=nil,attributes=pep_attributes)
|
179
174
|
fragment_gff_line = Bio::GFF::GFF3::Record.new(seqid = scaffold_name,source="OBSERVATION",
|
180
175
|
feature_type="fragment",start_position=pep_genomic_start,end_position=pep_genomic_end,score='',
|
181
|
-
strand=strand,frame=nil,attributes=[["Parent",pep_id],["ID",
|
176
|
+
strand=strand,frame=nil,attributes=[["Parent",pep_id],["ID",peptide['peptide_sequence']]])
|
182
177
|
gff_db.records += [pep_gff_line,fragment_gff_line]
|
183
178
|
|
184
179
|
end
|
data/bin/interprophet.rb
CHANGED
@@ -13,7 +13,7 @@ require 'protk/prophet_tool'
|
|
13
13
|
|
14
14
|
# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
|
15
15
|
#
|
16
|
-
prophet_tool=ProphetTool.new(
|
16
|
+
prophet_tool=ProphetTool.new([:explicit_output])
|
17
17
|
prophet_tool.option_parser.banner = "Run InterProphet on a set of pep.xml input files.\n\nUsage: interprophet.rb [options] file1.pep.xml file2.pep.xml ..."
|
18
18
|
prophet_tool.options.output_suffix="_iproph"
|
19
19
|
|
@@ -48,8 +48,12 @@ prophet_tool.option_parser.on("--minprob mp","Minimum probability cutoff ") do |
|
|
48
48
|
prophet_tool.options.min_prob=mp
|
49
49
|
end
|
50
50
|
|
51
|
-
prophet_tool.
|
52
|
-
|
51
|
+
exit unless prophet_tool.check_options
|
52
|
+
if ( ARGV[0].nil? )
|
53
|
+
puts "You must supply an input file"
|
54
|
+
puts prophet_tool.option_parser
|
55
|
+
exit
|
56
|
+
end
|
53
57
|
|
54
58
|
# Obtain a global environment object
|
55
59
|
genv=Constants.new
|
data/bin/make_decoy.rb
CHANGED
@@ -17,7 +17,7 @@ include LibXML
|
|
17
17
|
|
18
18
|
# Setup specific command-line options for this tool. Other options are inherited from Tool
|
19
19
|
#
|
20
|
-
tool=Tool.new(
|
20
|
+
tool=Tool.new([:explicit_output])
|
21
21
|
tool.option_parser.banner = "Create a decoy database from real protein sequences.\n\nUsage: make_decoy.rb [options] realdb.fasta"
|
22
22
|
|
23
23
|
tool.options.db_length=0
|
@@ -36,7 +36,14 @@ tool.option_parser.on('-A','--append','Append input sequences to the generated d
|
|
36
36
|
end
|
37
37
|
|
38
38
|
|
39
|
-
tool.
|
39
|
+
exit unless tool.check_options
|
40
|
+
|
41
|
+
if ( ARGV[0].nil? )
|
42
|
+
puts "You must supply an input file"
|
43
|
+
puts tool.option_parser
|
44
|
+
exit
|
45
|
+
end
|
46
|
+
|
40
47
|
|
41
48
|
input_file=ARGV[0]
|
42
49
|
|
data/bin/mascot2xml.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 12/4/2010
|
5
|
+
#
|
6
|
+
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
+
#
|
8
|
+
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
require 'mascot/dat'
|
13
|
+
require 'libxml'
|
14
|
+
|
15
|
+
include LibXML
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
# Environment with global constants
|
21
|
+
#
|
22
|
+
genv=Constants.new
|
23
|
+
|
24
|
+
tool=SearchTool.new([:database,:explicit_output,:over_write])
|
25
|
+
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
|
26
|
+
tool.option_parser.parse!
|
27
|
+
|
28
|
+
throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
|
29
|
+
|
30
|
+
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
|
31
|
+
pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
|
32
|
+
pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
|
33
|
+
pipeline_analysis_node.attributes['summary_xml']=outname
|
34
|
+
end
|
35
|
+
|
36
|
+
def update_enzyme(pepxml_doc,mascot_dat)
|
37
|
+
dat_enzyme=mascot_dat.enzyme
|
38
|
+
enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
|
39
|
+
enzyme_node.attributes['name']=dat_enzyme.title
|
40
|
+
specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
|
41
|
+
# p dat_enzyme.cleavages
|
42
|
+
# TODO: What does the spec say about multiple cut sites
|
43
|
+
# specificity.attributes['cut']=
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
ARGV.each do |file_name|
|
49
|
+
name=file_name.chomp
|
50
|
+
|
51
|
+
dat = Mascot::DAT.open(name)
|
52
|
+
pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
|
53
|
+
pepxml_doc = pepxml_template_parser.parse
|
54
|
+
|
55
|
+
outname = nil
|
56
|
+
if ( tool.explicit_output !=nil)
|
57
|
+
outname = tool.explicit_output
|
58
|
+
else
|
59
|
+
basename = Pathname.new(name).basename
|
60
|
+
outname = "#{basename}.pepXML"
|
61
|
+
end
|
62
|
+
|
63
|
+
$pepxml_ns_prefix="xmlns:"
|
64
|
+
$pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
|
65
|
+
if not pepxml_doc.root.namespaces.default
|
66
|
+
$pepxml_ns_prefix=""
|
67
|
+
$pepxml_ns=nil
|
68
|
+
end
|
69
|
+
|
70
|
+
update_msms_pipeline_analysis(pepxml_doc,dat,outname)
|
71
|
+
update_enzyme(pepxml_doc,dat)
|
72
|
+
|
73
|
+
spectrum_queries={}
|
74
|
+
|
75
|
+
dat.peptides.each do |psm|
|
76
|
+
# psm.score
|
77
|
+
qnum= psm.query
|
78
|
+
if qnum
|
79
|
+
qid=qnum.to_s
|
80
|
+
spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
|
81
|
+
spectrum_queries[qid] << psm
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
|
86
|
+
|
87
|
+
end
|
data/bin/mascot_search.rb
CHANGED
@@ -15,250 +15,189 @@ require 'rest_client'
|
|
15
15
|
|
16
16
|
def login(mascot_cgi,username,password)
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
18
|
+
authdict={}
|
19
|
+
authdict[:username]=username
|
20
|
+
authdict[:password]=password
|
21
|
+
authdict[:action]="login"
|
22
|
+
authdict[:savecookie]="1"
|
23
|
+
|
24
|
+
p "Logging in to #{mascot_cgi}/login.pl"
|
25
|
+
p authdict
|
26
|
+
response = RestClient.post "#{mascot_cgi}/login.pl", authdict
|
27
|
+
|
28
|
+
cookie = response.cookies
|
29
|
+
cookie
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
export_dict[:report]=0
|
48
|
-
export_dict[:show_queries]=1
|
49
|
-
export_dict[:pep_exp_mz]=1
|
50
|
-
export_dict[:pep_exp_z]=1
|
51
|
-
export_dict[:query_master]=0
|
52
|
-
export_dict[:pep_scan_title]=1
|
53
|
-
export_dict[:query_qualifiers]=1
|
54
|
-
export_dict[:_showsubsets]=1
|
55
|
-
export_dict[:_sigthreshold]=0.99
|
56
|
-
export_dict[:pep_isunique]=1
|
57
|
-
export_dict[:show_header]=1
|
58
|
-
export_dict[:pep_ident]=1
|
59
|
-
export_dict[:query_peaks]=1
|
60
|
-
export_dict[:pep_seq]=1
|
61
|
-
export_dict[:query_raw]=1
|
62
|
-
export_dict[:pep_score]=1
|
63
|
-
export_dict[:show_same_sets]=1
|
64
|
-
export_dict[:do_export]=1
|
65
|
-
export_dict[:peptide_master]=1
|
66
|
-
export_dict[:prot_score]=1
|
67
|
-
export_dict[:prot_acc]=1
|
68
|
-
export_dict[:show_params]=1
|
69
|
-
export_dict[:pep_homol]=1
|
70
|
-
export_dict[:show_mods]=1
|
71
|
-
|
72
|
-
# RestClient.add_before_execution_proc do |req, params|
|
73
|
-
# require 'debugger'; debugger
|
74
|
-
# p req
|
75
|
-
# p params
|
76
|
-
# end
|
77
|
-
|
78
|
-
|
79
|
-
export_url="#{mascot_cgi}/export_dat_2.pl"
|
80
|
-
|
81
|
-
begin
|
82
|
-
RestClient.post(export_url , export_dict , {:cookies=>session_cookie}){ |response, request, result, &block|
|
83
|
-
# require 'debugger'; debugger
|
84
|
-
if ( response.code==303)
|
85
|
-
sleep(5)
|
86
|
-
end
|
87
|
-
response.return!(request, result, &block)
|
88
|
-
}
|
89
|
-
# response = RestClient.post export_url , export_dict , {:cookies=>session_cookie}
|
90
|
-
rescue
|
91
|
-
p "Ignoring exception"
|
92
|
-
# require 'debugger'; debugger
|
93
|
-
end
|
94
|
-
|
95
|
-
begin
|
96
|
-
p response.to_s[0,1000]
|
97
|
-
rescue
|
98
|
-
end
|
99
|
-
|
100
|
-
fout = File.new("results.xml", "w+")
|
101
|
-
|
102
|
-
fout.write response
|
32
|
+
def download_datfile(mascot_cgi,results_date,results_file,explicit_output,openurlcookie)
|
33
|
+
mascot_xcgi = "#{mascot_cgi.chomp('cgi')}x-cgi"
|
34
|
+
get_url= "#{mascot_xcgi}/ms-status.exe?Autorefresh=false&Show=RESULTFILE&DateDir=#{results_date}&ResJob=#{results_file}"
|
35
|
+
$genv.log("Getting results file at #{get_url}",:info)
|
36
|
+
|
37
|
+
if ( explicit_output!=nil)
|
38
|
+
output_path=explicit_output
|
39
|
+
else
|
40
|
+
output_path="#{results_file}"
|
41
|
+
end
|
42
|
+
|
43
|
+
require 'open-uri'
|
44
|
+
open("#{output_path}", 'wb') do |file|
|
45
|
+
file << open("#{get_url}","Cookie"=>openurlcookie).read
|
46
|
+
end
|
103
47
|
end
|
104
48
|
|
49
|
+
|
105
50
|
def search_params_dictionary(search_tool,input_file)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
51
|
+
var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
|
52
|
+
fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
|
53
|
+
|
54
|
+
# None is given by an empty galaxy multi-select list and we need to turn it into an empty string
|
55
|
+
#
|
56
|
+
var_mods="" if var_mods=="None"
|
57
|
+
fix_mods="" if fix_mods=="None"
|
58
|
+
|
59
|
+
postdict={}
|
60
|
+
postdict[:SEARCH]="MIS"
|
61
|
+
postdict[:CHARGE]=search_tool.allowed_charges
|
62
|
+
postdict[:CLE]=search_tool.enzyme
|
63
|
+
postdict[:PFA]=search_tool.missed_cleavages
|
64
|
+
postdict[:COM]="Protk"
|
65
|
+
postdict[:DB]=search_tool.database
|
66
|
+
postdict[:INSTRUMENT]=search_tool.instrument
|
67
|
+
postdict[:IT_MODS]=var_mods
|
68
|
+
postdict[:ITOL]=search_tool.fragment_tol
|
69
|
+
postdict[:ITOLU]=search_tool.fragment_tolu
|
70
|
+
postdict[:MASS]=search_tool.precursor_search_type
|
71
|
+
postdict[:MODS]=fix_mods
|
72
|
+
postdict[:REPORT]="AUTO"
|
73
|
+
postdict[:TAXONOMY]="All entries"
|
74
|
+
postdict[:TOL]=search_tool.precursor_tol
|
75
|
+
postdict[:TOLU]=search_tool.precursor_tolu
|
76
|
+
postdict[:USEREMAIL]=search_tool.email
|
77
|
+
postdict[:USERNAME]=search_tool.username
|
78
|
+
postdict[:FILE]=File.new(input_file)
|
79
|
+
postdict[:FORMVER]='1.01'
|
80
|
+
postdict[:INTERMEDIATE]=''
|
81
|
+
|
82
|
+
postdict
|
138
83
|
end
|
139
84
|
|
140
85
|
# Environment with global constants
|
141
86
|
#
|
142
|
-
genv=Constants.new
|
87
|
+
$genv=Constants.new
|
143
88
|
|
144
89
|
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
145
90
|
#
|
146
|
-
search_tool=SearchTool.new(
|
91
|
+
search_tool=SearchTool.new([:explicit_output,:over_write,:database,:enzyme,
|
92
|
+
:modifications,:instrument,:mass_tolerance,
|
93
|
+
:mass_tolerance_units,:precursor_search_type,:missed_cleavages])
|
147
94
|
|
148
95
|
search_tool.jobid_prefix="o"
|
149
96
|
|
150
|
-
search_tool.option_parser.banner = "Run a Mascot msms search on a set of mgf input files.\n\nUsage: mascot_search.rb [options]
|
97
|
+
search_tool.option_parser.banner = "Run a Mascot msms search on a set of mgf input files.\n\nUsage: mascot_search.rb [options] msmsfile.mgf"
|
151
98
|
search_tool.options.output_suffix="_mascot"
|
152
99
|
|
153
|
-
search_tool.options.mascot_server="#{genv.default_mascot_server}/mascot/cgi"
|
100
|
+
search_tool.options.mascot_server="#{$genv.default_mascot_server}/mascot/cgi"
|
101
|
+
|
102
|
+
search_tool.options.allowed_charges="1+,2+,3+"
|
103
|
+
search_tool.option_parser.on( '--allowed-charges ac', 'Allowed precursor ion charges. Default=1+,2+,3+' ) do |ac|
|
104
|
+
search_tool.options.allowed_charges = ac
|
105
|
+
end
|
106
|
+
|
107
|
+
search_tool.options.email=""
|
108
|
+
search_tool.option_parser.on('--email em', 'User email.') do |em|
|
109
|
+
search_tool.options.email = em
|
110
|
+
end
|
111
|
+
|
112
|
+
search_tool.options.username=""
|
113
|
+
search_tool.option_parser.on('--username un', 'Username.') do |un|
|
114
|
+
search_tool.options.username = un
|
115
|
+
end
|
116
|
+
|
117
|
+
search_tool.options.mascot_server="www.matrixscience.com"
|
118
|
+
search_tool.option_parser.on( '-S', '--server url', 'The url to the cgi directory of the mascot server' ) do |url|
|
119
|
+
search_tool.options.mascot_server=url
|
120
|
+
end
|
121
|
+
|
122
|
+
search_tool.options.mascot_server=""
|
123
|
+
search_tool.option_parser.on('--username un', 'Username.') do |un|
|
124
|
+
search_tool.options.username = un
|
125
|
+
end
|
154
126
|
|
155
127
|
search_tool.options.httpproxy=""
|
156
128
|
search_tool.option_parser.on( '--proxy url', 'The url to a proxy server' ) do |urll|
|
157
|
-
|
129
|
+
search_tool.options.httpproxy=urll
|
158
130
|
end
|
159
131
|
|
160
132
|
search_tool.options.mascot_password=""
|
161
133
|
search_tool.option_parser.on( '--password psswd', 'Password to use when Mascot security is enabled' ) do |psswd|
|
162
|
-
|
134
|
+
search_tool.options.mascot_password=psswd
|
163
135
|
end
|
164
136
|
|
165
137
|
search_tool.options.use_security=FALSE
|
166
138
|
search_tool.option_parser.on( '--use-security', 'When Mascot security is enabled this is required' ) do
|
167
|
-
|
139
|
+
search_tool.options.use_security=TRUE
|
168
140
|
end
|
169
141
|
|
170
|
-
search_tool.
|
142
|
+
search_tool.options.export_format="mascotdat"
|
143
|
+
search_tool.option_parser.on( '--export format', 'Save results in a specified format. Only mascotdat is currently supported' ) do |format|
|
144
|
+
search_tool.options.export_format=format
|
145
|
+
end
|
171
146
|
|
147
|
+
exit unless search_tool.check_options
|
148
|
+
|
149
|
+
if ( ARGV[0].nil? )
|
150
|
+
puts "You must supply an input file"
|
151
|
+
puts search_tool.option_parser
|
152
|
+
exit
|
153
|
+
end
|
172
154
|
|
173
155
|
fragment_tol = search_tool.fragment_tol
|
174
156
|
precursor_tol = search_tool.precursor_tol
|
175
157
|
|
176
|
-
|
177
|
-
|
178
158
|
mascot_cgi=search_tool.mascot_server.chomp('/')
|
179
159
|
|
180
160
|
unless ( mascot_cgi =~ /^http[s]?:\/\//)
|
181
|
-
|
161
|
+
mascot_cgi = "http://#{mascot_cgi}"
|
182
162
|
end
|
183
163
|
|
184
|
-
mascot_xcgi = "#{mascot_cgi.chomp('cgi')}x-cgi"
|
185
|
-
|
186
|
-
#
|
187
164
|
RestClient.proxy=search_tool.httpproxy
|
188
165
|
|
189
|
-
genv.log("Var mods #{search_tool.var_mods} and fixed #{search_tool.fix_mods}",:info)
|
190
|
-
|
191
|
-
|
166
|
+
$genv.log("Var mods #{search_tool.var_mods} and fixed #{search_tool.fix_mods}",:info)
|
192
167
|
|
193
168
|
cookie=""
|
194
169
|
openurlcookie=""
|
195
170
|
|
196
171
|
if ( search_tool.use_security)
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
# authdict={}
|
202
|
-
# authdict[:username]=search_tool.username
|
203
|
-
# authdict[:password]=search_tool.mascot_password
|
204
|
-
# authdict[:action]="login"
|
205
|
-
# authdict[:savecookie]="1"
|
206
|
-
|
207
|
-
# response = RestClient.post "#{mascot_cgi}/login.pl", authdict
|
208
|
-
|
209
|
-
cookie = login(mascot_cgi,search_tool.username,search_tool.mascot_password)
|
210
|
-
|
211
|
-
#cookie = response.cookies
|
212
|
-
openurlcookie = "MASCOT_SESSION=#{cookie['MASCOT_SESSION']}; MASCOT_USERID=#{cookie['MASCOT_USERID']}; MASCOT_USERNAME=#{cookie['MASCOT_USERNAME']}"
|
172
|
+
$genv.log("Logging in",:info)
|
173
|
+
cookie = login(mascot_cgi,search_tool.username,search_tool.mascot_password)
|
174
|
+
openurlcookie = "MASCOT_SESSION=#{cookie['MASCOT_SESSION']}; MASCOT_USERID=#{cookie['MASCOT_USERID']}; MASCOT_USERNAME=#{cookie['MASCOT_USERNAME']}"
|
213
175
|
end
|
214
176
|
|
215
177
|
postdict = search_params_dictionary search_tool, ARGV[0]
|
216
|
-
|
217
|
-
|
218
|
-
genv.log("Sending #{postdict}",:info)
|
219
|
-
|
178
|
+
$genv.log("Sending #{postdict}",:info)
|
220
179
|
|
221
180
|
search_response=RestClient.post "#{mascot_cgi}/nph-mascot.exe?1", postdict, {:cookies=>cookie}
|
222
181
|
|
223
|
-
genv.log("Mascot search response was #{search_response}",:info)
|
182
|
+
$genv.log("Mascot search response was #{search_response}",:info)
|
224
183
|
|
225
184
|
# Look for an error if there is one
|
226
185
|
error_result= /Sorry, your search could not be performed(.*)/.match(search_response)
|
227
186
|
if ( error_result != nil )
|
228
|
-
|
229
|
-
|
230
|
-
|
187
|
+
puts error_result[0]
|
188
|
+
$genv.log("Mascot search failed with response #{search_response}",:warn)
|
189
|
+
throw "Mascot search failed with response #{search_response}"
|
190
|
+
elsif (search_tool.export_format=="mascotdat")
|
191
|
+
# Search for the location of the mascot data file in the response
|
192
|
+
results=/master_results_?2?\.pl\?file=\.*\/data\/(.*)\/(.+\.dat)/.match(search_response)
|
193
|
+
results_date=results[1]
|
194
|
+
results_file=results[2]
|
195
|
+
|
196
|
+
download_datfile mascot_cgi, results_date, results_file,search_tool.explicit_output,openurlcookie
|
231
197
|
else
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
results_file=results[2]
|
237
|
-
|
238
|
-
# results=/master_results_?2?\.pl\?file=(\.*\/data\/.*\/.+\.dat)/.match(search_response)
|
239
|
-
# results_file = results[1]
|
240
|
-
# export_results mascot_cgi,cookie,results_file,"XML"
|
241
|
-
|
242
|
-
|
243
|
-
get_url= "#{mascot_xcgi}/ms-status.exe?Autorefresh=false&Show=RESULTFILE&DateDir=#{results_date}&ResJob=#{results_file}"
|
244
|
-
|
245
|
-
genv.log("Getting results file at #{get_url}",:info)
|
246
|
-
|
247
|
-
if ( search_tool.explicit_output!=nil)
|
248
|
-
output_path=search_tool.explicit_output
|
249
|
-
else
|
250
|
-
output_path="#{results_file}"
|
251
|
-
end
|
252
|
-
|
253
|
-
# Download the results
|
254
|
-
#
|
255
|
-
require 'open-uri'
|
256
|
-
open("#{output_path}", 'wb') do |file|
|
257
|
-
file << open("#{get_url}","Cookie"=>openurlcookie).read
|
258
|
-
end
|
259
|
-
|
260
|
-
|
261
|
-
#open("F1.dat", 'wb') do |file| file << open("#{get_url}","Cookie" => cookie).read end
|
262
|
-
|
198
|
+
results=/master_results_?2?\.pl\?file=(\.*\/data\/.*\/.+\.dat)/.match(search_response)
|
199
|
+
results_file = results[1]
|
200
|
+
export_results mascot_cgi,cookie,results_file,search_tool.export_format, openurlcookie
|
201
|
+
# export_results mascot_cgi,cookie,results_file,search_tool.export_format
|
263
202
|
end
|
264
203
|
|