ms-sequest 0.0.2 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History +15 -2
- data/MIT-LICENSE +3 -2
- data/bin/srf_to_search.rb +33 -0
- data/bin/srf_to_sqt.rb +39 -0
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/sqt.rb +33 -5
- data/lib/ms/sequest/srf.rb +9 -7
- data/lib/ms/sequest/srf/search.rb +135 -0
- data/lib/ms/sequest/srf/sqt.rb +24 -2
- metadata +27 -5
data/History
CHANGED
@@ -1,8 +1,21 @@
|
|
1
|
-
== 0.0.1 / 2009-05-11
|
2
1
|
|
3
|
-
|
2
|
+
== 0.0.4 / 2009-06-18
|
3
|
+
|
4
|
+
* srf_to_sqt.rb and srf_to_search.rb both working now
|
5
|
+
|
6
|
+
== 0.0.3 / 2009-06-16
|
7
|
+
|
8
|
+
* now depends only on the very simple ms/fasta interface; no longer depends on digest info, etc.
|
4
9
|
|
5
10
|
== 0.0.2 / 2009-05-14
|
6
11
|
|
7
12
|
* Basic SRF to SQT translation working
|
8
13
|
* SQT reading working
|
14
|
+
|
15
|
+
== 0.0.1 / 2009-05-11
|
16
|
+
|
17
|
+
* pulled out of mspire core
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
data/MIT-LICENSE
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
Copyright
|
2
|
-
|
1
|
+
Copyright shared among contributing institutions:
|
2
|
+
Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
|
3
|
+
Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
|
3
4
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'tap/task'
|
5
|
+
require 'ms/sequest/srf/search'
|
6
|
+
|
7
|
+
if ARGV.size == 0
|
8
|
+
ARGV << "--help"
|
9
|
+
end
|
10
|
+
|
11
|
+
task_class = Ms::Sequest::Srf::SrfToSearch
|
12
|
+
|
13
|
+
parser = ConfigParser.new do |opts|
|
14
|
+
opts.separator "configurations"
|
15
|
+
opts.add task_class.configurations
|
16
|
+
|
17
|
+
opts.on "--help", "Print this help" do
|
18
|
+
puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
|
19
|
+
puts
|
20
|
+
puts opts
|
21
|
+
exit(0)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
parser.parse!(ARGV)
|
26
|
+
|
27
|
+
task = task_class.new(parser.config)
|
28
|
+
|
29
|
+
ARGV.each do |file|
|
30
|
+
task.execute(file)
|
31
|
+
end
|
32
|
+
|
33
|
+
|
data/bin/srf_to_sqt.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'tap/task'
|
5
|
+
require 'ms/sequest/srf/sqt'
|
6
|
+
|
7
|
+
if ARGV.size == 0
|
8
|
+
ARGV << "--help"
|
9
|
+
end
|
10
|
+
|
11
|
+
task_class = Ms::Sequest::Srf::SrfToSqt
|
12
|
+
|
13
|
+
parser = ConfigParser.new do |opts|
|
14
|
+
opts.separator "configurations"
|
15
|
+
opts.add task_class.configurations
|
16
|
+
|
17
|
+
opts.on "--help", "Print this help" do
|
18
|
+
puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
|
19
|
+
puts "outputs: <file>.sqt ..."
|
20
|
+
puts
|
21
|
+
#puts task_class::desc.wrap
|
22
|
+
#puts
|
23
|
+
puts opts
|
24
|
+
#puts
|
25
|
+
#puts "in tap workflow: tap run -- glob '*.srf' --:i srf_to_sqt"
|
26
|
+
exit(0)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
parser.parse!(ARGV)
|
31
|
+
|
32
|
+
|
33
|
+
task = task_class.new(parser.config)
|
34
|
+
|
35
|
+
ARGV.each do |file|
|
36
|
+
task.execute(file)
|
37
|
+
end
|
38
|
+
|
39
|
+
|
data/lib/ms/sequest.rb
CHANGED
data/lib/ms/sequest/sqt.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
|
2
|
-
require 'ms/fasta'
|
3
2
|
require 'arrayclass'
|
4
3
|
require 'set'
|
5
4
|
|
5
|
+
require 'ms/fasta'
|
6
|
+
require 'digest/md5'
|
7
|
+
|
8
|
+
|
6
9
|
require 'ms/id/peptide'
|
7
10
|
require 'ms/id/search'
|
8
11
|
|
@@ -62,12 +65,37 @@ module Ms
|
|
62
65
|
# boolean
|
63
66
|
attr_accessor :percolator_results
|
64
67
|
|
65
|
-
#
|
66
|
-
|
67
|
-
|
68
|
+
# returns [sequence_length, locus_count] of the fasta file
|
69
|
+
def self.db_seq_length_and_locus_count(dbfile)
|
70
|
+
total_sequence_length = 0
|
71
|
+
fastasize = 0
|
68
72
|
Ms::Fasta.open(dbfile) do |fasta|
|
69
|
-
|
73
|
+
fasta.each {|entry| total_sequence_length += entry.sequence.size }
|
74
|
+
fastasize = fasta.size
|
75
|
+
end
|
76
|
+
[total_sequence_length, fastasize]
|
77
|
+
end
|
78
|
+
|
79
|
+
#--
|
80
|
+
# this is implemented separately from sequence length because seq length
|
81
|
+
# uses Archive which doesn't preserve carriage returns and newlines.
|
82
|
+
#++
|
83
|
+
def self.db_md5sum(dbfile)
|
84
|
+
chunksize = 61440
|
85
|
+
digest = Digest::MD5.new
|
86
|
+
File.open(dbfile) do |io|
|
87
|
+
while chunk = io.read(chunksize)
|
88
|
+
digest << chunk
|
89
|
+
end
|
70
90
|
end
|
91
|
+
digest.hexdigest
|
92
|
+
end
|
93
|
+
|
94
|
+
# assumes the file exists and is readable
|
95
|
+
# returns [DBSeqLength, DBLocusCount, DBMD5Sum]
|
96
|
+
def self.db_info(dbfile)
|
97
|
+
# returns the 3 member array
|
98
|
+
self.db_seq_length_and_locus_count(dbfile) << self.db_md5sum(dbfile)
|
71
99
|
end
|
72
100
|
|
73
101
|
def protein_class
|
data/lib/ms/sequest/srf.rb
CHANGED
@@ -13,9 +13,8 @@ require 'ms/id/search'
|
|
13
13
|
require 'ms/sequest/params'
|
14
14
|
|
15
15
|
# for conversions
|
16
|
-
require 'ms/sequest/srf/
|
16
|
+
require 'ms/sequest/srf/search'
|
17
17
|
require 'ms/sequest/srf/sqt'
|
18
|
-
require 'ms/sequest/srf/dta'
|
19
18
|
|
20
19
|
module Ms ; end
|
21
20
|
module Ms::Sequest ; end
|
@@ -96,9 +95,6 @@ class Ms::Sequest::Srf
|
|
96
95
|
end
|
97
96
|
end
|
98
97
|
|
99
|
-
def round(float, decimal_places)
|
100
|
-
sprintf("%.#{decimal_places}f", float)
|
101
|
-
end
|
102
98
|
|
103
99
|
# 1. updates the out_file's list of hits based on passing peptides (but not
|
104
100
|
# the original hit id; rank is implicit in array ordering)
|
@@ -202,7 +198,6 @@ END
|
|
202
198
|
# give each hit a base_name, first_scan, last_scan
|
203
199
|
@index.each_with_index do |ind,i|
|
204
200
|
mass_measured = @dta_files[i][0]
|
205
|
-
#puts @out_files[i].join(", ")
|
206
201
|
@out_files[i][0,3] = *ind
|
207
202
|
pep_hits = @out_files[i][6]
|
208
203
|
@peps.push( *pep_hits )
|
@@ -212,6 +207,9 @@ END
|
|
212
207
|
pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
|
213
208
|
pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
|
214
209
|
pep_hit[18] = self ## link with the srf object
|
210
|
+
if pep_hit.first_scan == 5719
|
211
|
+
puts [pep_hit.sequence, pep_hit.xcorr].join(' ')
|
212
|
+
end
|
215
213
|
end
|
216
214
|
end
|
217
215
|
|
@@ -391,6 +389,7 @@ class Ms::Sequest::Srf::DTA
|
|
391
389
|
Unpack_32 = "EeIvvvv"
|
392
390
|
Unpack_35 = "Ex8eVx2vvvv"
|
393
391
|
|
392
|
+
|
394
393
|
# note on peaks (self[7])
|
395
394
|
# this is a byte array of floats, you can get the peaks out with
|
396
395
|
# unpack("e*")
|
@@ -442,6 +441,10 @@ class Ms::Sequest::Srf::DTA
|
|
442
441
|
io.print to_dta_file_data
|
443
442
|
end
|
444
443
|
|
444
|
+
def round(float, decimal_places)
|
445
|
+
sprintf("%.#{decimal_places}f", float)
|
446
|
+
end
|
447
|
+
|
445
448
|
end
|
446
449
|
|
447
450
|
|
@@ -518,7 +521,6 @@ end
|
|
518
521
|
|
519
522
|
|
520
523
|
Ms::Sequest::Srf::Out::Pep = Arrayclass.new( %w(mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
|
521
|
-
|
522
524
|
# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
|
523
525
|
|
524
526
|
class Ms::Sequest::Srf::Out::Pep
|
@@ -0,0 +1,135 @@
|
|
1
|
+
|
2
|
+
require 'ms/sequest/srf'
|
3
|
+
require 'ms/mass'
|
4
|
+
|
5
|
+
# These are for outputting formats used in MS/MS Search engines
|
6
|
+
|
7
|
+
module Ms
|
8
|
+
module Sequest
|
9
|
+
class Srf
|
10
|
+
|
11
|
+
# Writes an MGF file to given filename or base_name + '.mgf' if no
|
12
|
+
# filename given.
|
13
|
+
#
|
14
|
+
# This mimics the output of merge.pl from Mascot. The only difference is
|
15
|
+
# that this does not include the "\r\n" that is found after the peak
|
16
|
+
# lists, instead, it uses "\n" throughout the file (thinking that this
|
17
|
+
# is preferable to mixing newline styles!)
|
18
|
+
def to_mgf(filename=nil)
|
19
|
+
filename =
|
20
|
+
if filename ; filename
|
21
|
+
else
|
22
|
+
base_name + '.mgf'
|
23
|
+
end
|
24
|
+
h_plus = Ms::Mass::MASCOT_H_PLUS
|
25
|
+
File.open(filename, 'wb') do |out|
|
26
|
+
dta_files.zip(index) do |dta, i_ar|
|
27
|
+
chrg = dta.charge
|
28
|
+
out.print "BEGIN IONS\n"
|
29
|
+
out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
|
30
|
+
out.print "CHARGE=#{chrg}+\n"
|
31
|
+
out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
|
32
|
+
peak_ar = dta.peaks.unpack('e*')
|
33
|
+
(0...(peak_ar.size)).step(2) do |i|
|
34
|
+
out.print( peak_ar[i,2].join(' '), "\n")
|
35
|
+
end
|
36
|
+
out.print "END IONS\n"
|
37
|
+
out.print "\n"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# if not given an out_folder, will make one with the basename
|
43
|
+
# compress may be: :zip, :tgz, or nil (no compression)
|
44
|
+
# :zip requires gem rubyzip to be installed and is *very* bloated
|
45
|
+
# as it writes out all the files first!
|
46
|
+
# :tgz requires gem archive-tar-minitar to be installed
|
47
|
+
def to_dta_files(out_folder=nil, compress=nil)
|
48
|
+
outdir =
|
49
|
+
if out_folder ; out_folder
|
50
|
+
else base_name
|
51
|
+
end
|
52
|
+
|
53
|
+
case compress
|
54
|
+
when :tgz
|
55
|
+
begin
|
56
|
+
require 'archive/tar/minitar'
|
57
|
+
rescue LoadError
|
58
|
+
abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
|
59
|
+
end
|
60
|
+
require 'archive/targz' # my own simplified interface!
|
61
|
+
require 'zlib'
|
62
|
+
names = index.map do |i_ar|
|
63
|
+
[outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
|
64
|
+
end
|
65
|
+
#Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
|
66
|
+
|
67
|
+
tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
|
68
|
+
|
69
|
+
Archive::Tar::Minitar::Output.open(tgz) do |outp|
|
70
|
+
dta_files.each_with_index do |dta_file, i|
|
71
|
+
Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
when :zip
|
75
|
+
begin
|
76
|
+
require 'zip/zipfilesystem'
|
77
|
+
rescue LoadError
|
78
|
+
abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
|
79
|
+
end
|
80
|
+
#begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
|
81
|
+
Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
|
82
|
+
dta_files.zip(index) do |dta,i_ar|
|
83
|
+
#zfs.mkdir(outdir)
|
84
|
+
zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
|
85
|
+
dta.write_dta_file(out)
|
86
|
+
#zfs.commit
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
else # no compression
|
91
|
+
FileUtils.mkpath(outdir)
|
92
|
+
Dir.chdir(outdir) do
|
93
|
+
dta_files.zip(index) do |dta,i_ar|
|
94
|
+
File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
|
95
|
+
dta.write_dta_file(out)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
|
103
|
+
# searching
|
104
|
+
#
|
105
|
+
# outputs the appropriate file or directory structure for <file>.srf:
|
106
|
+
# <file>.mgf # file for mgf
|
107
|
+
# <file> # the basename directory for dta
|
108
|
+
class SrfToSearch < Tap::Task
|
109
|
+
config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
|
110
|
+
def process(srf_file)
|
111
|
+
base = srf_file.sub(/\.srf$/i, '')
|
112
|
+
newfile =
|
113
|
+
case format
|
114
|
+
when 'dta'
|
115
|
+
base
|
116
|
+
when 'mgf'
|
117
|
+
base << '.' << format
|
118
|
+
end
|
119
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
|
120
|
+
# options just speed up reading since we don't need .out info anyway
|
121
|
+
case format
|
122
|
+
when 'mgf'
|
123
|
+
srf.to_mgf(newfile)
|
124
|
+
when 'dta'
|
125
|
+
srf.to_dta_files(newfile)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
end # Srf
|
132
|
+
end # Sequest
|
133
|
+
end # Ms
|
134
|
+
|
135
|
+
|
data/lib/ms/sequest/srf/sqt.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
require 'tap/task'
|
2
|
+
require 'configurable'
|
2
3
|
require 'ms/sequest'
|
3
4
|
require 'ms/sequest/srf'
|
4
5
|
require 'ms/sequest/sqt'
|
@@ -93,7 +94,7 @@ module Ms
|
|
93
94
|
|
94
95
|
if opt[:db_info]
|
95
96
|
if File.exist?(db_filename)
|
96
|
-
reply = Ms::Sequest::Sqt.
|
97
|
+
reply = Ms::Sequest::Sqt.db_info(db_filename)
|
97
98
|
%w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
|
98
99
|
hh[label] = val
|
99
100
|
end
|
@@ -164,6 +165,27 @@ module Ms
|
|
164
165
|
end # close the filehandle
|
165
166
|
end # method
|
166
167
|
|
168
|
+
# SrfToSqt::task convert .srf to .sqt files
|
169
|
+
class SrfToSqt < Tap::Task
|
170
|
+
config :db_info, false, :short => 'd', &c.flag # calculates num aa's and md5sum on db
|
171
|
+
# if your database path has changed
|
172
|
+
# and you want db-info, then give the
|
173
|
+
# path to the new *directory*
|
174
|
+
# e.g. /my/new/path
|
175
|
+
config :db_path, nil, :short => 'p'
|
176
|
+
config :db_update, false, :short => 'u', &c.flag # update the sqt file to reflect --db_path
|
177
|
+
config :no_filter, false, :short => 'n', &c.flag # by default, pephit must be within peptide_mass_tolerance (defined in sequest.params) to be included. Turns this off.
|
178
|
+
config :round, false, :short => 'r', &c.flag # round floating point values reasonably
|
179
|
+
|
180
|
+
def process(srf_file)
|
181
|
+
new_filename = srf_file.sub(/\.srf$/i, '') << '.sqt'
|
182
|
+
|
183
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => !no_filter)
|
184
|
+
|
185
|
+
srf.to_sqt(new_filename, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
|
186
|
+
|
187
|
+
end # process
|
188
|
+
end # SrfToSqt
|
167
189
|
end # Srf
|
168
190
|
end # Sequest
|
169
191
|
end # Ms
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-sequest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-18 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -32,10 +32,31 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 0.0.1
|
34
34
|
version:
|
35
|
-
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: tap
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.17.1
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: ms-fasta
|
47
|
+
type: :runtime
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.2.3
|
54
|
+
version:
|
55
|
+
description: reads .SRF, .SQT and supports conversions
|
36
56
|
email: jtprince@gmail.com
|
37
|
-
executables:
|
38
|
-
|
57
|
+
executables:
|
58
|
+
- srf_to_sqt.rb
|
59
|
+
- srf_to_search.rb
|
39
60
|
extensions: []
|
40
61
|
|
41
62
|
extra_rdoc_files:
|
@@ -44,6 +65,7 @@ extra_rdoc_files:
|
|
44
65
|
- History
|
45
66
|
files:
|
46
67
|
- lib/ms/sequest/params.rb
|
68
|
+
- lib/ms/sequest/srf/search.rb
|
47
69
|
- lib/ms/sequest/srf/sqt.rb
|
48
70
|
- lib/ms/sequest/srf.rb
|
49
71
|
- lib/ms/sequest/sqt.rb
|