ms-sequest 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +15 -2
- data/MIT-LICENSE +3 -2
- data/bin/srf_to_search.rb +33 -0
- data/bin/srf_to_sqt.rb +39 -0
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/sqt.rb +33 -5
- data/lib/ms/sequest/srf.rb +9 -7
- data/lib/ms/sequest/srf/search.rb +135 -0
- data/lib/ms/sequest/srf/sqt.rb +24 -2
- metadata +27 -5
data/History
CHANGED
@@ -1,8 +1,21 @@
|
|
1
|
-
== 0.0.1 / 2009-05-11
|
2
1
|
|
3
|
-
|
2
|
+
== 0.0.4 / 2009-06-18
|
3
|
+
|
4
|
+
* srf_to_sqt.rb and srf_to_search.rb both working now
|
5
|
+
|
6
|
+
== 0.0.3 / 2009-06-16
|
7
|
+
|
8
|
+
* only dependent on a very simple ms/fasta interface; no longer depends on digest info, etc.
|
4
9
|
|
5
10
|
== 0.0.2 / 2009-05-14
|
6
11
|
|
7
12
|
* Basic SRF to SQT translation working
|
8
13
|
* SQT reading working
|
14
|
+
|
15
|
+
== 0.0.1 / 2009-05-11
|
16
|
+
|
17
|
+
* pulled out of mspire core
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
data/MIT-LICENSE
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
Copyright
|
2
|
-
|
1
|
+
Copyright shared among contributing institutions:
|
2
|
+
Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
|
3
|
+
Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
|
3
4
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'tap/task'
|
5
|
+
require 'ms/sequest/srf/search'
|
6
|
+
|
7
|
+
if ARGV.size == 0
|
8
|
+
ARGV << "--help"
|
9
|
+
end
|
10
|
+
|
11
|
+
task_class = Ms::Sequest::Srf::SrfToSearch
|
12
|
+
|
13
|
+
parser = ConfigParser.new do |opts|
|
14
|
+
opts.separator "configurations"
|
15
|
+
opts.add task_class.configurations
|
16
|
+
|
17
|
+
opts.on "--help", "Print this help" do
|
18
|
+
puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
|
19
|
+
puts
|
20
|
+
puts opts
|
21
|
+
exit(0)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
parser.parse!(ARGV)
|
26
|
+
|
27
|
+
task = task_class.new(parser.config)
|
28
|
+
|
29
|
+
ARGV.each do |file|
|
30
|
+
task.execute(file)
|
31
|
+
end
|
32
|
+
|
33
|
+
|
data/bin/srf_to_sqt.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'tap/task'
|
5
|
+
require 'ms/sequest/srf/sqt'
|
6
|
+
|
7
|
+
if ARGV.size == 0
|
8
|
+
ARGV << "--help"
|
9
|
+
end
|
10
|
+
|
11
|
+
task_class = Ms::Sequest::Srf::SrfToSqt
|
12
|
+
|
13
|
+
parser = ConfigParser.new do |opts|
|
14
|
+
opts.separator "configurations"
|
15
|
+
opts.add task_class.configurations
|
16
|
+
|
17
|
+
opts.on "--help", "Print this help" do
|
18
|
+
puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
|
19
|
+
puts "outputs: <file>.sqt ..."
|
20
|
+
puts
|
21
|
+
#puts task_class::desc.wrap
|
22
|
+
#puts
|
23
|
+
puts opts
|
24
|
+
#puts
|
25
|
+
#puts "in tap workflow: tap run -- glob '*.srf' --:i srf_to_sqt"
|
26
|
+
exit(0)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
parser.parse!(ARGV)
|
31
|
+
|
32
|
+
|
33
|
+
task = task_class.new(parser.config)
|
34
|
+
|
35
|
+
ARGV.each do |file|
|
36
|
+
task.execute(file)
|
37
|
+
end
|
38
|
+
|
39
|
+
|
data/lib/ms/sequest.rb
CHANGED
data/lib/ms/sequest/sqt.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
|
2
|
-
require 'ms/fasta'
|
3
2
|
require 'arrayclass'
|
4
3
|
require 'set'
|
5
4
|
|
5
|
+
require 'ms/fasta'
|
6
|
+
require 'digest/md5'
|
7
|
+
|
8
|
+
|
6
9
|
require 'ms/id/peptide'
|
7
10
|
require 'ms/id/search'
|
8
11
|
|
@@ -62,12 +65,37 @@ module Ms
|
|
62
65
|
# boolean
|
63
66
|
attr_accessor :percolator_results
|
64
67
|
|
65
|
-
#
|
66
|
-
|
67
|
-
|
68
|
+
# returns [sequence_length, locus_count] of the fasta file
|
69
|
+
def self.db_seq_length_and_locus_count(dbfile)
|
70
|
+
total_sequence_length = 0
|
71
|
+
fastasize = 0
|
68
72
|
Ms::Fasta.open(dbfile) do |fasta|
|
69
|
-
|
73
|
+
fasta.each {|entry| total_sequence_length += entry.sequence.size }
|
74
|
+
fastasize = fasta.size
|
75
|
+
end
|
76
|
+
[total_sequence_length, fastasize]
|
77
|
+
end
|
78
|
+
|
79
|
+
#--
|
80
|
+
# this is implemented separately from sequence length because seq length
|
81
|
+
# uses Archive which doesn't preserve carriage returns and newlines.
|
82
|
+
#++
|
83
|
+
def self.db_md5sum(dbfile)
|
84
|
+
chunksize = 61440
|
85
|
+
digest = Digest::MD5.new
|
86
|
+
File.open(dbfile) do |io|
|
87
|
+
while chunk = io.read(chunksize)
|
88
|
+
digest << chunk
|
89
|
+
end
|
70
90
|
end
|
91
|
+
digest.hexdigest
|
92
|
+
end
|
93
|
+
|
94
|
+
# assumes the file exists and is readable
|
95
|
+
# returns [DBSeqLength, DBLocusCount, DBMD5Sum]
|
96
|
+
def self.db_info(dbfile)
|
97
|
+
# returns the 3 member array
|
98
|
+
self.db_seq_length_and_locus_count(dbfile) << self.db_md5sum(dbfile)
|
71
99
|
end
|
72
100
|
|
73
101
|
def protein_class
|
data/lib/ms/sequest/srf.rb
CHANGED
@@ -13,9 +13,8 @@ require 'ms/id/search'
|
|
13
13
|
require 'ms/sequest/params'
|
14
14
|
|
15
15
|
# for conversions
|
16
|
-
require 'ms/sequest/srf/
|
16
|
+
require 'ms/sequest/srf/search'
|
17
17
|
require 'ms/sequest/srf/sqt'
|
18
|
-
require 'ms/sequest/srf/dta'
|
19
18
|
|
20
19
|
module Ms ; end
|
21
20
|
module Ms::Sequest ; end
|
@@ -96,9 +95,6 @@ class Ms::Sequest::Srf
|
|
96
95
|
end
|
97
96
|
end
|
98
97
|
|
99
|
-
def round(float, decimal_places)
|
100
|
-
sprintf("%.#{decimal_places}f", float)
|
101
|
-
end
|
102
98
|
|
103
99
|
# 1. updates the out_file's list of hits based on passing peptides (but not
|
104
100
|
# the original hit id; rank is implicit in array ordering)
|
@@ -202,7 +198,6 @@ END
|
|
202
198
|
# give each hit a base_name, first_scan, last_scan
|
203
199
|
@index.each_with_index do |ind,i|
|
204
200
|
mass_measured = @dta_files[i][0]
|
205
|
-
#puts @out_files[i].join(", ")
|
206
201
|
@out_files[i][0,3] = *ind
|
207
202
|
pep_hits = @out_files[i][6]
|
208
203
|
@peps.push( *pep_hits )
|
@@ -212,6 +207,9 @@ END
|
|
212
207
|
pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
|
213
208
|
pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
|
214
209
|
pep_hit[18] = self ## link with the srf object
|
210
|
+
if pep_hit.first_scan == 5719
|
211
|
+
puts [pep_hit.sequence, pep_hit.xcorr].join(' ')
|
212
|
+
end
|
215
213
|
end
|
216
214
|
end
|
217
215
|
|
@@ -391,6 +389,7 @@ class Ms::Sequest::Srf::DTA
|
|
391
389
|
Unpack_32 = "EeIvvvv"
|
392
390
|
Unpack_35 = "Ex8eVx2vvvv"
|
393
391
|
|
392
|
+
|
394
393
|
# note on peaks (self[7])
|
395
394
|
# this is a byte array of floats, you can get the peaks out with
|
396
395
|
# unpack("e*")
|
@@ -442,6 +441,10 @@ class Ms::Sequest::Srf::DTA
|
|
442
441
|
io.print to_dta_file_data
|
443
442
|
end
|
444
443
|
|
444
|
+
def round(float, decimal_places)
|
445
|
+
sprintf("%.#{decimal_places}f", float)
|
446
|
+
end
|
447
|
+
|
445
448
|
end
|
446
449
|
|
447
450
|
|
@@ -518,7 +521,6 @@ end
|
|
518
521
|
|
519
522
|
|
520
523
|
Ms::Sequest::Srf::Out::Pep = Arrayclass.new( %w(mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
|
521
|
-
|
522
524
|
# 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
|
523
525
|
|
524
526
|
class Ms::Sequest::Srf::Out::Pep
|
@@ -0,0 +1,135 @@
|
|
1
|
+
|
2
|
+
require 'ms/sequest/srf'
|
3
|
+
require 'ms/mass'
|
4
|
+
|
5
|
+
# These are for outputting formats used in MS/MS Search engines
|
6
|
+
|
7
|
+
module Ms
|
8
|
+
module Sequest
|
9
|
+
class Srf
|
10
|
+
|
11
|
+
# Writes an MGF file to given filename or base_name + '.mgf' if no
|
12
|
+
# filename given.
|
13
|
+
#
|
14
|
+
# This mimics the output of merge.pl from mascot. The only difference is
|
15
|
+
# that this does not include the "\r\n" that is found after the peak
|
16
|
+
# lists, instead, it uses "\n" throughout the file (thinking that this
|
17
|
+
# is preferable to mixing newline styles!)
|
18
|
+
def to_mgf(filename=nil)
|
19
|
+
filename =
|
20
|
+
if filename ; filename
|
21
|
+
else
|
22
|
+
base_name + '.mgf'
|
23
|
+
end
|
24
|
+
h_plus = Ms::Mass::MASCOT_H_PLUS
|
25
|
+
File.open(filename, 'wb') do |out|
|
26
|
+
dta_files.zip(index) do |dta, i_ar|
|
27
|
+
chrg = dta.charge
|
28
|
+
out.print "BEGIN IONS\n"
|
29
|
+
out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
|
30
|
+
out.print "CHARGE=#{chrg}+\n"
|
31
|
+
out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
|
32
|
+
peak_ar = dta.peaks.unpack('e*')
|
33
|
+
(0...(peak_ar.size)).step(2) do |i|
|
34
|
+
out.print( peak_ar[i,2].join(' '), "\n")
|
35
|
+
end
|
36
|
+
out.print "END IONS\n"
|
37
|
+
out.print "\n"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# if not given an out_folder, will make one with the basename
|
43
|
+
# compress may be: :zip, :tgz, or nil (no compression)
|
44
|
+
# :zip requires gem rubyzip to be installed and is *very* bloated
|
45
|
+
# as it writes out all the files first!
|
46
|
+
# :tgz requires gem archive-tar-minitar to be installed
|
47
|
+
def to_dta_files(out_folder=nil, compress=nil)
|
48
|
+
outdir =
|
49
|
+
if out_folder ; out_folder
|
50
|
+
else base_name
|
51
|
+
end
|
52
|
+
|
53
|
+
case compress
|
54
|
+
when :tgz
|
55
|
+
begin
|
56
|
+
require 'archive/tar/minitar'
|
57
|
+
rescue LoadError
|
58
|
+
abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
|
59
|
+
end
|
60
|
+
require 'archive/targz' # my own simplified interface!
|
61
|
+
require 'zlib'
|
62
|
+
names = index.map do |i_ar|
|
63
|
+
[outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
|
64
|
+
end
|
65
|
+
#Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
|
66
|
+
|
67
|
+
tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
|
68
|
+
|
69
|
+
Archive::Tar::Minitar::Output.open(tgz) do |outp|
|
70
|
+
dta_files.each_with_index do |dta_file, i|
|
71
|
+
Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
when :zip
|
75
|
+
begin
|
76
|
+
require 'zip/zipfilesystem'
|
77
|
+
rescue LoadError
|
78
|
+
abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
|
79
|
+
end
|
80
|
+
#begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
|
81
|
+
Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
|
82
|
+
dta_files.zip(index) do |dta,i_ar|
|
83
|
+
#zfs.mkdir(outdir)
|
84
|
+
zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
|
85
|
+
dta.write_dta_file(out)
|
86
|
+
#zfs.commit
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
else # no compression
|
91
|
+
FileUtils.mkpath(outdir)
|
92
|
+
Dir.chdir(outdir) do
|
93
|
+
dta_files.zip(index) do |dta,i_ar|
|
94
|
+
File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
|
95
|
+
dta.write_dta_file(out)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
|
103
|
+
# searching
|
104
|
+
#
|
105
|
+
# outputs the appropriate file or directory structure for <file>.srf:
|
106
|
+
# <file>.mgf # file for mgf
|
107
|
+
# <file> # the basename directory for dta
|
108
|
+
class SrfToSearch < Tap::Task
|
109
|
+
config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
|
110
|
+
def process(srf_file)
|
111
|
+
base = srf_file.sub(/\.srf$/i, '')
|
112
|
+
newfile =
|
113
|
+
case format
|
114
|
+
when 'dta'
|
115
|
+
base
|
116
|
+
when 'mgf'
|
117
|
+
base << '.' << format
|
118
|
+
end
|
119
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
|
120
|
+
# options just speed up reading since we don't need .out info anyway
|
121
|
+
case format
|
122
|
+
when 'mgf'
|
123
|
+
srf.to_mgf(newfile)
|
124
|
+
when 'dta'
|
125
|
+
srf.to_dta_files(newfile)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
end # Srf
|
132
|
+
end # Sequest
|
133
|
+
end # Ms
|
134
|
+
|
135
|
+
|
data/lib/ms/sequest/srf/sqt.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
require 'tap/task'
|
2
|
+
require 'configurable'
|
2
3
|
require 'ms/sequest'
|
3
4
|
require 'ms/sequest/srf'
|
4
5
|
require 'ms/sequest/sqt'
|
@@ -93,7 +94,7 @@ module Ms
|
|
93
94
|
|
94
95
|
if opt[:db_info]
|
95
96
|
if File.exist?(db_filename)
|
96
|
-
reply = Ms::Sequest::Sqt.
|
97
|
+
reply = Ms::Sequest::Sqt.db_info(db_filename)
|
97
98
|
%w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
|
98
99
|
hh[label] = val
|
99
100
|
end
|
@@ -164,6 +165,27 @@ module Ms
|
|
164
165
|
end # close the filehandle
|
165
166
|
end # method
|
166
167
|
|
168
|
+
# SrfToSqt::task convert .srf to .sqt files
|
169
|
+
class SrfToSqt < Tap::Task
|
170
|
+
config :db_info, false, :short => 'd', &c.flag # calculates num aa's and md5sum on db
|
171
|
+
# if your database path has changed
|
172
|
+
# and you want db-info, then give the
|
173
|
+
# path to the new *directory*
|
174
|
+
# e.g. /my/new/path
|
175
|
+
config :db_path, nil, :short => 'p'
|
176
|
+
config :db_update, false, :short => 'u', &c.flag # update the sqt file to reflect --db_path
|
177
|
+
config :no_filter, false, :short => 'n', &c.flag # by default, pephit must be within peptide_mass_tolerance (defined in sequest.params) to be included. Turns this off.
|
178
|
+
config :round, false, :short => 'r', &c.flag # round floating point values reasonably
|
179
|
+
|
180
|
+
def process(srf_file)
|
181
|
+
new_filename = srf_file.sub(/\.srf$/i, '') << '.sqt'
|
182
|
+
|
183
|
+
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => !no_filter)
|
184
|
+
|
185
|
+
srf.to_sqt(new_filename, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
|
186
|
+
|
187
|
+
end # process
|
188
|
+
end # SrfToSqt
|
167
189
|
end # Srf
|
168
190
|
end # Sequest
|
169
191
|
end # Ms
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-sequest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-18 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -32,10 +32,31 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 0.0.1
|
34
34
|
version:
|
35
|
-
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: tap
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.17.1
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: ms-fasta
|
47
|
+
type: :runtime
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.2.3
|
54
|
+
version:
|
55
|
+
description: reads .SRF, .SQT and supports conversions
|
36
56
|
email: jtprince@gmail.com
|
37
|
-
executables:
|
38
|
-
|
57
|
+
executables:
|
58
|
+
- srf_to_sqt.rb
|
59
|
+
- srf_to_search.rb
|
39
60
|
extensions: []
|
40
61
|
|
41
62
|
extra_rdoc_files:
|
@@ -44,6 +65,7 @@ extra_rdoc_files:
|
|
44
65
|
- History
|
45
66
|
files:
|
46
67
|
- lib/ms/sequest/params.rb
|
68
|
+
- lib/ms/sequest/srf/search.rb
|
47
69
|
- lib/ms/sequest/srf/sqt.rb
|
48
70
|
- lib/ms/sequest/srf.rb
|
49
71
|
- lib/ms/sequest/sqt.rb
|