ms-sequest 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/History CHANGED
@@ -1,8 +1,21 @@
1
- == 0.0.1 / 2009-05-11
2
1
 
3
- * pulled out of mspire core
2
+ == 0.0.4 / 2009-06-18
3
+
4
+ * srf_to_sqt.rb and srf_to_search.rb both working now
5
+
6
+ == 0.0.3 / 2009-06-16
7
+
8
+ * now depends only on the very simple ms/fasta interface (no longer on digest info, etc.)
4
9
 
5
10
  == 0.0.2 / 2009-05-14
6
11
 
7
12
  * Basic SRF to SQT translation working
8
13
  * SQT reading working
14
+
15
+ == 0.0.1 / 2009-05-11
16
+
17
+ * pulled out of mspire core
18
+
19
+
20
+
21
+
@@ -1,5 +1,6 @@
1
- Copyright (c) 2006 University of Texas at Austin, Regents of the University of
2
- Colorado, and Howard Hughes Medical Institute.
1
+ Copyright shared among contributing institutions:
2
+ Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
+ Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
3
4
 
4
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
6
  of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'tap/task'
5
+ require 'ms/sequest/srf/search'
6
+
7
+ if ARGV.size == 0
8
+ ARGV << "--help"
9
+ end
10
+
11
+ task_class = Ms::Sequest::Srf::SrfToSearch
12
+
13
+ parser = ConfigParser.new do |opts|
14
+ opts.separator "configurations"
15
+ opts.add task_class.configurations
16
+
17
+ opts.on "--help", "Print this help" do
18
+ puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
19
+ puts
20
+ puts opts
21
+ exit(0)
22
+ end
23
+ end
24
+
25
+ parser.parse!(ARGV)
26
+
27
+ task = task_class.new(parser.config)
28
+
29
+ ARGV.each do |file|
30
+ task.execute(file)
31
+ end
32
+
33
+
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'tap/task'
5
+ require 'ms/sequest/srf/sqt'
6
+
7
+ if ARGV.size == 0
8
+ ARGV << "--help"
9
+ end
10
+
11
+ task_class = Ms::Sequest::Srf::SrfToSqt
12
+
13
+ parser = ConfigParser.new do |opts|
14
+ opts.separator "configurations"
15
+ opts.add task_class.configurations
16
+
17
+ opts.on "--help", "Print this help" do
18
+ puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
19
+ puts "outputs: <file>.sqt ..."
20
+ puts
21
+ #puts task_class::desc.wrap
22
+ #puts
23
+ puts opts
24
+ #puts
25
+ #puts "in tap workflow: tap run -- glob '*.srf' --:i srf_to_sqt"
26
+ exit(0)
27
+ end
28
+ end
29
+
30
+ parser.parse!(ARGV)
31
+
32
+
33
+ task = task_class.new(parser.config)
34
+
35
+ ARGV.each do |file|
36
+ task.execute(file)
37
+ end
38
+
39
+
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.2'
4
+ VERSION = '0.0.4'
5
5
  end
6
6
  end
@@ -1,8 +1,11 @@
1
1
 
2
- require 'ms/fasta'
3
2
  require 'arrayclass'
4
3
  require 'set'
5
4
 
5
+ require 'ms/fasta'
6
+ require 'digest/md5'
7
+
8
+
6
9
  require 'ms/id/peptide'
7
10
  require 'ms/id/search'
8
11
 
@@ -62,12 +65,37 @@ module Ms
62
65
  # boolean
63
66
  attr_accessor :percolator_results
64
67
 
65
- # assumes the file exists and is readable
66
- # returns [DBSeqLength, DBLocusCount, DBMD5Sum] or nil if no file
67
- def self.get_db_info(dbfile)
68
+ # returns [sequence_length, locus_count] of the fasta file
69
+ def self.db_seq_length_and_locus_count(dbfile)
70
+ total_sequence_length = 0
71
+ fastasize = 0
68
72
  Ms::Fasta.open(dbfile) do |fasta|
69
- [fasta.total_sequence_length, fasta.size, fasta.md5_sum]
73
+ fasta.each {|entry| total_sequence_length += entry.sequence.size }
74
+ fastasize = fasta.size
75
+ end
76
+ [total_sequence_length, fastasize]
77
+ end
78
+
79
+ #--
80
+ # this is implemented separate from sequence length because seq length
81
+ # uses Archive which doesn't preserve carriage returns and newlines.
82
+ #++
83
+ def self.db_md5sum(dbfile)
84
+ chunksize = 61440
85
+ digest = Digest::MD5.new
86
+ File.open(dbfile) do |io|
87
+ while chunk = io.read(chunksize)
88
+ digest << chunk
89
+ end
70
90
  end
91
+ digest.hexdigest
92
+ end
93
+
94
+ # assumes the file exists and is readable
95
+ # returns [DBSeqLength, DBLocusCount, DBMD5Sum]
96
+ def self.db_info(dbfile)
97
+ # returns the 3 member array
98
+ self.db_seq_length_and_locus_count(dbfile) << self.db_md5sum(dbfile)
71
99
  end
72
100
 
73
101
  def protein_class
@@ -13,9 +13,8 @@ require 'ms/id/search'
13
13
  require 'ms/sequest/params'
14
14
 
15
15
  # for conversions
16
- require 'ms/sequest/srf/mgf'
16
+ require 'ms/sequest/srf/search'
17
17
  require 'ms/sequest/srf/sqt'
18
- require 'ms/sequest/srf/dta'
19
18
 
20
19
  module Ms ; end
21
20
  module Ms::Sequest ; end
@@ -96,9 +95,6 @@ class Ms::Sequest::Srf
96
95
  end
97
96
  end
98
97
 
99
- def round(float, decimal_places)
100
- sprintf("%.#{decimal_places}f", float)
101
- end
102
98
 
103
99
  # 1. updates the out_file's list of hits based on passing peptides (but not
104
100
  # the original hit id; rank is implicit in array ordering)
@@ -202,7 +198,6 @@ END
202
198
  # give each hit a base_name, first_scan, last_scan
203
199
  @index.each_with_index do |ind,i|
204
200
  mass_measured = @dta_files[i][0]
205
- #puts @out_files[i].join(", ")
206
201
  @out_files[i][0,3] = *ind
207
202
  pep_hits = @out_files[i][6]
208
203
  @peps.push( *pep_hits )
@@ -212,6 +207,9 @@ END
212
207
  pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
213
208
  pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
214
209
  pep_hit[18] = self ## link with the srf object
210
+ if pep_hit.first_scan == 5719
211
+ puts [pep_hit.sequence, pep_hit.xcorr].join(' ')
212
+ end
215
213
  end
216
214
  end
217
215
 
@@ -391,6 +389,7 @@ class Ms::Sequest::Srf::DTA
391
389
  Unpack_32 = "EeIvvvv"
392
390
  Unpack_35 = "Ex8eVx2vvvv"
393
391
 
392
+
394
393
  # note on peaks (self[7])
395
394
  # this is a byte array of floats, you can get the peaks out with
396
395
  # unpack("e*")
@@ -442,6 +441,10 @@ class Ms::Sequest::Srf::DTA
442
441
  io.print to_dta_file_data
443
442
  end
444
443
 
444
+ def round(float, decimal_places)
445
+ sprintf("%.#{decimal_places}f", float)
446
+ end
447
+
445
448
  end
446
449
 
447
450
 
@@ -518,7 +521,6 @@ end
518
521
 
519
522
 
520
523
  Ms::Sequest::Srf::Out::Pep = Arrayclass.new( %w(mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
521
-
522
524
  # 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
523
525
 
524
526
  class Ms::Sequest::Srf::Out::Pep
@@ -0,0 +1,135 @@
1
+
2
+ require 'ms/sequest/srf'
3
+ require 'ms/mass'
4
+
5
+ # These are for outputting formats used in MS/MS Search engines
6
+
7
+ module Ms
8
+ module Sequest
9
+ class Srf
10
+
11
+ # Writes an MGF file to given filename or base_name + '.mgf' if no
12
+ # filename given.
13
+ #
14
+ # This mimics the output of merge.pl from Mascot. The only difference is
15
+ # that this does not include the "\r\n" that is found after the peak
16
+ # lists, instead, it uses "\n" throughout the file (thinking that this
17
+ # is preferable to mixing newline styles!)
18
+ def to_mgf(filename=nil)
19
+ filename =
20
+ if filename ; filename
21
+ else
22
+ base_name + '.mgf'
23
+ end
24
+ h_plus = Ms::Mass::MASCOT_H_PLUS
25
+ File.open(filename, 'wb') do |out|
26
+ dta_files.zip(index) do |dta, i_ar|
27
+ chrg = dta.charge
28
+ out.print "BEGIN IONS\n"
29
+ out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
30
+ out.print "CHARGE=#{chrg}+\n"
31
+ out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
32
+ peak_ar = dta.peaks.unpack('e*')
33
+ (0...(peak_ar.size)).step(2) do |i|
34
+ out.print( peak_ar[i,2].join(' '), "\n")
35
+ end
36
+ out.print "END IONS\n"
37
+ out.print "\n"
38
+ end
39
+ end
40
+ end
41
+
42
+ # if no out_folder is given, the output folder is named after base_name
43
+ # compress may be: :zip, :tgz, or nil (no compression)
44
+ # :zip requires gem rubyzip to be installed and is *very* bloated
45
+ # as it writes out all the files first!
46
+ # :tgz requires gem archive-tar-minitar to be installed
47
+ def to_dta_files(out_folder=nil, compress=nil)
48
+ outdir =
49
+ if out_folder ; out_folder
50
+ else base_name
51
+ end
52
+
53
+ case compress
54
+ when :tgz
55
+ begin
56
+ require 'archive/tar/minitar'
57
+ rescue LoadError
58
+ abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
59
+ end
60
+ require 'archive/targz' # my own simplified interface!
61
+ require 'zlib'
62
+ names = index.map do |i_ar|
63
+ [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
64
+ end
65
+ #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
66
+
67
+ tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
68
+
69
+ Archive::Tar::Minitar::Output.open(tgz) do |outp|
70
+ dta_files.each_with_index do |dta_file, i|
71
+ Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
72
+ end
73
+ end
74
+ when :zip
75
+ begin
76
+ require 'zip/zipfilesystem'
77
+ rescue LoadError
78
+ abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
79
+ end
80
+ #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
81
+ Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
82
+ dta_files.zip(index) do |dta,i_ar|
83
+ #zfs.mkdir(outdir)
84
+ zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
85
+ dta.write_dta_file(out)
86
+ #zfs.commit
87
+ end
88
+ end
89
+ end
90
+ else # no compression
91
+ FileUtils.mkpath(outdir)
92
+ Dir.chdir(outdir) do
93
+ dta_files.zip(index) do |dta,i_ar|
94
+ File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
95
+ dta.write_dta_file(out)
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
101
+
102
+ # Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
103
+ # searching
104
+ #
105
+ # outputs the appropriate file or directory structure for <file>.srf:
106
+ # <file>.mgf # file for mgf
107
+ # <file> # the basename directory for dta
108
+ class SrfToSearch < Tap::Task
109
+ config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
110
+ def process(srf_file)
111
+ base = srf_file.sub(/\.srf$/i, '')
112
+ newfile =
113
+ case format
114
+ when 'dta'
115
+ base
116
+ when 'mgf'
117
+ base << '.' << format
118
+ end
119
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
120
+ # options just speed up reading since we don't need .out info anyway
121
+ case format
122
+ when 'mgf'
123
+ srf.to_mgf(newfile)
124
+ when 'dta'
125
+ srf.to_dta_files(newfile)
126
+ end
127
+ end
128
+ end
129
+
130
+
131
+ end # Srf
132
+ end # Sequest
133
+ end # Ms
134
+
135
+
@@ -1,4 +1,5 @@
1
-
1
+ require 'tap/task'
2
+ require 'configurable'
2
3
  require 'ms/sequest'
3
4
  require 'ms/sequest/srf'
4
5
  require 'ms/sequest/sqt'
@@ -93,7 +94,7 @@ module Ms
93
94
 
94
95
  if opt[:db_info]
95
96
  if File.exist?(db_filename)
96
- reply = Ms::Sequest::Sqt.get_db_info(db_filename)
97
+ reply = Ms::Sequest::Sqt.db_info(db_filename)
97
98
  %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
98
99
  hh[label] = val
99
100
  end
@@ -164,6 +165,27 @@ module Ms
164
165
  end # close the filehandle
165
166
  end # method
166
167
 
168
+ # SrfToSqt::task convert .srf to .sqt files
169
+ class SrfToSqt < Tap::Task
170
+ config :db_info, false, :short => 'd', &c.flag # calculates num aa's and md5sum on db
171
+ # if your database path has changed
172
+ # and you want db-info, then give the
173
+ # path to the new *directory*
174
+ # e.g. /my/new/path
175
+ config :db_path, nil, :short => 'p'
176
+ config :db_update, false, :short => 'u', &c.flag # update the sqt file to reflect --db_path
177
+ config :no_filter, false, :short => 'n', &c.flag # by default, pephit must be within peptide_mass_tolerance (defined in sequest.params) to be included. Turns this off.
178
+ config :round, false, :short => 'r', &c.flag # round floating point values reasonably
179
+
180
+ def process(srf_file)
181
+ new_filename = srf_file.sub(/\.srf$/i, '') << '.sqt'
182
+
183
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => !no_filter)
184
+
185
+ srf.to_sqt(new_filename, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
186
+
187
+ end # process
188
+ end # SrfToSqt
167
189
  end # Srf
168
190
  end # Sequest
169
191
  end # Ms
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-05-14 00:00:00 -06:00
12
+ date: 2009-06-18 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -32,10 +32,31 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.0.1
34
34
  version:
35
- description:
35
+ - !ruby/object:Gem::Dependency
36
+ name: tap
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.17.1
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: ms-fasta
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.2.3
54
+ version:
55
+ description: reads .SRF, .SQT and supports conversions
36
56
  email: jtprince@gmail.com
37
- executables: []
38
-
57
+ executables:
58
+ - srf_to_sqt.rb
59
+ - srf_to_search.rb
39
60
  extensions: []
40
61
 
41
62
  extra_rdoc_files:
@@ -44,6 +65,7 @@ extra_rdoc_files:
44
65
  - History
45
66
  files:
46
67
  - lib/ms/sequest/params.rb
68
+ - lib/ms/sequest/srf/search.rb
47
69
  - lib/ms/sequest/srf/sqt.rb
48
70
  - lib/ms/sequest/srf.rb
49
71
  - lib/ms/sequest/sqt.rb