ms-sequest 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History CHANGED
@@ -1,8 +1,21 @@
1
- == 0.0.1 / 2009-05-11
2
1
 
3
- * pulled out of mspire core
2
+ == 0.0.4 / 2009-06-18
3
+
4
+ * srf_to_sqt.rb and srf_to_search.rb both working now
5
+
6
+ == 0.0.3 / 2009-06-16
7
+
8
+ * now depends only on a very simple ms/fasta interface; no longer relies on digest info, etc.
4
9
 
5
10
  == 0.0.2 / 2009-05-14
6
11
 
7
12
  * Basic SRF to SQT translation working
8
13
  * SQT reading working
14
+
15
+ == 0.0.1 / 2009-05-11
16
+
17
+ * pulled out of mspire core
18
+
19
+
20
+
21
+
@@ -1,5 +1,6 @@
1
- Copyright (c) 2006 University of Texas at Austin, Regents of the University of
2
- Colorado, and Howard Hughes Medical Institute.
1
+ Copyright shared among contributing institutions:
2
+ Copyright (c) 2006-2008 University of Texas at Austin (the initial project)
3
+ Copyright (c) 2009 Regents of the University of Colorado and Howard Hughes Medical Institute. (modularization of the project)
3
4
 
4
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
6
  of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'tap/task'
5
+ require 'ms/sequest/srf/search'
6
+
7
+ if ARGV.size == 0
8
+ ARGV << "--help"
9
+ end
10
+
11
+ task_class = Ms::Sequest::Srf::SrfToSearch
12
+
13
+ parser = ConfigParser.new do |opts|
14
+ opts.separator "configurations"
15
+ opts.add task_class.configurations
16
+
17
+ opts.on "--help", "Print this help" do
18
+ puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
19
+ puts
20
+ puts opts
21
+ exit(0)
22
+ end
23
+ end
24
+
25
+ parser.parse!(ARGV)
26
+
27
+ task = task_class.new(parser.config)
28
+
29
+ ARGV.each do |file|
30
+ task.execute(file)
31
+ end
32
+
33
+
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'tap/task'
5
+ require 'ms/sequest/srf/sqt'
6
+
7
+ if ARGV.size == 0
8
+ ARGV << "--help"
9
+ end
10
+
11
+ task_class = Ms::Sequest::Srf::SrfToSqt
12
+
13
+ parser = ConfigParser.new do |opts|
14
+ opts.separator "configurations"
15
+ opts.add task_class.configurations
16
+
17
+ opts.on "--help", "Print this help" do
18
+ puts "usage: #{File.basename(__FILE__)} <file>.srf ..."
19
+ puts "outputs: <file>.sqt ..."
20
+ puts
21
+ #puts task_class::desc.wrap
22
+ #puts
23
+ puts opts
24
+ #puts
25
+ #puts "in tap workflow: tap run -- glob '*.srf' --:i srf_to_sqt"
26
+ exit(0)
27
+ end
28
+ end
29
+
30
+ parser.parse!(ARGV)
31
+
32
+
33
+ task = task_class.new(parser.config)
34
+
35
+ ARGV.each do |file|
36
+ task.execute(file)
37
+ end
38
+
39
+
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Ms
3
3
  module Sequest
4
- VERSION = '0.0.2'
4
+ VERSION = '0.0.4'
5
5
  end
6
6
  end
@@ -1,8 +1,11 @@
1
1
 
2
- require 'ms/fasta'
3
2
  require 'arrayclass'
4
3
  require 'set'
5
4
 
5
+ require 'ms/fasta'
6
+ require 'digest/md5'
7
+
8
+
6
9
  require 'ms/id/peptide'
7
10
  require 'ms/id/search'
8
11
 
@@ -62,12 +65,37 @@ module Ms
62
65
  # boolean
63
66
  attr_accessor :percolator_results
64
67
 
65
- # assumes the file exists and is readable
66
- # returns [DBSeqLength, DBLocusCount, DBMD5Sum] or nil if no file
67
- def self.get_db_info(dbfile)
68
+ # returns [sequence_length, locus_count] of the fasta file
69
+ def self.db_seq_length_and_locus_count(dbfile)
70
+ total_sequence_length = 0
71
+ fastasize = 0
68
72
  Ms::Fasta.open(dbfile) do |fasta|
69
- [fasta.total_sequence_length, fasta.size, fasta.md5_sum]
73
+ fasta.each {|entry| total_sequence_length += entry.sequence.size }
74
+ fastasize = fasta.size
75
+ end
76
+ [total_sequence_length, fastasize]
77
+ end
78
+
79
+ #--
80
+ # this is implemented separate from sequence length because seq length
81
+ # uses Archive which doesn't preserve carriage returns and newlines.
82
+ #++
83
+ def self.db_md5sum(dbfile)
84
+ chunksize = 61440
85
+ digest = Digest::MD5.new
86
+ File.open(dbfile) do |io|
87
+ while chunk = io.read(chunksize)
88
+ digest << chunk
89
+ end
70
90
  end
91
+ digest.hexdigest
92
+ end
93
+
94
+ # assumes the file exists and is readable
95
+ # returns [DBSeqLength, DBLocusCount, DBMD5Sum]
96
+ def self.db_info(dbfile)
97
+ # returns the 3 member array
98
+ self.db_seq_length_and_locus_count(dbfile) << self.db_md5sum(dbfile)
71
99
  end
72
100
 
73
101
  def protein_class
@@ -13,9 +13,8 @@ require 'ms/id/search'
13
13
  require 'ms/sequest/params'
14
14
 
15
15
  # for conversions
16
- require 'ms/sequest/srf/mgf'
16
+ require 'ms/sequest/srf/search'
17
17
  require 'ms/sequest/srf/sqt'
18
- require 'ms/sequest/srf/dta'
19
18
 
20
19
  module Ms ; end
21
20
  module Ms::Sequest ; end
@@ -96,9 +95,6 @@ class Ms::Sequest::Srf
96
95
  end
97
96
  end
98
97
 
99
- def round(float, decimal_places)
100
- sprintf("%.#{decimal_places}f", float)
101
- end
102
98
 
103
99
  # 1. updates the out_file's list of hits based on passing peptides (but not
104
100
  # the original hit id; rank is implicit in array ordering)
@@ -202,7 +198,6 @@ END
202
198
  # give each hit a base_name, first_scan, last_scan
203
199
  @index.each_with_index do |ind,i|
204
200
  mass_measured = @dta_files[i][0]
205
- #puts @out_files[i].join(", ")
206
201
  @out_files[i][0,3] = *ind
207
202
  pep_hits = @out_files[i][6]
208
203
  @peps.push( *pep_hits )
@@ -212,6 +207,9 @@ END
212
207
  pep_hit[11] = pep_hit[0] - mass_measured # real - measured (deltamass)
213
208
  pep_hit[12] = 1.0e6 * pep_hit[11].abs / mass_measured ## ppm
214
209
  pep_hit[18] = self ## link with the srf object
210
+ if pep_hit.first_scan == 5719
211
+ puts [pep_hit.sequence, pep_hit.xcorr].join(' ')
212
+ end
215
213
  end
216
214
  end
217
215
 
@@ -391,6 +389,7 @@ class Ms::Sequest::Srf::DTA
391
389
  Unpack_32 = "EeIvvvv"
392
390
  Unpack_35 = "Ex8eVx2vvvv"
393
391
 
392
+
394
393
  # note on peaks (self[7])
395
394
  # this is a byte array of floats, you can get the peaks out with
396
395
  # unpack("e*")
@@ -442,6 +441,10 @@ class Ms::Sequest::Srf::DTA
442
441
  io.print to_dta_file_data
443
442
  end
444
443
 
444
+ def round(float, decimal_places)
445
+ sprintf("%.#{decimal_places}f", float)
446
+ end
447
+
445
448
  end
446
449
 
447
450
 
@@ -518,7 +521,6 @@ end
518
521
 
519
522
 
520
523
  Ms::Sequest::Srf::Out::Pep = Arrayclass.new( %w(mh deltacn_orig sp xcorr id num_other_loci rsp ions_matched ions_total sequence prots deltamass ppm aaseq base_name first_scan last_scan charge srf deltacn deltacn_orig_updated) )
521
-
522
524
  # 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
523
525
 
524
526
  class Ms::Sequest::Srf::Out::Pep
@@ -0,0 +1,135 @@
1
+
2
+ require 'ms/sequest/srf'
3
+ require 'ms/mass'
4
+
5
+ # These are for outputting formats used in MS/MS Search engines
6
+
7
+ module Ms
8
+ module Sequest
9
+ class Srf
10
+
11
+ # Writes an MGF file to given filename or base_name + '.mgf' if no
12
+ # filename given.
13
+ #
14
+ # This mimics the output of merge.pl from Mascot. The only difference is
15
+ # that this does not include the "\r\n" that is found after the peak
16
+ # lists, instead, it uses "\n" throughout the file (thinking that this
17
+ # is preferable to mixing newline styles!)
18
+ def to_mgf(filename=nil)
19
+ filename =
20
+ if filename ; filename
21
+ else
22
+ base_name + '.mgf'
23
+ end
24
+ h_plus = Ms::Mass::MASCOT_H_PLUS
25
+ File.open(filename, 'wb') do |out|
26
+ dta_files.zip(index) do |dta, i_ar|
27
+ chrg = dta.charge
28
+ out.print "BEGIN IONS\n"
29
+ out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
30
+ out.print "CHARGE=#{chrg}+\n"
31
+ out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
32
+ peak_ar = dta.peaks.unpack('e*')
33
+ (0...(peak_ar.size)).step(2) do |i|
34
+ out.print( peak_ar[i,2].join(' '), "\n")
35
+ end
36
+ out.print "END IONS\n"
37
+ out.print "\n"
38
+ end
39
+ end
40
+ end
41
+
42
+ # if no out_folder is given, base_name is used as the output folder name
43
+ # compress may be: :zip, :tgz, or nil (no compression)
44
+ # :zip requires gem rubyzip to be installed and is *very* bloated
45
+ # as it writes out all the files first!
46
+ # :tgz requires gem archive-tar-minitar to be installed
47
+ def to_dta_files(out_folder=nil, compress=nil)
48
+ outdir =
49
+ if out_folder ; out_folder
50
+ else base_name
51
+ end
52
+
53
+ case compress
54
+ when :tgz
55
+ begin
56
+ require 'archive/tar/minitar'
57
+ rescue LoadError
58
+ abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
59
+ end
60
+ require 'archive/targz' # my own simplified interface!
61
+ require 'zlib'
62
+ names = index.map do |i_ar|
63
+ [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
64
+ end
65
+ #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
66
+
67
+ tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
68
+
69
+ Archive::Tar::Minitar::Output.open(tgz) do |outp|
70
+ dta_files.each_with_index do |dta_file, i|
71
+ Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
72
+ end
73
+ end
74
+ when :zip
75
+ begin
76
+ require 'zip/zipfilesystem'
77
+ rescue LoadError
78
+ abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
79
+ end
80
+ #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
81
+ Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
82
+ dta_files.zip(index) do |dta,i_ar|
83
+ #zfs.mkdir(outdir)
84
+ zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
85
+ dta.write_dta_file(out)
86
+ #zfs.commit
87
+ end
88
+ end
89
+ end
90
+ else # no compression
91
+ FileUtils.mkpath(outdir)
92
+ Dir.chdir(outdir) do
93
+ dta_files.zip(index) do |dta,i_ar|
94
+ File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
95
+ dta.write_dta_file(out)
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
101
+
102
+ # Ms::Sequest::Srf::SrfToSearch::task converts to MS formats for DB
103
+ # searching
104
+ #
105
+ # outputs the appropriate file or directory structure for <file>.srf:
106
+ # <file>.mgf # file for mgf
107
+ # <file> # the basename directory for dta
108
+ class SrfToSearch < Tap::Task
109
+ config :format, "mgf", :short => 'f' # mgf|dta (default: mgf)
110
+ def process(srf_file)
111
+ base = srf_file.sub(/\.srf$/i, '')
112
+ newfile =
113
+ case format
114
+ when 'dta'
115
+ base
116
+ when 'mgf'
117
+ base << '.' << format
118
+ end
119
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false )
120
+ # options just speed up reading since we don't need .out info anyway
121
+ case format
122
+ when 'mgf'
123
+ srf.to_mgf(newfile)
124
+ when 'dta'
125
+ srf.to_dta_files(newfile)
126
+ end
127
+ end
128
+ end
129
+
130
+
131
+ end # Srf
132
+ end # Sequest
133
+ end # Ms
134
+
135
+
@@ -1,4 +1,5 @@
1
-
1
+ require 'tap/task'
2
+ require 'configurable'
2
3
  require 'ms/sequest'
3
4
  require 'ms/sequest/srf'
4
5
  require 'ms/sequest/sqt'
@@ -93,7 +94,7 @@ module Ms
93
94
 
94
95
  if opt[:db_info]
95
96
  if File.exist?(db_filename)
96
- reply = Ms::Sequest::Sqt.get_db_info(db_filename)
97
+ reply = Ms::Sequest::Sqt.db_info(db_filename)
97
98
  %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
98
99
  hh[label] = val
99
100
  end
@@ -164,6 +165,27 @@ module Ms
164
165
  end # close the filehandle
165
166
  end # method
166
167
 
168
+ # SrfToSqt::task convert .srf to .sqt files
169
+ class SrfToSqt < Tap::Task
170
+ config :db_info, false, :short => 'd', &c.flag # calculates num aa's and md5sum on db
171
+ # if your database path has changed
172
+ # and you want db-info, then give the
173
+ # path to the new *directory*
174
+ # e.g. /my/new/path
175
+ config :db_path, nil, :short => 'p'
176
+ config :db_update, false, :short => 'u', &c.flag # update the sqt file to reflect --db_path
177
+ config :no_filter, false, :short => 'n', &c.flag # by default, pephit must be within peptide_mass_tolerance (defined in sequest.params) to be included. Turns this off.
178
+ config :round, false, :short => 'r', &c.flag # round floating point values reasonably
179
+
180
+ def process(srf_file)
181
+ new_filename = srf_file.sub(/\.srf$/i, '') << '.sqt'
182
+
183
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => !no_filter)
184
+
185
+ srf.to_sqt(new_filename, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
186
+
187
+ end # process
188
+ end # SrfToSqt
167
189
  end # Srf
168
190
  end # Sequest
169
191
  end # Ms
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-sequest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-05-14 00:00:00 -06:00
12
+ date: 2009-06-18 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -32,10 +32,31 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.0.1
34
34
  version:
35
- description:
35
+ - !ruby/object:Gem::Dependency
36
+ name: tap
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.17.1
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: ms-fasta
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.2.3
54
+ version:
55
+ description: reads .SRF, .SQT and supports conversions
36
56
  email: jtprince@gmail.com
37
- executables: []
38
-
57
+ executables:
58
+ - srf_to_sqt.rb
59
+ - srf_to_search.rb
39
60
  extensions: []
40
61
 
41
62
  extra_rdoc_files:
@@ -44,6 +65,7 @@ extra_rdoc_files:
44
65
  - History
45
66
  files:
46
67
  - lib/ms/sequest/params.rb
68
+ - lib/ms/sequest/srf/search.rb
47
69
  - lib/ms/sequest/srf/sqt.rb
48
70
  - lib/ms/sequest/srf.rb
49
71
  - lib/ms/sequest/sqt.rb