ms-xcalibur 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
2
+ Developer:: Simon Chiang, Biomolecular Structure Program, Hansen Lab
3
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
+ software and associated documentation files (the "Software"), to deal in the Software
7
+ without restriction, including without limitation the rights to use, copy, modify, merge,
8
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
9
+ to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or
12
+ substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ = {Ms-Xcalibur}[http://mspire.rubyforge.org/projects/ms-xcalibur]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting {Xcalibur}[http://www.thermo.com/com/cda/product/detail/1,,1000001009250,00.html].
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/bahuvrihi/ms-xcalibur/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Xcalibur is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-xcalibur
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
20
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
21
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
22
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,88 @@
1
+ require 'constants'
2
+
3
module Ms
  module Xcalibur
    module Convert
      # :startdoc::manifest convert dta files to mgf format
      # Converts a set of .dta files (Sequest format) into an .mgf (Mascot format)
      # file. The conversion is straightforward.
      #
      # dta format:
      #   [input_file.dta]
      #   353.128 1
      #   85.354 2.2
      #   87.302 2.8
      #   ...
      #
      # mgf format:
      #   [output_file.mgf]
      #   BEGIN IONS
      #   TITLE=input_file.dta
      #   CHARGE=1+
      #   PEPMASS=<calculated>
      #   85.354 2.2
      #   87.302 2.8
      #   ...
      #   END IONS
      #
      # The first line of the dta file specifies the M+H (mh) and charge state (z) of
      # the precursor ion. To convert this to PEPMASS, use (mh + (z-1) * H) / z where
      # H is the mass of a proton, ie hydrogen - electron. The mass of a proton is
      # calculated from the {constants}[bioactive.rubyforge.org/constants] gem to be
      # ~ 1.007276 Da
      #
      class DtaToMgf < Tap::FileTask
        include Constants::Libraries

        # Returns the unrounded mass of a proton (H - e) as calculated
        # from the {constants}[bioactive.rubyforge.org/constants] gem.
        config :proton_mass, Element['H'].mass - Particle['Electron'].mass, &c.num_or_nil # allows specification of an alternate proton mass

        # Writes one BEGIN IONS/END IONS record per .dta file into output_file.
        #
        # Each input is either a path ending in '.dta' or a directory, in
        # which case every '*.dta' file directly inside it is used.  Returns
        # output_file; nothing is written when no inputs are given.
        #
        # Raises a RuntimeError when an input is neither a directory nor a
        # .dta path, when a .dta file is empty, or when the precursor charge
        # on its first line is not a positive integer.
        def process(output_file, *inputs)
          return output_file if inputs.empty?

          dta_files = inputs.collect do |file|
            if File.directory?(file)
              # Sorted so the record order does not depend on the order in
              # which the filesystem happens to list the directory.
              Dir.glob(File.expand_path(File.join(file, "*.dta"))).sort
            else
              raise "Not a .dta file: #{file}" unless file =~ /\.(dta)$/
              file
            end
          end.flatten

          prepare(output_file)
          File.open(output_file, "wb") do |target|
            h = proton_mass

            dta_files.each do |file|
              #log_basename(:merging, file)
              lines = File.read(file).split(/\r?\n/)

              # The first line holds the M+H and the charge state.
              header = lines.shift
              raise "Empty .dta file: #{file}" if header.nil?

              # Splitting on ' ' ignores leading whitespace; splitting on
              # /\s+/ would yield a leading "" and shift mh and z by one.
              mh, z = header.split(' ')
              mh = mh.to_f
              z = z.to_i

              # A zero charge would otherwise be written as PEPMASS=Infinity
              # (or NaN) because of the division below.
              raise "Invalid precursor charge in .dta file: #{file}" unless z > 0

              # add a trailing empty line
              lines << ""

              # make the output
              target << %Q{BEGIN IONS
TITLE=#{File.basename(file)}
CHARGE=#{z}+
PEPMASS=#{(mh + (z-1) * h)/ z}
#{lines.join("\n")}
END IONS

}
            end
          end
          log(:made, output_file)

          output_file
        end
      end

    end
  end
end
@@ -0,0 +1,143 @@
1
module Ms
  module Xcalibur
    module Convert
      # :startdoc::manifest convert RAW files to dta format
      # Converts a .RAW file to dta files using extract_msn.exe
      #
      # extract_msn.exe is an Xcalibur/BioWorks tool that extracts spectra from .RAW
      # files into .dta (Sequest) format and must be installed for RawToDta to work.
      # RawToDta was developed against extract_msn version 4.0. You can check if
      # extract_msn is installed at the default location, as well as determine the
      # version of your executable using:
      #
      #   % tap run -- xcalibur/convert/raw_to_dta --extract_msn_help
      #
      class RawToDta < Tap::FileTask
        config :extract_msn, 'C:\Xcalibur\System\Programs\extract_msn.exe' # the full path to the extract_msn executable
        config :first_scan, nil, &c.integer_or_nil # (-F)
        config :last_scan, nil, &c.integer_or_nil # (-L)
        config :lower_MW, nil, &c.num_or_nil # (-B)
        config :upper_MW, nil, &c.num_or_nil # (-T)
        config :precursor_mass_tol, 1.4, &c.num # (-M)
        config :num_allowed_intermediate_scans_for_grouping, 1, &c.integer # (-S)
        config :charge_state, nil, &c.integer_or_nil # (-C)
        config :num_required_group_scans, 1, &c.integer_or_nil # (-G)
        config :num_ions_required, 0, &c.integer_or_nil # (-I)
        config :intensity_threshold, nil, &c.integer_or_nil # (-E)
        config :use_unified_search_file, nil, &c.flag # (-U)
        config :subsequence, nil # (-Y)
        config :write_zta_files, nil, &c.flag # (-Z)
        config :perform_charge_calculations, nil, &c.flag # (-K)
        config :template_file, nil # (-O)
        config :options_string, nil # (-A)
        config :minimum_signal_to_noise, 3, &c.num # (-R)
        config :minimum_number_of_peaks, 5, &c.integer # (-r)

        config_attr(:extract_msn_help, nil, :arg_type => :flag) do |value| # Print the extract_msn help
          if value
            # Run the executable with no arguments, then stop the process.
            sh(extract_msn)
            exit
          end
        end

        # Pairs each configuration key with the single-letter extract_msn
        # switch it maps to, in the order the switches are emitted.
        CONFIG_MAP = [
          [:first_scan, 'F'],
          [:last_scan, 'L'],
          [:lower_MW, 'B'],
          [:upper_MW, 'T'],
          [:precursor_mass_tol, 'M'],
          [:num_allowed_intermediate_scans_for_grouping, 'S'],
          [:charge_state, 'C'],
          [:num_required_group_scans, 'G'],
          [:num_ions_required, 'I'],
          [:output_path, 'D'],
          [:intensity_threshold, 'E'],
          [:use_unified_search_file, 'U'],
          [:subsequence, 'Y'],
          [:write_zta_files, 'Z'],
          [:perform_charge_calculations, 'K'],
          [:template_file, 'O'],
          [:options_string, 'A'],
          [:minimum_signal_to_noise, 'R'],
          [:minimum_number_of_peaks, 'r']
        ]

        # Returns the expanded form of path with every forward slash (/)
        # replaced by a backslash (\), ie a Windows-style path.
        def normalize(path)
          expanded = File.expand_path(path)
          expanded.gsub(%r{/}, "\\")
        end

        # Builds the switch portion of the extract_msn.exe command line from
        # the current configuration, following the order of CONFIG_MAP.
        #
        # The -D switch takes its value from the output_dir argument rather
        # than from the configuration.  Any entry whose value is nil or false
        # is left out.
        def cmd_options(output_dir=nil)
          bare_switches = [:use_unified_search_file, :perform_charge_calculations, :write_zta_files]
          path_switches = [:output_path, :template_file]

          switches = []
          CONFIG_MAP.each do |name, letter|
            setting = (letter == "D" ? output_dir : config[name])
            next unless setting

            argument =
              if bare_switches.include?(name)
                # flag-style switch, no argument
                ""
              elsif path_switches.include?(name)
                # path argument: normalize and wrap in double quotes
                "\"#{normalize setting}\""
              else
                # number or string argument: stringify as-is
                setting.to_s
              end

            switches << "-#{letter}#{argument}"
          end

          switches.join(" ")
        end

        # Builds the complete extract_msn.exe command line for input_file:
        # the quoted executable path, the switches from cmd_options, then the
        # quoted input path.
        #
        # The comment this replaces noted that output_dir should be an
        # EXISTING filepath or relative filepath, because extract_msn.exe will
        # not generate .dta files if the output_dir doesn't exist.
        def cmd(input_file, output_dir=nil)
          executable = "\"#{normalize extract_msn}\""
          switches = cmd_options(output_dir)
          raw_path = "\"#{normalize input_file}\""

          [executable, switches, raw_path].join(' ')
        end

        # Runs extract_msn.exe on input_file and returns the paths of the
        # .dta files found in the output directory afterwards.
        #
        # Raises a RuntimeError unless input_file has a .RAW extension
        # (case-insensitive).
        def process(input_file, output_dir=nil)
          extension = File.extname(input_file)
          if extension !~ /\.RAW$/i
            raise "Expected .RAW file: #{input_file}"
          end

          # Unless told otherwise, write into a directory named after the
          # raw file minus its extension.
          output_dir = input_file.chomp(extension) if output_dir.nil?

          mkdir(output_dir)
          command = cmd(input_file, output_dir)

          log :sh, command
          if app.quiet
            capture_sh(command, true)
          else
            sh(command)
            puts "" # add extra line to make logging nice
          end

          # This may select additional .dta files that existed before raw_to_dta
          # TODO - maybe read lcq_dta for files?
          dta_pattern = File.expand_path(File.join(output_dir, "*.dta"))
          Dir.glob(dta_pattern)
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ require 'ms/xcalibur/convert/raw_to_dta'
2
+ require 'ms/xcalibur/convert/dta_to_mgf'
3
+
4
module Ms
  module Xcalibur
    module Convert
      # :startdoc::manifest convert RAW files to mgf format
      # Extracts spectra from a .RAW file and formats them as mgf (Mascot
      # Generic Format). RawToMgf is a workflow that uses the RawToDta
      # and DtaToMgf tasks, and can be configured through these tasks
      # using the following configuration files:
      #
      #   config/xcalibur/convert
      #   |- raw_to_mgf.yml # configures RawToMgf
      #   `- raw_to_mgf
      #   |- raw_to_dta.yml # configures RawToDta
      #   `- dta_to_mgf.yml # configures DtaToMgf
      #
      # Mgf files are named after the RAW file they represent; the group
      # merge file is named 'merge.mgf' although an alternate merge file
      # name can be specified in the options.
      #
      class RawToMgf < Tap::Task

        define :raw_to_dta, Xcalibur::Convert::RawToDta
        define :dta_to_mgf, Xcalibur::Convert::DtaToMgf
        define :cleanup do |raw_dir|
          log :rm, raw_dir

          # Remove in explicit steps (the dta files, the two extract_msn log
          # files, then the directory itself) to be a little safer than a
          # recursive delete.
          dta_files = Dir.glob(raw_dir + "/*.dta")
          log_files = %w{lcq_dta.txt lcq_profile.txt}.collect {|name| "#{raw_dir}/#{name}" }

          FileUtils.rm dta_files
          FileUtils.rm log_files
          FileUtils.rmdir raw_dir
        end

        config :merge_file, 'merge.mgf' # the group merge file
        config :merge_individual, true, &c.switch # merge the dta's for each RAW file
        config :merge_group, true, &c.switch # merge the dta's for all RAW files
        config :remove_dta_files, true, &c.switch # clean up dta files upon completion

        # Registers an on_complete handler on raw_to_dta.  For each result
        # the handler optionally writes a per-RAW-file mgf next to
        # merge_file, and once as many results have arrived as there were
        # inputs, optionally writes the group merge file.
        def workflow
          completed = []
          raw_to_dta.on_complete do |_result|
            if merge_individual
              raw_file = _result._original[0]
              target_dir = File.dirname(merge_file)
              mgf_name = File.basename(raw_file).chomp(File.extname(raw_file)) + ".mgf"
              dta_to_mgf.execute(File.join(target_dir, mgf_name), *_result._iterate)
            end

            # Track every result so we can tell when all of the input
            # files have been through raw_to_dta.
            completed << _result

            # After the last input has been converted, merge the whole group.
            if completed.length == @n_inputs && merge_group
              all_dta_files = completed.collect {|finished| finished._iterate }.flatten
              dta_to_mgf.execute(merge_file, *all_dta_files)
            end
          end
        end

        # Runs raw_to_dta on every input file, each targeting a directory
        # named after the file's basename without its extension, then runs
        # cleanup on those directories when remove_dta_files is set.
        # Always returns nil.
        def process(*input_files)
          @n_inputs = input_files.length

          dta_dirs = input_files.collect do |raw_file|
            dir = File.basename(raw_file).chomp(File.extname(raw_file))
            raw_to_dta.execute(raw_file, dir)
            dir
          end

          dta_dirs.each {|dir| cleanup.process(dir) } if remove_dta_files

          @n_inputs = nil
          nil
        end

      end
    end
  end
end
@@ -0,0 +1,92 @@
1
module Ms
  module Xcalibur
    # A simple representation of a peak file exported from Xcalibur Qual
    # Browser (v 2.0). The expected format of a peak file is as shown below:
    #
    #   [peak_file.txt]
    #   SPECTRUM - MS
    #   GSE_T29K_080703143635.raw
    #   ITMS + c ESI Full ms [300.00-2000.00]
    #   Scan #: 11
    #   RT: 0.07
    #   Data points: 1490
    #   Mass Intensity
    #   300.516479 2000.0
    #   301.392487 1000.0
    #   302.465759 3000.0
    #   ...
    #
    # Any headers matching the pattern 'key: value' will be parsed as a
    # header, while other lines (ex: SPECTRUM - MS) are parsed into the
    # description.
    #
    class PeakFile

      class << self

        # Parses the input string into a peak file.
        #
        # Lines before the 'Mass Intensity' line are stored as headers
        # ('key: value') or description lines; every later non-blank line
        # becomes one data point of floats.  Returns an instance of the
        # receiver, so subclasses get instances of themselves.
        def parse(str)
          peak_file = new
          mode = :header
          str.each_line do |line|
            case mode
            when :header

              case line
              when /^(.*?): (.*)$/
                peak_file.headers[$1] = $2.strip
              when /Mass\sIntensity/
                mode = :data
              else
                peak_file.desc << line.strip
              end

            when :data
              # Split on runs of whitespace; splitting on a single /\s/
              # turns leading or doubled separators into empty fields that
              # would be read as 0.0.
              fields = line.split(' ')

              # A blank line (typically the trailing one) is not a point.
              next if fields.empty?

              peak_file.data << fields.collect {|field| field.to_f }
            end
          end

          peak_file
        end
      end

      # The order of headers observed in export files
      HEADER_ORDER = [
        "Scan #",
        "RT",
        "Mass defect",
        "Data points"
      ]

      # An array of description lines
      attr_accessor :desc

      # A hash of headers
      attr_accessor :headers

      # An array of (mz, intensity) values
      attr_accessor :data

      def initialize(desc=[], headers={}, data=[])
        @desc = desc
        @headers = headers
        @data = data
      end

      # Recreates the peak file as a string, ending every line with sep.
      #
      # Headers listed in HEADER_ORDER come first, in that order; any other
      # headers follow, sorted by name, so they are not lost on output.
      def to_s(sep="\r\n")
        known_keys = HEADER_ORDER.select {|key| headers.has_key?(key) }
        extra_keys = (headers.keys - HEADER_ORDER).sort_by {|key| key.to_s }

        header_lines = (known_keys + extra_keys).collect do |key|
          "#{key}: #{headers[key]}"
        end
        data_lines = data.collect {|point| point.join("\t") }

        lines = desc + header_lines + ["Mass\tIntensity"] + data_lines
        lines.join(sep) + sep
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'ms/xcalibur/peak_file'
2
+
3
module Ms
  module Xcalibur
    # :startdoc::manifest adds graph data to an exported peak file
    # Peakify adds points to signify the relative intensity
    # (ie the rounded intensity/max_intensity) of peaks in
    # an exported peak list. This can be useful as a visual aid.
    #
    #   [a_sample_result.txt]
    #   SPECTRUM - MS
    #   GSE_T29K_080703143635.raw
    #   ITMS + c ESI Full ms [300.00-2000.00]
    #   Scan #: 11
    #   RT: 0.07
    #   Data points: 1490
    #   Mass Intensity
    #   300.516479 2000.0 .................................
    #   301.392487 1000.0 .................
    #   302.465759 3000.0 ..................................................
    #   ...
    #
    # Options can be specified to filter out points within a
    # range of relative intensities. Peakify can handle exported
    # peak lists from Xcalibur Qual Browser (v 2.0).
    #
    class Peakify < Tap::FileTask

      config :point_char, '.' # a character used for each intensity point
      config :min, 0, &c.num # min relative intensity
      config :max, 100, &c.num # max relative intensity
      config :sort, false, &c.flag # sort by intensity

      # Reads the peak file at source, appends a run of point_char to each
      # data point in proportion to its relative intensity (percent of the
      # largest intensity, rounded), and writes the result to target.
      #
      # Points whose relative intensity falls outside min..max are dropped.
      # When sort is set the points are ordered by descending intensity.
      # Returns target.
      def process(source, target=basepath(source, 'peaks.txt'))
        prepare(target)

        # now perform the task...
        peak_file = PeakFile.parse File.read(source)

        # Ignore rows without an intensity (for example the empty point a
        # blank line can produce); comparing nil below would raise.
        points = peak_file.data.reject {|(mz, intensity)| intensity.nil? }

        max_intensity = points.inject(0) do |current_max, (mz, intensity)|
          intensity > current_max ? intensity : current_max
        end

        range = min..max
        peak_file.data = points.collect do |(mz, intensity)|
          # to_f avoids integer division.  With no positive intensity the
          # ratio would be NaN or infinite and every point would be
          # silently dropped by the range check, so report 0 percent.
          percent = max_intensity > 0 ? (intensity.to_f / max_intensity * 100) : 0.0
          next unless range.include?(percent)

          [mz, intensity, point_char * percent.round]
        end.compact

        if sort
          peak_file.data = peak_file.data.sort_by do |(mz, intensity)|
            intensity
          end.reverse
        end

        File.open(target, "wb") do |file|
          file << peak_file.to_s
        end

        target
      end
    end
  end
end
data/tap.yml ADDED
File without changes
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-xcalibur
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-03 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.11"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: constants
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0.1"
34
+ version:
35
+ description:
36
+ email: simon.a.chiang@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ - MIT-LICENSE
44
+ files:
45
+ - lib/ms/xcalibur/convert/dta_to_mgf.rb
46
+ - lib/ms/xcalibur/convert/raw_to_dta.rb
47
+ - lib/ms/xcalibur/convert/raw_to_mgf.rb
48
+ - lib/ms/xcalibur/peak_file.rb
49
+ - lib/ms/xcalibur/peakify.rb
50
+ - tap.yml
51
+ - README
52
+ - MIT-LICENSE
53
+ has_rdoc: true
54
+ homepage: http://mspire.rubyforge.org/projects/ms-xcalibur/
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: mspire
75
+ rubygems_version: 1.3.1
76
+ signing_key:
77
+ specification_version: 2
78
+ summary: An Mspire library supporting Xcalibur.
79
+ test_files: []
80
+