ms-xcalibur 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
2
+ Developer:: Simon Chiang, Biomolecular Structure Program, Hansen Lab
3
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
+ software and associated documentation files (the "Software"), to deal in the Software
7
+ without restriction, including without limitation the rights to use, copy, modify, merge,
8
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
9
+ to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or
12
+ substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,22 @@
1
+ = {Ms-Xcalibur}[http://mspire.rubyforge.org/projects/ms-xcalibur]
2
+
3
+ An {Mspire}[http://mspire.rubyforge.org] library supporting {Xcalibur}[http://www.thermo.com/com/cda/product/detail/1,,1000001009250,00.html].
4
+
5
+ == Description
6
+
7
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
8
+ * Github[http://github.com/bahuvrihi/ms-xcalibur/tree/master]
9
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
10
+
11
+ == Installation
12
+
13
+ Ms-Xcalibur is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
14
+
15
+ % gem install ms-xcalibur
16
+
17
+ == Info
18
+
19
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
20
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
21
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
22
+ Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]
@@ -0,0 +1,88 @@
1
+ require 'constants'
2
+
3
+ module Ms
4
+ module Xcalibur
5
+ module Convert
6
# :startdoc::manifest convert dta files to mgf format
# Converts a set of .dta files (Sequest format) into a single .mgf (Mascot
# generic format) file.  Each .dta file becomes one BEGIN IONS/END IONS
# record in the output.
#
# dta format:
#   [input_file.dta]
#   353.128 1
#   85.354 2.2
#   87.302 2.8
#   ...
#
# mgf format:
#   [output_file.mgf]
#   BEGIN IONS
#   TITLE=input_file.dta
#   CHARGE=1+
#   PEPMASS=<calculated>
#   85.354 2.2
#   87.302 2.8
#   ...
#
#   END IONS
#
# The first line of the dta file specifies the M+H (mh) and charge state (z) of
# the precursor ion.  PEPMASS is calculated as (mh + (z-1) * H) / z where
# H is the mass of a proton, ie hydrogen - electron.  The mass of a proton is
# calculated from the {constants}[bioactive.rubyforge.org/constants] gem to be
# ~ 1.007276 Da
#
class DtaToMgf < Tap::FileTask
  include Constants::Libraries

  # The unrounded mass of a proton (H - e) as calculated
  # from the {constants}[bioactive.rubyforge.org/constants] gem.
  config :proton_mass, Element['H'].mass - Particle['Electron'].mass, &c.num_or_nil # allows specification of an alternate proton mass

  # Writes one mgf record per .dta file into output_file and returns
  # output_file.
  #
  # Each input is either a directory (all of its *.dta files are used) or
  # the path of a single .dta file.  When no inputs are given nothing is
  # written and output_file is returned immediately.
  #
  # Raises a RuntimeError when an input is neither a directory nor a .dta
  # file, when a .dta file lacks the leading "mh z" precursor line, or when
  # the charge state on that line is 0 (PEPMASS would require a division
  # by zero).
  def process(output_file, *inputs)
    return output_file if inputs.empty?

    dta_files = inputs.collect do |file|
      if File.directory?(file)
        Dir.glob(File.expand_path(File.join(file, "*.dta")))
      else
        raise "Not a .dta file: #{file}" unless file =~ /\.(dta)$/
        file
      end
    end.flatten

    prepare(output_file)
    File.open(output_file, "wb") do |target|
      h = proton_mass

      dta_files.each do |file|
        lines = File.read(file).split(/\r?\n/)

        # The first line holds the precursor M+H and charge.  Splitting
        # without a pattern ignores leading whitespace, which would
        # otherwise shift the mass into the charge position.
        mh, z = lines.shift.to_s.split
        unless mh && z
          raise "Missing precursor line (M+H and charge) in .dta file: #{file}"
        end

        mh = mh.to_f
        z = z.to_i
        raise "Invalid charge state (0) in .dta file: #{file}" if z == 0

        # One record: header lines, the remaining peak lines verbatim, an
        # empty line, END IONS, and an empty line separating records.
        record = [
          "BEGIN IONS",
          "TITLE=#{File.basename(file)}",
          "CHARGE=#{z}+",
          "PEPMASS=#{(mh + (z - 1) * h) / z}"
        ] + lines + ["", "END IONS", "", ""]

        target << record.join("\n")
      end
    end
    log(:made, output_file)

    output_file
  end
end
85
+
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,143 @@
1
+ module Ms
2
+ module Xcalibur
3
+ module Convert
4
# :startdoc::manifest convert RAW files to dta format
# Converts a .RAW file to dta files using extract_msn.exe
#
# extract_msn.exe is an Xcalibur/BioWorks tool that extracts spectra from .RAW
# files into .dta (Sequest) format and must be installed for RawToDta to work.
# RawToDta was developed against extract_msn version 4.0.  You can check if
# extract_msn is installed at the default location, as well as determine the
# version of your executable using:
#
#   % tap run -- xcalibur/convert/raw_to_dta --extract_msn_help
#
class RawToDta < Tap::FileTask
  config :extract_msn, 'C:\Xcalibur\System\Programs\extract_msn.exe' # the full path to the extract_msn executable
  config :first_scan, nil, &c.integer_or_nil                         # (-F)
  config :last_scan, nil, &c.integer_or_nil                          # (-L)
  config :lower_MW, nil, &c.num_or_nil                               # (-B)
  config :upper_MW, nil, &c.num_or_nil                               # (-T)
  config :precursor_mass_tol, 1.4, &c.num                            # (-M)
  config :num_allowed_intermediate_scans_for_grouping, 1, &c.integer # (-S)
  config :charge_state, nil, &c.integer_or_nil                       # (-C)
  config :num_required_group_scans, 1, &c.integer_or_nil             # (-G)
  config :num_ions_required, 0, &c.integer_or_nil                    # (-I)
  config :intensity_threshold, nil, &c.integer_or_nil                # (-E)
  config :use_unified_search_file, nil, &c.flag                      # (-U)
  config :subsequence, nil                                           # (-Y)
  config :write_zta_files, nil, &c.flag                              # (-Z)
  config :perform_charge_calculations, nil, &c.flag                  # (-K)
  config :template_file, nil                                         # (-O)
  config :options_string, nil                                        # (-A)
  config :minimum_signal_to_noise, 3, &c.num                         # (-R)
  config :minimum_number_of_peaks, 5, &c.integer                     # (-r)

  config_attr(:extract_msn_help, nil, :arg_type => :flag) do |value| # Print the extract_msn help
    if value
      sh(extract_msn)
      exit
    end
  end

  # Pairs each option name with the single-letter extract_msn.exe switch
  # it is rendered as.  The order here is the order of the switches in
  # the generated command.
  CONFIG_MAP = [
    [:first_scan, 'F'],
    [:last_scan, 'L'],
    [:lower_MW, 'B'],
    [:upper_MW, 'T'],
    [:precursor_mass_tol, 'M'],
    [:num_allowed_intermediate_scans_for_grouping, 'S'],
    [:charge_state, 'C'],
    [:num_required_group_scans, 'G'],
    [:num_ions_required, 'I'],
    [:output_path, 'D'],
    [:intensity_threshold, 'E'],
    [:use_unified_search_file, 'U'],
    [:subsequence, 'Y'],
    [:write_zta_files, 'Z'],
    [:perform_charge_calculations, 'K'],
    [:template_file, 'O'],
    [:options_string, 'A'],
    [:minimum_signal_to_noise, 'R'],
    [:minimum_number_of_peaks, 'r']
  ]

  # Returns the expanded path with every forward slash (/) replaced
  # by a backslash (\), ie a Windows-style path.
  def normalize(path)
    File.expand_path(path).tr('/', '\\')
  end

  # Builds the switch portion of the extract_msn.exe command line by
  # walking CONFIG_MAP in order.
  #
  # The value for the -D (output directory) switch is the output_dir
  # argument; every other value is read from the current configuration.
  # Switches whose value is nil or false are left out.
  def cmd_options(output_dir=nil)
    switches = []

    CONFIG_MAP.each do |key, flag|
      value = flag == "D" ? output_dir : config[key]
      next unless value

      argument =
        if [:use_unified_search_file, :perform_charge_calculations, :write_zta_files].include?(key)
          # bare switches take no argument
          ""
        elsif [:output_path, :template_file].include?(key)
          # path arguments are normalized and quoted
          "\"#{normalize value}\""
        else
          # numbers and strings are simply stringified
          value.to_s
        end

      switches << "-#{flag}#{argument}"
    end

    switches.join(" ")
  end

  # Returns the full extract_msn.exe command for input_file: the quoted
  # executable path, the switches from cmd_options(output_dir), and the
  # quoted input path, separated by single spaces.
  #
  # Note that output_dir should be an EXISTING filepath or relative
  # filepath.  extract_msn.exe will not generate .dta files if the
  # output_dir doesn't exist.
  def cmd(input_file, output_dir=nil)
    [
      "\"#{normalize extract_msn}\"",
      cmd_options(output_dir),
      "\"#{normalize input_file}\""
    ].join(' ')
  end

  # Runs extract_msn.exe on input_file and returns the paths of the .dta
  # files found in the output directory afterwards.
  #
  # When output_dir is nil the output goes to a directory named after
  # input_file with its extension removed.  The directory is created
  # before the command runs.  Raises a RuntimeError unless input_file has
  # a .RAW extension (case-insensitive).
  def process(input_file, output_dir=nil)
    unless File.extname(input_file) =~ /\.RAW$/i
      raise "Expected .RAW file: #{input_file}"
    end

    if output_dir.nil?
      output_dir = input_file.chomp(File.extname(input_file))
    end

    mkdir(output_dir)
    command = cmd(input_file, output_dir)
    log :sh, command

    if app.quiet
      capture_sh(command, true)
    else
      sh(command)
      puts ""  # add extra line to make logging nice
    end

    # This may select additional .dta files that existed before raw_to_dta
    # TODO - maybe read lcq_dta for files?
    dta_pattern = File.expand_path(File.join(output_dir, "*.dta"))
    Dir.glob(dta_pattern)
  end
end
141
+ end
142
+ end
143
+ end
@@ -0,0 +1,86 @@
1
+ require 'ms/xcalibur/convert/raw_to_dta'
2
+ require 'ms/xcalibur/convert/dta_to_mgf'
3
+
4
+ module Ms
5
+ module Xcalibur
6
+ module Convert
7
# :startdoc::manifest convert RAW files to mgf format
# Extracts spectra from a .RAW file and formats them as mgf (Mascot
# Generic Format).  RawToMgf is a workflow that uses the RawToDta
# and DtaToMgf tasks, and can be configured through these tasks
# using the following configuration files:
#
#   config/xcalibur/convert
#   |- raw_to_mgf.yml             # configures RawToMgf
#   `- raw_to_mgf
#      |- raw_to_dta.yml          # configures RawToDta
#      `- dta_to_mgf.yml          # configures DtaToMgf
#
# Mgf files are named after the RAW file they represent and are written
# to the directory of the merge file; the group merge file is named
# 'merge.mgf' although an alternate merge file name can be specified in
# the options.
#
class RawToMgf < Tap::Task

  define :raw_to_dta, Xcalibur::Convert::RawToDta
  define :dta_to_mgf, Xcalibur::Convert::DtaToMgf
  define :cleanup do |raw_dir|
    log :rm, raw_dir

    # Remove the known contents one step at a time, then the directory
    # itself; rmdir only removes an empty directory.
    dta_files = Dir.glob(raw_dir + "/*.dta")
    log_files = ["#{raw_dir}/lcq_dta.txt", "#{raw_dir}/lcq_profile.txt"]

    FileUtils.rm dta_files
    FileUtils.rm log_files
    FileUtils.rmdir raw_dir
  end

  config :merge_file, 'merge.mgf'            # the group merge file
  config :merge_individual, true, &c.switch  # merge the dta's for each RAW file
  config :merge_group, true, &c.switch       # merge the dta's for all RAW files
  config :remove_dta_files, true, &c.switch  # clean up dta files upon completion

  # Registers the on_complete handler of raw_to_dta.  For each completed
  # result the handler writes the per-RAW-file mgf (when merge_individual
  # is set) and, once as many results have completed as there are inputs,
  # writes the group merge file (when merge_group is set).
  def workflow
    completed = []

    raw_to_dta.on_complete do |_result|
      if merge_individual
        raw_file = _result._original[0]
        mgf_name = File.basename(raw_file).chomp(File.extname(raw_file)) + ".mgf"
        mgf_path = File.join(File.dirname(merge_file), mgf_name)
        dta_to_mgf.execute(mgf_path, *_result._iterate)
      end

      # results are collected to determine when all the input
      # files have been processed by raw_to_dta
      completed << _result

      if completed.length == @n_inputs && merge_group
        dta_files = completed.collect {|done| done._iterate }.flatten
        dta_to_mgf.execute(merge_file, *dta_files)
      end
    end
  end

  # Executes raw_to_dta for each input file, targeting a directory named
  # after the file's basename without its extension, then runs cleanup on
  # those directories when remove_dta_files is set.  Always returns nil.
  def process(*input_files)
    @n_inputs = input_files.length

    dta_dirs = input_files.collect do |raw_file|
      dir = File.basename(raw_file).chomp(File.extname(raw_file))
      raw_to_dta.execute(raw_file, dir)
      dir
    end

    dta_dirs.each {|dir| cleanup.process(dir) } if remove_dta_files

    @n_inputs = nil
    nil
  end

end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,92 @@
1
+ module Ms
2
+ module Xcalibur
3
# A simple representation of a peak file exported from Xcalibur Qual
# Browser (v 2.0).  The expected format of a peak file is as shown below:
#
#   [peak_file.txt]
#   SPECTRUM - MS
#   GSE_T29K_080703143635.raw
#   ITMS + c ESI Full ms [300.00-2000.00]
#   Scan #: 11
#   RT: 0.07
#   Data points: 1490
#   Mass	Intensity
#   300.516479	2000.0
#   301.392487	1000.0
#   302.465759	3000.0
#   ...
#
# Lines before the 'Mass Intensity' line that match the pattern
# 'key: value' are parsed as headers, while other lines (ex: SPECTRUM - MS)
# are parsed into the description.  Every non-blank line after the
# 'Mass Intensity' line is parsed as a data point.
#
class PeakFile

  class << self

    # Parses the input string into a PeakFile.
    #
    # Header values and description lines are stripped of surrounding
    # whitespace.  Data lines are split on runs of whitespace and each
    # field is converted with to_f; blank data lines are skipped so they
    # do not produce empty points.
    def parse(str)
      peak_file = PeakFile.new
      mode = :header

      str.each_line do |line|
        if mode == :data
          # a bare split ignores leading/trailing whitespace and treats
          # consecutive whitespace as a single separator
          fields = line.split
          next if fields.empty?

          peak_file.data << fields.collect {|field| field.to_f }
          next
        end

        case line
        when /^(.*?): (.*)$/
          peak_file.headers[$1] = $2.strip
        when /Mass\sIntensity/
          mode = :data
        else
          peak_file.desc << line.strip
        end
      end

      peak_file
    end
  end

  # The order of headers observed in export files
  HEADER_ORDER = [
    "Scan #",
    "RT",
    "Mass defect",
    "Data points"
  ]

  # An array of description lines
  attr_accessor :desc

  # A hash of headers
  attr_accessor :headers

  # An array of (mz, intensity) values
  attr_accessor :data

  def initialize(desc=[], headers={}, data=[])
    @desc = desc
    @headers = headers
    @data = data
  end

  # Recreates the peak file as a string, terminating every line with sep.
  #
  # The output consists of the description lines, the headers, the
  # "Mass\tIntensity" line, and one tab-joined line per data point.
  # Headers listed in HEADER_ORDER come first, in that order; any other
  # headers follow so that no parsed header is lost.
  def to_s(sep="\r\n")
    known_keys = HEADER_ORDER.select {|key| headers.has_key?(key) }
    other_keys = headers.keys - HEADER_ORDER

    header_lines = (known_keys + other_keys).collect do |key|
      "#{key}: #{headers[key]}"
    end
    data_lines = data.collect {|point| point.join("\t") }

    lines = desc + header_lines + ["Mass\tIntensity"] + data_lines
    lines.join(sep) + sep
  end
end
91
+ end
92
+ end
@@ -0,0 +1,65 @@
1
+ require 'ms/xcalibur/peak_file'
2
+
3
+ module Ms
4
+ module Xcalibur
5
# :startdoc::manifest adds graph data to an exported peak file
# Peakify adds points to signify the relative intensity
# (ie the rounded intensity/max_intensity, as a percentage) of peaks in
# an exported peak list.  This can be useful as a visual aid.
#
#   [a_sample_result.txt]
#   SPECTRUM - MS
#   GSE_T29K_080703143635.raw
#   ITMS + c ESI Full ms [300.00-2000.00]
#   Scan #: 11
#   RT: 0.07
#   Data points: 1490
#   Mass	Intensity
#   300.516479	2000.0	.................................
#   301.392487	1000.0	.................
#   302.465759	3000.0	..................................................
#   ...
#
# (The runs of points above are shortened for illustration; one point is
# written per rounded percent.)
#
# Options can be specified to filter out points within a
# range of relative intensities.  Peakify can handle exported
# peak lists from Xcalibur Qual Browser (v 2.0).
#
class Peakify < Tap::FileTask

  config :point_char, '.'        # a character used for each intensity point
  config :min, 0, &c.num         # min relative intensity
  config :max, 100, &c.num       # max relative intensity
  config :sort, false, &c.flag   # sort by intensity

  # Reads the peak file at source, appends a run of point_char to each
  # data point in proportion to its relative intensity, and writes the
  # result to target.  Returns target.
  #
  # Points whose relative intensity (percent of the maximum intensity)
  # lies outside min..max are removed.  When sort is set the remaining
  # points are ordered by descending intensity.
  def process(source, target=basepath(source, 'peaks.txt'))
    prepare(target)

    peak_file = PeakFile.parse File.read(source)

    # Ignore entries without both an mz and an intensity (for instance
    # the empty points produced by blank lines); they cannot be scaled.
    points = peak_file.data.select {|point| point.length >= 2 }

    max_intensity = points.inject(0) do |highest, (mz, intensity)|
      intensity > highest ? intensity : highest
    end

    range = min..max
    peak_file.data = points.collect do |(mz, intensity)|
      # With no positive intensity there is nothing to scale against;
      # treat every point as 0% rather than dividing by zero.
      percent = max_intensity > 0 ? (intensity / max_intensity * 100) : 0.0
      next unless range.include?(percent)

      # String#* rejects negative counts, so never go below zero points
      n_points = [percent.round, 0].max
      [mz, intensity, point_char * n_points]
    end.compact

    if sort
      peak_file.data = peak_file.data.sort_by do |(mz, intensity)|
        intensity
      end.reverse
    end

    File.open(target, "wb") do |file|
      file << peak_file.to_s
    end

    target
  end
end
64
+ end
65
+ end
data/tap.yml ADDED
File without changes
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-xcalibur
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-03 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.11"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: constants
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0.1"
34
+ version:
35
+ description:
36
+ email: simon.a.chiang@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ - MIT-LICENSE
44
+ files:
45
+ - lib/ms/xcalibur/convert/dta_to_mgf.rb
46
+ - lib/ms/xcalibur/convert/raw_to_dta.rb
47
+ - lib/ms/xcalibur/convert/raw_to_mgf.rb
48
+ - lib/ms/xcalibur/peak_file.rb
49
+ - lib/ms/xcalibur/peakify.rb
50
+ - tap.yml
51
+ - README
52
+ - MIT-LICENSE
53
+ has_rdoc: true
54
+ homepage: http://mspire.rubyforge.org/projects/ms-xcalibur/
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project: mspire
75
+ rubygems_version: 1.3.1
76
+ signing_key:
77
+ specification_version: 2
78
+ summary: An Mspire library supporting Xcalibur.
79
+ test_files: []
80
+