bahuvrihi-xcalibur 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
2
+ Developer:: Simon Chiang, Biomolecular Structure Program, Hansen Lab
3
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
+ software and associated documentation files (the "Software"), to deal in the Software
7
+ without restriction, including without limitation the rights to use, copy, modify, merge,
8
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
9
+ to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or
12
+ substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
+ OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,86 @@
1
+ require 'constants'
2
+
3
+ module Xcalibur
4
+ module Convert
5
+ # :startdoc::manifest convert dta files to mgf format
6
+ # Converts a set of .dta files (Sequest format) into an .mgf (Mascot format)
7
+ # file. The conversion is straightforward.
8
+ #
9
+ # dta format:
10
+ # [input_file.dta]
11
+ # 353.128 1
12
+ # 85.354 2.2
13
+ # 87.302 2.8
14
+ # ...
15
+ #
16
+ # mgf format:
17
+ # [output_file.mgf]
18
+ # BEGIN IONS
19
+ # TITLE=input_file.dta
20
+ # CHARGE=1+
21
+ # PEPMASS=<calculated>
22
+ # 85.354 2.2
23
+ # 87.302 2.8
24
+ # ...
25
+ # END IONS
26
+ #
27
+ # The first line of the dta file specifies the M+H (mh) and charge state (z) of
28
+ # the precursor ion. To convert this to PEPMASS, use (mh + (z-1) * H) / z where
29
+ # H is the mass of a proton, ie hydrogen - electron. The mass of a proton is
30
+ # calculated from the {constants}[bioactive.rubyforge.org/constants] gem to be
31
+ # ~ 1.007276 Da
32
+ #
33
class DtaToMgf < Tap::FileTask
  include Constants::Libraries

  # Returns the unrounded mass of a proton (H - e) as calculated
  # from the {constants}[bioactive.rubyforge.org/constants] gem.
  config :proton_mass, Element['H'].mass - Particle['Electron'].mass, &c.num_or_nil    # allows specification of an alternate proton mass

  # Merges the spectra of the input .dta files into a single .mgf file.
  #
  # output_file:: path of the .mgf file to write
  # inputs::      .dta files and/or directories; for a directory, every
  #               *.dta file directly inside it is used (in sorted order)
  #
  # Returns output_file.  Nothing is written when no inputs are given.
  # Raises a RuntimeError if an input file does not end in '.dta', or if
  # a .dta file lacks a usable '<mh> <z>' precursor line.
  def process(output_file, *inputs)
    return output_file if inputs.empty?

    dta_files = inputs.collect do |file|
      if File.directory?(file)
        # sort so the spectrum order does not depend on the file system
        Dir.glob(File.expand_path(File.join(file, "*.dta"))).sort
      else
        raise "Not a .dta file: #{file}" unless file =~ /\.dta\z/
        file
      end
    end.flatten

    prepare(output_file)
    File.open(output_file, "wb") do |target|
      h = proton_mass

      dta_files.each do |file|
        #log_basename(:merging, file)
        target << format_ions(file, h)
      end
    end
    log(:made, output_file)

    output_file
  end

  private

  # Formats the contents of a single .dta file as one
  # BEGIN IONS ... END IONS section, using h as the proton mass.
  def format_ions(file, h)
    lines = File.read(file).split(/\r?\n/)

    # The first line carries the precursor M+H and charge state.  Split on
    # any run of whitespace (ignoring leading blanks) so the fields cannot
    # shift, and tolerate an empty file long enough to report it properly.
    mh, z = lines.shift.to_s.split
    mh = mh.to_f
    z = z.to_i

    # a zero/missing charge would otherwise yield PEPMASS=Infinity or NaN
    raise "Invalid .dta header (expected '<mh> <z>'): #{file}" if z == 0

    # a trailing empty entry leaves a blank line before END IONS
    lines << ""

    [
      "BEGIN IONS",
      "TITLE=#{File.basename(file)}",
      "CHARGE=#{z}+",
      "PEPMASS=#{(mh + (z - 1) * h) / z}",
      lines.join("\n"),
      "END IONS",
      "",
      ""
    ].join("\n")
  end

end
85
+ end
86
+ end
@@ -0,0 +1,142 @@
1
+ module Xcalibur
2
+ module Convert
3
+ # :startdoc::manifest convert RAW files to dta format
4
+ # Converts a .RAW file to dta files using extract_msn.exe
5
+ #
6
+ # extract_msn.exe is an Xcalibur/BioWorks tool that extracts spectra from .RAW
7
+ # files into .dta (Sequest) format and must be installed for RawToDta to work.
8
+ # RawToDta was developed against extract_msn version 4.0. You can check if
9
+ # extract_msn is installed at the default location, as well as determine the
10
+ # version of your executable using:
11
+ #
12
+ # % tap run -- xcalibur/convert/raw_to_dta --extract_msn_help
13
+ #
14
class RawToDta < Tap::FileTask
  config :extract_msn, 'C:\Xcalibur\System\Programs\extract_msn.exe'  # the full path to the extract_msn executable
  config :first_scan, nil, &c.integer_or_nil                          # (-F)
  config :last_scan, nil, &c.integer_or_nil                           # (-L)
  config :lower_MW, nil, &c.num_or_nil                                # (-B)
  config :upper_MW, nil, &c.num_or_nil                                # (-T)
  config :precursor_mass_tol, 1.4, &c.num                             # (-M)
  config :num_allowed_intermediate_scans_for_grouping, 1, &c.integer  # (-S)
  config :charge_state, nil, &c.integer_or_nil                        # (-C)
  config :num_required_group_scans, 1, &c.integer_or_nil              # (-G)
  config :num_ions_required, 0, &c.integer_or_nil                     # (-I)
  config :intensity_threshold, nil, &c.integer_or_nil                 # (-E)
  config :use_unified_search_file, nil, &c.flag                       # (-U)
  config :subsequence, nil                                            # (-Y)
  config :write_zta_files, nil, &c.flag                               # (-Z)
  config :perform_charge_calculations, nil, &c.flag                   # (-K)
  config :template_file, nil                                          # (-O)
  config :options_string, nil                                         # (-A)
  config :minimum_signal_to_noise, 3, &c.num                          # (-R)
  config :minimum_number_of_peaks, 5, &c.integer                      # (-r)

  config_attr(:extract_msn_help, nil, :arg_type => :flag) do |value|  # Print the extract_msn help
    if value
      sh(extract_msn)
      exit
    end
  end

  # Pairs each option key with the single-letter switch passed to
  # extract_msn.exe.  Switches are emitted in this order.
  CONFIG_MAP = [
    [:first_scan, 'F'],
    [:last_scan, 'L'],
    [:lower_MW, 'B'],
    [:upper_MW, 'T'],
    [:precursor_mass_tol, 'M'],
    [:num_allowed_intermediate_scans_for_grouping, 'S'],
    [:charge_state, 'C'],
    [:num_required_group_scans, 'G'],
    [:num_ions_required, 'I'],
    [:output_path, 'D'],
    [:intensity_threshold, 'E'],
    [:use_unified_search_file, 'U'],
    [:subsequence, 'Y'],
    [:write_zta_files, 'Z'],
    [:perform_charge_calculations, 'K'],
    [:template_file, 'O'],
    [:options_string, 'A'],
    [:minimum_signal_to_noise, 'R'],
    [:minimum_number_of_peaks, 'r']
  ]

  # Returns the absolute form of path written Windows-style, ie with
  # every forward slash (/) replaced by a backslash (\).
  def normalize(path)
    File.expand_path(path).tr("/", "\\")
  end

  # Builds the switch portion of the extract_msn.exe command line.
  # CONFIG_MAP is walked in order; entries whose value is nil or false
  # are left out.  Flag-type options contribute the bare switch, path
  # options a quoted and normalized path, and everything else the
  # stringified value.
  #
  # The -D switch always takes its value from the output_dir argument.
  #
  # NOTE(review): earlier documentation stated that a configured
  # output_path takes precedence over output_dir, but config[:output_path]
  # is never consulted here -- confirm which behavior is intended.
  def cmd_options(output_dir=nil)
    switches = []

    CONFIG_MAP.each do |key, flag|
      value = flag == "D" ? output_dir : config[key]
      next unless value

      argument =
        case key
        when :use_unified_search_file, :perform_charge_calculations, :write_zta_files
          # flags take no argument
          ""
        when :output_path, :template_file
          # paths are normalized and wrapped in double quotes
          "\"#{normalize(value)}\""
        else
          # numbers and strings are passed through as text
          value.to_s
        end

      switches << "-#{flag}#{argument}"
    end

    switches.join(" ")
  end

  # Assembles the full extract_msn.exe command for input_file: the quoted
  # executable path, the switches from cmd_options(output_dir), and the
  # quoted input path, separated by single spaces.
  #
  # Note that output_dir should be an EXISTING filepath or relative
  # filepath. extract_msn.exe will not generate .dta files if the
  # output_dir doesn't exist.
  def cmd(input_file, output_dir=nil)
    [
      "\"#{normalize(extract_msn)}\"",
      cmd_options(output_dir),
      "\"#{normalize(input_file)}\""
    ].join(' ')
  end

  # Runs extract_msn.exe on input_file and returns the expanded paths of
  # the .dta files found in the output directory afterwards.  Raises
  # unless input_file has a .RAW extension (case-insensitive).
  def process(input_file, output_dir=nil)
    extname = File.extname(input_file)
    unless extname =~ /\.RAW$/i
      raise "Expected .RAW file: #{input_file}"
    end

    # When no output directory is given, use one named after
    # the raw file (the input path minus its extension).
    output_dir = input_file.chomp(extname) if output_dir.nil?

    mkdir(output_dir)
    command = cmd(input_file, output_dir)

    log :sh, command
    if app.quiet
      capture_sh(command, true)
    else
      sh(command)
      puts ""  # add extra line to make logging nice
    end

    # This may select additional .dta files that existed before raw_to_dta
    # TODO - maybe read lcq_dta for files?
    dta_pattern = File.expand_path(File.join(output_dir, "*.dta"))
    Dir.glob(dta_pattern)
  end

end
141
+ end
142
+ end
@@ -0,0 +1,85 @@
1
+ require 'xcalibur/convert/raw_to_dta'
2
+ require 'xcalibur/convert/dta_to_mgf'
3
+
4
+ module Xcalibur
5
+ module Convert
6
+ # :startdoc::manifest convert RAW files to mgf format
7
+ # Extracts spectra from a .RAW file and formats them as mgf (Mascot
8
+ # Generic Format). RawToMgf is a workflow that uses the RawToDta
9
+ # and DtaToMgf tasks, and can be configured through these tasks
10
+ # using the following configuration files:
11
+ #
12
+ # config/xcalibur/convert
13
+ # |- raw_to_mgf.yml # configures RawToMgf
14
+ # `- raw_to_mgf
15
+ # |- raw_to_dta.yml # configures RawToDta
16
+ # `- dta_to_mgf.yml # configures DtaToMgf
17
+ #
18
+ # Mgf files are named after the RAW file they represent; the group
19
+ # merge file is named 'merge.mgf' although an alternate merge file
20
+ # name can be specified in the options.
21
+ #
22
class RawToMgf < Tap::Workflow

  config :merge_file, 'merge.mgf'               # the group merge file
  config :merge_individual, true, &c.switch    # merge the dta's for each RAW file
  config :merge_group, true, &c.switch         # merge the dta's for all RAW files
  config :remove_dta_files, true, &c.switch    # clean up dta files upon completion

  # Wires a RawToDta task to a DtaToMgf task.  The entry point enqueues
  # one RawToDta run per input file; as those runs complete, the
  # on_complete block enqueues the per-file and/or group DtaToMgf runs and,
  # if configured, cleanup of the intermediate dta directories.
  def workflow
    # Define the workflow entry and exit points,
    # as well as the workflow logic.

    raw_to_dta = Xcalibur::Convert::RawToDta.new
    dta_to_mgf = Xcalibur::Convert::DtaToMgf.new

    # dta_dirs and n_inputs are filled in by the entry point and
    # read later from inside the on_complete block.
    dta_dirs = []
    n_inputs = nil
    self.entry_point = Tap::Task.new do |task, *input_files|
      n_inputs = input_files.length
      input_files.each do |input_file|
        # the dta directory is the input file's basename without its
        # extension, so it is a relative path
        dta_dir = File.basename(input_file).chomp(File.extname(input_file))
        dta_dirs << dta_dir
        raw_to_dta.enq(input_file, dta_dir)
      end
    end

    group_results = []
    raw_to_dta.on_complete do |_result|
      if merge_individual
        # NOTE(review): _original[0] is presumably the RAW file path that
        # was enqueued above -- confirm against the Tap audit API.
        input_file = _result._original[0]
        # the individual mgf is written next to merge_file, named after the input file
        output_file = File.join( File.dirname(merge_file), File.basename(input_file).chomp(File.extname(input_file)) + ".mgf")
        dta_to_mgf.enq(output_file, *_result._expand)
      end

      # collect _results to determine when all the input
      # files have been processed by raw_to_dta
      group_results << _result

      # When all the input files have been converted, merge the
      # group and enqueue a task to cleanup the dta files, as specified.
      if group_results.length == n_inputs
        if merge_group
          all_results = group_results.collect {|_result| _result._expand }.flatten
          dta_to_mgf.enq(merge_file, *all_results)
        end

        if remove_dta_files
          cleanup = Tap::Task.new do |task, raw_dir|
            task.log :rm, raw_dir

            # take this stepwise to be a little safer...
            # (only the .dta files and the two lcq_* files are removed, then
            # the directory itself is removed with rmdir)
            FileUtils.rm Dir.glob(raw_dir + "/*.dta")
            FileUtils.rm ["#{raw_dir }/lcq_dta.txt", "#{raw_dir }/lcq_profile.txt"]
            FileUtils.rmdir raw_dir
          end
          dta_dirs.each {|dir| cleanup.enq(dir)}
        end
      end
    end

    self.exit_point = dta_to_mgf
  end
end
84
+ end
85
+ end
@@ -0,0 +1,91 @@
1
+ module Xcalibur
2
+ # A simple representation of a peak file exported from Xcalibur Qual
3
+ # Browser (v 2.0). The expected format of a peak file is as shown below:
4
+ #
5
+ # [peak_file.txt]
6
+ # SPECTRUM - MS
7
+ # GSE_T29K_080703143635.raw
8
+ # ITMS + c ESI Full ms [300.00-2000.00]
9
+ # Scan #: 11
10
+ # RT: 0.07
11
+ # Data points: 1490
12
+ # Mass Intensity
13
+ # 300.516479 2000.0
14
+ # 301.392487 1000.0
15
+ # 302.465759 3000.0
16
+ # ...
17
+ #
18
+ # Any headers matching the pattern 'key: value' will be parsed as a
19
+ # header, while other lines (ex: SPECTRUM - MS) are parsed into the
20
+ # description.
21
+ #
22
class PeakFile

  class << self

    # Parses the input string into a PeakFile.
    #
    # Lines before the 'Mass Intensity' marker are header lines: those of
    # the form 'key: value' are stored in headers, all others are stripped
    # and appended to desc.  Lines after the marker are data rows; each is
    # split on runs of whitespace and converted to floats.  Blank data
    # lines are skipped.  Both "\n" and "\r\n" line endings are accepted.
    def parse(str)
      peak_file = new
      mode = :header

      str.each_line do |raw_line|
        # drop the line ending up front so a "\r" from CRLF input
        # cannot leak into header values
        line = raw_line.chomp

        case mode
        when :header

          case line
          when /\A(.*?): (.*)\z/
            peak_file.headers[$1] = $2
          when /Mass\sIntensity/
            mode = :data
          else
            peak_file.desc << line.strip
          end

        when :data
          # split on any run of whitespace; splitting on single whitespace
          # characters would yield empty fields (parsed as 0.0) whenever
          # columns are padded or the line is indented
          fields = line.split
          next if fields.empty?

          peak_file.data << fields.collect {|value| value.to_f }
        end
      end

      peak_file
    end
  end

  # The order of headers observed in export files
  HEADER_ORDER = [
    "Scan #",
    "RT",
    "Mass defect",
    "Data points"
  ]

  # An array of description lines
  attr_accessor :desc

  # A hash of headers
  attr_accessor :headers

  # An array of (mz, intensity) values
  attr_accessor :data

  def initialize(desc=[], headers={}, data=[])
    @desc = desc
    @headers = headers
    @data = data
  end

  # Recreates the peak file as a string: the description lines, the
  # headers listed in HEADER_ORDER (in that order; other headers are not
  # emitted), the 'Mass Intensity' marker, then one tab-joined line per
  # data point.  Every line, including the last, is terminated by sep.
  def to_s(sep="\r\n")
    header_lines = HEADER_ORDER.collect do |key|
      next nil unless headers.has_key?(key)
      "#{key}: #{headers[key]}"
    end.compact

    data_lines = data.collect {|point| point.join("\t") }

    lines = desc + header_lines + ["Mass\tIntensity"] + data_lines
    lines.join(sep) + sep
  end

end
91
+ end
@@ -0,0 +1,65 @@
1
+ require 'xcalibur/peak_file'
2
+
3
+ module Xcalibur
4
+ # :startdoc::manifest adds graph data to an exported peak file
5
+ # Peakify adds points to signify the relative intensity
6
+ # (ie intensity/max_intensity as a rounded percentage) of peaks in
7
+ # an exported peak list. This can be useful as a visual aid.
8
+ #
9
+ # [a_sample_result.txt]
10
+ # SPECTRUM - MS
11
+ # GSE_T29K_080703143635.raw
12
+ # ITMS + c ESI Full ms [300.00-2000.00]
13
+ # Scan #: 11
14
+ # RT: 0.07
15
+ # Data points: 1490
16
+ # Mass Intensity
17
+ # 300.516479 2000.0 .................................
18
+ # 301.392487 1000.0 .................
19
+ # 302.465759 3000.0 ..................................................
20
+ # ...
21
+ #
22
+ # Options can be specified to filter out points within a
23
+ # range of relative intensities. Peakify can handle exported
24
+ # peak lists from Xcalibur Qual Browser (v 2.0).
25
+ #
26
class Peakify < Tap::FileTask

  config :point_char, '.'           # a character used for each intensity point
  config :min, 0, &c.num            # min relative intensity
  config :max, 100, &c.num          # max relative intensity
  config :sort, false, &c.flag      # sort by intensity

  # Reads the peak file at filepath and writes an annotated copy in which
  # each data row gains a third column: point_char repeated once per
  # rounded percent of the largest intensity.  Rows whose percentage lies
  # outside min..max are dropped, and rows are ordered by descending
  # intensity when sort is set.
  #
  # The copy is written to the app's :data filepath as 'peak_<basename>';
  # that target path is returned.
  def process(filepath)
    target = app.filepath(:data, "peak_#{File.basename(filepath)}" )
    prepare(target)

    # now perform the task...
    peak_file = PeakFile.parse File.read(filepath)

    # rows without both an mz and an intensity (ex: blank or truncated
    # lines) cannot be scaled; leave them out rather than fail on nil
    points = peak_file.data.select {|(mz, intensity)| mz && intensity }

    max_intensity = points.inject(0) do |current_max, (mz, intensity)|
      intensity > current_max ? intensity : current_max
    end

    range = min..max
    peak_file.data = points.collect do |(mz, intensity)|
      # when nothing has a positive intensity, dividing would give NaN and
      # silently drop every row; report such points as 0 percent instead
      percent = max_intensity > 0 ? (intensity / max_intensity * 100) : 0.0
      next unless range.include?(percent)

      [mz, intensity, point_char * percent.round]
    end.compact

    if sort
      peak_file.data = peak_file.data.sort_by do |(mz, intensity)|
        intensity
      end.reverse
    end

    File.open(target, "wb") do |file|
      file << peak_file.to_s
    end

    target
  end

end
65
+ end
data/tap.yml ADDED
File without changes
@@ -0,0 +1,5 @@
1
# Put ../lib (relative to this file) at the front of the load path.
lib_dir = File.join(File.dirname(__FILE__), '../lib')
$LOAD_PATH.unshift(lib_dir)

# runs all subsets (see Tap::Test::SubsetMethods)
ENV["ALL"] = "true"

# Require every *_test.rb file found beneath the current directory.
Dir.glob("./**/*_test.rb").each do |test_file|
  require test_file
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bahuvrihi-xcalibur
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Simon Chiang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-08 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: tap
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 0.10.0
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: constants
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ~>
30
+ - !ruby/object:Gem::Version
31
+ version: 0.1.0
32
+ version:
33
+ description:
34
+ email: simon.a.chiang@gmail.com
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - MIT-LICENSE
41
+ files:
42
+ - lib/xcalibur/convert/dta_to_mgf.rb
43
+ - lib/xcalibur/convert/raw_to_dta.rb
44
+ - lib/xcalibur/convert/raw_to_mgf.rb
45
+ - lib/xcalibur/peak_file.rb
46
+ - lib/xcalibur/peakify.rb
47
+ - tap.yml
48
+ - MIT-LICENSE
49
+ has_rdoc: true
50
+ homepage: http://hsc-proteomics.uchsc.edu/
51
+ post_install_message:
52
+ rdoc_options: []
53
+
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.2.0
72
+ signing_key:
73
+ specification_version: 2
74
+ summary: xcalibur task library
75
+ test_files:
76
+ - test/tap_test_suite.rb