ms-mascot 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +9 -0
- data/lib/ms/mascot/dat.rb +16 -0
- data/lib/ms/mascot/dat/archive.rb +198 -0
- data/lib/ms/mascot/dat/header.rb +4 -0
- data/lib/ms/mascot/dat/index.rb +23 -0
- data/lib/ms/mascot/dat/masses.rb +4 -0
- data/lib/ms/mascot/dat/parameters.rb +4 -0
- data/lib/ms/mascot/dat/peptides.rb +4 -0
- data/lib/ms/mascot/dat/proteins.rb +4 -0
- data/lib/ms/mascot/dat/query.rb +12 -0
- data/lib/ms/mascot/dat/section.rb +86 -0
- data/lib/ms/mascot/dat/summary.rb +8 -0
- data/lib/ms/mascot/dat/summary/id.rb +54 -0
- data/lib/ms/mascot/export.rb +75 -10
- data/lib/ms/mascot/format_mgf.rb +54 -0
- data/lib/ms/mascot/fragment.rb +29 -25
- data/lib/ms/mascot/mgf.rb +35 -2
- data/lib/ms/mascot/mgf/entry.rb +23 -5
- data/lib/ms/mascot/spectrum.rb +18 -3
- data/lib/ms/mascot/submit.rb +120 -29
- data/tap.yml +0 -0
- metadata +29 -31
- data/cmd/generate_mgf.rb +0 -123
- data/cmd/generate_prospector_mgf.rb +0 -123
- data/cmd/reformat_mgf.rb +0 -90
- data/lib/ms/mascot/predict.rb +0 -94
data/tap.yml
ADDED
File without changes
|
metadata
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-mascot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Chiang
|
8
|
+
- John Prince
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
12
|
|
12
|
-
date:
|
13
|
+
date: 2009-02-24 00:00:00 -07:00
|
13
14
|
default_executable:
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +21,7 @@ dependencies:
|
|
20
21
|
requirements:
|
21
22
|
- - ">="
|
22
23
|
- !ruby/object:Gem::Version
|
23
|
-
version: "0.
|
24
|
+
version: "0.12"
|
24
25
|
version:
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: tap-http
|
@@ -30,7 +31,7 @@ dependencies:
|
|
30
31
|
requirements:
|
31
32
|
- - ">="
|
32
33
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.2
|
34
|
+
version: 0.3.2
|
34
35
|
version:
|
35
36
|
- !ruby/object:Gem::Dependency
|
36
37
|
name: external
|
@@ -42,16 +43,6 @@ dependencies:
|
|
42
43
|
- !ruby/object:Gem::Version
|
43
44
|
version: 0.3.0
|
44
45
|
version:
|
45
|
-
- !ruby/object:Gem::Dependency
|
46
|
-
name: mspire
|
47
|
-
type: :runtime
|
48
|
-
version_requirement:
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
51
|
-
- - ">="
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: 0.5.0
|
54
|
-
version:
|
55
46
|
- !ruby/object:Gem::Dependency
|
56
47
|
name: ms-in_silico
|
57
48
|
type: :runtime
|
@@ -60,17 +51,7 @@ dependencies:
|
|
60
51
|
requirements:
|
61
52
|
- - ">="
|
62
53
|
- !ruby/object:Gem::Version
|
63
|
-
version: 0.2.
|
64
|
-
version:
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: ms-testdata
|
67
|
-
type: :development
|
68
|
-
version_requirement:
|
69
|
-
version_requirements: !ruby/object:Gem::Requirement
|
70
|
-
requirements:
|
71
|
-
- - ">="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
version: 0.0.1
|
54
|
+
version: 0.2.2
|
74
55
|
version:
|
75
56
|
description:
|
76
57
|
email: simon.a.chiang@gmail.com
|
@@ -81,26 +62,43 @@ extensions: []
|
|
81
62
|
extra_rdoc_files:
|
82
63
|
- README
|
83
64
|
- MIT-LICENSE
|
65
|
+
- History
|
84
66
|
files:
|
85
|
-
- cmd/generate_mgf.rb
|
86
|
-
- cmd/generate_prospector_mgf.rb
|
87
|
-
- cmd/reformat_mgf.rb
|
88
67
|
- lib/ms/mascot.rb
|
68
|
+
- lib/ms/mascot/dat.rb
|
69
|
+
- lib/ms/mascot/dat/archive.rb
|
70
|
+
- lib/ms/mascot/dat/header.rb
|
71
|
+
- lib/ms/mascot/dat/index.rb
|
72
|
+
- lib/ms/mascot/dat/masses.rb
|
73
|
+
- lib/ms/mascot/dat/parameters.rb
|
74
|
+
- lib/ms/mascot/dat/peptides.rb
|
75
|
+
- lib/ms/mascot/dat/proteins.rb
|
76
|
+
- lib/ms/mascot/dat/query.rb
|
77
|
+
- lib/ms/mascot/dat/section.rb
|
78
|
+
- lib/ms/mascot/dat/summary.rb
|
79
|
+
- lib/ms/mascot/dat/summary/id.rb
|
89
80
|
- lib/ms/mascot/export.rb
|
81
|
+
- lib/ms/mascot/format_mgf.rb
|
90
82
|
- lib/ms/mascot/fragment.rb
|
91
83
|
- lib/ms/mascot/mgf.rb
|
92
84
|
- lib/ms/mascot/mgf/archive.rb
|
93
85
|
- lib/ms/mascot/mgf/entry.rb
|
94
|
-
- lib/ms/mascot/predict.rb
|
95
86
|
- lib/ms/mascot/spectrum.rb
|
96
87
|
- lib/ms/mascot/submit.rb
|
88
|
+
- tap.yml
|
97
89
|
- README
|
98
90
|
- MIT-LICENSE
|
91
|
+
- History
|
99
92
|
has_rdoc: true
|
100
93
|
homepage: http://mspire.rubyforge.org/projects/ms-mascot/
|
101
94
|
post_install_message:
|
102
|
-
rdoc_options:
|
103
|
-
|
95
|
+
rdoc_options:
|
96
|
+
- --main
|
97
|
+
- README
|
98
|
+
- -S
|
99
|
+
- -N
|
100
|
+
- --title
|
101
|
+
- Ms-Mascot
|
104
102
|
require_paths:
|
105
103
|
- lib
|
106
104
|
required_ruby_version: !ruby/object:Gem::Requirement
|
data/cmd/generate_mgf.rb
DELETED
@@ -1,123 +0,0 @@
|
|
1
|
-
# = Usage
|
2
|
-
# tap generate_mgf {options} protein_sequences
|
3
|
-
#
|
4
|
-
# When specifying the ions to include, alternate charge states can be
|
5
|
-
# specified using + and -, for example 'y++' or 'b-'. The available ion
|
6
|
-
# series are [a,b,c,x,y,z].
|
7
|
-
#
|
8
|
-
# = Description
|
9
|
-
# Digests, fragments, then formats the protein sequences into mgf files.
|
10
|
-
# Use the options to specify/modify digestion enzymes, as well as the
|
11
|
-
# type of ions to generate.
|
12
|
-
#
|
13
|
-
# = Information
|
14
|
-
#
|
15
|
-
# Copyright (c) 2006-2007, Regents of the University of Colorado.
|
16
|
-
# Developer:: Simon Chiang, Biomolecular Structure Program
|
17
|
-
# Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
|
18
|
-
# Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
19
|
-
#
|
20
|
-
|
21
|
-
require 'tap/script'
|
22
|
-
include Constants::Library
|
23
|
-
|
24
|
-
app = Tap::App.instance
|
25
|
-
|
26
|
-
#
|
27
|
-
# handle options
|
28
|
-
#
|
29
|
-
|
30
|
-
opts = Prospector::Digest.configurations.to_opts
|
31
|
-
opts += Mascot::Formats::Mgf::Print.configurations.to_opts
|
32
|
-
opts += [
|
33
|
-
['--charge', '-c', GetoptLong::REQUIRED_ARGUMENT, "Parent ion charge for mgf files. (default +1)"],
|
34
|
-
|
35
|
-
['--ions', '-i', GetoptLong::REQUIRED_ARGUMENT, "Comma-separated string of ion series to include. (default 'yb')"],
|
36
|
-
#['--enzyme_file', nil, GetoptLong::REQUIRED_ARGUMENT, "Specifes a Prospector-style enzyme config file."],
|
37
|
-
['--residue_precision', nil, GetoptLong::REQUIRED_ARGUMENT, "The precision of residues, ex 6 for 57.021464"],
|
38
|
-
['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
|
39
|
-
['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifes debug mode."]]
|
40
|
-
|
41
|
-
digest_config = {}
|
42
|
-
print_config = {}
|
43
|
-
series = "yb"
|
44
|
-
charge = 1
|
45
|
-
residue_precision = 6
|
46
|
-
|
47
|
-
Tap::Script.handle_options(*opts) do |opt, value|
|
48
|
-
case opt
|
49
|
-
when '--help'
|
50
|
-
puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
|
51
|
-
puts
|
52
|
-
puts Tap::Script.usage_options(opts)
|
53
|
-
exit
|
54
|
-
|
55
|
-
when '--debug'
|
56
|
-
app.options.debug = true
|
57
|
-
|
58
|
-
when '--ions'
|
59
|
-
series = value
|
60
|
-
|
61
|
-
when '--charge'
|
62
|
-
charge = value.to_i
|
63
|
-
|
64
|
-
when '--residue_precision'
|
65
|
-
residue_precision = value.to_i
|
66
|
-
|
67
|
-
else
|
68
|
-
key = Prospector::Digest.configurations.opt_map(opt)
|
69
|
-
digest_config[key] = YAML.load(value) if key
|
70
|
-
|
71
|
-
key = Mascot::Formats::Mgf::Print.configurations.opt_map(opt)
|
72
|
-
print_config[key] = YAML.load(value) if key
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
if ARGV.empty?
|
77
|
-
puts "no sequences specified"
|
78
|
-
exit
|
79
|
-
end
|
80
|
-
|
81
|
-
#
|
82
|
-
# add your script code here
|
83
|
-
#
|
84
|
-
series = series.scan(/\w\-*\+*/)
|
85
|
-
|
86
|
-
#loader = Prospector::LoadDigesters.new
|
87
|
-
#loader.enq(enzyme_file)
|
88
|
-
|
89
|
-
#
|
90
|
-
digest = Prospector::Digest.new(nil, digest_config)
|
91
|
-
|
92
|
-
#
|
93
|
-
n = Molecule[digest.nterm]
|
94
|
-
c = Molecule[digest.cterm]
|
95
|
-
|
96
|
-
fragment = Tap::Task.new do |task, polypeptides|
|
97
|
-
polypeptides.collect do |polypeptide, start_index, end_index|
|
98
|
-
task.log :fragment, polypeptide.sequence[0..10] + (polypeptide.sequence.length > 10 ? "..." : "")
|
99
|
-
|
100
|
-
f = Mascot::FragmentSpectrum.new(polypeptide.sequence, n, c, residue_precision)
|
101
|
-
|
102
|
-
headers = {
|
103
|
-
:title => polypeptide.sequence,
|
104
|
-
:charge => charge,
|
105
|
-
:pepmass => (f.mass(n) + f.ladder.last + f.mass(c) + charge * f.proton_mass)/charge
|
106
|
-
}
|
107
|
-
|
108
|
-
data = series.collect {|s| f.series(s)}.flatten.delete_if {|mass| mass < 0 }.sort
|
109
|
-
data = [data, Array.new(data.length, 1)].transpose
|
110
|
-
|
111
|
-
Mascot::Formats::Mgf::Entry.new(headers, data)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
#
|
116
|
-
print = Mascot::Formats::Mgf::Print.new('generate_mgf', print_config)
|
117
|
-
|
118
|
-
# workflow
|
119
|
-
digest.enq(*ARGV)
|
120
|
-
ARGV.clear
|
121
|
-
|
122
|
-
app.sequence(digest, fragment, print)
|
123
|
-
app.run
|
data/cmd/generate_prospector_mgf.rb
DELETED
@@ -1,123 +0,0 @@
|
|
1
|
-
# = Usage
|
2
|
-
# tap generate_mgf {options} protein_sequences
|
3
|
-
#
|
4
|
-
# When specifying the ions to include, alternate charge states can be
|
5
|
-
# specified using + and -, for example 'y++' or 'b-'. The available ion
|
6
|
-
# series are [a,b,c,x,y,z].
|
7
|
-
#
|
8
|
-
# = Description
|
9
|
-
# Digests, fragments, then formats the protein sequences into mgf files.
|
10
|
-
# Use the options to specify/modify digestion enzymes, as well as the
|
11
|
-
# type of ions to generate.
|
12
|
-
#
|
13
|
-
# = Information
|
14
|
-
#
|
15
|
-
# Copyright (c) 2006-2007, Regents of the University of Colorado.
|
16
|
-
# Developer:: Simon Chiang, Biomolecular Structure Program
|
17
|
-
# Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
|
18
|
-
# Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
19
|
-
#
|
20
|
-
|
21
|
-
require 'tap/script'
|
22
|
-
include Constants::Library
|
23
|
-
|
24
|
-
app = Tap::App.instance
|
25
|
-
|
26
|
-
#
|
27
|
-
# handle options
|
28
|
-
#
|
29
|
-
|
30
|
-
opts = Prospector::Digest.configurations.to_opts
|
31
|
-
opts += Mascot::Formats::Mgf::Print.configurations.to_opts
|
32
|
-
opts += [
|
33
|
-
['--charge', '-c', GetoptLong::REQUIRED_ARGUMENT, "Parent ion charge for mgf files. (default +1)"],
|
34
|
-
|
35
|
-
['--ions', '-i', GetoptLong::REQUIRED_ARGUMENT, "Comma-separated string of ion series to include. (default 'yb')"],
|
36
|
-
#['--enzyme_file', nil, GetoptLong::REQUIRED_ARGUMENT, "Specifes a Prospector-style enzyme config file."],
|
37
|
-
['--residue_precision', nil, GetoptLong::REQUIRED_ARGUMENT, "The precision of residues, ex 6 for 57.021464"],
|
38
|
-
['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
|
39
|
-
['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifes debug mode."]]
|
40
|
-
|
41
|
-
digest_config = {}
|
42
|
-
print_config = {}
|
43
|
-
series = "yb"
|
44
|
-
charge = 1
|
45
|
-
residue_precision = 6
|
46
|
-
|
47
|
-
Tap::Script.handle_options(*opts) do |opt, value|
|
48
|
-
case opt
|
49
|
-
when '--help'
|
50
|
-
puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
|
51
|
-
puts
|
52
|
-
puts Tap::Script.usage_options(opts)
|
53
|
-
exit
|
54
|
-
|
55
|
-
when '--debug'
|
56
|
-
app.options.debug = true
|
57
|
-
|
58
|
-
when '--ions'
|
59
|
-
series = value
|
60
|
-
|
61
|
-
when '--charge'
|
62
|
-
charge = value.to_i
|
63
|
-
|
64
|
-
when '--residue_precision'
|
65
|
-
residue_precision = value.to_i
|
66
|
-
|
67
|
-
else
|
68
|
-
key = Prospector::Digest.configurations.opt_map(opt)
|
69
|
-
digest_config[key] = YAML.load(value) if key
|
70
|
-
|
71
|
-
key = Mascot::Formats::Mgf::Print.configurations.opt_map(opt)
|
72
|
-
print_config[key] = YAML.load(value) if key
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
if ARGV.empty?
|
77
|
-
puts "no sequences specified"
|
78
|
-
exit
|
79
|
-
end
|
80
|
-
|
81
|
-
#
|
82
|
-
# add your script code here
|
83
|
-
#
|
84
|
-
series = series.scan(/\w\-*\+*/)
|
85
|
-
|
86
|
-
#loader = Prospector::LoadDigesters.new
|
87
|
-
#loader.enq(enzyme_file)
|
88
|
-
|
89
|
-
#
|
90
|
-
digest = Prospector::Digest.new(nil, digest_config)
|
91
|
-
|
92
|
-
#
|
93
|
-
n = Molecule[digest.nterm]
|
94
|
-
c = Molecule[digest.cterm]
|
95
|
-
|
96
|
-
fragment = Tap::Task.new do |task, polypeptides|
|
97
|
-
polypeptides.collect do |polypeptide, start_index, end_index|
|
98
|
-
task.log :fragment, polypeptide.sequence[0..10] + (polypeptide.sequence.length > 10 ? "..." : "")
|
99
|
-
|
100
|
-
f = Prospector::FragmentSpectrum.new(polypeptide.sequence, n, c)
|
101
|
-
|
102
|
-
headers = {
|
103
|
-
:title => polypeptide.sequence,
|
104
|
-
:charge => charge,
|
105
|
-
:pepmass => (n.mass + polypeptide.mass + c.mass + charge * (Molecule['H'].mass - Particle['Electron'].mass))/charge
|
106
|
-
}
|
107
|
-
|
108
|
-
data = series.collect {|s| f.series(s)}.flatten.delete_if {|mass| mass < 0 }.sort
|
109
|
-
data = [data, Array.new(data.length, 1)].transpose
|
110
|
-
|
111
|
-
Mascot::Formats::Mgf::Entry.new(headers, data)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
#
|
116
|
-
print = Mascot::Formats::Mgf::Print.new('generate_mgf', print_config)
|
117
|
-
|
118
|
-
# workflow
|
119
|
-
digest.enq(*ARGV)
|
120
|
-
ARGV.clear
|
121
|
-
|
122
|
-
app.sequence(digest, fragment, print)
|
123
|
-
app.run
|
data/cmd/reformat_mgf.rb
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
# = Usage
|
2
|
-
# tap reformat_mgf {options} MGF_FILES
|
3
|
-
#
|
4
|
-
# = Description
|
5
|
-
# Reformats mgf files to a standard output like:
|
6
|
-
#
|
7
|
-
# BEGIN IONS
|
8
|
-
# TITLE=7100401blank.190.190.2.dta
|
9
|
-
# CHARGE=2+
|
10
|
-
# PEPMASS=321.571138
|
11
|
-
# 100.266 2.0
|
12
|
-
# 111.323 2.5
|
13
|
-
# ...
|
14
|
-
# 496.110 3.3
|
15
|
-
# 601.206 1.3
|
16
|
-
# END IONS
|
17
|
-
#
|
18
|
-
# = Information
|
19
|
-
#
|
20
|
-
# Copyright (c) 2006-2007, Regents of the University of Colorado.
|
21
|
-
# Developer:: Simon Chiang, Biomolecular Structure Program
|
22
|
-
# Homepage:: http://hsc-proteomics.uchsc.edu/hansen_lab
|
23
|
-
# Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
24
|
-
#
|
25
|
-
require 'tap/script'
|
26
|
-
|
27
|
-
app = Tap::App.instance
|
28
|
-
|
29
|
-
#
|
30
|
-
# handle options
|
31
|
-
#
|
32
|
-
|
33
|
-
opts = [
|
34
|
-
['--target_dir', '-t', GetoptLong::REQUIRED_ARGUMENT, "Specify an output directory."],
|
35
|
-
['--mz_precision', '-m', GetoptLong::REQUIRED_ARGUMENT, "Specify the mz precision."],
|
36
|
-
['--intensity_precision', '-i', GetoptLong::REQUIRED_ARGUMENT, "Specify the intensity precision."],
|
37
|
-
['--pepmass_precision', '-p', GetoptLong::REQUIRED_ARGUMENT, "Specify the peptide mass precision."],
|
38
|
-
['--headers', nil, GetoptLong::REQUIRED_ARGUMENT, "Specify the headers to include, separated by commas."],
|
39
|
-
['--help', '-h', GetoptLong::NO_ARGUMENT, "Print this help."],
|
40
|
-
['--debug', nil, GetoptLong::NO_ARGUMENT, "Specifies debug mode."]]
|
41
|
-
|
42
|
-
config = {:target_dir => 'reformatted'}
|
43
|
-
|
44
|
-
Tap::Script.handle_options(*opts) do |opt, value|
|
45
|
-
case opt
|
46
|
-
when '--help'
|
47
|
-
puts Tap::Script.usage(__FILE__, "Usage", "Description", "Information", :keep_headers => false)
|
48
|
-
puts
|
49
|
-
puts Tap::Script.usage_options(opts)
|
50
|
-
exit
|
51
|
-
|
52
|
-
when '--debug'
|
53
|
-
app.options.debug = true
|
54
|
-
|
55
|
-
when '--headers'
|
56
|
-
value = value[1..-2] if value[0] == 34 && value[-1] == 34
|
57
|
-
config[:headers] = value.split(/,/).collect {|header| header.strip}
|
58
|
-
else
|
59
|
-
opt =~ /--(.*)/
|
60
|
-
config[$1.to_sym] = value
|
61
|
-
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
#
|
66
|
-
# add your script code here
|
67
|
-
#
|
68
|
-
|
69
|
-
require 'mascot/formats/mgf'
|
70
|
-
|
71
|
-
reformat = Tap::FileTask.new("", config) do |task, input|
|
72
|
-
target = task.filepath(task.config[:target_dir], File.basename(input))
|
73
|
-
task.prepare(target)
|
74
|
-
|
75
|
-
task.log_basename :reformatting, input
|
76
|
-
Mascot::Formats::Mgf::Archive.open(input) do |archive|
|
77
|
-
archive.reindex if archive.length == 0
|
78
|
-
|
79
|
-
File.open(target, "wb") do |output|
|
80
|
-
archive.each do |mgf|
|
81
|
-
mgf.puts(output, task.config)
|
82
|
-
output.puts
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
args = ARGV.dup
|
89
|
-
ARGV.clear
|
90
|
-
app.run(reformat, *args)
|