ms-sequest 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +14 -0
- data/.gitignore +8 -0
- data/.gitmodules +9 -0
- data/History +8 -0
- data/{MIT-LICENSE → LICENSE} +1 -0
- data/README.rdoc +77 -0
- data/Rakefile +110 -0
- data/VERSION +1 -0
- data/lib/ms/sequest.rb +1 -1
- data/lib/ms/sequest/bioworks.rb +498 -0
- data/lib/ms/sequest/pepxml.rb +1458 -0
- data/lib/ms/sequest/srf.rb +4 -3
- data/lib/ms/sequest/srf/search.rb +1 -1
- data/lib/ms/sequest/srf/search/tap.rb +1 -1
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/ms/sequest/bioworks_spec.rb +153 -0
- data/spec/ms/sequest/params_spec.rb +131 -0
- data/spec/ms/sequest/pepxml_spec.rb +376 -0
- data/spec/ms/sequest/sqt_spec.rb +78 -0
- data/spec/ms/sequest/sqt_spec_helper.rb +34 -0
- data/spec/ms/sequest/srf/search_spec.rb +53 -0
- data/spec/ms/sequest/srf/search_spec_helper.rb +341 -0
- data/spec/ms/sequest/srf/sqt_spec.rb +142 -0
- data/spec/ms/sequest/srf_spec.rb +182 -0
- data/spec/ms/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +51 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- data/tap.yml +0 -0
- metadata +74 -21
- data/README +0 -23
data/lib/ms/sequest/srf.rb
CHANGED
@@ -17,6 +17,7 @@ require 'ms/sequest/params'
|
|
17
17
|
module Ms ; end
|
18
18
|
module Ms::Sequest ; end
|
19
19
|
|
20
|
+
|
20
21
|
class Ms::Sequest::Srf
|
21
22
|
|
22
23
|
class NoSequestParamsError < ArgumentError
|
@@ -350,8 +351,8 @@ class Ms::Sequest::Srf::Header
|
|
350
351
|
:modifications => 456,
|
351
352
|
}
|
352
353
|
|
353
|
-
# a Ms::Sequest::Srf::DTAGen object
|
354
354
|
attr_accessor :version
|
355
|
+
# a Ms::Sequest::Srf::DTAGen object
|
355
356
|
attr_accessor :dta_gen
|
356
357
|
attr_accessor :enzyme
|
357
358
|
attr_accessor :ion_series
|
@@ -732,8 +733,8 @@ end
|
|
732
733
|
class Ms::Sequest::SrfGroup
|
733
734
|
include Ms::Id::SearchGroup
|
734
735
|
|
735
|
-
#
|
736
|
-
#
|
736
|
+
# inherits an array of Ms::Sequest::Srf::Out::Pep objects
|
737
|
+
# inherits an array of Ms::Sequest::Srf::Out::Prot objects
|
737
738
|
|
738
739
|
# see Ms::Id::Search for acceptable arguments
|
739
740
|
# (filename, filenames, array of objects)
|
@@ -44,7 +44,7 @@ module Ms
|
|
44
44
|
# :zip requires gem rubyzip to be installed and is *very* bloated
|
45
45
|
# as it writes out all the files first!
|
46
46
|
# :tgz requires gem archive-tar-minitar to be installed
|
47
|
-
def
|
47
|
+
def to_dta(out_folder=nil, compress=nil)
|
48
48
|
outdir =
|
49
49
|
if out_folder ; out_folder
|
50
50
|
else base_name
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/ruby

# Rewrites IPI-style fasta header lines as NCBI-style header lines.
#
# For every file given on the command line, writes "<file>_NCBI.fasta" next
# to it.  Header lines (those starting with '>') are translated; all other
# lines are copied through unchanged.

# Translates a single header line.
#
# line:: a fasta header line beginning with '>' (trailing newline optional)
#
# The first whitespace-delimited token is treated as a '|'-separated list of
# "key:value" codes and each becomes "key|value|".  The remaining tokens are
# re-joined with single spaces as the description.
#
# Returns the new header as a String without a trailing newline.
def ipi_header_to_ncbi(line)
  codes, *description = line[1..-1].split(" ")
  # a bare ">" has no codes to translate; hand it back rather than crash
  return line.chomp if codes.nil?
  code_section = codes.split('|').map do |code|
    # limit of 2: any further ':' stays inside the value instead of being
    # silently dropped
    key, val = code.split(':', 2)
    "#{key}|#{val}|"
  end.join
  ">#{code_section} #{description.join(" ")}"
end

# Converts one fasta file, writing the result to "<base>_NCBI.fasta" where
# <base> is +file+ minus its extension.
#
# Returns the name of the file written.
def ipi_fasta_to_ncbi(file)
  base = file.chomp(File.extname(file))
  outfile = base + '_NCBI' + ".fasta"
  File.open(outfile, 'w') do |out|
    IO.foreach(file) do |line|
      if line =~ /^>/
        out.puts ipi_header_to_ncbi(line)
      else
        # sequence lines keep their original line endings
        out.print line
      end
    end
  end
  outfile
end

if ARGV.size == 0
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read an IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read."
else
  ARGV.each do |file|
    ipi_fasta_to_ncbi(file)
  end
end
|
29
|
+
|
@@ -0,0 +1,153 @@
|
|
1
|
+
|
2
|
+
# TODO work on this guy!
|
3
|
+
=begin
|
4
|
+
|
5
|
+
require File.expand_path( File.dirname(__FILE__) + '/../tap_spec_helper' )
|
6
|
+
|
7
|
+
require 'spec_id'
|
8
|
+
require 'spec_id/bioworks'
|
9
|
+
#require 'benchmark'
|
10
|
+
|
11
|
+
describe Bioworks, 'set from an xml file' do
|
12
|
+
# NEED TO DEBUG THIS PROB!
|
13
|
+
it 'can set one with labeled proteins' do
|
14
|
+
file = Tfiles + "/bioworks_with_INV_small.xml"
|
15
|
+
obj = Bioworks.new(file)
|
16
|
+
obj.prots.size.should == 19
|
17
|
+
file = Tfiles + '/bioworks_small.xml'
|
18
|
+
obj = Bioworks.new(file)
|
19
|
+
obj.prots.size.should == 106
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'can parse an xml file NOT derived from multi-concensus' do
|
23
|
+
tf_bioworks_single_xml_small = Tfiles + '/bioworks_single_run_small.xml'
|
24
|
+
obj = Bioworks.new(tf_bioworks_single_xml_small)
|
25
|
+
gfn = '5prot_mix_michrom_20fmol_200pmol'
|
26
|
+
origfilename = '5prot_mix_michrom_20fmol_200pmol.RAW'
|
27
|
+
origfilepath = 'C:\Xcalibur\sequest'
|
28
|
+
obj.global_filename.should == gfn
|
29
|
+
obj.origfilename.should == origfilename
|
30
|
+
obj.origfilepath.should == origfilepath
|
31
|
+
obj.prots.size.should == 7
|
32
|
+
obj.prots.first.peps.first.base_name.should == gfn
|
33
|
+
obj.prots.first.peps.first.file.should == "152"
|
34
|
+
obj.prots.first.peps.first.charge.should == 2
|
35
|
+
# @TODO: add more tests here
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'can output in excel format (**semi-verified right now)' do
|
39
|
+
tf_bioworks_to_excel = Tfiles + '/tf_bioworks2excel.bioXML'
|
40
|
+
tf_bioworks_to_excel_actual = Tfiles + '/tf_bioworks2excel.txt.actual'
|
41
|
+
tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
|
42
|
+
bio = Bioworks.new(tf_bioworks_to_excel)
|
43
|
+
bio.to_excel(tmpfile)
|
44
|
+
tmpfile.exist_as_a_file?.should be_true
|
45
|
+
#File.should exist_as_a_file(tmpfile)
|
46
|
+
exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
|
47
|
+
act = _arr_of_arrs(tmpfile)
|
48
|
+
exp.each_index do |i|
|
49
|
+
break if i == 23 ## this is where the ordering becomes arbitrary between guys with the same scans, but different filenames
|
50
|
+
_assert_equal_pieces(exp[i], act[i], exp[i][0] =~ /\d/)
|
51
|
+
end
|
52
|
+
|
53
|
+
File.unlink tmpfile
|
54
|
+
end
|
55
|
+
|
56
|
+
# prot is boolean if this is a protein line!
|
57
|
+
def _assert_equal_pieces(exp, act, prot)
|
58
|
+
# equal as floats (by delta)
|
59
|
+
exp.each_index do |i|
|
60
|
+
if i == 5 # both prots and peps
|
61
|
+
act[i].to_f.should be_close(exp[i].to_f, 0.1)
|
62
|
+
elsif i == 3 && !prot
|
63
|
+
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
64
|
+
elsif i == 6 && !prot
|
65
|
+
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
66
|
+
elsif i == 9 && prot
|
67
|
+
## NEED TO GET THESE BACK (for consistency):
|
68
|
+
#act[i].split(" ")[0].should =~ exp[i].split(" ")[0]
|
69
|
+
else
|
70
|
+
## NEED TO GET THESE BACK (for consistency):
|
71
|
+
#act[i].should == exp[i]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# takes a bioworks excel (in txt format) and outputs an arr of arrs
|
77
|
+
def _arr_of_arrs(file)
|
78
|
+
IO.readlines(file).collect do |line|
|
79
|
+
line.chomp!
|
80
|
+
line.split("\t")
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'can return unique peptides and proteins by sequence+charge (private)' do
|
85
|
+
cnt = 0
|
86
|
+
answer = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(3 PEPY), %w(2 PEPY)]
|
87
|
+
exp_peps = answer.collect! do |arr|
|
88
|
+
pep = Bioworks::Pep.new
|
89
|
+
pep.charge = arr[0]
|
90
|
+
pep.sequence = arr[1]
|
91
|
+
pep
|
92
|
+
end
|
93
|
+
exp_prots = [[0,2],[1,4,5],[3],[6]].collect do |arr|
|
94
|
+
arr.collect do |num|
|
95
|
+
prot = Bioworks::Prot.new
|
96
|
+
prot.reference = "#{num}"
|
97
|
+
prot
|
98
|
+
end
|
99
|
+
end
|
100
|
+
exp_peps = exp_peps.zip(exp_prots)
|
101
|
+
exp_peps.collect! do |both|
|
102
|
+
both[0].prots = [both[1]]
|
103
|
+
both[0]
|
104
|
+
end
|
105
|
+
|
106
|
+
peptides = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPTIDE), %w(3 PEPY), %w(3 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPY)].collect do |arr|
|
107
|
+
pep = Bioworks::Pep.new
|
108
|
+
pep.charge = arr[0]
|
109
|
+
pep.sequence = arr[1]
|
110
|
+
pep.prots = [Bioworks::Prot.new]
|
111
|
+
pep.prots.first.reference = "#{cnt}"
|
112
|
+
cnt += 1
|
113
|
+
pep
|
114
|
+
end
|
115
|
+
peptides, proteins = Bioworks.new._uniq_peps_by_sequence_charge(peptides)
|
116
|
+
proteins.size.should == peptides.size
|
117
|
+
exp_peps.each_with_index do |pep, i|
|
118
|
+
peptides[i].charge.should == pep.charge
|
119
|
+
peptides[i].sequence.should == pep.sequence
|
120
|
+
end
|
121
|
+
|
122
|
+
exp_prots.each_index do |i|
|
123
|
+
exp_prots[i].each_index do |j|
|
124
|
+
proteins[i][j].reference.should == exp_prots[i][j].reference
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
describe Bioworks::Pep do
|
132
|
+
it 'can be initialized from a hash' do
|
133
|
+
hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
|
134
|
+
pep = Bioworks::Pep.new(hash)
|
135
|
+
hash.each do |k,v|
|
136
|
+
pep.send(k).should == v
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
it 'correctly extracts file information' do
|
141
|
+
pep = Bioworks::Pep.new
|
142
|
+
testing = ['005a, 1131', '005b, 1131 - 1133', '1131', '1131 - 1133']
|
143
|
+
answers = [%w(005a 1131 1131), %w(005b 1131 1133), [nil, '1131', '1131'], [nil, '1131', '1133']]
|
144
|
+
testing.zip(answers) do |ar|
|
145
|
+
ans = pep.class.extract_file_info(ar[0])
|
146
|
+
ans.join(" ").should == ar[1].join(" ")
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
end
|
151
|
+
|
152
|
+
|
153
|
+
=end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
+
|
3
|
+
require 'ms/sequest/params'
|
4
|
+
|
5
|
+
# Reads a params file with a deliberately simple regexp and returns a hash
# of all params (String name => String value).
#
# Only lines that begin with a lowercase letter are considered, so section
# headers and comment lines are skipped.  A value runs up to the first ';'
# (or end of line) and has trailing whitespace removed.  A parameter with
# nothing after the '=' maps to "".
def simple_parse(filename)
  hash = {}
  IO.read(filename).split(/\r?\n/).each do |line|
    next unless line =~ /^[a-z]/
    # [^;]* rather than [^;]+ so "name =" (no value, no trailing space) is
    # recorded as "" instead of being dropped
    md = line.match(/([^\s]+)\s*=\s*([^;]*)\s*;?/)
    hash[md[1]] = md[2].rstrip if md
  end
  hash
end
|
15
|
+
|
16
|
+
# Examples shared by every params fixture.  The including context supplies
# @file (path to the params file), @api_hash and @backwards_hash (accessor
# name => expected value).
shared 'sequest params' do
  before do
    @obj = Ms::Sequest::Params.new(@file)
  end

  it 'has a method for every parameter in the file' do
    # simple_parse gives the name/value pairs read straight from the file
    simple_parse(@file).each do |name, value|
      @obj.send(name.to_sym).is value
    end
  end

  it 'returns zero length string for params with no information' do
    @obj.second_database_name.is ""
    @obj.sequence_header_filter.is ""
  end

  it 'returns nil for params that do not exist and have no translation' do
    @obj.google_plex.is nil
  end

  it 'provides consistent API between versions for important info' do
    # the accessors are called inside capture_stderr; what it captures is
    # not inspected
    capture_stderr do
      @api_hash.each do |name, value|
        @obj.send(name).is value
      end
    end
  end

  it 'provides some backwards compatibility' do
    @backwards_hash.each do |name, value|
      @obj.send(name).is value
    end
  end

end
|
52
|
+
|
53
|
+
# Expected values for the bioworks31.params fixture, checked by the shared
# 'sequest params' examples.
describe 'sequest params v 3.1' do

  @file = TESTFILES + '/bioworks31.params'

  # older accessor names and the values they should report
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol => '0.0000',
  }

  # version-independent accessors and the values they should report
  @api_hash = {
    :version => '3.1',
    :enzyme => 'Trypsin',
    :database => "C:\\Xcalibur\\database\\ecoli_K12.fasta",
    :enzyme_specificity => [1, 'KR', ''],
    :precursor_mass_type => "average",
    :fragment_mass_type => "average",
    :min_number_termini => '1',
  }

  behaves_like 'sequest params'
end
|
73
|
+
|
74
|
+
# Expected values for the bioworks32.params fixture, checked by the shared
# 'sequest params' examples.
describe 'sequest params v 3.2' do
  @file = TESTFILES + '/bioworks32.params'

  # older accessor names and the values they should report
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol => '1.0000',
  }

  # version-independent accessors and the values they should report
  @api_hash = {
    :version => '3.2',
    :enzyme => 'Trypsin',
    :database => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
    :enzyme_specificity => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type => "average",
    :min_number_termini => '2',
  }

  behaves_like 'sequest params'
end
|
93
|
+
|
94
|
+
# Expected values for the bioworks33.params fixture, checked by the shared
# 'sequest params' examples.
describe 'sequest params v 3.3' do
  @file = TESTFILES + '/bioworks33.params'

  # older accessor names and the values they should report
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol => '1.0000',
  }

  # version-independent accessors and the values they should report
  @api_hash = {
    :version => '3.3',
    :enzyme => 'Trypsin',
    :database => "C:\\Xcalibur\\database\\yeast.fasta",
    :enzyme_specificity => [1, 'KR', ''],
    :precursor_mass_type => "monoisotopic",
    :fragment_mass_type => "monoisotopic",
    :min_number_termini => '2',
  }

  behaves_like 'sequest params'
end
|
112
|
+
|
113
|
+
# Expected values for the params fragment stored in the
# 7MIX_STD_110802_1 srf fixture, checked by the shared 'sequest params'
# examples.
describe 'sequest params v 3.2 from srf' do
  @file = TESTFILES + '/7MIX_STD_110802_1.sequest_params_fragment.srf'

  # older accessor names and the values they should report
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol => '1.0000',
  }

  # version-independent accessors and the values they should report
  @api_hash = {
    :version => '3.2',
    :enzyme => 'Trypsin',
    :database => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
    :enzyme_specificity => [1, 'KR', 'P'],
    :precursor_mass_type => "average",
    :fragment_mass_type => "average",
    :min_number_termini => '2',
  }

  behaves_like 'sequest params'
end
|
131
|
+
|
@@ -0,0 +1,376 @@
|
|
1
|
+
|
2
|
+
=begin
|
3
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
4
|
+
|
5
|
+
require 'spec_id'
|
6
|
+
require 'spec_id/sequest/pepxml'
|
7
|
+
#require 'ms/mzxml'
|
8
|
+
|
9
|
+
|
10
|
+
NODELETE = false
|
11
|
+
|
12
|
+
describe Sequest::PepXML, " created from small bioworks.xml" do
|
13
|
+
|
14
|
+
spec_large do
|
15
|
+
before(:all) do
|
16
|
+
tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
|
17
|
+
|
18
|
+
tf_params = Tfiles + "/bioworks32.params"
|
19
|
+
tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
|
20
|
+
out_path = Tfiles
|
21
|
+
@pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'gets some spectrum queries' do
|
25
|
+
@pepxml_objs.each do |obj|
|
26
|
+
(obj.spectrum_queries.size > 2).should be_true
|
27
|
+
(obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
|
28
|
+
end
|
29
|
+
#@pepxml_objs.each do |pep| puts pep.to_pepxml end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
describe Sequest::PepXML, " created from large bioworks.xml" do
|
37
|
+
# assert_equal_by_pairs (really any old array)
|
38
|
+
def assert_equal_pairs(obj, arrs)
|
39
|
+
arrs.each do |arr|
|
40
|
+
#if obj.send(arr[1]) != arr[0]
|
41
|
+
# puts "HELLO"
|
42
|
+
# puts "OBJ answer"
|
43
|
+
# p obj.send(arr[1])
|
44
|
+
# puts "ar0"
|
45
|
+
# p arr[0]
|
46
|
+
# puts "ar1"
|
47
|
+
# p arr[1]
|
48
|
+
#end
|
49
|
+
if arr[0].is_a? Float
|
50
|
+
obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
|
51
|
+
else
|
52
|
+
obj.send(arr[1]).should == arr[0]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# swap the first two guys first
|
58
|
+
def assert_equal_pairs_swapped(obj, arrs)
|
59
|
+
arrs.each do |arr|
|
60
|
+
arr[0], arr[1] = arr[1], arr[0]
|
61
|
+
end
|
62
|
+
assert_equal_pairs(obj, arrs)
|
63
|
+
end
|
64
|
+
|
65
|
+
spec_large do
|
66
|
+
before(:all) do
|
67
|
+
st = Time.new
|
68
|
+
params = Tfiles + "/opd1/sequest.3.2.params"
|
69
|
+
bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
|
70
|
+
mzxml_path = Tfiles_l + "/opd1"
|
71
|
+
out_path = Tfiles
|
72
|
+
@pepxml_version = 18
|
73
|
+
@pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
|
74
|
+
puts "- takes #{Time.new - st} secs"
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'extracts MSMSPipelineAnalysis' do
|
78
|
+
######## HMMMMM...
|
79
|
+
Sequest::PepXML.pepxml_version.should == @pepxml_version
|
80
|
+
|
81
|
+
# MSMSPipelineAnalysis
|
82
|
+
po = @pepxml_objs.first
|
83
|
+
msms_pipeline = po.msms_pipeline_analysis
|
84
|
+
msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
|
85
|
+
msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
|
86
|
+
msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
|
87
|
+
msms_pipeline.summary_xml.should == '000.xml'
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'extracts MSmSRunSummary' do
|
91
|
+
# MSMSRunSummary
|
92
|
+
rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
|
93
|
+
rs.base_name.should =~ /\/000/
|
94
|
+
assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'extracts SampleEnzyme' do
|
98
|
+
# SampleEnzyme
|
99
|
+
se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
|
100
|
+
assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'extracts SearchSummary' do
|
104
|
+
# SearchSummary
|
105
|
+
ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
|
106
|
+
ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
|
107
|
+
ss.base_name.should =~ /\/000/
|
108
|
+
ss.peptide_mass_tol.should =~ /1\.500/
|
109
|
+
assert_equal_pairs_swapped(ss, [ # normal attributes
|
110
|
+
[:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
|
111
|
+
|
112
|
+
# enzymatic_search_constraint
|
113
|
+
[:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
|
114
|
+
|
115
|
+
# parameters
|
116
|
+
[:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
|
117
|
+
])
|
118
|
+
|
119
|
+
end
|
120
|
+
it 'extracts SearchDatabase' do
|
121
|
+
# SearchDatabase
|
122
|
+
sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
|
123
|
+
sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
|
124
|
+
assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'returns SpectrumQueries' do
|
128
|
+
# SpectrumQueries
|
129
|
+
sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
|
130
|
+
spec = sq.first
|
131
|
+
assert_equal_pairs_swapped(spec, [
|
132
|
+
[:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
|
133
|
+
#[:precursor_neutral_mass, "1074.5920"], # out2summary
|
134
|
+
[:precursor_neutral_mass, 1074.666926], # mine
|
135
|
+
[:assumed_charge, 1], [:index, "1"],
|
136
|
+
])
|
137
|
+
sh = spec.search_results.first.search_hits.first
|
138
|
+
assert_equal_pairs_swapped(sh, [
|
139
|
+
# normal attributes
|
140
|
+
[:hit_rank, 1],
|
141
|
+
[:peptide, "SIYFRNFK"],
|
142
|
+
[:peptide_prev_aa, "R"],
|
143
|
+
[:peptide_next_aa, "G"],
|
144
|
+
[:protein, "gi|16130084|ref|NP_416651.1|"],
|
145
|
+
[:num_tot_proteins, 1],
|
146
|
+
[:num_matched_ions, 4],
|
147
|
+
[:tot_num_ions, 14],
|
148
|
+
#[:calc_neutral_pep_mass, "1074.1920"], # out2summary
|
149
|
+
[:calc_neutral_pep_mass, 1074.23261], # mine
|
150
|
+
#[:massdiff, "+0.400000"], # out2summary
|
151
|
+
[:massdiff, 0.434316000000081], # mine
|
152
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
|
153
|
+
|
154
|
+
# search_score
|
155
|
+
[:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
|
156
|
+
])
|
157
|
+
|
158
|
+
spec = sq[1]
|
159
|
+
assert_equal_pairs_swapped(spec, [
|
160
|
+
[:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
|
161
|
+
[:precursor_neutral_mass, 663.206111], # mine
|
162
|
+
[:assumed_charge, 1], [:index, "2"],
|
163
|
+
])
|
164
|
+
|
165
|
+
sh = spec.search_results.first.search_hits.first
|
166
|
+
assert_equal_pairs_swapped(sh, [
|
167
|
+
# normal attributes
|
168
|
+
[:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
|
169
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
|
170
|
+
#[:massdiff, "-0.600000"], # out2summary
|
171
|
+
[:massdiff, -0.556499000000031], # mine
|
172
|
+
#[:calc_neutral_pep_mass, 663.7920], # out2summary
|
173
|
+
[:calc_neutral_pep_mass, 663.76261], # mine
|
174
|
+
|
175
|
+
# search_score
|
176
|
+
[:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
|
177
|
+
])
|
178
|
+
|
179
|
+
spec = sq[9]
|
180
|
+
assert_equal_pairs_swapped(spec, [
|
181
|
+
[:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
|
182
|
+
#[:precursor_neutral_mass, "691.0920"], # out2summary
|
183
|
+
[:precursor_neutral_mass, 691.150992], # mine
|
184
|
+
])
|
185
|
+
|
186
|
+
sh = spec.search_results.first.search_hits.first
|
187
|
+
assert_equal_pairs_swapped(sh, [
|
188
|
+
# normal attributes
|
189
|
+
[:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
|
190
|
+
|
191
|
+
#[:num_missed_cleavages, "0"], # out2summary misses this!
|
192
|
+
[:num_missed_cleavages, 1],
|
193
|
+
[:is_rejected, 0],
|
194
|
+
#[:calc_neutral_pep_mass, "691.7920"], # out2summary
|
195
|
+
[:calc_neutral_pep_mass, 691.82261], # mine
|
196
|
+
#[:massdiff, "-0.700000"], # out2summary
|
197
|
+
[:massdiff, -0.67161800000008], # mine
|
198
|
+
|
199
|
+
# search_score
|
200
|
+
[:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
|
201
|
+
])
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'can generate correct pepxml file' do
|
205
|
+
|
206
|
+
## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
|
207
|
+
string = @pepxml_objs.first.to_pepxml
|
208
|
+
ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
|
209
|
+
base_name_re = /base_name=".*?files\//o
|
210
|
+
date_re = /date=".*?"/
|
211
|
+
string.split("\n").each_with_index do |line,i|
|
212
|
+
if i > 99 ; break end
|
213
|
+
ans, exp =
|
214
|
+
if i == 1
|
215
|
+
[line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
|
216
|
+
elsif i == 2
|
217
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
|
218
|
+
elsif i == 6
|
219
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
|
220
|
+
else
|
221
|
+
[line, ans_lines[i]]
|
222
|
+
end
|
223
|
+
|
224
|
+
#ans.split('').zip(exp.split('')) do |l,a|
|
225
|
+
# if l != a
|
226
|
+
# puts line
|
227
|
+
# puts ans_lines[i]
|
228
|
+
# puts l
|
229
|
+
# puts a
|
230
|
+
# end
|
231
|
+
#end
|
232
|
+
if ans != exp
|
233
|
+
puts ans
|
234
|
+
puts exp
|
235
|
+
end
|
236
|
+
ans.should == exp
|
237
|
+
#line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
|
244
|
+
|
245
|
+
describe Sequest::PepXML::Modifications do
|
246
|
+
before(:each) do
|
247
|
+
tf_params = Tfiles + "/bioworks32.params"
|
248
|
+
@params = Sequest::Params.new(tf_params)
|
249
|
+
# The params object here is completely unnecessary for this test, except
|
250
|
+
# that it sets up the mass table
|
251
|
+
@obj = Sequest::PepXML::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
|
252
|
+
end
|
253
|
+
it 'creates a mod_symbols_hash' do
|
254
|
+
answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
|
255
|
+
@obj.mod_symbols_hash.should == answ
|
256
|
+
## need more here
|
257
|
+
end
|
258
|
+
|
259
|
+
it 'creates a ModificationInfo object given a special peptide sequence' do
|
260
|
+
mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
|
261
|
+
@params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
|
262
|
+
@params.term_diff_search_options = "14.20000 12.33000"
|
263
|
+
mod = Sequest::PepXML::Modifications.new(@params, mod_string)
|
264
|
+
## no mods
|
265
|
+
peptide = "PEPTIDE"
|
266
|
+
mod.modification_info(peptide).should be_nil
|
267
|
+
peptide = "]M*EC^S@IDM#M*EMSCM["
|
268
|
+
modinfo = mod.modification_info(peptide)
|
269
|
+
modinfo.modified_peptide.should == peptide
|
270
|
+
modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
|
271
|
+
modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
|
272
|
+
end
|
273
|
+
|
274
|
+
end
|
275
|
+
|
276
|
+
describe Sequest::PepXML::SearchHit::ModificationInfo do
|
277
|
+
|
278
|
+
before(:each) do
|
279
|
+
modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
|
280
|
+
Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
|
281
|
+
end
|
282
|
+
hash = {
|
283
|
+
:mod_nterm_mass => 520.2,
|
284
|
+
:modified_peptide => "MOD*IFI^E&D",
|
285
|
+
:mod_aminoacid_masses => modaaobjs,
|
286
|
+
}
|
287
|
+
#answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
|
288
|
+
@obj = Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
|
289
|
+
end
|
290
|
+
|
291
|
+
def _re(st)
|
292
|
+
/#{Regexp.escape(st)}/
|
293
|
+
end
|
294
|
+
|
295
|
+
it 'can produce pepxml' do
|
296
|
+
answ = @obj.to_pepxml
|
297
|
+
answ.should =~ _re('<modification_info')
|
298
|
+
answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
|
299
|
+
answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&D\"")
|
300
|
+
answ.should =~ _re("<mod_aminoacid_mass")
|
301
|
+
answ.should =~ _re(" position=\"3\"")
|
302
|
+
answ.should =~ _re(" mass=\"150.3\"")
|
303
|
+
answ.should =~ _re(" position=\"6\"")
|
304
|
+
answ.should =~ _re(" mass=\"345.2\"")
|
305
|
+
answ.should =~ _re("</modification_info>")
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
309
|
+
describe 'bioworks file with modifications transformed into pepxml' do
|
310
|
+
|
311
|
+
spec_large do
|
312
|
+
before(:all) do
|
313
|
+
modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
|
314
|
+
modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
|
315
|
+
@srgfile = modfiles_sequest_dir + 'tmp.srg'
|
316
|
+
@out_path = modfiles_sequest_dir + 'pepxml'
|
317
|
+
modfiles = %w(020 040).map do |file|
|
318
|
+
modfiles_sequest_dir + file + ".srf"
|
319
|
+
end
|
320
|
+
objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
|
321
|
+
@out_files = %w(020 040).map do |file|
|
322
|
+
@out_path + '/' + file + '.xml'
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
after(:all) do
|
327
|
+
File.unlink(@srgfile) unless NODELETE
|
328
|
+
FileUtils.rm_r(@out_path)
|
329
|
+
#@out_files.each do |fn|
|
330
|
+
# File.unlink(fn) unless NODELETE
|
331
|
+
#end
|
332
|
+
end
|
333
|
+
|
334
|
+
# splits string on ' ' and matches the line found by find_line_regexp in
|
335
|
+
# lines
|
336
|
+
def match_modline_pieces(lines, find_line_regexp, string)
|
337
|
+
pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
|
338
|
+
lines.each do |line|
|
339
|
+
if line =~ find_line_regexp
|
340
|
+
pieces.each do |piece|
|
341
|
+
line.should =~ piece
|
342
|
+
end
|
343
|
+
end
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
it 'gets modifications right in real run' do
|
348
|
+
@out_files.each do |fn|
|
349
|
+
fn.exist_as_a_file?.should be_true
|
350
|
+
beginning = IO.read(fn)
|
351
|
+
lines = beginning.split("\n")
|
352
|
+
[
|
353
|
+
[/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
|
354
|
+
|
355
|
+
[/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
|
356
|
+
[/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
|
357
|
+
[/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
|
358
|
+
[/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
|
359
|
+
].each do |a,b|
|
360
|
+
match_modline_pieces(lines, a, b)
|
361
|
+
end
|
362
|
+
[
|
363
|
+
'<modification_info modified_peptide="Y#RLGGS#T#K">',
|
364
|
+
'<mod_aminoacid_mass position="1" mass="243.1559"/>',
|
365
|
+
'<mod_aminoacid_mass position="7" mass="167.0581"/>',
|
366
|
+
'</modification_info>',
|
367
|
+
'<mod_aminoacid_mass position="9" mass="181.085"/>'
|
368
|
+
].each do |line|
|
369
|
+
beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
|
370
|
+
end
|
371
|
+
end
|
372
|
+
end
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
=end
|