mspire 0.5.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,181 @@
|
|
1
|
+
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'ms/ident/pepxml/sample_enzyme'
|
4
|
+
require 'nokogiri'
|
5
|
+
|
6
|
+
# Construction of a SampleEnzyme: either from a known enzyme name or from an
# explicit attribute hash. Both routes must expose the same four attributes.
describe 'creating an MS::Ident::Pepxml::SampleEnzyme' do
  before do
    # attribute name => value expected from the corresponding reader
    @hash = { :name => 'trypsin', :cut => 'KR', :no_cut => 'P', :sense => 'C' }
  end

  it 'can be set by a known enzyme name' do
    enzyme = MS::Ident::Pepxml::SampleEnzyme.new('trypsin')
    @hash.each_pair { |attribute, expected| enzyme.send(attribute).should == expected }
  end

  it 'can be set manually with a hash' do
    enzyme = MS::Ident::Pepxml::SampleEnzyme.new(@hash)
    @hash.each_pair { |attribute, expected| enzyme.send(attribute).should == expected }
  end
end
|
29
|
+
|
30
|
+
# XML serialization of a fully specified SampleEnzyme: standalone (returns a
# String) and appended to an existing Nokogiri builder (returns that builder).
describe 'an MS::Ident::Pepxml::SampleEnzyme' do
  before do
    @sample_enzyme = MS::Ident::Pepxml::SampleEnzyme.new(:name=>'trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
  end

  it 'generates a valid xml fragment' do
    string = @sample_enzyme.to_xml
    string.is_a?(String).should == true
    string.should match(/<sample_enzyme name="trypsin"/)
    string.should match(/<specificity/)
    %w(cut="KR" no_cut="P" sense="C").each {|re| string.should match(/#{re}/) }
    # This used to read `!string.include?('version').should == true`. Method
    # calls bind tighter than unary `!`, so that negated the object returned by
    # `should` and compared `false == true` without ever asserting anything.
    # State the expectation directly instead. Presumably the point is that a
    # fragment must not carry an `<?xml version=...?>` declaration.
    string.should_not match(/version/)
  end

  it 'adds to an xml builder object' do
    builder = Nokogiri::XML::Builder.new
    after = @sample_enzyme.to_xml(builder)
    # the very same builder must be handed back so calls can be chained
    after.is_a?(Nokogiri::XML::Builder).should == true
    after.should == builder
    after.to_xml.is_a?(String).should == true
  end
end
|
50
|
+
|
51
|
+
# Digestion bookkeeping (tolerant termini, missed cleavages) checked for two
# enzyme definitions: trypsin with the 'P' no-cut rule and trypsin without it.
describe 'an MS::Ident::Pepxml::SampleEnzyme making enzyme digestion calculations' do
  before do
    @full_KRP = MS::Ident::Pepxml::SampleEnzyme.new(:name => 'trypsin', :cut => 'KR', :no_cut => 'P', :sense => 'C')
    @just_KR = MS::Ident::Pepxml::SampleEnzyme.new(:name => 'trypsin', :cut => 'KR', :no_cut => '', :sense => 'C')
  end

  it 'calculates the number of tolerant termini' do
    # each key is splatted into num_tol_term as three separate arguments
    # full KR/P
    full_krp_expected = {
      %w(K EPTIDR E) => 2,
      %w(K PEPTIDR E) => 1,
      %w(F EEPTIDR E) => 1,
      %w(F PEPTIDW R) => 0
    }
    # just KR
    just_kr_expected = {
      %w(K EPTIDR E) => 2,
      %w(K PEPTIDR E) => 2,
      %w(F EEPTIDR E) => 1,
      %w(F PEPTIDW R) => 0
    }

    [[@full_KRP, full_krp_expected], [@just_KR, just_kr_expected]].each do |enzyme, expectations|
      expectations.each_pair do |flanked_peptide, termini|
        enzyme.num_tol_term(*flanked_peptide).should == termini
      end
    end
  end

  it 'calculates number of missed cleavages' do
    # full KR/P
    full_krp_expected = {
      "EPTIDR" => 0,
      "PEPTIDR" => 0,
      "EEPTIDR" => 0,
      "PEPTIDW" => 0,
      "PERPTIDW" => 0,
      "PEPKPTIDW" => 0,
      "PEPKTIDW" => 1,
      "RTTIDR" => 1,
      "RTTIKK" => 2,
      "PKEPRTIDW" => 2,
      "PKEPRTIDKP" => 2,
      "PKEPRAALKPEERPTIDKW" => 3
    }
    # just KR
    just_kr_expected = {
      "EPTIDR" => 0,
      "PEPTIDR" => 0,
      "EEPTIDR" => 0,
      "PEPTIDW" => 0,
      "PERPTIDW" => 1,
      "PEPKPTIDW" => 1,
      "PEPKTIDW" => 1,
      "RTTIDR" => 1,
      "RTTIKK" => 2,
      "PKEPRTIDW" => 2,
      "PKEPRTIDKP" => 3,
      "PKEPRAALKPEERPTIDKW" => 5
    }

    [[@full_KRP, full_krp_expected], [@just_KR, just_kr_expected]].each do |enzyme, expectations|
      expectations.each_pair do |peptide, missed|
        enzyme.num_missed_cleavages(peptide).should == missed
      end
    end
  end
end
|
130
|
+
|
131
|
+
#xdescribe 'read in from an xml node' do
|
132
|
+
# # placeholder until written
|
133
|
+
#end
|
134
|
+
|
135
|
+
### DOES this kind of functionality belong in this kind of container????
|
136
|
+
### SHOULD it be with ms-enzyme or ms-in_silico ???????
|
137
|
+
|
138
|
+
=begin
|
139
|
+
require 'set'
|
140
|
+
|
141
|
+
describe 'MS::Ident::Pepxml::SampleEnzyme digesting sequences' do
|
142
|
+
it 'can digest with no missed cleavages' do
|
143
|
+
st = "CRGATKKTAGRPMEK"
|
144
|
+
SampleEnzyme.tryptic(st).should == %w(CR GATK K TAGRPMEK)
|
145
|
+
st = "CATRP"
|
146
|
+
SampleEnzyme.tryptic(st).should == %w(CATRP)
|
147
|
+
st = "RCATRP"
|
148
|
+
SampleEnzyme.tryptic(st).should == %w(R CATRP)
|
149
|
+
st = ""
|
150
|
+
SampleEnzyme.tryptic(st).should == []
|
151
|
+
st = "R"
|
152
|
+
SampleEnzyme.tryptic(st).should == %w(R)
|
153
|
+
end
|
154
|
+
|
155
|
+
it 'can digest with missed cleavages' do
|
156
|
+
st = "CRGATKKTAGRPMEKLLLERTKY"
|
157
|
+
zero = %w(CR GATK K TAGRPMEK LLLER TK Y)
|
158
|
+
SampleEnzyme.tryptic(st,0).to_set.should == zero.to_set
|
159
|
+
one = %w(CRGATK GATKK KTAGRPMEK TAGRPMEKLLLER LLLERTK TKY)
|
160
|
+
SampleEnzyme.tryptic(st,1).to_set.should == (zero+one).to_set
|
161
|
+
two = %w(CRGATKK GATKKTAGRPMEK KTAGRPMEKLLLER TAGRPMEKLLLERTK LLLERTKY)
|
162
|
+
all = zero + one + two
|
163
|
+
SampleEnzyme.tryptic(st,2).to_set.should == all.to_set
|
164
|
+
end
|
165
|
+
|
166
|
+
it 'contains duplicates IF there are duplicate tryptic sequences' do
|
167
|
+
st = "AAAAKCCCCKDDDDKCCCCK"
|
168
|
+
peps = SampleEnzyme.new('trypsin').digest(st, 2)
|
169
|
+
peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
173
|
+
|
174
|
+
describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
|
175
|
+
|
176
|
+
|
177
|
+
end
|
178
|
+
=end
|
179
|
+
|
180
|
+
|
181
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/ident/pepxml/search_hit/modification_info'
|
4
|
+
|
5
|
+
# Serialization of a ModificationInfo built from a hash of attributes plus two
# ModAminoacidMass children.
describe 'MS::Ident::Pepxml::SearchHit::ModificationInfo' do

  before do
    aminoacid_masses = [[3, 150.3], [6, 345.2]].map do |position, mass|
      MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(position, mass)
    end
    # For reference, the serialized form used to be checked against:
    #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
    @obj = MS::Ident::Pepxml::SearchHit::ModificationInfo.new(
      {
        :mod_nterm_mass => 520.2,
        :modified_peptide => "MOD*IFI^E&D",
        :mod_aminoacid_masses => aminoacid_masses,
      }
    )
  end

  it 'can produce valid pepxml xml' do
    xml = @obj.to_xml
    # Only the presence of each fragment is checked, not attribute order or
    # whitespace; every fragment is matched literally.
    [
      '<modification_info',
      ' mod_nterm_mass="520.2"',
      ' modified_peptide="MOD*IFI^E&D"',
      '<mod_aminoacid_mass',
      ' position="3"',
      ' mass="150.3"',
      ' position="6"',
      ' mass="345.2"',
      '</modification_info>'
    ].each do |fragment|
      xml.should match(/#{Regexp.escape(fragment)}/)
    end
  end
end
|
36
|
+
|
37
|
+
|
@@ -0,0 +1,442 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/mass'
|
4
|
+
require 'ms/mass/aa'
|
5
|
+
require 'ms/ident/pepxml'
|
6
|
+
require 'ms/ident/pepxml/modifications'
|
7
|
+
require 'ms/ident/pepxml/spectrum_query'
|
8
|
+
require 'ms/ident/pepxml/search_result'
|
9
|
+
require 'ms/ident/pepxml/search_hit'
|
10
|
+
require 'ms/ident/pepxml/search_hit/modification_info'
|
11
|
+
|
12
|
+
# Builds a complete pepXML document through the nested block API and checks
# that every expected element and both processing instructions appear in the
# serialized output.
describe "creating an MS::Ident::Pepxml" do
  include MS::Ident

  it "can be creating in a nested fashion reflecting internal structure" do
    expected_tags = %w(msms_pipeline_analysis msms_run_summary sample_enzyme search_summary spectrum_query search_result search_hit modification_info mod_aminoacid_mass search_score)

    document = Pepxml.new do |pipeline|
      pipeline.merge!(:summary_xml => "020.xml") do |run_summary|
        # prep the sample enzyme and search_summary
        run_summary.merge!(
          :base_name => '/home/jtprince/dev/mspire/020',
          :ms_manufacturer => 'Thermo',
          :ms_model => 'LTQ Orbitrap',
          :ms_ionization => 'ESI',
          :ms_mass_analyzer => 'Ion Trap',
          :ms_detector => 'UNKNOWN'
        ) do |enzyme, summary, queries|
          enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')

          summary.merge!(
            :base_name=>'/path/to/file/020',
            :search_engine => 'SEQUEST',
            :precursor_mass_type =>'monoisotopic',
            :fragment_mass_type => 'average'
          ) do |database, search_constraint, mods, params|
            database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
            search_constraint.merge!(
              :enzyme => 'Trypsin',
              :max_num_internal_cleavages => 2,
              :min_number_termini => 2
            )

            mods << Pepxml::AminoacidModification.new(
              :aminoacid => 'M',
              :massdiff => 15.9994,
              :mass => MS::Mass::AA::MONO['M']+15.9994,
              :variable => 'Y',
              :symbol => '*'
            )
            # invented, for example, a protein terminating mod
            mods << Pepxml::TerminalModification.new(
              :terminus => 'c',
              :massdiff => 23.3333,
              :mass => MS::Mass::MONO['oh'] + 23.3333,
              :variable => 'Y',
              :symbol => '[',
              :protein_terminus => 'c',
              :description => 'leave protein_terminus off if not protein mod'
            )
            mods << Pepxml::TerminalModification.new(
              :terminus => 'c',
              :massdiff => 25.42322,
              :mass => MS::Mass::MONO['h+'] + 25.42322,
              :variable => 'N',
              :symbol => ']',
              :description => 'example: c term mod'
            )

            params.merge!(
              :fragment_ion_tolerance => 1.0000,
              :digest_mass_range => '600.0 3500.0',
              :enzyme_info => 'Trypsin(KR/P) 1 1 KR P' # etc....
            )
          end

          query = Pepxml::SpectrumQuery.new(
            :spectrum => '020.3.3.1',
            :start_scan => 3,
            :end_scan => 3,
            :precursor_neutral_mass => 1120.93743421875,
            :assumed_charge => 1
          ) do |results|
            result = Pepxml::SearchResult.new do |hits|
              mod_masses = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |position, mass|
                Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(position, mass)
              end
              # positional order: (modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
              # or can be set by hash
              mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', mod_masses)

              hit = Pepxml::SearchHit.new(
                :hit_rank=>1,
                :peptide=>'YRLGGSTK',
                :peptide_prev_aa => "R",
                :peptide_next_aa => "K",
                :protein => "gi|16130113|ref|NP_416680.1|",
                :num_tot_proteins => 1,
                :num_matched_ions => 5,
                :tot_num_ions => 35,
                :calc_neutral_pep_mass => 1120.93163442,
                :massdiff => 0.00579979875010395,
                :num_tol_term => 2,
                :num_missed_cleavages => 1,
                :is_rejected => 0,
                :modification_info => mod_info
              ) do |scores|
                scores.merge!(
                  :xcorr => 0.12346,
                  :deltacn => 0.7959,
                  :deltacnstar => 0,
                  :spscore => 29.85,
                  :sprank => 1
                )
              end
              hits << hit
            end
            results << result
          end
          queries << query
        end
      end
    end

    serialized = document.to_xml
    expected_tags.each { |tag| serialized.should match(/<#{tag} ?/) }
    # XML declaration and the TPP stylesheet processing instruction
    serialized.should match( /<\?xml version="1.0" encoding="UTF-8"\?>/ )
    serialized.should match( %r{<\?xml-stylesheet type="text/xsl" href="/tools/bin/TPP/tpp/schema/pepXML_std.xsl"\?>} )
  end
end
|
97
|
+
|
98
|
+
=begin
|
99
|
+
# splits string on ' ' and matches the line found by find_line_regexp in
|
100
|
+
# lines
|
101
|
+
def match_modline_pieces(lines, find_line_regexp, string)
|
102
|
+
pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
|
103
|
+
lines.each do |line|
|
104
|
+
if line =~ find_line_regexp
|
105
|
+
pieces.each do |piece|
|
106
|
+
line.should =~ piece
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
|
113
|
+
it 'gets modifications right in real run' do
|
114
|
+
@out_files.each do |fn|
|
115
|
+
fn.exist_as_a_file?.should be_true
|
116
|
+
beginning = IO.read(fn)
|
117
|
+
lines = beginning.split("\n")
|
118
|
+
[
|
119
|
+
[/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
|
120
|
+
|
121
|
+
[/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
|
122
|
+
[/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
|
123
|
+
[/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
|
124
|
+
[/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
|
125
|
+
].each do |a,b|
|
126
|
+
match_modline_pieces(lines, a, b)
|
127
|
+
end
|
128
|
+
[
|
129
|
+
'<modification_info modified_peptide="Y#RLGGS#T#K">',
|
130
|
+
'<mod_aminoacid_mass position="1" mass="243.1559"/>',
|
131
|
+
'<mod_aminoacid_mass position="7" mass="167.0581"/>',
|
132
|
+
'</modification_info>',
|
133
|
+
'<mod_aminoacid_mass position="9" mass="181.085"/>'
|
134
|
+
].each do |line|
|
135
|
+
beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
=begin
|
145
|
+
describe "MS::Ident::Pepxml created from small bioworks.xml" do
|
146
|
+
|
147
|
+
spec_large do
|
148
|
+
before(:all) do
|
149
|
+
tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
|
150
|
+
|
151
|
+
tf_params = Tfiles + "/bioworks32.params"
|
152
|
+
tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
|
153
|
+
out_path = Tfiles
|
154
|
+
@pepxml_objs = Sequest::Pepxml.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'gets some spectrum queries' do
|
158
|
+
@pepxml_objs.each do |obj|
|
159
|
+
(obj.spectrum_queries.size > 2).should be_true
|
160
|
+
(obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
|
161
|
+
end
|
162
|
+
#@pepxml_objs.each do |pep| puts pep.to_pepxml end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
describe Sequest::Pepxml, " created from large bioworks.xml" do
|
170
|
+
# assert_equal_by_pairs (really any old array)
|
171
|
+
def assert_equal_pairs(obj, arrs)
|
172
|
+
arrs.each do |arr|
|
173
|
+
#if obj.send(arr[1]) != arr[0]
|
174
|
+
# puts "HELLO"
|
175
|
+
# puts "OBJ answer"
|
176
|
+
# p obj.send(arr[1])
|
177
|
+
# puts "ar0"
|
178
|
+
# p arr[0]
|
179
|
+
# puts "ar1"
|
180
|
+
# p arr[1]
|
181
|
+
#end
|
182
|
+
if arr[0].is_a? Float
|
183
|
+
obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
|
184
|
+
else
|
185
|
+
obj.send(arr[1]).should == arr[0]
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
# swap the first two elements of each pair first
|
191
|
+
def assert_equal_pairs_swapped(obj, arrs)
|
192
|
+
arrs.each do |arr|
|
193
|
+
arr[0], arr[1] = arr[1], arr[0]
|
194
|
+
end
|
195
|
+
assert_equal_pairs(obj, arrs)
|
196
|
+
end
|
197
|
+
|
198
|
+
spec_large do
|
199
|
+
before(:all) do
|
200
|
+
st = Time.new
|
201
|
+
params = Tfiles + "/opd1/sequest.3.2.params"
|
202
|
+
bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
|
203
|
+
mzxml_path = Tfiles_l + "/opd1"
|
204
|
+
out_path = Tfiles
|
205
|
+
@pepxml_version = 18
|
206
|
+
@pepxml_objs = Sequest::Pepxml.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
|
207
|
+
puts "- takes #{Time.new - st} secs"
|
208
|
+
end
|
209
|
+
|
210
|
+
it 'extracts MSMSPipelineAnalysis' do
|
211
|
+
######## HMMMMM...
|
212
|
+
Sequest::Pepxml.pepxml_version.should == @pepxml_version
|
213
|
+
|
214
|
+
# MSMSPipelineAnalysis
|
215
|
+
po = @pepxml_objs.first
|
216
|
+
msms_pipeline = po.msms_pipeline_analysis
|
217
|
+
msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
|
218
|
+
msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
|
219
|
+
msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
|
220
|
+
msms_pipeline.summary_xml.should == '000.xml'
|
221
|
+
end
|
222
|
+
|
223
|
+
it 'extracts MSmSRunSummary' do
|
224
|
+
# MSMSRunSummary
|
225
|
+
rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
|
226
|
+
rs.base_name.should =~ /\/000/
|
227
|
+
assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
|
228
|
+
end
|
229
|
+
|
230
|
+
it 'extracts SampleEnzyme' do
|
231
|
+
# SampleEnzyme
|
232
|
+
se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
|
233
|
+
assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
|
234
|
+
end
|
235
|
+
|
236
|
+
it 'extracts SearchSummary' do
|
237
|
+
# SearchSummary
|
238
|
+
ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
|
239
|
+
ss.is_a?(Sequest::Pepxml::SearchSummary).should be_true
|
240
|
+
ss.base_name.should =~ /\/000/
|
241
|
+
ss.peptide_mass_tol.should =~ /1\.500/
|
242
|
+
assert_equal_pairs_swapped(ss, [ # normal attributes
|
243
|
+
[:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
|
244
|
+
|
245
|
+
# enzymatic_search_constraint
|
246
|
+
[:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
|
247
|
+
|
248
|
+
# parameters
|
249
|
+
[:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
|
250
|
+
])
|
251
|
+
|
252
|
+
end
|
253
|
+
it 'extracts SearchDatabase' do
|
254
|
+
# SearchDatabase
|
255
|
+
sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
|
256
|
+
sd.is_a?(Sequest::Pepxml::SearchDatabase).should be_true
|
257
|
+
assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
|
258
|
+
end
|
259
|
+
|
260
|
+
it 'returns SpectrumQueries' do
|
261
|
+
# SpectrumQueries
|
262
|
+
sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
|
263
|
+
spec = sq.first
|
264
|
+
assert_equal_pairs_swapped(spec, [
|
265
|
+
[:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
|
266
|
+
#[:precursor_neutral_mass, "1074.5920"], # out2summary
|
267
|
+
[:precursor_neutral_mass, 1074.666926], # mine
|
268
|
+
[:assumed_charge, 1], [:index, "1"],
|
269
|
+
])
|
270
|
+
sh = spec.search_results.first.search_hits.first
|
271
|
+
assert_equal_pairs_swapped(sh, [
|
272
|
+
# normal attributes
|
273
|
+
[:hit_rank, 1],
|
274
|
+
[:peptide, "SIYFRNFK"],
|
275
|
+
[:peptide_prev_aa, "R"],
|
276
|
+
[:peptide_next_aa, "G"],
|
277
|
+
[:protein, "gi|16130084|ref|NP_416651.1|"],
|
278
|
+
[:num_tot_proteins, 1],
|
279
|
+
[:num_matched_ions, 4],
|
280
|
+
[:tot_num_ions, 14],
|
281
|
+
#[:calc_neutral_pep_mass, "1074.1920"], # out2summary
|
282
|
+
[:calc_neutral_pep_mass, 1074.23261], # mine
|
283
|
+
#[:massdiff, "+0.400000"], # out2summary
|
284
|
+
[:massdiff, 0.434316000000081], # mine
|
285
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
|
286
|
+
|
287
|
+
# search_score
|
288
|
+
[:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
|
289
|
+
])
|
290
|
+
|
291
|
+
spec = sq[1]
|
292
|
+
assert_equal_pairs_swapped(spec, [
|
293
|
+
[:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
|
294
|
+
[:precursor_neutral_mass, 663.206111], # mine
|
295
|
+
[:assumed_charge, 1], [:index, "2"],
|
296
|
+
])
|
297
|
+
|
298
|
+
sh = spec.search_results.first.search_hits.first
|
299
|
+
assert_equal_pairs_swapped(sh, [
|
300
|
+
# normal attributes
|
301
|
+
[:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
|
302
|
+
[:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
|
303
|
+
#[:massdiff, "-0.600000"], # out2summary
|
304
|
+
[:massdiff, -0.556499000000031], # mine
|
305
|
+
#[:calc_neutral_pep_mass, 663.7920], # out2summary
|
306
|
+
[:calc_neutral_pep_mass, 663.76261], # mine
|
307
|
+
|
308
|
+
# search_score
|
309
|
+
[:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
|
310
|
+
])
|
311
|
+
|
312
|
+
spec = sq[9]
|
313
|
+
assert_equal_pairs_swapped(spec, [
|
314
|
+
[:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
|
315
|
+
#[:precursor_neutral_mass, "691.0920"], # out2summary
|
316
|
+
[:precursor_neutral_mass, 691.150992], # mine
|
317
|
+
])
|
318
|
+
|
319
|
+
sh = spec.search_results.first.search_hits.first
|
320
|
+
assert_equal_pairs_swapped(sh, [
|
321
|
+
# normal attributes
|
322
|
+
[:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
|
323
|
+
|
324
|
+
#[:num_missed_cleavages, "0"], # out2summary misses this!
|
325
|
+
[:num_missed_cleavages, 1],
|
326
|
+
[:is_rejected, 0],
|
327
|
+
#[:calc_neutral_pep_mass, "691.7920"], # out2summary
|
328
|
+
[:calc_neutral_pep_mass, 691.82261], # mine
|
329
|
+
#[:massdiff, "-0.700000"], # out2summary
|
330
|
+
[:massdiff, -0.67161800000008], # mine
|
331
|
+
|
332
|
+
# search_score
|
333
|
+
[:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
|
334
|
+
])
|
335
|
+
end
|
336
|
+
|
337
|
+
it 'can generate correct pepxml file' do
|
338
|
+
|
339
|
+
## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
|
340
|
+
string = @pepxml_objs.first.to_pepxml
|
341
|
+
ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
|
342
|
+
base_name_re = /base_name=".*?files\//o
|
343
|
+
date_re = /date=".*?"/
|
344
|
+
string.split("\n").each_with_index do |line,i|
|
345
|
+
if i > 99 ; break end
|
346
|
+
ans, exp =
|
347
|
+
if i == 1
|
348
|
+
[line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
|
349
|
+
elsif i == 2
|
350
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
|
351
|
+
elsif i == 6
|
352
|
+
[line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
|
353
|
+
else
|
354
|
+
[line, ans_lines[i]]
|
355
|
+
end
|
356
|
+
|
357
|
+
#ans.split('').zip(exp.split('')) do |l,a|
|
358
|
+
# if l != a
|
359
|
+
# puts line
|
360
|
+
# puts ans_lines[i]
|
361
|
+
# puts l
|
362
|
+
# puts a
|
363
|
+
# end
|
364
|
+
#end
|
365
|
+
if ans != exp
|
366
|
+
puts ans
|
367
|
+
puts exp
|
368
|
+
end
|
369
|
+
ans.should == exp
|
370
|
+
#line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
|
371
|
+
end
|
372
|
+
end
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
|
377
|
+
|
378
|
+
describe Sequest::Pepxml::Modifications do
|
379
|
+
before(:each) do
|
380
|
+
tf_params = Tfiles + "/bioworks32.params"
|
381
|
+
@params = Sequest::Params.new(tf_params)
|
382
|
+
# The params object here is completely unnecessary for this test, except
|
383
|
+
# that it sets up the mass table
|
384
|
+
@obj = Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
|
385
|
+
end
|
386
|
+
it 'creates a mod_symbols_hash' do
|
387
|
+
answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
|
388
|
+
@obj.mod_symbols_hash.should == answ
|
389
|
+
## need more here
|
390
|
+
end
|
391
|
+
|
392
|
+
it 'creates a ModificationInfo object given a special peptide sequence' do
|
393
|
+
mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
|
394
|
+
@params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
|
395
|
+
@params.term_diff_search_options = "14.20000 12.33000"
|
396
|
+
mod = Sequest::Pepxml::Modifications.new(@params, mod_string)
|
397
|
+
## no mods
|
398
|
+
peptide = "PEPTIDE"
|
399
|
+
mod.modification_info(peptide).should be_nil
|
400
|
+
peptide = "]M*EC^S@IDM#M*EMSCM["
|
401
|
+
modinfo = mod.modification_info(peptide)
|
402
|
+
modinfo.modified_peptide.should == peptide
|
403
|
+
modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
|
404
|
+
modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
|
405
|
+
end
|
406
|
+
|
407
|
+
end
|
408
|
+
|
409
|
+
describe Sequest::Pepxml::SearchHit::ModificationInfo do
|
410
|
+
|
411
|
+
before(:each) do
|
412
|
+
modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
|
413
|
+
Sequest::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
|
414
|
+
end
|
415
|
+
hash = {
|
416
|
+
:mod_nterm_mass => 520.2,
|
417
|
+
:modified_peptide => "MOD*IFI^E&D",
|
418
|
+
:mod_aminoacid_masses => modaaobjs,
|
419
|
+
}
|
420
|
+
#answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
|
421
|
+
@obj = Sequest::Pepxml::SearchHit::ModificationInfo.new(hash)
|
422
|
+
end
|
423
|
+
|
424
|
+
def _re(st)
|
425
|
+
/#{Regexp.escape(st)}/
|
426
|
+
end
|
427
|
+
|
428
|
+
it 'can produce pepxml' do
|
429
|
+
answ = @obj.to_pepxml
|
430
|
+
answ.should =~ _re('<modification_info')
|
431
|
+
answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
|
432
|
+
answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&D\"")
|
433
|
+
answ.should =~ _re("<mod_aminoacid_mass")
|
434
|
+
answ.should =~ _re(" position=\"3\"")
|
435
|
+
answ.should =~ _re(" mass=\"150.3\"")
|
436
|
+
answ.should =~ _re(" position=\"6\"")
|
437
|
+
answ.should =~ _re(" mass=\"345.2\"")
|
438
|
+
answ.should =~ _re("</modification_info>")
|
439
|
+
end
|
440
|
+
end
|
441
|
+
|
442
|
+
=end
|