mspire 0.5.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,181 @@
1
+
2
+ require 'spec_helper'
3
+ require 'ms/ident/pepxml/sample_enzyme'
4
+ require 'nokogiri'
5
+
6
+ describe 'creating an MS::Ident::Pepxml::SampleEnzyme' do
7
+ before do
8
+ @hash = {
9
+ :name => 'trypsin',
10
+ :cut => 'KR',
11
+ :no_cut => 'P',
12
+ :sense => 'C',
13
+ }
14
+ end
15
+ it 'can be set by a known enzyme name' do
16
+ se = MS::Ident::Pepxml::SampleEnzyme.new('trypsin')
17
+ @hash.each do |k,v|
18
+ se.send(k).should == v
19
+ end
20
+ end
21
+
22
+ it 'can be set manually with a hash' do
23
+ se = MS::Ident::Pepxml::SampleEnzyme.new(@hash)
24
+ @hash.each do |k,v|
25
+ se.send(k).should == v
26
+ end
27
+ end
28
+ end
29
+
30
+ describe 'an MS::Ident::Pepxml::SampleEnzyme' do
31
+ before do
32
+ @sample_enzyme = MS::Ident::Pepxml::SampleEnzyme.new(:name=>'trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
33
+ end
34
+ it 'generates a valid xml fragment' do
35
+ string = @sample_enzyme.to_xml
36
+ string.is_a?(String).should == true
37
+ string.should match(/<sample_enzyme name="trypsin"/)
38
+ string.should match(/<specificity/)
39
+ %w(cut="KR" no_cut="P" sense="C").each {|re| string.should match(/#{re}/) }
40
+ !string.include?('version').should == true
41
+ end
42
+ it 'adds to an xml builder object' do
43
+ builder = Nokogiri::XML::Builder.new
44
+ after = @sample_enzyme.to_xml(builder)
45
+ after.is_a?(Nokogiri::XML::Builder).should == true
46
+ after.should == builder
47
+ after.to_xml.is_a?(String).should == true
48
+ end
49
+ end
50
+
51
+ describe 'an MS::Ident::Pepxml::SampleEnzyme making enzyme digestion calculations' do
52
+ before do
53
+ @full_KRP = MS::Ident::Pepxml::SampleEnzyme.new(
54
+ :name => 'trypsin',
55
+ :cut => 'KR',
56
+ :no_cut => 'P',
57
+ :sense => 'C',
58
+ )
59
+ @just_KR = MS::Ident::Pepxml::SampleEnzyme.new(
60
+ :name => 'trypsin',
61
+ :cut => 'KR',
62
+ :no_cut => '',
63
+ :sense => 'C',
64
+ )
65
+ end
66
+
67
+ it 'calculates the number of tolerant termini' do
68
+ exp = [{
69
+ # full KR/P
70
+ %w(K EPTIDR E) => 2,
71
+ %w(K PEPTIDR E) => 1,
72
+ %w(F EEPTIDR E) => 1,
73
+ %w(F PEPTIDW R) => 0,
74
+ },
75
+ {
76
+ # just KR
77
+ %w(K EPTIDR E) => 2,
78
+ %w(K PEPTIDR E) => 2,
79
+ %w(F EEPTIDR E) => 1,
80
+ %w(F PEPTIDW R) => 0,
81
+ }
82
+ ]
83
+ sample_enzyme_ar = [@full_KRP, @just_KR]
84
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
85
+ hash.each do |seq, val|
86
+ sample_enzyme.num_tol_term(*seq).should == val
87
+ end
88
+ end
89
+ end
90
+
91
+ it 'calculates number of missed cleavages' do
92
+ exp = [{
93
+ "EPTIDR" => 0,
94
+ "PEPTIDR" => 0,
95
+ "EEPTIDR" => 0,
96
+ "PEPTIDW" => 0,
97
+ "PERPTIDW" => 0,
98
+ "PEPKPTIDW" => 0,
99
+ "PEPKTIDW" => 1,
100
+ "RTTIDR" => 1,
101
+ "RTTIKK" => 2,
102
+ "PKEPRTIDW" => 2,
103
+ "PKEPRTIDKP" => 2,
104
+ "PKEPRAALKPEERPTIDKW" => 3,
105
+ },
106
+ {
107
+ "EPTIDR" => 0,
108
+ "PEPTIDR" => 0,
109
+ "EEPTIDR" => 0,
110
+ "PEPTIDW" => 0,
111
+ "PERPTIDW" => 1,
112
+ "PEPKPTIDW" => 1,
113
+ "PEPKTIDW" => 1,
114
+ "RTTIDR" => 1,
115
+ "RTTIKK" => 2,
116
+ "PKEPRTIDW" => 2,
117
+ "PKEPRTIDKP" => 3,
118
+ "PKEPRAALKPEERPTIDKW" => 5,
119
+ }
120
+ ]
121
+
122
+ sample_enzyme_ar = [@full_KRP, @just_KR]
123
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
124
+ hash.each do |aaseq, val|
125
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
126
+ end
127
+ end
128
+ end
129
+ end
130
+
131
+ #xdescribe 'read in from an xml node' do
132
+ # # placeholder until written
133
+ #end
134
+
135
+ ### DOES this kind of functionality belong in this kind of container????
136
+ ### SHOULD it be with ms-enzyme or ms-in_silico ???????
137
+
138
+ =begin
139
+ require 'set'
140
+
141
+ describe 'MS::Ident::Pepxml::SampleEnzyme digesting sequences' do
142
+ it 'can digest with no missed cleavages' do
143
+ st = "CRGATKKTAGRPMEK"
144
+ SampleEnzyme.tryptic(st).should == %w(CR GATK K TAGRPMEK)
145
+ st = "CATRP"
146
+ SampleEnzyme.tryptic(st).should == %w(CATRP)
147
+ st = "RCATRP"
148
+ SampleEnzyme.tryptic(st).should == %w(R CATRP)
149
+ st = ""
150
+ SampleEnzyme.tryptic(st).should == []
151
+ st = "R"
152
+ SampleEnzyme.tryptic(st).should == %w(R)
153
+ end
154
+
155
+ it 'can digest with missed cleavages' do
156
+ st = "CRGATKKTAGRPMEKLLLERTKY"
157
+ zero = %w(CR GATK K TAGRPMEK LLLER TK Y)
158
+ SampleEnzyme.tryptic(st,0).to_set.should == zero.to_set
159
+ one = %w(CRGATK GATKK KTAGRPMEK TAGRPMEKLLLER LLLERTK TKY)
160
+ SampleEnzyme.tryptic(st,1).to_set.should == (zero+one).to_set
161
+ two = %w(CRGATKK GATKKTAGRPMEK KTAGRPMEKLLLER TAGRPMEKLLLERTK LLLERTKY)
162
+ all = zero + one + two
163
+ SampleEnzyme.tryptic(st,2).to_set.should == all.to_set
164
+ end
165
+
166
+ it 'contains duplicates IF there are duplicate tryptic sequences' do
167
+ st = "AAAAKCCCCKDDDDKCCCCK"
168
+ peps = SampleEnzyme.new('trypsin').digest(st, 2)
169
+ peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
170
+ end
171
+
172
+ end
173
+
174
+ describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
175
+
176
+
177
+ end
178
+ =end
179
+
180
+
181
+
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/ident/pepxml/search_hit/modification_info'
4
+
5
+ describe 'MS::Ident::Pepxml::SearchHit::ModificationInfo' do
6
+
7
+ before do
8
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
9
+ MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*ar)
10
+ end
11
+ hash = {
12
+ :mod_nterm_mass => 520.2,
13
+ :modified_peptide => "MOD*IFI^E&D",
14
+ :mod_aminoacid_masses => modaaobjs,
15
+ }
16
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
17
+ @obj = MS::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
18
+ end
19
+
20
+ it 'can produce valid pepxml xml' do
21
+ to_match = ['<modification_info',
22
+ ' mod_nterm_mass="520.2"',
23
+ " modified_peptide=\"MOD*IFI^E&amp;D\"",
24
+ "<mod_aminoacid_mass",
25
+ " position=\"3\"",
26
+ " mass=\"150.3\"",
27
+ " position=\"6\"",
28
+ " mass=\"345.2\"",
29
+ "</modification_info>"]
30
+ string = @obj.to_xml
31
+ to_match.each do |re|
32
+ string.should match(Regexp.new(Regexp.escape(re)))
33
+ end
34
+ end
35
+ end
36
+
37
+
@@ -0,0 +1,442 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/mass'
4
+ require 'ms/mass/aa'
5
+ require 'ms/ident/pepxml'
6
+ require 'ms/ident/pepxml/modifications'
7
+ require 'ms/ident/pepxml/spectrum_query'
8
+ require 'ms/ident/pepxml/search_result'
9
+ require 'ms/ident/pepxml/search_hit'
10
+ require 'ms/ident/pepxml/search_hit/modification_info'
11
+
12
+ describe "creating an MS::Ident::Pepxml" do
13
+ include MS::Ident
14
+
15
+ it "can be creating in a nested fashion reflecting internal structure" do
16
+ tags_that_should_be_present = %w(msms_pipeline_analysis msms_run_summary sample_enzyme search_summary spectrum_query search_result search_hit modification_info mod_aminoacid_mass search_score)
17
+
18
+ pepxml = Pepxml.new do |msms_pipeline_analysis|
19
+ msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
20
+ # prep the sample enzyme and search_summary
21
+ msms_run_summary.merge!(
22
+ :base_name => '/home/jtprince/dev/mspire/020',
23
+ :ms_manufacturer => 'Thermo',
24
+ :ms_model => 'LTQ Orbitrap',
25
+ :ms_ionization => 'ESI',
26
+ :ms_mass_analyzer => 'Ion Trap',
27
+ :ms_detector => 'UNKNOWN'
28
+ ) do |sample_enzyme, search_summary, spectrum_queries|
29
+ sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
30
+ search_summary.merge!(
31
+ :base_name=>'/path/to/file/020',
32
+ :search_engine => 'SEQUEST',
33
+ :precursor_mass_type =>'monoisotopic',
34
+ :fragment_mass_type => 'average'
35
+ ) do |search_database, enzymatic_search_constraint, modifications, parameters|
36
+ search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
37
+ enzymatic_search_constraint.merge!(
38
+ :enzyme => 'Trypsin',
39
+ :max_num_internal_cleavages => 2,
40
+ :min_number_termini => 2
41
+ )
42
+ modifications << Pepxml::AminoacidModification.new(
43
+ :aminoacid => 'M', :massdiff => 15.9994, :mass => MS::Mass::AA::MONO['M']+15.9994,
44
+ :variable => 'Y', :symbol => '*')
45
+ # invented, for example, a protein terminating mod
46
+ modifications << Pepxml::TerminalModification.new(
47
+ :terminus => 'c', :massdiff => 23.3333, :mass => MS::Mass::MONO['oh'] + 23.3333,
48
+ :variable => 'Y', :symbol => '[', :protein_terminus => 'c',
49
+ :description => 'leave protein_terminus off if not protein mod'
50
+ )
51
+ modifications << Pepxml::TerminalModification.new(
52
+ :terminus => 'c', :massdiff => 25.42322, :mass => MS::Mass::MONO['h+'] + 25.42322,
53
+ :variable => 'N', :symbol => ']', :description => 'example: c term mod'
54
+ )
55
+ parameters.merge!(
56
+ :fragment_ion_tolerance => 1.0000,
57
+ :digest_mass_range => '600.0 3500.0',
58
+ :enzyme_info => 'Trypsin(KR/P) 1 1 KR P', # etc....
59
+ )
60
+ end
61
+ spectrum_query1 = Pepxml::SpectrumQuery.new(
62
+ :spectrum => '020.3.3.1', :start_scan => 3, :end_scan => 3,
63
+ :precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
64
+ ) do |search_results|
65
+ search_result1 = Pepxml::SearchResult.new do |search_hits|
66
+ modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|
67
+ Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
68
+ end
69
+ # order(modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
70
+ # or can be set by hash
71
+ mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
72
+ search_hit1 = Pepxml::SearchHit.new(
73
+ :hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
74
+ :protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
75
+ :tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
76
+ :num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0,
77
+ :modification_info => mod_info) do |search_scores|
78
+ search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0,
79
+ :spscore => 29.85, :sprank => 1)
80
+ end
81
+ search_hits << search_hit1
82
+ end
83
+ search_results << search_result1
84
+ end
85
+ spectrum_queries << spectrum_query1
86
+ end
87
+ end
88
+ end
89
+ xml = pepxml.to_xml
90
+ tags_that_should_be_present.each do |tag|
91
+ xml.should match(/<#{tag} ?/)
92
+ end
93
+ xml.should match( /<\?xml version="1.0" encoding="UTF-8"\?>/ )
94
+ xml.should match( %r{<\?xml-stylesheet type="text/xsl" href="/tools/bin/TPP/tpp/schema/pepXML_std.xsl"\?>} )
95
+ end
96
+ end
97
+
98
+ =begin
99
+ # splits string on ' ' and matches the line found by find_line_regexp in
100
+ # lines
101
+ def match_modline_pieces(lines, find_line_regexp, string)
102
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
103
+ lines.each do |line|
104
+ if line =~ find_line_regexp
105
+ pieces.each do |piece|
106
+ line.should =~ piece
107
+ end
108
+ end
109
+ end
110
+ end
111
+
112
+
113
+ it 'gets modifications right in real run' do
114
+ @out_files.each do |fn|
115
+ fn.exist_as_a_file?.should be_true
116
+ beginning = IO.read(fn)
117
+ lines = beginning.split("\n")
118
+ [
119
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
120
+
121
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
122
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
123
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
124
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
125
+ ].each do |a,b|
126
+ match_modline_pieces(lines, a, b)
127
+ end
128
+ [
129
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
130
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
131
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
132
+ '</modification_info>',
133
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
134
+ ].each do |line|
135
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
136
+ end
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+
143
+
144
+ =begin
145
+ describe "MS::Ident::Pepxml created from small bioworks.xml" do
146
+
147
+ spec_large do
148
+ before(:all) do
149
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
150
+
151
+ tf_params = Tfiles + "/bioworks32.params"
152
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
153
+ out_path = Tfiles
154
+ @pepxml_objs = Sequest::Pepxml.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
155
+ end
156
+
157
+ it 'gets some spectrum queries' do
158
+ @pepxml_objs.each do |obj|
159
+ (obj.spectrum_queries.size > 2).should be_true
160
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
161
+ end
162
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
163
+ end
164
+ end
165
+ end
166
+
167
+
168
+
169
+ describe Sequest::Pepxml, " created from large bioworks.xml" do
170
+ # assert_equal_by_pairs (really any old array)
171
+ def assert_equal_pairs(obj, arrs)
172
+ arrs.each do |arr|
173
+ #if obj.send(arr[1]) != arr[0]
174
+ # puts "HELLO"
175
+ # puts "OBJ answer"
176
+ # p obj.send(arr[1])
177
+ # puts "ar0"
178
+ # p arr[0]
179
+ # puts "ar1"
180
+ # p arr[1]
181
+ #end
182
+ if arr[0].is_a? Float
183
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
184
+ else
185
+ obj.send(arr[1]).should == arr[0]
186
+ end
187
+ end
188
+ end
189
+
190
+ # swap the first two elements of each pair first
191
+ def assert_equal_pairs_swapped(obj, arrs)
192
+ arrs.each do |arr|
193
+ arr[0], arr[1] = arr[1], arr[0]
194
+ end
195
+ assert_equal_pairs(obj, arrs)
196
+ end
197
+
198
+ spec_large do
199
+ before(:all) do
200
+ st = Time.new
201
+ params = Tfiles + "/opd1/sequest.3.2.params"
202
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
203
+ mzxml_path = Tfiles_l + "/opd1"
204
+ out_path = Tfiles
205
+ @pepxml_version = 18
206
+ @pepxml_objs = Sequest::Pepxml.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
207
+ puts "- takes #{Time.new - st} secs"
208
+ end
209
+
210
+ it 'extracts MSMSPipelineAnalysis' do
211
+ ######## HMMMMM...
212
+ Sequest::Pepxml.pepxml_version.should == @pepxml_version
213
+
214
+ # MSMSPipelineAnalysis
215
+ po = @pepxml_objs.first
216
+ msms_pipeline = po.msms_pipeline_analysis
217
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
218
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
219
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
220
+ msms_pipeline.summary_xml.should == '000.xml'
221
+ end
222
+
223
+ it 'extracts MSmSRunSummary' do
224
+ # MSMSRunSummary
225
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
226
+ rs.base_name.should =~ /\/000/
227
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
228
+ end
229
+
230
+ it 'extracts SampleEnzyme' do
231
+ # SampleEnzyme
232
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
233
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
234
+ end
235
+
236
+ it 'extracts SearchSummary' do
237
+ # SearchSummary
238
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
239
+ ss.is_a?(Sequest::Pepxml::SearchSummary).should be_true
240
+ ss.base_name.should =~ /\/000/
241
+ ss.peptide_mass_tol.should =~ /1\.500/
242
+ assert_equal_pairs_swapped(ss, [ # normal attributes
243
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
244
+
245
+ # enzymatic_search_constraint
246
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
247
+
248
+ # parameters
249
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
250
+ ])
251
+
252
+ end
253
+ it 'extracts SearchDatabase' do
254
+ # SearchDatabase
255
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
256
+ sd.is_a?(Sequest::Pepxml::SearchDatabase).should be_true
257
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
258
+ end
259
+
260
+ it 'returns SpectrumQueries' do
261
+ # SpectrumQueries
262
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
263
+ spec = sq.first
264
+ assert_equal_pairs_swapped(spec, [
265
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
266
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
267
+ [:precursor_neutral_mass, 1074.666926], # mine
268
+ [:assumed_charge, 1], [:index, "1"],
269
+ ])
270
+ sh = spec.search_results.first.search_hits.first
271
+ assert_equal_pairs_swapped(sh, [
272
+ # normal attributes
273
+ [:hit_rank, 1],
274
+ [:peptide, "SIYFRNFK"],
275
+ [:peptide_prev_aa, "R"],
276
+ [:peptide_next_aa, "G"],
277
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
278
+ [:num_tot_proteins, 1],
279
+ [:num_matched_ions, 4],
280
+ [:tot_num_ions, 14],
281
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
282
+ [:calc_neutral_pep_mass, 1074.23261], # mine
283
+ #[:massdiff, "+0.400000"], # out2summary
284
+ [:massdiff, 0.434316000000081], # mine
285
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
286
+
287
+ # search_score
288
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
289
+ ])
290
+
291
+ spec = sq[1]
292
+ assert_equal_pairs_swapped(spec, [
293
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
294
+ [:precursor_neutral_mass, 663.206111], # mine
295
+ [:assumed_charge, 1], [:index, "2"],
296
+ ])
297
+
298
+ sh = spec.search_results.first.search_hits.first
299
+ assert_equal_pairs_swapped(sh, [
300
+ # normal attributes
301
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
302
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
303
+ #[:massdiff, "-0.600000"], # out2summary
304
+ [:massdiff, -0.556499000000031], # mine
305
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
306
+ [:calc_neutral_pep_mass, 663.76261], # mine
307
+
308
+ # search_score
309
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
310
+ ])
311
+
312
+ spec = sq[9]
313
+ assert_equal_pairs_swapped(spec, [
314
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
315
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
316
+ [:precursor_neutral_mass, 691.150992], # mine
317
+ ])
318
+
319
+ sh = spec.search_results.first.search_hits.first
320
+ assert_equal_pairs_swapped(sh, [
321
+ # normal attributes
322
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
323
+
324
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
325
+ [:num_missed_cleavages, 1],
326
+ [:is_rejected, 0],
327
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
328
+ [:calc_neutral_pep_mass, 691.82261], # mine
329
+ #[:massdiff, "-0.700000"], # out2summary
330
+ [:massdiff, -0.67161800000008], # mine
331
+
332
+ # search_score
333
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
334
+ ])
335
+ end
336
+
337
+ it 'can generate correct pepxml file' do
338
+
339
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
340
+ string = @pepxml_objs.first.to_pepxml
341
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
342
+ base_name_re = /base_name=".*?files\//o
343
+ date_re = /date=".*?"/
344
+ string.split("\n").each_with_index do |line,i|
345
+ if i > 99 ; break end
346
+ ans, exp =
347
+ if i == 1
348
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
349
+ elsif i == 2
350
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
351
+ elsif i == 6
352
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
353
+ else
354
+ [line, ans_lines[i]]
355
+ end
356
+
357
+ #ans.split('').zip(exp.split('')) do |l,a|
358
+ # if l != a
359
+ # puts line
360
+ # puts ans_lines[i]
361
+ # puts l
362
+ # puts a
363
+ # end
364
+ #end
365
+ if ans != exp
366
+ puts ans
367
+ puts exp
368
+ end
369
+ ans.should == exp
370
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
371
+ end
372
+ end
373
+ end
374
+ end
375
+
376
+
377
+
378
+ describe Sequest::Pepxml::Modifications do
379
+ before(:each) do
380
+ tf_params = Tfiles + "/bioworks32.params"
381
+ @params = Sequest::Params.new(tf_params)
382
+ # The params object here is completely unnecessary for this test, except
383
+ # that it sets up the mass table
384
+ @obj = Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
385
+ end
386
+ it 'creates a mod_symbols_hash' do
387
+ answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
388
+ @obj.mod_symbols_hash.should == answ
389
+ ## need more here
390
+ end
391
+
392
+ it 'creates a ModificationInfo object given a special peptide sequence' do
393
+ mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
394
+ @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
395
+ @params.term_diff_search_options = "14.20000 12.33000"
396
+ mod = Sequest::Pepxml::Modifications.new(@params, mod_string)
397
+ ## no mods
398
+ peptide = "PEPTIDE"
399
+ mod.modification_info(peptide).should be_nil
400
+ peptide = "]M*EC^S@IDM#M*EMSCM["
401
+ modinfo = mod.modification_info(peptide)
402
+ modinfo.modified_peptide.should == peptide
403
+ modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
404
+ modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
405
+ end
406
+
407
+ end
408
+
409
+ describe Sequest::Pepxml::SearchHit::ModificationInfo do
410
+
411
+ before(:each) do
412
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
413
+ Sequest::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
414
+ end
415
+ hash = {
416
+ :mod_nterm_mass => 520.2,
417
+ :modified_peptide => "MOD*IFI^E&D",
418
+ :mod_aminoacid_masses => modaaobjs,
419
+ }
420
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
421
+ @obj = Sequest::Pepxml::SearchHit::ModificationInfo.new(hash)
422
+ end
423
+
424
+ def _re(st)
425
+ /#{Regexp.escape(st)}/
426
+ end
427
+
428
+ it 'can produce pepxml' do
429
+ answ = @obj.to_pepxml
430
+ answ.should =~ _re('<modification_info')
431
+ answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
432
+ answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&amp;D\"")
433
+ answ.should =~ _re("<mod_aminoacid_mass")
434
+ answ.should =~ _re(" position=\"3\"")
435
+ answ.should =~ _re(" mass=\"150.3\"")
436
+ answ.should =~ _re(" position=\"6\"")
437
+ answ.should =~ _re(" mass=\"345.2\"")
438
+ answ.should =~ _re("</modification_info>")
439
+ end
440
+ end
441
+
442
+ =end