mspire 0.3.9 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/INSTALL +24 -7
  2. data/README +15 -13
  3. data/README.rdoc +18 -0
  4. data/Rakefile +50 -14
  5. data/bin/aafreqs.rb +0 -0
  6. data/bin/bioworks2excel.rb +0 -0
  7. data/bin/bioworks_to_pepxml.rb +2 -1
  8. data/bin/bioworks_to_pepxml_gui.rb +0 -0
  9. data/bin/fasta_shaker.rb +0 -0
  10. data/bin/filter_and_validate.rb +0 -0
  11. data/bin/gi2annot.rb +0 -0
  12. data/bin/id_class_anal.rb +0 -0
  13. data/bin/id_precision.rb +0 -0
  14. data/bin/ms_to_lmat.rb +0 -0
  15. data/bin/pepproph_filter.rb +0 -0
  16. data/bin/protein_summary.rb +0 -0
  17. data/bin/protxml2prots_peps.rb +0 -0
  18. data/bin/raw_to_mzXML.rb +3 -3
  19. data/bin/run_percolator.rb +122 -0
  20. data/bin/sqt_group.rb +0 -0
  21. data/bin/srf_group.rb +0 -0
  22. data/changelog.txt +29 -0
  23. data/lib/ms/gradient_program.rb +0 -1
  24. data/lib/ms/msrun.rb +62 -29
  25. data/lib/ms/parser/mzdata/axml.rb +55 -0
  26. data/lib/ms/parser/mzdata/dom.rb +51 -36
  27. data/lib/ms/parser/mzdata.rb +8 -2
  28. data/lib/ms/parser/mzxml/axml.rb +59 -0
  29. data/lib/ms/parser/mzxml/dom.rb +80 -57
  30. data/lib/ms/parser/mzxml/hpricot.rb +1 -1
  31. data/lib/ms/parser/mzxml/libxml.rb +6 -2
  32. data/lib/ms/parser/mzxml.rb +110 -3
  33. data/lib/ms/parser.rb +4 -4
  34. data/lib/ms/precursor.rb +19 -4
  35. data/lib/ms/scan.rb +7 -7
  36. data/lib/ms/spectrum.rb +249 -58
  37. data/lib/mspire.rb +1 -1
  38. data/lib/spec_id/bioworks.rb +2 -2
  39. data/lib/spec_id/precision/filter/cmdline.rb +8 -1
  40. data/lib/spec_id/precision/prob/cmdline.rb +2 -2
  41. data/lib/spec_id/precision/prob.rb +1 -0
  42. data/lib/spec_id/proph/pep_summary.rb +3 -4
  43. data/lib/spec_id/proph/prot_summary.rb +3 -3
  44. data/lib/spec_id/protein_summary.rb +1 -1
  45. data/lib/spec_id/sequest/pepxml.rb +5 -5
  46. data/lib/spec_id/sqt.rb +4 -4
  47. data/lib/spec_id/srf.rb +49 -8
  48. data/lib/spec_id.rb +5 -0
  49. data/lib/xml_style_parser.rb +16 -2
  50. data/script/compile_and_plot_smriti_final.rb +0 -0
  51. data/script/create_little_pepxml.rb +0 -0
  52. data/script/degenerate_peptides.rb +0 -0
  53. data/script/estimate_fpr_by_cysteine.rb +0 -0
  54. data/script/extract_gradient_programs.rb +1 -1
  55. data/script/find_cysteine_background.rb +0 -0
  56. data/script/genuine_tps_and_probs.rb +0 -0
  57. data/script/get_apex_values_rexml.rb +0 -0
  58. data/script/mascot_fix_pepxml.rb +123 -0
  59. data/script/msvis.rb +0 -0
  60. data/script/mzXML2timeIndex.rb +0 -0
  61. data/script/peps_per_bin.rb +0 -0
  62. data/script/prep_dir.rb +0 -0
  63. data/script/simple_protein_digestion.rb +0 -0
  64. data/script/smriti_final_analysis.rb +0 -0
  65. data/script/sqt_to_meta.rb +0 -0
  66. data/script/top_hit_per_scan.rb +0 -0
  67. data/script/toppred_to_yaml.rb +0 -0
  68. data/script/tpp_installer.rb +0 -0
  69. data/specs/bin/prob_validate_spec.rb +5 -2
  70. data/specs/bin/protein_summary_spec.rb +5 -1
  71. data/specs/ms/msrun_spec.rb +176 -133
  72. data/specs/ms/parser_spec.rb +3 -3
  73. data/specs/ms/spectrum_spec.rb +0 -2
  74. data/specs/spec_id/precision/filter_spec.rb +4 -1
  75. data/specs/spec_id/precision/prob_spec.rb +2 -2
  76. data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
  77. data/specs/spec_id/sqt_spec.rb +5 -5
  78. data/specs/spec_id/srf_spec.rb +56 -93
  79. data/specs/spec_id/srf_spec_helper.rb +121 -284
  80. data/specs/spec_id_spec.rb +3 -0
  81. data/specs/transmem/toppred_spec.rb +1 -0
  82. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
  83. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
  84. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
  85. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
  86. data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
  87. metadata +247 -229
@@ -3,6 +3,8 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
3
  require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
4
4
  require 'spec_id/srf'
5
5
 
6
+ require 'fileutils'
7
+
6
8
  include SRFHelper
7
9
 
8
10
  #tfiles = File.dirname(__FILE__) + '/tfiles/'
@@ -60,11 +62,23 @@ describe 'an srf reader', :shared => true do
60
62
  @dta_files_last.object_match(@srf_obj.dta_files.last).should be_true
61
63
  end
62
64
 
65
+ # given an array of out_file objects, returns the first set of hits
66
+ def get_first_peps(out_files)
67
+ out_files.each do |outf|
68
+ if outf.num_hits > 0
69
+ return outf.hits
70
+ end
71
+ end
72
+ return nil
73
+ end
74
+
63
75
  it 'retrieves correct out files' do
64
76
  @out_files_first.object_match(@srf_obj.out_files.first).should be_true
65
77
  @out_files_last.object_match(@srf_obj.out_files.last).should be_true
66
- @out_files_first_last_pep.object_match(@srf_obj.out_files.first.hits.last).should be_true
67
- @out_files_last_last_pep.object_match(@srf_obj.out_files.last.hits.last).should be_true
78
+ # first available peptide hit
79
+ @out_files_first_pep.object_match(get_first_peps(@srf_obj.out_files).first).should be_true
80
+ # last available peptide hit
81
+ @out_files_last_pep.object_match(get_first_peps(@srf_obj.out_files.reverse).last).should be_true
68
82
  end
69
83
 
70
84
  xit 'retrieves correct params' do
@@ -75,35 +89,30 @@ describe 'an srf reader', :shared => true do
75
89
  end
76
90
 
77
91
 
92
+ Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
78
93
 
79
- describe klass, " reading a version 3.2 .srf file" do
80
- spec_large do
81
- before(:all) do
82
- @file = Tfiles_l + '/sash7/sequest/7MIX_STD_110802_1.srf'
83
- %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
84
- instance_variable_set("@#{c}", File_32[c.to_sym])
94
+ to_run = {
95
+ '3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
96
+ '3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
97
+ '3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
98
+ }
99
+
100
+ to_run.each do |version,info|
101
+ describe klass, " reading a version #{version} .srf file" do
102
+ spec_large do
103
+ before(:all) do
104
+ @file = Tfiles_l + info[:file]
105
+ Expected_hash_keys.each do |c|
106
+ instance_variable_set("@#{c}", info[:hash][c.to_sym])
107
+ end
85
108
  end
109
+ it_should_behave_like "an srf reader"
86
110
  end
87
- it_should_behave_like "an srf reader"
88
111
  end
89
112
  end
90
113
 
91
114
 
92
- describe klass, " reading a version 3.3 .srf file" do
93
- it_should 'reading a version 3.3 .srf file'
94
- end
95
-
96
-
97
- describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
98
- spec_large do
99
- before(:all) do
100
- @file = Tfiles_l + '/sash7/sequest/bioworks331/7MIX_STD_110802_1.srf'
101
- %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
102
- instance_variable_set("@#{c}", File_35[c.to_sym])
103
- end
104
- end
105
- it_should_behave_like "an srf reader"
106
- end
115
+ describe klass, " reading a corrupted file" do
107
116
  it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
108
117
  file = Tfiles + '/corrupted_900.srf'
109
118
  error_msg = Tfiles + '/error_msg.tmp'
@@ -123,78 +132,9 @@ describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
123
132
  IO.read(error_msg).should =~ /corrupted_900\.srf/
124
133
  File.unlink error_msg
125
134
  end
126
-
127
- end
128
-
129
-
130
- describe klass, 'reading an srf file' do
131
-
132
- spec_large do
133
- before(:all) do
134
- start = Time.now
135
- tf_srf = Tfiles_l + "/sash7/sequest/older/7MIX_STD_110802_1.srf"
136
- @srf = klass.new(tf_srf)
137
- puts "- read in #{Time.now - start} seconds"
138
- end
139
-
140
- #def initialize(arg)
141
- # super(arg)
142
- # @tfiles = File.dirname(__FILE__) + '/tfiles/'
143
- # @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
144
- # @srg_file = @tfiles + "tmp_bioworks.srg"
145
- # @srf = $srf
146
- # @group = $group
147
- #end
148
-
149
- it 'reads' do
150
- end
151
-
152
- it 'reads an srf file (w/o probs) and extracts all basic information' do
153
- ## Verify that we have everything and it is as we expect (not exhaustive)
154
- head = @srf.header
155
- dtgen = head.dta_gen
156
- ## HEADER
157
- hash_match(Header, head)
158
- hash_match(Dta_gen, dtgen)
159
- ## DTA_FILES
160
- hash_match(Dta_files_first, @srf.dta_files.first)
161
- hash_match(Dta_files_last, @srf.dta_files.last)
162
- ## OUT_FILES
163
- hash_match(Out_files_first, @srf.out_files.first)
164
- hit = @srf.out_files.first.hits.first
165
- hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
166
- hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
167
- hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
168
- ## SEQUEST_PARAMS
169
- hash_match(Sequest_params, @srf.params)
170
- ## INDEX
171
- @srf.index.last.should == [7161, 7161, 3]
172
- @srf.index.first.should == [2, 2, 1]
173
-
174
- @srf.dta_files.size.should == @srf.index.size
175
- @srf.dta_files.size.should == @srf.out_files.size
176
- end
177
- it_should 'give accurate peptides' do
178
- end
179
- end
180
-
181
- ## treats reference special
182
- def hash_match(hash, srf)
183
- hash.each do |k,v|
184
- if v.is_a? Float
185
- delta = v/100000
186
- srf.send(k.to_sym).should be_close(v, delta)
187
- elsif k == :reference
188
- srf.prots.first.reference.should == v[0,38]
189
- else
190
- srf.send(k.to_sym).should == v
191
- end
192
- end
193
- end
194
135
  end
195
136
 
196
137
  describe SRFGroup, 'creating an srg file' do
197
-
198
138
  it 'creates one given some non-existing, relative filenames' do
199
139
  ## TEST SRG GROUPING:
200
140
  filenames = %w(my/lucky/filename /another/filename)
@@ -205,5 +145,28 @@ describe SRFGroup, 'creating an srg file' do
205
145
  File.exist?(srg_file).should be_true
206
146
  File.unlink(srg_file)
207
147
  end
148
+ end
149
+
150
+
151
+ # @TODO: this test needs to be created for a small mock dataset!!
152
+ describe SRF, 'creating dta files' do
153
+ spec_large do
154
+ before(:all) do
155
+ file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
156
+ @srf = SRF.new(file)
157
+ end
158
+
159
+ it 'creates dta files' do
160
+ @srf.to_dta_files
161
+ File.exist?('020').should be_true
162
+ File.directory?('020').should be_true
163
+ File.exist?('020/020.3366.3366.2.dta').should be_true
164
+ lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
165
+ lines.first.should == "1113.10649290125 2\r\n"
166
+ lines[1].should == "164.56591796875 4817.0\r\n"
167
+
168
+ FileUtils.rm_rf '020'
169
+ end
170
+ end
208
171
 
209
172
  end
@@ -3,300 +3,137 @@ module SRFHelper
3
3
  File_32 = {
4
4
  :header =>
5
5
  {
6
- :params_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\sashimi7.params",
7
- :model => "LCQ Deca XP",
8
- :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_dta.log",
9
- :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
10
- :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
11
- :modifications => "",
12
- :enzyme => "Enzyme:Trypsin(KR/P) (2)",
13
- :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_sequest.log",
14
- :version => "3.2",
15
- :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW"
6
+ :params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
7
+ :raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
8
+ :modifications=>"(M* +15.99940) (STY# +79.97990)",
9
+ :sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
10
+ :ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
11
+ :db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
12
+ :enzyme=>"Enzyme:Trypsin(KR/P) (2)",
13
+ :version=>"3.2",
14
+ :model=>"LCQ Deca XP",
15
+ :dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
16
16
  },
17
- :dta_gen => {
18
- :min_group_count => 1,
19
- :start_time => 1.39999997615814,
20
- :start_mass => 400.0,
21
- :end_scan => 7161,
22
- :group_scan => 1,
23
- :start_scan => 1,
24
- :num_dta_files => 6952,
25
- :min_ion_threshold => 15,
26
- :end_mass => 4500.0,
27
- },
28
- :dta_files_first => {
29
- :mh => 1221.88989257812,
30
- :dta_tic => 7703132.0,
31
- :num_peaks => 74,
32
- :charge => 1,
33
- :ms_level => 2,
34
- :total_num_possible_charge_states => 0,
35
- :peaks => 592,
36
- },
37
- :dta_files_last => {
38
- :mh => 2604.8360326775,
39
- :dta_tic => 31977.0,
40
- :num_peaks => 17,
41
- :charge => 3,
42
- :ms_level => 2,
43
- :total_num_possible_charge_states => 0,
44
- :peaks => 136,
45
- },
46
- :out_files_first => {
47
- :num_hits => 10,
48
- :computer => 'VELA',
49
- :date_time => '05/12/2006, 10:58 AM,',
50
- :hits => 10
51
- },
52
- :out_files_last => {
53
- :num_hits => 10,
54
- :computer => 'VELA',
55
- :date_time => '05/12/2006, 11:11 AM,',
56
- :hits => 10
57
- },
58
- :out_files_first_last_pep => {
59
- :aaseq => 'QFSLSKSSLPK',
60
- :sequence => 'K.QFSLSKSSLPK.S',
61
- :mh => 1222.4156904522,
62
- :deltacn => 1.1,
63
- :sp => 57.4083709716797,
64
- :xcorr => 0.802009999752045,
65
- :id => 19977,
66
- :rsp => 60,
67
- :ions_matched => 7,
68
- :ions_total => 20,
69
- :prots => 1,
70
- :deltamass => 0.525797874074897,
71
- :ppm => 430.315265940608,
72
- :base_name => '7MIX_STD_110802_1',
73
- :first_scan => 2,
74
- :last_scan => 2,
75
- :charge => 1
76
- },
77
- :out_files_last_last_pep =>
78
- {
79
- :aaseq => 'EAFLVNSDLTLRAQLTEFRDHK',
80
- :sequence => 'R.EAFLVNSDLTLRAQLTEFRDHK.L',
81
- :mh => 2604.9025174522,
82
- :deltacn => 1.1,
83
- :sp => 26.1511478424072,
84
- :xcorr => 0.634012818336487,
85
- :id => 8105,
86
- :rsp => 165,
87
- :ions_matched => 6,
88
- :ions_total => 84,
89
- :prots => 1,
90
- :deltamass => 0.0664847746993473,
91
- :ppm => 25.523592988311,
92
- :base_name => '7MIX_STD_110802_1',
93
- :first_scan => 7161,
94
- :last_scan => 7161,
95
- :charge => 3,
96
- },
17
+ :dta_gen => {
18
+ :min_group_count => 1,
19
+ :start_time => 1.5,
20
+ :start_mass => 300.0,
21
+ :end_scan => 3620,
22
+ :group_scan => 1,
23
+ :start_scan => 1,
24
+ :num_dta_files => 3747,
25
+ :min_ion_threshold => 15,
26
+ :end_mass => 4500.0,
27
+ },
28
+ :dta_files_first => {
97
29
 
98
- :params => {
99
- "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.4000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"},
30
+ :mh=>390.92919921875,
31
+ :dta_tic=>9041311.0,
32
+ :num_peaks=>48,
33
+ :charge=>1,
34
+ :ms_level=>2,
35
+ :total_num_possible_charge_states=>0,
36
+ },
37
+ :dta_files_last => {
38
+ :dta_tic=>842424.0,
39
+ :mh=>357.041198730469,
40
+ :num_peaks=>78,
41
+ :ms_level=>2,
42
+ :charge=>1,
43
+ :total_num_possible_charge_states=>0,
44
+ },
45
+ :out_files_first => {
46
+ :num_hits => 0,
47
+ :computer => 'VELA',
48
+ :date_time => '05/06/2008, 02:08 PM,',
49
+ :hits => 0,
50
+ },
51
+ :out_files_last => {
52
+ :num_hits => 0,
53
+ :computer => 'VELA',
54
+ :date_time => '05/06/2008, 02:11 PM,',
55
+ :hits => 0,
56
+ },
57
+ :out_files_first_pep => {
58
+ :aaseq=>"YRLGGSTK",
59
+ :sequence=>"R.Y#RLGGS#T#K.K",
60
+ :mh=>1121.9390244522,
61
+ :deltacn_orig=>0.0,
62
+ :sp=>29.8529319763184,
63
+ :xcorr=>0.123464643955231,
64
+ :id=>2104,
65
+ :rsp=>1,
66
+ :ions_matched=>5,
67
+ :ions_total=>35,
68
+ :prots=>1,
69
+ :deltamass=>-0.00579976654989878,
70
+ :ppm=>5.16938660859491,
71
+ :base_name=>"020",
72
+ :first_scan=>3,
73
+ :last_scan=>3,
74
+ :charge=>1,
75
+ :deltacn=>0.795928299427032,
76
+ :base_name=>"020",
77
+ },
78
+ :out_files_last_pep =>
79
+ {
80
+ :aaseq=>"LLPGTARTMRR",
81
+ :sequence=>"R.LLPGTARTMRR.M",
82
+ :mh=>1272.5493424522,
83
+ :deltacn_orig=>0.835508584976196,
84
+ :deltacn=>1.1,
85
+ :sp=>57.9885787963867,
86
+ :xcorr=>0.109200321137905,
87
+ :id=>1361,
88
+ :rsp=>11,
89
+ :ions_matched=>6,
90
+ :ions_total=>40,
91
+ :prots=>1,
92
+ :deltamass=>0.00243330985608736,
93
+ :ppm=>1.91215729542523,
94
+ :base_name=>"020",
95
+ :first_scan=>3619,
96
+ :last_scan=>3619,
97
+ :charge=>3,
98
+ :deltacn=>1.1,
99
+ :base_name=>"020",
100
+ },
100
101
 
102
+ :params => {
103
+ "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta", "peptide_mass_tolerance"=>"25.0000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"2", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"3", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"
104
+ }
101
105
  }
102
106
 
103
- File_35 = {}
104
- File_32.each {|k,v| File_35[k] = v.dup }
105
-
106
- File_35[:header].merge!( {
107
- :sequest_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_sequest.log",
108
- :raw_filename => "C:\\Xcalibur\\data\\john\\sash7\\7MIX_STD_110802_1.RAW",
109
- :params_filename => "C:\\Xcalibur\\sequest\\john\\bioworks331\\sashimi7.params",
110
- :dta_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_dta.log",
111
- :version=>"3.5"
112
- } )
113
-
114
- File_35[:params].merge!( {
115
- "add_O_Ornithine"=>"0.00000", "add_F_Phenylalanine"=>"0.00000", "add_A_Alanine"=>"0.00000", "add_C_Cysteine"=>"0.00000", "add_Y_Tyrosine"=>"0.00000", "add_X_LorI"=>"0.00000", "add_J_user_amino_acid"=>"0.00000", "add_Cterm_peptide"=>"0.00000", "add_S_Serine"=>"0.00000", "add_Nterm_protein"=>"0.00000", "add_D_Aspartic_Acid"=>"0.00000", "add_Q_Glutamine"=>"0.00000", "add_K_Lysine"=>"0.00000", "add_R_Arginine"=>"0.00000", "add_W_Tryptophan"=>"0.00000", "add_Nterm_peptide"=>"0.00000", "add_H_Histidine"=>"0.00000", "add_L_Leucine"=>"0.00000", "add_I_Isoleucine"=>"0.00000", "add_N_Asparagine"=>"0.00000", "add_B_avg_NandD"=>"0.00000", "add_Z_avg_QandE"=>"0.00000", "add_E_Glutamic_Acid"=>"0.00000", "add_G_Glycine"=>"0.00000", "add_P_Proline"=>"0.00000", "add_M_Methionine"=>"0.00000", "add_Cterm_protein"=>"0.00000", "add_V_Valine"=>"0.00000", "add_T_Threonine"=>"0.00000", "add_U_user_amino_acid"=>"0.00000", "match_peak_tolerance"=>"1.00000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.40000", "digest_mass_range"=>"400.0000 4500.0000", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.00000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "fragment_ion_units"=>"0", "ion_cutoff_percentage"=>"0.00000", "mass_type_fragment"=>"0"}
116
-
117
- )
107
+ File_33 = {}
108
+ File_32.each do |k,v|
109
+ File_33[k] = v.dup
110
+ end
118
111
 
119
- File_35[:out_files_first].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:11 PM,'} )
120
- File_35[:out_files_last].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:15 PM,'} )
121
- # I'm assuming this difference is due to higher precision mass...? (not a
122
- # parsing error)
123
- File_35[:out_files_first_last_pep][:rsp] = 56
124
- File_35[:out_files_last_last_pep][:rsp] = 125
125
-
112
+ ## Bioworks 3.3 (srf version 3.3)
113
+ File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
114
+ File_33[:header][:version] = "3.3"
126
115
 
127
- Header = {
128
- :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
129
- :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
130
- :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_sequest.log",
131
- :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
132
- :enzyme => "Enzyme:Trypsin(KR/P) (2)",
133
- :params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
134
- :modifications => "",
135
- :version => "3.2",
136
- :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
137
- :model => "LCQ Deca XP",
138
- }
139
- ## DTA Gen
140
- Dta_gen = {
141
- :group_scan => 1,
142
- :start_time => 1.39999997615814,
143
- :start_scan => 1,
144
- :num_dta_files => 6952,
145
- :min_ion_threshold => 15,
146
- :end_mass => 4500.0,
147
- :min_group_count => 1,
148
- :start_mass => 400.0,
149
- :end_scan => 7161,
150
- }
151
-
152
- Dta_files_first = {
153
- :mh => 1221.88989257812,
154
- :dta_tic => 7703132.0,
155
- :num_peaks => 74,
156
- :charge => 1,
157
- :ms_level => 2,
158
- :total_num_possible_charge_states => 0,
159
- :peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
160
- }
161
- Dta_files_last = {
162
- :mh => 2604.8360326775,
163
- :dta_tic => 31977.0,
164
- :num_peaks => 17,
165
- :charge => 3,
166
- :ms_level => 2,
167
- :total_num_possible_charge_states => 0,
168
- :peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
169
- }
116
+ File_33[:out_files_first][:computer] = 'TESLA'
117
+ File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
118
+ File_33[:out_files_last][:computer] = 'TESLA'
119
+ File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'
170
120
 
171
- Out_files_first = {
172
- :num_hits => 10,
173
- :computer => "VELA",
174
- :date_time => "11/17/2006, 04:13 PM,",
175
- }
121
+ File_33[:out_files_first_pep][:sp] = 29.8535556793213
122
+ File_33[:out_files_last_pep][:sp] = 57.987476348877
123
+ File_33[:out_files_last_pep][:rsp] = 10
124
+ File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219
176
125
 
177
- Out_files_first_hit = {
178
- :mh => 1220.5128044522,
179
- :deltacn => 0.071944423019886, ## this is the modified version
180
- :sp => 96.5815887451172,
181
- :xcorr => 1.08377742767334,
182
- :id => 224,
183
- :rsp => 13,
184
- :ions_matched => 8,
185
- :ions_total => 20,
186
- :sequence => "K.LCPHLTLLPGR.F",
187
- :aaseq => "LCPHLTLLPGR",
188
- :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
189
- :first_scan => 2,
190
- :last_scan => 2,
191
- :base_name => '7MIX_STD_110802_1',
192
- :charge => 1,
193
- }
194
126
 
195
- Out_files_last = {
196
- :num_hits => 10,
197
- :computer => "VELA",
198
- :date_time => "11/17/2006, 04:25 PM," ,
199
- }
200
- Out_files_last_first_hit = {
201
- :mh => 2605.9368784522,
202
- :deltacn => 0.03921128064394,
203
- :sp => 76.7447052001953,
204
- :xcorr => 0.915680646896362,
205
- :id => 13562,
206
- :rsp => 4,
207
- :ions_matched => 10,
208
- :ions_total => 84,
209
- :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
210
- :aaseq => "HLEINPNHPIVETLRQKAETHK",
211
- :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
212
- :first_scan => 7161,
213
- :last_scan => 7161,
214
- :base_name => '7MIX_STD_110802_1',
215
- :deltamass => 2605.9368784522 - 2604.8360326775,
216
- :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
217
- :charge => 3,
218
- }
219
- Out_files_last_last_hit = {
220
- :mh => 2604.9025174522,
221
- :deltacn => 1.1,
222
- :sp => 26.1511478424072,
223
- :xcorr => 0.634012818336487,
224
- :id => 8105,
225
- :rsp => 165,
226
- :ions_matched => 6,
227
- :ions_total => 84,
228
- :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
229
- :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
230
- :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
231
- :first_scan => 7161,
232
- :last_scan => 7161,
233
- :base_name => '7MIX_STD_110802_1',
234
- :deltamass => 2604.9025174522 - 2604.8360326775,
235
- :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
236
- :charge => 3,
237
- }
238
- Sequest_params = {
239
- "add_F_Phenylalanine"=>"0.0000",
240
- "add_O_Ornithine"=>"0.0000",
241
- "add_Y_Tyrosine"=>"0.0000",
242
- "add_C_Cysteine"=>"0.0000",
243
- "add_A_Alanine"=>"0.0000",
244
- "add_J_user_amino_acid"=>"0.0000",
245
- "add_X_LorI"=>"0.0000",
246
- "add_S_Serine"=>"0.0000",
247
- "add_Cterm_peptide"=>"0.0000",
248
- "add_Q_Glutamine"=>"0.0000",
249
- "add_D_Aspartic_Acid"=>"0.0000",
250
- "add_Nterm_protein"=>"0.0000",
251
- "add_W_Tryptophan"=>"0.0000",
252
- "add_R_Arginine"=>"0.0000",
253
- "add_K_Lysine"=>"0.0000",
254
- "add_H_Histidine"=>"0.0000",
255
- "add_Nterm_peptide"=>"0.0000",
256
- "add_E_Glutamic_Acid"=>"0.0000",
257
- "add_Z_avg_QandE"=>"0.0000",
258
- "add_B_avg_NandD"=>"0.0000",
259
- "add_N_Asparagine"=>"0.0000",
260
- "add_I_Isoleucine"=>"0.0000",
261
- "add_L_Leucine"=>"0.0000",
262
- "add_M_Methionine"=>"0.0000",
263
- "add_P_Proline"=>"0.0000",
264
- "add_G_Glycine"=>"0.0000",
265
- "add_U_user_amino_acid"=>"0.0000",
266
- "add_T_Threonine"=>"0.0000",
267
- "add_V_Valine"=>"0.0000",
268
- "add_Cterm_protein"=>"0.0000",
269
- "match_peak_tolerance"=>"1.0000",
270
- "match_peak_allowed_error"=>"1",
271
- "normalize_xcorr"=>"0",
272
- "nucleotide_reading_frame"=>"0",
273
- "num_results"=>"250",
274
- "sequence_header_filter"=>"",
275
- "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
276
- "partial_sequence"=>"",
277
- "max_num_internal_cleavage_sites"=>"2",
278
- "search_engine"=>"SEQUEST",
279
- "print_duplicate_references"=>"40",
280
- "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
281
- "remove_precursor_peak"=>"0",
282
- "num_output_lines"=>"10",
283
- "second_database_name"=>"",
284
- "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
285
- "peptide_mass_tolerance"=>"1.4000",
286
- "digest_mass_range"=>"600.0 3500.0",
287
- "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
288
- "show_fragment_ions"=>"0",
289
- "protein_mass_filter"=>"0 0",
290
- "term_diff_search_options"=>"0.000000 0.000000",
291
- "num_description_lines"=>"5",
292
- "fragment_ion_tolerance"=>"1.0000",
293
- "peptide_mass_units"=>"0",
294
- "mass_type_parent"=>"0",
295
- "match_peak_count"=>"0",
296
- "max_num_differential_per_peptide"=>"1",
297
- "ion_cutoff_percentage"=>"0.0000",
298
- "mass_type_fragment"=>"0"
299
- }
127
+ ## Bioworks 3.3.1 (srf version 3.5)
128
+ File_331 = {}
129
+ File_33.each do |k,v|
130
+ File_331[k] = v.dup
131
+ end
132
+ File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
133
+ File_331[:header][:version] = "3.5"
134
+ File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
135
+ File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
300
136
 
301
137
  end
302
138
 
139
+
@@ -85,6 +85,7 @@ describe 'creating a list of proteins from peptides', :shared => true do
85
85
  end
86
86
 
87
87
  describe SpecID, 'with generic proteins' do
88
+ include SpecID
88
89
  before(:all) do
89
90
  @prots = (0..7).map do |n|
90
91
  SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
@@ -95,6 +96,7 @@ describe SpecID, 'with generic proteins' do
95
96
  end
96
97
 
97
98
  describe SpecID, 'with array based proteins' do
99
+ include SpecID
98
100
  before(:all) do
99
101
  @prots = (0..7).map do |n|
100
102
  SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
@@ -109,6 +111,7 @@ class TrueClass ; include Boolean end
109
111
  class FalseClass; include Boolean end
110
112
 
111
113
  describe SpecID, 'being created' do
114
+ include SpecID
112
115
  it 'can be from small bioworks.xml' do
113
116
  sp = SpecID.new(Tfiles + '/bioworks_small.xml')
114
117
  sp.prots.size.should == 106
@@ -4,6 +4,7 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
4
4
  require File.expand_path( File.dirname(__FILE__) + '/../transmem_spec_shared' )
5
5
 
6
6
  require 'transmem/toppred'
7
+ require 'yaml'
7
8
 
8
9
  describe TopPred::Index do
9
10
  before(:all) do