mspire 0.3.9 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/INSTALL +24 -7
  2. data/README +15 -13
  3. data/README.rdoc +18 -0
  4. data/Rakefile +50 -14
  5. data/bin/aafreqs.rb +0 -0
  6. data/bin/bioworks2excel.rb +0 -0
  7. data/bin/bioworks_to_pepxml.rb +2 -1
  8. data/bin/bioworks_to_pepxml_gui.rb +0 -0
  9. data/bin/fasta_shaker.rb +0 -0
  10. data/bin/filter_and_validate.rb +0 -0
  11. data/bin/gi2annot.rb +0 -0
  12. data/bin/id_class_anal.rb +0 -0
  13. data/bin/id_precision.rb +0 -0
  14. data/bin/ms_to_lmat.rb +0 -0
  15. data/bin/pepproph_filter.rb +0 -0
  16. data/bin/protein_summary.rb +0 -0
  17. data/bin/protxml2prots_peps.rb +0 -0
  18. data/bin/raw_to_mzXML.rb +3 -3
  19. data/bin/run_percolator.rb +122 -0
  20. data/bin/sqt_group.rb +0 -0
  21. data/bin/srf_group.rb +0 -0
  22. data/changelog.txt +29 -0
  23. data/lib/ms/gradient_program.rb +0 -1
  24. data/lib/ms/msrun.rb +62 -29
  25. data/lib/ms/parser/mzdata/axml.rb +55 -0
  26. data/lib/ms/parser/mzdata/dom.rb +51 -36
  27. data/lib/ms/parser/mzdata.rb +8 -2
  28. data/lib/ms/parser/mzxml/axml.rb +59 -0
  29. data/lib/ms/parser/mzxml/dom.rb +80 -57
  30. data/lib/ms/parser/mzxml/hpricot.rb +1 -1
  31. data/lib/ms/parser/mzxml/libxml.rb +6 -2
  32. data/lib/ms/parser/mzxml.rb +110 -3
  33. data/lib/ms/parser.rb +4 -4
  34. data/lib/ms/precursor.rb +19 -4
  35. data/lib/ms/scan.rb +7 -7
  36. data/lib/ms/spectrum.rb +249 -58
  37. data/lib/mspire.rb +1 -1
  38. data/lib/spec_id/bioworks.rb +2 -2
  39. data/lib/spec_id/precision/filter/cmdline.rb +8 -1
  40. data/lib/spec_id/precision/prob/cmdline.rb +2 -2
  41. data/lib/spec_id/precision/prob.rb +1 -0
  42. data/lib/spec_id/proph/pep_summary.rb +3 -4
  43. data/lib/spec_id/proph/prot_summary.rb +3 -3
  44. data/lib/spec_id/protein_summary.rb +1 -1
  45. data/lib/spec_id/sequest/pepxml.rb +5 -5
  46. data/lib/spec_id/sqt.rb +4 -4
  47. data/lib/spec_id/srf.rb +49 -8
  48. data/lib/spec_id.rb +5 -0
  49. data/lib/xml_style_parser.rb +16 -2
  50. data/script/compile_and_plot_smriti_final.rb +0 -0
  51. data/script/create_little_pepxml.rb +0 -0
  52. data/script/degenerate_peptides.rb +0 -0
  53. data/script/estimate_fpr_by_cysteine.rb +0 -0
  54. data/script/extract_gradient_programs.rb +1 -1
  55. data/script/find_cysteine_background.rb +0 -0
  56. data/script/genuine_tps_and_probs.rb +0 -0
  57. data/script/get_apex_values_rexml.rb +0 -0
  58. data/script/mascot_fix_pepxml.rb +123 -0
  59. data/script/msvis.rb +0 -0
  60. data/script/mzXML2timeIndex.rb +0 -0
  61. data/script/peps_per_bin.rb +0 -0
  62. data/script/prep_dir.rb +0 -0
  63. data/script/simple_protein_digestion.rb +0 -0
  64. data/script/smriti_final_analysis.rb +0 -0
  65. data/script/sqt_to_meta.rb +0 -0
  66. data/script/top_hit_per_scan.rb +0 -0
  67. data/script/toppred_to_yaml.rb +0 -0
  68. data/script/tpp_installer.rb +0 -0
  69. data/specs/bin/prob_validate_spec.rb +5 -2
  70. data/specs/bin/protein_summary_spec.rb +5 -1
  71. data/specs/ms/msrun_spec.rb +176 -133
  72. data/specs/ms/parser_spec.rb +3 -3
  73. data/specs/ms/spectrum_spec.rb +0 -2
  74. data/specs/spec_id/precision/filter_spec.rb +4 -1
  75. data/specs/spec_id/precision/prob_spec.rb +2 -2
  76. data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
  77. data/specs/spec_id/sqt_spec.rb +5 -5
  78. data/specs/spec_id/srf_spec.rb +56 -93
  79. data/specs/spec_id/srf_spec_helper.rb +121 -284
  80. data/specs/spec_id_spec.rb +3 -0
  81. data/specs/transmem/toppred_spec.rb +1 -0
  82. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
  83. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
  84. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
  85. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
  86. data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
  87. metadata +247 -229
@@ -3,6 +3,8 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
3
  require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
4
4
  require 'spec_id/srf'
5
5
 
6
+ require 'fileutils'
7
+
6
8
  include SRFHelper
7
9
 
8
10
  #tfiles = File.dirname(__FILE__) + '/tfiles/'
@@ -60,11 +62,23 @@ describe 'an srf reader', :shared => true do
60
62
  @dta_files_last.object_match(@srf_obj.dta_files.last).should be_true
61
63
  end
62
64
 
65
+ # given an array of out_file objects, returns the first set of hits
66
+ def get_first_peps(out_files)
67
+ out_files.each do |outf|
68
+ if outf.num_hits > 0
69
+ return outf.hits
70
+ end
71
+ end
72
+ return nil
73
+ end
74
+
63
75
  it 'retrieves correct out files' do
64
76
  @out_files_first.object_match(@srf_obj.out_files.first).should be_true
65
77
  @out_files_last.object_match(@srf_obj.out_files.last).should be_true
66
- @out_files_first_last_pep.object_match(@srf_obj.out_files.first.hits.last).should be_true
67
- @out_files_last_last_pep.object_match(@srf_obj.out_files.last.hits.last).should be_true
78
+ # first available peptide hit
79
+ @out_files_first_pep.object_match(get_first_peps(@srf_obj.out_files).first).should be_true
80
+ # last available peptide hit
81
+ @out_files_last_pep.object_match(get_first_peps(@srf_obj.out_files.reverse).last).should be_true
68
82
  end
69
83
 
70
84
  xit 'retrieves correct params' do
@@ -75,35 +89,30 @@ describe 'an srf reader', :shared => true do
75
89
  end
76
90
 
77
91
 
92
+ Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
78
93
 
79
- describe klass, " reading a version 3.2 .srf file" do
80
- spec_large do
81
- before(:all) do
82
- @file = Tfiles_l + '/sash7/sequest/7MIX_STD_110802_1.srf'
83
- %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
84
- instance_variable_set("@#{c}", File_32[c.to_sym])
94
+ to_run = {
95
+ '3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
96
+ '3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
97
+ '3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
98
+ }
99
+
100
+ to_run.each do |version,info|
101
+ describe klass, " reading a version #{version} .srf file" do
102
+ spec_large do
103
+ before(:all) do
104
+ @file = Tfiles_l + info[:file]
105
+ Expected_hash_keys.each do |c|
106
+ instance_variable_set("@#{c}", info[:hash][c.to_sym])
107
+ end
85
108
  end
109
+ it_should_behave_like "an srf reader"
86
110
  end
87
- it_should_behave_like "an srf reader"
88
111
  end
89
112
  end
90
113
 
91
114
 
92
- describe klass, " reading a version 3.3 .srf file" do
93
- it_should 'reading a version 3.3 .srf file'
94
- end
95
-
96
-
97
- describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
98
- spec_large do
99
- before(:all) do
100
- @file = Tfiles_l + '/sash7/sequest/bioworks331/7MIX_STD_110802_1.srf'
101
- %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
102
- instance_variable_set("@#{c}", File_35[c.to_sym])
103
- end
104
- end
105
- it_should_behave_like "an srf reader"
106
- end
115
+ describe klass, " reading a corrupted file" do
107
116
  it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
108
117
  file = Tfiles + '/corrupted_900.srf'
109
118
  error_msg = Tfiles + '/error_msg.tmp'
@@ -123,78 +132,9 @@ describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
123
132
  IO.read(error_msg).should =~ /corrupted_900\.srf/
124
133
  File.unlink error_msg
125
134
  end
126
-
127
- end
128
-
129
-
130
- describe klass, 'reading an srf file' do
131
-
132
- spec_large do
133
- before(:all) do
134
- start = Time.now
135
- tf_srf = Tfiles_l + "/sash7/sequest/older/7MIX_STD_110802_1.srf"
136
- @srf = klass.new(tf_srf)
137
- puts "- read in #{Time.now - start} seconds"
138
- end
139
-
140
- #def initialize(arg)
141
- # super(arg)
142
- # @tfiles = File.dirname(__FILE__) + '/tfiles/'
143
- # @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
144
- # @srg_file = @tfiles + "tmp_bioworks.srg"
145
- # @srf = $srf
146
- # @group = $group
147
- #end
148
-
149
- it 'reads' do
150
- end
151
-
152
- it 'reads an srf file (w/o probs) and extracts all basic information' do
153
- ## Verify that we have everything and it is as we expect (not exhaustive)
154
- head = @srf.header
155
- dtgen = head.dta_gen
156
- ## HEADER
157
- hash_match(Header, head)
158
- hash_match(Dta_gen, dtgen)
159
- ## DTA_FILES
160
- hash_match(Dta_files_first, @srf.dta_files.first)
161
- hash_match(Dta_files_last, @srf.dta_files.last)
162
- ## OUT_FILES
163
- hash_match(Out_files_first, @srf.out_files.first)
164
- hit = @srf.out_files.first.hits.first
165
- hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
166
- hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
167
- hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
168
- ## SEQUEST_PARAMS
169
- hash_match(Sequest_params, @srf.params)
170
- ## INDEX
171
- @srf.index.last.should == [7161, 7161, 3]
172
- @srf.index.first.should == [2, 2, 1]
173
-
174
- @srf.dta_files.size.should == @srf.index.size
175
- @srf.dta_files.size.should == @srf.out_files.size
176
- end
177
- it_should 'give accurate peptides' do
178
- end
179
- end
180
-
181
- ## treats reference special
182
- def hash_match(hash, srf)
183
- hash.each do |k,v|
184
- if v.is_a? Float
185
- delta = v/100000
186
- srf.send(k.to_sym).should be_close(v, delta)
187
- elsif k == :reference
188
- srf.prots.first.reference.should == v[0,38]
189
- else
190
- srf.send(k.to_sym).should == v
191
- end
192
- end
193
- end
194
135
  end
195
136
 
196
137
  describe SRFGroup, 'creating an srg file' do
197
-
198
138
  it 'creates one given some non-existing, relative filenames' do
199
139
  ## TEST SRG GROUPING:
200
140
  filenames = %w(my/lucky/filename /another/filename)
@@ -205,5 +145,28 @@ describe SRFGroup, 'creating an srg file' do
205
145
  File.exist?(srg_file).should be_true
206
146
  File.unlink(srg_file)
207
147
  end
148
+ end
149
+
150
+
151
+ # @TODO: this test needs to be created for a small mock dataset!!
152
+ describe SRF, 'creating dta files' do
153
+ spec_large do
154
+ before(:all) do
155
+ file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
156
+ @srf = SRF.new(file)
157
+ end
158
+
159
+ it 'creates dta files' do
160
+ @srf.to_dta_files
161
+ File.exist?('020').should be_true
162
+ File.directory?('020').should be_true
163
+ File.exist?('020/020.3366.3366.2.dta').should be_true
164
+ lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
165
+ lines.first.should == "1113.10649290125 2\r\n"
166
+ lines[1].should == "164.56591796875 4817.0\r\n"
167
+
168
+ FileUtils.rm_rf '020'
169
+ end
170
+ end
208
171
 
209
172
  end
@@ -3,300 +3,137 @@ module SRFHelper
3
3
  File_32 = {
4
4
  :header =>
5
5
  {
6
- :params_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\sashimi7.params",
7
- :model => "LCQ Deca XP",
8
- :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_dta.log",
9
- :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
10
- :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
11
- :modifications => "",
12
- :enzyme => "Enzyme:Trypsin(KR/P) (2)",
13
- :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_sequest.log",
14
- :version => "3.2",
15
- :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW"
6
+ :params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
7
+ :raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
8
+ :modifications=>"(M* +15.99940) (STY# +79.97990)",
9
+ :sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
10
+ :ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
11
+ :db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
12
+ :enzyme=>"Enzyme:Trypsin(KR/P) (2)",
13
+ :version=>"3.2",
14
+ :model=>"LCQ Deca XP",
15
+ :dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
16
16
  },
17
- :dta_gen => {
18
- :min_group_count => 1,
19
- :start_time => 1.39999997615814,
20
- :start_mass => 400.0,
21
- :end_scan => 7161,
22
- :group_scan => 1,
23
- :start_scan => 1,
24
- :num_dta_files => 6952,
25
- :min_ion_threshold => 15,
26
- :end_mass => 4500.0,
27
- },
28
- :dta_files_first => {
29
- :mh => 1221.88989257812,
30
- :dta_tic => 7703132.0,
31
- :num_peaks => 74,
32
- :charge => 1,
33
- :ms_level => 2,
34
- :total_num_possible_charge_states => 0,
35
- :peaks => 592,
36
- },
37
- :dta_files_last => {
38
- :mh => 2604.8360326775,
39
- :dta_tic => 31977.0,
40
- :num_peaks => 17,
41
- :charge => 3,
42
- :ms_level => 2,
43
- :total_num_possible_charge_states => 0,
44
- :peaks => 136,
45
- },
46
- :out_files_first => {
47
- :num_hits => 10,
48
- :computer => 'VELA',
49
- :date_time => '05/12/2006, 10:58 AM,',
50
- :hits => 10
51
- },
52
- :out_files_last => {
53
- :num_hits => 10,
54
- :computer => 'VELA',
55
- :date_time => '05/12/2006, 11:11 AM,',
56
- :hits => 10
57
- },
58
- :out_files_first_last_pep => {
59
- :aaseq => 'QFSLSKSSLPK',
60
- :sequence => 'K.QFSLSKSSLPK.S',
61
- :mh => 1222.4156904522,
62
- :deltacn => 1.1,
63
- :sp => 57.4083709716797,
64
- :xcorr => 0.802009999752045,
65
- :id => 19977,
66
- :rsp => 60,
67
- :ions_matched => 7,
68
- :ions_total => 20,
69
- :prots => 1,
70
- :deltamass => 0.525797874074897,
71
- :ppm => 430.315265940608,
72
- :base_name => '7MIX_STD_110802_1',
73
- :first_scan => 2,
74
- :last_scan => 2,
75
- :charge => 1
76
- },
77
- :out_files_last_last_pep =>
78
- {
79
- :aaseq => 'EAFLVNSDLTLRAQLTEFRDHK',
80
- :sequence => 'R.EAFLVNSDLTLRAQLTEFRDHK.L',
81
- :mh => 2604.9025174522,
82
- :deltacn => 1.1,
83
- :sp => 26.1511478424072,
84
- :xcorr => 0.634012818336487,
85
- :id => 8105,
86
- :rsp => 165,
87
- :ions_matched => 6,
88
- :ions_total => 84,
89
- :prots => 1,
90
- :deltamass => 0.0664847746993473,
91
- :ppm => 25.523592988311,
92
- :base_name => '7MIX_STD_110802_1',
93
- :first_scan => 7161,
94
- :last_scan => 7161,
95
- :charge => 3,
96
- },
17
+ :dta_gen => {
18
+ :min_group_count => 1,
19
+ :start_time => 1.5,
20
+ :start_mass => 300.0,
21
+ :end_scan => 3620,
22
+ :group_scan => 1,
23
+ :start_scan => 1,
24
+ :num_dta_files => 3747,
25
+ :min_ion_threshold => 15,
26
+ :end_mass => 4500.0,
27
+ },
28
+ :dta_files_first => {
97
29
 
98
- :params => {
99
- "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.4000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"},
30
+ :mh=>390.92919921875,
31
+ :dta_tic=>9041311.0,
32
+ :num_peaks=>48,
33
+ :charge=>1,
34
+ :ms_level=>2,
35
+ :total_num_possible_charge_states=>0,
36
+ },
37
+ :dta_files_last => {
38
+ :dta_tic=>842424.0,
39
+ :mh=>357.041198730469,
40
+ :num_peaks=>78,
41
+ :ms_level=>2,
42
+ :charge=>1,
43
+ :total_num_possible_charge_states=>0,
44
+ },
45
+ :out_files_first => {
46
+ :num_hits => 0,
47
+ :computer => 'VELA',
48
+ :date_time => '05/06/2008, 02:08 PM,',
49
+ :hits => 0,
50
+ },
51
+ :out_files_last => {
52
+ :num_hits => 0,
53
+ :computer => 'VELA',
54
+ :date_time => '05/06/2008, 02:11 PM,',
55
+ :hits => 0,
56
+ },
57
+ :out_files_first_pep => {
58
+ :aaseq=>"YRLGGSTK",
59
+ :sequence=>"R.Y#RLGGS#T#K.K",
60
+ :mh=>1121.9390244522,
61
+ :deltacn_orig=>0.0,
62
+ :sp=>29.8529319763184,
63
+ :xcorr=>0.123464643955231,
64
+ :id=>2104,
65
+ :rsp=>1,
66
+ :ions_matched=>5,
67
+ :ions_total=>35,
68
+ :prots=>1,
69
+ :deltamass=>-0.00579976654989878,
70
+ :ppm=>5.16938660859491,
71
+ :base_name=>"020",
72
+ :first_scan=>3,
73
+ :last_scan=>3,
74
+ :charge=>1,
75
+ :deltacn=>0.795928299427032,
76
+ :base_name=>"020",
77
+ },
78
+ :out_files_last_pep =>
79
+ {
80
+ :aaseq=>"LLPGTARTMRR",
81
+ :sequence=>"R.LLPGTARTMRR.M",
82
+ :mh=>1272.5493424522,
83
+ :deltacn_orig=>0.835508584976196,
84
+ :deltacn=>1.1,
85
+ :sp=>57.9885787963867,
86
+ :xcorr=>0.109200321137905,
87
+ :id=>1361,
88
+ :rsp=>11,
89
+ :ions_matched=>6,
90
+ :ions_total=>40,
91
+ :prots=>1,
92
+ :deltamass=>0.00243330985608736,
93
+ :ppm=>1.91215729542523,
94
+ :base_name=>"020",
95
+ :first_scan=>3619,
96
+ :last_scan=>3619,
97
+ :charge=>3,
98
+ :deltacn=>1.1,
99
+ :base_name=>"020",
100
+ },
100
101
 
102
+ :params => {
103
+ "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta", "peptide_mass_tolerance"=>"25.0000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"2", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"3", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"
104
+ }
101
105
  }
102
106
 
103
- File_35 = {}
104
- File_32.each {|k,v| File_35[k] = v.dup }
105
-
106
- File_35[:header].merge!( {
107
- :sequest_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_sequest.log",
108
- :raw_filename => "C:\\Xcalibur\\data\\john\\sash7\\7MIX_STD_110802_1.RAW",
109
- :params_filename => "C:\\Xcalibur\\sequest\\john\\bioworks331\\sashimi7.params",
110
- :dta_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_dta.log",
111
- :version=>"3.5"
112
- } )
113
-
114
- File_35[:params].merge!( {
115
- "add_O_Ornithine"=>"0.00000", "add_F_Phenylalanine"=>"0.00000", "add_A_Alanine"=>"0.00000", "add_C_Cysteine"=>"0.00000", "add_Y_Tyrosine"=>"0.00000", "add_X_LorI"=>"0.00000", "add_J_user_amino_acid"=>"0.00000", "add_Cterm_peptide"=>"0.00000", "add_S_Serine"=>"0.00000", "add_Nterm_protein"=>"0.00000", "add_D_Aspartic_Acid"=>"0.00000", "add_Q_Glutamine"=>"0.00000", "add_K_Lysine"=>"0.00000", "add_R_Arginine"=>"0.00000", "add_W_Tryptophan"=>"0.00000", "add_Nterm_peptide"=>"0.00000", "add_H_Histidine"=>"0.00000", "add_L_Leucine"=>"0.00000", "add_I_Isoleucine"=>"0.00000", "add_N_Asparagine"=>"0.00000", "add_B_avg_NandD"=>"0.00000", "add_Z_avg_QandE"=>"0.00000", "add_E_Glutamic_Acid"=>"0.00000", "add_G_Glycine"=>"0.00000", "add_P_Proline"=>"0.00000", "add_M_Methionine"=>"0.00000", "add_Cterm_protein"=>"0.00000", "add_V_Valine"=>"0.00000", "add_T_Threonine"=>"0.00000", "add_U_user_amino_acid"=>"0.00000", "match_peak_tolerance"=>"1.00000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.40000", "digest_mass_range"=>"400.0000 4500.0000", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.00000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "fragment_ion_units"=>"0", "ion_cutoff_percentage"=>"0.00000", "mass_type_fragment"=>"0"}
116
-
117
- )
107
+ File_33 = {}
108
+ File_32.each do |k,v|
109
+ File_33[k] = v.dup
110
+ end
118
111
 
119
- File_35[:out_files_first].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:11 PM,'} )
120
- File_35[:out_files_last].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:15 PM,'} )
121
- # I'm assuming this difference is due to higher precision mass...? (not a
122
- # parsing error)
123
- File_35[:out_files_first_last_pep][:rsp] = 56
124
- File_35[:out_files_last_last_pep][:rsp] = 125
125
-
112
+ ## Bioworks 3.3 (srf version 3.3)
113
+ File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
114
+ File_33[:header][:version] = "3.3"
126
115
 
127
- Header = {
128
- :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
129
- :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
130
- :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_sequest.log",
131
- :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
132
- :enzyme => "Enzyme:Trypsin(KR/P) (2)",
133
- :params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
134
- :modifications => "",
135
- :version => "3.2",
136
- :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
137
- :model => "LCQ Deca XP",
138
- }
139
- ## DTA Gen
140
- Dta_gen = {
141
- :group_scan => 1,
142
- :start_time => 1.39999997615814,
143
- :start_scan => 1,
144
- :num_dta_files => 6952,
145
- :min_ion_threshold => 15,
146
- :end_mass => 4500.0,
147
- :min_group_count => 1,
148
- :start_mass => 400.0,
149
- :end_scan => 7161,
150
- }
151
-
152
- Dta_files_first = {
153
- :mh => 1221.88989257812,
154
- :dta_tic => 7703132.0,
155
- :num_peaks => 74,
156
- :charge => 1,
157
- :ms_level => 2,
158
- :total_num_possible_charge_states => 0,
159
- :peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
160
- }
161
- Dta_files_last = {
162
- :mh => 2604.8360326775,
163
- :dta_tic => 31977.0,
164
- :num_peaks => 17,
165
- :charge => 3,
166
- :ms_level => 2,
167
- :total_num_possible_charge_states => 0,
168
- :peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
169
- }
116
+ File_33[:out_files_first][:computer] = 'TESLA'
117
+ File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
118
+ File_33[:out_files_last][:computer] = 'TESLA'
119
+ File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'
170
120
 
171
- Out_files_first = {
172
- :num_hits => 10,
173
- :computer => "VELA",
174
- :date_time => "11/17/2006, 04:13 PM,",
175
- }
121
+ File_33[:out_files_first_pep][:sp] = 29.8535556793213
122
+ File_33[:out_files_last_pep][:sp] = 57.987476348877
123
+ File_33[:out_files_last_pep][:rsp] = 10
124
+ File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219
176
125
 
177
- Out_files_first_hit = {
178
- :mh => 1220.5128044522,
179
- :deltacn => 0.071944423019886, ## this is the modified version
180
- :sp => 96.5815887451172,
181
- :xcorr => 1.08377742767334,
182
- :id => 224,
183
- :rsp => 13,
184
- :ions_matched => 8,
185
- :ions_total => 20,
186
- :sequence => "K.LCPHLTLLPGR.F",
187
- :aaseq => "LCPHLTLLPGR",
188
- :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
189
- :first_scan => 2,
190
- :last_scan => 2,
191
- :base_name => '7MIX_STD_110802_1',
192
- :charge => 1,
193
- }
194
126
 
195
- Out_files_last = {
196
- :num_hits => 10,
197
- :computer => "VELA",
198
- :date_time => "11/17/2006, 04:25 PM," ,
199
- }
200
- Out_files_last_first_hit = {
201
- :mh => 2605.9368784522,
202
- :deltacn => 0.03921128064394,
203
- :sp => 76.7447052001953,
204
- :xcorr => 0.915680646896362,
205
- :id => 13562,
206
- :rsp => 4,
207
- :ions_matched => 10,
208
- :ions_total => 84,
209
- :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
210
- :aaseq => "HLEINPNHPIVETLRQKAETHK",
211
- :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
212
- :first_scan => 7161,
213
- :last_scan => 7161,
214
- :base_name => '7MIX_STD_110802_1',
215
- :deltamass => 2605.9368784522 - 2604.8360326775,
216
- :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
217
- :charge => 3,
218
- }
219
- Out_files_last_last_hit = {
220
- :mh => 2604.9025174522,
221
- :deltacn => 1.1,
222
- :sp => 26.1511478424072,
223
- :xcorr => 0.634012818336487,
224
- :id => 8105,
225
- :rsp => 165,
226
- :ions_matched => 6,
227
- :ions_total => 84,
228
- :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
229
- :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
230
- :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
231
- :first_scan => 7161,
232
- :last_scan => 7161,
233
- :base_name => '7MIX_STD_110802_1',
234
- :deltamass => 2604.9025174522 - 2604.8360326775,
235
- :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
236
- :charge => 3,
237
- }
238
- Sequest_params = {
239
- "add_F_Phenylalanine"=>"0.0000",
240
- "add_O_Ornithine"=>"0.0000",
241
- "add_Y_Tyrosine"=>"0.0000",
242
- "add_C_Cysteine"=>"0.0000",
243
- "add_A_Alanine"=>"0.0000",
244
- "add_J_user_amino_acid"=>"0.0000",
245
- "add_X_LorI"=>"0.0000",
246
- "add_S_Serine"=>"0.0000",
247
- "add_Cterm_peptide"=>"0.0000",
248
- "add_Q_Glutamine"=>"0.0000",
249
- "add_D_Aspartic_Acid"=>"0.0000",
250
- "add_Nterm_protein"=>"0.0000",
251
- "add_W_Tryptophan"=>"0.0000",
252
- "add_R_Arginine"=>"0.0000",
253
- "add_K_Lysine"=>"0.0000",
254
- "add_H_Histidine"=>"0.0000",
255
- "add_Nterm_peptide"=>"0.0000",
256
- "add_E_Glutamic_Acid"=>"0.0000",
257
- "add_Z_avg_QandE"=>"0.0000",
258
- "add_B_avg_NandD"=>"0.0000",
259
- "add_N_Asparagine"=>"0.0000",
260
- "add_I_Isoleucine"=>"0.0000",
261
- "add_L_Leucine"=>"0.0000",
262
- "add_M_Methionine"=>"0.0000",
263
- "add_P_Proline"=>"0.0000",
264
- "add_G_Glycine"=>"0.0000",
265
- "add_U_user_amino_acid"=>"0.0000",
266
- "add_T_Threonine"=>"0.0000",
267
- "add_V_Valine"=>"0.0000",
268
- "add_Cterm_protein"=>"0.0000",
269
- "match_peak_tolerance"=>"1.0000",
270
- "match_peak_allowed_error"=>"1",
271
- "normalize_xcorr"=>"0",
272
- "nucleotide_reading_frame"=>"0",
273
- "num_results"=>"250",
274
- "sequence_header_filter"=>"",
275
- "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
276
- "partial_sequence"=>"",
277
- "max_num_internal_cleavage_sites"=>"2",
278
- "search_engine"=>"SEQUEST",
279
- "print_duplicate_references"=>"40",
280
- "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
281
- "remove_precursor_peak"=>"0",
282
- "num_output_lines"=>"10",
283
- "second_database_name"=>"",
284
- "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
285
- "peptide_mass_tolerance"=>"1.4000",
286
- "digest_mass_range"=>"600.0 3500.0",
287
- "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
288
- "show_fragment_ions"=>"0",
289
- "protein_mass_filter"=>"0 0",
290
- "term_diff_search_options"=>"0.000000 0.000000",
291
- "num_description_lines"=>"5",
292
- "fragment_ion_tolerance"=>"1.0000",
293
- "peptide_mass_units"=>"0",
294
- "mass_type_parent"=>"0",
295
- "match_peak_count"=>"0",
296
- "max_num_differential_per_peptide"=>"1",
297
- "ion_cutoff_percentage"=>"0.0000",
298
- "mass_type_fragment"=>"0"
299
- }
127
+ ## Bioworks 3.3.1 (srf version 3.5)
128
+ File_331 = {}
129
+ File_33.each do |k,v|
130
+ File_331[k] = v.dup
131
+ end
132
+ File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
133
+ File_331[:header][:version] = "3.5"
134
+ File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
135
+ File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
300
136
 
301
137
  end
302
138
 
139
+
@@ -85,6 +85,7 @@ describe 'creating a list of proteins from peptides', :shared => true do
85
85
  end
86
86
 
87
87
  describe SpecID, 'with generic proteins' do
88
+ include SpecID
88
89
  before(:all) do
89
90
  @prots = (0..7).map do |n|
90
91
  SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
@@ -95,6 +96,7 @@ describe SpecID, 'with generic proteins' do
95
96
  end
96
97
 
97
98
  describe SpecID, 'with array based proteins' do
99
+ include SpecID
98
100
  before(:all) do
99
101
  @prots = (0..7).map do |n|
100
102
  SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
@@ -109,6 +111,7 @@ class TrueClass ; include Boolean end
109
111
  class FalseClass; include Boolean end
110
112
 
111
113
  describe SpecID, 'being created' do
114
+ include SpecID
112
115
  it 'can be from small bioworks.xml' do
113
116
  sp = SpecID.new(Tfiles + '/bioworks_small.xml')
114
117
  sp.prots.size.should == 106
@@ -4,6 +4,7 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
4
4
  require File.expand_path( File.dirname(__FILE__) + '/../transmem_spec_shared' )
5
5
 
6
6
  require 'transmem/toppred'
7
+ require 'yaml'
7
8
 
8
9
  describe TopPred::Index do
9
10
  before(:all) do