mspire 0.1.5 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/Rakefile +5 -2
  2. data/bin/bioworks_to_pepxml.rb +84 -40
  3. data/bin/fasta_shaker.rb +100 -0
  4. data/bin/filter_spec_id.rb +185 -23
  5. data/bin/gi2annot.rb +2 -110
  6. data/bin/id_class_anal.rb +31 -21
  7. data/bin/id_precision.rb +12 -8
  8. data/bin/{false_positive_rate.rb → precision.rb} +1 -1
  9. data/bin/protein_summary.rb +55 -62
  10. data/changelog.txt +34 -0
  11. data/lib/align.rb +0 -1
  12. data/lib/fasta.rb +88 -24
  13. data/lib/gi.rb +114 -0
  14. data/lib/roc.rb +64 -58
  15. data/lib/spec_id/aa_freqs.rb +166 -0
  16. data/lib/spec_id/bioworks.rb +5 -1
  17. data/lib/spec_id/precision.rb +427 -0
  18. data/lib/spec_id/proph.rb +2 -2
  19. data/lib/spec_id/sequest.rb +810 -113
  20. data/lib/spec_id/srf.rb +486 -0
  21. data/lib/spec_id.rb +107 -23
  22. data/release_notes.txt +11 -0
  23. data/script/estimate_fpr_by_cysteine.rb +226 -0
  24. data/script/filter-peps.rb +3 -3
  25. data/script/find_cysteine_background.rb +137 -0
  26. data/script/gen_database_searching.rb +11 -7
  27. data/script/genuine_tps_and_probs.rb +136 -0
  28. data/script/top_hit_per_scan.rb +5 -2
  29. data/test/tc_aa_freqs.rb +59 -0
  30. data/test/tc_bioworks.rb +6 -1
  31. data/test/tc_bioworks_to_pepxml.rb +25 -18
  32. data/test/tc_fasta.rb +81 -3
  33. data/test/tc_fasta_shaker.rb +147 -0
  34. data/test/tc_gi.rb +20 -0
  35. data/test/tc_id_class_anal.rb +9 -12
  36. data/test/tc_id_precision.rb +12 -11
  37. data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
  38. data/test/tc_protein_summary.rb +31 -22
  39. data/test/tc_roc.rb +95 -50
  40. data/test/tc_sequest.rb +212 -145
  41. data/test/tc_spec.rb +10 -5
  42. data/test/tc_spec_id.rb +0 -2
  43. data/test/tc_spec_id_xml.rb +36 -0
  44. data/test/tc_srf.rb +216 -0
  45. metadata +35 -21
  46. data/lib/spec_id/false_positive_rate.rb +0 -476
  47. data/test/tc_gi2annot.rb +0 -12
data/test/tc_srf.rb ADDED
@@ -0,0 +1,216 @@
1
+
2
+ require 'test/unit'
3
+ require 'spec_id/srf'
4
+
5
+
6
+ module ToMatch
7
+ Header = {
8
+ :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
9
+ :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
10
+ :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_sequest.log",
11
+ :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
12
+ :enzyme => "Enzyme:Trypsin(KR/P) (2)",
13
+ :params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
14
+ :modifications => "",
15
+ :version => "3.2",
16
+ :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
17
+ :model => "LCQ Deca XP",
18
+ }
19
+ ## DTA Gen
20
+ Dta_gen = {
21
+ :group_scan => 1,
22
+ :start_time => 1.39999997615814,
23
+ :start_scan => 1,
24
+ :num_dta_files => 6952,
25
+ :min_ion_threshold => 15,
26
+ :end_mass => 4500.0,
27
+ :min_group_count => 1,
28
+ :start_mass => 400.0,
29
+ :end_scan => 7161,
30
+ }
31
+
32
+ Dta_files_first = {
33
+ :mh => 1221.88989257812,
34
+ :dta_tic => 7703132.0,
35
+ :num_peaks => 74,
36
+ :charge => 1,
37
+ :ms_level => 2,
38
+ :total_num_possible_charge_states => 0,
39
+ :peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
40
+ }
41
+ Dta_files_last = {
42
+ :mh => 2604.8360326775,
43
+ :dta_tic => 31977.0,
44
+ :num_peaks => 17,
45
+ :charge => 3,
46
+ :ms_level => 2,
47
+ :total_num_possible_charge_states => 0,
48
+ :peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
49
+ }
50
+
51
+ Out_files_first = {
52
+ :num_hits => 10,
53
+ :charge => 1,
54
+ :computer => "VELA",
55
+ :date_time => "11/17/2006, 04:13 PM,",
56
+ }
57
+
58
+ Out_files_first_hit = {
59
+ :mh => 1220.5128044522,
60
+ :deltacn => 0.0,
61
+ :sp => 96.5815887451172,
62
+ :xcorr => 1.08377742767334,
63
+ :id => 224,
64
+ :rsp => 13,
65
+ :ions_matched => 8,
66
+ :ions_total => 20,
67
+ :peptide => "K.LCPHLTLLPGR.F",
68
+ :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
69
+ }
70
+
71
+ Out_files_last = {
72
+ :num_hits => 10,
73
+ :charge => 1,
74
+ :computer => "VELA",
75
+ :date_time => "11/17/2006, 04:25 PM," ,
76
+ }
77
+ Out_files_last_first_hit = {
78
+ :mh => 2605.9368784522,
79
+ :deltacn => 0.0,
80
+ :sp => 76.7447052001953,
81
+ :xcorr => 0.915680646896362,
82
+ :id => 13562,
83
+ :rsp => 4,
84
+ :ions_matched => 10,
85
+ :ions_total => 84,
86
+ :peptide => "K.HLEINPNHPIVETLRQKAETHK.N",
87
+ :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
88
+ }
89
+ Out_files_last_last_hit = {
90
+ :mh => 2604.9025174522,
91
+ :deltacn => 0.307604849338531,
92
+ :sp => 26.1511478424072,
93
+ :xcorr => 0.634012818336487,
94
+ :id => 8105,
95
+ :rsp => 165,
96
+ :ions_matched => 6,
97
+ :ions_total => 84,
98
+ :peptide => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
99
+ :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin"
100
+ }
101
+ Sequest_params = {
102
+ "add_F_Phenylalanine"=>"0.0000",
103
+ "add_O_Ornithine"=>"0.0000",
104
+ "add_Y_Tyrosine"=>"0.0000",
105
+ "add_C_Cysteine"=>"0.0000",
106
+ "add_A_Alanine"=>"0.0000",
107
+ "add_J_user_amino_acid"=>"0.0000",
108
+ "add_X_LorI"=>"0.0000",
109
+ "add_S_Serine"=>"0.0000",
110
+ "add_Cterm_peptide"=>"0.0000",
111
+ "add_Q_Glutamine"=>"0.0000",
112
+ "add_D_Aspartic_Acid"=>"0.0000",
113
+ "add_Nterm_protein"=>"0.0000",
114
+ "add_W_Tryptophan"=>"0.0000",
115
+ "add_R_Arginine"=>"0.0000",
116
+ "add_K_Lysine"=>"0.0000",
117
+ "add_H_Histidine"=>"0.0000",
118
+ "add_Nterm_peptide"=>"0.0000",
119
+ "add_E_Glutamic_Acid"=>"0.0000",
120
+ "add_Z_avg_QandE"=>"0.0000",
121
+ "add_B_avg_NandD"=>"0.0000",
122
+ "add_N_Asparagine"=>"0.0000",
123
+ "add_I_Isoleucine"=>"0.0000",
124
+ "add_L_Leucine"=>"0.0000",
125
+ "add_M_Methionine"=>"0.0000",
126
+ "add_P_Proline"=>"0.0000",
127
+ "add_G_Glycine"=>"0.0000",
128
+ "add_U_user_amino_acid"=>"0.0000",
129
+ "add_T_Threonine"=>"0.0000",
130
+ "add_V_Valine"=>"0.0000",
131
+ "add_Cterm_protein"=>"0.0000",
132
+ "match_peak_tolerance"=>"1.0000",
133
+ "match_peak_allowed_error"=>"1",
134
+ "normalize_xcorr"=>"0",
135
+ "nucleotide_reading_frame"=>"0",
136
+ "num_results"=>"250",
137
+ "sequence_header_filter"=>"",
138
+ "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
139
+ "partial_sequence"=>"",
140
+ "max_num_internal_cleavage_sites"=>"2",
141
+ "search_engine"=>"SEQUEST",
142
+ "print_duplicate_references"=>"40",
143
+ "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
144
+ "remove_precursor_peak"=>"0",
145
+ "num_output_lines"=>"10",
146
+ "second_database_name"=>"",
147
+ "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
148
+ "peptide_mass_tolerance"=>"1.4000",
149
+ "digest_mass_range"=>"600.0 3500.0",
150
+ "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
151
+ "show_fragment_ions"=>"0",
152
+ "protein_mass_filter"=>"0 0",
153
+ "term_diff_search_options"=>"0.000000 0.000000",
154
+ "num_description_lines"=>"5",
155
+ "fragment_ion_tolerance"=>"1.0000",
156
+ "peptide_mass_units"=>"0",
157
+ "mass_type_parent"=>"0",
158
+ "match_peak_count"=>"0",
159
+ "max_num_differential_per_peptide"=>"1",
160
+ "ion_cutoff_percentage"=>"0.0000",
161
+ "mass_type_fragment"=>"0"
162
+ }
163
+
164
+ end
165
+
166
+ class TestSRF < Test::Unit::TestCase
167
+ include ToMatch
168
+ def initialize(arg)
169
+ super(arg)
170
+ @tfiles = File.dirname(__FILE__) + '/tfiles/'
171
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
172
+ @tf_srf = @tfiles_l + "7MIX_STD_110802_1.srf"
173
+ end
174
+
175
+ def test_basic
176
+ start = Time.now
177
+ obj = SRF.new(@tf_srf)
178
+ puts "TOOK: #{Time.now - start} secs"
179
+ ## Verify that we have everything and it is as we expect (not exhaustive)
180
+ head = obj.header
181
+ dtgen = head.dta_gen
182
+ ## HEADER
183
+ hash_match(Header, head)
184
+ hash_match(Dta_gen, dtgen)
185
+ ## DTA_FILES
186
+ hash_match(Dta_files_first, obj.dta_files.first)
187
+ hash_match(Dta_files_last, obj.dta_files.last)
188
+ ## OUT_FILES
189
+ hash_match(Out_files_first, obj.out_files.first)
190
+ hash_match(Out_files_first_hit, obj.out_files.first.hits.first)
191
+ hash_match(Out_files_last_first_hit, obj.out_files.last.hits.first)
192
+ hash_match(Out_files_last_last_hit, obj.out_files.last.hits.last)
193
+ ## SEQUEST_PARAMS
194
+ hash_match(Sequest_params, obj.params)
195
+ ## INDEX
196
+ assert_equal([7161, 7161, 3], obj.index.last)
197
+ assert_equal([2, 2, 1], obj.index.first)
198
+
199
+ assert_equal(obj.index.size, obj.dta_files.size)
200
+ assert_equal(obj.out_files.size, obj.dta_files.size)
201
+ end
202
+
203
+ def hash_match(hash, obj)
204
+ hash.each do |k,v|
205
+ if v.is_a? Float
206
+ delta = v/100000
207
+ assert_in_delta( obj.send(k.to_sym), v, delta, "param: #{k}")
208
+ else
209
+ assert_equal(obj.send(k.to_sym), v, "param: #{k}")
210
+ end
211
+ end
212
+ end
213
+
214
+ end
215
+
216
+
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: mspire
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2007-02-08 00:00:00 -06:00
6
+ version: 0.1.7
7
+ date: 2007-03-27 00:00:00 -05:00
8
8
  summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
9
9
  require_paths:
10
10
  - lib
@@ -32,15 +32,18 @@ files:
32
32
  - lib/spec_id
33
33
  - lib/align
34
34
  - lib/spec
35
- - lib/spec_id.rb
36
- - lib/fasta.rb
37
35
  - lib/sample_enzyme.rb
38
- - lib/align.rb
36
+ - lib/fasta.rb
39
37
  - lib/roc.rb
38
+ - lib/spec_id.rb
39
+ - lib/gi.rb
40
+ - lib/align.rb
41
+ - lib/spec_id/srf.rb
40
42
  - lib/spec_id/bioworks.rb
41
43
  - lib/spec_id/proph.rb
42
44
  - lib/spec_id/sequest.rb
43
- - lib/spec_id/false_positive_rate.rb
45
+ - lib/spec_id/precision.rb
46
+ - lib/spec_id/aa_freqs.rb
44
47
  - lib/align/chams.rb
45
48
  - lib/spec/mzxml
46
49
  - lib/spec/mzdata
@@ -54,6 +57,8 @@ files:
54
57
  - README
55
58
  - Rakefile
56
59
  - LICENSE
60
+ - changelog.txt
61
+ - release_notes.txt
57
62
  - bin/fasta_cat_mod.rb
58
63
  - bin/fasta_mod.rb
59
64
  - bin/gi2annot.rb
@@ -63,45 +68,53 @@ files:
63
68
  - bin/bioworks2sequestXML_gui.rb
64
69
  - bin/bioworks2excel.rb
65
70
  - bin/pepproph_filter.rb
71
+ - bin/filter_spec_id.rb
66
72
  - bin/bioworks_to_pepxml.rb
73
+ - bin/mzxml_to_lmat.rb
67
74
  - bin/protxml2prots_peps.rb
68
- - bin/false_positive_rate.rb
69
75
  - bin/id_precision.rb
70
- - bin/mzxml_to_lmat.rb
71
76
  - bin/id_class_anal.rb
72
- - bin/filter_spec_id.rb
73
- - script/create_little_pepxml.rb
77
+ - bin/precision.rb
78
+ - bin/fasta_shaker.rb
79
+ - script/prep_dir.rb
80
+ - script/msvis.rb
81
+ - script/gen_database_searching.rb
74
82
  - script/mzXML2timeIndex.rb
75
83
  - script/tpp_installer.rb
76
- - script/msvis.rb
84
+ - script/create_little_pepxml.rb
77
85
  - script/histogram_probs.rb
78
- - script/prep_dir.rb
79
86
  - script/top_hit_per_scan.rb
80
87
  - script/filter-peps.rb
81
- - script/gen_database_searching.rb
82
88
  - script/simple_protein_digestion.rb
89
+ - script/genuine_tps_and_probs.rb
90
+ - script/estimate_fpr_by_cysteine.rb
91
+ - script/find_cysteine_background.rb
83
92
  test_files:
84
- - test/tc_false_positive_rate.rb
93
+ - test/tc_srf.rb
85
94
  - test/tc_proph.rb
86
95
  - test/tc_sequest.rb
87
- - test/tc_spec.rb
88
96
  - test/tc_align.rb
97
+ - test/tc_spec.rb
98
+ - test/tc_aa_freqs.rb
89
99
  - test/tc_protein_summary.rb
90
100
  - test/tc_fasta.rb
91
101
  - test/tc_bioworks.rb
92
- - test/tc_msrun.rb
93
102
  - test/tc_peptide_parent_times.rb
103
+ - test/tc_msrun.rb
94
104
  - test/tc_spec_id.rb
95
105
  - test/tc_mzxml.rb
96
106
  - test/tc_id_precision.rb
97
107
  - test/tc_id_class_anal.rb
98
108
  - test/tc_filter_peps.rb
99
- - test/tc_gi2annot.rb
109
+ - test/tc_precision.rb
100
110
  - test/tc_roc.rb
101
111
  - test/tc_scan.rb
102
- - test/tc_bioworks_to_pepxml.rb
103
112
  - test/tc_mzxml_to_lmat.rb
113
+ - test/tc_bioworks_to_pepxml.rb
104
114
  - test/tc_sample_enzyme.rb
115
+ - test/tc_fasta_shaker.rb
116
+ - test/tc_gi.rb
117
+ - test/tc_spec_id_xml.rb
105
118
  rdoc_options:
106
119
  - --main
107
120
  - README
@@ -121,13 +134,14 @@ executables:
121
134
  - bioworks2sequestXML_gui.rb
122
135
  - bioworks2excel.rb
123
136
  - pepproph_filter.rb
137
+ - filter_spec_id.rb
124
138
  - bioworks_to_pepxml.rb
139
+ - mzxml_to_lmat.rb
125
140
  - protxml2prots_peps.rb
126
- - false_positive_rate.rb
127
141
  - id_precision.rb
128
- - mzxml_to_lmat.rb
129
142
  - id_class_anal.rb
130
- - filter_spec_id.rb
143
+ - precision.rb
144
+ - fasta_shaker.rb
131
145
  extensions: []
132
146
 
133
147
  requirements: