mspire 0.1.5 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +5 -2
- data/bin/bioworks_to_pepxml.rb +84 -40
- data/bin/fasta_shaker.rb +100 -0
- data/bin/filter_spec_id.rb +185 -23
- data/bin/gi2annot.rb +2 -110
- data/bin/id_class_anal.rb +31 -21
- data/bin/id_precision.rb +12 -8
- data/bin/{false_positive_rate.rb → precision.rb} +1 -1
- data/bin/protein_summary.rb +55 -62
- data/changelog.txt +34 -0
- data/lib/align.rb +0 -1
- data/lib/fasta.rb +88 -24
- data/lib/gi.rb +114 -0
- data/lib/roc.rb +64 -58
- data/lib/spec_id/aa_freqs.rb +166 -0
- data/lib/spec_id/bioworks.rb +5 -1
- data/lib/spec_id/precision.rb +427 -0
- data/lib/spec_id/proph.rb +2 -2
- data/lib/spec_id/sequest.rb +810 -113
- data/lib/spec_id/srf.rb +486 -0
- data/lib/spec_id.rb +107 -23
- data/release_notes.txt +11 -0
- data/script/estimate_fpr_by_cysteine.rb +226 -0
- data/script/filter-peps.rb +3 -3
- data/script/find_cysteine_background.rb +137 -0
- data/script/gen_database_searching.rb +11 -7
- data/script/genuine_tps_and_probs.rb +136 -0
- data/script/top_hit_per_scan.rb +5 -2
- data/test/tc_aa_freqs.rb +59 -0
- data/test/tc_bioworks.rb +6 -1
- data/test/tc_bioworks_to_pepxml.rb +25 -18
- data/test/tc_fasta.rb +81 -3
- data/test/tc_fasta_shaker.rb +147 -0
- data/test/tc_gi.rb +20 -0
- data/test/tc_id_class_anal.rb +9 -12
- data/test/tc_id_precision.rb +12 -11
- data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
- data/test/tc_protein_summary.rb +31 -22
- data/test/tc_roc.rb +95 -50
- data/test/tc_sequest.rb +212 -145
- data/test/tc_spec.rb +10 -5
- data/test/tc_spec_id.rb +0 -2
- data/test/tc_spec_id_xml.rb +36 -0
- data/test/tc_srf.rb +216 -0
- metadata +35 -21
- data/lib/spec_id/false_positive_rate.rb +0 -476
- data/test/tc_gi2annot.rb +0 -12
data/test/tc_srf.rb
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
|
2
|
+
require 'test/unit'
|
3
|
+
require 'spec_id/srf'
|
4
|
+
|
5
|
+
|
6
|
+
module ToMatch
|
7
|
+
Header = {
|
8
|
+
:db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
|
9
|
+
:ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
|
10
|
+
:sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_sequest.log",
|
11
|
+
:raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
|
12
|
+
:enzyme => "Enzyme:Trypsin(KR/P) (2)",
|
13
|
+
:params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
|
14
|
+
:modifications => "",
|
15
|
+
:version => "3.2",
|
16
|
+
:dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
|
17
|
+
:model => "LCQ Deca XP",
|
18
|
+
}
|
19
|
+
## DTA Gen
|
20
|
+
Dta_gen = {
|
21
|
+
:group_scan => 1,
|
22
|
+
:start_time => 1.39999997615814,
|
23
|
+
:start_scan => 1,
|
24
|
+
:num_dta_files => 6952,
|
25
|
+
:min_ion_threshold => 15,
|
26
|
+
:end_mass => 4500.0,
|
27
|
+
:min_group_count => 1,
|
28
|
+
:start_mass => 400.0,
|
29
|
+
:end_scan => 7161,
|
30
|
+
}
|
31
|
+
|
32
|
+
Dta_files_first = {
|
33
|
+
:mh => 1221.88989257812,
|
34
|
+
:dta_tic => 7703132.0,
|
35
|
+
:num_peaks => 74,
|
36
|
+
:charge => 1,
|
37
|
+
:ms_level => 2,
|
38
|
+
:total_num_possible_charge_states => 0,
|
39
|
+
:peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
|
40
|
+
}
|
41
|
+
Dta_files_last = {
|
42
|
+
:mh => 2604.8360326775,
|
43
|
+
:dta_tic => 31977.0,
|
44
|
+
:num_peaks => 17,
|
45
|
+
:charge => 3,
|
46
|
+
:ms_level => 2,
|
47
|
+
:total_num_possible_charge_states => 0,
|
48
|
+
:peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
|
49
|
+
}
|
50
|
+
|
51
|
+
Out_files_first = {
|
52
|
+
:num_hits => 10,
|
53
|
+
:charge => 1,
|
54
|
+
:computer => "VELA",
|
55
|
+
:date_time => "11/17/2006, 04:13 PM,",
|
56
|
+
}
|
57
|
+
|
58
|
+
Out_files_first_hit = {
|
59
|
+
:mh => 1220.5128044522,
|
60
|
+
:deltacn => 0.0,
|
61
|
+
:sp => 96.5815887451172,
|
62
|
+
:xcorr => 1.08377742767334,
|
63
|
+
:id => 224,
|
64
|
+
:rsp => 13,
|
65
|
+
:ions_matched => 8,
|
66
|
+
:ions_total => 20,
|
67
|
+
:peptide => "K.LCPHLTLLPGR.F",
|
68
|
+
:reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
|
69
|
+
}
|
70
|
+
|
71
|
+
Out_files_last = {
|
72
|
+
:num_hits => 10,
|
73
|
+
:charge => 1,
|
74
|
+
:computer => "VELA",
|
75
|
+
:date_time => "11/17/2006, 04:25 PM," ,
|
76
|
+
}
|
77
|
+
Out_files_last_first_hit = {
|
78
|
+
:mh => 2605.9368784522,
|
79
|
+
:deltacn => 0.0,
|
80
|
+
:sp => 76.7447052001953,
|
81
|
+
:xcorr => 0.915680646896362,
|
82
|
+
:id => 13562,
|
83
|
+
:rsp => 4,
|
84
|
+
:ions_matched => 10,
|
85
|
+
:ions_total => 84,
|
86
|
+
:peptide => "K.HLEINPNHPIVETLRQKAETHK.N",
|
87
|
+
:reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
|
88
|
+
}
|
89
|
+
Out_files_last_last_hit = {
|
90
|
+
:mh => 2604.9025174522,
|
91
|
+
:deltacn => 0.307604849338531,
|
92
|
+
:sp => 26.1511478424072,
|
93
|
+
:xcorr => 0.634012818336487,
|
94
|
+
:id => 8105,
|
95
|
+
:rsp => 165,
|
96
|
+
:ions_matched => 6,
|
97
|
+
:ions_total => 84,
|
98
|
+
:peptide => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
|
99
|
+
:reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin"
|
100
|
+
}
|
101
|
+
Sequest_params = {
|
102
|
+
"add_F_Phenylalanine"=>"0.0000",
|
103
|
+
"add_O_Ornithine"=>"0.0000",
|
104
|
+
"add_Y_Tyrosine"=>"0.0000",
|
105
|
+
"add_C_Cysteine"=>"0.0000",
|
106
|
+
"add_A_Alanine"=>"0.0000",
|
107
|
+
"add_J_user_amino_acid"=>"0.0000",
|
108
|
+
"add_X_LorI"=>"0.0000",
|
109
|
+
"add_S_Serine"=>"0.0000",
|
110
|
+
"add_Cterm_peptide"=>"0.0000",
|
111
|
+
"add_Q_Glutamine"=>"0.0000",
|
112
|
+
"add_D_Aspartic_Acid"=>"0.0000",
|
113
|
+
"add_Nterm_protein"=>"0.0000",
|
114
|
+
"add_W_Tryptophan"=>"0.0000",
|
115
|
+
"add_R_Arginine"=>"0.0000",
|
116
|
+
"add_K_Lysine"=>"0.0000",
|
117
|
+
"add_H_Histidine"=>"0.0000",
|
118
|
+
"add_Nterm_peptide"=>"0.0000",
|
119
|
+
"add_E_Glutamic_Acid"=>"0.0000",
|
120
|
+
"add_Z_avg_QandE"=>"0.0000",
|
121
|
+
"add_B_avg_NandD"=>"0.0000",
|
122
|
+
"add_N_Asparagine"=>"0.0000",
|
123
|
+
"add_I_Isoleucine"=>"0.0000",
|
124
|
+
"add_L_Leucine"=>"0.0000",
|
125
|
+
"add_M_Methionine"=>"0.0000",
|
126
|
+
"add_P_Proline"=>"0.0000",
|
127
|
+
"add_G_Glycine"=>"0.0000",
|
128
|
+
"add_U_user_amino_acid"=>"0.0000",
|
129
|
+
"add_T_Threonine"=>"0.0000",
|
130
|
+
"add_V_Valine"=>"0.0000",
|
131
|
+
"add_Cterm_protein"=>"0.0000",
|
132
|
+
"match_peak_tolerance"=>"1.0000",
|
133
|
+
"match_peak_allowed_error"=>"1",
|
134
|
+
"normalize_xcorr"=>"0",
|
135
|
+
"nucleotide_reading_frame"=>"0",
|
136
|
+
"num_results"=>"250",
|
137
|
+
"sequence_header_filter"=>"",
|
138
|
+
"diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
|
139
|
+
"partial_sequence"=>"",
|
140
|
+
"max_num_internal_cleavage_sites"=>"2",
|
141
|
+
"search_engine"=>"SEQUEST",
|
142
|
+
"print_duplicate_references"=>"40",
|
143
|
+
"ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
|
144
|
+
"remove_precursor_peak"=>"0",
|
145
|
+
"num_output_lines"=>"10",
|
146
|
+
"second_database_name"=>"",
|
147
|
+
"first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
|
148
|
+
"peptide_mass_tolerance"=>"1.4000",
|
149
|
+
"digest_mass_range"=>"600.0 3500.0",
|
150
|
+
"enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
|
151
|
+
"show_fragment_ions"=>"0",
|
152
|
+
"protein_mass_filter"=>"0 0",
|
153
|
+
"term_diff_search_options"=>"0.000000 0.000000",
|
154
|
+
"num_description_lines"=>"5",
|
155
|
+
"fragment_ion_tolerance"=>"1.0000",
|
156
|
+
"peptide_mass_units"=>"0",
|
157
|
+
"mass_type_parent"=>"0",
|
158
|
+
"match_peak_count"=>"0",
|
159
|
+
"max_num_differential_per_peptide"=>"1",
|
160
|
+
"ion_cutoff_percentage"=>"0.0000",
|
161
|
+
"mass_type_fragment"=>"0"
|
162
|
+
}
|
163
|
+
|
164
|
+
end
|
165
|
+
|
166
|
+
class TestSRF < Test::Unit::TestCase
|
167
|
+
include ToMatch
|
168
|
+
## Builds the test case and records where the fixture files live.
##
## arg:: forwarded unchanged to the superclass constructor
def initialize(arg)
  super(arg)
  # Every fixture path is anchored at the directory holding this test file.
  here = File.dirname(__FILE__)
  @tfiles = "#{here}/tfiles/"
  @tfiles_l = "#{here}/tfiles_large/"
  # The SRF file exercised by test_basic lives in the large-fixture directory.
  @tf_srf = "#{@tfiles_l}7MIX_STD_110802_1.srf"
end
|
174
|
+
|
175
|
+
## Loads the SRF fixture at @tf_srf, prints how long construction took, and
## spot-checks the parsed object against the expected values in ToMatch
## (not exhaustive).
def test_basic
  began = Time.now
  srf = SRF.new(@tf_srf)
  puts "TOOK: #{Time.now - began} secs"

  header = srf.header
  dta_gen = header.dta_gen

  ## HEADER
  hash_match(Header, header)
  hash_match(Dta_gen, dta_gen)

  ## DTA_FILES
  hash_match(Dta_files_first, srf.dta_files.first)
  hash_match(Dta_files_last, srf.dta_files.last)

  ## OUT_FILES
  # NOTE(review): Out_files_last is defined in ToMatch but is not checked
  # here -- confirm whether that omission is intentional.
  hash_match(Out_files_first, srf.out_files.first)
  hash_match(Out_files_first_hit, srf.out_files.first.hits.first)
  hash_match(Out_files_last_first_hit, srf.out_files.last.hits.first)
  hash_match(Out_files_last_last_hit, srf.out_files.last.hits.last)

  ## SEQUEST_PARAMS
  hash_match(Sequest_params, srf.params)

  ## INDEX
  assert_equal([7161, 7161, 3], srf.index.last)
  assert_equal([2, 2, 1], srf.index.first)

  # The index, the dta files and the out files must all have the same size.
  assert_equal(srf.index.size, srf.dta_files.size)
  assert_equal(srf.out_files.size, srf.dta_files.size)
end
|
202
|
+
|
203
|
+
## Asserts that +obj+ answers every key of +hash+ with the paired value.
##
## Each key is sent to +obj+ as a method name.  Float values are compared
## with assert_in_delta using a tolerance of 1/100000 of the expected
## magnitude; every other value is compared with assert_equal.
##
## hash:: expected values keyed by method name (Symbol or String)
## obj::  object under test; must respond to each key in +hash+
def hash_match(hash, obj)
  hash.each do |key, expected|
    actual = obj.send(key.to_sym)
    message = "param: #{key}"
    if expected.is_a?(Float)
      # Scale by the magnitude: a negative expected value would otherwise
      # produce a negative tolerance, which can never be satisfied.
      tolerance = expected.abs / 100000
      # Expected value goes first so failure messages label it correctly.
      assert_in_delta(expected, actual, tolerance, message)
    else
      assert_equal(expected, actual, message)
    end
  end
end
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: mspire
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.5
|
7
|
-
date: 2007-
|
6
|
+
version: 0.1.7
|
7
|
+
date: 2007-03-27 00:00:00 -05:00
|
8
8
|
summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -32,15 +32,18 @@ files:
|
|
32
32
|
- lib/spec_id
|
33
33
|
- lib/align
|
34
34
|
- lib/spec
|
35
|
-
- lib/spec_id.rb
|
36
|
-
- lib/fasta.rb
|
37
35
|
- lib/sample_enzyme.rb
|
38
|
-
- lib/
|
36
|
+
- lib/fasta.rb
|
39
37
|
- lib/roc.rb
|
38
|
+
- lib/spec_id.rb
|
39
|
+
- lib/gi.rb
|
40
|
+
- lib/align.rb
|
41
|
+
- lib/spec_id/srf.rb
|
40
42
|
- lib/spec_id/bioworks.rb
|
41
43
|
- lib/spec_id/proph.rb
|
42
44
|
- lib/spec_id/sequest.rb
|
43
|
-
- lib/spec_id/
|
45
|
+
- lib/spec_id/precision.rb
|
46
|
+
- lib/spec_id/aa_freqs.rb
|
44
47
|
- lib/align/chams.rb
|
45
48
|
- lib/spec/mzxml
|
46
49
|
- lib/spec/mzdata
|
@@ -54,6 +57,8 @@ files:
|
|
54
57
|
- README
|
55
58
|
- Rakefile
|
56
59
|
- LICENSE
|
60
|
+
- changelog.txt
|
61
|
+
- release_notes.txt
|
57
62
|
- bin/fasta_cat_mod.rb
|
58
63
|
- bin/fasta_mod.rb
|
59
64
|
- bin/gi2annot.rb
|
@@ -63,45 +68,53 @@ files:
|
|
63
68
|
- bin/bioworks2sequestXML_gui.rb
|
64
69
|
- bin/bioworks2excel.rb
|
65
70
|
- bin/pepproph_filter.rb
|
71
|
+
- bin/filter_spec_id.rb
|
66
72
|
- bin/bioworks_to_pepxml.rb
|
73
|
+
- bin/mzxml_to_lmat.rb
|
67
74
|
- bin/protxml2prots_peps.rb
|
68
|
-
- bin/false_positive_rate.rb
|
69
75
|
- bin/id_precision.rb
|
70
|
-
- bin/mzxml_to_lmat.rb
|
71
76
|
- bin/id_class_anal.rb
|
72
|
-
- bin/
|
73
|
-
-
|
77
|
+
- bin/precision.rb
|
78
|
+
- bin/fasta_shaker.rb
|
79
|
+
- script/prep_dir.rb
|
80
|
+
- script/msvis.rb
|
81
|
+
- script/gen_database_searching.rb
|
74
82
|
- script/mzXML2timeIndex.rb
|
75
83
|
- script/tpp_installer.rb
|
76
|
-
- script/
|
84
|
+
- script/create_little_pepxml.rb
|
77
85
|
- script/histogram_probs.rb
|
78
|
-
- script/prep_dir.rb
|
79
86
|
- script/top_hit_per_scan.rb
|
80
87
|
- script/filter-peps.rb
|
81
|
-
- script/gen_database_searching.rb
|
82
88
|
- script/simple_protein_digestion.rb
|
89
|
+
- script/genuine_tps_and_probs.rb
|
90
|
+
- script/estimate_fpr_by_cysteine.rb
|
91
|
+
- script/find_cysteine_background.rb
|
83
92
|
test_files:
|
84
|
-
- test/
|
93
|
+
- test/tc_srf.rb
|
85
94
|
- test/tc_proph.rb
|
86
95
|
- test/tc_sequest.rb
|
87
|
-
- test/tc_spec.rb
|
88
96
|
- test/tc_align.rb
|
97
|
+
- test/tc_spec.rb
|
98
|
+
- test/tc_aa_freqs.rb
|
89
99
|
- test/tc_protein_summary.rb
|
90
100
|
- test/tc_fasta.rb
|
91
101
|
- test/tc_bioworks.rb
|
92
|
-
- test/tc_msrun.rb
|
93
102
|
- test/tc_peptide_parent_times.rb
|
103
|
+
- test/tc_msrun.rb
|
94
104
|
- test/tc_spec_id.rb
|
95
105
|
- test/tc_mzxml.rb
|
96
106
|
- test/tc_id_precision.rb
|
97
107
|
- test/tc_id_class_anal.rb
|
98
108
|
- test/tc_filter_peps.rb
|
99
|
-
- test/
|
109
|
+
- test/tc_precision.rb
|
100
110
|
- test/tc_roc.rb
|
101
111
|
- test/tc_scan.rb
|
102
|
-
- test/tc_bioworks_to_pepxml.rb
|
103
112
|
- test/tc_mzxml_to_lmat.rb
|
113
|
+
- test/tc_bioworks_to_pepxml.rb
|
104
114
|
- test/tc_sample_enzyme.rb
|
115
|
+
- test/tc_fasta_shaker.rb
|
116
|
+
- test/tc_gi.rb
|
117
|
+
- test/tc_spec_id_xml.rb
|
105
118
|
rdoc_options:
|
106
119
|
- --main
|
107
120
|
- README
|
@@ -121,13 +134,14 @@ executables:
|
|
121
134
|
- bioworks2sequestXML_gui.rb
|
122
135
|
- bioworks2excel.rb
|
123
136
|
- pepproph_filter.rb
|
137
|
+
- filter_spec_id.rb
|
124
138
|
- bioworks_to_pepxml.rb
|
139
|
+
- mzxml_to_lmat.rb
|
125
140
|
- protxml2prots_peps.rb
|
126
|
-
- false_positive_rate.rb
|
127
141
|
- id_precision.rb
|
128
|
-
- mzxml_to_lmat.rb
|
129
142
|
- id_class_anal.rb
|
130
|
-
-
|
143
|
+
- precision.rb
|
144
|
+
- fasta_shaker.rb
|
131
145
|
extensions: []
|
132
146
|
|
133
147
|
requirements:
|