mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,209 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
4
+ require 'spec_id/srf'
5
+
6
+ include SRFHelper
7
+
8
+ #tfiles = File.dirname(__FILE__) + '/tfiles/'
9
+ #tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
10
+ #tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
11
+ #tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
12
+ #if File.exist? tfiles_l
13
+ # start = Time.now
14
+ # $group = SRFGroup.new([tf_srf, tf_srf_inv])
15
+ # $srf = $group.srfs.first
16
+ # puts "Time to read and compile two SRF: #{Time.now - start} secs"
17
+ #end
18
+
19
class Hash
  # Compares this hash of expected values against the attributes of +obj+.
  #
  # Every key (String or Symbol) is converted to a symbol and sent to +obj+:
  # * :peaks, :hits, :prots -- the expected value is compared against the
  #   +size+ of whatever the reader returns.
  # * Float expectations    -- compared with an absolute tolerance
  #   (0.0001 for :ppm, 0.0000001 for every other key).
  # * everything else       -- compared with <tt>==</tt>.
  #
  # On a mismatch the key, the expected value and the value that was actually
  # compared (the size for the collection keys) are printed to stdout.
  # Checking stops at the first mismatch because +all?+ short-circuits.
  #
  # Returns true when every pair matches, false otherwise.
  def object_match(obj)
    size_keys = [:peaks, :hits, :prots]
    all? do |key, expected|
      key = key.to_sym
      # Read the attribute once and reuse it for comparison and diagnostics.
      actual = obj.send(key)
      matched =
        if size_keys.include?(key)
          # Only the element count is pinned for these collections.
          actual = actual.size
          actual == expected
        elsif expected.is_a?(Float)
          tolerance = (key == :ppm) ? 0.0001 : 0.0000001
          (expected - actual).abs <= tolerance
        else
          actual == expected
        end
      unless matched
        puts "BAD KEY: #{key}"
        puts "need: #{expected}"
        puts "got: #{actual}"
      end
      matched
    end
  end
end
44
+
45
klass = SRF

# Shared examples run against every supported .srf version.
# A group that includes them is expected to set, before these examples run,
# @file (path to the .srf file) and the expectation hashes @header, @dta_gen,
# @dta_files_first, @dta_files_last, @out_files_first, @out_files_last,
# @out_files_first_last_pep, @out_files_last_last_pep and @params.
describe 'an srf reader', :shared => true do
  before(:all) do
    @srf_obj = klass.new(@file)
  end

  it 'retrieves correct header info' do
    @header.object_match(@srf_obj.header).should be_true
    @dta_gen.object_match(@srf_obj.header.dta_gen).should be_true
  end

  # a few more dta params could be added in here:
  it 'retrieves correct dta files' do
    dta_files = @srf_obj.dta_files
    @dta_files_first.object_match(dta_files.first).should be_true
    @dta_files_last.object_match(dta_files.last).should be_true
  end

  it 'retrieves correct out files' do
    # whole out files first, then the last peptide hit of each
    @out_files_first.object_match(@srf_obj.out_files.first).should be_true
    @out_files_last.object_match(@srf_obj.out_files.last).should be_true
    @out_files_first_last_pep.object_match(@srf_obj.out_files.first.hits.last).should be_true
    @out_files_last_last_pep.object_match(@srf_obj.out_files.last.hits.last).should be_true
  end

  # disabled example (xit)
  xit 'retrieves correct params' do
    @params.object_match(@srf_obj.params).should be_true
  end

  it_should('retrieve probabilities if available')
end
76
+
77
+
78
+
79
# Runs the shared 'an srf reader' examples against the version 3.2 file,
# using the File_32 expectations from SRFHelper.
describe klass, " reading a version 3.2 .srf file" do
  spec_large do
    before(:all) do
      @file = Tfiles_l + '/sash7/sequest/7MIX_STD_110802_1.srf'
      expectation_names = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params)
      # expose each expectation hash as @<name> for the shared examples
      expectation_names.each { |name| instance_variable_set("@#{name}", File_32[name.to_sym]) }
    end
    it_should_behave_like "an srf reader"
  end
end
90
+
91
+
92
# Placeholder group: no version 3.3 examples exist yet, only an it_should marker.
describe(klass, " reading a version 3.3 .srf file") do
  it_should('reading a version 3.3 .srf file')
end
95
+
96
+
97
# Runs the shared 'an srf reader' examples against the version 3.5
# (bioworks 3.3.1) file and checks that a truncated file from an aborted run
# is read without raising.
describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
  spec_large do
    before(:all) do
      @file = Tfiles_l + '/sash7/sequest/bioworks331/7MIX_STD_110802_1.srf'
      %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
        instance_variable_set("@#{c}", File_35[c.to_sym])
      end
    end
    it_should_behave_like "an srf reader"
  end

  it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
    file = Tfiles + '/corrupted_900.srf'
    error_msg = Tfiles + '/error_msg.tmp'
    original_stderr = $stderr
    begin
      File.open(error_msg, 'w') do |err_fh|
        # Capture what the reader writes to stderr so it can be inspected below.
        $stderr = err_fh
        begin
          srf_obj = klass.new(file)
          srf_obj.base_name.should == '900'
          srf_obj.params.should be_nil
          header = srf_obj.header
          header.db_filename.should == "C:\\Xcalibur\\database\\sf_hs_44_36f_longesttrpt.fasta.hdr"
          header.enzyme.should == 'Enzyme:Trypsin(KR) (2)'
          dta_gen = header.dta_gen
          dta_gen.start_time.should be_close(1.39999997615814, 0.00000000001)
          srf_obj.dta_files.should == []
          srf_obj.out_files.should == []
        ensure
          # Restore before the handle is closed; otherwise $stderr is left
          # pointing at a closed file for everything that runs afterwards.
          $stderr = original_stderr
        end
      end
      IO.read(error_msg).should =~ /corrupted_900\.srf/
    ensure
      # Remove the temp file even when an expectation above fails.
      File.unlink(error_msg) if File.exist?(error_msg)
    end
  end

end
128
+
129
+
130
# Reads the older 7MIX .srf file once and compares selected attributes with
# the expectation constants from SRFHelper.
describe klass, 'reading an srf file' do

  spec_large do
    before(:all) do
      start = Time.now
      tf_srf = Tfiles_l + "/sash7/sequest/older/7MIX_STD_110802_1.srf"
      @srf = klass.new(tf_srf)
      puts "- read in #{Time.now - start} seconds"
    end

    #def initialize(arg)
    #  super(arg)
    #  @tfiles = File.dirname(__FILE__) + '/tfiles/'
    #  @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
    #  @srg_file = @tfiles + "tmp_bioworks.srg"
    #  @srf = $srf
    #  @group = $group
    #end

    # Intentionally empty: only requires that the before(:all) read succeeded.
    it 'reads' do
    end

    it 'reads an srf file (w/o probs) and extracts all basic information' do
      ## Verify that we have everything and it is as we expect (not exhaustive)
      head = @srf.header
      dtgen = head.dta_gen
      ## HEADER
      hash_match(Header, head)
      hash_match(Dta_gen, dtgen)
      ## DTA_FILES
      hash_match(Dta_files_first, @srf.dta_files.first)
      hash_match(Dta_files_last, @srf.dta_files.last)
      ## OUT_FILES
      hash_match(Out_files_first, @srf.out_files.first)
      hit = @srf.out_files.first.hits.first
      hash_match(Out_files_first_hit, hit)
      hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
      hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
      ## SEQUEST_PARAMS
      hash_match(Sequest_params, @srf.params)
      ## INDEX
      @srf.index.last.should == [7161, 7161, 3]
      @srf.index.first.should == [2, 2, 1]

      @srf.dta_files.size.should == @srf.index.size
      @srf.dta_files.size.should == @srf.out_files.size
    end
    it_should 'give accurate peptides' do
    end
  end

  ## treats reference special
  #
  # Asserts that +srf+ answers every key of +hash+ with the expected value.
  # * Float values are compared with a relative tolerance of 1/100000 of the
  #   expected value.
  # * :reference is checked against the first protein's reference, using only
  #   the first 38 characters of the expected string.
  # * everything else must be == to the expected value.
  def hash_match(hash, srf)
    hash.each do |k,v|
      if v.is_a? Float
        # A tolerance must be positive: take the magnitude so negative
        # expectations work, and keep a tiny floor so an expected 0.0 does not
        # produce a zero-width window.
        # NOTE(review): be_close presumably compares with a strict '<' --
        # confirm against the installed RSpec version.
        delta = [(v / 100000).abs, Float::EPSILON].max
        srf.send(k.to_sym).should be_close(v, delta)
      elsif k == :reference
        srf.prots.first.reference.should == v[0,38]
      else
        srf.send(k.to_sym).should == v
      end
    end
  end
end
195
+
196
# Checks that an SRFGroup can write an .srg file from a list of filenames
# that do not have to exist on disk.
describe SRFGroup, 'creating an srg file' do

  it 'creates one given some non-existing, relative filenames' do
    ## TEST SRG GROUPING:
    filenames = %w(my/lucky/filename /another/filename)
    @srg = SRFGroup.new
    @srg.filenames = filenames
    srg_file = Tfiles + '/tmp_srg_file.srg'
    begin
      @srg.to_srg(srg_file)
      File.exist?(srg_file).should be_true
    ensure
      # Clean up even when the expectation fails; the guard keeps a missing
      # file from raising here and masking the real failure.
      File.unlink(srg_file) if File.exist?(srg_file)
    end
  end

end
@@ -0,0 +1,302 @@
1
+ module SRFHelper
2
+
3
+ File_32 = {
4
+ :header =>
5
+ {
6
+ :params_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\sashimi7.params",
7
+ :model => "LCQ Deca XP",
8
+ :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_dta.log",
9
+ :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
10
+ :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
11
+ :modifications => "",
12
+ :enzyme => "Enzyme:Trypsin(KR/P) (2)",
13
+ :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_sequest.log",
14
+ :version => "3.2",
15
+ :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW"
16
+ },
17
+ :dta_gen => {
18
+ :min_group_count => 1,
19
+ :start_time => 1.39999997615814,
20
+ :start_mass => 400.0,
21
+ :end_scan => 7161,
22
+ :group_scan => 1,
23
+ :start_scan => 1,
24
+ :num_dta_files => 6952,
25
+ :min_ion_threshold => 15,
26
+ :end_mass => 4500.0,
27
+ },
28
+ :dta_files_first => {
29
+ :mh => 1221.88989257812,
30
+ :dta_tic => 7703132.0,
31
+ :num_peaks => 74,
32
+ :charge => 1,
33
+ :ms_level => 2,
34
+ :total_num_possible_charge_states => 0,
35
+ :peaks => 592,
36
+ },
37
+ :dta_files_last => {
38
+ :mh => 2604.8360326775,
39
+ :dta_tic => 31977.0,
40
+ :num_peaks => 17,
41
+ :charge => 3,
42
+ :ms_level => 2,
43
+ :total_num_possible_charge_states => 0,
44
+ :peaks => 136,
45
+ },
46
+ :out_files_first => {
47
+ :num_hits => 10,
48
+ :computer => 'VELA',
49
+ :date_time => '05/12/2006, 10:58 AM,',
50
+ :hits => 10
51
+ },
52
+ :out_files_last => {
53
+ :num_hits => 10,
54
+ :computer => 'VELA',
55
+ :date_time => '05/12/2006, 11:11 AM,',
56
+ :hits => 10
57
+ },
58
+ :out_files_first_last_pep => {
59
+ :aaseq => 'QFSLSKSSLPK',
60
+ :sequence => 'K.QFSLSKSSLPK.S',
61
+ :mh => 1222.4156904522,
62
+ :deltacn => 1.1,
63
+ :sp => 57.4083709716797,
64
+ :xcorr => 0.802009999752045,
65
+ :id => 19977,
66
+ :rsp => 60,
67
+ :ions_matched => 7,
68
+ :ions_total => 20,
69
+ :prots => 1,
70
+ :deltamass => 0.525797874074897,
71
+ :ppm => 430.315265940608,
72
+ :base_name => '7MIX_STD_110802_1',
73
+ :first_scan => 2,
74
+ :last_scan => 2,
75
+ :charge => 1
76
+ },
77
+ :out_files_last_last_pep =>
78
+ {
79
+ :aaseq => 'EAFLVNSDLTLRAQLTEFRDHK',
80
+ :sequence => 'R.EAFLVNSDLTLRAQLTEFRDHK.L',
81
+ :mh => 2604.9025174522,
82
+ :deltacn => 1.1,
83
+ :sp => 26.1511478424072,
84
+ :xcorr => 0.634012818336487,
85
+ :id => 8105,
86
+ :rsp => 165,
87
+ :ions_matched => 6,
88
+ :ions_total => 84,
89
+ :prots => 1,
90
+ :deltamass => 0.0664847746993473,
91
+ :ppm => 25.523592988311,
92
+ :base_name => '7MIX_STD_110802_1',
93
+ :first_scan => 7161,
94
+ :last_scan => 7161,
95
+ :charge => 3,
96
+ },
97
+
98
+ :params => {
99
+ "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.4000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"},
100
+
101
+ }
102
+
103
+ File_35 = {}
104
+ File_32.each {|k,v| File_35[k] = v.dup }
105
+
106
+ File_35[:header].merge!( {
107
+ :sequest_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_sequest.log",
108
+ :raw_filename => "C:\\Xcalibur\\data\\john\\sash7\\7MIX_STD_110802_1.RAW",
109
+ :params_filename => "C:\\Xcalibur\\sequest\\john\\bioworks331\\sashimi7.params",
110
+ :dta_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_dta.log",
111
+ :version=>"3.5"
112
+ } )
113
+
114
+ File_35[:params].merge!( {
115
+ "add_O_Ornithine"=>"0.00000", "add_F_Phenylalanine"=>"0.00000", "add_A_Alanine"=>"0.00000", "add_C_Cysteine"=>"0.00000", "add_Y_Tyrosine"=>"0.00000", "add_X_LorI"=>"0.00000", "add_J_user_amino_acid"=>"0.00000", "add_Cterm_peptide"=>"0.00000", "add_S_Serine"=>"0.00000", "add_Nterm_protein"=>"0.00000", "add_D_Aspartic_Acid"=>"0.00000", "add_Q_Glutamine"=>"0.00000", "add_K_Lysine"=>"0.00000", "add_R_Arginine"=>"0.00000", "add_W_Tryptophan"=>"0.00000", "add_Nterm_peptide"=>"0.00000", "add_H_Histidine"=>"0.00000", "add_L_Leucine"=>"0.00000", "add_I_Isoleucine"=>"0.00000", "add_N_Asparagine"=>"0.00000", "add_B_avg_NandD"=>"0.00000", "add_Z_avg_QandE"=>"0.00000", "add_E_Glutamic_Acid"=>"0.00000", "add_G_Glycine"=>"0.00000", "add_P_Proline"=>"0.00000", "add_M_Methionine"=>"0.00000", "add_Cterm_protein"=>"0.00000", "add_V_Valine"=>"0.00000", "add_T_Threonine"=>"0.00000", "add_U_user_amino_acid"=>"0.00000", "match_peak_tolerance"=>"1.00000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.40000", "digest_mass_range"=>"400.0000 4500.0000", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.00000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "fragment_ion_units"=>"0", 
"ion_cutoff_percentage"=>"0.00000", "mass_type_fragment"=>"0"}
116
+
117
+ )
118
+
119
+ File_35[:out_files_first].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:11 PM,'} )
120
+ File_35[:out_files_last].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:15 PM,'} )
121
+ # I'm assuming this difference is due to higher precision mass...? (not a
122
+ # parsing error)
123
+ File_35[:out_files_first_last_pep][:rsp] = 56
124
+ File_35[:out_files_last_last_pep][:rsp] = 125
125
+
126
+
127
+ Header = {
128
+ :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
129
+ :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
130
+ :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_sequest.log",
131
+ :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
132
+ :enzyme => "Enzyme:Trypsin(KR/P) (2)",
133
+ :params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
134
+ :modifications => "",
135
+ :version => "3.2",
136
+ :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
137
+ :model => "LCQ Deca XP",
138
+ }
139
+ ## DTA Gen
140
+ Dta_gen = {
141
+ :group_scan => 1,
142
+ :start_time => 1.39999997615814,
143
+ :start_scan => 1,
144
+ :num_dta_files => 6952,
145
+ :min_ion_threshold => 15,
146
+ :end_mass => 4500.0,
147
+ :min_group_count => 1,
148
+ :start_mass => 400.0,
149
+ :end_scan => 7161,
150
+ }
151
+
152
+ Dta_files_first = {
153
+ :mh => 1221.88989257812,
154
+ :dta_tic => 7703132.0,
155
+ :num_peaks => 74,
156
+ :charge => 1,
157
+ :ms_level => 2,
158
+ :total_num_possible_charge_states => 0,
159
+ :peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
160
+ }
161
+ Dta_files_last = {
162
+ :mh => 2604.8360326775,
163
+ :dta_tic => 31977.0,
164
+ :num_peaks => 17,
165
+ :charge => 3,
166
+ :ms_level => 2,
167
+ :total_num_possible_charge_states => 0,
168
+ :peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
169
+ }
170
+
171
+ Out_files_first = {
172
+ :num_hits => 10,
173
+ :computer => "VELA",
174
+ :date_time => "11/17/2006, 04:13 PM,",
175
+ }
176
+
177
+ Out_files_first_hit = {
178
+ :mh => 1220.5128044522,
179
+ :deltacn => 0.071944423019886, ## this is the modified version
180
+ :sp => 96.5815887451172,
181
+ :xcorr => 1.08377742767334,
182
+ :id => 224,
183
+ :rsp => 13,
184
+ :ions_matched => 8,
185
+ :ions_total => 20,
186
+ :sequence => "K.LCPHLTLLPGR.F",
187
+ :aaseq => "LCPHLTLLPGR",
188
+ :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
189
+ :first_scan => 2,
190
+ :last_scan => 2,
191
+ :base_name => '7MIX_STD_110802_1',
192
+ :charge => 1,
193
+ }
194
+
195
+ Out_files_last = {
196
+ :num_hits => 10,
197
+ :computer => "VELA",
198
+ :date_time => "11/17/2006, 04:25 PM," ,
199
+ }
200
+ Out_files_last_first_hit = {
201
+ :mh => 2605.9368784522,
202
+ :deltacn => 0.03921128064394,
203
+ :sp => 76.7447052001953,
204
+ :xcorr => 0.915680646896362,
205
+ :id => 13562,
206
+ :rsp => 4,
207
+ :ions_matched => 10,
208
+ :ions_total => 84,
209
+ :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
210
+ :aaseq => "HLEINPNHPIVETLRQKAETHK",
211
+ :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
212
+ :first_scan => 7161,
213
+ :last_scan => 7161,
214
+ :base_name => '7MIX_STD_110802_1',
215
+ :deltamass => 2605.9368784522 - 2604.8360326775,
216
+ :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
217
+ :charge => 3,
218
+ }
219
+ Out_files_last_last_hit = {
220
+ :mh => 2604.9025174522,
221
+ :deltacn => 1.1,
222
+ :sp => 26.1511478424072,
223
+ :xcorr => 0.634012818336487,
224
+ :id => 8105,
225
+ :rsp => 165,
226
+ :ions_matched => 6,
227
+ :ions_total => 84,
228
+ :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
229
+ :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
230
+ :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
231
+ :first_scan => 7161,
232
+ :last_scan => 7161,
233
+ :base_name => '7MIX_STD_110802_1',
234
+ :deltamass => 2604.9025174522 - 2604.8360326775,
235
+ :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
236
+ :charge => 3,
237
+ }
238
+ Sequest_params = {
239
+ "add_F_Phenylalanine"=>"0.0000",
240
+ "add_O_Ornithine"=>"0.0000",
241
+ "add_Y_Tyrosine"=>"0.0000",
242
+ "add_C_Cysteine"=>"0.0000",
243
+ "add_A_Alanine"=>"0.0000",
244
+ "add_J_user_amino_acid"=>"0.0000",
245
+ "add_X_LorI"=>"0.0000",
246
+ "add_S_Serine"=>"0.0000",
247
+ "add_Cterm_peptide"=>"0.0000",
248
+ "add_Q_Glutamine"=>"0.0000",
249
+ "add_D_Aspartic_Acid"=>"0.0000",
250
+ "add_Nterm_protein"=>"0.0000",
251
+ "add_W_Tryptophan"=>"0.0000",
252
+ "add_R_Arginine"=>"0.0000",
253
+ "add_K_Lysine"=>"0.0000",
254
+ "add_H_Histidine"=>"0.0000",
255
+ "add_Nterm_peptide"=>"0.0000",
256
+ "add_E_Glutamic_Acid"=>"0.0000",
257
+ "add_Z_avg_QandE"=>"0.0000",
258
+ "add_B_avg_NandD"=>"0.0000",
259
+ "add_N_Asparagine"=>"0.0000",
260
+ "add_I_Isoleucine"=>"0.0000",
261
+ "add_L_Leucine"=>"0.0000",
262
+ "add_M_Methionine"=>"0.0000",
263
+ "add_P_Proline"=>"0.0000",
264
+ "add_G_Glycine"=>"0.0000",
265
+ "add_U_user_amino_acid"=>"0.0000",
266
+ "add_T_Threonine"=>"0.0000",
267
+ "add_V_Valine"=>"0.0000",
268
+ "add_Cterm_protein"=>"0.0000",
269
+ "match_peak_tolerance"=>"1.0000",
270
+ "match_peak_allowed_error"=>"1",
271
+ "normalize_xcorr"=>"0",
272
+ "nucleotide_reading_frame"=>"0",
273
+ "num_results"=>"250",
274
+ "sequence_header_filter"=>"",
275
+ "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
276
+ "partial_sequence"=>"",
277
+ "max_num_internal_cleavage_sites"=>"2",
278
+ "search_engine"=>"SEQUEST",
279
+ "print_duplicate_references"=>"40",
280
+ "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
281
+ "remove_precursor_peak"=>"0",
282
+ "num_output_lines"=>"10",
283
+ "second_database_name"=>"",
284
+ "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
285
+ "peptide_mass_tolerance"=>"1.4000",
286
+ "digest_mass_range"=>"600.0 3500.0",
287
+ "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
288
+ "show_fragment_ions"=>"0",
289
+ "protein_mass_filter"=>"0 0",
290
+ "term_diff_search_options"=>"0.000000 0.000000",
291
+ "num_description_lines"=>"5",
292
+ "fragment_ion_tolerance"=>"1.0000",
293
+ "peptide_mass_units"=>"0",
294
+ "mass_type_parent"=>"0",
295
+ "match_peak_count"=>"0",
296
+ "max_num_differential_per_peptide"=>"1",
297
+ "ion_cutoff_percentage"=>"0.0000",
298
+ "mass_type_fragment"=>"0"
299
+ }
300
+
301
+ end
302
+
@@ -0,0 +1,33 @@
1
+
2
+ module SpecID::Pep
3
+
4
# filter must be a hash with these keys allowed:
# :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar
#
# Keys may be Symbols or Strings.  Rules applied per key:
# * xcorrN              -- applies only when N equals this peptide's charge;
#   then xcorr must be >= the value.  For any other charge it is ignored.
# * include_deltacnstar -- when the value is exactly false, deltacn must be
#   <= 1.0; any other value passes.
# * ppm                 -- ppm must be <= the value.
# * deltacn             -- deltacn must be >= the value.
# * any other key       -- ignored (passes).
#
# Returns true when every rule passes (an empty filter passes).
def pass_filters?(filter)
  filter.all? do |k,v|
    name = k.to_s
    if name[0...-1] == 'xcorr' && name[-1,1].to_i == self.charge
      self.xcorr >= v
    else
      case name
      when 'include_deltacnstar'
        # only an explicit false excludes hits whose deltacn is above 1.0
        if v == false
          self.deltacn <= 1.0
        else
          true
        end
      when 'ppm'
        self.send(k) <= v
      when 'deltacn'
        self.send(k) >= v
      else
        true
      end
    end
  end
end

# Returns the logical negation of pass_filters? for the same filter hash.
def fail_filters?(filter)
  !pass_filters?(filter)
end
31
+
32
+ end
33
+