mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,246 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
-
3
- require 'spec_id/sqt'
4
- require 'spec_id/srf'
5
-
6
# Header values expected in the SQT written from the large test .srf file.
# The String class as a value stands for "any string"; an Array lists the
# values accepted for a key that can occur on several header lines.
SpecHelperHeaderHash = {
  'SQTGenerator'        => 'mspire',
  'SQTGeneratorVersion' => String,
  'Database'            => 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta',
  'FragmentMasses'      => 'AVG',
  'PrecursorMasses'     => 'AVG',
  'StartTime'           => nil,
  'Alg-MSModel'         => 'LCQ Deca XP',
  'Alg-PreMassUnits'    => 'amu',
  'DBLocusCount'        => '4237',
  'Alg-FragMassTol'     => '1.0000',
  'Alg-PreMassTol'      => '1.4000',
  'Alg-IonSeries'       => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
  'Alg-Enzyme'          => 'Trypsin(KR/P) (2)',
  'Comment'             => ['Created from Bioworks .srf file'],
  'StaticMod'           => ['C=160.1901', 'Cterm=10.1230', 'E=161.4455'],
  'DynamicMod'          => ['STY*=+79.97990', 'M#=+14.02660'],
}

# The first four non-header lines expected in the generated SQT file.
SpecHelperOtherLines = <<'END'
S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
L gi|16128266|ref|NP_414815.1|
END

# The last three non-header lines expected in the generated SQT file.
SpecHelperOtherLinesEnd = <<'END'
L gi|90111093|ref|NP_414704.4|
M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12 54 K.LQKIITNSY*K U
L gi|90111124|ref|NP_414904.2|
END
37
-
38
- describe 'converting a large srf to sqt' do
39
# Deletes +file+ when it is present on disk; a missing file is left alone.
def del(file)
  File.unlink(file) if File.exist?(file)
end
44
-
45
# Checks every SQT header line against a hash of expected values.
#
# header_lines:: tab-separated lines; the second field is the key and the
#                third field is the value (a trailing newline is stripped)
# hash::         expected values by key. An Array accepts any of its
#                members, the String class accepts any String value, and
#                anything else must equal the parsed value.
#
# Returns true when every line matches and false otherwise. The first
# mismatch is printed to stdout ("FAILED: " followed by the key, the value
# found and the expectation) and ends the scan.
def header_hash_match(header_lines, hash)
  header_lines.all? do |line|
    _marker, key, value = line.chomp.split("\t")
    expected = hash[key]
    matched =
      if expected.is_a?(Array)
        expected.include?(value)
      elsif expected == String
        value.is_a?(String)
      else
        value == expected
      end
    # one shared report for every kind of mismatch
    unless matched
      puts "FAILED: "
      p key
      p value
      p expected
    end
    matched
  end
end
74
-
75
# Examples converting a large .srf file to SQT with SRF#to_sqt.
# All examples share the SRF object built in before(:all) and write to
# @output, which is removed after every example.
spec_large do
  before(:all) do
    @file = Tfiles_l + '/opd1_static_diff_mods/000.srf'
    @output = Tfiles_l + '/opd1_static_diff_mods/000.sqt.tmp'
    @srf = SRF.new(@file)
    @original_db_filename = @srf.header.db_filename
  end

  # remove the temporary output even when an expectation fails mid-example
  after(:each) do
    del(@output)
  end

  it 'converts without bothering with the database' do
    @srf.to_sqt(@output)
    @output.exist_as_a_file?.should be_true
    lines = File.readlines(@output)
    lines.size.should == 80910
    header_lines = lines.grep(/^H/)
    (header_lines.size > 10).should be_true
    header_hash_match(header_lines, SpecHelperHeaderHash).should be_true
    other_lines = lines.grep(/^[^H]/)
    other_lines[0,4].join('').should == SpecHelperOtherLines
    other_lines[-3,3].join('').should == SpecHelperOtherLinesEnd
  end

  it 'warns if the db path is incorrect and we want to update db info' do
    # requires some knowledge of how the database file is extracted
    # internally
    wacky_path = '/not/a/real/path/wacky.fasta'
    my_error_string = ''
    original_stderr = $stderr
    begin
      @srf.header.db_filename = wacky_path
      StringIO.open(my_error_string, 'w') do |strio|
        $stderr = strio
        @srf.to_sqt(@output, :db_info => true)
      end
    ensure
      # the SRF object is shared between examples and $stderr is global, so
      # both must be put back even if the conversion raises
      $stderr = original_stderr
      @srf.header.db_filename = @original_db_filename
    end
    my_error_string.should include(wacky_path)
    @output.exist_as_a_file?.should be_true
    IO.readlines(@output).size.should == 80910
  end

  it 'can get db info with correct path' do
    @srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33')
    @output.exist_as_a_file?.should be_true
    lines = IO.readlines(@output)
    has_md5 = lines.any? do |line|
      line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/
    end
    has_md5.should be_true

    has_seq_len = lines.any? do |line|
      # frozen
      line =~ /DBSeqLength\s+1342842/
    end
    has_seq_len.should be_true
    lines.size.should == 80912
  end

  it 'can update the Database' do
    @srf.to_sqt(@output, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33', :update_db_path => true)
    regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
    updated_db = IO.readlines(@output).any? do |line|
      line =~ regexp
    end
    updated_db.should be_true
  end
end
139
- end
140
-
141
# Header values the SQT examples compare against the header of small.sqt,
# keyed by header name. It is filled from the key/value pairs listed in
# header_doublets.
HeaderHash = {}
header_doublets = [
  ['SQTGenerator', 'mspire'],
  ['SQTGeneratorVersion', '0.3.1'],
  ['Database', 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta'],
  ['FragmentMasses', 'AVG'],
  ['PrecursorMasses', 'AVG'],
  ['StartTime', ''],
  ['Alg-MSModel', 'LCQ Deca XP'],
  ['DBLocusCount', '4237'],
  ['Alg-FragMassTol', '1.0000'],
  ['Alg-PreMassTol', '25.0000'],
  ['Alg-IonSeries', '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0'],
  ['Alg-PreMassUnits', 'ppm'],
  ['Alg-Enzyme', 'Trypsin(KR/P) (2)'],

  ['Comment', ['ultra small file created for testing', 'Created from Bioworks .srf file']],
  ['DynamicMod', ['M*=+15.99940', 'STY#=+79.97990']],
  ['StaticMod', []],
]
header_doublets.each do |key, value|
  HeaderHash[key] = value
end

# Attribute values expected for selected spectra, matches and loci of
# small.sqt. Each inner hash maps a reader method name to its expected value.
TestSpectra = {
  :first => {
    :first_scan => 2,
    :last_scan => 2,
    :charge => 1,
    :time_to_process => 0.0,
    :node => "TESLA",
    :mh => 390.92919921875,
    :total_intensity => 2653.90307617188,
    :lowest_sp => 0.0,
    :num_matched_peptides => 0,
    :matches => [],
  },
  :last => {
    :first_scan => 27,
    :last_scan => 27,
    :charge => 1,
    :time_to_process => 0.0,
    :node => "TESLA",
    :mh => 393.008056640625,
    :total_intensity => 2896.16967773438,
    :lowest_sp => 0.0,
    :num_matched_peptides => 0,
    :matches => [],
  },
  :seventeenth => {
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :time_to_process => 0.0,
    :node => "TESLA",
    :mh => 1022.10571289062,
    :total_intensity => 3637.86059570312,
    :lowest_sp => 0.0,
    :num_matched_peptides => 41,
  },
  :first_match_17 => {
    :rxcorr => 1,
    :rsp => 5,
    :mh => 1022.11662242,
    :deltacn_orig => 0.0,
    :xcorr => 0.725152492523193,
    :sp => 73.9527359008789,
    :ions_matched => 6,
    :ions_total => 24,
    :sequence => "-.MGT#TTM*GVK.L",
    :manual_validation_status => "U",
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :deltacn => 0.0672458708286285,
    :aaseq => 'MGTTTMGVK',
  },
  :last_match_17 => {
    :rxcorr => 10,
    :rsp => 16,
    :mh => 1022.09807242,
    :deltacn_orig => 0.398330867290497,
    :xcorr => 0.436301857233047,
    :sp => 49.735767364502,
    :ions_matched => 5,
    :ions_total => 21,
    :sequence => "-.MRT#TSFAK.V",
    :manual_validation_status => "U",
    :first_scan => 23,
    :last_scan => 23,
    :charge => 1,
    :deltacn => 1.1,
    :aaseq => 'MRTTSFAK',
  },
  :last_match_17_last_loci => {
    :reference => 'gi|16129390|ref|NP_415948.1|',
    :first_entry => 'gi|16129390|ref|NP_415948.1|',
    :locus => 'gi|16129390|ref|NP_415948.1|',
    :description => 'Fake description',
  },
}
172
-
173
-
174
# Reads test_files' small.sqt before every example and checks its header,
# spectra, matches and loci against HeaderHash and TestSpectra.
describe SQT, ": reading a small sqt file" do
  before(:each) do
    sqt_path = Tfiles + '/small.sqt'
    sqt_path.exist_as_a_file?.should be_true
    @sqt = SQT.new(sqt_path)
  end

  it 'can access header entries like a hash' do
    header = @sqt.header
    HeaderHash.each do |name, expected|
      header[name].should == expected
    end
  end

  it 'can access header entries with methods' do
    header = @sqt.header
    # for example:
    header.database.should == HeaderHash['Database']
    # all working:
    HeaderHash.each do |name, expected|
      reader = SQT::Header::KeysToAtts[name]
      header.send(reader).should == expected
    end
  end

  it 'has spectra, matches, and loci' do
    svt = @sqt.spectra[16]
    reply = {
      :first => @sqt.spectra.first,
      :last => @sqt.spectra.last,
      :seventeenth => svt,
      :first_match_17 => svt.matches.first,
      :last_match_17 => svt.matches.last,
      :last_match_17_last_loci => svt.matches.last.loci.last,
    }
    checked = [:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci]
    checked.each do |key|
      actual = reply[key]
      TestSpectra[key].each do |att, expected|
        # floats are compared with a tolerance, everything else exactly
        if expected.is_a? Float
          actual.send(att).should be_close(expected, 0.0000000001)
        else
          actual.send(att).should == expected
        end
      end
    end
    @sqt.spectra[16].matches.first.loci.size.should == 1
    @sqt.spectra[16].matches.last.loci.size.should == 1
  end
end
216
-
217
# Builds an SQTGroup from two large .sqt files and checks its peptide hits
# and proteins against frozen (previously recorded) values.
describe SQTGroup, ': acting as a SpecID on large files' do
  spec_large do
    before(:each) do
      sqt_files = [
        Tfiles_l + '/opd1_2runs_2mods/sequest33/020.sqt',
        Tfiles_l + '/opd1_2runs_2mods/sequest33/040.sqt',
      ]
      sqt_files.each do |sqt_file|
        sqt_file.exist_as_a_file?.should be_true
      end
      @sqg = SQTGroup.new(sqt_files)
    end

    it 'has peptide hits' do
      peps = @sqg.peps
      peps.size.should == 38512 # frozen
      # first hit in 020
      first_hit = peps.first
      first_hit.sequence.should == 'R.Y#RLGGS#T#K.K'
      first_hit.base_name.should == '020'
      # last hit in 040
      last_hit = peps.last
      last_hit.sequence.should == 'K.NQTNNRFK.T'
      last_hit.base_name.should == '040'
    end

    it 'has prots' do
      ## FROZEN:
      @sqg.prots.size.should == 3994
      by_reference = @sqg.prots.sort_by {|prot| prot.reference }
      by_reference.first.reference.should == 'gi|16127996|ref|NP_414543.1|'
      by_reference.first.peps.size.should == 33
    end
  end
end
@@ -1,172 +0,0 @@
1
-
2
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
- require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
4
- require 'spec_id/srf'
5
-
6
- require 'fileutils'
7
-
8
- include SRFHelper
9
-
10
- #tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- #tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
12
- #tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
13
- #tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
14
- #if File.exist? tfiles_l
15
- # start = Time.now
16
- # $group = SRFGroup.new([tf_srf, tf_srf_inv])
17
- # $srf = $group.srfs.first
18
- # puts "Time to read and compile two SRF: #{Time.now - start} secs"
19
- #end
20
-
21
class Hash
  # Compares this hash of expected attribute values against +obj+.
  #
  # Each key is sent to +obj+ as a message (once) and the reply is compared
  # with the stored value:
  # * for :peaks, :hits and :prots the stored value is the expected size of
  #   the reply
  # * Float values must agree within 0.0001 for :ppm and 0.0000001 otherwise
  # * every other value must be == to the reply
  #
  # Returns true when all entries match. On the first mismatch the key, the
  # expected value and the reply are printed to stdout, and false is returned.
  def object_match(obj)
    all? do |key, expected|
      key = key.to_sym
      actual = obj.send(key)
      matched =
        if [:peaks, :hits, :prots].include?(key)
          actual.size == expected
        elsif expected.class == Float
          tolerance = (key == :ppm) ? 0.0001 : 0.0000001
          (expected - actual).abs <= tolerance
        else
          actual == expected
        end
      unless matched
        puts "BAD KEY: #{key}"
        puts "need: #{expected}"
        puts "got: #{actual}"
      end
      matched
    end
  end
end
46
-
47
klass = SRF

# Shared examples for reading an .srf file. The including group must set
# @file (path of the .srf) and the expectation hashes @header, @dta_gen,
# @dta_files_first, @dta_files_last, @out_files_first, @out_files_last,
# @out_files_first_pep, @out_files_last_pep and @params.
describe 'an srf reader', :shared => true do
  before(:all) do
    @srf_obj = klass.new(@file)
  end

  it 'retrieves correct header info' do
    header = @srf_obj.header
    @header.object_match(header).should be_true
    @dta_gen.object_match(@srf_obj.header.dta_gen).should be_true
  end

  # a few more dta params could be added in here:
  it 'retrieves correct dta files' do
    @dta_files_first.object_match(@srf_obj.dta_files.first).should be_true
    @dta_files_last.object_match(@srf_obj.dta_files.last).should be_true
  end

  # given an array of out_file objects, returns the hits of the first one
  # that has any (nil when none does)
  def get_first_peps(out_files)
    with_hits = out_files.find {|out_file| out_file.num_hits > 0 }
    with_hits && with_hits.hits
  end

  it 'retrieves correct out files' do
    @out_files_first.object_match(@srf_obj.out_files.first).should be_true
    @out_files_last.object_match(@srf_obj.out_files.last).should be_true
    # first available peptide hit
    first_pep = get_first_peps(@srf_obj.out_files).first
    @out_files_first_pep.object_match(first_pep).should be_true
    # last available peptide hit
    last_pep = get_first_peps(@srf_obj.out_files.reverse).last
    @out_files_last_pep.object_match(last_pep).should be_true
  end

  xit 'retrieves correct params' do
    @params.object_match(@srf_obj.params).should be_true
  end

  it_should 'retrieve probabilities if available'
end
90
-
91
-
92
# Names of the expectation hashes; each one becomes an instance variable of
# the same name in the example groups generated below.
Expected_hash_keys = %w(
  header dta_gen dta_files_first dta_files_last
  out_files_first out_files_last out_files_first_pep out_files_last_pep
  params
)

# .srf version => expectation hash and path of the file below Tfiles_l
to_run = {
  '3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
  '3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
  '3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
}

# one example group per version, all running the shared reader examples
to_run.each do |version, info|
  describe klass, " reading a version #{version} .srf file" do
    spec_large do
      before(:all) do
        @file = Tfiles_l + info[:file]
        expectations = info[:hash]
        Expected_hash_keys.each do |name|
          instance_variable_set("@#{name}", expectations[name.to_sym])
        end
      end
      it_should_behave_like "an srf reader"
    end
  end
end
113
-
114
-
115
# Reads corrupted_900.srf with $stderr redirected to a temporary file and
# checks both the partially read object and the captured error message.
describe klass, " reading a corrupted file" do
  it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
    file = Tfiles + '/corrupted_900.srf'
    error_msg = Tfiles + '/error_msg.tmp'
    original_stderr = $stderr
    begin
      File.open(error_msg, 'w') do |err_fh|
        $stderr = err_fh
        begin
          srf_obj = klass.new(file)
          srf_obj.base_name.should == '900'
          srf_obj.params.should be_nil
          header = srf_obj.header
          header.db_filename.should == "C:\\Xcalibur\\database\\sf_hs_44_36f_longesttrpt.fasta.hdr"
          header.enzyme.should == 'Enzyme:Trypsin(KR) (2)'
          dta_gen = header.dta_gen
          dta_gen.start_time.should be_close(1.39999997615814, 0.00000000001)
          srf_obj.dta_files.should == []
          srf_obj.out_files.should == []
        ensure
          # err_fh is closed when this block ends, so $stderr must be put
          # back before then, whether or not the expectations passed
          $stderr = original_stderr
        end
      end
      IO.read(error_msg).should =~ /corrupted_900\.srf/
    ensure
      # do not leave the temporary file behind when an expectation fails
      File.unlink(error_msg) if File.exist?(error_msg)
    end
  end
end
136
-
137
# Writes an .srg file for an SRFGroup whose filenames do not exist on disk
# and checks that the file was created.
describe SRFGroup, 'creating an srg file' do
  it 'creates one given some non-existing, relative filenames' do
    ## TEST SRG GROUPING:
    @srg = SRFGroup.new
    @srg.filenames = %w(my/lucky/filename /another/filename)
    srg_path = Tfiles + '/tmp_srg_file.srg'
    @srg.to_srg(srg_path)
    File.exist?(srg_path).should be_true
    File.unlink(srg_path)
  end
end
149
-
150
-
151
# @TODO: this test needs to be created for a small mock dataset!!
# Calls SRF#to_dta_files on a large .srf and inspects one generated .dta
# file inside the '020' directory, which is removed afterwards.
describe SRF, 'creating dta files' do
  spec_large do
    before(:all) do
      @srf = SRF.new(Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf')
    end

    it 'creates dta files' do
      @srf.to_dta_files
      File.exist?('020').should be_true
      File.directory?('020').should be_true
      File.exist?('020/020.3366.3366.2.dta').should be_true
      # records in the .dta file end in CRLF
      dta_lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
      dta_lines.first.should == "1113.106493 2\r\n"
      dta_lines[1].should == "164.5659 4817\r\n"

      FileUtils.rm_rf '020'
    end
  end
end
@@ -1,139 +0,0 @@
1
# Expected values for the .srf reader specs, one hash per file version:
# File_32 (version "3.2"), File_33 (version "3.3") and File_331 (Bioworks
# 3.3.1, srf version "3.5"). File_33 starts as a copy of File_32 and File_331
# as a copy of File_33; only the values that differ are overridden.
module SRFHelper

  File_32 = {
    :header => {
      :params_filename => "C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
      :raw_filename => "C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
      :modifications => "(M* +15.99940) (STY# +79.97990)",
      :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
      :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
      :db_filename => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
      :enzyme => "Enzyme:Trypsin(KR/P) (2)",
      :version => "3.2",
      :model => "LCQ Deca XP",
      :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
    },
    :dta_gen => {
      :min_group_count => 1,
      :start_time => 1.5,
      :start_mass => 300.0,
      :end_scan => 3620,
      :group_scan => 1,
      :start_scan => 1,
      :num_dta_files => 3747,
      :min_ion_threshold => 15,
      :end_mass => 4500.0,
    },
    :dta_files_first => {
      :mh => 390.92919921875,
      :dta_tic => 9041311.0,
      :num_peaks => 48,
      :charge => 1,
      :ms_level => 2,
      :total_num_possible_charge_states => 0,
    },
    :dta_files_last => {
      :dta_tic => 842424.0,
      :mh => 357.041198730469,
      :num_peaks => 78,
      :ms_level => 2,
      :charge => 1,
      :total_num_possible_charge_states => 0,
    },
    :out_files_first => {
      :num_hits => 0,
      :computer => 'VELA',
      :date_time => '05/06/2008, 02:08 PM,',
      :hits => 0,
    },
    :out_files_last => {
      :num_hits => 0,
      :computer => 'VELA',
      :date_time => '05/06/2008, 02:11 PM,',
      :hits => 0,
    },
    # each key is listed once; the original literal repeated :base_name
    # with the same value
    :out_files_first_pep => {
      :aaseq => "YRLGGSTK",
      :sequence => "R.Y#RLGGS#T#K.K",
      :mh => 1121.9390244522,
      :deltacn_orig => 0.0,
      :sp => 29.8529319763184,
      :xcorr => 0.123464643955231,
      :id => 2104,
      :rsp => 1,
      :ions_matched => 5,
      :ions_total => 35,
      :prots => 1,
      :deltamass => -0.00579976654989878,
      :ppm => 5.16938660859491,
      :base_name => "020",
      :first_scan => 3,
      :last_scan => 3,
      :charge => 1,
      :deltacn => 0.795928299427032,
    },
    # each key is listed once; the original literal repeated :deltacn and
    # :base_name with the same values
    :out_files_last_pep => {
      :aaseq => "LLPGTARTMRR",
      :sequence => "R.LLPGTARTMRR.M",
      :mh => 1272.5493424522,
      :deltacn_orig => 0.835508584976196,
      :deltacn => 1.1,
      :sp => 57.9885787963867,
      :xcorr => 0.109200321137905,
      :id => 1361,
      :rsp => 11,
      :ions_matched => 6,
      :ions_total => 40,
      :prots => 1,
      :deltamass => 0.00243330985608736,
      :ppm => 1.91215729542523,
      :base_name => "020",
      :first_scan => 3619,
      :last_scan => 3619,
      :charge => 3,
    },

    :params => {
      "add_O_Ornithine" => "0.0000",
      "add_F_Phenylalanine" => "0.0000",
      "add_A_Alanine" => "0.0000",
      "add_C_Cysteine" => "0.0000",
      "add_Y_Tyrosine" => "0.0000",
      "add_X_LorI" => "0.0000",
      "add_J_user_amino_acid" => "0.0000",
      "add_Cterm_peptide" => "0.0000",
      "add_S_Serine" => "0.0000",
      "add_Nterm_protein" => "0.0000",
      "add_D_Aspartic_Acid" => "0.0000",
      "add_Q_Glutamine" => "0.0000",
      "add_K_Lysine" => "0.0000",
      "add_R_Arginine" => "0.0000",
      "add_W_Tryptophan" => "0.0000",
      "add_Nterm_peptide" => "0.0000",
      "add_H_Histidine" => "0.0000",
      "add_L_Leucine" => "0.0000",
      "add_I_Isoleucine" => "0.0000",
      "add_N_Asparagine" => "0.0000",
      "add_B_avg_NandD" => "0.0000",
      "add_Z_avg_QandE" => "0.0000",
      "add_E_Glutamic_Acid" => "0.0000",
      "add_G_Glycine" => "0.0000",
      "add_P_Proline" => "0.0000",
      "add_M_Methionine" => "0.0000",
      "add_Cterm_protein" => "0.0000",
      "add_V_Valine" => "0.0000",
      "add_T_Threonine" => "0.0000",
      "add_U_user_amino_acid" => "0.0000",
      "match_peak_tolerance" => "1.0000",
      "match_peak_allowed_error" => "1",
      "normalize_xcorr" => "0",
      "nucleotide_reading_frame" => "0",
      "num_results" => "250",
      "sequence_header_filter" => "",
      "diff_search_options" => "15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
      "partial_sequence" => "",
      "max_num_internal_cleavage_sites" => "2",
      "search_engine" => "SEQUEST",
      "print_duplicate_references" => "40",
      "ion_series" => "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
      "remove_precursor_peak" => "0",
      "num_output_lines" => "10",
      "second_database_name" => "",
      "first_database_name" => "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
      "peptide_mass_tolerance" => "25.0000",
      "digest_mass_range" => "600.0 3500.0",
      "enzyme_info" => "Trypsin(KR/P) 1 1 KR P",
      "show_fragment_ions" => "0",
      "protein_mass_filter" => "0 0",
      "term_diff_search_options" => "0.000000 0.000000",
      "num_description_lines" => "5",
      "fragment_ion_tolerance" => "1.0000",
      "peptide_mass_units" => "2",
      "mass_type_parent" => "0",
      "match_peak_count" => "0",
      "max_num_differential_per_peptide" => "3",
      "ion_cutoff_percentage" => "0.0000",
      "mass_type_fragment" => "0"
    }
  }

  # Every section is dup'ed so the overrides below do not write through to
  # File_32 (the sections are flat hashes, so a shallow copy is sufficient).
  File_33 = {}
  File_32.each do |k,v|
    File_33[k] = v.dup
  end

  ## Bioworks 3.3 (srf version 3.3)
  File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
  File_33[:header][:version] = "3.3"

  File_33[:out_files_first][:computer] = 'TESLA'
  File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
  File_33[:out_files_last][:computer] = 'TESLA'
  File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'

  File_33[:out_files_first_pep][:sp] = 29.8535556793213
  File_33[:out_files_last_pep][:sp] = 57.987476348877
  File_33[:out_files_last_pep][:rsp] = 10
  File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219


  ## Bioworks 3.3.1 (srf version 3.5)
  File_331 = {}
  File_33.each do |k,v|
    File_331[k] = v.dup
  end
  File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
  File_331[:header][:version] = "3.5"
  File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
  File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'

end
138
-
139
-
@@ -1,33 +0,0 @@
1
-
2
module SpecID::Pep

  # Tests this peptide hit against a set of cutoffs.
  #
  # filter must be a hash with these keys allowed:
  #   :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar
  #
  # * xcorrN is a minimum xcorr and is applied only when N equals this
  #   hit's charge
  # * deltacn is a minimum, ppm is a maximum
  # * include_deltacnstar => false requires deltacn <= 1.0; any other value
  #   imposes no restriction
  # * any other key is ignored
  #
  # Keys may be Symbols or Strings. Returns true when every entry passes.
  def pass_filters?(filter)
    filter.all? do |key, cutoff|
      name = key.to_s
      if name[0...-1] == 'xcorr' && name[-1, 1].to_i == charge
        xcorr >= cutoff
      elsif name == 'include_deltacnstar'
        if cutoff == false
          deltacn <= 1.0
        else
          true
        end
      elsif name == 'ppm'
        ppm <= cutoff
      elsif name == 'deltacn'
        deltacn >= cutoff
      else
        # unknown keys, and xcorr cutoffs meant for another charge state
        true
      end
    end
  end

  # The negation of pass_filters? for the same +filter+ hash.
  def fail_filters?(filter)
    !pass_filters?(filter)
  end

end
33
-