mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,246 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
-
3
- require 'spec_id/sqt'
4
- require 'spec_id/srf'
5
-
6
- SpecHelperHeaderHash = {
7
- 'SQTGenerator' => 'mspire',
8
- 'SQTGeneratorVersion' => String,
9
- 'Database' => 'C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta',
10
- 'FragmentMasses' => 'AVG',
11
- 'PrecursorMasses' => 'AVG',
12
- 'StartTime' => nil,
13
- 'Alg-MSModel' => 'LCQ Deca XP',
14
- 'Alg-PreMassUnits' => 'amu',
15
- 'DBLocusCount' => '4237',
16
- 'Alg-FragMassTol' => '1.0000',
17
- 'Alg-PreMassTol' => '1.4000',
18
- 'Alg-IonSeries' => '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0',
19
- 'Alg-Enzyme' => 'Trypsin(KR/P) (2)',
20
- 'Comment' => ['Created from Bioworks .srf file'],
21
- 'StaticMod' => ['C=160.1901','Cterm=10.1230','E=161.4455'],
22
- 'DynamicMod' => ['STY*=+79.97990', 'M#=+14.02660'],
23
- }
24
-
25
- SpecHelperOtherLines =<<END
26
- S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
27
- S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
28
- M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
29
- L gi|16128266|ref|NP_414815.1|
30
- END
31
-
32
- SpecHelperOtherLinesEnd =<<END
33
- L gi|90111093|ref|NP_414704.4|
34
- M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12 54 K.LQKIITNSY*K U
35
- L gi|90111124|ref|NP_414904.2|
36
- END
37
-
38
- describe 'converting a large srf to sqt' do
39
- def del(file)
40
- if File.exist?(file)
41
- File.unlink(file)
42
- end
43
- end
44
-
45
- # returns true or false
46
- def header_hash_match(header_lines, hash)
47
- header_lines.all? do |line|
48
- (h, k, v) = line.chomp.split("\t")
49
- if hash[k].is_a? Array
50
- if hash[k].include?(v)
51
- true
52
- else
53
- puts "FAILED: "
54
- p k
55
- p v
56
- p hash[k]
57
- false
58
- end
59
- elsif hash[k] == String
60
- v.is_a?(String)
61
- else
62
- if v == hash[k]
63
- true
64
- else
65
- puts "FAILED: "
66
- p k
67
- p v
68
- p hash[k]
69
- false
70
- end
71
- end
72
- end
73
- end
74
-
75
- spec_large do
76
- before(:all) do
77
- @file = Tfiles_l + '/opd1_static_diff_mods/000.srf'
78
- @output = Tfiles_l + '/opd1_static_diff_mods/000.sqt.tmp'
79
- @srf = SRF.new(@file)
80
- @original_db_filename = @srf.header.db_filename
81
- end
82
- it 'converts without bothering with the database' do
83
- @srf.to_sqt(@output)
84
- @output.exist_as_a_file?.should be_true
85
- lines = File.readlines(@output)
86
- lines.size.should == 80910
87
- header_lines = lines.grep(/^H/)
88
- (header_lines.size > 10).should be_true
89
- header_hash_match(header_lines, SpecHelperHeaderHash).should be_true
90
- other_lines = lines.grep(/^[^H]/)
91
- other_lines[0,4].join('').should == SpecHelperOtherLines
92
- other_lines[-3,3].join('').should == SpecHelperOtherLinesEnd
93
- del(@output)
94
- end
95
- it 'warns if the db path is incorrect and we want to update db info' do
96
- # requires some knowledge of how the database file is extracted
97
- # internally
98
- wacky_path = '/not/a/real/path/wacky.fasta'
99
- @srf.header.db_filename = wacky_path
100
- my_error_string = ''
101
- StringIO.open(my_error_string, 'w') do |strio|
102
- $stderr = strio
103
- @srf.to_sqt(@output, :db_info => true)
104
- end
105
- my_error_string.should include(wacky_path)
106
- @srf.header.db_filename = @original_db_filename
107
- $stderr = STDERR
108
- @output.exist_as_a_file?.should be_true
109
- IO.readlines(@output).size.should == 80910
110
- del(@output)
111
- end
112
- it 'can get db info with correct path' do
113
- @srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33')
114
- @output.exist_as_a_file?.should be_true
115
- lines = IO.readlines(@output)
116
- has_md5 = lines.any? do |line|
117
- line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/
118
- end
119
- has_md5.should be_true
120
-
121
- has_seq_len = lines.any? do |line|
122
- # frozen
123
- line =~ /DBSeqLength\s+1342842/
124
- end
125
- has_seq_len.should be_true
126
- lines.size.should == 80912
127
- del(@output)
128
- end
129
- it 'can update the Database' do
130
- @srf.to_sqt(@output, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33', :update_db_path => true)
131
- regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
132
- updated_db = IO.readlines(@output).any? do |line|
133
- line =~ regexp
134
- end
135
- updated_db.should be_true
136
- del(@output)
137
- end
138
- end
139
- end
140
-
141
- HeaderHash = {}
142
- header_doublets = [
143
- %w(SQTGenerator mspire),
144
- %w(SQTGeneratorVersion 0.3.1),
145
- %w(Database C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta),
146
- %w(FragmentMasses AVG),
147
- %w(PrecursorMasses AVG),
148
- ['StartTime', ''],
149
- ['Alg-MSModel', 'LCQ Deca XP'],
150
- %w(DBLocusCount 4237),
151
- %w(Alg-FragMassTol 1.0000),
152
- %w(Alg-PreMassTol 25.0000),
153
- ['Alg-IonSeries', '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0'],
154
- %w(Alg-PreMassUnits ppm),
155
- ['Alg-Enzyme', 'Trypsin(KR/P) (2)'],
156
-
157
- ['Comment', ['ultra small file created for testing', 'Created from Bioworks .srf file']],
158
- ['DynamicMod', ['M*=+15.99940', 'STY#=+79.97990']],
159
- ['StaticMod', []],
160
- ].each do |double|
161
- HeaderHash[double[0]] = double[1]
162
- end
163
-
164
- TestSpectra = {
165
- :first => { :first_scan=>2, :last_scan=>2, :charge=>1, :time_to_process=>0.0, :node=>"TESLA", :mh=>390.92919921875, :total_intensity=>2653.90307617188, :lowest_sp=>0.0, :num_matched_peptides=>0, :matches=>[]},
166
- :last => { :first_scan=>27, :last_scan=>27, :charge=>1, :time_to_process=>0.0, :node=>"TESLA", :mh=>393.008056640625, :total_intensity=>2896.16967773438, :lowest_sp=>0.0, :num_matched_peptides=>0, :matches=>[] },
167
- :seventeenth => {:first_scan=>23, :last_scan=>23, :charge=>1, :time_to_process=>0.0, :node=>"TESLA", :mh=>1022.10571289062, :total_intensity=>3637.86059570312, :lowest_sp=>0.0, :num_matched_peptides=>41},
168
- :first_match_17 => { :rxcorr=>1, :rsp=>5, :mh=>1022.11662242, :deltacn_orig=>0.0, :xcorr=>0.725152492523193, :sp=>73.9527359008789, :ions_matched=>6, :ions_total=>24, :sequence=>"-.MGT#TTM*GVK.L", :manual_validation_status=>"U", :first_scan=>23, :last_scan=>23, :charge=>1, :deltacn=>0.0672458708286285, :aaseq => 'MGTTTMGVK' },
169
- :last_match_17 => {:rxcorr=>10, :rsp=>16, :mh=>1022.09807242, :deltacn_orig=>0.398330867290497, :xcorr=>0.436301857233047, :sp=>49.735767364502, :ions_matched=>5, :ions_total=>21, :sequence=>"-.MRT#TSFAK.V", :manual_validation_status=>"U", :first_scan=>23, :last_scan=>23, :charge=>1, :deltacn=>1.1, :aaseq => 'MRTTSFAK'},
170
- :last_match_17_last_loci => {:reference =>'gi|16129390|ref|NP_415948.1|', :first_entry =>'gi|16129390|ref|NP_415948.1|', :locus =>'gi|16129390|ref|NP_415948.1|', :description => 'Fake description' }
171
- }
172
-
173
-
174
- describe SQT, ": reading a small sqt file" do
175
- before(:each) do
176
- file = Tfiles + '/small.sqt'
177
- file.exist_as_a_file?.should be_true
178
- @sqt = SQT.new(file)
179
- end
180
-
181
- it 'can access header entries like a hash' do
182
- header = @sqt.header
183
- HeaderHash.each do |k,v|
184
- header[k].should == v
185
- end
186
- end
187
-
188
- it 'can access header entries with methods' do
189
- header = @sqt.header
190
- # for example:
191
- header.database.should == HeaderHash['Database']
192
- # all working:
193
- HeaderHash.each do |k,v|
194
- header.send(SQT::Header::KeysToAtts[k]).should == v
195
- end
196
-
197
- end
198
-
199
- it 'has spectra, matches, and loci' do
200
- svt = @sqt.spectra[16]
201
- reply = {:first => @sqt.spectra.first, :last => @sqt.spectra.last, :seventeenth => svt, :first_match_17 => svt.matches.first, :last_match_17 => svt.matches.last, :last_match_17_last_loci => svt.matches.last.loci.last}
202
- [:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci].each do |key|
203
- TestSpectra[key].each do |k,v|
204
- if v.is_a? Float
205
- reply[key].send(k).should be_close(v, 0.0000000001)
206
- else
207
- reply[key].send(k).should == v
208
- end
209
- end
210
- end
211
- @sqt.spectra[16].matches.first.loci.size.should == 1
212
- @sqt.spectra[16].matches.last.loci.size.should == 1
213
- end
214
-
215
- end
216
-
217
- describe SQTGroup, ': acting as a SpecID on large files' do
218
- spec_large do
219
- before(:each) do
220
- file1 = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.sqt'
221
- file2 = Tfiles_l + '/opd1_2runs_2mods/sequest33/040.sqt'
222
- file1.exist_as_a_file?.should be_true
223
- file2.exist_as_a_file?.should be_true
224
- @sqg = SQTGroup.new([file1, file2])
225
- end
226
-
227
- it 'has peptide hits' do
228
- peps = @sqg.peps
229
- peps.size.should == 38512 # frozen
230
- # first hit in 020
231
- peps.first.sequence.should == 'R.Y#RLGGS#T#K.K'
232
- peps.first.base_name.should == '020'
233
- # last hit in 040
234
- peps.last.sequence.should == 'K.NQTNNRFK.T'
235
- peps.last.base_name.should == '040'
236
- end
237
-
238
- it 'has prots' do
239
- ## FROZEN:
240
- @sqg.prots.size.should == 3994
241
- sorted = @sqg.prots.sort_by {|v| v.reference }
242
- sorted.first.reference.should == 'gi|16127996|ref|NP_414543.1|'
243
- sorted.first.peps.size.should == 33
244
- end
245
- end
246
- end
@@ -1,172 +0,0 @@
1
-
2
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
- require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
4
- require 'spec_id/srf'
5
-
6
- require 'fileutils'
7
-
8
- include SRFHelper
9
-
10
- #tfiles = File.dirname(__FILE__) + '/tfiles/'
11
- #tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
12
- #tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
13
- #tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
14
- #if File.exist? tfiles_l
15
- # start = Time.now
16
- # $group = SRFGroup.new([tf_srf, tf_srf_inv])
17
- # $srf = $group.srfs.first
18
- # puts "Time to read and compile two SRF: #{Time.now - start} secs"
19
- #end
20
-
21
- class Hash
22
- def object_match(obj)
23
- self.all? do |k,v|
24
- k = k.to_sym
25
- retval =
26
- if k == :peaks or k == :hits or k == :prots
27
- obj.send(k).size == v
28
- elsif v.class == Float
29
- delta =
30
- if k == :ppm ; 0.0001
31
- else ; 0.0000001
32
- end
33
- (v - obj.send(k)).abs <= delta
34
- else
35
- obj.send(k) == v
36
- end
37
- if retval == false
38
- puts "BAD KEY: #{k}"
39
- puts "need: #{v}"
40
- puts "got: #{obj.send(k)}"
41
- end
42
- retval
43
- end
44
- end
45
- end
46
-
47
- klass = SRF
48
-
49
- describe 'an srf reader', :shared => true do
50
- before(:all) do
51
- @srf_obj = klass.new(@file)
52
- end
53
-
54
- it 'retrieves correct header info' do
55
- @header.object_match(@srf_obj.header).should be_true
56
- @dta_gen.object_match(@srf_obj.header.dta_gen).should be_true
57
- end
58
-
59
- # a few more dta params could be added in here:
60
- it 'retrieves correct dta files' do
61
- @dta_files_first.object_match(@srf_obj.dta_files.first).should be_true
62
- @dta_files_last.object_match(@srf_obj.dta_files.last).should be_true
63
- end
64
-
65
- # given an array of out_file objects, returns the first set of hits
66
- def get_first_peps(out_files)
67
- out_files.each do |outf|
68
- if outf.num_hits > 0
69
- return outf.hits
70
- end
71
- end
72
- return nil
73
- end
74
-
75
- it 'retrieves correct out files' do
76
- @out_files_first.object_match(@srf_obj.out_files.first).should be_true
77
- @out_files_last.object_match(@srf_obj.out_files.last).should be_true
78
- # first available peptide hit
79
- @out_files_first_pep.object_match(get_first_peps(@srf_obj.out_files).first).should be_true
80
- # last available peptide hit
81
- @out_files_last_pep.object_match(get_first_peps(@srf_obj.out_files.reverse).last).should be_true
82
- end
83
-
84
- xit 'retrieves correct params' do
85
- @params.object_match(@srf_obj.params).should be_true
86
- end
87
-
88
- it_should 'retrieve probabilities if available'
89
- end
90
-
91
-
92
- Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
93
-
94
- to_run = {
95
- '3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
96
- '3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
97
- '3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
98
- }
99
-
100
- to_run.each do |version,info|
101
- describe klass, " reading a version #{version} .srf file" do
102
- spec_large do
103
- before(:all) do
104
- @file = Tfiles_l + info[:file]
105
- Expected_hash_keys.each do |c|
106
- instance_variable_set("@#{c}", info[:hash][c.to_sym])
107
- end
108
- end
109
- it_should_behave_like "an srf reader"
110
- end
111
- end
112
- end
113
-
114
-
115
- describe klass, " reading a corrupted file" do
116
- it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
117
- file = Tfiles + '/corrupted_900.srf'
118
- error_msg = Tfiles + '/error_msg.tmp'
119
- File.open(error_msg, 'w') do |err_fh|
120
- $stderr = err_fh
121
- srf_obj = klass.new(file)
122
- srf_obj.base_name.should == '900'
123
- srf_obj.params.should be_nil
124
- header = srf_obj.header
125
- header.db_filename.should == "C:\\Xcalibur\\database\\sf_hs_44_36f_longesttrpt.fasta.hdr"
126
- header.enzyme.should == 'Enzyme:Trypsin(KR) (2)'
127
- dta_gen = header.dta_gen
128
- dta_gen.start_time.should be_close(1.39999997615814, 0.00000000001)
129
- srf_obj.dta_files.should == []
130
- srf_obj.out_files.should == []
131
- end
132
- IO.read(error_msg).should =~ /corrupted_900\.srf/
133
- File.unlink error_msg
134
- end
135
- end
136
-
137
- describe SRFGroup, 'creating an srg file' do
138
- it 'creates one given some non-existing, relative filenames' do
139
- ## TEST SRG GROUPING:
140
- filenames = %w(my/lucky/filename /another/filename)
141
- @srg = SRFGroup.new
142
- @srg.filenames = filenames
143
- srg_file = Tfiles + '/tmp_srg_file.srg'
144
- @srg.to_srg(srg_file)
145
- File.exist?(srg_file).should be_true
146
- File.unlink(srg_file)
147
- end
148
- end
149
-
150
-
151
- # @TODO: this test needs to be created for a small mock dataset!!
152
- describe SRF, 'creating dta files' do
153
- spec_large do
154
- before(:all) do
155
- file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
156
- @srf = SRF.new(file)
157
- end
158
-
159
- it 'creates dta files' do
160
- @srf.to_dta_files
161
- File.exist?('020').should be_true
162
- File.directory?('020').should be_true
163
- File.exist?('020/020.3366.3366.2.dta').should be_true
164
- lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
165
- lines.first.should == "1113.106493 2\r\n"
166
- lines[1].should == "164.5659 4817\r\n"
167
-
168
- FileUtils.rm_rf '020'
169
- end
170
- end
171
-
172
- end
@@ -1,139 +0,0 @@
1
- module SRFHelper
2
-
3
- File_32 = {
4
- :header =>
5
- {
6
- :params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
7
- :raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
8
- :modifications=>"(M* +15.99940) (STY# +79.97990)",
9
- :sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
10
- :ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
11
- :db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
12
- :enzyme=>"Enzyme:Trypsin(KR/P) (2)",
13
- :version=>"3.2",
14
- :model=>"LCQ Deca XP",
15
- :dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
16
- },
17
- :dta_gen => {
18
- :min_group_count => 1,
19
- :start_time => 1.5,
20
- :start_mass => 300.0,
21
- :end_scan => 3620,
22
- :group_scan => 1,
23
- :start_scan => 1,
24
- :num_dta_files => 3747,
25
- :min_ion_threshold => 15,
26
- :end_mass => 4500.0,
27
- },
28
- :dta_files_first => {
29
-
30
- :mh=>390.92919921875,
31
- :dta_tic=>9041311.0,
32
- :num_peaks=>48,
33
- :charge=>1,
34
- :ms_level=>2,
35
- :total_num_possible_charge_states=>0,
36
- },
37
- :dta_files_last => {
38
- :dta_tic=>842424.0,
39
- :mh=>357.041198730469,
40
- :num_peaks=>78,
41
- :ms_level=>2,
42
- :charge=>1,
43
- :total_num_possible_charge_states=>0,
44
- },
45
- :out_files_first => {
46
- :num_hits => 0,
47
- :computer => 'VELA',
48
- :date_time => '05/06/2008, 02:08 PM,',
49
- :hits => 0,
50
- },
51
- :out_files_last => {
52
- :num_hits => 0,
53
- :computer => 'VELA',
54
- :date_time => '05/06/2008, 02:11 PM,',
55
- :hits => 0,
56
- },
57
- :out_files_first_pep => {
58
- :aaseq=>"YRLGGSTK",
59
- :sequence=>"R.Y#RLGGS#T#K.K",
60
- :mh=>1121.9390244522,
61
- :deltacn_orig=>0.0,
62
- :sp=>29.8529319763184,
63
- :xcorr=>0.123464643955231,
64
- :id=>2104,
65
- :rsp=>1,
66
- :ions_matched=>5,
67
- :ions_total=>35,
68
- :prots=>1,
69
- :deltamass=>-0.00579976654989878,
70
- :ppm=>5.16938660859491,
71
- :base_name=>"020",
72
- :first_scan=>3,
73
- :last_scan=>3,
74
- :charge=>1,
75
- :deltacn=>0.795928299427032,
76
- :base_name=>"020",
77
- },
78
- :out_files_last_pep =>
79
- {
80
- :aaseq=>"LLPGTARTMRR",
81
- :sequence=>"R.LLPGTARTMRR.M",
82
- :mh=>1272.5493424522,
83
- :deltacn_orig=>0.835508584976196,
84
- :deltacn=>1.1,
85
- :sp=>57.9885787963867,
86
- :xcorr=>0.109200321137905,
87
- :id=>1361,
88
- :rsp=>11,
89
- :ions_matched=>6,
90
- :ions_total=>40,
91
- :prots=>1,
92
- :deltamass=>0.00243330985608736,
93
- :ppm=>1.91215729542523,
94
- :base_name=>"020",
95
- :first_scan=>3619,
96
- :last_scan=>3619,
97
- :charge=>3,
98
- :deltacn=>1.1,
99
- :base_name=>"020",
100
- },
101
-
102
- :params => {
103
- "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta", "peptide_mass_tolerance"=>"25.0000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"2", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"3", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"
104
- }
105
- }
106
-
107
- File_33 = {}
108
- File_32.each do |k,v|
109
- File_33[k] = v.dup
110
- end
111
-
112
- ## Bioworks 3.3 (srf version 3.3)
113
- File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
114
- File_33[:header][:version] = "3.3"
115
-
116
- File_33[:out_files_first][:computer] = 'TESLA'
117
- File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
118
- File_33[:out_files_last][:computer] = 'TESLA'
119
- File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'
120
-
121
- File_33[:out_files_first_pep][:sp] = 29.8535556793213
122
- File_33[:out_files_last_pep][:sp] = 57.987476348877
123
- File_33[:out_files_last_pep][:rsp] = 10
124
- File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219
125
-
126
-
127
- ## Bioworks 3.3.1 (srf version 3.5)
128
- File_331 = {}
129
- File_33.each do |k,v|
130
- File_331[k] = v.dup
131
- end
132
- File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
133
- File_331[:header][:version] = "3.5"
134
- File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
135
- File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
136
-
137
- end
138
-
139
-
@@ -1,33 +0,0 @@
1
-
2
- module SpecID::Pep
3
-
4
- # filter must be a hash with these keys allowed:
5
- # :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar
6
- def pass_filters?(filter)
7
- filter.all? do |k,v|
8
- k_as_s = k.to_s
9
- if ((k_as_s[0...-1] == 'xcorr') and (k.to_s[-1,1].to_i == self.charge))
10
- charge = k.to_s[-1,1].to_i
11
- self.xcorr >= v
12
- elsif k_as_s == 'include_deltacnstar'
13
- if v == false
14
- self.deltacn <= 1.0
15
- else
16
- true
17
- end
18
- elsif k_as_s == 'ppm'
19
- self.send(k) <= v
20
- elsif k_as_s == 'deltacn'
21
- self.send(k) >= v
22
- else
23
- true
24
- end
25
- end
26
- end
27
-
28
- def fail_filters?(filter)
29
- !pass_filters?(filter)
30
- end
31
-
32
- end
33
-