mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/README CHANGED
@@ -1,3 +1,12 @@
1
+ = {Mspire}[http://mspire.rubyforge.org]
2
+
3
+ A library for working with mass spectrometry proteomics data.
4
+
5
+ <em> Mspire is going through a re-write as of version 0.5.0 to support a new
6
+ development model. Many modules are absent but will gradually be added back.
7
+ Use the 0.4 releases as necessary. </em>
8
+
9
+ == Description
1
10
 
2
11
  mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
3
12
  working with MS proteomics data in ruby. It seeks to provide support for open
@@ -5,19 +14,22 @@ standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
5
14
  TPP) and contribute other useful functionality for working with mass
6
15
  spectrometry data in ruby.
7
16
 
8
- Current Focus
9
- -------------
17
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
18
+ * Github[http://github.com/bahuvrihi/mspire/tree/master]
19
+ * {Google Group}[http://groups.google.com/group/mspire-forum]
20
+
21
+ --
22
+ === Current Focus
10
23
 
11
24
  The project is currently focusing on the following:
12
25
 
13
- * SEQUEST data (particularly the output of Bioworks 3.2-3.3)
26
+ * SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
14
27
  * mzXML
15
28
  * mzData
16
29
  * ProteinProphet
17
30
  * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
18
31
 
19
- Features
20
- --------
32
+ === Features
21
33
 
22
34
  * mzXML (version 1, 2, and 3) parsing
23
35
  * mzData parsing
@@ -37,31 +49,29 @@ Validation by:
37
49
  * Generic sample bias (e.g., low abundance/high abundance proteins)
38
50
  * Defined sample
39
51
 
40
- Spectra and Spectra Identification
41
- ----------------------------------
52
+ === Spectra and Spectra Identification
42
53
 
43
54
  The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
44
55
 
45
56
  The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
46
57
 
47
- Tutorials
48
- ---------
58
+ === Tutorials
49
59
 
50
60
  * [Database Searching Tutorial](tutorial/database_searching/index.html) -
51
61
  Demonstrates two methods for running and analysing Bioworks output to obtain
52
62
  false positive rates using mspire executables.
63
+ ++
64
+
65
+ == Installation
53
66
 
54
- Warning
55
- -------
67
+ Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
68
+
69
+ % gem install mspire
70
+
71
+ = Warning
56
72
 
57
73
  This is an experimental package. As such, all versions prior to version 1.0
58
74
  may contain interface changes on minor revisions (major.minor.build) (e.g.,
59
75
  0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
60
76
  versioning scheme will be strictly adhered to (no interface changes except on
61
77
  major revisions).
62
-
63
- Installation
64
- ------------
65
-
66
- see [Install](install/index.html)
67
-
data/changelog.txt CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- ## version 0.1.7
2
+ == version 0.1.7
3
3
 
4
4
  1. A couple of scripts and subroutines were hashing peptides but not on the file
5
5
  basename. This would result in slightly incorrect results (any time there
@@ -33,33 +33,33 @@ Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
33
33
  called the False Positive Predictive Rate (FPPR). I will probably implement
34
34
  this in a future release.
35
35
 
36
- ## version 0.2.0
36
+ == version 0.2.0
37
37
 
38
38
  Revamped the way SpecID works (it is now mixed-in).
39
39
  Added support for modifications to bioworks_to_pepxml.rb
40
40
  Can read .srf files (nearly interchangeable with bioworks files)
41
41
  Redid filter.rb
42
42
 
43
- ## version 0.2.1
43
+ == version 0.2.1
44
44
 
45
45
  minor bugfix
46
46
 
47
- ## version 0.2.2
47
+ == version 0.2.2
48
48
 
49
49
  made compatible with Bioworks fasta file reverser and updated tutorial.
50
50
  Killed classify_by_prefix routine in favor of classify_by_false_flag which has
51
51
  a prefix option
52
52
 
53
- ## version 0.2.3
53
+ == version 0.2.3
54
54
 
55
55
  in protein_summary.rb added handling for proteins with no annotation. (either
56
56
  display NA or use gi2annot to grab them from NCBI)
57
57
 
58
- ## version 0.2.5
58
+ == version 0.2.5
59
59
 
60
60
  renamed prep_list in roc (potential breaks in code)
61
61
 
62
- ## version 0.2.6
62
+ == version 0.2.6
63
63
 
64
64
  1. Massive refactorization of filtering and validation. Validation objects are
65
65
  created and then can be used to validate just about anything.
@@ -75,105 +75,105 @@ appear to influence our analyses, however). Fixed.
75
75
  2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
76
  missed cleavages if the last amino acid was a cut point. Fixed.
77
77
 
78
- ## version 0.2.7
78
+ == version 0.2.7
79
79
 
80
80
  1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
81
  Now, the sample enzyme is set explicitly from the params file and the option
82
82
  is not available. This can give more accurate pepxml files than from
83
83
  previous depending on your enzyme.
84
84
 
85
- ## version 0.2.9
85
+ == version 0.2.9
86
86
 
87
87
  1. Added support for phobius transmembrane predictions
88
88
  2. have filter_and_validate.rb working well (multiple validators allowed).
89
89
  3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
90
  4. Added a bias validator
91
91
 
92
- ## version 0.2.10
92
+ == version 0.2.10
93
93
 
94
94
  1. Fixed --hits_separate flag in spec_id/filter
95
95
 
96
- ## version 0.2.11
96
+ == version 0.2.11
97
97
 
98
98
  1. Added prob precision support and reorganized filter_and_validate libs
99
99
 
100
- ## version 0.2.12
100
+ == version 0.2.12
101
101
 
102
102
  1. Fixed bug in transmem for prob and others.
103
103
  2. Can use axml (XMLParser based) or libxml depending on availability
104
104
 
105
- ## version 0.2.13
105
+ == version 0.2.13
106
106
 
107
107
  1. Fixed issue with --hits_separate
108
108
  2. filter_and_validate.rb requires decoy validator if decoy proteins
109
109
  (refactored code)
110
110
 
111
- ## version 0.2.14
111
+ == version 0.2.14
112
112
 
113
113
  1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
114
  2. API change: Some slight modifications to the Sequest::PepXML object
115
115
  interfaces and implementations (using ArrayClass)
116
116
 
117
- ## version 0.2.15
117
+ == version 0.2.15
118
118
 
119
119
  1. can convert srf files to sqt files
120
120
 
121
- ## version 0.3.0
121
+ == version 0.3.0
122
122
 
123
123
  1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
124
  2. SQT export is correct and works at least on 3.2 and 3.3.1.
125
125
 
126
- ## version 0.3.1
126
+ == version 0.3.1
127
127
 
128
128
  1. Bug fix in srf filtering (num_hits adjusted)
129
129
 
130
- ## version 0.3.2
130
+ == version 0.3.2
131
131
 
132
132
  1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
133
  now.
134
134
 
135
- ## version 0.3.3
135
+ == version 0.3.3
136
136
 
137
137
  1. Worked out minor kinks in prob_precision.rb
138
138
 
139
- ## version 0.3.4
139
+ == version 0.3.4
140
140
 
141
141
  1. filters >= +3 charged ions now.
142
142
 
143
- ## version 0.3.5
143
+ == version 0.3.5
144
144
 
145
145
  1. fixed creation of background distribution in validators (hash_by base_name,
146
146
  first_scan, charge now)
147
147
 
148
- ## version 0.3.6
148
+ == version 0.3.6
149
149
 
150
150
  1. split off bad_aa_est from bad_aa
151
151
 
152
- ## version 0.3.7
152
+ == version 0.3.7
153
153
 
154
154
  1. can deal with No_Enzyme searches now (while still capable of setting
155
155
  sample_enzyme)
156
156
 
157
- ## version 0.3.8
157
+ == version 0.3.8
158
158
 
159
159
  1. can set a decoy to target ratio for decoy validation
160
160
  2. added mass calculator in Mass::Calculator
161
161
 
162
- ## version 0.3.9
162
+ == version 0.3.9
163
163
 
164
164
  1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
165
 
166
- ## version 0.3.10
166
+ == version 0.3.10
167
167
 
168
168
  1. added run_percolator.rb script which makes running multiple files easy
169
169
 
170
- ## version 0.3.11
170
+ == version 0.3.11
171
171
 
172
172
  1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
173
  2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
174
  larger files to be parsed
175
175
 
176
- ## version 0.4.0
176
+ == version 0.4.0
177
177
 
178
178
  1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
179
  2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
@@ -182,46 +182,15 @@ larger files to be parsed
182
182
  eval methods (however, the method intensity_at_mz will still work (causing
183
183
  evaluation))
184
184
 
185
- ## version 0.4.1
185
+ == version 0.4.1
186
186
 
187
187
  1. added support for reading mzXML version 3.0 (may fail in some cases)
188
188
 
189
- ## version 0.4.2
189
+ == version 0.4.2
190
190
 
191
191
  1. added MS::MSRun.open method
192
192
  2. added method to write dta files from SRF
193
193
 
194
- ## version 0.4.3
194
+ == version 0.4.3
195
195
 
196
196
  1. added to_mfg_file from SRF
197
- 2. added to_dta_files from SRF complete with streaming .tar.gz output (and
198
- supporting .zip output but it has to make tmp files)
199
-
200
- ## version 0.4.4
201
- 1. implemented q-value and pi_0 methods of Storey
202
- 2. can do complete q-value calculations given p-values
203
- 3. can determine a pi_0 given a list of target and decoy values (as booleans)
204
- 4. can determine a pi_0 given a list containing numbers of decoy and target
205
- values as is often encountered with filtering
206
- 5. prob_validate.rb implements a q-value option for turning PeptideProphet
207
- probabilities into q-values
208
- 6. filter_validate.rb implements a p value method using xcorr values, however,
209
- this is not very effective since xcorr values underrepresent the the
210
- difference between good hits and bad hits
211
-
212
- ## version 0.4.5
213
- 1. using pi_zero instead of decoy_to_target_ratio. While all tests are
214
- passing, this release should be considered experimental with the use of any
215
- target-decoy validation.
216
-
217
- ## version 0.4.6
218
- 1. added NOTE to --to_qvalues option to include all results (no low prob
219
- filter)
220
-
221
- ## version 0.4.7
222
- 1. Added ability to quickly grab sequest params out of a .SRF file
223
- 2. Added helpful runtime error if print_duplicate_references is 0.
224
-
225
- ## version 0.4.9
226
- 1. quiet some unneeded output and fixed truncation of filenames with '.'
227
- inside them.
data/lib/ms/calc.rb ADDED
@@ -0,0 +1,32 @@
module Ms
  # Small numeric helpers for mass spectrometry values.  Every method is a
  # module function, so it may be called as Ms::Calc.<method>.
  module Calc
    module_function

    #
    # ppm calculations... maybe use RUnit
    #

    # Returns the absolute tolerance (as a Float) that corresponds to +ppm+
    # parts-per-million at +mz+.
    #
    #   Ms::Calc.ppm_tol_at(1000, 10)    # => 0.01
    #
    def ppm_tol_at(mz, ppm)
      # the leading 1.0 forces floating point math for integer inputs
      1.0 * mz * ppm / 10**6
    end

    # Returns a two-element array, [mz - tol, mz + tol], where tol is the
    # ppm tolerance at mz (see ppm_tol_at).
    def ppm_span_at(mz, ppm)
      tol = ppm_tol_at(mz, ppm)
      [mz - tol, mz + tol]
    end

    # Returns the ppm window around mz as a Range of Floats.  Note the range
    # is built with '...' and therefore excludes its upper bound.
    def ppm_range_at(mz, ppm)
      mz = mz.to_f
      tol = ppm_tol_at(mz, ppm)
      (mz - tol)...(mz + tol)
    end

    # Rounds n to the specified precision (ie number of decimal places)
    # def round(n, precision)
    #   factor = 10**precision.to_i
    #   (n * factor).round.to_f / factor
    # end

  end
end
@@ -0,0 +1,60 @@
1
+ require 'ms/data/simple'
2
+
module Ms
  module Data
    module_function

    # Initializes a new interleaved data array with n interleaved members.
    def new_interleaved(unresolved_data, n=2)
      # pass the caller's n through (previously 'n=2' was passed, which
      # re-assigned n and silently discarded the argument)
      Interleaved.new(unresolved_data, n)
    end

    # An Interleaved data array lazily evaluates its unresolved data as
    # an interleaved array of n members.  The unresolved data is evaluated
    # into an array using to_a.
    #
    #   i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
    #   i.unresolved_data     # => [1,4,2,5,3,6]
    #   i.data                # => []
    #   i[0]                  # => [1,2,3]
    #   i[1]                  # => [4,5,6]
    #   i.data                # => [[1,2,3], [4,5,6]]
    #
    class Interleaved < Simple
      # The number of interleaved members.
      attr_reader :n

      # Raises an ArgumentError unless n is a positive integer.
      def initialize(unresolved_data, n=2)
        unless n.kind_of?(Integer) && n > 0
          raise ArgumentError, "n must be a positive integer: #{n.inspect}"
        end

        @n = n
        super(unresolved_data)
      end

      # Resolves the data (if necessary) and returns the member at index.
      def [](index)
        resolve.data[index]
      end

      # True once data has been populated by resolve.
      def resolved?
        !@data.empty?
      end

      # De-interleaves unresolved_data into n member arrays and returns self.
      # Raises an ArgumentError if the number of elements is not evenly
      # divisible by n.
      def resolve
        return(self) if resolved?

        unresolved_data = @unresolved_data.to_a

        unless unresolved_data.length % n == 0
          raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
        end

        n.times { @data << [] }

        # item i belongs to member (i mod n)
        unresolved_data.each_with_index do |item, i|
          @data[i % n] << item
        end

        self
      end

    end
  end
end
@@ -0,0 +1,73 @@
module Ms
  module Data

    # LazyIO represents data to be lazily read from an IO.  To read the data
    # from the IO, either string or to_a may be called (to_a unpacks the
    # string into an array using the decode_format and unpack_format).
    #
    # LazyIO is a suitable unresolved_data source for Ms::Data formats.
    class LazyIO
      # String#unpack directives for the supported binary layouts.
      NETWORK_FLOAT = 'g*'.freeze
      NETWORK_DOUBLE = 'G*'.freeze
      LITTLE_ENDIAN_FLOAT = 'e*'.freeze
      LITTLE_ENDIAN_DOUBLE = 'E*'.freeze
      BASE_64 = 'm'.freeze

      class << self
        # Returns the unpacking code for the given precision (32 or 64-bit)
        # and network order (true for big-endian).  Raises an ArgumentError
        # for any other precision.
        def unpack_code(precision, network_order)
          case precision
          when 32 then network_order ? NETWORK_FLOAT : LITTLE_ENDIAN_FLOAT
          when 64 then network_order ? NETWORK_DOUBLE : LITTLE_ENDIAN_DOUBLE
          else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
          end
        end
      end

      # The IO from which string is read
      attr_reader :io

      # The start index for reading string
      attr_reader :start_index

      # The number of bytes to be read from io when evaluating string
      attr_reader :num_bytes

      # Indicates the unpacking format
      attr_reader :unpack_format

      # Indicates a decoding format, may be false to unpack string
      # without decoding.
      attr_reader :decode_format

      # By default reading starts at the current position of io and, as
      # num_bytes is nil, io.read(nil) is used to read the string.
      def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
        @io = io
        @start_index = start_index
        @num_bytes = num_bytes
        @unpack_format = unpack_format
        @decode_format = decode_format
      end

      # Positions io at start_index and reads a string of num_bytes length.
      # The string is newly read from io each time string is called.
      def string
        io.pos = start_index unless io.pos == start_index
        io.read(num_bytes)
      end

      # Resets the cached array (returned by to_a) so that the array will
      # be re-read from io.
      def reset
        @array = nil
      end

      # Reads string and unpacks using decode_format and unpack_format.  The
      # array is cached internally; to re-read the array, use reset.
      def to_a
        @array ||= begin
          raw = string
          # decoding (ex base64) is skipped when decode_format is false/nil
          raw = raw.unpack(decode_format)[0] if decode_format
          raw.unpack(unpack_format)
        end
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'ms/data/lazy_io'
2
+ require 'stringio'
3
+
module Ms
  module Data

    # LazyString is a LazyIO initialized from a string, which is converted into
    # a StringIO.
    class LazyString < LazyIO
      def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
        # num_bytes is a byte count (it is handed to io.read), so use bytesize
        # rather than length; length counts characters and would truncate
        # strings containing multibyte characters.
        super(StringIO.new(string), 0, string.bytesize, unpack_format, decode_format)
      end
    end
  end
end
@@ -0,0 +1,59 @@
module Ms
  module Data
    module_function

    # Initializes a new simple data array.
    def new_simple(unresolved_data)
      Simple.new(unresolved_data)
    end

    # A Simple data array that lazily evaluates unresolved_data, and
    # each member of unresolved_data using to_a:
    #
    #   class LazyObject
    #     attr_reader :to_a
    #     def initialize(array)
    #       @to_a = array
    #     end
    #   end
    #
    #   a = LazyObject.new([1,2,3])
    #   b = LazyObject.new([4,5,6])
    #   s = Ms::Data::Simple.new([a, b])
    #
    #   s.unresolved_data     # => [a, b]
    #   s.data                # => []
    #   s[0]                  # => [1,2,3]
    #   s[1]                  # => [4,5,6]
    #   s.data                # => [[1,2,3], [4,5,6]]
    #
    class Simple
      # The underlying resolved data store.
      attr_reader :data

      # The underlying unresolved data store.
      attr_reader :unresolved_data

      def initialize(unresolved_data)
        @data = []
        @unresolved_data = unresolved_data
      end

      # Returns the resolved member at index, resolving (and caching) it
      # from unresolved_data on first access.
      def [](index)
        @data[index] ||= @unresolved_data.to_a[index].to_a
      end

      # Resolves every member of unresolved_data and returns self.
      def resolve
        unless resolved?
          0.upto(@unresolved_data.length - 1) { |index| self[index] }
        end

        self
      end

      # True when data holds as many non-nil members as unresolved_data
      # has members.
      def resolved?
        @data.compact.length == @unresolved_data.length
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'ms/data/simple'
2
+
module Ms
  module Data
    module_function

    # Initializes a new transposed data array.
    def new_transposed(unresolved_data)
      Transposed.new(unresolved_data)
    end

    # A Transposed data array lazily evaluates its unresolved data as
    # a transposed array.  The unresolved data is evaluated
    # into an array using to_a.
    #
    #   t = Ms::Data::Transposed.new([[1,4],[2,5],[3,6]])
    #
    #   t.unresolved_data     # => [[1,4],[2,5],[3,6]]
    #   t.data                # => []
    #   t[0]                  # => [1,2,3]
    #   t[1]                  # => [4,5,6]
    #   t.data                # => [[1,2,3], [4,5,6]]
    #
    class Transposed < Simple

      # Resolves the data (if necessary) and returns the member at index.
      def [](index)
        resolve.data[index]
      end

      # True once data has been populated by resolve.
      def resolved?
        !@data.empty?
      end

      # Transposes unresolved_data (via to_a) into data and returns self.
      def resolve
        @data = @unresolved_data.to_a.transpose unless resolved?
        self
      end

    end
  end
end
data/lib/ms/data.rb ADDED
@@ -0,0 +1,57 @@
1
+ require 'ms/data/interleaved'
2
+ require 'ms/data/transposed'
3
+
module Ms

  # The Data module contains a number of classes providing a standard way to
  # resolve various data storage formats into a 'simple' data array.
  #
  #   type           format
  #   simple         [[mzs,...], [intensities...]]
  #   transposed     [[mz,intensity], [mz,intensity], ...]
  #   interleaved    [mz,intensity,mz,intensity,...]
  #
  # For instance:
  #
  #   s = Data.new([[1,2,3], [4,5,6]], :simple)
  #   s.resolve.data       # => [[1,2,3], [4,5,6]]
  #
  #   t = Data.new([[1,4],[2,5],[3,6]], :transposed)
  #   t.resolve.data       # => [[1,2,3], [4,5,6]]
  #
  #   i = Data.new([1,4,2,5,3,6], :interleaved)
  #   i.resolve.data       # => [[1,2,3], [4,5,6]]
  #
  # Data is always resolved by calling to_a on the unresolved data object
  # and then rearranging as needed (in the case of simple data, to_a is
  # also called on each member of the unresolved data array).  This lazy
  # resolution allows the use of non-array unresolved_data objects such
  # as Data::LazyString:
  #
  #   str = [[1,4,2,5,3,6].pack("g*")].pack("m")
  #   unresolved_data = Data::LazyString.new(str)
  #
  #   i = Data.new(unresolved_data, :interleaved)
  #   i.resolve.data       # => [[1,2,3], [4,5,6]]
  #
  # Obviously the big advantage of lazy data resolution is that Data objects
  # may be instantiated cheaply while expensive operations like unpacking and
  # rearrangement may be put off or not executed at all.
  #
  module Data
    module_function

    # Initializes a new data array of the specified type by forwarding
    # data to the "new_<type>" method.
    #
    #   simple = Ms::Data.new([[1,2,3], [4,5,6]], :simple)
    #   simple.class         # => Ms::Data::Simple
    #
    #   interleaved = Ms::Data.new([1,4,2,5,3,6], :interleaved)
    #   interleaved.class    # => Ms::Data::Interleaved
    #
    # A type without a corresponding "new_<type>" method results in a
    # NoMethodError from send.
    def new(data, type=:simple)
      send("new_#{type}", data)
    end
  end
end
@@ -0,0 +1,12 @@
module Ms
  module Format
    # Raised to signal a formatting problem.  In addition to the message, a
    # FormatError carries the offending string as str.
    #
    # FormatError is a StandardError so that it is caught by a bare rescue
    # (subclassing Exception directly bypasses 'rescue => e').
    class FormatError < StandardError
      # The string associated with the error (may be nil).
      attr_accessor :str

      # str defaults to nil so the error can also be raised in the usual
      # 'raise FormatError, "message"' form.
      def initialize(msg, str=nil)
        super(msg)
        @str = str
      end
    end
  end
end