mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/gi.rb DELETED
@@ -1,124 +0,0 @@
1
- require 'open-uri'
2
- require 'rexml/document'
3
- require 'rexml/streamlistener'
4
-
5
- $ANNOTS = []
6
-
7
# REXML stream listener that collects the text of every
# <Item Name="Title"> element it encounters (one annotation per title).
class GIListener
  include REXML
  include StreamListener

  # Array of collected title strings, in document order.
  attr_accessor :annotations

  def initialize
    @get_title = false
    @annotations = []
  end

  # Arms the listener when an <Item Name="Title"> element opens.
  def tag_start(name, attributes)
    @get_title = true if name == "Item" && attributes["Name"] == "Title"
  end

  # Records the first text node seen while armed, then disarms.
  def text(text)
    return unless @get_title
    @annotations.push text.chomp
    @get_title = false
  end

  # An element that closes while the listener is still armed had no text
  # node at all (an empty title).  Record an empty annotation and disarm so
  # that the next unrelated text node is not mistaken for the title.
  def tag_end(_name)
    return unless @get_title
    @annotations.push ''
    @get_title = false
  end
end
36
-
37
-
38
-
39
# Looks up protein annotations for GI numbers through NCBI eutils.
class GI
  # Maximum number of ids sent in a single esummary request.
  BATCH_SIZE = 500

  # takes an array of gi numbers and returns an array of annotation
  # This allows use of the batch search mode on NCBI
  # returns nil if no internet connection (a SocketError was raised)
  #
  # The list is queried in batches of at most BATCH_SIZE ids and is NOT
  # modified (the previous implementation sliced 0..BATCH_SIZE, i.e. 501 ids
  # per request, and emptied the caller's array as a side effect).
  def self.gi2annot(list_of_gi_numbers)
    annots = []
    # open-uri exposes URI.open on newer Rubies; on Ruby 3+ a bare
    # Kernel#open no longer opens URLs.  Older Rubies only have Kernel.open.
    opener = URI.respond_to?(:open) ? URI : Kernel
    list_of_gi_numbers.each_slice(BATCH_SIZE) do |batch|
      url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=protein&retmode=xml&id=#{batch.join(",")}"
      begin
        opener.open(url) do |handle|
          # open-uri may hand back a StringIO or a Tempfile; both answer #read
          annots.concat(parse_etool_output(handle.read))
        end
      rescue SocketError
        return nil
      end
    end
    annots
  end

  # Returns a list of Annotation strings parsed from esummary XML
  # (a String or IO accepted by REXML's stream parser).
  # NOTE: this stays public; the `protected` keyword that used to precede it
  # never applied to a `def self.` method.
  def self.parse_etool_output(handle)
    listener = GIListener.new
    parser = REXML::Parsers::StreamParser.new(handle, listener)
    parser.parse
    listener.annotations
  end
end
79
-
80
-
81
-
82
- =begin
83
-
84
- <?xml version="1.0" encoding="ISO-8859-1"?>
85
- <!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD eSummaryResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSummary_041029.dtd">
86
- <eSummaryResult>
87
-
88
- <DocSum>
89
- <Id>24115498</Id>
90
- <Item Name="Caption" Type="String">NP_710008</Item>
91
- <Item Name="Title" Type="String">chaperonin GroEL [Shigella flexneri 2a str. 301]</Item>
92
- <Item Name="Extra" Type="String">gi|24115498|ref|NP_710008.1|[24115498]</Item>
93
- <Item Name="Gi" Type="Integer">24115498</Item>
94
- <Item Name="CreateDate" Type="String">2002/10/16</Item>
95
-
96
- <Item Name="UpdateDate" Type="String">2006/04/03</Item>
97
- <Item Name="Flags" Type="Integer">512</Item>
98
- <Item Name="TaxId" Type="Integer">198214</Item>
99
- <Item Name="Status" Type="String">live</Item>
100
- <Item Name="ReplacedBy" Type="String"></Item>
101
- <Item Name="Comment" Type="String"><![CDATA[ ]]></Item>
102
- </DocSum>
103
-
104
-
105
- <DocSum>
106
- <Id>434011</Id>
107
- <Item Name="Caption" Type="String">CAA24741</Item>
108
-
109
- <Item Name="Title" Type="String">unnamed protein product [Escherichia coli]</Item>
110
- <Item Name="Extra" Type="String">gi|434011|emb|CAA24741.1|[434011]</Item>
111
- <Item Name="Gi" Type="Integer">434011</Item>
112
- <Item Name="CreateDate" Type="String">1983/12/06</Item>
113
- <Item Name="UpdateDate" Type="String">2005/04/18</Item>
114
- <Item Name="Flags" Type="Integer">0</Item>
115
- <Item Name="TaxId" Type="Integer">562</Item>
116
- <Item Name="Status" Type="String">live</Item>
117
- <Item Name="ReplacedBy" Type="String"></Item>
118
-
119
- <Item Name="Comment" Type="String"><![CDATA[ ]]></Item>
120
- </DocSum>
121
-
122
- </eSummaryResult>
123
-
124
- =end
data/lib/group_by.rb DELETED
@@ -1,10 +0,0 @@
1
-
2
- #taken from rails, will be in Ruby 1.9
3
module Enumerable
  # Groups the elements by the value the block returns for each of them:
  # returns a Hash of block value => Array of elements (in iteration order).
  #
  # Only defined when the running Ruby does not already provide group_by, so
  # the built-in implementation is never overwritten.  Without a block an
  # enumerator is returned instead of raising LocalJumpError.
  unless method_defined?(:group_by)
    def group_by
      return to_enum(:group_by) unless block_given?
      inject({}) do |groups, element|
        (groups[yield(element)] ||= []) << element
        groups
      end
    end
  end
end
data/lib/index_by.rb DELETED
@@ -1,11 +0,0 @@
1
-
2
- # taken from rails (will be in Ruby 1.9??)
3
-
4
module Enumerable
  # Builds a Hash that maps the block's value for each element to that
  # element.  When two elements yield the same key the later one wins.
  #
  # Only defined when no index_by exists yet, so an implementation loaded
  # earlier is left alone.  Without a block an enumerator is returned
  # instead of raising LocalJumpError.
  unless method_defined?(:index_by)
    def index_by
      return to_enum(:index_by) unless block_given?
      inject({}) do |accum, elem|
        accum[yield(elem)] = elem
        accum
      end
    end
  end
end
data/lib/merge_deep.rb DELETED
@@ -1,21 +0,0 @@
1
-
2
class Hash

  # Returns a new hash: self merged with hash2, where values that are hashes
  # on BOTH sides are themselves merged, down to the level specified.
  #
  #   level 0 - same as Hash#merge
  #   level 1 - (default) hashes directly inside the hash are merged
  #   level n - hashes nested up to n levels deep are merged
  #
  # Below the requested level, the value from hash2 simply replaces the value
  # from self.  Neither receiver nor argument is modified.
  # Raises ArgumentError for a negative level.
  def merge_deep(hash2, level=1)
    raise ArgumentError, "level must be >= 0 (got #{level.inspect})" if level < 0
    return merge(hash2) if level == 0

    merged = merge(hash2)
    each do |k, v|
      other = hash2[k]
      if v.is_a?(Hash) && other.is_a?(Hash)
        merged[k] = v.merge_deep(other, level - 1)
      end
    end
    merged
  end
end
21
-
data/lib/ms/converter/mzxml.rb DELETED
@@ -1,77 +0,0 @@
1
-
2
module MS ; end
module MS::Converter ; end
# Helpers for locating a RAW -> mzXML converter program and running it.
module MS::Converter::MzXML
  # Executable names searched for on the PATH, in order of preference.
  Potential_mzxml_converters = %w(readw.exe readw t2x)

  # takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
  #def strip_time(time)
  #  return time[2...-1]
  #end

  # Works on a copy of the filename with backslashes converted to forward
  # slashes (the caller's string is not modified).
  # if .mzXML returns the filename
  # if .raw or .RAW converts the file to .mzXML and returns mzXML filename
  # if no recognized extension, looks for .mzXML file, then .RAW file (and
  # converts)
  # aborts if file was not able to be converted (including when no converter
  # program is found on the PATH)
  # returns nil if a file that can be converted or used was not found
  def self.file_to_mzxml(file)
    file = file.gsub("\\", '/')
    if file =~ /\.mzXML$/
      file
    elsif file =~ /\.RAW$/i
      mzxml_file = file.sub(/\.RAW$/i, '.mzXML')
      run_converter(file)
      unless File.exist? mzxml_file
        abort "Couldn't convert #{file} to #{mzxml_file}"
      end
      mzxml_file
    else
      found = %w(.mzXML .RAW .raw).find {|ext| File.exist?(file + ext) }
      found ? file_to_mzxml(file + found) : nil
    end
  end

  # Runs the first converter found on the PATH against raw_file from inside
  # the file's directory (t2x writes its output to the cwd).  Does nothing
  # when no converter is available.  The command is passed as an argument
  # array, so no shell is involved and file names containing spaces or shell
  # metacharacters are handed to the converter untouched.
  def self.run_converter(raw_file)
    converter = find_mzxml_converter
    return unless converter
    basename = File.basename(raw_file)
    cmd = [converter, basename]
    # readw takes: <input> c <output>
    if File.basename(converter) =~ /readw/
      cmd.push('c', basename.sub(/\.RAW$/i, '.mzXML'))
    end
    Dir.chdir(File.dirname(raw_file)) do
      reply = IO.popen(cmd) {|io| io.read }
      puts reply if $VERBOSE
    end
  end
  private_class_method :run_converter

  # Searches each path element and returns the first converter it finds
  # (as path joined with the executable name)
  # returns nil if none found
  # PATH entries that are empty or do not exist are skipped, and the PATH is
  # split on the platform's separator so Windows drive letters stay intact.
  def self.find_mzxml_converter
    ENV['PATH'].to_s.split(File::PATH_SEPARATOR).each do |path|
      next if path.empty?
      Potential_mzxml_converters.each do |pc|
        candidate = File.join(path, pc)
        return candidate if File.exist?(candidate)
      end
    end
    nil
  end

end
77
-
data/lib/ms/gradient_program.rb DELETED
@@ -1,170 +0,0 @@
1
-
2
- # This is modeled after the Thermo gradient
3
# A pump gradient program (pump type, solvents and a table of time points)
# read from Thermo files, where text is stored with a NUL byte after every
# character.
class GradientProgram
  attr_accessor :time_points
  attr_accessor :pump_type
  # array of solvents parallel to TimePoint percentages array
  attr_accessor :solvents

  def initialize(pump_type, time_points=[], solvents=[])
    @pump_type = pump_type
    @time_points = time_points
    @solvents = solvents
  end

  def ==(other)
    self.class == other.class and @pump_type==other.pump_type and @solvents == other.solvents and @time_points == other.time_points
  end

  # gets the first gradient program encountered in the filehandle
  # (reading continues from the handle's current position).
  # returns nil if no gradient table is found before the end of the data.
  # A table that runs into the end of the data (no terminator line) is
  # returned with the rows read so far.
  # NOTE(review): the handle is presumably expected to deliver raw bytes
  # (binary mode) - confirm against callers.
  def self.get_gradient_program(fh)
    thermo_newline = "\n\000"
    gradient = "[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000"
    # Xcalibur 2.x writes "<pump type> gradient table:",
    # Xcalibur 1.x writes "Gradient Program:"
    xcal_2x_regexp = Regexp.new(gradient + "t\000a\000b\000l\000e\000:\000")
    xcal_1x_regexp = Regexp.new(gradient + "P\000r\000o\000g\000r\000a\000m\000:\000")
    find_gtable_regexp = Regexp.new(gradient)

    xcal_version = nil
    pump_type = ''
    fh.each(thermo_newline) do |line|
      next unless line =~ find_gtable_regexp
      if line =~ xcal_1x_regexp
        pump_type = '' ## have to look way back in file for this
        xcal_version = '1.0'
        break
      elsif line =~ xcal_2x_regexp
        # the pump type is whatever precedes " gradient table" on the line;
        # left empty when the line does not have that shape
        md = /(.*) .g.r.a.d.i.e.n.t. .t.a.b.l.e/.match(line)
        pump_type = md ? read_thermo_string(md.captures[0]) : ''
        xcal_version = '2.0'
        break
      end
    end
    return nil unless xcal_version

    fh.gets(thermo_newline) # nothing
    fh.gets(thermo_newline) # table headers (not used)

    # 2.x tables end with an empty line, 1.x tables with a run of NUL bytes
    null_char_regexp = Regexp.new("^\000\000\000\000")
    time_points = []
    # gets returns nil at end of data: stop there instead of parsing nil
    while (line = fh.gets(thermo_newline))
      if xcal_version == '2.0'
        break if line == thermo_newline
      else
        break if line =~ null_char_regexp
      end
      # 0 0.00 95.0 5.0 0.0 0.0 38.0 x
      # 1 1.00 90.0 10.0 0.0 0.0 38.0 o
      pieces = table_row_to_pieces(line, xcal_version)
      time_points << TimePoint.new(pieces[1].to_f, pieces[6].to_f, pieces[2,4].map{|x| x.to_f })
    end
    GradientProgram.new(pump_type, time_points, %w(A B C D))
  end

  # returns the elements of a gradient table row properly cast
  # NOTE: Xcal 2.X starts index with 0, 1.X starts with 1
  # (and this is how it will be returned!)
  # NOTE: Xcal 1.X will be shorter by one (doesn't have the o/x string!)
  # [(Int) index, time (Float), %A (Float), %B (Float), %C (Float), %D (Float),
  # FlowRate (Float), o/x (String)]
  # xcal_version is compared numerically (so '10.0' counts as >= '2.0').
  def self.table_row_to_pieces(line,xcal_version='2.0')
    string = read_thermo_string(line)
    if xcal_version.to_f >= 2.0
      # at first, I thought you could just split on spaces, but the table is
      # designed to have a certain number of chars per column padded with
      # spaces. This is the way to do it.
      index = string[0,4].to_i
      (tm, a, b, c, d) = (0...5).to_a.map do |x|
        string[(x*6)+4,6].rstrip.to_f
      end
      fr = string[34,7].rstrip.to_f
      ox = string[41,4].rstrip
      [index, tm, a, b, c, d, fr, ox]
    else
      index = string[0,5].lstrip.to_i
      tm = string[5,13].lstrip.to_f
      fr = string[18,16].lstrip.to_f
      (a,b,c,d) = (0..3).to_a.map do |x|
        string[(x*8)+34, 8].lstrip.to_f
      end
      [index, tm, a, b, c, d, fr]
    end
  end

  # takes a filehandle
  # returns an array of gradient programs from a thermo filehandle.
  # Acceptable file types include a .meth file and a .raw file
  def self.all_from_handle(fh)
    # 0005340: 3000 2e00 3000 3000 0a00 0a00 5300 6100 0...0.0.....S.a.
    # 0005350: 6d00 7000 6c00 6500 2000 5000 7500 6d00 m.p.l.e. .P.u.m.
    # 0005360: 7000 2000 6700 7200 6100 6400 6900 6500 p. .g.r.a.d.i.e.
    # 0005370: 6e00 7400 2000 7400 6100 6200 6c00 6500 n.t. .t.a.b.l.e.
    # 0005380: 3a00 0a00 0a00 4e00 6f00 2e00 2000 5400 :.....N.o... .T.
    # 0005390: 6900 6d00 6500 2000 2000 4100 2500 2000 i.m.e. . .A.%. .
    # 00053a0: 2000 2000 2000 4200 2500 2000 2000 2000 . . .B.%. . . .
    # 00053b0: 2000 4300 2500 2000 2000 2000 2000 4400 .C.%. . . . .D.
    # 00053c0: 2500 2000 2000 2000 2000 b500 6c00 2f00 %. . . . ...l./.
    # 00053d0: 6d00 6900 6e00 2000 0a00 3000 2000 2000 m.i.n. ...0. . .
    # 00053e0: 2000 3000 2e00 3000 3000 2000 2000 3000 .0...0.0. . .0.
    # 00053f0: 2e00 3000 2000 2000 2000 3000 2e00 3000 ..0. . . .0...0.
    # 0005400: 2000 2000 2000 3100 3000 3000 2e00 3000 . . .1.0.0...0.
    programs = []
    while (gp = get_gradient_program(fh))
      programs << gp
    end
    programs
  end

  # returns every other character of string, starting with the first
  # (drops the NUL that follows each character in thermo text)
  def self.read_thermo_string(string)
    chars = []
    (0...string.size).step(2) do |i|
      chars << string[i,1]
    end
    chars.join
  end

  # keeps the first two of every four characters and packs the result as a
  # string of hex digits
  def self.read_thermo_string_as_hex(string)
    chars = []
    (0...string.size).step(4) do |i|
      chars << string[i,2]
    end
    [chars.join].pack('H*')
  end

end
150
-
151
class GradientProgram
  # One row of a gradient table: a time, a flow rate and the solvent
  # percentages at that time.
  class TimePoint
    # time in minutes
    attr_accessor :time
    # flow_rate in ul/min
    attr_accessor :flow_rate
    # percentages
    attr_accessor :percentages

    def initialize(time=nil, flow_rate=nil, percentages=[])
      @time = time
      @flow_rate = flow_rate
      @percentages = percentages
    end

    # true when other is a TimePoint with equal time, flow rate and
    # percentages
    def ==(other)
      self.class == other.class and @time==other.time and @flow_rate == other.flow_rate and @percentages == other.percentages
    end

    # Strict (type-sensitive) equality used for Hash keys and Array#uniq;
    # kept consistent with #hash below.
    def eql?(other)
      self.class == other.class and @time.eql?(other.time) and @flow_rate.eql?(other.flow_rate) and @percentages.eql?(other.percentages)
    end

    # Hash code derived from the same fields eql? compares, so that equal
    # time points land in the same Hash bucket.
    def hash
      [self.class, @time, @flow_rate, @percentages].hash
    end
  end
end
169
-
170
-
data/lib/ms/msrun.rb DELETED
@@ -1,244 +0,0 @@
1
-
2
- require 'ms/scan'
3
- require 'ms/parser'
4
- require 'ms/msrun_index'
5
- require 'ms/converter/mzxml'
6
-
7
- #require 'ms/parser/mzxml'
8
- #require 'ms/parser/mzdata'
9
-
10
- module MS; end
11
- class MS::MSRun
12
-
13
- MSRunDefaultOpts = { :lazy => :string }
14
-
15
- attr_accessor :start_time, :end_time
16
- attr_accessor :scans
17
- # (just for reference) the type of file this is (as symbol)
18
- attr_accessor :filetype
19
- # (just for reference) the version string of this type of file
20
- attr_accessor :version
21
- # the total number of scans
22
- attr_writer :scan_count
23
-
24
-
25
- #### # [note: precursor intensities not guaranteed to exist unless :
26
- # TODO: may need to eliminate unavailable precursor intensities if they
27
- # doing lazy evaluation?? or it becomes lazy too??
28
-
29
- # OPTIONS:
30
- # :lazy => :string | :not | :no_spectra | :io
31
- # :string = (default) stores each spectrum as a base64 decoded
32
- # string that is further processed into Arrays of Floats when m/z
33
- # or intensity information is access. This lazy evaluation
34
- # should work on most files.
35
- # :not = all information is read into memory and parsed into
36
- # objects. Should only be used for small-medium files (< 80MB on
37
- # a machine with 2GB memory)
38
- # :no_spectra = if no peak information is required use this to
39
- # avoid the overhead of parsing and creating spectra.
40
- # :io = stores the io object and indices into spectrum data.
41
- # When spectral information is requested (m/z or intensity
42
- # information) then the spectrum is read from the io object and
43
- # evaluated (requires an open io object when spectrum information
44
- # is requested)
45
- def initialize(file_or_io=nil, opts={})
46
- if opts[:lazy] == :io
47
- if !file_or_io.is_a?(IO)
48
- raise ArgumentError, "Caller must provide an IO object (rather than filename) if using {:lazy => :io}"
49
- end
50
- end
51
- myopts = MSRunDefaultOpts.merge(opts)
52
- myopts[:msrun] = self
53
- if file_or_io
54
- filetype_and_version = MS::Parser.filetype_and_version(file_or_io)
55
- parser = MS::Parser.new(filetype_and_version, :msrun, myopts)
56
- parser.parse(file_or_io, myopts)
57
- #MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
58
- (@filetype, @version) = filetype_and_version
59
- end
60
- end
61
-
62
- # This will automatically use :lazy => :io, open the file, and close it
63
- # after the block returns.
64
- # MS::MSRun.open("file.mzXML") do |ms|
65
- # ms.scans.each {|scan| ... do something }
66
- # end
67
- def self.open(filename, opts={})
68
- File.open(filename) do |fh|
69
- ms = MS::MSRun.new(fh, {:lazy => :io}.merge(opts))
70
- yield(ms)
71
- end
72
- end
73
-
74
- # returns an array, whose indices provide the number of scans in each index level the ms_levels, [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
75
- # ...
76
- def scan_counts
77
- ar = []
78
- ar[0] = 0
79
- scans.each do |sc|
80
- level = sc.ms_level
81
- unless ar[level]
82
- ar[level] = 0
83
- end
84
- ar[level] += 1
85
- ar[0] += 1
86
- end
87
- ar
88
- end
89
-
90
- def scan_count(mslevel=0)
91
- if mslevel == 0
92
- @scan_count
93
- else
94
- num = 0
95
- scans.each do |sc|
96
- if sc.ms_level == mslevel
97
- num += 1
98
- end
99
- end
100
- num
101
- end
102
- end
103
-
104
- # for level 1, finds first scan and asks if it has start_mz/end_mz
105
- # attributes. for other levels, asks for start_mz/ end_mz and takes the
106
- # min/max. If start_mz and end_mz are not found, goes through every scan
107
- # finding the max/min first and last m/z. returns [start_mz (rounded down to
108
- # nearest int), end_mz (rounded up to nearest int)]
109
- def start_and_end_mz(mslevel=1)
110
- if mslevel == 1
111
- # special case for mslevel 1 (where we expect scans to be same length)
112
- scans.each do |sc|
113
- if sc.ms_level == mslevel
114
- if sc.start_mz && sc.end_mz
115
- return [sc.start_mz, sc.end_mz]
116
- end
117
- break
118
- end
119
- end
120
- end
121
- hi_mz = nil
122
- lo_mz = nil
123
- # see if we have start_mz and end_mz for the level we want
124
- # set the initial hi_mz and lo_mz in any case
125
- have_start_end_mz = false
126
- scans.each do |sc|
127
- if sc.ms_level == mslevel
128
- if sc.start_mz && sc.end_mz
129
- lo_mz = sc.start_mz
130
- hi_mz = sc.end_mz
131
- else
132
- mz_ar = sc.spectrum.mzs
133
- hi_mz = mz_ar.last
134
- lo_mz = mz_ar.first
135
- end
136
- break
137
- end
138
- end
139
- if have_start_end_mz
140
- scans.each do |sc|
141
- if sc.ms_level == mslevel
142
- if sc.start_mz < lo_mz
143
- lo_mz = sc.start_mz
144
- end
145
- if sc.end_mz > hi_mz
146
- hi_mz = sc.end_mz
147
- end
148
- end
149
- end
150
- else
151
- # didn't have the attributes (find by brute force)
152
- scans.each do |sc|
153
- if sc.ms_level == mslevel
154
- mz_ar = sc.spectrum.mzs
155
- if mz_ar.last > hi_mz
156
- hi_mz = mz_ar.last
157
- end
158
- if mz_ar.last < lo_mz
159
- lo_mz = mz_ar.last
160
- end
161
- end
162
- end
163
- end
164
- [lo_mz.floor, hi_mz.ceil]
165
- end
166
-
167
- # returns an array of precursor mz by scan number
168
- # returns only the m/z of the FIRST precursor if multiple
169
- def precursor_mz_by_scan_num
170
- ar = Array.new(@scans.size + 1)
171
- @scans.each do |scan|
172
- if prec = scan.precursor
173
- ar[scan.num] = prec.mz
174
- else
175
- ar[scan.num] = nil
176
- end
177
- end
178
- ar
179
- end
180
-
181
- # returns an array of times and parallel array of spectra objects.
182
- # ms_level = 0 then all spectra and times
183
- # ms_level = 1 then all spectra of ms_level 1
184
- def times_and_spectra(ms_level=0)
185
- spectra = []
186
- if ms_level == 0
187
- times = @scans.map do |scan|
188
- spectra << scan.spectrum
189
- scan.time
190
- end
191
- [times, spectra]
192
- else # choose a particular ms_level
193
- times = []
194
- @scans.each do |scan|
195
- if ms_level == scan.ms_level
196
- spectra << scan.spectrum
197
- times << scan.time
198
- end
199
- end
200
- [times, spectra]
201
- end
202
- end
203
-
204
- # same as the instance method (creates an object without spectrum and calls
205
- # instance method of the same name)
206
- def self.precursor_mz_by_scan_num(file)
207
- self.new(file, :lazy => :no_spectra, :fix_bad_tags => true).precursor_mz_by_scan_num
208
- end
209
-
210
- # only adds the parent if one is not already present!
211
- def self.add_parent_scan(scans, add_intensities=false)
212
- #start = Time.now
213
- prev_scan = nil
214
- parent_stack = [nil]
215
- ## we want to set the level to be the first mslevel we come to
216
- prev_level = scans.first.ms_level
217
- scans.each do |scan|
218
- #next unless scan ## the first one is nil, (others?)
219
- level = scan.ms_level
220
- if prev_level < level
221
- parent_stack.unshift prev_scan
222
- end
223
- if prev_level > level
224
- (prev_level - level).times do parent_stack.shift end
225
- end
226
- if scan.ms_level > 1
227
- precursor = scan.precursor
228
- #precursor.parent = parent_stack.first # that's the next line's
229
- precursor[2] = parent_stack.first unless precursor[2]
230
- #precursor.intensity
231
- if add_intensities
232
- precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
233
- end
234
- end
235
- prev_level = level
236
- prev_scan = scan
237
- end
238
- #puts "TOOK #{Time.now - start} secs"
239
- end
240
-
241
- end
242
-
243
-
244
-