mspire 0.7.18 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/mspire +5 -0
- data/lib/core_ext/enumerable.rb +8 -0
- data/lib/mspire/commandline.rb +39 -0
- data/lib/mspire/cv/paramable.rb +72 -35
- data/lib/mspire/imzml/writer/commandline.rb +16 -7
- data/lib/mspire/imzml/writer.rb +22 -14
- data/lib/mspire/mzml/activation.rb +0 -5
- data/lib/mspire/mzml/chromatogram.rb +41 -6
- data/lib/mspire/mzml/chromatogram_list.rb +2 -19
- data/lib/mspire/mzml/component.rb +28 -4
- data/lib/mspire/mzml/cv.rb +1 -0
- data/lib/mspire/mzml/data_array.rb +164 -154
- data/lib/mspire/mzml/data_array_container_like.rb +6 -13
- data/lib/mspire/mzml/data_processing.rb +19 -5
- data/lib/mspire/mzml/file_description.rb +22 -4
- data/lib/mspire/mzml/index.rb +53 -0
- data/lib/mspire/mzml/index_list.rb +64 -55
- data/lib/mspire/mzml/instrument_configuration.rb +22 -7
- data/lib/mspire/mzml/io_index.rb +79 -0
- data/lib/mspire/mzml/io_indexable_list.rb +71 -0
- data/lib/mspire/mzml/isolation_window.rb +0 -5
- data/lib/mspire/mzml/parser.rb +10 -0
- data/lib/mspire/mzml/plms1.rb +14 -24
- data/lib/mspire/mzml/precursor.rb +41 -19
- data/lib/mspire/mzml/processing_method.rb +34 -7
- data/lib/mspire/mzml/product.rb +14 -1
- data/lib/mspire/mzml/reader.rb +154 -0
- data/lib/mspire/mzml/referenceable_param_group.rb +9 -2
- data/lib/mspire/mzml/run.rb +62 -5
- data/lib/mspire/mzml/sample.rb +16 -6
- data/lib/mspire/mzml/scan.rb +31 -16
- data/lib/mspire/mzml/scan_list.rb +18 -5
- data/lib/mspire/mzml/scan_settings.rb +4 -5
- data/lib/mspire/mzml/scan_window.rb +0 -6
- data/lib/mspire/mzml/selected_ion.rb +1 -8
- data/lib/mspire/mzml/software.rb +9 -4
- data/lib/mspire/mzml/source_file.rb +8 -4
- data/lib/mspire/mzml/spectrum.rb +60 -35
- data/lib/mspire/mzml/spectrum_list.rb +5 -34
- data/lib/mspire/mzml.rb +72 -210
- data/lib/mspire/plms1.rb +3 -0
- data/spec/mspire/cv/paramable_spec.rb +3 -3
- data/spec/mspire/mzml/data_array_spec.rb +19 -6
- data/spec/mspire/mzml/file_content_spec.rb +1 -4
- data/spec/mspire/mzml/index_list_spec.rb +5 -12
- data/spec/mspire/mzml/plms1_spec.rb +5 -9
- data/spec/mspire/mzml/referenceable_param_group_spec.rb +3 -3
- data/spec/mspire/mzml/source_file_spec.rb +1 -2
- data/spec/mspire/mzml/spectrum_list_spec.rb +54 -0
- data/spec/mspire/mzml/spectrum_spec.rb +2 -4
- data/spec/mspire/mzml_spec.rb +241 -21
- data/spec/spec_helper.rb +1 -0
- data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.mzML +103 -0
- data/spec/testfiles/mspire/mzml/j24z.idx_comp.3.mzML +6 -6
- metadata +14 -6
- data/bin/mzml_to_imzml +0 -9
- data/spec/mspire/mzml/file_description_spec.rb +0 -12
@@ -4,177 +4,187 @@ require 'mspire/cv/paramable'
|
|
4
4
|
|
5
5
|
module Mspire
|
6
6
|
class Mzml
|
7
|
-
|
8
|
-
|
9
|
-
include Mspire::CV::Paramable
|
10
|
-
alias_method :params_initialize, :initialize
|
11
|
-
alias_method :params_to_xml, :to_xml
|
12
|
-
|
13
|
-
DEFAULT_DTYPE = :float64
|
14
|
-
DEFAULT_COMPRESSION = true
|
15
|
-
DTYPE_TO_ACC = {
|
16
|
-
float64: 'MS:1000523',
|
17
|
-
float32: 'MS:1000521',
|
18
|
-
# float16: 'MS:1000520', # <- not supported w/o other gems
|
19
|
-
int64: 'MS:1000522', # signed
|
20
|
-
int32: 'MS:1000519', # signed
|
21
|
-
}
|
22
|
-
|
23
|
-
def type=(arg)
|
24
|
-
all_accs = %w(MS:1000514 MS:1000515)
|
25
|
-
params.delete_if {|param| all_accs.include?(param.accession) } if params
|
26
|
-
case arg
|
27
|
-
when :mz
|
28
|
-
describe! all_accs[0] # , nil, "MS:1000040"
|
29
|
-
when :intensity
|
30
|
-
describe! all_accs[1] # , nil, "MS:1000131"
|
31
|
-
end
|
32
|
-
arg
|
33
|
-
end
|
7
|
+
end
|
8
|
+
end
|
34
9
|
|
35
|
-
def type
|
36
|
-
if params
|
37
|
-
if params.any? {|param| param.accession == 'MS:1000514' }
|
38
|
-
:mz
|
39
|
-
elsif params.any? {|param| param.accession == 'MS:1000515' }
|
40
|
-
:intensity
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
10
|
|
45
|
-
|
46
|
-
|
47
|
-
|
11
|
+
# An Array of numbers (typically m/z values or intensities) that also carries
# mzML params (via Mspire::CV::Paramable) and can be read from / written to
# an mzML <binaryDataArray> element.
class Mspire::Mzml::DataArray < Array
  # Array#initialize is aliased so that #initialize below can call it
  # explicitly after setting up the param containers.
  alias_method :array_init, :initialize
  include Mspire::CV::Paramable

  DEFAULT_DTYPE = :float64
  DEFAULT_COMPRESSION = true
  DTYPE_TO_ACC = {
    float64: 'MS:1000523',
    float32: 'MS:1000521',
    # float16: 'MS:1000520',  # <- not supported w/o other gems
    int64: 'MS:1000522',  # signed
    int32: 'MS:1000519',  # signed
  }
  TYPE_XML = {
    mz: '<cvParam cvRef="MS" accession="MS:1000514" name="m/z array"/>',
    intensity: '<cvParam cvRef="MS" accession="MS:1000515" name="intensity array"/>'
  }

  # Array#pack / String#unpack directive for every supported dtype
  # (little-endian).
  DTYPE_TO_PACK_CODE = {
    float64: 'E*',
    float32: 'e*',
    int64: 'q<*',
    int32: 'l<*',
  }

  # precision accession => unpack directive (used when reading)
  ACC_TO_PACK_CODE = Hash[
    DTYPE_TO_ACC.map { |dtype, acc| [acc, DTYPE_TO_PACK_CODE[dtype]] }
  ]

  # Order in which a set of accessions is searched for a dtype. Floats come
  # first so sets that previously resolved to a float dtype still do.
  DTYPE_SEARCH_ORDER = [:float32, :float64, :int64, :int32]

  # Accepts the same arguments as Array.new.
  def initialize(*args)
    params_init # paramable
    array_init(*args)
  end

  # takes :mz or :intensity and sets the proper param among cvParams. Does
  # not do referenceableParamGroup resolution.
  #
  # Any value other than :intensity is treated like :mz. The cvParam of the
  # other type is removed; the requested one is appended only if it is not
  # already present.
  def type=(symbol)
    keep, remove =
      if symbol == :intensity then ['MS:1000515', 'MS:1000514']
      else ['MS:1000514', 'MS:1000515'] end

    kept = @cv_params.reject { |param| param.accession == remove }
    unless kept.any? { |param| param.accession == keep }
      kept.push(Mspire::CV::Param[keep])
    end
    @cv_params = kept
    symbol
  end

  # :mz or :intensity (or nil if none found)
  def type
    each_accessionable_param do |param|
      return :mz if param.accession == 'MS:1000514'
      return :intensity if param.accession == 'MS:1000515'
    end
    nil
  end

  # (optional) the DataProcessing object associated with this DataArray
  attr_accessor :data_processing

  # set this if the data is written to an external file (such as the ibd
  # file for imzML files)
  attr_accessor :external

  # returns a pair of new, empty data arrays
  def self.empty_data_arrays
    [self.new, self.new]
  end

  # Builds one DataArray per child node of xml. Returns a pair of empty
  # arrays if xml has no children.
  def self.data_arrays_from_xml(xml, link)
    data_arrays = xml.children.map do |binary_data_array_n|
      Mspire::Mzml::DataArray.from_xml(binary_data_array_n, link)
    end
    data_arrays.empty? ? empty_data_arrays : data_arrays
  end

  # Builds a DataArray from a binaryDataArray node. link must provide
  # :ref_hash and :data_processing_hash.
  #
  # Compression and precision are taken from the first matching accession
  # found among the params. Raises TypeError if no supported precision is
  # declared.
  def self.from_xml(xml, link)
    da = self.new
    # presumably describe_from_xml! returns the first node that is not a
    # param (the <binary> node) -- it is used as such below
    binary_n = da.describe_from_xml!(xml, link[:ref_hash])

    if (dp_id = xml[:dataProcessingRef])
      da.data_processing = link[:data_processing_hash][dp_id]
    end

    zlib_compression = nil
    precision_unpack = nil
    da.each_accessionable_param do |param|
      acc = param.accession
      if zlib_compression.nil?
        case acc
        when 'MS:1000574' then zlib_compression = true
        when 'MS:1000576' then zlib_compression = false
        end
      end
      precision_unpack ||= ACC_TO_PACK_CODE[acc]
    end

    unless precision_unpack
      raise TypeError, "binaryDataArray declares no supported precision " +
        "(expected one of: #{DTYPE_TO_ACC.values.join(', ')})"
    end

    data = binary_n.text.unpack("m*").first

    # some implementations leave data blank if there aren't peaks
    # even if they say it is zlib compressed...
    unzipped =
      if zlib_compression && data.size > 0 then Zlib::Inflate.inflate(data)
      else data end
    da.replace( unzipped.unpack(precision_unpack) )
    da
  end

  # returns a base64 string that can be used for xml representations of
  # the data
  #
  # args:
  #     array-like set-like               # where set-like responds to include?
  #     array-like dtype=:float64, compression=true
  #
  # In the second form an explicit false for compression disables zlib
  # compression; nil means "use the default". Raises on an unsupported dtype.
  def self.to_binary(array_ish, *args)
    if args.first.respond_to?(:include?)
      accessions = args.first
      dtype = DTYPE_SEARCH_ORDER.find do |candidate|
        accessions.include?(DTYPE_TO_ACC[candidate])
      end || :float64
      compression = !accessions.include?('MS:1000576')
    else
      dtype = args[0] || DEFAULT_DTYPE
      # `args[1] || DEFAULT_COMPRESSION` would turn an explicit false back
      # into true, so only nil falls through to the default
      compression = args[1].nil? ? DEFAULT_COMPRESSION : args[1]
    end

    pack_code = DTYPE_TO_PACK_CODE.fetch(dtype) { raise "unsupported dtype: #{dtype}" }
    # TODO: support faster pack method for NArray's in future
    string = array_ish.to_a.pack(pack_code)
    string = Zlib::Deflate.deflate(string) if compression
    Base64.strict_encode64(string)
  end

  # calls the class to_binary method with self and the given args
  def to_binary(*args)
    self.class.to_binary(self, *args)
  end

  # Writes a binaryDataArray element to builder. When the data is external,
  # encodedLength is 0 and neither the dtype/compression params nor the
  # binary element is written.
  def to_xml(builder, dtype=DEFAULT_DTYPE, compression=DEFAULT_COMPRESSION)
    # normalize nil so the written cvParams always match the encoded data
    dtype ||= DEFAULT_DTYPE
    compression = DEFAULT_COMPRESSION if compression.nil?

    encoded_length =
      if @external
        0
      else
        base64 = self.class.to_binary(self, dtype, compression)
        base64.bytesize
      end

    builder.binaryDataArray(encodedLength: encoded_length) do |bda_n|
      super(bda_n)
      unless self.external
        # can significantly speed up the below 2 lines:
        Mspire::CV::Param[ DTYPE_TO_ACC[dtype] ].to_xml(bda_n)
        Mspire::CV::Param[ compression ? 'MS:1000574' : 'MS:1000576' ].to_xml(bda_n)
        bda_n.binary(base64)
      end
    end
  end

  # takes an array of DataArray objects or other kinds of objects
  #
  # The first array defaults to type :mz and the second to :intensity when
  # they carry no type of their own. Further arrays are left untouched.
  def self.list_xml(arrays, builder)
    builder.binaryDataArrayList(count: arrays.size) do |bdal_n|
      arrays.zip([:mz, :intensity]) do |data_ar, typ|
        ar =
          if data_ar.is_a?(Mspire::Mzml::DataArray) then data_ar
          else Mspire::Mzml::DataArray.new(data_ar) end
        # typ is nil from the third array on; `type = nil` would tag it m/z
        ar.type = typ if typ && !ar.type
        ar.to_xml(bdal_n)
      end
    end
  end
end
|
@@ -38,21 +38,14 @@ module Mspire
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
#
|
42
|
-
#
|
43
|
-
|
44
|
-
def to_xml(builder, opts={}, &block)
|
41
|
+
# returns a hash with id, index, defaultArrayLength and the proper
|
42
|
+
# dataProcessing attributes filled out.
|
43
|
+
# Builds the attribute hash for the xml element: id, index and
# defaultArrayLength, plus dataProcessingRef when this object has a
# data_processing whose id differs from default_ids[:data_processing].
def data_array_xml_atts(default_ids)
  atts = {id: @id, index: @index, defaultArrayLength: default_array_length}
  processing = @data_processing
  return atts unless processing && default_ids[:data_processing] != processing.id

  atts[:dataProcessingRef] = processing.id
  atts
end
|
57
50
|
|
58
51
|
end
|
@@ -1,27 +1,41 @@
|
|
1
1
|
require 'mspire/mzml/list'
|
2
|
+
require 'mspire/mzml/processing_method'
|
2
3
|
|
3
4
|
module Mspire
  class Mzml
    # An id plus the ordered list of processing methods it groups; written
    # out as a dataProcessing element.
    class DataProcessing
      extend Mspire::Mzml::List

      attr_accessor :id, :processing_methods

      # yields self if given a block
      def initialize(id, processing_methods=[])
        @id = id
        @processing_methods = processing_methods
        yield(self) if block_given?
      end

      # Writes a dataProcessing element holding each processing method; a
      # method's position in the list is passed along as its order.
      # Returns the builder.
      def to_xml(builder)
        builder.dataProcessing( id: @id ) do |dp_n|
          processing_methods.each.with_index do |pm, position|
            pm.to_xml(dp_n, position)
          end
        end
        builder
      end

      # returns the order of the processing method (its position in
      # processing_methods, nil if it is not in the list)
      def order(processing_method)
        processing_methods.index(processing_method)
      end

      # Builds a DataProcessing from an xml node. Each child becomes a
      # ProcessingMethod whose software is looked up by softwareRef in
      # link[:software_hash].
      def self.from_xml(xml, link)
        methods = xml.children.map do |pm_n|
          software = link[:software_hash][pm_n[:softwareRef]]
          ProcessingMethod.new(software).describe_self_from_xml!(pm_n, link[:ref_hash])
        end
        self.new(xml[:id], methods)
      end

    end
  end
end
|
@@ -5,6 +5,7 @@ require 'mspire/mzml/contact'
|
|
5
5
|
module Mspire
|
6
6
|
class Mzml
|
7
7
|
class FileDescription
|
8
|
+
# note: FileDescription is NOT paramable!
|
8
9
|
|
9
10
|
# a summary of the different types of spectra, must be present
|
10
11
|
attr_accessor :file_content
|
@@ -21,14 +22,31 @@ module Mspire
|
|
21
22
|
attr_accessor :contacts
|
22
23
|
|
23
24
|
# hands the user the object if given a block
|
24
|
-
def initialize(file_content=nil, source_files=[], contacts=[]
|
25
|
+
def initialize(file_content=nil, source_files=[], contacts=[])
|
25
26
|
@file_content, @source_files, @contacts = file_content, source_files, contacts
|
26
|
-
|
27
|
+
yield(self) if block_given?
|
27
28
|
#raise ArgumentError, "FileDescription must have file_content" unless @file_content
|
28
29
|
end
|
29
30
|
|
30
|
-
def self.from_xml(xml)
|
31
|
-
|
31
|
+
# Builds a FileDescription from a fileDescription node.
#
# The first child is read as the file content. An immediately following
# sourceFileList (if any) supplies the source files, and every sibling
# after that is read as a contact.
def self.from_xml(xml, link)
  ref_hash = link[:ref_hash]
  file_content_n = xml.child
  obj = self.new( Mspire::Mzml::FileContent.new.describe_self_from_xml!(file_content_n, ref_hash) )

  next_n = file_content_n.next
  return obj unless next_n

  if next_n.name == 'sourceFileList'
    obj.source_files = next_n.children.map do |source_file_n|
      Mspire::Mzml::SourceFile.from_xml(source_file_n, ref_hash)
    end
    next_n = next_n.next
    return obj unless next_n
  end

  # Start the walk at the first node after the (optional) sourceFileList.
  # The loop variable must be assigned before its first use.
  contact_n = next_n
  while contact_n
    obj.contacts << Mspire::Mzml::Contact.from_xml(contact_n, ref_hash)
    contact_n = contact_n.next
  end
  obj
end
|
33
51
|
|
34
52
|
def to_xml(builder)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
|
2
|
+
module Mspire
  class Mzml
    # the array holds start bytes
    class Index < Array

      # the name of the index (as a symbol)
      attr_accessor :name

      # a parallel array of ids (idRef's)
      attr_accessor :ids

      # @return [Integer] the start byte of the spectrum (nil if the id is
      #   unknown or arg is neither an Integer nor a String)
      # @param [Object] an Integer (the index number) or String (an id string)
      def start_byte(arg)
        case arg
        when Integer
          self[arg]
        when String
          # built on the first lookup by id and reused afterwards
          @id_index ||= create_id_index
          @id_index[arg]
        end
      end

      # returns hash of id to start_byte
      def create_id_index
        Hash[ids.zip(self)]
      end

      # generates a scan to index hash that points from scan number to the
      # spectrum index number.  returns the index, nil if the scan ids
      # are not present and spectra are, or false if they are not unique.
      def create_scan_to_index
        scan_re = /scan=(\d+)/
        scan_to_index = {}
        ids.each_with_index do |id, index|
          md = id.match(scan_re)
          next unless md
          scan_num = md[1].to_i
          return false if scan_to_index.key?(scan_num)
          scan_to_index[scan_num] = index
        end
        # there are ids, but none of them carried a scan number
        return nil if scan_to_index.empty? && !ids.empty?
        scan_to_index
      end
    end
  end
end
|
53
|
+
|
@@ -1,10 +1,12 @@
|
|
1
|
+
require 'mspire/mzml/index'
|
2
|
+
|
1
3
|
module Mspire
|
2
4
|
class Mzml
|
3
5
|
# A simple array of indices but #[] has been overloaded to find an index
|
4
6
|
# by name
|
5
7
|
#
|
6
8
|
# index_list[0] # the first index
|
7
|
-
# index_list.map(&:
|
9
|
+
# index_list.map(&:name) # -> [:spectrum, :chromatogram]
|
8
10
|
# index_list[:spectrum] # the spectrum index
|
9
11
|
# index_list[:chromatogram] # the chromatogram index
|
10
12
|
class IndexList < Array
|
@@ -20,12 +22,24 @@ module Mspire
|
|
20
22
|
self.find {|index| index.name == int_or_symbol }
|
21
23
|
end
|
22
24
|
end
|
23
|
-
end
|
24
25
|
|
25
|
-
|
26
|
-
|
26
|
+
# the name of every index in the list, in list order
def keys
  map { |index| index.name }
end
|
29
|
+
|
30
|
+
# returns each name and associated index object
|
31
|
+
# returns each name and associated index object (as a two element array
# handed to the block); without a block an enumerator is returned
def each_pair(&block)
  return enum_for(__method__) unless block
  each do |index|
    block.call([index.name, index])
  end
end
|
27
35
|
|
28
36
|
class << self
|
37
|
+
|
38
|
+
# either reads in from file or creates an IndexList
|
39
|
+
# either reads in from file or creates an IndexList: the index list found
# in io is used when there is one, otherwise one is generated from io
def from_io(io)
  existing = read_index_list(io)
  return existing if existing
  create_index_list(io)
end
|
42
|
+
|
29
43
|
# returns an Integer or nil if not found
|
30
44
|
# does a single jump backwards from the tail of the file looking for
|
31
45
|
# an xml element based on tag. If it is not found, returns nil
|
@@ -35,64 +49,59 @@ module Mspire
|
|
35
49
|
md = io.readlines("\n").map {|line| line.match(tag_re) }.compact.shift
|
36
50
|
md[1].to_i if md
|
37
51
|
end
|
38
|
-
end
|
39
|
-
|
40
|
-
# an index indexed by scan number
|
41
|
-
attr_accessor :by_scans
|
42
|
-
|
43
|
-
# the name of the index (as a symbol)
|
44
|
-
attr_accessor :name
|
45
|
-
|
46
|
-
# a parallel array of ids (idRef's)
|
47
|
-
attr_accessor :ids
|
48
|
-
|
49
|
-
def start_byte_and_id(int)
|
50
|
-
[self[int], ids[int]]
|
51
|
-
end
|
52
|
-
|
53
|
-
# returns hash of id to start_byte
|
54
|
-
def create_id_index
|
55
|
-
Hash[self.ids.zip(self)]
|
56
|
-
end
|
57
52
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
53
|
+
# @return [Mspire::Mzml::IndexList] or nil if there is no indexList in the
|
54
|
+
# mzML
|
55
|
+
# @return [Mspire::Mzml::IndexList] or nil if there is no indexList in the
# mzML
#
# Seeks to the offset reported by index_offset and parses everything from
# there to the end of io. Each child of the parsed root becomes one Index
# (named after its 'name' attribute) holding the start bytes, with the
# matching idRef's in Index#ids.
def read_index_list(io)
  offset = index_offset(io)
  return nil unless offset

  io.seek(offset)
  # NOTE(review): @encoding is read on whatever object these singleton
  # methods live on and is not assigned anywhere in view -- confirm it is
  # meant to be nil here
  xml = Nokogiri::XML.parse(io.read, nil, @encoding, Parser::NOBLANKS)
  indices = xml.root.children.map do |index_n|
    index = Index.new
    index.name = index_n['name'].to_sym
    # start bytes go into the index itself, ids into the parallel array
    index.ids = index_n.children.map do |offset_n|
      index << offset_n.text.to_i
      offset_n['idRef']
    end
    index
  end
  IndexList.new(indices)
end
|
68
|
-
end
|
69
76
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
return false
|
82
|
-
else
|
83
|
-
scan_to_index[scan_num] = index
|
77
|
+
# Reads through and captures start bytes
|
78
|
+
# @return [Mspire::Mzml::IndexList]
|
79
|
+
# Reads through and captures start bytes
#
# Every line is checked for an opening tag starting with "spectrum" or
# "chromatogram" that carries an id; the byte position of that tag is
# recorded under its id. Only the first match on a line is recorded.
# @return [Mspire::Mzml::IndexList]
def create_index_list(io)
  # NOTE(review): the block argument is not used; the outer io is read
  offsets_by_kind = io.bookmark(true) do |inner_io|  # sets to beginning of file
    found = {:spectrum => {}, :chromatogram => {}}
    bytes_seen = 0
    io.each do |line|
      md = %r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
      found[md[1].to_sym][md[2]] = bytes_seen + md.pre_match.bytesize if md
      bytes_seen += line.bytesize
    end
    found
  end

  built = offsets_by_kind.map do |kind, id_to_byte|
    index = Index.new
    id_list = []
    id_to_byte.each do |id, startbyte|
      id_list << id
      index << startbyte
    end
    index.ids = id_list
    index.name = kind
    index
  end
  # we only return an index if there were some guys there
  IndexList.new(built.reject { |index| index.size == 0 })
end
|
87
|
-
|
88
|
-
by_scans = scan_to_index
|
89
|
-
elsif ids.size > 0
|
90
|
-
nil # there are scans, but we did not find scan numbers
|
91
|
-
else
|
92
|
-
scan_to_index
|
93
|
-
end
|
102
|
+
|
94
103
|
end
|
95
104
|
end
|
96
105
|
end
|
97
|
-
end
|
98
106
|
|
107
|
+
end
|