imzml 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,332 @@
1
+ require "ox"
2
+
3
+ module ImzML
4
+
5
# Entry point of the gem: streams an imzML file through the SAX handler and
# keeps the metadata the handler collected.
class Parser

  # Metadata object produced by ImzML::Sax for the parsed file.
  attr_reader :metadata

  # Parses the file at +filepath+.
  #
  # @param filepath [String] path of the imzML (XML) file to read
  # @raise [SystemCallError] whatever File.open raises when the file cannot be opened
  def initialize(filepath)
    sax = ImzML::Sax.new

    # The block form closes the file handle as soon as parsing finishes, even
    # when the parser raises; the original left the handle open.
    File.open(filepath) { |io| Ox.sax_parse(sax, io) }

    @metadata = sax.metadata
  end

end
18
+
19
# SAX event handler that collects imzML metadata while Ox streams the document.
#
# Ox drives the handler through #start_element, #attr and #end_element.
# Element and attribute names are compared against Symbols everywhere, so the
# handler expects Ox to deliver them as Symbols.
#
# Two parallel stacks are maintained: @stack holds the names of the elements
# that are currently open and @elements holds one attribute Hash per open
# element. All interpretation happens in #end_element, once the attributes of
# the closing element are complete.
class Sax < ::Ox::Sax

  # Ontology file (inside the gem's data directory) for every controlled
  # vocabulary id this handler knows how to load.
  OBO_FILES = {
    "MS" => "psi-ms.obo",
    "UO" => "unit.obo",
    "IMS" => "imagingMS.obo"
  }.freeze

  # Metadata object that is filled in while the document is parsed.
  attr_reader :metadata

  def initialize
    @metadata = Metadata.new
    @stack = []
    @elements = []

    # Initialised here so that a stray <cv> outside of <cvList> cannot hit nil.
    @cv_list = []

    # temporary values useful just for data parsing
    @reference_groups = {}
    @obo = {}
    @binary_type = nil
  end

  # Opens a new element: remembers its name and gives it an empty attribute Hash.
  #
  # @param name [Symbol] element name
  def start_element(name)
    @stack.push(name)
    @elements.push({})

    case name
    when :cvList
      @cv_list = []
    when :cv
      # the Hash is still empty here; #attr fills it in afterwards
      @cv_list << @elements.last
    when :binaryDataArray
      # forget the array kind of the previous binaryDataArray so its cvParams
      # can never be written into the wrong array
      @binary_type = nil
    end
  end

  # Stores an attribute on the element that is currently open.
  #
  # @param name [Symbol] attribute name
  # @param str [String] attribute value
  def attr(name, str)
    element = @elements.last
    return if element.nil? # skip attributes without correct elements (like <?xml ...)
    element[name] = str
  end

  # Closes an element and, depending on the element and its parent, copies its
  # attributes into the metadata.
  #
  # @param name [Symbol] element name
  def end_element(name)
    @stack.pop
    element = @elements.pop
    parent = @stack.last

    case name
    when :cvList
      load_ontologies
    when :software
      # save software list (raw, without detailed parsing)
      (@metadata.software ||= []) << element if parent == :softwareList
    when :referenceableParamGroupRef
      apply_binary_group(element) if parent == :binaryDataArray
    when :cvParam
      handle_cv_param(parent, element)
    end
  end

  private

  # Opens the OBO file of every vocabulary listed in <cvList>. Vocabularies
  # without an entry in OBO_FILES are skipped; the original built a path from a
  # nil file name for them, which raised a TypeError.
  def load_ontologies
    @cv_list.each do |cv|
      filename = OBO_FILES[cv[:id]]
      next if filename.nil?

      filepath = File.join(File.dirname(__FILE__), "..", "..", "data", filename)
      @obo[cv[:id].to_s] = Obo::Parser.new(filepath)
    end
  end

  # Finds the ontology stanza a cvParam refers to.
  #
  # @param element [Hash] attributes of the cvParam (:cvRef, :accession, ...)
  # @return [Obo::Stanza, nil] nil when the vocabulary was not loaded or does
  #   not contain the accession
  def lookup_stanza(element)
    ontology = @obo[element[:cvRef]]
    ontology && ontology.stanza(element[:accession])
  end

  # Routes a closed cvParam according to the element that contains it.
  def handle_cv_param(parent, element)
    case parent
    when :fileContent
      store_file_content(element)
    when :referenceableParamGroup
      # save reference group for further usage
      (@reference_groups[@elements.last[:id].to_sym] ||= []) << element
    when :sample
      # NOTE(review): every cvParam of a sample overwrites the previous one, so
      # only the last cvParam per sample id is kept -- confirm this is intended.
      samples = (@metadata.samples ||= {})
      samples[@elements.last[:id].to_sym] = element
    when :scanSettings
      store_scan_setting(element)
    when :processingMethod
      store_processing_action(element)
    when :scan
      store_spectrum_position(element)
    when :binaryDataArray
      store_binary_param(element)
    end
  end

  # save file content
  def store_file_content(element)
    stanza = lookup_stanza(element)
    return if stanza.nil? # unknown vocabulary or accession: nothing to classify

    # init basic structures
    @metadata.file_description ||= FileDescription.new
    file_content = (@metadata.file_description.file_content ||= FileContent.new)

    case element[:cvRef]
    when "MS"
      store_ms_file_content(file_content, stanza.parent_id, element)
    when "IMS"
      store_ims_file_content(file_content, stanza.id, element)
    end
  end

  # MS terms are classified by the parent of their stanza.
  def store_ms_file_content(file_content, parent_id, element)
    # save data file content
    if parent_id == FileContent::DATA_FILE_CONTENT
      file_content.data_file_contents ||= {}
      (file_content.data_file_contents[parent_id] ||= []) << element
    end

    # save spectrum representation
    if parent_id == FileContent::SPECTRUM_REPRESENTATION
      file_content.spectrum_representation = element
    end
  end

  # IMS terms are matched by their own id (cannot look by parent because the
  # OBO file is different and the parser doesn't handle it well, need to first
  # improve the OBO parser).
  def store_ims_file_content(file_content, term, element)
    # save binary type
    if term == FileContent::CONTINUOUS
      file_content.binary_type = :continuous
    elsif term == FileContent::PROCESSED
      file_content.binary_type = :processed
    end

    # save checksum (the value is stored the same way for both algorithms)
    if term == FileContent::MD5 || term == FileContent::SHA1
      file_content.checksum = element[:value]
    end

    # save identifier
    if term == FileContent::UNIVERSALLY_UNIQUE_IDENTIFIER
      file_content.uuid = element[:value]
    end
  end

  # save scan settings
  #
  # Typical cvParams seen here look like
  #   {:cvRef=>"IMS", :accession=>"IMS:1000401", :name=>"top down", :value=>""}
  #   {:cvRef=>"IMS", :accession=>"IMS:1000046", :name=>"pixel size x", :value=>"100",
  #    :unitCvRef=>"UO", :unitAccession=>"UO:0000017", :unitName=>"micrometer"}
  #   {:cvRef=>"MS", :accession=>"MS:1000834", :name=>"matrix solution", :value=>"DHB"}
  def store_scan_setting(element)
    scan_settings = (@metadata.scan_settings ||= {})
    setting = (scan_settings[@elements.last[:id].to_sym] ||= ScanSettings.new)

    # only terms of the imaging vocabulary are interpreted
    return unless element[:cvRef] == "IMS"

    stanza = lookup_stanza(element)
    return if stanza.nil?

    term = stanza.id

    line_scan_direction = line_scan_direction_for(term)
    setting.line_scan_direction = line_scan_direction if line_scan_direction

    scan_direction = scan_direction_for(term)
    setting.scan_direction = scan_direction if scan_direction

    scan_pattern = scan_pattern_for(term)
    setting.scan_pattern = scan_pattern if scan_pattern

    scan_type = scan_type_for(term)
    setting.scan_type = scan_type if scan_type

    store_image_property(setting, term, element[:value])
  end

  # detect correct line scan direction
  def line_scan_direction_for(term)
    case term
    when ScanSettings::LINE_SCAN_BOTTOM_UP then :bottom_up
    when ScanSettings::LINE_SCAN_LEFT_RIGHT then :left_right
    when ScanSettings::LINE_SCAN_RIGHT_LEFT then :right_left
    when ScanSettings::LINE_SCAN_TOP_DOWN then :top_down
    end
  end

  # detect scan direction
  def scan_direction_for(term)
    case term
    when ScanSettings::BOTTOM_UP then :bottom_up
    when ScanSettings::LEFT_RIGHT then :left_right
    when ScanSettings::RIGHT_LEFT then :right_left
    when ScanSettings::TOP_DOWN then :top_down
    end
  end

  # detect scan pattern
  def scan_pattern_for(term)
    case term
    when ScanSettings::MEANDERING then :meandering
    when ScanSettings::ONE_WAY then :one_way
    when ScanSettings::RANDOM_ACCESS then :random_access
    when ScanSettings::FLY_BACK then :fly_back
    end
  end

  # detect scan type
  def scan_type_for(term)
    case term
    when ScanSettings::HORIZONTAL_LINE_SCAN then :horizontal
    when ScanSettings::VERTICAL_LINE_SCAN then :vertical
    end
  end

  # detect image properties
  #
  # NOTE(review): values are converted with to_i, so a fractional pixel size or
  # dimension would be truncated -- confirm integers are always expected.
  def store_image_property(setting, term, value)
    image = (setting.image ||= ImzML::Image.new)

    case term
    when ScanSettings::MAX_DIMENSION_X
      (image.max_dimension ||= ImzML::Point.new).x = value.to_i
    when ScanSettings::MAX_DIMENSION_Y
      (image.max_dimension ||= ImzML::Point.new).y = value.to_i
    when ScanSettings::MAX_COUNT_OF_PIXEL_X
      (image.max_pixel_count ||= ImzML::Point.new).x = value.to_i
    when ScanSettings::MAX_COUNT_OF_PIXEL_Y
      (image.max_pixel_count ||= ImzML::Point.new).y = value.to_i
    when ScanSettings::PIXEL_SIZE_X
      (image.pixel_size ||= ImzML::Point.new).x = value.to_i
    when ScanSettings::PIXEL_SIZE_Y
      (image.pixel_size ||= ImzML::Point.new).y = value.to_i
    end
  end

  # parse processing methods
  #
  # @elements.last is the enclosing processingMethod and @elements[-2] the
  # element that contains it, whose id keys the data_processing Hash.
  def store_processing_action(element)
    data_processing = (@metadata.data_processing ||= {})
    processing = (data_processing[@elements[-2][:id].to_sym] ||= DataProcessing.new)
    processing.processing_method = @elements.last
    (processing.processing_method[:actions] ||= []) << element
  end

  # Returns the Spectrum the current position in the document belongs to,
  # creating it on first use. The spectrum element is expected three levels up
  # (@elements[-3]) from the cvParam / referenceableParamGroupRef being closed.
  def current_spectrum
    spectrums = (@metadata.spectrums ||= {})
    spectrums[@elements[-3][:id].to_sym] ||= Spectrum.new
  end

  # save spectrum position info
  def store_spectrum_position(element)
    point = (current_spectrum.position ||= ImzML::Point.new)

    point.x = element[:value].to_i if element[:accession] == Spectrum::POSITION_X
    point.y = element[:value].to_i if element[:accession] == Spectrum::POSITION_Y
  end

  # save spectrum binary data info
  #
  # Resolves the referenced parameter group, remembers in @binary_type which
  # array (:mz_binary or :intensity_binary) the surrounding binaryDataArray
  # describes and records the number type of that array on the metadata.
  def apply_binary_group(element)
    spectrum = current_spectrum
    spectrum.mz_binary ||= ImzML::Spectrum::BinaryData.new
    spectrum.intensity_binary ||= ImzML::Spectrum::BinaryData.new

    # a reference to an undeclared group is treated like an empty group
    group = @reference_groups[element[:ref].to_s.to_sym] || []

    # keep an already detected kind when this group carries none
    @binary_type = array_kind_of(group) || @binary_type

    number_type = number_type_of(group)
    return if number_type.nil? || @binary_type.nil?

    # @binary_type only ever holds :mz_binary or :intensity_binary
    @metadata.send("#{@binary_type}_data_type=", number_type)
  end

  # detect type of the binary data info based on referenced group content
  #
  # @return [Symbol, nil] kind named by the first matching param of the group
  def array_kind_of(group)
    group.each do |param|
      case param[:accession]
      when ImzML::Spectrum::BinaryData::MZ_ARRAY then return :mz_binary
      when ImzML::Spectrum::BinaryData::INTENSITY_ARRAY then return :intensity_binary
      end
    end
    nil
  end

  # detect binary data type
  #
  # @return [Symbol, nil] number type named by the first matching param of the group
  def number_type_of(group)
    group.each do |param|
      case param[:accession]
      when Metadata::BINARY_TYPE_8BIT_INTEGER then return :int8
      when Metadata::BINARY_TYPE_16BIT_INTEGER then return :int16
      when Metadata::BINARY_TYPE_32BIT_INTEGER then return :int32
      when Metadata::BINARY_TYPE_64BIT_INTEGER then return :int64
      when Metadata::BINARY_TYPE_32BIT_FLOAT then return :float32
      when Metadata::BINARY_TYPE_64BIT_FLOAT then return :float64
      end
    end
    nil
  end

  # save info about binary
  def store_binary_param(element)
    # without a detected array kind there is no array to attach the value to
    return if @binary_type.nil?

    # convert chosen type to mz_binary/intensity_binary property selector
    binary_data = current_spectrum.send(@binary_type)

    case element[:accession]
    when ImzML::Spectrum::BinaryData::EXTERNAL_ARRAY_LENGTH
      binary_data.length = element[:value].to_i
    when ImzML::Spectrum::BinaryData::EXTERNAL_OFFSET
      binary_data.offset = element[:value].to_i
    when ImzML::Spectrum::BinaryData::EXTERNAL_ENCODED_LENGHT # (sic) spelling of the constant
      binary_data.encoded_length = element[:value].to_i
    end
  end

end
331
+
332
+ end
@@ -0,0 +1,5 @@
1
module ImzML

  # Version of the gem; frozen so the shared String cannot be modified in place.
  VERSION = "0.0.2".freeze

end
data/lib/imzml.rb ADDED
@@ -0,0 +1,23 @@
1
+ require "obo"
2
+
3
+ require "obo_ext/parser"
4
+ require "obo_ext/stanza"
5
+
6
+ require "core_ext/string"
7
+
8
+ require "imzml/obo"
9
+ require "imzml/metadata"
10
+ require "imzml/metadata/point"
11
+ # require "imzml/metadata/spectrum"
12
+ require "imzml/metadata/file_description"
13
+ require "imzml/metadata/file_description/file_content"
14
+ require "imzml/metadata/scan_settings"
15
+ require "imzml/metadata/scan_settings/image"
16
+ require "imzml/metadata/data_processing"
17
+ require "imzml/metadata/run/spectrum"
18
+
19
+ require "imzml/parser"
20
+
21
+ module ImzML
22
+
23
+ end
@@ -0,0 +1,20 @@
1
module Obo

  # Lookup helpers added on top of the parser's #elements.
  class Parser

    # All stanzas among the parsed elements, in the order #elements yields them.
    #
    # Uses the non-destructive +select+: +to_a+ returns the receiver itself
    # when #elements already is an Array, so the previous +keep_if+ would have
    # removed the non-stanza entries from that Array in place.
    #
    # @return [Array<Obo::Stanza>]
    def stanzas
      elements.to_a.select { |entry| entry.is_a?(Obo::Stanza) }
    end

    # First stanza whose id equals +id+.
    #
    # @param id [String] stanza id to look for
    # @return [Obo::Stanza, nil] nil when no stanza has that id
    def stanza(id)
      stanzas.find { |entry| entry.id == id }
    end

    # Stanzas for which +parent?(id)+ is true.
    #
    # @param id [String] id of the parent stanza
    # @return [Array<Obo::Stanza>]
    def children_of(id)
      stanzas.select { |entry| entry.parent?(id) }
    end

  end

end
@@ -0,0 +1,19 @@
1
module Obo

  # Shortcuts for the "id" and "is_a" tags of a stanza.
  class Stanza

    # First value of the "id" tag.
    #
    # @return [Object, nil] nil when the stanza has no "id" tag
    def id
      # Array() turns a missing (nil) tag into [] instead of failing on nil.first
      Array(tagvalues["id"]).first
    end

    # First value of the "is_a" tag. Further "is_a" values are ignored.
    #
    # @return [Object, nil] nil when the stanza has no "is_a" tag
    def parent_id
      Array(tagvalues["is_a"]).first
    end

    # Tells whether +id+ equals #parent_id.
    #
    # @param id [Object] id of the candidate parent
    # @return [Boolean]
    def parent?(id)
      parent_id == id
    end

  end

end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: imzml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Ondra Beneš
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ox
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: obo
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.1'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 0.1.4
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '0.1'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 0.1.4
61
+ description: Parser for mass spectrometry imaging standard file format. Gem does not
62
+ check the validity of the input file.
63
+ email:
64
+ - ondra.benes@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - data/Example_Continuous.ibd
70
+ - data/Example_Continuous.imzML
71
+ - data/Example_Processed.ibd
72
+ - data/Example_Processed.imzML
73
+ - data/imagingMS.obo
74
+ - data/psi-ms.obo
75
+ - data/unit.obo
76
+ - lib/core_ext/string.rb
77
+ - lib/imzml.rb
78
+ - lib/imzml/metadata.rb
79
+ - lib/imzml/metadata/data_processing.rb
80
+ - lib/imzml/metadata/file_description.rb
81
+ - lib/imzml/metadata/file_description/file_content.rb
82
+ - lib/imzml/metadata/point.rb
83
+ - lib/imzml/metadata/run/spectrum.rb
84
+ - lib/imzml/metadata/scan_settings.rb
85
+ - lib/imzml/metadata/scan_settings/image.rb
86
+ - lib/imzml/metadata/spectrum.rb
87
+ - lib/imzml/obo.rb
88
+ - lib/imzml/parser.rb
89
+ - lib/imzml/version.rb
90
+ - lib/obo_ext/parser.rb
91
+ - lib/obo_ext/stanza.rb
92
+ homepage: https://github.com/beny/imzml
93
+ licenses: []
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.2.0
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Simple parser for imzML files.
115
+ test_files: []