imzml 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,332 @@
1
+ require "ox"
2
+
3
+ module ImzML
4
+
5
# Entry point of the gem: parses an imzML (XML) file with Ox's SAX parser
# and exposes whatever metadata the ImzML::Sax handler collected.
class Parser
  # Metadata object built by ImzML::Sax while the file was parsed.
  attr_reader :metadata

  # Parses the file at +filepath+ eagerly.
  #
  # @param filepath [String] path of the imzML file, handed to File.open
  # @raise [SystemCallError] if the file cannot be opened (from File.open)
  def initialize(filepath)
    sax = ImzML::Sax.new

    # Block form so the file handle is closed even when parsing raises;
    # previously the handle was left open until garbage collection.
    File.open(filepath) { |io| Ox.sax_parse(sax, io) }

    @metadata = sax.metadata
  end
end
18
+
19
+ class Sax < ::Ox::Sax
20
+
21
+ attr_reader :metadata
22
+
23
# Sets up the empty parsing state used by the SAX callbacks.
def initialize
  @metadata = Metadata.new

  # Parallel stacks: names of the currently open elements and, for each of
  # them, the hash of attributes collected so far.
  @stack = []
  @elements = []

  # Temporary values useful just for data parsing.
  @cv_list = []           # attribute hashes of <cv> elements
  @binary_type = nil      # :mz_binary / :intensity_binary of the array being read
  @reference_groups = {}  # referenceableParamGroup id => its cvParams
  @obo = {}               # CV id => loaded Obo::Parser
end
32
+
33
# Ox SAX callback invoked when an element is opened.
#
# Pushes the element name and a fresh attribute hash (filled in later by
# #attr) onto the parallel stacks, and tracks the <cv> entries of the
# <cvList>.
#
# @param name [Symbol] element name as reported by Ox
def start_element(name)
  @stack.push(name)
  @elements.push({})

  case name
  when :cvList
    @cv_list = []
  when :cv
    # `||=` so a <cv> met before/outside <cvList> does not crash on nil.
    (@cv_list ||= []) << @elements.last
  end
end
47
+
48
# Ox SAX callback invoked for every attribute: stores the value in the
# attribute hash of the innermost open element.
#
# @param name [Symbol] attribute name
# @param str [String] attribute value
def attr(name, str)
  current = @elements.last

  # Attributes that arrive while no element is open (like <?xml ...) are skipped.
  current[name] = str unless current.nil?
end
53
+
54
# Ox SAX callback invoked when an element is closed.
#
# Pops the element from the stacks and, depending on its name and on the
# name of its parent element, copies its attributes into @metadata.
# Params that reference an unknown CV, accession or param group are
# skipped instead of raising.
#
# @param name [Symbol] element name as reported by Ox
def end_element(name)
  @stack.pop
  element = @elements.pop
  parent = @stack.last

  case name
  when :cvList
    load_obo_files
  when :software
    # save software list (raw, without detailed parsing)
    (@metadata.software ||= []) << element if parent == :softwareList
  when :referenceableParamGroupRef
    save_binary_data_types(element) if parent == :binaryDataArray
  when :cvParam
    case parent
    when :fileContent
      save_file_content(element)
    when :referenceableParamGroup
      # save reference group for further usage
      (@reference_groups[@elements.last[:id].to_sym] ||= []) << element
    when :sample
      # NOTE(review): every cvParam of a sample overwrites the previous one,
      # so only the last param per sample id is kept - confirm this is intended.
      (@metadata.samples ||= {})[@elements.last[:id].to_sym] = element
    when :scanSettings
      save_scan_settings(element)
    when :processingMethod
      save_processing_method(element)
    when :scan
      save_spectrum_position(element)
    when :binaryDataArray
      save_binary_data_info(element)
    end
  end
end

# Opens the bundled OBO file of every CV from the CV list for further
# validation. CVs other than "MS", "UO" and "IMS" have no bundled file and
# are skipped (previously a nil filename made File.join raise).
def load_obo_files
  Array(@cv_list).each do |cv|
    filename = case cv[:id]
               when "MS" then "psi-ms.obo"
               when "UO" then "unit.obo"
               when "IMS" then "imagingMS.obo"
               end
    next if filename.nil?

    filepath = File.join(File.dirname(__FILE__), "..", "..", "data", filename)
    @obo[cv[:id].to_s] = Obo::Parser.new(filepath)
  end
end

# Returns the OBO stanza for the cvParam's accession, or nil when the CV was
# not loaded or the lookup finds nothing.
def find_stanza(element)
  ontology = @obo[element[:cvRef]]
  ontology && ontology.stanza(element[:accession])
end

# Saves a <fileContent> cvParam into @metadata.file_description.file_content.
def save_file_content(element)
  stanza = find_stanza(element)
  return if stanza.nil?

  # init basic structures
  @metadata.file_description ||= FileDescription.new
  file_content = (@metadata.file_description.file_content ||= FileContent.new)

  case element[:cvRef]
  when "MS"
    parent_id = stanza.parent_id

    # save data file content
    if parent_id == FileContent::DATA_FILE_CONTENT
      file_content.data_file_contents ||= {}
      (file_content.data_file_contents[parent_id] ||= []) << element
    end

    # save spectrum representation
    file_content.spectrum_representation = element if parent_id == FileContent::SPECTRUM_REPRESENTATION
  when "IMS"
    id = stanza.id

    # save binary type (cannot look by parent because the OBO file is different
    # and the parser doesn't handle it well, need to first improve the OBO parser)
    if id == FileContent::CONTINUOUS
      file_content.binary_type = :continuous
    elsif id == FileContent::PROCESSED
      file_content.binary_type = :processed
    end

    # save checksum (MD5 and SHA-1 share the same attribute)
    file_content.checksum = element[:value] if id == FileContent::MD5 || id == FileContent::SHA1

    # save identifier
    file_content.uuid = element[:value] if id == FileContent::UNIVERSALLY_UNIQUE_IDENTIFIER
  end
end

# Saves a <scanSettings> cvParam. Only "IMS" params are interpreted, e.g.
#   {:cvRef=>"IMS", :accession=>"IMS:1000401", :name=>"top down", :value=>""}
#   {:cvRef=>"IMS", :accession=>"IMS:1000042", :name=>"max count of pixel x", :value=>"3"}
#   {:cvRef=>"IMS", :accession=>"IMS:1000046", :name=>"pixel size x", :value=>"100",
#    :unitCvRef=>"UO", :unitAccession=>"UO:0000017", :unitName=>"micrometer"}
# Params of other CVs (e.g. "MS" matrix solution) only ensure that the
# ScanSettings entry exists.
def save_scan_settings(element)
  scan_settings = (@metadata.scan_settings ||= {})
  setting = (scan_settings[@elements.last[:id].to_sym] ||= ScanSettings.new)
  return unless element[:cvRef] == "IMS"

  stanza = find_stanza(element)
  return if stanza.nil?

  id = stanza.id

  # Each property keeps its current value when the param describes
  # something else.
  setting.line_scan_direction = case id
                                when ScanSettings::LINE_SCAN_BOTTOM_UP then :bottom_up
                                when ScanSettings::LINE_SCAN_LEFT_RIGHT then :left_right
                                when ScanSettings::LINE_SCAN_RIGHT_LEFT then :right_left
                                when ScanSettings::LINE_SCAN_TOP_DOWN then :top_down
                                else setting.line_scan_direction
                                end

  setting.scan_direction = case id
                           when ScanSettings::BOTTOM_UP then :bottom_up
                           when ScanSettings::LEFT_RIGHT then :left_right
                           when ScanSettings::RIGHT_LEFT then :right_left
                           when ScanSettings::TOP_DOWN then :top_down
                           else setting.scan_direction
                           end

  setting.scan_pattern = case id
                         when ScanSettings::MEANDERING then :meandering
                         when ScanSettings::ONE_WAY then :one_way
                         when ScanSettings::RANDOM_ACCESS then :random_access
                         when ScanSettings::FLY_BACK then :fly_back
                         else setting.scan_pattern
                         end

  setting.scan_type = case id
                      when ScanSettings::HORIZONTAL_LINE_SCAN then :horizontal
                      when ScanSettings::VERTICAL_LINE_SCAN then :vertical
                      else setting.scan_type
                      end

  # detect image properties
  # NOTE(review): values go through to_i, so fractional dimensions or pixel
  # sizes would be truncated - confirm integers are always expected.
  image = (setting.image ||= ImzML::Image.new)
  value = element[:value].to_i

  case id
  when ScanSettings::MAX_DIMENSION_X
    (image.max_dimension ||= ImzML::Point.new).x = value
  when ScanSettings::MAX_DIMENSION_Y
    (image.max_dimension ||= ImzML::Point.new).y = value
  when ScanSettings::MAX_COUNT_OF_PIXEL_X
    (image.max_pixel_count ||= ImzML::Point.new).x = value
  when ScanSettings::MAX_COUNT_OF_PIXEL_Y
    (image.max_pixel_count ||= ImzML::Point.new).y = value
  when ScanSettings::PIXEL_SIZE_X
    (image.pixel_size ||= ImzML::Point.new).x = value
  when ScanSettings::PIXEL_SIZE_Y
    (image.pixel_size ||= ImzML::Point.new).y = value
  end
end

# Saves a <processingMethod> cvParam: the attributes of the enclosing
# processingMethod become the processing method and the cvParam is appended
# to its :actions list. The entry is keyed by the id of the element two
# levels up.
def save_processing_method(element)
  data_processing = (@metadata.data_processing ||= {})
  processing = (data_processing[@elements[-2][:id].to_sym] ||= DataProcessing.new)
  processing.processing_method = @elements.last
  (processing.processing_method[:actions] ||= []) << element
end

# Returns the Spectrum registered under the id of the element three levels
# up the stack, creating it (and the spectrum table) on first use.
def current_spectrum
  spectrums = (@metadata.spectrums ||= {})
  spectrums[@elements[-3][:id].to_sym] ||= Spectrum.new
end

# Saves spectrum position info from a <scan> cvParam.
def save_spectrum_position(element)
  point = (current_spectrum.position ||= ImzML::Point.new)

  point.x = element[:value].to_i if element[:accession] == Spectrum::POSITION_X
  point.y = element[:value].to_i if element[:accession] == Spectrum::POSITION_Y
end

# Yields the accession of each param of +group+ and returns the first
# non-nil block result (nil when nothing matched).
def first_mapped_accession(group)
  group.each do |param|
    mapped = yield(param[:accession])
    return mapped unless mapped.nil?
  end
  nil
end

# Handles a <referenceableParamGroupRef> inside <binaryDataArray>: remembers
# in @binary_type which array (m/z or intensity) is being described and
# stores the number type of that array on @metadata.
def save_binary_data_types(element)
  # Forget the kind of the previous array so that later cvParams are never
  # attributed to it by mistake.
  @binary_type = nil

  ref = element[:ref]
  group = ref && @reference_groups[ref.to_sym]
  return if group.nil?

  spectrum = current_spectrum
  spectrum.mz_binary ||= ImzML::Spectrum::BinaryData.new
  spectrum.intensity_binary ||= ImzML::Spectrum::BinaryData.new

  # detect type of the binary data info based on referenced group content
  @binary_type = first_mapped_accession(group) do |accession|
    case accession
    when ImzML::Spectrum::BinaryData::MZ_ARRAY then :mz_binary
    when ImzML::Spectrum::BinaryData::INTENSITY_ARRAY then :intensity_binary
    end
  end

  # detect binary data type
  number_type = first_mapped_accession(group) do |accession|
    case accession
    when Metadata::BINARY_TYPE_8BIT_INTEGER then :int8
    when Metadata::BINARY_TYPE_16BIT_INTEGER then :int16
    when Metadata::BINARY_TYPE_32BIT_INTEGER then :int32
    when Metadata::BINARY_TYPE_64BIT_INTEGER then :int64
    when Metadata::BINARY_TYPE_32BIT_FLOAT then :float32
    when Metadata::BINARY_TYPE_64BIT_FLOAT then :float64
    end
  end
  return if @binary_type.nil? || number_type.nil?

  # resolves to mz_binary_data_type= / intensity_binary_data_type=
  @metadata.send("#{@binary_type}_data_type=", number_type)
end

# Saves length/offset info from a <binaryDataArray> cvParam on the binary
# data object selected by the preceding referenceableParamGroupRef.
# NOTE(review): relies on the group ref being seen before these cvParams -
# confirm against the imzML element order.
def save_binary_data_info(element)
  return if @binary_type.nil?

  # convert chosen type to mz_binary/intensity_binary property selector
  binary_data = current_spectrum.send(@binary_type)
  return if binary_data.nil?

  case element[:accession]
  when ImzML::Spectrum::BinaryData::EXTERNAL_ARRAY_LENGTH
    binary_data.length = element[:value].to_i
  when ImzML::Spectrum::BinaryData::EXTERNAL_OFFSET
    binary_data.offset = element[:value].to_i
  when ImzML::Spectrum::BinaryData::EXTERNAL_ENCODED_LENGHT
    binary_data.encoded_length = element[:value].to_i
  end
end

private :load_obo_files, :find_stanza, :save_file_content, :save_scan_settings,
        :save_processing_method, :current_spectrum, :save_spectrum_position,
        :first_mapped_accession, :save_binary_data_types, :save_binary_data_info
329
+
330
+ end
331
+
332
+ end
@@ -0,0 +1,5 @@
1
module ImzML
  # Version of the gem; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.2".freeze
end
data/lib/imzml.rb ADDED
@@ -0,0 +1,23 @@
1
+ require "obo"
2
+
3
+ require "obo_ext/parser"
4
+ require "obo_ext/stanza"
5
+
6
+ require "core_ext/string"
7
+
8
+ require "imzml/obo"
9
+ require "imzml/metadata"
10
+ require "imzml/metadata/point"
11
+ # require "imzml/metadata/spectrum"
12
+ require "imzml/metadata/file_description"
13
+ require "imzml/metadata/file_description/file_content"
14
+ require "imzml/metadata/scan_settings"
15
+ require "imzml/metadata/scan_settings/image"
16
+ require "imzml/metadata/data_processing"
17
+ require "imzml/metadata/run/spectrum"
18
+
19
+ require "imzml/parser"
20
+
21
# Namespace of the gem. It is intentionally empty here: this file only
# declares the module.
module ImzML
end
@@ -0,0 +1,20 @@
1
module Obo
  # Query helpers added to the obo gem's parser. They rely on #elements,
  # which is not defined here, and filter its result down to Obo::Stanza
  # objects. None of them modifies the collection returned by #elements.
  class Parser
    # @return [Array<Obo::Stanza>] every stanza among the parsed elements
    def stanzas
      elements.to_a.select { |element| element.is_a?(Obo::Stanza) }
    end

    # @param id [String] stanza id to look for
    # @return [Obo::Stanza, nil] first stanza with the given id, nil if none
    def stanza(id)
      elements.to_a.find { |element| element.is_a?(Obo::Stanza) && element.id == id }
    end

    # @param id [String] id of the parent stanza
    # @return [Array<Obo::Stanza>] stanzas whose parent is the given id
    def children_of(id)
      elements.to_a.select { |element| element.is_a?(Obo::Stanza) && element.parent?(id) }
    end
  end
end
@@ -0,0 +1,19 @@
1
module Obo
  # Convenience readers added to the obo gem's stanza. They read #tagvalues,
  # which is not defined here; values are wrapped with Array() so a tag that
  # is absent (nil) yields nil instead of raising NoMethodError.
  class Stanza
    # @return [String, nil] first value of the "id" tag
    def id
      Array(tagvalues["id"]).first
    end

    # @return [String, nil] first value of the "is_a" tag, nil when the
    #   stanza has no such tag
    def parent_id
      Array(tagvalues["is_a"]).first
    end

    # @param id [String] candidate parent id
    # @return [Boolean] whether the first "is_a" value equals +id+
    def parent?(id)
      parent_id == id
    end
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: imzml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Ondra Beneš
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ox
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: obo
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.1'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 0.1.4
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '0.1'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 0.1.4
61
+ description: Parser for mass spectrometry imaging standard file format. Gem does not
62
+ check the validity of the input file.
63
+ email:
64
+ - ondra.benes@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - data/Example_Continuous.ibd
70
+ - data/Example_Continuous.imzML
71
+ - data/Example_Processed.ibd
72
+ - data/Example_Processed.imzML
73
+ - data/imagingMS.obo
74
+ - data/psi-ms.obo
75
+ - data/unit.obo
76
+ - lib/core_ext/string.rb
77
+ - lib/imzml.rb
78
+ - lib/imzml/metadata.rb
79
+ - lib/imzml/metadata/data_processing.rb
80
+ - lib/imzml/metadata/file_description.rb
81
+ - lib/imzml/metadata/file_description/file_content.rb
82
+ - lib/imzml/metadata/point.rb
83
+ - lib/imzml/metadata/run/spectrum.rb
84
+ - lib/imzml/metadata/scan_settings.rb
85
+ - lib/imzml/metadata/scan_settings/image.rb
86
+ - lib/imzml/metadata/spectrum.rb
87
+ - lib/imzml/obo.rb
88
+ - lib/imzml/parser.rb
89
+ - lib/imzml/version.rb
90
+ - lib/obo_ext/parser.rb
91
+ - lib/obo_ext/stanza.rb
92
+ homepage: https://github.com/beny/imzml
93
+ licenses: []
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.2.0
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Simple parser for imzML files.
115
+ test_files: []