imzml 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/data/Example_Continuous.ibd +0 -0
- data/data/Example_Continuous.imzML +373 -0
- data/data/Example_Processed.ibd +0 -0
- data/data/Example_Processed.imzML +373 -0
- data/data/imagingMS.obo +560 -0
- data/data/psi-ms.obo +14317 -0
- data/data/unit.obo +2585 -0
- data/lib/core_ext/string.rb +11 -0
- data/lib/imzml/metadata/data_processing.rb +24 -0
- data/lib/imzml/metadata/file_description/file_content.rb +42 -0
- data/lib/imzml/metadata/file_description.rb +10 -0
- data/lib/imzml/metadata/point.rb +7 -0
- data/lib/imzml/metadata/run/spectrum.rb +52 -0
- data/lib/imzml/metadata/scan_settings/image.rb +11 -0
- data/lib/imzml/metadata/scan_settings.rb +42 -0
- data/lib/imzml/metadata/spectrum.rb +79 -0
- data/lib/imzml/metadata.rb +36 -0
- data/lib/imzml/obo.rb +57 -0
- data/lib/imzml/parser.rb +332 -0
- data/lib/imzml/version.rb +5 -0
- data/lib/imzml.rb +23 -0
- data/lib/obo_ext/parser.rb +20 -0
- data/lib/obo_ext/stanza.rb +19 -0
- metadata +115 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module ImzML

  # Carries the description of a processing method together with the
  # accession identifiers of the processing actions this library names.
  class DataProcessing

    # Description of the way in which a particular software was used.
    #
    # Represented by a Hash with keys [order, softwareRef, actions].
    attr_accessor :processing_method

    # Accession identifiers of the individual processing actions. The strings
    # are frozen so that the shared constants cannot be mutated in place.
    DEISOTOPING = "MS:1000033".freeze
    CHARGE_DECONVOLUTION = "MS:1000034".freeze
    PEAK_PICKING = "MS:1000035".freeze
    SMOOTHING = "MS:1000592".freeze
    BASELINE_REDUCTION = "MS:1000593".freeze
    RETENTION_TIME_ALIGNMENT = "MS:1000745".freeze
    CHARGE_STATE_CALCULATION = "MS:1000778".freeze
    PRECURSOR_RECALCULATION = "MS:1000780".freeze
    INTENSITY_NORMALIZATION = "MS:1001484".freeze
    MZ_CALIBRATION = "MS:1001485".freeze
    DATA_FILTERING = "MS:1001486".freeze

  end

end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module ImzML

  # Describes the content of the data file: which kinds of spectra it holds,
  # how they are represented, and how the accompanying ibd file is typed,
  # checksummed and identified.
  class FileContent

    # Must contain one or more of the children listed below.
    attr_accessor :data_file_contents
    DATA_FILE_CONTENT = "MS:1000524".freeze
    TOTAL_ION_CURRENT_CHROMATOGRAM = "MS:1000235".freeze
    CHARGE_INVERSION_MASS_SPECTRUM = "MS:1000322".freeze
    CONSTANT_NEUTRAL_GAIN_SPECTRUM = "MS:1000325".freeze
    CONSTANT_NEUTRAL_LOSS_SPECTRUM = "MS:1000326".freeze
    E_2_MASS_SPECTRUM = "MS:1000328".freeze
    PRECURSOR_ION_SPECTRUM = "MS:1000341".freeze

    # May supply just one of the children listed below.
    attr_accessor :spectrum_representation
    SPECTRUM_REPRESENTATION = "MS:1000525".freeze
    CENTROID_SPECTRUM = "MS:1000127".freeze
    PROFILE_SPECTRUM = "MS:1000128".freeze

    # Describes the type of the binary (ibd) file.
    attr_accessor :binary_type
    IBD_BINARY_TYPE = "IMS:1000003".freeze
    CONTINUOUS = "IMS:1000030".freeze
    PROCESSED = "IMS:1000031".freeze

    # Checksum is a form of redundancy check, a simple way to protect the
    # integrity of data by detecting errors in the data of the ibd file.
    attr_accessor :checksum
    IBD_CHECKSUM = "IMS:1000009".freeze
    MD5 = "IMS:1000090".freeze
    SHA1 = "IMS:1000091".freeze

    # Attributes to unambiguously identify the ibd file.
    attr_accessor :uuid
    IBD_IDENTIFICATION = "IMS:1000008".freeze
    UNIVERSALLY_UNIQUE_IDENTIFIER = "IMS:1000080".freeze

    # TODO: ibd file

  end

end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module ImzML

  # Holder for the file-level content summary.
  class FileDescription

    # Summary of the different types of spectra that can be expected in the
    # file. It is meant to help processing software skip files that do not
    # contain spectrum types it can handle, and it should also describe the
    # nativeID format used in the file by referring to an appropriate CV term.
    attr_reader :file_content
    attr_writer :file_content

  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module ImzML

  class Spectrum

    # Describes one data array of a spectrum: how many fields it has and where
    # its data starts and how long it is.
    class BinaryData

      # A data array of m/z values
      MZ_ARRAY = "MS:1000514".freeze

      # A data array of intensity values
      INTENSITY_ARRAY = "MS:1000515".freeze

      # Describes how many fields an array contains
      attr_accessor :length
      EXTERNAL_ARRAY_LENGTH = "IMS:1000103".freeze

      # The position where the data of an array of a mass spectrum begins
      attr_accessor :offset
      EXTERNAL_OFFSET = "IMS:1000102".freeze

      # Describes the length of the written data
      attr_accessor :encoded_length
      EXTERNAL_ENCODED_LENGTH = "IMS:1000104".freeze
      # The original, misspelled constant name is kept as an alias so that
      # existing references to it keep resolving.
      EXTERNAL_ENCODED_LENGHT = EXTERNAL_ENCODED_LENGTH

      # Grabs the actual binary data from disk.
      #
      # Not implemented yet: the body is empty, so the call always returns nil.
      def data

      end

    end

    # Attributes to describe the position of a spectrum in the image.
    #
    # Represented as a Point with position x, y.
    attr_accessor :position
    POSITION_X = "IMS:1000050".freeze
    POSITION_Y = "IMS:1000051".freeze

    # Info about the m/z binary data.
    #
    # Represented by class BinaryData.
    attr_accessor :mz_binary

    # Info about the intensity binary data.
    #
    # Represented by class BinaryData.
    attr_accessor :intensity_binary

  end

end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module ImzML

  # Settings describing how the image was scanned, plus the accession
  # identifiers of the values each setting may take.
  class ScanSettings

    # Description of the direction in which the lines of the sample were scanned
    attr_accessor :line_scan_direction
    LINE_SCAN_BOTTOM_UP = "IMS:1000492".freeze
    LINE_SCAN_LEFT_RIGHT = "IMS:1000491".freeze
    LINE_SCAN_RIGHT_LEFT = "IMS:1000490".freeze
    LINE_SCAN_TOP_DOWN = "IMS:1000493".freeze

    # Description of the direction of the succession of the assembling of the linescans
    attr_accessor :scan_direction
    BOTTOM_UP = "IMS:1000400".freeze
    LEFT_RIGHT = "IMS:1000402".freeze
    RIGHT_LEFT = "IMS:1000403".freeze
    TOP_DOWN = "IMS:1000401".freeze

    # Description of the pattern in which the image was scanned
    attr_accessor :scan_pattern
    MEANDERING = "IMS:1000410".freeze
    ONE_WAY = "IMS:1000411".freeze
    RANDOM_ACCESS = "IMS:1000412".freeze
    FLY_BACK = "IMS:1000413".freeze

    # Shows the direction in which the lines were scanned
    attr_accessor :scan_type
    HORIZONTAL_LINE_SCAN = "IMS:1000480".freeze
    VERTICAL_LINE_SCAN = "IMS:1000481".freeze

    # Sample properties only concerning imaging samples
    attr_accessor :image
    MAX_DIMENSION_X = "IMS:1000044".freeze
    MAX_DIMENSION_Y = "IMS:1000045".freeze
    MAX_COUNT_OF_PIXEL_X = "IMS:1000042".freeze
    MAX_COUNT_OF_PIXEL_Y = "IMS:1000043".freeze
    PIXEL_SIZE_X = "IMS:1000046".freeze
    PIXEL_SIZE_Y = "IMS:1000047".freeze

  end

end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module ImzML

  # A single spectrum whose m/z and intensity arrays live in an external
  # binary file and are located through byte offsets and lengths.
  class Spectrum

    attr_accessor :id
    attr_accessor :mz_array_external_offset
    attr_accessor :mz_array_external_encoded_length
    attr_accessor :intensity_array_external_offset
    attr_accessor :intensity_array_external_encoded_length

    # Sums the intensities of every data point whose m/z value lies inside the
    # closed window [at - interval, at + interval].
    #
    # data_path - path of the binary file both arrays are read from
    # at        - centre of the window; nil (or false) selects the whole
    #             spectrum, i.e. the window spans the first to the last m/z
    # interval  - half-width of the window; must not be nil, even when +at+
    #             is nil
    #
    # Returns the sum as a Float; 0.0 when no data point falls inside the
    # window or the spectrum is empty.
    # Raises RuntimeError when +interval+ is nil.
    #
    # NOTE(review): the lookup relies on the m/z array being sorted in
    # ascending order, as the previous hand-written binary search did —
    # confirm against the files being read.
    def intensity(data_path, at, interval)

      raise "Interval cannot be nil" if !interval

      # read m/z and intensity data
      mz_values = mz_array(data_path)
      intensities = intensity_array(data_path)
      return 0.0 if mz_values.empty?

      # window bounds; without a centre the whole spectrum is selected
      from = at ? at - interval : mz_values.first
      to = at ? at + interval : mz_values.last

      # first point at or above the lower bound
      low_index = mz_values.bsearch_index { |mz| mz >= from }
      # first point strictly above the upper bound; nil when the window
      # reaches past the end of the spectrum
      past_high = mz_values.bsearch_index { |mz| mz > to } || mz_values.size
      high_index = past_high - 1
      return 0.0 if low_index.nil? || high_index < low_index

      # sum all values in the window; the explicit seed keeps the result
      # numeric even if the intensity array is shorter than the m/z array
      (intensities[low_index..high_index] || []).inject(0.0, :+)
    end

    # Reads the m/z array from +data_path+, starting at the stored offset and
    # spanning the stored encoded length, and decodes it as little-endian
    # single-precision floats.
    def mz_array(data_path)
      IO.binread(data_path, @mz_array_external_encoded_length.to_i, @mz_array_external_offset.to_i).unpack("e*")
    end

    # Reads the intensity array from +data_path+, starting at the stored
    # offset and spanning the stored encoded length, and decodes it as
    # little-endian single-precision floats.
    def intensity_array(data_path)
      IO.binread(data_path, @intensity_array_external_encoded_length.to_i, @intensity_array_external_offset.to_i).unpack("e*")
    end

  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ImzML

  # Top-level container for everything described about one data set, plus the
  # accession identifiers of the supported binary data types.
  class Metadata

    # Information pertaining to the entire mzML file (i.e. not specific to any part of the data set) is stored here
    attr_accessor :file_description

    # List and descriptions of samples
    attr_accessor :samples

    # List and descriptions of software used to acquire and/or process the data in this mzML file
    attr_accessor :software

    # List with the descriptions of the acquisition settings applied prior to the start of data acquisition
    attr_accessor :scan_settings

    # List and descriptions of data processing applied to this data
    attr_accessor :data_processing

    # All mass spectra and the acquisitions underlying them are described and attached here
    attr_accessor :spectrums

    # Binary data types, always little endian. The strings are frozen so the
    # shared constants cannot be mutated in place.
    BINARY_TYPE_8BIT_INTEGER = "IMS:1100000".freeze
    BINARY_TYPE_16BIT_INTEGER = "IMS:1100001".freeze
    BINARY_TYPE_32BIT_INTEGER = "MS:1000519".freeze
    BINARY_TYPE_64BIT_INTEGER = "MS:1000522".freeze
    BINARY_TYPE_32BIT_FLOAT = "MS:1000521".freeze
    BINARY_TYPE_64BIT_FLOAT = "MS:1000523".freeze

    # Both can hold one of the symbols [:int8, :int16, :int32, :int64, :float32, :float64]
    attr_accessor :mz_binary_data_type
    attr_accessor :intensity_binary_data_type

  end

end
|
data/lib/imzml/obo.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
module ImzML

  # Accession identifiers grouped by the prefix of their values (MS, IMS,
  # UNIT). All strings are frozen so the shared constants cannot be mutated
  # in place.
  module OBO

    module MS

      FLOAT_32_BIT = "MS:1000521".freeze

    end

    module IMS

      CONTINUOUS = "IMS:1000030".freeze
      PROCESSED = "IMS:1000031".freeze
      EXTERNAL_OFFSET = "IMS:1000102".freeze
      EXTERNAL_ENCODED_LENGTH = "IMS:1000104".freeze
      MAX_COUNT_OF_PIXELS_X = "IMS:1000042".freeze
      MAX_COUNT_OF_PIXELS_Y = "IMS:1000043".freeze
      PIXEL_SIZE = "IMS:1000046".freeze
      # NOTE(review): ScanSettings names this same accession PIXEL_SIZE_Y —
      # confirm which meaning applies to the files being read.
      IMAGE_SHAPE = "IMS:1000047".freeze
      UNIVERSALLY_UNIQUE_IDENTIFIER = "IMS:1000080".freeze

      SPECTRUM_POSITION = "IMS:1000005".freeze
      SPECTRUM_POSITION_X = "IMS:1000050".freeze
      SPECTRUM_POSITION_Y = "IMS:1000051".freeze
      SPECTRUM_POSITION_Z = "IMS:1000052".freeze

      # attributes of the generation of the image
      LINESCAN_SEQUENCE = "IMS:1000040".freeze
      LINESCAN_SEQUENCE_BOTTOM_UP = "IMS:1000400".freeze
      LINESCAN_SEQUENCE_TOP_DOWN = "IMS:1000401".freeze
      LINESCAN_SEQUENCE_LEFT_RIGHT = "IMS:1000402".freeze
      LINESCAN_SEQUENCE_RIGHT_LEFT = "IMS:1000403".freeze
      LINESCAN_SEQUENCE_NO_DIRECTION = "IMS:1000404".freeze

      SCAN_PATTERN = "IMS:1000041".freeze
      SCAN_PATTERN_MEANDERING = "IMS:1000410".freeze
      # Added for parity with ScanSettings::ONE_WAY, which defines the same accession.
      SCAN_PATTERN_ONE_WAY = "IMS:1000411".freeze
      SCAN_PATTERN_RANDOM_ACCESS = "IMS:1000412".freeze
      SCAN_PATTERN_FLYBACK = "IMS:1000413".freeze

      SCAN_TYPE = "IMS:1000048".freeze
      SCAN_TYPE_HORIZONTAL_LINE_SCAN = "IMS:1000480".freeze
      SCAN_TYPE_VERTICAL_LINE_SCAN = "IMS:1000481".freeze

      LINE_SCAN_DIRECTION = "IMS:1000049".freeze
      LINE_SCAN_DIRECTION_LINESCAN_RIGHT_LEFT = "IMS:1000490".freeze
      LINE_SCAN_DIRECTION_LINESCAN_LEFT_RIGHT = "IMS:1000491".freeze
      LINE_SCAN_DIRECTION_LINESCAN_BOTTOM_UP = "IMS:1000492".freeze
      LINE_SCAN_DIRECTION_LINESCAN_TOP_DOWN = "IMS:1000493".freeze

    end

    # No unit identifiers are defined yet.
    module UNIT
    end
  end

end
|