mspire 0.7.18 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/mspire +5 -0
- data/lib/core_ext/enumerable.rb +8 -0
- data/lib/mspire/commandline.rb +39 -0
- data/lib/mspire/cv/paramable.rb +72 -35
- data/lib/mspire/imzml/writer/commandline.rb +16 -7
- data/lib/mspire/imzml/writer.rb +22 -14
- data/lib/mspire/mzml/activation.rb +0 -5
- data/lib/mspire/mzml/chromatogram.rb +41 -6
- data/lib/mspire/mzml/chromatogram_list.rb +2 -19
- data/lib/mspire/mzml/component.rb +28 -4
- data/lib/mspire/mzml/cv.rb +1 -0
- data/lib/mspire/mzml/data_array.rb +164 -154
- data/lib/mspire/mzml/data_array_container_like.rb +6 -13
- data/lib/mspire/mzml/data_processing.rb +19 -5
- data/lib/mspire/mzml/file_description.rb +22 -4
- data/lib/mspire/mzml/index.rb +53 -0
- data/lib/mspire/mzml/index_list.rb +64 -55
- data/lib/mspire/mzml/instrument_configuration.rb +22 -7
- data/lib/mspire/mzml/io_index.rb +79 -0
- data/lib/mspire/mzml/io_indexable_list.rb +71 -0
- data/lib/mspire/mzml/isolation_window.rb +0 -5
- data/lib/mspire/mzml/parser.rb +10 -0
- data/lib/mspire/mzml/plms1.rb +14 -24
- data/lib/mspire/mzml/precursor.rb +41 -19
- data/lib/mspire/mzml/processing_method.rb +34 -7
- data/lib/mspire/mzml/product.rb +14 -1
- data/lib/mspire/mzml/reader.rb +154 -0
- data/lib/mspire/mzml/referenceable_param_group.rb +9 -2
- data/lib/mspire/mzml/run.rb +62 -5
- data/lib/mspire/mzml/sample.rb +16 -6
- data/lib/mspire/mzml/scan.rb +31 -16
- data/lib/mspire/mzml/scan_list.rb +18 -5
- data/lib/mspire/mzml/scan_settings.rb +4 -5
- data/lib/mspire/mzml/scan_window.rb +0 -6
- data/lib/mspire/mzml/selected_ion.rb +1 -8
- data/lib/mspire/mzml/software.rb +9 -4
- data/lib/mspire/mzml/source_file.rb +8 -4
- data/lib/mspire/mzml/spectrum.rb +60 -35
- data/lib/mspire/mzml/spectrum_list.rb +5 -34
- data/lib/mspire/mzml.rb +72 -210
- data/lib/mspire/plms1.rb +3 -0
- data/spec/mspire/cv/paramable_spec.rb +3 -3
- data/spec/mspire/mzml/data_array_spec.rb +19 -6
- data/spec/mspire/mzml/file_content_spec.rb +1 -4
- data/spec/mspire/mzml/index_list_spec.rb +5 -12
- data/spec/mspire/mzml/plms1_spec.rb +5 -9
- data/spec/mspire/mzml/referenceable_param_group_spec.rb +3 -3
- data/spec/mspire/mzml/source_file_spec.rb +1 -2
- data/spec/mspire/mzml/spectrum_list_spec.rb +54 -0
- data/spec/mspire/mzml/spectrum_spec.rb +2 -4
- data/spec/mspire/mzml_spec.rb +241 -21
- data/spec/spec_helper.rb +1 -0
- data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.mzML +103 -0
- data/spec/testfiles/mspire/mzml/j24z.idx_comp.3.mzML +6 -6
- metadata +14 -6
- data/bin/mzml_to_imzml +0 -9
- data/spec/mspire/mzml/file_description_spec.rb +0 -12
@@ -6,20 +6,21 @@ module Mspire
|
|
6
6
|
class Mzml
|
7
7
|
class InstrumentConfiguration
|
8
8
|
include Mspire::CV::Paramable
|
9
|
+
extend Mspire::Mzml::List
|
9
10
|
|
10
11
|
# (required) the id that this guy can be referenced from
|
11
12
|
attr_accessor :id
|
12
13
|
|
13
|
-
# a list of Source, Analyzer, Detector objects
|
14
|
+
# a list of Source, Analyzer, Detector objects (optional)
|
14
15
|
attr_accessor :components
|
15
16
|
|
16
|
-
# a single software object associated with the instrument
|
17
|
+
# a single software object associated with the instrument (optional)
|
17
18
|
attr_accessor :software
|
18
19
|
|
19
|
-
def initialize(id, components=[]
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
def initialize(id, components=[])
|
21
|
+
@id, @components = id, components
|
22
|
+
params_init
|
23
|
+
yield(self) if block_given?
|
23
24
|
end
|
24
25
|
|
25
26
|
def to_xml(builder)
|
@@ -31,7 +32,21 @@ module Mspire
|
|
31
32
|
builder
|
32
33
|
end
|
33
34
|
|
34
|
-
self.
|
35
|
+
# Builds an instrument configuration from its XML node.
#
# xml  - the instrumentConfiguration node (its :id attribute becomes the id)
# link - hash of lookup tables; :ref_hash is always read and
#        :software_hash is read when a softwareRef node is present
#
# describe_from_xml! is expected to hand back the first node it did not
# consume (or nil); an optional componentList and then an optional
# softwareRef are read from there.
def self.from_xml(xml, link)
  config = self.new(xml[:id])
  node = config.describe_from_xml!(xml, link[:ref_hash])

  if node && node.name == 'componentList'
    config.components = node.children.map do |component_n|
      # 'source' -> Source, 'analyzer' -> Analyzer, 'detector' -> Detector
      component_class = Mspire::Mzml.const_get(component_n.name.capitalize)
      component_class.new.describe_self_from_xml!(component_n, link[:ref_hash])
    end
    node = node.next
  end

  if node && node.name == 'softwareRef'
    config.software = link[:software_hash][node[:ref]]
  end

  config
end
|
49
|
+
|
35
50
|
end
|
36
51
|
end
|
37
52
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'mspire/mzml/parser'
|
2
|
+
require 'mspire/mzml/spectrum'
|
3
|
+
require 'mspire/mzml/chromatogram'
|
4
|
+
|
5
|
+
module Mspire
|
6
|
+
class Mzml
|
7
|
+
|
8
|
+
# an index that retrieves its objects on the fly by index from the IO object.
|
9
|
+
class IOIndex
  include Enumerable

  attr_reader :io

  attr_reader :byte_index

  # hash of relevant hashes and objects for linking
  attr_accessor :link

  # encoding name handed to the XML parser for each fetched node (nil lets
  # the parser decide)
  attr_accessor :encoding

  # byte_index will typically be an Mspire::Mzml::Index object.  It must
  # respond to #name, #size/#length and #[] (returning a start byte).
  #
  # link will have the following keys:
  #
  #     :ref_hash
  #     :data_processing_hash
  #     :(<spectrum>|<chromatogram>)_default_data_processing
  #
  # may have:
  #
  #     :source_file_hash
  #
  # encoding is optional; previously @encoding was read when parsing but
  # never assigned, so omitting it keeps the old behavior (nil).
  def initialize(io, byte_index, link, encoding=nil)
    @io, @byte_index, @link, @encoding = io, byte_index, link, encoding
    # the index name (e.g. :spectrum) selects the class used to build objects
    @object_class = Mspire::Mzml.const_get(@byte_index.name.to_s.capitalize)
    @closetag_regexp = %r{</#{Regexp.escape(name.to_s)}>}
  end

  # the name of the underlying byte index
  def name
    @byte_index.name
  end

  # yields each object in index order; returns an Enumerator without a block
  def each(&block)
    return enum_for(__method__) unless block
    (0...byte_index.size).each do |int|
      block.call(self[int])
    end
  end

  # builds the object found at the given Integer index by parsing its XML
  def [](index)
    @object_class.from_xml(fetch_xml_node(index), @link)
  end

  def length
    @byte_index.length
  end
  alias_method :size, :length

  # gets the data string from start_byte through the line holding the
  # closing tag (or through end of io if no closing tag is found)
  def get_xml_string(start_byte)
    @io.seek(start_byte)
    data = ""
    @io.each_line do |line|
      data << line
      break if @closetag_regexp.match(line)
    end
    data
  end

  # parses the element starting at start_byte and returns its root node
  def xml_node_from_start_byte(start_byte)
    xml = get_xml_string(start_byte)
    Nokogiri::XML.parse(xml, nil, @encoding, Parser::NOBLANKS).root
  end

  # returns the root xml node for the object at the given Integer index
  def fetch_xml_node(index)
    xml_node_from_start_byte(byte_index[index])
  end

end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'core_ext/enumerable'
|
2
|
+
require 'delegate'
|
3
|
+
|
4
|
+
module Mspire
|
5
|
+
class Mzml
|
6
|
+
|
7
|
+
# An IOIndexableList is the base object for SpectrumList and
|
8
|
+
# ChromatogramList. Its main feature is that it delegates all of its
|
9
|
+
# duties to the array like object.
|
10
|
+
class IOIndexableList < SimpleDelegator
  alias_method :get_delegate, :__getobj__

  attr_accessor :default_data_processing

  # a hash linking an id to the Integer index
  attr_accessor :id_to_index

  # array_like must implement #[] (with an Integer index), #each, #size and
  # #length.  For example, it may be an actual Array object, or it may be
  # an IOIndex, something that behaves similar to an array but is really
  # pulling objects by reading an io object.  Sets the spectrum_list
  # attribute of array_like if it can be set.
  #
  # id_to_index is optional; when it is not given it is built on the first
  # lookup by id (see #[]).
  def initialize(default_data_processing, array_like, id_to_index=nil)
    if array_like.respond_to?(:spectrum_list=)
      array_like.spectrum_list = self
    end
    @id_to_index = id_to_index
    @default_data_processing = default_data_processing
    __setobj__(array_like)
  end

  # for a class like <Object>List, returns :object.  So a SpectrumList
  # will return :spectrum.
  def list_type
    base = self.class.to_s.split('::').last.sub(/List$/,'')
    base[0] = base[0].downcase
    base.to_sym
  end

  # generates (and stores) the id_to_index hash by walking the underlying
  # delegated object.  Returns the new hash.
  def create_id_to_index!
    fresh = {}
    get_delegate.each_with_index do |obj, i|
      fresh[obj.id] = i
    end
    # assign only once the walk has completed
    @id_to_index = fresh
  end

  # arg may be an Integer or a String (an id).  A lookup by id builds
  # id_to_index first if it has not been set, and returns nil when the id
  # is unknown (rather than handing nil to the delegate as an index).
  def [](arg)
    return get_delegate[arg] if arg.is_a?(Integer)
    index = (@id_to_index || create_id_to_index!)[arg]
    index && get_delegate[index]
  end

  # writes the list element (named after the class, first letter
  # downcased) and each contained object to builder.  Records the default
  # data processing id in default_ids under :<list_type>_data_processing
  # and assigns an index to any object that lacks one.  Returns builder.
  def to_xml(builder, default_ids)
    default_ids["#{list_type}_data_processing".to_sym] = @default_data_processing.id
    xml_name = self.class.to_s.split('::').last
    xml_name[0] = xml_name[0].downcase
    builder.tag!(xml_name.to_sym, count: self.size, defaultDataProcessingRef: @default_data_processing.id) do |iol_n|
      self.each_with_index do |obj,i|
        obj.index = i unless obj.index
        obj.to_xml(iol_n, default_ids)
      end
    end
    builder
  end

end
|
70
|
+
end
|
71
|
+
end
|
@@ -16,11 +16,6 @@ module Mspire
|
|
16
16
|
# e.g.: MS:1000829 (isolation window upper offset)
|
17
17
|
class IsolationWindow
|
18
18
|
include Mspire::CV::Paramable
|
19
|
-
def self.from_xml(xml)
|
20
|
-
obj = self.new
|
21
|
-
[:cvParam, :userParam].each {|v| obj.describe! xml.xpath("./#{v}") }
|
22
|
-
obj
|
23
|
-
end
|
24
19
|
end
|
25
20
|
end
|
26
21
|
end
|
data/lib/mspire/mzml/plms1.rb
CHANGED
@@ -3,32 +3,22 @@ require 'mspire/plms1'
|
|
3
3
|
|
4
4
|
module Mspire
|
5
5
|
class Mzml
|
6
|
-
# will use scan numbers if use_scan_nums is true
|
7
|
-
#
|
6
|
+
# will use scan numbers if use_scan_nums is true (typically start with
|
7
|
+
# one), otherwise it will use index numbers (starts with zero)
|
8
8
|
def to_plms1(use_scan_nums=true)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
case rt_xml_node['unitName']
|
21
|
-
when 'minute'
|
22
|
-
retention_time * 60
|
23
|
-
when 'second'
|
24
|
-
retention_time
|
25
|
-
else
|
26
|
-
raise 'retention time must be in minutes or seconds (or add some code to handle)'
|
27
|
-
end
|
9
|
+
spectrum_index = self.index_list[:spectrum]
|
10
|
+
|
11
|
+
scan_nums = spectrum_index.create_scan_to_index.keys if use_scan_nums
|
12
|
+
|
13
|
+
nums = [] ; rts = [] ; spectra = []
|
14
|
+
|
15
|
+
self.each_with_index do |spec, index|
|
16
|
+
next unless spec.ms_level == 1
|
17
|
+
nums << (use_scan_nums ? scan_nums[index] : index)
|
18
|
+
spectra << spec
|
19
|
+
rts << spec.retention_time
|
28
20
|
end
|
29
|
-
|
30
|
-
# object, so an Mzml object will work.
|
31
|
-
Mspire::Plms1.new(scan_nums, retention_times, self)
|
21
|
+
Mspire::Plms1.new(nums, rts, spectra)
|
32
22
|
end
|
33
23
|
end
|
34
24
|
end
|
@@ -7,9 +7,10 @@ module Mspire
|
|
7
7
|
class Mzml
|
8
8
|
# The method of precursor ion selection and activation
|
9
9
|
class Precursor
|
10
|
-
|
11
|
-
#
|
12
|
-
|
10
|
+
|
11
|
+
# (optional) the id of the Spectrum object, whether internal or
|
12
|
+
# externally derived.
|
13
|
+
attr_accessor :spectrum_id
|
13
14
|
|
14
15
|
# (optional)
|
15
16
|
attr_accessor :isolation_window
|
@@ -20,34 +21,55 @@ module Mspire
|
|
20
21
|
# (required) The type and energy level used for activation.
|
21
22
|
attr_accessor :activation
|
22
23
|
|
23
|
-
#
|
24
|
-
|
24
|
+
# This is an *EXTERNAL* source file *ONLY*. It should NOT be set if the
|
25
|
+
# spectrum is internal.
|
26
|
+
attr_accessor :source_file
|
27
|
+
|
28
|
+
# the spectrum list object which enables the spectrum to be accessed directly
|
29
|
+
attr_accessor :spectrum_list
|
30
|
+
|
31
|
+
# provide the SpectrumList object for #spectrum access
|
32
|
+
def initialize(spectrum_id=nil, spectrum_list=nil)
|
33
|
+
@spectrum_id, @spectrum_list = spectrum_id, spectrum_list
|
34
|
+
end
|
25
35
|
|
26
|
-
def
|
27
|
-
@
|
36
|
+
# looks up this precursor's spectrum in the spectrum list by spectrum_id.
# Returns nil when either the spectrum list or the spectrum id has not
# been set (both default to nil in the constructor).
def spectrum
  @spectrum_list[@spectrum_id] if @spectrum_list && @spectrum_id
end
|
29
39
|
|
30
|
-
def self.from_xml(xml)
|
40
|
+
def self.from_xml(xml, link)
|
41
|
+
ref_hash = link[:ref_hash]
|
31
42
|
obj = self.new
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
Mspire::Mzml.const_get(el).from_xml(sub_node) if sub_node
|
43
|
+
obj.spectrum_id = xml[:spectrumRef] || xml[:externalSpectrumID]
|
44
|
+
if source_file_ref = xml[:sourceFileRef]
|
45
|
+
obj.source_file = link[:source_file_hash][ source_file_ref ]
|
36
46
|
end
|
37
|
-
|
38
|
-
|
47
|
+
|
48
|
+
xml.children.each do |child_n|
|
49
|
+
case child_n.name
|
50
|
+
when 'activation' # the only one required
|
51
|
+
obj.activation = Mspire::Mzml::Activation.new.describe_self_from_xml!(child_n, ref_hash)
|
52
|
+
when 'isolationWindow'
|
53
|
+
obj.isolation_window = Mspire::Mzml::IsolationWindow.new.describe_self_from_xml!(child_n, ref_hash)
|
54
|
+
when 'selectedIonList'
|
55
|
+
obj.selected_ions = child_n.children.map do |si_n|
|
56
|
+
Mspire::Mzml::SelectedIon.new.describe_self_from_xml!(si_n, ref_hash)
|
57
|
+
end
|
58
|
+
end
|
39
59
|
end
|
60
|
+
|
40
61
|
obj
|
41
62
|
end
|
42
63
|
|
43
64
|
def to_xml(builder)
|
44
65
|
atts = {}
|
45
|
-
if @
|
46
|
-
atts[:sourceFileRef] = @
|
47
|
-
atts[:externalSpectrumRef] = @
|
48
|
-
|
49
|
-
atts[:spectrumRef] = @
|
66
|
+
if @source_file
|
67
|
+
atts[:sourceFileRef] = @source_file.id
|
68
|
+
atts[:externalSpectrumRef] = @spectrum_id
|
69
|
+
elsif @spectrum_id
|
70
|
+
atts[:spectrumRef] = @spectrum_id
|
50
71
|
end
|
72
|
+
|
51
73
|
builder.precursor(atts) do |prec_n|
|
52
74
|
@isolation_window.to_xml(prec_n) if @isolation_window
|
53
75
|
Mspire::Mzml::SelectedIon.list_xml(@selected_ions, prec_n) if @selected_ions
|
@@ -2,19 +2,40 @@ require 'mspire/cv/paramable'
|
|
2
2
|
|
3
3
|
module Mspire
|
4
4
|
class Mzml
|
5
|
+
|
6
|
+
# MAY supply a *child* term of MS:1000630 (data processing parameter) one or more times
|
7
|
+
# e.g.: MS:1000629 (low intensity threshold)
|
8
|
+
# e.g.: MS:1000631 (high intensity threshold)
|
9
|
+
# e.g.: MS:1000747 (completion time)
|
10
|
+
# e.g.: MS:1000787 (inclusive low intensity threshold)
|
11
|
+
# e.g.: MS:1000788 (inclusive high intensity threshold)
|
12
|
+
#
|
13
|
+
# MUST supply a *child* term of MS:1000452 (data transformation) one or more times
|
14
|
+
# e.g.: MS:1000033 (deisotoping)
|
15
|
+
# e.g.: MS:1000034 (charge deconvolution)
|
16
|
+
# e.g.: MS:1000544 (Conversion to mzML)
|
17
|
+
# e.g.: MS:1000545 (Conversion to mzXML)
|
18
|
+
# e.g.: MS:1000546 (Conversion to mzData)
|
19
|
+
# e.g.: MS:1000593 (baseline reduction)
|
20
|
+
# e.g.: MS:1000594 (low intensity data point removal)
|
21
|
+
# e.g.: MS:1000741 (Conversion to dta)
|
22
|
+
# e.g.: MS:1000745 (retention time alignment)
|
23
|
+
# e.g.: MS:1000746 (high intensity data point removal)
|
5
24
|
class ProcessingMethod
|
6
25
|
include Mspire::CV::Paramable
|
7
26
|
|
8
|
-
attr_accessor :
|
27
|
+
attr_accessor :software
|
9
28
|
|
10
|
-
def initialize(
|
11
|
-
@
|
12
|
-
|
13
|
-
|
29
|
+
def initialize(software)
|
30
|
+
@software = software
|
31
|
+
params_init
|
32
|
+
if block_given?
|
33
|
+
yield self
|
34
|
+
end
|
14
35
|
end
|
15
36
|
|
16
|
-
def to_xml(builder)
|
17
|
-
builder.processingMethod(order:
|
37
|
+
def to_xml(builder, order)
|
38
|
+
builder.processingMethod(order: order, softwareRef: software.id) do |pm_n|
|
18
39
|
super(pm_n) # params
|
19
40
|
end
|
20
41
|
builder
|
@@ -22,3 +43,9 @@ module Mspire
|
|
22
43
|
end
|
23
44
|
end
|
24
45
|
end
|
46
|
+
|
47
|
+
# The order attribute is *not* intrinsic to the ProcessingMethod (and thus
|
48
|
+
# cannot be queried from within the object). It can be determined easily
|
49
|
+
# by asking for the index of the method in the array of processing
|
50
|
+
# methods. (zero based indexing is fine)
|
51
|
+
|
data/lib/mspire/mzml/product.rb
CHANGED
@@ -2,7 +2,13 @@ require 'mspire/mzml/list'
|
|
2
2
|
|
3
3
|
module Mspire
|
4
4
|
class Mzml
|
5
|
+
# The method of product ion selection and activation in a precursor ion scan
|
6
|
+
#
|
7
|
+
# this object is NOT paramable, it just contains a single IsolationWindow
|
5
8
|
class Product
|
9
|
+
|
10
|
+
extend Mspire::Mzml::List
|
11
|
+
|
6
12
|
attr_accessor :isolation_window
|
7
13
|
|
8
14
|
def initialize(isolation_window=nil)
|
@@ -15,7 +21,14 @@ module Mspire
|
|
15
21
|
end
|
16
22
|
end
|
17
23
|
|
18
|
-
|
24
|
+
# Builds a Product from its XML node.  The node's first child, when
# present, is taken to be the isolationWindow.
#
# xml      - the product node
# ref_hash - referenceable param groups keyed by id, passed through to the
#            isolation window
#
# The isolation window is built with new + describe_self_from_xml! (the
# same form used when reading a precursor's isolation window) because
# IsolationWindow no longer defines its own from_xml.
def self.from_xml(xml, ref_hash)
  isolation_window_n = xml.child
  if isolation_window_n
    iw = Mspire::Mzml::IsolationWindow.new.describe_self_from_xml!(isolation_window_n, ref_hash)
  end
  self.new(iw)
end
|
31
|
+
|
19
32
|
end
|
20
33
|
end
|
21
34
|
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'io/bookmark'
|
2
|
+
|
3
|
+
%w(
|
4
|
+
parser
|
5
|
+
|
6
|
+
index_list
|
7
|
+
|
8
|
+
cv
|
9
|
+
referenceable_param_group
|
10
|
+
file_description
|
11
|
+
|
12
|
+
sample
|
13
|
+
software
|
14
|
+
instrument_configuration
|
15
|
+
data_processing
|
16
|
+
run
|
17
|
+
).each do |file|
|
18
|
+
require "mspire/mzml/#{file}"
|
19
|
+
end
|
20
|
+
|
21
|
+
module Mspire
  class Mzml
  end
end

# Methods for filling in an Mzml object from an mzML io object.  The
# including object must provide the writers used in read_header! (cvs=,
# referenceable_param_groups=, file_description=, samples=,
# software_list=, instrument_configurations=, data_processing_list=, run=).
module Mspire::Mzml::Reader

  # hash of lookup tables (e.g. :ref_hash, :source_file_hash) built up
  # while the header is read
  attr_accessor :link

  # reads the encoding, the index list and then the header from xml_io.
  # xml_io must respond to #bookmark (see io/bookmark).
  #
  # Raises RuntimeError if the first line is missing or carries no
  # encoding declaration.
  def set_from_xml_io!(xml_io)
    @io = xml_io
    @encoding = declared_encoding(@io)
    @index_list = Mspire::Mzml::IndexList.from_io(@io)
    read_header!( get_default_data_processing_ids(@io, @index_list) )
  end

  # returns a hash keyed by :spectrum or :chromatogram that gives the id
  # (aka ref) as a string.  The id is scraped from the (up to) lookback
  # bytes that precede the first indexed object of each list; the value is
  # nil if no defaultDataProcessingRef is found there or the index is
  # empty.
  def get_default_data_processing_ids(io, index_list, lookback=200)
    hash = {}
    index_list.each_pair do |name, index|
      first_byte = index[0]
      if first_byte.nil?
        hash[name] = nil
        next
      end
      # never seek before the start of the io when the first object sits
      # closer than lookback bytes to it
      start = [first_byte - lookback, 0].max
      io.bookmark do |bio|
        bio.pos = start
        hash[name] = bio.read(first_byte - start)[/<#{name}List.*defaultDataProcessingRef=['"](.*?)['"]/m, 1]
      end
    end
    hash
  end

  # saves ~ 3 seconds when reading a 83M mzML file to scrape off the
  # header string (even though we're just handing in an IO object to
  # Nokogiri::XML::Document.parse and we are very careful to not parse too
  # far).
  #
  # Reads io (from its current position) in chunks until a chunk brings a
  # '<spectrum' or '<chromatogram' opening into view, and returns
  # everything read so far.
  def get_header_string(io)
    chunk_size = 2**12
    loc = 0
    string = ''
    while chunk = io.read(chunk_size)
      string << chunk
      # back up a little so a tag split across two chunks is still seen
      start_looking = [loc - 20, 0].max
      break if string[start_looking..-1] =~ /<(spectrum|chromatogram)/
      loc += chunk_size
    end
    string
  end

  # list_type_to_default_data_processing_id is a hash keyed by :spectrum or
  # :chromatogram that gives the id of the default data processing object
  # for the SpectrumList and/or the ChromatogramList.  This information is
  # not obtainable from the header string, so must be pre-obtained.
  def read_header!(list_type_to_default_data_processing_id)
    @io.rewind

    string = get_header_string(@io)
    doc = Nokogiri::XML.parse(string, nil, @encoding, Mspire::Mzml::Parser::NOBLANKS)

    doc.remove_namespaces!
    mzml_n = doc.root
    if mzml_n.name == 'indexedmzML'
      mzml_n = mzml_n.child
    end
    cv_list_n = mzml_n.child
    self.cvs = cv_list_n.children.map do |cv_n|
      Mspire::Mzml::CV.from_xml(cv_n)
    end

    # get the file description node but deal with it after getting ref_hash
    file_description_n = cv_list_n.next

    xml_n = file_description_n.next

    # lookup tables (referenceable param groups, source files, ...) by id
    @link = {}

    if xml_n.name == 'referenceableParamGroupList'
      self.referenceable_param_groups = xml_n.children.map do |rpg_n|
        Mspire::Mzml::ReferenceableParamGroup.from_xml(rpg_n) # <- no ref_hash (not made yet)
      end
      @link[:ref_hash] = self.referenceable_param_groups.index_by(&:id)
      xml_n = xml_n.next
    end

    # now we can set the file description because we have the ref_hash
    self.file_description = Mspire::Mzml::FileDescription.from_xml(file_description_n, @link)
    @link[:source_file_hash] = self.file_description.source_files.index_by(&:id)

    # walk the remaining top level lists in document order; each list
    # registers its objects in @link so later elements can refer to them.
    # The walk stops at the run element.
    loop do
      case xml_n.name
      when 'sampleList'
        self.samples = xml_n.children.map do |sample_n|
          Mspire::Mzml::Sample.from_xml(sample_n, @link)
        end
        @link[:sample_hash] = self.samples.index_by(&:id)
      when 'softwareList' # required
        self.software_list = xml_n.children.map do |software_n|
          Mspire::Mzml::Software.from_xml(software_n, @link)
        end
        @link[:software_hash] = self.software_list.index_by(&:id)
      when 'instrumentConfigurationList'
        self.instrument_configurations = xml_n.children.map do |inst_config_n|
          Mspire::Mzml::InstrumentConfiguration.from_xml(inst_config_n, @link)
        end
        @link[:instrument_configuration_hash] = self.instrument_configurations.index_by(&:id)
      when 'dataProcessingList'
        self.data_processing_list = xml_n.children.map do |data_processing_n|
          Mspire::Mzml::DataProcessing.from_xml(data_processing_n, @link)
        end
        @link[:data_processing_hash] = self.data_processing_list.index_by(&:id)
      when 'run'
        @link[:index_list] = @index_list
        list_type_to_default_data_processing_id.each do |type, process_id|
          @link["#{type}_default_data_processing".to_sym] = @link[:data_processing_hash][process_id]
        end
        self.run = Mspire::Mzml::Run.from_xml(@io, xml_n, @link)
        break
      end
      xml_n = xml_n.next
    end
  end

  private

  # returns the encoding declared on the first line of io.  Raises
  # RuntimeError when io is empty or the first line declares no encoding
  # (a non-matching line used to surface as a NoMethodError on nil).
  def declared_encoding(io)
    first_line =
      begin
        io.bookmark(true) {|bio| bio.readline }
      rescue EOFError
        nil
      end
    encoding = first_line && first_line[/encoding=["'](.*?)["']/, 1]
    unless encoding
      raise RuntimeError, "no encoding present in XML! (Is this even an xml file?)"
    end
    encoding
  end
end

module Mspire
  class Mzml
    include Reader
  end
end
|
152
|
+
|
153
|
+
|
154
|
+
|
@@ -12,9 +12,10 @@ module Mspire
|
|
12
12
|
|
13
13
|
attr_accessor :id
|
14
14
|
|
15
|
-
def initialize(id
|
15
|
+
def initialize(id)
|
16
16
|
@id = id
|
17
|
-
|
17
|
+
params_init
|
18
|
+
yield(self) if block_given?
|
18
19
|
end
|
19
20
|
|
20
21
|
def to_xml(builder)
|
@@ -29,6 +30,12 @@ module Mspire
|
|
29
30
|
builder
|
30
31
|
end
|
31
32
|
|
33
|
+
# Builds a referenceable param group from its XML node: the node's :id
# attribute becomes the group id and the node is then handed to
# describe_from_xml! (without a ref_hash).  Returns the new group.
def self.from_xml(xml)
  self.new(xml[:id]).tap {|group| group.describe_from_xml!(xml) }
end
|
38
|
+
|
32
39
|
def self.list_xml(objs, builder)
|
33
40
|
builder.referenceableParamGroupList(count: objs.size) do |rpgl_n|
|
34
41
|
objs.each {|obj| obj.to_xml_definition(rpgl_n) }
|