mspire 0.7.18 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. data/VERSION +1 -1
  2. data/bin/mspire +5 -0
  3. data/lib/core_ext/enumerable.rb +8 -0
  4. data/lib/mspire/commandline.rb +39 -0
  5. data/lib/mspire/cv/paramable.rb +72 -35
  6. data/lib/mspire/imzml/writer/commandline.rb +16 -7
  7. data/lib/mspire/imzml/writer.rb +22 -14
  8. data/lib/mspire/mzml/activation.rb +0 -5
  9. data/lib/mspire/mzml/chromatogram.rb +41 -6
  10. data/lib/mspire/mzml/chromatogram_list.rb +2 -19
  11. data/lib/mspire/mzml/component.rb +28 -4
  12. data/lib/mspire/mzml/cv.rb +1 -0
  13. data/lib/mspire/mzml/data_array.rb +164 -154
  14. data/lib/mspire/mzml/data_array_container_like.rb +6 -13
  15. data/lib/mspire/mzml/data_processing.rb +19 -5
  16. data/lib/mspire/mzml/file_description.rb +22 -4
  17. data/lib/mspire/mzml/index.rb +53 -0
  18. data/lib/mspire/mzml/index_list.rb +64 -55
  19. data/lib/mspire/mzml/instrument_configuration.rb +22 -7
  20. data/lib/mspire/mzml/io_index.rb +79 -0
  21. data/lib/mspire/mzml/io_indexable_list.rb +71 -0
  22. data/lib/mspire/mzml/isolation_window.rb +0 -5
  23. data/lib/mspire/mzml/parser.rb +10 -0
  24. data/lib/mspire/mzml/plms1.rb +14 -24
  25. data/lib/mspire/mzml/precursor.rb +41 -19
  26. data/lib/mspire/mzml/processing_method.rb +34 -7
  27. data/lib/mspire/mzml/product.rb +14 -1
  28. data/lib/mspire/mzml/reader.rb +154 -0
  29. data/lib/mspire/mzml/referenceable_param_group.rb +9 -2
  30. data/lib/mspire/mzml/run.rb +62 -5
  31. data/lib/mspire/mzml/sample.rb +16 -6
  32. data/lib/mspire/mzml/scan.rb +31 -16
  33. data/lib/mspire/mzml/scan_list.rb +18 -5
  34. data/lib/mspire/mzml/scan_settings.rb +4 -5
  35. data/lib/mspire/mzml/scan_window.rb +0 -6
  36. data/lib/mspire/mzml/selected_ion.rb +1 -8
  37. data/lib/mspire/mzml/software.rb +9 -4
  38. data/lib/mspire/mzml/source_file.rb +8 -4
  39. data/lib/mspire/mzml/spectrum.rb +60 -35
  40. data/lib/mspire/mzml/spectrum_list.rb +5 -34
  41. data/lib/mspire/mzml.rb +72 -210
  42. data/lib/mspire/plms1.rb +3 -0
  43. data/spec/mspire/cv/paramable_spec.rb +3 -3
  44. data/spec/mspire/mzml/data_array_spec.rb +19 -6
  45. data/spec/mspire/mzml/file_content_spec.rb +1 -4
  46. data/spec/mspire/mzml/index_list_spec.rb +5 -12
  47. data/spec/mspire/mzml/plms1_spec.rb +5 -9
  48. data/spec/mspire/mzml/referenceable_param_group_spec.rb +3 -3
  49. data/spec/mspire/mzml/source_file_spec.rb +1 -2
  50. data/spec/mspire/mzml/spectrum_list_spec.rb +54 -0
  51. data/spec/mspire/mzml/spectrum_spec.rb +2 -4
  52. data/spec/mspire/mzml_spec.rb +241 -21
  53. data/spec/spec_helper.rb +1 -0
  54. data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.mzML +103 -0
  55. data/spec/testfiles/mspire/mzml/j24z.idx_comp.3.mzML +6 -6
  56. metadata +14 -6
  57. data/bin/mzml_to_imzml +0 -9
  58. data/spec/mspire/mzml/file_description_spec.rb +0 -12
@@ -4,177 +4,187 @@ require 'mspire/cv/paramable'
4
4
 
5
5
  module Mspire
6
6
  class Mzml
7
- class DataArray < Array
8
- alias_method :array_initialize, :initialize
9
- include Mspire::CV::Paramable
10
- alias_method :params_initialize, :initialize
11
- alias_method :params_to_xml, :to_xml
12
-
13
- DEFAULT_DTYPE = :float64
14
- DEFAULT_COMPRESSION = true
15
- DTYPE_TO_ACC = {
16
- float64: 'MS:1000523',
17
- float32: 'MS:1000521',
18
- # float16: 'MS:1000520', # <- not supported w/o other gems
19
- int64: 'MS:1000522', # signed
20
- int32: 'MS:1000519', # signed
21
- }
22
-
23
- def type=(arg)
24
- all_accs = %w(MS:1000514 MS:1000515)
25
- params.delete_if {|param| all_accs.include?(param.accession) } if params
26
- case arg
27
- when :mz
28
- describe! all_accs[0] # , nil, "MS:1000040"
29
- when :intensity
30
- describe! all_accs[1] # , nil, "MS:1000131"
31
- end
32
- arg
33
- end
7
+ end
8
+ end
34
9
 
35
- def type
36
- if params
37
- if params.any? {|param| param.accession == 'MS:1000514' }
38
- :mz
39
- elsif params.any? {|param| param.accession == 'MS:1000515' }
40
- :intensity
41
- end
42
- end
43
- end
44
10
 
45
- # set this if the data is written to an external file (such as the ibd
46
- # file for imzML files)
47
- attr_accessor :external
11
+ class Mspire::Mzml::DataArray < Array
12
+ alias_method :array_init, :initialize
13
+ include Mspire::CV::Paramable
48
14
 
49
- def initialize(*args)
50
- array_initialize(*args)
51
- params_initialize
52
- end
15
+ DEFAULT_DTYPE = :float64
16
+ DEFAULT_COMPRESSION = true
17
+ DTYPE_TO_ACC = {
18
+ float64: 'MS:1000523',
19
+ float32: 'MS:1000521',
20
+ # float16: 'MS:1000520', # <- not supported w/o other gems
21
+ int64: 'MS:1000522', # signed
22
+ int32: 'MS:1000519', # signed
23
+ }
24
+ TYPE_XML = {
25
+ mz: '<cvParam cvRef="MS" accession="MS:1000514" name="m/z array"/>',
26
+ intensity: '<cvParam cvRef="MS" accession="MS:1000515" name="intensity array"/>'
27
+ }
53
28
 
54
- # returns a new Mspire::Mzml::DataArray object (an array)
55
- #
56
- # args:
57
- # base64, set-like # where set-like responds to include?
58
- # base64, type=:float64, compression=true
59
- #
60
- # examples:
61
- # Mspire::Mzml::Spectrum.unpack_binary('eJxjYACBD/YMEOAAoTgcABe3Abg=', ['MS:1000574', MS:1000523']).
62
- # Mspire::Mzml::Spectrum.unpack_binary("ADBA/=", :float32, true)
63
- # Mspire::Mzml::Spectrum.unpack_binary("ADBA/=") # uses float64 and compression
64
- def self.from_binary(base64, *args)
65
- if args.first.respond_to?(:include?)
66
- accessions = args.first
67
- compressed =
68
- if accessions.include?('MS:1000574') then true # zlib compression
69
- elsif accessions.include?('MS:1000576') then false # no compression
70
- else raise 'no compression info: check your MS accession numbers'
71
- end
72
- precision_unpack =
73
- if accessions.include?('MS:1000523') then 'E*'
74
- elsif accessions.include?('MS:1000521') then 'e*'
75
- else raise 'unrecognized precision: check your MS accession numbers'
76
- end
77
- else
78
- compressed = args.last || true
79
- precision_unpack =
80
- case args.first
81
- when :float64
82
- 'E*'
83
- when :float32
84
- 'e*'
85
- when nil
86
- 'E*'
87
- else
88
- raise ArgumentError, "#{args.first} must be one of :float64, :float32 or other acceptable type"
89
- end
29
+ def initialize(*args)
30
+ params_init # paramable
31
+ array_init(*args)
32
+ end
33
+
34
+ # takes :mz or :intensity and sets the proper param among cvParams. Does not do
35
+ # referenceableParamGroup resolution.
36
+ def type=(symbol)
37
+ new_cv_params = []
38
+ already_present = false
39
+ cvs = ['MS:1000514', 'MS:1000515']
40
+ cvs.reverse! if symbol == :intensity
41
+ (keep, remove) = cvs
42
+
43
+ @cv_params.each do |param|
44
+ new_cv_params << param unless param.accession == remove
45
+ (already_present = true) if (param.accession == keep)
46
+ end
47
+ new_cv_params.push(Mspire::CV::Param[keep]) unless already_present
48
+ @cv_params = new_cv_params
49
+ symbol
50
+ end
51
+
52
+ # :mz or :intensity (or nil if none found)
53
+ def type
54
+ each_accessionable_param do |param|
55
+ return :mz if (param.accession == 'MS:1000514')
56
+ return :intensity if (param.accession == 'MS:1000515')
57
+ end
58
+ nil
59
+ end
60
+
61
+ # (optional) the DataProcessing object associated with this DataArray
62
+ attr_accessor :data_processing
63
+
64
+ # set this if the data is written to an external file (such as the ibd
65
+ # file for imzML files)
66
+ attr_accessor :external
67
+
68
+ def self.empty_data_arrays
69
+ [self.new, self.new]
70
+ end
71
+
72
+ def self.data_arrays_from_xml(xml, link)
73
+ data_arrays = xml.children.map do |binary_data_array_n|
74
+ Mspire::Mzml::DataArray.from_xml(binary_data_array_n, link)
75
+ end
76
+ (data_arrays.size > 0) ? data_arrays : empty_data_arrays
77
+ end
78
+
79
+ def self.from_xml(xml, link)
80
+ da = self.new
81
+ binary_n = da.describe_from_xml!(xml, link[:ref_hash])
82
+
83
+ if (dp_id = xml[:dataProcessingRef])
84
+ da.data_processing = link[:data_processing_hash][dp_id]
85
+ end
86
+
87
+ zlib_compression = nil
88
+ precision_unpack = nil
89
+ # could also implement with set or hash lookup (need to test for
90
+ # speed)
91
+ da.each_accessionable_param do |param|
92
+ acc = param.accession
93
+ unless zlib_compression || zlib_compression == false
94
+ case acc
95
+ when 'MS:1000574' then zlib_compression = true
96
+ when 'MS:1000576' then zlib_compression = false
90
97
  end
91
- data = base64.unpack("m*").first
92
- # some implementations leave data blank if there aren't peaks
93
- # even if they say it is zlib compressed...
94
- unzipped =
95
- if data.size > 0
96
- compressed ? Zlib::Inflate.inflate(data) : data
97
- else
98
- data
99
- end
100
- self.new( unzipped.unpack(precision_unpack) )
101
98
  end
99
+ unless precision_unpack
100
+ case acc
101
+ when 'MS:1000523' then precision_unpack = 'E*'
102
+ when 'MS:1000521' then precision_unpack = 'e*'
103
+ end
104
+ end
105
+ end
102
106
 
103
- # returns a base64 string that can be used for xml representations of
104
- # the data
105
- #
106
- # args:
107
- # array-like set-like # where set-like responds to include?
108
- # array-like dtype=:float64, compression=true
109
- def self.to_binary(array_ish, *args)
110
- if args.first.respond_to?(:include?)
111
- accessions = args.first
112
- dtype =
113
- if accessions.include?('MS:1000521')
114
- :float32
115
- else
116
- :float64
117
- end
118
- compression = accessions.include?('MS:1000576') ? false : true
107
+ data = binary_n.text.unpack("m*").first
108
+
109
+ # some implementations leave data blank if there aren't peaks
110
+ # even if they say it is zlib compressed...
111
+ unzipped =
112
+ if data.size > 0 then ( zlib_compression ? Zlib::Inflate.inflate(data) : data )
113
+ else data end
114
+ da.replace( unzipped.unpack(precision_unpack) )
115
+ da
116
+ end
117
+
118
+ # returns a base64 string that can be used for xml representations of
119
+ # the data
120
+ #
121
+ # args:
122
+ # array-like set-like # where set-like responds to include?
123
+ # array-like dtype=:float64, compression=true
124
+ def self.to_binary(array_ish, *args)
125
+ if args.first.respond_to?(:include?)
126
+ accessions = args.first
127
+ dtype =
128
+ if accessions.include?('MS:1000521')
129
+ :float32
119
130
  else
120
- dtype = args[0] || DEFAULT_DTYPE
121
- compression = args[1] || DEFAULT_COMPRESSION
131
+ :float64
122
132
  end
133
+ compression = accessions.include?('MS:1000576') ? false : true
134
+ else
135
+ dtype = args[0] || DEFAULT_DTYPE
136
+ compression = args[1] || DEFAULT_COMPRESSION
137
+ end
123
138
 
124
- pack_code =
125
- case dtype
126
- when :float64 ; 'E*'
127
- when :float32 ; 'e*'
128
- when :int64 ; 'q<*'
129
- when :int32 ; 'l<*'
130
- else ; raise "unsupported dtype: #{dtype}"
131
- end
132
- # TODO: support faster pack method for NArray's in future
133
- string = array_ish.to_a.pack(pack_code)
134
- string = Zlib::Deflate.deflate(string) if compression
135
- Base64.strict_encode64(string)
139
+ pack_code =
140
+ case dtype
141
+ when :float64 ; 'E*'
142
+ when :float32 ; 'e*'
143
+ when :int64 ; 'q<*'
144
+ when :int32 ; 'l<*'
145
+ else ; raise "unsupported dtype: #{dtype}"
136
146
  end
147
+ # TODO: support faster pack method for NArray's in future
148
+ string = array_ish.to_a.pack(pack_code)
149
+ string = Zlib::Deflate.deflate(string) if compression
150
+ Base64.strict_encode64(string)
151
+ end
137
152
 
138
- # calls the class to_binary method with self and the given args
139
- def to_binary(*args)
140
- self.class.to_binary(self, *args)
141
- end
153
+ # calls the class to_binary method with self and the given args
154
+ def to_binary(*args)
155
+ self.class.to_binary(self, *args)
156
+ end
142
157
 
143
- def to_xml(builder, dtype=DEFAULT_DTYPE, compression=DEFAULT_COMPRESSION)
144
- encoded_length =
145
- if @external
146
- 0
147
- else
148
- base64 = self.class.to_binary(self, dtype, compression)
149
- base64.bytesize
150
- end
151
-
152
- builder.binaryDataArray(encodedLength: encoded_length) do |bda_n|
153
- params_to_xml(bda_n)
154
- unless self.external
155
- Mspire::CV::Param[ DTYPE_TO_ACC[dtype] ].to_xml(bda_n)
156
- Mspire::CV::Param[ compression ? 'MS:1000574' : 'MS:1000576' ].to_xml(bda_n)
157
- bda_n.binary(base64)
158
- end
159
- end
158
+ def to_xml(builder, dtype=DEFAULT_DTYPE, compression=DEFAULT_COMPRESSION)
159
+ encoded_length =
160
+ if @external
161
+ 0
162
+ else
163
+ base64 = self.class.to_binary(self, dtype, compression)
164
+ base64.bytesize
160
165
  end
161
166
 
162
- # takes an array of DataArray objects or other kinds of objects
163
- def self.list_xml(arrays, builder)
164
- builder.binaryDataArrayList(count: arrays.size) do |bdal_n|
165
- arrays.zip([:mz, :intensity]) do |data_ar, typ|
166
- ar =
167
- if data_ar.is_a?(Mspire::Mzml::DataArray)
168
- data_ar
169
- else
170
- Mspire::Mzml::DataArray.new(data_ar)
171
- end
172
- ar.type = typ unless ar.type
173
- ar.to_xml(bdal_n)
174
- end
175
- end
167
+ builder.binaryDataArray(encodedLength: encoded_length) do |bda_n|
168
+ super(bda_n)
169
+ unless self.external
170
+ # can significantly speed up the below 2 lines:
171
+ Mspire::CV::Param[ DTYPE_TO_ACC[dtype] ].to_xml(bda_n)
172
+ Mspire::CV::Param[ compression ? 'MS:1000574' : 'MS:1000576' ].to_xml(bda_n)
173
+ bda_n.binary(base64)
176
174
  end
175
+ end
176
+ end
177
177
 
178
+ # takes an array of DataArray objects or other kinds of objects
179
+ def self.list_xml(arrays, builder)
180
+ builder.binaryDataArrayList(count: arrays.size) do |bdal_n|
181
+ arrays.zip([:mz, :intensity]) do |data_ar, typ|
182
+ ar =
183
+ if data_ar.is_a?(Mspire::Mzml::DataArray) then data_ar
184
+ else Mspire::Mzml::DataArray.new(data_ar) end
185
+ ar.type = typ unless ar.type
186
+ ar.to_xml(bdal_n)
187
+ end
178
188
  end
179
189
  end
180
190
  end
@@ -38,21 +38,14 @@ module Mspire
38
38
  end
39
39
  end
40
40
 
41
- # see SpectrumList for generating the entire list
42
- # the opt key :sub_elements can be used to pass in subelements whose
43
- # to_xml methods will be called.
44
- def to_xml(builder, opts={}, &block)
41
+ # returns a hash with id, index, defaultArrayLength and the proper
42
+ # dataProcessing attributes filled out.
43
+ def data_array_xml_atts(default_ids)
45
44
  atts = {id: @id, index: @index, defaultArrayLength: default_array_length}
46
- atts[:dataProcessingRef] = @data_processing.id if @data_processing
47
- atts.merge!(opts)
48
- raise "#{self.class} object must have index at xml writing time!" unless atts[:index]
49
-
50
- builder.spectrum(atts) do |sp_n|
51
- super(sp_n) # params
52
- block.call(sp_n) if block
53
- Mspire::Mzml::DataArray.list_xml(@data_arrays, sp_n) if @data_arrays
45
+ if @data_processing && default_ids[:data_processing] != @data_processing.id
46
+ atts[:dataProcessingRef] = @data_processing.id
54
47
  end
55
- builder
48
+ atts
56
49
  end
57
50
 
58
51
  end
@@ -1,27 +1,41 @@
1
1
  require 'mspire/mzml/list'
2
+ require 'mspire/mzml/processing_method'
2
3
 
3
4
  module Mspire
4
5
  class Mzml
5
6
  class DataProcessing
7
+ extend Mspire::Mzml::List
6
8
 
7
9
  attr_accessor :id, :processing_methods
8
10
 
9
11
  # yields self if given a block
10
- def initialize(id, processing_methods=[], &block)
12
+ def initialize(id, processing_methods=[])
11
13
  @id, @processing_methods = id, processing_methods
12
- block.call(self) if block
14
+ yield(self) if block_given?
13
15
  end
14
16
 
15
17
  def to_xml(builder)
16
18
  builder.dataProcessing( id: @id ) do |dp_n|
17
- processing_methods.each do |proc_method|
18
- proc_method.to_xml(dp_n)
19
+ processing_methods.each_with_index do |processing_method,order|
20
+ processing_method.to_xml(dp_n, order)
19
21
  end
20
22
  end
21
23
  builder
22
24
  end
23
25
 
24
- extend(Mspire::Mzml::List)
26
+ # returns the order of the processing method
27
+ def order(processing_method)
28
+ processing_methods.index(processing_method)
29
+ end
30
+
31
+ def self.from_xml(xml, link)
32
+ processing_methods = xml.children.map do |pm_n|
33
+ ProcessingMethod.new(link[:software_hash][pm_n[:softwareRef]])
34
+ .describe_self_from_xml!(pm_n, link[:ref_hash])
35
+ end
36
+ self.new(xml[:id], processing_methods)
37
+ end
38
+
25
39
  end
26
40
  end
27
41
  end
@@ -5,6 +5,7 @@ require 'mspire/mzml/contact'
5
5
  module Mspire
6
6
  class Mzml
7
7
  class FileDescription
8
+ # note: FileDescription is NOT paramable!
8
9
 
9
10
  # a summary of the different types of spectra, must be present
10
11
  attr_accessor :file_content
@@ -21,14 +22,31 @@ module Mspire
21
22
  attr_accessor :contacts
22
23
 
23
24
  # hands the user the object if given a block
24
- def initialize(file_content=nil, source_files=[], contacts=[], &block)
25
+ def initialize(file_content=nil, source_files=[], contacts=[])
25
26
  @file_content, @source_files, @contacts = file_content, source_files, contacts
26
- block.call(self) if block
27
+ yield(self) if block_given?
27
28
  #raise ArgumentError, "FileDescription must have file_content" unless @file_content
28
29
  end
29
30
 
30
- def self.from_xml(xml)
31
- self.new
31
+ def self.from_xml(xml, link)
32
+ ref_hash = link[:ref_hash]
33
+ file_content_n = xml.child
34
+ obj = self.new( Mspire::Mzml::FileContent.new.describe_self_from_xml!(file_content_n, ref_hash) )
35
+
36
+ return obj unless next_n = file_content_n.next
37
+
38
+ if next_n.name == 'sourceFileList'
39
+ obj.source_files = next_n.children.map do |source_file_n|
40
+ Mspire::Mzml::SourceFile.from_xml(source_file_n, ref_hash)
41
+ end
42
+ return obj unless next_n = next_n.next
43
+ end
44
+
45
+ loop do
46
+ obj.contacts << Mspire::Mzml::Contact.from_xml(contact_n, ref_hash)
47
+ break unless contact_n = contact_n.next
48
+ end
49
+ obj
32
50
  end
33
51
 
34
52
  def to_xml(builder)
@@ -0,0 +1,53 @@
1
+
2
+ module Mspire
3
+ class Mzml
4
+ # the array holds start bytes
5
+ class Index < Array
6
+
7
+ # the name of the index (as a symbol)
8
+ attr_accessor :name
9
+
10
+ # a parallel array of ids (idRef's)
11
+ attr_accessor :ids
12
+
13
+ # @return [Integer] the start byte of the spectrum
14
+ # @param [Object] an Integer (the index number) or String (an id string)
15
+ def start_byte(arg)
16
+ case arg
17
+ when Integer
18
+ self[arg]
19
+ when String
20
+ @id_index ||= create_id_index
21
+ @id_index[arg]
22
+ end
23
+ end
24
+
25
+ # generates a scan to index hash that points from scan number to the
26
+ # spectrum index number. returns the index, nil if the scan ids
27
+ # are not present and spectra are, or false if they are not unique.
28
+ def create_scan_to_index
29
+ scan_re = /scan=(\d+)/
30
+ scan_to_index = {}
31
+ ids.each_with_index do |id, index|
32
+ md = id.match(scan_re)
33
+ scan_num = md[1].to_i if md
34
+ if scan_num
35
+ if scan_to_index.key?(scan_num)
36
+ return false
37
+ else
38
+ scan_to_index[scan_num] = index
39
+ end
40
+ end
41
+ end
42
+ if scan_to_index.size > 0
43
+ scan_to_index
44
+ elsif ids.size > 0
45
+ nil # there are scans, but we did not find scan numbers
46
+ else
47
+ scan_to_index
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
53
+
@@ -1,10 +1,12 @@
1
+ require 'mspire/mzml/index'
2
+
1
3
  module Mspire
2
4
  class Mzml
3
5
  # A simple array of indices but #[] has been overloaded to find an index
4
6
  # by name
5
7
  #
6
8
  # index_list[0] # the first index
7
- # index_list.map(&:names) # -> [:spectrum, :chromatogram]
9
+ # index_list.map(&:name) # -> [:spectrum, :chromatogram]
8
10
  # index_list[:spectrum] # the spectrum index
9
11
  # index_list[:chromatogram] # the chromatogram index
10
12
  class IndexList < Array
@@ -20,12 +22,24 @@ module Mspire
20
22
  self.find {|index| index.name == int_or_symbol }
21
23
  end
22
24
  end
23
- end
24
25
 
25
- # the array holds start bytes
26
- class Index < Array
26
+ def keys
27
+ self.map(&:name)
28
+ end
29
+
30
+ # returns each name and associated index object
31
+ def each_pair(&block)
32
+ block or return enum_for __method__
33
+ each {|index| block.call([index.name, index]) }
34
+ end
27
35
 
28
36
  class << self
37
+
38
+ # either reads in from file or creates an IndexList
39
+ def from_io(io)
40
+ read_index_list(io) || create_index_list(io)
41
+ end
42
+
29
43
  # returns an Integer or nil if not found
30
44
  # does a single jump backwards from the tail of the file looking for
31
45
  # an xml element based on tag. If it is not found, returns nil
@@ -35,64 +49,59 @@ module Mspire
35
49
  md = io.readlines("\n").map {|line| line.match(tag_re) }.compact.shift
36
50
  md[1].to_i if md
37
51
  end
38
- end
39
-
40
- # an index indexed by scan number
41
- attr_accessor :by_scans
42
-
43
- # the name of the index (as a symbol)
44
- attr_accessor :name
45
-
46
- # a parallel array of ids (idRef's)
47
- attr_accessor :ids
48
-
49
- def start_byte_and_id(int)
50
- [self[int], ids[int]]
51
- end
52
-
53
- # returns hash of id to start_byte
54
- def create_id_index
55
- Hash[self.ids.zip(self)]
56
- end
57
52
 
58
- # @return [Integer] the start byte of the spectrum
59
- # @param [Object] an Integer (the index number) or String (an id string)
60
- def start_byte(arg)
61
- case arg
62
- when Integer
63
- self[arg]
64
- when String
65
- @id_index ||= create_id_index
66
- @id_index[arg]
53
+ # @return [Mspire::Mzml::IndexList] or nil if there is no indexList in the
54
+ # mzML
55
+ def read_index_list(io)
56
+ if (offset = index_offset(io))
57
+ io.seek(offset)
58
+ xml = Nokogiri::XML.parse(io.read, nil, @encoding, Parser::NOBLANKS)
59
+ index_list = xml.root
60
+ num_indices = index_list['count'].to_i
61
+ array = index_list.children.map do |index_n|
62
+ #index = Index.new(index_n['name'])
63
+ index = Index.new
64
+ index.name = index_n['name'].to_sym
65
+ ids = []
66
+ index_n.children.map do |offset_n|
67
+ index << offset_n.text.to_i
68
+ ids << offset_n['idRef']
69
+ end
70
+ index.ids = ids
71
+ index
72
+ end
73
+ IndexList.new(array)
74
+ end
67
75
  end
68
- end
69
76
 
70
- # generates a scan to index hash that points from scan number to the
71
- # spectrum index number. returns the index, nil if the scan ids
72
- # are not present and spectra are, or false if they are not unique.
73
- def create_scan_to_index
74
- scan_re = /scan=(\d+)/
75
- scan_to_index = {}
76
- ids.each_with_index do |id, index|
77
- md = id.match(scan_re)
78
- scan_num = md[1].to_i if md
79
- if scan_num
80
- if scan_to_index.key?(scan_num)
81
- return false
82
- else
83
- scan_to_index[scan_num] = index
77
+ # Reads through and captures start bytes
78
+ # @return [Mspire::Mzml::IndexList]
79
+ def create_index_list(io)
80
+ indices_hash = io.bookmark(true) do |inner_io| # sets to beginning of file
81
+ indices = {:spectrum => {}, :chromatogram => {}}
82
+ byte_total = 0
83
+ io.each do |line|
84
+ if md=%r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
85
+ indices[md[1].to_sym][md[2]] = byte_total + md.pre_match.bytesize
86
+ end
87
+ byte_total += line.bytesize
84
88
  end
89
+ indices
85
90
  end
91
+
92
+ indices = indices_hash.map do |sym, hash|
93
+ indices = Index.new ; ids = []
94
+ hash.each {|id, startbyte| ids << id ; indices << startbyte }
95
+ indices.ids = ids ; indices.name = sym
96
+ indices
97
+ end
98
+ # we only return an index if there were some guys there
99
+ indices.delete_if {|ind| ind.size == 0 }
100
+ IndexList.new(indices)
86
101
  end
87
- if scan_to_index.size > 0
88
- by_scans = scan_to_index
89
- elsif ids.size > 0
90
- nil # there are scans, but we did not find scan numbers
91
- else
92
- scan_to_index
93
- end
102
+
94
103
  end
95
104
  end
96
105
  end
97
- end
98
106
 
107
+ end