RubyGems - mspire - Versions diffs - 0.6.7 → 0.6.9 - Mend

mspire 0.6.7 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

data/Rakefile +5 -0
data/VERSION +1 -1
data/lib/cv/param.rb +25 -5
data/lib/cv/referenceable_param_group_ref.rb +13 -0
data/lib/cv.rb +3 -1
data/lib/ms/cv/param.rb +19 -24
data/lib/ms/cv/paramable.rb +42 -0
data/lib/ms/mzml/activation.rb +33 -0
data/lib/ms/mzml/chromatogram.rb +29 -0
data/lib/ms/mzml/chromatogram_list.rb +26 -0
data/lib/ms/mzml/component.rb +21 -0
data/lib/ms/mzml/contact.rb +23 -0
data/lib/ms/mzml/cv.rb +46 -0
data/lib/ms/mzml/data_array.rb +65 -0
data/lib/ms/mzml/data_array_container_like.rb +57 -0
data/lib/ms/mzml/data_processing.rb +27 -0
data/lib/ms/mzml/file_content.rb +21 -0
data/lib/ms/mzml/file_description.rb +47 -0
data/lib/ms/mzml/instrument_configuration.rb +37 -0
data/lib/ms/mzml/isolation_window.rb +21 -0
data/lib/ms/mzml/list.rb +23 -0
data/lib/ms/mzml/precursor.rb +42 -0
data/lib/ms/mzml/processing_method.rb +24 -0
data/lib/ms/mzml/product.rb +22 -0
data/lib/ms/mzml/referenceable_param_group.rb +40 -0
data/lib/ms/mzml/run.rb +54 -0
data/lib/ms/mzml/sample.rb +27 -0
data/lib/ms/mzml/scan.rb +44 -0
data/lib/ms/mzml/scan_list.rb +33 -0
data/lib/ms/mzml/scan_settings.rb +28 -0
data/lib/ms/mzml/selected_ion.rb +18 -0
data/lib/ms/mzml/software.rb +28 -0
data/lib/ms/mzml/source_file.rb +48 -0
data/lib/ms/mzml/spectrum.rb +91 -0
data/lib/ms/mzml/spectrum_list.rb +42 -0
data/lib/ms/mzml.rb +173 -6
data/lib/ms/quant/qspec/protein_group_comparison.rb +3 -3
data/lib/ms/quant/qspec.rb +4 -4
data/lib/ms/spectrum.rb +137 -260
data/lib/ms/spectrum_like.rb +133 -0
data/lib/ms/user_param.rb +43 -0
data/lib/mspire.rb +6 -0
data/obo/ms.obo +670 -121
data/obo/unit.obo +23 -1
data/spec/ms/cv/param_spec.rb +33 -0
data/spec/ms/mzml/cv_spec.rb +17 -0
data/spec/ms/mzml/file_content_spec.rb +25 -0
data/spec/ms/mzml/file_description_spec.rb +34 -0
data/spec/ms/mzml/referenceable_param_group_spec.rb +33 -0
data/spec/ms/mzml_spec.rb +65 -4
data/spec/ms/user_param_spec.rb +51 -0
data/spec/mspire_spec.rb +9 -0
data/spec/testfiles/ms/mzml/mspire_simulated.noidx.check.mzML +81 -0
metadata +57 -21
data/lib/cv/description.rb +0 -19
data/lib/ms/cv/description.rb +0 -44
data/lib/msplat.rb +0 -2
data/spec/ms/cv/description_spec.rb +0 -60
data/spec/msplat_spec.rb +0 -24

data/lib/ms/mzml.rb CHANGED Viewed

@@ -1,10 +1,26 @@
+require 'mspire'
+require 'builder'
 require 'nokogiri'
 require 'io/bookmark'
 require 'zlib'
 require 'ms/mzml/index_list'
 require 'ms/spectrum'
+require 'ms/mzml/file_description'
+require 'ms/mzml/software'
+require 'ms/mzml/scan_list'
+require 'ms/mzml/scan'
+require 'ms/mzml/run'
+require 'ms/mzml/spectrum_list'
+require 'ms/mzml/chromatogram_list'
+require 'ms/mzml/instrument_configuration'
+require 'ms/mzml/data_processing'
+require 'ms/mzml/referenceable_param_group'
+require 'ms/mzml/cv'
+require 'ms/mzml/sample'
 module MS
+  # Reading an mzML file:
+  #
   #     MS::Mzml.open("somefile.mzML") do |mzml|
   #       mzml.each do |spectrum|
   #         scan = spectrum.scan
@@ -15,24 +31,131 @@ module MS
   #         end
   #       end
   #     end
+  #
+  # Note that the mzml object supports random spectrum access (even if the
+  # mzml was not indexed):
+  #
+  #     mzml[22]  # retrieve spectrum at index 22
+  #
+  # Writing an mzml file from scratch:
+  #
+  #     spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000127', ['MS:1000511', 1]]) do |spec|
+  #       spec.data_arrays = [[1,2,3], [4,5,6]]
+  #       spec.scan_list = MS::Mzml::ScanList.new do |sl|
+  #         scan = MS::Mzml::Scan.new do |scan|
+  #           # retention time of 40 seconds
+  #           scan.describe! ['MS:1000016', 40.0, 'UO:0000010']
+  #         end
+  #         sl << scan
+  #       end
+  #     end
+  #
+  #     mzml = MS::Mzml.new do |mzml|
+  #       mzml.id = 'the_little_example'
+  #       mzml.cvs = MS::Mzml::CV::DEFAULT_CVS
+  #       mzml.file_description = MS::Mzml::FileDescription.new  do |fd|
+  #         fd.file_content = MS::Mzml::FileContent.new
+  #         fd.source_files << MS::Mzml::SourceFile.new
+  #       end
+  #       default_instrument_config = MS::Mzml::InstrumentConfiguration.new("IC",[], params: ['MS:1000031'])
+  #       mzml.instrument_configurations << default_instrument_config
+  #       software = MS::Mzml::Software.new
+  #       mzml.software_list << software
+  #       default_data_processing = MS::Mzml::DataProcessing.new("did_nothing")
+  #       mzml.data_processing_list << default_data_processing
+  #       mzml.run = MS::Mzml::Run.new("little_run", default_instrument_config) do |run|
+  #         spectrum_list = MS::Mzml::SpectrumList.new(default_data_processing)
+  #         spectrum_list.push(spec1)
+  #         run.spectrum_list = spectrum_list
+  #       end
+  #     end
   class Mzml
+    module Default
+      NAMESPACE = {
+        :xmlns => "http://psi.hupo.org/ms/mzml",
+        "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
+        "xmlns:xsd" => "http://www.w3.org/2001/XMLSchema",
+      }
+      VERSION = '1.1.0'
+    end
+    ###############################################
+    # ATTRIBUTES
+    ###############################################
+    # (optional) an id for accessing from external files
+    attr_accessor :id
+    # (required) the Mzml document version
+    attr_accessor :version
+    # (optional) e.g. a PRIDE accession number
+    attr_accessor :accession
+    ###############################################
+    # SUBELEMENTS
+    ###############################################
+    # (required) an array of MS::Mzml::CV objects
+    attr_accessor :cvs
+    # (required) an MS::Mzml::FileDescription
+    attr_accessor :file_description
+    # (optional) an array of CV::ReferenceableParamGroup objects
+    attr_accessor :referenceable_param_groups
+    # (optional) an array of MS::Mzml::Sample objects
+    attr_accessor :samples
+    # (required) an array of MS::Mzml::Software objects
+    attr_accessor :software_list
+    # (optional) an array of MS::Mzml::ScanSettings objects
+    attr_accessor :scan_settings_list
+    # (required) an array of MS::Mzml::InstrumentConfiguration objects
+    attr_accessor :instrument_configurations
+    # (required) an array of MS::Mzml::DataProcessing objects
+    attr_accessor :data_processing_list
+    # (required) an MS::Mzml::Run object
+    attr_accessor :run
     module Parser
       NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
     end
     include Enumerable
-    attr_accessor :filename
     attr_accessor :io
     attr_accessor :index_list
     attr_accessor :encoding
+    # arg must be an IO object for automatic index and header parsing to
+    # occur.  If arg is a hash, then attributes are set.  In addition (or
+    # alternatively) a block may be given, which is called with self to set
+    # up the object.
+    #
     # io must respond_to?(:size), giving the size of the io object in bytes
-    # which allows seeking.  #get_index_list is called to get or create the
+    # which allows seeking.  get_index_list is called to get or create the
     # index list.
-    def initialize(io)
-      @io = io
-      @encoding = @io.bookmark(true) {|io| io.readline.match(/encoding=["'](.*?)["']/)[1] }
-      @index_list = get_index_list
+    def initialize(arg=nil, &block)
+      %w(cvs software_list instrument_configurations data_processing_list).each {|guy| self.send( guy + '=', [] ) }
+      case arg
+      when IO
+        @io = arg
+        @encoding = @io.bookmark(true) {|io| io.readline.match(/encoding=["'](.*?)["']/)[1] }
+        @index_list = get_index_list
+        # TODO: also read in 'header' info (everything until 'run')
+      when Hash
+        arg.each {|k,v| self.send("#{k}=", v) }
+      end
+      if block
+        block.call(self)
+      end
     end
     class << self
@@ -191,6 +314,50 @@ module MS
       read_index_list || create_index_list
     end
+    # Because mzml files are often very large, we try to avoid storing the
+    # entire object tree in memory before writing.
+    #
+    # takes a filename and uses builder to write to it
+    # if no filename is given, returns a string
+    def to_xml(filename=nil)
+      # TODO: support indexed mzml files
+      io = filename ? File.open(filename, 'w') : StringIO.new
+      xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
+      xml.instruct!
+      mzml_atts = Default::NAMESPACE.dup
+      mzml_atts[:version] = @version || Default::VERSION
+      mzml_atts[:accession] = @accession if @accession
+      mzml_atts[:id] = @id if @id
+      xml.mzML(mzml_atts) do |mzml_n|
+        # the 'if' statements capture whether or not the list is required
+        raise "#{self.class}#cvs must have > 0 MS::Mzml::CV objects" unless @cvs.size > 0
+        MS::Mzml::CV.list_xml(@cvs, mzml_n)
+        @file_description.to_xml(mzml_n)
+        if @referenceable_param_groups
+          MS::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
+        end
+        if @samples
+          MS::Mzml::Sample.list_xml(@samples, mzml_n)
+        end
+        MS::Mzml::Software.list_xml(@software_list, mzml_n)
+        if @scan_settings_list && @scan_settings_list.size > 0
+          MS::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
+        end
+        icl = MS::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
+        MS::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
+        @run.to_xml(mzml_n)
+      end
+      if filename
+        io.close
+        self
+      else
+        io.string
+      end
+    end
     class ScanNumbersNotUnique < Exception
     end
     class ScanNumbersNotFound < Exception

data/lib/ms/quant/qspec/protein_group_comparison.rb CHANGED Viewed

@@ -1,14 +1,14 @@
 require 'ms/quant/protein_group_comparison'
-module Ms
+module MS
   module Quant
     module ProteinGroupComparison
     end
   end
 end
-class Ms::Quant::ProteinGroupComparison::Qspec
-  include Ms::Quant::ProteinGroupComparison
+class MS::Quant::ProteinGroupComparison::Qspec
+  include MS::Quant::ProteinGroupComparison
   attr_accessor :qspec_results_struct

data/lib/ms/quant/qspec.rb CHANGED Viewed

@@ -1,7 +1,7 @@
-module Ms ; end
-module Ms::Quant ; end
+module MS ; end
+module MS::Quant ; end
-class Ms::Quant::Qspec
+class MS::Quant::Qspec
   # personal communication with Hyungwon Choi: "We typically use nburn=2000,
   # niter=10000, which is quite sufficient to guarantee the reproducibility of
@@ -62,7 +62,7 @@ class Ms::Quant::Qspec
   # writes a qspec formatted file to filename
   def write(filename)
-    ints = Ms::Quant::Qspec.conditions_to_ints(conditions)
+    ints = MS::Quant::Qspec.conditions_to_ints(conditions)
     header_cats = INIT_HEADER + ints
     rows = @protname_length_pairs.map {|pair| pair.map.to_a }
     @condition_to_count_array.each do |cond,counts|

data/lib/ms/spectrum.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'ms/spectrum_like'
 require 'bsearch'
 require 'bin'
 require 'ms/peak'
@@ -6,7 +7,7 @@ module MS
   # note that a point is an [m/z, intensity] doublet.
   # A peak is considered a related string of points
   class Spectrum
-    include Enumerable
+    include MS::SpectrumLike
     DEFAULT_MERGE = {
       :bin_width => 5,
@@ -16,284 +17,160 @@ module MS
       :split => :share
     }
-    # returns a new spectrum which has been merged with the others.  If the
-    # spectra are centroided (just checks the first one and assumes the others
-    # are the same) then it will bin the points (bin width determined by
-    # opts[:resolution]) and then segment according to monotonicity (sharing
-    # intensity between abutting points).  The  final m/z is the weighted
-    # averaged of all the m/z's in each peak.  Valid opts (with default listed
-    # first):
-    #
-    #     :bin_width => 5
-    #     :bin_unit => :ppm | :amu        interpret bin_width as ppm or amu
-    #     :bins => array of Bin objects   for custom bins (overides other bin options)
-    #     :normalize => false             if true, divides total intensity by
-    #                                     number of spectra
-    #     :return_data => false           returns a parallel array containing
-    #                                     the peaks associated with each returned point
-    #     :split => :share | :greedy_y    see MS::Peak#split
-    #
-    # The binning algorithm is the fastest possible algorithm that would allow
-    # for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
-    def self.merge(spectra, opts={})
-      opt = DEFAULT_MERGE.merge(opts)
-      (spectrum, returned_data) =
-        if spectra.first.centroided?
-          # find the min and max across all spectra
-          first_mzs = spectra.first.mzs
-          min = first_mzs.first ; max = first_mzs.last
-          spectra.each do |spectrum|
-            mzs = spectrum.mzs
-            min = mzs.first if mzs.first < min
-            max = mzs.last if mzs.last > max
-          end
-          # Create Bin objects
-          bins =
-            if opt[:bins]
-              opt[:bins]
-            else
-              divisions = []
-              bin_width = opt[:bin_width]
-              use_ppm = (opt[:bin_unit] == :ppm)
-              current_mz = min
-              loop do
-                if current_mz >= max
-                  divisions << max
-                  break
-                else
-                  divisions << current_mz
-                  current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
-                end
-              end
-              # make each bin exclusive so there is no overlap
-              bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
-              # make the last bin *inclusive* of the terminating value
-              bins[-1] = Bin.new(bins.last.begin, bins.last.end)
-              bins
-            end
-          spectra.each do |spectrum|
-            Bin.bin(bins, spectrum.points, &:first)
-          end
-          pseudo_points = bins.map do |bin|
-            #int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3)   # <- just for info:
-            [bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
-          end
+    class << self
-          #p_mzs = []
-          #p_ints = []
-          #p_num_points = []
-          #pseudo_points.each do |psp|
-          #  p_mzs << ((psp.first.begin + psp.first.end)/2)
-          #  p_ints << psp.last
-          #  p_num_points <<  psp.first.data.size
-          #end
+      def from_points(ar_of_doublets)
+        _mzs = []
+        _ints = []
+        ar_of_doublets.each do |mz, int|
+          _mzs << mz
+          _ints << int
+        end
+        self.new([_mzs, _ints])
+      end
-          #File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
-          #abort 'here'
+      # returns a new spectrum which has been merged with the others.  If the
+      # spectra are centroided (just checks the first one and assumes the others
+      # are the same) then it will bin the points (bin width determined by
+      # opts[:bin_width]) and then segment according to monotonicity (sharing
+      # intensity between abutting points).  The final m/z is the weighted
+      # average of all the m/z's in each peak.  Valid opts (with default listed
+      # first):
+      #
+      #     :bin_width => 5
+      #     :bin_unit => :ppm | :amu        interpret bin_width as ppm or amu
+      #     :bins => array of Bin objects   for custom bins (overrides other bin options)
+      #     :normalize => false             if true, divides total intensity by
+      #                                     number of spectra
+      #     :return_data => false           returns a parallel array containing
+      #                                     the peaks associated with each returned point
+      #     :split => :share | :greedy_y    see MS::Peak#split
+      #
+      # The binning algorithm is the fastest possible algorithm that would allow
+      # for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
+      def merge(spectra, opts={})
+        opt = DEFAULT_MERGE.merge(opts)
+        (spectrum, returned_data) =
+          if spectra.first.centroided?
+            # find the min and max across all spectra
+            first_mzs = spectra.first.mzs
+            min = first_mzs.first ; max = first_mzs.last
+            spectra.each do |spectrum|
+              mzs = spectrum.mzs
+              min = mzs.first if mzs.first < min
+              max = mzs.last if mzs.last > max
+            end
-          peaks = MS::Peak.new(pseudo_points).split(opt[:split])
+            # Create Bin objects
+            bins =
+              if opt[:bins]
+                opt[:bins]
+              else
+                divisions = []
+                bin_width = opt[:bin_width]
+                use_ppm = (opt[:bin_unit] == :ppm)
+                current_mz = min
+                loop do
+                  if current_mz >= max
+                    divisions << max
+                    break
+                  else
+                    divisions << current_mz
+                    current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
+                  end
+                end
+                # make each bin exclusive so there is no overlap
+                bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
+                # make the last bin *inclusive* of the terminating value
+                bins[-1] = Bin.new(bins.last.begin, bins.last.end)
+                bins
+              end
-          return_data = []
-          _mzs = [] ; _ints = []
+            spectra.each do |spectrum|
+              Bin.bin(bins, spectrum.points, &:first)
+            end
-          #p peaks[97]
-          #puts "HIYA"
-          #abort 'here'
+            pseudo_points = bins.map do |bin|
+              #int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3)   # <- just for info:
+              [bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
+            end
-          peaks.each_with_index do |peak,i|
-          #peaks.each do |peak|
-            tot_intensity = peak.map(&:last).reduce(:+)
-            return_data_per_peak = [] if opt[:return_data]
-            weighted_mz = 0.0
-            peak.each do |point|
-              pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
-              post_scaled_intensity = point[1]
-              # some peaks may have been shared.  In this case the intensity
-              # for that peak was downweighted.  However, the actually data
-              # composing that peak is not altered when the intensity is
-              # shared.  So, to calculate a proper weighted avg we need to
-              # downweight the intensity of any data point found within a bin
-              # whose intensity was scaled.
-              correction_factor =
-                if pre_scaled_intensity != post_scaled_intensity
-                  post_scaled_intensity / pre_scaled_intensity
-                else
-                  1.0
+            #p_mzs = []
+            #p_ints = []
+            #p_num_points = []
+            #pseudo_points.each do |psp|
+            #  p_mzs << ((psp.first.begin + psp.first.end)/2)
+            #  p_ints << psp.last
+            #  p_num_points <<  psp.first.data.size
+            #end
+            #File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
+            #abort 'here'
+            peaks = MS::Peak.new(pseudo_points).split(opt[:split])
+            return_data = []
+            _mzs = [] ; _ints = []
+            #p peaks[97]
+            #puts "HIYA"
+            #abort 'here'
+            peaks.each_with_index do |peak,i|
+              #peaks.each do |peak|
+              tot_intensity = peak.map(&:last).reduce(:+)
+              return_data_per_peak = [] if opt[:return_data]
+              weighted_mz = 0.0
+              peak.each do |point|
+                pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
+                post_scaled_intensity = point[1]
+                # some peaks may have been shared.  In this case the intensity
+                # for that peak was downweighted.  However, the actual data
+                # composing that peak is not altered when the intensity is
+                # shared.  So, to calculate a proper weighted avg we need to
+                # downweight the intensity of any data point found within a bin
+                # whose intensity was scaled.
+                correction_factor =
+                  if pre_scaled_intensity != post_scaled_intensity
+                    post_scaled_intensity / pre_scaled_intensity
+                  else
+                    1.0
+                  end
+                return_data_per_peak.push(*point[0].data) if opt[:return_data]
+                point[0].data.each do |lil_point|
+                  weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
                 end
-              return_data_per_peak.push(*point[0].data) if opt[:return_data]
-              point[0].data.each do |lil_point|
-                weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
               end
+              return_data << return_data_per_peak if opt[:return_data]
+              _mzs << weighted_mz
+              _ints << tot_intensity
             end
-            return_data << return_data_per_peak if opt[:return_data]
-            _mzs << weighted_mz
-            _ints << tot_intensity
+            [Spectrum.new([_mzs, _ints]), return_data]
+          else
+            raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
           end
-          [Spectrum.new([_mzs, _ints]), return_data]
-        else
-          raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
-        end
-      if opt[:normalize]
-        sz = spectra.size
-        spectrum.data[1].map! {|v| v.to_f / sz }
-      end
-      if opt[:return_data]
-        $stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
-        [spectrum, return_data]
-      else
-        $stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
-        spectrum
-      end
-    end
-    # boolean for if the spectrum represents centroided data or not
-    attr_accessor :centroided
-    def centroided?() centroided end
-    # The underlying data store. methods are implemented so that data[0] is
-    # the m/z's and data[1] is intensities
-    attr_reader :data
-    # data takes an array: [mzs, intensities]
-    # @return [MS::Spectrum]
-    # @param [Array] data two element array of mzs and intensities
-    def initialize(data, centroided=true)
-      @data = data
-      @centroided = centroided
-    end
-    def self.from_points(ar_of_doublets)
-      _mzs = []
-      _ints = []
-      ar_of_doublets.each do |mz, int|
-        _mzs << mz
-        _ints << int
-      end
-      self.new([_mzs, _ints])
-    end
-    # found by querying the size of the data store.  This should almost always
-    # be 2 (m/z and intensities)
-    def size
-      @data.size
-    end
-    def ==(other)
-      mzs == other.mzs && intensities == other.intensities
-    end
-    # An array of the mz data.
-    def mzs
-      @data[0]
-    end
-    # An array of the intensities data, corresponding to mzs.
-    def intensities
-      @data[1]
-    end
-    def mzs_and_intensities
-      [@data[0], @data[1]]
-    end
-    # retrieve an m/z and intensity doublet at that index
-    def [](array_index)
-      [@data[0][array_index], @data[1][array_index]]
-    end
-    # yields(mz, inten) across the spectrum, or array of doublets if no block
-    def points(&block)
-      @data[0].zip(@data[1], &block)
-    end
-    alias_method :each, :points
-    alias_method :each_point, :points
-    # if the mzs and intensities are the same then the spectra are considered
-    # equal
-    def ==(other)
-      mzs == other.mzs && intensities == other.intensities
-    end
-    # returns a new spectrum whose intensities have been normalized by the tic
-    # of another given value
-    def normalize(norm_by=:tic)
-      norm_by = tic if norm_by == :tic
-      MS::Spectrum.new([self.mzs, self.intensities.map {|v| v / norm_by }])
-    end
-    def tic
-      self.intensities.reduce(:+)
-    end
-    # ensures that the m/z values are monotonically ascending (some
-    # instruments are bad about this)
-    # returns self
-    def sort!
-      _points = points.to_a
-      _points.sort!
-      _points.each_with_index {|(mz,int), i| @data[0][i] = mz ; @data[1][i] = int }
-      self
-    end
-    # returns the m/z that is closest to the value, favoring the lower m/z in
-    # the case of a tie. Uses a binary search.
-    def find_nearest(val)
-      mzs[find_nearest_index(val)]
-    end
-    # same as find_nearest but returns the index of the point
-    def find_nearest_index(val)
-      find_all_nearest_index(val).first
-    end
-    def find_all_nearest_index(val)
-      _mzs = mzs
-      index = _mzs.bsearch_lower_boundary {|v| v <=> val }
-      if index == _mzs.size
-        [_mzs.size-1]
-      else
-        # if the previous m/z diff is smaller, use it
-        if index == 0
-          [index]
+        if opt[:normalize]
+          sz = spectra.size
+          spectrum.data[1].map! {|v| v.to_f / sz }
+        end
+        if opt[:return_data]
+          $stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
+          [spectrum, return_data]
         else
-          case (val - _mzs[index-1]).abs <=> (_mzs[index] - val).abs
-          when -1
-            [index-1]
-          when 0
-            [index-1, index]
-          when 1
-            [index]
-          end
+          $stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
+          spectrum
         end
       end
-    end
-    def find_all_nearest(val)
-      find_all_nearest_index(val).map {|i| mzs[i] }
-    end
-    # uses MS::Spectrum.merge
-    def merge(other_spectra, opts={})
-      MS::Spectrum.merge([self, *other_spectra], opts)
     end
   end
 end