RubyGems - mspire - Versions diffs - 0.4.9 → 0.5.0 - Mend

mspire 0.4.9 → 0.5.0

Files changed (255) hide show

data/README +27 -17
data/changelog.txt +31 -62
data/lib/ms/calc.rb +32 -0
data/lib/ms/data/interleaved.rb +60 -0
data/lib/ms/data/lazy_io.rb +73 -0
data/lib/ms/data/lazy_string.rb +15 -0
data/lib/ms/data/simple.rb +59 -0
data/lib/ms/data/transposed.rb +41 -0
data/lib/ms/data.rb +57 -0
data/lib/ms/format/format_error.rb +12 -0
data/lib/ms/spectrum.rb +25 -384
data/lib/ms/support/binary_search.rb +126 -0
data/lib/ms.rb +10 -10
metadata +38 -350
data/INSTALL +0 -58
data/README.rdoc +0 -18
data/Rakefile +0 -330
data/bin/aafreqs.rb +0 -23
data/bin/bioworks2excel.rb +0 -14
data/bin/bioworks_to_pepxml.rb +0 -148
data/bin/bioworks_to_pepxml_gui.rb +0 -225
data/bin/fasta_shaker.rb +0 -5
data/bin/filter_and_validate.rb +0 -5
data/bin/gi2annot.rb +0 -14
data/bin/id_class_anal.rb +0 -112
data/bin/id_precision.rb +0 -172
data/bin/ms_to_lmat.rb +0 -67
data/bin/pepproph_filter.rb +0 -16
data/bin/prob_validate.rb +0 -6
data/bin/protein_summary.rb +0 -6
data/bin/protxml2prots_peps.rb +0 -32
data/bin/raw_to_mzXML.rb +0 -55
data/bin/run_percolator.rb +0 -122
data/bin/sqt_group.rb +0 -26
data/bin/srf_group.rb +0 -27
data/bin/srf_to_sqt.rb +0 -40
data/lib/align/chams.rb +0 -78
data/lib/align.rb +0 -154
data/lib/archive/targz.rb +0 -94
data/lib/bsearch.rb +0 -120
data/lib/core_extensions.rb +0 -16
data/lib/fasta.rb +0 -626
data/lib/gi.rb +0 -124
data/lib/group_by.rb +0 -10
data/lib/index_by.rb +0 -11
data/lib/merge_deep.rb +0 -21
data/lib/ms/converter/mzxml.rb +0 -77
data/lib/ms/gradient_program.rb +0 -170
data/lib/ms/msrun.rb +0 -244
data/lib/ms/msrun_index.rb +0 -108
data/lib/ms/parser/mzdata/axml.rb +0 -67
data/lib/ms/parser/mzdata/dom.rb +0 -175
data/lib/ms/parser/mzdata/libxml.rb +0 -7
data/lib/ms/parser/mzdata.rb +0 -31
data/lib/ms/parser/mzxml/axml.rb +0 -70
data/lib/ms/parser/mzxml/dom.rb +0 -182
data/lib/ms/parser/mzxml/hpricot.rb +0 -253
data/lib/ms/parser/mzxml/libxml.rb +0 -19
data/lib/ms/parser/mzxml/regexp.rb +0 -122
data/lib/ms/parser/mzxml/rexml.rb +0 -72
data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
data/lib/ms/parser/mzxml.rb +0 -282
data/lib/ms/parser.rb +0 -108
data/lib/ms/precursor.rb +0 -25
data/lib/ms/scan.rb +0 -81
data/lib/mspire.rb +0 -4
data/lib/pi_zero.rb +0 -244
data/lib/qvalue.rb +0 -161
data/lib/roc.rb +0 -187
data/lib/sample_enzyme.rb +0 -160
data/lib/scan_i.rb +0 -21
data/lib/spec_id/aa_freqs.rb +0 -170
data/lib/spec_id/bioworks.rb +0 -497
data/lib/spec_id/digestor.rb +0 -138
data/lib/spec_id/mass.rb +0 -179
data/lib/spec_id/parser/proph.rb +0 -335
data/lib/spec_id/precision/filter/cmdline.rb +0 -218
data/lib/spec_id/precision/filter/interactive.rb +0 -134
data/lib/spec_id/precision/filter/output.rb +0 -148
data/lib/spec_id/precision/filter.rb +0 -637
data/lib/spec_id/precision/output.rb +0 -60
data/lib/spec_id/precision/prob/cmdline.rb +0 -160
data/lib/spec_id/precision/prob/output.rb +0 -94
data/lib/spec_id/precision/prob.rb +0 -249
data/lib/spec_id/proph/pep_summary.rb +0 -104
data/lib/spec_id/proph/prot_summary.rb +0 -484
data/lib/spec_id/proph.rb +0 -4
data/lib/spec_id/protein_summary.rb +0 -489
data/lib/spec_id/sequest/params.rb +0 -316
data/lib/spec_id/sequest/pepxml.rb +0 -1458
data/lib/spec_id/sequest.rb +0 -33
data/lib/spec_id/sqt.rb +0 -349
data/lib/spec_id/srf.rb +0 -973
data/lib/spec_id.rb +0 -778
data/lib/spec_id_xml.rb +0 -99
data/lib/transmem/phobius.rb +0 -147
data/lib/transmem/toppred.rb +0 -368
data/lib/transmem.rb +0 -157
data/lib/validator/aa.rb +0 -48
data/lib/validator/aa_est.rb +0 -112
data/lib/validator/background.rb +0 -77
data/lib/validator/bias.rb +0 -95
data/lib/validator/cmdline.rb +0 -431
data/lib/validator/decoy.rb +0 -107
data/lib/validator/digestion_based.rb +0 -70
data/lib/validator/probability.rb +0 -51
data/lib/validator/prot_from_pep.rb +0 -234
data/lib/validator/q_value.rb +0 -32
data/lib/validator/transmem.rb +0 -272
data/lib/validator/true_pos.rb +0 -46
data/lib/validator.rb +0 -197
data/lib/xml.rb +0 -38
data/lib/xml_style_parser.rb +0 -119
data/lib/xmlparser_wrapper.rb +0 -19
data/release_notes.txt +0 -2
data/script/compile_and_plot_smriti_final.rb +0 -97
data/script/create_little_pepxml.rb +0 -61
data/script/degenerate_peptides.rb +0 -47
data/script/estimate_fpr_by_cysteine.rb +0 -226
data/script/extract_gradient_programs.rb +0 -56
data/script/find_cysteine_background.rb +0 -137
data/script/genuine_tps_and_probs.rb +0 -136
data/script/get_apex_values_rexml.rb +0 -44
data/script/histogram_probs.rb +0 -61
data/script/mascot_fix_pepxml.rb +0 -123
data/script/msvis.rb +0 -42
data/script/mzXML2timeIndex.rb +0 -25
data/script/peps_per_bin.rb +0 -67
data/script/prep_dir.rb +0 -121
data/script/simple_protein_digestion.rb +0 -27
data/script/smriti_final_analysis.rb +0 -103
data/script/sqt_to_meta.rb +0 -24
data/script/top_hit_per_scan.rb +0 -67
data/script/toppred_to_yaml.rb +0 -47
data/script/tpp_installer.rb +0 -249
data/specs/align_spec.rb +0 -79
data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
data/specs/bin/fasta_shaker_spec.rb +0 -259
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
data/specs/bin/filter_and_validate_spec.rb +0 -180
data/specs/bin/ms_to_lmat_spec.rb +0 -34
data/specs/bin/prob_validate_spec.rb +0 -86
data/specs/bin/protein_summary_spec.rb +0 -14
data/specs/fasta_spec.rb +0 -354
data/specs/gi_spec.rb +0 -22
data/specs/load_bin_path.rb +0 -7
data/specs/merge_deep_spec.rb +0 -13
data/specs/ms/gradient_program_spec.rb +0 -77
data/specs/ms/msrun_spec.rb +0 -498
data/specs/ms/parser_spec.rb +0 -92
data/specs/ms/spectrum_spec.rb +0 -87
data/specs/pi_zero_spec.rb +0 -115
data/specs/qvalue_spec.rb +0 -39
data/specs/roc_spec.rb +0 -251
data/specs/rspec_autotest.rb +0 -149
data/specs/sample_enzyme_spec.rb +0 -126
data/specs/spec_helper.rb +0 -135
data/specs/spec_id/aa_freqs_spec.rb +0 -52
data/specs/spec_id/bioworks_spec.rb +0 -148
data/specs/spec_id/digestor_spec.rb +0 -75
data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
data/specs/spec_id/precision/filter/output_spec.rb +0 -31
data/specs/spec_id/precision/filter_spec.rb +0 -246
data/specs/spec_id/precision/prob_spec.rb +0 -44
data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
data/specs/spec_id/protein_summary_spec.rb +0 -189
data/specs/spec_id/sequest/params_spec.rb +0 -68
data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
data/specs/spec_id/sequest_spec.rb +0 -38
data/specs/spec_id/sqt_spec.rb +0 -246
data/specs/spec_id/srf_spec.rb +0 -172
data/specs/spec_id/srf_spec_helper.rb +0 -139
data/specs/spec_id_helper.rb +0 -33
data/specs/spec_id_spec.rb +0 -366
data/specs/spec_id_xml_spec.rb +0 -33
data/specs/transmem/phobius_spec.rb +0 -425
data/specs/transmem/toppred_spec.rb +0 -298
data/specs/transmem_spec.rb +0 -60
data/specs/transmem_spec_shared.rb +0 -64
data/specs/validator/aa_est_spec.rb +0 -66
data/specs/validator/aa_spec.rb +0 -40
data/specs/validator/background_spec.rb +0 -67
data/specs/validator/bias_spec.rb +0 -122
data/specs/validator/decoy_spec.rb +0 -51
data/specs/validator/fasta_helper.rb +0 -26
data/specs/validator/prot_from_pep_spec.rb +0 -141
data/specs/validator/transmem_spec.rb +0 -146
data/specs/validator/true_pos_spec.rb +0 -58
data/specs/validator_helper.rb +0 -33
data/specs/xml_spec.rb +0 -12
data/test_files/000_pepxml18_small.xml +0 -206
data/test_files/020a.mzXML.timeIndex +0 -4710
data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
data/test_files/4-03-03_small-prot.xml +0 -321
data/test_files/4-03-03_small.xml +0 -3876
data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
data/test_files/bioworks-3.3_10prots.xml +0 -5999
data/test_files/bioworks31.params +0 -77
data/test_files/bioworks32.params +0 -62
data/test_files/bioworks33.params +0 -63
data/test_files/bioworks_single_run_small.xml +0 -7237
data/test_files/bioworks_small.fasta +0 -212
data/test_files/bioworks_small.params +0 -63
data/test_files/bioworks_small.phobius +0 -109
data/test_files/bioworks_small.toppred.out +0 -2847
data/test_files/bioworks_small.xml +0 -5610
data/test_files/bioworks_with_INV_small.xml +0 -3753
data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
data/test_files/corrupted_900.srf +0 -0
data/test_files/head_of_7MIX.srf +0 -0
data/test_files/interact-opd1_mods_small-prot.xml +0 -304
data/test_files/messups.fasta +0 -297
data/test_files/opd1/000.my_answer.100lines.xml +0 -101
data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
data/test_files/opd1/000_020-prot.png +0 -0
data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
data/test_files/opd1/000_020_3prots-prot.xml +0 -62
data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
data/test_files/opd1/sequest.3.1.params +0 -77
data/test_files/opd1/sequest.3.2.params +0 -62
data/test_files/opd1/twenty_scans.mzXML +0 -418
data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
data/test_files/opd1/twenty_scans_answ.lmat +0 -0
data/test_files/opd1/twenty_scans_answ.lmata +0 -9
data/test_files/opd1_020_beginning.RAW +0 -0
data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
data/test_files/pepproph_small.xml +0 -4691
data/test_files/phobius.small.noheader.txt +0 -50
data/test_files/phobius.small.small.txt +0 -53
data/test_files/s01_anC1_ld020mM.key.txt +0 -25
data/test_files/s01_anC1_ld020mM.meth +0 -0
data/test_files/small.fasta +0 -297
data/test_files/small.sqt +0 -87
data/test_files/smallraw.RAW +0 -0
data/test_files/tf_bioworks2excel.bioXML +0 -14340
data/test_files/tf_bioworks2excel.txt.actual +0 -1035
data/test_files/toppred.small.out +0 -416
data/test_files/toppred.xml.out +0 -318
data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
data/test_files/yeast_gly_small-prot.xml +0 -265
data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
data/test_files/yeast_gly_small.xml +0 -3807
data/test_files/yeast_gly_small2.parentTimes +0 -6

data/lib/ms/msrun_index.rb DELETED Viewed

@@ -1,108 +0,0 @@
-require 'ms/scan'
-require 'ms/parser'
-class MS::MSRunIndex
-  # basename_noext is the base name of the file (with NO extensions)
-  attr_accessor :scans_by_num
-  attr_reader :basename_noext
-  # identifies and removes .mzXML .mzXML.timeIndex and .timeIndex
-  # otherwise, removes one extension and that's the filename_noext
-  # Also, removes any leading path
-  def basename_noext=(filename)
-    ext = File.extname(filename)
-    basename = File.basename(filename)
-    case ext
-    when '.mzXML'
-      @basename_noext = basename.gsub(/\.mzXML$/, "")
-    when '.timeIndex'
-      @basename_noext = basename.gsub(/\.timeIndex$/, "")
-      if File.extname(@basename_noext) == ".mzXML"
-        @basename_noext.gsub!(/\.mzXML$/, "")
-      end
-    else
-      @basename_noext = basename.gsub(/#{Regexp.escape(ext)}/, "")
-    end
-  end
-  # index_file has one row for each scan:
-  # ms_level scan_num time [prec_mz prec_inten]
-  # also consider getting this data directly from the mzXML file
-  # via the MS::MzXML::Parser.get_msrun_index command
-  def set_from_index_file(index_file)
-    self.basename_noext = index_file
-    @scans_by_num = []
-    if index_file
-      File.open(index_file).each do |line|
-        next if line !~ /\d/ || line =~ /^#/
-        line.chomp!
-        arr = line.split(" ")
-        scan = MS::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
-        if scan.ms_level > 1
-          scan.prec_mz = arr[3].to_f
-          scan.prec_inten = arr[4].to_f
-        end
-        @scans_by_num[scan.num] = scan
-      end
-    end
-    MS::Scan.add_parent_scan(@scans_by_num)
-  end
-  # Takes a .mzXML file or .timeIndex file (currently)
-  # and creates an index of scans from it
-  def initialize(file=nil)
-    @scans_by_num = []
-    if file
-      ext = File.extname(file)
-      case ext
-      when '.mzXML'
-        set_from_mzxml(file)
-      when '.timeIndex'
-        set_from_index_file(file)
-      else
-        raise ArgumentError, "#{self.class}.new doesn't recognize files of extension: #{ext}"
-      end
-    end
-  end
-  # returns a new
-  def set_from_mzxml(file)
-    self.basename_noext = file
-    @scans_by_num = MS::Parser.new(file, :scans_by_num).parse(file)
-  end
-  # writes the index to filename
-  # each line:
-  #   ms_level scan_num time (if !ms_level=1) { prec_mz prec_intensity)
-  def to_index_file(filename)
-    strings = []
-    @scans_by_num.each do |scan|
-      if scan
-        strings << scan.to_index_file_string
-      end
-    end
-    File.open(filename, "w") do |fh|
-      fh.print strings.join("\n")
-    end
-  end
-  # returns an array of the times of the precursor scan's parent (not its own
-  # acquisition time).  The parent scan index will also retrieve the time of
-  # the parent scan.
-  def parent_times_by_scan_num
-    by_num = []
-    parent_time = nil
-    @scans_by_num.each_with_index do |scan,i|
-      if scan.ms_level == 1
-        parent_time = scan.time
-      end
-      by_num[i] = parent_time
-    end
-    by_num
-  end
-end

data/lib/ms/parser/mzdata/axml.rb DELETED Viewed

@@ -1,67 +0,0 @@
-require 'ms/parser/mzdata/dom'
-class MS::Parser::MzData::AXML < MS::Parser::MzData::DOM
-  def get_root_node_from_file(file)
-    ::AXML.parse_file(file)
-  end
-  def get_root_node_from_io(io)
-    ::AXML.parse(io)
-  end
-end
-class MS::Parser::MzData::AXML::LazyData < MS::Parser::MzData::AXML
-  def get_root_node_from_string(string)
-    ::AXML::LazyData.parse(string)
-  end
-  def get_root_node_from_file(file)
-    ::AXML::LazyData.parse_file(file)
-  end
-  def get_root_node_from_io(io)
-    ::AXML::LazyData.parse(io)
-  end
-end
-class AXML::LazyData < AXML
-  # Returns the root node (as Element) or nodes (as Array)
-  def self.parse(stream)
-    parser = ::AXML::XMLParser::LazyData.new
-    parser.parse(stream)
-    parser.root
-  end
-end
-# This parser stores information about where the data (peaks) information is
-# in the file
-# The content of the data node is an array where the first member is the
-# start index and the last member is the number of bytes.  All other members
-# should be ignored.
-class AXML::XMLParser::LazyData < ::AXML::XMLParser
-  def startElement(name, attributes)
-    text =
-      if name == 'data' ; []
-      else ; ''
-      end
-    new_el = ::AXML::El.new(@cur, name, attributes, text, [])
-    # add the new node to the previous parent node
-    @cur.add_node(new_el)
-    # notice the change in @cur node
-    @cur = new_el
-  end
-  def character(data)
-    if @cur.text.is_a? Array
-      @cur.text << byteIndex
-    else
-      @cur.text << data
-    end
-  end
-  def endElement(name)
-    if @cur.text.is_a? Array
-      @cur.text << (byteIndex - @cur.text.first)
-    end
-    @cur = @cur.parent
-  end
-end

data/lib/ms/parser/mzdata/dom.rb DELETED Viewed

@@ -1,175 +0,0 @@
-require 'xml_style_parser'
-require 'ms/spectrum'
-require 'ms/scan'
-module MS::Parser::MzData ; end
-class MS::Parser::MzData::DOM
-  include XMLStyleParser
-  include MS::Parser::MzData
-  def initialize(parse_type=:msrun, version='1.0')
-    @method = parse_type
-    @version = version
-  end
-  # true if there is a node <dataProcessing><software><name>Bioworks Browser</...>
-  # otherwise false
-  def is_bioworks33?(description_node)
-    begin
-      software_node = description_node.find_first('child::dataProcessing').find_first('child::software')
-      name = software_node.find_first('child::name').content
-      version = software_node.find_first('child::version').content
-      ((name == 'Bioworks Browser') and (version == '3.3'))
-    rescue
-      false
-    end
-  end
-  # OPTIONS:
-  #   :msrun => MSRun    # use this object instead of creating one
-  def msrun(file, opts={})
-    msrun_obj =
-      if x = opts[:msrun]
-        msrun_obj = x
-      else
-        MS::MSRun.new
-      end
-    # should ensure that parsing is not counting spaces...
-    # a string we'd parse like this:
-    # doc = XML::Parser.string(st).parse
-    # WE NEED TO GET scan_count, start_time and end_time!!!!
-    id_to_scan_hash = {}
-    #    0   1       2             3       4     5          6
-    # %w(num msLevel retentionTime startMz endMz precursor spectrum)
-    io =
-      if file.is_a? String
-        filename = file
-        File.open(file)
-      else
-        file
-      end
-    root = get_root_node_from_io(io)
-    description = root.find_first('child::description')
-    bioworks33 = is_bioworks33?(description)
-    spectrum_list = description.next
-    scans = []
-    # bioworks 33 gives incorrect scan count
-    stated_num_scans = spectrum_list['count'].to_i
-    # if I move from node to node, it means I've checked that it's a sequence
-    # and that the elements are req'd
-    if spectrum_list.child?
-      spectrum_n = spectrum_list.child
-      loop do
-        scan = MS::Scan.new(9)
-        id = spectrum_n["id"].to_i
-        id_to_scan_hash[id] = scan
-        spec_desc_n = spectrum_n.child   # required in sequence
-        spec_settings_n = spec_desc_n.child # required in sequence
-        if acq_n = spec_settings_n.find_first('descendant::acquisition')
-          scan[0] = acq_n['acqNumber'].to_i
-        else
-          scan[0] = id
-        end
-        spec_inst_n = spec_settings_n.find_first('child::spectrumInstrument')
-        scan[1] = spec_inst_n['msLevel'].to_i
-        # we could use a scan_count, but in bioworks 33, we can't trust the
-        # scan count!  So, we just collect them
-        scans << scan
-        scan[3] = spec_inst_n['mzRangeStart'].to_f
-        scan[4] = spec_inst_n['mzRangeStop'].to_f
-        spec_inst_n.find('child::cvParam').each do |cv_param|
-          if cv_param['name'] == 'TimeInMinutes'
-            scan[2] = cv_param['value'].to_f * 60 #convert to seconds
-          end
-        end
-        if scan[1] > 1  # precursormz info
-          prec_list_n = spec_settings_n.next
-          raise RuntimeError, "MSRun objects can only accept 1 precursor" if prec_list_n['count'] != '1'
-          prec_n = prec_list_n.find_first('child::precursor')
-          # %w(mz inten parent ms_level parent charge_states)
-          prec = MS::Precursor.new
-          unless bioworks33  # bioworks33 points to the wrong scan!!!
-            prec[2] = id_to_scan_hash[prec_n['spectrumRef'].to_i]
-          end
-          # we're not keeping track of this guy anymore
-          # prec[3] = prec_n['msLevel'].to_i
-          charges = []
-          prec_n.find('descendant::cvParam').each do |cv_param_n|
-            case cv_param_n['name']
-            when 'MassToChargeRatio'
-              prec[0] = cv_param_n['value'].to_f
-              # find the prec intensity
-              unless bioworks33
-                prec[1] = prec[2].spectrum.intensity_at_mz(prec[0])
-              end
-            when 'ChargeState'
-              charges << cv_param_n['value'].to_i
-            end
-          end
-          prec[3] = charges
-          scan[5] = prec
-        else  # no precursors
-          scan[5] = nil
-        end
-        # here's the one line way of doing it, but it's probably more clear in
-        # the loop
-        #while ((mz_array_bin_n = spec_desc_n.next).name != 'mzArrayBinary') do
-        unless opts[:lazy] == :no_spectra
-          mz_array_bin_n = nil
-          loop do
-            mz_array_bin_n = spec_desc_n.next
-            break if mz_array_bin_n.name == 'mzArrayBinary'
-          end
-          mz_data_n = mz_array_bin_n.child
-          inten_array_bin_n = mz_array_bin_n.next
-          inten_data_n = inten_array_bin_n.child
-          case opts[:lazy]
-          when :string
-           scan[6] = MS::Spectrum::LazyString.from_base64_pair(mz_data_n.content, mz_data_n['precision'].to_i, ((mz_data_n['endian']=='little') ? false : true), inten_data_n.content, inten_data_n['precision'].to_i, ((inten_data_n['endian']=='little') ? false : true) )
-          when :io
-            mz_data_n_content = mz_data_n.content
-            i_data_n_content = inten_data_n.content
-            scan[6] = MS::Spectrum::LazyIO.new(io, mz_data_n_content.first, mz_data_n_content.last, mz_data_n['precision'].to_i, ((mz_data_n['endian']=='little') ? false : true), i_data_n_content.first, i_data_n_content.last, inten_data_n['precision'].to_i, ((inten_data_n['endian']=='little') ? false : true))
-          when :not
-            mz = MS::Spectrum.base64_to_array(mz_data_n.content, mz_data_n['precision'].to_i, ((mz_data_n['endian']=='little') ? false : true))
-            inten = MS::Spectrum.base64_to_array(inten_data_n.content, inten_data_n['precision'].to_i, ((inten_data_n['endian']=='little') ? false : true))
-            scan[6] = MS::Spectrum.new(mz, inten)
-          end
-        end
-        # set up the next loop
-        break unless spectrum_n = spectrum_n.next
-      end
-    end
-    if bioworks33
-      MS::MSRun.add_parent_scan(scans, ((opts[:lazy] == :not) ? true : false))
-    end
-    msrun_obj.scans = scans
-    msrun_obj.scan_count = scans.size
-    unless bioworks33  # we know the scan count is off here
-      if msrun_obj.scan_count != stated_num_scans
-        warn "num collected scans (#{scans.size}) does not agree with stated num scans (#{stated_num_scans})!"
-      end
-    end
-    msrun_obj.start_time = msrun_obj.scans.first.time
-    msrun_obj.end_time = msrun_obj.scans.last.time
-    io.close if filename
-  end
-end

data/lib/ms/parser/mzdata/libxml.rb DELETED Viewed

@@ -1,7 +0,0 @@
-class MS::Parser::MzData::LibXML < MS::Parser::MzData::DOM
-  def get_root_node_from_file(file)
-    XML::Document.file(file).root
-  end
-end

data/lib/ms/parser/mzdata.rb DELETED Viewed

@@ -1,31 +0,0 @@
-require 'ms/msrun'
-module MS; end
-module MS::Parser::MzData
-  Base_dir_for_parsers = 'ms/parser/mzdata'
-  # inherits XMLStyleParser and version
-  include MS::Parser
-  include XMLStyleParser
-  # returns a specific parser MS::Parser::MzXML::#{ParserType}
-  # based on choose_parser from xml_style_parser
-  def self.new(parse_type=:msrun, version='1.05', opts={})
-    special_subclass =
-      if opts[:lazy] == :io
-      'LazyData'
-      else ; nil
-      end
-    @version = version
-    @method = parse_type
-    #p self.methods.grep /choose_parser/
-    XMLStyleParser.require_parse_files(Base_dir_for_parsers)
-    parser_class = XMLStyleParser.choose_parser(self, parse_type, special_subclass)
-    parser = parser_class.new(parse_type, version)
-  end
-end

data/lib/ms/parser/mzxml/axml.rb DELETED Viewed

@@ -1,70 +0,0 @@
-require 'ms/parser/mzxml/dom'
-class MS::Parser::MzXML::AXML < MS::Parser::MzXML::DOM
-  def get_root_node_from_string(string)
-    ::AXML.parse(string)
-  end
-  def get_root_node_from_file(file)
-    ::AXML.parse_file(file)
-  end
-  def get_root_node_from_io(io)
-    ::AXML.parse(io)
-  end
-end
-class MS::Parser::MzXML::AXML::LazyPeaks < MS::Parser::MzXML::AXML
-  def get_root_node_from_string(string)
-    ::AXML::LazyPeaks.parse(string)
-  end
-  def get_root_node_from_file(file)
-    ::AXML::LazyPeaks.parse_file(file)
-  end
-  def get_root_node_from_io(io)
-    ::AXML::LazyPeaks.parse(io)
-  end
-end
-class AXML::LazyPeaks < AXML
-  # Returns the root node (as Element) or nodes (as Array)
-  def self.parse(stream)
-    parser = ::AXML::XMLParser::LazyPeaks.new
-    parser.parse(stream)
-    parser.root
-  end
-end
-# This parser stores information about where the peaks information is in the
-# file
-# The content of the peaks node is an array where the first member is the
-# start index and the last member is the number of bytes.  All other members
-# should be ignored.
-class AXML::XMLParser::LazyPeaks < ::AXML::XMLParser
-  def startElement(name, attributes)
-    text =
-      if name == 'peaks' ; []
-      else ; ''
-      end
-    new_el = ::AXML::El.new(@cur, name, attributes, text, [])
-    # add the new node to the previous parent node
-    @cur.add_node(new_el)
-    # notice the change in @cur node
-    @cur = new_el
-  end
-  def character(data)
-    if @cur.text.is_a? Array
-      @cur.text << byteIndex
-    else
-      @cur.text << data
-    end
-  end
-  def endElement(name)
-    if @cur.text.is_a? Array
-      @cur.text << (byteIndex - @cur.text.first)
-    end
-    @cur = @cur.parent
-  end
-end

data/lib/ms/parser/mzxml/dom.rb DELETED Viewed

@@ -1,182 +0,0 @@
-require 'xml_style_parser'
-require 'ms/spectrum'
-require 'ms/scan'
-require 'ms/parser/mzxml'
-require 'tempfile'
-class MS::Parser::MzXML::DOM
-  include XMLStyleParser
-  include MS::Parser::MzXML
-  NetworkOrder = true
-  #@@scan_atts = %w(num msLevel retentionTime startMz endMz precursor spectrum)
-  def initialize(parse_type=:msrun, version='1.0')
-    @method = parse_type
-    @version = version
-  end
-  def new_scan_from_hash(node)
-    scan = MS::Scan.new  # array class creates one with 9 positions
-    scan[0] = node['num'].to_i
-    scan[1] = node['msLevel'].to_i
-    if x = node['retentionTime']
-      scan[2] = x[2...-1].to_f
-    end
-    if x = node['startMz']
-      scan[3] = x.to_f
-      scan[4] = node['endMz'].to_f
-    end
-    scan
-  end
-  # assumes that node contains scans and checks any scan nodes for children
-  def add_scan_nodes(nodes, scans, scn_index, scans_by_num, lazy, io)
-    nodes.each do |scan_n|
-      scan = create_scan(scan_n, scans_by_num, lazy, io)
-      scans[scn_index] = scan
-      scans_by_num[scan[0]] = scan
-      scn_index += 1
-      if @version > '1.0'
-        new_nodes = scan_n.find('child::scan')
-        if new_nodes.size > 0
-          scn_index = add_scan_nodes(new_nodes, scans, scn_index, scans_by_num, lazy, io)
-        end
-      end
-    end
-    scn_index
-  end
-  # takes a scan node and creates a scan object
-  # the parent scan is the one directly above it in mslevel
-  # lazy must be a symbol from MS::MSRun.new
-  def create_scan(scan_n, scans_by_num, lazy, io=nil)
-    scan = new_scan_from_hash(scan_n)
-    prec = nil
-    scan_n.each do |node|
-      case node.name
-      when 'precursorMz'
-        # should be able to do this!!!
-        #scan[5] = scan_n.find('child::precursorMz').map do |prec_n|
-        raise RuntimeError, "the msrun object can only handle one precursor!" unless prec.nil?
-        prec = MS::Precursor.new
-        prec[1] = node['precursorIntensity'].to_f
-        prec[0] = node.content.to_f
-        if x = node['precursorScanNum']
-          prec[2] = scans_by_num[x.to_i]
-        end
-      when 'peaks'
-        case lazy
-        when :no_spectra
-          next
-        when :string
-          scan[6] = MS::Spectrum::LazyString.from_base64_peaks(node.content, node['precision'].to_i)
-        when :io
-          # assumes that parsing was done with a LazyPeaks parser!
-          nc = node.content
-          scan[6] = MS::Spectrum::LazyIO.new(io, nc.first, nc.last, node['precision'].to_i, MS::Parser::MzXML::DOM::NetworkOrder)
-        when :not
-          # SHOULD be able to do this!!
-          #peaks_n = scan_n.find_first('child::peaks')
-          scan[6] = MS::Spectrum.from_base64_peaks(node.content, node['precision'].to_i)
-        end
-      end
-    end
-    scan[5] = prec
-    scan
-  end
-  # returns an array of msrun objects
-  def msruns(file)
-    raise NotImplementedError
-  end
-    # right now cannot parse multiple runs out of an mzXML version 2 file since
-  # this is built around a single run per file
-  # OPTIONS:
-  #   :msrun => (an MSRun object)   # use this object instead of creating one
-  #   :lazy => [See MS::MSRun for documentation]
-  def msrun(file, opts={})
-    #unless opts.key?(:spectra)
-    #  opts[:spectra] = true
-    #end
-    msrun_obj =
-      if x = opts[:msrun]
-        msrun_obj = x
-      else
-        MS::MSRun.new
-      end
-    io =
-      if file.is_a? String  # a filename
-        filename = file
-        File.open(file)
-      else
-        file
-      end
-    root = get_root_node_from_io(io)
-    if filename
-      io.close  # can close now
-    end
-    # right now we are only finding the first msRun (probably a rare case of
-    # multiple runs in an mzXML file...)
-    msrun_n =
-      if @version >= '2.0'
-        kids = root.children.select {|v| v.name == 'msRun' }
-        raise(NotImplementedError, "one msrun per doc right now" ) if kids.size > 1
-        kids.first
-      else
-        root
-      end
-    if msrun_n.name != 'msRun'
-      raise RuntimeError, "extra node slipped in somehow"
-    end
-    ## HEADER
-    scan_count = msrun_n['scanCount'].to_i
-    msrun_obj.scan_count = scan_count
-    scans_by_num = Array.new(scan_count + 1)
-    ## SPECTRUM
-    parent = nil
-    scans = Array.new( scan_count )
-    scn_index = 0
-    # we should be able to do this, but it's not working!!!
-    #scan_n = msrun_n.find_first('scan')
-    #while (scn_index < scan_count)
-    lazy = opts[:lazy]
-    if @version >= '3.0'
-      warn '[version 3.0 parsing may fail if > 1 peak list per scan]'
-      # note that mzXML version 3.0 *can* have more than one peak...
-      # I'm not sure how to deal with that since I have one spectrum/scan
-    end
-    scan_nodes = msrun_n.find('child::scan')
-    add_scan_nodes(scan_nodes, scans, scn_index, scans_by_num, lazy, io)
-    ## update the scan's parents
-    MS::MSRun.add_parent_scan(scans)
-    # note that startTime and endTime are optional AND in >2.2 are dateTime
-    # instead of duration types!, so we will just use scan times...
-    # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
-    # export.  They give the start and end time in seconds, but they are
-    # really minutes.  All the more reason to use the first and last scans!
-    msrun_obj.start_time = scans.first.time
-    msrun_obj.end_time = scans.last.time
-    msrun_obj.scans = scans
-  end
-end