RubyGems - ms-msrun - Versions diffs - 0.1.0 → 0.2.0 - Mend

ms-msrun 0.1.0 → 0.2.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (42) hide show

data/.gitignore +3 -0
data/.gitmodules +3 -0
data/History +18 -0
data/{README → README.rdoc} +0 -0
data/Rakefile +93 -107
data/VERSION +1 -0
data/lib/lmat.rb +141 -39
data/lib/ms/msrun/nokogiri.rb +1 -0
data/lib/ms/msrun/search_dev_notes.txt +47 -0
data/spec/lmat_spec.rb +87 -11
data/spec/metadata/opd1/000.v1.mzXML.yml +3 -0
data/spec/metadata/opd1/000.v2.1.mzXML.yml +3 -0
data/spec/metadata/opd1/020.mzData.xml.yml +3 -0
data/spec/metadata/opd1/020.v2.0.readw.mzXML.yml +3 -0
data/spec/ms/msrun/hpricot.rb +38 -0
data/spec/ms/msrun/index_spec.rb +12 -13
data/spec/ms/msrun/search_spec.rb +5 -4
data/spec/ms/msrun/sha1_spec.rb +3 -6
data/spec/ms/msrun/test_parsing_xml_frags/parse_test.rb +25 -0
data/spec/ms/msrun/test_parsing_xml_frags/test1.xml +5 -0
data/spec/ms/msrun/test_parsing_xml_frags/test2.xml +6 -0
data/spec/ms/msrun/test_parsing_xml_frags/test3.xml +4 -0
data/spec/ms/msrun/test_parsing_xml_frags/test4.xml +11 -0
data/spec/ms/msrun/test_parsing_xml_frags/test_failures.rb +47 -0
data/spec/ms/msrun_bm.rb +22 -0
data/spec/ms/msrun_spec.rb +90 -109
data/spec/ms/scan_spec.rb +5 -6
data/spec/ms/spectrum/compare_spec.rb +31 -28
data/spec/ms/spectrum/filter_spec.rb +15 -13
data/spec/spec_helper.rb +21 -0
data/spec/testfiles/lmat/tmp1.lmat +0 -0
data/spec/testfiles/lmat/tmp1.lmata +44 -0
data/spec/testfiles/lmat/tmp2.lmata +11 -0
data/spec/testfiles/opd1/000.v1.mzXML +418 -0
data/spec/testfiles/opd1/000.v1.mzXML.key.yml +51 -0
data/spec/testfiles/opd1/000.v2.1.mzXML +382 -0
data/spec/testfiles/opd1/000.v2.1.mzXML.key.yml +51 -0
data/spec/testfiles/opd1/020.mzData.xml +683 -0
data/spec/testfiles/opd1/020.mzData.xml.key.yml +43 -0
data/spec/testfiles/opd1/020.v2.0.readw.mzXML +382 -0
data/spec/testfiles/opd1/020.v2.0.readw.mzXML.key.yml +46 -0
metadata +85 -34

data/lib/ms/msrun/nokogiri.rb CHANGED Viewed

@@ -1,4 +1,5 @@
+require 'nokogiri'
 module Ms
   class Msrun

data/lib/ms/msrun/search_dev_notes.txt ADDED Viewed

@@ -0,0 +1,47 @@
+(file stamp of Feb 15 2005)
+$ ./extract_msn.exe
+ EXTRACT_MSN usage:  extract_msn [options] [datafile]
+ options = -Fnum     where num is an INT specifying the first scan
+           -Lnum     where num is an INT specifying the last scan
+           -Bnum     where num is a FLOAT specifying the bottom MW for datafile creation
+           -Tnum     where num is a FLOAT specifying the top MW for datafile creation
+           -Mnum     where num is a FLOAT specifying the precursor mass
+                        tolerance for grouping (default=1.4)
+           -Snum     where num is an INT specifying the number of allowed
+                        different intermediate scans for grouping. (default=1)
+           -Cnum     where num is an INT specifying the charge state to use
+           -Gnum     where num is an INT specifying the minimum # of related
+                        grouped scans needed for a .dta file (default=2)
+           -Inum     where num is an INT specifying the minimum # of ions
+                        needed for a .dta file (default=0)
+           -Rnum     where num is a FLOAT specifying the minimum signal-to-noise value
+                        needed for a peak to be written to a .dta file (default=3)
+           -rnum     where num is an INT specifying the minimum number of major peaks
+                        (peaks above S/N threshold) needed for a .dta file (default=5)
+           -Dstring  where string is a path name
+           -Ystring  where string is a subsequence
+           -Z        Controls whether the zta files are written
+           -K        Controls whether the charge calculations are performed
+           -Ustring  where string is the path of a template file
+             [Default name is chgstate.tpl]
+           -Acontrolstring containing any of the options
+             T: use template          F: use discrete Fourier transform
+             E: use Eng's algorithm   H: use scan header
+             M: use MSMS count
+             O: override header charge state
+             S: create summary file   L: create log file
+             D: create both files     C: create MSMS count file
+             A: find CS even for nonzero headers
+             tfehm: include algorithm output in summary file even if not called
+             [NOTE: This version of the program has a default string of -AHTFEMAOSC,
+             but if -A option is used all desired parameters must be specified]
+           -H        print this information
+ If lcq_dta.exclude present, will ignore list of ions in exclude list.
+ Format of lcq_dta.exclude:  mass tolerance on 1st line
+                             precursor masses on subsequent lines

data/spec/lmat_spec.rb CHANGED Viewed

@@ -2,28 +2,104 @@ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
 require 'lmat'
-class LmatUnitSpec < MiniTest::Spec
+describe 'an lmat' do
-  def initialize(*args)
-    @klass = Lmat
-    super(*args)
+  @klass = Lmat
+  @lmatfile = TESTFILES + "/lmat/tmp1.lmat"
+  @lmatafile = TESTFILES + "/lmat/tmp1.lmata"
+  @lmatafile_small = TESTFILES + "/lmat/tmp2.lmata"
+  before do
+    @lmat = Lmat.new
   end
   it 'can be created with no arguments' do
     obj1 = @klass.new
-    obj1.class.must_equal @klass
+    obj1.class.is @klass
   end
-  xit 'can be created with arrays' do
+  it 'can be created with arrays' do
     obj = @klass[[1,2,3],[4,5,6]]
-    obj[0,0].must_equal 1
-    obj[1,0].must_equal 4
-    obj[1,2].must_equal 6
+    obj[0,0].is 1
+    obj[2,1].is 6
+    obj[1,0].is 2
+    obj.mvec.enums [0,1]
+    obj.nvec.enums [0,1,2]
   end
-  xit 'can find the max value' do
+  it 'can find the max value' do
     obj = @klass[[1,2,3],[1,8,3]]
-    obj.max.must_equal 8
+    obj.max.is 8
+  end
+  it 'can be read from lmat file' do
+    x = Lmat.new
+    x.from_lmat(@lmatfile)
+    x.nvec.size.is 30
+    x.mvec.size.is 40
+    x.mat.size.is 1200
+    x.mat.shape.is [30,40]
+  end
+  it 'can write an lmat file' do
+    begin
+      output = @lmatfile + ".TMP"
+      @lmat.from_lmat(@lmatfile)
+      @lmat.write(output)
+      IO.read(output).is IO.read(@lmatfile)
+    ensure
+      File.unlink(output) if File.exist?(output)
+    end
+  end
+  it 'can be read from an lmata file' do
+    x = Lmat.new.from_lmata(@lmatafile)
+    x.nvec.size.is 30
+    x.mvec.size.is 40
+    x.mat.size.is 1200
+    x.mat.shape.is [30,40]
+  end
+  it 'can print an lmata file' do
+    begin
+      output = @lmatafile_small + ".TMP"
+      @lmat.from_lmata(@lmatafile_small)
+      @lmat.print(output)
+      ars = [output, @lmatafile_small].map do |file|
+        IO.read(file).chomp.gsub("\n", " ").split(/\s+/).map {|v| v.to_f }
+      end
+      ars.first.enums ars.last
+    ensure
+      File.unlink(output) if File.exist?(output)
+    end
+  end
+  xit 'can warp data columns' do
+    @lmat.from_lmata(@lmatafile_small)
+    puts "Warp before"
+    p @lmat
+    deep_copy = true
+    @lmat.plot("before.png")
+    new_lmat = @lmat.warp_cols(NArray.float(7).indgen(12).collect {|v| v + 2.5 }, deep_copy)
+    new_lmat.isa Lmat
+    new_lmat.plot("after.png")
+    puts "Warp after"
+    p new_lmat
+    ## TODO: NEEEED tests HERE
+  end
+  begin
+    require 'gnuplot'
+    it 'can plot' do
+      file = "mypng.png"
+      @lmat.from_lmata(@lmatafile_small)
+      @lmat.plot(file)
+      @lmat.isa Lmat
+      ok File.exist?(file)
+      File.unlink(file) if File.exist?(file)
+    end
+  rescue
+    puts "SKIPPING: plotting (since gnuplot gem not found)"
   end
 end

data/spec/metadata/opd1/000.v1.mzXML.yml ADDED Viewed

@@ -0,0 +1,3 @@
+source: John Prince
+description: |
+  Run 000 of opd1.  This is mzXML version 1 output.  Only the first twenty scans have been retained.  Because of this modification, at least the indexOffset and sha1 tags are incorrect.  Other values have been modified to reflect the twenty scans.

data/spec/metadata/opd1/000.v2.1.mzXML.yml ADDED Viewed

@@ -0,0 +1,3 @@
+source: John Prince
+description: |
+  Run 000 of opd1.  This is mzXML version 2.1 output.  Only the first twenty scans have been retained.  Because of this modification, at least the indexOffset and sha1 tags are incorrect.  Other values have been modified to reflect the twenty scans.

data/spec/metadata/opd1/020.mzData.xml.yml ADDED Viewed

@@ -0,0 +1,3 @@
+source: John Prince
+description: |
+  mzData version 1.05 Xcalibur/Bioworks output of opd1, 020.RAW.  Note the output is buggy in its SpectrumRef and SpectrumList 'count' attribute.  The file has been cut to have only the first twenty scans.  The SpectrumList 'count' attribute is off, but it was wrong already.  Other values have been modified to reflect the twenty scans.

data/spec/metadata/opd1/020.v2.0.readw.mzXML.yml ADDED Viewed

@@ -0,0 +1,3 @@
+source: John Prince
+description: |
+  mzXML version 2.0 readw.exe output of opd1, 020.RAW.  The file has been cut to have only the first twenty scans. Because of this modification, at least the indexOffset and sha1 tags are incorrect.  Other values have been modified to reflect the twenty scans.

data/spec/ms/msrun/hpricot.rb ADDED Viewed

@@ -0,0 +1,38 @@
+require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
+require 'ms/msrun/hpricot/mzxml'
+class HpricotSpec < MiniTest::Spec
+  before do
+    @scan_xml = '<scan num="19"
+        msLevel="2"
+        peaksCount="9"
+        polarity="+"
+        scanType="Full"
+        retentionTime="PT25.23S"
+        collisionEnergy="35"
+        lowMz="390"
+        highMz="2000"
+	basePeakMz="1621.51"
+	basePeakIntensity="17748"
+	totIonCurrent="54989">
+    <precursorMz precursorIntensity="720317">1460.54834</precursorMz>
+    <peaks precision="32"
+           byteOrder="network"
+           pairOrder="m/z-int">RE84xESwAABEYq6wRNLAAESW7sRGFigARJ/nyEVuYABEo+vkRMgAAESqV85FjhgARLQ3FEXvmABEuEH6RdfoAETKsCpGiqgA</peaks>
+  </scan>'
+    @scan_xml_short = @scan_xml.split("\n")[0...-1].join("\n")
+    @scan_xml_long = @scan_xml + "\n</scan>"
+    @basic_info = { :num => 19, :ms_level => 2, :time => 25.23 }
+    @prec_info = {:intensity => 720317, :mz => 1460.54834 }
+    @spectrum = nil # for now
+  end
+  it 'reads normal xml' do
+    Ms::Msrun::Hpricot::Mzxml.parse_scan
+  end
+end

data/spec/ms/msrun/index_spec.rb CHANGED Viewed

@@ -3,7 +3,10 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
 require 'rexml/document'
 require 'ms/msrun/index'
-class MsMsrunIndexSpec < MiniTest::Spec
+describe 'an Ms::Msrun::Index' do
+  @files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
   before do
     @indices = @files.map do |file|
@@ -11,25 +14,21 @@ class MsMsrunIndexSpec < MiniTest::Spec
     end
   end
-  def initialize(*args)
-    @files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
-    super *args
-  end
   it 'is indexed by scan num and gives doublets of byte and length' do
     @files.zip(@indices) do |file, index|
       index.each_with_index do |pair,i|
         string = IO.read(file, pair.last, pair.first).strip
-        string[0,5].must_equal '<scan'
-        string[-7..-1].must_match %r{</scan>|/peaks>|/msRun>}
-        string.must_match %r{num="#{i+1}"}
+        string[0,5].is '<scan'
+        string[-7..-1].should.match %r{</scan>|/peaks>|/msRun>}
+        string.should.match %r{num="#{i+1}"}
       end
     end
   end
   it 'gives scan_nums' do
     @indices.each do |index|
-      index.scan_nums.must_equal((1..20).to_a)
+      index.scan_nums.is((1..20).to_a)
     end
   end
@@ -37,7 +36,7 @@ class MsMsrunIndexSpec < MiniTest::Spec
     @indices.each do |index|
       scan_nums = index.scan_nums
       index.each_with_index do |doublet,i|
-        index[scan_nums[i]].must_equal doublet
+        index[scan_nums[i]].is doublet
       end
     end
   end
@@ -45,15 +44,15 @@ class MsMsrunIndexSpec < MiniTest::Spec
   it 'gives header length' do
     header_lengths = [824, 1138, 1147]
     @indices.zip(@files, header_lengths) do |index, file, header_length|
-      index.header_length.must_equal header_length
+      index.header_length.is header_length
     end
   end
   it 'gives a scan for #first and #last' do
     # TODO: fill in with actual data too
     @indices.each do |index|
-      index.first.wont_equal nil
-      index.last.wont_equal nil
+      ok !index.first.nil?
+      ok !index.last.nil?
     end
   end

data/spec/ms/msrun/search_spec.rb CHANGED Viewed

@@ -2,7 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
 require 'ms/msrun'
-class SearchSpec < MiniTest::Spec
+describe 'mzxml to search formats' do
   it 'creates mgf formatted files' do
     @file = TESTFILES + '/opd1/000.v1.mzXML'
@@ -27,7 +28,7 @@ class SearchSpec < MiniTest::Spec
     ]
     Ms::Msrun.open(@file) do |ms|
       no_scans.each do |k,v|
-        ms.to_mgf( k => v).must_equal ""
+        ms.to_mgf( k => v).is ""
       end
     end
@@ -45,8 +46,8 @@ class SearchSpec < MiniTest::Spec
     Ms::Msrun.open(@file) do |ms|
       some_scans.each do |k,v|
         reply = ms.to_mgf(k => v)
-        reply.must_match(/BEGIN.IONS/)
-        reply.must_match(/END.IONS/)
+        reply.should.match(/BEGIN.IONS/)
+        reply.should.match(/END.IONS/)
       end
     end
     # TODO: should write some more specs here

data/spec/ms/msrun/sha1_spec.rb CHANGED Viewed

@@ -2,12 +2,9 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
 require 'ms/msrun/sha1'
-class Sha1Spec < MiniTest::Spec
-  def initialize(*args)
-    @files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
-      TESTFILES + "/opd1/#{file}"
-    end
-    super(*args)
+describe 'sha1 creation from mzXML' do
+  @files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
+    TESTFILES + "/opd1/#{file}"
   end
   ## NOTE: this does NOT match up to real files yet!

data/spec/ms/msrun/test_parsing_xml_frags/parse_test.rb ADDED Viewed

@@ -0,0 +1,25 @@
+#!/usr/bin/ruby
+require 'nokogiri'
+class MyDoc < Nokogiri::XML::SAX::Document
+  def initialize(io)
+    @io = io
+  end
+  def start_element( name, attributes = [])
+    puts "NAME: #{name}"
+    puts "POST: "
+    puts @io.pos
+  end
+end
+File.open("test3.xml") do |io|
+  parser = Nokogiri::XML::SAX::PushParser.new( MyDoc.new(io) )
+  io.each_line do |line|
+    parser << line
+  end
+end
+#xml = Nokogiri::XML.parse(IO.read("test3.xml"), nil, nil,  Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS )

data/spec/ms/msrun/test_parsing_xml_frags/test1.xml ADDED Viewed

@@ -0,0 +1,5 @@
+<node1>
+    <node2>
+        <node3>my text</node3>
+    </node2>
+</node1>

data/spec/ms/msrun/test_parsing_xml_frags/test2.xml ADDED Viewed

@@ -0,0 +1,6 @@
+<node1>
+    <node2>
+        <node3>my text</node3>
+    </node2>
+</node1>
+</node1>

data/spec/ms/msrun/test_parsing_xml_frags/test3.xml ADDED Viewed

@@ -0,0 +1,4 @@
+<node1>
+    <node2>
+        <node3>my text</node3>
+    </node2>

data/spec/ms/msrun/test_parsing_xml_frags/test4.xml ADDED Viewed

@@ -0,0 +1,11 @@
+<node1>
+    <node2>
+        <node3>my text</node3>
+    </node2>
+</node1>
+</node1>
+<node1 id="3">
+</node1>
+<node1 id="4">
+</node1>
+</msrun>

data/spec/ms/msrun/test_parsing_xml_frags/test_failures.rb ADDED Viewed

@@ -0,0 +1,47 @@
+#require 'axml'
+correct = '<scan num="12">
+  <peaks>ABCD</peaks>
+</scan>
+'
+short = '<scan num="12">
+  <peaks>ABCD</peaks>
+'
+long = '<scan num="12">
+  <peaks>ABCD</peaks>
+</scan>
+</scan>
+'
+require 'xml/libxml'
+XML::Error.set_handler do |error|
+  puts "GOTCAH!"
+  #puts error.to_s
+end
+[correct, short, long].each do |str|
+  reader = XML::Reader.string str
+  x = reader.read
+  p x
+end
+=begin
+x = AXML.parse(correct)
+puts x.to_s
+begin
+y = AXML.parse(short)
+rescue
+  puts "RESCUED"
+puts y.to_s
+end
+#x = AXML.parse(long)
+#puts x.to_s
+=end