ms-msrun 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. data/.gitignore +3 -0
  2. data/.gitmodules +3 -0
  3. data/History +18 -0
  4. data/{README → README.rdoc} +0 -0
  5. data/Rakefile +93 -107
  6. data/VERSION +1 -0
  7. data/lib/lmat.rb +141 -39
  8. data/lib/ms/msrun/nokogiri.rb +1 -0
  9. data/lib/ms/msrun/search_dev_notes.txt +47 -0
  10. data/spec/lmat_spec.rb +87 -11
  11. data/spec/metadata/opd1/000.v1.mzXML.yml +3 -0
  12. data/spec/metadata/opd1/000.v2.1.mzXML.yml +3 -0
  13. data/spec/metadata/opd1/020.mzData.xml.yml +3 -0
  14. data/spec/metadata/opd1/020.v2.0.readw.mzXML.yml +3 -0
  15. data/spec/ms/msrun/hpricot.rb +38 -0
  16. data/spec/ms/msrun/index_spec.rb +12 -13
  17. data/spec/ms/msrun/search_spec.rb +5 -4
  18. data/spec/ms/msrun/sha1_spec.rb +3 -6
  19. data/spec/ms/msrun/test_parsing_xml_frags/parse_test.rb +25 -0
  20. data/spec/ms/msrun/test_parsing_xml_frags/test1.xml +5 -0
  21. data/spec/ms/msrun/test_parsing_xml_frags/test2.xml +6 -0
  22. data/spec/ms/msrun/test_parsing_xml_frags/test3.xml +4 -0
  23. data/spec/ms/msrun/test_parsing_xml_frags/test4.xml +11 -0
  24. data/spec/ms/msrun/test_parsing_xml_frags/test_failures.rb +47 -0
  25. data/spec/ms/msrun_bm.rb +22 -0
  26. data/spec/ms/msrun_spec.rb +90 -109
  27. data/spec/ms/scan_spec.rb +5 -6
  28. data/spec/ms/spectrum/compare_spec.rb +31 -28
  29. data/spec/ms/spectrum/filter_spec.rb +15 -13
  30. data/spec/spec_helper.rb +21 -0
  31. data/spec/testfiles/lmat/tmp1.lmat +0 -0
  32. data/spec/testfiles/lmat/tmp1.lmata +44 -0
  33. data/spec/testfiles/lmat/tmp2.lmata +11 -0
  34. data/spec/testfiles/opd1/000.v1.mzXML +418 -0
  35. data/spec/testfiles/opd1/000.v1.mzXML.key.yml +51 -0
  36. data/spec/testfiles/opd1/000.v2.1.mzXML +382 -0
  37. data/spec/testfiles/opd1/000.v2.1.mzXML.key.yml +51 -0
  38. data/spec/testfiles/opd1/020.mzData.xml +683 -0
  39. data/spec/testfiles/opd1/020.mzData.xml.key.yml +43 -0
  40. data/spec/testfiles/opd1/020.v2.0.readw.mzXML +382 -0
  41. data/spec/testfiles/opd1/020.v2.0.readw.mzXML.key.yml +46 -0
  42. metadata +85 -34
@@ -1,4 +1,5 @@
1
1
 
2
+ require 'nokogiri'
2
3
 
3
4
  module Ms
4
5
  class Msrun
@@ -0,0 +1,47 @@
1
+
2
+ (file stamp of Feb 15 2005)
3
+
4
+ $ ./extract_msn.exe
5
+ EXTRACT_MSN usage: extract_msn [options] [datafile]
6
+ options = -Fnum where num is an INT specifying the first scan
7
+ -Lnum where num is an INT specifying the last scan
8
+ -Bnum where num is a FLOAT specifying the bottom MW for datafile creation
9
+ -Tnum where num is a FLOAT specifying the top MW for datafile creation
10
+ -Mnum where num is a FLOAT specifying the precursor mass
11
+ tolerance for grouping (default=1.4)
12
+ -Snum where num is an INT specifying the number of allowed
13
+ different intermediate scans for grouping. (default=1)
14
+ -Cnum where num is an INT specifying the charge state to use
15
+ -Gnum where num is an INT specifying the minimum # of related
16
+ grouped scans needed for a .dta file (default=2)
17
+ -Inum where num is an INT specifying the minimum # of ions
18
+ needed for a .dta file (default=0)
19
+ -Rnum where num is a FLOAT specifying the minimum signal-to-noise value
20
+ needed for a peak to be written to a .dta file (default=3)
21
+ -rnum where num is an INT specifying the minimum number of major peaks
22
+ (peaks above S/N threshold) needed for a .dta file (default=5)
23
+ -Dstring where string is a path name
24
+ -Ystring where string is a subsequence
25
+ -Z Controls whether the zta files are written
26
+ -K Controls whether the charge calculations are performed
27
+ -Ustring where string is the path of a template file
28
+ [Default name is chgstate.tpl]
29
+ -Acontrolstring containing any of the options
30
+ T: use template F: use discrete Fourier transform
31
+ E: use Eng's algorithm H: use scan header
32
+ M: use MSMS count
33
+ O: override header charge state
34
+ S: create summary file L: create log file
35
+ D: create both files C: create MSMS count file
36
+ A: find CS even for nonzero headers
37
+ tfehm: include algorithm output in summary file even if not called
38
+ [NOTE: This version of the program has a default string of -AHTFEMAOSC,
39
+ but if -A option is used all desired parameters must be specified]
40
+ -H print this information
41
+
42
+ If lcq_dta.exclude present, will ignore list of ions in exclude list.
43
+ Format of lcq_dta.exclude: mass tolerance on 1st line
44
+ precursor masses on subsequent lines
45
+
46
+
47
+
data/spec/lmat_spec.rb CHANGED
@@ -2,28 +2,104 @@ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
2
 
3
3
  require 'lmat'
4
4
 
5
- class LmatUnitSpec < MiniTest::Spec
5
+ describe 'an lmat' do
6
6
 
7
- def initialize(*args)
8
- @klass = Lmat
9
- super(*args)
7
+ @klass = Lmat
8
+ @lmatfile = TESTFILES + "/lmat/tmp1.lmat"
9
+ @lmatafile = TESTFILES + "/lmat/tmp1.lmata"
10
+ @lmatafile_small = TESTFILES + "/lmat/tmp2.lmata"
11
+
12
+ before do
13
+ @lmat = Lmat.new
10
14
  end
11
15
 
12
16
  it 'can be created with no arguments' do
13
17
  obj1 = @klass.new
14
- obj1.class.must_equal @klass
18
+ obj1.class.is @klass
15
19
  end
16
20
 
17
- xit 'can be created with arrays' do
21
+ it 'can be created with arrays' do
18
22
  obj = @klass[[1,2,3],[4,5,6]]
19
- obj[0,0].must_equal 1
20
- obj[1,0].must_equal 4
21
- obj[1,2].must_equal 6
23
+ obj[0,0].is 1
24
+ obj[2,1].is 6
25
+ obj[1,0].is 2
26
+ obj.mvec.enums [0,1]
27
+ obj.nvec.enums [0,1,2]
22
28
  end
23
29
 
24
- xit 'can find the max value' do
30
+ it 'can find the max value' do
25
31
  obj = @klass[[1,2,3],[1,8,3]]
26
- obj.max.must_equal 8
32
+ obj.max.is 8
33
+ end
34
+
35
+ it 'can be read from lmat file' do
36
+ x = Lmat.new
37
+ x.from_lmat(@lmatfile)
38
+ x.nvec.size.is 30
39
+ x.mvec.size.is 40
40
+ x.mat.size.is 1200
41
+ x.mat.shape.is [30,40]
42
+ end
43
+
44
+ it 'can write an lmat file' do
45
+ begin
46
+ output = @lmatfile + ".TMP"
47
+ @lmat.from_lmat(@lmatfile)
48
+ @lmat.write(output)
49
+ IO.read(output).is IO.read(@lmatfile)
50
+ ensure
51
+ File.unlink(output) if File.exist?(output)
52
+ end
53
+ end
54
+
55
+ it 'can be read from an lmata file' do
56
+ x = Lmat.new.from_lmata(@lmatafile)
57
+ x.nvec.size.is 30
58
+ x.mvec.size.is 40
59
+ x.mat.size.is 1200
60
+ x.mat.shape.is [30,40]
61
+ end
62
+
63
+ it 'can print an lmata file' do
64
+ begin
65
+ output = @lmatafile_small + ".TMP"
66
+ @lmat.from_lmata(@lmatafile_small)
67
+ @lmat.print(output)
68
+ ars = [output, @lmatafile_small].map do |file|
69
+ IO.read(file).chomp.gsub("\n", " ").split(/\s+/).map {|v| v.to_f }
70
+ end
71
+ ars.first.enums ars.last
72
+ ensure
73
+ File.unlink(output) if File.exist?(output)
74
+ end
75
+ end
76
+
77
+ xit 'can warp data columns' do
78
+ @lmat.from_lmata(@lmatafile_small)
79
+ puts "Warp before"
80
+ p @lmat
81
+ deep_copy = true
82
+ @lmat.plot("before.png")
83
+ new_lmat = @lmat.warp_cols(NArray.float(7).indgen(12).collect {|v| v + 2.5 }, deep_copy)
84
+ new_lmat.isa Lmat
85
+ new_lmat.plot("after.png")
86
+ puts "Warp after"
87
+ p new_lmat
88
+ ## TODO: NEEEED tests HERE
89
+ end
90
+
91
+ begin
92
+ require 'gnuplot'
93
+ it 'can plot' do
94
+ file = "mypng.png"
95
+ @lmat.from_lmata(@lmatafile_small)
96
+ @lmat.plot(file)
97
+ @lmat.isa Lmat
98
+ ok File.exist?(file)
99
+ File.unlink(file) if File.exist?(file)
100
+ end
101
+ rescue
102
+ puts "SKIPPING: plotting (since gnuplot gem not found)"
27
103
  end
28
104
 
29
105
  end
@@ -0,0 +1,3 @@
1
+ source: John Prince
2
+ description: |
3
+ Run 000 of opd1. This is mzXML version 1 output. Only the first twenty scans have been retained. Because of this modification, at least the indexOffset and sha1 tags are incorrect. Other values have been modified to reflect the twenty scans.
@@ -0,0 +1,3 @@
1
+ source: John Prince
2
+ description: |
3
+ Run 000 of opd1. This is mzXML version 2.1 output. Only the first twenty scans have been retained. Because of this modification, at least the indexOffset and sha1 tags are incorrect. Other values have been modified to reflect the twenty scans.
@@ -0,0 +1,3 @@
1
+ source: John Prince
2
+ description: |
3
+ mzData version 1.05 Xcalibur/Bioworks output of opd1, 020.RAW. Note the output is buggy in its SpectrumRef and SpectrumList 'count' attribute. The file has been cut to have only the first twenty scans. The SpectrumList 'count' attribute is off, but it was wrong already. Other values have been modified to reflect the twenty scans.
@@ -0,0 +1,3 @@
1
+ source: John Prince
2
+ description: |
3
+ mzXML version 2.0 readw.exe output of opd1, 020.RAW. The file has been cut to have only the first twenty scans. Because of this modification, at least the indexOffset and sha1 tags are incorrect. Other values have been modified to reflect the twenty scans.
@@ -0,0 +1,38 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+
3
+ require 'ms/msrun/hpricot/mzxml'
4
+
5
+ class HpricotSpec < MiniTest::Spec
6
+
7
+ before do
8
+ @scan_xml = '<scan num="19"
9
+ msLevel="2"
10
+ peaksCount="9"
11
+ polarity="+"
12
+ scanType="Full"
13
+ retentionTime="PT25.23S"
14
+ collisionEnergy="35"
15
+ lowMz="390"
16
+ highMz="2000"
17
+ basePeakMz="1621.51"
18
+ basePeakIntensity="17748"
19
+ totIonCurrent="54989">
20
+ <precursorMz precursorIntensity="720317">1460.54834</precursorMz>
21
+ <peaks precision="32"
22
+ byteOrder="network"
23
+ pairOrder="m/z-int">RE84xESwAABEYq6wRNLAAESW7sRGFigARJ/nyEVuYABEo+vkRMgAAESqV85FjhgARLQ3FEXvmABEuEH6RdfoAETKsCpGiqgA</peaks>
24
+ </scan>'
25
+ @scan_xml_short = @scan_xml.split("\n")[0...-1].join("\n")
26
+ @scan_xml_long = @scan_xml + "\n</scan>"
27
+ @basic_info = { :num => 19, :ms_level => 2, :time => 25.23 }
28
+ @prec_info = {:intensity => 720317, :mz => 1460.54834 }
29
+ @spectrum = nil # for now
30
+ end
31
+
32
+
33
+ it 'reads normal xml' do
34
+ Ms::Msrun::Hpricot::Mzxml.parse_scan
35
+ end
36
+
37
+
38
+ end
@@ -3,7 +3,10 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
3
3
  require 'rexml/document'
4
4
  require 'ms/msrun/index'
5
5
 
6
- class MsMsrunIndexSpec < MiniTest::Spec
6
+
7
+ describe 'an Ms::Msrun::Index' do
8
+
9
+ @files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
7
10
 
8
11
  before do
9
12
  @indices = @files.map do |file|
@@ -11,25 +14,21 @@ class MsMsrunIndexSpec < MiniTest::Spec
11
14
  end
12
15
  end
13
16
 
14
- def initialize(*args)
15
- @files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
16
- super *args
17
- end
18
17
 
19
18
  it 'is indexed by scan num and gives doublets of byte and length' do
20
19
  @files.zip(@indices) do |file, index|
21
20
  index.each_with_index do |pair,i|
22
21
  string = IO.read(file, pair.last, pair.first).strip
23
- string[0,5].must_equal '<scan'
24
- string[-7..-1].must_match %r{</scan>|/peaks>|/msRun>}
25
- string.must_match %r{num="#{i+1}"}
22
+ string[0,5].is '<scan'
23
+ string[-7..-1].should.match %r{</scan>|/peaks>|/msRun>}
24
+ string.should.match %r{num="#{i+1}"}
26
25
  end
27
26
  end
28
27
  end
29
28
 
30
29
  it 'gives scan_nums' do
31
30
  @indices.each do |index|
32
- index.scan_nums.must_equal((1..20).to_a)
31
+ index.scan_nums.is((1..20).to_a)
33
32
  end
34
33
  end
35
34
 
@@ -37,7 +36,7 @@ class MsMsrunIndexSpec < MiniTest::Spec
37
36
  @indices.each do |index|
38
37
  scan_nums = index.scan_nums
39
38
  index.each_with_index do |doublet,i|
40
- index[scan_nums[i]].must_equal doublet
39
+ index[scan_nums[i]].is doublet
41
40
  end
42
41
  end
43
42
  end
@@ -45,15 +44,15 @@ class MsMsrunIndexSpec < MiniTest::Spec
45
44
  it 'gives header length' do
46
45
  header_lengths = [824, 1138, 1147]
47
46
  @indices.zip(@files, header_lengths) do |index, file, header_length|
48
- index.header_length.must_equal header_length
47
+ index.header_length.is header_length
49
48
  end
50
49
  end
51
50
 
52
51
  it 'gives a scan for #first and #last' do
53
52
  # TODO: fill in with actual data too
54
53
  @indices.each do |index|
55
- index.first.wont_equal nil
56
- index.last.wont_equal nil
54
+ ok !index.first.nil?
55
+ ok !index.last.nil?
57
56
  end
58
57
  end
59
58
 
@@ -2,7 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
2
 
3
3
  require 'ms/msrun'
4
4
 
5
- class SearchSpec < MiniTest::Spec
5
+
6
+ describe 'mzxml to search formats' do
6
7
 
7
8
  it 'creates mgf formatted files' do
8
9
  @file = TESTFILES + '/opd1/000.v1.mzXML'
@@ -27,7 +28,7 @@ class SearchSpec < MiniTest::Spec
27
28
  ]
28
29
  Ms::Msrun.open(@file) do |ms|
29
30
  no_scans.each do |k,v|
30
- ms.to_mgf( k => v).must_equal ""
31
+ ms.to_mgf( k => v).is ""
31
32
  end
32
33
  end
33
34
 
@@ -45,8 +46,8 @@ class SearchSpec < MiniTest::Spec
45
46
  Ms::Msrun.open(@file) do |ms|
46
47
  some_scans.each do |k,v|
47
48
  reply = ms.to_mgf(k => v)
48
- reply.must_match(/BEGIN.IONS/)
49
- reply.must_match(/END.IONS/)
49
+ reply.should.match(/BEGIN.IONS/)
50
+ reply.should.match(/END.IONS/)
50
51
  end
51
52
  end
52
53
  # TODO: should write some more specs here
@@ -2,12 +2,9 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
2
 
3
3
  require 'ms/msrun/sha1'
4
4
 
5
- class Sha1Spec < MiniTest::Spec
6
- def initialize(*args)
7
- @files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
8
- TESTFILES + "/opd1/#{file}"
9
- end
10
- super(*args)
5
+ describe 'sha1 creation from mzXML' do
6
+ @files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
7
+ TESTFILES + "/opd1/#{file}"
11
8
  end
12
9
 
13
10
  ## NOTE: this does NOT match up to real files yet!
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'nokogiri'
4
+
5
+ class MyDoc < Nokogiri::XML::SAX::Document
6
+ def initialize(io)
7
+ @io = io
8
+ end
9
+
10
+ def start_element( name, attributes = [])
11
+ puts "NAME: #{name}"
12
+ puts "POST: "
13
+ puts @io.pos
14
+ end
15
+
16
+ end
17
+
18
+ File.open("test3.xml") do |io|
19
+ parser = Nokogiri::XML::SAX::PushParser.new( MyDoc.new(io) )
20
+ io.each_line do |line|
21
+ parser << line
22
+ end
23
+ end
24
+
25
+ #xml = Nokogiri::XML.parse(IO.read("test3.xml"), nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS )
@@ -0,0 +1,5 @@
1
+ <node1>
2
+ <node2>
3
+ <node3>my text</node3>
4
+ </node2>
5
+ </node1>
@@ -0,0 +1,6 @@
1
+ <node1>
2
+ <node2>
3
+ <node3>my text</node3>
4
+ </node2>
5
+ </node1>
6
+ </node1>
@@ -0,0 +1,4 @@
1
+ <node1>
2
+ <node2>
3
+ <node3>my text</node3>
4
+ </node2>
@@ -0,0 +1,11 @@
1
+ <node1>
2
+ <node2>
3
+ <node3>my text</node3>
4
+ </node2>
5
+ </node1>
6
+ </node1>
7
+ <node1 id="3">
8
+ </node1>
9
+ <node1 id="4">
10
+ </node1>
11
+ </msrun>
@@ -0,0 +1,47 @@
1
+
2
+ #require 'axml'
3
+
4
+
5
+ correct = '<scan num="12">
6
+ <peaks>ABCD</peaks>
7
+ </scan>
8
+ '
9
+
10
+
11
+ short = '<scan num="12">
12
+ <peaks>ABCD</peaks>
13
+ '
14
+
15
+ long = '<scan num="12">
16
+ <peaks>ABCD</peaks>
17
+ </scan>
18
+ </scan>
19
+ '
20
+
21
+ require 'xml/libxml'
22
+
23
+ XML::Error.set_handler do |error|
24
+ puts "GOTCAH!"
25
+ #puts error.to_s
26
+ end
27
+
28
+ [correct, short, long].each do |str|
29
+ reader = XML::Reader.string str
30
+ x = reader.read
31
+ p x
32
+ end
33
+
34
+
35
+ =begin
36
+
37
+ x = AXML.parse(correct)
38
+ puts x.to_s
39
+ begin
40
+ y = AXML.parse(short)
41
+ rescue
42
+ puts "RESCUED"
43
+ puts y.to_s
44
+ end
45
+ #x = AXML.parse(long)
46
+ #puts x.to_s
47
+ =end