ms-msrun 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/.gitmodules +3 -0
- data/History +18 -0
- data/{README → README.rdoc} +0 -0
- data/Rakefile +93 -107
- data/VERSION +1 -0
- data/lib/lmat.rb +141 -39
- data/lib/ms/msrun/nokogiri.rb +1 -0
- data/lib/ms/msrun/search_dev_notes.txt +47 -0
- data/spec/lmat_spec.rb +87 -11
- data/spec/metadata/opd1/000.v1.mzXML.yml +3 -0
- data/spec/metadata/opd1/000.v2.1.mzXML.yml +3 -0
- data/spec/metadata/opd1/020.mzData.xml.yml +3 -0
- data/spec/metadata/opd1/020.v2.0.readw.mzXML.yml +3 -0
- data/spec/ms/msrun/hpricot.rb +38 -0
- data/spec/ms/msrun/index_spec.rb +12 -13
- data/spec/ms/msrun/search_spec.rb +5 -4
- data/spec/ms/msrun/sha1_spec.rb +3 -6
- data/spec/ms/msrun/test_parsing_xml_frags/parse_test.rb +25 -0
- data/spec/ms/msrun/test_parsing_xml_frags/test1.xml +5 -0
- data/spec/ms/msrun/test_parsing_xml_frags/test2.xml +6 -0
- data/spec/ms/msrun/test_parsing_xml_frags/test3.xml +4 -0
- data/spec/ms/msrun/test_parsing_xml_frags/test4.xml +11 -0
- data/spec/ms/msrun/test_parsing_xml_frags/test_failures.rb +47 -0
- data/spec/ms/msrun_bm.rb +22 -0
- data/spec/ms/msrun_spec.rb +90 -109
- data/spec/ms/scan_spec.rb +5 -6
- data/spec/ms/spectrum/compare_spec.rb +31 -28
- data/spec/ms/spectrum/filter_spec.rb +15 -13
- data/spec/spec_helper.rb +21 -0
- data/spec/testfiles/lmat/tmp1.lmat +0 -0
- data/spec/testfiles/lmat/tmp1.lmata +44 -0
- data/spec/testfiles/lmat/tmp2.lmata +11 -0
- data/spec/testfiles/opd1/000.v1.mzXML +418 -0
- data/spec/testfiles/opd1/000.v1.mzXML.key.yml +51 -0
- data/spec/testfiles/opd1/000.v2.1.mzXML +382 -0
- data/spec/testfiles/opd1/000.v2.1.mzXML.key.yml +51 -0
- data/spec/testfiles/opd1/020.mzData.xml +683 -0
- data/spec/testfiles/opd1/020.mzData.xml.key.yml +43 -0
- data/spec/testfiles/opd1/020.v2.0.readw.mzXML +382 -0
- data/spec/testfiles/opd1/020.v2.0.readw.mzXML.key.yml +46 -0
- metadata +85 -34
data/lib/ms/msrun/nokogiri.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
(file stamp of Feb 15 2005)
|
3
|
+
|
4
|
+
$ ./extract_msn.exe
|
5
|
+
EXTRACT_MSN usage: extract_msn [options] [datafile]
|
6
|
+
options = -Fnum where num is an INT specifying the first scan
|
7
|
+
-Lnum where num is an INT specifying the last scan
|
8
|
+
-Bnum where num is a FLOAT specifying the bottom MW for datafile creation
|
9
|
+
-Tnum where num is a FLOAT specifying the top MW for datafile creation
|
10
|
+
-Mnum where num is a FLOAT specifying the precursor mass
|
11
|
+
tolerance for grouping (default=1.4)
|
12
|
+
-Snum where num is an INT specifying the number of allowed
|
13
|
+
different intermediate scans for grouping. (default=1)
|
14
|
+
-Cnum where num is an INT specifying the charge state to use
|
15
|
+
-Gnum where num is an INT specifying the minimum # of related
|
16
|
+
grouped scans needed for a .dta file (default=2)
|
17
|
+
-Inum where num is an INT specifying the minimum # of ions
|
18
|
+
needed for a .dta file (default=0)
|
19
|
+
-Rnum where num is a FLOAT specifying the minimum signal-to-noise value
|
20
|
+
needed for a peak to be written to a .dta file (default=3)
|
21
|
+
-rnum where num is an INT specifying the minimum number of major peaks
|
22
|
+
(peaks above S/N threshold) needed for a .dta file (default=5)
|
23
|
+
-Dstring where string is a path name
|
24
|
+
-Ystring where string is a subsequence
|
25
|
+
-Z Controls whether the zta files are written
|
26
|
+
-K Controls whether the charge calculations are performed
|
27
|
+
-Ustring where string is the path of a template file
|
28
|
+
[Default name is chgstate.tpl]
|
29
|
+
-Acontrolstring containing any of the options
|
30
|
+
T: use template F: use discrete Fourier transform
|
31
|
+
E: use Eng's algorithm H: use scan header
|
32
|
+
M: use MSMS count
|
33
|
+
O: override header charge state
|
34
|
+
S: create summary file L: create log file
|
35
|
+
D: create both files C: create MSMS count file
|
36
|
+
A: find CS even for nonzero headers
|
37
|
+
tfehm: include algorithm output in summary file even if not called
|
38
|
+
[NOTE: This version of the program has a default string of -AHTFEMAOSC,
|
39
|
+
but if -A option is used all desired parameters must be specified]
|
40
|
+
-H print this information
|
41
|
+
|
42
|
+
If lcq_dta.exclude present, will ignore list of ions in exclude list.
|
43
|
+
Format of lcq_dta.exclude: mass tolerance on 1st line
|
44
|
+
precursor masses on subsequent lines
|
45
|
+
|
46
|
+
|
47
|
+
|
data/spec/lmat_spec.rb
CHANGED
@@ -2,28 +2,104 @@ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
|
2
2
|
|
3
3
|
require 'lmat'
|
4
4
|
|
5
|
-
|
5
|
+
describe 'an lmat' do
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
@klass = Lmat
|
8
|
+
@lmatfile = TESTFILES + "/lmat/tmp1.lmat"
|
9
|
+
@lmatafile = TESTFILES + "/lmat/tmp1.lmata"
|
10
|
+
@lmatafile_small = TESTFILES + "/lmat/tmp2.lmata"
|
11
|
+
|
12
|
+
before do
|
13
|
+
@lmat = Lmat.new
|
10
14
|
end
|
11
15
|
|
12
16
|
it 'can be created with no arguments' do
|
13
17
|
obj1 = @klass.new
|
14
|
-
obj1.class.
|
18
|
+
obj1.class.is @klass
|
15
19
|
end
|
16
20
|
|
17
|
-
|
21
|
+
it 'can be created with arrays' do
|
18
22
|
obj = @klass[[1,2,3],[4,5,6]]
|
19
|
-
obj[0,0].
|
20
|
-
obj[1
|
21
|
-
obj[1,
|
23
|
+
obj[0,0].is 1
|
24
|
+
obj[2,1].is 6
|
25
|
+
obj[1,0].is 2
|
26
|
+
obj.mvec.enums [0,1]
|
27
|
+
obj.nvec.enums [0,1,2]
|
22
28
|
end
|
23
29
|
|
24
|
-
|
30
|
+
it 'can find the max value' do
|
25
31
|
obj = @klass[[1,2,3],[1,8,3]]
|
26
|
-
obj.max.
|
32
|
+
obj.max.is 8
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'can be read from lmat file' do
|
36
|
+
x = Lmat.new
|
37
|
+
x.from_lmat(@lmatfile)
|
38
|
+
x.nvec.size.is 30
|
39
|
+
x.mvec.size.is 40
|
40
|
+
x.mat.size.is 1200
|
41
|
+
x.mat.shape.is [30,40]
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'can write an lmat file' do
|
45
|
+
begin
|
46
|
+
output = @lmatfile + ".TMP"
|
47
|
+
@lmat.from_lmat(@lmatfile)
|
48
|
+
@lmat.write(output)
|
49
|
+
IO.read(output).is IO.read(@lmatfile)
|
50
|
+
ensure
|
51
|
+
File.unlink(output) if File.exist?(output)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'can be read from an lmata file' do
|
56
|
+
x = Lmat.new.from_lmata(@lmatafile)
|
57
|
+
x.nvec.size.is 30
|
58
|
+
x.mvec.size.is 40
|
59
|
+
x.mat.size.is 1200
|
60
|
+
x.mat.shape.is [30,40]
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'can print an lmata file' do
|
64
|
+
begin
|
65
|
+
output = @lmatafile_small + ".TMP"
|
66
|
+
@lmat.from_lmata(@lmatafile_small)
|
67
|
+
@lmat.print(output)
|
68
|
+
ars = [output, @lmatafile_small].map do |file|
|
69
|
+
IO.read(file).chomp.gsub("\n", " ").split(/\s+/).map {|v| v.to_f }
|
70
|
+
end
|
71
|
+
ars.first.enums ars.last
|
72
|
+
ensure
|
73
|
+
File.unlink(output) if File.exist?(output)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
xit 'can warp data columns' do
|
78
|
+
@lmat.from_lmata(@lmatafile_small)
|
79
|
+
puts "Warp before"
|
80
|
+
p @lmat
|
81
|
+
deep_copy = true
|
82
|
+
@lmat.plot("before.png")
|
83
|
+
new_lmat = @lmat.warp_cols(NArray.float(7).indgen(12).collect {|v| v + 2.5 }, deep_copy)
|
84
|
+
new_lmat.isa Lmat
|
85
|
+
new_lmat.plot("after.png")
|
86
|
+
puts "Warp after"
|
87
|
+
p new_lmat
|
88
|
+
## TODO: NEEEED tests HERE
|
89
|
+
end
|
90
|
+
|
91
|
+
begin
|
92
|
+
require 'gnuplot'
|
93
|
+
it 'can plot' do
|
94
|
+
file = "mypng.png"
|
95
|
+
@lmat.from_lmata(@lmatafile_small)
|
96
|
+
@lmat.plot(file)
|
97
|
+
@lmat.isa Lmat
|
98
|
+
ok File.exist?(file)
|
99
|
+
File.unlink(file) if File.exist?(file)
|
100
|
+
end
|
101
|
+
rescue
|
102
|
+
puts "SKIPPING: plotting (since gnuplot gem not found)"
|
27
103
|
end
|
28
104
|
|
29
105
|
end
|
@@ -0,0 +1,3 @@
|
|
1
|
+
source: John Prince
|
2
|
+
description: |
|
3
|
+
Run 000 of opd1. This is mzXML version 1 output. Only the first twenty scans have been retained. Because of this modification, at least the indexOffset and sha1 tags are incorrect. Other values have been modified to reflect the twenty scans.
|
@@ -0,0 +1,3 @@
|
|
1
|
+
source: John Prince
|
2
|
+
description: |
|
3
|
+
Run 000 of opd1. This is mzXML version 2.1 output. Only the first twenty scans have been retained. Because of this modification, at least the indexOffset and sha1 tags are incorrect. Other values have been modified to reflect the twenty scans.
|
@@ -0,0 +1,3 @@
|
|
1
|
+
source: John Prince
|
2
|
+
description: |
|
3
|
+
mzData version 1.05 Xcalibur/Bioworks output of opd1, 020.RAW. Note the output is buggy in its SpectrumRef and SpectrumList 'count' attribute. The file has been cut to have only the first twenty scans. The SpectrumList 'count' attribute is off, but it was wrong already. Other values have been modified to reflect the twenty scans.
|
@@ -0,0 +1,3 @@
|
|
1
|
+
source: John Prince
|
2
|
+
description: |
|
3
|
+
mzXML version 2.0 readw.exe output of opd1, 020.RAW. The file has been cut to have only the first twenty scans. Because of this modification, at least the indexOffset and sha1 tags are incorrect. Other values have been modified to reflect the twenty scans.
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
|
2
|
+
|
3
|
+
require 'ms/msrun/hpricot/mzxml'
|
4
|
+
|
5
|
+
class HpricotSpec < MiniTest::Spec
|
6
|
+
|
7
|
+
before do
|
8
|
+
@scan_xml = '<scan num="19"
|
9
|
+
msLevel="2"
|
10
|
+
peaksCount="9"
|
11
|
+
polarity="+"
|
12
|
+
scanType="Full"
|
13
|
+
retentionTime="PT25.23S"
|
14
|
+
collisionEnergy="35"
|
15
|
+
lowMz="390"
|
16
|
+
highMz="2000"
|
17
|
+
basePeakMz="1621.51"
|
18
|
+
basePeakIntensity="17748"
|
19
|
+
totIonCurrent="54989">
|
20
|
+
<precursorMz precursorIntensity="720317">1460.54834</precursorMz>
|
21
|
+
<peaks precision="32"
|
22
|
+
byteOrder="network"
|
23
|
+
pairOrder="m/z-int">RE84xESwAABEYq6wRNLAAESW7sRGFigARJ/nyEVuYABEo+vkRMgAAESqV85FjhgARLQ3FEXvmABEuEH6RdfoAETKsCpGiqgA</peaks>
|
24
|
+
</scan>'
|
25
|
+
@scan_xml_short = @scan_xml.split("\n")[0...-1].join("\n")
|
26
|
+
@scan_xml_long = @scan_xml + "\n</scan>"
|
27
|
+
@basic_info = { :num => 19, :ms_level => 2, :time => 25.23 }
|
28
|
+
@prec_info = {:intensity => 720317, :mz => 1460.54834 }
|
29
|
+
@spectrum = nil # for now
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
it 'reads normal xml' do
|
34
|
+
Ms::Msrun::Hpricot::Mzxml.parse_scan
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
end
|
data/spec/ms/msrun/index_spec.rb
CHANGED
@@ -3,7 +3,10 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
|
3
3
|
require 'rexml/document'
|
4
4
|
require 'ms/msrun/index'
|
5
5
|
|
6
|
-
|
6
|
+
|
7
|
+
describe 'an Ms::Msrun::Index' do
|
8
|
+
|
9
|
+
@files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
|
7
10
|
|
8
11
|
before do
|
9
12
|
@indices = @files.map do |file|
|
@@ -11,25 +14,21 @@ class MsMsrunIndexSpec < MiniTest::Spec
|
|
11
14
|
end
|
12
15
|
end
|
13
16
|
|
14
|
-
def initialize(*args)
|
15
|
-
@files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
|
16
|
-
super *args
|
17
|
-
end
|
18
17
|
|
19
18
|
it 'is indexed by scan num and gives doublets of byte and length' do
|
20
19
|
@files.zip(@indices) do |file, index|
|
21
20
|
index.each_with_index do |pair,i|
|
22
21
|
string = IO.read(file, pair.last, pair.first).strip
|
23
|
-
string[0,5].
|
24
|
-
string[-7..-1].
|
25
|
-
string.
|
22
|
+
string[0,5].is '<scan'
|
23
|
+
string[-7..-1].should.match %r{</scan>|/peaks>|/msRun>}
|
24
|
+
string.should.match %r{num="#{i+1}"}
|
26
25
|
end
|
27
26
|
end
|
28
27
|
end
|
29
28
|
|
30
29
|
it 'gives scan_nums' do
|
31
30
|
@indices.each do |index|
|
32
|
-
index.scan_nums.
|
31
|
+
index.scan_nums.is((1..20).to_a)
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
@@ -37,7 +36,7 @@ class MsMsrunIndexSpec < MiniTest::Spec
|
|
37
36
|
@indices.each do |index|
|
38
37
|
scan_nums = index.scan_nums
|
39
38
|
index.each_with_index do |doublet,i|
|
40
|
-
index[scan_nums[i]].
|
39
|
+
index[scan_nums[i]].is doublet
|
41
40
|
end
|
42
41
|
end
|
43
42
|
end
|
@@ -45,15 +44,15 @@ class MsMsrunIndexSpec < MiniTest::Spec
|
|
45
44
|
it 'gives header length' do
|
46
45
|
header_lengths = [824, 1138, 1147]
|
47
46
|
@indices.zip(@files, header_lengths) do |index, file, header_length|
|
48
|
-
index.header_length.
|
47
|
+
index.header_length.is header_length
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
52
51
|
it 'gives a scan for #first and #last' do
|
53
52
|
# TODO: fill in with actual data too
|
54
53
|
@indices.each do |index|
|
55
|
-
index.first.
|
56
|
-
index.last.
|
54
|
+
ok !index.first.nil?
|
55
|
+
ok !index.last.nil?
|
57
56
|
end
|
58
57
|
end
|
59
58
|
|
@@ -2,7 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
|
2
2
|
|
3
3
|
require 'ms/msrun'
|
4
4
|
|
5
|
-
|
5
|
+
|
6
|
+
describe 'mzxml to search formats' do
|
6
7
|
|
7
8
|
it 'creates mgf formatted files' do
|
8
9
|
@file = TESTFILES + '/opd1/000.v1.mzXML'
|
@@ -27,7 +28,7 @@ class SearchSpec < MiniTest::Spec
|
|
27
28
|
]
|
28
29
|
Ms::Msrun.open(@file) do |ms|
|
29
30
|
no_scans.each do |k,v|
|
30
|
-
ms.to_mgf( k => v).
|
31
|
+
ms.to_mgf( k => v).is ""
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -45,8 +46,8 @@ class SearchSpec < MiniTest::Spec
|
|
45
46
|
Ms::Msrun.open(@file) do |ms|
|
46
47
|
some_scans.each do |k,v|
|
47
48
|
reply = ms.to_mgf(k => v)
|
48
|
-
reply.
|
49
|
-
reply.
|
49
|
+
reply.should.match(/BEGIN.IONS/)
|
50
|
+
reply.should.match(/END.IONS/)
|
50
51
|
end
|
51
52
|
end
|
52
53
|
# TODO: should write some more specs here
|
data/spec/ms/msrun/sha1_spec.rb
CHANGED
@@ -2,12 +2,9 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
|
2
2
|
|
3
3
|
require 'ms/msrun/sha1'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
TESTFILES + "/opd1/#{file}"
|
9
|
-
end
|
10
|
-
super(*args)
|
5
|
+
describe 'sha1 creation from mzXML' do
|
6
|
+
@files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
|
7
|
+
TESTFILES + "/opd1/#{file}"
|
11
8
|
end
|
12
9
|
|
13
10
|
## NOTE: this does NOT match up to real files yet!
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
class MyDoc < Nokogiri::XML::SAX::Document
|
6
|
+
def initialize(io)
|
7
|
+
@io = io
|
8
|
+
end
|
9
|
+
|
10
|
+
def start_element( name, attributes = [])
|
11
|
+
puts "NAME: #{name}"
|
12
|
+
puts "POST: "
|
13
|
+
puts @io.pos
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
File.open("test3.xml") do |io|
|
19
|
+
parser = Nokogiri::XML::SAX::PushParser.new( MyDoc.new(io) )
|
20
|
+
io.each_line do |line|
|
21
|
+
parser << line
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
#xml = Nokogiri::XML.parse(IO.read("test3.xml"), nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS )
|
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
#require 'axml'
|
3
|
+
|
4
|
+
|
5
|
+
correct = '<scan num="12">
|
6
|
+
<peaks>ABCD</peaks>
|
7
|
+
</scan>
|
8
|
+
'
|
9
|
+
|
10
|
+
|
11
|
+
short = '<scan num="12">
|
12
|
+
<peaks>ABCD</peaks>
|
13
|
+
'
|
14
|
+
|
15
|
+
long = '<scan num="12">
|
16
|
+
<peaks>ABCD</peaks>
|
17
|
+
</scan>
|
18
|
+
</scan>
|
19
|
+
'
|
20
|
+
|
21
|
+
require 'xml/libxml'
|
22
|
+
|
23
|
+
XML::Error.set_handler do |error|
|
24
|
+
puts "GOTCAH!"
|
25
|
+
#puts error.to_s
|
26
|
+
end
|
27
|
+
|
28
|
+
[correct, short, long].each do |str|
|
29
|
+
reader = XML::Reader.string str
|
30
|
+
x = reader.read
|
31
|
+
p x
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
=begin
|
36
|
+
|
37
|
+
x = AXML.parse(correct)
|
38
|
+
puts x.to_s
|
39
|
+
begin
|
40
|
+
y = AXML.parse(short)
|
41
|
+
rescue
|
42
|
+
puts "RESCUED"
|
43
|
+
puts y.to_s
|
44
|
+
end
|
45
|
+
#x = AXML.parse(long)
|
46
|
+
#puts x.to_s
|
47
|
+
=end
|