ms-msrun 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
1
+ require 'ms/mass'
1
2
 
2
3
  module Ms
3
4
  class Msrun
@@ -13,13 +14,12 @@ module Ms
13
14
 
14
15
  module Search
15
16
 
16
- PROTON_MASS = 1.007276
17
-
18
- # returns a string, or writes the string to file if given an out_filename
19
- # if given a filename or IO object, returns the number of spectra
20
- # written
21
- def to_mgf(file_or_io=nil, opts={})
17
+ # returns a string unless :output given (may be a String (filename) or a
18
+ # writeable IO object), in which case the data is written to file or io
19
+ # and the number of spectra written is returned
20
+ def to_mgf(opts={})
22
21
  opts = {
22
+ :output => nil, # an output file or io object
23
23
  :bottom_mh => 0.0,
24
24
  :top_mh => nil,
25
25
  :ms_levels => (2..-1), # range or integer, -1 at end will be substituted for last level
@@ -30,8 +30,10 @@ module Ms
30
30
  :prec_int_precision => 2,
31
31
  :frag_mz_precision => 5,
32
32
  :frag_int_precision => 1,
33
+ :charge_states_for_unknowns => [2,3],
34
+ :determine_plus_ones => false,
33
35
  }.merge(opts)
34
- (_first_scan, _last_scan, _bottom_mh, _top_mh, _ms_levels, _min_peaks, _charge_states, _prec_mz_precision, _prec_int_precision, _frag_mz_precision, _frag_int_precision) = opts.values_at(:first_scan, :last_scan, :bottom_mh, :top_mh, :ms_levels, :min_peaks, :charge_states, :prec_mz_precision, :prec_int_precision, :frag_mz_precision, :frag_int_precision)
36
+ (_first_scan, _last_scan, _bottom_mh, _top_mh, _ms_levels, _min_peaks, _charge_states_for_unknowns, _prec_mz_precision, _prec_int_precision, _frag_mz_precision, _frag_int_precision, _determine_plus_ones) = opts.values_at(:first_scan, :last_scan, :bottom_mh, :top_mh, :ms_levels, :min_peaks, :charge_states_for_unknowns, :prec_mz_precision, :prec_int_precision, :frag_mz_precision, :frag_int_precision, :determine_plus_ones)
35
37
 
36
38
  sep = ' '
37
39
 
@@ -40,7 +42,7 @@ module Ms
40
42
  end
41
43
 
42
44
  if _last_scan.nil? or _last_scan == -1
43
- _last_scan = scans.last.num
45
+ _last_scan = self.scan_nums.last
44
46
  end
45
47
 
46
48
  if !_ms_levels.is_a?(Integer) && _ms_levels.last == -1
@@ -50,26 +52,30 @@ module Ms
50
52
  prec_string = "PEPMASS=%0.#{_prec_mz_precision}f %0.#{_prec_int_precision}f\n"
51
53
  frag_string = "%0.#{_frag_mz_precision}f%s%0.#{_frag_int_precision}f\n"
52
54
 
53
- any_input(file_or_io) do |out, out_type|
54
- scans.each do |scan|
55
+ any_output(opts[:output]) do |out, out_type|
56
+ each_scan(:ms_level => _ms_levels) do |scan|
55
57
  sn = scan.num
56
58
 
57
- next unless _ms_levels === scan.ms_level
58
59
  next unless sn >= _first_scan and sn <= _last_scan
59
60
  next unless scan.num_peaks >= _min_peaks
60
61
 
61
- # tic under precursor > 95% and true = save the spectrum info
62
- scan.spectrum.save!
63
- if scan.plus1?(0.95)
64
- _charge_states = [1]
62
+
63
+ if _determine_plus_ones
64
+ # tic under precursor > 95% and true = save the spectrum info
65
+ if scan.plus1?(0.95)
66
+ _charge_states = [1]
67
+ end
65
68
  end
66
69
 
67
- # (scanHeader.precursorMZ * iCharge) - (iCharge - 1)*dChargeMass;
70
+ chrg_sts = scan.precursor.charge_states
71
+ if chrg_sts.nil? || !chrg_sts.first.is_a?(Integer)
72
+ chrg_sts = _charge_states_for_unknowns
73
+ end
68
74
 
69
75
  pmz = scan.precursor && scan.precursor.mz
70
76
 
71
- _charge_states.each do |z|
72
- mh = (pmz * z) - (z - 1)*PROTON_MASS
77
+ chrg_sts.each do |z|
78
+ mh = (pmz * z) - (z - 1)*Ms::Mass::PROTON
73
79
  next unless (mh >= _bottom_mh)
74
80
  next unless (mh <= _top_mh) if _top_mh
75
81
  out.puts "BEGIN IONS"
@@ -81,8 +87,6 @@ module Ms
81
87
  end
82
88
  out.puts "END IONS\n\n"
83
89
  end
84
-
85
- scan.spectrum.flush!
86
90
  end
87
91
 
88
92
  if out_type == :string_io
@@ -96,7 +100,7 @@ module Ms
96
100
 
97
101
 
98
102
  # yields an IO object and the type input (:io, :filename, :string_io)
99
- def any_input(arg, &block)
103
+ def any_output(arg, &block)
100
104
  # this is pretty ugly, can we clean up?
101
105
  if arg.is_a? IO # an IO object passed in
102
106
  block.call(arg, :io)
@@ -0,0 +1,36 @@
1
+ require 'digest/sha1'
2
+
3
+ module Ms
4
+ class Msrun
5
+ # the mzXML digest is from the start of the document to the end of the
6
+ # first sha1 tag: '...<sha1>'
7
+ module Sha1
8
+ module_function
9
+
10
+ # returns [calculated digest, recorded digest] for an mzXML file
11
+ def digest_mzxml_file(file)
12
+ recorded_digest = nil
13
+
14
+ incr_digest = ""
15
+ #incr_digest = Digest::SHA1.new
16
+ endpos = nil
17
+ File.open(file, 'rb') do |io|
18
+ while line = io.gets
19
+ if line.include?("<sha1>")
20
+ incr_digest << line[0, line.index("<sha1>") + 6]
21
+ if line =~ %r{<sha1>(.*)</sha1>}
22
+ recorded_digest = $1.dup
23
+ break
24
+ else
25
+ incr_digest << line
26
+ end
27
+ end
28
+ end
29
+ end
30
+
31
+ [Digest::SHA1.hexdigest(incr_digest), recorded_digest]
32
+ #[incr_digest.hexdigest, recorded_digest]
33
+ end
34
+ end
35
+ end
36
+ end
data/lib/ms/mzxml.rb ADDED
@@ -0,0 +1,12 @@
1
+
2
+ module Ms
3
+ module Mzxml
4
+ module_function
5
+ def parent_basename_and_dir(xml_value)
6
+ fn = xml_value.gsub(/\\/, '/')
7
+ dn = File.dirname(fn)
8
+ dn = nil if dn == '.' && !fn.include?('/')
9
+ [File.basename(fn), dn]
10
+ end
11
+ end
12
+ end
data/lib/ms/precursor.rb CHANGED
@@ -1,8 +1,9 @@
1
1
 
2
2
  module Ms
3
3
  # charge_states are the possible charge states of the precursor
4
- # parent references a scan
5
- PrecursorAtts = [:mz, :intensity, :parent, :charge_states]
4
+ # parent references a scan and scan references the scan the precursor
5
+ # belongs to (i.e., the scan holding the precursor information).
6
+ PrecursorAtts = [:mz, :intensity, :parent, :charge_states, :scan]
6
7
  end
7
8
 
8
9
  Ms::Precursor = Struct.new(*Ms::PrecursorAtts)
@@ -0,0 +1,28 @@
1
+ require 'ms/precursor'
2
+
3
+ module Ms
4
+ class Precursor
5
+ class LazyParent
6
+ undef :parent
7
+
8
+ def parent
9
+ #########################
10
+ #########################
11
+ #########################
12
+ #########################
13
+ #########################
14
+ #########################
15
+
16
+ WorkingHERE!!
17
+
18
+ #########################
19
+ #########################
20
+ #########################
21
+ #########################
22
+ #########################
23
+ scan.num
24
+ end
25
+ end
26
+ end
27
+ end
28
+
data/lib/ms/scan.rb CHANGED
@@ -2,16 +2,14 @@ require 'ms/precursor'
2
2
 
3
3
  module Ms ; end
4
4
 
5
- # 0 1 2 3 4 5 6 7
6
- # 8
5
+ # 0 1 2 3 4 5 6 7 8
7
6
  MsScanAtts = [:num, :ms_level, :time, :start_mz, :end_mz, :num_peaks, :tic, :precursor, :spectrum]
8
7
 
9
- Ms::Scan = Struct.new(*MsScanAtts)
8
+ Ms::Scan = Struct.new(*MsScanAtts) do
10
9
 
11
10
  # time in seconds
12
11
  # everything else in float/int
13
12
 
14
- class Ms::Scan
15
13
 
16
14
  def to_s
17
15
  "<Scan num=#{num} ms_level=#{ms_level} time=#{time}>"
@@ -63,31 +61,6 @@ class Ms::Scan
63
61
  arr.join(" ")
64
62
  end
65
63
 
66
- # adds the attribute parent to each scan with a parent
67
- # (level 1 = no parent; level 2 = prev level 1, etc.
68
- def self.add_parent_scan(scans)
69
- prev_scan = nil
70
- parent_stack = [nil]
71
- ## we want to set the level to be the first mslevel we come to
72
- prev_level = 1
73
- scans.each do |scan|
74
- if scan then prev_level = scan.ms_level; break; end
75
- end
76
- scans.each do |scan|
77
- next unless scan ## the first one is nil, (others?)
78
- level = scan.ms_level
79
- if prev_level < level
80
- parent_stack.unshift prev_scan
81
- end
82
- if prev_level > level
83
- (prev_level - level).times do parent_stack.shift end
84
- end
85
- scan.parent = parent_stack.first
86
- prev_level = level
87
- prev_scan = scan
88
- end
89
- end
90
-
91
64
  end
92
65
 
93
66
 
@@ -17,14 +17,54 @@ end
17
17
  module Ms
18
18
  class Spectrum
19
19
  module Compare
20
+
21
+ # percent ion current score: the percent of total ion current of the
22
+ # calling object that can be explained by other. Ranges from 0-100
23
+ def pic_score(other, opts={})
24
+ opts= {:normalize => true}.merge(opts)
25
+ (a_spec, b_spec) =
26
+ if opts[:normalize] == true
27
+ [self.normalize, other.normalize]
28
+ else
29
+ [self, other]
30
+ end
31
+ overlapping_current = 0.0
32
+ a_spec.compare(b_spec, opts.merge( {:yield_diff => false} )) do |sint, oint|
33
+ overlapping_current += [oint, sint].min
34
+ end
35
+ 100.0 * (overlapping_current / a_spec.intensities.sum)
36
+ end
37
+
20
38
 
39
+ # Zhang Analytical Chemistry. 2004 76(14)
40
+ # "the ratio between the sum of geometric mean and sum of arithmetic mean
41
+ # of all ions, after the two spectra are normalized to the same total
42
+ # ion intensity"
43
+ # The score will be 1 for identical spectra
21
44
  # (Σ (Ii*Ij)^½) / (ΣIi * ΣIj)^½
45
+ #
46
+ # options:
47
+ #
48
+ # :normalize => true | false (default: true) normalizes intensities first
49
+ #
50
+ # relevant options given for 'compare' will be passed through:
51
+ #
52
+ # :radius
53
+ # :type
22
54
  def sim_score(other, opts={})
55
+ opts = {:normalize => true}.merge(opts)
56
+ (a_spec, b_spec) =
57
+ if opts[:normalize] == true
58
+ [self.normalize, other.normalize]
59
+ else
60
+ [self, other]
61
+ end
23
62
  numer = 0.0
24
- compare(other, {:yield_diff => false}) do |sint, oint|
63
+
64
+ a_spec.compare(b_spec, opts.merge( {:yield_diff => false} )) do |sint, oint|
25
65
  numer += Math.sqrt(sint * oint)
26
66
  end
27
- numer / Math.sqrt( self.ints.sum * other.ints.sum )
67
+ numer / Math.sqrt( a_spec.intensities.sum * b_spec.intensities.sum )
28
68
  end
29
69
 
30
70
  # opts[:type] == :mutual_best
@@ -37,7 +37,7 @@ module Ms
37
37
  ret_ints = []
38
38
  ret_mzs = include.map {|int, mz| [mz, int] }.sort.map {|mz,int| ret_ints << int ; mz }
39
39
 
40
- return Spectrum.new(ret_mzs, ret_ints)
40
+ return Spectrum.new([ret_mzs, ret_ints])
41
41
  end
42
42
 
43
43
  end
@@ -0,0 +1,60 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ require 'rexml/document'
4
+ require 'ms/msrun/index'
5
+
6
+ class MsMsrunIndexSpec < MiniTest::Spec
7
+
8
+ before do
9
+ @indices = @files.map do |file|
10
+ indices = Ms::Msrun::Index.new(file)
11
+ end
12
+ end
13
+
14
+ def initialize(*args)
15
+ @files = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML).map {|v| TESTFILES + '/opd1/' + v }
16
+ super *args
17
+ end
18
+
19
+ it 'is indexed by scan num and gives doublets of byte and length' do
20
+ @files.zip(@indices) do |file, index|
21
+ index.each_with_index do |pair,i|
22
+ string = IO.read(file, pair.last, pair.first).strip
23
+ string[0,5].must_equal '<scan'
24
+ string[-7..-1].must_match %r{</scan>|/peaks>|/msRun>}
25
+ string.must_match %r{num="#{i+1}"}
26
+ end
27
+ end
28
+ end
29
+
30
+ it 'gives scan_nums' do
31
+ @indices.each do |index|
32
+ index.scan_nums.must_equal((1..20).to_a)
33
+ end
34
+ end
35
+
36
+ it 'is enumerable' do
37
+ @indices.each do |index|
38
+ scan_nums = index.scan_nums
39
+ index.each_with_index do |doublet,i|
40
+ index[scan_nums[i]].must_equal doublet
41
+ end
42
+ end
43
+ end
44
+
45
+ it 'gives header length' do
46
+ header_lengths = [824, 1138, 1147]
47
+ @indices.zip(@files, header_lengths) do |index, file, header_length|
48
+ index.header_length.must_equal header_length
49
+ end
50
+ end
51
+
52
+ it 'gives a scan for #first and #last' do
53
+ # TODO: fill in with actual data too
54
+ @indices.each do |index|
55
+ index.first.wont_equal nil
56
+ index.last.wont_equal nil
57
+ end
58
+ end
59
+
60
+ end
@@ -0,0 +1,78 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ require 'ms/msrun'
4
+
5
+ Scan1 = '<scan num="4"
6
+ msLevel="2"
7
+ peaksCount="74"
8
+ polarity="+"
9
+ scanType="Full"
10
+ retentionTime="PT3.64S"
11
+ collisionEnergy="35"
12
+ lowMz="325"
13
+ highMz="2000"
14
+ basePeakMz="1090.05"
15
+ basePeakIntensity="789939"
16
+ totIonCurrent="2.27206e+006">
17
+ <precursorMz precursorIntensity="1.52022e+006">1222.033203</precursorMz>
18
+ <peaks precision="32"
19
+ byteOrder="network"
20
+ pairOrder="m/z-int">Q7Rw2EUK8ABDuWjORQSgAEO5+NhFHLAAQ8gNTEU10ABD2QKcRLCAAEPcdCJFxBAAQ92bFkZzPABEAIucRSbQAEQJuBJFTyAARAqM7EX9+ABECsPMRT7QAEQPyWRF1LgARBoLhETLoABEHjlsRZQwAEQefjRE3yAARCERUkYvuABEIYiKRMMAAEQh01xFifAARCNEKEYpuABEI3kcRWrgAEQjvPhFHRAARCZm7EZtNABEJpXwRnyMAEQmy+pF4QAARCb/RkWfIABEKY28RZQIAEQ6gGBFyHAARDrIYEVEsABEPLuARXJgAEQ9RdREQ8AARD2BIEaV7ABEP7tcRdzoAERChMxHVPcAREK2gEZQAABEQwDkRa/gAEREyLxE3UAARFDwhEXwKABEUT7uRTPQAERTbZZFhfAARFQQvEaK9gBEVOfcRaDQAERWgBJGY5AARFh4ukcWmgBEWMAqRppEAERZgzhF+SgARFsBakXOKABEW4IiR8GBAERbw5pGUvwARFwJcEUooABEXat2RmUoAERegQRHibgARF7FmkU2AABEY6z2RgBIAERvwkxF/pgARHJ9YEYXEABEcrySRgFkAER0gDJINYsARHTFKEeJloBEdPgURoz8AER3hVhIlXggRHfIAEd2YgBEd/acRuh2AER4IiI/gAAARIW+tEVbkABEhdH8RnqcAESIQXhJQNswRIhimEecEgBEiH0cRvKMAESInkBGguYARIp9kEWm0ABEkUz6RdCQAESR//BFH5AARJQhYkYGbABEmxG2RjnYAA==</peaks>
21
+ </scan>
22
+ </scan>
23
+ '
24
+
25
+ Scan2 = '<scan num="4"
26
+ msLevel="2"
27
+ peaksCount="74"
28
+ polarity="+"
29
+ scanType="Full"
30
+ retentionTime="PT3.64S"
31
+ collisionEnergy="35"
32
+ lowMz="325"
33
+ highMz="2000"
34
+ basePeakMz="1090.05"
35
+ basePeakIntensity="789939"
36
+ totIonCurrent="2.27206e+006">
37
+ <precursorMz precursorIntensity="1.52022e+006">1222.033203</precursorMz>
38
+ <peaks precision="32"
39
+ byteOrder="network"
40
+ pairOrder="m/z-int">Q7Rw2EUK8ABDuWjORQSgAEO5+NhFHLAAQ8gNTEU10ABD2QKcRLCAAEPcdCJFxBAAQ92bFkZzPABEAIucRSbQAEQJuBJFTyAARAqM7EX9+ABECsPMRT7QAEQPyWRF1LgARBoLhETLoABEHjlsRZQwAEQefjRE3yAARCERUkYvuABEIYiKRMMAAEQh01xFifAARCNEKEYpuABEI3kcRWrgAEQjvPhFHRAARCZm7EZtNABEJpXwRnyMAEQmy+pF4QAARCb/RkWfIABEKY28RZQIAEQ6gGBFyHAARDrIYEVEsABEPLuARXJgAEQ9RdREQ8AARD2BIEaV7ABEP7tcRdzoAERChMxHVPcAREK2gEZQAABEQwDkRa/gAEREyLxE3UAARFDwhEXwKABEUT7uRTPQAERTbZZFhfAARFQQvEaK9gBEVOfcRaDQAERWgBJGY5AARFh4ukcWmgBEWMAqRppEAERZgzhF+SgARFsBakXOKABEW4IiR8GBAERbw5pGUvwARFwJcEUooABEXat2RmUoAERegQRHibgARF7FmkU2AABEY6z2RgBIAERvwkxF/pgARHJ9YEYXEABEcrySRgFkAER0gDJINYsARHTFKEeJloBEdPgURoz8AER3hVhIlXggRHfIAEd2YgBEd/acRuh2AER4IiI/gAAARIW+tEVbkABEhdH8RnqcAESIQXhJQNswRIhimEecEgBEiH0cRvKMAESInkBGguYARIp9kEWm0ABEkUz6RdCQAESR//BFH5AARJQhYkYGbABEmxG2RjnYAA==</peaks>
41
+ '
42
+
43
+ Scan3 = '<scan num="4"
44
+ msLevel="2"
45
+ peaksCount="74"
46
+ polarity="+"
47
+ scanType="Full"
48
+ retentionTime="PT3.64S"
49
+ collisionEnergy="35"
50
+ lowMz="325"
51
+ highMz="2000"
52
+ basePeakMz="1090.05"
53
+ basePeakIntensity="789939"
54
+ totIonCurrent="2.27206e+006">
55
+ <precursorMz precursorIntensity="1.52022e+006">1222.033203</precursorMz>
56
+ <peaks precision="32"
57
+ byteOrder="network"
58
+ pairOrder="m/z-int">Q7Rw2EUK8ABDuWjORQSgAEO5+NhFHLAAQ8gNTEU10ABD2QKcRLCAAEPcdCJFxBAAQ92bFkZzPABEAIucRSbQAEQJuBJFTyAARAqM7EX9+ABECsPMRT7QAEQPyWRF1LgARBoLhETLoABEHjlsRZQwAEQefjRE3yAARCERUkYvuABEIYiKRMMAAEQh01xFifAARCNEKEYpuABEI3kcRWrgAEQjvPhFHRAARCZm7EZtNABEJpXwRnyMAEQmy+pF4QAARCb/RkWfIABEKY28RZQIAEQ6gGBFyHAARDrIYEVEsABEPLuARXJgAEQ9RdREQ8AARD2BIEaV7ABEP7tcRdzoAERChMxHVPcAREK2gEZQAABEQwDkRa/gAEREyLxE3UAARFDwhEXwKABEUT7uRTPQAERTbZZFhfAARFQQvEaK9gBEVOfcRaDQAERWgBJGY5AARFh4ukcWmgBEWMAqRppEAERZgzhF+SgARFsBakXOKABEW4IiR8GBAERbw5pGUvwARFwJcEUooABEXat2RmUoAERegQRHibgARF7FmkU2AABEY6z2RgBIAERvwkxF/pgARHJ9YEYXEABEcrySRgFkAER0gDJINYsARHTFKEeJloBEdPgURoz8AER3hVhIlXggRHfIAEd2YgBEd/acRuh2AER4IiI/gAAARIW+tEVbkABEhdH8RnqcAESIQXhJQNswRIhimEecEgBEiH0cRvKMAESInkBGguYARIp9kEWm0ABEkUz6RdCQAESR//BFH5AARJQhYkYGbABEmxG2RjnYAA==</peaks>
59
+ </scan>
60
+ </scan>
61
+ </msRun>
62
+ '
63
+
64
+ #class Sha1Spec < MiniTest::Spec
65
+ #def initialize(*args)
66
+ #@files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
67
+ #TESTFILES + "/opd1/#{file}"
68
+ #end
69
+ #super(*args)
70
+ #end
71
+
72
+ ### NOTE: this does NOT match up to real files yet!
73
+ #it 'can read xml scans with extra or missing tags' do
74
+ #Scan.new(from_xml)
75
+ #end
76
+
77
+
78
+ #end