ms-msrun 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ require 'ms/mass'
1
2
 
2
3
  module Ms
3
4
  class Msrun
@@ -13,13 +14,12 @@ module Ms
13
14
 
14
15
  module Search
15
16
 
16
- PROTON_MASS = 1.007276
17
-
18
- # returns a string, or writes the string to file if given an out_filename
19
- # if given a filename or IO object, returns the number of spectra
20
- # written
21
- def to_mgf(file_or_io=nil, opts={})
17
+ # returns a string unless :output given (may be a String (filename) or a
18
+ # writeable IO object in which case the data is written to file or io
19
+ # and the number of spectra written is returned
20
+ def to_mgf(opts={})
22
21
  opts = {
22
+ :output => nil, # an output file or io object
23
23
  :bottom_mh => 0.0,
24
24
  :top_mh => nil,
25
25
  :ms_levels => (2..-1), # range or integer; a -1 at the end of a range is replaced with the last level
@@ -30,8 +30,10 @@ module Ms
30
30
  :prec_int_precision => 2,
31
31
  :frag_mz_precision => 5,
32
32
  :frag_int_precision => 1,
33
+ :charge_states_for_unknowns => [2,3],
34
+ :determine_plus_ones => false,
33
35
  }.merge(opts)
34
- (_first_scan, _last_scan, _bottom_mh, _top_mh, _ms_levels, _min_peaks, _charge_states, _prec_mz_precision, _prec_int_precision, _frag_mz_precision, _frag_int_precision) = opts.values_at(:first_scan, :last_scan, :bottom_mh, :top_mh, :ms_levels, :min_peaks, :charge_states, :prec_mz_precision, :prec_int_precision, :frag_mz_precision, :frag_int_precision)
36
+ (_first_scan, _last_scan, _bottom_mh, _top_mh, _ms_levels, _min_peaks, _charge_states_for_unknowns, _prec_mz_precision, _prec_int_precision, _frag_mz_precision, _frag_int_precision, _determine_plus_ones) = opts.values_at(:first_scan, :last_scan, :bottom_mh, :top_mh, :ms_levels, :min_peaks, :charge_states_for_unknowns, :prec_mz_precision, :prec_int_precision, :frag_mz_precision, :frag_int_precision, :determine_plus_ones)
35
37
 
36
38
  sep = ' '
37
39
 
@@ -40,7 +42,7 @@ module Ms
40
42
  end
41
43
 
42
44
  if _last_scan.nil? or _last_scan == -1
43
- _last_scan = scans.last.num
45
+ _last_scan = self.scan_nums.last
44
46
  end
45
47
 
46
48
  if !_ms_levels.is_a?(Integer) && _ms_levels.last == -1
@@ -50,26 +52,30 @@ module Ms
50
52
  prec_string = "PEPMASS=%0.#{_prec_mz_precision}f %0.#{_prec_int_precision}f\n"
51
53
  frag_string = "%0.#{_frag_mz_precision}f%s%0.#{_frag_int_precision}f\n"
52
54
 
53
- any_input(file_or_io) do |out, out_type|
54
- scans.each do |scan|
55
+ any_output(opts[:output]) do |out, out_type|
56
+ each_scan(:ms_level => _ms_levels) do |scan|
55
57
  sn = scan.num
56
58
 
57
- next unless _ms_levels === scan.ms_level
58
59
  next unless sn >= _first_scan and sn <= _last_scan
59
60
  next unless scan.num_peaks >= _min_peaks
60
61
 
61
- # tic under precursor > 95% and true = save the spectrum info
62
- scan.spectrum.save!
63
- if scan.plus1?(0.95)
64
- _charge_states = [1]
62
+
63
+ if _determine_plus_ones
64
+ # tic under precursor > 95% and true = save the spectrum info
65
+ if scan.plus1?(0.95)
66
+ _charge_states = [1]
67
+ end
65
68
  end
66
69
 
67
- # (scanHeader.precursorMZ * iCharge) - (iCharge - 1)*dChargeMass;
70
+ chrg_sts = scan.precursor.charge_states
71
+ if chrg_sts.nil? || !chrg_sts.first.is_a?(Integer)
72
+ chrg_sts = _charge_states_for_unknowns
73
+ end
68
74
 
69
75
  pmz = scan.precursor && scan.precursor.mz
70
76
 
71
- _charge_states.each do |z|
72
- mh = (pmz * z) - (z - 1)*PROTON_MASS
77
+ chrg_sts.each do |z|
78
+ mh = (pmz * z) - (z - 1)*Ms::Mass::PROTON
73
79
  next unless (mh >= _bottom_mh)
74
80
  next unless (mh <= _top_mh) if _top_mh
75
81
  out.puts "BEGIN IONS"
@@ -81,8 +87,6 @@ module Ms
81
87
  end
82
88
  out.puts "END IONS\n\n"
83
89
  end
84
-
85
- scan.spectrum.flush!
86
90
  end
87
91
 
88
92
  if out_type == :string_io
@@ -96,7 +100,7 @@ module Ms
96
100
 
97
101
 
98
102
  # yields an IO object and the type of output (:io, :filename, :string_io)
99
- def any_input(arg, &block)
103
+ def any_output(arg, &block)
100
104
  # this is pretty ugly, can we clean up?
101
105
  if arg.is_a? IO # an IO object passed in
102
106
  block.call(arg, :io)
@@ -0,0 +1,36 @@
1
+ require 'digest/sha1'
2
+
3
module Ms
  class Msrun
    # SHA-1 helpers for mzXML files.
    #
    # The mzXML digest covers the document from its first byte through the
    # end of the first opening sha1 tag, i.e. '...<sha1>' inclusive.
    module Sha1
      module_function

      # Computes the SHA-1 digest of an mzXML file and extracts the digest
      # recorded inside the file.
      #
      # file - path of the mzXML file (opened in binary mode)
      #
      # Returns [calculated digest, recorded digest], both hex strings.
      # The recorded digest is nil when no '<sha1>...</sha1>' pair is found
      # on the line holding the first '<sha1>' tag (or when the file has no
      # '<sha1>' tag at all, in which case the whole file is digested).
      def digest_mzxml_file(file)
        open_tag = "<sha1>"
        recorded_digest = nil
        # Feed the digest incrementally: no need to hold the whole file in
        # memory, and no String encoding clashes with binary-mode reads.
        incr_digest = Digest::SHA1.new

        File.open(file, 'rb') do |io|
          while (line = io.gets)
            tag_start = line.index(open_tag)
            if tag_start
              # Only the text up to and including '<sha1>' is digested.
              incr_digest << line[0, tag_start + open_tag.size]
              recorded_digest = line[%r{<sha1>(.*)</sha1>}, 1]
              break
            end
            # Every line before the sha1 tag belongs to the digest.
            incr_digest << line
          end
        end

        [incr_digest.hexdigest, recorded_digest]
      end
    end
  end
end
data/lib/ms/mzxml.rb ADDED
@@ -0,0 +1,12 @@
1
+
2
module Ms
  # Helpers for values read out of mzXML documents.
  module Mzxml
    module_function

    # Splits a parent-file value from an mzXML document into its basename
    # and directory.  Backslashes are treated as path separators, so
    # Windows-style paths are split the same way as '/'-separated ones.
    #
    # xml_value - the path String as found in the XML
    #
    # Returns [basename, dirname].  dirname is nil when the value carries
    # no directory component at all (e.g. 'run.RAW'); an explicit './'
    # prefix is preserved as '.'.
    def parent_basename_and_dir(xml_value)
      # tr does a plain character swap; no regexp needed for one character
      path = xml_value.tr('\\', '/')
      dir = File.dirname(path)
      # File.dirname answers '.' for a bare filename; report "no dir" instead
      dir = nil if dir == '.' && !path.include?('/')
      [File.basename(path), dir]
    end
  end
end
data/lib/ms/precursor.rb CHANGED
@@ -1,8 +1,9 @@
1
1
 
2
2
  module Ms
3
3
  # charge_states are the possible charge states of the precursor
4
- # parent references a scan
5
- PrecursorAtts = [:mz, :intensity, :parent, :charge_states]
4
+ # parent references a scan and scan references the scan the precursor
5
+ # belongs to (i.e., the scan holding the precursor information).
6
+ PrecursorAtts = [:mz, :intensity, :parent, :charge_states, :scan]
6
7
  end
7
8
 
8
9
  Ms::Precursor = Struct.new(*Ms::PrecursorAtts)
@@ -0,0 +1,28 @@
1
+ require 'ms/precursor'
2
+
3
+ module Ms
4
+ class Precursor
5
+ class LazyParent
6
+ undef :parent
7
+
8
+ def parent
9
+ #########################
10
+ #########################
11
+ #########################
12
+ #########################
13
+ #########################
14
+ #########################
15
+
16
+ # FIXME: work in progress -- the lazy lookup of the parent is not implemented yet
17
+
18
+ #########################
19
+ #########################
20
+ #########################
21
+ #########################
22
+ #########################
23
+ scan.num
24
+ end
25
+ end
26
+ end
27
+ end
28
+
data/lib/ms/scan.rb CHANGED
@@ -2,16 +2,14 @@ require 'ms/precursor'
2
2
 
3
3
  module Ms ; end
4
4
 
5
- # 0 1 2 3 4 5 6 7
6
- # 8
5
+ # 0 1 2 3 4 5 6 7 8
7
6
  MsScanAtts = [:num, :ms_level, :time, :start_mz, :end_mz, :num_peaks, :tic, :precursor, :spectrum]
8
7
 
9
- Ms::Scan = Struct.new(*MsScanAtts)
8
+ Ms::Scan = Struct.new(*MsScanAtts) do
10
9
 
11
10
  # time in seconds
12
11
  # everything else in float/int
13
12
 
14
- class Ms::Scan
15
13
 
16
14
  def to_s
17
15
  "<Scan num=#{num} ms_level=#{ms_level} time=#{time}>"
@@ -63,31 +61,6 @@ class Ms::Scan
63
61
  arr.join(" ")
64
62
  end
65
63
 
66
- # adds the attribute parent to each scan with a parent
67
- # (level 1 = no parent; level 2 = prev level 1, etc.
68
- def self.add_parent_scan(scans)
69
- prev_scan = nil
70
- parent_stack = [nil]
71
- ## we want to set the level to be the first mslevel we come to
72
- prev_level = 1
73
- scans.each do |scan|
74
- if scan then prev_level = scan.ms_level; break; end
75
- end
76
- scans.each do |scan|
77
- next unless scan ## the first one is nil, (others?)
78
- level = scan.ms_level
79
- if prev_level < level
80
- parent_stack.unshift prev_scan
81
- end
82
- if prev_level > level
83
- (prev_level - level).times do parent_stack.shift end
84
- end
85
- scan.parent = parent_stack.first
86
- prev_level = level
87
- prev_scan = scan
88
- end
89
- end
90
-
91
64
  end
92
65
 
93
66
 
@@ -17,14 +17,54 @@ end
17
17
  module Ms
18
18
  class Spectrum
19
19
  module Compare
20
+
21
# Percent ion current score: the percent of the total ion current of the
# calling object that can be explained by other.  Ranges from 0-100.
#
# options:
#
#     :normalize => true | false (d: true) normalizes intensities first
#
# All options (plus :yield_diff => false) are passed through to 'compare'.
#
# Returns a Float.  When the calling spectrum carries no ion current at
# all (total intensity of zero) the score is 0.0 rather than NaN.
def pic_score(other, opts={})
  opts = {:normalize => true}.merge(opts)
  a_spec, b_spec =
    if opts[:normalize] == true
      [self.normalize, other.normalize]
    else
      [self, other]
    end

  total_current = a_spec.intensities.sum
  # nothing to explain: avoid 0.0/0 => NaN
  return 0.0 if total_current == 0

  overlapping_current = 0.0
  a_spec.compare(b_spec, opts.merge(:yield_diff => false)) do |sint, oint|
    # only the current present in both spectra counts as explained
    overlapping_current += [oint, sint].min
  end
  100.0 * (overlapping_current / total_current)
end
37
+
20
38
 
39
# Zhang Analytical Chemistry. 2004 76(14)
# "the ratio between the sum of geometric mean and sum of arithmetic mean
# of all ions, after the two spectra are normalized to the same total
# ion intensity"
# The score will be 1 for identical spectra
# (Σ (Ii*Ij)^½) / (ΣIi * ΣIj)^½
#
# options:
#
#     :normalize => true | false (d: true) normalizes intensities first
#
# relevant options given for 'compare' will be passed through:
#
#     :radius
#     :type
#
# Returns a Float.  When either spectrum has a total intensity of zero
# the denominator vanishes; the score is then 0.0 rather than NaN.
def sim_score(other, opts={})
  opts = {:normalize => true}.merge(opts)
  a_spec, b_spec =
    if opts[:normalize] == true
      [self.normalize, other.normalize]
    else
      [self, other]
    end

  denom = Math.sqrt(a_spec.intensities.sum * b_spec.intensities.sum)
  # an empty/zero spectrum shares nothing with anything: avoid 0.0/0.0
  return 0.0 if denom == 0

  numer = 0.0
  a_spec.compare(b_spec, opts.merge(:yield_diff => false)) do |sint, oint|
    numer += Math.sqrt(sint * oint)
  end
  numer / denom
end
29
69
 
30
70
  # opts[:type] == :mutual_best
@@ -37,7 +37,7 @@ module Ms
37
37
  ret_ints = []
38
38
  ret_mzs = include.map {|int, mz| [mz, int] }.sort.map {|mz,int| ret_ints << int ; mz }
39
39
 
40
- return Spectrum.new(ret_mzs, ret_ints)
40
+ return Spectrum.new([ret_mzs, ret_ints])
41
41
  end
42
42
 
43
43
  end
@@ -0,0 +1,60 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ require 'rexml/document'
4
+ require 'ms/msrun/index'
5
+
6
# Specs for Ms::Msrun::Index, run against three mzXML fixture files found
# under TESTFILES/opd1.
class MsMsrunIndexSpec < MiniTest::Spec

  # Fixture paths are assigned at construction time so that the `before`
  # hook can build one index per file.
  def initialize(*args)
    fixture_names = %w(000.v1.mzXML 000.v2.1.mzXML 020.v2.0.readw.mzXML)
    @files = fixture_names.map {|name| TESTFILES + '/opd1/' + name }
    super(*args)
  end

  before do
    @indices = @files.map {|path| Ms::Msrun::Index.new(path) }
  end

  it 'is indexed by scan num and gives doublets of byte and length' do
    @files.zip(@indices) do |path, index|
      index.each_with_index do |pair, i|
        start_byte = pair.first
        num_bytes = pair.last
        # read exactly the byte range the index advertises for this scan
        string = IO.read(path, num_bytes, start_byte).strip
        string[0,5].must_equal '<scan'
        string[-7..-1].must_match %r{</scan>|/peaks>|/msRun>}
        string.must_match %r{num="#{i+1}"}
      end
    end
  end

  it 'gives scan_nums' do
    expected_nums = (1..20).to_a
    @indices.each {|index| index.scan_nums.must_equal(expected_nums) }
  end

  it 'is enumerable' do
    @indices.each do |index|
      nums = index.scan_nums
      index.each_with_index do |doublet, i|
        index[nums[i]].must_equal doublet
      end
    end
  end

  it 'gives header length' do
    # one expected header length per fixture, in the order of @files
    expected_lengths = [824, 1138, 1147]
    @indices.zip(expected_lengths) do |index, expected|
      index.header_length.must_equal expected
    end
  end

  it 'gives a scan for #first and #last' do
    # TODO: fill in with actual data too
    @indices.each do |index|
      index.first.wont_equal nil
      index.last.wont_equal nil
    end
  end

end
@@ -0,0 +1,78 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ require 'ms/msrun'
4
+
5
+ Scan1 = '<scan num="4"
6
+ msLevel="2"
7
+ peaksCount="74"
8
+ polarity="+"
9
+ scanType="Full"
10
+ retentionTime="PT3.64S"
11
+ collisionEnergy="35"
12
+ lowMz="325"
13
+ highMz="2000"
14
+ basePeakMz="1090.05"
15
+ basePeakIntensity="789939"
16
+ totIonCurrent="2.27206e+006">
17
+ <precursorMz precursorIntensity="1.52022e+006">1222.033203</precursorMz>
18
+ <peaks precision="32"
19
+ byteOrder="network"
20
+ pairOrder="m/z-int">Q7Rw2EUK8ABDuWjORQSgAEO5+NhFHLAAQ8gNTEU10ABD2QKcRLCAAEPcdCJFxBAAQ92bFkZzPABEAIucRSbQAEQJuBJFTyAARAqM7EX9+ABECsPMRT7QAEQPyWRF1LgARBoLhETLoABEHjlsRZQwAEQefjRE3yAARCERUkYvuABEIYiKRMMAAEQh01xFifAARCNEKEYpuABEI3kcRWrgAEQjvPhFHRAARCZm7EZtNABEJpXwRnyMAEQmy+pF4QAARCb/RkWfIABEKY28RZQIAEQ6gGBFyHAARDrIYEVEsABEPLuARXJgAEQ9RdREQ8AARD2BIEaV7ABEP7tcRdzoAERChMxHVPcAREK2gEZQAABEQwDkRa/gAEREyLxE3UAARFDwhEXwKABEUT7uRTPQAERTbZZFhfAARFQQvEaK9gBEVOfcRaDQAERWgBJGY5AARFh4ukcWmgBEWMAqRppEAERZgzhF+SgARFsBakXOKABEW4IiR8GBAERbw5pGUvwARFwJcEUooABEXat2RmUoAERegQRHibgARF7FmkU2AABEY6z2RgBIAERvwkxF/pgARHJ9YEYXEABEcrySRgFkAER0gDJINYsARHTFKEeJloBEdPgURoz8AER3hVhIlXggRHfIAEd2YgBEd/acRuh2AER4IiI/gAAARIW+tEVbkABEhdH8RnqcAESIQXhJQNswRIhimEecEgBEiH0cRvKMAESInkBGguYARIp9kEWm0ABEkUz6RdCQAESR//BFH5AARJQhYkYGbABEmxG2RjnYAA==</peaks>
21
+ </scan>
22
+ </scan>
23
+ '
24
+
25
+ Scan2 = '<scan num="4"
26
+ msLevel="2"
27
+ peaksCount="74"
28
+ polarity="+"
29
+ scanType="Full"
30
+ retentionTime="PT3.64S"
31
+ collisionEnergy="35"
32
+ lowMz="325"
33
+ highMz="2000"
34
+ basePeakMz="1090.05"
35
+ basePeakIntensity="789939"
36
+ totIonCurrent="2.27206e+006">
37
+ <precursorMz precursorIntensity="1.52022e+006">1222.033203</precursorMz>
38
+ <peaks precision="32"
39
+ byteOrder="network"
40
+ pairOrder="m/z-int">Q7Rw2EUK8ABDuWjORQSgAEO5+NhFHLAAQ8gNTEU10ABD2QKcRLCAAEPcdCJFxBAAQ92bFkZzPABEAIucRSbQAEQJuBJFTyAARAqM7EX9+ABECsPMRT7QAEQPyWRF1LgARBoLhETLoABEHjlsRZQwAEQefjRE3yAARCERUkYvuABEIYiKRMMAAEQh01xFifAARCNEKEYpuABEI3kcRWrgAEQjvPhFHRAARCZm7EZtNABEJpXwRnyMAEQmy+pF4QAARCb/RkWfIABEKY28RZQIAEQ6gGBFyHAARDrIYEVEsABEPLuARXJgAEQ9RdREQ8AARD2BIEaV7ABEP7tcRdzoAERChMxHVPcAREK2gEZQAABEQwDkRa/gAEREyLxE3UAARFDwhEXwKABEUT7uRTPQAERTbZZFhfAARFQQvEaK9gBEVOfcRaDQAERWgBJGY5AARFh4ukcWmgBEWMAqRppEAERZgzhF+SgARFsBakXOKABEW4IiR8GBAERbw5pGUvwARFwJcEUooABEXat2RmUoAERegQRHibgARF7FmkU2AABEY6z2RgBIAERvwkxF/pgARHJ9YEYXEABEcrySRgFkAER0gDJINYsARHTFKEeJloBEdPgURoz8AER3hVhIlXggRHfIAEd2YgBEd/acRuh2AER4IiI/gAAARIW+tEVbkABEhdH8RnqcAESIQXhJQNswRIhimEecEgBEiH0cRvKMAESInkBGguYARIp9kEWm0ABEkUz6RdCQAESR//BFH5AARJQhYkYGbABEmxG2RjnYAA==</peaks>
41
+ '
42
+
43
+ Scan3 = '<scan num="4"
44
+ msLevel="2"
45
+ peaksCount="74"
46
+ polarity="+"
47
+ scanType="Full"
48
+ retentionTime="PT3.64S"
49
+ collisionEnergy="35"
50
+ lowMz="325"
51
+ highMz="2000"
52
+ basePeakMz="1090.05"
53
+ basePeakIntensity="789939"
54
+ totIonCurrent="2.27206e+006">
55
+ <precursorMz precursorIntensity="1.52022e+006">1222.033203</precursorMz>
56
+ <peaks precision="32"
57
+ byteOrder="network"
58
+ pairOrder="m/z-int">Q7Rw2EUK8ABDuWjORQSgAEO5+NhFHLAAQ8gNTEU10ABD2QKcRLCAAEPcdCJFxBAAQ92bFkZzPABEAIucRSbQAEQJuBJFTyAARAqM7EX9+ABECsPMRT7QAEQPyWRF1LgARBoLhETLoABEHjlsRZQwAEQefjRE3yAARCERUkYvuABEIYiKRMMAAEQh01xFifAARCNEKEYpuABEI3kcRWrgAEQjvPhFHRAARCZm7EZtNABEJpXwRnyMAEQmy+pF4QAARCb/RkWfIABEKY28RZQIAEQ6gGBFyHAARDrIYEVEsABEPLuARXJgAEQ9RdREQ8AARD2BIEaV7ABEP7tcRdzoAERChMxHVPcAREK2gEZQAABEQwDkRa/gAEREyLxE3UAARFDwhEXwKABEUT7uRTPQAERTbZZFhfAARFQQvEaK9gBEVOfcRaDQAERWgBJGY5AARFh4ukcWmgBEWMAqRppEAERZgzhF+SgARFsBakXOKABEW4IiR8GBAERbw5pGUvwARFwJcEUooABEXat2RmUoAERegQRHibgARF7FmkU2AABEY6z2RgBIAERvwkxF/pgARHJ9YEYXEABEcrySRgFkAER0gDJINYsARHTFKEeJloBEdPgURoz8AER3hVhIlXggRHfIAEd2YgBEd/acRuh2AER4IiI/gAAARIW+tEVbkABEhdH8RnqcAESIQXhJQNswRIhimEecEgBEiH0cRvKMAESInkBGguYARIp9kEWm0ABEkUz6RdCQAESR//BFH5AARJQhYkYGbABEmxG2RjnYAA==</peaks>
59
+ </scan>
60
+ </scan>
61
+ </msRun>
62
+ '
63
+
64
+ #class Sha1Spec < MiniTest::Spec
65
+ #def initialize(*args)
66
+ #@files = %w(000.v1.mzXML 020.v2.0.readw.mzXML 000.v2.1.mzXML).map do |file|
67
+ #TESTFILES + "/opd1/#{file}"
68
+ #end
69
+ #super(*args)
70
+ #end
71
+
72
+ ### NOTE: this does NOT match up to real files yet!
73
+ #it 'can read xml scans with extra or missing tags' do
74
+ #Scan.new(from_xml)
75
+ #end
76
+
77
+
78
+ #end