mspire 0.7.7 → 0.7.8
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/mzml_to_imzml +0 -0
- data/lib/mspire/cv/obo.rb +20 -0
- data/lib/mspire/cv/param.rb +10 -4
- data/lib/mspire/cv/paramable.rb +41 -20
- data/lib/mspire/mzml.rb +2 -2
- data/lib/mspire/mzml/spectrum.rb +6 -6
- data/lib/mspire/peak.rb +9 -106
- data/lib/mspire/peak_list.rb +260 -0
- data/lib/mspire/spectrum.rb +8 -162
- data/lib/mspire/spectrum_like.rb +8 -10
- data/lib/obo/ontology.rb +36 -0
- data/mspire.gemspec +7 -5
- data/spec/mspire/cv/obo_spec.rb +18 -0
- data/spec/mspire/cv/param_spec.rb +12 -0
- data/spec/mspire/cv/paramable_spec.rb +57 -0
- data/spec/mspire/mzml_spec.rb +4 -3
- data/spec/mspire/{peak_spec.rb → peak_list_spec.rb} +32 -10
- data/spec/mspire/spectrum_spec.rb +0 -18
- data/spec/obo_spec.rb +12 -0
- data/spec/testfiles/mspire/mzml/mspire_simulated.MSn.check.mzML +1 -1
- metadata +7 -5
- data/lib/mspire/cv.rb +0 -16
- data/lib/mspire/peak/point.rb +0 -13
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.
|
1
|
+
0.7.8
|
data/bin/mzml_to_imzml
CHANGED
File without changes
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
require 'obo/ms'
|
3
|
+
require 'obo/ims'
|
4
|
+
require 'obo/unit'
|
5
|
+
|
6
|
+
module Mspire
|
7
|
+
module CV
|
8
|
+
module Obo
|
9
|
+
|
10
|
+
# a hash keyed on ID that gives the cv term name
|
11
|
+
NAME = %w(MS IMS Unit).inject({}) do |hash,key|
|
12
|
+
hash.merge! ::Obo.const_get(key).id_to_name
|
13
|
+
end
|
14
|
+
CAST = %w(MS IMS Unit).inject({}) do |hash,key|
|
15
|
+
hash.merge! ::Obo.const_get(key).id_to_cast
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/mspire/cv/param.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require 'cv/param'
|
2
|
-
require 'mspire/cv'
|
1
|
+
require 'mspire/cv/param'
|
2
|
+
require 'mspire/cv/obo'
|
3
3
|
|
4
4
|
module Mspire
|
5
5
|
module CV
|
@@ -28,8 +28,14 @@ module Mspire
|
|
28
28
|
when 3
|
29
29
|
Mspire::CV::Param[args.pop]
|
30
30
|
end
|
31
|
-
|
32
|
-
|
31
|
+
acc = args[0]
|
32
|
+
obo_type = acc[/([A-Za-z]+):/,1]
|
33
|
+
val = args[1]
|
34
|
+
if val
|
35
|
+
cast = Mspire::CV::Obo::CAST[acc]
|
36
|
+
(val = val.send(cast)) if cast
|
37
|
+
end
|
38
|
+
::CV::Param.new(obo_type, args[0], Mspire::CV::Obo::NAME[acc], val, unit)
|
33
39
|
end
|
34
40
|
end
|
35
41
|
end
|
data/lib/mspire/cv/paramable.rb
CHANGED
@@ -2,6 +2,7 @@ require 'cv/param'
|
|
2
2
|
require 'mspire/user_param'
|
3
3
|
require 'mspire/cv/param'
|
4
4
|
require 'nokogiri'
|
5
|
+
require 'andand'
|
5
6
|
|
6
7
|
module Mspire
|
7
8
|
module CV
|
@@ -15,16 +16,48 @@ module Mspire
|
|
15
16
|
cv_params + ref_param_groups.flat_map(&:params) + user_params
|
16
17
|
end
|
17
18
|
|
19
|
+
def params?
|
20
|
+
total_num_params = cv_params.size +
|
21
|
+
ref_param_groups.reduce(0) {|sum,group| sum +
|
22
|
+
group.params.size } + user_params.size
|
23
|
+
total_num_params > 0
|
24
|
+
end
|
25
|
+
|
18
26
|
def accessionable_params
|
19
27
|
cv_params + ref_param_groups.flat_map(&:params)
|
20
28
|
end
|
21
29
|
|
22
|
-
def params_by_name
|
23
|
-
|
30
|
+
#def params_by_name
|
31
|
+
# params.index_by &:name
|
32
|
+
#end
|
33
|
+
|
34
|
+
#def params_by_accession
|
35
|
+
# accessionable_params.index_by &:accession
|
36
|
+
#end
|
37
|
+
|
38
|
+
# returns the value if the param exists by that name. Returns true if
|
39
|
+
# the param exists but has no value. Returns false if no param has that name.
|
40
|
+
def fetch(name)
|
41
|
+
param = params.find {|param| param.name == name}
|
42
|
+
if param
|
43
|
+
param.value || true
|
44
|
+
else
|
45
|
+
false
|
46
|
+
end
|
24
47
|
end
|
25
48
|
|
26
|
-
def
|
27
|
-
accessionable_params.
|
49
|
+
def fetch_by_accession(acc)
|
50
|
+
param = accessionable_params.find {|v| v.accession == acc }
|
51
|
+
if param
|
52
|
+
param.value || true
|
53
|
+
else
|
54
|
+
false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
alias_method :fetch_by_acc, :fetch_by_accession
|
58
|
+
|
59
|
+
def param?(name)
|
60
|
+
params.any? {|param| param.name == name }
|
28
61
|
end
|
29
62
|
|
30
63
|
def initialize(opts={params: []})
|
@@ -34,22 +67,10 @@ module Mspire
|
|
34
67
|
describe_many!(opts[:params])
|
35
68
|
end
|
36
69
|
|
37
|
-
|
38
|
-
|
39
|
-
param = accessionable_params.find {|v| v.accession == accession }
|
40
|
-
if param
|
41
|
-
val = param.value
|
42
|
-
cast ? (val && val.send(cast)) : val
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def find_param_by_accession(accession)
|
47
|
-
accessionable_params.find {|v| v.accession == accession }
|
48
|
-
end
|
49
|
-
|
50
|
-
def param_exists_by_accession?(accession)
|
51
|
-
accessionable_params.any? {|v| v.accession == accession }
|
70
|
+
def param_by_accession(acc)
|
71
|
+
accessionable_params.find {|v| v.accession == acc }
|
52
72
|
end
|
73
|
+
alias_method :param_by_acc, :param_by_accession
|
53
74
|
|
54
75
|
# takes an array of values, each of which is fed into describe!
|
55
76
|
def describe_many!(array)
|
@@ -89,7 +110,7 @@ module Mspire
|
|
89
110
|
param =
|
90
111
|
case arg.name
|
91
112
|
when 'cvParam'
|
92
|
-
::CV::Param
|
113
|
+
Mspire::CV::Param[ arg[:accession], arg[:value] ]
|
93
114
|
when 'userParam'
|
94
115
|
Mspire::UserParam.new(arg[:name], arg[:value], arg[:type])
|
95
116
|
end
|
data/lib/mspire/mzml.rb
CHANGED
@@ -22,14 +22,14 @@ require 'mspire/mzml/cv'
|
|
22
22
|
require 'mspire/mzml/sample'
|
23
23
|
|
24
24
|
module Mspire
|
25
|
-
# Reading an
|
25
|
+
# Reading an mzml file:
|
26
26
|
#
|
27
27
|
# Mspire::Mzml.open("somefile.mzML") do |mzml|
|
28
28
|
# mzml.each do |spectrum|
|
29
29
|
# scan = spectrum.scan
|
30
30
|
# spectrum.mzs # array of m/zs
|
31
31
|
# spectrum.intensities # array of intensities
|
32
|
-
# spectrum.
|
32
|
+
# spectrum.peaks do |mz,intensity|
|
33
33
|
# puts "mz: #{mz} intensity: #{intensity}"
|
34
34
|
# end
|
35
35
|
# end
|
data/lib/mspire/mzml/spectrum.rb
CHANGED
@@ -70,7 +70,7 @@ module Mspire
|
|
70
70
|
# returns the retention time of the first scan object in the scan list
|
71
71
|
# *in seconds*!
|
72
72
|
def retention_time
|
73
|
-
rt_param = scan_list.first.
|
73
|
+
rt_param = scan_list.first.param_by_acc('MS:1000016')
|
74
74
|
if rt_param
|
75
75
|
multiplier =
|
76
76
|
case rt_param.unit.accession
|
@@ -86,24 +86,24 @@ module Mspire
|
|
86
86
|
|
87
87
|
# returns the ms_level as an Integer
|
88
88
|
def ms_level
|
89
|
-
|
89
|
+
fetch_by_acc('MS:1000511')
|
90
90
|
end
|
91
91
|
|
92
92
|
def centroided?
|
93
|
-
|
93
|
+
fetch_by_acc('MS:1000127')
|
94
94
|
end
|
95
95
|
|
96
96
|
def profile?
|
97
|
-
|
97
|
+
fetch_by_acc('MS:1000128')
|
98
98
|
end
|
99
99
|
|
100
100
|
# returns the charge state of the first precursor as an integer
|
101
101
|
def precursor_charge
|
102
|
-
precursors.andand.first.andand.selected_ions.andand.first.andand.
|
102
|
+
precursors.andand.first.andand.selected_ions.andand.first.andand.fetch_by_acc('MS:1000041')
|
103
103
|
end
|
104
104
|
|
105
105
|
def precursor_mz
|
106
|
-
precursors.andand.first.andand.selected_ions.andand.first.andand.
|
106
|
+
precursors.andand.first.andand.selected_ions.andand.first.andand.fetch_by_acc('MS:1000744')
|
107
107
|
end
|
108
108
|
|
109
109
|
# takes a Nokogiri node and sets relevant properties
|
data/lib/mspire/peak.rb
CHANGED
@@ -1,108 +1,11 @@
|
|
1
1
|
|
2
|
-
module Mspire
|
3
|
-
#
|
4
|
-
#
|
5
|
-
class
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
#
|
12
|
-
# false/nil => only split on zeros
|
13
|
-
# :share => give each peak its rightful portion of shared peaks, dividing the
|
14
|
-
# intensity based on the intensity of adjacent peaks
|
15
|
-
# :greedy_y => give the point to the peak with highest point next to
|
16
|
-
# the point in question. tie goes lower.
|
17
|
-
#
|
18
|
-
# if return_local_minima is true, a parallel array of local minima indices is
|
19
|
-
# returned (only makes sense if split_multipeaks is false)
|
20
|
-
#
|
21
|
-
# assumes that a new point can be made with an array containing the x
|
22
|
-
# value and the y value.
|
23
|
-
def split(split_multipeaks=false, return_local_minima=false)
|
24
|
-
if split_multipeaks
|
25
|
-
(zeroed_peaks, local_min_ind_ar) = self.split(false, true)
|
26
|
-
$stderr.print "splitting on local minima ..." if $VERBOSE
|
27
|
-
no_local_minima_peaks = zeroed_peaks.zip(local_min_ind_ar).map do |peak, lm_indices|
|
28
|
-
new_peaks = [ peak.class.new ]
|
29
|
-
if lm_indices.size > 0
|
30
|
-
prev_lm_i = -1 # <- it's okay, we don't use until it is zero
|
31
|
-
lm_indices.each do |lm_i|
|
32
|
-
lm = peak[lm_i]
|
33
|
-
point_class = lm.class
|
34
|
-
|
35
|
-
# push onto the last peak all the points from right after the previous local min
|
36
|
-
# to just before this local min
|
37
|
-
new_peaks.last.push( *peak[(prev_lm_i+1)..(lm_i-1)] )
|
38
|
-
before_pnt = peak[lm_i-1]
|
39
|
-
after_pnt = peak[lm_i+1]
|
40
|
-
|
41
|
-
case split_multipeaks
|
42
|
-
when :share
|
43
|
-
sum = before_pnt[1] + after_pnt[1]
|
44
|
-
# push onto the last peak its portion of the local min
|
45
|
-
new_peaks.last << point_class.new( [lm[0], lm[1] * (before_pnt[1].to_f/sum)] )
|
46
|
-
# create a new peak that contains its portion of the local min
|
47
|
-
new_peaks << self.class.new( [point_class.new([lm[0], lm[1] * (after_pnt[1].to_f/sum)])] )
|
48
|
-
prev_lm_i = lm_i
|
49
|
-
when :greedy_y
|
50
|
-
if before_pnt[1] >= after_pnt[1]
|
51
|
-
new_peaks.last << lm
|
52
|
-
new_peaks << self.class.new
|
53
|
-
prev_lm_i = lm_i
|
54
|
-
else
|
55
|
-
new_peaks << self.class.new( [lm] )
|
56
|
-
prev_lm_i = lm_i
|
57
|
-
end
|
58
|
-
else
|
59
|
-
raise ArgumentError, "only recognize :share, :greedy_y, or false for the arg in #split(arg)"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
new_peaks.last.push( *peak[(prev_lm_i+1)...peak.size] )
|
63
|
-
new_peaks
|
64
|
-
else
|
65
|
-
[peak]
|
66
|
-
end
|
67
|
-
end.flatten(1) # end zip
|
68
|
-
$stderr.puts "now #{no_local_minima_peaks.size} peaks." if $VERBOSE
|
69
|
-
no_local_minima_peaks
|
70
|
-
else
|
71
|
-
$stderr.print "splitting on zeros..." if $VERBOSE
|
72
|
-
# first, split the peaks based on zero intensity values
|
73
|
-
# and simultaneously keep track of the local minima within each
|
74
|
-
# resulting peak
|
75
|
-
peaks = []
|
76
|
-
local_min_ind_ar = []
|
77
|
-
in_peak = false
|
78
|
-
self.each_with_index do |point, index|
|
79
|
-
previous_y = self[index - 1][1]
|
80
|
-
if point[1] > 0
|
81
|
-
if !in_peak
|
82
|
-
in_peak = 0
|
83
|
-
peaks << self.class.new([point])
|
84
|
-
local_min_ind_ar << []
|
85
|
-
else
|
86
|
-
peaks.last << point
|
87
|
-
# if on_upslope(previous_y, point[1])
|
88
|
-
if previous_y < point[1]
|
89
|
-
# If we were previously on a downslope and we are now on an upslope
|
90
|
-
# then the previous index is a local min
|
91
|
-
prev_previous_y = self[index - 2][1]
|
92
|
-
# on_downslope(prev_previous_y, previous_y)
|
93
|
-
if prev_previous_y > previous_y
|
94
|
-
# We have found a local min
|
95
|
-
local_min_ind_ar.last << (in_peak-1)
|
96
|
-
end
|
97
|
-
end # end if (upslope)
|
98
|
-
end # end if !in_peak
|
99
|
-
in_peak += 1
|
100
|
-
elsif in_peak
|
101
|
-
in_peak = false
|
102
|
-
end # end if point[1] > 0
|
103
|
-
end
|
104
|
-
$stderr.puts "#{peaks.size} no-whitespace-inside peaks." if $VERBOSE
|
105
|
-
return_local_minima ? [peaks, local_min_ind_ar] : peaks
|
106
|
-
end #
|
107
|
-
end # def split
|
2
|
+
module Mspire
|
3
|
+
# A peak is typically a doublet: an x value and a y value. In a spectrum
|
4
|
+
# this will be an m/z and intensity. In a chromatogram this will be a
|
5
|
+
# retention time and an intensity. (This class can be subclassed if
|
6
|
+
# desired)
|
7
|
+
class Peak < Array
|
8
|
+
alias_method :x, :first
|
9
|
+
alias_method :y, :last
|
10
|
+
end
|
108
11
|
end
|
@@ -0,0 +1,260 @@
|
|
1
|
+
require 'mspire/bin'
|
2
|
+
|
3
|
+
module Mspire
|
4
|
+
# a collection of peak objects
|
5
|
+
class PeakList < Array
|
6
|
+
|
7
|
+
def lo_x
|
8
|
+
self.first[0]
|
9
|
+
end
|
10
|
+
|
11
|
+
def hi_x
|
12
|
+
self.last[0]
|
13
|
+
end
|
14
|
+
|
15
|
+
DEFAULT_MERGE = {
|
16
|
+
:bin_width => 5,
|
17
|
+
:bin_unit => :ppm,
|
18
|
+
:normalize => true,
|
19
|
+
:return_data => false,
|
20
|
+
:split => :share,
|
21
|
+
:centroided => true,
|
22
|
+
}
|
23
|
+
|
24
|
+
class << self
|
25
|
+
|
26
|
+
def create_bins(peaklists, opts)
|
27
|
+
min, max = min_max_mz(peaklists)
|
28
|
+
|
29
|
+
divisions = []
|
30
|
+
bin_width = opts[:bin_width]
|
31
|
+
use_ppm = (opts[:bin_unit] == :ppm)
|
32
|
+
current_mz = min
|
33
|
+
loop do
|
34
|
+
if current_mz >= max
|
35
|
+
divisions << max
|
36
|
+
break
|
37
|
+
else
|
38
|
+
divisions << current_mz
|
39
|
+
current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
|
40
|
+
end
|
41
|
+
end
|
42
|
+
# make each bin exclusive so there is no overlap
|
43
|
+
bins = divisions.each_cons(2).map {|pair| Mspire::Bin.new(*pair, true) }
|
44
|
+
# make the last bin *inclusive* of the terminating value
|
45
|
+
bins[-1] = Mspire::Bin.new(bins.last.begin, bins.last.end)
|
46
|
+
bins
|
47
|
+
end
|
48
|
+
|
49
|
+
def min_max_mz(peaklists)
|
50
|
+
# find the min and max across all spectra
|
51
|
+
first_peaklist = peaklists.first
|
52
|
+
min = first_peaklist.first[0]; max = first_peaklist.last[0]
|
53
|
+
peaklists.each do |peaklist|
|
54
|
+
min = peaklist.lo_x if peaklist.lo_x < min
|
55
|
+
max = peaklist.hi_x if peaklist.hi_x > max
|
56
|
+
end
|
57
|
+
[min, max]
|
58
|
+
end
|
59
|
+
|
60
|
+
def merge_centroids(peaklists, opts={})
|
61
|
+
|
62
|
+
# Create Mspire::Bin objects
|
63
|
+
bins = opts[:bins] ? opts[:bins] : create_bins(peaklists, opts)
|
64
|
+
|
65
|
+
peaklists.each do |peaklist|
|
66
|
+
Mspire::Bin.bin(bins, peaklist, &:first)
|
67
|
+
end
|
68
|
+
|
69
|
+
pseudo_peaks = bins.map do |bin|
|
70
|
+
[bin, bin.data.reduce(0.0) {|sum,peak| sum + peak[1] }]
|
71
|
+
end
|
72
|
+
|
73
|
+
pseudo_peaklist = Mspire::PeakList.new(pseudo_peaks)
|
74
|
+
|
75
|
+
peak_lists = pseudo_peaklist.split(opts[:split])
|
76
|
+
|
77
|
+
return_data = []
|
78
|
+
final_peaklist = []
|
79
|
+
peak_lists.each_with_index do |peak_list,i|
|
80
|
+
#peaks.each do |peak|
|
81
|
+
tot_intensity = peak_list.map(&:last).reduce(:+)
|
82
|
+
return_data_per_peak = [] if opts[:return_data]
|
83
|
+
weighted_mz = 0.0
|
84
|
+
peak_list.each do |peak|
|
85
|
+
pre_scaled_intensity = peak[0].data.reduce(0.0) {|sum,v| sum + v.last }
|
86
|
+
post_scaled_intensity = peak[1]
|
87
|
+
# some peaks may have been shared. In this case the intensity
|
88
|
+
# for that peak was downweighted. However, the actual data
|
89
|
+
# composing that peak is not altered when the intensity is
|
90
|
+
# shared. So, to calculate a proper weighted avg we need to
|
91
|
+
# downweight the intensity of any data point found within a bin
|
92
|
+
# whose intensity was scaled.
|
93
|
+
correction_factor =
|
94
|
+
if pre_scaled_intensity != post_scaled_intensity
|
95
|
+
post_scaled_intensity / pre_scaled_intensity
|
96
|
+
else
|
97
|
+
1.0
|
98
|
+
end
|
99
|
+
|
100
|
+
return_data_per_peak.push(*peak[0].data) if opts[:return_data]
|
101
|
+
|
102
|
+
peak[0].data.each do |lil_point|
|
103
|
+
weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
return_data << return_data_per_peak if opts[:return_data]
|
107
|
+
final_peaklist << Mspire::Peak.new([weighted_mz, tot_intensity])
|
108
|
+
end
|
109
|
+
[final_peaklist, return_data]
|
110
|
+
end
|
111
|
+
|
112
|
+
# returns a new peak_list which has been merged with the others.
|
113
|
+
# Peaks are binned (bin width set by opts[:bin_width] and opts[:bin_unit]) and then segmented according to monotonicity (sharing
|
114
|
+
# intensity between abutting points). The final m/z is the weighted
|
115
|
+
# average of all the m/z's in each peak. Valid opts (with default listed
|
116
|
+
# first):
|
117
|
+
#
|
118
|
+
# :bin_width => 5
|
119
|
+
# :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
|
120
|
+
# :bins => array of Mspire::Bin objects for custom bins (overrides other bin options)
|
121
|
+
# :normalize => true if true, divides total intensity by
|
122
|
+
# number of spectra
|
123
|
+
# :return_data => false returns a parallel array containing
|
124
|
+
# the peaks associated with each returned peak
|
125
|
+
# :split => :share | :greedy_y | false see Mspire::PeakList#split
|
126
|
+
# :centroided => true treat the data as centroided
|
127
|
+
#
|
128
|
+
# The binning algorithm is roughly the fastest possible algorithm that
|
129
|
+
# would allow for arbitrary, non-constant bin widths (a ratcheting
|
130
|
+
# algorithm O(n + m))
|
131
|
+
#
|
132
|
+
# Assumes the peaklists are already sorted by m/z.
|
133
|
+
def merge(peaklists, opts={})
|
134
|
+
opts = DEFAULT_MERGE.merge(opts)
|
135
|
+
|
136
|
+
(peaklist, returned_data) =
|
137
|
+
if opts[:centroided]
|
138
|
+
merge_centroids(peaklists, opts)
|
139
|
+
else
|
140
|
+
raise NotImplementedError, "need to implement profile merging"
|
141
|
+
end
|
142
|
+
|
143
|
+
if opts[:normalize]
|
144
|
+
sz = peaklists.size
|
145
|
+
peaklist.each {|peak| peak[1] = peak[1].to_f / sz }
|
146
|
+
end
|
147
|
+
if opts[:return_data]
|
148
|
+
$stderr.puts "returning peaklist (#{peaklist.size}) and data" if $VERBOSE
|
149
|
+
[peaklist, returned_data]
|
150
|
+
else
|
151
|
+
$stderr.puts "returning peaklist (#{peaklist.size})" if $VERBOSE
|
152
|
+
peaklist
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
|
158
|
+
# returns an Array of peaklist objects. Splits run of 1 or more local
|
159
|
+
# minima into multiple peaklists. When a point is 'shared' between two
|
160
|
+
# adjacent hill-ish areas, the choice of how to resolve multi-hills (runs
|
161
|
+
# of data above zero) is one of:
|
162
|
+
#
|
163
|
+
# false/nil => only split on zeros
|
164
|
+
# :share => give each peak its rightful portion of shared peaks, dividing the
|
165
|
+
# intensity based on the intensity of adjacent peaks
|
166
|
+
# :greedy_y => give the point to the peak with highest point next to
|
167
|
+
# the point in question. tie goes lower.
|
168
|
+
#
|
169
|
+
# if return_local_minima is true, a parallel array of local minima indices is
|
170
|
+
# returned (only makes sense if split_multipeaks is false)
|
171
|
+
#
|
172
|
+
# assumes that a new peak can be made with an array containing the x
|
173
|
+
# value and the y value.
|
174
|
+
def split(split_multipeaks=false, return_local_minima=false)
|
175
|
+
if split_multipeaks
|
176
|
+
(zeroed_peaks, local_min_ind_ar) = self.split(false, true)
|
177
|
+
$stderr.print "splitting on local minima ..." if $VERBOSE
|
178
|
+
no_local_minima_peaks = zeroed_peaks.zip(local_min_ind_ar).map do |peak, lm_indices|
|
179
|
+
new_peaks = [ peak.class.new ]
|
180
|
+
if lm_indices.size > 0
|
181
|
+
prev_lm_i = -1 # <- it's okay, we don't use until it is zero
|
182
|
+
lm_indices.each do |lm_i|
|
183
|
+
lm = peak[lm_i]
|
184
|
+
point_class = lm.class
|
185
|
+
|
186
|
+
# push onto the last peak all the points from right after the previous local min
|
187
|
+
# to just before this local min
|
188
|
+
new_peaks.last.push( *peak[(prev_lm_i+1)..(lm_i-1)] )
|
189
|
+
before_pnt = peak[lm_i-1]
|
190
|
+
after_pnt = peak[lm_i+1]
|
191
|
+
|
192
|
+
case split_multipeaks
|
193
|
+
when :share
|
194
|
+
sum = before_pnt[1] + after_pnt[1]
|
195
|
+
# push onto the last peak its portion of the local min
|
196
|
+
new_peaks.last << point_class.new( [lm[0], lm[1] * (before_pnt[1].to_f/sum)] )
|
197
|
+
# create a new peak that contains its portion of the local min
|
198
|
+
new_peaks << self.class.new( [point_class.new([lm[0], lm[1] * (after_pnt[1].to_f/sum)])] )
|
199
|
+
prev_lm_i = lm_i
|
200
|
+
when :greedy_y
|
201
|
+
if before_pnt[1] >= after_pnt[1]
|
202
|
+
new_peaks.last << lm
|
203
|
+
new_peaks << self.class.new
|
204
|
+
prev_lm_i = lm_i
|
205
|
+
else
|
206
|
+
new_peaks << self.class.new( [lm] )
|
207
|
+
prev_lm_i = lm_i
|
208
|
+
end
|
209
|
+
else
|
210
|
+
raise ArgumentError, "only recognize :share, :greedy_y, or false for the arg in #split(arg)"
|
211
|
+
end
|
212
|
+
end
|
213
|
+
new_peaks.last.push( *peak[(prev_lm_i+1)...peak.size] )
|
214
|
+
new_peaks
|
215
|
+
else
|
216
|
+
[peak]
|
217
|
+
end
|
218
|
+
end.flatten(1) # end zip
|
219
|
+
$stderr.puts "now #{no_local_minima_peaks.size} peaks." if $VERBOSE
|
220
|
+
no_local_minima_peaks
|
221
|
+
else
|
222
|
+
$stderr.print "splitting on zeros..." if $VERBOSE
|
223
|
+
# first, split the peaks based on zero intensity values
|
224
|
+
# and simultaneously keep track of the local minima within each
|
225
|
+
# resulting peak
|
226
|
+
peak_lists = []
|
227
|
+
local_min_ind_ar = []
|
228
|
+
in_peak = false
|
229
|
+
self.each_with_index do |peak, index|
|
230
|
+
previous_y = self[index - 1][1]
|
231
|
+
if peak[1] > 0
|
232
|
+
if !in_peak
|
233
|
+
in_peak = 0
|
234
|
+
peak_lists << self.class.new([peak])
|
235
|
+
local_min_ind_ar << []
|
236
|
+
else
|
237
|
+
peak_lists.last << peak
|
238
|
+
# if on_upslope(previous_y, peak[1])
|
239
|
+
if previous_y < peak[1]
|
240
|
+
# If we were previously on a downslope and we are now on an upslope
|
241
|
+
# then the previous index is a local min
|
242
|
+
prev_previous_y = self[index - 2][1]
|
243
|
+
# on_downslope(prev_previous_y, previous_y)
|
244
|
+
if prev_previous_y > previous_y
|
245
|
+
# We have found a local min
|
246
|
+
local_min_ind_ar.last << (in_peak-1)
|
247
|
+
end
|
248
|
+
end # end if (upslope)
|
249
|
+
end # end if !in_peak
|
250
|
+
in_peak += 1
|
251
|
+
elsif in_peak
|
252
|
+
in_peak = false
|
253
|
+
end # end if point[1] > 0
|
254
|
+
end
|
255
|
+
$stderr.puts "#{peak_lists.size} no-whitespace-inside peak_lists." if $VERBOSE
|
256
|
+
return_local_minima ? [peak_lists, local_min_ind_ar] : peak_lists
|
257
|
+
end #
|
258
|
+
end # def split
|
259
|
+
end
|
260
|
+
end
|
data/lib/mspire/spectrum.rb
CHANGED
@@ -1,173 +1,19 @@
|
|
1
1
|
require 'mspire/spectrum_like'
|
2
|
-
require 'bsearch'
|
3
|
-
require 'mspire/bin'
|
4
|
-
require 'mspire/peak'
|
5
2
|
|
6
3
|
module Mspire
|
7
|
-
# note that a point is an [m/z, intensity] doublet.
|
8
|
-
# A peak is considered a related string of points
|
9
4
|
class Spectrum
|
10
5
|
include Mspire::SpectrumLike
|
11
6
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
}
|
19
|
-
|
20
|
-
class << self
|
21
|
-
|
22
|
-
def from_points(ar_of_doublets)
|
23
|
-
_mzs = []
|
24
|
-
_ints = []
|
25
|
-
ar_of_doublets.each do |mz, int|
|
26
|
-
_mzs << mz
|
27
|
-
_ints << int
|
28
|
-
end
|
29
|
-
self.new([_mzs, _ints])
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
# returns a new spectrum which has been merged with the others. If the
|
34
|
-
# spectra are centroided (just checks the first one and assumes the others
|
35
|
-
# are the same) then it will bin the points (bin width determined by
|
36
|
-
# opts[:resolution]) and then segment according to monotonicity (sharing
|
37
|
-
# intensity between abutting points). The final m/z is the weighted
|
38
|
-
# averaged of all the m/z's in each peak. Valid opts (with default listed
|
39
|
-
# first):
|
40
|
-
#
|
41
|
-
# :bin_width => 5
|
42
|
-
# :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
|
43
|
-
# :bins => array of Bin objects for custom bins (overides other bin options)
|
44
|
-
# :normalize => false if true, divides total intensity by
|
45
|
-
# number of spectra
|
46
|
-
# :return_data => false returns a parallel array containing
|
47
|
-
# the peaks associated with each returned point
|
48
|
-
# :split => false | :share | :greedy_y see Mspire::Peak#split
|
49
|
-
#
|
50
|
-
# The binning algorithm is the fastest possible algorithm that would allow
|
51
|
-
# for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
|
52
|
-
def merge(spectra, opts={})
|
53
|
-
opt = DEFAULT_MERGE.merge(opts)
|
54
|
-
(spectrum, returned_data) =
|
55
|
-
unless spectra.first.centroided? == false
|
56
|
-
# find the min and max across all spectra
|
57
|
-
first_mzs = spectra.first.mzs
|
58
|
-
min = first_mzs.first ; max = first_mzs.last
|
59
|
-
spectra.each do |spectrum|
|
60
|
-
mzs = spectrum.mzs
|
61
|
-
min = mzs.first if mzs.first < min
|
62
|
-
max = mzs.last if mzs.last > max
|
63
|
-
end
|
64
|
-
|
65
|
-
# Create Bin objects
|
66
|
-
bins =
|
67
|
-
if opt[:bins]
|
68
|
-
opt[:bins]
|
69
|
-
else
|
70
|
-
divisions = []
|
71
|
-
bin_width = opt[:bin_width]
|
72
|
-
use_ppm = (opt[:bin_unit] == :ppm)
|
73
|
-
current_mz = min
|
74
|
-
loop do
|
75
|
-
if current_mz >= max
|
76
|
-
divisions << max
|
77
|
-
break
|
78
|
-
else
|
79
|
-
divisions << current_mz
|
80
|
-
current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
|
81
|
-
end
|
82
|
-
end
|
83
|
-
# make each bin exclusive so there is no overlap
|
84
|
-
bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
|
85
|
-
# make the last bin *inclusive* of the terminating value
|
86
|
-
bins[-1] = Bin.new(bins.last.begin, bins.last.end)
|
87
|
-
bins
|
88
|
-
end
|
89
|
-
|
90
|
-
spectra.each do |spectrum|
|
91
|
-
Bin.bin(bins, spectrum.points, &:first)
|
92
|
-
end
|
93
|
-
|
94
|
-
pseudo_points = bins.map do |bin|
|
95
|
-
#int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3) # <- just for info:
|
96
|
-
[bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
|
97
|
-
end
|
98
|
-
|
99
|
-
#p_mzs = []
|
100
|
-
#p_ints = []
|
101
|
-
#p_num_points = []
|
102
|
-
#pseudo_points.each do |psp|
|
103
|
-
# p_mzs << ((psp.first.begin + psp.first.end)/2)
|
104
|
-
# p_ints << psp.last
|
105
|
-
# p_num_points << psp.first.data.size
|
106
|
-
#end
|
107
|
-
|
108
|
-
#File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
|
109
|
-
#abort 'here'
|
110
|
-
|
111
|
-
|
112
|
-
peaks = Mspire::Peak.new(pseudo_points).split(opt[:split])
|
113
|
-
|
114
|
-
return_data = []
|
115
|
-
_mzs = [] ; _ints = []
|
116
|
-
|
117
|
-
#p peaks[97]
|
118
|
-
#puts "HIYA"
|
119
|
-
#abort 'here'
|
120
|
-
|
121
|
-
peaks.each_with_index do |peak,i|
|
122
|
-
#peaks.each do |peak|
|
123
|
-
tot_intensity = peak.map(&:last).reduce(:+)
|
124
|
-
return_data_per_peak = [] if opt[:return_data]
|
125
|
-
weighted_mz = 0.0
|
126
|
-
peak.each do |point|
|
127
|
-
pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
|
128
|
-
post_scaled_intensity = point[1]
|
129
|
-
# some peaks may have been shared. In this case the intensity
|
130
|
-
# for that peak was downweighted. However, the actually data
|
131
|
-
# composing that peak is not altered when the intensity is
|
132
|
-
# shared. So, to calculate a proper weighted avg we need to
|
133
|
-
# downweight the intensity of any data point found within a bin
|
134
|
-
# whose intensity was scaled.
|
135
|
-
correction_factor =
|
136
|
-
if pre_scaled_intensity != post_scaled_intensity
|
137
|
-
post_scaled_intensity / pre_scaled_intensity
|
138
|
-
else
|
139
|
-
1.0
|
140
|
-
end
|
141
|
-
|
142
|
-
return_data_per_peak.push(*point[0].data) if opt[:return_data]
|
143
|
-
|
144
|
-
point[0].data.each do |lil_point|
|
145
|
-
weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
|
146
|
-
end
|
147
|
-
end
|
148
|
-
return_data << return_data_per_peak if opt[:return_data]
|
149
|
-
_mzs << weighted_mz
|
150
|
-
_ints << tot_intensity
|
151
|
-
end
|
152
|
-
[Spectrum.new([_mzs, _ints]), return_data]
|
153
|
-
else
|
154
|
-
raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
|
155
|
-
end
|
156
|
-
|
157
|
-
if opt[:normalize]
|
158
|
-
sz = spectra.size
|
159
|
-
spectrum.intensities.map! {|v| v.to_f / sz }
|
160
|
-
end
|
161
|
-
if opt[:return_data]
|
162
|
-
$stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
|
163
|
-
[spectrum, return_data]
|
164
|
-
else
|
165
|
-
$stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
|
166
|
-
spectrum
|
167
|
-
end
|
7
|
+
def self.from_peaklist(peaklist)
|
8
|
+
_mzs = []
|
9
|
+
_ints = []
|
10
|
+
peaklist.each do |mz, int|
|
11
|
+
_mzs << mz
|
12
|
+
_ints << int
|
168
13
|
end
|
169
|
-
|
14
|
+
self.new([_mzs, _ints])
|
170
15
|
end
|
16
|
+
|
171
17
|
end
|
172
18
|
end
|
173
19
|
|
data/lib/mspire/spectrum_like.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'bsearch'
|
2
|
+
|
1
3
|
module Mspire
|
2
4
|
module SpectrumLike
|
3
5
|
include Enumerable
|
@@ -63,12 +65,12 @@ module Mspire
|
|
63
65
|
end
|
64
66
|
|
65
67
|
# yields(mz, inten) across the spectrum, or array of doublets if no block
|
66
|
-
def
|
68
|
+
def peaks(&block)
|
67
69
|
@data_arrays[0].zip(@data_arrays[1], &block)
|
68
70
|
end
|
69
71
|
|
70
|
-
alias_method :each, :
|
71
|
-
alias_method :
|
72
|
+
alias_method :each, :peaks
|
73
|
+
alias_method :each_peak, :peaks
|
72
74
|
|
73
75
|
# if the mzs and intensities are the same then the spectra are considered
|
74
76
|
# equal
|
@@ -91,9 +93,9 @@ module Mspire
|
|
91
93
|
# instruments are bad about this)
|
92
94
|
# returns self
|
93
95
|
def sort!
|
94
|
-
|
95
|
-
|
96
|
-
|
96
|
+
_peaks = peaks.to_a
|
97
|
+
_peaks.sort!
|
98
|
+
_peaks.each_with_index {|(mz,int), i| @data_arrays[0][i] = mz ; @data_arrays[1][i] = int }
|
97
99
|
self
|
98
100
|
end
|
99
101
|
|
@@ -134,9 +136,5 @@ module Mspire
|
|
134
136
|
find_all_nearest_index(val).map {|i| mzs[i] }
|
135
137
|
end
|
136
138
|
|
137
|
-
# uses Mspire::Spectrum.merge
|
138
|
-
def merge(other_spectra, opts={})
|
139
|
-
Mspire::Spectrum.merge([self, *other_spectra], opts)
|
140
|
-
end
|
141
139
|
end
|
142
140
|
end
|
data/lib/obo/ontology.rb
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
require 'obo' # the gem obo
|
2
2
|
|
3
|
+
module Obo
|
4
|
+
class Stanza
|
5
|
+
|
6
|
+
def cast_method
|
7
|
+
xref = @tagvalues['xref'].first
|
8
|
+
@cast_method =
|
9
|
+
if xref.nil? || (@cast_method == false)
|
10
|
+
false
|
11
|
+
else
|
12
|
+
if @cast_method
|
13
|
+
@cast_method
|
14
|
+
else
|
15
|
+
case xref[/value-type:xsd\\:([^\s]+) /, 1]
|
16
|
+
when 'float' ; :to_f
|
17
|
+
when 'int' ; :to_i
|
18
|
+
when 'string' ; :to_s
|
19
|
+
else ; false
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# returns the value cast based on rules in first xref
|
26
|
+
# no casting performed if there is no xref
|
27
|
+
def cast(val)
|
28
|
+
@cast_method ? val.send(@cast_method) : val
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
3
33
|
module Obo
|
4
34
|
class Ontology
|
5
35
|
DIR = File.expand_path(File.dirname(__FILE__) + '/../../obo')
|
@@ -16,10 +46,16 @@ module Obo
|
|
16
46
|
def id_to_name
|
17
47
|
@id_to_name ||= build_hash('id', 'name')
|
18
48
|
end
|
49
|
+
|
50
|
+
def id_to_cast
|
51
|
+
@id_to_cast ||= Hash[ id_to_element.map {|id,el| [id, el.cast_method] } ]
|
52
|
+
end
|
53
|
+
|
19
54
|
# returns a name to id Hash
|
20
55
|
def name_to_id
|
21
56
|
@name_to_id ||= build_hash('name', 'id')
|
22
57
|
end
|
58
|
+
|
23
59
|
def id_to_element
|
24
60
|
@id_to_element ||= build_hash('id', nil)
|
25
61
|
end
|
data/mspire.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "mspire"
|
8
|
-
s.version = "0.7.
|
8
|
+
s.version = "0.7.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John T. Prince", "Simon Chiang"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-29"
|
13
13
|
s.description = "mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems"
|
14
14
|
s.email = "jtprince@gmail.com"
|
15
15
|
s.executables = ["mzml_to_imzml"]
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/merge.rb",
|
32
32
|
"lib/mspire.rb",
|
33
33
|
"lib/mspire/bin.rb",
|
34
|
-
"lib/mspire/cv.rb",
|
34
|
+
"lib/mspire/cv/obo.rb",
|
35
35
|
"lib/mspire/cv/param.rb",
|
36
36
|
"lib/mspire/cv/paramable.rb",
|
37
37
|
"lib/mspire/digester.rb",
|
@@ -102,7 +102,7 @@ Gem::Specification.new do |s|
|
|
102
102
|
"lib/mspire/mzml/spectrum_list.rb",
|
103
103
|
"lib/mspire/obo.rb",
|
104
104
|
"lib/mspire/peak.rb",
|
105
|
-
"lib/mspire/peak/point.rb",
|
105
|
+
"lib/mspire/peak_list.rb",
|
106
106
|
"lib/mspire/plms1.rb",
|
107
107
|
"lib/mspire/quant/qspec.rb",
|
108
108
|
"lib/mspire/quant/qspec/protein_group_comparison.rb",
|
@@ -123,7 +123,9 @@ Gem::Specification.new do |s|
|
|
123
123
|
"script/mzml_read_binary.rb",
|
124
124
|
"spec/cv/param_spec.rb",
|
125
125
|
"spec/mspire/bin_spec.rb",
|
126
|
+
"spec/mspire/cv/obo_spec.rb",
|
126
127
|
"spec/mspire/cv/param_spec.rb",
|
128
|
+
"spec/mspire/cv/paramable_spec.rb",
|
127
129
|
"spec/mspire/digester_spec.rb",
|
128
130
|
"spec/mspire/error_rate/qvalue_spec.rb",
|
129
131
|
"spec/mspire/fasta_spec.rb",
|
@@ -148,7 +150,7 @@ Gem::Specification.new do |s|
|
|
148
150
|
"spec/mspire/mzml/source_file_spec.rb",
|
149
151
|
"spec/mspire/mzml/spectrum_spec.rb",
|
150
152
|
"spec/mspire/mzml_spec.rb",
|
151
|
-
"spec/mspire/peak_spec.rb",
|
153
|
+
"spec/mspire/peak_list_spec.rb",
|
152
154
|
"spec/mspire/plms1_spec.rb",
|
153
155
|
"spec/mspire/quant/qspec_spec.rb",
|
154
156
|
"spec/mspire/spectrum_spec.rb",
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/cv/obo'
|
4
|
+
|
5
|
+
describe 'Mspire::CV::Obo' do
|
6
|
+
it 'finds names based on id' do
|
7
|
+
id_to_name = Mspire::CV::Obo::NAME
|
8
|
+
id_to_name.should be_a(Hash)
|
9
|
+
id_to_name['MS:1000005'].should == 'sample volume'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'finds casts based on id' do
|
13
|
+
id_to_cast = Mspire::CV::Obo::CAST
|
14
|
+
id_to_cast.should be_a(Hash)
|
15
|
+
id_to_cast['MS:1000005'].should == :to_f
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
@@ -19,5 +19,17 @@ describe Mspire::CV::Param do
|
|
19
19
|
param2.unit.accession.should == 'UO:0000024'
|
20
20
|
end
|
21
21
|
|
22
|
+
it 'casts values based on info in the obo itself' do
|
23
|
+
param = Mspire::CV::Param['MS:1000005', "2.2"]
|
24
|
+
param.value.should == 2.2
|
25
|
+
param.value.should be_a(Float)
|
26
|
+
param = Mspire::CV::Param['MS:1000041', "3"]
|
27
|
+
param.value.should be_a(Integer)
|
28
|
+
param.value.should == 3
|
29
|
+
# doesn't get greedy about casting things like nil
|
30
|
+
param = Mspire::CV::Param['MS:1000041', nil]
|
31
|
+
param.value.should be_nil
|
32
|
+
end
|
33
|
+
|
22
34
|
end
|
23
35
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/cv/paramable'
|
4
|
+
require 'mspire/cv/param'
|
5
|
+
require 'mspire/user_param'
|
6
|
+
require 'mspire/mzml/referenceable_param_group'
|
7
|
+
|
8
|
+
class MockObject
|
9
|
+
include Mspire::CV::Paramable
|
10
|
+
end
|
11
|
+
|
12
|
+
describe 'Mspire::CV::Paramable' do
|
13
|
+
|
14
|
+
subject do
|
15
|
+
paramable = MockObject.new :params => ['MS:1000007', ['MS:1000511', 2]]
|
16
|
+
paramable.user_params << Mspire::UserParam.new('hello', 477)
|
17
|
+
paramable.ref_param_groups << Mspire::Mzml::ReferenceableParamGroup.new('id1', :params => [['MS:1000512', 'filter string']])
|
18
|
+
paramable
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'can be initialized with different types of params' do
|
22
|
+
subject.cv_params.size.should == 2
|
23
|
+
subject.ref_param_groups.size.should == 1
|
24
|
+
subject.user_params.size.should == 1
|
25
|
+
end
|
26
|
+
|
27
|
+
it '#params grabs all params' do
|
28
|
+
params = subject.params
|
29
|
+
params.size.should == 4
|
30
|
+
params.map(&:class).uniq.size.should == 2
|
31
|
+
end
|
32
|
+
|
33
|
+
it '#params? asks if there are any' do
|
34
|
+
subject.params?.should be_true
|
35
|
+
end
|
36
|
+
|
37
|
+
it '#accessionable_params returns those with accession numbers' do
|
38
|
+
subject.accessionable_params.size.should == 3
|
39
|
+
end
|
40
|
+
|
41
|
+
it '#param finds the value or true if param name exists' do
|
42
|
+
# doesn't take accessions!
|
43
|
+
subject.fetch('MS:1000511').should be_false
|
44
|
+
subject.fetch('ms level').should == 2
|
45
|
+
subject.fetch('inlet type').should be_true
|
46
|
+
end
|
47
|
+
|
48
|
+
it '#params? tells if has any' do
|
49
|
+
subject.params?.should be_true
|
50
|
+
mine = subject.dup
|
51
|
+
[:cv_params, :user_params, :ref_param_groups].each do |key|
|
52
|
+
mine.send("#{key}=", [])
|
53
|
+
end
|
54
|
+
mine.params?.should be_false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
data/spec/mspire/mzml_spec.rb
CHANGED
@@ -40,10 +40,11 @@ describe Mspire::Mzml do
|
|
40
40
|
|
41
41
|
it 'goes through spectrum with #each or #each_spectrum' do
|
42
42
|
mz_sizes = [20168, 315, 634]
|
43
|
-
|
43
|
+
centroided_list = [false, true, true]
|
44
44
|
@mzml.each do |spec|
|
45
45
|
spec.mzs.size.should == mz_sizes.shift
|
46
|
-
|
46
|
+
centroided = centroided_list.shift
|
47
|
+
spec.centroided?.should == centroided
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
@@ -71,7 +72,7 @@ describe Mspire::Mzml do
|
|
71
72
|
|
72
73
|
it 'can gracefully determine the m/z with highest peak in select scans' do
|
73
74
|
highest_mzs = Mspire::Mzml.foreach(@file).select {|v| v.ms_level > 1 }.map do |spec|
|
74
|
-
spec.
|
75
|
+
spec.peaks.sort_by(&:last).first.first
|
75
76
|
end
|
76
77
|
highest_mzs.map(&:round).should == [453, 866]
|
77
78
|
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
+
require 'mspire/peak_list'
|
3
4
|
require 'mspire/peak'
|
4
|
-
require 'mspire/peak/point'
|
5
5
|
|
6
|
-
describe Mspire::Peak do
|
6
|
+
describe Mspire::PeakList do
|
7
7
|
|
8
8
|
describe '#split' do
|
9
9
|
|
@@ -25,11 +25,11 @@ describe Mspire::Peak do
|
|
25
25
|
mz += diff
|
26
26
|
end
|
27
27
|
@xs.map! {|mz| mz.round(2) }
|
28
|
-
@
|
28
|
+
@peaks = @xs.zip(@intensities).map {|pair| Mspire::Peak.new(pair) }
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'splits on zeros by default' do
|
32
|
-
peak = Mspire::
|
32
|
+
peak = Mspire::PeakList.new(@peaks) # <- maybe more like a collection of peaks, but PeakList is flexible
|
33
33
|
peaks = peak.split
|
34
34
|
peaks.size.should == 4
|
35
35
|
peaks.should == [
|
@@ -46,17 +46,17 @@ describe Mspire::Peak do
|
|
46
46
|
|
47
47
|
# which it should since zeros are the ultimate local min!
|
48
48
|
it 'always cleans up surrounding zeros and does not split non-multipeaks' do
|
49
|
-
peak = Mspire::
|
49
|
+
peak = Mspire::PeakList.new(@peaks[0,7]) # simple
|
50
50
|
[:share, :greedy_y].each do |multipeak_split_method|
|
51
51
|
peaks = peak.split(multipeak_split_method)
|
52
|
-
peaks.first.should be_an_instance_of(Mspire::
|
52
|
+
peaks.first.should be_an_instance_of(Mspire::PeakList)
|
53
53
|
peaks.first.to_a.should == [[50.01, 3], [50.02, 8], [50.03, 9], [50.04, 7], [50.05, 2]]
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'does #split(:share) and shares the peak proportional to adjacent peaks' do
|
58
58
|
data = [[50.07, 0], [50.08, 3], [50.09, 8], [50.1, 2], [50.11, 9], [50.12, 7], [50.13, 1], [50.14, 3], [50.15, 0]]
|
59
|
-
multipeak1 = Mspire::
|
59
|
+
multipeak1 = Mspire::PeakList.new( data )
|
60
60
|
|
61
61
|
answer = [
|
62
62
|
[[50.08, 3], [50.09, 8], [50.1, (2*8.0/17)]],
|
@@ -79,12 +79,34 @@ describe Mspire::Peak do
|
|
79
79
|
]
|
80
80
|
|
81
81
|
# test a tie -> goes left!
|
82
|
-
|
83
|
-
|
84
|
-
multipeak2 = Mspire::
|
82
|
+
peaks = @peaks[7,9]
|
83
|
+
peaks[2] = Mspire::Peak.new([peaks[2][0], 9])
|
84
|
+
multipeak2 = Mspire::PeakList.new( peaks )
|
85
85
|
multipeak2.split(:greedy_y).should == answer
|
86
86
|
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
90
|
+
describe '#merge' do
|
91
|
+
|
92
|
+
subject do
|
93
|
+
|
94
|
+
list1 = [[10.1, 1], [10.5, 2], [10.7, 3], [11.5, 4]]
|
95
|
+
list2 = [[10.11, 5], [10.49, 6], [10.71, 7], [11.48, 8]]
|
96
|
+
list3 = [[10.09, 9], [10.51, 10], [10.72, 11], [11.51, 12]]
|
97
|
+
|
98
|
+
[list1, list2, list3].map {|peaks| Mspire::PeakList.new( peaks ) }
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'merges, giving exact weighted average m/z values for each cluster' do
|
102
|
+
(peaklist1, data) = Mspire::PeakList.merge(subject, :bin_width => 0.08, :bin_unit => :amu, :return_data => true)
|
103
|
+
peaklist2 = Mspire::PeakList.merge(subject, :bin_width => 0.08, :bin_unit => :amu)
|
104
|
+
peaklist1.should == peaklist2
|
105
|
+
peaks = [[10.097333333333331, 10.502222222222223, 10.713809523809525, 11.498333333333333], [5.0, 6.0, 7.0, 8.0]].transpose
|
106
|
+
peaklist1.should == Mspire::PeakList.new(peaks)
|
107
|
+
data.should == [[[10.1, 1], [10.11, 5], [10.09, 9]], [[10.5, 2], [10.49, 6], [10.51, 10]], [[10.7, 3], [10.71, 7], [10.72, 11]], [[11.5, 4], [11.48, 8], [11.51, 12]]]
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
90
111
|
end
|
112
|
+
|
@@ -44,22 +44,4 @@ describe Mspire::Spectrum do
|
|
44
44
|
spec.intensities.should == subject.intensities
|
45
45
|
end
|
46
46
|
end
|
47
|
-
|
48
|
-
describe 'merging spectra' do
|
49
|
-
subject do
|
50
|
-
data = [ [10.10, 10.5, 10.7, 11.5], [1, 2, 3, 4] ],
|
51
|
-
[ [10.11, 10.49, 10.71, 11.48], [5, 6, 7, 8] ],
|
52
|
-
[ [10.09, 10.51, 10.72, 11.51], [9, 10, 11, 12]
|
53
|
-
]
|
54
|
-
data.map {|datum| Mspire::Spectrum.new( datum ) }
|
55
|
-
end
|
56
|
-
it 'merges, giving exact weighted average m/z values for each cluster' do
|
57
|
-
(spec1, data) = Mspire::Spectrum.merge(subject, :bin_width => 0.08, :bin_unit => :amu, :return_data => true)
|
58
|
-
spec2 = Mspire::Spectrum.merge(subject, :bin_width => 0.08, :bin_unit => :amu)
|
59
|
-
spec1.should == spec2
|
60
|
-
spec1.should == Mspire::Spectrum.new([[10.097333333333331, 10.502222222222223, 10.713809523809525, 11.498333333333333], [5.0, 6.0, 7.0, 8.0]])
|
61
|
-
data.should == [[[10.1, 1], [10.11, 5], [10.09, 9]], [[10.5, 2], [10.49, 6], [10.51, 10]], [[10.7, 3], [10.71, 7], [10.72, 11]], [[11.5, 4], [11.48, 8], [11.51, 12]]]
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
47
|
end
|
data/spec/obo_spec.rb
CHANGED
@@ -5,6 +5,7 @@ require 'obo/ims'
|
|
5
5
|
require 'obo/unit'
|
6
6
|
|
7
7
|
describe 'accessing a specific Obo::Ontology' do
|
8
|
+
|
8
9
|
it 'can access MS obo' do
|
9
10
|
Obo::MS.id_to_name['MS:1000004'].should == 'sample mass'
|
10
11
|
Obo::MS.name_to_id['sample mass'].should == 'MS:1000004'
|
@@ -22,4 +23,15 @@ describe 'accessing a specific Obo::Ontology' do
|
|
22
23
|
Obo::Unit.name_to_id['temperature unit'].should == 'UO:0000005'
|
23
24
|
Obo::Unit.id_to_element['UO:0000005'].should be_a(Obo::Stanza)
|
24
25
|
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'Obo::Stanza' do
|
30
|
+
it 'can properly cast values' do
|
31
|
+
Obo::MS.id_to_element['MS:1000511'].cast('1').should == 1
|
32
|
+
Obo::MS.id_to_element['MS:1000004'].cast('2.2').should == 2.2
|
33
|
+
# don't ask me why mass resolution is cast into a string, but it is!
|
34
|
+
Obo::MS.id_to_element['MS:1000011'].cast('2.2').should == '2.2'
|
35
|
+
Obo::MS.id_to_element['MS:1000018'].cast('low to high').should == 'low to high'
|
36
|
+
end
|
25
37
|
end
|
@@ -67,7 +67,7 @@
|
|
67
67
|
<selectedIonList count="1">
|
68
68
|
<cvParam cvRef="MS" accession="MS:1000744" name="selected ion m/z" value="2.0"/>
|
69
69
|
<cvParam cvRef="MS" accession="MS:1000041" name="charge state" value="2"/>
|
70
|
-
<cvParam cvRef="MS" accession="MS:1000042" name="peak intensity" value="5"/>
|
70
|
+
<cvParam cvRef="MS" accession="MS:1000042" name="peak intensity" value="5.0"/>
|
71
71
|
</selectedIonList>
|
72
72
|
</precursor>
|
73
73
|
</precursorList>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.7
|
4
|
+
version: 0.7.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-03-
|
13
|
+
date: 2012-03-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -195,7 +195,7 @@ files:
|
|
195
195
|
- lib/merge.rb
|
196
196
|
- lib/mspire.rb
|
197
197
|
- lib/mspire/bin.rb
|
198
|
-
- lib/mspire/cv.rb
|
198
|
+
- lib/mspire/cv/obo.rb
|
199
199
|
- lib/mspire/cv/param.rb
|
200
200
|
- lib/mspire/cv/paramable.rb
|
201
201
|
- lib/mspire/digester.rb
|
@@ -266,7 +266,7 @@ files:
|
|
266
266
|
- lib/mspire/mzml/spectrum_list.rb
|
267
267
|
- lib/mspire/obo.rb
|
268
268
|
- lib/mspire/peak.rb
|
269
|
-
- lib/mspire/peak/point.rb
|
269
|
+
- lib/mspire/peak_list.rb
|
270
270
|
- lib/mspire/plms1.rb
|
271
271
|
- lib/mspire/quant/qspec.rb
|
272
272
|
- lib/mspire/quant/qspec/protein_group_comparison.rb
|
@@ -287,7 +287,9 @@ files:
|
|
287
287
|
- script/mzml_read_binary.rb
|
288
288
|
- spec/cv/param_spec.rb
|
289
289
|
- spec/mspire/bin_spec.rb
|
290
|
+
- spec/mspire/cv/obo_spec.rb
|
290
291
|
- spec/mspire/cv/param_spec.rb
|
292
|
+
- spec/mspire/cv/paramable_spec.rb
|
291
293
|
- spec/mspire/digester_spec.rb
|
292
294
|
- spec/mspire/error_rate/qvalue_spec.rb
|
293
295
|
- spec/mspire/fasta_spec.rb
|
@@ -312,7 +314,7 @@ files:
|
|
312
314
|
- spec/mspire/mzml/source_file_spec.rb
|
313
315
|
- spec/mspire/mzml/spectrum_spec.rb
|
314
316
|
- spec/mspire/mzml_spec.rb
|
315
|
-
- spec/mspire/peak_spec.rb
|
317
|
+
- spec/mspire/peak_list_spec.rb
|
316
318
|
- spec/mspire/plms1_spec.rb
|
317
319
|
- spec/mspire/quant/qspec_spec.rb
|
318
320
|
- spec/mspire/spectrum_spec.rb
|
data/lib/mspire/cv.rb
DELETED
data/lib/mspire/peak/point.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
|
2
|
-
module Mspire
|
3
|
-
class Peak
|
4
|
-
# A point is typically a doublet: an x value and a y value. In a spectrum
|
5
|
-
# this will be an m/z and intensity. In a chromatogram this will be a
|
6
|
-
# retention time and an intensity. (This class can be subclassed if
|
7
|
-
# desired)
|
8
|
-
class Point < Array
|
9
|
-
alias_method :x, :first
|
10
|
-
alias_method :y, :last
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|