mspire 0.7.7 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/mzml_to_imzml +0 -0
- data/lib/mspire/cv/obo.rb +20 -0
- data/lib/mspire/cv/param.rb +10 -4
- data/lib/mspire/cv/paramable.rb +41 -20
- data/lib/mspire/mzml.rb +2 -2
- data/lib/mspire/mzml/spectrum.rb +6 -6
- data/lib/mspire/peak.rb +9 -106
- data/lib/mspire/peak_list.rb +260 -0
- data/lib/mspire/spectrum.rb +8 -162
- data/lib/mspire/spectrum_like.rb +8 -10
- data/lib/obo/ontology.rb +36 -0
- data/mspire.gemspec +7 -5
- data/spec/mspire/cv/obo_spec.rb +18 -0
- data/spec/mspire/cv/param_spec.rb +12 -0
- data/spec/mspire/cv/paramable_spec.rb +57 -0
- data/spec/mspire/mzml_spec.rb +4 -3
- data/spec/mspire/{peak_spec.rb → peak_list_spec.rb} +32 -10
- data/spec/mspire/spectrum_spec.rb +0 -18
- data/spec/obo_spec.rb +12 -0
- data/spec/testfiles/mspire/mzml/mspire_simulated.MSn.check.mzML +1 -1
- metadata +7 -5
- data/lib/mspire/cv.rb +0 -16
- data/lib/mspire/peak/point.rb +0 -13
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.7
|
1
|
+
0.7.8
|
data/bin/mzml_to_imzml
CHANGED
File without changes
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
require 'obo/ms'
|
3
|
+
require 'obo/ims'
|
4
|
+
require 'obo/unit'
|
5
|
+
|
6
|
+
module Mspire
|
7
|
+
module CV
|
8
|
+
module Obo
|
9
|
+
|
10
|
+
# a hash keyed on ID that gives the cv term name
|
11
|
+
NAME = %w(MS IMS Unit).inject({}) do |hash,key|
|
12
|
+
hash.merge! ::Obo.const_get(key).id_to_name
|
13
|
+
end
|
14
|
+
CAST = %w(MS IMS Unit).inject({}) do |hash,key|
|
15
|
+
hash.merge! ::Obo.const_get(key).id_to_cast
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/mspire/cv/param.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require 'cv/param'
|
2
|
-
require 'mspire/cv'
|
1
|
+
require 'mspire/cv/param'
|
2
|
+
require 'mspire/cv/obo'
|
3
3
|
|
4
4
|
module Mspire
|
5
5
|
module CV
|
@@ -28,8 +28,14 @@ module Mspire
|
|
28
28
|
when 3
|
29
29
|
Mspire::CV::Param[args.pop]
|
30
30
|
end
|
31
|
-
|
32
|
-
|
31
|
+
acc = args[0]
|
32
|
+
obo_type = acc[/([A-Za-z]+):/,1]
|
33
|
+
val = args[1]
|
34
|
+
if val
|
35
|
+
cast = Mspire::CV::Obo::CAST[acc]
|
36
|
+
(val = val.send(cast)) if cast
|
37
|
+
end
|
38
|
+
::CV::Param.new(obo_type, args[0], Mspire::CV::Obo::NAME[acc], val, unit)
|
33
39
|
end
|
34
40
|
end
|
35
41
|
end
|
data/lib/mspire/cv/paramable.rb
CHANGED
@@ -2,6 +2,7 @@ require 'cv/param'
|
|
2
2
|
require 'mspire/user_param'
|
3
3
|
require 'mspire/cv/param'
|
4
4
|
require 'nokogiri'
|
5
|
+
require 'andand'
|
5
6
|
|
6
7
|
module Mspire
|
7
8
|
module CV
|
@@ -15,16 +16,48 @@ module Mspire
|
|
15
16
|
cv_params + ref_param_groups.flat_map(&:params) + user_params
|
16
17
|
end
|
17
18
|
|
19
|
+
def params?
|
20
|
+
total_num_params = cv_params.size +
|
21
|
+
ref_param_groups.reduce(0) {|sum,group| sum +
|
22
|
+
group.params.size } + user_params.size
|
23
|
+
total_num_params > 0
|
24
|
+
end
|
25
|
+
|
18
26
|
def accessionable_params
|
19
27
|
cv_params + ref_param_groups.flat_map(&:params)
|
20
28
|
end
|
21
29
|
|
22
|
-
def params_by_name
|
23
|
-
|
30
|
+
#def params_by_name
|
31
|
+
# params.index_by &:name
|
32
|
+
#end
|
33
|
+
|
34
|
+
#def params_by_accession
|
35
|
+
# accessionable_params.index_by &:accession
|
36
|
+
#end
|
37
|
+
|
38
|
+
# returns the value if the param exists by that name. Returns true if
|
39
|
+
# the param exists but has no value. returns false if no param
|
40
|
+
def fetch(name)
|
41
|
+
param = params.find {|param| param.name == name}
|
42
|
+
if param
|
43
|
+
param.value || true
|
44
|
+
else
|
45
|
+
false
|
46
|
+
end
|
24
47
|
end
|
25
48
|
|
26
|
-
def
|
27
|
-
accessionable_params.
|
49
|
+
def fetch_by_accession(acc)
|
50
|
+
param = accessionable_params.find {|v| v.accession == acc }
|
51
|
+
if param
|
52
|
+
param.value || true
|
53
|
+
else
|
54
|
+
false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
alias_method :fetch_by_acc, :fetch_by_accession
|
58
|
+
|
59
|
+
def param?(name)
|
60
|
+
params.any? {|param| param.name == name }
|
28
61
|
end
|
29
62
|
|
30
63
|
def initialize(opts={params: []})
|
@@ -34,22 +67,10 @@ module Mspire
|
|
34
67
|
describe_many!(opts[:params])
|
35
68
|
end
|
36
69
|
|
37
|
-
|
38
|
-
|
39
|
-
param = accessionable_params.find {|v| v.accession == accession }
|
40
|
-
if param
|
41
|
-
val = param.value
|
42
|
-
cast ? (val && val.send(cast)) : val
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def find_param_by_accession(accession)
|
47
|
-
accessionable_params.find {|v| v.accession == accession }
|
48
|
-
end
|
49
|
-
|
50
|
-
def param_exists_by_accession?(accession)
|
51
|
-
accessionable_params.any? {|v| v.accession == accession }
|
70
|
+
def param_by_accession(acc)
|
71
|
+
accessionable_params.find {|v| v.accession == acc }
|
52
72
|
end
|
73
|
+
alias_method :param_by_acc, :param_by_accession
|
53
74
|
|
54
75
|
# takes an array of values, each of which is fed into describe!
|
55
76
|
def describe_many!(array)
|
@@ -89,7 +110,7 @@ module Mspire
|
|
89
110
|
param =
|
90
111
|
case arg.name
|
91
112
|
when 'cvParam'
|
92
|
-
::CV::Param
|
113
|
+
Mspire::CV::Param[ arg[:accession], arg[:value] ]
|
93
114
|
when 'userParam'
|
94
115
|
Mspire::UserParam.new(arg[:name], arg[:value], arg[:type])
|
95
116
|
end
|
data/lib/mspire/mzml.rb
CHANGED
@@ -22,14 +22,14 @@ require 'mspire/mzml/cv'
|
|
22
22
|
require 'mspire/mzml/sample'
|
23
23
|
|
24
24
|
module Mspire
|
25
|
-
# Reading an
|
25
|
+
# Reading an mzml file:
|
26
26
|
#
|
27
27
|
# Mspire::Mzml.open("somefile.mzML") do |mzml|
|
28
28
|
# mzml.each do |spectrum|
|
29
29
|
# scan = spectrum.scan
|
30
30
|
# spectrum.mzs # array of m/zs
|
31
31
|
# spectrum.intensities # array of intensities
|
32
|
-
# spectrum.
|
32
|
+
# spectrum.peaks do |mz,intensity|
|
33
33
|
# puts "mz: #{mz} intensity: #{intensity}"
|
34
34
|
# end
|
35
35
|
# end
|
data/lib/mspire/mzml/spectrum.rb
CHANGED
@@ -70,7 +70,7 @@ module Mspire
|
|
70
70
|
# returns the retention time of the first scan object in the scan list
|
71
71
|
# *in seconds*!
|
72
72
|
def retention_time
|
73
|
-
rt_param = scan_list.first.
|
73
|
+
rt_param = scan_list.first.param_by_acc('MS:1000016')
|
74
74
|
if rt_param
|
75
75
|
multiplier =
|
76
76
|
case rt_param.unit.accession
|
@@ -86,24 +86,24 @@ module Mspire
|
|
86
86
|
|
87
87
|
# returns the ms_level as an Integer
|
88
88
|
def ms_level
|
89
|
-
|
89
|
+
fetch_by_acc('MS:1000511')
|
90
90
|
end
|
91
91
|
|
92
92
|
def centroided?
|
93
|
-
|
93
|
+
fetch_by_acc('MS:1000127')
|
94
94
|
end
|
95
95
|
|
96
96
|
def profile?
|
97
|
-
|
97
|
+
fetch_by_acc('MS:1000128')
|
98
98
|
end
|
99
99
|
|
100
100
|
# returns the charge state of the first precursor as an integer
|
101
101
|
def precursor_charge
|
102
|
-
precursors.andand.first.andand.selected_ions.andand.first.andand.
|
102
|
+
precursors.andand.first.andand.selected_ions.andand.first.andand.fetch_by_acc('MS:1000041')
|
103
103
|
end
|
104
104
|
|
105
105
|
def precursor_mz
|
106
|
-
precursors.andand.first.andand.selected_ions.andand.first.andand.
|
106
|
+
precursors.andand.first.andand.selected_ions.andand.first.andand.fetch_by_acc('MS:1000744')
|
107
107
|
end
|
108
108
|
|
109
109
|
# takes a Nokogiri node and sets relevant properties
|
data/lib/mspire/peak.rb
CHANGED
@@ -1,108 +1,11 @@
|
|
1
1
|
|
2
|
-
module Mspire
|
3
|
-
#
|
4
|
-
#
|
5
|
-
class
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
#
|
12
|
-
# false/nil => only split on zeros
|
13
|
-
# :share => give each peak its rightful portion of shared peaks, dividing the
|
14
|
-
# intensity based on the intensity of adjacent peaks
|
15
|
-
# :greedy_y => give the point to the peak with highest point next to
|
16
|
-
# the point in question. tie goes lower.
|
17
|
-
#
|
18
|
-
# if return_local_minima is true, a parallel array of local minima indices is
|
19
|
-
# returned (only makes sense if split_multipeaks is false)
|
20
|
-
#
|
21
|
-
# assumes that a new point can be made with an array containing the x
|
22
|
-
# value and the y value.
|
23
|
-
def split(split_multipeaks=false, return_local_minima=false)
|
24
|
-
if split_multipeaks
|
25
|
-
(zeroed_peaks, local_min_ind_ar) = self.split(false, true)
|
26
|
-
$stderr.print "splitting on local minima ..." if $VERBOSE
|
27
|
-
no_local_minima_peaks = zeroed_peaks.zip(local_min_ind_ar).map do |peak, lm_indices|
|
28
|
-
new_peaks = [ peak.class.new ]
|
29
|
-
if lm_indices.size > 0
|
30
|
-
prev_lm_i = -1 # <- it's okay, we don't use until it is zero
|
31
|
-
lm_indices.each do |lm_i|
|
32
|
-
lm = peak[lm_i]
|
33
|
-
point_class = lm.class
|
34
|
-
|
35
|
-
# push onto the last peak all the points from right after the previous local min
|
36
|
-
# to just before this local min
|
37
|
-
new_peaks.last.push( *peak[(prev_lm_i+1)..(lm_i-1)] )
|
38
|
-
before_pnt = peak[lm_i-1]
|
39
|
-
after_pnt = peak[lm_i+1]
|
40
|
-
|
41
|
-
case split_multipeaks
|
42
|
-
when :share
|
43
|
-
sum = before_pnt[1] + after_pnt[1]
|
44
|
-
# push onto the last peak its portion of the local min
|
45
|
-
new_peaks.last << point_class.new( [lm[0], lm[1] * (before_pnt[1].to_f/sum)] )
|
46
|
-
# create a new peak that contains its portion of the local min
|
47
|
-
new_peaks << self.class.new( [point_class.new([lm[0], lm[1] * (after_pnt[1].to_f/sum)])] )
|
48
|
-
prev_lm_i = lm_i
|
49
|
-
when :greedy_y
|
50
|
-
if before_pnt[1] >= after_pnt[1]
|
51
|
-
new_peaks.last << lm
|
52
|
-
new_peaks << self.class.new
|
53
|
-
prev_lm_i = lm_i
|
54
|
-
else
|
55
|
-
new_peaks << self.class.new( [lm] )
|
56
|
-
prev_lm_i = lm_i
|
57
|
-
end
|
58
|
-
else
|
59
|
-
raise ArgumentError, "only recognize :share, :greedy_y, or false for the arg in #split(arg)"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
new_peaks.last.push( *peak[(prev_lm_i+1)...peak.size] )
|
63
|
-
new_peaks
|
64
|
-
else
|
65
|
-
[peak]
|
66
|
-
end
|
67
|
-
end.flatten(1) # end zip
|
68
|
-
$stderr.puts "now #{no_local_minima_peaks.size} peaks." if $VERBOSE
|
69
|
-
no_local_minima_peaks
|
70
|
-
else
|
71
|
-
$stderr.print "splitting on zeros..." if $VERBOSE
|
72
|
-
# first, split the peaks based on zero intensity values
|
73
|
-
# and simultaneously keep track of the local minima within each
|
74
|
-
# resulting peak
|
75
|
-
peaks = []
|
76
|
-
local_min_ind_ar = []
|
77
|
-
in_peak = false
|
78
|
-
self.each_with_index do |point, index|
|
79
|
-
previous_y = self[index - 1][1]
|
80
|
-
if point[1] > 0
|
81
|
-
if !in_peak
|
82
|
-
in_peak = 0
|
83
|
-
peaks << self.class.new([point])
|
84
|
-
local_min_ind_ar << []
|
85
|
-
else
|
86
|
-
peaks.last << point
|
87
|
-
# if on_upslope(previous_y, point[1])
|
88
|
-
if previous_y < point[1]
|
89
|
-
# If we were previously on a downslope and we are now on an upslope
|
90
|
-
# then the previous index is a local min
|
91
|
-
prev_previous_y = self[index - 2][1]
|
92
|
-
# on_downslope(prev_previous_y, previous_y)
|
93
|
-
if prev_previous_y > previous_y
|
94
|
-
# We have found a local min
|
95
|
-
local_min_ind_ar.last << (in_peak-1)
|
96
|
-
end
|
97
|
-
end # end if (upslope)
|
98
|
-
end # end if !in_peak
|
99
|
-
in_peak += 1
|
100
|
-
elsif in_peak
|
101
|
-
in_peak = false
|
102
|
-
end # end if point[1] > 0
|
103
|
-
end
|
104
|
-
$stderr.puts "#{peaks.size} no-whitespace-inside peaks." if $VERBOSE
|
105
|
-
return_local_minima ? [peaks, local_min_ind_ar] : peaks
|
106
|
-
end #
|
107
|
-
end # def split
|
2
|
+
module Mspire
|
3
|
+
# A peak is typically a doublet: an x value and a y value. In a spectrum
|
4
|
+
# this will be an m/z and intensity. In a chromatogram this will be a
|
5
|
+
# retention time and an intensity. (This class can be subclassed if
|
6
|
+
# desired)
|
7
|
+
class Peak < Array
|
8
|
+
alias_method :x, :first
|
9
|
+
alias_method :y, :last
|
10
|
+
end
|
108
11
|
end
|
@@ -0,0 +1,260 @@
|
|
1
|
+
require 'mspire/bin'
|
2
|
+
|
3
|
+
module Mspire
|
4
|
+
# a collection of peak objects
|
5
|
+
class PeakList < Array
|
6
|
+
|
7
|
+
def lo_x
|
8
|
+
self.first[0]
|
9
|
+
end
|
10
|
+
|
11
|
+
def hi_x
|
12
|
+
self.last[0]
|
13
|
+
end
|
14
|
+
|
15
|
+
DEFAULT_MERGE = {
|
16
|
+
:bin_width => 5,
|
17
|
+
:bin_unit => :ppm,
|
18
|
+
:normalize => true,
|
19
|
+
:return_data => false,
|
20
|
+
:split => :share,
|
21
|
+
:centroided => true,
|
22
|
+
}
|
23
|
+
|
24
|
+
class << self
|
25
|
+
|
26
|
+
def create_bins(peaklists, opts)
|
27
|
+
min, max = min_max_mz(peaklists)
|
28
|
+
|
29
|
+
divisions = []
|
30
|
+
bin_width = opts[:bin_width]
|
31
|
+
use_ppm = (opts[:bin_unit] == :ppm)
|
32
|
+
current_mz = min
|
33
|
+
loop do
|
34
|
+
if current_mz >= max
|
35
|
+
divisions << max
|
36
|
+
break
|
37
|
+
else
|
38
|
+
divisions << current_mz
|
39
|
+
current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
|
40
|
+
end
|
41
|
+
end
|
42
|
+
# make each bin exclusive so there is no overlap
|
43
|
+
bins = divisions.each_cons(2).map {|pair| Mspire::Bin.new(*pair, true) }
|
44
|
+
# make the last bin *inclusive* of the terminating value
|
45
|
+
bins[-1] = Mspire::Bin.new(bins.last.begin, bins.last.end)
|
46
|
+
bins
|
47
|
+
end
|
48
|
+
|
49
|
+
def min_max_mz(peaklists)
|
50
|
+
# find the min and max across all spectra
|
51
|
+
first_peaklist = peaklists.first
|
52
|
+
min = first_peaklist.first[0]; max = first_peaklist.last[0]
|
53
|
+
peaklists.each do |peaklist|
|
54
|
+
min = peaklist.lo_x if peaklist.lo_x < min
|
55
|
+
max = peaklist.hi_x if peaklist.hi_x > max
|
56
|
+
end
|
57
|
+
[min, max]
|
58
|
+
end
|
59
|
+
|
60
|
+
def merge_centroids(peaklists, opts={})
|
61
|
+
|
62
|
+
# Create Mspire::Bin objects
|
63
|
+
bins = opts[:bins] ? opts[:bins] : create_bins(peaklists, opts)
|
64
|
+
|
65
|
+
peaklists.each do |peaklist|
|
66
|
+
Mspire::Bin.bin(bins, peaklist, &:first)
|
67
|
+
end
|
68
|
+
|
69
|
+
pseudo_peaks = bins.map do |bin|
|
70
|
+
[bin, bin.data.reduce(0.0) {|sum,peak| sum + peak[1] }]
|
71
|
+
end
|
72
|
+
|
73
|
+
pseudo_peaklist = Mspire::PeakList.new(pseudo_peaks)
|
74
|
+
|
75
|
+
peak_lists = pseudo_peaklist.split(opts[:split])
|
76
|
+
|
77
|
+
return_data = []
|
78
|
+
final_peaklist = []
|
79
|
+
peak_lists.each_with_index do |peak_list,i|
|
80
|
+
#peaks.each do |peak|
|
81
|
+
tot_intensity = peak_list.map(&:last).reduce(:+)
|
82
|
+
return_data_per_peak = [] if opts[:return_data]
|
83
|
+
weighted_mz = 0.0
|
84
|
+
peak_list.each do |peak|
|
85
|
+
pre_scaled_intensity = peak[0].data.reduce(0.0) {|sum,v| sum + v.last }
|
86
|
+
post_scaled_intensity = peak[1]
|
87
|
+
# some peaks may have been shared. In this case the intensity
|
88
|
+
# for that peak was downweighted. However, the actual data
|
89
|
+
# composing that peak is not altered when the intensity is
|
90
|
+
# shared. So, to calculate a proper weighted avg we need to
|
91
|
+
# downweight the intensity of any data point found within a bin
|
92
|
+
# whose intensity was scaled.
|
93
|
+
correction_factor =
|
94
|
+
if pre_scaled_intensity != post_scaled_intensity
|
95
|
+
post_scaled_intensity / pre_scaled_intensity
|
96
|
+
else
|
97
|
+
1.0
|
98
|
+
end
|
99
|
+
|
100
|
+
return_data_per_peak.push(*peak[0].data) if opts[:return_data]
|
101
|
+
|
102
|
+
peak[0].data.each do |lil_point|
|
103
|
+
weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
return_data << return_data_per_peak if opts[:return_data]
|
107
|
+
final_peaklist << Mspire::Peak.new([weighted_mz, tot_intensity])
|
108
|
+
end
|
109
|
+
[final_peaklist, return_data]
|
110
|
+
end
|
111
|
+
|
112
|
+
# returns a new peak_list which has been merged with the others.
|
113
|
+
# opts[:resolution]) and then segment according to monotonicity (sharing
|
114
|
+
# intensity between abutting points). The final m/z is the weighted
|
115
|
+
# averaged of all the m/z's in each peak. Valid opts (with default listed
|
116
|
+
# first):
|
117
|
+
#
|
118
|
+
# :bin_width => 5
|
119
|
+
# :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
|
120
|
+
# :bins => array of Mspire::Bin objects for custom bins (overides other bin options)
|
121
|
+
# :normalize => true if true, divides total intensity by
|
122
|
+
# number of spectra
|
123
|
+
# :return_data => false returns a parallel array containing
|
124
|
+
# the peaks associated with each returned peak
|
125
|
+
# :split => false | :share | :greedy_y see Mspire::Peak#split
|
126
|
+
# :centroided => true treat the data as centroided
|
127
|
+
#
|
128
|
+
# The binning algorithm is roughly the fastest possible algorithm that
|
129
|
+
# would allow for arbitrary, non-constant bin widths (a ratcheting
|
130
|
+
# algorithm O(n + m))
|
131
|
+
#
|
132
|
+
# Assumes the peaklists are already sorted by m/z.
|
133
|
+
def merge(peaklists, opts={})
|
134
|
+
opts = DEFAULT_MERGE.merge(opts)
|
135
|
+
|
136
|
+
(peaklist, returned_data) =
|
137
|
+
if opts[:centroided]
|
138
|
+
merge_centroids(peaklists, opts)
|
139
|
+
else
|
140
|
+
raise NotImplementedError, "need to implement profile merging"
|
141
|
+
end
|
142
|
+
|
143
|
+
if opts[:normalize]
|
144
|
+
sz = peaklists.size
|
145
|
+
peaklist.each {|peak| peak[1] = peak[1].to_f / sz }
|
146
|
+
end
|
147
|
+
if opts[:return_data]
|
148
|
+
$stderr.puts "returning peaklist (#{peaklist.size}) and data" if $VERBOSE
|
149
|
+
[peaklist, returned_data]
|
150
|
+
else
|
151
|
+
$stderr.puts "returning peaklist (#{peaklist.size})" if $VERBOSE
|
152
|
+
peaklist
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
|
158
|
+
# returns an Array of peaklist objects. Splits run of 1 or more local
|
159
|
+
# minima into multiple peaklists. When a point is 'shared' between two
|
160
|
+
# adjacent hill-ish areas, the choice of how to resolve multi-hills (runs
|
161
|
+
# of data above zero) is one of:
|
162
|
+
#
|
163
|
+
# false/nil => only split on zeros
|
164
|
+
# :share => give each peak its rightful portion of shared peaks, dividing the
|
165
|
+
# intensity based on the intensity of adjacent peaks
|
166
|
+
# :greedy_y => give the point to the peak with highest point next to
|
167
|
+
# the point in question. tie goes lower.
|
168
|
+
#
|
169
|
+
# if return_local_minima is true, a parallel array of local minima indices is
|
170
|
+
# returned (only makes sense if split_multipeaks is false)
|
171
|
+
#
|
172
|
+
# assumes that a new peak can be made with an array containing the x
|
173
|
+
# value and the y value.
|
174
|
+
def split(split_multipeaks=false, return_local_minima=false)
|
175
|
+
if split_multipeaks
|
176
|
+
(zeroed_peaks, local_min_ind_ar) = self.split(false, true)
|
177
|
+
$stderr.print "splitting on local minima ..." if $VERBOSE
|
178
|
+
no_local_minima_peaks = zeroed_peaks.zip(local_min_ind_ar).map do |peak, lm_indices|
|
179
|
+
new_peaks = [ peak.class.new ]
|
180
|
+
if lm_indices.size > 0
|
181
|
+
prev_lm_i = -1 # <- it's okay, we don't use until it is zero
|
182
|
+
lm_indices.each do |lm_i|
|
183
|
+
lm = peak[lm_i]
|
184
|
+
point_class = lm.class
|
185
|
+
|
186
|
+
# push onto the last peak all the points from right after the previous local min
|
187
|
+
# to just before this local min
|
188
|
+
new_peaks.last.push( *peak[(prev_lm_i+1)..(lm_i-1)] )
|
189
|
+
before_pnt = peak[lm_i-1]
|
190
|
+
after_pnt = peak[lm_i+1]
|
191
|
+
|
192
|
+
case split_multipeaks
|
193
|
+
when :share
|
194
|
+
sum = before_pnt[1] + after_pnt[1]
|
195
|
+
# push onto the last peak its portion of the local min
|
196
|
+
new_peaks.last << point_class.new( [lm[0], lm[1] * (before_pnt[1].to_f/sum)] )
|
197
|
+
# create a new peak that contains its portion of the local min
|
198
|
+
new_peaks << self.class.new( [point_class.new([lm[0], lm[1] * (after_pnt[1].to_f/sum)])] )
|
199
|
+
prev_lm_i = lm_i
|
200
|
+
when :greedy_y
|
201
|
+
if before_pnt[1] >= after_pnt[1]
|
202
|
+
new_peaks.last << lm
|
203
|
+
new_peaks << self.class.new
|
204
|
+
prev_lm_i = lm_i
|
205
|
+
else
|
206
|
+
new_peaks << self.class.new( [lm] )
|
207
|
+
prev_lm_i = lm_i
|
208
|
+
end
|
209
|
+
else
|
210
|
+
raise ArgumentError, "only recognize :share, :greedy_y, or false for the arg in #split(arg)"
|
211
|
+
end
|
212
|
+
end
|
213
|
+
new_peaks.last.push( *peak[(prev_lm_i+1)...peak.size] )
|
214
|
+
new_peaks
|
215
|
+
else
|
216
|
+
[peak]
|
217
|
+
end
|
218
|
+
end.flatten(1) # end zip
|
219
|
+
$stderr.puts "now #{no_local_minima_peaks.size} peaks." if $VERBOSE
|
220
|
+
no_local_minima_peaks
|
221
|
+
else
|
222
|
+
$stderr.print "splitting on zeros..." if $VERBOSE
|
223
|
+
# first, split the peaks based on zero intensity values
|
224
|
+
# and simultaneously keep track of the local minima within each
|
225
|
+
# resulting peak
|
226
|
+
peak_lists = []
|
227
|
+
local_min_ind_ar = []
|
228
|
+
in_peak = false
|
229
|
+
self.each_with_index do |peak, index|
|
230
|
+
previous_y = self[index - 1][1]
|
231
|
+
if peak[1] > 0
|
232
|
+
if !in_peak
|
233
|
+
in_peak = 0
|
234
|
+
peak_lists << self.class.new([peak])
|
235
|
+
local_min_ind_ar << []
|
236
|
+
else
|
237
|
+
peak_lists.last << peak
|
238
|
+
# if on_upslope(previous_y, point[1])
|
239
|
+
if previous_y < peak[1]
|
240
|
+
# If we were previously on a downslope and we are now on an upslope
|
241
|
+
# then the previous index is a local min
|
242
|
+
prev_previous_y = self[index - 2][1]
|
243
|
+
# on_downslope(prev_previous_y, previous_y)
|
244
|
+
if prev_previous_y > previous_y
|
245
|
+
# We have found a local min
|
246
|
+
local_min_ind_ar.last << (in_peak-1)
|
247
|
+
end
|
248
|
+
end # end if (upslope)
|
249
|
+
end # end if !in_peak
|
250
|
+
in_peak += 1
|
251
|
+
elsif in_peak
|
252
|
+
in_peak = false
|
253
|
+
end # end if point[1] > 0
|
254
|
+
end
|
255
|
+
$stderr.puts "#{peak_lists.size} no-whitespace-inside peak_lists." if $VERBOSE
|
256
|
+
return_local_minima ? [peak_lists, local_min_ind_ar] : peak_lists
|
257
|
+
end #
|
258
|
+
end # def split
|
259
|
+
end
|
260
|
+
end
|
data/lib/mspire/spectrum.rb
CHANGED
@@ -1,173 +1,19 @@
|
|
1
1
|
require 'mspire/spectrum_like'
|
2
|
-
require 'bsearch'
|
3
|
-
require 'mspire/bin'
|
4
|
-
require 'mspire/peak'
|
5
2
|
|
6
3
|
module Mspire
|
7
|
-
# note that a point is an [m/z, intensity] doublet.
|
8
|
-
# A peak is considered a related string of points
|
9
4
|
class Spectrum
|
10
5
|
include Mspire::SpectrumLike
|
11
6
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
}
|
19
|
-
|
20
|
-
class << self
|
21
|
-
|
22
|
-
def from_points(ar_of_doublets)
|
23
|
-
_mzs = []
|
24
|
-
_ints = []
|
25
|
-
ar_of_doublets.each do |mz, int|
|
26
|
-
_mzs << mz
|
27
|
-
_ints << int
|
28
|
-
end
|
29
|
-
self.new([_mzs, _ints])
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
# returns a new spectrum which has been merged with the others. If the
|
34
|
-
# spectra are centroided (just checks the first one and assumes the others
|
35
|
-
# are the same) then it will bin the points (bin width determined by
|
36
|
-
# opts[:resolution]) and then segment according to monotonicity (sharing
|
37
|
-
# intensity between abutting points). The final m/z is the weighted
|
38
|
-
# averaged of all the m/z's in each peak. Valid opts (with default listed
|
39
|
-
# first):
|
40
|
-
#
|
41
|
-
# :bin_width => 5
|
42
|
-
# :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
|
43
|
-
# :bins => array of Bin objects for custom bins (overides other bin options)
|
44
|
-
# :normalize => false if true, divides total intensity by
|
45
|
-
# number of spectra
|
46
|
-
# :return_data => false returns a parallel array containing
|
47
|
-
# the peaks associated with each returned point
|
48
|
-
# :split => false | :share | :greedy_y see Mspire::Peak#split
|
49
|
-
#
|
50
|
-
# The binning algorithm is the fastest possible algorithm that would allow
|
51
|
-
# for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
|
52
|
-
def merge(spectra, opts={})
|
53
|
-
opt = DEFAULT_MERGE.merge(opts)
|
54
|
-
(spectrum, returned_data) =
|
55
|
-
unless spectra.first.centroided? == false
|
56
|
-
# find the min and max across all spectra
|
57
|
-
first_mzs = spectra.first.mzs
|
58
|
-
min = first_mzs.first ; max = first_mzs.last
|
59
|
-
spectra.each do |spectrum|
|
60
|
-
mzs = spectrum.mzs
|
61
|
-
min = mzs.first if mzs.first < min
|
62
|
-
max = mzs.last if mzs.last > max
|
63
|
-
end
|
64
|
-
|
65
|
-
# Create Bin objects
|
66
|
-
bins =
|
67
|
-
if opt[:bins]
|
68
|
-
opt[:bins]
|
69
|
-
else
|
70
|
-
divisions = []
|
71
|
-
bin_width = opt[:bin_width]
|
72
|
-
use_ppm = (opt[:bin_unit] == :ppm)
|
73
|
-
current_mz = min
|
74
|
-
loop do
|
75
|
-
if current_mz >= max
|
76
|
-
divisions << max
|
77
|
-
break
|
78
|
-
else
|
79
|
-
divisions << current_mz
|
80
|
-
current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
|
81
|
-
end
|
82
|
-
end
|
83
|
-
# make each bin exclusive so there is no overlap
|
84
|
-
bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
|
85
|
-
# make the last bin *inclusive* of the terminating value
|
86
|
-
bins[-1] = Bin.new(bins.last.begin, bins.last.end)
|
87
|
-
bins
|
88
|
-
end
|
89
|
-
|
90
|
-
spectra.each do |spectrum|
|
91
|
-
Bin.bin(bins, spectrum.points, &:first)
|
92
|
-
end
|
93
|
-
|
94
|
-
pseudo_points = bins.map do |bin|
|
95
|
-
#int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3) # <- just for info:
|
96
|
-
[bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
|
97
|
-
end
|
98
|
-
|
99
|
-
#p_mzs = []
|
100
|
-
#p_ints = []
|
101
|
-
#p_num_points = []
|
102
|
-
#pseudo_points.each do |psp|
|
103
|
-
# p_mzs << ((psp.first.begin + psp.first.end)/2)
|
104
|
-
# p_ints << psp.last
|
105
|
-
# p_num_points << psp.first.data.size
|
106
|
-
#end
|
107
|
-
|
108
|
-
#File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
|
109
|
-
#abort 'here'
|
110
|
-
|
111
|
-
|
112
|
-
peaks = Mspire::Peak.new(pseudo_points).split(opt[:split])
|
113
|
-
|
114
|
-
return_data = []
|
115
|
-
_mzs = [] ; _ints = []
|
116
|
-
|
117
|
-
#p peaks[97]
|
118
|
-
#puts "HIYA"
|
119
|
-
#abort 'here'
|
120
|
-
|
121
|
-
peaks.each_with_index do |peak,i|
|
122
|
-
#peaks.each do |peak|
|
123
|
-
tot_intensity = peak.map(&:last).reduce(:+)
|
124
|
-
return_data_per_peak = [] if opt[:return_data]
|
125
|
-
weighted_mz = 0.0
|
126
|
-
peak.each do |point|
|
127
|
-
pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
|
128
|
-
post_scaled_intensity = point[1]
|
129
|
-
# some peaks may have been shared. In this case the intensity
|
130
|
-
# for that peak was downweighted. However, the actually data
|
131
|
-
# composing that peak is not altered when the intensity is
|
132
|
-
# shared. So, to calculate a proper weighted avg we need to
|
133
|
-
# downweight the intensity of any data point found within a bin
|
134
|
-
# whose intensity was scaled.
|
135
|
-
correction_factor =
|
136
|
-
if pre_scaled_intensity != post_scaled_intensity
|
137
|
-
post_scaled_intensity / pre_scaled_intensity
|
138
|
-
else
|
139
|
-
1.0
|
140
|
-
end
|
141
|
-
|
142
|
-
return_data_per_peak.push(*point[0].data) if opt[:return_data]
|
143
|
-
|
144
|
-
point[0].data.each do |lil_point|
|
145
|
-
weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
|
146
|
-
end
|
147
|
-
end
|
148
|
-
return_data << return_data_per_peak if opt[:return_data]
|
149
|
-
_mzs << weighted_mz
|
150
|
-
_ints << tot_intensity
|
151
|
-
end
|
152
|
-
[Spectrum.new([_mzs, _ints]), return_data]
|
153
|
-
else
|
154
|
-
raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
|
155
|
-
end
|
156
|
-
|
157
|
-
if opt[:normalize]
|
158
|
-
sz = spectra.size
|
159
|
-
spectrum.intensities.map! {|v| v.to_f / sz }
|
160
|
-
end
|
161
|
-
if opt[:return_data]
|
162
|
-
$stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
|
163
|
-
[spectrum, return_data]
|
164
|
-
else
|
165
|
-
$stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
|
166
|
-
spectrum
|
167
|
-
end
|
7
|
+
def self.from_peaklist(peaklist)
|
8
|
+
_mzs = []
|
9
|
+
_ints = []
|
10
|
+
peaklist.each do |mz, int|
|
11
|
+
_mzs << mz
|
12
|
+
_ints << int
|
168
13
|
end
|
169
|
-
|
14
|
+
self.new([_mzs, _ints])
|
170
15
|
end
|
16
|
+
|
171
17
|
end
|
172
18
|
end
|
173
19
|
|
data/lib/mspire/spectrum_like.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'bsearch'
|
2
|
+
|
1
3
|
module Mspire
|
2
4
|
module SpectrumLike
|
3
5
|
include Enumerable
|
@@ -63,12 +65,12 @@ module Mspire
|
|
63
65
|
end
|
64
66
|
|
65
67
|
# yields(mz, inten) across the spectrum, or array of doublets if no block
|
66
|
-
def
|
68
|
+
def peaks(&block)
|
67
69
|
@data_arrays[0].zip(@data_arrays[1], &block)
|
68
70
|
end
|
69
71
|
|
70
|
-
alias_method :each, :
|
71
|
-
alias_method :
|
72
|
+
alias_method :each, :peaks
|
73
|
+
alias_method :each_peak, :peaks
|
72
74
|
|
73
75
|
# if the mzs and intensities are the same then the spectra are considered
|
74
76
|
# equal
|
@@ -91,9 +93,9 @@ module Mspire
|
|
91
93
|
# instruments are bad about this)
|
92
94
|
# returns self
|
93
95
|
def sort!
|
94
|
-
|
95
|
-
|
96
|
-
|
96
|
+
_peaks = peaks.to_a
|
97
|
+
_peaks.sort!
|
98
|
+
_peaks.each_with_index {|(mz,int), i| @data_arrays[0][i] = mz ; @data_arrays[1][i] = int }
|
97
99
|
self
|
98
100
|
end
|
99
101
|
|
@@ -134,9 +136,5 @@ module Mspire
|
|
134
136
|
find_all_nearest_index(val).map {|i| mzs[i] }
|
135
137
|
end
|
136
138
|
|
137
|
-
# uses Mspire::Spectrum.merge
|
138
|
-
def merge(other_spectra, opts={})
|
139
|
-
Mspire::Spectrum.merge([self, *other_spectra], opts)
|
140
|
-
end
|
141
139
|
end
|
142
140
|
end
|
data/lib/obo/ontology.rb
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
require 'obo' # the gem obo
|
2
2
|
|
3
|
+
module Obo
|
4
|
+
class Stanza
|
5
|
+
|
6
|
+
def cast_method
|
7
|
+
xref = @tagvalues['xref'].first
|
8
|
+
@cast_method =
|
9
|
+
if xref.nil? || (@cast_method == false)
|
10
|
+
false
|
11
|
+
else
|
12
|
+
if @cast_method
|
13
|
+
@cast_method
|
14
|
+
else
|
15
|
+
case xref[/value-type:xsd\\:([^\s]+) /, 1]
|
16
|
+
when 'float' ; :to_f
|
17
|
+
when 'int' ; :to_i
|
18
|
+
when 'string' ; :to_s
|
19
|
+
else ; false
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# returns the value cast based on rules in first xref
|
26
|
+
# no casting performed if there is no xref
|
27
|
+
def cast(val)
|
28
|
+
@cast_method ? val.send(@cast_method) : val
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
3
33
|
module Obo
|
4
34
|
class Ontology
|
5
35
|
DIR = File.expand_path(File.dirname(__FILE__) + '/../../obo')
|
@@ -16,10 +46,16 @@ module Obo
|
|
16
46
|
def id_to_name
|
17
47
|
@id_to_name ||= build_hash('id', 'name')
|
18
48
|
end
|
49
|
+
|
50
|
+
def id_to_cast
|
51
|
+
@id_to_cast ||= Hash[ id_to_element.map {|id,el| [id, el.cast_method] } ]
|
52
|
+
end
|
53
|
+
|
19
54
|
# returns a name to id Hash
|
20
55
|
def name_to_id
|
21
56
|
@name_to_id ||= build_hash('name', 'id')
|
22
57
|
end
|
58
|
+
|
23
59
|
def id_to_element
|
24
60
|
@id_to_element ||= build_hash('id', nil)
|
25
61
|
end
|
data/mspire.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "mspire"
|
8
|
-
s.version = "0.7.7"
|
8
|
+
s.version = "0.7.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John T. Prince", "Simon Chiang"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-29"
|
13
13
|
s.description = "mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems"
|
14
14
|
s.email = "jtprince@gmail.com"
|
15
15
|
s.executables = ["mzml_to_imzml"]
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/merge.rb",
|
32
32
|
"lib/mspire.rb",
|
33
33
|
"lib/mspire/bin.rb",
|
34
|
-
"lib/mspire/cv.rb",
|
34
|
+
"lib/mspire/cv/obo.rb",
|
35
35
|
"lib/mspire/cv/param.rb",
|
36
36
|
"lib/mspire/cv/paramable.rb",
|
37
37
|
"lib/mspire/digester.rb",
|
@@ -102,7 +102,7 @@ Gem::Specification.new do |s|
|
|
102
102
|
"lib/mspire/mzml/spectrum_list.rb",
|
103
103
|
"lib/mspire/obo.rb",
|
104
104
|
"lib/mspire/peak.rb",
|
105
|
-
"lib/mspire/
|
105
|
+
"lib/mspire/peak_list.rb",
|
106
106
|
"lib/mspire/plms1.rb",
|
107
107
|
"lib/mspire/quant/qspec.rb",
|
108
108
|
"lib/mspire/quant/qspec/protein_group_comparison.rb",
|
@@ -123,7 +123,9 @@ Gem::Specification.new do |s|
|
|
123
123
|
"script/mzml_read_binary.rb",
|
124
124
|
"spec/cv/param_spec.rb",
|
125
125
|
"spec/mspire/bin_spec.rb",
|
126
|
+
"spec/mspire/cv/obo_spec.rb",
|
126
127
|
"spec/mspire/cv/param_spec.rb",
|
128
|
+
"spec/mspire/cv/paramable_spec.rb",
|
127
129
|
"spec/mspire/digester_spec.rb",
|
128
130
|
"spec/mspire/error_rate/qvalue_spec.rb",
|
129
131
|
"spec/mspire/fasta_spec.rb",
|
@@ -148,7 +150,7 @@ Gem::Specification.new do |s|
|
|
148
150
|
"spec/mspire/mzml/source_file_spec.rb",
|
149
151
|
"spec/mspire/mzml/spectrum_spec.rb",
|
150
152
|
"spec/mspire/mzml_spec.rb",
|
151
|
-
"spec/mspire/
|
153
|
+
"spec/mspire/peak_list_spec.rb",
|
152
154
|
"spec/mspire/plms1_spec.rb",
|
153
155
|
"spec/mspire/quant/qspec_spec.rb",
|
154
156
|
"spec/mspire/spectrum_spec.rb",
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/cv/obo'
|
4
|
+
|
5
|
+
describe 'Mspire::CV::Obo' do
|
6
|
+
it 'finds names based on id' do
|
7
|
+
id_to_name = Mspire::CV::Obo::NAME
|
8
|
+
id_to_name.should be_a(Hash)
|
9
|
+
id_to_name['MS:1000005'].should == 'sample volume'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'finds casts based on id' do
|
13
|
+
id_to_cast = Mspire::CV::Obo::CAST
|
14
|
+
id_to_cast.should be_a(Hash)
|
15
|
+
id_to_cast['MS:1000005'].should == :to_f
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
@@ -19,5 +19,17 @@ describe Mspire::CV::Param do
|
|
19
19
|
param2.unit.accession.should == 'UO:0000024'
|
20
20
|
end
|
21
21
|
|
22
|
+
it 'casts values based on info in the obo itself' do
|
23
|
+
param = Mspire::CV::Param['MS:1000005', "2.2"]
|
24
|
+
param.value.should == 2.2
|
25
|
+
param.value.should be_a(Float)
|
26
|
+
param = Mspire::CV::Param['MS:1000041', "3"]
|
27
|
+
param.value.should be_a(Integer)
|
28
|
+
param.value.should == 3
|
29
|
+
# doesn't get greedy about casting things like nil
|
30
|
+
param = Mspire::CV::Param['MS:1000041', nil]
|
31
|
+
param.value.should be_nil
|
32
|
+
end
|
33
|
+
|
22
34
|
end
|
23
35
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'mspire/cv/paramable'
|
4
|
+
require 'mspire/cv/param'
|
5
|
+
require 'mspire/user_param'
|
6
|
+
require 'mspire/mzml/referenceable_param_group'
|
7
|
+
|
8
|
+
class MockObject
|
9
|
+
include Mspire::CV::Paramable
|
10
|
+
end
|
11
|
+
|
12
|
+
describe 'Mspire::CV::Paramable' do
|
13
|
+
|
14
|
+
subject do
|
15
|
+
paramable = MockObject.new :params => ['MS:1000007', ['MS:1000511', 2]]
|
16
|
+
paramable.user_params << Mspire::UserParam.new('hello', 477)
|
17
|
+
paramable.ref_param_groups << Mspire::Mzml::ReferenceableParamGroup.new('id1', :params => [['MS:1000512', 'filter string']])
|
18
|
+
paramable
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'can be initialized with different types of params' do
|
22
|
+
subject.cv_params.size.should == 2
|
23
|
+
subject.ref_param_groups.size.should == 1
|
24
|
+
subject.user_params.size.should == 1
|
25
|
+
end
|
26
|
+
|
27
|
+
it '#params grabs all params' do
|
28
|
+
params = subject.params
|
29
|
+
params.size.should == 4
|
30
|
+
params.map(&:class).uniq.size.should == 2
|
31
|
+
end
|
32
|
+
|
33
|
+
it '#params? asks if there are any' do
|
34
|
+
subject.params?.should be_true
|
35
|
+
end
|
36
|
+
|
37
|
+
it '#accessionable_params returns those with accession numbers' do
|
38
|
+
subject.accessionable_params.size.should == 3
|
39
|
+
end
|
40
|
+
|
41
|
+
it '#param finds the value or true if param name exists' do
|
42
|
+
# doesn't take accessions!
|
43
|
+
subject.fetch('MS:1000511').should be_false
|
44
|
+
subject.fetch('ms level').should == 2
|
45
|
+
subject.fetch('inlet type').should be_true
|
46
|
+
end
|
47
|
+
|
48
|
+
it '#params? tells if has any' do
|
49
|
+
subject.params?.should be_true
|
50
|
+
mine = subject.dup
|
51
|
+
[:cv_params, :user_params, :ref_param_groups].each do |key|
|
52
|
+
mine.send("#{key}=", [])
|
53
|
+
end
|
54
|
+
mine.params?.should be_false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
data/spec/mspire/mzml_spec.rb
CHANGED
@@ -40,10 +40,11 @@ describe Mspire::Mzml do
|
|
40
40
|
|
41
41
|
it 'goes through spectrum with #each or #each_spectrum' do
|
42
42
|
mz_sizes = [20168, 315, 634]
|
43
|
-
|
43
|
+
centroided_list = [false, true, true]
|
44
44
|
@mzml.each do |spec|
|
45
45
|
spec.mzs.size.should == mz_sizes.shift
|
46
|
-
|
46
|
+
centroided = centroided_list.shift
|
47
|
+
spec.centroided?.should == centroided
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
@@ -71,7 +72,7 @@ describe Mspire::Mzml do
|
|
71
72
|
|
72
73
|
it 'can gracefully determine the m/z with highest peak in select scans' do
|
73
74
|
highest_mzs = Mspire::Mzml.foreach(@file).select {|v| v.ms_level > 1 }.map do |spec|
|
74
|
-
spec.
|
75
|
+
spec.peaks.sort_by(&:last).first.first
|
75
76
|
end
|
76
77
|
highest_mzs.map(&:round).should == [453, 866]
|
77
78
|
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
+
require 'mspire/peak_list'
|
3
4
|
require 'mspire/peak'
|
4
|
-
require 'mspire/peak/point'
|
5
5
|
|
6
|
-
describe Mspire::
|
6
|
+
describe Mspire::PeakList do
|
7
7
|
|
8
8
|
describe '#split' do
|
9
9
|
|
@@ -25,11 +25,11 @@ describe Mspire::Peak do
|
|
25
25
|
mz += diff
|
26
26
|
end
|
27
27
|
@xs.map! {|mz| mz.round(2) }
|
28
|
-
@
|
28
|
+
@peaks = @xs.zip(@intensities).map {|pair| Mspire::Peak.new(pair) }
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'splits on zeros by default' do
|
32
|
-
peak = Mspire::
|
32
|
+
peak = Mspire::PeakList.new(@peaks) # <- maybe more like a collection of peaks, but PeakList is flexible
|
33
33
|
peaks = peak.split
|
34
34
|
peaks.size.should == 4
|
35
35
|
peaks.should == [
|
@@ -46,17 +46,17 @@ describe Mspire::Peak do
|
|
46
46
|
|
47
47
|
# which it should since zeros are the ultimate local min!
|
48
48
|
it 'always cleans up surrounding zeros and does not split non-multipeaks' do
|
49
|
-
peak = Mspire::
|
49
|
+
peak = Mspire::PeakList.new(@peaks[0,7]) # simple
|
50
50
|
[:share, :greedy_y].each do |multipeak_split_method|
|
51
51
|
peaks = peak.split(multipeak_split_method)
|
52
|
-
peaks.first.should be_an_instance_of(Mspire::
|
52
|
+
peaks.first.should be_an_instance_of(Mspire::PeakList)
|
53
53
|
peaks.first.to_a.should == [[50.01, 3], [50.02, 8], [50.03, 9], [50.04, 7], [50.05, 2]]
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'does #split(:share) and shares the peak proportional to adjacent peaks' do
|
58
58
|
data = [[50.07, 0], [50.08, 3], [50.09, 8], [50.1, 2], [50.11, 9], [50.12, 7], [50.13, 1], [50.14, 3], [50.15, 0]]
|
59
|
-
multipeak1 = Mspire::
|
59
|
+
multipeak1 = Mspire::PeakList.new( data )
|
60
60
|
|
61
61
|
answer = [
|
62
62
|
[[50.08, 3], [50.09, 8], [50.1, (2*8.0/17)]],
|
@@ -79,12 +79,34 @@ describe Mspire::Peak do
|
|
79
79
|
]
|
80
80
|
|
81
81
|
# test a tie -> goes left!
|
82
|
-
|
83
|
-
|
84
|
-
multipeak2 = Mspire::
|
82
|
+
peaks = @peaks[7,9]
|
83
|
+
peaks[2] = Mspire::Peak.new([peaks[2][0], 9])
|
84
|
+
multipeak2 = Mspire::PeakList.new( peaks )
|
85
85
|
multipeak2.split(:greedy_y).should == answer
|
86
86
|
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
90
|
+
describe '#merge' do
|
91
|
+
|
92
|
+
subject do
|
93
|
+
|
94
|
+
list1 = [[10.1, 1], [10.5, 2], [10.7, 3], [11.5, 4]]
|
95
|
+
list2 = [[10.11, 5], [10.49, 6], [10.71, 7], [11.48, 8]]
|
96
|
+
list3 = [[10.09, 9], [10.51, 10], [10.72, 11], [11.51, 12]]
|
97
|
+
|
98
|
+
[list1, list2, list3].map {|peaks| Mspire::PeakList.new( peaks ) }
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'merges, giving exact weighted average m/z values for each cluster' do
|
102
|
+
(peaklist1, data) = Mspire::PeakList.merge(subject, :bin_width => 0.08, :bin_unit => :amu, :return_data => true)
|
103
|
+
peaklist2 = Mspire::PeakList.merge(subject, :bin_width => 0.08, :bin_unit => :amu)
|
104
|
+
peaklist1.should == peaklist2
|
105
|
+
peaks = [[10.097333333333331, 10.502222222222223, 10.713809523809525, 11.498333333333333], [5.0, 6.0, 7.0, 8.0]].transpose
|
106
|
+
peaklist1.should == Mspire::PeakList.new(peaks)
|
107
|
+
data.should == [[[10.1, 1], [10.11, 5], [10.09, 9]], [[10.5, 2], [10.49, 6], [10.51, 10]], [[10.7, 3], [10.71, 7], [10.72, 11]], [[11.5, 4], [11.48, 8], [11.51, 12]]]
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
90
111
|
end
|
112
|
+
|
@@ -44,22 +44,4 @@ describe Mspire::Spectrum do
|
|
44
44
|
spec.intensities.should == subject.intensities
|
45
45
|
end
|
46
46
|
end
|
47
|
-
|
48
|
-
describe 'merging spectra' do
|
49
|
-
subject do
|
50
|
-
data = [ [10.10, 10.5, 10.7, 11.5], [1, 2, 3, 4] ],
|
51
|
-
[ [10.11, 10.49, 10.71, 11.48], [5, 6, 7, 8] ],
|
52
|
-
[ [10.09, 10.51, 10.72, 11.51], [9, 10, 11, 12]
|
53
|
-
]
|
54
|
-
data.map {|datum| Mspire::Spectrum.new( datum ) }
|
55
|
-
end
|
56
|
-
it 'merges, giving exact weighted average m/z values for each cluster' do
|
57
|
-
(spec1, data) = Mspire::Spectrum.merge(subject, :bin_width => 0.08, :bin_unit => :amu, :return_data => true)
|
58
|
-
spec2 = Mspire::Spectrum.merge(subject, :bin_width => 0.08, :bin_unit => :amu)
|
59
|
-
spec1.should == spec2
|
60
|
-
spec1.should == Mspire::Spectrum.new([[10.097333333333331, 10.502222222222223, 10.713809523809525, 11.498333333333333], [5.0, 6.0, 7.0, 8.0]])
|
61
|
-
data.should == [[[10.1, 1], [10.11, 5], [10.09, 9]], [[10.5, 2], [10.49, 6], [10.51, 10]], [[10.7, 3], [10.71, 7], [10.72, 11]], [[11.5, 4], [11.48, 8], [11.51, 12]]]
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
47
|
end
|
data/spec/obo_spec.rb
CHANGED
@@ -5,6 +5,7 @@ require 'obo/ims'
|
|
5
5
|
require 'obo/unit'
|
6
6
|
|
7
7
|
describe 'accessing a specific Obo::Ontology' do
|
8
|
+
|
8
9
|
it 'can access MS obo' do
|
9
10
|
Obo::MS.id_to_name['MS:1000004'].should == 'sample mass'
|
10
11
|
Obo::MS.name_to_id['sample mass'].should == 'MS:1000004'
|
@@ -22,4 +23,15 @@ describe 'accessing a specific Obo::Ontology' do
|
|
22
23
|
Obo::Unit.name_to_id['temperature unit'].should == 'UO:0000005'
|
23
24
|
Obo::Unit.id_to_element['UO:0000005'].should be_a(Obo::Stanza)
|
24
25
|
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'Obo::Stanza' do
|
30
|
+
it 'can properly cast values' do
|
31
|
+
Obo::MS.id_to_element['MS:1000511'].cast('1').should == 1
|
32
|
+
Obo::MS.id_to_element['MS:1000004'].cast('2.2').should == 2.2
|
33
|
+
# don't ask me why mass resolution is cast into a string, but it is!
|
34
|
+
Obo::MS.id_to_element['MS:1000011'].cast('2.2').should == '2.2'
|
35
|
+
Obo::MS.id_to_element['MS:1000018'].cast('low to high').should == 'low to high'
|
36
|
+
end
|
25
37
|
end
|
@@ -67,7 +67,7 @@
|
|
67
67
|
<selectedIonList count="1">
|
68
68
|
<cvParam cvRef="MS" accession="MS:1000744" name="selected ion m/z" value="2.0"/>
|
69
69
|
<cvParam cvRef="MS" accession="MS:1000041" name="charge state" value="2"/>
|
70
|
-
<cvParam cvRef="MS" accession="MS:1000042" name="peak intensity" value="5"/>
|
70
|
+
<cvParam cvRef="MS" accession="MS:1000042" name="peak intensity" value="5.0"/>
|
71
71
|
</selectedIonList>
|
72
72
|
</precursor>
|
73
73
|
</precursorList>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-03-
|
13
|
+
date: 2012-03-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -195,7 +195,7 @@ files:
|
|
195
195
|
- lib/merge.rb
|
196
196
|
- lib/mspire.rb
|
197
197
|
- lib/mspire/bin.rb
|
198
|
-
- lib/mspire/cv.rb
|
198
|
+
- lib/mspire/cv/obo.rb
|
199
199
|
- lib/mspire/cv/param.rb
|
200
200
|
- lib/mspire/cv/paramable.rb
|
201
201
|
- lib/mspire/digester.rb
|
@@ -266,7 +266,7 @@ files:
|
|
266
266
|
- lib/mspire/mzml/spectrum_list.rb
|
267
267
|
- lib/mspire/obo.rb
|
268
268
|
- lib/mspire/peak.rb
|
269
|
-
- lib/mspire/
|
269
|
+
- lib/mspire/peak_list.rb
|
270
270
|
- lib/mspire/plms1.rb
|
271
271
|
- lib/mspire/quant/qspec.rb
|
272
272
|
- lib/mspire/quant/qspec/protein_group_comparison.rb
|
@@ -287,7 +287,9 @@ files:
|
|
287
287
|
- script/mzml_read_binary.rb
|
288
288
|
- spec/cv/param_spec.rb
|
289
289
|
- spec/mspire/bin_spec.rb
|
290
|
+
- spec/mspire/cv/obo_spec.rb
|
290
291
|
- spec/mspire/cv/param_spec.rb
|
292
|
+
- spec/mspire/cv/paramable_spec.rb
|
291
293
|
- spec/mspire/digester_spec.rb
|
292
294
|
- spec/mspire/error_rate/qvalue_spec.rb
|
293
295
|
- spec/mspire/fasta_spec.rb
|
@@ -312,7 +314,7 @@ files:
|
|
312
314
|
- spec/mspire/mzml/source_file_spec.rb
|
313
315
|
- spec/mspire/mzml/spectrum_spec.rb
|
314
316
|
- spec/mspire/mzml_spec.rb
|
315
|
-
- spec/mspire/
|
317
|
+
- spec/mspire/peak_list_spec.rb
|
316
318
|
- spec/mspire/plms1_spec.rb
|
317
319
|
- spec/mspire/quant/qspec_spec.rb
|
318
320
|
- spec/mspire/spectrum_spec.rb
|
data/lib/mspire/cv.rb
DELETED
data/lib/mspire/peak/point.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
|
2
|
-
module Mspire
|
3
|
-
class Peak
|
4
|
-
# A point is typically a doublet: an x value and a y value. In a spectrum
|
5
|
-
# this will be an m/z and intensity. In a chromatogram this will be a
|
6
|
-
# retention time and an intensity. (This class can be subclassed if
|
7
|
-
# desired)
|
8
|
-
class Point < Array
|
9
|
-
alias_method :x, :first
|
10
|
-
alias_method :y, :last
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|