ms-quant 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/LICENSE +21 -0
- data/README.rdoc +9 -0
- data/Rakefile +49 -0
- data/bin/peptide_hit_qvalues_to_spectral_counts_table.rb +126 -0
- data/lib/ms/quant/spectral_counts.rb +35 -0
- data/lib/ms-quant.rb +0 -0
- data/spec/ms/quant/spectral_counts_spec.rb +67 -0
- data/spec/spec_helper.rb +7 -0
- metadata +129 -0
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2011 Brigham Young University
|
2
|
+
authored by John T. Prince
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Rake tasks for the ms-quant gem: packaging/release (jeweler), specs,
# coverage (rcov) and API documentation (rdoc).
require 'rubygems'
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "ms-quant"
  gem.homepage = "http://github.com/princelab/ms-quant"
  gem.license = "MIT"
  gem.summary = %Q{quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)}
  gem.description = %Q{quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic). Not related to MSQuant}
  gem.email = "jtprince@gmail.com"
  gem.authors = ["John T. Prince"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  # gem.add_runtime_dependency 'jabber4r', '> 0.1'
  # gem.add_development_dependency 'rspec', '> 1.2.3'
  gem.add_runtime_dependency 'ms-ident', ">= 0.0.19"
  gem.add_development_dependency "spec-more", ">= 0"
  gem.add_development_dependency "jeweler", "~> 1.5.2"
  gem.add_development_dependency "rcov", ">= 0"
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs every *_spec.rb file under spec/ with lib/ and spec/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.verbose = true
end

# `rake rcov` runs the same spec files under rcov.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |spec|
  spec.libs << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.verbose = true
end

task :default => :spec

# 'rake/rdoctask' was deprecated in favour of 'rdoc/task' and is no longer
# shipped with newer rake releases; prefer the rdoc-provided task and fall
# back to the legacy one only where rdoc is too old to provide it.
rdoc_task_class =
  begin
    require 'rdoc/task'
    RDoc::Task
  rescue LoadError
    require 'rake/rdoctask'
    Rake::RDocTask
  end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ms-quant #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'ms/ident/peptide_hit/qvalue'
|
4
|
+
require 'ms/ident/protein_hit'
|
5
|
+
require 'ms/ident/peptide/db'
|
6
|
+
require 'ms/quant/spectral_counts'
|
7
|
+
|
8
|
+
require 'trollop'
|
9
|
+
|
10
|
+
# Verbose-only puts: when $VERBOSE is truthy, writes +args+ with puts and
# flushes $stdout right away; otherwise does nothing and returns nil.
def putsv(*args)
  return unless $VERBOSE

  puts(*args)
  $stdout.flush
end
|
15
|
+
|
16
|
+
# Command-line definition. --names is given as ONE comma-separated string
# (e.g. --names ctrl,treated) holding one name per input file.
opts = Trollop::Parser.new do
  banner %Q{usage: #{File.basename(__FILE__)} peptide_centric_db.yml, file1.psq ...
}
  opt :names, "comma-separated names for the table (otherwise filenames)", :type => String
  opt :fdr_percent, "%FDR as cutoff", :default => 1.0
  opt :write_subset, "(development) write subset db", :default => false
end

# Parser#parse signals --help/--version and bad options by raising; the
# standard handler turns those into the usual message + exit instead of a
# stack trace.  After parsing, ARGV holds only the positional arguments.
opt = Trollop::with_standard_exception_handling(opts) { opts.parse(ARGV) }

# need the peptide-centric db plus at least one hits file
if ARGV.size < 2
  opts.educate
  exit
end

peptide_centric_db_file = ARGV.shift

# opt[:names] always ends up an Array parallel to the remaining ARGV files.
opt[:names] =
  if opt[:names]
    opt[:names].split(',').map { |name| name.strip }
  else
    # default: the filename minus its extension (and minus a further '.phq'
    # extension if one is left after that)
    ARGV.map do |file|
      base = file.chomp(File.extname(file))
      base = base.chomp(File.extname(base)) if File.extname(base) == '.phq'
      base
    end
  end

unless opt[:names].size == ARGV.size
  abort "#{opt[:names].size} name(s) given for #{ARGV.size} file(s): --names needs one name per file"
end
|
37
|
+
|
38
|
+
# Reopen the peptide hit class so each hit can carry the name of the
# experiment it was read for (set while filtering, read while counting).
class Ms::Ident::PeptideHit
  attr_reader :experiment_name
  attr_writer :experiment_name
end
|
41
|
+
# --fdr-percent is a percentage; hit q-values are compared against a fraction
fdr_cutoff = opt[:fdr_percent] / 100.0

start = Time.now

# NOTE: verbosity is forced on for the whole run (this also enables ruby's
# own warnings); there is no command-line switch for it.
$VERBOSE = true

# One array of peptide hits per input file (same order as ARGV/opt[:names]).
# A hit is kept only if its q-value passes the cutoff AND its aaseq has an
# entry in the peptide-centric db; kept hits are tagged with the experiment
# name and given the protein ids found in the db.
ar_of_peptide_hit_ars = Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
  putsv "#{Time.now-start} seconds to read #{peptide_centric_db_file}"
  ARGV.zip(opt[:names]).map do |file, exp|
    peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
    putsv "#{file}: #{peptide_hits.size} hits"
    peptide_hits.select! do |hit|
      next false unless hit.qvalue <= fdr_cutoff
      # update each peptide with its protein hits
      prot_ids = peptide_to_proteins[hit.aaseq]
      next false unless prot_ids
      hit.experiment_name = exp
      hit.proteins = prot_ids
      true
    end
    peptide_hits
  end
end

# (development) dump the aaseq -> proteins mapping of the surviving hits
if opt[:write_subset]
  aaseqs_to_prots = {}
  ar_of_peptide_hit_ars.each do |pephits|
    pephits.each do |pephit|
      aaseqs_to_prots[pephit.aaseq] = pephit.proteins
    end
  end
  outfile = "peptidecentric_subset.yml"
  puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
  File.open(outfile,'w') do |out|
    aaseqs_to_prots.each do |k,v|
      out.puts(%Q{#{k}: #{v.map(&:id).join("\t") }})
    end
  end
end

# number of hits kept per experiment
if $VERBOSE
  opt[:names].zip(ar_of_peptide_hit_ars) do |name, pep_ar|
    puts "#{name}: #{pep_ar.size}"
  end
end

all_peptide_hits = ar_of_peptide_hit_ars.flatten(1)

# peptide_hit#proteins yields id strings (which hash properly), so protein
# groups are built from those ids.
# NOTE(review): the exact contents of a protein group are defined in ms-ident;
# below it is only relied on for #peptide_hits and #to_a.
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)

# partition them all out by experiment name: for each experiment, one Counts
# per protein group (parallel to protein_groups).
ar_of_count_data = opt[:names].map do |name|
  groups_of_pephits = protein_groups.map do |prot_group|
    prot_group.peptide_hits.select { |hit| hit.experiment_name == name }
  end
  # TODO: to split a shared hit's count between protein groups, pass a block
  # returning the number of protein groups that contain the given hit.
  Ms::Quant::SpectralCounts.counts(groups_of_pephits)
end

# protein_groups
# [ ar_of_counts_for_exp1, ar_of_counts_for_exp2, ar_of_counts_for_exp3 ]

# one tab-delimited row per protein group: all counts for every experiment,
# followed by the protein group's own members
protein_groups.zip(*ar_of_count_data) do |row|
  pg = row.shift
  puts((row.map(&:to_a).flatten + pg.to_a).join("\t"))
end
|
125
|
+
|
126
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ms/ident/protein_group'
|
3
|
+
|
4
|
+
module Ms
  module Quant
    # Spectral counting over groups of peptide hits.
    module SpectralCounts
      # The three counts produced for one group of peptide hits:
      #
      # spectral::    every peptide hit in the group
      # aaseqcharge:: distinct [aaseq, charge] combinations
      # aaseq::       distinct aaseq values
      Counts = Struct.new(:spectral, :aaseqcharge, :aaseq)

      # Returns an array of Counts objects parallel to groups_of_peptide_hits
      # (an enumerable of enumerables of peptide hits).  Each peptide hit
      # must respond to :aaseq and :charge.
      #
      # Without a block the counts are plain Integers.
      #
      # With a block, counts are split between groups sharing a hit: the
      # block is called with each hit and must return the number of groups
      # the hit is shared among; the hit then contributes 1.0/that number
      # instead of 1, and all counts are Floats.  The block should return
      # the same number for hits with the same aaseq (and charge), because
      # only the last value seen for a given key is used.
      def self.counts(groups_of_peptide_hits, &share_the_pephit)
        groups_of_peptide_hits.map do |peptide_hits|
          uniq_aaseq = {}
          uniq_aaseq_charge = {}
          linked_sizes = peptide_hits.map do |hit|
            linked_to_size = share_the_pephit ? share_the_pephit.call(hit) : 1
            # these guys will end up clobbering themselves, but the
            # linked_to_size should be consistent if the key is the same
            uniq_aaseq_charge[[hit.aaseq, hit.charge]] = linked_to_size
            uniq_aaseq[hit.aaseq] = linked_to_size
            linked_to_size
          end
          counts_data = [linked_sizes, uniq_aaseq_charge.values, uniq_aaseq.values].map do |sizes|
            if share_the_pephit
              sizes.inject(0.0) { |sum, size| sum + (1.0 / size) }
            else
              sizes.size
            end
          end
          Counts.new(*counts_data)
        end
      end
    end
  end
end
|
data/lib/ms-quant.rb
ADDED
File without changes
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/quant/spectral_counts'
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
# Lightweight stand-in for a peptide hit used by the specs below.
PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
  # Like Struct#initialize, but a missing (nil/false) proteins member
  # becomes an empty array.
  def initialize(*members)
    super
    self[:proteins] = [] unless proteins
  end

  # easier to read output
  def inspect
    protein_ids = proteins.map { |protein| protein.id }.join(',')
    "<PeptideHit aaseq=#{aaseq} charge=#{charge} proteins(ids)=#{protein_ids}>"
  end

  # hash on the object's identity instead of on its members
  def hash
    object_id
  end
end
|
17
|
+
# Lightweight stand-in for a protein hit used by the specs below.
ProteinHit = Struct.new(:id, :peptide_hits) do
  # easier to read output
  def inspect
    "<Prt #{id}>"
  end

  # hash on the object's identity instead of on its members
  def hash
    object_id
  end
end
|
23
|
+
|
24
|
+
describe 'groups of peptide hits' do
  before do
    # trailing comments: which proteins share the hit, and the resulting
    # fraction (1/number of proteins) when counts are split
    @pep_hits = [
      ['AABBCCDD', 2], #bg,mg1,mg2  0.33
      ['BBCC', 2], #bg,mg1,mg2  0.33
      ['DDEEFFGG', 2], #bg,mg1,mg2,sbm  0.25
      ['DDEEFFGG', 2], #bg,mg1,mg2,sbm  0.25
      ['DDEEFFGG', 3], #bg,mg1,mg2,sbm  0.25
      ['HIYA', 2], #bg,lg  0.5
    ].map {|ar| PeptideHit.new(*ar) }
    # [protein id, its peptide hits].  An array of pairs (rather than a
    # hash) keeps the proteins in this order on every ruby, which the
    # expected counts below rely on.
    @prot_hits = [
      # spectral_counts, aaseq+charge counts, aaseq counts
      ['big_guy', @pep_hits], # 6, 5, 4
      ['little_guy', [@pep_hits.last]], # 1, 1, 1 (split: 0.5, 0.5, 0.5)
      ['medium_guy1', @pep_hits[0,5]], # 5, 4, 3
      ['medium_guy2', @pep_hits[0,5]], # 5, 4, 3
      ['subsumed_by_medium', @pep_hits[2,3]], # 3, 2, 1
    ].map {|data| ProteinHit.new(*data) }
    # doubly linked for this
    @prot_hits.each do |prot|
      prot.peptide_hits.each {|pephit| pephit.proteins << prot }
    end
    # parallel to @prot_hits
    @expected_counts = [ [6,5,4], [1,1,1], [5,4,3], [5,4,3], [3,2,1] ]
    @expected_counts_split = [ [1.9167,1.6667,1.4167], [0.5,0.5,0.5], [1.41667, 1.1667, 0.91667], [1.41667, 1.16667, 0.91667], [0.75, 0.5, 0.25] ]
  end

  it 'finds spectral counts (without sharing)' do
    groups_of_pephits = @prot_hits.map(&:peptide_hits)
    counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits)
    @expected_counts.zip(counts) do |exp, act|
      act.to_a.is exp
    end
  end

  it 'finds spectral counts (splitting counts between shared)' do
    groups_of_pephits = @prot_hits.map(&:peptide_hits)
    # each hit is shared between all the proteins it is linked to
    counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
    @expected_counts_split.zip(counts) do |exp, act|
      exp.zip(act.to_a) {|e,a| a.should.be.close e, 0.0001 }
    end
  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ms-quant
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- John T. Prince
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-03-30 00:00:00 -06:00
|
18
|
+
default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: ms-ident
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
- 0
|
31
|
+
- 19
|
32
|
+
version: 0.0.19
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: spec-more
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jeweler
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ~>
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
segments:
|
57
|
+
- 1
|
58
|
+
- 5
|
59
|
+
- 2
|
60
|
+
version: 1.5.2
|
61
|
+
type: :development
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: rcov
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :development
|
75
|
+
version_requirements: *id004
|
76
|
+
description: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic). Not related to MSQuant
|
77
|
+
email: jtprince@gmail.com
|
78
|
+
executables:
|
79
|
+
- peptide_hit_qvalues_to_spectral_counts_table.rb
|
80
|
+
extensions: []
|
81
|
+
|
82
|
+
extra_rdoc_files:
|
83
|
+
- LICENSE
|
84
|
+
- README.rdoc
|
85
|
+
files:
|
86
|
+
- .document
|
87
|
+
- LICENSE
|
88
|
+
- README.rdoc
|
89
|
+
- Rakefile
|
90
|
+
- bin/peptide_hit_qvalues_to_spectral_counts_table.rb
|
91
|
+
- lib/ms-quant.rb
|
92
|
+
- lib/ms/quant/spectral_counts.rb
|
93
|
+
- spec/ms/quant/spectral_counts_spec.rb
|
94
|
+
- spec/spec_helper.rb
|
95
|
+
has_rdoc: true
|
96
|
+
homepage: http://github.com/princelab/ms-quant
|
97
|
+
licenses:
|
98
|
+
- MIT
|
99
|
+
post_install_message:
|
100
|
+
rdoc_options: []
|
101
|
+
|
102
|
+
require_paths:
|
103
|
+
- lib
|
104
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
segments:
|
110
|
+
- 0
|
111
|
+
version: "0"
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
segments:
|
118
|
+
- 0
|
119
|
+
version: "0"
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 1.3.7
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)
|
127
|
+
test_files:
|
128
|
+
- spec/ms/quant/spectral_counts_spec.rb
|
129
|
+
- spec/spec_helper.rb
|