ms-quant 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2011 Brigham Young University
2
+ authored by John T. Prince
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,9 @@
1
+ = ms-quant
2
+
3
+ Tools for the quantitation of mass spectrometry proteomics experiments. This
4
+ is not associated with {MSQuant}[http://msquant.sourceforge.net] although the
5
+ goals are similar.
6
+
7
+ == Copyright
8
+
9
+ See LICENSE
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
require 'rubygems'
require 'rake'

require 'jeweler'

# Jeweler task set. The block receives the Gem::Specification being built
# (see http://docs.rubygems.org/read/chapter/20 for the available fields).
Jeweler::Tasks.new do |gemspec|
  gemspec.name        = "ms-quant"
  gemspec.homepage    = "http://github.com/princelab/ms-quant"
  gemspec.license     = "MIT"
  gemspec.summary     = "quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)"
  gemspec.description = "quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic). Not related to MSQuant"
  gemspec.email       = "jtprince@gmail.com"
  gemspec.authors     = ["John T. Prince"]

  # Runtime dependencies are needed by users of the gem; development
  # dependencies are only needed to run the rake tasks and specs.
  gemspec.add_runtime_dependency     "ms-ident",  ">= 0.0.19"
  gemspec.add_development_dependency "spec-more", ">= 0"
  gemspec.add_development_dependency "jeweler",   "~> 1.5.2"
  gemspec.add_development_dependency "rcov",      ">= 0"
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'

# `rake spec`: run every *_spec.rb file with lib/ and spec/ on the load path.
Rake::TestTask.new(:spec) do |t|
  t.libs.concat(%w[lib spec])
  t.pattern = 'spec/**/*_spec.rb'
  t.verbose = true
end

require 'rcov/rcovtask'

# Rcov task over the same spec files (only spec/ is added to the load path).
Rcov::RcovTask.new do |t|
  t.libs << 'spec'
  t.pattern = 'spec/**/*_spec.rb'
  t.verbose = true
end

task :default => :spec

require 'rake/rdoctask'

# RDoc task; the title carries the contents of VERSION when that file exists.
Rake::RDocTask.new do |rd|
  gem_version = ""
  gem_version = File.read('VERSION') if File.exist?('VERSION')

  rd.rdoc_dir = 'rdoc'
  rd.title = "ms-quant #{gem_version}"
  rd.rdoc_files.include('README*', 'lib/**/*.rb')
end
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'ms/ident/peptide_hit/qvalue'
4
+ require 'ms/ident/protein_hit'
5
+ require 'ms/ident/peptide/db'
6
+ require 'ms/quant/spectral_counts'
7
+
8
+ require 'trollop'
9
+
10
# Verbose-only puts: writes +args+ to stdout and flushes immediately, but
# only when $VERBOSE is truthy. Returns nil when nothing is printed.
def putsv(*args)
  return unless $VERBOSE

  puts(*args)
  $stdout.flush
end
15
+
16
# Command-line definition.
# NOTE(review): the usage line advertises ".psq" inputs while the name
# derivation below strips a ".phq" extension -- confirm which is intended.
parser = Trollop::Parser.new do
  banner %Q{usage: #{File.basename(__FILE__)} peptide_centric_db.yml, file1.psq ...
}
  opt :names, "comma-separated names for the table (otherwise filenames)", :type => String
  opt :fdr_percent, "%FDR as cutoff", :default => 1.0
  opt :write_subset, "(development) write subset db", :default => false
end

# Parse through Trollop's standard handler so that --help and malformed
# options print a message and exit instead of raising an uncaught exception.
opt = Trollop::with_standard_exception_handling(parser) do
  parsed = parser.parse(ARGV)
  # we need the peptide-centric db plus at least one hits file
  raise Trollop::HelpNeeded if ARGV.size < 2
  parsed
end

peptide_centric_db_file = ARGV.shift

# One experiment name per remaining file: either given explicitly via
# --names, or derived from the filename by dropping its extension (and a
# further ".phq" extension if one is left).
names =
  if opt[:names]
    opt[:names].split(',').map(&:strip)
  else
    ARGV.map do |file|
      base = file.chomp(File.extname(file))
      File.extname(base) == '.phq' ? base.chomp('.phq') : base
    end
  end

unless names.size == ARGV.size
  abort "expected #{ARGV.size} names (one per hits file) but got #{names.size}"
end

# each hit remembers which experiment (input file) it came from
class Ms::Ident::PeptideHit
  attr_accessor :experiment_name
end

fdr_cutoff = opt[:fdr_percent] / 100.0

start = Time.now

# putsv only prints when $VERBOSE is set; progress output is always on here.
# (This also switches on the interpreter's own warnings.)
$VERBOSE = true

# For every input file: read its hits, keep those passing the FDR cutoff
# whose sequence is present in the peptide-centric db, and attach the
# experiment name and the db's protein entries to each kept hit.
ar_of_peptide_hit_ars = Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
  putsv "#{Time.now-start} seconds to read #{peptide_centric_db_file}"
  ARGV.zip(names).map do |file, exp|
    peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
    putsv "#{file}: #{peptide_hits.size} hits"
    peptide_hits.select! do |hit|
      next false unless hit.qvalue <= fdr_cutoff

      prot_ids = peptide_to_proteins[hit.aaseq]
      next false unless prot_ids

      hit.experiment_name = exp
      hit.proteins = prot_ids
      true
    end
    peptide_hits
  end
end

if opt[:write_subset]
  # (development) dump only the aaseq -> proteins entries that were used
  aaseqs_to_prots = {}
  ar_of_peptide_hit_ars.each do |pephits|
    pephits.each { |pephit| aaseqs_to_prots[pephit.aaseq] = pephit.proteins }
  end
  outfile = "peptidecentric_subset.yml"
  puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
  File.open(outfile, 'w') do |out|
    aaseqs_to_prots.each do |aaseq, prots|
      out.puts("#{aaseq}: #{prots.map(&:id).join("\t")}")
    end
  end
end

# number of hits kept per experiment
names.zip(ar_of_peptide_hit_ars) do |name, pep_ar|
  putsv "#{name}: #{pep_ar.size}"
end

all_peptide_hits = ar_of_peptide_hit_ars.flatten(1)

# peptide_hit#proteins yields id strings (which hash properly).
# NOTE(review): each protein group is used below through #peptide_hits and
# #to_a; confirm its exact shape against ms-ident.
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)

# Partition the hits of every protein group by experiment and count them.
# Counts are not split between groups sharing a hit; SpectralCounts.counts
# takes an optional block (hit -> number of sharers) if that is wanted.
ar_of_count_data = names.map do |name|
  groups_of_pephits = protein_groups.map do |prot_group|
    prot_group.peptide_hits.select { |hit| hit.experiment_name == name }
  end
  Ms::Quant::SpectralCounts.counts(groups_of_pephits)
end

# One tab-separated row per protein group: the counts for each experiment
# (in the order of names) followed by the protein group itself.
protein_groups.zip(*ar_of_count_data) do |protein_group, *counts_per_experiment|
  puts((counts_per_experiment.map(&:to_a).flatten + protein_group.to_a).join("\t"))
end
125
+
126
+
@@ -0,0 +1,35 @@
1
+ require 'set'
2
+ require 'ms/ident/protein_group'
3
+
4
module Ms
  module Quant
    module SpectralCounts
      # Counts for one group of peptide hits:
      # spectral::    one per hit
      # aaseqcharge:: one per distinct [aaseq, charge] pair
      # aaseq::       one per distinct aaseq
      Counts = Struct.new(:spectral, :aaseqcharge, :aaseq)

      # Returns an array of Counts objects parallel to
      # +groups_of_peptide_hits+ (an enumerable of arrays of peptide hits).
      # Every peptide hit must respond to :aaseq and :charge.
      #
      # Without a block each hit (or distinct key) counts as 1 and the
      # counts are Integers.
      #
      # With a block, the block is called with each hit and must return the
      # number of groups sharing that hit; the hit then contributes
      # 1.0/that number, so the counts are Floats.
      #
      #   counts(groups)                                # whole counts
      #   counts(groups) {|hit| hit.proteins.size }     # split counts
      def self.counts(groups_of_peptide_hits, &share_the_pephit)
        groups_of_peptide_hits.map do |peptide_hits|
          by_aaseq = {}
          by_aaseq_charge = {}
          per_hit = peptide_hits.map do |hit|
            shared_by = share_the_pephit ? share_the_pephit.call(hit) : 1
            # later hits with the same key overwrite earlier ones; the
            # share size is expected to be the same whenever the key is
            by_aaseq_charge[[hit.aaseq, hit.charge]] = shared_by
            by_aaseq[hit.aaseq] = shared_by
            shared_by
          end
          tallies = [per_hit, by_aaseq_charge.values, by_aaseq.values].map do |sizes|
            if share_the_pephit
              sizes.inject(0.0) {|sum, size| sum + (1.0 / size) }
            else
              sizes.size
            end
          end
          Counts.new(*tallies)
        end
      end

    end
  end
end
data/lib/ms-quant.rb ADDED
File without changes
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/quant/spectral_counts'
4
+
5
+
6
+
7
# Minimal stand-in for a peptide hit: an amino acid sequence, a charge and
# the list of proteins it is linked to (defaults to a fresh empty array).
PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
  def initialize(*args)
    super(*args)
    self.proteins ||= []
  end

  def inspect # easier to read output
    "<PeptideHit aaseq=#{aaseq} charge=#{charge} proteins(ids)=#{proteins.map(&:id).join(',')}>"
  end

  # Hash by identity: two hits with equal members are still distinct hits.
  def hash
    object_id
  end

  # Kept consistent with #hash (objects that are eql? must share a hash);
  # Struct's member-wise eql? would otherwise call distinct hits equal.
  # #== is left member-wise.
  def eql?(other)
    equal?(other)
  end
end
17
# Minimal stand-in for a protein hit: an id plus the peptide hits mapped to it.
ProteinHit = Struct.new(:id, :peptide_hits) do
  def inspect # easier to read output
    "<Prt #{id}>"
  end

  # Hash by identity rather than by members.
  def hash
    object_id
  end

  # Kept consistent with #hash (objects that are eql? must share a hash);
  # #== is left member-wise.
  def eql?(other)
    equal?(other)
  end
end
23
+
24
describe 'groups of peptide hits' do
  before do
    # Trailing comments: proteins each hit belongs to (bg = big_guy,
    # mg1/mg2 = medium_guy1/2, sbm = subsumed_by_medium, lg = little_guy)
    # and the share of a count each protein receives when counts are split.
    @pep_hits = [
      ['AABBCCDD', 2], #bg,mg1,mg2 0.33
      ['BBCC', 2], #bg,mg1,mg2 0.33
      ['DDEEFFGG', 2], #bg,mg1,mg2,sbm 0.25
      ['DDEEFFGG', 2], #bg,mg1,mg2,sbm 0.25
      ['DDEEFFGG', 3], #bg,mg1,mg2,sbm 0.25
      ['HIYA', 2], #bg,lg 0.5
    ].map {|ar| PeptideHit.new(*ar) }

    # An array of [id, peptide_hits] pairs (not a Hash), so the order the
    # expectations below rely on holds on every Ruby version.
    @prot_hits = [
      # spectral_counts, aaseq+charge counts, aaseq counts
      ['big_guy', @pep_hits], # 6, 5, 4;
      ['little_guy', [@pep_hits.last]], # 1, 1, 1, 0.5, 0.5, 0.5
      ['medium_guy1', @pep_hits[0,5]], # 5, 4, 3
      ['medium_guy2', @pep_hits[0,5]], # 5, 4, 3
      ['subsumed_by_medium', @pep_hits[2,3]], # 3, 2, 1
    ].map {|data| ProteinHit.new(*data) }

    # doubly linked for this: every hit also knows its proteins
    @prot_hits.each do |prot|
      prot.peptide_hits.each {|pephit| pephit.proteins << prot }
    end

    # both in the same order as @prot_hits
    @expected_counts = [ [6,5,4], [1,1,1], [5,4,3], [5,4,3], [3,2,1] ]
    @expected_counts_split = [ [1.9167,1.6667,1.4167], [0.5,0.5,0.5], [1.41667, 1.1667, 0.91667], [1.41667, 1.16667, 0.91667], [0.75, 0.5, 0.25] ]
  end

  it 'finds spectral counts (without sharing)' do
    groups_of_pephits = @prot_hits.map(&:peptide_hits)
    counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits)
    @expected_counts.zip(counts) do |exp, act|
      act.to_a.is exp
    end
  end

  it 'finds spectral counts (splitting counts between shared)' do
    groups_of_pephits = @prot_hits.map(&:peptide_hits)
    # a hit is shared by as many proteins as it is linked to
    counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
    @expected_counts_split.zip(counts) do |exp, act|
      exp.zip(act) {|e,a| a.should.be.close e, 0.0001 }
    end
  end

end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'spec/more'
3
+
4
# Put the spec directory and the sibling lib directory at the front of the
# load path (lib ends up first).
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))

# Have Bacon report its summary at exit.
Bacon.summary_on_exit
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-quant
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - John T. Prince
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-03-30 00:00:00 -06:00
18
+ default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: ms-ident
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 0
31
+ - 19
32
+ version: 0.0.19
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: spec-more
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: jeweler
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ segments:
57
+ - 1
58
+ - 5
59
+ - 2
60
+ version: 1.5.2
61
+ type: :development
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: rcov
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ description: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic). Not related to MSQuant
77
+ email: jtprince@gmail.com
78
+ executables:
79
+ - peptide_hit_qvalues_to_spectral_counts_table.rb
80
+ extensions: []
81
+
82
+ extra_rdoc_files:
83
+ - LICENSE
84
+ - README.rdoc
85
+ files:
86
+ - .document
87
+ - LICENSE
88
+ - README.rdoc
89
+ - Rakefile
90
+ - bin/peptide_hit_qvalues_to_spectral_counts_table.rb
91
+ - lib/ms-quant.rb
92
+ - lib/ms/quant/spectral_counts.rb
93
+ - spec/ms/quant/spectral_counts_spec.rb
94
+ - spec/spec_helper.rb
95
+ has_rdoc: true
96
+ homepage: http://github.com/princelab/ms-quant
97
+ licenses:
98
+ - MIT
99
+ post_install_message:
100
+ rdoc_options: []
101
+
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ segments:
110
+ - 0
111
+ version: "0"
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ segments:
118
+ - 0
119
+ version: "0"
120
+ requirements: []
121
+
122
+ rubyforge_project:
123
+ rubygems_version: 1.3.7
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)
127
+ test_files:
128
+ - spec/ms/quant/spectral_counts_spec.rb
129
+ - spec/spec_helper.rb