ms-quant 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2011 Brigham Young University
2
+ authored by John T. Prince
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,9 @@
1
+ = ms-quant
2
+
3
+ Tools for the quantitation of mass spectrometry proteomics experiments. This
4
+ is not associated with {MSQuant}[http://msquant.sourceforge.net] although the
5
+ goals are similar.
6
+
7
+ == Copyright
8
+
9
+ See LICENSE
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ require 'jeweler'
5
+ Jeweler::Tasks.new do |gem|
6
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
7
+ gem.name = "ms-quant"
8
+ gem.homepage = "http://github.com/princelab/ms-quant"
9
+ gem.license = "MIT"
10
+ gem.summary = %Q{quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)}
11
+ gem.description = %Q{quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic). Not related to MSQuant}
12
+ gem.email = "jtprince@gmail.com"
13
+ gem.authors = ["John T. Prince"]
14
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
15
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
16
+ # gem.add_runtime_dependency 'jabber4r', '> 0.1'
17
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
18
+ gem.add_runtime_dependency 'ms-ident', ">= 0.0.19"
19
+ gem.add_development_dependency "spec-more", ">= 0"
20
+ gem.add_development_dependency "jeweler", "~> 1.5.2"
21
+ gem.add_development_dependency "rcov", ">= 0"
22
+ end
23
+ Jeweler::RubygemsDotOrgTasks.new
24
+
25
+ require 'rake/testtask'
26
+ Rake::TestTask.new(:spec) do |spec|
27
+ spec.libs << 'lib' << 'spec'
28
+ spec.pattern = 'spec/**/*_spec.rb'
29
+ spec.verbose = true
30
+ end
31
+
32
+ require 'rcov/rcovtask'
33
+ Rcov::RcovTask.new do |spec|
34
+ spec.libs << 'spec'
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.verbose = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rake/rdoctask'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "ms-quant #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'ms/ident/peptide_hit/qvalue'
4
+ require 'ms/ident/protein_hit'
5
+ require 'ms/ident/peptide/db'
6
+ require 'ms/quant/spectral_counts'
7
+
8
+ require 'trollop'
9
+
10
# Prints +args+ via +puts+ and flushes $stdout immediately, but only when
# the global $VERBOSE flag is truthy; otherwise does nothing and returns nil.
def putsv(*args)
  return unless $VERBOSE

  puts(*args)
  $stdout.flush
end
15
+
16
# Command line: a peptide-centric db file followed by one or more peptide hit
# q-value files.  Prints one tab-delimited row per protein group: the counts
# for each experiment followed by the members of the protein group.
opts = Trollop::Parser.new do
  # NOTE(review): the banner says ".psq" while the name-derivation code below
  # strips a ".phq" inner extension -- confirm which extension is intended.
  banner %Q{usage: #{File.basename(__FILE__)} peptide_centric_db.yml, file1.psq ...
}
  opt :names, "comma-separated names for the table (otherwise filenames)", :type => String
  opt :fdr_percent, "%FDR as cutoff", :default => 1.0
  opt :write_subset, "(development) write subset db", :default => false
end

# Parser#parse raises on --help, --version and malformed options; the standard
# handler turns those into a usage/error message and a clean exit.
opt = Trollop.with_standard_exception_handling(opts) do
  parsed = opts.parse(ARGV)
  # need the db file plus at least one hit file
  raise Trollop::HelpNeeded if ARGV.size < 2
  parsed
end

peptide_centric_db_file = ARGV.shift

if opt[:names]
  # --names arrives as a single string; turn it into one name per input file
  opt[:names] = opt[:names].split(',').map { |name| name.strip }
  unless opt[:names].size == ARGV.size
    abort "expected #{ARGV.size} names (one per file) but got #{opt[:names].size}"
  end
else
  # default: file name minus its extension (and minus an inner '.phq')
  opt[:names] = ARGV.map do |file|
    base = file.chomp(File.extname(file))
    base = base.chomp(File.extname(base)) if File.extname(base) == '.phq'
    base
  end
end

# tag each hit with the experiment (input file) it came from
class Ms::Ident::PeptideHit
  attr_accessor :experiment_name
end

fdr_cutoff = opt[:fdr_percent] / 100.0

start = Time.now

# progress messages (putsv) are always shown
$VERBOSE = true

# one array of filtered peptide hits per input file
ar_of_peptide_hit_ars = Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
  putsv "#{Time.now - start} seconds to read #{peptide_centric_db_file}"
  ARGV.zip(opt[:names]).map do |file, exp|
    peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
    putsv "#{file}: #{peptide_hits.size} hits"
    # keep hits that pass the FDR cutoff and whose sequence is in the db
    peptide_hits.select! do |hit|
      prot_ids = hit.qvalue <= fdr_cutoff && peptide_to_proteins[hit.aaseq]
      next false unless prot_ids

      # update each peptide with its experiment and its protein hits
      hit.experiment_name = exp
      hit.proteins = prot_ids
      true
    end
    peptide_hits
  end
end

if opt[:write_subset]
  aaseqs_to_prots = {}
  ar_of_peptide_hit_ars.each do |pephits|
    pephits.each { |pephit| aaseqs_to_prots[pephit.aaseq] = pephit.proteins }
  end
  outfile = "peptidecentric_subset.yml"
  puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
  File.open(outfile, 'w') do |out|
    aaseqs_to_prots.each do |aaseq, prots|
      out.puts(%Q{#{aaseq}: #{prots.map(&:id).join("\t") }})
    end
  end
end

# number of hits that survived filtering, per experiment
opt[:names].zip(ar_of_peptide_hit_ars) do |name, pep_ar|
  putsv "#{name}: #{pep_ar.size}"
end

all_peptide_hits = ar_of_peptide_hit_ars.flatten(1)

# the comment in the original script notes that peptide_hit#proteins yields
# id strings (which hash properly)
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)

# partition the hits of every protein group by experiment and count them;
# each entry is an array of counts parallel to protein_groups.
# (SpectralCounts.counts also accepts a block returning how many groups share
# a hit, which splits the counts between those groups; not used here.)
ar_of_count_data = opt[:names].map do |name|
  groups_of_pephits = protein_groups.map do |prot_group|
    prot_group.peptide_hits.select { |hit| hit.experiment_name == name }
  end
  Ms::Quant::SpectralCounts.counts(groups_of_pephits)
end

# protein_groups
# [ ar_of_counts_for_exp1, ar_of_counts_for_exp2, ar_of_counts_for_exp3 ]

protein_groups.zip(*ar_of_count_data) do |pg, *counts|
  puts((counts.map(&:to_a).flatten + pg.to_a).join("\t"))
end
125
+
126
+
@@ -0,0 +1,35 @@
1
+ require 'set'
2
+ require 'ms/ident/protein_group'
3
+
4
module Ms
  module Quant
    # Spectral counting over groups of peptide hits.
    module SpectralCounts
      # Counts for one group of peptide hits:
      # spectral::    every hit in the group contributes
      # aaseqcharge:: every distinct [aaseq, charge] pair contributes once
      # aaseq::       every distinct aaseq contributes once
      Counts = Struct.new(:spectral, :aaseqcharge, :aaseq)

      # Returns an array of Counts objects parallel to
      # +groups_of_peptide_hits+ (an enumerable of arrays of peptide hits).
      # Each peptide hit must respond to :aaseq and :charge.
      #
      # Without a block every contribution is 1 and the counts are Integers.
      #
      # With a block, the block is called with each hit and must return the
      # (positive) number of groups sharing that hit; each contribution is
      # then 1.0/that number and the counts are Floats.  Hits with the same
      # aaseq (or aaseq and charge) are expected to yield the same number;
      # if they differ, the value from the last such hit is used.
      #
      # Raises ArgumentError if the block returns something that is not a
      # positive number (which would otherwise produce infinite or
      # negative counts).
      def self.counts(groups_of_peptide_hits, &share_the_pephit)
        groups_of_peptide_hits.map do |peptide_hits|
          share_by_aaseq_charge = {}
          share_by_aaseq = {}
          shares = peptide_hits.map do |hit|
            share = share_the_pephit ? checked_share(share_the_pephit.call(hit), hit) : 1
            # later hits with the same key overwrite earlier ones, which is
            # fine as long as the share is consistent for a given key
            share_by_aaseq_charge[[hit.aaseq, hit.charge]] = share
            share_by_aaseq[hit.aaseq] = share
            share
          end
          tallies = [shares, share_by_aaseq_charge.values, share_by_aaseq.values].map do |sizes|
            if share_the_pephit
              sizes.inject(0.0) { |sum, size| sum + (1.0 / size) }
            else
              sizes.size
            end
          end
          Counts.new(*tallies)
        end
      end

      # Returns +share+ if it is a positive number, otherwise raises
      # ArgumentError naming the offending hit.
      def self.checked_share(share, hit)
        return share if share.is_a?(Numeric) && share > 0

        raise ArgumentError, "share size must be a positive number (got #{share.inspect} for #{hit.inspect})"
      end
      private_class_method :checked_share
    end
  end
end
data/lib/ms-quant.rb ADDED
File without changes
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/quant/spectral_counts'
4
+
5
+
6
+
7
+ PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
8
+ def initialize(*args)
9
+ super(*args)
10
+ self.proteins ||= []
11
+ end
12
+ def inspect # easier to read output
13
+ "<PeptideHit aaseq=#{self.aaseq} charge=#{self.charge} proteins(ids)=#{self.proteins.map(&:id).join(',')}>"
14
+ end
15
+ def hash ; self.object_id end
16
+ end
17
+ ProteinHit = Struct.new(:id, :peptide_hits) do
18
+ def inspect # easier to read output
19
+ "<Prt #{self.id}>"
20
+ end
21
+ def hash ; self.object_id end
22
+ end
23
+
24
+ describe 'groups of peptide hits' do
25
+ before do
26
+ @pep_hits = [
27
+ ['AABBCCDD', 2], #bg,mg1,mg2 0.33
28
+ ['BBCC', 2], #bg,mg1,mg2 0.33
29
+ ['DDEEFFGG', 2], #bg,mg1,mg2,sbm 0.25
30
+ ['DDEEFFGG', 2], #bg,mg1,mg2,sbm 0.25
31
+ ['DDEEFFGG', 3], #bg,mg1,mg2,sbm 0.25
32
+ ['HIYA', 2], #bg,lg 0.5
33
+ ].map {|ar| PeptideHit.new(*ar) }
34
+ @prot_hits = {
35
+ # spectral_counts, aaseq+charge counts, aaseq counts
36
+ 'big_guy' => @pep_hits, # 6, 5, 4;
37
+ 'little_guy' => [@pep_hits.last], # 1, 1, 1, 0.5, 0.5, 0.5
38
+ 'medium_guy1' => @pep_hits[0,5], # 5, 4, 3
39
+ 'medium_guy2' => @pep_hits[0,5], # 5, 4, 3
40
+ 'subsumed_by_medium' => @pep_hits[2,3], # 3, 2, 1
41
+ }.map {|data| ProteinHit.new(*data) }
42
+ # doubly linked for this
43
+ @prot_hits.each do |prot|
44
+ prot.peptide_hits.each {|pephit| pephit.proteins << prot }
45
+ end
46
+ # DEPENDS ON AN ORDERED HASH (RUBY 1.9!!!!)
47
+ @expected_counts = [ [6,5,4], [1,1,1], [5,4,3], [5,4,3], [3,2,1] ]
48
+ @expected_counts_split = [ [1.9167,1.6667,1.4167], [0.5,0.5,0.5], [1.41667, 1.1667, 0.91667], [1.41667, 1.16667, 0.91667], [0.75, 0.5, 0.25] ]
49
+ end
50
+
51
+ it 'finds spectral counts (without sharing)' do
52
+ groups_of_pephits = @prot_hits.map(&:peptide_hits)
53
+ counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits)
54
+ @expected_counts.zip(counts) do |exp, act|
55
+ act.to_a.is exp
56
+ end
57
+ end
58
+
59
+ it 'finds spectral counts (splitting counts between shared)' do
60
+ groups_of_pephits = @prot_hits.map(&:peptide_hits)
61
+ counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
62
+ @expected_counts_split.zip(counts) do |exp, act|
63
+ exp.zip(act) {|e,a| a.should.be.close e, 0.0001 }
64
+ end
65
+ end
66
+
67
+ end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'spec/more'
3
+
4
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+
7
+ Bacon.summary_on_exit
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ms-quant
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - John T. Prince
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-03-30 00:00:00 -06:00
18
+ default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: ms-ident
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 0
31
+ - 19
32
+ version: 0.0.19
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: spec-more
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: jeweler
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ segments:
57
+ - 1
58
+ - 5
59
+ - 2
60
+ version: 1.5.2
61
+ type: :development
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: rcov
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ description: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic). Not related to MSQuant
77
+ email: jtprince@gmail.com
78
+ executables:
79
+ - peptide_hit_qvalues_to_spectral_counts_table.rb
80
+ extensions: []
81
+
82
+ extra_rdoc_files:
83
+ - LICENSE
84
+ - README.rdoc
85
+ files:
86
+ - .document
87
+ - LICENSE
88
+ - README.rdoc
89
+ - Rakefile
90
+ - bin/peptide_hit_qvalues_to_spectral_counts_table.rb
91
+ - lib/ms-quant.rb
92
+ - lib/ms/quant/spectral_counts.rb
93
+ - spec/ms/quant/spectral_counts_spec.rb
94
+ - spec/spec_helper.rb
95
+ has_rdoc: true
96
+ homepage: http://github.com/princelab/ms-quant
97
+ licenses:
98
+ - MIT
99
+ post_install_message:
100
+ rdoc_options: []
101
+
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ segments:
110
+ - 0
111
+ version: "0"
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ segments:
118
+ - 0
119
+ version: "0"
120
+ requirements: []
121
+
122
+ rubyforge_project:
123
+ rubygems_version: 1.3.7
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)
127
+ test_files:
128
+ - spec/ms/quant/spectral_counts_spec.rb
129
+ - spec/spec_helper.rb