ms-quant 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -15,7 +15,7 @@ Jeweler::Tasks.new do |gem|
15
15
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
16
16
  # gem.add_runtime_dependency 'jabber4r', '> 0.1'
17
17
  # gem.add_development_dependency 'rspec', '> 1.2.3'
18
- gem.add_runtime_dependency 'ms-ident', ">= 0.0.19"
18
+ gem.add_runtime_dependency 'ms-ident', ">= 0.1.1"
19
19
  gem.add_development_dependency "spec-more", ">= 0"
20
20
  gem.add_development_dependency "jeweler", "~> 1.5.2"
21
21
  gem.add_development_dependency "rcov", ">= 0"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.4
1
+ 0.0.6
@@ -1,29 +1,24 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'andand'
4
+ require 'set'
5
+ require 'ruport'
6
+
3
7
  require 'ms/ident/peptide_hit/qvalue'
4
- require 'ms/ident/protein_hit'
8
+ require 'ms/ident/protein'
5
9
  require 'ms/ident/peptide/db'
6
10
  require 'ms/quant/spectral_counts'
11
+ require 'ms/quant/protein_group_comparison'
12
+ require 'ms/quant/qspec/protein_group_comparison'
7
13
  require 'ms/quant/qspec'
14
+ require 'ms/quant/cmdline'
15
+
8
16
 
9
17
  require 'yaml'
10
18
  require 'tempfile'
11
19
 
12
20
  require 'trollop'
13
21
 
14
- # inverse from Tilo Sloboda (now in facets)
15
-
16
- class Hash
17
- def inverse
18
- i = Hash.new
19
- self.each_pair do |k,v|
20
- if (Array === v) ; v.each{ |x| i[x] = ( i.has_key?(x) ? [k,i[x]].flatten : k ) }
21
- else ; i[v] = ( i.has_key?(v) ? [k,i[v]].flatten : k ) end
22
- end ; i
23
- end
24
- end
25
-
26
-
27
22
  def putsv(*args)
28
23
  if $VERBOSE
29
24
  puts(*args) ; $stdout.flush
@@ -36,8 +31,48 @@ def basename(file)
36
31
  base
37
32
  end
38
33
 
34
class Ruport::Data::Table
  # Adds a column named +colname+ (+opts+ are handed straight to add_column)
  # and fills it row by row with the values of +array_of_data+.
  #
  # returns self
  def add_column_with_data(colname, array_of_data, opts={})
    add_column(colname, opts)
    data.zip(array_of_data).each do |record, value|
      record[colname] = value
    end
    self
  end

  # Writes the table to +file+ as tab-delimited text: the column names on one
  # line followed by one line per row.
  #
  # acceptable opts:
  #
  #     :header => an array of lines written before the column names
  #     :footer => an array of lines written after the last row
  #
  # Every header/footer line is written prefixed with "# ".
  def to_tsv(file, opt={})
    tab = "\t"
    File.open(file, 'w') do |io|
      write_commented = lambda {|lines| lines.each {|text| io.puts "# #{text}" } }
      write_commented.call(opt[:header]) if opt[:header]
      io.puts column_names.join(tab)
      data.each {|record| io.puts record.to_a.join(tab) }
      write_commented.call(opt[:footer]) if opt[:footer]
    end
  end

end
60
+
61
# Writes one "aaseq: id<TAB>id..." line to +outfile+ for every distinct amino
# acid sequence found among the peptide hits of +sample_to_pephits+ (a
# collection of [sample, peptide_hits] pairs).  When a sequence occurs more
# than once, the protein ids of the last hit seen are the ones written.
def write_subset(sample_to_pephits, outfile="peptidecentric_subset.yml")
  every_hit = sample_to_pephits.map(&:last).flatten(1)
  protein_ids_by_aaseq = every_hit.each_with_object({}) do |hit, lookup|
    lookup[hit.aaseq] = hit.proteins.map(&:id)
  end
  File.open(outfile, 'w') do |io|
    protein_ids_by_aaseq.each_pair do |aaseq, protein_ids|
      io.puts "#{aaseq}: #{protein_ids.join("\t")}"
    end
  end
end
72
+
39
73
 
40
74
  outfile = "spectral_counts.tsv"
75
+ pephits_outfile = "spectral_counts.pephits.tsv"
41
76
  delimiter = "\t"
42
77
 
43
78
  opts = Trollop::Parser.new do
@@ -53,6 +88,7 @@ psq is really .psq.tsv file
53
88
  opt :descriptions, "include descriptions of proteins, requires :fasta", :default => false
54
89
  opt :fasta, "the fasta file. Required for :qspec and :descriptions", :type => String
55
90
  opt :outfile, "the to which file data are written", :default => outfile
91
+ opt :peptides, "also write peptide hits (to: #{pephits_outfile})", :default => false
56
92
  opt :verbose, "speak up", :default => false
57
93
  opt :count_type, "type of spectral counts (<spectral|aaseqcharge|aaseq>)", :default => 'spectral'
58
94
  opt :qspec_normalize, "normalize spectral counts per run", :default => false
@@ -78,29 +114,8 @@ raise ArgumentError, "need .yml file for peptide centric db" unless File.extname
78
114
  putsv "using: #{peptide_centric_db_file} as peptide centric db"
79
115
 
80
116
  # groupname => files
81
- condition_to_samplenames = {}
82
- samplename_to_filename = {}
83
- ARGV.each do |arg|
84
- (condition, files) =
85
- if arg.include?('=')
86
- (condition, filestring) = arg.split('=')
87
- [condition, filestring.split(',')]
88
- else
89
- [basename(arg), [arg]]
90
- end
91
- reptag = ARGV.size
92
- sample_to_file_pairs = files.each_with_index.map {|file,i| ["#{condition}-rep#{i+1}", file] }
93
- sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
94
- condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
95
- end
96
-
97
117
 
98
- if $VERBOSE
99
- puts "** condition: sample_names"
100
- puts condition_to_samplenames.to_yaml
101
- puts "** samplename: filename"
102
- puts samplename_to_filename.to_yaml
103
- end
118
+ (samplename_to_filename, condition_to_samplenames, samplename_to_condition) = Ms::Quant::Cmdline.args_to_hashes(ARGV)
104
119
 
105
120
  raise ArgumentError, "must have 2 conditions for qspec!" if opt[:qspec] && condition_to_samplenames.size != 2
106
121
 
@@ -108,144 +123,122 @@ samplenames = samplename_to_filename.keys
108
123
 
109
124
  class Ms::Ident::PeptideHit
110
125
  attr_accessor :experiment_name
126
+ attr_accessor :protein_groups
127
+ end
128
+ class Ms::Ident::Protein
129
+ attr_accessor :length
111
130
  end
131
+
132
+
112
133
  fdr_cutoff = opt[:fdr_percent] / 100
113
134
 
114
- start=Time.now
115
-
116
- ar_of_pephit_ars = Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
117
- putsv "#{Time.now-start} seconds to read #{peptide_centric_db_file}"
118
- samplename_to_filename.map do |sample, file|
119
- peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
120
- putsv "#{file}: #{peptide_hits.size} hits"
121
- peptide_hits.select! do |hit|
122
- if hit.qvalue <= fdr_cutoff
123
- # update each peptide with its protein hits
124
- prot_ids = peptide_to_proteins[hit.aaseq]
125
- if prot_ids
126
- hit.experiment_name = sample
127
- hit.proteins = prot_ids
128
- else ; false end
129
- else
130
- false
131
- end
132
- end
133
- peptide_hits
134
- end
135
+ if opt[:qspec] || opt[:descriptions]
136
+ putsv "reading lengths and descriptions from #{opt[:fasta]}"
137
+ (id_to_length, id_to_desc) = Ms::Fasta.protein_lengths_and_descriptions(opt[:fasta])
135
138
  end
136
139
 
137
- if opt[:write_subset]
138
- aaseqs_to_prots = {}
139
- ar_of_pephit_ars.flatten(1).each do |pephit|
140
- aaseqs_to_prots[pephit.aaseq] = pephit.proteins
141
- end
142
- outfile = "peptidecentric_subset.yml"
143
- puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
144
- File.open(outfile,'w') do |out|
145
- aaseqs_to_prots.each do |k,v|
146
- out.puts(%Q{#{k}: #{v.join("\t") }})
147
- end
148
- end
140
+ samplename_to_peptidehits = samplename_to_filename.map do |sample, file|
141
+ [sample, Ms::Ident::PeptideHit::Qvalue.from_file(file).select {|hit| hit.qvalue <= fdr_cutoff }]
149
142
  end
150
143
 
151
- if $VERBOSE
152
- samplenames.zip(ar_of_pephit_ars) do |samplename, pep_ar|
153
- putsv "#{samplename}: #{pep_ar.size}"
144
# Update each peptide hit with its protein objects and its sample name.
#
# Hits whose amino acid sequence has no entry in the peptide centric db are
# removed from their sample's array (the previous implementation dropped them
# too) rather than raising NoMethodError when the lookup comes back nil.
#
# Protein objects are memoized by id, so every hit that points at the same
# protein id shares one Ms::Ident::Protein instance.
all_protein_hits = Hash.new {|h,id| h[id] = Ms::Ident::Protein.new(id) }
Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
  samplename_to_peptidehits.each do |sample, peptide_hits|
    # select! filters in place so later users of samplename_to_peptidehits
    # only see hits that have proteins attached
    peptide_hits.select! do |hit|
      protein_ids = peptide_to_proteins[hit.aaseq]
      # NOTE(review): assumes the db lookup yields nil for an unknown aaseq -- confirm
      next false unless protein_ids
      hit.experiment_name = sample
      hit.proteins = protein_ids.map do |id|
        protein = all_protein_hits[id]
        # id_to_length / id_to_desc are nil unless the fasta file was read
        protein.length = id_to_length[id] if id_to_length
        protein.description = id_to_desc[id] if id_to_desc
        protein
      end
      true
    end
  end
end
156
162
 
157
- all_peptide_hits = ar_of_pephit_ars.flatten(1)
163
+ write_subset(samplename_to_peptidehits) if opt[:write_subset]
158
164
 
159
- # because peptide_hit#proteins yields id strings (which hash properly),
160
- # each protein group is an array of
161
- protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)
165
+ samplename_to_peptidehits.each {|samplename, hits| putsv "#{samplename}: #{hits.size}" } if $VERBOSE
162
166
 
163
- pephit_to_protein_groups = Hash.new {|h,k| h[k] = [] }
164
- protein_groups.each do |protein_group|
165
- protein_group.peptide_hits.each {|hit| pephit_to_protein_groups[hit] << protein_group }
166
- end
167
+ all_peptide_hits = samplename_to_peptidehits.map(&:last).flatten(1)
167
168
 
168
- counts_parallel_to_names_with_counts_per_group = samplenames.map do |name|
169
- pep_hit_to_prot_groups = Hash.new {|h,k| h[k] = [] }
170
- groups_of_pephits = protein_groups.map do |prot_group|
171
- pep_hits = prot_group.peptide_hits.select {|hit| hit.experiment_name == name }
172
- pep_hits.each do |pep_hit|
173
- pep_hit_to_prot_groups[pep_hit] << prot_group
174
- end # returns the group of pep_hits
175
- end
176
- counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) do |pephit|
177
- pephit_to_protein_groups[pephit].size
178
- end
179
- end
169
+ # this constricts everything down to a minimal set of protein groups that
170
+ # explain the entire set of peptide hits.
171
+ update_pephits = true # ensures that each pephit is linked to the array of protein groups it is associated with
172
+ protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits, update_pephits)
180
173
 
181
- if opt[:qspec] || opt[:descriptions]
182
- putsv "reading lengths and descriptions from #{opt[:fasta]}"
183
- (id_to_length, id_to_desc) = Ms::Fasta.protein_lengths_and_descriptions(opt[:fasta])
174
+ hits_table_hash = {} # create the table using key => column hash
175
+ samplenames.each do |name|
176
+ hits_table_hash[name] = protein_groups.map do |prot_group|
177
+ prot_group.peptide_hits.select {|hit| hit.experiment_name == name }
178
+ end
184
179
  end
185
180
 
186
- samplename_to_condition = condition_to_samplenames.inverse
181
+ # The columns are filled with groups of peptide hits, one group of hits per
182
+ # protein group (protein group order is implicit). The rows are sample names.
183
+ #
184
+ # (implied) sample1 sample2 sample3 ...
185
+ # (group1) [hit,hit] [hit...] [hit...] ...
186
+ # (group2) [hit,hit] [hit...] [hit...] ...
187
+ # ... ... ... ... ...
188
+ hits_table = Ruport::Data::Table.new(:data => hits_table_hash.values.transpose, :column_names => hits_table_hash.keys)
187
189
 
188
- ### OUTPUT TABLE
189
- header_cats = samplenames.map.to_a
190
+ # spectral counts of type opt[:count_type]
191
+ counts_data = hits_table.data.map do |row|
192
+ row.map do |pephits|
193
+ Ms::Quant::SpectralCounts.counts(pephits) {|pephit| 1.0 / pephit.protein_groups.size }.send(opt[:count_type])
194
+ end
195
+ end
190
196
 
191
- ar_of_rows = counts_parallel_to_names_with_counts_per_group.map do |counts_per_group|
192
- counts_per_group.map(&opt[:count_type])
193
- end.transpose
197
+ # each cell holds a single count (of type opt[:count_type]) for one protein group in one sample
198
+ counts_table = Ruport::Data::Table.new(:data => counts_data, :column_names => samplenames)
194
199
 
200
+ # return a list of ProteinGroupComparisons
195
201
  if opt[:qspec]
196
- all_conditions = samplenames.map {|sn| samplename_to_condition[sn] }
197
- condition_to_count_array = all_conditions.zip(counts_parallel_to_names_with_counts_per_group).map do |condition, counts_par_groups|
198
- [condition, counts_par_groups.map(&opt[:count_type])]
199
- end
200
202
 
203
+ # prepare data for qspec
204
+ condition_to_count_array = counts_table.column_names.map {|name| [name, counts_table.column(name)] }
205
+ # average length of the proteins in the group
201
206
  name_length_pairs = protein_groups.map do |pg|
202
- # prefer swissprot (sp) proteins over tremble (tr) and shorter protein
203
- # lengths over longer lengths
204
- best_guess_protein_id = pg.sort_by {|prot_id| [prot_id, -id_to_length[prot_id]] }.first
205
- length = id_to_length[best_guess_protein_id]
206
- [pg.join(":"), length]
207
+ [pg.join(":"), pg.map(&:length).reduce(:+)./(pg.size).round]
207
208
  end
208
209
 
209
- putsv "qspec to normalize counts: #{opt[:qspec_normalize]}"
210
210
  qspec_results = Ms::Quant::Qspec.new(name_length_pairs, condition_to_count_array).run(opt[:qspec_normalize])
211
-
212
- to_add = [:fdr, :bayes_factor, :fold_change]
213
- header_cats.push(*to_add)
214
- qspec_results.zip(ar_of_rows) do |zipped|
215
- (result, row) = zipped
216
- row.push(*to_add.map {|v| result.send(v) })
211
+
212
+ cols_to_add = [:bayes_factor, :fold_change, :fdr]
213
+ counts_table.add_columns cols_to_add
214
+ counts_table.data.zip(qspec_results) do |row, qspec_result|
215
+ cols_to_add.each {|cat| row[cat] = qspec_result[cat] }
217
216
  end
218
217
  end
219
218
 
220
- header_cats.push( *%w(BestID AllIDs) )
221
- header_cats.push( 'Description' ) if opt[:descriptions]
219
+ counts_table.add_columns( [:name, :ids, :description] )
220
+ counts_table.data.zip(protein_groups) do |row, pg|
221
+ best_id = pg.sort_by {|prot| [prot.id, prot.length] }.first
222
+ row.name = best_id.description.andand.match(/ GN=([^\s]+) ?/).andand[1] || best_id.id
223
+ row.ids = pg.map(&:id).join(',')
224
+ row.description = best_id.description
225
+ end
222
226
 
223
- sort_protein_id =
224
- if id_to_length
225
- lambda {|prot_id| [prot_id, -id_to_length[prot_id]] }
226
- else
227
- lambda {|prot_id| prot_id }
228
- end
229
227
 
230
- protein_groups.zip(ar_of_rows) do |zipped|
231
- (pg, row) = zipped
232
- # swiss-prot and then the shortest
233
- best_protid = pg.sort_by(&sort_protein_id).first
234
- (gene_id, desc) =
235
- if opt[:descriptions]
236
- desc = id_to_desc[best_protid]
237
- gene_id = (md=desc.match(/ GN=(\w+) ?/)) ? md[1] : best_protid
238
- [gene_id, desc]
239
- else
240
- [best_protid, nil]
228
+ if opt[:peptides]
229
+ hits_table.each do |record|
230
+ record.each_with_index do |hits,i|
231
+ new_cell = hits.group_by do |hit|
232
+ [hit.aaseq, hit.charge]
233
+ end.map do |key, hits|
234
+ [key.reverse.join("_"), hits.map(&:id).join(',')].join(":")
235
+ end.join('; ')
236
+ record[i] = new_cell
241
237
  end
242
- row << gene_id << pg.join(',')
243
- row.push(desc) if desc
244
- end
245
-
246
- File.open(opt[:outfile],'w') do |out|
247
- out.puts header_cats.join(delimiter)
248
- ar_of_rows.each {|row| out.puts row.join(delimiter) }
249
- putsv "wrote: #{opt[:outfile]}"
238
+ end
239
+ hits_table.add_column_with_data(:name, counts_table.column(:name), :position=>0)
240
+ hits_table.to_tsv(pephits_outfile, :footer => ["parallel to #{outfile}"])
250
241
  end
251
242
 
243
# Write the counts table to the file requested with --outfile (the option
# defaults to the value of +outfile+).  The samples and the options in effect
# are appended as commented footer lines.
intro = ["samples: #{samplename_to_filename}", "options: #{opt}"]
counts_table.to_tsv(opt[:outfile], :footer => intro)
@@ -0,0 +1,15 @@
1
+
2
+
3
# inverse from Tilo Sloboda (now in facets)

class Hash
  # Returns a new hash that maps each value of this hash back to its key.
  #
  # An Array value contributes one entry per element.  When several keys lead
  # to the same value, that value maps to a flat array of those keys, the most
  # recently visited key first.
  #
  #     {:a => [1, 2], :b => 2}.inverse  # => {1 => :a, 2 => [:b, :a]}
  def inverse
    inverted = {}
    each_pair do |key, value|
      targets = (Array === value) ? value : [value]
      targets.each do |target|
        inverted[target] = inverted.key?(target) ? [key, inverted[target]].flatten : key
      end
    end
    inverted
  end
end
14
+
15
+
@@ -0,0 +1,42 @@
1
+ require 'hash/inverse'
2
+
3
module Ms ; module Quant ; end ; end

module Ms::Quant::Cmdline

  # expects arguments in one of two forms.  The first form is grouped by
  # condition as shown:
  #
  #     condition1=file1,file2,file3... condition2=file4,file5...
  #
  # The second is where each file is its own condition (1 replicate):
  #
  #     file1 file2 file3
  #
  # In the second form the condition name is basename(file).
  # NOTE(review): basename is not defined in this module; it is expected to
  # be a top-level helper supplied by the calling script.
  #
  # A sample name is the condition name followed by replicate_postfix and the
  # 1-based replicate number.  When a condition has a single file the sample
  # name is just the condition name.
  #
  # Returns three hashes, in this order (insertion ordered on ruby >= 1.9):
  #
  #     1) Samplename to the filename
  #     2) Condition to an array of samplenames
  #     3) Samplename to condition
  def self.args_to_hashes(args, replicate_postfix="-rep")
    # groupname => files
    condition_to_samplenames = {}
    samplename_to_filename = {}
    args.each do |arg|
      (condition, files) =
        if arg.include?('=')
          # split on the first '=' only, so a '=' inside a file name stays
          # part of the file list (and "cond=" yields an empty file list)
          (condition, filestring) = arg.split('=', 2)
          [condition, filestring.split(',')]
        else
          [basename(arg), [arg]]
        end
      sample_to_file_pairs = files.each_with_index.map do |file,i|
        rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
        ["#{condition}#{rep_string}", file]
      end
      sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
      condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
    end
    [samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
  end
end
@@ -0,0 +1,2 @@
1
+
2
+ Peptide
@@ -0,0 +1,29 @@
1
+
2
module Ms
  module Quant
  end
end

# Mixin holding one protein group together with the values measured for it in
# each experiment.  Including classes get the three accessors and the
# three-argument constructor below.
module Ms::Quant::ProteinGroupComparison

  # a protein group object
  attr_accessor :protein_group

  # an array of experiment names
  attr_accessor :experiments

  # parallel array to experiments with the measured values
  attr_accessor :values

  # Stores the three arguments under the accessors of the same names.
  def initialize(protein_group, experiments, values)
    @protein_group = protein_group
    # must be @experiments (not @experiment) so that #experiments returns it
    @experiments = experiments
    @values = values
  end
end

class Ms::Quant::ProteinGroupComparison::SpectralCounts
  include Ms::Quant::ProteinGroupComparison
end

class Ms::Quant::ProteinGroupComparison::UniqAAzCounts
  include Ms::Quant::ProteinGroupComparison
end
@@ -0,0 +1,22 @@
1
+ require 'ms/quant/protein_group_comparison'
2
+
3
module Ms ; module Quant ; module ProteinGroupComparison ; end ; end ; end

# A protein group comparison that also keeps the qspec results struct it was
# built from.
class Ms::Quant::ProteinGroupComparison::Qspec
  include Ms::Quant::ProteinGroupComparison

  attr_accessor :qspec_results_struct

  # takes a protein group object, an array of experiment names and a qspec
  # results struct; the struct's counts_array is passed on as the values
  def initialize(protein_group, experiments, qspec_results_struct)
    @qspec_results_struct = qspec_results_struct
    super(protein_group, experiments, @qspec_results_struct.counts_array)
  end
end
+ end
22
+
@@ -31,6 +31,7 @@ class Ms::Quant::Qspec
31
31
  start_bayes = headers.index {|v| v =~ /BayesFactor/i }
32
32
  rows.map do |row|
33
33
  data = [row[0]]
34
+ data.push( row[1...start_bayes].map(&:to_f) )
34
35
  data.push( *row[start_bayes,4].map(&:to_f) )
35
36
  data.push( row[start_bayes+4] )
36
37
  Results.new(*data)
@@ -68,6 +69,8 @@ class Ms::Quant::Qspec
68
69
  end
69
70
  end
70
71
 
72
+ # returns an array of Qspec::Results objects (each object can be considered
73
+ # a row of data)
71
74
  def run(normalize=true, opts={})
72
75
  puts "normalize: #{normalize}" if $VERBOSE
73
76
  tfile = Tempfile.new("qspec")
@@ -87,6 +90,7 @@ class Ms::Quant::Qspec
87
90
  end
88
91
 
89
92
  # for version 2 of QSpec
90
- Results = Struct.new(:protid, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
93
+ # counts array is parallel to the experiment names passed in originally
94
+ Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
91
95
  end
92
96
 
@@ -8,28 +8,28 @@ module Ms
8
8
 
9
9
  # returns a parallel array of Count objects. If split_hits then counts
10
10
  # are split between groups sharing the hit. peptide_hits must respond
11
- # to :charge and :aaseq. If split_hits, then each peptide_hit must
12
- # respond to :linked_to yielding an object with a :size reflective of
13
- # the number of shared peptide_hits.
14
- def self.counts(groups_of_peptide_hits, &share_the_pephit)
15
- groups_of_peptide_hits.map do |peptide_hits|
16
- uniq_aaseq = {}
17
- uniq_aaseq_charge = {}
18
- linked_sizes = peptide_hits.map do |hit|
19
- linked_to_size = share_the_pephit ? share_the_pephit.call(hit) : 1
20
- # these guys will end up clobbering themselves, but the
21
- # linked_to_size should be consistent if the key is the same
22
- uniq_aaseq_charge[[hit.aaseq, hit.charge]] = linked_to_size
23
- uniq_aaseq[hit.aaseq] = linked_to_size
24
- linked_to_size
25
- end
26
- counts_data = [linked_sizes, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
27
- share_the_pephit ? array.inject(0.0) {|sum,size| sum+=(1.0/size) } : array.size
28
- end
29
- Counts.new(*counts_data)
11
+ # to :charge and :aaseq. If a block is given, the weight of a
12
+ # particular hit can be given (typically this will be 1/#proteins
13
+ # sharing the hit).
14
# Tallies one group of peptide hits and returns a Counts object built from
# three sums, in this order: the weights of all hits, the weights of the
# distinct [aaseq, charge] pairs, and the weights of the distinct aaseqs.
#
# Each hit must respond to :aaseq and :charge.  Without a block every hit
# weighs 1; with a block the block's return value for the hit is its weight.
# An empty group yields zero for all three sums (not nil).
def self.counts(peptide_hits, &share_the_pephit)
  uniq_aaseq = {}
  uniq_aaseq_charge = {}
  weights = peptide_hits.map do |hit|
    weight = share_the_pephit ? share_the_pephit.call(hit) : 1
    # hits with the same key overwrite one another here; the weight is
    # expected to be the same whenever the key is the same
    uniq_aaseq_charge[[hit.aaseq, hit.charge]] = weight
    uniq_aaseq[hit.aaseq] = weight
    weight
  end
  counts_data = [weights, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
    # seed with 0: reduce(:+) alone returns nil for an empty array
    array.reduce(0, :+)
  end
  Counts.new(*counts_data)
end
32
-
33
30
  end
34
31
  end
35
32
  end
33
+
34
+
35
+
@@ -2,8 +2,6 @@ require 'spec_helper'
2
2
 
3
3
  require 'ms/quant/spectral_counts'
4
4
 
5
-
6
-
7
5
  PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
8
6
  def initialize(*args)
9
7
  super(*args)
@@ -58,7 +56,7 @@ describe 'groups of peptide hits' do
58
56
 
59
57
  it 'finds spectral counts (splitting counts between shared)' do
60
58
  groups_of_pephits = @prot_hits.map(&:peptide_hits)
61
- counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
59
# SpectralCounts.counts takes a single group of peptide hits, so tally each
# group separately to get one Counts object per group
counts = groups_of_pephits.map do |pephits|
  Ms::Quant::SpectralCounts.counts(pephits) {|pephit| 1.0 / pephit.proteins.size }
end
62
60
  @expected_counts_split.zip(counts) do |exp, act|
63
61
  exp.zip(act) {|e,a| a.should.be.close e, 0.0001 }
64
62
  end
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-quant
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 4
9
- version: 0.0.4
4
+ prerelease:
5
+ version: 0.0.6
10
6
  platform: ruby
11
7
  authors:
12
8
  - John T. Prince
@@ -14,7 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-04-04 00:00:00 -06:00
13
+ date: 2011-04-26 00:00:00 -06:00
18
14
  default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
19
15
  dependencies:
20
16
  - !ruby/object:Gem::Dependency
@@ -25,11 +21,7 @@ dependencies:
25
21
  requirements:
26
22
  - - ">="
27
23
  - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- - 0
31
- - 19
32
- version: 0.0.19
24
+ version: 0.1.1
33
25
  type: :runtime
34
26
  version_requirements: *id001
35
27
  - !ruby/object:Gem::Dependency
@@ -40,8 +32,6 @@ dependencies:
40
32
  requirements:
41
33
  - - ">="
42
34
  - !ruby/object:Gem::Version
43
- segments:
44
- - 0
45
35
  version: "0"
46
36
  type: :development
47
37
  version_requirements: *id002
@@ -53,10 +43,6 @@ dependencies:
53
43
  requirements:
54
44
  - - ~>
55
45
  - !ruby/object:Gem::Version
56
- segments:
57
- - 1
58
- - 5
59
- - 2
60
46
  version: 1.5.2
61
47
  type: :development
62
48
  version_requirements: *id003
@@ -68,8 +54,6 @@ dependencies:
68
54
  requirements:
69
55
  - - ">="
70
56
  - !ruby/object:Gem::Version
71
- segments:
72
- - 0
73
57
  version: "0"
74
58
  type: :development
75
59
  version_requirements: *id004
@@ -89,8 +73,13 @@ files:
89
73
  - Rakefile
90
74
  - VERSION
91
75
  - bin/peptide_hit_qvalues_to_spectral_counts_table.rb
76
+ - lib/hash/inverse.rb
92
77
  - lib/ms-quant.rb
78
+ - lib/ms/quant/cmdline.rb
79
+ - lib/ms/quant/peptide.rb
80
+ - lib/ms/quant/protein_group_comparison.rb
93
81
  - lib/ms/quant/qspec.rb
82
+ - lib/ms/quant/qspec/protein_group_comparison.rb
94
83
  - lib/ms/quant/spectral_counts.rb
95
84
  - spec/ms/quant/spectral_counts_spec.rb
96
85
  - spec/spec_helper.rb
@@ -108,21 +97,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
97
  requirements:
109
98
  - - ">="
110
99
  - !ruby/object:Gem::Version
111
- segments:
112
- - 0
113
100
  version: "0"
114
101
  required_rubygems_version: !ruby/object:Gem::Requirement
115
102
  none: false
116
103
  requirements:
117
104
  - - ">="
118
105
  - !ruby/object:Gem::Version
119
- segments:
120
- - 0
121
106
  version: "0"
122
107
  requirements: []
123
108
 
124
109
  rubyforge_project:
125
- rubygems_version: 1.3.7
110
+ rubygems_version: 1.6.2
126
111
  signing_key:
127
112
  specification_version: 3
128
113
  summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)