ms-quant 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -15,7 +15,7 @@ Jeweler::Tasks.new do |gem|
15
15
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
16
16
  # gem.add_runtime_dependency 'jabber4r', '> 0.1'
17
17
  # gem.add_development_dependency 'rspec', '> 1.2.3'
18
- gem.add_runtime_dependency 'ms-ident', ">= 0.0.19"
18
+ gem.add_runtime_dependency 'ms-ident', ">= 0.1.1"
19
19
  gem.add_development_dependency "spec-more", ">= 0"
20
20
  gem.add_development_dependency "jeweler", "~> 1.5.2"
21
21
  gem.add_development_dependency "rcov", ">= 0"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.4
1
+ 0.0.6
@@ -1,29 +1,24 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'andand'
4
+ require 'set'
5
+ require 'ruport'
6
+
3
7
  require 'ms/ident/peptide_hit/qvalue'
4
- require 'ms/ident/protein_hit'
8
+ require 'ms/ident/protein'
5
9
  require 'ms/ident/peptide/db'
6
10
  require 'ms/quant/spectral_counts'
11
+ require 'ms/quant/protein_group_comparison'
12
+ require 'ms/quant/qspec/protein_group_comparison'
7
13
  require 'ms/quant/qspec'
14
+ require 'ms/quant/cmdline'
15
+
8
16
 
9
17
  require 'yaml'
10
18
  require 'tempfile'
11
19
 
12
20
  require 'trollop'
13
21
 
14
- # inverse from Tilo Sloboda (now in facets)
15
-
16
- class Hash
17
- def inverse
18
- i = Hash.new
19
- self.each_pair do |k,v|
20
- if (Array === v) ; v.each{ |x| i[x] = ( i.has_key?(x) ? [k,i[x]].flatten : k ) }
21
- else ; i[v] = ( i.has_key?(v) ? [k,i[v]].flatten : k ) end
22
- end ; i
23
- end
24
- end
25
-
26
-
27
22
  def putsv(*args)
28
23
  if $VERBOSE
29
24
  puts(*args) ; $stdout.flush
@@ -36,8 +31,48 @@ def basename(file)
36
31
  base
37
32
  end
38
33
 
# Convenience additions to Ruport's table class used by this script.
class Ruport::Data::Table
  # Adds a column named +colname+ (+opts+ are handed to #add_column) and
  # fills it, row by row, with the values of +array_of_data+.
  #
  # returns self
  def add_column_with_data(colname, array_of_data, opts={})
    add_column(colname, opts)
    data.zip(array_of_data).each { |record, value| record[colname] = value }
    self
  end

  # Writes the table to +file+ as tab-delimited text: the column names on
  # one line, then one line per row.
  #
  # acceptable opts:
  #
  #     :header => an array of lines written before the column names
  #     :footer => an array of lines written after the last row
  #
  # Every header/footer line is written commented out (prefixed with "# ").
  def to_tsv(file, opt={})
    tab = "\t"
    File.open(file, 'w') do |out|
      # writes each of the given lines as a comment; does nothing for nil
      write_comments = lambda do |lines|
        lines.each { |line| out.puts "# #{line}" } if lines
      end
      write_comments.call(opt[:header])
      out.puts column_names.join(tab)
      data.each { |record| out.puts record.to_a.join(tab) }
      write_comments.call(opt[:footer])
    end
  end

end
60
+
# Writes a subset file with one line per distinct amino acid sequence found
# among the given peptide hits, formatted as "aaseq: id<TAB>id...".
#
# sample_to_pephits:: pairs of [samplename, peptide_hits]; each hit must
#                     respond to #aaseq and #proteins (whose members respond
#                     to #id)
# outfile::           path of the file to write
def write_subset(sample_to_pephits, outfile="peptidecentric_subset.yml")
  protein_ids_by_aaseq = sample_to_pephits.flat_map(&:last).each_with_object({}) do |hit, index|
    # a later hit with the same sequence replaces the earlier entry
    index[hit.aaseq] = hit.proteins.map(&:id)
  end
  File.open(outfile, 'w') do |out|
    protein_ids_by_aaseq.each { |aaseq, ids| out.puts(%Q{#{aaseq}: #{ids.join("\t") }}) }
  end
end
72
+
39
73
 
40
74
  outfile = "spectral_counts.tsv"
75
+ pephits_outfile = "spectral_counts.pephits.tsv"
41
76
  delimiter = "\t"
42
77
 
43
78
  opts = Trollop::Parser.new do
@@ -53,6 +88,7 @@ psq is really .psq.tsv file
53
88
  opt :descriptions, "include descriptions of proteins, requires :fasta", :default => false
54
89
  opt :fasta, "the fasta file. Required for :qspec and :descriptions", :type => String
55
90
  opt :outfile, "the to which file data are written", :default => outfile
91
+ opt :peptides, "also write peptide hits (to: #{pephits_outfile})", :default => false
56
92
  opt :verbose, "speak up", :default => false
57
93
  opt :count_type, "type of spectral counts (<spectral|aaseqcharge|aaseq>)", :default => 'spectral'
58
94
  opt :qspec_normalize, "normalize spectral counts per run", :default => false
@@ -78,29 +114,8 @@ raise ArgumentError, "need .yml file for peptide centric db" unless File.extname
78
114
  putsv "using: #{peptide_centric_db_file} as peptide centric db"
79
115
 
80
116
  # groupname => files
81
- condition_to_samplenames = {}
82
- samplename_to_filename = {}
83
- ARGV.each do |arg|
84
- (condition, files) =
85
- if arg.include?('=')
86
- (condition, filestring) = arg.split('=')
87
- [condition, filestring.split(',')]
88
- else
89
- [basename(arg), [arg]]
90
- end
91
- reptag = ARGV.size
92
- sample_to_file_pairs = files.each_with_index.map {|file,i| ["#{condition}-rep#{i+1}", file] }
93
- sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
94
- condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
95
- end
96
-
97
117
 
98
- if $VERBOSE
99
- puts "** condition: sample_names"
100
- puts condition_to_samplenames.to_yaml
101
- puts "** samplename: filename"
102
- puts samplename_to_filename.to_yaml
103
- end
118
+ (samplename_to_filename, condition_to_samplenames, samplename_to_condition) = Ms::Quant::Cmdline.args_to_hashes(ARGV)
104
119
 
105
120
  raise ArgumentError, "must have 2 conditions for qspec!" if opt[:qspec] && condition_to_samplenames.size != 2
106
121
 
@@ -108,144 +123,122 @@ samplenames = samplename_to_filename.keys
108
123
 
109
124
  class Ms::Ident::PeptideHit
110
125
  attr_accessor :experiment_name
126
+ attr_accessor :protein_groups
127
+ end
128
+ class Ms::Ident::Protein
129
+ attr_accessor :length
111
130
  end
131
+
132
+
112
133
  fdr_cutoff = opt[:fdr_percent] / 100
113
134
 
114
- start=Time.now
115
-
116
- ar_of_pephit_ars = Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
117
- putsv "#{Time.now-start} seconds to read #{peptide_centric_db_file}"
118
- samplename_to_filename.map do |sample, file|
119
- peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
120
- putsv "#{file}: #{peptide_hits.size} hits"
121
- peptide_hits.select! do |hit|
122
- if hit.qvalue <= fdr_cutoff
123
- # update each peptide with its protein hits
124
- prot_ids = peptide_to_proteins[hit.aaseq]
125
- if prot_ids
126
- hit.experiment_name = sample
127
- hit.proteins = prot_ids
128
- else ; false end
129
- else
130
- false
131
- end
132
- end
133
- peptide_hits
134
- end
135
+ if opt[:qspec] || opt[:descriptions]
136
+ putsv "reading lengths and descriptions from #{opt[:fasta]}"
137
+ (id_to_length, id_to_desc) = Ms::Fasta.protein_lengths_and_descriptions(opt[:fasta])
135
138
  end
136
139
 
137
- if opt[:write_subset]
138
- aaseqs_to_prots = {}
139
- ar_of_pephit_ars.flatten(1).each do |pephit|
140
- aaseqs_to_prots[pephit.aaseq] = pephit.proteins
141
- end
142
- outfile = "peptidecentric_subset.yml"
143
- puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
144
- File.open(outfile,'w') do |out|
145
- aaseqs_to_prots.each do |k,v|
146
- out.puts(%Q{#{k}: #{v.join("\t") }})
147
- end
148
- end
140
+ samplename_to_peptidehits = samplename_to_filename.map do |sample, file|
141
+ [sample, Ms::Ident::PeptideHit::Qvalue.from_file(file).select {|hit| hit.qvalue <= fdr_cutoff }]
149
142
  end
150
143
 
151
- if $VERBOSE
152
- samplenames.zip(ar_of_pephit_ars) do |samplename, pep_ar|
153
- putsv "#{samplename}: #{pep_ar.size}"
144
+ # update each peptide hit with protein hits and sample name:
145
+ all_protein_hits = Hash.new {|h,id| h[id] = Ms::Ident::Protein.new(id) }
146
+ Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
147
+ samplename_to_peptidehits.map do |sample, peptide_hits|
148
+ peptide_hits.each do |hit|
149
+ # update each peptide with its protein hits
150
+ protein_hits = peptide_to_proteins[hit.aaseq].map do |id|
151
+ protein = all_protein_hits[id]
152
+ protein.length = id_to_length[id] if id_to_length
153
+ protein.description = id_to_desc[id] if id_to_desc
154
+ protein
155
+ end
156
+ hit.experiment_name = sample
157
+ # if there are protein hits, the peptide hit is selected
158
+ hit.proteins = protein_hits
159
+ end
154
160
  end
155
161
  end
156
162
 
157
- all_peptide_hits = ar_of_pephit_ars.flatten(1)
163
+ write_subset(samplename_to_peptidehits) if opt[:write_subset]
158
164
 
159
- # because peptide_hit#proteins yields id strings (which hash properly),
160
- # each protein group is an array of
161
- protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)
165
+ samplename_to_peptidehits.each {|samplename, hits| putsv "#{samplename}: #{hits.size}" } if $VERBOSE
162
166
 
163
- pephit_to_protein_groups = Hash.new {|h,k| h[k] = [] }
164
- protein_groups.each do |protein_group|
165
- protein_group.peptide_hits.each {|hit| pephit_to_protein_groups[hit] << protein_group }
166
- end
167
+ all_peptide_hits = samplename_to_peptidehits.map(&:last).flatten(1)
167
168
 
168
- counts_parallel_to_names_with_counts_per_group = samplenames.map do |name|
169
- pep_hit_to_prot_groups = Hash.new {|h,k| h[k] = [] }
170
- groups_of_pephits = protein_groups.map do |prot_group|
171
- pep_hits = prot_group.peptide_hits.select {|hit| hit.experiment_name == name }
172
- pep_hits.each do |pep_hit|
173
- pep_hit_to_prot_groups[pep_hit] << prot_group
174
- end # returns the group of pep_hits
175
- end
176
- counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) do |pephit|
177
- pephit_to_protein_groups[pephit].size
178
- end
179
- end
169
+ # this constricts everything down to a minimal set of protein groups that
170
+ # explain the entire set of peptide hits.
171
+ update_pephits = true # ensures that each pephit is linked to the array of protein groups it is associated with
172
+ protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits, update_pephits)
180
173
 
181
- if opt[:qspec] || opt[:descriptions]
182
- putsv "reading lengths and descriptions from #{opt[:fasta]}"
183
- (id_to_length, id_to_desc) = Ms::Fasta.protein_lengths_and_descriptions(opt[:fasta])
174
+ hits_table_hash = {} # create the table using key => column hash
175
+ samplenames.each do |name|
176
+ hits_table_hash[name] = protein_groups.map do |prot_group|
177
+ prot_group.peptide_hits.select {|hit| hit.experiment_name == name }
178
+ end
184
179
  end
185
180
 
186
- samplename_to_condition = condition_to_samplenames.inverse
181
+ # The columns are filled with groups of peptide hits, one group of hits per
182
+ # protein group (protein group order is implicit). The columns are sample names.
183
+ #
184
+ # (implied) sample1 sample2 sample3 ...
185
+ # (group1) [hit,hit] [hit...] [hit...] ...
186
+ # (group2) [hit,hit] [hit...] [hit...] ...
187
+ # ... ... ... ... ...
188
+ hits_table = Ruport::Data::Table.new(:data => hits_table_hash.values.transpose, :column_names => hits_table_hash.keys)
187
189
 
188
- ### OUTPUT TABLE
189
- header_cats = samplenames.map.to_a
190
+ # spectral counts of type opt[:count_type]
191
+ counts_data = hits_table.data.map do |row|
192
+ row.map do |pephits|
193
+ Ms::Quant::SpectralCounts.counts(pephits) {|pephit| 1.0 / pephit.protein_groups.size }.send(opt[:count_type])
194
+ end
195
+ end
190
196
 
191
- ar_of_rows = counts_parallel_to_names_with_counts_per_group.map do |counts_per_group|
192
- counts_per_group.map(&opt[:count_type])
193
- end.transpose
197
+ # each cell holds the count of type opt[:count_type] for one protein group in one sample
198
+ counts_table = Ruport::Data::Table.new(:data => counts_data, :column_names => samplenames)
194
199
 
200
+ # return a list of ProteinGroupComparisons
195
201
  if opt[:qspec]
196
- all_conditions = samplenames.map {|sn| samplename_to_condition[sn] }
197
- condition_to_count_array = all_conditions.zip(counts_parallel_to_names_with_counts_per_group).map do |condition, counts_par_groups|
198
- [condition, counts_par_groups.map(&opt[:count_type])]
199
- end
200
202
 
203
+ # prepare data for qspec
204
+ condition_to_count_array = counts_table.column_names.map {|name| [name, counts_table.column(name)] }
205
+ # average length of the proteins in the group
201
206
  name_length_pairs = protein_groups.map do |pg|
202
- # prefer swissprot (sp) proteins over tremble (tr) and shorter protein
203
- # lengths over longer lengths
204
- best_guess_protein_id = pg.sort_by {|prot_id| [prot_id, -id_to_length[prot_id]] }.first
205
- length = id_to_length[best_guess_protein_id]
206
- [pg.join(":"), length]
207
+ [pg.join(":"), pg.map(&:length).reduce(:+)./(pg.size).round]
207
208
  end
208
209
 
209
- putsv "qspec to normalize counts: #{opt[:qspec_normalize]}"
210
210
  qspec_results = Ms::Quant::Qspec.new(name_length_pairs, condition_to_count_array).run(opt[:qspec_normalize])
211
-
212
- to_add = [:fdr, :bayes_factor, :fold_change]
213
- header_cats.push(*to_add)
214
- qspec_results.zip(ar_of_rows) do |zipped|
215
- (result, row) = zipped
216
- row.push(*to_add.map {|v| result.send(v) })
211
+
212
+ cols_to_add = [:bayes_factor, :fold_change, :fdr]
213
+ counts_table.add_columns cols_to_add
214
+ counts_table.data.zip(qspec_results) do |row, qspec_result|
215
+ cols_to_add.each {|cat| row[cat] = qspec_result[cat] }
217
216
  end
218
217
  end
219
218
 
220
- header_cats.push( *%w(BestID AllIDs) )
221
- header_cats.push( 'Description' ) if opt[:descriptions]
219
+ counts_table.add_columns( [:name, :ids, :description] )
220
+ counts_table.data.zip(protein_groups) do |row, pg|
221
+ best_id = pg.sort_by {|prot| [prot.id, prot.length] }.first
222
+ row.name = best_id.description.andand.match(/ GN=([^\s]+) ?/).andand[1] || best_id.id
223
+ row.ids = pg.map(&:id).join(',')
224
+ row.description = best_id.description
225
+ end
222
226
 
223
- sort_protein_id =
224
- if id_to_length
225
- lambda {|prot_id| [prot_id, -id_to_length[prot_id]] }
226
- else
227
- lambda {|prot_id| prot_id }
228
- end
229
227
 
230
- protein_groups.zip(ar_of_rows) do |zipped|
231
- (pg, row) = zipped
232
- # swiss-prot and then the shortest
233
- best_protid = pg.sort_by(&sort_protein_id).first
234
- (gene_id, desc) =
235
- if opt[:descriptions]
236
- desc = id_to_desc[best_protid]
237
- gene_id = (md=desc.match(/ GN=(\w+) ?/)) ? md[1] : best_protid
238
- [gene_id, desc]
239
- else
240
- [best_protid, nil]
228
+ if opt[:peptides]
229
+ hits_table.each do |record|
230
+ record.each_with_index do |hits,i|
231
+ new_cell = hits.group_by do |hit|
232
+ [hit.aaseq, hit.charge]
233
+ end.map do |key, hits|
234
+ [key.reverse.join("_"), hits.map(&:id).join(',')].join(":")
235
+ end.join('; ')
236
+ record[i] = new_cell
241
237
  end
242
- row << gene_id << pg.join(',')
243
- row.push(desc) if desc
244
- end
245
-
246
- File.open(opt[:outfile],'w') do |out|
247
- out.puts header_cats.join(delimiter)
248
- ar_of_rows.each {|row| out.puts row.join(delimiter) }
249
- putsv "wrote: #{opt[:outfile]}"
238
+ end
239
+ hits_table.add_column_with_data(:name, counts_table.column(:name), :position=>0)
240
+ hits_table.to_tsv(pephits_outfile, :footer => ["parallel to #{outfile}"])
250
241
  end
251
242
 
243
+ intro = ["samples: #{samplename_to_filename}", "options: #{opt}"]
244
+ counts_table.to_tsv(outfile, :footer => intro)
@@ -0,0 +1,15 @@
1
+
2
+
3
+ # inverse from Tilo Sloboda (now in facets)
4
+
class Hash
  # Returns a new hash that maps each value of this hash back to its key.
  #
  # Array values are expanded, so every element of such an array becomes a
  # key of the result. A value reached from a single key maps to that key;
  # a value reached from several keys maps to an array of those keys, most
  # recently visited key first.
  #
  #   {:a => 1, :b => [1, 2]}.inverse   # => {1 => [:b, :a], 2 => :b}
  def inverse
    inverted = {}
    # values that already map to more than one key; tracked explicitly so a
    # key that is itself an Array is never mistaken for a list of keys
    shared = {}
    each_pair do |key, value|
      targets = (Array === value) ? value : [value]
      targets.each do |target|
        if inverted.has_key?(target)
          earlier = shared[target] ? inverted[target] : [inverted[target]]
          inverted[target] = [key] + earlier
          shared[target] = true
        else
          inverted[target] = key
        end
      end
    end
    inverted
  end
end
14
+
15
+
@@ -0,0 +1,42 @@
1
+ require 'hash/inverse'
2
+
module Ms ; module Quant ; end ; end

module Ms::Quant::Cmdline

  # expects arguments in one of two forms.  The first form is grouped by
  # condition as shown:
  #
  #     condition1=file1,file2,file3... condition2=file4,file5...
  #
  # The second is where each file is its own condition (1 replicate):
  #
  #     file1 file2 file3
  #
  # Returns an array of three hashes (only ordered for ruby 1.9), in this
  # order:
  #
  #     1) Samplename to the filename
  #     2) Condition to an array of samplenames
  #     3) Samplename to condition
  #
  # A sample name is the condition followed by replicate_postfix and the
  # 1-based position of the file ("cond-rep1", "cond-rep2", ...); a
  # condition with a single file uses the bare condition as its sample name.
  #
  # Raises ArgumentError if a "condition=" argument names no files.
  #
  # NOTE(review): the second form calls a +basename+ helper that this module
  # does not define -- presumably the top-level helper of the executable;
  # confirm it is loaded before calling with bare file arguments.
  # The third hash is built with Hash#inverse (hash/inverse).
  def self.args_to_hashes(args, replicate_postfix="-rep")
    # groupname => files
    condition_to_samplenames = {}
    samplename_to_filename = {}
    args.each do |arg|
      (condition, files) =
        if arg.include?('=')
          # split on the first '=' only, so file names may contain '='
          (condition, filestring) = arg.split('=', 2)
          [condition, filestring.split(',')]
        else
          [basename(arg), [arg]]
        end
      raise ArgumentError, "no files given for condition: #{condition}" if files.empty?
      sample_to_file_pairs = files.each_with_index.map do |file,i|
        rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
        ["#{condition}#{rep_string}", file]
      end
      sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
      condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
    end
    [samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
  end
end
@@ -0,0 +1,2 @@
1
+
2
+ Peptide
@@ -0,0 +1,29 @@
1
+
module Ms
  module Quant
  end
end

# Mixin that ties one protein group to the values measured for it across a
# list of experiments.  Including classes get the three accessors and the
# three-argument constructor below.
module Ms::Quant::ProteinGroupComparison

  # a protein group object
  attr_accessor :protein_group

  # an array of experiment names
  attr_accessor :experiments

  # parallel array to experiments with the measured values
  attr_accessor :values

  # protein_group:: a protein group object
  # experiments::   an array of experiment names
  # values::        measured values, parallel to experiments
  def initialize(protein_group, experiments, values)
    # the instance variable must be @experiments to back the accessor
    (@protein_group, @experiments, @values) = protein_group, experiments, values
  end
end

class Ms::Quant::ProteinGroupComparison::SpectralCounts
  include Ms::Quant::ProteinGroupComparison
end

class Ms::Quant::ProteinGroupComparison::UniqAAzCounts
  include Ms::Quant::ProteinGroupComparison
end
@@ -0,0 +1,22 @@
1
+ require 'ms/quant/protein_group_comparison'
2
+
module Ms; module Quant; module ProteinGroupComparison; end; end; end

# A protein group comparison whose values are the counts array carried by a
# qspec results struct.
class Ms::Quant::ProteinGroupComparison::Qspec
  include Ms::Quant::ProteinGroupComparison

  # the qspec results struct handed to the constructor
  attr_accessor :qspec_results_struct

  # protein_group::        a protein group object
  # experiments::          an array of experiment names
  # qspec_results_struct:: a qspec results struct; its #counts_array is
  #                        passed on as the comparison's values
  def initialize(protein_group, experiments, qspec_results_struct)
    counts = qspec_results_struct.counts_array
    super(protein_group, experiments, counts)
    @qspec_results_struct = qspec_results_struct
  end
end
22
+
@@ -31,6 +31,7 @@ class Ms::Quant::Qspec
31
31
  start_bayes = headers.index {|v| v =~ /BayesFactor/i }
32
32
  rows.map do |row|
33
33
  data = [row[0]]
34
+ data.push( row[1...start_bayes].map(&:to_f) )
34
35
  data.push( *row[start_bayes,4].map(&:to_f) )
35
36
  data.push( row[start_bayes+4] )
36
37
  Results.new(*data)
@@ -68,6 +69,8 @@ class Ms::Quant::Qspec
68
69
  end
69
70
  end
70
71
 
72
+ # returns an array of Qspec::Results objects (each object can be considered
73
+ # a row of data)
71
74
  def run(normalize=true, opts={})
72
75
  puts "normalize: #{normalize}" if $VERBOSE
73
76
  tfile = Tempfile.new("qspec")
@@ -87,6 +90,7 @@ class Ms::Quant::Qspec
87
90
  end
88
91
 
89
92
  # for version 2 of QSpec
90
- Results = Struct.new(:protid, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
93
+ # counts array is parallel to the experiment names passed in originally
94
+ Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
91
95
  end
92
96
 
@@ -8,28 +8,28 @@ module Ms
8
8
 
9
9
  # returns a parallel array of Count objects. If split_hits then counts
10
10
  # are split between groups sharing the hit. peptide_hits must respond
11
- # to :charge and :aaseq. If split_hits, then each peptide_hit must
12
- # respond to :linked_to yielding an object with a :size reflective of
13
- # the number of shared peptide_hits.
14
- def self.counts(groups_of_peptide_hits, &share_the_pephit)
15
- groups_of_peptide_hits.map do |peptide_hits|
16
- uniq_aaseq = {}
17
- uniq_aaseq_charge = {}
18
- linked_sizes = peptide_hits.map do |hit|
19
- linked_to_size = share_the_pephit ? share_the_pephit.call(hit) : 1
20
- # these guys will end up clobbering themselves, but the
21
- # linked_to_size should be consistent if the key is the same
22
- uniq_aaseq_charge[[hit.aaseq, hit.charge]] = linked_to_size
23
- uniq_aaseq[hit.aaseq] = linked_to_size
24
- linked_to_size
25
- end
26
- counts_data = [linked_sizes, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
27
- share_the_pephit ? array.inject(0.0) {|sum,size| sum+=(1.0/size) } : array.size
28
- end
29
- Counts.new(*counts_data)
11
+ # to :charge and :aaseq. If a block is given, the weight of a
12
+ # particular hit can be given (typically this will be 1/#proteins
13
+ # sharing the hit).
# Tallies one list of peptide hits and returns a single Counts object built
# from three sums, in this order:
#
#     1) the weights of all hits
#     2) one weight per distinct [aaseq, charge] pair
#     3) one weight per distinct aaseq
#
# A hit's weight is the value the block returns for it, or 1 when no block
# is given.  An empty list of hits gives sums of 0.
def self.counts(peptide_hits, &share_the_pephit)
  weight_by_aaseq = {}
  weight_by_aaseq_charge = {}
  weights = peptide_hits.map do |hit|
    weight = share_the_pephit ? share_the_pephit.call(hit) : 1
    # hits sharing a key overwrite one another here; the weight is expected
    # to be the same whenever the key is the same
    weight_by_aaseq_charge[[hit.aaseq, hit.charge]] = weight
    weight_by_aaseq[hit.aaseq] = weight
    weight
  end
  counts_data = [weights, weight_by_aaseq_charge.values, weight_by_aaseq.values].map do |array|
    # seed with 0: reduce(:+) alone returns nil for an empty array
    array.reduce(0, :+)
  end
  Counts.new(*counts_data)
end
32
-
33
30
  end
34
31
  end
35
32
  end
33
+
34
+
35
+
@@ -2,8 +2,6 @@ require 'spec_helper'
2
2
 
3
3
  require 'ms/quant/spectral_counts'
4
4
 
5
-
6
-
7
5
  PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
8
6
  def initialize(*args)
9
7
  super(*args)
@@ -58,7 +56,7 @@ describe 'groups of peptide hits' do
58
56
 
59
57
  it 'finds spectral counts (splitting counts between shared)' do
60
58
  groups_of_pephits = @prot_hits.map(&:peptide_hits)
61
- counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
59
+ counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| 1.0 / pephit.proteins.size }
62
60
  @expected_counts_split.zip(counts) do |exp, act|
63
61
  exp.zip(act) {|e,a| a.should.be.close e, 0.0001 }
64
62
  end
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-quant
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 4
9
- version: 0.0.4
4
+ prerelease:
5
+ version: 0.0.6
10
6
  platform: ruby
11
7
  authors:
12
8
  - John T. Prince
@@ -14,7 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-04-04 00:00:00 -06:00
13
+ date: 2011-04-26 00:00:00 -06:00
18
14
  default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
19
15
  dependencies:
20
16
  - !ruby/object:Gem::Dependency
@@ -25,11 +21,7 @@ dependencies:
25
21
  requirements:
26
22
  - - ">="
27
23
  - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- - 0
31
- - 19
32
- version: 0.0.19
24
+ version: 0.1.1
33
25
  type: :runtime
34
26
  version_requirements: *id001
35
27
  - !ruby/object:Gem::Dependency
@@ -40,8 +32,6 @@ dependencies:
40
32
  requirements:
41
33
  - - ">="
42
34
  - !ruby/object:Gem::Version
43
- segments:
44
- - 0
45
35
  version: "0"
46
36
  type: :development
47
37
  version_requirements: *id002
@@ -53,10 +43,6 @@ dependencies:
53
43
  requirements:
54
44
  - - ~>
55
45
  - !ruby/object:Gem::Version
56
- segments:
57
- - 1
58
- - 5
59
- - 2
60
46
  version: 1.5.2
61
47
  type: :development
62
48
  version_requirements: *id003
@@ -68,8 +54,6 @@ dependencies:
68
54
  requirements:
69
55
  - - ">="
70
56
  - !ruby/object:Gem::Version
71
- segments:
72
- - 0
73
57
  version: "0"
74
58
  type: :development
75
59
  version_requirements: *id004
@@ -89,8 +73,13 @@ files:
89
73
  - Rakefile
90
74
  - VERSION
91
75
  - bin/peptide_hit_qvalues_to_spectral_counts_table.rb
76
+ - lib/hash/inverse.rb
92
77
  - lib/ms-quant.rb
78
+ - lib/ms/quant/cmdline.rb
79
+ - lib/ms/quant/peptide.rb
80
+ - lib/ms/quant/protein_group_comparison.rb
93
81
  - lib/ms/quant/qspec.rb
82
+ - lib/ms/quant/qspec/protein_group_comparison.rb
94
83
  - lib/ms/quant/spectral_counts.rb
95
84
  - spec/ms/quant/spectral_counts_spec.rb
96
85
  - spec/spec_helper.rb
@@ -108,21 +97,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
97
  requirements:
109
98
  - - ">="
110
99
  - !ruby/object:Gem::Version
111
- segments:
112
- - 0
113
100
  version: "0"
114
101
  required_rubygems_version: !ruby/object:Gem::Requirement
115
102
  none: false
116
103
  requirements:
117
104
  - - ">="
118
105
  - !ruby/object:Gem::Version
119
- segments:
120
- - 0
121
106
  version: "0"
122
107
  requirements: []
123
108
 
124
109
  rubyforge_project:
125
- rubygems_version: 1.3.7
110
+ rubygems_version: 1.6.2
126
111
  signing_key:
127
112
  specification_version: 3
128
113
  summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)