ms-quant 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/peptide_hit_qvalues_to_spectral_counts_table.rb +137 -144
- data/lib/hash/inverse.rb +15 -0
- data/lib/ms/quant/cmdline.rb +42 -0
- data/lib/ms/quant/peptide.rb +2 -0
- data/lib/ms/quant/protein_group_comparison.rb +29 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +5 -1
- data/lib/ms/quant/spectral_counts.rb +20 -20
- data/spec/ms/quant/spectral_counts_spec.rb +1 -3
- metadata +10 -25
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
16
16
|
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
17
17
|
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
18
|
-
gem.add_runtime_dependency 'ms-ident', ">= 0.0.19"
|
18
|
+
gem.add_runtime_dependency 'ms-ident', ">= 0.1.1"
|
19
19
|
gem.add_development_dependency "spec-more", ">= 0"
|
20
20
|
gem.add_development_dependency "jeweler", "~> 1.5.2"
|
21
21
|
gem.add_development_dependency "rcov", ">= 0"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.4
|
1
|
+
0.0.6
|
data/bin/peptide_hit_qvalues_to_spectral_counts_table.rb
CHANGED
@@ -1,29 +1,24 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'andand'
|
4
|
+
require 'set'
|
5
|
+
require 'ruport'
|
6
|
+
|
3
7
|
require 'ms/ident/peptide_hit/qvalue'
|
4
|
-
require 'ms/ident/
|
8
|
+
require 'ms/ident/protein'
|
5
9
|
require 'ms/ident/peptide/db'
|
6
10
|
require 'ms/quant/spectral_counts'
|
11
|
+
require 'ms/quant/protein_group_comparison'
|
12
|
+
require 'ms/quant/qspec/protein_group_comparison'
|
7
13
|
require 'ms/quant/qspec'
|
14
|
+
require 'ms/quant/cmdline'
|
15
|
+
|
8
16
|
|
9
17
|
require 'yaml'
|
10
18
|
require 'tempfile'
|
11
19
|
|
12
20
|
require 'trollop'
|
13
21
|
|
14
|
-
# inverse from Tilo Sloboda (now in facets)
|
15
|
-
|
16
|
-
class Hash
|
17
|
-
def inverse
|
18
|
-
i = Hash.new
|
19
|
-
self.each_pair do |k,v|
|
20
|
-
if (Array === v) ; v.each{ |x| i[x] = ( i.has_key?(x) ? [k,i[x]].flatten : k ) }
|
21
|
-
else ; i[v] = ( i.has_key?(v) ? [k,i[v]].flatten : k ) end
|
22
|
-
end ; i
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
22
|
def putsv(*args)
|
28
23
|
if $VERBOSE
|
29
24
|
puts(*args) ; $stdout.flush
|
@@ -36,8 +31,48 @@ def basename(file)
|
|
36
31
|
base
|
37
32
|
end
|
38
33
|
|
34
|
+
class Ruport::Data::Table
|
35
|
+
# returns self
|
36
|
+
def add_column_with_data(colname, array_of_data, opts={})
|
37
|
+
self.add_column(colname, opts)
|
38
|
+
self.data.zip(array_of_data) do |row, newval|
|
39
|
+
row[colname] = newval
|
40
|
+
end
|
41
|
+
self
|
42
|
+
end
|
43
|
+
|
44
|
+
# acceptable opts:
|
45
|
+
#
|
46
|
+
# :header => an array of lines (each which will be commented out)
|
47
|
+
def to_tsv(file, opt={})
|
48
|
+
delimiter = "\t"
|
49
|
+
File.open(file,'w') do |out|
|
50
|
+
opt[:header].each {|line| out.puts "# #{line}" } if opt[:header]
|
51
|
+
out.puts self.column_names.join(delimiter)
|
52
|
+
self.data.each do |row|
|
53
|
+
out.puts row.to_a.join(delimiter)
|
54
|
+
end
|
55
|
+
opt[:footer].each {|line| out.puts "# #{line}" } if opt[:footer]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
def write_subset(sample_to_pephits, outfile="peptidecentric_subset.yml")
|
62
|
+
aaseqs_to_prots = {}
|
63
|
+
sample_to_pephits.map(&:last).flatten(1).each do |pephit|
|
64
|
+
aaseqs_to_prots[pephit.aaseq] = pephit.proteins.map(&:id)
|
65
|
+
end
|
66
|
+
File.open(outfile,'w') do |out|
|
67
|
+
aaseqs_to_prots.each do |k,v|
|
68
|
+
out.puts(%Q{#{k}: #{v.join("\t") }})
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
39
73
|
|
40
74
|
outfile = "spectral_counts.tsv"
|
75
|
+
pephits_outfile = "spectral_counts.pephits.tsv"
|
41
76
|
delimiter = "\t"
|
42
77
|
|
43
78
|
opts = Trollop::Parser.new do
|
@@ -53,6 +88,7 @@ psq is really .psq.tsv file
|
|
53
88
|
opt :descriptions, "include descriptions of proteins, requires :fasta", :default => false
|
54
89
|
opt :fasta, "the fasta file. Required for :qspec and :descriptions", :type => String
|
55
90
|
opt :outfile, "the to which file data are written", :default => outfile
|
91
|
+
opt :peptides, "also write peptide hits (to: #{pephits_outfile})", :default => false
|
56
92
|
opt :verbose, "speak up", :default => false
|
57
93
|
opt :count_type, "type of spectral counts (<spectral|aaseqcharge|aaseq>)", :default => 'spectral'
|
58
94
|
opt :qspec_normalize, "normalize spectral counts per run", :default => false
|
@@ -78,29 +114,8 @@ raise ArgumentError, "need .yml file for peptide centric db" unless File.extname
|
|
78
114
|
putsv "using: #{peptide_centric_db_file} as peptide centric db"
|
79
115
|
|
80
116
|
# groupname => files
|
81
|
-
condition_to_samplenames = {}
|
82
|
-
samplename_to_filename = {}
|
83
|
-
ARGV.each do |arg|
|
84
|
-
(condition, files) =
|
85
|
-
if arg.include?('=')
|
86
|
-
(condition, filestring) = arg.split('=')
|
87
|
-
[condition, filestring.split(',')]
|
88
|
-
else
|
89
|
-
[basename(arg), [arg]]
|
90
|
-
end
|
91
|
-
reptag = ARGV.size
|
92
|
-
sample_to_file_pairs = files.each_with_index.map {|file,i| ["#{condition}-rep#{i+1}", file] }
|
93
|
-
sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
|
94
|
-
condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
|
95
|
-
end
|
96
|
-
|
97
117
|
|
98
|
-
|
99
|
-
puts "** condition: sample_names"
|
100
|
-
puts condition_to_samplenames.to_yaml
|
101
|
-
puts "** samplename: filename"
|
102
|
-
puts samplename_to_filename.to_yaml
|
103
|
-
end
|
118
|
+
(samplename_to_filename, condition_to_samplenames, samplename_to_condition) = Ms::Quant::Cmdline.args_to_hashes(ARGV)
|
104
119
|
|
105
120
|
raise ArgumentError, "must have 2 conditions for qspec!" if opt[:qspec] && condition_to_samplenames.size != 2
|
106
121
|
|
@@ -108,144 +123,122 @@ samplenames = samplename_to_filename.keys
|
|
108
123
|
|
109
124
|
class Ms::Ident::PeptideHit
|
110
125
|
attr_accessor :experiment_name
|
126
|
+
attr_accessor :protein_groups
|
127
|
+
end
|
128
|
+
class Ms::Ident::Protein
|
129
|
+
attr_accessor :length
|
111
130
|
end
|
131
|
+
|
132
|
+
|
112
133
|
fdr_cutoff = opt[:fdr_percent] / 100
|
113
134
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
putsv "#{Time.now-start} seconds to read #{peptide_centric_db_file}"
|
118
|
-
samplename_to_filename.map do |sample, file|
|
119
|
-
peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
|
120
|
-
putsv "#{file}: #{peptide_hits.size} hits"
|
121
|
-
peptide_hits.select! do |hit|
|
122
|
-
if hit.qvalue <= fdr_cutoff
|
123
|
-
# update each peptide with its protein hits
|
124
|
-
prot_ids = peptide_to_proteins[hit.aaseq]
|
125
|
-
if prot_ids
|
126
|
-
hit.experiment_name = sample
|
127
|
-
hit.proteins = prot_ids
|
128
|
-
else ; false end
|
129
|
-
else
|
130
|
-
false
|
131
|
-
end
|
132
|
-
end
|
133
|
-
peptide_hits
|
134
|
-
end
|
135
|
+
if opt[:qspec] || opt[:descriptions]
|
136
|
+
putsv "reading lengths and descriptions from #{opt[:fasta]}"
|
137
|
+
(id_to_length, id_to_desc) = Ms::Fasta.protein_lengths_and_descriptions(opt[:fasta])
|
135
138
|
end
|
136
139
|
|
137
|
-
|
138
|
-
|
139
|
-
ar_of_pephit_ars.flatten(1).each do |pephit|
|
140
|
-
aaseqs_to_prots[pephit.aaseq] = pephit.proteins
|
141
|
-
end
|
142
|
-
outfile = "peptidecentric_subset.yml"
|
143
|
-
puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
|
144
|
-
File.open(outfile,'w') do |out|
|
145
|
-
aaseqs_to_prots.each do |k,v|
|
146
|
-
out.puts(%Q{#{k}: #{v.join("\t") }})
|
147
|
-
end
|
148
|
-
end
|
140
|
+
samplename_to_peptidehits = samplename_to_filename.map do |sample, file|
|
141
|
+
[sample, Ms::Ident::PeptideHit::Qvalue.from_file(file).select {|hit| hit.qvalue <= fdr_cutoff }]
|
149
142
|
end
|
150
143
|
|
151
|
-
|
152
|
-
|
153
|
-
|
144
|
+
# update each peptide hit with protein hits and sample name:
|
145
|
+
all_protein_hits = Hash.new {|h,id| h[id] = Ms::Ident::Protein.new(id) }
|
146
|
+
Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
|
147
|
+
samplename_to_peptidehits.map do |sample, peptide_hits|
|
148
|
+
peptide_hits.each do |hit|
|
149
|
+
# update each peptide with its protein hits
|
150
|
+
protein_hits = peptide_to_proteins[hit.aaseq].map do |id|
|
151
|
+
protein = all_protein_hits[id]
|
152
|
+
protein.length = id_to_length[id] if id_to_length
|
153
|
+
protein.description = id_to_desc[id] if id_to_desc
|
154
|
+
protein
|
155
|
+
end
|
156
|
+
hit.experiment_name = sample
|
157
|
+
# if there are protein hits, the peptide hit is selected
|
158
|
+
hit.proteins = protein_hits
|
159
|
+
end
|
154
160
|
end
|
155
161
|
end
|
156
162
|
|
157
|
-
|
163
|
+
write_subset(samplename_to_peptidehits) if opt[:write_subset]
|
158
164
|
|
159
|
-
|
160
|
-
# each protein group is an array of
|
161
|
-
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)
|
165
|
+
samplename_to_peptidehits.each {|samplename, hits| putsv "#{samplename}: #{hits.size}" } if $VERBOSE
|
162
166
|
|
163
|
-
|
164
|
-
protein_groups.each do |protein_group|
|
165
|
-
protein_group.peptide_hits.each {|hit| pephit_to_protein_groups[hit] << protein_group }
|
166
|
-
end
|
167
|
+
all_peptide_hits = samplename_to_peptidehits.map(&:last).flatten(1)
|
167
168
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
pep_hits.each do |pep_hit|
|
173
|
-
pep_hit_to_prot_groups[pep_hit] << prot_group
|
174
|
-
end # returns the group of pep_hits
|
175
|
-
end
|
176
|
-
counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) do |pephit|
|
177
|
-
pephit_to_protein_groups[pephit].size
|
178
|
-
end
|
179
|
-
end
|
169
|
+
# this constricts everything down to a minimal set of protein groups that
|
170
|
+
# explain the entire set of peptide hits.
|
171
|
+
update_pephits = true # ensures that each pephit is linked to the array of protein groups it is associated with
|
172
|
+
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits, update_pephits)
|
180
173
|
|
181
|
-
|
182
|
-
|
183
|
-
|
174
|
+
hits_table_hash = {} # create the table using key => column hash
|
175
|
+
samplenames.each do |name|
|
176
|
+
hits_table_hash[name] = protein_groups.map do |prot_group|
|
177
|
+
prot_group.peptide_hits.select {|hit| hit.experiment_name == name }
|
178
|
+
end
|
184
179
|
end
|
185
180
|
|
186
|
-
|
181
|
+
# The columns are filled with groups of peptide hits, one group of hits per
|
182
|
+
# protein group (protein group order is implicit). The rows are sample names.
|
183
|
+
#
|
184
|
+
# (implied) sample1 sample2 sample3 ...
|
185
|
+
# (group1) [hit,hit] [hit...] [hit...] ...
|
186
|
+
# (group2) [hit,hit] [hit...] [hit...] ...
|
187
|
+
# ... ... ... ... ...
|
188
|
+
hits_table = Ruport::Data::Table.new(:data => hits_table_hash.values.transpose, :column_names => hits_table_hash.keys)
|
187
189
|
|
188
|
-
|
189
|
-
|
190
|
+
# spectral counts of type opt[:count_type]
|
191
|
+
counts_data = hits_table.data.map do |row|
|
192
|
+
row.map do |pephits|
|
193
|
+
Ms::Quant::SpectralCounts.counts(pephits) {|pephit| 1.0 / pephit.protein_groups.size }.send(opt[:count_type])
|
194
|
+
end
|
195
|
+
end
|
190
196
|
|
191
|
-
|
192
|
-
|
193
|
-
end.transpose
|
197
|
+
# each cell holds a SpectralCounts object, which hash 3 types of count data
|
198
|
+
counts_table = Ruport::Data::Table.new(:data => counts_data, :column_names => samplenames)
|
194
199
|
|
200
|
+
# return a list of ProteinGroupComparisons
|
195
201
|
if opt[:qspec]
|
196
|
-
all_conditions = samplenames.map {|sn| samplename_to_condition[sn] }
|
197
|
-
condition_to_count_array = all_conditions.zip(counts_parallel_to_names_with_counts_per_group).map do |condition, counts_par_groups|
|
198
|
-
[condition, counts_par_groups.map(&opt[:count_type])]
|
199
|
-
end
|
200
202
|
|
203
|
+
# prepare data for qspec
|
204
|
+
condition_to_count_array = counts_table.column_names.map {|name| [name, counts_table.column(name)] }
|
205
|
+
# average length of the proteins in the group
|
201
206
|
name_length_pairs = protein_groups.map do |pg|
|
202
|
-
|
203
|
-
# lengths over longer lengths
|
204
|
-
best_guess_protein_id = pg.sort_by {|prot_id| [prot_id, -id_to_length[prot_id]] }.first
|
205
|
-
length = id_to_length[best_guess_protein_id]
|
206
|
-
[pg.join(":"), length]
|
207
|
+
[pg.join(":"), pg.map(&:length).reduce(:+)./(pg.size).round]
|
207
208
|
end
|
208
209
|
|
209
|
-
putsv "qspec to normalize counts: #{opt[:qspec_normalize]}"
|
210
210
|
qspec_results = Ms::Quant::Qspec.new(name_length_pairs, condition_to_count_array).run(opt[:qspec_normalize])
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
row.push(*to_add.map {|v| result.send(v) })
|
211
|
+
|
212
|
+
cols_to_add = [:bayes_factor, :fold_change, :fdr]
|
213
|
+
counts_table.add_columns cols_to_add
|
214
|
+
counts_table.data.zip(qspec_results) do |row, qspec_result|
|
215
|
+
cols_to_add.each {|cat| row[cat] = qspec_result[cat] }
|
217
216
|
end
|
218
217
|
end
|
219
218
|
|
220
|
-
|
221
|
-
|
219
|
+
counts_table.add_columns( [:name, :ids, :description] )
|
220
|
+
counts_table.data.zip(protein_groups) do |row, pg|
|
221
|
+
best_id = pg.sort_by {|prot| [prot.id, prot.length] }.first
|
222
|
+
row.name = best_id.description.andand.match(/ GN=([^\s]+) ?/).andand[1] || best_id.id
|
223
|
+
row.ids = pg.map(&:id).join(',')
|
224
|
+
row.description = best_id.description
|
225
|
+
end
|
222
226
|
|
223
|
-
sort_protein_id =
|
224
|
-
if id_to_length
|
225
|
-
lambda {|prot_id| [prot_id, -id_to_length[prot_id]] }
|
226
|
-
else
|
227
|
-
lambda {|prot_id| prot_id }
|
228
|
-
end
|
229
227
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
[
|
239
|
-
else
|
240
|
-
[best_protid, nil]
|
228
|
+
if opt[:peptides]
|
229
|
+
hits_table.each do |record|
|
230
|
+
record.each_with_index do |hits,i|
|
231
|
+
new_cell = hits.group_by do |hit|
|
232
|
+
[hit.aaseq, hit.charge]
|
233
|
+
end.map do |key, hits|
|
234
|
+
[key.reverse.join("_"), hits.map(&:id).join(',')].join(":")
|
235
|
+
end.join('; ')
|
236
|
+
record[i] = new_cell
|
241
237
|
end
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
File.open(opt[:outfile],'w') do |out|
|
247
|
-
out.puts header_cats.join(delimiter)
|
248
|
-
ar_of_rows.each {|row| out.puts row.join(delimiter) }
|
249
|
-
putsv "wrote: #{opt[:outfile]}"
|
238
|
+
end
|
239
|
+
hits_table.add_column_with_data(:name, counts_table.column(:name), :position=>0)
|
240
|
+
hits_table.to_tsv(pephits_outfile, :footer => ["parallel to #{outfile}"])
|
250
241
|
end
|
251
242
|
|
243
|
+
intro = ["samples: #{samplename_to_filename}", "options: #{opt}"]
|
244
|
+
counts_table.to_tsv(outfile, :footer => intro)
|
data/lib/hash/inverse.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# inverse from Tilo Sloboda (now in facets)
|
4
|
+
|
5
|
+
class Hash
|
6
|
+
def inverse
|
7
|
+
i = Hash.new
|
8
|
+
self.each_pair do |k,v|
|
9
|
+
if (Array === v) ; v.each{ |x| i[x] = ( i.has_key?(x) ? [k,i[x]].flatten : k ) }
|
10
|
+
else ; i[v] = ( i.has_key?(v) ? [k,i[v]].flatten : k ) end
|
11
|
+
end ; i
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
|
data/lib/ms/quant/cmdline.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'hash/inverse'
|
2
|
+
|
3
|
+
module Ms ; module Quant ; end ; end
|
4
|
+
|
5
|
+
module Ms::Quant::Cmdline
|
6
|
+
|
7
|
+
# expects arguments in one of two forms. The first form is grouped by
|
8
|
+
# condition as shown:
|
9
|
+
#
|
10
|
+
# condition1=file1,file2,file3... condition2=file4,file5...
|
11
|
+
#
|
12
|
+
# The second is where each file is its own condition (1 replicate):
|
13
|
+
#
|
14
|
+
# file1 file2 file3
|
15
|
+
#
|
16
|
+
# Returns three ordered hashes (only ordered for ruby 1.9):
|
17
|
+
#
|
18
|
+
# 1) Condition to an array of samplenames
|
19
|
+
# 2) Samplename to the filename
|
20
|
+
# 3) Samplename to condition
|
21
|
+
def self.args_to_hashes(args, replicate_postfix="-rep")
|
22
|
+
# groupname => files
|
23
|
+
condition_to_samplenames = {}
|
24
|
+
samplename_to_filename = {}
|
25
|
+
args.each do |arg|
|
26
|
+
(condition, files) =
|
27
|
+
if arg.include?('=')
|
28
|
+
(condition, filestring) = arg.split('=')
|
29
|
+
[condition, filestring.split(',')]
|
30
|
+
else
|
31
|
+
[basename(arg), [arg]]
|
32
|
+
end
|
33
|
+
sample_to_file_pairs = files.each_with_index.map do |file,i|
|
34
|
+
rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
|
35
|
+
["#{condition}#{rep_string}", file]
|
36
|
+
end
|
37
|
+
sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
|
38
|
+
condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
|
39
|
+
end
|
40
|
+
[samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
|
41
|
+
end
|
42
|
+
end
|
data/lib/ms/quant/protein_group_comparison.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
|
2
|
+
module Ms
|
3
|
+
module Quant
|
4
|
+
end
|
5
|
+
end
|
6
|
+
|
7
|
+
module Ms::Quant::ProteinGroupComparison
|
8
|
+
|
9
|
+
# a protein group object
|
10
|
+
attr_accessor :protein_group
|
11
|
+
|
12
|
+
# an array of experiment names
|
13
|
+
attr_accessor :experiments
|
14
|
+
|
15
|
+
# parallel array to experiments with the measured values
|
16
|
+
attr_accessor :values
|
17
|
+
|
18
|
+
def initialize(protein_group, experiments, values)
|
19
|
+
(@protein_group, @experiment, @values) = protein_group, experiments, values
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class Ms::Quant::ProteinGroupComparison::SpectralCounts
|
24
|
+
include Ms::Quant::ProteinGroupComparison
|
25
|
+
end
|
26
|
+
|
27
|
+
class Ms::Quant::ProteinGroupComparison::UniqAAzCounts
|
28
|
+
include Ms::Quant::ProteinGroupComparison
|
29
|
+
end
|
data/lib/ms/quant/qspec/protein_group_comparison.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'ms/quant/protein_group_comparison'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Quant
|
5
|
+
module ProteinGroupComparison
|
6
|
+
end
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
class Ms::Quant::ProteinGroupComparison::Qspec
|
11
|
+
include Ms::Quant::ProteinGroupComparison
|
12
|
+
|
13
|
+
attr_accessor :qspec_results_struct
|
14
|
+
|
15
|
+
# takes a protein group object, an array of experiment names and a qspec
|
16
|
+
# results struct
|
17
|
+
def initialize(protein_group, experiments, qspec_results_struct)
|
18
|
+
super(protein_group, experiments, qspec_results_struct.counts_array)
|
19
|
+
@qspec_results_struct = qspec_results_struct
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
data/lib/ms/quant/qspec.rb
CHANGED
@@ -31,6 +31,7 @@ class Ms::Quant::Qspec
|
|
31
31
|
start_bayes = headers.index {|v| v =~ /BayesFactor/i }
|
32
32
|
rows.map do |row|
|
33
33
|
data = [row[0]]
|
34
|
+
data.push( row[1...start_bayes].map(&:to_f) )
|
34
35
|
data.push( *row[start_bayes,4].map(&:to_f) )
|
35
36
|
data.push( row[start_bayes+4] )
|
36
37
|
Results.new(*data)
|
@@ -68,6 +69,8 @@ class Ms::Quant::Qspec
|
|
68
69
|
end
|
69
70
|
end
|
70
71
|
|
72
|
+
# returns an array of Qspec::Results objects (each object can be considered
|
73
|
+
# a row of data)
|
71
74
|
def run(normalize=true, opts={})
|
72
75
|
puts "normalize: #{normalize}" if $VERBOSE
|
73
76
|
tfile = Tempfile.new("qspec")
|
@@ -87,6 +90,7 @@ class Ms::Quant::Qspec
|
|
87
90
|
end
|
88
91
|
|
89
92
|
# for version 2 of QSpec
|
90
|
-
|
93
|
+
# counts array is parallel to the experiment names passed in originally
|
94
|
+
Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
|
91
95
|
end
|
92
96
|
|
data/lib/ms/quant/spectral_counts.rb
CHANGED
@@ -8,28 +8,28 @@ module Ms
|
|
8
8
|
|
9
9
|
# returns a parallel array of Count objects. If split_hits then counts
|
10
10
|
# are split between groups sharing the hit. peptide_hits must respond
|
11
|
-
# to :charge and :aaseq. If
|
12
|
-
#
|
13
|
-
# the
|
14
|
-
def self.counts(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
linked_to_size
|
25
|
-
end
|
26
|
-
counts_data = [linked_sizes, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
|
27
|
-
share_the_pephit ? array.inject(0.0) {|sum,size| sum+=(1.0/size) } : array.size
|
28
|
-
end
|
29
|
-
Counts.new(*counts_data)
|
11
|
+
# to :charge and :aaseq. If a block is given, the weight of a
|
12
|
+
# particular hit can be given (typically this will be 1/#proteins
|
13
|
+
# sharing the hit
|
14
|
+
def self.counts(peptide_hits, &share_the_pephit)
|
15
|
+
uniq_aaseq = {}
|
16
|
+
uniq_aaseq_charge = {}
|
17
|
+
weights = peptide_hits.map do |hit|
|
18
|
+
weight = share_the_pephit ? share_the_pephit.call(hit) : 1
|
19
|
+
# these guys will end up clobbering themselves, but the
|
20
|
+
# linked_to_size should be consistent if the key is the same
|
21
|
+
uniq_aaseq_charge[[hit.aaseq, hit.charge]] = weight
|
22
|
+
uniq_aaseq[hit.aaseq] = weight
|
23
|
+
weight
|
30
24
|
end
|
25
|
+
counts_data = [weights, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
|
26
|
+
array.reduce(:+)
|
27
|
+
end
|
28
|
+
Counts.new(*counts_data)
|
31
29
|
end
|
32
|
-
|
33
30
|
end
|
34
31
|
end
|
35
32
|
end
|
33
|
+
|
34
|
+
|
35
|
+
|
data/spec/ms/quant/spectral_counts_spec.rb
CHANGED
@@ -2,8 +2,6 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
require 'ms/quant/spectral_counts'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
5
|
PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
|
8
6
|
def initialize(*args)
|
9
7
|
super(*args)
|
@@ -58,7 +56,7 @@ describe 'groups of peptide hits' do
|
|
58
56
|
|
59
57
|
it 'finds spectral counts (splitting counts between shared)' do
|
60
58
|
groups_of_pephits = @prot_hits.map(&:peptide_hits)
|
61
|
-
counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
|
59
|
+
counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| 1.0 / pephit.proteins.size }
|
62
60
|
@expected_counts_split.zip(counts) do |exp, act|
|
63
61
|
exp.zip(act) {|e,a| a.should.be.close e, 0.0001 }
|
64
62
|
end
|
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-quant
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.6
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- John T. Prince
|
@@ -14,7 +10,7 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date: 2011-04-
|
13
|
+
date: 2011-04-26 00:00:00 -06:00
|
18
14
|
default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
|
19
15
|
dependencies:
|
20
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,11 +21,7 @@ dependencies:
|
|
25
21
|
requirements:
|
26
22
|
- - ">="
|
27
23
|
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
- 0
|
30
|
-
- 0
|
31
|
-
- 19
|
32
|
-
version: 0.0.19
|
24
|
+
version: 0.1.1
|
33
25
|
type: :runtime
|
34
26
|
version_requirements: *id001
|
35
27
|
- !ruby/object:Gem::Dependency
|
@@ -40,8 +32,6 @@ dependencies:
|
|
40
32
|
requirements:
|
41
33
|
- - ">="
|
42
34
|
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 0
|
45
35
|
version: "0"
|
46
36
|
type: :development
|
47
37
|
version_requirements: *id002
|
@@ -53,10 +43,6 @@ dependencies:
|
|
53
43
|
requirements:
|
54
44
|
- - ~>
|
55
45
|
- !ruby/object:Gem::Version
|
56
|
-
segments:
|
57
|
-
- 1
|
58
|
-
- 5
|
59
|
-
- 2
|
60
46
|
version: 1.5.2
|
61
47
|
type: :development
|
62
48
|
version_requirements: *id003
|
@@ -68,8 +54,6 @@ dependencies:
|
|
68
54
|
requirements:
|
69
55
|
- - ">="
|
70
56
|
- !ruby/object:Gem::Version
|
71
|
-
segments:
|
72
|
-
- 0
|
73
57
|
version: "0"
|
74
58
|
type: :development
|
75
59
|
version_requirements: *id004
|
@@ -89,8 +73,13 @@ files:
|
|
89
73
|
- Rakefile
|
90
74
|
- VERSION
|
91
75
|
- bin/peptide_hit_qvalues_to_spectral_counts_table.rb
|
76
|
+
- lib/hash/inverse.rb
|
92
77
|
- lib/ms-quant.rb
|
78
|
+
- lib/ms/quant/cmdline.rb
|
79
|
+
- lib/ms/quant/peptide.rb
|
80
|
+
- lib/ms/quant/protein_group_comparison.rb
|
93
81
|
- lib/ms/quant/qspec.rb
|
82
|
+
- lib/ms/quant/qspec/protein_group_comparison.rb
|
94
83
|
- lib/ms/quant/spectral_counts.rb
|
95
84
|
- spec/ms/quant/spectral_counts_spec.rb
|
96
85
|
- spec/spec_helper.rb
|
@@ -108,21 +97,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
97
|
requirements:
|
109
98
|
- - ">="
|
110
99
|
- !ruby/object:Gem::Version
|
111
|
-
segments:
|
112
|
-
- 0
|
113
100
|
version: "0"
|
114
101
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
102
|
none: false
|
116
103
|
requirements:
|
117
104
|
- - ">="
|
118
105
|
- !ruby/object:Gem::Version
|
119
|
-
segments:
|
120
|
-
- 0
|
121
106
|
version: "0"
|
122
107
|
requirements: []
|
123
108
|
|
124
109
|
rubyforge_project:
|
125
|
-
rubygems_version: 1.
|
110
|
+
rubygems_version: 1.6.2
|
126
111
|
signing_key:
|
127
112
|
specification_version: 3
|
128
113
|
summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)
|