ms-quant 0.0.4 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/peptide_hit_qvalues_to_spectral_counts_table.rb +137 -144
- data/lib/hash/inverse.rb +15 -0
- data/lib/ms/quant/cmdline.rb +42 -0
- data/lib/ms/quant/peptide.rb +2 -0
- data/lib/ms/quant/protein_group_comparison.rb +29 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +5 -1
- data/lib/ms/quant/spectral_counts.rb +20 -20
- data/spec/ms/quant/spectral_counts_spec.rb +1 -3
- metadata +10 -25
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
16
16
|
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
17
17
|
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
18
|
-
gem.add_runtime_dependency 'ms-ident', ">= 0.0.19"
|
18
|
+
gem.add_runtime_dependency 'ms-ident', ">= 0.1.1"
|
19
19
|
gem.add_development_dependency "spec-more", ">= 0"
|
20
20
|
gem.add_development_dependency "jeweler", "~> 1.5.2"
|
21
21
|
gem.add_development_dependency "rcov", ">= 0"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.4
|
1
|
+
0.0.6
|
@@ -1,29 +1,24 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'andand'
|
4
|
+
require 'set'
|
5
|
+
require 'ruport'
|
6
|
+
|
3
7
|
require 'ms/ident/peptide_hit/qvalue'
|
4
|
-
require 'ms/ident/
|
8
|
+
require 'ms/ident/protein'
|
5
9
|
require 'ms/ident/peptide/db'
|
6
10
|
require 'ms/quant/spectral_counts'
|
11
|
+
require 'ms/quant/protein_group_comparison'
|
12
|
+
require 'ms/quant/qspec/protein_group_comparison'
|
7
13
|
require 'ms/quant/qspec'
|
14
|
+
require 'ms/quant/cmdline'
|
15
|
+
|
8
16
|
|
9
17
|
require 'yaml'
|
10
18
|
require 'tempfile'
|
11
19
|
|
12
20
|
require 'trollop'
|
13
21
|
|
14
|
-
# inverse from Tilo Sloboda (now in facets)
|
15
|
-
|
16
|
-
class Hash
|
17
|
-
def inverse
|
18
|
-
i = Hash.new
|
19
|
-
self.each_pair do |k,v|
|
20
|
-
if (Array === v) ; v.each{ |x| i[x] = ( i.has_key?(x) ? [k,i[x]].flatten : k ) }
|
21
|
-
else ; i[v] = ( i.has_key?(v) ? [k,i[v]].flatten : k ) end
|
22
|
-
end ; i
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
22
|
def putsv(*args)
|
28
23
|
if $VERBOSE
|
29
24
|
puts(*args) ; $stdout.flush
|
@@ -36,8 +31,48 @@ def basename(file)
|
|
36
31
|
base
|
37
32
|
end
|
38
33
|
|
34
|
+
class Ruport::Data::Table
|
35
|
+
# returns self
|
36
|
+
def add_column_with_data(colname, array_of_data, opts={})
|
37
|
+
self.add_column(colname, opts)
|
38
|
+
self.data.zip(array_of_data) do |row, newval|
|
39
|
+
row[colname] = newval
|
40
|
+
end
|
41
|
+
self
|
42
|
+
end
|
43
|
+
|
44
|
+
# acceptable opts:
|
45
|
+
#
|
46
|
+
# :header => an array of lines (each which will be commented out)
|
47
|
+
def to_tsv(file, opt={})
|
48
|
+
delimiter = "\t"
|
49
|
+
File.open(file,'w') do |out|
|
50
|
+
opt[:header].each {|line| out.puts "# #{line}" } if opt[:header]
|
51
|
+
out.puts self.column_names.join(delimiter)
|
52
|
+
self.data.each do |row|
|
53
|
+
out.puts row.to_a.join(delimiter)
|
54
|
+
end
|
55
|
+
opt[:footer].each {|line| out.puts "# #{line}" } if opt[:footer]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
def write_subset(sample_to_pephits, outfile="peptidecentric_subset.yml")
|
62
|
+
aaseqs_to_prots = {}
|
63
|
+
sample_to_pephits.map(&:last).flatten(1).each do |pephit|
|
64
|
+
aaseqs_to_prots[pephit.aaseq] = pephit.proteins.map(&:id)
|
65
|
+
end
|
66
|
+
File.open(outfile,'w') do |out|
|
67
|
+
aaseqs_to_prots.each do |k,v|
|
68
|
+
out.puts(%Q{#{k}: #{v.join("\t") }})
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
39
73
|
|
40
74
|
outfile = "spectral_counts.tsv"
|
75
|
+
pephits_outfile = "spectral_counts.pephits.tsv"
|
41
76
|
delimiter = "\t"
|
42
77
|
|
43
78
|
opts = Trollop::Parser.new do
|
@@ -53,6 +88,7 @@ psq is really .psq.tsv file
|
|
53
88
|
opt :descriptions, "include descriptions of proteins, requires :fasta", :default => false
|
54
89
|
opt :fasta, "the fasta file. Required for :qspec and :descriptions", :type => String
|
55
90
|
opt :outfile, "the to which file data are written", :default => outfile
|
91
|
+
opt :peptides, "also write peptide hits (to: #{pephits_outfile})", :default => false
|
56
92
|
opt :verbose, "speak up", :default => false
|
57
93
|
opt :count_type, "type of spectral counts (<spectral|aaseqcharge|aaseq>)", :default => 'spectral'
|
58
94
|
opt :qspec_normalize, "normalize spectral counts per run", :default => false
|
@@ -78,29 +114,8 @@ raise ArgumentError, "need .yml file for peptide centric db" unless File.extname
|
|
78
114
|
putsv "using: #{peptide_centric_db_file} as peptide centric db"
|
79
115
|
|
80
116
|
# groupname => files
|
81
|
-
condition_to_samplenames = {}
|
82
|
-
samplename_to_filename = {}
|
83
|
-
ARGV.each do |arg|
|
84
|
-
(condition, files) =
|
85
|
-
if arg.include?('=')
|
86
|
-
(condition, filestring) = arg.split('=')
|
87
|
-
[condition, filestring.split(',')]
|
88
|
-
else
|
89
|
-
[basename(arg), [arg]]
|
90
|
-
end
|
91
|
-
reptag = ARGV.size
|
92
|
-
sample_to_file_pairs = files.each_with_index.map {|file,i| ["#{condition}-rep#{i+1}", file] }
|
93
|
-
sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
|
94
|
-
condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
|
95
|
-
end
|
96
|
-
|
97
117
|
|
98
|
-
|
99
|
-
puts "** condition: sample_names"
|
100
|
-
puts condition_to_samplenames.to_yaml
|
101
|
-
puts "** samplename: filename"
|
102
|
-
puts samplename_to_filename.to_yaml
|
103
|
-
end
|
118
|
+
(samplename_to_filename, condition_to_samplenames, samplename_to_condition) = Ms::Quant::Cmdline.args_to_hashes(ARGV)
|
104
119
|
|
105
120
|
raise ArgumentError, "must have 2 conditions for qspec!" if opt[:qspec] && condition_to_samplenames.size != 2
|
106
121
|
|
@@ -108,144 +123,122 @@ samplenames = samplename_to_filename.keys
|
|
108
123
|
|
109
124
|
class Ms::Ident::PeptideHit
|
110
125
|
attr_accessor :experiment_name
|
126
|
+
attr_accessor :protein_groups
|
127
|
+
end
|
128
|
+
class Ms::Ident::Protein
|
129
|
+
attr_accessor :length
|
111
130
|
end
|
131
|
+
|
132
|
+
|
112
133
|
fdr_cutoff = opt[:fdr_percent] / 100
|
113
134
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
putsv "#{Time.now-start} seconds to read #{peptide_centric_db_file}"
|
118
|
-
samplename_to_filename.map do |sample, file|
|
119
|
-
peptide_hits = Ms::Ident::PeptideHit::Qvalue.from_file(file)
|
120
|
-
putsv "#{file}: #{peptide_hits.size} hits"
|
121
|
-
peptide_hits.select! do |hit|
|
122
|
-
if hit.qvalue <= fdr_cutoff
|
123
|
-
# update each peptide with its protein hits
|
124
|
-
prot_ids = peptide_to_proteins[hit.aaseq]
|
125
|
-
if prot_ids
|
126
|
-
hit.experiment_name = sample
|
127
|
-
hit.proteins = prot_ids
|
128
|
-
else ; false end
|
129
|
-
else
|
130
|
-
false
|
131
|
-
end
|
132
|
-
end
|
133
|
-
peptide_hits
|
134
|
-
end
|
135
|
+
if opt[:qspec] || opt[:descriptions]
|
136
|
+
putsv "reading lengths and descriptions from #{opt[:fasta]}"
|
137
|
+
(id_to_length, id_to_desc) = Ms::Fasta.protein_lengths_and_descriptions(opt[:fasta])
|
135
138
|
end
|
136
139
|
|
137
|
-
|
138
|
-
|
139
|
-
ar_of_pephit_ars.flatten(1).each do |pephit|
|
140
|
-
aaseqs_to_prots[pephit.aaseq] = pephit.proteins
|
141
|
-
end
|
142
|
-
outfile = "peptidecentric_subset.yml"
|
143
|
-
puts "writing #{outfile} with #{aaseqs_to_prots.size} aaseq->protids"
|
144
|
-
File.open(outfile,'w') do |out|
|
145
|
-
aaseqs_to_prots.each do |k,v|
|
146
|
-
out.puts(%Q{#{k}: #{v.join("\t") }})
|
147
|
-
end
|
148
|
-
end
|
140
|
+
samplename_to_peptidehits = samplename_to_filename.map do |sample, file|
|
141
|
+
[sample, Ms::Ident::PeptideHit::Qvalue.from_file(file).select {|hit| hit.qvalue <= fdr_cutoff }]
|
149
142
|
end
|
150
143
|
|
151
|
-
|
152
|
-
|
153
|
-
|
144
|
+
# update each peptide hit with protein hits and sample name:
|
145
|
+
all_protein_hits = Hash.new {|h,id| h[id] = Ms::Ident::Protein.new(id) }
|
146
|
+
Ms::Ident::Peptide::Db::IO.open(peptide_centric_db_file) do |peptide_to_proteins|
|
147
|
+
samplename_to_peptidehits.map do |sample, peptide_hits|
|
148
|
+
peptide_hits.each do |hit|
|
149
|
+
# update each peptide with its protein hits
|
150
|
+
protein_hits = peptide_to_proteins[hit.aaseq].map do |id|
|
151
|
+
protein = all_protein_hits[id]
|
152
|
+
protein.length = id_to_length[id] if id_to_length
|
153
|
+
protein.description = id_to_desc[id] if id_to_desc
|
154
|
+
protein
|
155
|
+
end
|
156
|
+
hit.experiment_name = sample
|
157
|
+
# if there are protein hits, the peptide hit is selected
|
158
|
+
hit.proteins = protein_hits
|
159
|
+
end
|
154
160
|
end
|
155
161
|
end
|
156
162
|
|
157
|
-
|
163
|
+
write_subset(samplename_to_peptidehits) if opt[:write_subset]
|
158
164
|
|
159
|
-
|
160
|
-
# each protein group is an array of
|
161
|
-
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits)
|
165
|
+
samplename_to_peptidehits.each {|samplename, hits| putsv "#{samplename}: #{hits.size}" } if $VERBOSE
|
162
166
|
|
163
|
-
|
164
|
-
protein_groups.each do |protein_group|
|
165
|
-
protein_group.peptide_hits.each {|hit| pephit_to_protein_groups[hit] << protein_group }
|
166
|
-
end
|
167
|
+
all_peptide_hits = samplename_to_peptidehits.map(&:last).flatten(1)
|
167
168
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
pep_hits.each do |pep_hit|
|
173
|
-
pep_hit_to_prot_groups[pep_hit] << prot_group
|
174
|
-
end # returns the group of pep_hits
|
175
|
-
end
|
176
|
-
counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) do |pephit|
|
177
|
-
pephit_to_protein_groups[pephit].size
|
178
|
-
end
|
179
|
-
end
|
169
|
+
# this constricts everything down to a minimal set of protein groups that
|
170
|
+
# explain the entire set of peptide hits.
|
171
|
+
update_pephits = true # ensures that each pephit is linked to the array of protein groups it is associated with
|
172
|
+
protein_groups = Ms::Ident::ProteinGroup.peptide_hits_to_protein_groups(all_peptide_hits, update_pephits)
|
180
173
|
|
181
|
-
|
182
|
-
|
183
|
-
|
174
|
+
hits_table_hash = {} # create the table using key => column hash
|
175
|
+
samplenames.each do |name|
|
176
|
+
hits_table_hash[name] = protein_groups.map do |prot_group|
|
177
|
+
prot_group.peptide_hits.select {|hit| hit.experiment_name == name }
|
178
|
+
end
|
184
179
|
end
|
185
180
|
|
186
|
-
|
181
|
+
# The columns are filled with groups of peptide hits, one group of hits per
|
182
|
+
# protein group (protein group order is implicit). The rows are sample names.
|
183
|
+
#
|
184
|
+
# (implied) sample1 sample2 sample3 ...
|
185
|
+
# (group1) [hit,hit] [hit...] [hit...] ...
|
186
|
+
# (group2) [hit,hit] [hit...] [hit...] ...
|
187
|
+
# ... ... ... ... ...
|
188
|
+
hits_table = Ruport::Data::Table.new(:data => hits_table_hash.values.transpose, :column_names => hits_table_hash.keys)
|
187
189
|
|
188
|
-
|
189
|
-
|
190
|
+
# spectral counts of type opt[:count_type]
|
191
|
+
counts_data = hits_table.data.map do |row|
|
192
|
+
row.map do |pephits|
|
193
|
+
Ms::Quant::SpectralCounts.counts(pephits) {|pephit| 1.0 / pephit.protein_groups.size }.send(opt[:count_type])
|
194
|
+
end
|
195
|
+
end
|
190
196
|
|
191
|
-
|
192
|
-
|
193
|
-
end.transpose
|
197
|
+
# each cell holds a SpectralCounts object, which has 3 types of count data
|
198
|
+
counts_table = Ruport::Data::Table.new(:data => counts_data, :column_names => samplenames)
|
194
199
|
|
200
|
+
# return a list of ProteinGroupComparisons
|
195
201
|
if opt[:qspec]
|
196
|
-
all_conditions = samplenames.map {|sn| samplename_to_condition[sn] }
|
197
|
-
condition_to_count_array = all_conditions.zip(counts_parallel_to_names_with_counts_per_group).map do |condition, counts_par_groups|
|
198
|
-
[condition, counts_par_groups.map(&opt[:count_type])]
|
199
|
-
end
|
200
202
|
|
203
|
+
# prepare data for qspec
|
204
|
+
condition_to_count_array = counts_table.column_names.map {|name| [name, counts_table.column(name)] }
|
205
|
+
# average length of the proteins in the group
|
201
206
|
name_length_pairs = protein_groups.map do |pg|
|
202
|
-
|
203
|
-
# lengths over longer lengths
|
204
|
-
best_guess_protein_id = pg.sort_by {|prot_id| [prot_id, -id_to_length[prot_id]] }.first
|
205
|
-
length = id_to_length[best_guess_protein_id]
|
206
|
-
[pg.join(":"), length]
|
207
|
+
[pg.join(":"), pg.map(&:length).reduce(:+)./(pg.size).round]
|
207
208
|
end
|
208
209
|
|
209
|
-
putsv "qspec to normalize counts: #{opt[:qspec_normalize]}"
|
210
210
|
qspec_results = Ms::Quant::Qspec.new(name_length_pairs, condition_to_count_array).run(opt[:qspec_normalize])
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
row.push(*to_add.map {|v| result.send(v) })
|
211
|
+
|
212
|
+
cols_to_add = [:bayes_factor, :fold_change, :fdr]
|
213
|
+
counts_table.add_columns cols_to_add
|
214
|
+
counts_table.data.zip(qspec_results) do |row, qspec_result|
|
215
|
+
cols_to_add.each {|cat| row[cat] = qspec_result[cat] }
|
217
216
|
end
|
218
217
|
end
|
219
218
|
|
220
|
-
|
221
|
-
|
219
|
+
counts_table.add_columns( [:name, :ids, :description] )
|
220
|
+
counts_table.data.zip(protein_groups) do |row, pg|
|
221
|
+
best_id = pg.sort_by {|prot| [prot.id, prot.length] }.first
|
222
|
+
row.name = best_id.description.andand.match(/ GN=([^\s]+) ?/).andand[1] || best_id.id
|
223
|
+
row.ids = pg.map(&:id).join(',')
|
224
|
+
row.description = best_id.description
|
225
|
+
end
|
222
226
|
|
223
|
-
sort_protein_id =
|
224
|
-
if id_to_length
|
225
|
-
lambda {|prot_id| [prot_id, -id_to_length[prot_id]] }
|
226
|
-
else
|
227
|
-
lambda {|prot_id| prot_id }
|
228
|
-
end
|
229
227
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
[
|
239
|
-
else
|
240
|
-
[best_protid, nil]
|
228
|
+
if opt[:peptides]
|
229
|
+
hits_table.each do |record|
|
230
|
+
record.each_with_index do |hits,i|
|
231
|
+
new_cell = hits.group_by do |hit|
|
232
|
+
[hit.aaseq, hit.charge]
|
233
|
+
end.map do |key, hits|
|
234
|
+
[key.reverse.join("_"), hits.map(&:id).join(',')].join(":")
|
235
|
+
end.join('; ')
|
236
|
+
record[i] = new_cell
|
241
237
|
end
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
File.open(opt[:outfile],'w') do |out|
|
247
|
-
out.puts header_cats.join(delimiter)
|
248
|
-
ar_of_rows.each {|row| out.puts row.join(delimiter) }
|
249
|
-
putsv "wrote: #{opt[:outfile]}"
|
238
|
+
end
|
239
|
+
hits_table.add_column_with_data(:name, counts_table.column(:name), :position=>0)
|
240
|
+
hits_table.to_tsv(pephits_outfile, :footer => ["parallel to #{outfile}"])
|
250
241
|
end
|
251
242
|
|
243
|
+
intro = ["samples: #{samplename_to_filename}", "options: #{opt}"]
|
244
|
+
counts_table.to_tsv(outfile, :footer => intro)
|
data/lib/hash/inverse.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# inverse from Tilo Sloboda (now in facets)
|
4
|
+
|
5
|
+
class Hash
|
6
|
+
def inverse
|
7
|
+
i = Hash.new
|
8
|
+
self.each_pair do |k,v|
|
9
|
+
if (Array === v) ; v.each{ |x| i[x] = ( i.has_key?(x) ? [k,i[x]].flatten : k ) }
|
10
|
+
else ; i[v] = ( i.has_key?(v) ? [k,i[v]].flatten : k ) end
|
11
|
+
end ; i
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'hash/inverse'
|
2
|
+
|
3
|
+
module Ms ; module Quant ; end ; end
|
4
|
+
|
5
|
+
module Ms::Quant::Cmdline
|
6
|
+
|
7
|
+
# expects arguments in one of two forms. The first form is grouped by
|
8
|
+
# condition as shown:
|
9
|
+
#
|
10
|
+
# condition1=file1,file2,file3... condition2=file4,file5...
|
11
|
+
#
|
12
|
+
# The second is where each file is its own condition (1 replicate):
|
13
|
+
#
|
14
|
+
# file1 file2 file3
|
15
|
+
#
|
16
|
+
# Returns three ordered hashes (only ordered for ruby 1.9):
|
17
|
+
#
|
18
|
+
# 1) Condition to an array of samplenames
|
19
|
+
# 2) Samplename to the filename
|
20
|
+
# 3) Samplename to condition
|
21
|
+
def self.args_to_hashes(args, replicate_postfix="-rep")
|
22
|
+
# groupname => files
|
23
|
+
condition_to_samplenames = {}
|
24
|
+
samplename_to_filename = {}
|
25
|
+
args.each do |arg|
|
26
|
+
(condition, files) =
|
27
|
+
if arg.include?('=')
|
28
|
+
(condition, filestring) = arg.split('=')
|
29
|
+
[condition, filestring.split(',')]
|
30
|
+
else
|
31
|
+
[basename(arg), [arg]]
|
32
|
+
end
|
33
|
+
sample_to_file_pairs = files.each_with_index.map do |file,i|
|
34
|
+
rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
|
35
|
+
["#{condition}#{rep_string}", file]
|
36
|
+
end
|
37
|
+
sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
|
38
|
+
condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
|
39
|
+
end
|
40
|
+
[samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
|
2
|
+
module Ms
|
3
|
+
module Quant
|
4
|
+
end
|
5
|
+
end
|
6
|
+
|
7
|
+
module Ms::Quant::ProteinGroupComparison
|
8
|
+
|
9
|
+
# a protein group object
|
10
|
+
attr_accessor :protein_group
|
11
|
+
|
12
|
+
# an array of experiment names
|
13
|
+
attr_accessor :experiments
|
14
|
+
|
15
|
+
# parallel array to experiments with the measured values
|
16
|
+
attr_accessor :values
|
17
|
+
|
18
|
+
def initialize(protein_group, experiments, values)
|
19
|
+
(@protein_group, @experiment, @values) = protein_group, experiments, values
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class Ms::Quant::ProteinGroupComparison::SpectralCounts
|
24
|
+
include Ms::Quant::ProteinGroupComparison
|
25
|
+
end
|
26
|
+
|
27
|
+
class Ms::Quant::ProteinGroupComparison::UniqAAzCounts
|
28
|
+
include Ms::Quant::ProteinGroupComparison
|
29
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'ms/quant/protein_group_comparison'
|
2
|
+
|
3
|
+
module Ms
|
4
|
+
module Quant
|
5
|
+
module ProteinGroupComparison
|
6
|
+
end
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
class Ms::Quant::ProteinGroupComparison::Qspec
|
11
|
+
include Ms::Quant::ProteinGroupComparison
|
12
|
+
|
13
|
+
attr_accessor :qspec_results_struct
|
14
|
+
|
15
|
+
# takes a protein group object, an array of experiment names and a qspec
|
16
|
+
# results struct
|
17
|
+
def initialize(protein_group, experiments, qspec_results_struct)
|
18
|
+
super(protein_group, experiments, qspec_results_struct.counts_array)
|
19
|
+
@qspec_results_struct = qspec_results_struct
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
data/lib/ms/quant/qspec.rb
CHANGED
@@ -31,6 +31,7 @@ class Ms::Quant::Qspec
|
|
31
31
|
start_bayes = headers.index {|v| v =~ /BayesFactor/i }
|
32
32
|
rows.map do |row|
|
33
33
|
data = [row[0]]
|
34
|
+
data.push( row[1...start_bayes].map(&:to_f) )
|
34
35
|
data.push( *row[start_bayes,4].map(&:to_f) )
|
35
36
|
data.push( row[start_bayes+4] )
|
36
37
|
Results.new(*data)
|
@@ -68,6 +69,8 @@ class Ms::Quant::Qspec
|
|
68
69
|
end
|
69
70
|
end
|
70
71
|
|
72
|
+
# returns an array of Qspec::Results objects (each object can be considered
|
73
|
+
# a row of data)
|
71
74
|
def run(normalize=true, opts={})
|
72
75
|
puts "normalize: #{normalize}" if $VERBOSE
|
73
76
|
tfile = Tempfile.new("qspec")
|
@@ -87,6 +90,7 @@ class Ms::Quant::Qspec
|
|
87
90
|
end
|
88
91
|
|
89
92
|
# for version 2 of QSpec
|
90
|
-
|
93
|
+
# counts array is parallel to the experiment names passed in originally
|
94
|
+
Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
|
91
95
|
end
|
92
96
|
|
@@ -8,28 +8,28 @@ module Ms
|
|
8
8
|
|
9
9
|
# returns a parallel array of Count objects. If split_hits then counts
|
10
10
|
# are split between groups sharing the hit. peptide_hits must respond
|
11
|
-
# to :charge and :aaseq. If
|
12
|
-
#
|
13
|
-
# the
|
14
|
-
def self.counts(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
linked_to_size
|
25
|
-
end
|
26
|
-
counts_data = [linked_sizes, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
|
27
|
-
share_the_pephit ? array.inject(0.0) {|sum,size| sum+=(1.0/size) } : array.size
|
28
|
-
end
|
29
|
-
Counts.new(*counts_data)
|
11
|
+
# to :charge and :aaseq. If a block is given, the weight of a
|
12
|
+
# particular hit can be given (typically this will be 1/#proteins
|
13
|
+
# sharing the hit).
|
14
|
+
def self.counts(peptide_hits, &share_the_pephit)
|
15
|
+
uniq_aaseq = {}
|
16
|
+
uniq_aaseq_charge = {}
|
17
|
+
weights = peptide_hits.map do |hit|
|
18
|
+
weight = share_the_pephit ? share_the_pephit.call(hit) : 1
|
19
|
+
# these guys will end up clobbering themselves, but the
|
20
|
+
# linked_to_size should be consistent if the key is the same
|
21
|
+
uniq_aaseq_charge[[hit.aaseq, hit.charge]] = weight
|
22
|
+
uniq_aaseq[hit.aaseq] = weight
|
23
|
+
weight
|
30
24
|
end
|
25
|
+
counts_data = [weights, uniq_aaseq_charge.values, uniq_aaseq.values].map do |array|
|
26
|
+
array.reduce(:+)
|
27
|
+
end
|
28
|
+
Counts.new(*counts_data)
|
31
29
|
end
|
32
|
-
|
33
30
|
end
|
34
31
|
end
|
35
32
|
end
|
33
|
+
|
34
|
+
|
35
|
+
|
@@ -2,8 +2,6 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
require 'ms/quant/spectral_counts'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
5
|
PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
|
8
6
|
def initialize(*args)
|
9
7
|
super(*args)
|
@@ -58,7 +56,7 @@ describe 'groups of peptide hits' do
|
|
58
56
|
|
59
57
|
it 'finds spectral counts (splitting counts between shared)' do
|
60
58
|
groups_of_pephits = @prot_hits.map(&:peptide_hits)
|
61
|
-
counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| pephit.proteins.size }
|
59
|
+
counts = Ms::Quant::SpectralCounts.counts(groups_of_pephits) {|pephit| 1.0 / pephit.proteins.size }
|
62
60
|
@expected_counts_split.zip(counts) do |exp, act|
|
63
61
|
exp.zip(act) {|e,a| a.should.be.close e, 0.0001 }
|
64
62
|
end
|
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-quant
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.6
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- John T. Prince
|
@@ -14,7 +10,7 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date: 2011-04-
|
13
|
+
date: 2011-04-26 00:00:00 -06:00
|
18
14
|
default_executable: peptide_hit_qvalues_to_spectral_counts_table.rb
|
19
15
|
dependencies:
|
20
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,11 +21,7 @@ dependencies:
|
|
25
21
|
requirements:
|
26
22
|
- - ">="
|
27
23
|
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
- 0
|
30
|
-
- 0
|
31
|
-
- 19
|
32
|
-
version: 0.0.19
|
24
|
+
version: 0.1.1
|
33
25
|
type: :runtime
|
34
26
|
version_requirements: *id001
|
35
27
|
- !ruby/object:Gem::Dependency
|
@@ -40,8 +32,6 @@ dependencies:
|
|
40
32
|
requirements:
|
41
33
|
- - ">="
|
42
34
|
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 0
|
45
35
|
version: "0"
|
46
36
|
type: :development
|
47
37
|
version_requirements: *id002
|
@@ -53,10 +43,6 @@ dependencies:
|
|
53
43
|
requirements:
|
54
44
|
- - ~>
|
55
45
|
- !ruby/object:Gem::Version
|
56
|
-
segments:
|
57
|
-
- 1
|
58
|
-
- 5
|
59
|
-
- 2
|
60
46
|
version: 1.5.2
|
61
47
|
type: :development
|
62
48
|
version_requirements: *id003
|
@@ -68,8 +54,6 @@ dependencies:
|
|
68
54
|
requirements:
|
69
55
|
- - ">="
|
70
56
|
- !ruby/object:Gem::Version
|
71
|
-
segments:
|
72
|
-
- 0
|
73
57
|
version: "0"
|
74
58
|
type: :development
|
75
59
|
version_requirements: *id004
|
@@ -89,8 +73,13 @@ files:
|
|
89
73
|
- Rakefile
|
90
74
|
- VERSION
|
91
75
|
- bin/peptide_hit_qvalues_to_spectral_counts_table.rb
|
76
|
+
- lib/hash/inverse.rb
|
92
77
|
- lib/ms-quant.rb
|
78
|
+
- lib/ms/quant/cmdline.rb
|
79
|
+
- lib/ms/quant/peptide.rb
|
80
|
+
- lib/ms/quant/protein_group_comparison.rb
|
93
81
|
- lib/ms/quant/qspec.rb
|
82
|
+
- lib/ms/quant/qspec/protein_group_comparison.rb
|
94
83
|
- lib/ms/quant/spectral_counts.rb
|
95
84
|
- spec/ms/quant/spectral_counts_spec.rb
|
96
85
|
- spec/spec_helper.rb
|
@@ -108,21 +97,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
97
|
requirements:
|
109
98
|
- - ">="
|
110
99
|
- !ruby/object:Gem::Version
|
111
|
-
segments:
|
112
|
-
- 0
|
113
100
|
version: "0"
|
114
101
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
102
|
none: false
|
116
103
|
requirements:
|
117
104
|
- - ">="
|
118
105
|
- !ruby/object:Gem::Version
|
119
|
-
segments:
|
120
|
-
- 0
|
121
106
|
version: "0"
|
122
107
|
requirements: []
|
123
108
|
|
124
109
|
rubyforge_project:
|
125
|
-
rubygems_version: 1.
|
110
|
+
rubygems_version: 1.6.2
|
126
111
|
signing_key:
|
127
112
|
specification_version: 3
|
128
113
|
summary: quantitation of mass spectrometry datasets (proteomic, metabolomic/lipidomic)
|