mspire 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
|
|
2
|
+
require 'ms/plms1'
|
|
3
|
+
|
|
4
|
+
module MS
  class Mzml
    # Converts this mzML object into an MS::Plms1 object.
    #
    # The Plms1 is assembled from a list of scan identifiers, the retention
    # time of every spectrum (expressed in seconds) and this Mzml object
    # itself, which is handed over as the spectra collection.
    #
    # @param use_scan_nums [Boolean] when true, the scan numbers are the keys
    #   of the hash returned by the spectrum index's #create_scan_to_index;
    #   otherwise the 0-based spectrum index numbers are used in their place
    # @return [MS::Plms1]
    # @raise [RuntimeError] if a spectrum has no retention time cvParam
    #   (accession MS:1000016) or if its unit is neither 'minute' nor 'second'
    def to_plms1(use_scan_nums=true)
      spectrum_index = index_list[:spectrum]
      scan_nums = use_scan_nums ? spectrum_index.create_scan_to_index.keys : (0...spectrum_index.size).to_a

      retention_times = enum_for(:each_spectrum_node).map do |node|
        rt_node = node.xpath("scanList/scan/cvParam[@accession='MS:1000016']")[0]
        raise 'no retention time xml node' unless rt_node

        value = rt_node['value'].to_f
        unit = rt_node['unitName']
        if unit == 'minute'
          value * 60 # normalize minutes to seconds
        elsif unit == 'second'
          value
        else
          raise 'retention time must be in minutes or seconds (or add some code to handle)'
        end
      end

      # Plms1 only requires that the spectra object respond to :each and
      # yield spectrum objects, so this Mzml object can be passed directly.
      MS::Plms1.new(scan_nums, retention_times, self)
    end
  end
end
|
data/lib/ms/mzml.rb
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'io/bookmark'
|
|
3
|
+
require 'zlib'
|
|
4
|
+
require 'ms/mzml/index_list'
|
|
5
|
+
require 'ms/spectrum'
|
|
6
|
+
|
|
7
|
+
module MS
  # Reader for mzML files. Spectra are located through an index list that is
  # either read from the file or built by scanning it.
  #
  #     MS::Mzml.open("somefile.mzML") do |mzml|
  #       mzml.each do |spectrum|
  #         scan = spectrum.scan
  #         spectrum.mzs                  # array of m/zs
  #         spectrum.intensities          # array of intensities
  #         spectrum.peaks.each do |mz,intensity|
  #           puts "mz: #{mz} intensity: #{intensity}"
  #         end
  #       end
  #     end
  class Mzml
    module Parser
      NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
    end
    include Enumerable

    # Raised by #spectrum_from_scan_num when scan numbers are not unique.
    # Derives from StandardError so that a plain `rescue` can handle it.
    class ScanNumbersNotUnique < StandardError
    end

    # Raised by #spectrum_from_scan_num when spectra exist but no scan
    # numbers could be found for them.
    class ScanNumbersNotFound < StandardError
    end

    attr_accessor :filename
    attr_accessor :io
    attr_accessor :index_list
    attr_accessor :encoding

    # io must respond_to?(:size), giving the size of the io object in bytes
    # which allows seeking.  #get_index_list is called to get or create the
    # index list.
    #
    # The encoding is taken from the first line of the io (the XML
    # declaration).  When that line carries no encoding attribute, @encoding
    # is left nil instead of failing.
    def initialize(io)
      @io = io
      # bookmark(true) starts the block at the beginning of the file
      # NOTE(review): presumably it also restores the previous position
      # afterwards -- confirm against io/bookmark
      @encoding = @io.bookmark(true) do |inner_io|
        declaration = inner_io.readline.match(/encoding=["'](.*?)["']/)
        declaration && declaration[1]
      end
      @index_list = get_index_list
    end

    class << self
      # Opens filename and passes a new Mzml object to the block; the file is
      # closed when the block returns.  read-only right now
      def open(filename, &block)
        File.open(filename) do |io|
          block.call(self.new(io))
        end
      end

      # Opens filename and yields each spectrum to the block.
      def foreach(filename, &block)
        open(filename) do |mzml|
          mzml.each(&block)
        end
      end

      # Unpacks base64 encoded binary data based on a list of accessions.
      # accessions must only respond to :include?  So, hash keys, a set, or
      # an array will all work.
      #
      # @param base64string [String] the base64 encoded data
      # @param accessions [#include?] the cvParam accessions of the array
      # @return [Array<Float>] the decoded numbers
      # @raise [RuntimeError] if no compression or precision accession is
      #   recognized
      def unpack_binary(base64string, accessions)
        compressed =
          if accessions.include?('MS:1000574')    then true  # zlib compression
          elsif accessions.include?('MS:1000576') then false # no compression
          else raise 'no compression info: check your MS accession numbers'
          end
        precision_unpack =
          if accessions.include?('MS:1000523')    then 'E*' # little-endian doubles
          elsif accessions.include?('MS:1000521') then 'e*' # little-endian singles
          else raise 'unrecognized precision: check your MS accession numbers'
          end
        data = base64string.unpack("m*").first
        unzipped = compressed ? Zlib::Inflate.inflate(data) : data
        unzipped.unpack(precision_unpack)
      end
    end

    # Reads lines starting at start_byte up to and including the first line
    # that contains the closing tag for name.
    #
    # @param start_byte [Integer] byte offset where the element starts
    # @param name [Symbol] can be :spectrum or :chromatogram
    # @return [String] the raw xml
    def get_xml_string(start_byte, name=:spectrum)
      io.seek(start_byte)
      closing_tag = %r{</#{name}>}
      lines = []
      io.each_line do |line|
        lines << line
        break if closing_tag.match(line)
      end
      lines.join
    end

    # Yields every spectrum (see #spectrum) in index order.  Returns an
    # Enumerator when no block is given.
    def each_spectrum(&block)
      return enum_for(:each_spectrum) unless block
      (0...@index_list[:spectrum].size).each do |int|
        block.call spectrum(int)
      end
    end

    # Yields the Nokogiri::XML::Node object associated with each spectrum.
    # Returns an Enumerator when no block is given.
    def each_spectrum_node(&block)
      return enum_for(:each_spectrum_node) unless block
      @index_list[:spectrum].each do |start_byte|
        block.call spectrum_node_from_start_byte(start_byte)
      end
    end

    alias_method :each, :each_spectrum

    # Parses the spectrum element that starts at start_byte.
    #
    # @return the root node of the parsed spectrum xml
    def spectrum_node_from_start_byte(start_byte)
      xml = get_xml_string(start_byte, :spectrum)
      doc = Nokogiri::XML.parse(xml, nil, @encoding, Parser::NOBLANKS)
      doc.root
    end

    # @param [Object] arg an index number (Integer) or id string (String)
    # @return [MS::Spectrum] a spectrum object
    def spectrum(arg)
      # look the index up by name (as the rest of the class does) rather
      # than relying on the spectrum index being first in the list
      start_byte = index_list[:spectrum].start_byte(arg)
      data_arrays = spectrum_node_from_start_byte(start_byte).xpath('//binaryDataArray').map do |binary_data_array_n|
        accessions = binary_data_array_n.xpath('./cvParam').map {|node| node['accession'] }
        base64 = binary_data_array_n.xpath('./binary').text
        MS::Mzml.unpack_binary(base64, accessions)
      end
      MS::Spectrum.new(data_arrays)
    end

    # returns the number of spectra
    def size
      @index_list[:spectrum].size
    end

    alias_method :'[]', :spectrum

    # @param [Integer] scan_num the scan number
    # @return [MS::Spectrum] a spectrum object, or nil if not found
    # @raise [ScanNumbersNotUnique] if scan numbers are not unique
    # @raise [ScanNumbersNotFound] if spectra exist but scan numbers were not
    #   found
    def spectrum_from_scan_num(scan_num)
      # create_scan_to_index is the builder MS::Mzml#to_plms1 calls on the
      # same index object.
      # NOTE(review): this method previously called create_scan_index --
      # confirm against ms/mzml/index_list that no such method is needed
      @scan_to_index ||= @index_list[:spectrum].create_scan_to_index
      raise ScanNumbersNotUnique if @scan_to_index == false
      raise ScanNumbersNotFound if @scan_to_index.nil?
      index = @scan_to_index[scan_num]
      # an unknown scan number yields nil, as documented
      index && spectrum(index)
    end

    # Reads the index list stored in the file.
    #
    # @return [MS::Mzml::IndexList] or nil if there is no indexList in the
    #   mzML
    def read_index_list
      offset = MS::Mzml::Index.index_offset(@io)
      return nil unless offset

      @io.seek(offset)
      xml = Nokogiri::XML.parse(@io.read, nil, @encoding, Parser::NOBLANKS)
      indices = xml.root.children.map do |index_n|
        index = Index.new
        index.name = index_n['name'].to_sym
        # each child holds a start byte (its text) and the id it refers to
        index.ids = index_n.children.map do |offset_n|
          index << offset_n.text.to_i
          offset_n['idRef']
        end
        index
      end
      IndexList.new(indices)
    end

    # Reads through the whole file and captures the start byte of every
    # spectrum and chromatogram element.
    #
    # @return [MS::Mzml::IndexList]
    def create_index_list
      start_bytes = @io.bookmark(true) do |inner_io| # sets to beginning of file
        found = {:spectrum => {}, :chromatogram => {}}
        byte_total = 0
        inner_io.each do |line|
          if md=%r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
            # offset of the tag = bytes before this line + bytes before the tag
            found[md[1].to_sym][md[2]] = byte_total + md.pre_match.bytesize
          end
          byte_total += line.bytesize
        end
        found
      end

      indices = start_bytes.map do |name, id_to_start_byte|
        index = Index.new
        index.name = name
        index.ids = id_to_start_byte.keys
        id_to_start_byte.each_value {|start_byte| index << start_byte }
        index
      end
      IndexList.new(indices)
    end

    # reads or creates an index list
    # @return [Array] an array of indices
    def get_index_list
      read_index_list || create_index_list
    end
  end
end
|
|
197
|
+
|
data/lib/ms/obo.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
|
|
2
|
+
module MS
  # Wraps the elements parsed from an OBO file and offers lookup hashes
  # between element ids, names and the elements themselves.
  class OBO
    # the first element produced by the parser
    attr_accessor :header
    # all remaining parsed elements
    attr_accessor :elements

    # @param file_or_io passed unchanged to Obo::Parser.new
    def initialize(file_or_io)
      parsed = Obo::Parser.new(file_or_io)
      # assign the instance variables (plain `elements = ...` only creates
      # locals and leaves both accessors nil)
      @elements = parsed.elements.to_a
      @header = @elements.shift
    end

    # returns an id to name Hash
    def id_to_name
      @id_to_name ||= build_hash('id', 'name')
    end

    # returns a name to id Hash
    def name_to_id
      @name_to_id ||= build_hash('name', 'id')
    end

    # returns an id to element Hash
    def id_to_element
      @id_to_element ||= build_hash('id', nil)
    end

    protected

    # Builds a Hash keyed by the first value of tag `key` of every element.
    # The value is the first value of tag `val`, or the element itself when
    # val is nil.
    def build_hash(key, val)
      elements.each_with_object({}) do |el, hash|
        tv = el.tagvalues
        hash[tv[key].first] = val.nil? ? el : tv[val].first
      end
    end
  end
end
|
data/lib/ms/plms1.rb
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
|
|
2
|
+
require 'write_file_or_string'
|
|
3
|
+
require 'ms/spectrum'
|
|
4
|
+
require 'stringio'
|
|
5
|
+
require 'openany'
|
|
6
|
+
|
|
7
|
+
module MS
|
|
8
|
+
|
|
9
|
+
=begin
|
|
10
|
+
# if given scans, will use those, or optionally takes a block where an
|
|
11
|
+
# array of ms1 scans are yielded and it expects Enumerable scans back.
|
|
12
|
+
def to_plms1(scans=nil)
|
|
13
|
+
times = []
|
|
14
|
+
scan_numbers = []
|
|
15
|
+
spectra = []
|
|
16
|
+
|
|
17
|
+
unless scans
|
|
18
|
+
scans = []
|
|
19
|
+
self.each(:ms_level => 1, :precursor => false) do |scan|
|
|
20
|
+
scans << scan
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
if block_given?
|
|
25
|
+
scans = yield(scans)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
scans.each do |scan|
|
|
29
|
+
times << scan.time
|
|
30
|
+
scan_numbers << scan.num
|
|
31
|
+
spec = scan.spectrum
|
|
32
|
+
spectra << [spec.mzs.to_a, spec.intensities.to_a]
|
|
33
|
+
end
|
|
34
|
+
plms1 = Plms1.new
|
|
35
|
+
plms1.times = times
|
|
36
|
+
plms1.scan_numbers = scan_numbers
|
|
37
|
+
plms1.spectra = spectra
|
|
38
|
+
plms1
|
|
39
|
+
end
|
|
40
|
+
=end
|
|
41
|
+
|
|
42
|
+
# Prince Lab MS 1: a simple format for reading and writing
|
|
43
|
+
# MS1 level mass spec data
|
|
44
|
+
#
|
|
45
|
+
# see MS::Plms1::SPECIFICATION for the file specification
|
|
46
|
+
class Plms1
  SPECIFICATION =<<-HERE
# The file format contains no newlines but is shown here broken into lines for
# clarity.  Data should be little endian.  Comments begin with '#' but are not
# part of the spec. Angled brackets '<>' indicate the data type and square
# brackets '[]' the name of the data. An ellipsis '...' represents a
# continuous array of data points.

<uint32>[Number of scans]
<uint32>[scan number] ...  # array of scan numbers as uint32
<float64>[time point] ...  # array of time points as double precision floats (in seconds)
# this is a repeating unit based on [Number of scans]:
<uint32>[Number of data rows]  # almost always == 2 (m/z, intensity)
# this is a repeating unit based on [Number of data rows]
<uint32>[Number of data points]
<float64>[data point] ... # array of data points as double precision floats
  HERE

  # an array of scan numbers
  attr_accessor :scan_numbers
  # an array of time data
  attr_accessor :times
  # an array that contains spectrum objects
  attr_accessor :spectra

  def initialize(_scan_numbers=[], _times=[], _spectra=[])
    @scan_numbers = _scan_numbers
    @times = _times
    @spectra = _spectra
  end

  # Reads cnt little-endian unsigned 32 bit integers from io.
  #
  # @return [Array<Integer>]
  # @raise [EOFError] if io ends before cnt values could be read
  def read_uint32(io, cnt=1)
    read_bytes(io, cnt * 4).unpack("V*")
  end

  # Reads cnt little-endian double precision floats from io.
  #
  # @return [Array<Float>]
  # @raise [EOFError] if io ends before cnt values could be read
  def read_float64(io, cnt=1)
    read_bytes(io, cnt * 8).unpack("E*")
  end

  # Reads the binary format (see SPECIFICATION) and fills in scan_numbers,
  # times and spectra.
  #
  # @param io_or_filename passed unchanged to openany
  # @return [Plms1] self, for chaining
  # @raise [EOFError] if the data is truncated
  def read(io_or_filename)
    openany(io_or_filename) do |io|
      num_scans = read_uint32(io)[0]
      @scan_numbers = read_uint32(io, num_scans)
      @times = read_float64(io, num_scans)
      @spectra = Array.new(num_scans) do
        num_rows = read_uint32(io)[0]
        # every row is prefixed with its own number of data points
        rows = Array.new(num_rows) { read_float64(io, read_uint32(io)[0]) }
        MS::Spectrum.new(rows)
      end
    end
    self
  end

  # Appends data (a single value or an Array) to out as little-endian
  # unsigned 32 bit integers.
  def write_uint32(out, data)
    to_pack = data.is_a?(Array) ? data : [data]
    out << to_pack.pack('V*')
  end

  # Appends data (a single value or an Array) to out as little-endian
  # double precision floats.
  def write_float64(out, data)
    to_pack = data.is_a?(Array) ? data : [data]
    out << to_pack.pack('E*')
  end

  # writes an ascii version of the format
  # It is the same as the binary format, except a newline follows each
  # length indicator or array of data.  An empty line represents an empty
  # array.
  def write_ascii(filename=nil)
    write_file_or_string(filename) do |out|
      out.puts scan_numbers.size
      out.puts scan_numbers.join(' ')
      out.puts times.join(' ')
      spectra.each do |spectrum|
        out.puts spectrum.size
        next unless spectrum.size > 0
        [spectrum.mzs, spectrum.intensities].each do |row|
          out.puts row.size
          out.puts row.join(' ')
        end
      end
    end
  end

  # Writes the binary format, or the ascii version when ascii is true.
  # returns the string if no filename given
  def write(filename=nil, ascii=false)
    return write_ascii(filename) if ascii

    write_file_or_string(filename) do |out|
      write_uint32(out, spectra.size)
      write_uint32(out, scan_numbers)
      write_float64(out, times)
      spectra.each do |spectrum|
        write_uint32(out, spectrum.size) # number of rows
        next unless spectrum.size > 0
        [spectrum.mzs, spectrum.intensities].each do |row|
          write_uint32(out, row.size)
          write_float64(out, row)
        end
      end
    end
  end

  private

  # Reads exactly nbytes from io.
  #
  # io.read returns nil at end of file and a short string when fewer bytes
  # remain; both mean the data is truncated and are reported as EOFError
  # instead of surfacing later as a NoMethodError or as silently short arrays.
  def read_bytes(io, nbytes)
    bytes = io.read(nbytes)
    got = bytes ? bytes.bytesize : 0
    if got < nbytes
      raise EOFError, "truncated plms1 data: expected #{nbytes} bytes but got #{got}"
    end
    bytes
  end
end
|
|
156
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require 'ms/quant/protein_group_comparison'
|
|
2
|
+
|
|
3
|
+
module Ms
  module Quant
    module ProteinGroupComparison
      # A protein group comparison that also keeps the qspec results struct
      # it was built from.
      class Qspec
        include Ms::Quant::ProteinGroupComparison

        attr_accessor :qspec_results_struct

        # Hands the protein group, the experiment names and the counts_array
        # of the qspec results struct to the initializer further up the
        # ancestor chain, then stores the struct itself.
        #
        # @param protein_group a protein group object
        # @param experiments an array of experiment names
        # @param qspec_results_struct a qspec results struct (must respond
        #   to #counts_array)
        def initialize(protein_group, experiments, qspec_results_struct)
          super(protein_group, experiments, qspec_results_struct.counts_array)
          @qspec_results_struct = qspec_results_struct
        end
      end
    end
  end
end
|
|
22
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# #run relies on Tempfile and FileUtils
require 'tempfile'
require 'fileutils'

module Ms ; end
module Ms::Quant ; end

# Writes spectral count data in the format expected by QSpec, runs the
# qspec/qspecgp executable on it and parses the results.
class Ms::Quant::Qspec

  # personal communication with Hyungwon Choi: "We typically use nburn=2000,
  # niter=10000, which is quite sufficient to guarantee the reproducibility of
  # results using the same data."
  NBURNIN = 2000
  NITER = 10000
  INIT_HEADER = %w(protid protLen)
  DELIMITER = "\t"

  SUBMITTED_TO_QSPEC = 'submitted_to_qspec.txt'

  # for version 2 of QSpec
  # counts array is parallel to the experiment names passed in originally
  Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)

  # takes an ordered list of conditions ['cond1', 'cond1', 'cond2', 'cond2'] and
  # returns an array of ints [0,0,1,1]: the number goes up by one every time
  # the condition differs from the previous one
  def self.conditions_to_ints(conditions)
    group = 0
    previous = conditions.first
    conditions.map do |cond|
      unless previous == cond
        group += 1
        previous = cond
      end
      group
    end
  end

  # returns an array of Results structs which is each row of the returned file
  # works with V2 of QSpec
  #
  # @param resultsfile [String] path to the file written by qspec
  # @raise [ArgumentError] if the header row has no BayesFactor column
  def self.results_array(resultsfile)
    rows = IO.readlines(resultsfile).map {|line| line.chomp.split(DELIMITER) }
    headers = rows.shift
    # everything between the protein id and the BayesFactor column is counts
    start_bayes = headers && headers.index {|v| v =~ /BayesFactor/i }
    unless start_bayes
      raise ArgumentError, "no BayesFactor column found in the header of #{resultsfile}"
    end
    rows.map do |row|
      counts = row[1...start_bayes].map(&:to_f)
      stats = row[start_bayes, 4].map(&:to_f)
      Results.new(row[0], counts, *stats, row[start_bayes + 4])
    end
  end

  # returns the right executable based on the array of conditions:
  # 'qspecgp' when any condition occurs at least 3 times, otherwise 'qspec'
  # (an empty array gives 'qspec')
  def self.executable(conditions)
    biggest_size = conditions.group_by {|v| v }.values.map(&:size).max || 0
    (biggest_size >= 3) ? 'qspecgp' : 'qspec'
  end

  # protname_length_pairs is an array of doublets: [protname, length]
  # condition_to_count_array is an array doublets: [condition, array_of_counts]
  def initialize(protname_length_pairs, condition_to_count_array)
    @protname_length_pairs = protname_length_pairs
    @condition_to_count_array = condition_to_count_array
  end

  # returns the condition of every entry, in order
  def conditions
    @condition_to_count_array.map(&:first)
  end

  # writes a qspec formatted file to filename
  def write(filename)
    ints = Ms::Quant::Qspec.conditions_to_ints(conditions)
    header_cats = INIT_HEADER + ints
    # copy each pair so that appending counts does not alter the caller's data
    rows = @protname_length_pairs.map {|pair| pair.map {|v| v } }
    @condition_to_count_array.each do |_cond, counts|
      rows.zip(counts) {|row, cnt| row << cnt }
    end
    File.open(filename, 'w') do |out|
      out.puts header_cats.join(DELIMITER)
      rows.each {|row| out.puts row.join(DELIMITER) }
    end
  end

  # Writes the data to a temporary file, runs the qspec executable on it and
  # parses the file it produces.
  #
  # returns an array of Qspec::Results objects (each object can be considered
  # a row of data)
  #
  # @param normalize [Boolean] passed to the executable as 1 or 0
  # @param opts [Hash] :keep => true copies the submitted and the returned
  #   file into the current working directory
  def run(normalize=true, opts={})
    puts "normalize: #{normalize}" if $VERBOSE
    tfile = Tempfile.new("qspec")
    begin
      write(tfile.path)
      if opts[:keep]
        local_file = File.join(Dir.pwd, File.basename(tfile.path))
        FileUtils.cp(tfile.path, local_file, :verbose => $VERBOSE)
        puts "(copy of) file submitted to qspec: #{local_file}" if $VERBOSE
      end
      qspec_exe = self.class.executable(conditions)
      cmd = [qspec_exe, tfile.path, NBURNIN, NITER, (normalize ? 1 : 0)].join(' ')
      if $VERBOSE
        puts "running #{cmd}"
      else
        # fold stderr into the captured reply when not verbose
        cmd << " 2>&1"
      end
      reply = `#{cmd}`
      puts reply if $VERBOSE
      # the results file is read from the input path + '_' + executable name
      outfile = tfile.path + '_' + qspec_exe
      results = self.class.results_array(outfile)
      if opts[:keep]
        local_outfile = File.join(Dir.pwd, File.basename(outfile))
        FileUtils.cp(outfile, local_outfile, :verbose => $VERBOSE)
        puts "(copy of) file returned from qspec: #{local_outfile}" if $VERBOSE
      end
      results
    ensure
      # remove the temporary input file even when something above raised
      tfile.close!
    end
  end
end
|
|
112
|
+
|