mspire 0.5.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
require 'ms/plms1'
|
3
|
+
|
4
|
+
module MS
  class Mzml
    # Converts this mzML object into an MS::Plms1 object.
    #
    # @param use_scan_nums [Boolean] when true, the keys of the spectrum
    #   index's scan-to-index mapping are used as scan numbers; otherwise the
    #   spectrum index positions (0, 1, 2, ...) stand in for scan numbers
    # @return [MS::Plms1]
    # @raise [RuntimeError] if a spectrum has no retention time cvParam
    #   (accession MS:1000016) or its unit is neither 'minute' nor 'second'
    def to_plms1(use_scan_nums=true)
      spectra_index = index_list[:spectrum]
      scan_nums = use_scan_nums ? spectra_index.create_scan_to_index.keys : (0...spectra_index.size).to_a

      # retention times are always reported in seconds
      retention_times = to_enum(:each_spectrum_node).map do |spectrum_node|
        rt_param = spectrum_node.xpath("scanList/scan/cvParam[@accession='MS:1000016']")[0]
        raise 'no retention time xml node' unless rt_param
        value = rt_param['value'].to_f
        unit = rt_param['unitName']
        if 'minute' == unit
          value * 60
        elsif 'second' == unit
          value
        else
          raise 'retention time must be in minutes or seconds (or add some code to handle)'
        end
      end

      # Plms1 only needs its spectra container to respond to :each (yielding
      # spectrum objects), so this Mzml object itself is handed over.
      MS::Plms1.new(scan_nums, retention_times, self)
    end
  end
end
|
data/lib/ms/mzml.rb
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'io/bookmark'
|
3
|
+
require 'zlib'
|
4
|
+
require 'ms/mzml/index_list'
|
5
|
+
require 'ms/spectrum'
|
6
|
+
|
7
|
+
module MS
  # Read-only access to the spectra of an mzML file. Spectra are located
  # through an index list that is read from the file when one is present and
  # otherwise built by scanning the file.
  #
  #     MS::Mzml.open("somefile.mzML") do |mzml|
  #       mzml.each do |spectrum|
  #         scan = spectrum.scan
  #         spectrum.mzs                  # array of m/zs
  #         spectrum.intensities          # array of intensities
  #         spectrum.peaks.each do |mz,intensity|
  #           puts "mz: #{mz} intensity: #{intensity}"
  #         end
  #       end
  #     end
  class Mzml
    module Parser
      NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
    end
    include Enumerable

    attr_accessor :filename
    attr_accessor :io
    attr_accessor :index_list
    # the encoding named in the XML declaration (nil if none is declared)
    attr_accessor :encoding

    # io must respond_to?(:size), giving the size of the io object in bytes
    # which allows seeking.  #get_index_list is called to get or create the
    # index list.
    #
    # @param io [IO] a seekable io positioned on an mzML document
    def initialize(io)
      @io = io
      @encoding = @io.bookmark(true) do |inner_io|
        # the encoding attribute of an XML declaration is optional, so a
        # missing one must not be treated as an error
        md = inner_io.readline.match(/encoding=["'](.*?)["']/)
        md && md[1]
      end
      @index_list = get_index_list
    end

    class << self
      # Opens filename and calls the block with a new Mzml object (read-only
      # right now). The file is closed when the block returns.
      def open(filename, &block)
        File.open(filename) do |io|
          block.call(self.new(io))
        end
      end

      # Opens filename and calls the block once with each spectrum.
      def foreach(filename, &block)
        open(filename) do |mzml|
          mzml.each(&block)
        end
      end

      # Unpacks binary data based on a collection of accessions. accessions
      # must only respond to :include?  So, hash keys, a set, or an array
      # will all work.
      #
      # @param base64string [String] the base64 encoded payload
      # @param accessions [#include?] the cvParam accessions of the array
      # @return [Array<Float>] the decoded values
      # @raise [RuntimeError] if the accessions carry no compression
      #   (MS:1000574 / MS:1000576) or precision (MS:1000523 / MS:1000521)
      #   information
      def unpack_binary(base64string, accessions)
        compressed =
          if accessions.include?('MS:1000574')    then true # zlib compression
          elsif accessions.include?('MS:1000576') then false # no compression
          else raise 'no compression info: check your MS accession numbers'
          end
        precision_unpack =
          if accessions.include?('MS:1000523')    then 'E*' # little-endian doubles
          elsif accessions.include?('MS:1000521') then 'e*' # little-endian single floats
          else raise 'unrecognized precision: check your MS accession numbers'
          end
        data = base64string.unpack("m*").first
        unzipped = compressed ? Zlib::Inflate.inflate(data) : data
        unzipped.unpack(precision_unpack)
      end
    end

    # Reads lines starting at start_byte up to and including the first line
    # that holds the closing tag for name.
    #
    # @param start_byte [Integer] where the element begins
    # @param name [Symbol] :spectrum or :chromatogram
    # @return [String] the xml text that was read
    def get_xml_string(start_byte, name=:spectrum)
      io.seek(start_byte)
      closing_tag = %r{</#{name}>}
      lines = []
      io.each_line do |line|
        lines << line
        break if closing_tag.match(line)
      end
      lines.join
    end

    # Calls the block with each MS::Spectrum, in index order. Returns an
    # Enumerator when no block is given.
    def each_spectrum(&block)
      return enum_for(:each_spectrum) unless block
      (0...@index_list[:spectrum].size).each do |int|
        block.call spectrum(int)
      end
    end

    # Calls the block with the Nokogiri::XML::Node object associated with
    # each spectrum. Returns an Enumerator when no block is given.
    def each_spectrum_node(&block)
      return enum_for(:each_spectrum_node) unless block
      @index_list[:spectrum].each do |start_byte|
        block.call spectrum_node_from_start_byte(start_byte)
      end
    end

    alias_method :each, :each_spectrum

    # Parses the spectrum element beginning at start_byte and returns its
    # root node.
    def spectrum_node_from_start_byte(start_byte)
      xml = get_xml_string(start_byte, :spectrum)
      doc = Nokogiri::XML.parse(xml, nil, @encoding, Parser::NOBLANKS)
      doc.root
    end

    # @param [Object] arg an index number (Integer) or id string (String)
    # @return [MS::Spectrum] a spectrum object
    def spectrum(arg)
      # NOTE(review): the original author flagged this lookup as "trouble";
      # it goes through index_list[0] where the iterators above use
      # index_list[:spectrum] -- confirm both name the same index.
      start_byte = index_list[0].start_byte(arg)
      data_arrays = spectrum_node_from_start_byte(start_byte).xpath('//binaryDataArray').map do |binary_data_array_n|
        accessions = binary_data_array_n.xpath('./cvParam').map {|node| node['accession'] }
        base64 = binary_data_array_n.xpath('./binary').text
        MS::Mzml.unpack_binary(base64, accessions)
      end
      MS::Spectrum.new(data_arrays)
    end

    # returns the number of spectra
    def size
      @index_list[:spectrum].size
    end

    alias_method :'[]', :spectrum

    # @param [Integer] scan_num the scan number
    # @return [MS::Spectrum] a spectrum object, or nil if not found
    # @raise [ScanNumbersNotUnique] if scan numbers are not unique
    # @raise [ScanNumbersNotFound] if spectra exist but scan numbers were not
    #   found
    def spectrum_from_scan_num(scan_num)
      @scan_to_index ||= @index_list[0].create_scan_index
      raise ScanNumbersNotUnique if @scan_to_index == false
      raise ScanNumbersNotFound if @scan_to_index.nil?
      index = @scan_to_index[scan_num]
      # honor the documented contract: an unknown scan number gives nil
      # instead of handing nil on to #spectrum
      index.nil? ? nil : spectrum(index)
    end

    # Reads the index list stored in the file.
    #
    # @return [MS::Mzml::IndexList] or nil if there is no indexList in the
    #   mzML
    def read_index_list
      offset = MS::Mzml::Index.index_offset(@io)
      return nil unless offset
      @io.seek(offset)
      xml = Nokogiri::XML.parse(@io.read, nil, @encoding, Parser::NOBLANKS)
      indices = xml.root.children.map do |index_n|
        index = Index.new
        index.name = index_n['name'].to_sym
        ids = []
        index_n.children.each do |offset_n|
          index << offset_n.text.to_i
          ids << offset_n['idRef']
        end
        index.ids = ids
        index
      end
      IndexList.new(indices)
    end

    # Reads through the whole file and captures the start byte of every
    # spectrum and chromatogram element.
    #
    # @return [MS::Mzml::IndexList]
    def create_index_list
      start_bytes_by_name = @io.bookmark(true) do |inner_io| # sets to beginning of file
        found = {:spectrum => {}, :chromatogram => {}}
        byte_total = 0
        inner_io.each do |line|
          if md=%r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
            # offset of the tag = bytes of all previous lines + bytes before
            # the tag on this line
            found[md[1].to_sym][md[2]] = byte_total + md.pre_match.bytesize
          end
          byte_total += line.bytesize
        end
        found
      end

      indices = start_bytes_by_name.map do |name, id_to_start_byte|
        index = Index.new
        ids = []
        id_to_start_byte.each do |id, start_byte|
          ids << id
          index << start_byte
        end
        index.ids = ids
        index.name = name
        index
      end
      IndexList.new(indices)
    end

    # reads or creates an index list
    # @return [Array] an array of indices
    def get_index_list
      read_index_list || create_index_list
    end

    # Raised by #spectrum_from_scan_num when scan numbers are not unique.
    # Inherits from StandardError so that a plain `rescue` can handle it.
    class ScanNumbersNotUnique < StandardError
    end
    # Raised by #spectrum_from_scan_num when no scan numbers were found.
    # Inherits from StandardError so that a plain `rescue` can handle it.
    class ScanNumbersNotFound < StandardError
    end
  end
end
|
197
|
+
|
data/lib/ms/obo.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
|
2
|
+
module MS
  # Holds the elements of a parsed OBO ontology and builds lookup hashes
  # between ids, names and elements on demand.
  class OBO
    # the first element produced by the parser
    attr_accessor :header
    # all remaining elements
    attr_accessor :elements

    # @param file_or_io a filename or io object, handed to Obo::Parser.new
    def initialize(file_or_io)
      obo = Obo::Parser.new(file_or_io)
      # These must be instance variables: plain locals would leave both
      # accessors nil and break every lookup below.
      @elements = obo.elements.to_a
      @header = @elements.shift
    end

    # returns an id to name Hash
    def id_to_name
      @id_to_name ||= build_hash('id', 'name')
    end

    # returns a name to id Hash
    def name_to_id
      @name_to_id ||= build_hash('name', 'id')
    end

    # returns an id to element Hash
    def id_to_element
      @id_to_element ||= build_hash('id', nil)
    end

    protected

    # Builds a Hash keyed on the first value of tag `key` of every element.
    # The hash value is the first value of tag `val`, or the element itself
    # when val is nil.
    def build_hash(key, val)
      elements.each_with_object({}) do |el, hash|
        tv = el.tagvalues
        hash[tv[key].first] = val.nil? ? el : tv[val].first
      end
    end
  end
end
|
data/lib/ms/plms1.rb
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
|
2
|
+
require 'write_file_or_string'
|
3
|
+
require 'ms/spectrum'
|
4
|
+
require 'stringio'
|
5
|
+
require 'openany'
|
6
|
+
|
7
|
+
module MS
|
8
|
+
|
9
|
+
=begin
|
10
|
+
# if given scans, will use those, or optionally takes a block where an
|
11
|
+
# array of ms1 scans are yielded and it expects Enumerable scans back.
|
12
|
+
def to_plms1(scans=nil)
|
13
|
+
times = []
|
14
|
+
scan_numbers = []
|
15
|
+
spectra = []
|
16
|
+
|
17
|
+
unless scans
|
18
|
+
scans = []
|
19
|
+
self.each(:ms_level => 1, :precursor => false) do |scan|
|
20
|
+
scans << scan
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
if block_given?
|
25
|
+
scans = yield(scans)
|
26
|
+
end
|
27
|
+
|
28
|
+
scans.each do |scan|
|
29
|
+
times << scan.time
|
30
|
+
scan_numbers << scan.num
|
31
|
+
spec = scan.spectrum
|
32
|
+
spectra << [spec.mzs.to_a, spec.intensities.to_a]
|
33
|
+
end
|
34
|
+
plms1 = Plms1.new
|
35
|
+
plms1.times = times
|
36
|
+
plms1.scan_numbers = scan_numbers
|
37
|
+
plms1.spectra = spectra
|
38
|
+
plms1
|
39
|
+
end
|
40
|
+
=end
|
41
|
+
|
42
|
+
# Prince Lab MS 1: a simple format for reading and writing
# MS1 level mass spec data
#
# see MS::Plms1::SPECIFICATION for the file specification
class Plms1
  SPECIFICATION =<<-HERE
# The file format contains no newlines but is shown here broken into lines for
# clarity. Data should be little endian. Comments begin with '#' but are not
# part of the spec. Angled brackets '<>' indicate the data type and square
# brackets '[]' the name of the data. An ellipsis '...' represents a
# continuous array of data points.

<uint32>[Number of scans]
<uint32>[scan number] ... # array of scan numbers as uint32
<float64>[time point] ... # array of time points as double precision floats (in seconds)
# this is a repeating unit based on [Number of scans]:
<uint32>[Number of data rows] # almost always == 2 (m/z, intensity)
# this is a repeating unit based on [Number of data rows]
<uint32>[Number of data points]
<float64>[data point] ... # array of data points as double precision floats
  HERE

  # an array of scan numbers
  attr_accessor :scan_numbers
  # an array of time data
  attr_accessor :times
  # an array that contains spectrum objects
  attr_accessor :spectra

  def initialize(_scan_numbers=[], _times=[], _spectra=[])
    @scan_numbers = _scan_numbers
    @times = _times
    @spectra = _spectra
  end

  # Reads cnt little-endian unsigned 32 bit integers from io.
  #
  # @return [Array<Integer>]
  # @raise [EOFError] if io holds fewer than cnt values
  def read_uint32(io, cnt=1)
    read_exactly(io, cnt*4).unpack("V*")
  end

  # Reads cnt little-endian double precision floats from io.
  #
  # @return [Array<Float>]
  # @raise [EOFError] if io holds fewer than cnt values
  def read_float64(io, cnt=1)
    read_exactly(io, cnt*8).unpack("E*")
  end

  # Fills scan_numbers, times and spectra from binary plms1 data.
  #
  # @param io_or_filename an io object or filename, handed to openany
  # @return [Plms1] self, for chaining
  # @raise [EOFError] if the data ends before everything announced by its
  #   counts has been read
  def read(io_or_filename)
    openany(io_or_filename) do |io|
      num_scans = read_uint32(io)[0]
      @scan_numbers = read_uint32(io, num_scans)
      @times = read_float64(io, num_scans)
      @spectra = num_scans.times.map do
        num_rows = read_uint32(io)[0]
        data = num_rows.times.map do
          read_float64(io, read_uint32(io)[0])
        end
        MS::Spectrum.new(data)
      end
    end
    self
  end

  # Appends data (an Integer or an Array of Integers) to out as
  # little-endian unsigned 32 bit integers.
  def write_uint32(out, data)
    to_pack = data.is_a?(Array) ? data : [data]
    out << to_pack.pack('V*')
  end

  # Appends data (a Float or an Array of Floats) to out as little-endian
  # double precision floats.
  def write_float64(out, data)
    to_pack = data.is_a?(Array) ? data : [data]
    out << to_pack.pack('E*')
  end

  # writes an ascii version of the format
  # It is the same as the binary format, except a newline follows each
  # length indicator or array of data.  An empty line represents an empty
  # array.
  def write_ascii(filename=nil)
    write_file_or_string(filename) do |out|
      out.puts scan_numbers.size
      out.puts scan_numbers.join(' ')
      out.puts times.join(' ')
      spectra.each do |spectrum|
        out.puts spectrum.size
        if spectrum.size > 0
          out.puts spectrum.mzs.size
          out.puts spectrum.mzs.join(' ')
          out.puts spectrum.intensities.size
          out.puts spectrum.intensities.join(' ')
        end
      end
    end
  end

  # Writes the data in binary form, or in ascii form when ascii is true.
  # returns the string if no filename given
  def write(filename=nil, ascii=false)
    return write_ascii(filename) if ascii
    write_file_or_string(filename) do |out|
      write_uint32(out, spectra.size)
      write_uint32(out, scan_numbers)
      write_float64(out, times)
      spectra.each do |spectrum|
        # NOTE(review): spectrum.size is written as the number of rows but
        # only mzs and intensities follow; if a spectrum can hold more than
        # two data arrays the count and the data disagree -- confirm.
        write_uint32(out, spectrum.size) # number of rows
        if spectrum.size > 0
          mzs = spectrum.mzs
          write_uint32(out, mzs.size)
          write_float64(out, mzs)
          intensities = spectrum.intensities
          write_uint32(out, intensities.size)
          write_float64(out, intensities)
        end
      end
    end
  end

  private

  # Reads exactly nbytes from io. IO#read gives nil at end of file and a
  # shorter string on a partial read; both mean the data is truncated and
  # would otherwise surface as a NoMethodError or as silently missing values.
  def read_exactly(io, nbytes)
    data = io.read(nbytes)
    got = data.nil? ? 0 : data.bytesize
    if got < nbytes
      raise EOFError, "truncated plms1 data: expected #{nbytes} bytes but got #{got}"
    end
    data
  end
end
|
156
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'ms/quant/protein_group_comparison'
|
2
|
+
|
3
|
+
module Ms
  module Quant
    module ProteinGroupComparison
      # A protein group comparison that also carries the qspec results
      # struct it was built from.
      class Qspec
        include ProteinGroupComparison

        # the qspec results struct handed to #initialize
        attr_accessor :qspec_results_struct

        # @param protein_group a protein group object
        # @param experiments [Array] the experiment names
        # @param qspec_results_struct a qspec results struct; its
        #   counts_array is passed up to super as the third argument
        def initialize(protein_group, experiments, qspec_results_struct)
          counts = qspec_results_struct.counts_array
          super(protein_group, experiments, counts)
          @qspec_results_struct = qspec_results_struct
        end
      end
    end
  end
end
|
22
|
+
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Tempfile and FileUtils are both used by Ms::Quant::Qspec#run
require 'tempfile'
require 'fileutils'

module Ms ; end
module Ms::Quant ; end

# Writes spectral count data in the format expected by the qspec/qspecgp
# command line programs, runs the appropriate program and parses its output.
class Ms::Quant::Qspec

  # personal communication with Hyungwon Choi: "We typically use nburn=2000,
  # niter=10000, which is quite sufficient to guarantee the reproducibility of
  # results using the same data."
  NBURNIN = 2000
  NITER = 10000
  INIT_HEADER = %w(protid protLen)
  DELIMITER = "\t"

  SUBMITTED_TO_QSPEC = 'submitted_to_qspec.txt'

  # for version 2 of QSpec
  # counts array is parallel to the experiment names passed in originally
  Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)

  # Takes an ordered list of conditions and numbers each run of identical
  # neighbors, e.g. ['cond1', 'cond1', 'cond2', 'cond2'] gives [0,0,1,1].
  # The number increases every time the condition changes, so a condition
  # that shows up again later is given a new number.
  #
  # @param conditions [Array] one condition per experiment
  # @return [Array<Integer>]
  def self.conditions_to_ints(conditions)
    group = 0
    previous = conditions.first
    conditions.map do |cond|
      group += 1 unless previous == cond
      previous = cond
      group
    end
  end

  # returns an array of Results structs which is each row of the returned file
  # works with V2 of QSpec
  #
  # @param resultsfile [String] path to the tab delimited qspec output
  # @raise [ArgumentError] if the header has no BayesFactor column
  def self.results_array(resultsfile)
    # File.readlines never treats a leading '|' in the name as a command
    rows = File.readlines(resultsfile).map {|line| line.chomp.split("\t") }
    headers = rows.shift
    start_bayes = headers && headers.index {|v| v =~ /BayesFactor/i }
    unless start_bayes
      raise ArgumentError, "no BayesFactor column found in #{resultsfile}"
    end
    rows.map do |row|
      # column 0 is the protein id and the columns up to the Bayes factor
      # are the counts; four float columns and the flag follow
      counts = row[1...start_bayes].map(&:to_f)
      stats = row[start_bayes,4].map(&:to_f)
      Results.new(row[0], counts, *(stats + [row[start_bayes+4]]))
    end
  end

  # Returns the right executable based on the array of conditions: 'qspecgp'
  # if any condition occurs three or more times, 'qspec' otherwise.
  def self.executable(conditions)
    biggest_size = conditions.group_by {|v| v }.values.map(&:size).max
    (biggest_size >= 3) ? 'qspecgp' : 'qspec'
  end

  # protname_length_pairs is an array of doublets: [protname, length]
  # condition_to_count_array is an array doublets: [condition, array_of_counts]
  def initialize(protname_length_pairs, condition_to_count_array)
    @protname_length_pairs = protname_length_pairs
    @condition_to_count_array = condition_to_count_array
  end

  # the condition of every experiment, in the order given to #initialize
  def conditions
    @condition_to_count_array.map(&:first)
  end

  # writes a qspec formatted file to filename
  def write(filename)
    ints = self.class.conditions_to_ints(conditions)
    header_cats = INIT_HEADER + ints
    # copy each pair so that appending counts leaves the caller's data alone
    rows = @protname_length_pairs.map {|pair| pair.to_a.dup }
    @condition_to_count_array.each do |_cond, counts|
      rows.zip(counts) {|row,cnt| row << cnt }
    end
    File.open(filename,'w') do |out|
      out.puts header_cats.join(DELIMITER)
      rows.each {|row| out.puts row.join(DELIMITER) }
    end
  end

  # Writes the data to a temporary file, runs qspec (or qspecgp) on it and
  # parses the output. Both temporary files are removed afterwards, also
  # when something goes wrong.
  #
  # @param normalize [Boolean] passed to the executable as 1 or 0
  # @param opts [Hash] with :keep, copies of the submitted file and of the
  #   returned file are left in the current directory
  # @return [Array<Results>] an array of Qspec::Results objects (each object
  #   can be considered a row of data)
  def run(normalize=true, opts={})
    puts "normalize: #{normalize}" if $VERBOSE
    tfile = Tempfile.new("qspec")
    outfile = nil
    begin
      write(tfile.path)
      if opts[:keep]
        local_file = File.join(Dir.pwd,File.basename(tfile.path))
        FileUtils.cp(tfile.path, local_file, :verbose => $VERBOSE)
        puts "(copy of) file submitted to qspec: #{local_file}" if $VERBOSE
      end
      qspec_exe = self.class.executable(conditions)
      cmd = [qspec_exe, tfile.path, NBURNIN, NITER, (normalize ? 1 : 0)].join(' ')
      if $VERBOSE
        puts "running #{cmd}"
      else
        # capture stderr along with stdout so that it stays quiet
        cmd << " 2>&1"
      end
      reply = `#{cmd}`
      puts reply if $VERBOSE
      # the output is expected next to the input: <input>_<executable>
      outfile = tfile.path + '_' + qspec_exe
      results = self.class.results_array(outfile)
      if opts[:keep]
        local_outfile = File.join(Dir.pwd, File.basename(outfile))
        FileUtils.cp(outfile, local_outfile, :verbose => $VERBOSE)
        puts "(copy of) file returned from qspec: #{local_outfile}"
      end
      results
    ensure
      tfile.close!
      FileUtils.rm_f(outfile) if outfile
    end
  end
end
|
112
|
+
|