mspire 0.5.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,34 @@
1
+
2
+ require 'ms/plms1'
3
+
4
module MS
  class Mzml
    # Converts this mzML object into an MS::Plms1 object.
    #
    # When use_scan_nums is true the scan numbers are the keys of the
    # spectrum index's scan-to-index mapping; otherwise the spectrum index
    # positions (0, 1, 2, ...) stand in for scan numbers.
    #
    # Retention times are read from the cvParam with accession MS:1000016 of
    # every spectrum node and are always returned in seconds.
    #
    # Raises a RuntimeError if a spectrum has no retention time cvParam or if
    # its unit is neither 'minute' nor 'second'.
    def to_plms1(use_scan_nums=true)
      spectrum_index = index_list[:spectrum]
      scan_nums = use_scan_nums ? spectrum_index.create_scan_to_index.keys : spectrum_index.size.times.to_a

      retention_times = []
      each_spectrum_node do |spectrum_node|
        rt_param = spectrum_node.xpath("scanList/scan/cvParam[@accession='MS:1000016']").first
        raise 'no retention time xml node' if rt_param.nil?
        value = rt_param['value'].to_f
        unit = rt_param['unitName']
        retention_times <<
          if unit == 'minute' then value * 60
          elsif unit == 'second' then value
          else raise 'retention time must be in minutes or seconds (or add some code to handle)'
          end
      end

      # plms1 only requires that the spectra object respond to :each, giving
      # a spectrum object, so the Mzml object itself is handed over.
      MS::Plms1.new(scan_nums, retention_times, self)
    end
  end
end
data/lib/ms/mzml.rb ADDED
@@ -0,0 +1,197 @@
1
+ require 'nokogiri'
2
+ require 'io/bookmark'
3
+ require 'zlib'
4
+ require 'ms/mzml/index_list'
5
+ require 'ms/spectrum'
6
+
7
module MS
  # Reader for mzML files that pulls individual spectra out of the file by
  # byte offset instead of parsing the whole document.
  #
  #     MS::Mzml.open("somefile.mzML") do |mzml|
  #       mzml.each do |spectrum|
  #         scan = spectrum.scan
  #         spectrum.mzs                  # array of m/zs
  #         spectrum.intensities          # array of intensities
  #         spectrum.peaks.each do |mz,intensity|
  #           puts "mz: #{mz} intensity: #{intensity}"
  #         end
  #       end
  #     end
  class Mzml
    module Parser
      # Nokogiri parse options used for every XML fragment parsed here: the
      # library defaults plus NOBLANKS.
      NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
    end
    include Enumerable

    attr_accessor :filename
    attr_accessor :io
    attr_accessor :index_list
    attr_accessor :encoding

    # io must respond_to?(:size), giving the size of the io object in bytes
    # which allows seeking. #get_index_list is called to get or create the
    # index list.
    #
    # The encoding is taken from the encoding="..." attribute on the first
    # line of the file. It is left nil when the declaration carries no
    # encoding attribute (previously this raised a NoMethodError).
    def initialize(io)
      @io = io
      @encoding = @io.bookmark(true) do |inner_io|
        declaration = inner_io.readline.match(/encoding=["'](.*?)["']/)
        declaration && declaration[1]
      end
      @index_list = get_index_list
    end

    class << self
      # Opens filename and passes a new Mzml object to the block; the file is
      # closed when the block returns. Read-only right now.
      def open(filename, &block)
        File.open(filename) do |io|
          block.call(self.new(io))
        end
      end

      # Opens filename and passes each spectrum to the block.
      def foreach(filename, &block)
        open(filename) do |mzml|
          mzml.each(&block)
        end
      end

      # Unpacks binary data based on its accessions. accessions must only
      # respond to :include? So, hash keys, a set, or an array will all work.
      #
      # base64string is base64 decoded, inflated with zlib when the
      # accessions say it is compressed, and unpacked as little-endian
      # doubles ('E*') or little-endian singles ('e*').
      #
      # Raises a RuntimeError when the accessions state neither the
      # compression nor the precision.
      def unpack_binary(base64string, accessions)
        compressed =
          if accessions.include?('MS:1000574')    then true # zlib compression
          elsif accessions.include?('MS:1000576') then false # no compression
          else raise 'no compression info: check your MS accession numbers'
          end
        precision_unpack =
          if accessions.include?('MS:1000523')    then 'E*'
          elsif accessions.include?('MS:1000521') then 'e*'
          else raise 'unrecognized precision: check your MS accession numbers'
          end
        data = base64string.unpack("m*").first
        unzipped = compressed ? Zlib::Inflate.inflate(data) : data
        unzipped.unpack(precision_unpack)
      end
    end

    # Returns the raw XML text starting at start_byte and running through the
    # first line that contains the closing tag for name.
    # name can be :spectrum or :chromatogram
    def get_xml_string(start_byte, name=:spectrum)
      io.seek(start_byte)
      data = []
      regexp = %r{</#{name}>}
      io.each_line do |line|
        data << line
        break if regexp.match(line)
      end
      data.join
    end

    # Yields an MS::Spectrum for every entry in the spectrum index, in index
    # order.
    def each_spectrum(&block)
      (0...@index_list[:spectrum].size).each do |int|
        block.call spectrum(int)
      end
    end

    # Yields the Nokogiri::XML::Node object associated with each spectrum.
    def each_spectrum_node(&block)
      @index_list[:spectrum].each do |start_byte|
        block.call spectrum_node_from_start_byte(start_byte)
      end
    end

    alias_method :each, :each_spectrum

    # Parses the spectrum that begins at start_byte and returns its root
    # Nokogiri node.
    def spectrum_node_from_start_byte(start_byte)
      xml = get_xml_string(start_byte, :spectrum)
      doc = Nokogiri::XML.parse(xml, nil, @encoding, Parser::NOBLANKS)
      doc.root
    end

    # @param [Object] arg an index number (Integer) or id string (String)
    # @return [MS::Spectrum] a spectrum object
    def spectrum(arg)
      # Look the spectrum index up by name, as the rest of this class does;
      # the first entry of the index list is not guaranteed to be the
      # spectrum index.
      start_byte = index_list[:spectrum].start_byte(arg)
      data_arrays = spectrum_node_from_start_byte(start_byte).xpath('//binaryDataArray').map do |binary_data_array_n|
        accessions = binary_data_array_n.xpath('./cvParam').map {|node| node['accession'] }
        base64 = binary_data_array_n.xpath('./binary').text
        MS::Mzml.unpack_binary(base64, accessions)
      end
      MS::Spectrum.new(data_arrays)
    end

    # returns the number of spectra
    def size
      @index_list[:spectrum].size
    end

    alias_method :'[]', :spectrum

    # @param [Integer] scan_num the scan number
    # @return [MS::Spectrum] a spectrum object, or nil if not found
    # @raise [ScanNumbersNotUnique] if scan numbers are not unique
    # @raise [ScanNumbersNotFound] if spectra exist but scan numbers were not
    #   found
    def spectrum_from_scan_num(scan_num)
      # NOTE(review): MS::Mzml#to_plms1 calls #create_scan_to_index on the
      # same index object; confirm #create_scan_index is the intended method.
      @scan_to_index ||= @index_list[:spectrum].create_scan_index
      raise ScanNumbersNotUnique if @scan_to_index == false
      raise ScanNumbersNotFound if @scan_to_index == nil
      spectrum(@scan_to_index[scan_num])
    end

    # Reads the indexList element stored in the file.
    #
    # @return [MS::Mzml::IndexList] or nil if there is no indexList in the
    #   mzML
    def read_index_list
      offset = MS::Mzml::Index.index_offset(@io)
      return nil unless offset

      @io.seek(offset)
      xml = Nokogiri::XML.parse(@io.read, nil, @encoding, Parser::NOBLANKS)
      indices = xml.root.children.map do |index_n|
        index = Index.new
        index.name = index_n['name'].to_sym
        ids = []
        # each child holds one byte offset as its text and the id it refers
        # to in its idRef attribute
        index_n.children.each do |offset_n|
          index << offset_n.text.to_i
          ids << offset_n['idRef']
        end
        index.ids = ids
        index
      end
      IndexList.new(indices)
    end

    # Reads through the whole file and captures the start byte of every
    # spectrum and chromatogram element.
    #
    # @return [MS::Mzml::IndexList]
    def create_index_list
      indices_hash = @io.bookmark(true) do |inner_io| # sets to beginning of file
        found = {:spectrum => {}, :chromatogram => {}}
        byte_total = 0
        inner_io.each do |line|
          if md=%r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
            # bytes before this line plus bytes before the tag on this line
            found[md[1].to_sym][md[2]] = byte_total + md.pre_match.bytesize
          end
          byte_total += line.bytesize
        end
        found
      end

      indices = indices_hash.map do |sym, id_to_start_byte|
        index = Index.new
        ids = []
        id_to_start_byte.each do |id, start_byte|
          ids << id
          index << start_byte
        end
        index.ids = ids
        index.name = sym
        index
      end
      IndexList.new(indices)
    end

    # Reads the index list from the file, or creates one by scanning the file
    # when none is stored in it.
    #
    # @return [MS::Mzml::IndexList]
    def get_index_list
      read_index_list || create_index_list
    end

    # Raised when scan numbers cannot be used as unique keys.
    class ScanNumbersNotUnique < StandardError
    end

    # Raised when spectra exist but scan numbers were not found.
    class ScanNumbersNotFound < StandardError
    end
  end
end
197
+
data/lib/ms/obo.rb ADDED
@@ -0,0 +1,38 @@
1
+
2
module MS
  # Holds the contents of an OBO file (as produced by Obo::Parser) and
  # builds lookup hashes between the 'id' and 'name' tags of its elements.
  class OBO
    # the first element returned by the parser
    attr_accessor :header
    # every element returned by the parser except the first
    attr_accessor :elements

    # file_or_io is handed straight to Obo::Parser.new.
    def initialize(file_or_io)
      parser = Obo::Parser.new(file_or_io)
      # These must be instance variables: assigning to bare `elements` and
      # `header` only creates locals, leaving both accessors nil.
      @elements = parser.elements.to_a
      @header = @elements.shift
    end

    # returns an id to name Hash
    def id_to_name
      @id_to_name ||= build_hash('id', 'name')
    end

    # returns a name to id Hash
    def name_to_id
      @name_to_id ||= build_hash('name', 'id')
    end

    # returns an id to element Hash
    def id_to_element
      @id_to_element ||= build_hash('id', nil)
    end

    protected

    # Builds a Hash keyed by the first value of each element's `key` tag.
    # The value is the first value of the `val` tag, or the element itself
    # when val is nil.
    def build_hash(key, val)
      elements.each_with_object({}) do |el, hash|
        tv = el.tagvalues
        hash[tv[key].first] = val.nil? ? el : tv[val].first
      end
    end
  end
end
data/lib/ms/plms1.rb ADDED
@@ -0,0 +1,156 @@
1
+
2
+ require 'write_file_or_string'
3
+ require 'ms/spectrum'
4
+ require 'stringio'
5
+ require 'openany'
6
+
7
module MS

  # Disabled legacy conversion code, kept for reference only:
  #
  #   # if given scans, will use those, or optionally takes a block where an
  #   # array of ms1 scans are yielded and it expects Enumerable scans back.
  #   def to_plms1(scans=nil)
  #     times = []
  #     scan_numbers = []
  #     spectra = []
  #
  #     unless scans
  #       scans = []
  #       self.each(:ms_level => 1, :precursor => false) do |scan|
  #         scans << scan
  #       end
  #     end
  #
  #     if block_given?
  #       scans = yield(scans)
  #     end
  #
  #     scans.each do |scan|
  #       times << scan.time
  #       scan_numbers << scan.num
  #       spec = scan.spectrum
  #       spectra << [spec.mzs.to_a, spec.intensities.to_a]
  #     end
  #     plms1 = Plms1.new
  #     plms1.times = times
  #     plms1.scan_numbers = scan_numbers
  #     plms1.spectra = spectra
  #     plms1
  #   end

  # Prince Lab MS 1: a simple format for reading and writing
  # MS1 level mass spec data
  #
  # see MS::Plms1::SPECIFICATION for the file specification
  class Plms1
    SPECIFICATION =<<-HERE
# The file format contains no newlines but is shown here broken into lines for
# clarity. Data should be little endian. Comments begin with '#' but are not
# part of the spec. Angled brackets '<>' indicate the data type and square
# brackets '[]' the name of the data. An ellipsis '...' represents a
# continuous array of data points.

<uint32>[Number of scans]
<uint32>[scan number] ... # array of scan numbers as uint32
<float64>[time point] ... # array of time points as double precision floats (in seconds)
# this is a repeating unit based on [Number of scans]:
<uint32>[Number of data rows] # almost always == 2 (m/z, intensity)
# this is a repeating unit based on [Number of data rows]
<uint32>[Number of data points]
<float64>[data point] ... # array of data points as double precision floats
    HERE

    # an array of scan numbers
    attr_accessor :scan_numbers
    # an array of time data
    attr_accessor :times
    # an array that contains spectrum objects
    attr_accessor :spectra

    # All three collections default to empty arrays.
    def initialize(_scan_numbers=[], _times=[], _spectra=[])
      @scan_numbers = _scan_numbers
      @times = _times
      @spectra = _spectra
    end

    # Reads cnt little-endian uint32 values from io.
    # returns an array of Integers
    def read_uint32(io, cnt=1)
      bytes = io.read(cnt*4)
      bytes.unpack("V*")
    end

    # Reads cnt little-endian float64 values from io.
    # returns an array of Floats
    def read_float64(io, cnt=1)
      bytes = io.read(cnt*8)
      bytes.unpack("E*")
    end

    # Fills scan_numbers, times and spectra from binary plms1 data.
    # returns self for chaining
    def read(io_or_filename)
      openany(io_or_filename) do |io|
        num_scans = read_uint32(io).first
        @scan_numbers = read_uint32(io, num_scans)
        @times = read_float64(io, num_scans)
        @spectra = Array.new(num_scans) do
          num_rows = read_uint32(io).first
          rows = Array.new(num_rows) do
            num_points = read_uint32(io).first
            read_float64(io, num_points)
          end
          MS::Spectrum.new(rows)
        end
      end
      self
    end

    # Appends data (a single value or an Array of values) to out as
    # little-endian uint32.
    def write_uint32(out, data)
      values = data.is_a?(Array) ? data : [data]
      out << values.pack('V*')
    end

    # Appends data (a single value or an Array of values) to out as
    # little-endian float64.
    def write_float64(out, data)
      values = data.is_a?(Array) ? data : [data]
      out << values.pack('E*')
    end

    # writes an ascii version of the format
    # It is the same as the binary format, except a newline follows each
    # length indicator or array of data. An empty line represents an empty
    # array.
    def write_ascii(filename=nil)
      write_file_or_string(filename) do |out|
        out.puts scan_numbers.size
        out.puts scan_numbers.join(' ')
        out.puts times.join(' ')
        spectra.each do |spectrum|
          out.puts spectrum.size
          next unless spectrum.size > 0
          mzs = spectrum.mzs
          out.puts mzs.size
          out.puts mzs.join(' ')
          intensities = spectrum.intensities
          out.puts intensities.size
          out.puts intensities.join(' ')
        end
      end
    end

    # Writes the binary format, or the ascii format when ascii is true.
    # returns the string if no filename given
    def write(filename=nil, ascii=false)
      return write_ascii(filename) if ascii

      write_file_or_string(filename) do |out|
        write_uint32(out, spectra.size)
        write_uint32(out, scan_numbers)
        write_float64(out, times)
        spectra.each do |spectrum|
          write_uint32(out, spectrum.size) # number of rows
          next unless spectrum.size > 0
          mzs = spectrum.mzs
          write_uint32(out, mzs.size)
          write_float64(out, mzs)
          intensities = spectrum.intensities
          write_uint32(out, intensities.size)
          write_float64(out, intensities)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'ms/quant/protein_group_comparison'
2
+
3
module Ms
  module Quant
    # Reopened here so the namespace exists for the class below; its
    # behaviour comes from wherever else the module is defined.
    module ProteinGroupComparison
      # A protein group comparison that also keeps the QSpec results struct
      # the counts were taken from.
      class Qspec
        include Ms::Quant::ProteinGroupComparison

        # the qspec results struct handed to the constructor
        attr_accessor :qspec_results_struct

        # takes a protein group object, an array of experiment names and a
        # qspec results struct. The struct's counts_array is what gets
        # forwarded (with the first two arguments) to the inherited
        # initializer.
        def initialize(protein_group, experiments, qspec_results_struct)
          counts = qspec_results_struct.counts_array
          super(protein_group, experiments, counts)
          @qspec_results_struct = qspec_results_struct
        end
      end
    end
  end
end
22
+
@@ -0,0 +1,112 @@
1
# Tempfile and FileUtils are used by Ms::Quant::Qspec#run.
require 'tempfile'
require 'fileutils'

module Ms ; end
module Ms::Quant ; end

# Writes spectral count data in the tab-delimited layout QSpec expects, runs
# the qspec/qspecgp executable on it and parses the results file.
class Ms::Quant::Qspec

  # personal communication with Hyungwon Choi: "We typically use nburn=2000,
  # niter=10000, which is quite sufficient to guarantee the reproducibility of
  # results using the same data."
  NBURNIN = 2000
  NITER = 10000
  INIT_HEADER = %w(protid protLen)
  DELIMITER = "\t"

  SUBMITTED_TO_QSPEC = 'submitted_to_qspec.txt'

  # takes an ordered list of conditions ['cond1', 'cond1', 'cond2', 'cond2'] and
  # returns an array of ints [0,0,1,1]: the int goes up by one every time
  # the condition differs from the one before it.
  def self.conditions_to_ints(conditions)
    index = 0
    previous = conditions.first
    conditions.map do |cond|
      unless previous == cond
        index += 1
        previous = cond
      end
      index
    end
  end

  # returns an array of Results structs which is each row of the returned file
  # works with V2 of QSpec
  #
  # The first column is the protein id. Every column between it and the
  # first header matching /BayesFactor/i is a count. The four columns from
  # there on are floats and the column after them is the flag string.
  def self.results_array(resultsfile)
    rows = File.readlines(resultsfile).map {|line| line.chomp.split("\t") }
    headers = rows.shift
    start_bayes = headers.index {|v| v =~ /BayesFactor/i }
    rows.map do |row|
      data = [row[0]]
      data.push( row[1...start_bayes].map(&:to_f) )
      data.push( *row[start_bayes,4].map(&:to_f) )
      data.push( row[start_bayes+4] )
      Results.new(*data)
    end
  end

  # returns the right executable based on the array of conditions:
  # 'qspecgp' when any condition occurs three or more times, else 'qspec'
  def self.executable(conditions)
    biggest_size = conditions.group_by {|v| v }.values.map(&:size).max
    (biggest_size >= 3) ? 'qspecgp' : 'qspec'
  end

  # protname_length_pairs is an array of doublets: [protname, length]
  # condition_to_count_array is an array doublets: [condition, array_of_counts]
  def initialize(protname_length_pairs, condition_to_count_array)
    @protname_length_pairs = protname_length_pairs
    @condition_to_count_array = condition_to_count_array
  end

  # returns the condition of each doublet, in order
  def conditions
    @condition_to_count_array.map(&:first)
  end

  # writes a qspec formatted file to filename
  def write(filename)
    ints = Ms::Quant::Qspec.conditions_to_ints(conditions)
    header_cats = INIT_HEADER + ints
    # copy each pair so the caller's arrays are not mutated by the appends
    rows = @protname_length_pairs.map {|pair| pair.to_a.dup }
    @condition_to_count_array.each do |_cond, counts|
      rows.zip(counts) {|row,cnt| row << cnt }
    end
    File.open(filename,'w') do |out|
      out.puts header_cats.join(DELIMITER)
      rows.each {|row| out.puts row.join(DELIMITER) }
    end
  end

  # Writes the data to a temp file, runs the executable on it and parses the
  # file it leaves behind (the temp path plus "_<executable name>").
  #
  # normalize is passed to the executable as 1 or 0. With opts[:keep], a copy
  # of both the submitted file and the results file is left in the current
  # working directory. Progress is printed only when $VERBOSE is set.
  #
  # returns an array of Qspec::Results objects (each object can be considered
  # a row of data)
  def run(normalize=true, opts={})
    puts "normalize: #{normalize}" if $VERBOSE
    tfile = Tempfile.new("qspec")
    begin
      write(tfile.path)
      if opts[:keep]
        local_file = File.join(Dir.pwd,File.basename(tfile.path))
        FileUtils.cp(tfile.path, local_file, :verbose => $VERBOSE)
        puts "(copy of) file submitted to qspec: #{local_file}" if $VERBOSE
      end
      qspec_exe = self.class.executable(conditions)
      cmd = [qspec_exe, tfile.path, NBURNIN, NITER, (normalize ? 1 : 0)].join(' ')
      if $VERBOSE
        puts "running #{cmd}"
      else
        # fold stderr into the captured output so it stays off the terminal
        cmd << " 2>&1"
      end
      reply = `#{cmd}`
      puts reply if $VERBOSE
      outfile = tfile.path + '_' + qspec_exe
      results = self.class.results_array(outfile)
      if opts[:keep]
        local_outfile = File.join(Dir.pwd, File.basename(outfile))
        FileUtils.cp(outfile, local_outfile, :verbose => $VERBOSE)
        puts "(copy of) file returned from qspec: #{local_outfile}" if $VERBOSE
      end
      results
    ensure
      # always remove the temp file, even when qspec or the parsing fails
      tfile.close!
    end
  end

  # for version 2 of QSpec
  # counts array is parallel to the experiment names passed in originally
  Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
end
112
+