mspire 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,34 @@
1
+
2
+ require 'ms/plms1'
3
+
4
+ module MS
5
+ class Mzml
6
# Builds an MS::Plms1 object from this mzML object.
#
# When +use_scan_nums+ is true the scan numbers are the keys of the spectrum
# index's scan-to-index map; otherwise the zero-based spectrum index numbers
# are used in place of scan numbers.
#
# Retention times are read from each spectrum node's MS:1000016 cvParam and
# are returned in seconds (values given in minutes are multiplied by 60).
#
# @param use_scan_nums [Boolean] use real scan numbers (true) or indices
# @return [MS::Plms1]
# @raise [RuntimeError] if a spectrum has no retention time cvParam or the
#   retention time unit is neither 'minute' nor 'second'
def to_plms1(use_scan_nums=true)
  spectra_index = self.index_list[:spectrum]
  scan_nums = use_scan_nums ? spectra_index.create_scan_to_index.keys : (0...spectra_index.size).to_a

  retention_times = []
  self.enum_for(:each_spectrum_node).each do |node|
    rt_node = node.xpath("scanList/scan/cvParam[@accession='MS:1000016']")[0]
    raise 'no retention time xml node' unless rt_node
    value = rt_node['value'].to_f
    unit = rt_node['unitName']
    retention_times <<
      if 'minute' == unit
        value * 60
      elsif 'second' == unit
        value
      else
        raise 'retention time must be in minutes or seconds (or add some code to handle)'
      end
  end

  # plms1 only requires that the object respond to :each, giving a spectrum
  # object, so the Mzml object itself is handed over as the spectra.
  MS::Plms1.new(scan_nums, retention_times, self)
end
33
+ end
34
+ end
data/lib/ms/mzml.rb ADDED
@@ -0,0 +1,197 @@
1
+ require 'nokogiri'
2
+ require 'io/bookmark'
3
+ require 'zlib'
4
+ require 'ms/mzml/index_list'
5
+ require 'ms/spectrum'
6
+
7
+ module MS
8
+ # MS::Mzml.open("somefile.mzML") do |mzml|
9
+ # mzml.each do |spectrum|
10
+ # scan = spectrum.scan
11
+ # spectrum.mzs # array of m/zs
12
+ # spectrum.intensities # array of intensities
13
+ # spectrum.peaks.each do |mz,intensity|
14
+ # puts "mz: #{mz} intensity: #{intensity}"
15
+ # end
16
+ # end
17
+ # end
18
+ class Mzml
19
+ module Parser
20
+ NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
21
+ end
22
+ include Enumerable
23
+
24
+ attr_accessor :filename
25
+ attr_accessor :io
26
+ attr_accessor :index_list
27
+ attr_accessor :encoding
28
+
29
+ # io must respond_to?(:size), giving the size of the io object in bytes
30
+ # which allows seeking. #get_index_list is called to get or create the
31
+ # index list.
32
+ def initialize(io)
33
+ @io = io
34
+ @encoding = @io.bookmark(true) {|io| io.readline.match(/encoding=["'](.*?)["']/)[1] }
35
+ @index_list = get_index_list
36
+ end
37
+
38
class << self
  # Opens +filename+ (read-only right now), wraps the file handle in a new
  # object and passes that object to the block. The file is closed when the
  # block returns.
  def open(filename, &block)
    File.open(filename) {|file| block.call(self.new(file)) }
  end

  # Opens +filename+ and passes each spectrum to the block.
  def foreach(filename, &block)
    open(filename) {|mzml| mzml.each(&block) }
  end

  # Unpacks base64 encoded binary data based on a collection of accessions.
  # +accessions+ must only respond to :include? So, hash keys, a set, or an
  # array will all work.
  #
  # MS:1000574 means the data is zlib compressed, MS:1000576 means it is
  # not. MS:1000523 selects the 'E*' unpack directive and MS:1000521 the
  # 'e*' directive.
  #
  # @return [Array<Float>] the decoded numbers
  # @raise [RuntimeError] if no compression or precision accession is present
  def unpack_binary(base64string, accessions)
    zlib_compressed = accessions.include?('MS:1000574')
    unless zlib_compressed || accessions.include?('MS:1000576')
      raise 'no compression info: check your MS accession numbers'
    end

    directive =
      case
      when accessions.include?('MS:1000523') then 'E*'
      when accessions.include?('MS:1000521') then 'e*'
      else raise 'unrecognized precision: check your MS accession numbers'
      end

    bytes = base64string.unpack("m*").first
    bytes = Zlib::Inflate.inflate(bytes) if zlib_compressed
    bytes.unpack(directive)
  end
end
70
+
71
# Returns the text of the io from +start_byte+ through the first line that
# contains the closing tag for +name+ (all of the remaining io if no closing
# tag is seen).
#
# name can be :spectrum or :chromatogram
def get_xml_string(start_byte, name=:spectrum)
  closing_tag = %r{</#{name}>}
  io.seek(start_byte)
  collected = []
  while (line = io.gets)
    collected << line
    break if closing_tag.match(line)
  end
  collected.join
end
84
+
85
# Passes each spectrum (looked up by its index number) to the block.
def each_spectrum(&block)
  spectrum_count = @index_list[:spectrum].size
  (0...spectrum_count).each {|idx| block.call(spectrum(idx)) }
end

# Passes the Nokogiri::XML::Node object associated with each spectrum to the
# block, visiting the start bytes of the spectrum index in order.
def each_spectrum_node(&block)
  @index_list[:spectrum].each {|offset| block.call(spectrum_node_from_start_byte(offset)) }
end
97
+
98
+ alias_method :each, :each_spectrum
99
+
100
# Parses the spectrum XML that begins at +start_byte+ (using the document's
# encoding and the NOBLANKS parse options) and returns the root node of the
# parsed fragment.
def spectrum_node_from_start_byte(start_byte)
  fragment = get_xml_string(start_byte, :spectrum)
  Nokogiri::XML.parse(fragment, nil, @encoding, Parser::NOBLANKS).root
end
105
+
106
# Decodes every binaryDataArray of one spectrum and wraps the resulting
# arrays in an MS::Spectrum.
#
# @param [Object] arg an index number (Integer) or id string (String)
# @return [MS::Spectrum] a spectrum object
def spectrum(arg)
  # NOTE(review): the original author flagged this lookup as "trouble".
  node = spectrum_node_from_start_byte(index_list[0].start_byte(arg))
  data_arrays = []
  node.xpath('//binaryDataArray').each do |array_node|
    accessions = array_node.xpath('./cvParam').map {|param| param['accession'] }
    encoded = array_node.xpath('./binary').text
    data_arrays << MS::Mzml.unpack_binary(encoded, accessions)
  end
  MS::Spectrum.new(data_arrays)
end
118
+
119
# @return [Integer] the number of spectra, i.e. the number of entries in the
#   spectrum index
def size
  spectrum_index = @index_list[:spectrum]
  spectrum_index.size
end
123
+
124
+ alias_method :'[]', :spectrum
125
+
126
# Looks a spectrum up by its scan number.
#
# The scan number to index map is built once from the first index in the
# index list and then cached, including a false or nil result, so a failed
# build is not repeated on every call.
#
# @param [Integer] scan_num the scan number
# @return [MS::Spectrum] a spectrum object, or nil if not found
# @raise [ScanNumbersNotUnique] if scan numbers are not unique
# @raise [ScanNumbersNotFound] if spectra exist but scan numbers were not
#   found
def spectrum_from_scan_num(scan_num)
  unless defined?(@scan_to_index)
    # NOTE(review): the index class is not visible here; the method name
    # matches the create_scan_to_index call made elsewhere on the same
    # spectrum index object -- confirm against the index implementation.
    @scan_to_index = @index_list[0].create_scan_to_index
  end
  raise ScanNumbersNotUnique if @scan_to_index == false
  raise ScanNumbersNotFound if @scan_to_index.nil?
  index = @scan_to_index[scan_num]
  # an unknown scan number yields nil rather than a lookup with a nil index
  index.nil? ? nil : spectrum(index)
end
137
+
138
# Reads the index list stored in the mzML file.
#
# The io is positioned at the offset reported by MS::Mzml::Index.index_offset
# and the rest of the io is parsed. Each child of the parsed root becomes one
# Index (named after its 'name' attribute) holding the integer offsets of its
# children, with the matching 'idRef' attributes stored as its ids.
#
# @return [MS::Mzml::IndexList] or nil if there is no indexList in the
#   mzML
def read_index_list
  offset = MS::Mzml::Index.index_offset(@io)
  return nil unless offset

  @io.seek(offset)
  index_list_node = Nokogiri::XML.parse(@io.read, nil, @encoding, Parser::NOBLANKS).root
  indices = index_list_node.children.map do |index_n|
    index = Index.new
    index.name = index_n['name'].to_sym
    ids = []
    index_n.children.each do |offset_n|
      index << offset_n.text.to_i
      ids << offset_n['idRef']
    end
    index.ids = ids
    index
  end
  IndexList.new(indices)
end
161
# Reads through the whole io from the beginning and captures the start byte
# of every line-opening <spectrum ...> and <chromatogram ...> tag that
# carries an id attribute. The io position is handled by #bookmark.
#
# @return [MS::Mzml::IndexList] one Index for :spectrum and one for
#   :chromatogram, each holding start bytes with parallel ids
def create_index_list
  offsets_by_kind = @io.bookmark(true) do |_inner_io| # sets to beginning of file
    found = {:spectrum => {}, :chromatogram => {}}
    bytes_seen = 0
    @io.each do |line|
      md = %r{<(spectrum|chromatogram).*?id=['"](.*?)['"][ >]}.match(line)
      # start byte = bytes on earlier lines + bytes before the tag on this line
      found[md[1].to_sym][md[2]] = bytes_seen + md.pre_match.bytesize if md
      bytes_seen += line.bytesize
    end
    found
  end

  index_objects = offsets_by_kind.map do |kind, id_to_offset|
    index = Index.new
    id_to_offset.each_value {|start_byte| index << start_byte }
    index.ids = id_to_offset.keys
    index.name = kind
    index
  end
  IndexList.new(index_objects)
end
184
+
185
# Returns the index list stored in the file when #read_index_list finds
# one, otherwise builds one with #create_index_list.
# @return [Array] an array of indices
def get_index_list
  stored = read_index_list
  stored ? stored : create_index_list
end
190
+
191
# Raised when scan numbers are not unique.
# Derives from StandardError (itself an Exception) so that an ordinary
# `rescue => e` in calling code can handle it.
class ScanNumbersNotUnique < StandardError
end
# Raised when spectra exist but scan numbers were not found.
class ScanNumbersNotFound < StandardError
end
195
+ end
196
+ end
197
+
data/lib/ms/obo.rb ADDED
@@ -0,0 +1,38 @@
1
+
2
module MS
  # Holds the elements of an OBO file parsed by Obo::Parser and offers
  # memoized lookup hashes over them.
  class OBO
    # the first element produced by the parser
    attr_accessor :header
    # the remaining elements (everything after the header)
    attr_accessor :elements

    # @param file_or_io passed straight through to Obo::Parser.new
    def initialize(file_or_io)
      parser = Obo::Parser.new(file_or_io)
      # These must be instance variables: the accessors and the lookup
      # hashes below read @elements and @header.
      @elements = parser.elements.to_a
      @header = @elements.shift
    end

    # returns an id to name Hash
    def id_to_name
      @id_to_name ||= build_hash('id', 'name')
    end

    # returns a name to id Hash
    def name_to_id
      @name_to_id ||= build_hash('name', 'id')
    end

    # returns an id to element Hash
    def id_to_element
      @id_to_element ||= build_hash('id', nil)
    end

    protected

    # Builds a Hash keyed by the first +key+ tag value of each element. The
    # value is the first +val+ tag value, or the element itself when +val+
    # is nil.
    def build_hash(key, val)
      hash = {}
      elements.each do |el|
        tv = el.tagvalues
        hash[tv[key].first] = val.nil? ? el : tv[val].first
      end
      hash
    end
  end
end
data/lib/ms/plms1.rb ADDED
@@ -0,0 +1,156 @@
1
+
2
+ require 'write_file_or_string'
3
+ require 'ms/spectrum'
4
+ require 'stringio'
5
+ require 'openany'
6
+
7
+ module MS
8
+
9
+ =begin
10
+ # if given scans, will use those, or optionally takes a block where an
11
+ # array of ms1 scans are yielded and it expects Enumerable scans back.
12
+ def to_plms1(scans=nil)
13
+ times = []
14
+ scan_numbers = []
15
+ spectra = []
16
+
17
+ unless scans
18
+ scans = []
19
+ self.each(:ms_level => 1, :precursor => false) do |scan|
20
+ scans << scan
21
+ end
22
+ end
23
+
24
+ if block_given?
25
+ scans = yield(scans)
26
+ end
27
+
28
+ scans.each do |scan|
29
+ times << scan.time
30
+ scan_numbers << scan.num
31
+ spec = scan.spectrum
32
+ spectra << [spec.mzs.to_a, spec.intensities.to_a]
33
+ end
34
+ plms1 = Plms1.new
35
+ plms1.times = times
36
+ plms1.scan_numbers = scan_numbers
37
+ plms1.spectra = spectra
38
+ plms1
39
+ end
40
+ =end
41
+
42
+ # Prince Lab MS 1: a simple format for reading and writing
43
+ # MS1 level mass spec data
44
+ #
45
+ # see MS::Plms1::SPECIFICATION for the file specification
46
+ class Plms1
47
+ SPECIFICATION =<<-HERE
48
+ # The file format contains no newlines but is shown here broken into lines for
49
+ # clarity. Data should be little endian. Comments begin with '#' but are not
50
+ # part of the spec. Angled brackets '<>' indicate the data type and square
51
+ # brackets '[]' the name of the data. An ellipsis '...' represents a
52
+ # continuous array of data points.
53
+
54
+ <uint32>[Number of scans]
55
+ <uint32>[scan number] ... # array of scan numbers as uint32
56
+ <float64>[time point] ... # array of time points as double precision floats (in seconds)
57
+ # this is a repeating unit based on [Number of scans]:
58
+ <uint32>[Number of data rows] # almost always == 2 (m/z, intensity)
59
+ # this is a repeating unit based on [Number of data rows]
60
+ <uint32>[Number of data points]
61
+ <float64>[data point] ... # array of data points as double precision floats
62
+ HERE
63
+
64
+ # an array of scan numbers
65
+ attr_accessor :scan_numbers
66
+ # an array of time data
67
+ attr_accessor :times
68
+ # an array that contains spectrum objects
69
+ attr_accessor :spectra
70
+
71
# Stores the scan numbers, the time points and the spectra. Each defaults
# to a new empty array.
def initialize(_scan_numbers=[], _times=[], _spectra=[])
  @scan_numbers, @times, @spectra = _scan_numbers, _times, _spectra
end
74
+
75
# Reads +cnt+ values of 4 bytes each from +io+ and unpacks them with 'V*'.
# returns an array of Integers
def read_uint32(io, cnt=1)
  raw = io.read(4 * cnt)
  raw.unpack("V*")
end

# Reads +cnt+ values of 8 bytes each from +io+ and unpacks them with 'E*'.
# returns an array of Floats
def read_float64(io, cnt=1)
  raw = io.read(8 * cnt)
  raw.unpack("E*")
end
84
+
85
# Reads binary plms1 data from an io or a filename (opened with openany)
# and replaces the scan numbers, times and spectra of this object with it.
# Every spectrum is an MS::Spectrum built from its rows of data points.
#
# returns self for chaining
def read(io_or_filename)
  openany(io_or_filename) do |io|
    scan_count = read_uint32(io)[0]
    @scan_numbers = read_uint32(io, scan_count)
    @times = read_float64(io, scan_count)
    @spectra = Array.new(scan_count) do
      row_count = read_uint32(io)[0]
      # each row is a length indicator followed by that many data points
      rows = Array.new(row_count) { read_float64(io, read_uint32(io)[0]) }
      MS::Spectrum.new(rows)
    end
  end
  self
end
100
+
101
# Appends +data+ (a single value or an Array of values) to +out+, packed
# with the 'V*' directive.
def write_uint32(out, data)
  values = data.is_a?(Array) ? data : [data]
  packed = values.pack('V*')
  out << packed
end

# Appends +data+ (a single value or an Array of values) to +out+, packed
# with the 'E*' directive.
def write_float64(out, data)
  values = data.is_a?(Array) ? data : [data]
  packed = values.pack('E*')
  out << packed
end
110
+
111
# Writes an ascii version of the format.
# It is the same as the binary format, except a newline follows each
# length indicator or array of data (values within an array are separated
# by single spaces). An empty line represents an empty array.
#
# +filename+ is handed to write_file_or_string, whose return value is
# returned.
def write_ascii(filename=nil)
  write_file_or_string(filename) do |out|
    out.puts scan_numbers.size
    out.puts scan_numbers.join(' ')
    out.puts times.join(' ')
    spectra.each do |spectrum|
      out.puts spectrum.size
      # a spectrum with no rows contributes only its row count
      next unless spectrum.size > 0
      mzs = spectrum.mzs
      out.puts mzs.size
      out.puts mzs.join(' ')
      intensities = spectrum.intensities
      out.puts intensities.size
      out.puts intensities.join(' ')
    end
  end
end
131
+
132
# Writes the data in the binary format, or through #write_ascii when
# +ascii+ is true. The number of spectra is written as the number of scans.
# For a spectrum with rows, the mzs and then the intensities are written,
# each preceded by its length.
#
# +filename+ is handed to write_file_or_string.
# returns the string if no filename given
def write(filename=nil, ascii=false)
  return write_ascii(filename) if ascii

  write_file_or_string(filename) do |out|
    write_uint32(out, spectra.size)
    write_uint32(out, scan_numbers)
    write_float64(out, times)
    spectra.each do |spectrum|
      write_uint32(out, spectrum.size) # number of rows
      next unless spectrum.size > 0
      mzs = spectrum.mzs
      write_uint32(out, mzs.size)
      write_float64(out, mzs)
      intensities = spectrum.intensities
      write_uint32(out, intensities.size)
      write_float64(out, intensities)
    end
  end
end
155
+ end
156
+ end
@@ -0,0 +1,22 @@
1
+ require 'ms/quant/protein_group_comparison'
2
+
3
module Ms
  module Quant
    module ProteinGroupComparison
      # A protein group comparison that also keeps the qspec results struct
      # it was built from.
      class Qspec
        include Ms::Quant::ProteinGroupComparison

        attr_accessor :qspec_results_struct

        # takes a protein group object, an array of experiment names and a
        # qspec results struct. The struct's counts_array is passed up to
        # the initializer provided through ProteinGroupComparison.
        def initialize(protein_group, experiments, qspec_results_struct)
          counts = qspec_results_struct.counts_array
          super(protein_group, experiments, counts)
          @qspec_results_struct = qspec_results_struct
        end
      end
    end
  end
end
22
+
@@ -0,0 +1,112 @@
1
+ module Ms ; end
2
+ module Ms::Quant ; end
3
+
4
+ class Ms::Quant::Qspec
5
+
6
+ # personal communication with Hyungwon Choi: "We typically use nburn=2000,
7
+ # niter=10000, which is quite sufficient to guarantee the reproducibility of
8
+ # results using the same data."
9
+ NBURNIN = 2000
10
+ NITER = 10000
11
+ INIT_HEADER = %w(protid protLen)
12
+ DELIMITER = "\t"
13
+
14
+ SUBMITTED_TO_QSPEC = 'submitted_to_qspec.txt'
15
+
16
# takes an ordered list of conditions ['cond1', 'cond1', 'cond2', 'cond2'] and
# returns an array of ints [0,0,1,1]
#
# The int starts at 0 and goes up by one every time a condition differs from
# the one before it, so the same condition appearing again later in the list
# gets a new int.
def self.conditions_to_ints(conditions)
  group = 0
  previous = conditions.first
  conditions.map do |cond|
    unless previous == cond
      group += 1
      previous = cond
    end
    group
  end
end
30
+
31
# returns an array of Results structs which is each row of the returned file
# works with V2 of QSpec
#
# The file is tab delimited with one header line. The first column is the
# protein id; the columns between it and the first header matching
# /BayesFactor/i are the counts (as floats); that column and the three after
# it are read as floats and the column after those is kept as a string.
def self.results_array(resultsfile)
  table = IO.readlines(resultsfile).map {|line| line.chomp.split("\t") }
  header = table.shift
  bayes_col = header.index {|name| name =~ /BayesFactor/i }
  table.map do |fields|
    counts = fields[1...bayes_col].map(&:to_f)
    stats = fields[bayes_col, 4].map(&:to_f)
    Results.new(fields[0], counts, *stats, fields[bayes_col + 4])
  end
end
45
+
46
# returns the right executable based on the array of conditions:
# 'qspecgp' when some condition occurs three or more times, otherwise 'qspec'
def self.executable(conditions)
  occurrences = Hash.new(0)
  conditions.each {|cond| occurrences[cond] += 1 }
  largest_group = occurrences.values.max
  largest_group >= 3 ? 'qspecgp' : 'qspec'
end
51
+
52
# protname_length_pairs is an array of doublets: [protname, length]
# condition_to_count_array is an array of doublets: [condition, array_of_counts]
def initialize(protname_length_pairs, condition_to_count_array)
  @condition_to_count_array = condition_to_count_array
  @protname_length_pairs = protname_length_pairs
end

# returns the conditions (the first member of each doublet) in order
def conditions
  @condition_to_count_array.map {|doublet| doublet.first }
end
62
+
63
# writes a qspec formatted file to filename
#
# The header line is INIT_HEADER followed by the int for each condition.
# Every following line is one protein: its name and length followed by its
# count under each condition, all joined with DELIMITER.
def write(filename)
  header_cats = INIT_HEADER + Ms::Quant::Qspec.conditions_to_ints(conditions)
  # copy each pair so the counts are appended to the copy, not the original
  rows = @protname_length_pairs.map {|pair| pair.map {|field| field } }
  @condition_to_count_array.each do |_cond, counts|
    rows.zip(counts).each {|row, cnt| row << cnt }
  end
  File.open(filename, 'w') do |out|
    out.puts header_cats.join(DELIMITER)
    rows.each {|row| out.puts row.join(DELIMITER) }
  end
end
76
+
77
# Writes the data to a temporary file, runs the qspec executable chosen for
# the conditions on it and parses the file the executable leaves next to the
# input (same path with "_<executable>" appended).
#
# The temporary input file is removed whether or not the run succeeds.
#
# @param normalize [Boolean] passed to the executable as 1 (true) or 0
# @param opts [Hash] with :keep set, copies of the submitted file and of the
#   returned file are placed in the current working directory
# @return [Array] an array of Qspec::Results objects (each object can be
#   considered a row of data)
def run(normalize=true, opts={})
  # required here because this file has no require block of its own
  require 'tempfile'
  require 'fileutils'

  puts "normalize: #{normalize}" if $VERBOSE
  tfile = Tempfile.new("qspec")
  begin
    write(tfile.path)
    if opts[:keep]
      local_file = File.join(Dir.pwd, File.basename(tfile.path))
      FileUtils.cp(tfile.path, local_file, :verbose => $VERBOSE)
      puts "(copy of) file submitted to qspec: #{local_file}" if $VERBOSE
    end
    qspec_exe = self.class.executable(conditions)
    cmd = [qspec_exe, tfile.path, NBURNIN, NITER, (normalize ? 1 : 0)].join(' ')
    if $VERBOSE
      puts "running #{cmd}"
    else
      # fold stderr into the captured (and discarded) reply
      cmd << " 2>&1"
    end
    reply = `#{cmd}`
    puts reply if $VERBOSE
    outfile = tfile.path + '_' + qspec_exe
    results = self.class.results_array(outfile)
    if opts[:keep]
      local_outfile = File.join(Dir.pwd, File.basename(outfile))
      FileUtils.cp(outfile, local_outfile, :verbose => $VERBOSE)
      # report the copy that was kept, mirroring the message for the input
      puts "(copy of) file returned from qspec: #{local_outfile}" if $VERBOSE
    end
    results
  ensure
    tfile.close!
  end
end
107
+
108
+ # for version 2 of QSpec
109
+ # counts array is parallel to the experiment names passed in originally
110
+ Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
111
+ end
112
+