mspire 0.5.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,243 @@
1
require 'optparse'
require 'yaml'

require 'ms/digester'
require 'ms/fasta'
4
+
5
+ module MS ; end
6
+ module MS::Ident ; end
7
+ module MS::Ident::Peptide ; end
8
+
9
+ # the object itself is a modified Hash.
10
+ # It is initialized with the database file and a protein array can be
11
+ # retrieved with the #[] method given an amino acid sequence. All other
12
+ # methods are untested at this time and should be avoided!
13
+ class MS::Ident::Peptide::Db < Hash
14
+ MAX_NUM_AA_EXPANSION = 3
15
+
16
+ # the twenty standard amino acids
17
+ STANDARD_AA = %w(A C D E F G H I K L M N P Q R S T V W Y)
18
+
19
+ DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => MS::Digester[:trypsin], :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}
20
+
21
+ PROTEIN_DELIMITER = "\t"
22
+ KEY_VALUE_DELIMITER = ": "
23
+
24
# Command line interface: digests each fasta file named in +argv+ into a
# peptide centric database.
#
# argv:: array of command line arguments.  Recognized options are removed
#        from it in place (OptionParser#parse!); whatever remains is taken
#        as the list of fasta files.
#
# When no fasta file remains, the usage message is printed and an empty
# array is returned.  Otherwise returns an array holding the return value
# of peptide_centric_db for each file, in order.
def self.cmdline(argv)
  # only used to show the effective defaults in the usage message;
  # peptide_centric_db merges these same defaults underneath +opt+
  defaults = DEFAULT_PEPTIDE_CENTRIC_DB

  opt = {
    :remove_digestion_file => true,
    :enzyme => MS::Digester[:trypsin]
  }
  opts = OptionParser.new do |op|
    op.banner = "usage: #{File.basename($0)} <file>.fasta ..."
    op.separator "output: "
    op.separator " <file>.msd_clvg<missed_cleavages>.min_aaseq<min_length>.yml"
    op.separator "format:"
    op.separator " PEPTIDE: ID1<tab>ID2<tab>ID3..."
    op.separator ""
    op.separator " Initiator Methionines - by default, will generate two peptides"
    op.separator " for any peptide found at the N-termini starting with 'M'"
    op.separator " (i.e., one with and one without the leading methionine)"
    op.separator ""
    op.on("--missed-cleavages <#{defaults[:missed_cleavages]}>", Integer, "max num of missed cleavages") {|v| opt[:missed_cleavages] = v }
    op.on("--min-length <#{defaults[:min_length]}>", Integer, "the minimum peptide aaseq length") {|v| opt[:min_length] = v }
    op.on("--no-cleaved-methionine", "does not cleave off initiator methionine") { opt[:cleave_initiator_methionine] = false }
    op.on("--no-expand-x", "don't enumerate aa 'X' possibilities") { opt[:expand_aa] = nil }
    op.on("-e", "--enzyme <name>", "enzyme for digestion") do |v|
      # looked up the same way as the :trypsin default above, under the names
      # printed by --list-enzymes
      # NOTE(review): assumes the enzymes are keyed by Symbol, as the
      # MS::Digester[:trypsin] lookup suggests -- confirm
      enzyme = MS::Digester[v.to_sym]
      raise OptionParser::InvalidArgument, v unless enzyme
      opt[:enzyme] = enzyme
    end
    op.on("--list-enzymes", "lists approved enzymes and exits") do
      puts MS::Digester::ENZYMES.keys.join("\n")
      exit
    end
  end

  opts.parse!(argv)

  # The original read `puts opts || exit`, which never exits because +opts+
  # is always truthy.  The effective behavior (print usage, return an empty
  # array) is kept and made explicit.
  if argv.empty?
    puts opts
    return []
  end

  argv.map do |file|
    MS::Ident::Peptide::Db.peptide_centric_db(file, opt)
  end
end
62
+
63
# Digests every protein of +fasta_file+ and writes a peptide centric
# database: one line per peptide of the form
#
#   PEPTIDE: ID1<tab>ID2<tab>ID3...
#
# fasta_file:: path to the fasta file
# opts::       merged over DEFAULT_PEPTIDE_CENTRIC_DB.  The keys read here are
#              :missed_cleavages, :min_length, :enzyme (must respond to
#              digest(sequence, missed_cleavages)), :remove_digestion_file,
#              :cleave_initiator_methionine and :expand_aa (a hash like
#              {'X' => STANDARD_AA}, or nil for no expansion).
#              :id_regexp is part of the defaults but is not used here.
#
# A temporary digestion file '<base>.msd_clvg<missed_cleavages>.peptides'
# is written first; it holds all peptides (min_length is not applied to it,
# only to the final db) and is deleted afterwards unless
# :remove_digestion_file is false.
#
# Returns the expanded path of the written file,
# '<base>.msd_clvg<missed_cleavages>.min_aaseq<min_length>.yml'.
def self.peptide_centric_db(fasta_file, opts={})
  opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)

  (missed_cleavages, min_length, enzyme, remove_digestion_file, cleave_initiator_methionine, expand_aa) =
    opts.values_at(:missed_cleavages, :min_length, :enzyme, :remove_digestion_file, :cleave_initiator_methionine, :expand_aa)

  # nil when there is nothing to expand (an empty character class would
  # not even be a valid regexp)
  letters_to_expand_re =
    if expand_aa && !expand_aa.empty?
      Regexp.new("[" + Regexp.escape(expand_aa.keys.join) + "]")
    end

  start_time = Time.now
  print "Digesting #{fasta_file} ..." if $VERBOSE

  base = fasta_file.chomp(File.extname(fasta_file))
  digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides"
  File.open(digestion_file, "w") do |fh|
    MS::Fasta.open(fasta_file) do |fasta|
      fasta.each do |prot|
        peptides = enzyme.digest(prot.sequence, missed_cleavages)
        if cleave_initiator_methionine && prot.sequence[0,1] == "M"
          # peptides sitting at the very start of the protein are also
          # reported without their leading methionine
          n_terminal = peptides.select {|pep| prot.sequence[0,pep.size] == pep }
          peptides += n_terminal.map {|pep| pep[1..-1] }
        end
        if letters_to_expand_re
          # expand_peptides gives nil for peptides with too many letters to
          # expand; those are dropped
          peptides = peptides.map do |pep|
            pep =~ letters_to_expand_re ? expand_peptides(pep, expand_aa) : pep
          end.flatten.compact
        end
        fh.puts( prot.header.split(/\s+/).first + "\t" + peptides.join(" ") )
      end
    end
  end
  puts "#{Time.now - start_time} sec" if $VERBOSE

  start_time = Time.now
  print "Organizing raw digestion #{digestion_file} ..." if $VERBOSE

  hash = Hash.new {|h,k| h[k] = [] }
  ::IO.foreach(digestion_file) do |line|
    # chomp (not chomp!) so a final line without a newline is still handled
    (prot, *peps) = line.chomp.split(/\s+/)
    # prot is something like this: "sp|P31946|1433B_HUMAN" in uniprot
    peps.each do |pep|
      hash[pep] << prot if pep.size >= min_length
    end
  end
  puts "#{Time.now - start_time} sec" if $VERBOSE

  base = digestion_file.chomp(File.extname(digestion_file))
  final_outfile = base + ".min_aaseq#{min_length}" + ".yml"

  start_time = Time.now
  print "Writing #{hash.size} peptides to #{final_outfile} ..." if $VERBOSE

  File.open(final_outfile, 'w') do |out|
    hash.each do |pep, prots|
      out.puts( [pep, prots.join(PROTEIN_DELIMITER)].join(KEY_VALUE_DELIMITER) )
    end
  end
  puts "#{Time.now - start_time} sec" if $VERBOSE

  File.unlink(digestion_file) if remove_digestion_file
  File.expand_path(final_outfile)
end
145
+
146
# Does combinatorial expansion of every letter of +peptide+ that is a key
# of +expand_aa+.
#
# peptide::   the amino acid sequence (String)
# expand_aa:: hash mapping a letter to the array of letters it may stand
#             for, like: {'X'=>STANDARD_AA}
#
# Returns an array with every substitution, ordered with the leftmost
# expanded position varying slowest.  A peptide containing none of the keys
# comes back as a one element array holding the peptide itself.
# Returns nil if there are more than MAX_NUM_AA_EXPANSION letters to be
# expanded.
def self.expand_peptides(peptide, expand_aa)
  # [index in peptide, letter found there] for each letter to expand
  positions = []
  peptide.each_char.with_index do |char, i|
    positions << [i, char] if expand_aa.key?(char)
  end
  return nil if positions.size > MAX_NUM_AA_EXPANSION

  positions.inject([peptide]) do |expanded, (i, letter)|
    expanded.flat_map do |current|
      expand_aa[letter].map do |replacement|
        variant = current.dup
        variant[i] = replacement
        variant
      end
    end
  end
end
172
+
173
# Loads a whole database file into this Hash.
#
# db_file:: path to a file readable by YAML.load_file (the
#           "PEPTIDE: ID1<tab>ID2..." lines written by peptide_centric_db)
#
# An empty file, for which YAML.load_file gives no hash, yields an empty db
# instead of a TypeError from Hash#replace.
def initialize(db_file)
  replace(YAML.load_file(db_file) || {})
end
176
+
177
+ alias_method :old_bracket, '[]'.to_sym
178
+
179
# returns the protein id's for +key+ (a peptide aaseq) as an array, or nil
# when the peptide is not in the database (the same answer Db::IO#[] gives
# for an unknown key)
def [](key)
  ids = old_bracket(key)
  ids && ids.chomp.split(PROTEIN_DELIMITER)
end
183
+
184
# an object for on disk retrieval of db entries
# proteins are returned as an array.
# behaves much like a hash once it is opened.
#
# Only the byte position of each entry is held in memory (see #index); the
# protein lists are read from the io on every lookup.
class IO
  include Enumerable

  # Opens +filename+ and calls the block with a new db object wrapping the
  # open file (File.open with a block is used, so the file is closed when
  # the block returns).  Returns the value of the block.
  # Raises ArgumentError if no block is given.
  def self.open(filename, &block)
    raise ArgumentError unless block
    File.open(filename) do |io|
      block.call(self.new(io))
    end
  end

  # the io object the entries are read from
  attr_accessor :io
  # hash of key => [start byte of the protein list, length in bytes]
  attr_accessor :index

  # Reads +io+ once, line by line, from its current position and records
  # where the value of each "KEY: value" line sits.
  #
  # io:: an object responding to pos, each_line, seek and read
  #
  # Blank lines are skipped.  Raises ArgumentError on any other line that
  # does not start with a key followed by KEY_VALUE_DELIMITER.
  def initialize(io)
    @io = io
    @index = {}
    re = /^(\w+)#{Regexp.escape(KEY_VALUE_DELIMITER)}/
    line_start = io.pos
    io.each_line do |line|
      line_end = io.pos
      if (md = re.match(line))
        key = md[1]
        value_start = line_start + key.bytesize + KEY_VALUE_DELIMITER.bytesize
        @index[key] = [value_start, line_end - value_start]
      elsif !line.strip.empty?
        raise ArgumentError, "cannot parse db line: #{line.inspect}"
      end
      line_start = line_end
    end
  end

  # returns an array of proteins for the given key (peptide aaseq), or nil
  # if the key is not in the index
  def [](key)
    (start, length) = @index[key]
    return nil unless start
    @io.seek(start)
    @io.read(length).chomp.split(PROTEIN_DELIMITER)
  end

  # number of entries
  def size ; @index.size end
  alias_method :length, :size

  # all the keys (peptide aaseqs)
  def keys
    @index.keys
  end

  # all the protein lists
  def values
    keys.map {|key| self[key] }
  end

  # yields a pair of aaseq and protein array
  # returns an Enumerator if no block is given
  def each
    return enum_for(:each) unless block_given?
    @index.each_key do |key|
      yield [key, self[key]]
    end
  end
end
243
+ end
@@ -0,0 +1,72 @@
1
module MS ; end
module MS::Ident ; end

# A 'sequence' is a notation of a peptide that includes the leading and
# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
# and may contain post-translational modification information.
#
# 'aaseq' is the amino acid sequence of just the peptide with no leading or
# trailing notation (e.g., PEPTIDER or LAKKLY)
module MS::Ident::Peptide
  # matches every character that is not an uppercase letter, '.' or '-'
  Nonstandard_AA_re = /[^A-Z\.\-]/

  class << self

    # Takes a peptide sequence of the form '-.PEPTIDE.R', removes
    # non-standard amino acids, and returns the center piece.
    #
    # With only two '.'-separated pieces the first one is taken as the
    # peptide when it is longer than one character, otherwise the second.
    # A sequence without any '.' is returned as is (after removal).
    # Aborts the program if the sequence is empty or has more than three
    # pieces.
    def sequence_to_aaseq(sequence)
      pieces = remove_non_amino_acids(sequence).split('.')
      case pieces.size
      when 3
        pieces[1]
      when 2
        # a first piece longer than one letter is the peptide itself
        # (N termini), otherwise it is the preceding amino acid (C termini)
        pieces[0].size > 1 ? pieces[0] : pieces[1]
      when 1 ## this must be a parse error!
        pieces[0] ## which is the peptide itself
      else
        abort "bad peptide sequence: #{sequence.inspect}"
      end
    end

    # removes non standard amino acids specified by Nonstandard_AA_re
    def remove_non_amino_acids(sequence)
      sequence.gsub(Nonstandard_AA_re, '')
    end

    # remove non amino acids and split the sequence (see split_sequence)
    def prepare_sequence(sequence)
      split_sequence(remove_non_amino_acids(sequence))
    end

    # Returns prev, peptide, next from sequence.  Parse errors (anything
    # other than two or three '.'-separated pieces) return nil,nil,nil
    #   R.PEPTIDE.A  # -> R, PEPTIDE, A
    #   R.PEPTIDE.-  # -> R, PEPTIDE, -
    #   PEPTIDE.A    # -> -, PEPTIDE, A
    #   A.PEPTIDE    # -> A, PEPTIDE, -
    #   PEPTIDE      # -> nil,nil,nil
    #   A.B.C.D      # -> nil,nil,nil
    def split_sequence(sequence)
      pieces = sequence.split('.')
      case pieces.size
      when 3
        pieces
      when 2
        if pieces[0].size > 1 ## N termini
          ['-', pieces[0], pieces[1]]
        else ## C termini
          [pieces[0], pieces[1], '-']
        end
      else ## this must be a parse error!
        [nil, nil, nil]
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'ms/ident/search'
2
+ require 'ms/ident/peptide_hit'
3
+
4
module MS ; end
module MS::Ident ; end

class MS::Ident::PeptideHit
  # Reads and writes peptide hits together with their q-values as tab
  # delimited files (one header line, then one line per hit).
  module Qvalue
    FILE_EXTENSION = '.phq.tsv'
    FILE_DELIMITER = "\t"
    HEADER = %w(run_id id aaseq charge qvalue)

    class << self

      # writes to the file, adding an extension (FILE_EXTENSION) to +base+
      # returns the filename (see to_file)
      def to_phq(base, hits, qvalues=[])
        to_file(base + FILE_EXTENSION, hits, qvalues)
      end

      # writes the peptide hits to a phq.tsv file. qvalues is a parallel array
      # to hits that can provide qvalues if not inherent to the hits (a nil
      # or missing entry falls back on hit.qvalue).  Each hit must respond
      # to search (which responds to id), id, aaseq, charge and qvalue.
      # returns the filename.
      def to_file(filename, hits, qvalues=[])
        File.open(filename,'w') do |out|
          out.puts HEADER.join(FILE_DELIMITER)
          hits.zip(qvalues) do |hit, qvalue|
            out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
          end
        end
        filename
      end

      # returns an array of PeptideHit objects from a phq.tsv
      #
      # Hits with the same run_id share one MS::Ident::Search object (created
      # with the run_id).  charge is converted with to_i and qvalue with
      # to_f.  Blank lines are skipped.
      # Raises a RuntimeError ("bad headers") if the first line is not HEADER.
      def from_file(filename)
        searches = Hash.new {|h,id| h[id] = MS::Ident::Search.new(id) }
        peptide_hits = []
        File.open(filename) do |io|
          header = io.readline.chomp.split(FILE_DELIMITER)
          raise "bad headers" unless header == HEADER
          io.each_line do |line|
            line.chomp!
            next if line.empty?
            (run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
            # PeptideHit#initialize takes a hash of attributes; an empty one
            # is handed over and the attributes are set explicitly
            ph = MS::Ident::PeptideHit.new({})
            ph.search = searches[run_id]
            ph.id = id
            ph.aaseq = aaseq
            ph.charge = charge.to_i
            ph.qvalue = qvalue.to_f
            peptide_hits << ph
          end
        end
        peptide_hits
      end

      alias_method :from_phq, :from_file

    end
  end # Qvalue
end # Peptide Hit
@@ -0,0 +1,26 @@
1
+ require 'merge'
2
+
3
module MS
  module Ident
  end
end

# The attributes of a peptide hit, for mixing into any class that
# represents one.
module MS::Ident::PeptideHitLike
  attr_accessor :id, :search, :missed_cleavages, :aaseq, :charge

  # an array of MS::Ident::ProteinLike objects
  attr_accessor :proteins

  # the q-value; it is relative to the set the hit is contained in!
  attr_accessor :qvalue
end
17
+
18
# A peptide hit carrying the MS::Ident::PeptideHitLike attributes.
class MS::Ident::PeptideHit
  include MS::Ident::PeptideHitLike
  include Merge

  # hash:: attribute values, handed to merge!.  Defaults to an empty hash
  #        (like the other hash-initialized classes) so that a hit can be
  #        created bare and filled in through its accessors.
  def initialize(hash={})
    merge!(hash)
  end
end
26
+
@@ -0,0 +1,83 @@
1
+ require 'merge'
2
+ require 'nokogiri'
3
+
4
+ module MS ; end
5
+ module MS::Ident ; end
6
+ class MS::Ident::Pepxml ; end
7
+
8
# Modified aminoacid, static or variable
# unless otherwise stated, all attributes can be anything
class MS::Ident::Pepxml::AminoacidModification
  include Merge
  # The amino acid (one letter code)
  attr_accessor :aminoacid
  # Mass difference with respect to unmodified aminoacid, as a Float
  attr_accessor :massdiff
  # Mass of modified aminoacid, Float
  attr_accessor :mass
  # Y if both modified and unmodified aminoacid could be present in the
  # dataset, N if only modified aminoacid can be present
  attr_accessor :variable
  # whether modification can reside only at protein terminus (specified 'n',
  # 'c', or 'nc')
  attr_accessor :peptide_terminus
  # Symbol used by search engine to designate this modification
  attr_accessor :symbol
  # 'Y' if each peptide must have only modified or unmodified aminoacid, 'N' if a
  # peptide may contain both modified and unmodified aminoacid
  attr_accessor :binary

  # hash:: attribute values, handed to merge!
  def initialize(hash={})
    merge!(hash)
  end

  # Writes an aminoacid_modification element holding every attribute that is
  # set (nil and false values are left out).
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {}
    [:aminoacid, :massdiff, :mass, :variable, :peptide_terminus, :symbol, :binary].each do |at|
      v = send(at)
      attrs[at] = v if v
    end
    # note massdiff: must begin with either + (nonnegative) or - [e.g.
    # +1.05446 or -2.3342] consider Numeric#to_plus_minus_string in
    # MS::Ident::Pepxml
    # Only converted when it is set and knows the conversion, so that an
    # unset massdiff no longer raises NoMethodError on nil.
    massdiff = attrs[:massdiff]
    if massdiff.respond_to?(:to_plus_minus_string)
      attrs[:massdiff] = massdiff.to_plus_minus_string
    end
    xmlb.aminoacid_modification(attrs)
    builder || xmlb.doc.root.to_xml
  end
end
47
+
48
# Modified terminus, static or variable
class MS::Ident::Pepxml::TerminalModification
  include Merge
  # n for N-terminus, c for C-terminus
  attr_accessor :terminus
  # Mass difference with respect to unmodified terminus
  attr_accessor :massdiff
  # Mass of modified terminus
  attr_accessor :mass
  # Y if both modified and unmodified terminus could be present in the
  # dataset, N if only modified terminus can be present
  attr_accessor :variable
  # Special symbol used by search engine to designate this modification
  attr_accessor :symbol
  # whether modification can reside only at protein terminus (specified n or
  # c)
  attr_accessor :protein_terminus
  attr_accessor :description

  # hash:: attribute values; each key is assigned through its writer
  def initialize(hash={})
    hash.each {|k,v| send("#{k}=", v) }
  end

  # Writes a terminal_modification element holding every attribute that is
  # set (nil and false values are left out).
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {}
    # :symbol is written like in AminoacidModification#to_xml
    # NOTE(review): the original list left :symbol out although the class
    # declares and documents it -- confirm it belongs in the output
    [:terminus, :massdiff, :mass, :variable, :symbol, :protein_terminus, :description].each do |at|
      v = send(at)
      attrs[at] = v if v
    end
    # massdiff must begin with either + or -; only converted when it is set
    # and knows the conversion
    massdiff = attrs[:massdiff]
    if massdiff.respond_to?(:to_plus_minus_string)
      attrs[:massdiff] = massdiff.to_plus_minus_string
    end
    xmlb.terminal_modification(attrs)
    builder || xmlb.doc.root.to_xml
  end
end
82
+
83
+
@@ -0,0 +1,70 @@
1
+ require 'merge'
2
+
3
+ require 'ms/ident/pepxml/msms_run_summary'
4
+
5
+ module MS ; end
6
+ module MS::Ident ; end
7
+ class MS::Ident::Pepxml; end
8
+
9
# The root msms_pipeline_analysis element of a pepXML document.
class MS::Ident::Pepxml::MsmsPipelineAnalysis
  include Merge
  XMLNS = "http://regis-web.systemsbiology.net/pepXML"
  XMLNS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
  # (this doesn't actually exist), also, the space is supposed to be there
  XSI_SCHEMA_LOCATION_BASE = "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v"
  # the only additions concerning a writer are from v18 are to the 'spectrum': retention_time_sec and activationMethodType
  PEPXML_VERSION = 115

  attr_accessor :xmlns
  attr_accessor :xmlns_xsi
  # the reader is defined below (it falls back on the pepxml version)
  attr_writer :xsi_schema_location
  # an Integer
  attr_accessor :pepxml_version
  # self referential path to the outputfile
  attr_accessor :summary_xml
  attr_accessor :msms_run_summary
  attr_writer :date

  # creates, stores and returns a new MsmsRunSummary
  def block_arg
    @msms_run_summary = MS::Ident::Pepxml::MsmsRunSummary.new
  end

  # Sets xmlns, xmlns_xsi and pepxml_version to the class defaults, then
  # hands +hash+ and the block to merge!
  # if block given, yields a new msms_run_summary to return value of block
  def initialize(hash={}, &block)
    @xmlns = XMLNS
    @xmlns_xsi = XMLNS_XSI
    @pepxml_version = PEPXML_VERSION
    merge!(hash, &block)
  end

  # returns the location that was explicitly set, otherwise the location
  # based on the pepxml version number
  def xsi_schema_location
    @xsi_schema_location || (XSI_SCHEMA_LOCATION_BASE + pepxml_version.to_s + '.xsd')
  end

  # returns the date that was set; if none was set, the current time
  # formatted as a zero padded YYYY-MM-DDTHH:MM:SS string
  def date
    @date || Time.now.strftime("%Y-%m-%dT%H:%M:%S")
  end

  # Writes the msms_pipeline_analysis element (and the msms_run_summary
  # inside it, if there is one).
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {:date => date, :xmlns => xmlns}
    # the xsi prefix has to be declared for the xsi:schemaLocation attribute
    # that follows
    attrs['xmlns:xsi'.to_sym] = xmlns_xsi if xmlns_xsi
    attrs['xsi:schemaLocation'.to_sym] = xsi_schema_location
    attrs[:summary_xml] = summary_xml
    xmlb.msms_pipeline_analysis(attrs) do |inner|
      msms_run_summary.to_xml(inner) if msms_run_summary
    end
    builder || xmlb.doc.root.to_xml
  end
end
69
+
70
+
@@ -0,0 +1,82 @@
1
+ require 'merge'
2
+ require 'nokogiri'
3
+
4
+ require 'ms/ident/pepxml/sample_enzyme'
5
+ require 'ms/ident/pepxml/search_summary'
6
+ require 'ms/ident/pepxml/spectrum_query'
7
+
8
+ module MS ; end
9
+ module MS::Ident ; end
10
+ class MS::Ident::Pepxml; end
11
+
12
+ class MS::Ident::Pepxml::MsmsRunSummary
13
+ include Merge
14
+ # The name of the pep xml file without any extension
15
+ attr_accessor :base_name
16
+ # The name of the mass spec manufacturer
17
+ attr_accessor :ms_manufacturer
18
+ attr_accessor :ms_model
19
+ attr_accessor :ms_mass_analyzer
20
+ attr_accessor :ms_detector
21
+ attr_accessor :raw_data_type
22
+ attr_accessor :raw_data
23
+ attr_accessor :ms_ionization
24
+ attr_accessor :pepxml_version
25
+
26
+ # A SampleEnzyme object (responds to: name, cut, no_cut, sense)
27
+ attr_accessor :sample_enzyme
28
+ # A SearchSummary object
29
+ attr_accessor :search_summary
30
+ # An array of spectrum_queries
31
+ attr_accessor :spectrum_queries
32
+
33
+ def block_arg
34
+ [@sample_enzyme = MS::Ident::Pepxml::SampleEnzyme.new,
35
+ @search_summary = MS::Ident::Pepxml::SearchSummary.new,
36
+ @spectrum_queries ]
37
+ end
38
+
39
+ # takes a hash of name, value pairs
40
+ # if block given, yields a SampleEnzyme object, a SearchSummary and an array
41
+ # for SpectrumQueries
42
+ def initialize(hash={}, &block)
43
+ @spectrum_queries = []
44
+ merge!(hash, &block)
45
+ block.call(block_arg) if block
46
+ end
47
+
48
+ # optionally takes an xml builder object and returns the builder, or the xml
49
+ # string if no builder was given
50
+ # sets the index attribute of each spectrum query if it is not already set
51
+ def to_xml(builder=nil)
52
+ xmlb = builder || Nokogiri::XML::Builder.new
53
+ hash = {:base_name => base_name, :msManufacturer => ms_manufacturer, :msModel => ms_model, :msIonization => ms_ionization, :msMassAnalyzer => ms_mass_analyzer, :msDetector => ms_detector, :raw_data_type => raw_data_type, :raw_data => raw_data}
54
+ hash.each {|k,v| hash.delete(k) unless v }
55
+ xmlb.msms_run_summary(hash) do |xmlb|
56
+ sample_enzyme.to_xml(xmlb) if sample_enzyme
57
+ search_summary.to_xml(xmlb) if search_summary
58
+ spectrum_queries.each_with_index do |sq,i|
59
+ sq.index = i+1 unless sq.index
60
+ sq.to_xml(xmlb)
61
+ end
62
+ end
63
+ builder || xmlb.doc.root.to_xml
64
+ end
65
+
66
+ def self.from_pepxml_node(node)
67
+ self.new.from_pepxml_node(node)
68
+ end
69
+
70
+ # peps correspond to search_results
71
+ def from_pepxml_node(node)
72
+ @base_name = node['base_name']
73
+ @ms_manufacturer = node['msManufacturer']
74
+ @ms_model = node['msModel']
75
+ @ms_manufacturer = node['msIonization']
76
+ @ms_mass_analyzer = node['msMassAnalyzer']
77
+ @ms_detector = node['msDetector']
78
+ @raw_data_type = node['raw_data_type']
79
+ @raw_data = node['raw_data']
80
+ self
81
+ end
82
+ end
@@ -0,0 +1,14 @@
1
module MS
  module Ident
    class Pepxml
      # A Hash of parameter name => value pairs that can write itself as
      # 'parameter' elements.
      class Parameters < Hash
        # Calls builder.parameter(:name => ..., :value => ...) once for
        # every pair, in hash order.
        # returns self
        def to_xml(builder)
          each_pair do |name, value|
            builder.parameter(:name => name, :value => value)
          end
        end
      end
    end
  end
end
14
+