mspire 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,243 @@
1
require 'ms/digester'
require 'ms/fasta'
require 'optparse'
require 'yaml'
4
+
5
+ module MS ; end
6
+ module MS::Ident ; end
7
+ module MS::Ident::Peptide ; end
8
+
9
# The object itself is a modified Hash keyed by peptide amino acid sequence.
# It is initialized with the database file (as written by
# Db.peptide_centric_db) and a protein array can be retrieved with the #[]
# method given an amino acid sequence. All other Hash methods are untested at
# this time and should be avoided!
class MS::Ident::Peptide::Db < Hash
  # peptides with more expandable residues than this are not expanded
  MAX_NUM_AA_EXPANSION = 3

  # the twenty standard amino acids
  STANDARD_AA = %w(A C D E F G H I K L M N P Q R S T V W Y)

  DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => MS::Digester[:trypsin], :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}

  PROTEIN_DELIMITER = "\t"
  KEY_VALUE_DELIMITER = ": "

  # Command line entry point. Parses argv (destructively), builds one peptide
  # centric db per fasta file given and returns the array of written file
  # names. Prints the usage and exits when no file is given.
  def self.cmdline(argv)
    defaults = DEFAULT_PEPTIDE_CENTRIC_DB
    opt = {
      :remove_digestion_file => true,
      :enzyme => MS::Digester[:trypsin]
    }
    opts = OptionParser.new do |op|
      op.banner = "usage: #{File.basename($0)} <file>.fasta ..."
      op.separator "output: "
      op.separator "    <file>.msd_clvg<missed_cleavages>.min_aaseq<min_length>.yml"
      op.separator "format:"
      op.separator "    PEPTIDE: ID1<tab>ID2<tab>ID3..."
      op.separator ""
      op.separator "    Initiator Methionines - by default, will generate two peptides"
      op.separator "    for any peptide found at the N-termini starting with 'M'"
      op.separator "    (i.e., one with and one without the leading methionine)"
      op.separator ""
      # the help text shows the real defaults (opt itself only holds overrides)
      op.on("--missed-cleavages <#{defaults[:missed_cleavages]}>", Integer, "max num of missed cleavages") {|v| opt[:missed_cleavages] = v }
      op.on("--min-length <#{defaults[:min_length]}>", Integer, "the minimum peptide aaseq length") {|v| opt[:min_length] = v }
      op.on("--no-cleaved-methionine", "does not cleave off initiator methionine") { opt[:cleave_initiator_methionine] = false }
      op.on("--no-expand-x", "don't enumerate aa 'X' possibilities") { opt[:expand_aa] = nil }
      # NOTE(review): looked up the same way as the default (MS::Digester[:trypsin]);
      # assumes the names printed by --list-enzymes are the symbol keys - confirm
      # against MS::Digester
      op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = MS::Digester[v.to_sym] }
      op.on("--list-enzymes", "lists approved enzymes and exits") do
        puts MS::Digester::ENZYMES.keys.join("\n")
        exit
      end
    end

    opts.parse!(argv)

    if argv.size == 0
      puts opts
      exit
    end

    argv.map do |file|
      MS::Ident::Peptide::Db.peptide_centric_db(file, opt)
    end
  end

  # writes a new file with the added 'min_aaseq<Integer>'
  # creates a temporary digestion file that contains all peptides digesting
  # with certain missed_cleavages (i.e., min_seq_length is not applied to
  # this file but on the final peptide centric db)
  # opts are merged over DEFAULT_PEPTIDE_CENTRIC_DB (:id_regexp is accepted
  # but currently not used: the protein id is the first whitespace delimited
  # token of the fasta header).
  # returns the full name of the written file.
  def self.peptide_centric_db(fasta_file, opts={})
    opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)

    (missed_cleavages, min_length, enzyme, remove_digestion_file, cleave_initiator_methionine, expand_aa) = opts.values_at(:missed_cleavages, :min_length, :enzyme, :remove_digestion_file, :cleave_initiator_methionine, :expand_aa)
    start_time = Time.now
    print "Digesting #{fasta_file} ..." if $VERBOSE

    # an empty expansion table would give an invalid (empty) character class
    letters_to_expand_re =
      if expand_aa && !expand_aa.empty?
        Regexp.new("[#{Regexp.escape(expand_aa.keys.join)}]")
      end

    base = fasta_file.chomp(File.extname(fasta_file))
    digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides"
    File.open(digestion_file, "w") do |fh|
      MS::Fasta.open(fasta_file) do |fasta|
        fasta.each do |prot|
          peptides = enzyme.digest(prot.sequence, missed_cleavages)
          if cleave_initiator_methionine && (prot.sequence[0,1] == "M")
            m_peps = []
            peptides.each do |pep|
              # if the peptide is at the beginning of the protein sequence
              # also keep the version lacking the initiator methionine
              m_peps << pep[1..-1] if prot.sequence[0,pep.size] == pep
            end
            peptides.push(*m_peps)
          end
          if letters_to_expand_re
            # expand_peptides returns nil for peptides with too many letters
            # to expand; those peptides are dropped (compact)
            peptides = peptides.map do |pep|
              (pep =~ letters_to_expand_re) ? expand_peptides(pep, expand_aa) : pep
            end.flatten.compact
          end
          fh.puts( prot.header.split(/\s+/).first + "\t" + peptides.join(" ") )
        end
      end
    end
    puts "#{Time.now - start_time} sec" if $VERBOSE

    start_time = Time.now
    print "Organizing raw digestion #{digestion_file} ..." if $VERBOSE

    hash = Hash.new {|h,k| h[k] = [] }
    ::IO.foreach(digestion_file) do |line|
      # (non-destructive chomp: chomp! returns nil when there is no newline)
      (prot, *peps) = line.chomp.split(/\s+/)
      # prot is something like this: "sp|P31946|1433B_HUMAN" in uniprot
      peps.each do |pep|
        hash[pep] << prot if pep.size >= min_length
      end
    end
    puts "#{Time.now - start_time} sec" if $VERBOSE

    base = digestion_file.chomp(File.extname(digestion_file))
    final_outfile = base + ".min_aaseq#{min_length}" + ".yml"

    start_time = Time.now
    print "Writing #{hash.size} peptides to #{final_outfile} ..." if $VERBOSE

    File.open(final_outfile, 'w') do |out|
      hash.each do |k,v|
        out.puts( [k, v.join(PROTEIN_DELIMITER)].join(KEY_VALUE_DELIMITER) )
      end
    end
    puts "#{Time.now - start_time} sec" if $VERBOSE

    File.unlink(digestion_file) if remove_digestion_file
    File.expand_path(final_outfile)
  end

  # does combinatorial expansion of all letters requesting it.
  # expand_aa is hash like: {'X'=>STANDARD_AA}
  # returns nil if there are more than MAX_NUM_AA_EXPANSION amino acids to
  # be expanded
  # returns an array holding just the peptide itself if there is nothing to
  # expand
  def self.expand_peptides(peptide, expand_aa)
    # [position in the peptide, letter to expand] for each expandable residue
    index_and_key = []
    peptide.split('').each_with_index do |char,i|
      index_and_key << [i, char] if expand_aa.key?(char)
    end
    return nil if index_and_key.size > MAX_NUM_AA_EXPANSION

    to_expand = [peptide]
    index_and_key.each do |i,letter|
      # substitute every replacement at position i in every peptide built so far
      to_expand = to_expand.map do |current_pep|
        expand_aa[letter].map {|v| dp = current_pep.dup ; dp[i] = v ; dp }
      end.flatten
    end
    to_expand
  end

  # loads the entire db file (as written by peptide_centric_db) into memory
  def initialize(db_file)
    self.replace(YAML.load_file(db_file))
  end

  alias_method :old_bracket, '[]'.to_sym

  # returns the protein id's as an array (nil if the peptide is not in the
  # db, like Db::IO#[])
  def [](key)
    proteins = old_bracket(key)
    proteins && proteins.chomp.split(PROTEIN_DELIMITER)
  end

  # an object for on disk retrieval of db entries
  # proteins are returned as an array.
  # behaves much like a hash once it is opened.
  class IO
    include Enumerable

    # opens filename and calls the block with a Db::IO object; the file is
    # closed when the block returns.
    def self.open(filename, &block)
      raise ArgumentError unless block
      File.open(filename) do |io|
        block.call(self.new(io))
      end
    end

    attr_accessor :io
    # maps each key (peptide aaseq) to [byte offset of its value, byte length]
    attr_accessor :index

    # reads through io once (from its current position) to index the byte
    # location of the value of every 'KEY: VALUE' line
    def initialize(io)
      @io = io
      @index = {}
      re = /^(\w+)#{Regexp.escape(KEY_VALUE_DELIMITER)}/
      line_start = io.pos
      io.each_line do |line|
        line_end = io.pos
        md = re.match(line)
        if md  # lines that are not 'KEY: VALUE' (e.g., blank) are skipped
          key = md[1]
          value_start = line_start + key.bytesize + KEY_VALUE_DELIMITER.bytesize
          @index[key] = [value_start, line_end - value_start]
        end
        line_start = line_end
      end
    end

    # returns an array of proteins for the given key (peptide aaseq)
    def [](key)
      (start, length) = @index[key]
      return nil unless start
      @io.seek(start)
      string = @io.read(length)
      string.chomp!
      string.split(PROTEIN_DELIMITER)
    end

    # number of entries
    def size ; @index.size end
    alias_method :length, :size

    def keys
      @index.keys
    end

    # all the protein lists
    def values
      keys.map {|key| self[key] }
    end

    # yields a pair of aaseq and protein array (returns an enumerator if no
    # block is given)
    def each(&block)
      return to_enum(:each) unless block
      @index.each do |key, start_length|
        block.call([key, self[key]])
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
module MS ; end
module MS::Ident ; end

# A 'sequence' is a notation of a peptide that includes the leading and
# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
# and may contain post-translational modification information.
#
# 'aaseq' is the amino acid sequence of just the peptide with no leading or
# trailing notation (e.g., PEPTIDER or LAKKLY)
module MS::Ident::Peptide
  # anything that is not an uppercase letter, a '.' or a '-'
  Nonstandard_AA_re = /[^A-Z\.\-]/

  class << self

    # Takes a peptide sequence of the form '-.PEPTIDE.R', removes non-standard
    # amino acids, and returns the center piece.
    # Aborts the program if the sequence has no pieces or more than three
    # '.' delimited pieces.
    def sequence_to_aaseq(sequence)
      after_removed = remove_non_amino_acids(sequence)
      pieces = after_removed.split('.')
      case pieces.size
      when 3
        pieces[1]
      when 2
        if pieces[0].size > 1 ## N termini
          pieces[0]
        else ## C termini
          pieces[1]
        end
      when 1 ## this must be a parse error!
        pieces[0] ## which is the peptide itself
      else
        abort "bad peptide sequence: #{sequence.inspect}"
      end
    end

    # removes non standard amino acids specified by Nonstandard_AA_re
    def remove_non_amino_acids(sequence)
      sequence.gsub(Nonstandard_AA_re, '')
    end

    # remove non amino acids and split the sequence (see split_sequence)
    def prepare_sequence(sequence)
      split_sequence(remove_non_amino_acids(sequence))
    end

    # Returns prev, peptide, next from sequence.  Parse errors (anything
    # but two or three '.' delimited pieces) return nil,nil,nil
    #   R.PEPTIDE.A  # -> R, PEPTIDE, A
    #   R.PEPTIDE.-  # -> R, PEPTIDE, -
    #   PEPTIDE.A    # -> -, PEPTIDE, A
    #   A.PEPTIDE    # -> A, PEPTIDE, -
    #   PEPTIDE      # -> nil,nil,nil
    #   K.PEP.TIDE.R # -> nil,nil,nil
    def split_sequence(sequence)
      pieces = sequence.split('.')
      case pieces.size
      when 3
        pieces
      when 2
        # a first piece longer than one residue must be the peptide itself
        if pieces[0].size > 1 ## N termini
          ['-', pieces[0], pieces[1]]
        else ## C termini
          [pieces[0], pieces[1], '-']
        end
      else ## 0, 1 or more than 3 pieces: this must be a parse error!
        [nil,nil,nil]
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'ms/ident/search'
2
+ require 'ms/ident/peptide_hit'
3
+
4
+ module MS ; end
5
+ module MS::Ident ; end
6
+
7
class MS::Ident::PeptideHit
  # Reading and writing of peptide hits (with their q-values) as tab
  # delimited '.phq.tsv' files.
  module Qvalue
    FILE_EXTENSION = '.phq.tsv'
    FILE_DELIMITER = "\t"
    # the column names, in file order
    HEADER = %w(run_id id aaseq charge qvalue)

    class << self

      # writes to the file, adding an extension
      # returns the filename written.
      def to_phq(base, hits, qvalues=[])
        to_file(base + FILE_EXTENSION, hits, qvalues)
      end

      # writes the peptide hits to a phq.tsv file. qvalues is a parallel array
      # to hits that can provide qvalues if not inherent to the hits (a
      # missing/nil entry falls back on hit.qvalue)
      # returns the filename.
      def to_file(filename, hits, qvalues=[])
        File.open(filename,'w') do |out|
          out.puts HEADER.join(FILE_DELIMITER)
          hits.zip(qvalues) do |hit, qvalue|
            out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
          end
        end
        filename
      end

      # returns an array of PeptideHit objects from a phq.tsv
      # Hits sharing a run_id share a single MS::Ident::Search object.
      # raises a RuntimeError if the first line is not the expected header.
      def from_file(filename)
        searches = Hash.new {|h,id| h[id] = MS::Ident::Search.new(id) }
        peptide_hits = []
        File.open(filename) do |io|
          header = io.readline.chomp.split(FILE_DELIMITER)
          raise "bad headers" unless header == HEADER
          io.each do |line|
            line.chomp!
            next if line.empty?  # tolerate blank (e.g., trailing) lines
            (run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
            # PeptideHit#initialize takes a hash of attributes; hand it an
            # empty one (works whether or not that argument has a default)
            ph = MS::Ident::PeptideHit.new({})
            ph.search = searches[run_id]
            ph.id = id
            ph.aaseq = aaseq
            ph.charge = charge.to_i
            ph.qvalue = qvalue.to_f
            peptide_hits << ph
          end
        end
        peptide_hits
      end

      alias_method :from_phq, :from_file

    end
  end # Qvalue
end # Peptide Hit
@@ -0,0 +1,26 @@
1
+ require 'merge'
2
+
3
module MS; end
module MS::Ident; end

# Mixin supplying the readable/writable attributes of a peptide hit.
module MS::Ident::PeptideHitLike
  # identifier, owning search, number of missed cleavages, amino acid
  # sequence and charge of the hit
  attr_accessor :id, :search, :missed_cleavages, :aaseq, :charge

  # an array of MS::Ident::ProteinLike objects
  attr_accessor :proteins

  # relative to the set the hit is contained in!
  attr_accessor :qvalue
end
17
+
18
# A peptide hit whose attributes (see MS::Ident::PeptideHitLike) may be
# given at creation time.
class MS::Ident::PeptideHit
  include MS::Ident::PeptideHitLike
  include Merge

  # hash holds the initial attribute values and is handed to Merge#merge!.
  # It is optional so that an empty hit can be created with
  # PeptideHit.new and filled in afterwards.
  def initialize(hash={})
    merge!(hash)
  end
end
26
+
@@ -0,0 +1,83 @@
1
+ require 'merge'
2
+ require 'nokogiri'
3
+
4
+ module MS ; end
5
+ module MS::Ident ; end
6
+ class MS::Ident::Pepxml ; end
7
+
8
# Modified aminoacid, static or variable
# unless otherwise stated, all attributes can be anything
class MS::Ident::Pepxml::AminoacidModification
  include Merge
  # The amino acid (one letter code)
  attr_accessor :aminoacid
  # Mass difference with respect to unmodified aminoacid, as a Float
  attr_accessor :massdiff
  # Mass of modified aminoacid, Float
  attr_accessor :mass
  # Y if both modified and unmodified aminoacid could be present in the
  # dataset, N if only modified aminoacid can be present
  attr_accessor :variable
  # whether modification can reside only at protein terminus (specified 'n',
  # 'c', or 'nc')
  attr_accessor :peptide_terminus
  # Symbol used by search engine to designate this modification
  attr_accessor :symbol
  # 'Y' if each peptide must have only modified or unmodified aminoacid, 'N' if a
  # peptide may contain both modified and unmodified aminoacid
  attr_accessor :binary

  # hash holds the initial attribute values (handed to Merge#merge!)
  def initialize(hash={})
    merge!(hash)
  end

  # Adds an aminoacid_modification element carrying every attribute that is
  # set (nil/false attributes are left out).
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {}
    [:aminoacid, :massdiff, :mass, :variable, :peptide_terminus, :symbol, :binary].each do |at|
      v = send(at)
      attrs[at] = v if v
    end
    # note massdiff: must begin with either + (nonnegative) or - [e.g.
    # +1.05446 or -2.3342] consider Numeric#to_plus_minus_string in
    # MS::Ident::Pepxml
    # (only formatted when set, so an unset massdiff no longer raises)
    attrs[:massdiff] = attrs[:massdiff].to_plus_minus_string if attrs[:massdiff]
    xmlb.aminoacid_modification(attrs)
    builder || xmlb.doc.root.to_xml
  end
end
47
+
48
# Modified terminus, static or variable
class MS::Ident::Pepxml::TerminalModification
  include Merge
  # n for N-terminus, c for C-terminus
  attr_accessor :terminus
  # Mass difference with respect to unmodified terminus
  attr_accessor :massdiff
  # Mass of modified terminus
  attr_accessor :mass
  # Y if both modified and unmodified terminus could be present in the
  # dataset, N if only modified terminus can be present
  attr_accessor :variable
  # Special symbol used by search engine to designate this modification
  attr_accessor :symbol
  # whether modification can reside only at protein terminus (specified n or
  # c)
  attr_accessor :protein_terminus
  attr_accessor :description

  # hash holds the initial attribute values; each key is assigned through
  # its writer method
  def initialize(hash={})
    hash.each {|k,v| send("#{k}=", v) }
  end

  # Adds a terminal_modification element carrying every attribute that is
  # set (nil/false attributes are left out).
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {}
    # symbol is written too (as AminoacidModification#to_xml does)
    [:terminus, :massdiff, :mass, :variable, :symbol, :protein_terminus, :description].each do |at|
      v = send(at)
      attrs[at] = v if v
    end
    # massdiff must carry an explicit sign (see
    # AminoacidModification#to_xml); only formatted when it is set
    attrs[:massdiff] = attrs[:massdiff].to_plus_minus_string if attrs[:massdiff]
    xmlb.terminal_modification(attrs)
    builder || xmlb.doc.root.to_xml
  end
end
82
+
83
+
@@ -0,0 +1,70 @@
1
+ require 'merge'
2
+
3
+ require 'ms/ident/pepxml/msms_run_summary'
4
+
5
+ module MS ; end
6
+ module MS::Ident ; end
7
+ class MS::Ident::Pepxml; end
8
+
9
# The root element of a pepXML document.
class MS::Ident::Pepxml::MsmsPipelineAnalysis
  include Merge
  XMLNS = "http://regis-web.systemsbiology.net/pepXML"
  XMLNS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
  # (this doesn't actually exist), also, the space is supposed to be there
  XSI_SCHEMA_LOCATION_BASE = "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v"
  # the only additions (as far as a writer is concerned) since v18 are to
  # the 'spectrum': retention_time_sec and activationMethodType
  PEPXML_VERSION = 115

  attr_accessor :xmlns
  attr_accessor :xmlns_xsi
  # explicit override of the schema location (see #xsi_schema_location)
  attr_writer :xsi_schema_location
  # an Integer
  attr_accessor :pepxml_version
  # self referential path to the outputfile
  attr_accessor :summary_xml
  attr_accessor :msms_run_summary
  attr_writer :date

  # creates (and stores) a fresh MsmsRunSummary
  # NOTE(review): presumably called by Merge#merge! to produce the object
  # yielded to the block given to #initialize - confirm in lib/merge.rb
  def block_arg
    @msms_run_summary = MS::Ident::Pepxml::MsmsRunSummary.new
  end

  # if block given, yields a new msms_run_summary to return value of block
  def initialize(hash={}, &block)
    @xmlns = XMLNS
    @xmlns_xsi = XMLNS_XSI
    @pepxml_version = PEPXML_VERSION
    merge!(hash, &block)
  end

  # returns the location explicitly set with xsi_schema_location= or, by
  # default, the location based on the (current) pepxml version number
  def xsi_schema_location
    @xsi_schema_location || (XSI_SCHEMA_LOCATION_BASE + pepxml_version.to_s + '.xsd')
  end

  # if no date string given, then it will be the current local time
  # formatted as a zero padded 'YYYY-MM-DDTHH:MM:SS'
  def date
    @date || Time.now.strftime("%Y-%m-%dT%H:%M:%S")
  end

  # adds the msms_pipeline_analysis element (and the msms_run_summary, if
  # there is one) to the builder.
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # xmlns:xsi declares the prefix used by the xsi:schemaLocation attribute
    attrs = {:date => date, :xmlns => xmlns, 'xmlns:xsi'.to_sym => xmlns_xsi, 'xsi:schemaLocation'.to_sym => xsi_schema_location, :summary_xml => summary_xml}
    xmlb.msms_pipeline_analysis(attrs) do |inner|
      msms_run_summary.to_xml(inner) if msms_run_summary
    end
    builder || xmlb.doc.root.to_xml
  end
end
69
+
70
+
@@ -0,0 +1,82 @@
1
+ require 'merge'
2
+ require 'nokogiri'
3
+
4
+ require 'ms/ident/pepxml/sample_enzyme'
5
+ require 'ms/ident/pepxml/search_summary'
6
+ require 'ms/ident/pepxml/spectrum_query'
7
+
8
+ module MS ; end
9
+ module MS::Ident ; end
10
+ class MS::Ident::Pepxml; end
11
+
12
# The msms_run_summary element of a pepXML document: the description of one
# run together with its sample enzyme, search summary and spectrum queries.
class MS::Ident::Pepxml::MsmsRunSummary
  include Merge
  # The name of the pep xml file without any extension
  attr_accessor :base_name
  # The name of the mass spec manufacturer
  attr_accessor :ms_manufacturer
  attr_accessor :ms_model
  attr_accessor :ms_mass_analyzer
  attr_accessor :ms_detector
  attr_accessor :raw_data_type
  attr_accessor :raw_data
  attr_accessor :ms_ionization
  attr_accessor :pepxml_version

  # A SampleEnzyme object (responds to: name, cut, no_cut, sense)
  attr_accessor :sample_enzyme
  # A SearchSummary object
  attr_accessor :search_summary
  # An array of spectrum_queries
  attr_accessor :spectrum_queries

  # creates (and stores) a fresh SampleEnzyme and SearchSummary and returns
  # them along with the spectrum_queries array
  def block_arg
    [@sample_enzyme = MS::Ident::Pepxml::SampleEnzyme.new,
      @search_summary = MS::Ident::Pepxml::SearchSummary.new,
      @spectrum_queries ]
  end

  # takes a hash of name, value pairs
  # if block given, yields a SampleEnzyme object, a SearchSummary and an array
  # for SpectrumQueries
  def initialize(hash={}, &block)
    @spectrum_queries = []
    # NOTE(review): the block is handed to merge! and also called directly
    # below; if Merge#merge! already calls it with block_arg, it runs twice
    # (each time with freshly created objects) - confirm in lib/merge.rb
    merge!(hash, &block)
    block.call(block_arg) if block
  end

  # optionally takes an xml builder object and returns the builder, or the xml
  # string if no builder was given
  # sets the index attribute of each spectrum query if it is not already set
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {:base_name => base_name, :msManufacturer => ms_manufacturer, :msModel => ms_model, :msIonization => ms_ionization, :msMassAnalyzer => ms_mass_analyzer, :msDetector => ms_detector, :raw_data_type => raw_data_type, :raw_data => raw_data}
    # unset attributes are not written
    attrs.delete_if {|_,v| !v }
    xmlb.msms_run_summary(attrs) do |inner|
      sample_enzyme.to_xml(inner) if sample_enzyme
      search_summary.to_xml(inner) if search_summary
      spectrum_queries.each_with_index do |sq,i|
        sq.index = i+1 unless sq.index
        sq.to_xml(inner)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  # returns a new object with its attributes read from the node
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # sets the run attributes from the attributes of the msms_run_summary
  # node (child elements are not read here) and returns self
  def from_pepxml_node(node)
    @base_name = node['base_name']
    @ms_manufacturer = node['msManufacturer']
    @ms_model = node['msModel']
    @ms_ionization = node['msIonization']
    @ms_mass_analyzer = node['msMassAnalyzer']
    @ms_detector = node['msDetector']
    @raw_data_type = node['raw_data_type']
    @raw_data = node['raw_data']
    self
  end
end
@@ -0,0 +1,14 @@
1
module MS
  module Ident
    class Pepxml
      # A Hash of search parameters (name => value) that can write itself
      # as pepXML parameter elements.
      class Parameters < Hash
        # Calls builder.parameter once per entry, passing the entry as
        # :name and :value. Returns self.
        def to_xml(builder)
          each_pair do |name, value|
            builder.parameter(:name => name, :value => value)
          end
        end
      end
    end
  end
end
14
+