mspire 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/cv/description.rb +18 -0
- data/lib/cv/param.rb +33 -0
- data/lib/cv.rb +3 -0
- data/lib/io/bookmark.rb +13 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/cvlist.rb +76 -0
- data/lib/ms/digester.rb +245 -0
- data/lib/ms/fasta.rb +86 -0
- data/lib/ms/ident/peptide/db.rb +243 -0
- data/lib/ms/ident/peptide.rb +72 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
- data/lib/ms/ident/peptide_hit.rb +26 -0
- data/lib/ms/ident/pepxml/modifications.rb +83 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
- data/lib/ms/ident/pepxml/search_database.rb +49 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
- data/lib/ms/ident/pepxml/search_hit.rb +144 -0
- data/lib/ms/ident/pepxml/search_result.rb +35 -0
- data/lib/ms/ident/pepxml/search_summary.rb +92 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
- data/lib/ms/ident/pepxml.rb +112 -0
- data/lib/ms/ident/protein.rb +33 -0
- data/lib/ms/ident/protein_group.rb +80 -0
- data/lib/ms/ident/search.rb +114 -0
- data/lib/ms/ident.rb +37 -0
- data/lib/ms/isotope/aa.rb +59 -0
- data/lib/ms/mascot.rb +6 -0
- data/lib/ms/mass/aa.rb +79 -0
- data/lib/ms/mass.rb +55 -0
- data/lib/ms/mzml/index_list.rb +98 -0
- data/lib/ms/mzml/plms1.rb +34 -0
- data/lib/ms/mzml.rb +197 -0
- data/lib/ms/obo.rb +38 -0
- data/lib/ms/plms1.rb +156 -0
- data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
- data/lib/ms/quant/qspec.rb +112 -0
- data/lib/ms/spectrum.rb +154 -8
- data/lib/ms.rb +3 -10
- data/lib/msplat.rb +2 -0
- data/lib/obo/ims.rb +5 -0
- data/lib/obo/ms.rb +7 -0
- data/lib/obo/ontology.rb +41 -0
- data/lib/obo/unit.rb +5 -0
- data/lib/openany.rb +23 -0
- data/lib/write_file_or_string.rb +18 -0
- data/obo/ims.obo +562 -0
- data/obo/ms.obo +11677 -0
- data/obo/unit.obo +2563 -0
- data/spec/ms/cvlist_spec.rb +60 -0
- data/spec/ms/digester_spec.rb +351 -0
- data/spec/ms/fasta_spec.rb +100 -0
- data/spec/ms/ident/peptide/db_spec.rb +108 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
- data/spec/ms/ident/pepxml_spec.rb +442 -0
- data/spec/ms/ident/protein_group_spec.rb +68 -0
- data/spec/ms/mass_spec.rb +8 -0
- data/spec/ms/mzml/index_list_spec.rb +122 -0
- data/spec/ms/mzml/plms1_spec.rb +62 -0
- data/spec/ms/mzml_spec.rb +50 -0
- data/spec/ms/plms1_spec.rb +38 -0
- data/spec/ms/quant/qspec_spec.rb +25 -0
- data/spec/msplat_spec.rb +24 -0
- data/spec/obo_spec.rb +25 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
- data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
- data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
- data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
- data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
- data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
- data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
- data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
- data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
- data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
- data/spec/testfiles/plms1/output.key +0 -0
- metadata +157 -40
- data/README +0 -77
- data/changelog.txt +0 -196
- data/lib/ms/calc.rb +0 -32
- data/lib/ms/data/interleaved.rb +0 -60
- data/lib/ms/data/lazy_io.rb +0 -73
- data/lib/ms/data/lazy_string.rb +0 -15
- data/lib/ms/data/simple.rb +0 -59
- data/lib/ms/data/transposed.rb +0 -41
- data/lib/ms/data.rb +0 -57
- data/lib/ms/format/format_error.rb +0 -12
- data/lib/ms/support/binary_search.rb +0 -126
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
require 'merge'
|
|
2
|
+
require 'strscan'
|
|
3
|
+
|
|
4
|
+
module MS ; end
|
|
5
|
+
module MS::Ident ; end
|
|
6
|
+
class MS::Ident::Pepxml ; end
|
|
7
|
+
|
|
8
|
+
class MS::Ident::Pepxml::SampleEnzyme
|
|
9
|
+
include Merge
|
|
10
|
+
# an identifier
|
|
11
|
+
attr_accessor :name
|
|
12
|
+
# amino acids after which to cleave
|
|
13
|
+
attr_accessor :cut
|
|
14
|
+
# cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
|
|
15
|
+
attr_accessor :no_cut
|
|
16
|
+
# 'C' or 'N'
|
|
17
|
+
attr_accessor :sense
|
|
18
|
+
|
|
19
|
+
# Can pass in a name of an enzyme that is recognized (meaning there is a
|
|
20
|
+
# set_<name> method), or
|
|
21
|
+
# trypsin
|
|
22
|
+
# For other enzymes, you must set :cut, :no_cut, :name, and :sense yourself.
|
|
23
|
+
# Builds an enzyme from either a recognized enzyme name or a set of
# attributes.
#
# arg - a String naming an enzyme for which a set_<name> method exists
#       (e.g. "trypsin"), or any other object, which is handed to merge!
#       (provided by the Merge mixin).
def initialize(arg={})
  unless arg.is_a?(String)
    merge!(arg)
    return
  end
  @name = arg
  # dispatch to the matching preset, e.g. set_trypsin
  send(:"set_#{@name}")
end
|
|
31
|
+
|
|
32
|
+
# Preset for trypsin: C-terminal sense, cuts after K or R, except when
# followed by P.
def set_trypsin
  @cut = 'KR'
  @sense = 'C'
  @no_cut = 'P'
end
|
|
37
|
+
|
|
38
|
+
# if an xml builder object is given, it adds to the object and returns the
|
|
39
|
+
# builder object, otherwise it returns an xml fragment string
|
|
40
|
+
# Writes a <sample_enzyme> element holding one <specificity> child.
#
# builder - an xml builder object to append to (optional).
#
# Returns the builder when one was given; otherwise a fresh
# Nokogiri::XML::Builder is used and the root element is returned as an xml
# fragment string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  xmlb.sample_enzyme(:name => name) do |enzyme_xml|
    enzyme_xml.specificity(:cut => cut, :no_cut => no_cut, :sense => sense)
  end
  return builder if builder
  xmlb.doc.root.to_xml
end
|
|
47
|
+
|
|
48
|
+
# returns self
|
|
49
|
+
# Fills this enzyme from a pepxml node: 'name' is read from the node itself;
# 'cut', 'no_cut' and 'sense' are read from node.child.
#
# returns self
def from_pepxml_node(node)
  self.name = node['name']
  specificity = node.child
  %w(cut no_cut sense).each do |attr|
    send("#{attr}=", specificity[attr])
  end
  self
end
|
|
57
|
+
|
|
58
|
+
# Convenience constructor: creates a new instance and fills it from the
# given pepxml node via the instance-level from_pepxml_node.
def self.from_pepxml_node(node)
  instance = new
  instance.from_pepxml_node(node)
end
|
|
61
|
+
|
|
62
|
+
# takes an amino acid sequence (e.g. PEPTIDE).
|
|
63
|
+
# returns the number of missed cleavages
|
|
64
|
+
# Counts the enzyme cleavage sites that lie inside a peptide.
#
# aaseq - an amino acid sequence String (e.g. PEPTIDE).
#
# A site is a residue listed in @cut that is followed by at least one more
# residue and, when @no_cut is set and non-empty, that following residue is
# not listed in @no_cut. A cut residue in the final position is never
# counted.
#
# returns the number of missed cleavages
# raises NotImplementedError when sense is 'N'
def num_missed_cleavages(aaseq)
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
  # The regex is rebuilt on each call: cut and no_cut are writable, so a
  # cached regex could go stale.
  site_regex =
    if @no_cut && @no_cut != ''
      # Lookahead leaves the following residue unconsumed, so adjacent cut
      # residues (e.g. "KK") are each counted.
      /[#{@cut}](?=[^#{@no_cut}])/
    else
      # any cut residue that is not the last character
      /[#{@cut}](?!\z)/
    end
  aaseq.scan(site_regex).size
end
|
|
83
|
+
|
|
84
|
+
# No arguments should contain non-standard amino acids
|
|
85
|
+
# Counts how many ends of a peptide are consistent with this enzyme
# (0, 1 or 2).
#
# prev_aa - the residue before the peptide, or '-'
# middle  - the peptide sequence itself
# next_aa - the residue after the peptide, or '-'
#
# The leading end counts when prev_aa is a cut residue and the peptide's
# first residue is not in no_cut, or when prev_aa is '-'. The trailing end
# counts when the peptide's last residue is a cut residue and next_aa is not
# in no_cut, or when next_aa is '-'.
#
# No arguments should contain non-standard amino acids
# raises NotImplementedError when sense is 'N'
def num_tol_term(prev_aa, middle, next_aa)
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
  blockers = @no_cut || ''
  tail = middle[-1,1]
  head = middle[0,1]
  leading_ok = (@cut.include?(prev_aa) && !blockers.include?(head)) || prev_aa == '-'
  trailing_ok = (@cut.include?(tail) && !blockers.include?(next_aa)) || next_aa == '-'
  [leading_ok, trailing_ok].count(true)
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
###################################################
|
|
102
|
+
###################################################
|
|
103
|
+
###################################################
|
|
104
|
+
###################################################
|
|
105
|
+
# This is digestion methodology:
|
|
106
|
+
|
|
107
|
+
=begin
|
|
108
|
+
# returns all peptides of missed cleavages <= 'missed_cleavages'
|
|
109
|
+
# so 2 missed cleavages will return all no missed cleavage peptides
|
|
110
|
+
# all 1 missed cleavages and all 2 missed cleavages.
|
|
111
|
+
# options:
|
|
112
|
+
def digest(string, missed_cleavages=0, options={})
|
|
113
|
+
raise NotImplementedError if @sense == 'N'
|
|
114
|
+
s = StringScanner.new(string)
|
|
115
|
+
no_cut_regex = Regexp.new("[#{@no_cut}]")
|
|
116
|
+
regex = Regexp.new("[#{@cut}]")
|
|
117
|
+
peps = []
|
|
118
|
+
last_pos = 0
|
|
119
|
+
current_pep = ''
|
|
120
|
+
loop do
|
|
121
|
+
if s.eos?
|
|
122
|
+
break
|
|
123
|
+
end
|
|
124
|
+
m = s.scan_until(regex)
|
|
125
|
+
if m ## found a cut point
|
|
126
|
+
last_pos = s.pos
|
|
127
|
+
# is the next amino acid a no_cut?
|
|
128
|
+
if string[s.pos,1] =~ no_cut_regex
|
|
129
|
+
current_pep << m
|
|
130
|
+
else
|
|
131
|
+
# cut it
|
|
132
|
+
current_pep << m
|
|
133
|
+
peps << current_pep
|
|
134
|
+
current_pep = ''
|
|
135
|
+
end
|
|
136
|
+
else ## didn't find a cut point
|
|
137
|
+
current_pep << string[last_pos..-1]
|
|
138
|
+
peps << current_pep
|
|
139
|
+
break
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
## LOOP through and grab each set of missed cleavages from num down to 0
|
|
143
|
+
all_sets_of_peps = []
|
|
144
|
+
(0..missed_cleavages).to_a.reverse.each do |num_mc|
|
|
145
|
+
all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
|
|
146
|
+
end
|
|
147
|
+
all_sets_of_peps
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# takes an array of peptides and returns an array containing 'num' missed
|
|
151
|
+
# cleavages
|
|
152
|
+
# DOES NOT contain peptides that contain < num of missed cleavages
|
|
153
|
+
# (i.e., will not return missed cleavages of 1 or 2 if num == 3)
|
|
154
|
+
def get_missed_cleavages(ar_of_peptide_seqs, num)
|
|
155
|
+
(0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
|
|
156
|
+
ar_of_peptide_seqs[i,num+1].join
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def self.tryptic(string, missed_cleavages=0)
|
|
161
|
+
self.new("trypsin").digest(string, missed_cleavages)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
end
|
|
165
|
+
=end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
require 'ms/fasta'
|
|
2
|
+
require 'merge'
|
|
3
|
+
module MS ; end
|
|
4
|
+
module MS::Ident ; end
|
|
5
|
+
|
|
6
|
+
class MS::Ident::Pepxml
|
|
7
|
+
class SearchDatabase
|
|
8
|
+
include Merge
|
|
9
|
+
# required! the local, full path to the protein sequence database
|
|
10
|
+
attr_accessor :local_path
|
|
11
|
+
# required! 'AA' or 'NA'
|
|
12
|
+
attr_accessor :seq_type
|
|
13
|
+
|
|
14
|
+
# optional
|
|
15
|
+
attr_accessor :database_name
|
|
16
|
+
# optional
|
|
17
|
+
attr_accessor :orig_database_url
|
|
18
|
+
# optional
|
|
19
|
+
attr_accessor :database_release_date
|
|
20
|
+
# optional
|
|
21
|
+
attr_accessor :database_release_identifier
|
|
22
|
+
# optional
|
|
23
|
+
attr_accessor :size_of_residues
|
|
24
|
+
|
|
25
|
+
# takes a hash to fill in values
|
|
26
|
+
# takes a hash to fill in values (applied with merge!)
#
# get_size_of_residues - when true and local_path names an existing file,
#                        size_of_residues is computed from that file. A
#                        missing or unset local_path is skipped silently.
def initialize(hash={}, get_size_of_residues=false)
  merge!(hash)
  return unless get_size_of_residues
  # guard against nil: File.exist?(nil) raises TypeError
  set_size_of_residues! if @local_path && File.exist?(@local_path)
end
|
|
32
|
+
|
|
33
|
+
# returns self for chaining
|
|
34
|
+
# Resets size_of_residues to 0 and adds the sequence size of every entry
# that MS::Fasta.foreach yields for local_path.
#
# returns self for chaining
def set_size_of_residues!
  @size_of_residues = 0
  MS::Fasta.foreach(@local_path) { |fasta_entry| @size_of_residues += fasta_entry.sequence.size }
  self
end
|
|
41
|
+
|
|
42
|
+
# Appends a <search_database> element to the builder, carrying every
# attribute below whose value is set (nil/false values are left out).
#
# returns the builder
def to_xml(builder)
  attr_names = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues]
  attrs = {}
  attr_names.each do |key|
    value = send(key)
    attrs[key] = value if value
  end
  builder.search_database(attrs)
  builder
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
require 'andand'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
|
|
4
|
+
module MS ; end
|
|
5
|
+
module MS::Ident ; end
|
|
6
|
+
class MS::Ident::Pepxml ; end
|
|
7
|
+
class MS::Ident::Pepxml::SearchHit ; end
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Positions and masses of modifications
|
|
11
|
+
MS::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do
|
|
12
|
+
## Should be something like this:
|
|
13
|
+
# <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
|
|
14
|
+
# <mod_aminoacid_mass position=" " mass=" "/>
|
|
15
|
+
# </modification_info>
|
|
16
|
+
# e.g.:
|
|
17
|
+
# <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
|
|
18
|
+
# <mod_aminoacid_mass position="2" mass="545.7160"/>
|
|
19
|
+
# <mod_aminoacid_mass position="3" mass="147.1926"/>
|
|
20
|
+
# </modification_info>
|
|
21
|
+
|
|
22
|
+
# Mass of modified N terminus
|
|
23
|
+
#attr_accessor :mod_nterm_mass
|
|
24
|
+
# Mass of modified C terminus
|
|
25
|
+
#attr_accessor :mod_cterm_mass
|
|
26
|
+
# Peptide sequence (with indicated modifications) I'm assuming that the
|
|
27
|
+
# native sequest indicators are OK here
|
|
28
|
+
#attr_accessor :modified_peptide
|
|
29
|
+
|
|
30
|
+
# These are objects of type: ...ModAminoacidMass
|
|
31
|
+
# position ranges from 1 to peptide length
|
|
32
|
+
#attr_accessor :mod_aminoacid_masses
|
|
33
|
+
|
|
34
|
+
# Accepts either the ordinary positional Struct arguments or a single Hash
# keyed by member name; a Hash is converted to positional values in member
# order before being passed to the Struct initializer.
def initialize(*args)
  first = args.first
  args = first.values_at(*members) if first.is_a?(Hash)
  super(*args)
end
|
|
40
|
+
|
|
41
|
+
# Will escape any xml special chars in modified_peptide
|
|
42
|
+
# Writes a <modification_info> element. Its attributes are whichever of
# mod_nterm_mass, mod_cterm_mass and modified_peptide are set; each entry of
# mod_aminoacid_masses (if any) writes itself inside the element through its
# own to_xml.
#
# builder - an xml builder object to append to (optional).
#
# Returns the builder when one was given; otherwise a fresh
# Nokogiri::XML::Builder is used and the root element is returned as a
# string.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  # Only set attributes are kept: unset ones must not reach the builder,
  # and nil entries in a pair list make Hash[] fail.
  atts = {}
  [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide].each do |at|
    value = send(at)
    atts[at] = value if value
  end
  xmlb.modification_info(atts) do |info_xml|
    (mod_aminoacid_masses || []).each do |mod_aa_mass|
      mod_aa_mass.to_xml(info_xml)
    end
  end
  builder || xmlb.doc.root.to_s
end
|
|
55
|
+
|
|
56
|
+
# Convenience constructor: creates an empty instance and fills it from the
# given pepxml node via the instance-level from_pepxml_node.
def self.from_pepxml_node(node)
  instance = new
  instance.from_pepxml_node(node)
end
|
|
59
|
+
|
|
60
|
+
# returns self
|
|
61
|
+
# Fills this struct from a <modification_info> node.
#
# modified_peptide, mod_nterm_mass and mod_cterm_mass are copied from the
# node's attributes as-is. Each child named 'mod_aminoacid_mass' becomes a
# ModAminoacidMass (position as Integer, mass as Float); children with any
# other name are skipped.
#
# returns self
def from_pepxml_node(node)
  self.modified_peptide = node['modified_peptide']
  self.mod_nterm_mass = node['mod_nterm_mass']
  self.mod_cterm_mass = node['mod_cterm_mass']
  mass_class = MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass
  masses = []
  # iterate explicitly with each: a block handed to #children is never run
  node.children.each do |mass_n|
    next unless mass_n.name == 'mod_aminoacid_mass'
    # two positional arguments, one per struct member
    masses << mass_class.new(mass_n['position'].to_i, mass_n['mass'].to_f)
  end
  self.mod_aminoacid_masses = masses
  self
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# The mass of one modified amino acid together with its position in the
# peptide.
MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
  # Appends a <mod_aminoacid_mass> element carrying position and mass to the
  # builder and returns the builder.
  def to_xml(builder)
    builder.tap { |xml| xml.mod_aminoacid_mass(:position => position, :mass => mass) }
  end
end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
require 'merge'
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
|
|
5
|
+
module MS ; end
|
|
6
|
+
module MS::Ident ; end
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MS::Ident::Pepxml
|
|
10
|
+
|
|
11
|
+
class MS::Ident::Pepxml::SearchHit
|
|
12
|
+
include Merge
|
|
13
|
+
|
|
14
|
+
DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]
|
|
15
|
+
|
|
16
|
+
Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])
|
|
17
|
+
|
|
18
|
+
class << self
  # lets callers override the member list for this class
  attr_writer :members

  # The member list in effect: the overridden list if one was set,
  # DEFAULT_MEMBERS otherwise.
  def members
    return @members if @members
    DEFAULT_MEMBERS
  end
end
|
|
24
|
+
|
|
25
|
+
members.each {|memb| attr_accessor memb }
|
|
26
|
+
|
|
27
|
+
# rank of the peptide hit (required)
|
|
28
|
+
attr_accessor :hit_rank
|
|
29
|
+
# Peptide aminoacid sequence (with no indicated modifications) (required)
|
|
30
|
+
attr_accessor :peptide
|
|
31
|
+
|
|
32
|
+
# Aminoacid preceding peptide ('-' if none)
|
|
33
|
+
attr_accessor :peptide_prev_aa
|
|
34
|
+
|
|
35
|
+
# Aminoacid following peptide (- if none)
|
|
36
|
+
attr_accessor :peptide_next_aa
|
|
37
|
+
|
|
38
|
+
# Number of peptide fragment ions found in spectrum (Integer)
|
|
39
|
+
attr_accessor :num_matched_ions
|
|
40
|
+
|
|
41
|
+
# Number of peptide fragment ions predicted for peptide (Integer)
|
|
42
|
+
attr_accessor :tot_num_ions
|
|
43
|
+
|
|
44
|
+
# (required)
|
|
45
|
+
attr_accessor :calc_neutral_pep_mass
|
|
46
|
+
|
|
47
|
+
# Mass(precursor ion) - Mass(peptide) (required)
|
|
48
|
+
attr_accessor :massdiff
|
|
49
|
+
|
|
50
|
+
# Number of peptide termini consistent with cleavage by sample enzyme
|
|
51
|
+
attr_accessor :num_tol_term
|
|
52
|
+
|
|
53
|
+
# Number of sample enzyme cleavage sites internal to peptide
|
|
54
|
+
attr_accessor :num_missed_cleavages
|
|
55
|
+
|
|
56
|
+
# Potential use in future for user manual validation (true/false)
|
|
57
|
+
# by default, this will be set to false
|
|
58
|
+
# (the xml is expressed as a 0 or 1)
|
|
59
|
+
attr_accessor :is_rejected
|
|
60
|
+
|
|
61
|
+
# a protein identifier string (required)
|
|
62
|
+
attr_accessor :protein
|
|
63
|
+
|
|
64
|
+
# Number of unique proteins in search database containing peptide
|
|
65
|
+
# (required)
|
|
66
|
+
attr_accessor :num_tot_proteins
|
|
67
|
+
|
|
68
|
+
# Extracted from search database
|
|
69
|
+
attr_accessor :protein_desc
|
|
70
|
+
|
|
71
|
+
attr_accessor :calc_pI
|
|
72
|
+
attr_accessor :protein_mw
|
|
73
|
+
|
|
74
|
+
# a ModificationInfo object
|
|
75
|
+
attr_accessor :modification_info
|
|
76
|
+
|
|
77
|
+
# a Hash with keys (the score type) and values
|
|
78
|
+
# (to_xml calls each_pair to generate the xml, so a Struct would also
|
|
79
|
+
# work)
|
|
80
|
+
attr_accessor :search_scores
|
|
81
|
+
|
|
82
|
+
# a link back to the spectrum_query object
|
|
83
|
+
attr_accessor :spectrum_query
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}
|
|
87
|
+
|
|
88
|
+
alias_method :aaseq, :peptide
|
|
89
|
+
alias_method :aaseq=, :peptide=
|
|
90
|
+
|
|
91
|
+
# takes either a hash or an ordered list of values to set.
|
|
92
|
+
# yields an empty search_scores hash if given a block.
|
|
93
|
+
# mind that you set the ModificationInfo object as needed.
|
|
94
|
+
# Starts with an empty search_scores hash, then sets attributes from either
# a single Hash (applied with merge!) or an ordered list of values matched
# against the class's member list; members without a matching value are set
# to nil. If a block is given it is called with the search_scores hash.
def initialize(*args, &block)
  @search_scores = {}
  first = args.first
  if first.is_a?(Hash)
    merge!(first)
  else
    self.class.members.each_with_index do |key, i|
      send("#{key}=", args[i])
    end
  end
  block.call(@search_scores) if block
end
|
|
105
|
+
|
|
106
|
+
# The member list of this object's class (see the class-level members).
def members
  klass = self.class
  klass.members
end
|
|
109
|
+
|
|
110
|
+
# Writes a <search_hit> element.
#
# Its attributes are the first 14 members that have a value; massdiff is
# replaced by the result of calling to_plus_minus_string on it. Inside the
# element the modification_info (if set) writes itself, followed by one
# <search_score> per entry in search_scores.
#
# builder - an xml builder object to append to (optional).
#
# Returns the builder when one was given; otherwise a fresh
# Nokogiri::XML::Builder is used and the root element is returned as xml.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  hash_attrs = {}
  members[0,14].each do |key|
    value = send(key)
    hash_attrs[key] = value if value
  end
  hash_attrs[:massdiff] = hash_attrs[:massdiff].to_plus_minus_string
  xmlb.search_hit(hash_attrs) do |hit_xml|
    @modification_info.to_xml(hit_xml) if @modification_info
    @search_scores.each_pair do |score_name, score_value|
      hit_xml.search_score(:name => score_name, :value => score_value)
    end
  end
  return builder if builder
  xmlb.doc.root.to_xml
end
|
|
121
|
+
|
|
122
|
+
# Fills this hit from the attributes of a <search_hit> node.
#
# Values are assigned through the named accessors: this class is not a
# Struct and defines no []=, so positional assignment is not available here.
# Counts and is_rejected are converted with to_i, masses with to_f; the
# remaining attributes are kept as read from the node.
#
# returns self
def from_pepxml_node(node)
  self.hit_rank = node['hit_rank'].to_i
  self.peptide = node['peptide']
  self.peptide_prev_aa = node['peptide_prev_aa']
  self.peptide_next_aa = node['peptide_next_aa']
  self.protein = node['protein']  ## will this be the string?? (yes, for now)
  self.num_tot_proteins = node['num_tot_proteins'].to_i
  self.num_matched_ions = node['num_matched_ions'].to_i
  self.tot_num_ions = node['tot_num_ions'].to_i
  self.calc_neutral_pep_mass = node['calc_neutral_pep_mass'].to_f
  self.massdiff = node['massdiff'].to_f
  self.num_tol_term = node['num_tol_term'].to_i
  self.num_missed_cleavages = node['num_missed_cleavages'].to_i
  self.is_rejected = node['is_rejected'].to_i
  self
end
|
|
139
|
+
|
|
140
|
+
# A lightweight Struct with the fields id, search, aaseq, charge and
# search_scores. NOTE(review): presumably a minimal stand-in for a full
# SearchHit - confirm against callers.
Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
end
|
|
144
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
|
|
3
|
+
require 'ms/ident/pepxml/search_hit'
|
|
4
|
+
|
|
5
|
+
module MS ; end
module MS::Ident ; end
class MS::Ident::Pepxml ; end

# Holds the search hits of one search result and writes them out as a
# <search_result> element.
class MS::Ident::Pepxml::SearchResult
  # an array of search_hits
  attr_accessor :search_hits

  # search_hits - an array of search hits (defaults to a new empty array).
  #               For consistency with other objects, a hash with the key
  #               :search_hits (whose value is the array) is also accepted.
  #
  # If a block is given it is called with the stored search_hits.
  def initialize(search_hits = [], &block)
    @search_hits = search_hits.is_a?(Hash) ? search_hits[:search_hits] : search_hits
    block.call(@search_hits) if block
  end

  # Writes a <search_result> element; each search hit writes itself inside
  # it through its own to_xml.
  #
  # builder - an xml builder object to append to (optional).
  #
  # Returns the builder when one was given; otherwise a fresh
  # Nokogiri::XML::Builder is used and the root element is returned as xml.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # write through xmlb (never the possibly-nil builder argument)
    xmlb.search_result do |result_xml|
      search_hits.each do |sh|
        sh.to_xml(result_xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

end
|
|
35
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
require 'ms/ident/pepxml/search_database'
|
|
2
|
+
require 'ms/ident/pepxml/modifications'
|
|
3
|
+
require 'ms/ident/pepxml/parameters'
|
|
4
|
+
|
|
5
|
+
require 'nokogiri'
|
|
6
|
+
require 'merge'
|
|
7
|
+
|
|
8
|
+
module MS ; end
|
|
9
|
+
module MS::Ident ; end
|
|
10
|
+
class MS::Ident::Pepxml ; end
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# requires these keys:
|
|
14
|
+
#
|
|
15
|
+
# :enzyme => a valid enzyme name
|
|
16
|
+
# :max_num_internal_cleavages => max number of internal cleavages allowed
|
|
17
|
+
# :min_number_termini => minimum number of termini??
|
|
18
|
+
class MS::Ident::Pepxml::EnzymaticSearchConstraint < Hash
  # Adds nothing to Hash; the class only gives the hash a distinct type.
end
|
|
20
|
+
|
|
21
|
+
class MS::Ident::Pepxml::SearchSummary
|
|
22
|
+
include Merge
|
|
23
|
+
|
|
24
|
+
DEFAULT_SEARCH_ID = '1'
|
|
25
|
+
|
|
26
|
+
attr_accessor :base_name
|
|
27
|
+
# required in v18-19, optional in later versions
|
|
28
|
+
attr_accessor :out_data_type
|
|
29
|
+
# required in v18-19, optional in later versions
|
|
30
|
+
attr_accessor :out_data
|
|
31
|
+
# by default, "1"
|
|
32
|
+
attr_accessor :search_id
|
|
33
|
+
# an array of MS::Ident::Pepxml::Modification objects
|
|
34
|
+
attr_accessor :modifications
|
|
35
|
+
# A SearchDatabase object (responds to :local_path and :seq_type)
|
|
36
|
+
attr_accessor :search_database
|
|
37
|
+
# the other search parameters as a hash
|
|
38
|
+
attr_accessor :parameters
|
|
39
|
+
# the search engine used, SEQUEST, Mascot, Comet, etc.
|
|
40
|
+
attr_accessor :search_engine
|
|
41
|
+
# required: 'average' or 'monoisotopic'
|
|
42
|
+
attr_accessor :precursor_mass_type
|
|
43
|
+
# required: 'average' or 'monoisotopic'
|
|
44
|
+
attr_accessor :fragment_mass_type
|
|
45
|
+
# An EnzymaticSearchConstraint object (at the moment this is merely a hash
|
|
46
|
+
# with a few required keys)
|
|
47
|
+
attr_accessor :enzymatic_search_constraint
|
|
48
|
+
|
|
49
|
+
# Creates fresh SearchDatabase, EnzymaticSearchConstraint and Parameters
# objects, stores them on this summary, and returns them together with the
# current modifications as
# [search_database, enzymatic_search_constraint, modifications, parameters].
def block_arg
  @search_database = MS::Ident::Pepxml::SearchDatabase.new
  @enzymatic_search_constraint = MS::Ident::Pepxml::EnzymaticSearchConstraint.new
  @parameters = MS::Ident::Pepxml::Parameters.new
  [@search_database, @enzymatic_search_constraint, @modifications, @parameters]
end
|
|
56
|
+
|
|
57
|
+
# initializes modifications to an empty array
|
|
58
|
+
# Sets search_id to DEFAULT_SEARCH_ID and modifications to an empty array,
# then applies the given hash (and block, if any) through merge!.
def initialize(hash={}, &block)
  @search_id = DEFAULT_SEARCH_ID
  @modifications = []
  merge!(hash, &block)
end
|
|
63
|
+
|
|
64
|
+
# Writes a <search_summary> element.
#
# Its attributes are whichever of base_name, search_engine,
# precursor_mass_type, fragment_mass_type, out_data_type, out_data and
# search_id are set. Inside the element, in order: the search_database (if
# set), an <enzymatic_search_constraint> (if set), every modification, and
# the parameters (if set).
#
# builder - an xml builder object to append to (optional).
#
# Returns the builder when one was given; otherwise a fresh
# Nokogiri::XML::Builder is used and the root element is returned as xml.
def to_xml(builder=nil)
  # TODO: out_data and out_data_type are optional in later pepxml versions...
  # should work that in...
  attr_names = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
  attrs = {}
  attr_names.each do |at|
    value = send(at)
    attrs[at] = value if value
  end
  xmlb = builder || Nokogiri::XML::Builder.new
  # write through xmlb (never the possibly-nil builder argument)
  xmlb.search_summary(attrs) do |summary_xml|
    search_database.to_xml(summary_xml) if search_database
    summary_xml.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
    modifications.each do |mod|
      mod.to_xml(summary_xml)
    end
    parameters.to_xml(summary_xml) if parameters
  end
  builder || xmlb.doc.root.to_xml
end
|
|
80
|
+
|
|
81
|
+
# Convenience constructor: creates a new instance and hands the node to the
# instance-level from_pepxml_node.
def self.from_pepxml_node(node)
  instance = new
  instance.from_pepxml_node(node)
end
|
|
84
|
+
|
|
85
|
+
# Placeholder: reading a search summary from a pepxml node is not supported
# yet, so this always raises NotImplementedError.
def from_pepxml_node(node)
  raise NotImplementedError.new("not implemented just yet (just use the raw xml node)")
end
|
|
88
|
+
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'ms/mass'
|
|
3
|
+
require 'merge'
|
|
4
|
+
|
|
5
|
+
require 'ms/ident/pepxml/search_result'
|
|
6
|
+
|
|
7
|
+
module MS ; end
|
|
8
|
+
module MS::Ident ; end
|
|
9
|
+
class MS::Ident::Pepxml ; end
|
|
10
|
+
|
|
11
|
+
# search_specification is a search constraint applied specifically to this query (a String)
|
|
12
|
+
class MS::Ident::Pepxml::SpectrumQuery
|
|
13
|
+
include Merge
|
|
14
|
+
DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]
|
|
15
|
+
|
|
16
|
+
class << self
  # lets callers override the member list for this class
  attr_writer :members

  # The member list in effect: the overridden list if one was set,
  # DEFAULT_MEMBERS otherwise.
  def members
    return @members if @members
    DEFAULT_MEMBERS
  end
end
|
|
22
|
+
|
|
23
|
+
members.each {|memb| attr_accessor memb }
|
|
24
|
+
|
|
25
|
+
Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
|
|
26
|
+
Optional = [:retention_time_sec, :search_specification]
|
|
27
|
+
|
|
28
|
+
# takes either a hash or an ordered list of values to set
|
|
29
|
+
# yields an empty search_results array if given a block
|
|
30
|
+
# Starts with an empty search_results array, then sets attributes from
# either a single Hash (applied with merge!) or an ordered list of values
# matched against the class's member list; members without a matching value
# are set to nil. If a block is given it is called with search_results.
def initialize(*args, &block)
  @search_results = []
  first = args.first
  if first.is_a?(Hash)
    merge!(first)
  else
    self.class.members.each_with_index do |key, i|
      send("#{key}=", args[i])
    end
  end
  block.call(@search_results) if block
end
|
|
41
|
+
|
|
42
|
+
# The member list of this object's class (see the class-level members).
def members
  klass = self.class
  klass.members
end
|
|
45
|
+
|
|
46
|
+
############################################################
|
|
47
|
+
# FOR PEPXML:
|
|
48
|
+
############################################################
|
|
49
|
+
# Writes a <spectrum_query> element.
#
# Its attributes are the first 8 members (all through search_specification)
# that have a value; retention_time_sec is dropped when pepxml_version is
# 18. Each search result writes itself inside the element through its own
# to_xml.
#
# builder - an xml builder object to append to (optional).
#
# Returns the builder when one was given; otherwise a fresh
# Nokogiri::XML::Builder is used and the root element is returned as xml.
def to_xml(builder=nil)
  xmlb = builder || Nokogiri::XML::Builder.new
  # Unset members are skipped rather than left as nil entries, which would
  # make Hash[] fail.
  attrs_hash = {}
  members[0, 8].each do |at|
    value = send(at)
    attrs_hash[at] = value if value
  end
  case pepxml_version
  when 18
    attrs_hash.delete(:retention_time_sec)
  end
  xmlb.spectrum_query(attrs_hash) do |query_xml|
    search_results.each do |search_result|
      search_result.to_xml(query_xml)
    end
  end
  builder || xmlb.doc.root.to_xml
end
|
|
65
|
+
|
|
66
|
+
# Convenience constructor: creates a new instance and fills it from the
# given pepxml node via the instance-level from_pepxml_node.
def self.from_pepxml_node(node)
  instance = new
  instance.from_pepxml_node(node)
end
|
|
69
|
+
|
|
70
|
+
# Fills the required fields from the attributes of a <spectrum_query> node:
# spectrum is kept as read, precursor_neutral_mass is converted with to_f,
# and start_scan, end_scan, index and assumed_charge with to_i.
#
# returns self
def from_pepxml_node(node)
  @spectrum = node['spectrum']
  conversions = [
    ['start_scan', :to_i],
    ['end_scan', :to_i],
    ['precursor_neutral_mass', :to_f],
    ['index', :to_i],
    ['assumed_charge', :to_i],
  ]
  conversions.each do |attr, converter|
    instance_variable_set("@#{attr}", node[attr].send(converter))
  end
  self
end
|
|
79
|
+
|
|
80
|
+
# Computes m_plus_h - h_plus + deltamass.
#
# m_plus_h  - the first mass term
# deltamass - the amount added after h_plus is subtracted
# h_plus    - the amount subtracted (defaults to MS::Mass::H_PLUS)
def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=MS::Mass::H_PLUS)
  without_proton = m_plus_h - h_plus
  without_proton + deltamass
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
|