ms-ident 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
+ require 'andand'
2
+ require 'nokogiri'
3
+
4
module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end
class Ms::Ident::Pepxml::SearchHit ; end


# Positions and masses of modifications
#
# Serializes to a pepXML element shaped like this:
#
#     <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
#       <mod_aminoacid_mass position=" " mass=" "/>
#     </modification_info>
#
# e.g.:
#
#     <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
#       <mod_aminoacid_mass position="2" mass="545.7160"/>
#       <mod_aminoacid_mass position="3" mass="147.1926"/>
#     </modification_info>
#
# Members:
#
# modified_peptide::     Peptide sequence (with indicated modifications).
#                        I'm assuming that the native sequest indicators
#                        are OK here
# mod_aminoacid_masses:: Array of ModAminoacidMass objects (position ranges
#                        from 1 to peptide length); may be nil
# mod_nterm_mass::       Mass of modified N terminus
# mod_cterm_mass::       Mass of modified C terminus
Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do

  class << self
    alias_method :old_new, :new
    # takes either a hash (keyed by member name as a Symbol) or the normal
    # list of values to set.
    def new(*args)
      if args.first.is_a?(Hash)
        args = args.first.values_at(*members)
      end
      old_new(*args)
    end
  end

  # Adds a modification_info element (with one mod_aminoacid_mass child per
  # entry in mod_aminoacid_masses) to the builder. Attributes whose value
  # is nil/false are left out.
  #
  # Returns the builder if one was given, otherwise the xml string of the
  # freshly built document root.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide].map do |at|
      value = send(at)
      [at, value] if value
    end.compact
    # Resolved outside the block (and the block takes a parameter) so the
    # builder never needs to evaluate the block in its own context, where
    # a bare method name would be read as a request for a new element.
    masses = mod_aminoacid_masses || []
    xmlb.modification_info(Hash[atts]) do |xml|
      masses.each do |mod_aa_mass|
        mod_aa_mass.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  # builds a new object from a modification_info xml node
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills this object from a modification_info xml node: the three
  # attributes are stored as given by the node, and every child named
  # mod_aminoacid_mass becomes a ModAminoacidMass (position as Integer,
  # mass as Float). Other children (e.g. whitespace text nodes) are skipped.
  #
  # returns self
  def from_pepxml_node(node)
    self.modified_peptide = node['modified_peptide']
    self.mod_nterm_mass = node['mod_nterm_mass']
    self.mod_cterm_mass = node['mod_cterm_mass']
    mass_class = Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass
    mass_nodes = node.children.select {|child| child.name == 'mod_aminoacid_mass' }
    self.mod_aminoacid_masses = mass_nodes.map do |mass_n|
      mass_class.new(mass_n['position'].to_i, mass_n['mass'].to_f)
    end
    self
  end
end

# The mass of a single modified amino acid and its (1-based) position in
# the peptide.
Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
  # adds a mod_aminoacid_mass element to the builder
  def to_xml(builder)
    builder.mod_aminoacid_mass(:position => position, :mass => mass)
  end
end
@@ -0,0 +1,141 @@
1
+ require 'set'
2
+ require 'merge'
3
+ require 'nokogiri'
4
+
5
+ module Ms ; end
6
+ module Ms::Ident ; end
7
+
8
+
9
class Ms::Ident::Pepxml

  # A single peptide hit. Attributes commented "(required)" are the ones
  # listed in Required.
  class Ms::Ident::Pepxml::SearchHit
    include Merge

    DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]

    Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])

    class << self
      attr_writer :members
      # the ordered member list used for positional construction
      # (DEFAULT_MEMBERS unless overridden with members=)
      def members
        @members || DEFAULT_MEMBERS
      end
    end

    members.each {|memb| attr_accessor memb }

    # rank of the peptide hit (required)
    attr_accessor :hit_rank
    # Peptide aminoacid sequence (with no indicated modifications) (required)
    attr_accessor :peptide

    # Aminoacid preceding peptide ('-' if none)
    attr_accessor :peptide_prev_aa

    # Aminoacid following peptide ('-' if none)
    attr_accessor :peptide_next_aa

    # Number of peptide fragment ions found in spectrum (Integer)
    attr_accessor :num_matched_ions

    # Number of peptide fragment ions predicted for peptide (Integer)
    attr_accessor :tot_num_ions

    # (required)
    attr_accessor :calc_neutral_pep_mass

    # Mass(precursor ion) - Mass(peptide) (required)
    attr_accessor :massdiff

    # Number of peptide termini consistent with cleavage by sample enzyme
    attr_accessor :num_tol_term

    # Number of sample enzyme cleavage sites internal to peptide
    attr_accessor :num_missed_cleavages

    # Potential use in future for user manual validation
    # (the xml is expressed as a 0 or 1)
    # NOTE(review): no default is assigned anywhere in this class, and
    # to_xml leaves the attribute out when the value is nil or false.
    attr_accessor :is_rejected

    # a protein identifier string (required)
    attr_accessor :protein

    # Number of unique proteins in search database containing peptide
    # (required)
    attr_accessor :num_tot_proteins

    # Extracted from search database
    attr_accessor :protein_desc

    attr_accessor :calc_pI
    attr_accessor :protein_mw

    # a ModificationInfo object
    attr_accessor :modification_info

    # a Hash with keys (the score type) and values
    # (to_xml calls each_pair to generate the xml, so a Struct would also
    # work)
    attr_accessor :search_scores

    # a link back to the spectrum_query object
    attr_accessor :spectrum_query


    Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}

    alias_method :aaseq, :peptide
    alias_method :aaseq=, :peptide=

    # builds a new object from a search_hit xml node
    def self.from_pepxml_node(node)
      self.new.from_pepxml_node(node)
    end

    # takes either a hash or an ordered list of values to set (in the order
    # of SearchHit.members).
    # yields the search_scores hash if given a block.
    # mind that you set the ModificationInfo object as needed.
    def initialize(*args, &block)
      @search_scores = {}
      if args.first.is_a?(Hash)
        merge!(args.first)
      else
        # Only assign the values actually supplied; pairing every member
        # with a padded nil would wipe out the default search_scores hash.
        self.class.members.first(args.size).zip(args) do |k,v|
          send("#{k}=", v)
        end
      end
      block.call(@search_scores) if block
    end

    def members
      self.class.members
    end

    # Adds a search_hit element to the builder, followed by the
    # modification_info (if any) and one search_score element per
    # search_scores pair. massdiff is written with an explicit sign.
    #
    # Returns the builder if one was given, otherwise the xml string of the
    # freshly built document root.
    #
    # NOTE(review): only the first 14 members (through protein_desc) become
    # attributes, so calc_pI and protein_mw are never written -- confirm
    # whether that is intended.
    def to_xml(builder=nil)
      xmlb = builder || Nokogiri::XML::Builder.new
      attrs = members[0,14].map {|k| v=send(k) ; [k, v] if v }.compact
      hash_attrs = Hash[attrs]
      mass_delta = hash_attrs[:massdiff]
      # an unset massdiff is simply left out rather than raising on nil
      hash_attrs[:massdiff] = mass_delta.to_plus_minus_string if mass_delta
      xmlb.search_hit(hash_attrs) do |xml|
        @modification_info.to_xml(xml) if @modification_info
        (@search_scores || {}).each_pair {|k,v| xml.search_score(:name => k, :value => v) }
      end
      builder || xmlb.doc.root.to_xml
    end

    # Sets the attributes found on a search_hit xml node, casting counts to
    # Integer and masses to Float. Child elements (modification_info,
    # search_score) are not read here.
    #
    # returns self
    def from_pepxml_node(node)
      self.hit_rank = node['hit_rank'].to_i
      self.peptide = node['peptide']
      self.peptide_prev_aa = node['peptide_prev_aa']
      self.peptide_next_aa = node['peptide_next_aa']
      self.protein = node['protein']  ## will this be the string?? (yes, for now)
      self.num_tot_proteins = node['num_tot_proteins'].to_i
      self.num_matched_ions = node['num_matched_ions'].to_i
      self.tot_num_ions = node['tot_num_ions'].to_i
      self.calc_neutral_pep_mass = node['calc_neutral_pep_mass'].to_f
      self.massdiff = node['massdiff'].to_f
      self.num_tol_term = node['num_tol_term'].to_i
      self.num_missed_cleavages = node['num_missed_cleavages'].to_i
      self.is_rejected = node['is_rejected'].to_i
      self
    end
  end

end
141
+
@@ -0,0 +1,28 @@
1
+ require 'nokogiri'
2
+
3
module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end

# A container for the search hits of one search.
class Ms::Ident::Pepxml::SearchResult
  # an array of search_hits
  attr_accessor :search_hits

  # if block given, then yields the search_hits array (empty unless one
  # was passed in)
  def initialize(search_hits = [], &block)
    @search_hits = search_hits
    block.call(@search_hits) if block
  end

  # Adds a search_result element to the builder and lets each search hit
  # add itself beneath it.
  #
  # Returns the builder if one was given, otherwise the xml string of the
  # freshly built document root.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb: builder is nil when the caller passed nothing
    xmlb.search_result do |xml|
      search_hits.each do |sh|
        sh.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

end
28
+
@@ -0,0 +1,88 @@
1
+ require 'ms/ident/pepxml/search_database'
2
+ require 'ms/ident/pepxml/modifications'
3
+ require 'ms/ident/pepxml/parameters'
4
+
5
+ require 'nokogiri'
6
+ require 'merge'
7
+
8
module Ms
  module Ident
    class Pepxml
      # A plain Hash holding the enzymatic search constraint. It is expected
      # to carry these keys:
      #
      #     :enzyme                     => a valid enzyme name
      #     :max_num_internal_cleavages => max number of internal cleavages allowed
      #     :min_number_termini         => minimum number of termini??
      class EnzymaticSearchConstraint < Hash
      end
    end
  end
end
20
+
21
# Summary of one search: the engine, mass types, database, constraints,
# modifications and remaining parameters.
class Ms::Ident::Pepxml::SearchSummary
  include Merge

  DEFAULT_SEARCH_ID = '1'

  attr_accessor :base_name
  # required in v18-19, optional in later versions
  attr_accessor :out_data_type
  # required in v18-19, optional in later versions
  attr_accessor :out_data
  # by default, "1"
  attr_accessor :search_id
  # a Modifications object
  attr_accessor :modifications
  # A SearchDatabase object (responds to :local_path and :type)
  attr_accessor :search_database
  # the other search parameters as a hash
  attr_accessor :parameters
  # the search engine used, SEQUEST, Mascot, Comet, etc.
  attr_accessor :search_engine
  # required: 'average' or 'monoisotopic'
  attr_accessor :precursor_mass_type
  # required: 'average' or 'monoisotopic'
  attr_accessor :fragment_mass_type
  # An EnzymaticSearchConstraint object (at the moment this is merely a hash
  # with a few required keys)
  attr_accessor :enzymatic_search_constraint

  # Creates fresh SearchDatabase, EnzymaticSearchConstraint, Modifications
  # and Parameters objects, stores them on self and returns them as an
  # array in that order.
  # NOTE(review): presumably called by Merge#merge! to produce the
  # arguments for the block given to new -- confirm against the merge gem.
  def block_arg
    [@search_database = Ms::Ident::Pepxml::SearchDatabase.new,
      @enzymatic_search_constraint = Ms::Ident::Pepxml::EnzymaticSearchConstraint.new,
      @modifications = Ms::Ident::Pepxml::Modifications.new,
      @parameters = Ms::Ident::Pepxml::Parameters.new,
    ]
  end

  # sets search_id to DEFAULT_SEARCH_ID, then hands the hash and block to
  # merge!
  def initialize(hash={}, &block)
    @search_id = DEFAULT_SEARCH_ID
    merge!(hash, &block)
  end

  # Adds a search_summary element to the builder, with the search database,
  # enzymatic search constraint, modifications and parameters beneath it
  # (each only if set).
  #
  # Returns the builder if one was given, otherwise the xml string of the
  # freshly built document root.
  def to_xml(builder=nil)
    # TODO: out_data and out_data_type are optional in later pepxml versions...
    # should work that in...
    attrs = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
    hash = Hash[ attrs.map {|at| v=send(at) ; [at, v] if v }.compact ]
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb: builder is nil when the caller passed nothing
    xmlb.search_summary(hash) do |xml|
      search_database.to_xml(xml) if search_database
      xml.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
      modifications.to_xml(xml) if modifications
      parameters.to_xml(xml) if parameters
    end
    builder || xmlb.doc.root.to_xml
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # not implemented: always raises NotImplementedError
  def from_pepxml_node(node)
    raise NotImplementedError, "not implemented just yet (just use the raw xml node)"
  end

end
86
+
87
+
88
+
@@ -0,0 +1,83 @@
1
+ require 'nokogiri'
2
+ require 'ms/mass'
3
+ require 'merge'
4
+
5
require 'set'  # Required (below) is a Set

module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end

# search_specification is a search constraint applied specifically to this query (a String)
class Ms::Ident::Pepxml::SpectrumQuery
  include Merge
  DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]

  class << self
    attr_writer :members
    # the ordered member list used for positional construction
    # (DEFAULT_MEMBERS unless overridden with members=)
    def members
      @members || DEFAULT_MEMBERS
    end
  end

  members.each {|memb| attr_accessor memb }

  Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
  Optional = [:retention_time_sec, :search_specification]

  # takes either a hash or an ordered list of values to set (in the order
  # of SpectrumQuery.members)
  # yields the search_results array (empty by default) if given a block
  def initialize(*args, &block)
    @search_results = []
    if args.first.is_a?(Hash)
      merge!(args.first)
    else
      # Only assign the values actually supplied; pairing every member with
      # a padded nil would wipe out the default search_results array.
      self.class.members.first(args.size).zip(args) do |k,v|
        send("#{k}=", v)
      end
    end
    block.call(@search_results) if block
  end

  def members
    self.class.members
  end

  ############################################################
  # FOR PEPXML:
  ############################################################

  # Adds a spectrum_query element to the builder, with each search result
  # beneath it. The attributes are the first 8 members (all through
  # search_specification) that are set; retention_time_sec is dropped
  # when pepxml_version is 18.
  #
  # Returns the builder if one was given, otherwise the xml string of the
  # freshly built document root.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # all through search_specification (unset members are compacted away
    # so that Hash[] only ever sees [key, value] pairs)
    attrs = members[0, 8].map {|at| v=send(at) ; [at, v] if v }.compact
    attrs_hash = Hash[attrs]
    case pepxml_version
    when 18
      attrs_hash.delete(:retention_time_sec)
    end
    xmlb.spectrum_query(attrs_hash) do |xml|
      (search_results || []).each do |search_result|
        search_result.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Sets the attributes found on a spectrum_query xml node, casting scan
  # numbers, index and charge to Integer and masses/times to Float. The
  # two Optional attributes are only set when present on the node. Child
  # elements are not read here.
  #
  # returns self
  def from_pepxml_node(node)
    @spectrum = node['spectrum']
    @start_scan = node['start_scan'].to_i
    @end_scan = node['end_scan'].to_i
    @precursor_neutral_mass = node['precursor_neutral_mass'].to_f
    @index = node['index'].to_i
    @assumed_charge = node['assumed_charge'].to_i
    retention_time = node['retention_time_sec']
    @retention_time_sec = retention_time.to_f if retention_time
    specification = node['search_specification']
    @search_specification = specification if specification
    self
  end

  # returns m_plus_h - h_plus + deltamass
  def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=Ms::Mass::H_PLUS)
    m_plus_h - h_plus + deltamass
  end
end
82
+
83
+
@@ -0,0 +1,61 @@
1
+ require 'nokogiri'
2
+ require 'ms/ident'
3
+ require 'ms/ident/pepxml/msms_pipeline_analysis'
4
+
5
module Ms
  module Ident
  end
end

class Numeric
  # The number as a string that always carries its sign: values that
  # are >= 0 get a leading '+', everything else is plain to_s.
  def to_plus_minus_string
    self >= 0 ? "+#{self}" : to_s
  end
end
+
18
# Top-level pepXML document object: wraps an MsmsPipelineAnalysis and knows
# how to write it out (with an xml-stylesheet instruction) as xml.
class Ms::Ident::Pepxml
  XML_STYLESHEET_LOCATION = '/tools/bin/TPP/tpp/schema/pepXML_std.xsl'
  DEFAULT_PEPXML_VERSION = MsmsPipelineAnalysis::PEPXML_VERSION

  attr_accessor :msms_pipeline_analysis

  # the pepxml version reported by msms_pipeline_analysis
  def pepxml_version
    msms_pipeline_analysis.pepxml_version
  end

  # returns an array of spectrum queries
  def spectrum_queries
    run_summary = msms_pipeline_analysis.msms_run_summary
    run_summary.spectrum_queries
  end

  # yields a new MsmsPipelineAnalysis object if given a block (without a
  # block, msms_pipeline_analysis is left unset)
  def initialize(&block)
    return unless block
    @msms_pipeline_analysis = MsmsPipelineAnalysis.new
    block.call(@msms_pipeline_analysis)
  end

  # Inserts an xml-stylesheet processing instruction pointing at location
  # just before the root of the given xml document; returns the document.
  def add_stylesheet(doc, location)
    instruction_text = %Q{type="text/xsl" href="#{location}"}
    instruction = Nokogiri::XML::ProcessingInstruction.new(doc, "xml-stylesheet", instruction_text)
    doc.root.add_previous_sibling(instruction)
    doc
  end

  # writes xml file named msms_pipeline_analysis.summary_xml into the
  # directory part of msms_run_summary.base_name
  def to_xml_file
    directory = File.dirname(msms_pipeline_analysis.msms_run_summary.base_name)
    file_name = msms_pipeline_analysis.summary_xml
    to_xml(directory + '/' + file_name)
  end

  # if no outfile is given, an xml string is returned; otherwise the xml is
  # written to outfile. summary_xml should have already been set and is not
  # influenced by the outfile given here.
  def to_xml(outfile=nil)
    builder = Nokogiri::XML::Builder.new
    msms_pipeline_analysis.to_xml(builder)
    add_stylesheet(builder.doc, Ms::Ident::Pepxml::XML_STYLESHEET_LOCATION)
    string = builder.doc.to_xml
    return string unless outfile
    File.open(outfile,'w') {|out| out.print(string) }
  end
end
60
+
61
+
data/lib/ms/ident.rb ADDED
@@ -0,0 +1,11 @@
1
+
2
module Ms
  module Ident
    # returns the filetype (if possible): :srf when the name ends in
    # ".srf" (any letter case), nil otherwise.
    #
    # file may be a String or anything whose to_s gives the file name
    # (e.g. a Pathname).
    def self.filetype(file)
      # \z anchors to the very end of the string; $ would also match just
      # before a newline in the middle of the name
      :srf if file.to_s =~ /\.srf\z/i
    end
  end
end