ms-ident 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ require 'andand'
2
+ require 'nokogiri'
3
+
4
module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end
class Ms::Ident::Pepxml::SearchHit ; end


# Positions and masses of modifications
#
# Struct members:
#   modified_peptide::     Peptide sequence (with indicated modifications);
#                          the original author assumed native sequest
#                          indicators are OK here
#   mod_aminoacid_masses:: objects of type ...ModAminoacidMass (position
#                          ranges from 1 to peptide length)
#   mod_nterm_mass::       Mass of modified N terminus
#   mod_cterm_mass::       Mass of modified C terminus
Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do
  ## Should be something like this:
  # <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
  #   <mod_aminoacid_mass position=" " mass=" "/>
  # </modification_info>
  # e.g.:
  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
  #   <mod_aminoacid_mass position="2" mass="545.7160"/>
  #   <mod_aminoacid_mass position="3" mass="147.1926"/>
  # </modification_info>

  class << self
    alias_method :old_new, :new

    # takes either a hash (keyed by member name) or the normal list of
    # values to set.
    def new(*args)
      # A leading Hash is translated into positional values in member order;
      # members missing from the hash become nil.
      args = args.first.values_at(*members) if args.first.is_a?(Hash)
      old_new(*args)
    end
  end

  # Emits a <modification_info> element (with one child per entry of
  # mod_aminoacid_masses) on the given builder, or on a fresh
  # Nokogiri::XML::Builder when none is given.  Returns whatever the
  # builder's modification_info call returns.
  #
  # Will escape any xml special chars in modified_peptide
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # Only attributes that are actually set are written.  The nil entries must
    # be dropped *before* building the hash: Hash[] rejects non-pair elements.
    atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide].map do |at|
      value = send(at)
      value ? [at, value] : nil
    end.compact
    # The block takes a parameter on purpose.
    # NOTE(review): Nokogiri's builder is documented to instance_eval
    # parameterless blocks, which would resolve mod_aminoacid_masses against
    # the builder instead of this struct -- confirm against the Nokogiri
    # version in use.
    xmlb.modification_info(Hash[atts]) do |xml|
      (mod_aminoacid_masses || []).each do |mod_aa_mass|
        mod_aa_mass.to_xml(xml)
      end
    end
  end

  # Builds a new ModificationInfo from a <modification_info> node.
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills this object from a <modification_info> node: the three attributes
  # are copied as given by node[], and every child named mod_aminoacid_mass
  # becomes a ModAminoacidMass (position as Integer, mass as Float).
  #
  # returns self
  def from_pepxml_node(node)
    self.modified_peptide = node['modified_peptide']
    self.mod_nterm_mass = node['mod_nterm_mass']
    self.mod_cterm_mass = node['mod_cterm_mass']
    mass_class = Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass
    # children must be iterated explicitly, and other child nodes (e.g.
    # whitespace text between the elements) are skipped by name.
    mass_nodes = node.children.select {|child| child.name == 'mod_aminoacid_mass' }
    self.mod_aminoacid_masses = mass_nodes.map do |mass_n|
      mass_class.new(mass_n['position'].to_i, mass_n['mass'].to_f)
    end
    self
  end
end
77
+
78
# One modified residue: its position in the peptide and its mass.
Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
  # Emits a <mod_aminoacid_mass> element carrying position and mass on the
  # given builder and returns the builder call's result.
  def to_xml(builder)
    attributes = { :position => position, :mass => mass }
    builder.mod_aminoacid_mass(attributes)
  end
end
@@ -0,0 +1,141 @@
1
+ require 'set'
2
+ require 'merge'
3
+ require 'nokogiri'
4
+
5
module Ms ; end
module Ms::Ident ; end


class Ms::Ident::Pepxml

  # A single peptide hit of a search result, serializable as a pepXML
  # <search_hit> element.
  class Ms::Ident::Pepxml::SearchHit
    include Merge

    DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]

    Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])

    class << self
      attr_writer :members
      # the ordered attribute names (DEFAULT_MEMBERS unless overridden)
      def members
        @members || DEFAULT_MEMBERS
      end
    end

    members.each {|memb| attr_accessor memb }

    # rank of the peptide hit (required)
    attr_accessor :hit_rank
    # Peptide aminoacid sequence (with no indicated modifications) (required)
    attr_accessor :peptide

    # Aminoacid preceding peptide ('-' if none)
    attr_accessor :peptide_prev_aa

    # Aminoacid following peptide ('-' if none)
    attr_accessor :peptide_next_aa

    # Number of peptide fragment ions found in spectrum (Integer)
    attr_accessor :num_matched_ions

    # Number of peptide fragment ions predicted for peptide (Integer)
    attr_accessor :tot_num_ions

    # (required)
    attr_accessor :calc_neutral_pep_mass

    # Mass(precursor ion) - Mass(peptide) (required)
    attr_accessor :massdiff

    # Number of peptide termini consistent with cleavage by sample enzyme
    attr_accessor :num_tol_term

    # Number of sample enzyme cleavage sites internal to peptide
    attr_accessor :num_missed_cleavages

    # Potential use in future for user manual validation (true/false)
    # by default, this will be set to false
    # (the xml is expressed as a 0 or 1)
    attr_accessor :is_rejected

    # a protein identifier string (required)
    attr_accessor :protein

    # Number of unique proteins in search database containing peptide
    # (required)
    attr_accessor :num_tot_proteins

    # Extracted from search database
    attr_accessor :protein_desc

    attr_accessor :calc_pI
    attr_accessor :protein_mw

    # a ModificationInfo object
    attr_accessor :modification_info

    # a Hash with keys (the score type) and values
    # (to_xml calls each_pair to generate the xml, so a Struct would also
    # work)
    attr_accessor :search_scores

    # a link back to the spectrum_query object
    attr_accessor :spectrum_query


    Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}

    alias_method :aaseq, :peptide
    alias_method :aaseq=, :peptide=

    # Builds a new SearchHit from a <search_hit> node.
    def self.from_pepxml_node(node)
      self.new.from_pepxml_node(node)
    end

    # takes either a hash or an ordered list of values to set.
    # yields an empty search_scores hash if given a block.
    # mind that you set the ModificationInfo object as needed.
    def initialize(*args, &block)
      @search_scores = {}
      if args.first.is_a?(Hash)
        merge!(args.first)
      else
        # positional values are matched to members in order; members without
        # a value are set to nil (this includes search_scores when fewer
        # values than members are given)
        self.class.members.zip(args) do |k,v|
          send("#{k}=", v)
        end
      end
      block.call(@search_scores) if block
    end

    # the ordered attribute names of this object's class
    def members
      self.class.members
    end

    # Emits a <search_hit> element holding the set attributes, the
    # modification info (if any) and one <search_score> per search_scores
    # pair.  Returns the builder if one was given, otherwise the xml string.
    def to_xml(builder=nil)
      xmlb = builder || Nokogiri::XML::Builder.new
      # the first 14 members (hit_rank .. protein_desc) become attributes
      # NOTE(review): calc_pI and protein_mw are set-able but never written
      # here -- confirm whether that is intended.
      attrs = members[0,14].map {|k| v=send(k) ; [k, v] if v }.compact
      hash_attrs = Hash[attrs]
      # massdiff is written with an explicit sign (Numeric#to_plus_minus_string
      # is a core extension defined elsewhere in this package); an unset
      # massdiff is left out instead of raising NoMethodError on nil.
      hash_attrs[:massdiff] = hash_attrs[:massdiff].to_plus_minus_string if hash_attrs[:massdiff]
      xmlb.search_hit(hash_attrs) do |xml|
        @modification_info.to_xml(xml) if @modification_info
        @search_scores.each_pair {|k,v| xml.search_score(:name => k, :value => v) }
      end
      builder || xmlb.doc.root.to_xml
    end

    # Fills this object from the attributes of a <search_hit> node and
    # returns self.  Values are assigned through the named accessors: this
    # class defines no index-based []= in this file, and the member order
    # differs from the attribute order that positional writes assumed.
    # NOTE(review): confirm the Merge mixin was not expected to supply []=.
    def from_pepxml_node(node)
      self.hit_rank = node['hit_rank'].to_i
      self.peptide = node['peptide']
      self.peptide_prev_aa = node['peptide_prev_aa']
      self.peptide_next_aa = node['peptide_next_aa']
      self.protein = node['protein'] ## will this be the string?? (yes, for now)
      self.num_tot_proteins = node['num_tot_proteins'].to_i
      self.num_matched_ions = node['num_matched_ions'].to_i
      self.tot_num_ions = node['tot_num_ions'].to_i
      self.calc_neutral_pep_mass = node['calc_neutral_pep_mass'].to_f
      self.massdiff = node['massdiff'].to_f
      self.num_tol_term = node['num_tol_term'].to_i
      self.num_missed_cleavages = node['num_missed_cleavages'].to_i
      self.is_rejected = node['is_rejected'].to_i
      self
    end
  end

end
141
+
@@ -0,0 +1,28 @@
1
+ require 'nokogiri'
2
+
3
module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end

# Holds the search hits of one search and writes them as a <search_result>
# element.
class Ms::Ident::Pepxml::SearchResult
  # an array of search_hits
  attr_accessor :search_hits

  # if block given, then yields the search_hits array (empty unless one was
  # passed in)
  def initialize(search_hits = [], &block)
    @search_hits = search_hits
    block.call(@search_hits) if block
  end

  # Emits a <search_result> element containing every search hit.  Returns
  # the builder if one was given, otherwise the xml string.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb: builder is nil when the caller passed nothing
    xmlb.search_result do |xml|
      search_hits.each do |sh|
        sh.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

end
28
+
@@ -0,0 +1,88 @@
1
+ require 'ms/ident/pepxml/search_database'
2
+ require 'ms/ident/pepxml/modifications'
3
+ require 'ms/ident/pepxml/parameters'
4
+
5
+ require 'nokogiri'
6
+ require 'merge'
7
+
8
module Ms; end
module Ms::Ident; end
class Ms::Ident::Pepxml; end


# A plain Hash subclass; it is expected to carry these keys:
#
#     :enzyme => a valid enzyme name
#     :max_num_internal_cleavages => max number of internal cleavages allowed
#     :min_number_termini => minimum number of termini??
class Ms::Ident::Pepxml::EnzymaticSearchConstraint < Hash; end
20
+
21
# Describes one search (engine, mass types, database, modifications, other
# parameters) and writes it as a <search_summary> element.
class Ms::Ident::Pepxml::SearchSummary
  include Merge

  DEFAULT_SEARCH_ID = '1'

  attr_accessor :base_name
  # required in v18-19, optional in later versions
  attr_accessor :out_data_type
  # required in v18-19, optional in later versions
  attr_accessor :out_data
  # by default, "1"
  attr_accessor :search_id
  # a Modifications object
  attr_accessor :modifications
  # A SearchDatabase object (responds to :local_path and :type)
  attr_accessor :search_database
  # the other search parameters as a hash
  attr_accessor :parameters
  # the search engine used, SEQUEST, Mascot, Comet, etc.
  attr_accessor :search_engine
  # required: 'average' or 'monoisotopic'
  attr_accessor :precursor_mass_type
  # required: 'average' or 'monoisotopic'
  attr_accessor :fragment_mass_type
  # An EnzymaticSearchConstraint object (at the moment this is merely a hash
  # with a few required keys)
  attr_accessor :enzymatic_search_constraint

  # Creates fresh SearchDatabase, EnzymaticSearchConstraint, Modifications
  # and Parameters objects, stores them on this summary and returns them as
  # an array in that order.
  # NOTE(review): presumably what Merge#merge! yields to the block given to
  # initialize -- confirm against the merge library.
  def block_arg
    [@search_database = Ms::Ident::Pepxml::SearchDatabase.new,
      @enzymatic_search_constraint = Ms::Ident::Pepxml::EnzymaticSearchConstraint.new,
      @modifications = Ms::Ident::Pepxml::Modifications.new,
      @parameters = Ms::Ident::Pepxml::Parameters.new,
    ]
  end

  # Sets search_id to its default, then hands the hash and block to merge!
  def initialize(hash={}, &block)
    @search_id = DEFAULT_SEARCH_ID
    merge!(hash, &block)
  end

  # Emits a <search_summary> element with the set attributes and the
  # search database, enzymatic search constraint, modifications and
  # parameters that are present.  Returns the builder if one was given,
  # otherwise the xml string.
  def to_xml(builder=nil)
    # TODO: out_data and out_data_type are optional in later pepxml versions...
    # should work that in...
    attrs = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
    hash = Hash[ attrs.map {|at| v=send(at) ; [at, v] if v }.compact ]
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb: builder is nil when the caller passed nothing
    xmlb.search_summary(hash) do |xml|
      # every child is optional here, guarded the same way
      search_database.to_xml(xml) if search_database
      xml.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
      modifications.to_xml(xml) if modifications
      parameters.to_xml(xml) if parameters
    end
    builder || xmlb.doc.root.to_xml
  end

  # Builds a new SearchSummary from a node (see the instance method).
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Not implemented: always raises NotImplementedError.
  def from_pepxml_node(node)
    raise NotImplementedError, "not implemented just yet (just use the raw xml node)"
  end

end
86
+
87
+
88
+
@@ -0,0 +1,83 @@
1
require 'set'
require 'nokogiri'
require 'ms/mass'
require 'merge'
4
+
5
module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end

# One queried spectrum together with its search results, serializable as a
# pepXML <spectrum_query> element.
#
# search_specification is a search constraint applied specifically to this query (a String)
class Ms::Ident::Pepxml::SpectrumQuery
  include Merge
  DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]

  class << self
    attr_writer :members
    # the ordered attribute names (DEFAULT_MEMBERS unless overridden)
    def members
      @members || DEFAULT_MEMBERS
    end
  end

  members.each {|memb| attr_accessor memb }

  Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
  Optional = [:retention_time_sec, :search_specification]

  # takes either a hash or an ordered list of values to set
  # yields an empty search_results array if given a block
  def initialize(*args, &block)
    @search_results = []
    if args.first.is_a?(Hash)
      merge!(args.first)
    else
      # positional values are matched to members in order; members without a
      # value are set to nil (this includes search_results when fewer values
      # than members are given)
      self.class.members.zip(args) do |k,v|
        send("#{k}=", v)
      end
    end
    block.call(@search_results) if block
  end

  # the ordered attribute names of this object's class
  def members
    self.class.members
  end

  ############################################################
  # FOR PEPXML:
  ############################################################

  # Emits a <spectrum_query> element with the set attributes and every
  # search result.  Returns the builder if one was given, otherwise the xml
  # string.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # all through search_specification; unset (nil) attributes are dropped
    # before building the hash, since Hash[] rejects non-pair elements
    attrs = members[0, 8].map {|at| v=send(at) ; [at, v] if v }.compact
    attrs_hash = Hash[attrs]
    case pepxml_version
    when 18
      # retention_time_sec is never written for version 18
      attrs_hash.delete(:retention_time_sec)
    end
    xmlb.spectrum_query(attrs_hash) do |xml|
      search_results.each do |search_result|
        search_result.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  # Builds a new SpectrumQuery from a <spectrum_query> node.
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Reads the six required attributes from the node (scan numbers, index and
  # charge as Integer, precursor mass as Float) and returns self.  The
  # optional attributes are not read.
  def from_pepxml_node(node)
    @spectrum = node['spectrum']
    @start_scan = node['start_scan'].to_i
    @end_scan = node['end_scan'].to_i
    @precursor_neutral_mass = node['precursor_neutral_mass'].to_f
    @index = node['index'].to_i
    @assumed_charge = node['assumed_charge'].to_i
    self
  end

  # returns m_plus_h - h_plus + deltamass
  def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=Ms::Mass::H_PLUS)
    m_plus_h - h_plus + deltamass
  end
end
82
+
83
+
@@ -0,0 +1,61 @@
1
+ require 'nokogiri'
2
+ require 'ms/ident'
3
+ require 'ms/ident/pepxml/msms_pipeline_analysis'
4
+
5
module Ms ; module Ident ; end ; end

class Numeric
  # returns a string with a + or - on the front: non-negative numbers get a
  # leading '+', negative numbers keep the '-' that to_s already gives them.
  #
  # The result is built by interpolation rather than by appending to a
  # string literal, so it also works when string literals are frozen.
  def to_plus_minus_string
    self >= 0 ? "+#{self}" : to_s
  end
end
17
+
18
# Top-level pepXML document object: wraps an MsmsPipelineAnalysis and writes
# it out as xml with a stylesheet processing instruction.
class Ms::Ident::Pepxml
  XML_STYLESHEET_LOCATION = '/tools/bin/TPP/tpp/schema/pepXML_std.xsl'
  DEFAULT_PEPXML_VERSION = MsmsPipelineAnalysis::PEPXML_VERSION

  attr_accessor :msms_pipeline_analysis

  # yields a new MsmsPipelineAnalysis object (and keeps it) if given a
  # block; without a block nothing is created
  def initialize(&block)
    return unless block
    @msms_pipeline_analysis = MsmsPipelineAnalysis.new
    block.call(@msms_pipeline_analysis)
  end

  # the pepxml version of the wrapped msms_pipeline_analysis
  def pepxml_version
    msms_pipeline_analysis.pepxml_version
  end

  # returns an array of spectrum queries
  def spectrum_queries
    run_summary = msms_pipeline_analysis.msms_run_summary
    run_summary.spectrum_queries
  end

  # takes an xml document object, puts an xml-stylesheet processing
  # instruction pointing at location in front of its root, and returns the
  # document
  def add_stylesheet(doc, location)
    instruction_body = %Q{type="text/xsl" href="#{location}"}
    xml_stylesheet = Nokogiri::XML::ProcessingInstruction.new(doc, "xml-stylesheet", instruction_body)
    doc.root.add_previous_sibling(xml_stylesheet)
    doc
  end

  # writes xml file named msms_pipeline_analysis.summary_xml into the msms_run_summary.base_name directory
  def to_xml_file
    base_dir = File.dirname(msms_pipeline_analysis.msms_run_summary.base_name)
    outfile = base_dir + '/' + msms_pipeline_analysis.summary_xml
    to_xml(outfile)
  end

  # if no outfile is given, an xml string is returned.  summary_xml should
  # have already been set and is not influenced by the outfile given here.
  def to_xml(outfile=nil)
    builder = Nokogiri::XML::Builder.new
    msms_pipeline_analysis.to_xml(builder)
    add_stylesheet(builder.doc, Ms::Ident::Pepxml::XML_STYLESHEET_LOCATION)
    string = builder.doc.to_xml
    return string unless outfile
    File.open(outfile, 'w') {|out| out.print(string) }
  end
end
60
+
61
+
data/lib/ms/ident.rb ADDED
@@ -0,0 +1,11 @@
1
+
2
module Ms
  module Ident
    # returns the filetype (if possible): :srf when the name ends in ".srf"
    # (case-insensitive), otherwise nil.
    #
    # The argument is converted with to_s, so non-String path objects are
    # accepted, and the extension is anchored with \z so it has to end the
    # whole string, not merely one line of it.
    def self.filetype(file)
      :srf if file.to_s =~ /\.srf\z/i
    end
  end
end