ms-ident 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +31 -0
- data/Gemfile.lock +32 -0
- data/LICENSE +61 -0
- data/README.rdoc +97 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/ident/pepxml/modifications/sequest.rb +237 -0
- data/lib/ms/ident/pepxml/modifications.rb +94 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +81 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/pep_summary.rb +104 -0
- data/lib/ms/ident/pepxml/prot_summary.rb +484 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +166 -0
- data/lib/ms/ident/pepxml/search_database.rb +42 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +82 -0
- data/lib/ms/ident/pepxml/search_hit.rb +141 -0
- data/lib/ms/ident/pepxml/search_result.rb +28 -0
- data/lib/ms/ident/pepxml/search_summary.rb +88 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +83 -0
- data/lib/ms/ident/pepxml.rb +61 -0
- data/lib/ms/ident.rb +11 -0
- data/schema/pepXML_v115.xsd +1458 -0
- data/schema/pepXML_v19.xsd +1337 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml_spec.rb +436 -0
- data/spec/spec_helper.rb +40 -0
- metadata +194 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'merge'
|
2
|
+
|
3
|
+
require 'ms/ident/pepxml/msms_run_summary'
|
4
|
+
|
5
|
+
# Namespace scaffolding: guarantees that Ms::Ident::Pepxml exists so the
# class that follows can be opened with its fully qualified name.
module Ms
  module Ident
    class Pepxml
    end
  end
end
|
8
|
+
|
9
|
+
# Root <msms_pipeline_analysis> element of a pepXML document.
#
# Carries the namespace / schema attributes written on the root element and
# one msms_run_summary child that is serialized inside it.
class Ms::Ident::Pepxml::MsmsPipelineAnalysis
  include Merge

  XMLNS = "http://regis-web.systemsbiology.net/pepXML"
  XMLNS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
  # (this doesn't actually exist), also, the space is supposed to be there
  XSI_SCHEMA_LOCATION_BASE = "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v"
  # relative to v18, the only additions that concern a writer are to
  # 'spectrum': retention_time_sec and activationMethodType
  PEPXML_VERSION = 115

  # default namespace written as the xmlns attribute
  attr_accessor :xmlns
  # namespace bound to the xsi prefix (written as xmlns:xsi)
  attr_accessor :xmlns_xsi
  # explicit override for xsi:schemaLocation; when never assigned the value
  # is derived from pepxml_version (see #xsi_schema_location)
  attr_writer :xsi_schema_location
  # an Integer
  attr_accessor :pepxml_version
  # self referential path to the outputfile
  attr_accessor :summary_xml
  # the object serialized as the single child element (needs #to_xml)
  attr_accessor :msms_run_summary
  attr_writer :date

  # Creates a fresh MsmsRunSummary, stores it as this object's
  # msms_run_summary and returns it.
  def block_arg
    @msms_run_summary = Ms::Ident::Pepxml::MsmsRunSummary.new
  end

  # Sets the namespace and version defaults, then hands hash and block to
  # Merge#merge!.
  #
  # NOTE(review): Merge is defined outside this file; presumably merge!
  # assigns each hash pair through the matching writer and yields block_arg
  # (a new msms_run_summary) to the block -- confirm in lib/merge.rb.
  def initialize(hash={}, &block)
    @xmlns = XMLNS
    @xmlns_xsi = XMLNS_XSI
    # The schema location is intentionally NOT cached here: it depends on
    # pepxml_version, which may still be changed by merge! or by the caller.
    @pepxml_version = PEPXML_VERSION
    merge!(hash, &block)
  end

  # Returns the explicitly assigned schema location if there is one,
  # otherwise the location derived from the current pepxml version number.
  def xsi_schema_location
    @xsi_schema_location || "#{XSI_SCHEMA_LOCATION_BASE}#{pepxml_version}.xsd"
  end

  # Returns the assigned date string; if none was given, the current local
  # time formatted as YYYY-MM-DDTHH:MM:SS (zero padded).
  def date
    @date || Time.now.strftime('%Y-%m-%dT%H:%M:%S')
  end

  # Serializes this element (and its msms_run_summary, when present).
  #
  # Optionally takes an xml builder object; returns that builder if one was
  # given, otherwise the xml string of the root element of a new document.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attrs = {
      :date => date,
      :xmlns => xmlns,
      # declare the xsi prefix before the attribute that uses it
      :'xmlns:xsi' => xmlns_xsi,
      :'xsi:schemaLocation' => xsi_schema_location,
      :summary_xml => summary_xml
    }
    xmlb.msms_pipeline_analysis(attrs) do |inner|
      msms_run_summary.to_xml(inner) if msms_run_summary
    end
    builder || xmlb.doc.root.to_xml
  end
end
|
69
|
+
|
70
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'merge'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
require 'ms/ident/pepxml/sample_enzyme'
|
5
|
+
require 'ms/ident/pepxml/search_summary'
|
6
|
+
|
7
|
+
# Namespace scaffolding: guarantees that Ms::Ident::Pepxml exists so the
# class that follows can be opened with its fully qualified name.
module Ms
  module Ident
    class Pepxml
    end
  end
end
|
10
|
+
|
11
|
+
# The <msms_run_summary> element of a pepXML document: instrument / raw data
# attributes plus a sample enzyme, a search summary and the spectrum queries.
class Ms::Ident::Pepxml::MsmsRunSummary
  include Merge

  # The name of the pep xml file without any extension
  attr_accessor :base_name
  # The name of the mass spec manufacturer
  attr_accessor :ms_manufacturer
  attr_accessor :ms_model
  attr_accessor :ms_mass_analyzer
  attr_accessor :ms_detector
  attr_accessor :raw_data_type
  attr_accessor :raw_data
  attr_accessor :ms_ionization
  attr_accessor :pepxml_version

  # A SampleEnzyme object (responds to: name, cut, no_cut, sense)
  attr_accessor :sample_enzyme
  # A SearchSummary object
  attr_accessor :search_summary
  # An array of spectrum_queries
  attr_accessor :spectrum_queries

  # Replaces sample_enzyme and search_summary with freshly created objects
  # and returns [sample_enzyme, search_summary, spectrum_queries].
  def block_arg
    @sample_enzyme = Ms::Ident::Pepxml::SampleEnzyme.new
    @search_summary = Ms::Ident::Pepxml::SearchSummary.new
    [@sample_enzyme, @search_summary, @spectrum_queries]
  end

  # takes a hash of name, value pairs
  # if block given, yields a SampleEnzyme object, a SearchSummary and an array
  # for SpectrumQueries
  #
  # NOTE(review): the block is passed to merge! and then also called here
  # directly. If Merge#merge! already yields block_arg (Merge is not visible
  # from this file), the block runs twice and the first SampleEnzyme /
  # SearchSummary pair is discarded -- confirm against lib/merge.rb.
  def initialize(hash={}, &block)
    @spectrum_queries = []
    merge!(hash, &block)
    block.call(block_arg) if block
  end

  # optionally takes an xml builder object and returns the builder, or the xml
  # string if no builder was given
  # sets the index attribute of each spectrum query if it is not already set
  # (1-based, in array order)
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # Attributes without a value are left out entirely; the filtered hash is
    # built in one step rather than deleting keys while iterating.
    attrs = {
      :base_name => base_name,
      :msManufacturer => ms_manufacturer,
      :msModel => ms_model,
      :msIonization => ms_ionization,
      :msMassAnalyzer => ms_mass_analyzer,
      :msDetector => ms_detector,
      :raw_data_type => raw_data_type,
      :raw_data => raw_data
    }.reject { |_name, value| !value }
    xmlb.msms_run_summary(attrs) do |inner|
      sample_enzyme.to_xml(inner) if sample_enzyme
      search_summary.to_xml(inner) if search_summary
      spectrum_queries.each_with_index do |sq, i|
        sq.index = i + 1 unless sq.index
        sq.to_xml(inner)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  # Builds a new object and fills it from the given node.
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Copies the run-level attributes from node (anything indexable by
  # attribute name with []) into this object. Child elements are not read
  # here.
  #
  # returns self
  def from_pepxml_node(node)
    @base_name = node['base_name']
    @ms_manufacturer = node['msManufacturer']
    @ms_model = node['msModel']
    # msIonization belongs in ms_ionization; it used to overwrite the
    # manufacturer read above
    @ms_ionization = node['msIonization']
    @ms_mass_analyzer = node['msMassAnalyzer']
    @ms_detector = node['msDetector']
    @raw_data_type = node['raw_data_type']
    @raw_data = node['raw_data']
    self
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
|
2
|
+
require 'arrayclass'
|
3
|
+
require 'spec_id/sequest/pepxml'
|
4
|
+
require 'spec_id/parser/proph'
|
5
|
+
|
6
|
+
# Namespace scaffolding: guarantees these constants exist (reopening them if
# they were already defined) before the Proph code below refers to them.
module Sequest
  class PepXML
    class MSMSRunSummary
    end

    class SearchHit
    end
  end
end

module SpecID
  module Prot
  end

  module Pep
  end
end
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
module Proph

  # A PeptideProphet peptide summary. When given a file it detects the
  # PeptideProphet version from the file header and hands the file to
  # SpecID::Parser::PepProph with itself as the :spec_id target.
  class PepSummary
    include SpecID

    # captures the version number from the PeptideProphet header line
    Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /

    # inherits prots and peps

    # the protein groups
    # currently these are just xml nodes returned!
    attr_accessor :peptideprophet_summary
    attr_accessor :msms_run_summaries
    # the PeptideProphet version string detected by get_version
    attr_accessor :version

    def hi_prob_best ; true end

    # Looks for the PeptideProphet version string in the first 8 lines of
    # file (fewer if the file is shorter) and returns it as a String.
    #
    # raises ArgumentError if none of those lines carries a version.
    def get_version(file)
      header = File.open(file) { |fh| fh.each_line.first(8) }
      header.each do |line|
        match = Filetype_and_version_re_new.match(line)
        return match[1] if match
      end
      raise ArgumentError, "couldn't detect version in #{file}"
    end

    # the class used to represent each search hit
    def search_hit_class
      PepSummary::Pep
    end

    # With no file, creates an empty summary. With a file, records the
    # detected version and lets the parser fill this object.
    def initialize(file=nil)
      return unless file
      @version = get_version(file)
      SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
    end
  end

  # this is a SpecID::Pep (by interface: not including stuff yet)
  class PepSummary::Pep < Sequest::PepXML::SearchHit
    # aaseq is defined in SearchHit

    # NOTE(review): add_member is provided by the superclass (defined outside
    # this file); presumably it adds an accessor pair per name -- confirm.
    %w(probability fval ntt nmc massd prots).each do |member|
      add_member(member)
    end

    # Fills the inherited fields via super, then reads probability and the
    # fval / ntt / nmc / massd parameters from the peptideprophet_result
    # found under the node's first analysis_result child.
    #
    # A hit with no analysis_result or no peptideprophet_result keeps its
    # prophet fields unset instead of failing on nil.
    #
    # returns self
    def from_pepxml_node(node)
      super(node)

      an_res = node.find_first('child::analysis_result')
      pp_n = an_res && an_res.find_first('child::peptideprophet_result')
      return self unless pp_n

      self.probability = pp_n['probability'].to_f
      pp_n.find('descendant::parameter').each do |par_n|
        value = par_n['value']
        case par_n['name']
        when 'fval'  then self.fval = value.to_f
        when 'ntt'   then self.ntt = value.to_i
        when 'nmc'   then self.nmc = value.to_i
        when 'massd' then self.massd = value.to_f
        end
      end
      self
    end
  end

  ::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))

  # A protein entry; members (in order): name, protein_descr, peps.
  class PepSummary::Prot
    def first_entry ; self[0] end ## name
    # name and description separated by a single space
    def reference ; self[0] + ' ' + self[1] end
  end

end
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|