ms-ident 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +31 -0
- data/Gemfile.lock +32 -0
- data/LICENSE +61 -0
- data/README.rdoc +97 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/ident/pepxml/modifications/sequest.rb +237 -0
- data/lib/ms/ident/pepxml/modifications.rb +94 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +81 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/pep_summary.rb +104 -0
- data/lib/ms/ident/pepxml/prot_summary.rb +484 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +166 -0
- data/lib/ms/ident/pepxml/search_database.rb +42 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +82 -0
- data/lib/ms/ident/pepxml/search_hit.rb +141 -0
- data/lib/ms/ident/pepxml/search_result.rb +28 -0
- data/lib/ms/ident/pepxml/search_summary.rb +88 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +83 -0
- data/lib/ms/ident/pepxml.rb +61 -0
- data/lib/ms/ident.rb +11 -0
- data/schema/pepXML_v115.xsd +1458 -0
- data/schema/pepXML_v19.xsd +1337 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml_spec.rb +436 -0
- data/spec/spec_helper.rb +40 -0
- metadata +194 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'merge'
|
2
|
+
|
3
|
+
require 'ms/ident/pepxml/msms_run_summary'
|
4
|
+
|
5
|
+
# Namespace scaffolding: guarantees that Ms, Ms::Ident and Ms::Ident::Pepxml
# exist before classes are defined underneath them.
module Ms
  module Ident
    class Pepxml
    end
  end
end
|
8
|
+
|
9
|
+
# Root element of a pepXML document (<msms_pipeline_analysis>).
#
# Carries the document-level attributes (namespaces, schema location, date,
# summary_xml) plus one msms_run_summary child, and serializes them through a
# Nokogiri XML builder.
class Ms::Ident::Pepxml::MsmsPipelineAnalysis
  include Merge

  XMLNS = "http://regis-web.systemsbiology.net/pepXML"
  XMLNS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
  # (this doesn't actually exist), also, the space is supposed to be there
  XSI_SCHEMA_LOCATION_BASE = "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v"
  # the only additions concerning a writer since v18 are to the 'spectrum':
  # retention_time_sec and activationMethodType
  PEPXML_VERSION = 115

  # default namespace URI written as the xmlns attribute
  attr_accessor :xmlns
  # namespace URI bound to the xsi prefix
  attr_accessor :xmlns_xsi
  # explicit override for the schema location (see #xsi_schema_location)
  attr_writer :xsi_schema_location
  # an Integer
  attr_accessor :pepxml_version
  # self referential path to the outputfile
  attr_accessor :summary_xml
  # the child object serialized inside this element (must respond to to_xml)
  attr_accessor :msms_run_summary
  attr_writer :date

  # Creates a fresh MsmsRunSummary, stores it as the msms_run_summary and
  # returns it.
  # NOTE(review): presumably this is what Merge#merge! hands to the block
  # given to #initialize -- Merge is defined in lib/merge.rb; confirm there.
  def block_arg
    @msms_run_summary = Ms::Ident::Pepxml::MsmsRunSummary.new
  end

  # Sets the default namespaces and pepXML version, then applies +hash+ (and
  # the optional block) through Merge#merge!.
  #
  # The schema location is deliberately NOT precomputed here: it depends on
  # pepxml_version, which may be overridden by +hash+, so it is derived lazily
  # in #xsi_schema_location.
  def initialize(hash={}, &block)
    @xmlns = XMLNS
    @xmlns_xsi = XMLNS_XSI
    @pepxml_version = PEPXML_VERSION
    @xsi_schema_location = nil
    merge!(hash, &block)
  end

  # Returns the explicitly assigned schema location if there is one, otherwise
  # the location derived from the current pepxml version number.
  def xsi_schema_location
    @xsi_schema_location || (XSI_SCHEMA_LOCATION_BASE + pepxml_version.to_s + '.xsd')
  end

  # Returns the date that was assigned, or the current local time formatted as
  # a zero-padded timestamp (YYYY-MM-DDTHH:MM:SS) when none was given.
  def date
    @date || Time.now.strftime('%Y-%m-%dT%H:%M:%S')
  end

  # Serializes this element (and its msms_run_summary, if any).
  #
  # builder:: optional Nokogiri::XML::Builder to append to
  #
  # Returns the builder when one was passed in, otherwise the XML string of
  # the root element of a freshly created document.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attributes = {
      :date => date,
      :xmlns => xmlns,
      # declare the xsi prefix before it is used by xsi:schemaLocation
      :'xmlns:xsi' => xmlns_xsi,
      :'xsi:schemaLocation' => xsi_schema_location,
      :summary_xml => summary_xml,
    }
    xmlb.msms_pipeline_analysis(attributes) do |inner|
      msms_run_summary.to_xml(inner) if msms_run_summary
    end
    builder || xmlb.doc.root.to_xml
  end
end
|
69
|
+
|
70
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'merge'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
require 'ms/ident/pepxml/sample_enzyme'
|
5
|
+
require 'ms/ident/pepxml/search_summary'
|
6
|
+
|
7
|
+
# Namespace scaffolding: guarantees that Ms, Ms::Ident and Ms::Ident::Pepxml
# exist before classes are defined underneath them.
module Ms
  module Ident
    class Pepxml
    end
  end
end
|
10
|
+
|
11
|
+
# One <msms_run_summary> element of a pepXML document: instrument/run
# attributes plus a sample enzyme, a search summary and the spectrum queries.
class Ms::Ident::Pepxml::MsmsRunSummary
  include Merge

  # The name of the pep xml file without any extension
  attr_accessor :base_name
  # The name of the mass spec manufacturer
  attr_accessor :ms_manufacturer
  attr_accessor :ms_model
  attr_accessor :ms_mass_analyzer
  attr_accessor :ms_detector
  attr_accessor :raw_data_type
  attr_accessor :raw_data
  attr_accessor :ms_ionization
  # stored only; not written by #to_xml
  attr_accessor :pepxml_version

  # A SampleEnzyme object (responds to: name, cut, no_cut, sense)
  attr_accessor :sample_enzyme
  # A SearchSummary object
  attr_accessor :search_summary
  # An array of spectrum_queries
  attr_accessor :spectrum_queries

  # Creates (and stores) a new SampleEnzyme and a new SearchSummary and
  # returns them together with the current spectrum_queries array.
  def block_arg
    [@sample_enzyme = Ms::Ident::Pepxml::SampleEnzyme.new,
     @search_summary = Ms::Ident::Pepxml::SearchSummary.new,
     @spectrum_queries]
  end

  # takes a hash of name, value pairs
  # if block given, yields a SampleEnzyme object, a SearchSummary and an array
  # for SpectrumQueries
  #
  # NOTE(review): the block is passed to merge! and is also called here. If
  # Merge#merge! (lib/merge.rb, not visible from this file) already invokes
  # the block, it runs twice and block_arg replaces the enzyme/summary objects
  # on the second call -- confirm against Merge.
  def initialize(hash={}, &block)
    @spectrum_queries = []
    merge!(hash, &block)
    block.call(block_arg) if block
  end

  # optionally takes an xml builder object and returns the builder, or the xml
  # string if no builder was given
  # sets the index attribute of each spectrum query (1-based position) if it
  # is not already set
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    attributes = {
      :base_name => base_name,
      :msManufacturer => ms_manufacturer,
      :msModel => ms_model,
      :msIonization => ms_ionization,
      :msMassAnalyzer => ms_mass_analyzer,
      :msDetector => ms_detector,
      :raw_data_type => raw_data_type,
      :raw_data => raw_data,
    }.reject { |_name, value| !value } # omit attributes that were never set
    xmlb.msms_run_summary(attributes) do |inner|
      sample_enzyme.to_xml(inner) if sample_enzyme
      search_summary.to_xml(inner) if search_summary
      spectrum_queries.each_with_index do |query, position|
        query.index = position + 1 unless query.index
        query.to_xml(inner)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  # Builds a new instance from an msms_run_summary node.
  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Copies the run-level attributes from +node+ (anything answering to
  # node['attribute_name']) onto this object. Child elements are not read.
  # Returns self.
  def from_pepxml_node(node)
    @base_name = node['base_name']
    @ms_manufacturer = node['msManufacturer']
    @ms_model = node['msModel']
    # was assigned to @ms_manufacturer, which clobbered the manufacturer and
    # left the ionization unset
    @ms_ionization = node['msIonization']
    @ms_mass_analyzer = node['msMassAnalyzer']
    @ms_detector = node['msDetector']
    @raw_data_type = node['raw_data_type']
    @raw_data = node['raw_data']
    self
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
|
2
|
+
require 'arrayclass'
|
3
|
+
require 'spec_id/sequest/pepxml'
|
4
|
+
require 'spec_id/parser/proph'
|
5
|
+
|
6
|
+
# Forward declarations so the constants referenced below exist.
module Sequest
  class PepXML
    class MSMSRunSummary
    end

    class SearchHit
    end
  end
end

module SpecID
  module Prot
  end

  module Pep
  end
end
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
module Proph
|
18
|
+
|
19
|
+
# A PeptideProphet summary. When given a file it detects the PeptideProphet
# version from the file header and hands the file to
# SpecID::Parser::PepProph to be parsed into this object.
class PepSummary
  include SpecID

  # captures the version number from the PeptideProphet version attribute
  Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /

  # inherits prots and peps

  # the protein groups
  # currently these are just xml nodes returned!
  attr_accessor :peptideprophet_summary
  attr_accessor :msms_run_summaries
  attr_accessor :version

  # always true for this class
  def hi_prob_best
    true
  end

  # Looks through at most the first 8 lines of +file+ for the PeptideProphet
  # version string and returns the version number as a String.
  # Raises ArgumentError if none of those lines contains it.
  def get_version(file)
    detected = nil
    File.open(file) do |io|
      8.times do
        # gets returns nil at end of file; nil.to_s never matches
        detected = io.gets.to_s[Filetype_and_version_re_new, 1]
        break if detected
      end
    end
    raise(ArgumentError, "couldn't detect version in #{file}") unless detected
    detected
  end

  # the class used for individual search hits
  def search_hit_class
    PepSummary::Pep
  end

  # With no file this is an empty summary; otherwise records the version and
  # parses the file into self.
  def initialize(file=nil)
    return unless file
    @version = get_version(file)
    SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
  end
end
|
61
|
+
|
62
|
+
# this is a SpecID::Pep (by interface: not including stuff yet)
|
63
|
+
# this is a SpecID::Pep (by interface: not including stuff yet)
# A search hit extended with the PeptideProphet result fields.
class PepSummary::Pep < Sequest::PepXML::SearchHit
  # aaseq is defined in SearchHit

  %w(probability fval ntt nmc massd prots).each { |member| add_member(member) }

  # Fills in the SearchHit fields via super, then reads the probability and
  # the fval/ntt/nmc/massd parameters from the peptideprophet_result found
  # under the node's analysis_result child. Other parameter names are ignored.
  # returns self
  def from_pepxml_node(node)
    super(node)

    analysis_node = node.find_first('child::analysis_result')
    prophet_node = analysis_node.find_first('child::peptideprophet_result')
    self.probability = prophet_node['probability'].to_f
    prophet_node.find('descendant::parameter').each do |param|
      key = param['name']
      case key
      when 'fval', 'massd'
        send("#{key}=", param['value'].to_f)
      when 'ntt', 'nmc'
        send("#{key}=", param['value'].to_i)
      end
    end
    self
  end
end
|
92
|
+
|
93
|
+
# A protein record with the fields name, protein_descr and peps (in that
# positional order).
::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))

class PepSummary::Prot
  # the first field (name)
  def first_entry
    self[0]
  end

  # the first two fields (name and protein_descr) joined by a single space
  def reference
    self[0] + ' ' + self[1]
  end
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|