ms-ident 0.0.23 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/ms/ident/peptide/db.rb +1 -0
- data/lib/ms/ident/peptide_hit.rb +5 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +7 -6
- data/lib/ms/ident/pepxml.rb +3 -3
- data/lib/ms/ident/pepxml/search_hit.rb +1 -1
- data/lib/ms/ident/protein.rb +18 -5
- data/lib/ms/ident/protein_group.rb +9 -1
- data/lib/ms/ident/protein_hit.rb +11 -7
- data/lib/ms/ident/search.rb +9 -4
- metadata +4 -26
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.23
|
1
|
+
0.1.1
|
data/lib/ms/ident/peptide/db.rb
CHANGED
data/lib/ms/ident/peptide_hit.rb
CHANGED
@@ -2,7 +2,12 @@ module Ms ; end
|
|
2
2
|
module Ms::Ident ; end
|
3
3
|
|
4
4
|
class Ms::Ident::PeptideHit
|
5
|
+
attr_accessor :id
|
6
|
+
attr_accessor :search
|
7
|
+
attr_accessor :missed_cleavages
|
5
8
|
attr_accessor :aaseq
|
6
9
|
attr_accessor :charge
|
7
10
|
attr_accessor :proteins
|
11
|
+
attr_accessor :qvalue
|
8
12
|
end
|
13
|
+
|
data/lib/ms/ident/peptide_hit/qvalue.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'ms/ident/search'
|
1
2
|
require 'ms/ident/peptide_hit'
|
2
3
|
|
3
4
|
module Ms ; end
|
@@ -5,10 +6,9 @@ module Ms::Ident ; end
|
|
5
6
|
|
6
7
|
class Ms::Ident::PeptideHit
|
7
8
|
module Qvalue
|
8
|
-
attr_accessor :qvalue
|
9
9
|
FILE_EXTENSION = '.phq.tsv'
|
10
10
|
FILE_DELIMITER = "\t"
|
11
|
-
HEADER = %w(aaseq charge qvalue)
|
11
|
+
HEADER = %w(run_id id aaseq charge qvalue)
|
12
12
|
|
13
13
|
class << self
|
14
14
|
|
@@ -24,7 +24,7 @@ class Ms::Ident::PeptideHit
|
|
24
24
|
File.open(filename,'w') do |out|
|
25
25
|
out.puts HEADER.join(FILE_DELIMITER)
|
26
26
|
hits.zip(qvalues) do |hit, qvalue|
|
27
|
-
out.puts [hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
|
27
|
+
out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
|
28
28
|
end
|
29
29
|
end
|
30
30
|
filename
|
@@ -32,15 +32,17 @@ class Ms::Ident::PeptideHit
|
|
32
32
|
|
33
33
|
# returns an array of PeptideHit objects from a phq.tsv
|
34
34
|
def from_file(filename)
|
35
|
+
searches = Hash.new {|h,id| h[id] = Ms::Ident::Search.new(id) }
|
35
36
|
peptide_hits = []
|
36
37
|
File.open(filename) do |io|
|
37
38
|
header = io.readline.chomp.split(FILE_DELIMITER)
|
38
39
|
raise "bad headers" unless header == HEADER
|
39
40
|
io.each do |line|
|
40
41
|
line.chomp!
|
41
|
-
(aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
|
42
|
+
(run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
|
42
43
|
ph = Ms::Ident::PeptideHit.new
|
43
|
-
ph.
|
44
|
+
ph.search = searches[run_id]
|
45
|
+
ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
|
44
46
|
peptide_hits << ph
|
45
47
|
end
|
46
48
|
end
|
@@ -51,5 +53,4 @@ class Ms::Ident::PeptideHit
|
|
51
53
|
|
52
54
|
end
|
53
55
|
end # Qvalue
|
54
|
-
include Qvalue
|
55
56
|
end # Peptide Hit
|
data/lib/ms/ident/pepxml.rb
CHANGED
@@ -27,12 +27,12 @@ class Ms::Ident::Pepxml
|
|
27
27
|
# returns an array of Ms::Ident::Pepxml::SearchHit::Simple structs
|
28
28
|
def self.simple_search_hits(file)
|
29
29
|
hit_values = File.open(file) do |io|
|
30
|
-
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
30
|
+
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
|
31
31
|
# we can work with namespaces, or just remove them ...
|
32
32
|
doc.remove_namespaces!
|
33
33
|
root = doc.root
|
34
34
|
search_hits = root.xpath('//search_hit')
|
35
|
-
search_hits.map do |search_hit|
|
35
|
+
search_hits.each_with_index.map do |search_hit,i|
|
36
36
|
aaseq = search_hit['peptide']
|
37
37
|
charge = search_hit.parent.parent['assumed_charge'].to_i
|
38
38
|
search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
|
@@ -40,7 +40,7 @@ class Ms::Ident::Pepxml
|
|
40
40
|
search_score_nodes.each do |node|
|
41
41
|
search_scores[node['name'].to_sym] = node['value'].to_f
|
42
42
|
end
|
43
|
-
Ms::Ident::Pepxml::SearchHit::Simple.new(aaseq, charge, search_scores)
|
43
|
+
Ms::Ident::Pepxml::SearchHit::Simple.new("hit_#{i}", Ms::Ident::Search.new(file.chomp(File.extname(file))), aaseq, charge, search_scores)
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
data/lib/ms/ident/protein.rb
CHANGED
@@ -2,11 +2,24 @@ module Ms ; end
|
|
2
2
|
module Ms::Ident ; end
|
3
3
|
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
class Ms::Ident::Protein
|
6
|
+
|
7
|
+
attr_accessor :id
|
8
|
+
attr_accessor :seq
|
9
|
+
alias_method :sequence, :seq
|
10
|
+
alias_method :sequence=, :seq=
|
11
|
+
|
12
|
+
attr_accessor :description
|
13
|
+
|
14
|
+
def initialize(id=nil, seq=nil)
|
15
|
+
(@id, @seq) = id, seq
|
10
16
|
end
|
17
|
+
|
18
|
+
# DEPRECATING THIS GUY
|
19
|
+
## gives the information up until the first space or carriage return.
|
20
|
+
## Assumes the protein can respond_to? :reference
|
21
|
+
#def first_entry
|
22
|
+
# reference.split(/[\s\r]/)[0]
|
23
|
+
#end
|
11
24
|
end
|
12
25
|
|
data/lib/ms/ident/protein_group.rb
CHANGED
@@ -28,7 +28,12 @@ module Ms
|
|
28
28
|
# accuracy.
|
29
29
|
#
|
30
30
|
# returns an array of ProteinGroup objects, each set with :peptide_hits
|
31
|
-
|
31
|
+
#
|
32
|
+
# If update_peptide_hits is true, then each peptide_hit is linked to the array
|
33
|
+
# of protein_groups it is associated with using :protein_groups. A
|
34
|
+
# symbol can also be passed in, and that method will be called instead.
|
35
|
+
def self.peptide_hits_to_protein_groups(peptide_hits, update_peptide_hits=false, &sort_by)
|
36
|
+
update_peptide_hits = 'protein_groups='.to_sym if (update_peptide_hits==true)
|
32
37
|
sort_by ||= PRIORITIZE_PROTEINS
|
33
38
|
# note to self: I wrote this in 2011, so I think I know what I'm doing now
|
34
39
|
protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
|
@@ -64,6 +69,9 @@ module Ms
|
|
64
69
|
group.peptide_hits = peptide_set if has_an_unaccounted_peptide
|
65
70
|
has_an_unaccounted_peptide
|
66
71
|
end
|
72
|
+
if update_peptide_hits
|
73
|
+
greedy_first.each {|pg, pephits| pephits.each {|hit| hit.send(update_peptide_hits, pg) } }
|
74
|
+
end
|
67
75
|
greedy_first.map(&:first)
|
68
76
|
end
|
69
77
|
|
data/lib/ms/ident/protein_hit.rb
CHANGED
@@ -1,17 +1,21 @@
|
|
1
|
+
require 'andand'
|
2
|
+
|
1
3
|
|
2
4
|
module Ms ; end
|
3
5
|
module Ms::Ident ; end
|
4
6
|
|
5
|
-
class Ms::Ident::ProteinHit
|
6
|
-
attr_accessor :id
|
7
|
-
attr_accessor :seq
|
8
|
-
alias_method :sequence, :seq
|
9
|
-
alias_method :sequence=, :seq=
|
7
|
+
class Ms::Ident::ProteinHit < Ms::Ident::Protein
|
10
8
|
attr_accessor :peptide_hits
|
11
9
|
|
12
|
-
def initialize(id=nil)
|
13
|
-
@peptide_hits =
|
10
|
+
def initialize(id=nil, peptide_hits=[])
|
11
|
+
@peptide_hits = peptide_hits
|
14
12
|
@id = id
|
15
13
|
end
|
14
|
+
|
15
|
+
# if the GN=([^\s]+) regexp is found in the description, returns the first
|
16
|
+
# match, or nil if not found
|
17
|
+
def gene_id
|
18
|
+
description.andand.match(/ GN=(\w+) ?/)[1]
|
19
|
+
end
|
16
20
|
end
|
17
21
|
|
data/lib/ms/ident/search.rb
CHANGED
@@ -2,9 +2,15 @@
|
|
2
2
|
module Ms
|
3
3
|
module Ident
|
4
4
|
|
5
|
-
|
6
|
-
attr_accessor :
|
7
|
-
attr_accessor :
|
5
|
+
class Search
|
6
|
+
attr_accessor :id
|
7
|
+
attr_accessor :peptide_hits
|
8
|
+
alias_method :hits, :peptide_hits
|
9
|
+
|
10
|
+
def initialize(id=nil, peptide_hits=[])
|
11
|
+
@id = id
|
12
|
+
@peptide_hits = peptide_hits
|
13
|
+
end
|
8
14
|
|
9
15
|
# returns an array of peptide_hits and protein_hits that are linked to
|
10
16
|
# one another. NOTE: this will update peptide and protein
|
@@ -40,7 +46,6 @@ module Ms
|
|
40
46
|
|
41
47
|
|
42
48
|
module SearchGroup
|
43
|
-
include Search
|
44
49
|
|
45
50
|
# an array of search objects
|
46
51
|
attr_accessor :searches
|
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-ident
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 23
|
9
|
-
version: 0.0.23
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.1
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- John T. Prince
|
@@ -14,7 +10,7 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date: 2011-
|
13
|
+
date: 2011-04-26 00:00:00 -06:00
|
18
14
|
default_executable:
|
19
15
|
dependencies:
|
20
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,8 +21,6 @@ dependencies:
|
|
25
21
|
requirements:
|
26
22
|
- - ">="
|
27
23
|
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
24
|
version: "0"
|
31
25
|
type: :runtime
|
32
26
|
version_requirements: *id001
|
@@ -38,10 +32,6 @@ dependencies:
|
|
38
32
|
requirements:
|
39
33
|
- - ">="
|
40
34
|
- !ruby/object:Gem::Version
|
41
|
-
segments:
|
42
|
-
- 0
|
43
|
-
- 0
|
44
|
-
- 12
|
45
35
|
version: 0.0.12
|
46
36
|
type: :runtime
|
47
37
|
version_requirements: *id002
|
@@ -53,8 +43,6 @@ dependencies:
|
|
53
43
|
requirements:
|
54
44
|
- - ">="
|
55
45
|
- !ruby/object:Gem::Version
|
56
|
-
segments:
|
57
|
-
- 0
|
58
46
|
version: "0"
|
59
47
|
type: :runtime
|
60
48
|
version_requirements: *id003
|
@@ -66,8 +54,6 @@ dependencies:
|
|
66
54
|
requirements:
|
67
55
|
- - ">="
|
68
56
|
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 0
|
71
57
|
version: "0"
|
72
58
|
type: :runtime
|
73
59
|
version_requirements: *id004
|
@@ -79,8 +65,6 @@ dependencies:
|
|
79
65
|
requirements:
|
80
66
|
- - ">="
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
segments:
|
83
|
-
- 0
|
84
68
|
version: "0"
|
85
69
|
type: :development
|
86
70
|
version_requirements: *id005
|
@@ -92,8 +76,6 @@ dependencies:
|
|
92
76
|
requirements:
|
93
77
|
- - ">="
|
94
78
|
- !ruby/object:Gem::Version
|
95
|
-
segments:
|
96
|
-
- 0
|
97
79
|
version: "0"
|
98
80
|
type: :development
|
99
81
|
version_requirements: *id006
|
@@ -160,21 +142,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
160
142
|
requirements:
|
161
143
|
- - ">="
|
162
144
|
- !ruby/object:Gem::Version
|
163
|
-
segments:
|
164
|
-
- 0
|
165
145
|
version: "0"
|
166
146
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
147
|
none: false
|
168
148
|
requirements:
|
169
149
|
- - ">="
|
170
150
|
- !ruby/object:Gem::Version
|
171
|
-
segments:
|
172
|
-
- 0
|
173
151
|
version: "0"
|
174
152
|
requirements: []
|
175
153
|
|
176
154
|
rubyforge_project: mspire
|
177
|
-
rubygems_version: 1.
|
155
|
+
rubygems_version: 1.6.2
|
178
156
|
signing_key:
|
179
157
|
specification_version: 3
|
180
158
|
summary: mspire library for working with mzIdentML and pepxml
|