ms-ident 0.0.23 → 0.1.1
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/ms/ident/peptide/db.rb +1 -0
- data/lib/ms/ident/peptide_hit.rb +5 -0
- data/lib/ms/ident/peptide_hit/qvalue.rb +7 -6
- data/lib/ms/ident/pepxml.rb +3 -3
- data/lib/ms/ident/pepxml/search_hit.rb +1 -1
- data/lib/ms/ident/protein.rb +18 -5
- data/lib/ms/ident/protein_group.rb +9 -1
- data/lib/ms/ident/protein_hit.rb +11 -7
- data/lib/ms/ident/search.rb +9 -4
- metadata +4 -26
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.1.1
|
data/lib/ms/ident/peptide/db.rb
CHANGED
data/lib/ms/ident/peptide_hit.rb
CHANGED
@@ -2,7 +2,12 @@ module Ms ; end
|
|
2
2
|
module Ms::Ident ; end
|
3
3
|
|
4
4
|
class Ms::Ident::PeptideHit
|
5
|
+
attr_accessor :id
|
6
|
+
attr_accessor :search
|
7
|
+
attr_accessor :missed_cleavages
|
5
8
|
attr_accessor :aaseq
|
6
9
|
attr_accessor :charge
|
7
10
|
attr_accessor :proteins
|
11
|
+
attr_accessor :qvalue
|
8
12
|
end
|
13
|
+
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'ms/ident/search'
|
1
2
|
require 'ms/ident/peptide_hit'
|
2
3
|
|
3
4
|
module Ms ; end
|
@@ -5,10 +6,9 @@ module Ms::Ident ; end
|
|
5
6
|
|
6
7
|
class Ms::Ident::PeptideHit
|
7
8
|
module Qvalue
|
8
|
-
attr_accessor :qvalue
|
9
9
|
FILE_EXTENSION = '.phq.tsv'
|
10
10
|
FILE_DELIMITER = "\t"
|
11
|
-
HEADER = %w(aaseq charge qvalue)
|
11
|
+
HEADER = %w(run_id id aaseq charge qvalue)
|
12
12
|
|
13
13
|
class << self
|
14
14
|
|
@@ -24,7 +24,7 @@ class Ms::Ident::PeptideHit
|
|
24
24
|
File.open(filename,'w') do |out|
|
25
25
|
out.puts HEADER.join(FILE_DELIMITER)
|
26
26
|
hits.zip(qvalues) do |hit, qvalue|
|
27
|
-
out.puts [hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
|
27
|
+
out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
|
28
28
|
end
|
29
29
|
end
|
30
30
|
filename
|
@@ -32,15 +32,17 @@ class Ms::Ident::PeptideHit
|
|
32
32
|
|
33
33
|
# returns an array of PeptideHit objects from a phq.tsv
|
34
34
|
def from_file(filename)
|
35
|
+
searches = Hash.new {|h,id| h[id] = Ms::Ident::Search.new(id) }
|
35
36
|
peptide_hits = []
|
36
37
|
File.open(filename) do |io|
|
37
38
|
header = io.readline.chomp.split(FILE_DELIMITER)
|
38
39
|
raise "bad headers" unless header == HEADER
|
39
40
|
io.each do |line|
|
40
41
|
line.chomp!
|
41
|
-
(aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
|
42
|
+
(run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
|
42
43
|
ph = Ms::Ident::PeptideHit.new
|
43
|
-
ph.
|
44
|
+
ph.search = searches[run_id]
|
45
|
+
ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
|
44
46
|
peptide_hits << ph
|
45
47
|
end
|
46
48
|
end
|
@@ -51,5 +53,4 @@ class Ms::Ident::PeptideHit
|
|
51
53
|
|
52
54
|
end
|
53
55
|
end # Qvalue
|
54
|
-
include Qvalue
|
55
56
|
end # Peptide Hit
|
data/lib/ms/ident/pepxml.rb
CHANGED
@@ -27,12 +27,12 @@ class Ms::Ident::Pepxml
|
|
27
27
|
# returns an array of Ms::Ident::Pepxml::SearchHit::Simple structs
|
28
28
|
def self.simple_search_hits(file)
|
29
29
|
hit_values = File.open(file) do |io|
|
30
|
-
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
30
|
+
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
|
31
31
|
# we can work with namespaces, or just remove them ...
|
32
32
|
doc.remove_namespaces!
|
33
33
|
root = doc.root
|
34
34
|
search_hits = root.xpath('//search_hit')
|
35
|
-
search_hits.map do |search_hit|
|
35
|
+
search_hits.each_with_index.map do |search_hit,i|
|
36
36
|
aaseq = search_hit['peptide']
|
37
37
|
charge = search_hit.parent.parent['assumed_charge'].to_i
|
38
38
|
search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
|
@@ -40,7 +40,7 @@ class Ms::Ident::Pepxml
|
|
40
40
|
search_score_nodes.each do |node|
|
41
41
|
search_scores[node['name'].to_sym] = node['value'].to_f
|
42
42
|
end
|
43
|
-
Ms::Ident::Pepxml::SearchHit::Simple.new(aaseq, charge, search_scores)
|
43
|
+
Ms::Ident::Pepxml::SearchHit::Simple.new("hit_#{i}", Ms::Ident::Search.new(file.chomp(File.extname(file))), aaseq, charge, search_scores)
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
data/lib/ms/ident/protein.rb
CHANGED
@@ -2,11 +2,24 @@ module Ms ; end
|
|
2
2
|
module Ms::Ident ; end
|
3
3
|
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
class Ms::Ident::Protein
|
6
|
+
|
7
|
+
attr_accessor :id
|
8
|
+
attr_accessor :seq
|
9
|
+
alias_method :sequence, :seq
|
10
|
+
alias_method :sequence=, :seq=
|
11
|
+
|
12
|
+
attr_accessor :description
|
13
|
+
|
14
|
+
def initialize(id=nil, seq=nil)
|
15
|
+
(@id, @seq) = id, seq
|
10
16
|
end
|
17
|
+
|
18
|
+
# DEPRECATING THIS GUY
|
19
|
+
## gives the information up until the first space or carriage return.
|
20
|
+
## Assumes the protein can respond_to? :reference
|
21
|
+
#def first_entry
|
22
|
+
# reference.split(/[\s\r]/)[0]
|
23
|
+
#end
|
11
24
|
end
|
12
25
|
|
@@ -28,7 +28,12 @@ module Ms
|
|
28
28
|
# accuracy.
|
29
29
|
#
|
30
30
|
# returns an array of ProteinGroup objects, each set with :peptide_hits
|
31
|
-
|
31
|
+
#
|
32
|
+
# If update_peptide_hits is true, then each peptide_hit is linked to the array
|
33
|
+
# of protein_groups it is associated with by calling :protein_groups=. A
|
34
|
+
# symbol can also be passed in, and that method will be called instead.
|
35
|
+
def self.peptide_hits_to_protein_groups(peptide_hits, update_peptide_hits=false, &sort_by)
|
36
|
+
update_peptide_hits = 'protein_groups='.to_sym if (update_peptide_hits==true)
|
32
37
|
sort_by ||= PRIORITIZE_PROTEINS
|
33
38
|
# note to self: I wrote this in 2011, so I think I know what I'm doing now
|
34
39
|
protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
|
@@ -64,6 +69,9 @@ module Ms
|
|
64
69
|
group.peptide_hits = peptide_set if has_an_unaccounted_peptide
|
65
70
|
has_an_unaccounted_peptide
|
66
71
|
end
|
72
|
+
if update_peptide_hits
|
73
|
+
greedy_first.each {|pg, pephits| pephits.each {|hit| hit.send(update_peptide_hits, pg) } }
|
74
|
+
end
|
67
75
|
greedy_first.map(&:first)
|
68
76
|
end
|
69
77
|
|
data/lib/ms/ident/protein_hit.rb
CHANGED
@@ -1,17 +1,21 @@
|
|
1
|
+
require 'andand'
|
2
|
+
|
1
3
|
|
2
4
|
module Ms ; end
|
3
5
|
module Ms::Ident ; end
|
4
6
|
|
5
|
-
class Ms::Ident::ProteinHit
|
6
|
-
attr_accessor :id
|
7
|
-
attr_accessor :seq
|
8
|
-
alias_method :sequence, :seq
|
9
|
-
alias_method :sequence=, :seq=
|
7
|
+
class Ms::Ident::ProteinHit < Ms::Ident::Protein
|
10
8
|
attr_accessor :peptide_hits
|
11
9
|
|
12
|
-
def initialize(id=nil)
|
13
|
-
@peptide_hits =
|
10
|
+
def initialize(id=nil, peptide_hits=[])
|
11
|
+
@peptide_hits = peptide_hits
|
14
12
|
@id = id
|
15
13
|
end
|
14
|
+
|
15
|
+
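# if the / GN=(\w+) ?/ regexp is found in the description, returns the first
|
16
|
+
# capture group. NOTE: when the description is nil or does not match, the [1] below is called on nil and raises NoMethodError rather than returning nil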
|
17
|
+
def gene_id
|
18
|
+
description.andand.match(/ GN=(\w+) ?/)[1]
|
19
|
+
end
|
16
20
|
end
|
17
21
|
|
data/lib/ms/ident/search.rb
CHANGED
@@ -2,9 +2,15 @@
|
|
2
2
|
module Ms
|
3
3
|
module Ident
|
4
4
|
|
5
|
-
|
6
|
-
attr_accessor :
|
7
|
-
attr_accessor :
|
5
|
+
class Search
|
6
|
+
attr_accessor :id
|
7
|
+
attr_accessor :peptide_hits
|
8
|
+
alias_method :hits, :peptide_hits
|
9
|
+
|
10
|
+
def initialize(id=nil, peptide_hits=[])
|
11
|
+
@id = id
|
12
|
+
@peptide_hits = peptide_hits
|
13
|
+
end
|
8
14
|
|
9
15
|
# returns an array of peptide_hits and protein_hits that are linked to
|
10
16
|
# one another. NOTE: this will update peptide and protein
|
@@ -40,7 +46,6 @@ module Ms
|
|
40
46
|
|
41
47
|
|
42
48
|
module SearchGroup
|
43
|
-
include Search
|
44
49
|
|
45
50
|
# an array of search objects
|
46
51
|
attr_accessor :searches
|
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ms-ident
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 23
|
9
|
-
version: 0.0.23
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.1
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- John T. Prince
|
@@ -14,7 +10,7 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date: 2011-
|
13
|
+
date: 2011-04-26 00:00:00 -06:00
|
18
14
|
default_executable:
|
19
15
|
dependencies:
|
20
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,8 +21,6 @@ dependencies:
|
|
25
21
|
requirements:
|
26
22
|
- - ">="
|
27
23
|
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
24
|
version: "0"
|
31
25
|
type: :runtime
|
32
26
|
version_requirements: *id001
|
@@ -38,10 +32,6 @@ dependencies:
|
|
38
32
|
requirements:
|
39
33
|
- - ">="
|
40
34
|
- !ruby/object:Gem::Version
|
41
|
-
segments:
|
42
|
-
- 0
|
43
|
-
- 0
|
44
|
-
- 12
|
45
35
|
version: 0.0.12
|
46
36
|
type: :runtime
|
47
37
|
version_requirements: *id002
|
@@ -53,8 +43,6 @@ dependencies:
|
|
53
43
|
requirements:
|
54
44
|
- - ">="
|
55
45
|
- !ruby/object:Gem::Version
|
56
|
-
segments:
|
57
|
-
- 0
|
58
46
|
version: "0"
|
59
47
|
type: :runtime
|
60
48
|
version_requirements: *id003
|
@@ -66,8 +54,6 @@ dependencies:
|
|
66
54
|
requirements:
|
67
55
|
- - ">="
|
68
56
|
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 0
|
71
57
|
version: "0"
|
72
58
|
type: :runtime
|
73
59
|
version_requirements: *id004
|
@@ -79,8 +65,6 @@ dependencies:
|
|
79
65
|
requirements:
|
80
66
|
- - ">="
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
segments:
|
83
|
-
- 0
|
84
68
|
version: "0"
|
85
69
|
type: :development
|
86
70
|
version_requirements: *id005
|
@@ -92,8 +76,6 @@ dependencies:
|
|
92
76
|
requirements:
|
93
77
|
- - ">="
|
94
78
|
- !ruby/object:Gem::Version
|
95
|
-
segments:
|
96
|
-
- 0
|
97
79
|
version: "0"
|
98
80
|
type: :development
|
99
81
|
version_requirements: *id006
|
@@ -160,21 +142,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
160
142
|
requirements:
|
161
143
|
- - ">="
|
162
144
|
- !ruby/object:Gem::Version
|
163
|
-
segments:
|
164
|
-
- 0
|
165
145
|
version: "0"
|
166
146
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
147
|
none: false
|
168
148
|
requirements:
|
169
149
|
- - ">="
|
170
150
|
- !ruby/object:Gem::Version
|
171
|
-
segments:
|
172
|
-
- 0
|
173
151
|
version: "0"
|
174
152
|
requirements: []
|
175
153
|
|
176
154
|
rubyforge_project: mspire
|
177
|
-
rubygems_version: 1.
|
155
|
+
rubygems_version: 1.6.2
|
178
156
|
signing_key:
|
179
157
|
specification_version: 3
|
180
158
|
summary: mspire library for working with mzIdentML and pepxml
|