ms-ident 0.0.23 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.23
1
+ 0.1.1
@@ -1,5 +1,6 @@
1
1
  require 'ms/in_silico/digester'
2
2
  require 'ms/fasta'
3
+ require 'optparse'
3
4
 
4
5
  module Ms ; end
5
6
  module Ms::Ident ; end
@@ -2,7 +2,12 @@ module Ms ; end
2
2
  module Ms::Ident ; end
3
3
 
4
4
  class Ms::Ident::PeptideHit
5
+ attr_accessor :id
6
+ attr_accessor :search
7
+ attr_accessor :missed_cleavages
5
8
  attr_accessor :aaseq
6
9
  attr_accessor :charge
7
10
  attr_accessor :proteins
11
+ attr_accessor :qvalue
8
12
  end
13
+
@@ -1,3 +1,4 @@
1
+ require 'ms/ident/search'
1
2
  require 'ms/ident/peptide_hit'
2
3
 
3
4
  module Ms ; end
@@ -5,10 +6,9 @@ module Ms::Ident ; end
5
6
 
6
7
  class Ms::Ident::PeptideHit
7
8
  module Qvalue
8
- attr_accessor :qvalue
9
9
  FILE_EXTENSION = '.phq.tsv'
10
10
  FILE_DELIMITER = "\t"
11
- HEADER = %w(aaseq charge qvalue)
11
+ HEADER = %w(run_id id aaseq charge qvalue)
12
12
 
13
13
  class << self
14
14
 
@@ -24,7 +24,7 @@ class Ms::Ident::PeptideHit
24
24
  File.open(filename,'w') do |out|
25
25
  out.puts HEADER.join(FILE_DELIMITER)
26
26
  hits.zip(qvalues) do |hit, qvalue|
27
- out.puts [hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
27
+ out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
28
28
  end
29
29
  end
30
30
  filename
@@ -32,15 +32,17 @@ class Ms::Ident::PeptideHit
32
32
 
33
33
  # returns an array of PeptideHit objects from a phq.tsv
34
34
  def from_file(filename)
35
+ searches = Hash.new {|h,id| h[id] = Ms::Ident::Search.new(id) }
35
36
  peptide_hits = []
36
37
  File.open(filename) do |io|
37
38
  header = io.readline.chomp.split(FILE_DELIMITER)
38
39
  raise "bad headers" unless header == HEADER
39
40
  io.each do |line|
40
41
  line.chomp!
41
- (aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
42
+ (run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
42
43
  ph = Ms::Ident::PeptideHit.new
43
- ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
44
+ ph.search = searches[run_id]
45
+ ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
44
46
  peptide_hits << ph
45
47
  end
46
48
  end
@@ -51,5 +53,4 @@ class Ms::Ident::PeptideHit
51
53
 
52
54
  end
53
55
  end # Qvalue
54
- include Qvalue
55
56
  end # Peptide Hit
@@ -27,12 +27,12 @@ class Ms::Ident::Pepxml
27
27
  # returns an array of Ms::Ident::Pepxml::SearchHit::Simple structs
28
28
  def self.simple_search_hits(file)
29
29
  hit_values = File.open(file) do |io|
30
- doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
30
+ doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
31
31
  # we can work with namespaces, or just remove them ...
32
32
  doc.remove_namespaces!
33
33
  root = doc.root
34
34
  search_hits = root.xpath('//search_hit')
35
- search_hits.map do |search_hit|
35
+ search_hits.each_with_index.map do |search_hit,i|
36
36
  aaseq = search_hit['peptide']
37
37
  charge = search_hit.parent.parent['assumed_charge'].to_i
38
38
  search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
@@ -40,7 +40,7 @@ class Ms::Ident::Pepxml
40
40
  search_score_nodes.each do |node|
41
41
  search_scores[node['name'].to_sym] = node['value'].to_f
42
42
  end
43
- Ms::Ident::Pepxml::SearchHit::Simple.new(aaseq, charge, search_scores)
43
+ Ms::Ident::Pepxml::SearchHit::Simple.new("hit_#{i}", Ms::Ident::Search.new(file.chomp(File.extname(file))), aaseq, charge, search_scores)
44
44
  end
45
45
  end
46
46
  end
@@ -137,7 +137,7 @@ class Ms::Ident::Pepxml
137
137
  self
138
138
  end
139
139
 
140
- Simple = Struct.new(:aaseq, :charge, :search_scores)
140
+ Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
141
141
  end
142
142
 
143
143
  end
@@ -2,11 +2,24 @@ module Ms ; end
2
2
  module Ms::Ident ; end
3
3
 
4
4
 
5
- module Ms::Ident::Protein
6
- # gives the information up until the first space or carriage return.
7
- # Assumes the protein can respond_to? :reference
8
- def first_entry
9
- reference.split(/[\s\r]/)[0]
5
+ class Ms::Ident::Protein
6
+
7
+ attr_accessor :id
8
+ attr_accessor :seq
9
+ alias_method :sequence, :seq
10
+ alias_method :sequence=, :seq=
11
+
12
+ attr_accessor :description
13
+
14
+ def initialize(id=nil, seq=nil)
15
+ (@id, @seq) = id, seq
10
16
  end
17
+
18
+ # DEPRECATING THIS GUY
19
+ ## gives the information up until the first space or carriage return.
20
+ ## Assumes the protein can respond_to? :reference
21
+ #def first_entry
22
+ # reference.split(/[\s\r]/)[0]
23
+ #end
11
24
  end
12
25
 
@@ -28,7 +28,12 @@ module Ms
28
28
  # accuracy.
29
29
  #
30
30
  # returns an array of ProteinGroup objects, each set with :peptide_hits
31
- def self.peptide_hits_to_protein_groups(peptide_hits, &sort_by)
31
+ #
32
+ # If update_peptide_hits is true, then each peptide_hit is linked to the array
33
+ # of protein_groups it is associated with using :protein_groups. A
34
+ # symbol can also be passed in, and that method will be called instead.
35
+ def self.peptide_hits_to_protein_groups(peptide_hits, update_peptide_hits=false, &sort_by)
36
+ update_peptide_hits = 'protein_groups='.to_sym if (update_peptide_hits==true)
32
37
  sort_by ||= PRIORITIZE_PROTEINS
33
38
  # note to self: I wrote this in 2011, so I think I know what I'm doing now
34
39
  protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
@@ -64,6 +69,9 @@ module Ms
64
69
  group.peptide_hits = peptide_set if has_an_unaccounted_peptide
65
70
  has_an_unaccounted_peptide
66
71
  end
72
+ if update_peptide_hits
73
+ greedy_first.each {|pg, pephits| pephits.each {|hit| hit.send(update_peptide_hits, pg) } }
74
+ end
67
75
  greedy_first.map(&:first)
68
76
  end
69
77
 
@@ -1,17 +1,21 @@
1
+ require 'andand'
2
+
1
3
 
2
4
  module Ms ; end
3
5
  module Ms::Ident ; end
4
6
 
5
- class Ms::Ident::ProteinHit
6
- attr_accessor :id
7
- attr_accessor :seq
8
- alias_method :sequence, :seq
9
- alias_method :sequence=, :seq=
7
+ class Ms::Ident::ProteinHit < Ms::Ident::Protein
10
8
  attr_accessor :peptide_hits
11
9
 
12
- def initialize(id=nil)
13
- @peptide_hits = []
10
+ def initialize(id=nil, peptide_hits=[])
11
+ @peptide_hits = peptide_hits
14
12
  @id = id
15
13
  end
14
+
15
+ # if the GN=([^\s]+) regexp is found in the description, returns the first
16
+ # match, or nil if not found
17
+ def gene_id
18
+ description.andand.match(/ GN=(\w+) ?/)[1]
19
+ end
16
20
  end
17
21
 
@@ -2,9 +2,15 @@
2
2
  module Ms
3
3
  module Ident
4
4
 
5
- module Search
6
- attr_accessor :proteins
7
- attr_accessor :peptides
5
+ class Search
6
+ attr_accessor :id
7
+ attr_accessor :peptide_hits
8
+ alias_method :hits, :peptide_hits
9
+
10
+ def initialize(id=nil, peptide_hits=[])
11
+ @id = id
12
+ @peptide_hits = peptide_hits
13
+ end
8
14
 
9
15
  # returns an array of peptide_hits and protein_hits that are linked to
10
16
  # one another. NOTE: this will update peptide and protein
@@ -40,7 +46,6 @@ module Ms
40
46
 
41
47
 
42
48
  module SearchGroup
43
- include Search
44
49
 
45
50
  # an array of search objects
46
51
  attr_accessor :searches
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-ident
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 23
9
- version: 0.0.23
4
+ prerelease:
5
+ version: 0.1.1
10
6
  platform: ruby
11
7
  authors:
12
8
  - John T. Prince
@@ -14,7 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-03-31 00:00:00 -06:00
13
+ date: 2011-04-26 00:00:00 -06:00
18
14
  default_executable:
19
15
  dependencies:
20
16
  - !ruby/object:Gem::Dependency
@@ -25,8 +21,6 @@ dependencies:
25
21
  requirements:
26
22
  - - ">="
27
23
  - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
24
  version: "0"
31
25
  type: :runtime
32
26
  version_requirements: *id001
@@ -38,10 +32,6 @@ dependencies:
38
32
  requirements:
39
33
  - - ">="
40
34
  - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
- - 0
44
- - 12
45
35
  version: 0.0.12
46
36
  type: :runtime
47
37
  version_requirements: *id002
@@ -53,8 +43,6 @@ dependencies:
53
43
  requirements:
54
44
  - - ">="
55
45
  - !ruby/object:Gem::Version
56
- segments:
57
- - 0
58
46
  version: "0"
59
47
  type: :runtime
60
48
  version_requirements: *id003
@@ -66,8 +54,6 @@ dependencies:
66
54
  requirements:
67
55
  - - ">="
68
56
  - !ruby/object:Gem::Version
69
- segments:
70
- - 0
71
57
  version: "0"
72
58
  type: :runtime
73
59
  version_requirements: *id004
@@ -79,8 +65,6 @@ dependencies:
79
65
  requirements:
80
66
  - - ">="
81
67
  - !ruby/object:Gem::Version
82
- segments:
83
- - 0
84
68
  version: "0"
85
69
  type: :development
86
70
  version_requirements: *id005
@@ -92,8 +76,6 @@ dependencies:
92
76
  requirements:
93
77
  - - ">="
94
78
  - !ruby/object:Gem::Version
95
- segments:
96
- - 0
97
79
  version: "0"
98
80
  type: :development
99
81
  version_requirements: *id006
@@ -160,21 +142,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
160
142
  requirements:
161
143
  - - ">="
162
144
  - !ruby/object:Gem::Version
163
- segments:
164
- - 0
165
145
  version: "0"
166
146
  required_rubygems_version: !ruby/object:Gem::Requirement
167
147
  none: false
168
148
  requirements:
169
149
  - - ">="
170
150
  - !ruby/object:Gem::Version
171
- segments:
172
- - 0
173
151
  version: "0"
174
152
  requirements: []
175
153
 
176
154
  rubyforge_project: mspire
177
- rubygems_version: 1.3.7
155
+ rubygems_version: 1.6.2
178
156
  signing_key:
179
157
  specification_version: 3
180
158
  summary: mspire library for working with mzIdentML and pepxml