ms-ident 0.0.23 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.23
1
+ 0.1.1
@@ -1,5 +1,6 @@
1
1
  require 'ms/in_silico/digester'
2
2
  require 'ms/fasta'
3
+ require 'optparse'
3
4
 
4
5
  module Ms ; end
5
6
  module Ms::Ident ; end
@@ -2,7 +2,12 @@ module Ms ; end
2
2
  module Ms::Ident ; end
3
3
 
4
4
  class Ms::Ident::PeptideHit
5
+ attr_accessor :id
6
+ attr_accessor :search
7
+ attr_accessor :missed_cleavages
5
8
  attr_accessor :aaseq
6
9
  attr_accessor :charge
7
10
  attr_accessor :proteins
11
+ attr_accessor :qvalue
8
12
  end
13
+
@@ -1,3 +1,4 @@
1
+ require 'ms/ident/search'
1
2
  require 'ms/ident/peptide_hit'
2
3
 
3
4
  module Ms ; end
@@ -5,10 +6,9 @@ module Ms::Ident ; end
5
6
 
6
7
  class Ms::Ident::PeptideHit
7
8
  module Qvalue
8
- attr_accessor :qvalue
9
9
  FILE_EXTENSION = '.phq.tsv'
10
10
  FILE_DELIMITER = "\t"
11
- HEADER = %w(aaseq charge qvalue)
11
+ HEADER = %w(run_id id aaseq charge qvalue)
12
12
 
13
13
  class << self
14
14
 
@@ -24,7 +24,7 @@ class Ms::Ident::PeptideHit
24
24
  File.open(filename,'w') do |out|
25
25
  out.puts HEADER.join(FILE_DELIMITER)
26
26
  hits.zip(qvalues) do |hit, qvalue|
27
- out.puts [hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
27
+ out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
28
28
  end
29
29
  end
30
30
  filename
@@ -32,15 +32,17 @@ class Ms::Ident::PeptideHit
32
32
 
33
33
  # returns an array of PeptideHit objects from a phq.tsv
34
34
  def from_file(filename)
35
+ searches = Hash.new {|h,id| h[id] = Ms::Ident::Search.new(id) }
35
36
  peptide_hits = []
36
37
  File.open(filename) do |io|
37
38
  header = io.readline.chomp.split(FILE_DELIMITER)
38
39
  raise "bad headers" unless header == HEADER
39
40
  io.each do |line|
40
41
  line.chomp!
41
- (aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
42
+ (run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
42
43
  ph = Ms::Ident::PeptideHit.new
43
- ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
44
+ ph.search = searches[run_id]
45
+ ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
44
46
  peptide_hits << ph
45
47
  end
46
48
  end
@@ -51,5 +53,4 @@ class Ms::Ident::PeptideHit
51
53
 
52
54
  end
53
55
  end # Qvalue
54
- include Qvalue
55
56
  end # Peptide Hit
@@ -27,12 +27,12 @@ class Ms::Ident::Pepxml
27
27
  # returns an array of Ms::Ident::Pepxml::SearchHit::Simple structs
28
28
  def self.simple_search_hits(file)
29
29
  hit_values = File.open(file) do |io|
30
- doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
30
+ doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
31
31
  # we can work with namespaces, or just remove them ...
32
32
  doc.remove_namespaces!
33
33
  root = doc.root
34
34
  search_hits = root.xpath('//search_hit')
35
- search_hits.map do |search_hit|
35
+ search_hits.each_with_index.map do |search_hit,i|
36
36
  aaseq = search_hit['peptide']
37
37
  charge = search_hit.parent.parent['assumed_charge'].to_i
38
38
  search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
@@ -40,7 +40,7 @@ class Ms::Ident::Pepxml
40
40
  search_score_nodes.each do |node|
41
41
  search_scores[node['name'].to_sym] = node['value'].to_f
42
42
  end
43
- Ms::Ident::Pepxml::SearchHit::Simple.new(aaseq, charge, search_scores)
43
+ Ms::Ident::Pepxml::SearchHit::Simple.new("hit_#{i}", Ms::Ident::Search.new(file.chomp(File.extname(file))), aaseq, charge, search_scores)
44
44
  end
45
45
  end
46
46
  end
@@ -137,7 +137,7 @@ class Ms::Ident::Pepxml
137
137
  self
138
138
  end
139
139
 
140
- Simple = Struct.new(:aaseq, :charge, :search_scores)
140
+ Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
141
141
  end
142
142
 
143
143
  end
@@ -2,11 +2,24 @@ module Ms ; end
2
2
  module Ms::Ident ; end
3
3
 
4
4
 
5
- module Ms::Ident::Protein
6
- # gives the information up until the first space or carriage return.
7
- # Assumes the protein can respond_to? :reference
8
- def first_entry
9
- reference.split(/[\s\r]/)[0]
5
+ class Ms::Ident::Protein
6
+
7
+ attr_accessor :id
8
+ attr_accessor :seq
9
+ alias_method :sequence, :seq
10
+ alias_method :sequence=, :seq=
11
+
12
+ attr_accessor :description
13
+
14
+ def initialize(id=nil, seq=nil)
15
+ (@id, @seq) = id, seq
10
16
  end
17
+
18
+ # DEPRECATING THIS GUY
19
+ ## gives the information up until the first space or carriage return.
20
+ ## Assumes the protein can respond_to? :reference
21
+ #def first_entry
22
+ # reference.split(/[\s\r]/)[0]
23
+ #end
11
24
  end
12
25
 
@@ -28,7 +28,12 @@ module Ms
28
28
  # accuracy.
29
29
  #
30
30
  # returns an array of ProteinGroup objects, each set with :peptide_hits
31
- def self.peptide_hits_to_protein_groups(peptide_hits, &sort_by)
31
+ #
32
+ # If update_peptide_hits is true, then each peptide_hit is linked to the array
33
+ # of protein_groups it is associated with using :protein_groups. A
34
+ # symbol can also be passed in, and that method will be called instead.
35
+ def self.peptide_hits_to_protein_groups(peptide_hits, update_peptide_hits=false, &sort_by)
36
+ update_peptide_hits = 'protein_groups='.to_sym if (update_peptide_hits==true)
32
37
  sort_by ||= PRIORITIZE_PROTEINS
33
38
  # note to self: I wrote this in 2011, so I think I know what I'm doing now
34
39
  protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
@@ -64,6 +69,9 @@ module Ms
64
69
  group.peptide_hits = peptide_set if has_an_unaccounted_peptide
65
70
  has_an_unaccounted_peptide
66
71
  end
72
+ if update_peptide_hits
73
+ greedy_first.each {|pg, pephits| pephits.each {|hit| hit.send(update_peptide_hits, pg) } }
74
+ end
67
75
  greedy_first.map(&:first)
68
76
  end
69
77
 
@@ -1,17 +1,21 @@
1
+ require 'andand'
2
+
1
3
 
2
4
  module Ms ; end
3
5
  module Ms::Ident ; end
4
6
 
5
- class Ms::Ident::ProteinHit
6
- attr_accessor :id
7
- attr_accessor :seq
8
- alias_method :sequence, :seq
9
- alias_method :sequence=, :seq=
7
+ class Ms::Ident::ProteinHit < Ms::Ident::Protein
10
8
  attr_accessor :peptide_hits
11
9
 
12
- def initialize(id=nil)
13
- @peptide_hits = []
10
+ def initialize(id=nil, peptide_hits=[])
11
+ @peptide_hits = peptide_hits
14
12
  @id = id
15
13
  end
14
+
15
# Extracts the gene identifier from the protein description.
#
# Looks for a " GN=<word characters>" token in +description+ (note the
# required leading space) and returns the captured word as a String.
# Returns nil when the description is nil or when no such token is present.
def gene_id
  # String#[regexp, 1] yields nil on a failed match, so no MatchData is
  # indexed; the previous match(...)[1] form raised NoMethodError on nil.
  description && description[/ GN=(\w+) ?/, 1]
end
16
20
  end
17
21
 
@@ -2,9 +2,15 @@
2
2
  module Ms
3
3
  module Ident
4
4
 
5
- module Search
6
- attr_accessor :proteins
7
- attr_accessor :peptides
5
+ class Search
6
+ attr_accessor :id
7
+ attr_accessor :peptide_hits
8
+ alias_method :hits, :peptide_hits
9
+
10
+ def initialize(id=nil, peptide_hits=[])
11
+ @id = id
12
+ @peptide_hits = peptide_hits
13
+ end
8
14
 
9
15
  # returns an array of peptide_hits and protein_hits that are linked to
10
16
  # one another. NOTE: this will update peptide and protein
@@ -40,7 +46,6 @@ module Ms
40
46
 
41
47
 
42
48
  module SearchGroup
43
- include Search
44
49
 
45
50
  # an array of search objects
46
51
  attr_accessor :searches
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-ident
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 23
9
- version: 0.0.23
4
+ prerelease:
5
+ version: 0.1.1
10
6
  platform: ruby
11
7
  authors:
12
8
  - John T. Prince
@@ -14,7 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-03-31 00:00:00 -06:00
13
+ date: 2011-04-26 00:00:00 -06:00
18
14
  default_executable:
19
15
  dependencies:
20
16
  - !ruby/object:Gem::Dependency
@@ -25,8 +21,6 @@ dependencies:
25
21
  requirements:
26
22
  - - ">="
27
23
  - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
24
  version: "0"
31
25
  type: :runtime
32
26
  version_requirements: *id001
@@ -38,10 +32,6 @@ dependencies:
38
32
  requirements:
39
33
  - - ">="
40
34
  - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
- - 0
44
- - 12
45
35
  version: 0.0.12
46
36
  type: :runtime
47
37
  version_requirements: *id002
@@ -53,8 +43,6 @@ dependencies:
53
43
  requirements:
54
44
  - - ">="
55
45
  - !ruby/object:Gem::Version
56
- segments:
57
- - 0
58
46
  version: "0"
59
47
  type: :runtime
60
48
  version_requirements: *id003
@@ -66,8 +54,6 @@ dependencies:
66
54
  requirements:
67
55
  - - ">="
68
56
  - !ruby/object:Gem::Version
69
- segments:
70
- - 0
71
57
  version: "0"
72
58
  type: :runtime
73
59
  version_requirements: *id004
@@ -79,8 +65,6 @@ dependencies:
79
65
  requirements:
80
66
  - - ">="
81
67
  - !ruby/object:Gem::Version
82
- segments:
83
- - 0
84
68
  version: "0"
85
69
  type: :development
86
70
  version_requirements: *id005
@@ -92,8 +76,6 @@ dependencies:
92
76
  requirements:
93
77
  - - ">="
94
78
  - !ruby/object:Gem::Version
95
- segments:
96
- - 0
97
79
  version: "0"
98
80
  type: :development
99
81
  version_requirements: *id006
@@ -160,21 +142,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
160
142
  requirements:
161
143
  - - ">="
162
144
  - !ruby/object:Gem::Version
163
- segments:
164
- - 0
165
145
  version: "0"
166
146
  required_rubygems_version: !ruby/object:Gem::Requirement
167
147
  none: false
168
148
  requirements:
169
149
  - - ">="
170
150
  - !ruby/object:Gem::Version
171
- segments:
172
- - 0
173
151
  version: "0"
174
152
  requirements: []
175
153
 
176
154
  rubyforge_project: mspire
177
- rubygems_version: 1.3.7
155
+ rubygems_version: 1.6.2
178
156
  signing_key:
179
157
  specification_version: 3
180
158
  summary: mspire library for working with mzIdentML and pepxml