divvy_proteomics 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 05ddf71aecd113201c370185104006af9705a525
4
- data.tar.gz: b450d8ecd60717f7286f234aed29beaf455aa4eb
3
+ metadata.gz: 57c4b7c64ec34ec42b4b28f6a31537a8901d5a7c
4
+ data.tar.gz: c216e563aa04c13e7b935852862994579e45d61f
5
5
  SHA512:
6
- metadata.gz: a0b9e987dd54239a0da817becc9755c42d0f375e89fc16ff7c9f442830b34379de98c89f3bad0d74286567c5cbaafdc6a97656f766f2e63da02ccd113004019e
7
- data.tar.gz: 2347260e7dba1a6bb08c91cc2183b445d534b36e1296e6d3338076ed3729073276df516dbfa83bce87e18af4e51721df744fc694eee3a60a096f27d9ecf0f666
6
+ metadata.gz: 7a1e7dd2be6565d9503ce1e5fc43cb2362920fa8146f66d7c179c0a812de5e1ea2be9f42d70f63bb2bf3811e44f5800f01e4bbbb8731a4365d129c01c138edae
7
+ data.tar.gz: 846cf30c524054c62205de65aba81d37bdd1f2857cec1e623730d167efa6eacc71491a999806e73547b50d0c36a4b87993245d20217d47090c8e3da6eecf31b3
data/Gemfile CHANGED
@@ -1,13 +1,13 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gem 'bio-logger', ">=0"
3
+ gem 'bio-logger', "~> 1.0"
4
4
 
5
5
  # Add dependencies to develop your gem here.
6
6
  # Include everything needed to run rake, tests, features, etc.
7
7
  group :development do
8
- gem 'systemu', ">=0"
9
- gem "rspec", ">= 2.8.0"
10
- gem "rdoc", ">= 3.12"
11
- gem "bundler", ">= 1.0.0"
12
- gem "jeweler", ">= 1.8.4"
8
+ gem 'systemu', "~> 2.6"
9
+ gem "rspec", "~> 2.14"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.5"
12
+ gem "jeweler", "~> 2.0"
13
13
  end
data/README.md CHANGED
@@ -13,20 +13,30 @@ $ gem install divvy_spectra
13
13
  $ divvy_spectra <DTASelectFile>
14
14
  ```
15
15
  Output is a table, with a row for each protein with a few columns, including number of unique spectra and the
16
- estimated number of spectral counts after sorting out the non-uniqueness.
16
+ estimated number of spectral counts after sorting out the non-uniqueness. Using the ```--pep-xml``` flag, PepXML files
17
+ can also be used as input:
18
+
19
+ ```
20
+ $ divvy_spectra --pep-xml <PepXML_file>
21
+ ```
17
22
 
18
23
  Full usage information:
19
24
  ```
20
- $ divvy_spectra -h
21
25
 
22
- Usage: divvy_spectra [options] <DTASelect_file>
26
+ Usage: divvy_spectra [options] <input_file>
23
27
 
24
- Takes a tab separated file containing a (possibly modified) output from a DTAselect run, and use some algorithm to divy up the spectra that match multiple peptides.
28
+ Takes a tab separated file containing a (possibly modified) output from a DTAselect run (or a pepXML file and add the flag --pep-xml), and use some algorithm to divy up the spectra that match multiple peptides.
25
29
 
26
30
  --merge-proteins FILE_OF_IDENTIFIERS
27
31
  Provide a space/tab separated file where the identifiers on each row should be treated as one protein
28
32
  --whitelist FILE_OF_PROTEINS_TO_REPORT
29
33
  Only report proteins that are in this whitelist, after divvying with everything
34
+ --contaminant-regexes REGEXES
35
+ Comma-separated list of regular expressions to apply to protein names. If the protein name matches then all spectra assigned to that protein are considered contaminants. [default: ]
36
+
37
+ Optional arguments:
38
+
39
+ --pep-xml Input file is pep XML, rather than a DTA select output file [default: false]
30
40
 
31
41
  Verbosity:
32
42
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.3.0
data/bin/divvy_spectra CHANGED
@@ -5,23 +5,24 @@ require 'bio-logger'
5
5
  require 'pp'
6
6
  require 'set'
7
7
 
8
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
8
+ SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'divvy_proteomics'
9
9
 
10
10
  rootpath = File.dirname(File.dirname(__FILE__))
11
11
  $: << File.join(rootpath,'lib')
12
- require 'dta_select_output'
12
+ require 'divvy_proteomics'
13
13
 
14
14
  # Parse command line options into the options hash
15
15
  options = {
16
16
  :logger => 'stderr',
17
17
  :log_level => 'info',
18
18
  :contaminant_regexes => [/^CNTM:/],
19
+ :input_is_pep_xml => false,
19
20
  }
20
21
  o = OptionParser.new do |opts|
21
22
  opts.banner = "
22
- Usage: #{SCRIPT_NAME} [options] <DTASelect_file>
23
+ Usage: #{SCRIPT_NAME} [options] <input_file>
23
24
 
24
- Takes a tab separated file containing a (possibly modified) output from a DTAselect run, and use some algorithm to divy up the spectra that match multiple peptides.\n\n"
25
+ Takes a tab separated file containing a (possibly modified) output from a DTAselect run (or a pepXML file and add the flag --pep-xml), and use some algorithm to divy up the spectra that match multiple peptides.\n\n"
25
26
 
26
27
  opts.on("--merge-proteins FILE_OF_IDENTIFIERS", "Provide a space/tab separated file where the identifiers on each row should be treated as one protein") do |file|
27
28
  options[:merge_proteins_file] = file
@@ -32,6 +33,10 @@ o = OptionParser.new do |opts|
32
33
  opts.on("--contaminant-regexes REGEXES", "Comma-separated list of regular expressions to apply to protein names. If the protein name matches then all spectra assigned to that protein are considered contaminants. [default: #{options[:contaminant_prefixes]}]") do |str|
33
34
  options[:contaminant_regexes] = str.split(/,/).collect{|s| /#{s}/}
34
35
  end
36
+ opts.separator "\nOptional arguments:\n\n"
37
+ opts.on("--pep-xml", "Input file is pep XML, rather than a DTA select output file [default: #{options[:input_is_pep_xml]}]") do |arg|
38
+ options[:input_is_pep_xml] = true
39
+ end
35
40
 
36
41
 
37
42
  # logger options
@@ -74,7 +79,12 @@ if options[:whitelist_file]
74
79
  end
75
80
 
76
81
  # Parse the csv file
77
- parsed = Bio::DTASelect::OutputFile.parse(ARGF)
82
+ parsed = nil
83
+ if options[:input_is_pep_xml]
84
+ parsed = Bio::PepXML.parse(ARGF)
85
+ else
86
+ parsed = Bio::DTASelect::OutputFile.parse(ARGF)
87
+ end
78
88
 
79
89
  # Hashes of identifiers to objects
80
90
  proteins = parsed.protein_name_to_object
@@ -90,12 +100,13 @@ mergers.each do |secondary_id, primary_id|
90
100
 
91
101
  # Invalidate some things about the primary ID because they are no longer valid
92
102
  current_protein = proteins[primary_id]
93
- current_protein.sequence_count = nil
94
- current_protein.sequence_coverage = nil
95
- current_protein.length = nil
96
- current_protein.molwt = nil
97
- current_protein.pi = nil
98
- current_protein.validation_status = nil
103
+ # These variables are not used and are not present in pepXML files, so don't mess with them.
104
+ # current_protein.sequence_count = nil
105
+ # current_protein.sequence_coverage = nil
106
+ # current_protein.length = nil
107
+ # current_protein.molwt = nil
108
+ # current_protein.pi = nil
109
+ # current_protein.validation_status = nil
99
110
  # Keep the primary proteins' description, I reckon
100
111
 
101
112
  # When there is spectra that are in the secondary but not the primary, add them to the primary's repertoire.
@@ -172,11 +183,13 @@ number_non_shared_peptides = all_peptides.select{|pep| pep.parent_proteins.lengt
172
183
  total_peptides = number_shared_peptides+number_non_shared_peptides
173
184
  log.info "Found #{number_shared_peptides} (#{number_shared_peptides.to_f/total_peptides*100}%) shared peptides and #{number_non_shared_peptides} (#{number_non_shared_peptides.to_f/total_peptides*100}%) non-shared peptides"
174
185
 
175
- # Find non-starred peptides that occur only once in the file - maybe not possible given a correctly formatted file?
176
- non_starred_but_uniquely_identified_peptides = hits.values.select do |peptide|
177
- peptide.dtaselect_attributes['Unique'] == nil and peptide.parent_proteins.length == 1
186
+ unless options[:input_is_pep_xml]
187
+ # Find non-starred peptides that occur only once in the file - maybe not possible given a correctly formatted file?
188
+ non_starred_but_uniquely_identified_peptides = hits.values.select do |peptide|
189
+ peptide.dtaselect_attributes['Unique'] == nil and peptide.parent_proteins.length == 1
190
+ end
191
+ log.debug "Found #{non_starred_but_uniquely_identified_peptides.length} different peptides that weren't starred or 2'd but the identifier is only found one time."
178
192
  end
179
- log.debug "Found #{non_starred_but_uniquely_identified_peptides.length} different peptides that weren't starred or 2'd but the identifier is only found one time."
180
193
 
181
194
  # OK, finished parsing the file. Now output the score for each protein
182
195
  puts [
@@ -2,14 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: divvy_proteomics 0.3.0 ruby lib
5
6
 
6
7
  Gem::Specification.new do |s|
7
8
  s.name = "divvy_proteomics"
8
- s.version = "0.2.0"
9
+ s.version = "0.3.0"
9
10
 
10
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
11
13
  s.authors = ["Ben J Woodcroft"]
12
- s.date = "2013-11-06"
14
+ s.date = "2014-01-07"
13
15
  s.description = "divvy up spectra from DTASelect files in a somewhat parsimonious way"
14
16
  s.email = "donttrustben@gmail.com"
15
17
  s.executables = ["divvy_spectra"]
@@ -28,8 +30,14 @@ Gem::Specification.new do |s|
28
30
  "bin/divvy_spectra",
29
31
  "divvy_proteomics.gemspec",
30
32
  "lib/divvy_proteomics.rb",
33
+ "lib/divvyable_protein.rb",
31
34
  "lib/dta_select_output.rb",
35
+ "lib/pep_xml.rb",
36
+ "spec/data/contaminant.pep.xml",
32
37
  "spec/data/merge_definition.csv",
38
+ "spec/data/minimal.pep.xml",
39
+ "spec/data/minimal2.pep.xml",
40
+ "spec/data/minimal3.pep.xml",
33
41
  "spec/data/multiply_mapped_spectra.csv",
34
42
  "spec/data/new_format.csv",
35
43
  "spec/data/new_format_some_all_shared_spectra.csv",
@@ -39,39 +47,39 @@ Gem::Specification.new do |s|
39
47
  "spec/data/three_proteins_meant_for_merge.csv",
40
48
  "spec/data/three_proteins_with_contaminant.csv",
41
49
  "spec/divvy_proteomics_spec.rb",
50
+ "spec/pep_xml_spec.rb",
42
51
  "spec/spec_helper.rb"
43
52
  ]
44
53
  s.homepage = "http://github.com/wwood/divvy_proteomics"
45
54
  s.licenses = ["MIT"]
46
- s.require_paths = ["lib"]
47
- s.rubygems_version = "2.0.3"
55
+ s.rubygems_version = "2.2.0"
48
56
  s.summary = "divvy up spectra from DTASelect files in a parsimonious way"
49
57
 
50
58
  if s.respond_to? :specification_version then
51
59
  s.specification_version = 4
52
60
 
53
61
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
54
- s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
55
- s.add_development_dependency(%q<systemu>, [">= 0"])
56
- s.add_development_dependency(%q<rspec>, [">= 2.8.0"])
57
- s.add_development_dependency(%q<rdoc>, [">= 3.12"])
58
- s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
59
- s.add_development_dependency(%q<jeweler>, [">= 1.8.4"])
62
+ s.add_runtime_dependency(%q<bio-logger>, ["~> 1.0"])
63
+ s.add_development_dependency(%q<systemu>, ["~> 2.6"])
64
+ s.add_development_dependency(%q<rspec>, ["~> 2.14"])
65
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
66
+ s.add_development_dependency(%q<bundler>, ["~> 1.5"])
67
+ s.add_development_dependency(%q<jeweler>, ["~> 2.0"])
60
68
  else
61
- s.add_dependency(%q<bio-logger>, [">= 0"])
62
- s.add_dependency(%q<systemu>, [">= 0"])
63
- s.add_dependency(%q<rspec>, [">= 2.8.0"])
64
- s.add_dependency(%q<rdoc>, [">= 3.12"])
65
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
66
- s.add_dependency(%q<jeweler>, [">= 1.8.4"])
69
+ s.add_dependency(%q<bio-logger>, ["~> 1.0"])
70
+ s.add_dependency(%q<systemu>, ["~> 2.6"])
71
+ s.add_dependency(%q<rspec>, ["~> 2.14"])
72
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
73
+ s.add_dependency(%q<bundler>, ["~> 1.5"])
74
+ s.add_dependency(%q<jeweler>, ["~> 2.0"])
67
75
  end
68
76
  else
69
- s.add_dependency(%q<bio-logger>, [">= 0"])
70
- s.add_dependency(%q<systemu>, [">= 0"])
71
- s.add_dependency(%q<rspec>, [">= 2.8.0"])
72
- s.add_dependency(%q<rdoc>, [">= 3.12"])
73
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
74
- s.add_dependency(%q<jeweler>, [">= 1.8.4"])
77
+ s.add_dependency(%q<bio-logger>, ["~> 1.0"])
78
+ s.add_dependency(%q<systemu>, ["~> 2.6"])
79
+ s.add_dependency(%q<rspec>, ["~> 2.14"])
80
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
81
+ s.add_dependency(%q<bundler>, ["~> 1.5"])
82
+ s.add_dependency(%q<jeweler>, ["~> 2.0"])
75
83
  end
76
84
  end
77
85
 
@@ -0,0 +1,16 @@
1
+ require 'bio-logger'
2
+ Bio::Log::LoggerPlus.new('divvy_proteomics')
3
+ module Bio
4
+ module DivvyProteomics
5
+ module Logging
6
+ def log
7
+ Bio::Log::LoggerPlus['divvy_proteomics']
8
+ end
9
+ end
10
+ end
11
+ end
12
+
13
+ require 'divvyable_protein'
14
+ require 'dta_select_output'
15
+ require 'pep_xml'
16
+
@@ -0,0 +1,40 @@
1
+ module Bio::DivvyProteomics::DivvyableProtein
2
+ def unique_spectra
3
+ return 0 if @peptides.nil? or @peptides.empty?
4
+ num = @peptides.select{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
5
+ num ||= 0
6
+ return num
7
+ end
8
+
9
+ def non_unique_spectra
10
+ return 0 if @peptides.nil? or @peptides.empty?
11
+ num = @peptides.reject{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
12
+ num ||= 0
13
+
14
+ return num
15
+ end
16
+
17
+ # Are there any peptides that are assigned exclusively to this protein?
18
+ def uniquely_identified_by_any_peptides?
19
+ unique_spectra > 0
20
+ end
21
+
22
+ def estimated_spectral_count
23
+ # How many unique spectra are there for each protein that shares a peptide with the current peptide
24
+ return 0 if @peptides.nil? or @peptides.empty?
25
+ peptide_shares = []
26
+ # If all peptides are non-unique and shared with some number of other proteins, then report 0 rather than trying to divvy them up
27
+ if !uniquely_identified_by_any_peptides?
28
+ # Don't attempt to divvy these up, because there are too many assumptions involved
29
+ return 0
30
+ else
31
+ peptides.each do |peptide|
32
+ log.debug "Tallying peptide #{peptide.identifier}, which is has #{peptide.redundancy} spectra shared among #{peptide.parent_proteins.length} proteins"
33
+ log.debug "These proteins have #{peptide.parent_proteins.collect{|pro| pro.unique_spectra}.inspect} unique spectra each"
34
+ total_linked_unique_spectra = peptide.parent_proteins.collect{|pro| pro.unique_spectra}.reduce(:+)
35
+ peptide_shares.push unique_spectra.to_f/total_linked_unique_spectra*peptide.redundancy
36
+ end
37
+ return peptide_shares.reduce(:+)
38
+ end
39
+ end
40
+ end
@@ -1,12 +1,9 @@
1
1
 
2
2
 
3
3
 
4
+
5
+
4
6
  module Bio::DTASelect
5
- module Logging
6
- def log
7
- Bio::Log::LoggerPlus['divvy_spectra']
8
- end
9
- end
10
7
 
11
8
  class OutputFile
12
9
  def self.log
@@ -14,7 +11,8 @@ module Bio::DTASelect
14
11
  end
15
12
 
16
13
  class SelectedProtein
17
- include Bio::DTASelect::Logging
14
+ include Bio::DivvyProteomics::Logging
15
+ include Bio::DivvyProteomics::DivvyableProtein
18
16
 
19
17
  attr_accessor :identifier
20
18
 
@@ -26,43 +24,7 @@ module Bio::DTASelect
26
24
  @peptides = []
27
25
  end
28
26
 
29
- def unique_spectra
30
- return 0 if @peptides.nil? or @peptides.empty?
31
- num = @peptides.select{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
32
- num ||= 0
33
- return num
34
- end
35
-
36
- def non_unique_spectra
37
- return 0 if @peptides.nil? or @peptides.empty?
38
- num = @peptides.reject{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
39
- num ||= 0
40
- return num
41
- end
42
27
 
43
- # Are there any peptides that are assigned exclusively to this protein?
44
- def uniquely_identified_by_any_peptides?
45
- unique_spectra > 0
46
- end
47
-
48
- def estimated_spectral_count
49
- # How many unique spectra are there for each protein that shares a peptide with the current peptide
50
- return 0 if @peptides.nil? or @peptides.empty?
51
- peptide_shares = []
52
- # If all peptides are non-unique and shared with some number of other proteins, then output a negative number num shared spectra divided by the number of proteins
53
- if !uniquely_identified_by_any_peptides?
54
- # Don't attempt to divvy these up, because there are too many assumptions involved
55
- return 0
56
- else
57
- peptides.each do |peptide|
58
- log.debug "Tallying peptide #{peptide.identifier}, which is has #{peptide.redundancy} spectra shared among #{peptide.parent_proteins.length} proteins"
59
- log.debug "These proteins have #{peptide.parent_proteins.collect{|pro| pro.unique_spectra}.inspect} unique spectra each"
60
- total_linked_unique_spectra = peptide.parent_proteins.collect{|pro| pro.unique_spectra}.reduce(:+)
61
- peptide_shares.push unique_spectra.to_f/total_linked_unique_spectra*peptide.redundancy
62
- end
63
- return peptide_shares.reduce(:+)
64
- end
65
- end
66
28
 
67
29
  def log
68
30
  Bio::Log::LoggerPlus[LOG_NAME]
@@ -70,7 +32,7 @@ module Bio::DTASelect
70
32
  end
71
33
 
72
34
  class Peptide
73
- include Bio::DTASelect::Logging
35
+ include Bio::DivvyProteomics::Logging
74
36
 
75
37
  attr_accessor :identifier
76
38
 
@@ -98,7 +60,7 @@ module Bio::DTASelect
98
60
  end
99
61
 
100
62
  class Result
101
- include Bio::DTASelect::Logging
63
+ include Bio::DivvyProteomics::Logging
102
64
 
103
65
  # hash of protein identifier to Protein object
104
66
  attr_accessor :protein_name_to_object
@@ -123,7 +85,7 @@ module Bio::DTASelect
123
85
  # Parse each line of the DTAselect file
124
86
  io.each_line do |line|
125
87
  splits = line.chomp.split("\t")
126
- log.debug "Parsing line `#{line.chomp}'"
88
+ log.debug "Parsing line `#{line.chomp}'" if log.debug?
127
89
 
128
90
  if reading_header
129
91
  log.debug "reading header"
@@ -146,7 +108,7 @@ module Bio::DTASelect
146
108
  if !last_line_was_protein_name
147
109
  # Sometimes several proteins are given all in the one header line
148
110
  # start a new protein
149
- log.debug "New protein now being parsed"
111
+ log.debug "New protein now being parsed" if log.debug?
150
112
  current_proteins = []
151
113
  end
152
114
 
@@ -174,13 +136,13 @@ module Bio::DTASelect
174
136
 
175
137
 
176
138
  elsif splits[1] == 'Proteins'
177
- # Done processing, except for the bits down the bottom which aren't parsed (yet)
139
+ # Done processing, except for the bits down the bottom which aren't parsed (yet, at least)
178
140
  break
179
141
 
180
142
 
181
143
 
182
144
  else
183
- log.debug "New spectra now being parsed"
145
+ log.debug "New spectra now being parsed" if log.debug?
184
146
  last_line_was_protein_name = false
185
147
 
186
148
  # Record a spectra
@@ -204,11 +166,11 @@ module Bio::DTASelect
204
166
  pep.parent_proteins.push current_protein
205
167
  current_protein.peptides.push pep
206
168
  end
207
- log.debug "Parsed this peptide #{pep.inspect}"
169
+ log.debug "Parsed this peptide #{pep.inspect}" if log.debug?
208
170
  end
209
171
  end
210
172
 
211
- log.debug "Proteins parsed: #{result.protein_name_to_object.inspect}"
173
+ log.debug "Proteins parsed: #{result.protein_name_to_object.inspect}" if log.debug?
212
174
  return result
213
175
  end
214
176
  end
data/lib/pep_xml.rb ADDED
@@ -0,0 +1,130 @@
1
+ require 'rexml/document'
2
+
3
+ class Bio::PepXML
4
+ include Bio::DivvyProteomics::Logging
5
+
6
+ attr_accessor :protein_name_to_object, :peptide_name_to_object
7
+
8
+ class Protein
9
+ include Bio::DivvyProteomics::Logging
10
+ include Bio::DivvyProteomics::DivvyableProtein
11
+
12
+ # Array of peptide objects that have been assigned to this protein
13
+ attr_accessor :peptides
14
+
15
+ attr_accessor :identifier, :descriptive_name
16
+ end
17
+
18
+ # Named 'Peptide' but really mean Spectra. Just too hard to change
19
+ class Peptide
20
+ attr_accessor :parent_proteins
21
+
22
+ # Name of the spectra
23
+ attr_accessor :identifier
24
+
25
+ def initialize
26
+ @parent_proteins = []
27
+ end
28
+
29
+ #TODO: right now this just always returns 1. It should really be working out redundancy
30
+ #properly by comparison of peptide sequences, but that information isn't parsed yet
31
+ def redundancy
32
+ 1
33
+ end
34
+ end
35
+
36
+ def self.log
37
+ Bio::PepXML.new.log
38
+ end
39
+
40
+ def self.parse(io)
41
+ protein_name_to_object = {}
42
+ peptide_name_to_object = {}
43
+
44
+ #pep.elements.each('msms_pipeline_analysis/msms_run_summary/spectrum_query/search_result/search_hit'){|e|
45
+ # c+=1; p e.attributes['protein_descr'].strip;
46
+ # e.elements.each{|e|
47
+ # p e.name, e.attributes['protein_descr'].strip};break}
48
+ xml = REXML::Document.new(io)
49
+
50
+ parse_name_and_description = lambda do |e|
51
+ name = e.attributes['protein'].strip
52
+ description = e.attributes['protein_descr'].strip
53
+ if name.nil? or name == ''
54
+ name = e.attributes['protein_descr'].strip
55
+ else
56
+ description = name+' '+description
57
+ end
58
+ name.gsub!(/\t.*/,'')
59
+ description.gsub!(/[\t\n]/,' ')
60
+
61
+ [name, description]
62
+ end
63
+
64
+ #TODO: some better sanity checking here would be ideal.
65
+ num_hits_parsed = 0
66
+ xml.elements.each('msms_pipeline_analysis/msms_run_summary/spectrum_query/search_result/search_hit') do |hit|
67
+ hit_number = hit.attributes['hit_rank']
68
+ raise "Parsing error on #{hit}" if hit_number.nil?
69
+ next if hit_number != "1"
70
+
71
+ # Parse the primary hit
72
+ name1, description1 = parse_name_and_description.call(hit)
73
+ raise "No protein name found in this xml fragment: #{hit.to_s}" if name1.nil?
74
+ spectrum_name = hit.parent.parent.attributes['spectrum'].strip
75
+ raise "Parsing error (couldn't find spectrum name) with spectra #{hit.inspect}" if spectrum_name.nil?
76
+
77
+ # It is possible to have multiple peptides both hit the spectra with hit_rank="1"
78
+ # This happens when e.g. leucine and isoleucine are possible.
79
+ spectrum = peptide_name_to_object[spectrum_name]
80
+ if spectrum.nil?
81
+ spectrum = Peptide.new
82
+ spectrum.identifier = spectrum_name
83
+ peptide_name_to_object[spectrum_name] = spectrum
84
+ end
85
+
86
+
87
+ protein1 = protein_name_to_object[name1]
88
+ if protein1.nil?
89
+ protein1 = Protein.new
90
+ protein1.identifier = name1
91
+ protein1.descriptive_name = description1
92
+ protein1.peptides = []
93
+ protein_name_to_object[name1] = protein1
94
+ end
95
+ protein1.peptides.push spectrum
96
+ spectrum.parent_proteins ||= []
97
+ spectrum.parent_proteins.push protein1
98
+
99
+
100
+ # Parse the alternate hits. Only look at children with protein_descr attributes - these are
101
+ # the alternate proteins
102
+ hit.each_element_with_attribute('protein_descr') do |e|
103
+ name, description = parse_name_and_description.call(e)
104
+
105
+ alternate = protein_name_to_object[name]
106
+ if alternate.nil?
107
+ alternate = Protein.new
108
+ alternate.identifier = name
109
+ alternate.descriptive_name = description
110
+ alternate.peptides = []
111
+ protein_name_to_object[name] = alternate
112
+ end
113
+ alternate.peptides.push spectrum
114
+ spectrum.parent_proteins.push alternate
115
+ end
116
+
117
+ # Don't count the same protein multiple times - might happen when a spectrum's primary and alternative hits name the same protein
118
+ spectrum.parent_proteins.uniq!
119
+
120
+ num_hits_parsed += 1
121
+ end
122
+ log.info "Parsed #{num_hits_parsed} search hits"
123
+
124
+ pepxml = Bio::PepXML.new
125
+ pepxml.protein_name_to_object = protein_name_to_object
126
+ pepxml.peptide_name_to_object = peptide_name_to_object
127
+
128
+ return pepxml
129
+ end
130
+ end
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_03.1121.1121.2" start_scan="1121" end_scan="1121" retention_time_sec="5.4199816666666667" activation_method="CID" precursor_intensity="388495.5625" precursor_neutral_mass="1329.7252673153125" assumed_charge="2" index="221">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="NLDLDSIIAEVK" protein="CNTM:cont_sp" num_tot_proteins="2" num_matched_ions="0" calc_neutral_pep_mass="1329.7252673153125" massdiff="0" protein_descr="P13647 K2C5_HUMAN Keratin, type II cytoskeletal 5 (Cytokeratin 5) (K5) (CK 5) (58 kDa cytokeratin) - Homo sapiens (Human). # pI:8.14 MW:62462" protein_mw="62.423064734660052" calc_pI="8.06005859375">
7
+ <alternative_protein protein="CNTM:cont_sp" protein_descr="P48668 K2CE_HUMAN Keratin, type II cytoskeletal 6E (Cytokeratin 6E) (CK 6E) (K6e keratin) - Homo sapiens (Human). # pI:8.14 MW:60092" protein_mw="60.05537958466001" />
8
+ <search_score name="XCorr" value="4.5027022361755371" />
9
+ </search_hit>
10
+ </search_result>
11
+ </spectrum_query>
12
+ </msms_run_summary>
13
+ </msms_pipeline_analysis>
@@ -0,0 +1,14 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
7
+ <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
8
+ <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
9
+ <search_score name="XCorr" value="4.7916374206542969" />
10
+ </search_hit>
11
+ </search_result>
12
+ </spectrum_query>
13
+ </msms_run_summary>
14
+ </msms_pipeline_analysis>
@@ -0,0 +1,12 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
7
+ <search_score name="XCorr" value="4.7916374206542969" />
8
+ </search_hit>
9
+ </search_result>
10
+ </spectrum_query>
11
+ </msms_run_summary>
12
+ </msms_pipeline_analysis>
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
7
+ <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
8
+ <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
9
+ <search_score name="XCorr" value="4.7916374206542969" />
10
+ </search_hit>
11
+ </search_result>
12
+ </spectrum_query>
13
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2_3" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
14
+ <search_result search_id="1">
15
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
16
+ <search_score name="XCorr" value="4.7916374206542969" />
17
+ </search_hit>
18
+ </search_result>
19
+ </spectrum_query>
20
+ </msms_run_summary>
21
+ </msms_pipeline_analysis>
@@ -11,8 +11,6 @@ $:.unshift File.join(File.dirname(__FILE__),'..')
11
11
  script_under_test = File.basename(__FILE__).gsub(/^test_/,'')
12
12
  path_to_script = File.join(File.dirname(__FILE__),'..','bin','divvy_spectra')
13
13
 
14
- TEST_DATA_DIR = File.join(File.dirname(__FILE__),'data')
15
-
16
14
  describe script_under_test do
17
15
  let(:header){"ID\tUnique spectra\tNon-unique spectra\tEstimated total spectra\tNormalised spectral count\tDescription\tProteins sharing spectra\n"}
18
16
  it 'should do 1 protein hit' do
@@ -0,0 +1,99 @@
1
+ require 'systemu'
2
+ require 'pp'
3
+ require 'open3'
4
+ require 'tempfile'
5
+
6
+ require 'spec_helper'
7
+
8
+
9
+
10
+ describe 'pepxml parsing' do
11
+ let(:header){"ID\tUnique spectra\tNon-unique spectra\tEstimated total spectra\tNormalised spectral count\tDescription\tProteins sharing spectra\n"}
12
+ it 'should parse decently' do
13
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal.pep.xml')))
14
+
15
+ # <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
16
+ # <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
17
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
18
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
19
+ # <search_score name="XCorr" value="4.7916374206542969" />
20
+
21
+ pepxml.kind_of?(Bio::PepXML).should == true
22
+
23
+ pepxml.protein_name_to_object.keys.sort.should == [
24
+ '>38SUR_2379_1524213_2',
25
+ '>38SUR_6350_1528184_1',
26
+ '>38SUR_80622_1602456_1',
27
+ ].sort
28
+ pepxml.peptide_name_to_object.keys.sort.should == [
29
+ 'Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2'
30
+ ]
31
+ pepxml.protein_name_to_object.values.each do |prot|
32
+ prot.kind_of?(Bio::PepXML::Protein).should == true
33
+ end
34
+ pepxml.peptide_name_to_object.values.each do |prot|
35
+ prot.kind_of?(Bio::PepXML::Peptide).should == true
36
+ end
37
+
38
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
39
+ prot1.identifier.should == '>38SUR_2379_1524213_2'
40
+ prot1.descriptive_name.should == '>38SUR_2379_1524213_2'
41
+ end
42
+
43
+ it 'should respond to divvy proteomics module things' do
44
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal.pep.xml')))
45
+
46
+ # <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
47
+ # <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
48
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
49
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
50
+ # <search_score name="XCorr" value="4.7916374206542969" />
51
+
52
+ pepxml.kind_of?(Bio::PepXML).should == true
53
+
54
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
55
+ prot1.peptides.length.should == 1
56
+ prot1.unique_spectra.should == 0
57
+ prot1.non_unique_spectra.should == 1
58
+ prot1.estimated_spectral_count.should == 0.0
59
+
60
+
61
+ prot1 = pepxml.protein_name_to_object['>38SUR_6350_1528184_1']
62
+ prot1.peptides.length.should == 1
63
+ prot1.unique_spectra.should == 0
64
+ prot1.non_unique_spectra.should == 1
65
+ prot1.estimated_spectral_count.should == 0.0
66
+ end
67
+
68
+ it 'should respond to divvy proteomics module things with 1 unique hit' do
69
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal2.pep.xml')))
70
+ pepxml.kind_of?(Bio::PepXML).should == true
71
+
72
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
73
+ prot1.peptides.length.should == 1
74
+ prot1.unique_spectra.should == 1
75
+ prot1.non_unique_spectra.should == 0
76
+ prot1.estimated_spectral_count.should == 1.0
77
+ end
78
+
79
+ it 'should respond to divvy proteomics module things with 2 hits, where 1 is unique' do
80
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal3.pep.xml')))
81
+ pepxml.kind_of?(Bio::PepXML).should == true
82
+
83
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
84
+ prot1.peptides.length.should == 2
85
+ prot1.unique_spectra.should == 1
86
+ prot1.non_unique_spectra.should == 1
87
+ prot1.estimated_spectral_count.should == 2.0
88
+ end
89
+
90
+ it 'should parse when the protein and protein_desc attributes are both defined' do
91
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'contaminant.pep.xml')))
92
+ pepxml.kind_of?(Bio::PepXML).should == true
93
+
94
+ prot1 = pepxml.protein_name_to_object['CNTM:cont_sp']
95
+ prot1.nil?.should == false
96
+ prot1.identifier.should == 'CNTM:cont_sp'
97
+ prot1.descriptive_name.should == 'CNTM:cont_sp P13647 K2C5_HUMAN Keratin, type II cytoskeletal 5 (Cytokeratin 5) (K5) (CK 5) (58 kDa cytokeratin) - Homo sapiens (Human). # pI:8.14 MW:62462'
98
+ end
99
+ end
data/spec/spec_helper.rb CHANGED
@@ -8,5 +8,7 @@ require 'divvy_proteomics'
8
8
  Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
9
 
10
10
  RSpec.configure do |config|
11
-
11
+
12
12
  end
13
+
14
+ TEST_DATA_DIR = File.join(File.dirname(__FILE__),'data')
metadata CHANGED
@@ -1,99 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: divvy_proteomics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben J Woodcroft
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-06 00:00:00.000000000 Z
11
+ date: 2014-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio-logger
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: systemu
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '2.6'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '2.6'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.8.0
47
+ version: '2.14'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.8.0
54
+ version: '2.14'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rdoc
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
61
  version: '3.12'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.12'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.0.0
75
+ version: '1.5'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.0.0
82
+ version: '1.5'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: jeweler
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - '>='
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 1.8.4
89
+ version: '2.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 1.8.4
96
+ version: '2.0'
97
97
  description: divvy up spectra from DTASelect files in a somewhat parsimonious way
98
98
  email: donttrustben@gmail.com
99
99
  executables:
@@ -103,8 +103,8 @@ extra_rdoc_files:
103
103
  - LICENSE.txt
104
104
  - README.md
105
105
  files:
106
- - .document
107
- - .rspec
106
+ - ".document"
107
+ - ".rspec"
108
108
  - Gemfile
109
109
  - LICENSE.txt
110
110
  - README.md
@@ -113,8 +113,14 @@ files:
113
113
  - bin/divvy_spectra
114
114
  - divvy_proteomics.gemspec
115
115
  - lib/divvy_proteomics.rb
116
+ - lib/divvyable_protein.rb
116
117
  - lib/dta_select_output.rb
118
+ - lib/pep_xml.rb
119
+ - spec/data/contaminant.pep.xml
117
120
  - spec/data/merge_definition.csv
121
+ - spec/data/minimal.pep.xml
122
+ - spec/data/minimal2.pep.xml
123
+ - spec/data/minimal3.pep.xml
118
124
  - spec/data/multiply_mapped_spectra.csv
119
125
  - spec/data/new_format.csv
120
126
  - spec/data/new_format_some_all_shared_spectra.csv
@@ -124,6 +130,7 @@ files:
124
130
  - spec/data/three_proteins_meant_for_merge.csv
125
131
  - spec/data/three_proteins_with_contaminant.csv
126
132
  - spec/divvy_proteomics_spec.rb
133
+ - spec/pep_xml_spec.rb
127
134
  - spec/spec_helper.rb
128
135
  homepage: http://github.com/wwood/divvy_proteomics
129
136
  licenses:
@@ -135,17 +142,17 @@ require_paths:
135
142
  - lib
136
143
  required_ruby_version: !ruby/object:Gem::Requirement
137
144
  requirements:
138
- - - '>='
145
+ - - ">="
139
146
  - !ruby/object:Gem::Version
140
147
  version: '0'
141
148
  required_rubygems_version: !ruby/object:Gem::Requirement
142
149
  requirements:
143
- - - '>='
150
+ - - ">="
144
151
  - !ruby/object:Gem::Version
145
152
  version: '0'
146
153
  requirements: []
147
154
  rubyforge_project:
148
- rubygems_version: 2.0.3
155
+ rubygems_version: 2.2.0
149
156
  signing_key:
150
157
  specification_version: 4
151
158
  summary: divvy up spectra from DTASelect files in a parsimonious way