divvy_proteomics 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 05ddf71aecd113201c370185104006af9705a525
4
- data.tar.gz: b450d8ecd60717f7286f234aed29beaf455aa4eb
3
+ metadata.gz: 57c4b7c64ec34ec42b4b28f6a31537a8901d5a7c
4
+ data.tar.gz: c216e563aa04c13e7b935852862994579e45d61f
5
5
  SHA512:
6
- metadata.gz: a0b9e987dd54239a0da817becc9755c42d0f375e89fc16ff7c9f442830b34379de98c89f3bad0d74286567c5cbaafdc6a97656f766f2e63da02ccd113004019e
7
- data.tar.gz: 2347260e7dba1a6bb08c91cc2183b445d534b36e1296e6d3338076ed3729073276df516dbfa83bce87e18af4e51721df744fc694eee3a60a096f27d9ecf0f666
6
+ metadata.gz: 7a1e7dd2be6565d9503ce1e5fc43cb2362920fa8146f66d7c179c0a812de5e1ea2be9f42d70f63bb2bf3811e44f5800f01e4bbbb8731a4365d129c01c138edae
7
+ data.tar.gz: 846cf30c524054c62205de65aba81d37bdd1f2857cec1e623730d167efa6eacc71491a999806e73547b50d0c36a4b87993245d20217d47090c8e3da6eecf31b3
data/Gemfile CHANGED
@@ -1,13 +1,13 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gem 'bio-logger', ">=0"
3
+ gem 'bio-logger', "~> 1.0"
4
4
 
5
5
  # Add dependencies to develop your gem here.
6
6
  # Include everything needed to run rake, tests, features, etc.
7
7
  group :development do
8
- gem 'systemu', ">=0"
9
- gem "rspec", ">= 2.8.0"
10
- gem "rdoc", ">= 3.12"
11
- gem "bundler", ">= 1.0.0"
12
- gem "jeweler", ">= 1.8.4"
8
+ gem 'systemu', "~> 2.6"
9
+ gem "rspec", "~> 2.14"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.5"
12
+ gem "jeweler", "~> 2.0"
13
13
  end
data/README.md CHANGED
@@ -13,20 +13,30 @@ $ gem install divvy_spectra
13
13
  $ divvy_spectra <DTASelectFile>
14
14
  ```
15
15
  Output is a table, with a row for each protein with a few columns, including number of unique spectra and the
16
- estimated number of spectral counts after sorting out the non-uniqueness.
16
+ estimated number of spectral counts after sorting out the non-uniqueness. Using the ```--pep-xml``` flag, PepXML files
17
+ can also be used as input:
18
+
19
+ ```
20
+ $ divvy_spectra --pep-xml <PepXML_file>
21
+ ```
17
22
 
18
23
  Full usage information:
19
24
  ```
20
- $ divvy_spectra -h
21
25
 
22
- Usage: divvy_spectra [options] <DTASelect_file>
26
+ Usage: divvy_spectra [options] <input_file>
23
27
 
24
- Takes a tab separated file containing a (possibly modified) output from a DTAselect run, and use some algorithm to divy up the spectra that match multiple peptides.
28
+ Takes a tab separated file containing a (possibly modified) output from a DTAselect run (or a pepXML file and add the flag --pep-xml), and use some algorithm to divy up the spectra that match multiple peptides.
25
29
 
26
30
  --merge-proteins FILE_OF_IDENTIFIERS
27
31
  Provide a space/tab separated file where the identifiers on each row should be treated as one protein
28
32
  --whitelist FILE_OF_PROTEINS_TO_REPORT
29
33
  Only report proteins that are in this whitelist, after divvying with everything
34
+ --contaminant-regexes REGEXES
35
+ Comma-separated list of regular expressions to apply to protein names. If the protein name matches then all spectra assigned to that protein are considered contaminants. [default: ]
36
+
37
+ Optional arguments:
38
+
39
+ --pep-xml Input file is pep XML, rather than a DTA select output file [default: false]
30
40
 
31
41
  Verbosity:
32
42
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.3.0
data/bin/divvy_spectra CHANGED
@@ -5,23 +5,24 @@ require 'bio-logger'
5
5
  require 'pp'
6
6
  require 'set'
7
7
 
8
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
8
+ SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'divvy_proteomics'
9
9
 
10
10
  rootpath = File.dirname(File.dirname(__FILE__))
11
11
  $: << File.join(rootpath,'lib')
12
- require 'dta_select_output'
12
+ require 'divvy_proteomics'
13
13
 
14
14
  # Parse command line options into the options hash
15
15
  options = {
16
16
  :logger => 'stderr',
17
17
  :log_level => 'info',
18
18
  :contaminant_regexes => [/^CNTM:/],
19
+ :input_is_pep_xml => false,
19
20
  }
20
21
  o = OptionParser.new do |opts|
21
22
  opts.banner = "
22
- Usage: #{SCRIPT_NAME} [options] <DTASelect_file>
23
+ Usage: #{SCRIPT_NAME} [options] <input_file>
23
24
 
24
- Takes a tab separated file containing a (possibly modified) output from a DTAselect run, and use some algorithm to divy up the spectra that match multiple peptides.\n\n"
25
+ Takes a tab separated file containing a (possibly modified) output from a DTAselect run (or a pepXML file and add the flag --pep-xml), and use some algorithm to divy up the spectra that match multiple peptides.\n\n"
25
26
 
26
27
  opts.on("--merge-proteins FILE_OF_IDENTIFIERS", "Provide a space/tab separated file where the identifiers on each row should be treated as one protein") do |file|
27
28
  options[:merge_proteins_file] = file
@@ -32,6 +33,10 @@ o = OptionParser.new do |opts|
32
33
  opts.on("--contaminant-regexes REGEXES", "Comma-separated list of regular expressions to apply to protein names. If the protein name matches then all spectra assigned to that protein are considered contaminants. [default: #{options[:contaminant_prefixes]}]") do |str|
33
34
  options[:contaminant_regexes] = str.split(/,/).collect{|s| /#{s}/}
34
35
  end
36
+ opts.separator "\nOptional arguments:\n\n"
37
+ opts.on("--pep-xml", "Input file is pep XML, rather than a DTA select output file [default: #{options[:input_is_pep_xml]}]") do |arg|
38
+ options[:input_is_pep_xml] = true
39
+ end
35
40
 
36
41
 
37
42
  # logger options
@@ -74,7 +79,12 @@ if options[:whitelist_file]
74
79
  end
75
80
 
76
81
  # Parse the csv file
77
- parsed = Bio::DTASelect::OutputFile.parse(ARGF)
82
+ parsed = nil
83
+ if options[:input_is_pep_xml]
84
+ parsed = Bio::PepXML.parse(ARGF)
85
+ else
86
+ parsed = Bio::DTASelect::OutputFile.parse(ARGF)
87
+ end
78
88
 
79
89
  # Hashes of identifiers to objects
80
90
  proteins = parsed.protein_name_to_object
@@ -90,12 +100,13 @@ mergers.each do |secondary_id, primary_id|
90
100
 
91
101
  # Invalidate some things about the primary ID because they are no longer valid
92
102
  current_protein = proteins[primary_id]
93
- current_protein.sequence_count = nil
94
- current_protein.sequence_coverage = nil
95
- current_protein.length = nil
96
- current_protein.molwt = nil
97
- current_protein.pi = nil
98
- current_protein.validation_status = nil
103
+ # These variables are not used and are not present in pepXML files, so don't mess with them.
104
+ # current_protein.sequence_count = nil
105
+ # current_protein.sequence_coverage = nil
106
+ # current_protein.length = nil
107
+ # current_protein.molwt = nil
108
+ # current_protein.pi = nil
109
+ # current_protein.validation_status = nil
99
110
  # Keep the primary proteins' description, I reckon
100
111
 
101
112
  # When there is spectra that are in the secondary but not the primary, add them to the primary's repertoire.
@@ -172,11 +183,13 @@ number_non_shared_peptides = all_peptides.select{|pep| pep.parent_proteins.lengt
172
183
  total_peptides = number_shared_peptides+number_non_shared_peptides
173
184
  log.info "Found #{number_shared_peptides} (#{number_shared_peptides.to_f/total_peptides*100}%) shared peptides and #{number_non_shared_peptides} (#{number_non_shared_peptides.to_f/total_peptides*100}%) non-shared peptides"
174
185
 
175
- # Find non-starred peptides that occur only once in the file - maybe not possible given a correctly formatted file?
176
- non_starred_but_uniquely_identified_peptides = hits.values.select do |peptide|
177
- peptide.dtaselect_attributes['Unique'] == nil and peptide.parent_proteins.length == 1
186
+ unless options[:input_is_pep_xml]
187
+ # Find non-starred peptides that occur only once in the file - maybe not possible given a correctly formatted file?
188
+ non_starred_but_uniquely_identified_peptides = hits.values.select do |peptide|
189
+ peptide.dtaselect_attributes['Unique'] == nil and peptide.parent_proteins.length == 1
190
+ end
191
+ log.debug "Found #{non_starred_but_uniquely_identified_peptides.length} different peptides that weren't starred or 2'd but the identifier is only found one time."
178
192
  end
179
- log.debug "Found #{non_starred_but_uniquely_identified_peptides.length} different peptides that weren't starred or 2'd but the identifier is only found one time."
180
193
 
181
194
  # OK, finished parsing the file. Now output the score for each protein
182
195
  puts [
@@ -2,14 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: divvy_proteomics 0.3.0 ruby lib
5
6
 
6
7
  Gem::Specification.new do |s|
7
8
  s.name = "divvy_proteomics"
8
- s.version = "0.2.0"
9
+ s.version = "0.3.0"
9
10
 
10
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
11
13
  s.authors = ["Ben J Woodcroft"]
12
- s.date = "2013-11-06"
14
+ s.date = "2014-01-07"
13
15
  s.description = "divvy up spectra from DTASelect files in a somewhat parsimonious way"
14
16
  s.email = "donttrustben@gmail.com"
15
17
  s.executables = ["divvy_spectra"]
@@ -28,8 +30,14 @@ Gem::Specification.new do |s|
28
30
  "bin/divvy_spectra",
29
31
  "divvy_proteomics.gemspec",
30
32
  "lib/divvy_proteomics.rb",
33
+ "lib/divvyable_protein.rb",
31
34
  "lib/dta_select_output.rb",
35
+ "lib/pep_xml.rb",
36
+ "spec/data/contaminant.pep.xml",
32
37
  "spec/data/merge_definition.csv",
38
+ "spec/data/minimal.pep.xml",
39
+ "spec/data/minimal2.pep.xml",
40
+ "spec/data/minimal3.pep.xml",
33
41
  "spec/data/multiply_mapped_spectra.csv",
34
42
  "spec/data/new_format.csv",
35
43
  "spec/data/new_format_some_all_shared_spectra.csv",
@@ -39,39 +47,39 @@ Gem::Specification.new do |s|
39
47
  "spec/data/three_proteins_meant_for_merge.csv",
40
48
  "spec/data/three_proteins_with_contaminant.csv",
41
49
  "spec/divvy_proteomics_spec.rb",
50
+ "spec/pep_xml_spec.rb",
42
51
  "spec/spec_helper.rb"
43
52
  ]
44
53
  s.homepage = "http://github.com/wwood/divvy_proteomics"
45
54
  s.licenses = ["MIT"]
46
- s.require_paths = ["lib"]
47
- s.rubygems_version = "2.0.3"
55
+ s.rubygems_version = "2.2.0"
48
56
  s.summary = "divvy up spectra from DTASelect files in a parsimonious way"
49
57
 
50
58
  if s.respond_to? :specification_version then
51
59
  s.specification_version = 4
52
60
 
53
61
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
54
- s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
55
- s.add_development_dependency(%q<systemu>, [">= 0"])
56
- s.add_development_dependency(%q<rspec>, [">= 2.8.0"])
57
- s.add_development_dependency(%q<rdoc>, [">= 3.12"])
58
- s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
59
- s.add_development_dependency(%q<jeweler>, [">= 1.8.4"])
62
+ s.add_runtime_dependency(%q<bio-logger>, ["~> 1.0"])
63
+ s.add_development_dependency(%q<systemu>, ["~> 2.6"])
64
+ s.add_development_dependency(%q<rspec>, ["~> 2.14"])
65
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
66
+ s.add_development_dependency(%q<bundler>, ["~> 1.5"])
67
+ s.add_development_dependency(%q<jeweler>, ["~> 2.0"])
60
68
  else
61
- s.add_dependency(%q<bio-logger>, [">= 0"])
62
- s.add_dependency(%q<systemu>, [">= 0"])
63
- s.add_dependency(%q<rspec>, [">= 2.8.0"])
64
- s.add_dependency(%q<rdoc>, [">= 3.12"])
65
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
66
- s.add_dependency(%q<jeweler>, [">= 1.8.4"])
69
+ s.add_dependency(%q<bio-logger>, ["~> 1.0"])
70
+ s.add_dependency(%q<systemu>, ["~> 2.6"])
71
+ s.add_dependency(%q<rspec>, ["~> 2.14"])
72
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
73
+ s.add_dependency(%q<bundler>, ["~> 1.5"])
74
+ s.add_dependency(%q<jeweler>, ["~> 2.0"])
67
75
  end
68
76
  else
69
- s.add_dependency(%q<bio-logger>, [">= 0"])
70
- s.add_dependency(%q<systemu>, [">= 0"])
71
- s.add_dependency(%q<rspec>, [">= 2.8.0"])
72
- s.add_dependency(%q<rdoc>, [">= 3.12"])
73
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
74
- s.add_dependency(%q<jeweler>, [">= 1.8.4"])
77
+ s.add_dependency(%q<bio-logger>, ["~> 1.0"])
78
+ s.add_dependency(%q<systemu>, ["~> 2.6"])
79
+ s.add_dependency(%q<rspec>, ["~> 2.14"])
80
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
81
+ s.add_dependency(%q<bundler>, ["~> 1.5"])
82
+ s.add_dependency(%q<jeweler>, ["~> 2.0"])
75
83
  end
76
84
  end
77
85
 
@@ -0,0 +1,16 @@
1
+ require 'bio-logger'
2
+ Bio::Log::LoggerPlus.new('divvy_proteomics')
3
+ module Bio
4
+ module DivvyProteomics
5
+ module Logging
6
+ def log
7
+ Bio::Log::LoggerPlus['divvy_proteomics']
8
+ end
9
+ end
10
+ end
11
+ end
12
+
13
+ require 'divvyable_protein'
14
+ require 'dta_select_output'
15
+ require 'pep_xml'
16
+
@@ -0,0 +1,40 @@
1
+ module Bio::DivvyProteomics::DivvyableProtein
2
+ def unique_spectra
3
+ return 0 if @peptides.nil? or @peptides.empty?
4
+ num = @peptides.select{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
5
+ num ||= 0
6
+ return num
7
+ end
8
+
9
+ def non_unique_spectra
10
+ return 0 if @peptides.nil? or @peptides.empty?
11
+ num = @peptides.reject{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
12
+ num ||= 0
13
+
14
+ return num
15
+ end
16
+
17
+ # Are there any peptides that are assigned exclusively to this protein?
18
+ def uniquely_identified_by_any_peptides?
19
+ unique_spectra > 0
20
+ end
21
+
22
+ def estimated_spectral_count
23
+ # How many unique spectra are there for each protein that shares a peptide with the current peptide
24
+ return 0 if @peptides.nil? or @peptides.empty?
25
+ peptide_shares = []
26
+ # If all peptides are non-unique and shared with some number of other proteins, then output a negative number num shared spectra divided by the number of proteins
27
+ if !uniquely_identified_by_any_peptides?
28
+ # Don't attempt to divvy these up, because there are too many assumptions involved
29
+ return 0
30
+ else
31
+ peptides.each do |peptide|
32
+ log.debug "Tallying peptide #{peptide.identifier}, which is has #{peptide.redundancy} spectra shared among #{peptide.parent_proteins.length} proteins"
33
+ log.debug "These proteins have #{peptide.parent_proteins.collect{|pro| pro.unique_spectra}.inspect} unique spectra each"
34
+ total_linked_unique_spectra = peptide.parent_proteins.collect{|pro| pro.unique_spectra}.reduce(:+)
35
+ peptide_shares.push unique_spectra.to_f/total_linked_unique_spectra*peptide.redundancy
36
+ end
37
+ return peptide_shares.reduce(:+)
38
+ end
39
+ end
40
+ end
@@ -1,12 +1,9 @@
1
1
 
2
2
 
3
3
 
4
+
5
+
4
6
  module Bio::DTASelect
5
- module Logging
6
- def log
7
- Bio::Log::LoggerPlus['divvy_spectra']
8
- end
9
- end
10
7
 
11
8
  class OutputFile
12
9
  def self.log
@@ -14,7 +11,8 @@ module Bio::DTASelect
14
11
  end
15
12
 
16
13
  class SelectedProtein
17
- include Bio::DTASelect::Logging
14
+ include Bio::DivvyProteomics::Logging
15
+ include Bio::DivvyProteomics::DivvyableProtein
18
16
 
19
17
  attr_accessor :identifier
20
18
 
@@ -26,43 +24,7 @@ module Bio::DTASelect
26
24
  @peptides = []
27
25
  end
28
26
 
29
- def unique_spectra
30
- return 0 if @peptides.nil? or @peptides.empty?
31
- num = @peptides.select{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
32
- num ||= 0
33
- return num
34
- end
35
-
36
- def non_unique_spectra
37
- return 0 if @peptides.nil? or @peptides.empty?
38
- num = @peptides.reject{|pep| pep.parent_proteins.length == 1}.collect{|pep| pep.redundancy}.reduce(:+)
39
- num ||= 0
40
- return num
41
- end
42
27
 
43
- # Are there any peptides that are assigned exclusively to this protein?
44
- def uniquely_identified_by_any_peptides?
45
- unique_spectra > 0
46
- end
47
-
48
- def estimated_spectral_count
49
- # How many unique spectra are there for each protein that shares a peptide with the current peptide
50
- return 0 if @peptides.nil? or @peptides.empty?
51
- peptide_shares = []
52
- # If all peptides are non-unique and shared with some number of other proteins, then output a negative number num shared spectra divided by the number of proteins
53
- if !uniquely_identified_by_any_peptides?
54
- # Don't attempt to divvy these up, because there are too many assumptions involved
55
- return 0
56
- else
57
- peptides.each do |peptide|
58
- log.debug "Tallying peptide #{peptide.identifier}, which is has #{peptide.redundancy} spectra shared among #{peptide.parent_proteins.length} proteins"
59
- log.debug "These proteins have #{peptide.parent_proteins.collect{|pro| pro.unique_spectra}.inspect} unique spectra each"
60
- total_linked_unique_spectra = peptide.parent_proteins.collect{|pro| pro.unique_spectra}.reduce(:+)
61
- peptide_shares.push unique_spectra.to_f/total_linked_unique_spectra*peptide.redundancy
62
- end
63
- return peptide_shares.reduce(:+)
64
- end
65
- end
66
28
 
67
29
  def log
68
30
  Bio::Log::LoggerPlus[LOG_NAME]
@@ -70,7 +32,7 @@ module Bio::DTASelect
70
32
  end
71
33
 
72
34
  class Peptide
73
- include Bio::DTASelect::Logging
35
+ include Bio::DivvyProteomics::Logging
74
36
 
75
37
  attr_accessor :identifier
76
38
 
@@ -98,7 +60,7 @@ module Bio::DTASelect
98
60
  end
99
61
 
100
62
  class Result
101
- include Bio::DTASelect::Logging
63
+ include Bio::DivvyProteomics::Logging
102
64
 
103
65
  # hash of protein identifier to Protein object
104
66
  attr_accessor :protein_name_to_object
@@ -123,7 +85,7 @@ module Bio::DTASelect
123
85
  # Parse each line of the DTAselect file
124
86
  io.each_line do |line|
125
87
  splits = line.chomp.split("\t")
126
- log.debug "Parsing line `#{line.chomp}'"
88
+ log.debug "Parsing line `#{line.chomp}'" if log.debug?
127
89
 
128
90
  if reading_header
129
91
  log.debug "reading header"
@@ -146,7 +108,7 @@ module Bio::DTASelect
146
108
  if !last_line_was_protein_name
147
109
  # Sometimes several proteins are given all in the one header line
148
110
  # start a new protein
149
- log.debug "New protein now being parsed"
111
+ log.debug "New protein now being parsed" if log.debug?
150
112
  current_proteins = []
151
113
  end
152
114
 
@@ -174,13 +136,13 @@ module Bio::DTASelect
174
136
 
175
137
 
176
138
  elsif splits[1] == 'Proteins'
177
- # Done processing, except for the bits down the bottom which aren't parsed (yet)
139
+ # Done processing, except for the bits down the bottom which aren't parsed (yet, at least)
178
140
  break
179
141
 
180
142
 
181
143
 
182
144
  else
183
- log.debug "New spectra now being parsed"
145
+ log.debug "New spectra now being parsed" if log.debug?
184
146
  last_line_was_protein_name = false
185
147
 
186
148
  # Record a spectra
@@ -204,11 +166,11 @@ module Bio::DTASelect
204
166
  pep.parent_proteins.push current_protein
205
167
  current_protein.peptides.push pep
206
168
  end
207
- log.debug "Parsed this peptide #{pep.inspect}"
169
+ log.debug "Parsed this peptide #{pep.inspect}" if log.debug?
208
170
  end
209
171
  end
210
172
 
211
- log.debug "Proteins parsed: #{result.protein_name_to_object.inspect}"
173
+ log.debug "Proteins parsed: #{result.protein_name_to_object.inspect}" if log.debug?
212
174
  return result
213
175
  end
214
176
  end
data/lib/pep_xml.rb ADDED
@@ -0,0 +1,130 @@
1
+ require 'rexml/document'
2
+
3
+ class Bio::PepXML
4
+ include Bio::DivvyProteomics::Logging
5
+
6
+ attr_accessor :protein_name_to_object, :peptide_name_to_object
7
+
8
+ class Protein
9
+ include Bio::DivvyProteomics::Logging
10
+ include Bio::DivvyProteomics::DivvyableProtein
11
+
12
+ # Array of peptide objects that have been assigned to this protein
13
+ attr_accessor :peptides
14
+
15
+ attr_accessor :identifier, :descriptive_name
16
+ end
17
+
18
+ # Named 'Peptide' but really mean Spectra. Just too hard to change
19
+ class Peptide
20
+ attr_accessor :parent_proteins
21
+
22
+ # Name of the spectra
23
+ attr_accessor :identifier
24
+
25
+ def initialize
26
+ @parent_proteins = []
27
+ end
28
+
29
+ #TODO: right now this just always returns 1. It should really be working out redundancy
30
+ #properly by comparison of peptide sequences, but that info isn't parsed yet
31
+ def redundancy
32
+ 1
33
+ end
34
+ end
35
+
36
+ def self.log
37
+ Bio::PepXML.new.log
38
+ end
39
+
40
+ def self.parse(io)
41
+ protein_name_to_object = {}
42
+ peptide_name_to_object = {}
43
+
44
+ #pep.elements.each('msms_pipeline_analysis/msms_run_summary/spectrum_query/search_result/search_hit'){|e|
45
+ # c+=1; p e.attributes['protein_descr'].strip;
46
+ # e.elements.each{|e|
47
+ # p e.name, e.attributes['protein_descr'].strip};break}
48
+ xml = REXML::Document.new(io)
49
+
50
+ parse_name_and_description = lambda do |e|
51
+ name = e.attributes['protein'].strip
52
+ description = e.attributes['protein_descr'].strip
53
+ if name.nil? or name == ''
54
+ name = e.attributes['protein_descr'].strip
55
+ else
56
+ description = name+' '+description
57
+ end
58
+ name.gsub!(/\t.*/,'')
59
+ description.gsub!(/[\t\n]/,' ')
60
+
61
+ [name, description]
62
+ end
63
+
64
+ #TODO: some better sanity checking here would be ideal.
65
+ num_hits_parsed = 0
66
+ xml.elements.each('msms_pipeline_analysis/msms_run_summary/spectrum_query/search_result/search_hit') do |hit|
67
+ hit_number = hit.attributes['hit_rank']
68
+ raise "Parsing error on #{hit}" if hit_number.nil?
69
+ next if hit_number != "1"
70
+
71
+ # Parse the primary hit
72
+ name1, description1 = parse_name_and_description.call(hit)
73
+ raise "No protein name found in this xml fragment: #{hit.to_s}" if name1.nil?
74
+ spectrum_name = hit.parent.parent.attributes['spectrum'].strip
75
+ raise "Parsing error (couldn't find spectrum name) with spectra #{hit.inspect}" if spectrum_name.nil?
76
+
77
+ # It is possible to have multiple peptides both hit the spectra with hit_rank="1"
78
+ # This happens when e.g. leucine and isoleucine are possible.
79
+ spectrum = peptide_name_to_object[spectrum_name]
80
+ if spectrum.nil?
81
+ spectrum = Peptide.new
82
+ spectrum.identifier = spectrum_name
83
+ peptide_name_to_object[spectrum_name] = spectrum
84
+ end
85
+
86
+
87
+ protein1 = protein_name_to_object[name1]
88
+ if protein1.nil?
89
+ protein1 = Protein.new
90
+ protein1.identifier = name1
91
+ protein1.descriptive_name = description1
92
+ protein1.peptides = []
93
+ protein_name_to_object[name1] = protein1
94
+ end
95
+ protein1.peptides.push spectrum
96
+ spectrum.parent_proteins ||= []
97
+ spectrum.parent_proteins.push protein1
98
+
99
+
100
+ # Parse the alternate hits. Only look at children with protein_descr attributes - these are
101
+ # the alternate proteins
102
+ hit.each_element_with_attribute('protein_descr') do |e|
103
+ name, description = parse_name_and_description.call(e)
104
+
105
+ alternate = protein_name_to_object[name]
106
+ if alternate.nil?
107
+ alternate = Protein.new
108
+ alternate.identifier = name
109
+ alternate.descriptive_name = description
110
+ alternate.peptides = []
111
+ protein_name_to_object[name] = alternate
112
+ end
113
+ alternate.peptides.push spectrum
114
+ spectrum.parent_proteins.push alternate
115
+ end
116
+
117
+ # Don't count the same protein multiple times - might happen when a spectrum's primary and alternative hits share the same protein name
118
+ spectrum.parent_proteins.uniq!
119
+
120
+ num_hits_parsed += 1
121
+ end
122
+ log.info "Parsed #{num_hits_parsed} search hits"
123
+
124
+ pepxml = Bio::PepXML.new
125
+ pepxml.protein_name_to_object = protein_name_to_object
126
+ pepxml.peptide_name_to_object = peptide_name_to_object
127
+
128
+ return pepxml
129
+ end
130
+ end
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_03.1121.1121.2" start_scan="1121" end_scan="1121" retention_time_sec="5.4199816666666667" activation_method="CID" precursor_intensity="388495.5625" precursor_neutral_mass="1329.7252673153125" assumed_charge="2" index="221">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="NLDLDSIIAEVK" protein="CNTM:cont_sp" num_tot_proteins="2" num_matched_ions="0" calc_neutral_pep_mass="1329.7252673153125" massdiff="0" protein_descr="P13647 K2C5_HUMAN Keratin, type II cytoskeletal 5 (Cytokeratin 5) (K5) (CK 5) (58 kDa cytokeratin) - Homo sapiens (Human). # pI:8.14 MW:62462" protein_mw="62.423064734660052" calc_pI="8.06005859375">
7
+ <alternative_protein protein="CNTM:cont_sp" protein_descr="P48668 K2CE_HUMAN Keratin, type II cytoskeletal 6E (Cytokeratin 6E) (CK 6E) (K6e keratin) - Homo sapiens (Human). # pI:8.14 MW:60092" protein_mw="60.05537958466001" />
8
+ <search_score name="XCorr" value="4.5027022361755371" />
9
+ </search_hit>
10
+ </search_result>
11
+ </spectrum_query>
12
+ </msms_run_summary>
13
+ </msms_pipeline_analysis>
@@ -0,0 +1,14 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
7
+ <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
8
+ <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
9
+ <search_score name="XCorr" value="4.7916374206542969" />
10
+ </search_hit>
11
+ </search_result>
12
+ </spectrum_query>
13
+ </msms_run_summary>
14
+ </msms_pipeline_analysis>
@@ -0,0 +1,12 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
7
+ <search_score name="XCorr" value="4.7916374206542969" />
8
+ </search_hit>
9
+ </search_result>
10
+ </spectrum_query>
11
+ </msms_run_summary>
12
+ </msms_pipeline_analysis>
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <msms_pipeline_analysis date="2013-12-06T09:32:51.2000705-07:00" name="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT" summary_xml="" xmlns="http://regis-web.systemsbiology.net/pepXML">
3
+ <msms_run_summary base_name="D:\Proteome_Discoverer\RawFiles\FASP1\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513\Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_All_SEQUESTHT.msf" raw_data_type=".msf" raw_data=".msf" msManufacturer="" msModel="" msIonization="" msMassAnalyzer="" msDetector="">
4
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
5
+ <search_result search_id="1">
6
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
7
+ <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
8
+ <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
9
+ <search_score name="XCorr" value="4.7916374206542969" />
10
+ </search_hit>
11
+ </search_result>
12
+ </spectrum_query>
13
+ <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2_3" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
14
+ <search_result search_id="1">
15
+ <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
16
+ <search_score name="XCorr" value="4.7916374206542969" />
17
+ </search_hit>
18
+ </search_result>
19
+ </spectrum_query>
20
+ </msms_run_summary>
21
+ </msms_pipeline_analysis>
@@ -11,8 +11,6 @@ $:.unshift File.join(File.dirname(__FILE__),'..')
11
11
  script_under_test = File.basename(__FILE__).gsub(/^test_/,'')
12
12
  path_to_script = File.join(File.dirname(__FILE__),'..','bin','divvy_spectra')
13
13
 
14
- TEST_DATA_DIR = File.join(File.dirname(__FILE__),'data')
15
-
16
14
  describe script_under_test do
17
15
  let(:header){"ID\tUnique spectra\tNon-unique spectra\tEstimated total spectra\tNormalised spectral count\tDescription\tProteins sharing spectra\n"}
18
16
  it 'should do 1 protein hit' do
@@ -0,0 +1,99 @@
1
+ require 'systemu'
2
+ require 'pp'
3
+ require 'open3'
4
+ require 'tempfile'
5
+
6
+ require 'spec_helper'
7
+
8
+
9
+
10
+ describe 'pepxml parsing' do
11
+ let(:header){"ID\tUnique spectra\tNon-unique spectra\tEstimated total spectra\tNormalised spectral count\tDescription\tProteins sharing spectra\n"}
12
+ it 'should parse decently' do
13
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal.pep.xml')))
14
+
15
+ # <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
16
+ # <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
17
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
18
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
19
+ # <search_score name="XCorr" value="4.7916374206542969" />
20
+
21
+ pepxml.kind_of?(Bio::PepXML).should == true
22
+
23
+ pepxml.protein_name_to_object.keys.sort.should == [
24
+ '>38SUR_2379_1524213_2',
25
+ '>38SUR_6350_1528184_1',
26
+ '>38SUR_80622_1602456_1',
27
+ ].sort
28
+ pepxml.peptide_name_to_object.keys.sort.should == [
29
+ 'Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2'
30
+ ]
31
+ pepxml.protein_name_to_object.values.each do |prot|
32
+ prot.kind_of?(Bio::PepXML::Protein).should == true
33
+ end
34
+ pepxml.peptide_name_to_object.values.each do |prot|
35
+ prot.kind_of?(Bio::PepXML::Peptide).should == true
36
+ end
37
+
38
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
39
+ prot1.identifier.should == '>38SUR_2379_1524213_2'
40
+ prot1.descriptive_name.should == '>38SUR_2379_1524213_2'
41
+ end
42
+
43
+ it 'should respond to divvy proteomics module things' do
44
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal.pep.xml')))
45
+
46
+ # <spectrum_query spectrum="Tara_38sfc_FASP_8hr_OrbiVelosPro_Run1_030513_02.9921.9921.2" start_scan="9921" end_scan="9921" retention_time_sec="41.732728333333334" activation_method="CID" precursor_intensity="61015.84375" precursor_neutral_mass="1246.6412829403125" assumed_charge="2" index="1">
47
+ # <search_hit hit_rank="1" peptide="IADQTIGTANSR" protein="" num_tot_proteins="3" num_matched_ions="0" calc_neutral_pep_mass="1246.6412829403125" massdiff="0" protein_descr="&gt;38SUR_2379_1524213_2&#x9;" protein_mw="43.185399974660044" calc_pI="5.63037109375">
48
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_6350_1528184_1&#x9;" protein_mw="24.663561404659987" />
49
+ # <alternative_protein protein="" protein_descr="&gt;38SUR_80622_1602456_1&#x9;" protein_mw="30.364007294659981" />
50
+ # <search_score name="XCorr" value="4.7916374206542969" />
51
+
52
+ pepxml.kind_of?(Bio::PepXML).should == true
53
+
54
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
55
+ prot1.peptides.length.should == 1
56
+ prot1.unique_spectra.should == 0
57
+ prot1.non_unique_spectra.should == 1
58
+ prot1.estimated_spectral_count.should == 0.0
59
+
60
+
61
+ prot1 = pepxml.protein_name_to_object['>38SUR_6350_1528184_1']
62
+ prot1.peptides.length.should == 1
63
+ prot1.unique_spectra.should == 0
64
+ prot1.non_unique_spectra.should == 1
65
+ prot1.estimated_spectral_count.should == 0.0
66
+ end
67
+
68
+ it 'should respond to divvy proteomics module things with 1 unique hit' do
69
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal2.pep.xml')))
70
+ pepxml.kind_of?(Bio::PepXML).should == true
71
+
72
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
73
+ prot1.peptides.length.should == 1
74
+ prot1.unique_spectra.should == 1
75
+ prot1.non_unique_spectra.should == 0
76
+ prot1.estimated_spectral_count.should == 1.0
77
+ end
78
+
79
+ it 'should respond to divvy proteomics module things with 2 hits, where 1 is unique' do
80
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'minimal3.pep.xml')))
81
+ pepxml.kind_of?(Bio::PepXML).should == true
82
+
83
+ prot1 = pepxml.protein_name_to_object['>38SUR_2379_1524213_2']
84
+ prot1.peptides.length.should == 2
85
+ prot1.unique_spectra.should == 1
86
+ prot1.non_unique_spectra.should == 1
87
+ prot1.estimated_spectral_count.should == 2.0
88
+ end
89
+
90
+ it 'should parse when the protein and protein_desc attributes are both defined' do
91
+ pepxml = Bio::PepXML.parse(File.open(File.join(TEST_DATA_DIR, 'contaminant.pep.xml')))
92
+ pepxml.kind_of?(Bio::PepXML).should == true
93
+
94
+ prot1 = pepxml.protein_name_to_object['CNTM:cont_sp']
95
+ prot1.nil?.should == false
96
+ prot1.identifier.should == 'CNTM:cont_sp'
97
+ prot1.descriptive_name.should == 'CNTM:cont_sp P13647 K2C5_HUMAN Keratin, type II cytoskeletal 5 (Cytokeratin 5) (K5) (CK 5) (58 kDa cytokeratin) - Homo sapiens (Human). # pI:8.14 MW:62462'
98
+ end
99
+ end
data/spec/spec_helper.rb CHANGED
@@ -8,5 +8,7 @@ require 'divvy_proteomics'
8
8
  Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
9
 
10
10
  RSpec.configure do |config|
11
-
11
+
12
12
  end
13
+
14
+ TEST_DATA_DIR = File.join(File.dirname(__FILE__),'data')
metadata CHANGED
@@ -1,99 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: divvy_proteomics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben J Woodcroft
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-06 00:00:00.000000000 Z
11
+ date: 2014-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio-logger
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: systemu
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '2.6'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '2.6'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.8.0
47
+ version: '2.14'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.8.0
54
+ version: '2.14'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rdoc
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
61
  version: '3.12'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.12'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.0.0
75
+ version: '1.5'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.0.0
82
+ version: '1.5'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: jeweler
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - '>='
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 1.8.4
89
+ version: '2.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - '>='
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 1.8.4
96
+ version: '2.0'
97
97
  description: divvy up spectra from DTASelect files in a somewhat parsimonious way
98
98
  email: donttrustben@gmail.com
99
99
  executables:
@@ -103,8 +103,8 @@ extra_rdoc_files:
103
103
  - LICENSE.txt
104
104
  - README.md
105
105
  files:
106
- - .document
107
- - .rspec
106
+ - ".document"
107
+ - ".rspec"
108
108
  - Gemfile
109
109
  - LICENSE.txt
110
110
  - README.md
@@ -113,8 +113,14 @@ files:
113
113
  - bin/divvy_spectra
114
114
  - divvy_proteomics.gemspec
115
115
  - lib/divvy_proteomics.rb
116
+ - lib/divvyable_protein.rb
116
117
  - lib/dta_select_output.rb
118
+ - lib/pep_xml.rb
119
+ - spec/data/contaminant.pep.xml
117
120
  - spec/data/merge_definition.csv
121
+ - spec/data/minimal.pep.xml
122
+ - spec/data/minimal2.pep.xml
123
+ - spec/data/minimal3.pep.xml
118
124
  - spec/data/multiply_mapped_spectra.csv
119
125
  - spec/data/new_format.csv
120
126
  - spec/data/new_format_some_all_shared_spectra.csv
@@ -124,6 +130,7 @@ files:
124
130
  - spec/data/three_proteins_meant_for_merge.csv
125
131
  - spec/data/three_proteins_with_contaminant.csv
126
132
  - spec/divvy_proteomics_spec.rb
133
+ - spec/pep_xml_spec.rb
127
134
  - spec/spec_helper.rb
128
135
  homepage: http://github.com/wwood/divvy_proteomics
129
136
  licenses:
@@ -135,17 +142,17 @@ require_paths:
135
142
  - lib
136
143
  required_ruby_version: !ruby/object:Gem::Requirement
137
144
  requirements:
138
- - - '>='
145
+ - - ">="
139
146
  - !ruby/object:Gem::Version
140
147
  version: '0'
141
148
  required_rubygems_version: !ruby/object:Gem::Requirement
142
149
  requirements:
143
- - - '>='
150
+ - - ">="
144
151
  - !ruby/object:Gem::Version
145
152
  version: '0'
146
153
  requirements: []
147
154
  rubyforge_project:
148
- rubygems_version: 2.0.3
155
+ rubygems_version: 2.2.0
149
156
  signing_key:
150
157
  specification_version: 4
151
158
  summary: divvy up spectra from DTASelect files in a parsimonious way