ms-ident 0.0.17 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,12 +1,4 @@
1
1
  require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
2
  require 'rake'
11
3
 
12
4
  require 'jeweler'
@@ -20,10 +12,13 @@ Jeweler::Tasks.new do |gem|
20
12
  gem.email = "jtprince@gmail.com"
21
13
  gem.authors = ["John T. Prince"]
22
14
  gem.rubyforge_project = 'mspire'
23
- # Include your dependencies below. Runtime dependencies are required when using your gem,
24
- # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
25
- # gem.add_runtime_dependency 'jabber4r', '> 0.1'
26
- # gem.add_development_dependency 'rspec', '> 1.2.3'
15
+ gem.add_runtime_dependency 'nokogiri'
16
+ gem.add_runtime_dependency 'ms-core', ">=0.0.12"
17
+ gem.add_runtime_dependency 'ms-in_silico'
18
+ gem.add_runtime_dependency 'andand'
19
+ gem.add_development_dependency 'spec-more'
20
+ gem.add_development_dependency 'jeweler'
21
+ #gem.add_development_dependency 'ms-testdata'
27
22
  end
28
23
  Jeweler::RubygemsDotOrgTasks.new
29
24
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.17
1
+ 0.0.18
@@ -0,0 +1,224 @@
1
+ require 'ms/in_silico/digester'
2
+ require 'ms/fasta'
3
+
4
+ module Ms ; end
5
+ module Ms::Ident ; end
6
+ module Ms::Ident::Peptide ; end
7
+
8
+ module Ms::Ident::Peptide::Db
9
+ MAX_NUM_AA_EXPANSION = 3
10
+
11
+ # the twenty standard amino acids
12
+ STANDARD_AA = %w(A C D E F G H I K L M N P Q R S T V W Y)
13
+
14
+ DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => Ms::InSilico::Digester::TRYPSIN, :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}
15
+
16
+ PROTEIN_DELIMITER = "\t"
17
+ KEY_VALUE_DELIMITER = ": "
18
+
19
# Command-line front end: parses +argv+ and builds one peptide centric db
# per fasta file named on the command line.
#
# argv:: array of command-line arguments.  Recognized options are removed
#        from it in place (OptionParser#parse!); whatever remains is
#        treated as a list of fasta files.
#
# returns an array holding the value of peptide_centric_db for each file.
# Prints the usage message and exits when no fasta file is given;
# --list-enzymes prints the known enzyme names and exits.
def self.cmdline(argv)
  # only the command-line front end needs the option parser
  require 'optparse'

  # used solely to show the default values in the usage message
  defaults = DEFAULT_PEPTIDE_CENTRIC_DB

  opt = {
    :remove_digestion_file => true,
    :enzyme => Ms::InSilico::Digester::TRYPSIN
  }
  opts = OptionParser.new do |op|
    op.banner = "usage: #{File.basename($0)} <file>.fasta ..."
    op.separator "output: "
    op.separator " <file>.msd_clvg<missed_cleavages>.min_aaseq<min_length>.yml"
    op.separator "format:"
    op.separator " PEPTIDE: ID1<tab>ID2<tab>ID3..."
    op.separator ""
    op.separator " Initiator Methionines - by default, will generate two peptides"
    op.separator " for any peptide found at the N-termini starting with 'M'"
    op.separator " (i.e., one with and one without the leading methionine)"
    op.separator ""
    op.on("--missed-cleavages <#{defaults[:missed_cleavages]}>", Integer, "max num of missed cleavages") {|v| opt[:missed_cleavages] = v }
    op.on("--min-length <#{defaults[:min_length]}>", Integer, "the minimum peptide aaseq length") {|v| opt[:min_length] = v }
    op.on("--no-cleaved-methionine", "does not cleave off initiator methionine") { opt[:cleave_initiator_methionine] = false }
    op.on("--no-expand-x", "don't enumerate aa 'X' possibilities") { opt[:expand_aa] = nil }
    # enzymes are looked up as upper-cased constants of the Digester module
    op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = Ms::InSilico::Digester.const_get(v.upcase) }
    op.on("--list-enzymes", "lists approved enzymes and exits") do
      puts Ms::InSilico::Digester::ENZYMES.keys.join("\n")
      exit
    end
  end

  opts.parse!(argv)

  # nothing to digest: show the usage and quit
  if argv.empty?
    puts opts
    exit
  end

  argv.map do |file|
    Ms::Ident::Peptide::Db.peptide_centric_db(file, opt)
  end
end
57
+
58
# Digests every protein of +fasta_file+ and writes a peptide centric db
# (one "PEPTIDE: ID1<tab>ID2..." line per peptide) next to it.
#
# A temporary digestion file (<base>.msd_clvg<missed_cleavages>.peptides)
# is written first; it contains all peptides for the given number of
# missed cleavages.  The minimum length is not applied to that file, only
# to the final db, which is named
# <base>.msd_clvg<missed_cleavages>.min_aaseq<min_length>.yml
#
# fasta_file:: path of the fasta file to digest
# opts::       merged over DEFAULT_PEPTIDE_CENTRIC_DB.  Keys read here:
#              :missed_cleavages, :min_length, :enzyme,
#              :remove_digestion_file, :cleave_initiator_methionine and
#              :expand_aa (nil or empty disables expansion).
#              :id_regexp is accepted but not used by this method.
#
# Progress is printed only when $VERBOSE is set.
#
# returns the full (expanded) name of the written file.
def self.peptide_centric_db(fasta_file, opts={})
  opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)

  (missed_cleavages, min_length, enzyme, remove_digestion_file, cleave_initiator_methionine, expand_aa) = opts.values_at(:missed_cleavages, :min_length, :enzyme, :remove_digestion_file, :cleave_initiator_methionine, :expand_aa)
  start_time = Time.now
  print "Digesting #{fasta_file} ..." if $VERBOSE

  # character class matching any letter that requests expansion (an empty
  # hash would build the invalid regexp "[]", so it disables expansion)
  letters_to_expand_re =
    if expand_aa && !expand_aa.empty?
      Regexp.new("[#{Regexp.escape(expand_aa.keys.join)}]")
    end

  base = fasta_file.chomp(File.extname(fasta_file))
  digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides"
  File.open(digestion_file, "w") do |fh|
    Ms::Fasta.open(fasta_file) do |fasta|
      fasta.each do |prot|
        peptides = enzyme.digest(prot.sequence, missed_cleavages)
        if cleave_initiator_methionine && (prot.sequence[0,1] == "M")
          # every peptide sitting at the very beginning of the protein is
          # also emitted without its leading methionine
          m_peps = []
          peptides.each do |pep|
            if prot.sequence[0,pep.size] == pep
              m_peps << pep[1..-1]
            end
          end
          peptides.concat(m_peps)
        end
        if letters_to_expand_re
          # expand_peptides returns nil for explosive peptides; those are
          # dropped from the db
          peptides = peptides.flat_map do |pep|
            if pep =~ letters_to_expand_re
              expand_peptides(pep, expand_aa)
            else
              pep
            end
          end.compact
        end
        # only the first whitespace-delimited token of the header is kept
        fh.puts( prot.header.split(/\s+/).first + "\t" + peptides.join(" ") )
      end
    end
  end
  puts "#{Time.now - start_time} sec" if $VERBOSE


  start_time = Time.now
  print "Organizing raw digestion #{digestion_file} ..." if $VERBOSE

  # peptide aaseq => array of protein ids containing it
  hash = Hash.new {|h,k| h[k] = [] }
  # ::IO because the IO class of this module shadows the core one here
  ::IO.foreach(digestion_file) do |line|
    (prot, *peps) = line.chomp.split(/\s+/)
    # prot is something like this: "sp|P31946|1433B_HUMAN" in uniprot
    peps.each do |pep|
      if pep.size >= min_length
        hash[pep] << prot
      end
    end
  end
  puts "#{Time.now - start_time} sec" if $VERBOSE

  base = digestion_file.chomp(File.extname(digestion_file))
  final_outfile = base + ".min_aaseq#{min_length}" + ".yml"

  start_time = Time.now
  print "Writing results to #{final_outfile} ..." if $VERBOSE

  File.open(final_outfile, 'w') do |out|
    hash.each do |k,v|
      out.puts( [k, v.join(PROTEIN_DELIMITER)].join(KEY_VALUE_DELIMITER) )
    end
  end
  puts "#{Time.now - start_time} sec" if $VERBOSE

  if remove_digestion_file
    File.unlink(digestion_file)
  end
  File.expand_path(final_outfile)
end
140
+
141
# Does combinatorial expansion of every letter of +peptide+ that is a key
# of +expand_aa+.
#
# peptide::   the amino acid sequence (String)
# expand_aa:: hash mapping a letter to the array of its replacements,
#             like: {'X'=>STANDARD_AA}
#
# returns an array with every expanded sequence
# returns [peptide] if the peptide holds no letter to expand
# returns nil if more than MAX_NUM_AA_EXPANSION positions would have to be
# expanded (the number of combinations explodes)
def self.expand_peptides(peptide, expand_aa)
  # character positions of the letters requesting expansion
  positions = []
  peptide.each_char.with_index do |char, i|
    positions << i if expand_aa.key?(char)
  end
  return nil if positions.size > MAX_NUM_AA_EXPANSION

  # expand one position at a time; each pass multiplies the working list
  # by the number of replacements available for that position
  positions.inject([peptide]) do |expanded, i|
    replacements = expand_aa[peptide[i, 1]]
    expanded.flat_map do |current_pep|
      replacements.map {|v| dp = current_pep.dup ; dp[i] = v ; dp }
    end
  end
end
167
+
168
# An object for on disk retrieval of db entries.
# It behaves much like a read-only hash once it is opened: keys are the
# peptide aaseqs and values are arrays of proteins.  Only the byte offsets
# of the values are held in memory; the proteins themselves are read from
# the underlying io on each lookup.
class IO
  include Enumerable

  # opens +filename+ and calls the block with an indexed IO object; the
  # file is closed when the block returns.
  def self.open(filename, &block)
    File.open(filename) do |io|
      block.call(self.new(io))
    end
  end

  # the underlying io object (must respond to pos, each_line, seek, read)
  attr_accessor :io
  # hash of key => [byte offset of the value, byte length of the value]
  attr_accessor :index

  # indexes every "KEY: value" line of +io+, reading from its current
  # position.  Lines that do not start with a key followed by
  # KEY_VALUE_DELIMITER (e.g. blank lines) are skipped.
  def initialize(io)
    @io = io
    @index = {}
    re = /^(\w+)#{Regexp.escape(KEY_VALUE_DELIMITER)}/
    line_start = io.pos
    io.each_line do |line|
      line_end = io.pos
      if (md = re.match(line))
        key = md[1]
        # the value starts right after the key and its delimiter and runs
        # to the end of the line (line terminator included)
        value_start = line_start + key.bytesize + KEY_VALUE_DELIMITER.bytesize
        @index[key] = [value_start, line_end - value_start]
      end
      line_start = line_end
    end
  end

  # returns an array of proteins for the given key (peptide aaseq), or nil
  # if the key is not in the index
  def [](key)
    (start, length) = @index[key]
    return nil unless start
    @io.seek(start)
    @io.read(length).to_s.chomp.split(PROTEIN_DELIMITER)
  end

  # number of entries
  def size ; @index.size end
  alias_method :length, :size

  # all the keys (peptide aaseqs)
  def keys
    @index.keys
  end

  # all the protein lists
  def values
    keys.map {|key| self[key] }
  end

  # yields a pair of aaseq and protein array.  returns an enumerator if no
  # block is given.
  def each(&block)
    return enum_for(:each) unless block
    @index.each_key do |key|
      block.call([key, self[key]])
    end
  end
end
224
+ end
@@ -1,4 +1,3 @@
1
-
2
1
  module Ms ; end
3
2
  module Ms::Ident ; end
4
3
 
@@ -1,7 +1,8 @@
1
-
2
1
  module Ms ; end
3
2
  module Ms::Ident ; end
4
3
 
4
+ require 'set'
5
+
5
6
  module Ms::Ident::Protein
6
7
 
7
8
  class << self
@@ -13,5 +14,57 @@ module Ms::Ident::Protein
13
14
  reference.split(/[\s\r]/)[0]
14
15
  end
15
16
 
17
+ PRIORITIZE_PROTEINS = lambda do |protein_group_and_peptide_hits|
18
+ peptide_hits = protein_group_and_peptide_hits.last
19
+ num_uniq_aaseqs = peptide_hits.map {|hit| hit.aaseq }.uniq.size
20
+ num_uniq_aaseqs_at_z = peptide_hits.map {|hit| [hit.aaseq, hit.charge] }.uniq.size
21
+ [num_uniq_aaseqs, num_uniq_aaseqs_at_z, peptide_hits.size]
22
+ end
23
+
24
+
25
+ module_function
26
# Greedy algorithm to map a set of peptide_hits to protein groups.
#
# peptide_hits:: enumerable of objects responding to :proteins (and to
#                :aaseq and :charge when the default sort is used)
#
# Proteins hit by exactly the same set of peptide hits form one protein
# group.  If a block is given it is used to rank the groups: it is yielded
# a single argument, a doublet of [protein_group, peptide_set], and should
# return a metric or array to sort by (the greediest groups should sort to
# the back).  If no block is given, the groups are sorted by [# uniq
# aaseqs, # uniq aaseq+charge, # peptide_hits] (see PRIORITIZE_PROTEINS).
#
# Going from the greediest group down, a group survives only if it holds
# at least one peptide hit not already claimed by an earlier group.
#
# Sets of peptide_hits and the objects returned by peptide_hit#proteins
# are used as hash keys.  As long as each peptide hit has a unique
# signature (like an id) then any object will work.  If they are Struct
# objects, you might consider redefining the #hash method to be object_id
# for performance and accuracy.
#
# returns an array of [protein_group, peptide_set] doublets, greediest
# first (protein_group is an Array, peptide_set a Set).
def peptide_hits_to_protein_groups(peptide_hits, &sort_by)
  sort_by ||= PRIORITIZE_PROTEINS
  protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
  peptide_hits.each do |peptide_hit|
    peptide_hit.proteins.each do |protein|
      protein_to_peptides[protein] << peptide_hit
    end
  end
  # proteins with identical peptide sets end up in the same group
  peptides_to_protein_group = Hash.new {|h,k| h[k] = [] }
  protein_to_peptides.each do |protein, peptide_set|
    peptides_to_protein_group[peptide_set] << protein
  end
  protein_group_to_peptides = peptides_to_protein_group.invert
  greedy_first = protein_group_to_peptides.sort_by(&sort_by).reverse
  accounted_for = Set.new
  # we are discarding the subsumed sets, but we could get them with
  # partition
  greedy_first.select do |group, peptide_set|
    unaccounted = peptide_set.reject {|peptide_hit| accounted_for.include?(peptide_hit) }
    # the group claims every hit it is the first to see
    accounted_for.merge(unaccounted)
    !unaccounted.empty?
  end
end
68
+
16
69
  end
17
70
 
@@ -0,0 +1,95 @@
1
+ require 'spec_helper'
2
+
3
+ require 'yaml'
4
+ path = 'ms/ident/peptide/db'
5
+ require path
6
+
7
module Kernel

  # Runs the given block with $stdout redirected to an in-memory buffer and
  # returns everything the block wrote, as a String.
  #
  # Whatever $stdout was before the call is put back afterwards (also when
  # the block raises), so captures can be nested.
  def capture_stdout
    previous_stdout = $stdout
    out = StringIO.new
    $stdout = out
    yield
    out.string
  ensure
    $stdout = previous_stdout
  end

end
20
+
21
+ FASTA_FILE = [TESTFILES, path, 'uni_11_sp_tr.fasta'].join('/')
22
+
23
+ describe 'amino acid expansion' do
24
+
25
+ it 'can expand out wildcard amino acid combinations' do
26
+ array = Ms::Ident::Peptide::Db.expand_peptides('ALXX', 'X' => %w(* % &), 'L' => %w(P Q) )
27
+ array.sort.is %w(AP** AP*% AP*& AP%* AP%% AP%& AP&* AP&% AP&& AQ** AQ*% AQ*& AQ%* AQ%% AQ%& AQ&* AQ&% AQ&&).sort
28
+ end
29
+
30
+ it 'will not expand explosive combinations (>MAX_NUM_AA_EXPANSION)' do
31
+ # this is from real data
32
+ worst_case = 'LTLLRPEKHEAATGVDTICTHRVDPIGPGLXXEXLYWELSXLTXXIXELGPYTLDR'
33
+ Ms::Ident::Peptide::Db.expand_peptides(worst_case, 'X' => %w(* % &)).nil?.is true
34
+ end
35
+
36
+ it 'returns the peptide in the array if no expansion' do
37
+ array = Ms::Ident::Peptide::Db.expand_peptides('ZZZZZ', 'X' => %w(* % &), 'L' => %w(P Q) )
38
+ array.is ['ZZZZZ']
39
+ end
40
+
41
+ end
42
+
43
+ describe 'creating a peptide centric database' do
44
+
45
+ before do
46
+
47
+ #@output_file = [TESTFILES, path, 'uni_11_sp_tr.'].join('/')
48
+ @output_file = [TESTFILES, path, "uni_11_sp_tr.msd_clvg2.min_aaseq4.yml"].join('/')
49
+ end
50
+
51
+ it 'converts a fasta file into peptide centric db' do
52
+ output_files = Ms::Ident::Peptide::Db.cmdline([FASTA_FILE])
53
+ output_files.first.is File.expand_path(@output_file)
54
+ ok File.exist?(@output_file)
55
+ hash = {}
56
+ YAML.load_file(@output_file).each do |k,v|
57
+ hash[k] = v.split("\t")
58
+ end
59
+ sorted = hash.sort
60
+ # these are merely frozen, not perfectly defined
61
+ sorted.first.is ["AAFDDAIAELDTLSEESYK", ["sp|P62258|1433E_HUMAN"]]
62
+ sorted.last.is ["YWCRLGPPRWICQTIVSTNQYTHHR", ["tr|D2KTA8|D2KTA8_HUMAN"]]
63
+ sorted.size.is 728
64
+ File.unlink(@output_file)
65
+ end
66
+
67
+ it 'lists approved enzymes and exits' do
68
+ output = capture_stdout do
69
+ begin
70
+ Ms::Ident::Peptide::Db.cmdline(['--list-enzymes'])
71
+ rescue SystemExit
72
+ 1.is 1 # we exited
73
+ end
74
+ end
75
+ lines = output.split("\n")
76
+ ok lines.include?("trypsin")
77
+ ok lines.include?("chymotrypsin")
78
+ end
79
+ end
80
+
81
+ describe 'reading a peptide centric database' do
82
+ outfiles = Ms::Ident::Peptide::Db.cmdline([FASTA_FILE])
83
+ @outfile = outfiles.first
84
+
85
+ it 'reads the file on disk with random access or is enumerable' do
86
+ Ms::Ident::Peptide::Db::IO.open(@outfile) do |io|
87
+ io["AVTEQGHELSNEER"].enums %w(sp|P31946|1433B_HUMAN sp|P31946-2|1433B_HUMAN)
88
+ io["VRAAR"].enums ["tr|D3DX18|D3DX18_HUMAN"]
89
+ io.each_with_index do |key_prots, i|
90
+ key_prots.first.isa String
91
+ key_prots.last.isa Array
92
+ end
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/ident/protein'
4
+
5
+ PeptideHit = Struct.new(:aaseq, :charge, :proteins) do
6
+ def inspect # easier to read output
7
+ "<PeptideHit aaseq=#{self.aaseq} charge=#{self.charge} proteins(ids)=#{self.proteins.map(&:id).join(',')}>"
8
+ end
9
+ def hash ; self.object_id end
10
+ end
11
+ ProteinHit = Struct.new(:id) do
12
+ def inspect # easier to read output
13
+ "<Prt #{self.id}>"
14
+ end
15
+ def hash ; self.object_id end
16
+ end
17
+
18
+ describe 'creating minimal protein groups from peptide hits' do
19
+ before do
20
+ @pep_hits = [ ['AABBCCDD', 2],
21
+ ['BBCC', 2],
22
+ ['DDEEFFGG', 2],
23
+ ['DDEEFFGG', 3],
24
+ ['HIYA', 2],
25
+ ].map {|ar| PeptideHit.new(ar[0], ar[1], []) }
26
+ @prot_hits_hash = {
27
+ 'big_guy' => @pep_hits,
28
+ 'little_guy' => [@pep_hits.last],
29
+ 'medium_guy1' => @pep_hits[0,4],
30
+ 'medium_guy2' => @pep_hits[0,4],
31
+ 'subsumed_by_medium' => @pep_hits[2,2],
32
+ }
33
+ @prot_hits = @prot_hits_hash.keys.map {|id| ProteinHit.new(id) }
34
+ end
35
+
36
+ it 'is a greedy algorithm' do
37
+ @prot_hits.each {|prthit| @prot_hits_hash[prthit.id].each {|pep| pep.proteins << prthit } }
38
+ # big_guy has all the peptides, so it takes them all
39
+ reply = Ms::Ident::Protein.peptide_hits_to_protein_groups(@pep_hits)
40
+ reply.first.size.is 2 # the group and the peptide set
41
+ reply.first.first.size.is 1 # the group
42
+ reply.first.first.first.id.is 'big_guy'
43
+ end
44
+
45
+ it 'removes proteins accounted for only as little pieces of larger proteins' do
46
+ @prot_hits[1..-1].each {|prthit| @prot_hits_hash[prthit.id].each {|pep| pep.proteins << prthit } }
47
+ reply = Ms::Ident::Protein.peptide_hits_to_protein_groups(@pep_hits)
48
+ # no subsumed_by_medium
49
+ reply.map(&:first).any? {|protein_list| protein_list.any? {|v| v.id == 'subsumed_by_medium' }}.is false
50
+ end
51
+
52
+ it 'allows alternate sorting algorithms for greediness' do
53
+ @prot_hits.each {|prthit| @prot_hits_hash[prthit.id].each {|pep| pep.proteins << prthit } }
54
+ reply = Ms::Ident::Protein.peptide_hits_to_protein_groups(@pep_hits) do |prot_and_peptide_hits|
55
+ # deliberately using a counterintuitive sorting method to give little guys
56
+ # a chance
57
+ -prot_and_peptide_hits.last.size
58
+ end
59
+ # because the little proteins are given priority, they 'survive'. Bigger
60
+ # proteins may also survive if they have at least one unique peptide
61
+ # to add to the mix. This demonstrates how proteins can be weighted in
62
+ # different ways based on their peptide hits.
63
+ seen = []
64
+ reply.each {|pair| pair.first.each {|prot| seen << prot.id } }
65
+ # big guy is completely accounted for in the now prioritized little guy
66
+ # and medium guys, etc.
67
+ seen.sort.is @prot_hits_hash.keys[1..-1].sort
68
+ end
69
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,18 +1,8 @@
1
1
  require 'rubygems'
2
- require 'bundler'
3
2
 
4
3
  $spec_large = ENV['SPEC_LARGE']
5
- development = $spec_large ? :development_large : :development
6
-
7
- begin
8
- Bundler.setup(:default, development)
9
- rescue Bundler::BundlerError => e
10
- $stderr.puts e.message
11
- $stderr.puts "Run `bundle install` to install missing gems"
12
- exit e.status_code
13
- end
14
- require 'spec/more'
15
4
 
5
+ require 'spec/more'
16
6
 
17
7
  load_testdata = lambda do
18
8
  require 'ms/testdata'
@@ -26,7 +16,6 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
26
16
 
27
17
  Bacon.summary_on_exit
28
18
 
29
-
30
19
  def spec_large(&block)
31
20
  if $spec_large
32
21
  block.call
@@ -0,0 +1,69 @@
1
+ >sp|P31946|1433B_HUMAN 14-3-3 protein beta/alpha OS=Homo sapiens GN=YWHAB PE=1 SV=3
2
+ MTMDKSELVQKAKLAEQAERYDDMAAAMKAVTEQGHELSNEERNLLSVAYKNVVGARRSS
3
+ WRVISSIEQKTERNEKKQQMGKEYREKIEAELQDICNDVLELLDKYLIPNATQPESKVFY
4
+ LKMKGDYFRYLSEVASGDNKQTTVSNSQQAYQEAFEISKKEMQPTHPIRLGLALNFSVFY
5
+ YEILNSPEKACSLAKTAFDEAIAELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGDEGD
6
+ AGEGEN
7
+ >sp|P31946-2|1433B_HUMAN Isoform Short of 14-3-3 protein beta/alpha OS=Homo sapiens GN=YWHAB
8
+ MDKSELVQKAKLAEQAERYDDMAAAMKAVTEQGHELSNEERNLLSVAYKNVVGARRSSWR
9
+ VISSIEQKTERNEKKQQMGKEYREKIEAELQDICNDVLELLDKYLIPNATQPESKVFYLK
10
+ MKGDYFRYLSEVASGDNKQTTVSNSQQAYQEAFEISKKEMQPTHPIRLGLALNFSVFYYE
11
+ ILNSPEKACSLAKTAFDEAIAELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGDEGDAG
12
+ EGEN
13
+ >sp|P62258|1433E_HUMAN 14-3-3 protein epsilon OS=Homo sapiens GN=YWHAE PE=1 SV=1
14
+ MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLSVAYKNVIGARRASW
15
+ RIISSIEQKEENKGGEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAANTGESKVF
16
+ YYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELPPTHPIRLGLALNFSVF
17
+ YYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDGE
18
+ EQNKEALQDVEDENQ
19
+ >sp|Q04917|1433F_HUMAN 14-3-3 protein eta OS=Homo sapiens GN=YWHAH PE=1 SV=4
20
+ MGDREQLLQRARLAEQAERYDDMASAMKAVTELNEPLSNEDRNLLSVAYKNVVGARRSSW
21
+ RVISSIEQKTMADGNEKKLEKVKAYREKIEKELETVCNDVLSLLDKFLIKNCNDFQYESK
22
+ VFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEQMQPTHPIRLGLALNFS
23
+ VFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDE
24
+ EAGEGN
25
+ >sp|P61981|1433G_HUMAN 14-3-3 protein gamma OS=Homo sapiens GN=YWHAG PE=1 SV=2
26
+ MVDREQLVQKARLAEQAERYDDMAAAMKNVTELNEPLSNEERNLLSVAYKNVVGARRSSW
27
+ RVISSIEQKTSADGNEKKIEMVRAYREKIEKELEAVCQDVLSLLDNYLIKNCSETQYESK
28
+ VFYLKMKGDYYRYLAEVATGEKRATVVESSEKAYSEAHEISKEHMQPTHPIRLGLALNYS
29
+ VFYYEIQNAPEQACHLAKTAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDD
30
+ DGGEGNN
31
+ >sp|P31947|1433S_HUMAN 14-3-3 protein sigma OS=Homo sapiens GN=SFN PE=1 SV=1
32
+ MERASLIQKAKLAEQAERYEDMAAFMKGAVEKGEELSCEERNLLSVAYKNVVGGQRAAWR
33
+ VLSSIEQKSNEEGSEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEAGDAESRVFY
34
+ LKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMPPTNPIRLGLALNFSVFH
35
+ YEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEEGG
36
+ EAPQEPQS
37
+ >tr|D2KLI3|D2KLI3_HUMAN Cytochrome b OS=Homo sapiens GN=CYTB PE=3 SV=1
38
+ MTPTRKTNPLMKLINHSFIDLPTPSNISAWWNFGSLLGACLILQITTGLFLAMHYSPDAS
39
+ TAFSSIAHITRDVNYGWIIRYLHANGASMFFICLFLHIGRGLYYGSFLYSETWNIGIILL
40
+ LATMATAFMGYVLPWGQMSFWGATVITNLLSAIPYIGTDLVQWIWGGYSVDSPTLTRFFT
41
+ FHFILPFIIAALAALHLLFLHETGSNNPLGITSHSDKITFHPYYTIKDALGLLLFLLSLM
42
+ TLTLFSPDLLGDPDNYTLANPLNTPPHIKPEWYFLFAYTILRSVPNKLGGVLALLLSILI
43
+ LAMIPILHMSKQQSMMFRPLSQSLYWLLAADLLILTWIGGQPVSYPFTIIGQVASVLYFT
44
+ TILILMPTISLIENKMLKWA
45
+ >tr|D2KTA8|D2KTA8_HUMAN Putative uncharacterized protein FCAMR OS=Homo sapiens GN=FCAMR PE=4 SV=1
46
+ MDGEATVKPGEQVPLWTHGWPPDDPSPSFAAGSSFALPQKRPHPRWLWEGSLPSRTHLRA
47
+ MGTLRPSSPLCWREESSFAAPNSLKGSRLVSGEPGGAVTIQCHYAPSSVNRHQRKYWCRL
48
+ GPPRWICQTIVSTNQYTHHRYRDRVALTDFPQRGLFVVRLSQLSPDDIGCYLCGIGSENN
49
+ MLFLSMNLTISAGPASTLPTATPAAGELTMRSYGTASPVANRWTPGTTQTLGQGTAWDTV
50
+ ASTPGTSKTTASAEGRRTPGATRPAAPGTGSWAEGSVKAPAPIPESPPSKSRSMSNTTEG
51
+ VWEGTRSSVTNRARASKDRREMTTTKADRPREDIEGVRIALDAAKKVLGTIGPPALVSET
52
+ LAWEILPQATPVSKQQSQGSIGETTPAAGMWTLGTPAADVWILGTPAADVWTSMEAASGE
53
+ GSAAGDLDAATGDRGPQATLSQTPAVGPWGPPGKESSVKRTFPEDESSSRTLAPVSTMLA
54
+ LFMLMALVLLQRKLWRRRTSQEAERVTLIQMTHFLEVNPQADQLPHVERKMLQDDSLPAG
55
+ ASLTAPERNPGP
56
+ >tr|D2KTA9|D2KTA9_HUMAN Putative uncharacterized protein XKR5 OS=Homo sapiens GN=XKR5 PE=4 SV=1
57
+ MHARLLGLSALLQAAEQSARLYTVAYYFTTGRLLWGWLALAVLLPGFLVQALSYLWFRAD
58
+ GHPGHCSLMMLHLLQLGVWKRHWDAALTSLQKELEAPHRGWLQLQEADLSALRLLEALLQ
59
+ TGPHLLLQTYVFLASDFTDIVPGVSTLFSWSSLSWALVSYTRFMGFMKPGHLAMPWAALF
60
+ CQQLWRMGMLGTRVLSLVLFYKAYHFWVFVVAGAHWLVMTFWLVAQQSDIIDSTCHWRLF
61
+ NLLVGAVYILCYLSFWDSPSRNRMVTFYMVMLLENIILLLLATDFLQGASVDQPADHSWG
62
+ PVWISDWQCLTGNLLQPAASKIHRHLAGLPKEVLWHCRR
63
+ >tr|D3DSH8|D3DSH8_HUMAN HCG2036819, isoform CRA_a OS=Homo sapiens GN=hCG_2036819 PE=4 SV=1
64
+ MLGWIQPSRQPQLRAAPPTRTPSAKRCILCNFLPGCWLVGDVAGSRQPSAPQTLRQRQHT
65
+ RPPPQERGSGRRSPLREARRANPHFKSFPVLEARGLPCGARRTGPRRPVREMTLPSDPER
66
+ ATLPNPRLGAPAVPRRGPRSHGGRR
67
+ >tr|D3DX18|D3DX18_HUMAN Putative uncharacterized protein LOC128977 OS=Homo sapiens GN=LOC128977 PE=4 SV=1
68
+ MADGSGWQPPRPCEAYRAEWKLCRSARHFLHHYYVHGERPACEQWQRDLASCRDWEERRN
69
+ AEAQQSLCESERARVRAARKHILVWAPRQSPPPDWHLPLPQEKDE