bio-protparam 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -0
- data/VERSION +1 -1
- data/lib/bio/util/protparam.rb +193 -48
- data/test/test_bio-protparam.rb +5 -1
- metadata +3 -5
- data/README.rdoc +0 -48
data/README.md
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/bio/util/protparam.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
#
|
4
|
-
# = bio/
|
4
|
+
# = bio/util/protparam.rb - A Class to Calculate Protein Parameters.
|
5
5
|
#
|
6
6
|
# Copyright:: Copyright (C) 2012
|
7
7
|
# Hiroyuki Nakamura <hiroyuki@1vq9.com>
|
8
8
|
# License:: The Ruby License
|
9
9
|
#
|
10
|
+
|
10
11
|
require 'rational'
|
12
|
+
require 'net/http'
|
13
|
+
require 'uri'
|
11
14
|
|
12
15
|
module Bio
|
13
16
|
##
|
@@ -153,7 +156,7 @@ module Bio
|
|
153
156
|
}
|
154
157
|
}
|
155
158
|
|
156
|
-
# Estemated half-life of N-terminal residue of a protein.
|
159
|
+
# Estimated half-life (minutes) of N-terminal residue of a protein.
|
157
160
|
HALFLIFE = {
|
158
161
|
:ecoli => {
|
159
162
|
:I => 600,
|
@@ -349,9 +352,9 @@ module Bio
|
|
349
352
|
}
|
350
353
|
}
|
351
354
|
|
352
|
-
def initialize(seq)
|
355
|
+
def initialize(seq, mode=:local)
|
356
|
+
|
353
357
|
if seq.kind_of?(String) && Bio::Sequence.guess(seq) == Bio::Sequence::AA
|
354
|
-
# TODO: has issue.
|
355
358
|
@seq = Bio::Sequence::AA.new seq
|
356
359
|
elsif seq.kind_of? Bio::Sequence::AA
|
357
360
|
@seq = seq
|
@@ -361,8 +364,149 @@ module Bio
|
|
361
364
|
else
|
362
365
|
raise ArgumentError, "sequence must be an AA sequence"
|
363
366
|
end
|
367
|
+
|
368
|
+
self.class.class_eval do
|
369
|
+
include(if mode == :remote then Remote else Local end)
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
module Remote
  ##
  # Backend that computes nothing locally: the sequence is posted to the
  # ProtParam CGI at PROTPARAM_URL and every reader method scrapes its value
  # out of the returned page body.
  #
  # Reader methods are declared with the +rule+ macro below.

  PROTPARAM_URL = 'http://web.expasy.org/cgi-bin/protparam/protparam'

  # Cached body of the ProtParam response (filled lazily by #request).
  attr_accessor :result

  ##
  # Returns the conversion suffix (".to_i", ".to_f" or "") for +type+.
  #
  # +type+ may be a Class or its name. "Fixnum" and "Bignum" are accepted
  # next to "Integer" because those classes were merged into Integer in
  # Ruby 2.4, where their +to_s+ became "Integer".
  def self.cast_method(type)
    case type.to_s
    when "Integer", "Fixnum", "Bignum"
      ".to_i"
    when "Float"
      ".to_f"
    else
      ""
    end
  end

  ##
  # Splits the arguments of +rule+ into [label, type, regex, block].
  #
  # Accepted shapes:
  #   label, type, regex
  #   label, type, callable
  #   label, type, regex, callable
  #
  # Anything shorter than three arguments yields four nils, which +rule+
  # rejects with ArgumentError.
  def self.extract_options(*args)
    return [nil, nil, nil, nil] unless args.size > 2

    label, type, *rest = args
    regex = nil
    block = nil
    if rest.size > 1
      regex, block = rest
    elsif rest.first.kind_of?(Regexp)
      regex = rest.first
    elsif rest.first.respond_to?(:call)
      block = rest.first
    end
    [label, type, regex, block]
  end

  ##
  # Returns a lambda applying the conversion that +cast_method+ names.
  def self.caster_for(type)
    case cast_method(type)
    when ".to_i" then lambda { |text| text.to_i }
    when ".to_f" then lambda { |text| text.to_f }
    else lambda { |text| text }
    end
  end
  private_class_method :caster_for

  ##
  # Defines a reader method named +label+ on this module.
  #
  # * regex only:    the method returns capture group 1 of +regex+ applied
  #   to the response, converted according to +type+; nil when the pattern
  #   does not match or has no capture group.
  # * callable only: the method calls it with the response followed by the
  #   caller's arguments (+type+ is ignored).
  # * regex and callable: the converted capture (or nil) is passed to the
  #   callable, followed by the caller's arguments.
  #
  # Raises ArgumentError when neither a regex nor a callable is supplied.
  def self.rule(*args)
    label, type, regex, block = extract_options(*args)
    unless regex || block
      raise ArgumentError,
        "Invalid arguments.rule(:label, :type, :regex) or rule(:label, :type, :lambda)"
    end

    if regex
      pattern = regex.kind_of?(Regexp) ? regex : Regexp.new(regex.to_s)
      convert = caster_for(type)
      # Guard against a missing match: Regexp#match returns nil then.
      capture = lambda { |response|
        matched = pattern.match(response)
        (matched && matched.size > 1) ? convert.call(matched[1]) : nil
      }
    end

    if regex && block
      define_method(label) do |*method_args|
        block.call(capture.call(request), *method_args)
      end
    elsif regex
      define_method(label) { capture.call(request) }
    else
      # Splat explicitly so lambdas work as well as procs.
      define_method(label) do |*method_args|
        block.call(request, *method_args)
      end
    end
  end

  rule :num_neg, Integer, %r/<B>Total number of negatively charged residues.*?<\/B>\s*(\d*)/
  rule :num_pos, Integer, %r/<B>Total number of positively charged residues.*?<\/B>\s*(\d*)/
  rule :amino_acid_number, Integer, %r/<B>Number of amino acids:<\/B> (\d+)/
  rule :total_atoms, Integer, %r/<B>Total number of atoms:<\/B>\s*(\d*)/
  rule :num_carbon, Integer, %r/Carbon\s+C\s+(\d+)/
  rule :num_hydrogen, Integer, %r/Hydrogen\s+H\s+(\d+)/
  rule :num_nitro, Integer, %r/Nitrogen\s+N\s+(\d+)/
  rule :num_oxygen, Integer, %r/Oxygen\s+O\s+(\d+)/
  rule :num_sulphur, Integer, %r/Sulfur\s+S\s+(\d+)/
  rule :molecular_weight, Float, %r/<B>Molecular weight:<\/B> (\d*\.{0,1}\d*)/
  rule :theoretical_pI, Float, %r/<B>Theoretical pI:<\/B> (-{0,1}\d*\.{0,1}\d*)/
  rule :instability_index, Float, %r/The instability index \(II\) is computed to be (-{0,1}\d*\.{0,1}\d*)/
  rule :stability, String, %r/This classifies the protein as\s(\w+)\./
  rule :aliphatic_index, Float, %r/<B>Aliphatic index:<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/
  rule :gravy, Float, %r/<B>Grand average of hydropathicity \(GRAVY\):<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/

  # half_life(category = :mammalian) -> Float, in minutes.
  # The page reports hours (optionally prefixed with ">"); the value for the
  # requested category (:mammalian, :yeast or :ecoli) is multiplied by 60.
  # Raises RuntimeError ("Parse Error!") when no value can be located.
  # This is the only half_life rule: a regex-based declaration of the same
  # name would simply be overwritten by this one.
  rule :half_life, Float, proc {|response, category|
    category ||= :mammalian
    category_map = {
      :mammalian => /\(mammalian\sreticulocytes,\sin\svitro\)/,
      :yeast     => /\(yeast,\sin\svivo\)/,
      :ecoli     => /\(Escherichia\scoli,\sin\svivo\)/
    }
    if /The\sestimated\shalf-life\sis:.*?
       ([>\d]+)\shours\s(?=#{category_map[category]})/mx =~ response
      hours = $1.delete('>')
      (hours.to_f * 60)
    else
      raise "Parse Error!"
    end
  }

  # aa_comp(aa_code = nil) -> Hash or Float
  # Parses composition rows such as "Arg (R)  26   6.6%" and returns the
  # percentage column: the whole {:R => 6.6, ...} map when +aa_code+ is nil,
  # otherwise the entry for that one-letter code (nil if absent).
  rule :aa_comp, Float, proc {|response, aa_code|
    rows = response.
      scan(/(?:[A-Z][a-z]{2}){0,1}\s\(([A-Z])\)\s*?\d+?\s*?(\d+\.\d+)%/)
    aa_map = Hash[rows.map {|aa, val| [aa.to_sym, val.to_f] }]
    if aa_code.nil?
      aa_map
    else
      aa_map[aa_code.to_sym]
    end
  }

  ##
  # True when the page classifies the protein as "stable".
  def stable?
    (stability == 'stable')
  end

  ##
  # Returns the ProtParam response body, posting @seq to PROTPARAM_URL on
  # the first call and reusing the cached @result afterwards.
  def request
    @result ||= begin
      res = Net::HTTP.post_form(URI(PROTPARAM_URL),
                                {'sequence' => @seq.to_s})
      res.body
    end
  end

  ##
  # Mixes the Local backend into the receiver's class.
  # NOTE(review): this affects every instance of that class, and is a no-op
  # if Local is already among its ancestors - confirm the intended method
  # resolution order.
  def fallback!
    self.class.class_eval do
      include Local
    end
  end

end
|
507
|
+
|
508
|
+
module Local
|
509
|
+
|
366
510
|
##
|
367
511
|
#
|
368
512
|
# Return the number of negative amino acids (D and E) in an AA sequence.
|
@@ -769,49 +913,50 @@ module Bio
|
|
769
913
|
# Rounds +num+ to +ndigits+ decimal places and returns the result as a Float.
# The value is scaled by 10**ndigits, rounded to the nearest integer and
# scaled back.
def round(num, ndigits=0)
  scale = 10 ** ndigits
  scaled = (num * scale).round
  scaled.to_f / scale.to_f
end
|
772
|
-
|
773
|
-
# --------------------------------
|
774
|
-
# :section: References
|
775
|
-
#
|
776
|
-
#
|
777
|
-
# 1. Protein Identification and Analysis Tools on the ExPASy Server;
|
778
|
-
# Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
|
779
|
-
# Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
|
780
|
-
# Protocols Handbook, Humana Press (2005). pp. 571-607
|
781
|
-
# 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
|
782
|
-
# How to measure and predict the molar absorption coefficient of a
|
783
|
-
# protein. Protein Sci. 11, 2411-2423.
|
784
|
-
# 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
|
785
|
-
# tyrosine in proteins. Biochemistry 6, 1948-1954.
|
786
|
-
# 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
|
787
|
-
# extinction coefficients from amino acid sequence data. Anal. Biochem.
|
788
|
-
# 182:319-326(1989).
|
789
|
-
# 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
|
790
|
-
# of a protein is a function of its amino-terminal residue. Science 234,
|
791
|
-
# 179-186.
|
792
|
-
# 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
|
793
|
-
# Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
|
794
|
-
# J. Biol. Chem. 264, 16700-16712.
|
795
|
-
# 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
|
796
|
-
# N-end rule in bacteria. Science 254, 1374-1377.
|
797
|
-
# 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
|
798
|
-
# recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
|
799
|
-
# Sci. 14, 483-488.
|
800
|
-
# 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
|
801
|
-
# Genes Cells 2, 13-28.
|
802
|
-
# 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
|
803
|
-
# between stability of a protein and its dipeptide composition: a novel
|
804
|
-
# approach for predicting in vivo stability of a protein from its primary
|
805
|
-
# sequence. Protein Eng. 4,155-161.
|
806
|
-
# 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
|
807
|
-
# proteins. J. Biochem. 88, 1895-1898.
|
808
|
-
# 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
|
809
|
-
# the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
|
810
|
-
# 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
|
811
|
-
# Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
|
812
|
-
# of polypeptides in immobilized pH gradients can be predicted from their
|
813
|
-
# amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
|
814
|
-
#
|
815
|
-
# --------------------------------
|
816
916
|
end
|
917
|
+
|
918
|
+
# --------------------------------
|
919
|
+
# :section: References
|
920
|
+
#
|
921
|
+
#
|
922
|
+
# 1. Protein Identification and Analysis Tools on the ExPASy Server;
|
923
|
+
# Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
|
924
|
+
# Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
|
925
|
+
# Protocols Handbook, Humana Press (2005). pp. 571-607
|
926
|
+
# 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
|
927
|
+
# How to measure and predict the molar absorption coefficient of a
|
928
|
+
# protein. Protein Sci. 11, 2411-2423.
|
929
|
+
# 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
|
930
|
+
# tyrosine in proteins. Biochemistry 6, 1948-1954.
|
931
|
+
# 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
|
932
|
+
# extinction coefficients from amino acid sequence data. Anal. Biochem.
|
933
|
+
# 182:319-326(1989).
|
934
|
+
# 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
|
935
|
+
# of a protein is a function of its amino-terminal residue. Science 234,
|
936
|
+
# 179-186.
|
937
|
+
# 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
|
938
|
+
# Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
|
939
|
+
# J. Biol. Chem. 264, 16700-16712.
|
940
|
+
# 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
|
941
|
+
# N-end rule in bacteria. Science 254, 1374-1377.
|
942
|
+
# 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
|
943
|
+
# recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
|
944
|
+
# Sci. 14, 483-488.
|
945
|
+
# 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
|
946
|
+
# Genes Cells 2, 13-28.
|
947
|
+
# 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
|
948
|
+
# between stability of a protein and its dipeptide composition: a novel
|
949
|
+
# approach for predicting in vivo stability of a protein from its primary
|
950
|
+
# sequence. Protein Eng. 4,155-161.
|
951
|
+
# 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
|
952
|
+
# proteins. J. Biochem. 88, 1895-1898.
|
953
|
+
# 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
|
954
|
+
# the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
|
955
|
+
# 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
|
956
|
+
# Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
|
957
|
+
# of polypeptides in immobilized pH gradients can be predicted from their
|
958
|
+
# amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
|
959
|
+
#
|
960
|
+
# --------------------------------
|
961
|
+
end
|
817
962
|
end
|
data/test/test_bio-protparam.rb
CHANGED
@@ -17,7 +17,11 @@ module Bio
|
|
17
17
|
# Builds @obj from the p53 UniProt fixture. Setting the PROTPARAM_TEST_REMOTE
# environment variable (to any value) selects the :remote mode; otherwise the
# :local mode is used.
def setup
  fixture_path = File.join('test', 'data', 'uniprot', 'p53_human.uniprot')
  entry = Bio::UniProt.new(File.read(fixture_path))
  backend = ENV['PROTPARAM_TEST_REMOTE'] ? :remote : :local
  @obj = Bio::Protparam.new(entry.seq, backend)
end
|
22
26
|
|
23
27
|
def test_num_neg
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-protparam
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
@@ -84,14 +84,12 @@ extensions: []
|
|
84
84
|
extra_rdoc_files:
|
85
85
|
- LICENSE.txt
|
86
86
|
- README.md
|
87
|
-
- README.rdoc
|
88
87
|
files:
|
89
88
|
- .document
|
90
89
|
- .travis.yml
|
91
90
|
- Gemfile
|
92
91
|
- LICENSE.txt
|
93
92
|
- README.md
|
94
|
-
- README.rdoc
|
95
93
|
- Rakefile
|
96
94
|
- VERSION
|
97
95
|
- lib/bio-protparam.rb
|
@@ -114,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
114
112
|
version: '0'
|
115
113
|
segments:
|
116
114
|
- 0
|
117
|
-
hash:
|
115
|
+
hash: 1639556585709602379
|
118
116
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
117
|
none: false
|
120
118
|
requirements:
|
data/README.rdoc
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
= bio-protparam
|
2
|
-
|
3
|
-
{<img
|
4
|
-
src="https://secure.travis-ci.org/hryk/bioruby-protparam.png"
|
5
|
-
/>}[http://travis-ci.org/#!/hryk/bioruby-protparam]
|
6
|
-
|
7
|
-
Full description goes here
|
8
|
-
|
9
|
-
Note: this software is under active development!
|
10
|
-
|
11
|
-
== Installation
|
12
|
-
|
13
|
-
gem install bio-protparam
|
14
|
-
|
15
|
-
== Usage
|
16
|
-
|
17
|
-
== Developers
|
18
|
-
|
19
|
-
To use the library
|
20
|
-
|
21
|
-
require 'bio-protparam'
|
22
|
-
|
23
|
-
The API doc is online. For more code examples see also the test files in
|
24
|
-
the source tree.
|
25
|
-
|
26
|
-
== Project home page
|
27
|
-
|
28
|
-
Information on the source tree, documentation, issues and how to contribute, see
|
29
|
-
|
30
|
-
http://github.com/hryk/bioruby-protparam
|
31
|
-
|
32
|
-
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
|
-
|
34
|
-
== Cite
|
35
|
-
|
36
|
-
If you use this software, please cite one of
|
37
|
-
|
38
|
-
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
39
|
-
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
40
|
-
|
41
|
-
== Biogems.info
|
42
|
-
|
43
|
-
This Biogem is published at http://biogems.info/index.html#bio-protparam
|
44
|
-
|
45
|
-
== Copyright
|
46
|
-
|
47
|
-
Copyright (c) 2012 hryk. See LICENSE.txt for further details.
|
48
|
-
|