bio-protparam 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +1 -0
- data/VERSION +1 -1
- data/lib/bio/util/protparam.rb +193 -48
- data/test/test_bio-protparam.rb +5 -1
- metadata +3 -5
- data/README.rdoc +0 -48
data/README.md
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/bio/util/protparam.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
#
|
4
|
-
# = bio/
|
4
|
+
# = bio/util/protparam.rb - A Class to Calculate Protein Parameters.
|
5
5
|
#
|
6
6
|
# Copyright:: Copyright (C) 2012
|
7
7
|
# Hiroyuki Nakamura <hiroyuki@1vq9.com>
|
8
8
|
# License:: The Ruby License
|
9
9
|
#
|
10
|
+
|
10
11
|
require 'rational'
|
12
|
+
require 'net/http'
|
13
|
+
require 'uri'
|
11
14
|
|
12
15
|
module Bio
|
13
16
|
##
|
@@ -153,7 +156,7 @@ module Bio
|
|
153
156
|
}
|
154
157
|
}
|
155
158
|
|
156
|
-
# Estemated half-life of N-terminal residue of a protein.
|
159
|
+
# Estimated half-life (minutes) of N-terminal residue of a protein.
|
157
160
|
HALFLIFE = {
|
158
161
|
:ecoli => {
|
159
162
|
:I => 600,
|
@@ -349,9 +352,9 @@ module Bio
|
|
349
352
|
}
|
350
353
|
}
|
351
354
|
|
352
|
-
def initialize(seq)
|
355
|
+
def initialize(seq, mode=:local)
|
356
|
+
|
353
357
|
if seq.kind_of?(String) && Bio::Sequence.guess(seq) == Bio::Sequence::AA
|
354
|
-
# TODO: has issue.
|
355
358
|
@seq = Bio::Sequence::AA.new seq
|
356
359
|
elsif seq.kind_of? Bio::Sequence::AA
|
357
360
|
@seq = seq
|
@@ -361,8 +364,149 @@ module Bio
|
|
361
364
|
else
|
362
365
|
raise ArgumentError, "sequence must be an AA sequence"
|
363
366
|
end
|
367
|
+
|
368
|
+
self.class.class_eval do
|
369
|
+
include(if mode == :remote then Remote else Local end)
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
##
# Mixin that answers parameter queries by scraping the page returned by the
# ExPASy ProtParam CGI. The including/extending object must provide @seq
# (read by #request). Reader methods are generated by the +rule+ macro below.
module Remote
  # Endpoint that #request posts the sequence to.
  PROTPARAM_URL = 'http://web.expasy.org/cgi-bin/protparam/protparam'

  # Cached response body; assigning it bypasses the HTTP round trip in #request.
  attr_accessor :result

  ##
  # Map a result type to the conversion suffix applied to a captured string.
  #
  # type:: a class or class name; "Integer" (and the legacy "Fixnum"/"Bignum"
  #        names) select integer conversion, "Float" float conversion.
  # Returns ".to_i", ".to_f", or "" (leave the captured String untouched).
  def self.cast_method(type)
    case type.to_s
    when "Fixnum", "Bignum", "Integer"
      ".to_i"
    when "Float"
      ".to_f"
    else
      ""
    end
  end

  ##
  # Normalize the arguments accepted by +rule+ into
  # <tt>[label, type, regex, block]</tt>. Supported forms:
  #
  #   label, class, regex
  #   label, class, regex, callable
  #   label, class, callable
  #   label, callable
  #
  # Any other shape yields nil in every slot that could not be determined.
  def self.extract_options(*args)
    label = type = regex = block = nil
    if args.size > 2
      label, type, *rest = args
      if rest.size > 1
        regex, block = rest
      elsif rest.first.kind_of?(Regexp)
        regex = rest.first
      elsif rest.first.respond_to?(:call)
        block = rest.first
      end
    elsif args.size == 2 && args.last.respond_to?(:call)
      # The documented "label, callable" form carries no type.
      label, block = args
    end
    [label, type, regex, block]
  end

  ##
  # Build a lambda that pulls the first capture group of +regex+ out of a
  # response and converts it according to +type+ (see cast_method).
  # The lambda returns nil when the pattern does not match or has no capture
  # group, instead of failing on a nil MatchData.
  def self.capture_reader(regex, type)
    conversion = cast_method(type).sub('.', '') # "to_i", "to_f" or ""
    lambda do |response|
      matched = regex.match(response)
      if matched.nil? || matched.size < 2
        nil
      elsif conversion.empty?
        matched[1]
      else
        matched[1].send(conversion)
      end
    end
  end

  ##
  # Define an instance method named +label+ that reads a value from the
  # ProtParam response (see extract_options for the accepted argument forms).
  #
  # * regex only      - returns the converted first capture, or nil.
  # * callable only   - returns callable.call(response, *method_args).
  # * regex+callable  - returns callable.call(converted_capture, *method_args).
  #   NOTE(review): the original left this form as an empty stub; the
  #   capture-then-postprocess semantics chosen here should be confirmed.
  #
  # Raises ArgumentError when neither a regex nor a callable was supplied.
  def self.rule(*args)
    label, type, regex, block = extract_options(*args)
    if regex && block
      reader = capture_reader(regex, type)
      define_method(label) do |*method_args|
        block.call(reader.call(request), *method_args)
      end
    elsif regex
      reader = capture_reader(regex, type)
      define_method(label) { reader.call(request) }
    elsif block
      # Arguments are splatted so arity-checked lambdas work as well as procs.
      define_method(label) do |*method_args|
        block.call(request, *method_args)
      end
    else
      raise ArgumentError,
        "Invalid arguments. rule(:label, :type, :regex) or rule(:label, :type, :lambda)"
    end
  end

  # Integer is used instead of Fixnum, which no longer exists on current Rubies.
  rule :num_neg, Integer, %r/<B>Total number of negatively charged residues.*?<\/B>\s*(\d*)/
  rule :num_pos, Integer, %r/<B>Total number of positively charged residues.*?<\/B>\s*(\d*)/
  rule :amino_acid_number, Integer, %r/<B>Number of amino acids:<\/B> (\d+)/
  rule :total_atoms, Integer, %r/<B>Total number of atoms:<\/B>\s*(\d*)/
  rule :num_carbon, Integer, %r/Carbon\s+C\s+(\d+)/
  rule :num_hydrogen, Integer, %r/Hydrogen\s+H\s+(\d+)/
  rule :num_nitro, Integer, %r/Nitrogen\s+N\s+(\d+)/
  rule :num_oxygen, Integer, %r/Oxygen\s+O\s+(\d+)/
  rule :num_sulphur, Integer, %r/Sulfur\s+S\s+(\d+)/
  rule :molecular_weight, Float, %r/<B>Molecular weight:<\/B> (\d*\.{0,1}\d*)/
  rule :theoretical_pI, Float, %r/<B>Theoretical pI:<\/B> (-{0,1}\d*\.{0,1}\d*)/
  rule :instability_index, Float, %r/The instability index \(II\) is computed to be (-{0,1}\d*\.{0,1}\d*)/
  rule :stability, String, %r/This classifies the protein as\s(\w+)\./
  rule :aliphatic_index, Float, %r/<B>Aliphatic index:<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/
  rule :gravy, Float, %r/<B>Grand average of hydropathicity \(GRAVY\):<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/

  # half_life(category = :mammalian) -> Float minutes.
  # category is one of :mammalian, :yeast, :ecoli (Strings are accepted too).
  # A leading ">" in the reported value is dropped. Raises ArgumentError for an
  # unknown category and RuntimeError ("Parse Error!") when the value is absent.
  # (The earlier regex-only :half_life rule was always overridden by this one
  # and has been removed.)
  rule :half_life, Float, proc { |response, category|
    marker = {
      :mammalian => /\(mammalian\sreticulocytes,\sin\svitro\)/,
      :yeast => /\(yeast,\sin\svivo\)/,
      :ecoli => /\(Escherichia\scoli,\sin\svivo\)/
    }[(category || :mammalian).to_sym]
    if marker.nil?
      raise ArgumentError, "unknown half-life category: #{category.inspect}"
    end

    # Decimal values are captured whole ("1.3", not "3").
    # NOTE(review): the "min" unit assumes ProtParam prints short half-lives
    # as "N min" - confirm against live output.
    matched = /The\sestimated\shalf-life\sis:.*?
               (>?\d+(?:\.\d+)?)\s(hours|min)\s(?=#{marker})/mx.match(response)
    raise "Parse Error!" if matched.nil?

    amount = matched[1].delete('>').to_f
    matched[2] == 'hours' ? amount * 60 : amount
  }

  # aa_comp(aa_code = nil) -> percentage composition parsed from rows such as
  #   Arg (R)  26   6.6%
  # Without an argument returns a Hash {:R => 6.6, ...}; with a one-letter code
  # returns that residue's percentage (nil if the residue is not listed).
  rule :aa_comp, Float, proc { |response, aa_code|
    aa_map = Hash[response.
      scan(/(?:[A-Z][a-z]{2}){0,1}\s\(([A-Z])\)\s*?\d+?\s*?(\d+\.\d+)%/).
      map { |aa, val| [aa.to_sym, val.to_f] }]
    aa_code.nil? ? aa_map : aa_map[aa_code.to_sym]
  }

  # True when ProtParam classifies the protein as "stable".
  def stable?
    stability == 'stable'
  end

  # Return the ProtParam response body, posting @seq to PROTPARAM_URL on first
  # use and caching the body in @result afterwards.
  def request
    @result ||= begin
      res = Net::HTTP.post_form(URI(PROTPARAM_URL),
                                {'sequence' => @seq.to_s})
      res.body
    end
  end

  # Include Local into the receiver's class.
  # NOTE(review): Module#include does nothing when Local is already among the
  # class's ancestors, and the change is class-wide rather than per-instance -
  # confirm this matches the intended fallback behaviour.
  def fallback!
    self.class.class_eval do
      include Local
    end
  end
end
|
507
|
+
|
508
|
+
module Local
|
509
|
+
|
366
510
|
##
|
367
511
|
#
|
368
512
|
# Return the number of negative amino acids (D and E) in an AA sequence.
|
@@ -769,49 +913,50 @@ module Bio
|
|
769
913
|
# Round +num+ to +ndigits+ decimal places and return the result as a Float.
# The value is scaled by 10**ndigits, rounded to the nearest integer, then
# scaled back.
def round(num, ndigits=0)
  factor = 10 ** ndigits
  scaled = (num * factor).round
  scaled.to_f / factor.to_f
end
|
772
|
-
|
773
|
-
# --------------------------------
|
774
|
-
# :section: References
|
775
|
-
#
|
776
|
-
#
|
777
|
-
# 1. Protein Identification and Analysis Tools on the ExPASy Server;
|
778
|
-
# Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
|
779
|
-
# Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
|
780
|
-
# Protocols Handbook, Humana Press (2005). pp. 571-607
|
781
|
-
# 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
|
782
|
-
# How to measure and predict the molar absorption coefficient of a
|
783
|
-
# protein. Protein Sci. 11, 2411-2423.
|
784
|
-
# 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
|
785
|
-
# tyrosine in proteins. Biochemistry 6, 1948-1954.
|
786
|
-
# 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
|
787
|
-
# extinction coefficients from amino acid sequence data. Anal. Biochem.
|
788
|
-
# 182:319-326(1989).
|
789
|
-
# 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
|
790
|
-
# of a protein is a function of its amino-terminal residue. Science 234,
|
791
|
-
# 179-186.
|
792
|
-
# 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
|
793
|
-
# Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
|
794
|
-
# J. Biol. Chem. 264, 16700-16712.
|
795
|
-
# 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
|
796
|
-
# N-end rule in bacteria. Science 254, 1374-1377.
|
797
|
-
# 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
|
798
|
-
# recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
|
799
|
-
# Sci. 14, 483-488.
|
800
|
-
# 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
|
801
|
-
# Genes Cells 2, 13-28.
|
802
|
-
# 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
|
803
|
-
# between stability of a protein and its dipeptide composition: a novel
|
804
|
-
# approach for predicting in vivo stability of a protein from its primary
|
805
|
-
# sequence. Protein Eng. 4,155-161.
|
806
|
-
# 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
|
807
|
-
# proteins. J. Biochem. 88, 1895-1898.
|
808
|
-
# 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
|
809
|
-
# the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
|
810
|
-
# 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
|
811
|
-
# Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
|
812
|
-
# of polypeptides in immobilized pH gradients can be predicted from their
|
813
|
-
# amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
|
814
|
-
#
|
815
|
-
# --------------------------------
|
816
916
|
end
|
917
|
+
|
918
|
+
# --------------------------------
|
919
|
+
# :section: References
|
920
|
+
#
|
921
|
+
#
|
922
|
+
# 1. Protein Identification and Analysis Tools on the ExPASy Server;
|
923
|
+
# Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
|
924
|
+
# Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
|
925
|
+
# Protocols Handbook, Humana Press (2005). pp. 571-607
|
926
|
+
# 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
|
927
|
+
# How to measure and predict the molar absorption coefficient of a
|
928
|
+
# protein. Protein Sci. 11, 2411-2423.
|
929
|
+
# 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
|
930
|
+
# tyrosine in proteins. Biochemistry 6, 1948-1954.
|
931
|
+
# 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
|
932
|
+
# extinction coefficients from amino acid sequence data. Anal. Biochem.
|
933
|
+
# 182:319-326(1989).
|
934
|
+
# 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
|
935
|
+
# of a protein is a function of its amino-terminal residue. Science 234,
|
936
|
+
# 179-186.
|
937
|
+
# 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
|
938
|
+
# Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
|
939
|
+
# J. Biol. Chem. 264, 16700-16712.
|
940
|
+
# 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
|
941
|
+
# N-end rule in bacteria. Science 254, 1374-1377.
|
942
|
+
# 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
|
943
|
+
# recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
|
944
|
+
# Sci. 14, 483-488.
|
945
|
+
# 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
|
946
|
+
# Genes Cells 2, 13-28.
|
947
|
+
# 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
|
948
|
+
# between stability of a protein and its dipeptide composition: a novel
|
949
|
+
# approach for predicting in vivo stability of a protein from its primary
|
950
|
+
# sequence. Protein Eng. 4,155-161.
|
951
|
+
# 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
|
952
|
+
# proteins. J. Biochem. 88, 1895-1898.
|
953
|
+
# 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
|
954
|
+
# the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
|
955
|
+
# 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
|
956
|
+
# Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
|
957
|
+
# of polypeptides in immobilized pH gradients can be predicted from their
|
958
|
+
# amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
|
959
|
+
#
|
960
|
+
# --------------------------------
|
961
|
+
end
|
817
962
|
end
|
data/test/test_bio-protparam.rb
CHANGED
@@ -17,7 +17,11 @@ module Bio
|
|
17
17
|
# Build the Protparam instance under test from the bundled p53 UniProt entry.
# When PROTPARAM_TEST_REMOTE is set in the environment the object is created
# in :remote mode; otherwise :local mode is used.
def setup
  entry_path = File.join('test', 'data', 'uniprot', 'p53_human.uniprot')
  uniprot = Bio::UniProt.new(File.read(entry_path))
  mode = ENV['PROTPARAM_TEST_REMOTE'] ? :remote : :local
  @obj = Bio::Protparam.new(uniprot.seq, mode)
end
|
22
26
|
|
23
27
|
def test_num_neg
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-protparam
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
@@ -84,14 +84,12 @@ extensions: []
|
|
84
84
|
extra_rdoc_files:
|
85
85
|
- LICENSE.txt
|
86
86
|
- README.md
|
87
|
-
- README.rdoc
|
88
87
|
files:
|
89
88
|
- .document
|
90
89
|
- .travis.yml
|
91
90
|
- Gemfile
|
92
91
|
- LICENSE.txt
|
93
92
|
- README.md
|
94
|
-
- README.rdoc
|
95
93
|
- Rakefile
|
96
94
|
- VERSION
|
97
95
|
- lib/bio-protparam.rb
|
@@ -114,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
114
112
|
version: '0'
|
115
113
|
segments:
|
116
114
|
- 0
|
117
|
-
hash:
|
115
|
+
hash: 1639556585709602379
|
118
116
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
117
|
none: false
|
120
118
|
requirements:
|
data/README.rdoc
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
= bio-protparam
|
2
|
-
|
3
|
-
{<img
|
4
|
-
src="https://secure.travis-ci.org/hryk/bioruby-protparam.png"
|
5
|
-
/>}[http://travis-ci.org/#!/hryk/bioruby-protparam]
|
6
|
-
|
7
|
-
Full description goes here
|
8
|
-
|
9
|
-
Note: this software is under active development!
|
10
|
-
|
11
|
-
== Installation
|
12
|
-
|
13
|
-
gem install bio-protparam
|
14
|
-
|
15
|
-
== Usage
|
16
|
-
|
17
|
-
== Developers
|
18
|
-
|
19
|
-
To use the library
|
20
|
-
|
21
|
-
require 'bio-protparam'
|
22
|
-
|
23
|
-
The API doc is online. For more code examples see also the test files in
|
24
|
-
the source tree.
|
25
|
-
|
26
|
-
== Project home page
|
27
|
-
|
28
|
-
Information on the source tree, documentation, issues and how to contribute, see
|
29
|
-
|
30
|
-
http://github.com/hryk/bioruby-protparam
|
31
|
-
|
32
|
-
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
|
-
|
34
|
-
== Cite
|
35
|
-
|
36
|
-
If you use this software, please cite one of
|
37
|
-
|
38
|
-
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
39
|
-
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
40
|
-
|
41
|
-
== Biogems.info
|
42
|
-
|
43
|
-
This Biogem is published at http://biogems.info/index.html#bio-protparam
|
44
|
-
|
45
|
-
== Copyright
|
46
|
-
|
47
|
-
Copyright (c) 2012 hryk. See LICENSE.txt for further details.
|
48
|
-
|