bio-protparam 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -17,6 +17,7 @@ parameters instead of throwing query to Expasy protparam tool.
17
17
  ## Usage
18
18
 
19
19
  ```ruby
20
+ require 'bio'
20
21
  require 'bio-protparam'
21
22
 
22
23
  protparam = Bio::Protparam.new("MYNNYNLCHIRTINWEEIITGPSAMYSYVY...")
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
@@ -1,13 +1,16 @@
1
1
  # encoding: utf-8
2
2
  #
3
3
  #
4
- # = bio/appl/protparam.rb - A Class to Calculate Protein Parameters.
4
+ # = bio/util/protparam.rb - A Class to Calculate Protein Parameters.
5
5
  #
6
6
  # Copyright:: Copyright (C) 2012
7
7
  # Hiroyuki Nakamura <hiroyuki@1vq9.com>
8
8
  # License:: The Ruby License
9
9
  #
10
+
10
11
  require 'rational'
12
+ require 'net/http'
13
+ require 'uri'
11
14
 
12
15
  module Bio
13
16
  ##
@@ -153,7 +156,7 @@ module Bio
153
156
  }
154
157
  }
155
158
 
156
- # Estimated half-life of N-terminal residue of a protein.
159
+ # Estimated half-life (minutes) of N-terminal residue of a protein.
157
160
  HALFLIFE = {
158
161
  :ecoli => {
159
162
  :I => 600,
@@ -349,9 +352,9 @@ module Bio
349
352
  }
350
353
  }
351
354
 
352
- def initialize(seq)
355
+ def initialize(seq, mode=:local)
356
+
353
357
  if seq.kind_of?(String) && Bio::Sequence.guess(seq) == Bio::Sequence::AA
354
- # TODO: has issue.
355
358
  @seq = Bio::Sequence::AA.new seq
356
359
  elsif seq.kind_of? Bio::Sequence::AA
357
360
  @seq = seq
@@ -361,8 +364,149 @@ module Bio
361
364
  else
362
365
  raise ArgumentError, "sequence must be an AA sequence"
363
366
  end
367
+
368
+ self.class.class_eval do
369
+ include(if mode == :remote then Remote else Local end)
370
+ end
371
+ end
372
+
373
+ module Remote
374
+ PROTPARAM_URL = 'http://web.expasy.org/cgi-bin/protparam/protparam'
375
+
376
+ attr_accessor :result
377
+
378
+ def self.cast_method(type)
379
+ case type.to_s
380
+ when "Fixnum"
381
+ ".to_i"
382
+ when "Float"
383
+ ".to_f"
384
+ when "String"
385
+ ""
386
+ else
387
+ ""
388
+ end
389
+ end
390
+
391
+ def self.extract_options(*args)
392
+ # label, class, regex
393
+ # label, class, regex, lambda
394
+ # label, lambda
395
+ label, type, regex, block = [nil, nil, nil, nil]
396
+ if args.size > 2
397
+ label = args.shift
398
+ type = args.shift
399
+ if args.size > 1
400
+ regex, block = args
401
+ elsif args.size > 0
402
+ regex, block = if args.first.kind_of?(Regexp)
403
+ [args.first, nil]
404
+ elsif args.first.respond_to?(:call)
405
+ [nil, args.first]
406
+ end
407
+ end
408
+ end
409
+ [label, type, regex, block]
410
+ end
411
+
412
+ def self.rule(*args)
413
+ (label, type, regex, block) = extract_options(*args)
414
+ if regex && block
415
+ self.class_eval <<-METHOD
416
+ METHOD
417
+ elsif regex && !block
418
+ self.class_eval <<-METHOD
419
+ def #{label}
420
+ response = self.request
421
+ matched = %r/#{regex}/.match(response)
422
+ if matched.size > 1
423
+ matched[1]#{cast_method(type)}
424
+ else
425
+ nil
426
+ end
427
+ end
428
+ METHOD
429
+ elsif !regex && block
430
+ wrapped_block = Proc.new {|*method_args|
431
+ response = self.request
432
+ method_args.unshift response
433
+ block.call(method_args)
434
+ }
435
+ self.send(:define_method, label, &wrapped_block)
436
+ else
437
+ raise ArgumentError,
438
+ "Invalid arguments.rule(:label, :type, :regex) or rule(:label, :type, :lambda)"
439
+ end
440
+ end
441
+
442
+ rule :num_neg, Fixnum, %r/<B>Total number of negatively charged residues.*?<\/B>\s*(\d*)/
443
+ rule :num_pos, Fixnum, %r/<B>Total number of positively charged residues.*?<\/B>\s*(\d*)/
444
+ rule :amino_acid_number, Fixnum, %r/<B>Number of amino acids:<\/B> (\d+)/
445
+ rule :total_atoms, Fixnum, %r/<B>Total number of atoms:<\/B>\s*(\d*)/
446
+ rule :num_carbon, Fixnum, %r/Carbon\s+C\s+(\d+)/
447
+ rule :num_hydrogen, Fixnum, %r/Hydrogen\s+H\s+(\d+)/
448
+ rule :num_nitro, Fixnum, %r/Nitrogen\s+N\s+(\d+)/
449
+ rule :num_oxygen, Fixnum, %r/Oxygen\s+O\s+(\d+)/
450
+ rule :num_sulphur, Fixnum, %r/Sulfur\s+S\s+(\d+)/
451
+ rule :molecular_weight, Float, %r/<B>Molecular weight:<\/B> (\d*\.{0,1}\d*)/
452
+ rule :theoretical_pI, Float,%r/<B>Theoretical pI:<\/B> (-{0,1}\d*\.{0,1}\d*)/
453
+ rule :half_life, Float, %r/The estimated half-life is.*?(-{0,1}\d*\.{0,1}\d*)\s*hours \(mammalian reticulocytes, in vitro\)/
454
+ rule :instability_index, Float, %r/The instability index \(II\) is computed to be (-{0,1}\d*\.{0,1}\d*)/
455
+ rule :stability, String, %r/This classifies the protein as\s(\w+)\./
456
+ rule :aliphatic_index, Float, %r/<B>Aliphatic index:<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/
457
+ rule :gravy, Float, %r/<B>Grand average of hydropathicity \(GRAVY\):<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/
458
+
459
+ rule :half_life, Fixnum, proc {|response, category|
460
+ category ||= :mammalian
461
+ category_map = {
462
+ :mammalian => /\(mammalian\sreticulocytes,\sin\svitro\)/,
463
+ :yeast => /\(yeast,\sin\svivo\)/,
464
+ :ecoli => /\(Escherichia\scoli,\sin\svivo\)/
465
+ }
466
+ if /The\sestimated\shalf-life\sis:.*?
467
+ ([>\d]+)\shours\s(?=#{category_map[category]})/mx =~ response
468
+ half_life = $1
469
+ half_life.gsub!(/>/, '') if half_life.include?('>')
470
+ (half_life.to_f * 60)
471
+ else
472
+ raise "Parse Error!"
473
+ end
474
+ }
475
+
476
+ rule :aa_comp, Fixnum, proc {|response, aa_code|
477
+ # Arg (R) 26 6.6%
478
+ aa_map = Hash[response.
479
+ scan(/(?:[A-Z][a-z]{2}){0,1}\s\(([A-Z])\)\s*?\d+?\s*?(\d+.\d+)%/).
480
+ map{|aa,val| [aa.to_sym, val.to_f] }]
481
+ if aa_code.nil?
482
+ aa_map
483
+ else
484
+ aa_map[aa_code.to_sym]
485
+ end
486
+ }
487
+
488
+ def stable?
489
+ (stablity == 'stable')
490
+ end
491
+
492
+ def request
493
+ @result ||= begin
494
+ res = Net::HTTP.post_form(URI(PROTPARAM_URL),
495
+ {'sequence' => @seq.to_s})
496
+ res.body
497
+ end
498
+ end
499
+
500
+ def fallback!
501
+ self.class.class_eval do
502
+ include Local
503
+ end
364
504
  end
365
505
 
506
+ end
507
+
508
+ module Local
509
+
366
510
  ##
367
511
  #
368
512
  # Return the number of negative amino acids (D and E) in an AA sequence.
@@ -769,49 +913,50 @@ module Bio
769
913
  def round(num, ndigits=0)
770
914
  (num * (10 ** ndigits)).round().to_f / (10 ** ndigits).to_f
771
915
  end
772
-
773
- # --------------------------------
774
- # :section: References
775
- #
776
- #
777
- # 1. Protein Identification and Analysis Tools on the ExPASy Server;
778
- # Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
779
- # Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
780
- # Protocols Handbook, Humana Press (2005). pp. 571-607
781
- # 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
782
- # How to measure and predict the molar absorption coefficient of a
783
- # protein. Protein Sci. 11, 2411-2423.
784
- # 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
785
- # tyrosine in proteins. Biochemistry 6, 1948-1954.
786
- # 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
787
- # extinction coefficients from amino acid sequence data. Anal. Biochem.
788
- # 182:319-326(1989).
789
- # 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
790
- # of a protein is a function of its amino-terminal residue. Science 234,
791
- # 179-186.
792
- # 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
793
- # Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
794
- # J. Biol. Chem. 264, 16700-16712.
795
- # 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
796
- # N-end rule in bacteria. Science 254, 1374-1377.
797
- # 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
798
- # recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
799
- # Sci. 14, 483-488.
800
- # 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
801
- # Genes Cells 2, 13-28.
802
- # 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
803
- # between stability of a protein and its dipeptide composition: a novel
804
- # approach for predicting in vivo stability of a protein from its primary
805
- # sequence. Protein Eng. 4,155-161.
806
- # 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
807
- # proteins. J. Biochem. 88, 1895-1898.
808
- # 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
809
- # the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
810
- # 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
811
- # Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
812
- # of polypeptides in immobilized pH gradients can be predicted from their
813
- # amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
814
- #
815
- # --------------------------------
816
916
  end
917
+
918
+ # --------------------------------
919
+ # :section: References
920
+ #
921
+ #
922
+ # 1. Protein Identification and Analysis Tools on the ExPASy Server;
923
+ # Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
924
+ # Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
925
+ # Protocols Handbook, Humana Press (2005). pp. 571-607
926
+ # 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
927
+ # How to measure and predict the molar absorption coefficient of a
928
+ # protein. Protein Sci. 11, 2411-2423.
929
+ # 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
930
+ # tyrosine in proteins. Biochemistry 6, 1948-1954.
931
+ # 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
932
+ # extinction coefficients from amino acid sequence data. Anal. Biochem.
933
+ # 182:319-326(1989).
934
+ # 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
935
+ # of a protein is a function of its amino-terminal residue. Science 234,
936
+ # 179-186.
937
+ # 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
938
+ # Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
939
+ # J. Biol. Chem. 264, 16700-16712.
940
+ # 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
941
+ # N-end rule in bacteria. Science 254, 1374-1377.
942
+ # 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
943
+ # recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
944
+ # Sci. 14, 483-488.
945
+ # 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
946
+ # Genes Cells 2, 13-28.
947
+ # 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
948
+ # between stability of a protein and its dipeptide composition: a novel
949
+ # approach for predicting in vivo stability of a protein from its primary
950
+ # sequence. Protein Eng. 4,155-161.
951
+ # 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
952
+ # proteins. J. Biochem. 88, 1895-1898.
953
+ # 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
954
+ # the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
955
+ # 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
956
+ # Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
957
+ # of polypeptides in immobilized pH gradients can be predicted from their
958
+ # amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
959
+ #
960
+ # --------------------------------
961
+ end
817
962
  end
@@ -17,7 +17,11 @@ module Bio
17
17
  def setup
18
18
  data = File.read(File.join('test', 'data', 'uniprot', 'p53_human.uniprot'))
19
19
  uniprot = Bio::UniProt.new(data)
20
- @obj = Bio::Protparam.new(uniprot.seq)
20
+ if ENV['PROTPARAM_TEST_REMOTE']
21
+ @obj = Bio::Protparam.new(uniprot.seq, :remote)
22
+ else
23
+ @obj = Bio::Protparam.new(uniprot.seq, :local)
24
+ end
21
25
  end
22
26
 
23
27
  def test_num_neg
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-protparam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2012-12-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
@@ -84,14 +84,12 @@ extensions: []
84
84
  extra_rdoc_files:
85
85
  - LICENSE.txt
86
86
  - README.md
87
- - README.rdoc
88
87
  files:
89
88
  - .document
90
89
  - .travis.yml
91
90
  - Gemfile
92
91
  - LICENSE.txt
93
92
  - README.md
94
- - README.rdoc
95
93
  - Rakefile
96
94
  - VERSION
97
95
  - lib/bio-protparam.rb
@@ -114,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
114
112
  version: '0'
115
113
  segments:
116
114
  - 0
117
- hash: 2302094890483272954
115
+ hash: 1639556585709602379
118
116
  required_rubygems_version: !ruby/object:Gem::Requirement
119
117
  none: false
120
118
  requirements:
@@ -1,48 +0,0 @@
1
- = bio-protparam
2
-
3
- {<img
4
- src="https://secure.travis-ci.org/hryk/bioruby-protparam.png"
5
- />}[http://travis-ci.org/#!/hryk/bioruby-protparam]
6
-
7
- Full description goes here
8
-
9
- Note: this software is under active development!
10
-
11
- == Installation
12
-
13
- gem install bio-protparam
14
-
15
- == Usage
16
-
17
- == Developers
18
-
19
- To use the library
20
-
21
- require 'bio-protparam'
22
-
23
- The API doc is online. For more code examples see also the test files in
24
- the source tree.
25
-
26
- == Project home page
27
-
28
- For information on the source tree, documentation, issues and how to contribute, see
29
-
30
- http://github.com/hryk/bioruby-protparam
31
-
32
- The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
33
-
34
- == Cite
35
-
36
- If you use this software, please cite one of
37
-
38
- * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
39
- * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
40
-
41
- == Biogems.info
42
-
43
- This Biogem is published at http://biogems.info/index.html#bio-protparam
44
-
45
- == Copyright
46
-
47
- Copyright (c) 2012 hryk. See LICENSE.txt for further details.
48
-