bio-protparam 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -17,6 +17,7 @@ parameters instead of throwing query to Expasy protparam tool.
17
17
  ## Usage
18
18
 
19
19
  ```ruby
20
+ require 'bio'
20
21
  require 'bio-protparam'
21
22
 
22
23
  protparam = Bio::Protparam.new("MYNNYNLCHIRTINWEEIITGPSAMYSYVY...")
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
@@ -1,13 +1,16 @@
1
1
  # encoding: utf-8
2
2
  #
3
3
  #
4
- # = bio/appl/protparam.rb - A Class to Calculate Protein Parameters.
4
+ # = bio/util/protparam.rb - A Class to Calculate Protein Parameters.
5
5
  #
6
6
  # Copyright:: Copyright (C) 2012
7
7
  # Hiroyuki Nakamura <hiroyuki@1vq9.com>
8
8
  # License:: The Ruby License
9
9
  #
10
+
10
11
  require 'rational'
12
+ require 'net/http'
13
+ require 'uri'
11
14
 
12
15
  module Bio
13
16
  ##
@@ -153,7 +156,7 @@ module Bio
153
156
  }
154
157
  }
155
158
 
156
- # Estemated half-life of N-terminal residue of a protein.
159
+ # Estimated half-life (minutes) of N-terminal residue of a protein.
157
160
  HALFLIFE = {
158
161
  :ecoli => {
159
162
  :I => 600,
@@ -349,9 +352,9 @@ module Bio
349
352
  }
350
353
  }
351
354
 
352
- def initialize(seq)
355
+ def initialize(seq, mode=:local)
356
+
353
357
  if seq.kind_of?(String) && Bio::Sequence.guess(seq) == Bio::Sequence::AA
354
- # TODO: has issue.
355
358
  @seq = Bio::Sequence::AA.new seq
356
359
  elsif seq.kind_of? Bio::Sequence::AA
357
360
  @seq = seq
@@ -361,8 +364,149 @@ module Bio
361
364
  else
362
365
  raise ArgumentError, "sequence must be an AA sequence"
363
366
  end
367
+
368
+ self.class.class_eval do
369
+ include(if mode == :remote then Remote else Local end)
370
+ end
371
+ end
372
+
373
+ module Remote
374
+ PROTPARAM_URL = 'http://web.expasy.org/cgi-bin/protparam/protparam'
375
+
376
+ attr_accessor :result
377
+
378
+ def self.cast_method(type)
379
+ case type.to_s
380
+ when "Fixnum"
381
+ ".to_i"
382
+ when "Float"
383
+ ".to_f"
384
+ when "String"
385
+ ""
386
+ else
387
+ ""
388
+ end
389
+ end
390
+
391
+ def self.extract_options(*args)
392
+ # label, class, regex
393
+ # label, class, regex, lambda
394
+ # label, lambda
395
+ label, type, regex, block = [nil, nil, nil, nil]
396
+ if args.size > 2
397
+ label = args.shift
398
+ type = args.shift
399
+ if args.size > 1
400
+ regex, block = args
401
+ elsif args.size > 0
402
+ regex, block = if args.first.kind_of?(Regexp)
403
+ [args.first, nil]
404
+ elsif args.first.respond_to?(:call)
405
+ [nil, args.first]
406
+ end
407
+ end
408
+ end
409
+ [label, type, regex, block]
410
+ end
411
+
412
+ def self.rule(*args)
413
+ (label, type, regex, block) = extract_options(*args)
414
+ if regex && block
415
+ self.class_eval <<-METHOD
416
+ METHOD
417
+ elsif regex && !block
418
+ self.class_eval <<-METHOD
419
+ def #{label}
420
+ response = self.request
421
+ matched = %r/#{regex}/.match(response)
422
+ if matched.size > 1
423
+ matched[1]#{cast_method(type)}
424
+ else
425
+ nil
426
+ end
427
+ end
428
+ METHOD
429
+ elsif !regex && block
430
+ wrapped_block = Proc.new {|*method_args|
431
+ response = self.request
432
+ method_args.unshift response
433
+ block.call(method_args)
434
+ }
435
+ self.send(:define_method, label, &wrapped_block)
436
+ else
437
+ raise ArgumentError,
438
+ "Invalid arguments.rule(:label, :type, :regex) or rule(:label, :type, :lambda)"
439
+ end
440
+ end
441
+
442
+ rule :num_neg, Fixnum, %r/<B>Total number of negatively charged residues.*?<\/B>\s*(\d*)/
443
+ rule :num_pos, Fixnum, %r/<B>Total number of positively charged residues.*?<\/B>\s*(\d*)/
444
+ rule :amino_acid_number, Fixnum, %r/<B>Number of amino acids:<\/B> (\d+)/
445
+ rule :total_atoms, Fixnum, %r/<B>Total number of atoms:<\/B>\s*(\d*)/
446
+ rule :num_carbon, Fixnum, %r/Carbon\s+C\s+(\d+)/
447
+ rule :num_hydrogen, Fixnum, %r/Hydrogen\s+H\s+(\d+)/
448
+ rule :num_nitro, Fixnum, %r/Nitrogen\s+N\s+(\d+)/
449
+ rule :num_oxygen, Fixnum, %r/Oxygen\s+O\s+(\d+)/
450
+ rule :num_sulphur, Fixnum, %r/Sulfur\s+S\s+(\d+)/
451
+ rule :molecular_weight, Float, %r/<B>Molecular weight:<\/B> (\d*\.{0,1}\d*)/
452
+ rule :theoretical_pI, Float,%r/<B>Theoretical pI:<\/B> (-{0,1}\d*\.{0,1}\d*)/
453
+ rule :half_life, Float, %r/The estimated half-life is.*?(-{0,1}\d*\.{0,1}\d*)\s*hours \(mammalian reticulocytes, in vitro\)/
454
+ rule :instability_index, Float, %r/The instability index \(II\) is computed to be (-{0,1}\d*\.{0,1}\d*)/
455
+ rule :stability, String, %r/This classifies the protein as\s(\w+)\./
456
+ rule :aliphatic_index, Float, %r/<B>Aliphatic index:<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/
457
+ rule :gravy, Float, %r/<B>Grand average of hydropathicity \(GRAVY\):<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/
458
+
459
+ rule :half_life, Fixnum, proc {|response, category|
460
+ category ||= :mammalian
461
+ category_map = {
462
+ :mammalian => /\(mammalian\sreticulocytes,\sin\svitro\)/,
463
+ :yeast => /\(yeast,\sin\svivo\)/,
464
+ :ecoli => /\(Escherichia\scoli,\sin\svivo\)/
465
+ }
466
+ if /The\sestimated\shalf-life\sis:.*?
467
+ ([>\d]+)\shours\s(?=#{category_map[category]})/mx =~ response
468
+ half_life = $1
469
+ half_life.gsub!(/>/, '') if half_life.include?('>')
470
+ (half_life.to_f * 60)
471
+ else
472
+ raise "Parse Error!"
473
+ end
474
+ }
475
+
476
+ rule :aa_comp, Fixnum, proc {|response, aa_code|
477
+ # Arg (R) 26 6.6%
478
+ aa_map = Hash[response.
479
+ scan(/(?:[A-Z][a-z]{2}){0,1}\s\(([A-Z])\)\s*?\d+?\s*?(\d+.\d+)%/).
480
+ map{|aa,val| [aa.to_sym, val.to_f] }]
481
+ if aa_code.nil?
482
+ aa_map
483
+ else
484
+ aa_map[aa_code.to_sym]
485
+ end
486
+ }
487
+
488
+ def stable?
489
+ (stablity == 'stable')
490
+ end
491
+
492
+ def request
493
+ @result ||= begin
494
+ res = Net::HTTP.post_form(URI(PROTPARAM_URL),
495
+ {'sequence' => @seq.to_s})
496
+ res.body
497
+ end
498
+ end
499
+
500
+ def fallback!
501
+ self.class.class_eval do
502
+ include Local
503
+ end
364
504
  end
365
505
 
506
+ end
507
+
508
+ module Local
509
+
366
510
  ##
367
511
  #
368
512
  # Return the number of negative amino acids (D and E) in an AA sequence.
@@ -769,49 +913,50 @@ module Bio
769
913
  def round(num, ndigits=0)
770
914
  (num * (10 ** ndigits)).round().to_f / (10 ** ndigits).to_f
771
915
  end
772
-
773
- # --------------------------------
774
- # :section: References
775
- #
776
- #
777
- # 1. Protein Identification and Analysis Tools on the ExPASy Server;
778
- # Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
779
- # Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
780
- # Protocols Handbook, Humana Press (2005). pp. 571-607
781
- # 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
782
- # How to measure and predict the molar absorption coefficient of a
783
- # protein. Protein Sci. 11, 2411-2423.
784
- # 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
785
- # tyrosine in proteins. Biochemistry 6, 1948-1954.
786
- # 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
787
- # extinction coefficients from amino acid sequence data. Anal. Biochem.
788
- # 182:319-326(1989).
789
- # 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
790
- # of a protein is a function of its amino-terminal residue. Science 234,
791
- # 179-186.
792
- # 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
793
- # Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
794
- # J. Biol. Chem. 264, 16700-16712.
795
- # 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
796
- # N-end rule in bacteria. Science 254, 1374-1377.
797
- # 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
798
- # recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
799
- # Sci. 14, 483-488.
800
- # 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
801
- # Genes Cells 2, 13-28.
802
- # 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
803
- # between stability of a protein and its dipeptide composition: a novel
804
- # approach for predicting in vivo stability of a protein from its primary
805
- # sequence. Protein Eng. 4,155-161.
806
- # 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
807
- # proteins. J. Biochem. 88, 1895-1898.
808
- # 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
809
- # the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
810
- # 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
811
- # Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
812
- # of polypeptides in immobilized pH gradients can be predicted from their
813
- # amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
814
- #
815
- # --------------------------------
816
916
  end
917
+
918
+ # --------------------------------
919
+ # :section: References
920
+ #
921
+ #
922
+ # 1. Protein Identification and Analysis Tools on the ExPASy Server;
923
+ # Gasteiger E., Hoogland C., Gattiker A., Duvaud S., Wilkins M.R.,
924
+ # Appel R.D., Bairoch A.; (In) John M. Walker (ed): The Proteomics
925
+ # Protocols Handbook, Humana Press (2005). pp. 571-607
926
+ # 2. Pace, C.N., Vajdos, F., Fee, L., Grimsley, G., and Gray, T. (1995)
927
+ # How to measure and predict the molar absorption coefficient of a
928
+ # protein. Protein Sci. 11, 2411-2423.
929
+ # 3. Edelhoch, H. (1967) Spectroscopic determination of tryptophan and
930
+ # tyrosine in proteins. Biochemistry 6, 1948-1954.
931
+ # 4. Gill, S.C. and von Hippel, P.H. (1989) Calculation of protein
932
+ # extinction coefficients from amino acid sequence data. Anal. Biochem.
933
+ # 182:319-326(1989).
934
+ # 5. Bachmair, A., Finley, D. and Varshavsky, A. (1986) In vivo half-life
935
+ # of a protein is a function of its amino-terminal residue. Science 234,
936
+ # 179-186.
937
+ # 6. Gonda, D.K., Bachmair, A., Wunning, I., Tobias, J.W., Lane, W.S. and
938
+ # Varshavsky, A. J. (1989) Universality and structure of the N-end rule.
939
+ # J. Biol. Chem. 264, 16700-16712.
940
+ # 7. Tobias, J.W., Shrader, T.E., Rocap, G. and Varshavsky, A. (1991) The
941
+ # N-end rule in bacteria. Science 254, 1374-1377.
942
+ # 8. Ciechanover, A. and Schwartz, A.L. (1989) How are substrates
943
+ # recognized by the ubiquitin-mediated proteolytic system? Trends Biochem.
944
+ # Sci. 14, 483-488.
945
+ # 9. Varshavsky, A. (1997) The N-end rule pathway of protein degradation.
946
+ # Genes Cells 2, 13-28.
947
+ # 10. Guruprasad, K., Reddy, B.V.B. and Pandit, M.W. (1990) Correlation
948
+ # between stability of a protein and its dipeptide composition: a novel
949
+ # approach for predicting in vivo stability of a protein from its primary
950
+ # sequence. Protein Eng. 4,155-161.
951
+ # 11. Ikai, A.J. (1980) Thermostability and aliphatic index of globular
952
+ # proteins. J. Biochem. 88, 1895-1898.
953
+ # 12. Kyte, J. and Doolittle, R.F. (1982) A simple method for displaying
954
+ # the hydropathic character of a protein. J. Mol. Biol. 157, 105-132.
955
+ # 13. Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
956
+ # Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F. The focusing positions
957
+ # of polypeptides in immobilized pH gradients can be predicted from their
958
+ # amino acid sequences. Electrophoresis 1993, 14, 1023-1031.
959
+ #
960
+ # --------------------------------
961
+ end
817
962
  end
@@ -17,7 +17,11 @@ module Bio
17
17
  def setup
18
18
  data = File.read(File.join('test', 'data', 'uniprot', 'p53_human.uniprot'))
19
19
  uniprot = Bio::UniProt.new(data)
20
- @obj = Bio::Protparam.new(uniprot.seq)
20
+ if ENV['PROTPARAM_TEST_REMOTE']
21
+ @obj = Bio::Protparam.new(uniprot.seq, :remote)
22
+ else
23
+ @obj = Bio::Protparam.new(uniprot.seq, :local)
24
+ end
21
25
  end
22
26
 
23
27
  def test_num_neg
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-protparam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2012-12-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
@@ -84,14 +84,12 @@ extensions: []
84
84
  extra_rdoc_files:
85
85
  - LICENSE.txt
86
86
  - README.md
87
- - README.rdoc
88
87
  files:
89
88
  - .document
90
89
  - .travis.yml
91
90
  - Gemfile
92
91
  - LICENSE.txt
93
92
  - README.md
94
- - README.rdoc
95
93
  - Rakefile
96
94
  - VERSION
97
95
  - lib/bio-protparam.rb
@@ -114,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
114
112
  version: '0'
115
113
  segments:
116
114
  - 0
117
- hash: 2302094890483272954
115
+ hash: 1639556585709602379
118
116
  required_rubygems_version: !ruby/object:Gem::Requirement
119
117
  none: false
120
118
  requirements:
@@ -1,48 +0,0 @@
1
- = bio-protparam
2
-
3
- {<img
4
- src="https://secure.travis-ci.org/hryk/bioruby-protparam.png"
5
- />}[http://travis-ci.org/#!/hryk/bioruby-protparam]
6
-
7
- Full description goes here
8
-
9
- Note: this software is under active development!
10
-
11
- == Installation
12
-
13
- gem install bio-protparam
14
-
15
- == Usage
16
-
17
- == Developers
18
-
19
- To use the library
20
-
21
- require 'bio-protparam'
22
-
23
- The API doc is online. For more code examples see also the test files in
24
- the source tree.
25
-
26
- == Project home page
27
-
28
- Information on the source tree, documentation, issues and how to contribute, see
29
-
30
- http://github.com/hryk/bioruby-protparam
31
-
32
- The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
33
-
34
- == Cite
35
-
36
- If you use this software, please cite one of
37
-
38
- * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
39
- * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
40
-
41
- == Biogems.info
42
-
43
- This Biogem is published at http://biogems.info/index.html#bio-protparam
44
-
45
- == Copyright
46
-
47
- Copyright (c) 2012 hryk. See LICENSE.txt for further details.
48
-