bio 1.4.3.0001 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +39 -33
  3. data/BSDL +22 -0
  4. data/COPYING +2 -2
  5. data/COPYING.ja +36 -36
  6. data/ChangeLog +2404 -1025
  7. data/KNOWN_ISSUES.rdoc +15 -55
  8. data/README.rdoc +17 -23
  9. data/RELEASE_NOTES.rdoc +246 -183
  10. data/Rakefile +3 -2
  11. data/bin/br_biofetch.rb +29 -5
  12. data/bioruby.gemspec +15 -32
  13. data/bioruby.gemspec.erb +10 -20
  14. data/doc/ChangeLog-1.4.3 +1478 -0
  15. data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
  16. data/doc/Tutorial.rd +0 -6
  17. data/doc/Tutorial.rd.html +7 -12
  18. data/doc/Tutorial.rd.ja +960 -1064
  19. data/doc/Tutorial.rd.ja.html +977 -1067
  20. data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
  21. data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
  22. data/gemfiles/Gemfile.travis-rbx +13 -0
  23. data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
  24. data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
  25. data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
  26. data/lib/bio.rb +10 -43
  27. data/lib/bio/alignment.rb +8 -14
  28. data/lib/bio/appl/blast.rb +1 -2
  29. data/lib/bio/appl/blast/format0.rb +18 -7
  30. data/lib/bio/appl/blast/remote.rb +0 -9
  31. data/lib/bio/appl/blast/report.rb +1 -1
  32. data/lib/bio/appl/clustalw/report.rb +3 -1
  33. data/lib/bio/appl/genscan/report.rb +1 -2
  34. data/lib/bio/appl/iprscan/report.rb +1 -2
  35. data/lib/bio/appl/meme/mast.rb +4 -4
  36. data/lib/bio/appl/meme/mast/report.rb +1 -1
  37. data/lib/bio/appl/paml/codeml.rb +2 -2
  38. data/lib/bio/appl/paml/codeml/report.rb +1 -0
  39. data/lib/bio/appl/paml/common.rb +1 -1
  40. data/lib/bio/appl/sosui/report.rb +1 -2
  41. data/lib/bio/command.rb +62 -2
  42. data/lib/bio/data/aa.rb +13 -31
  43. data/lib/bio/data/codontable.rb +1 -2
  44. data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
  45. data/lib/bio/db/biosql/sequence.rb +1 -1
  46. data/lib/bio/db/embl/common.rb +1 -1
  47. data/lib/bio/db/embl/embl.rb +5 -4
  48. data/lib/bio/db/embl/format_embl.rb +3 -3
  49. data/lib/bio/db/embl/sptr.rb +9 -1444
  50. data/lib/bio/db/embl/swissprot.rb +12 -29
  51. data/lib/bio/db/embl/trembl.rb +13 -30
  52. data/lib/bio/db/embl/uniprot.rb +12 -29
  53. data/lib/bio/db/embl/uniprotkb.rb +1455 -0
  54. data/lib/bio/db/fasta.rb +17 -0
  55. data/lib/bio/db/fasta/defline.rb +1 -3
  56. data/lib/bio/db/fastq.rb +1 -1
  57. data/lib/bio/db/genbank/ddbj.rb +9 -5
  58. data/lib/bio/db/genbank/refseq.rb +11 -3
  59. data/lib/bio/db/gff.rb +3 -4
  60. data/lib/bio/db/go.rb +5 -6
  61. data/lib/bio/db/kegg/module.rb +4 -5
  62. data/lib/bio/db/kegg/pathway.rb +4 -5
  63. data/lib/bio/db/kegg/reaction.rb +1 -1
  64. data/lib/bio/db/nexus.rb +3 -2
  65. data/lib/bio/db/pdb/pdb.rb +2 -2
  66. data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
  67. data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
  68. data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
  69. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
  70. data/lib/bio/db/transfac.rb +1 -1
  71. data/lib/bio/io/das.rb +40 -41
  72. data/lib/bio/io/fastacmd.rb +0 -16
  73. data/lib/bio/io/fetch.rb +111 -55
  74. data/lib/bio/io/flatfile/buffer.rb +4 -5
  75. data/lib/bio/io/hinv.rb +2 -3
  76. data/lib/bio/io/ncbirest.rb +43 -6
  77. data/lib/bio/io/pubmed.rb +76 -81
  78. data/lib/bio/io/togows.rb +33 -10
  79. data/lib/bio/map.rb +1 -1
  80. data/lib/bio/pathway.rb +1 -1
  81. data/lib/bio/sequence/compat.rb +1 -1
  82. data/lib/bio/sequence/na.rb +63 -12
  83. data/lib/bio/shell.rb +0 -2
  84. data/lib/bio/shell/core.rb +5 -6
  85. data/lib/bio/shell/interface.rb +3 -4
  86. data/lib/bio/shell/irb.rb +1 -2
  87. data/lib/bio/shell/plugin/entry.rb +2 -3
  88. data/lib/bio/shell/plugin/seq.rb +7 -6
  89. data/lib/bio/shell/setup.rb +1 -2
  90. data/lib/bio/tree.rb +2 -2
  91. data/lib/bio/util/contingency_table.rb +0 -2
  92. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
  93. data/lib/bio/util/sirna.rb +76 -16
  94. data/lib/bio/version.rb +8 -9
  95. data/sample/benchmark_clustalw_report.rb +47 -0
  96. data/sample/biofetch.rb +248 -151
  97. data/setup.rb +6 -7
  98. data/test/data/clustalw/example1-seqnos.aln +58 -0
  99. data/test/network/bio/appl/blast/test_remote.rb +1 -15
  100. data/test/network/bio/appl/test_blast.rb +0 -12
  101. data/test/network/bio/io/test_pubmed.rb +49 -0
  102. data/test/network/bio/io/test_togows.rb +0 -1
  103. data/test/network/bio/test_command.rb +65 -2
  104. data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
  105. data/test/unit/bio/appl/blast/test_report.rb +110 -48
  106. data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
  107. data/test/unit/bio/appl/sim4/test_report.rb +46 -17
  108. data/test/unit/bio/appl/test_blast.rb +2 -2
  109. data/test/unit/bio/db/embl/test_embl.rb +0 -1
  110. data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
  111. data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
  112. data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
  113. data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
  114. data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
  115. data/test/unit/bio/db/test_fasta.rb +41 -1
  116. data/test/unit/bio/db/test_fastq.rb +14 -4
  117. data/test/unit/bio/db/test_gff.rb +2 -2
  118. data/test/unit/bio/db/test_phyloxml.rb +30 -30
  119. data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
  120. data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
  121. data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
  122. data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
  123. data/test/unit/bio/io/test_togows.rb +3 -2
  124. data/test/unit/bio/sequence/test_dblink.rb +1 -1
  125. data/test/unit/bio/sequence/test_na.rb +3 -1
  126. data/test/unit/bio/test_alignment.rb +1 -2
  127. data/test/unit/bio/test_command.rb +5 -4
  128. data/test/unit/bio/test_db.rb +4 -2
  129. data/test/unit/bio/test_pathway.rb +25 -10
  130. data/test/unit/bio/util/test_sirna.rb +22 -22
  131. metadata +656 -1430
  132. data/doc/KEGG_API.rd +0 -1843
  133. data/doc/KEGG_API.rd.ja +0 -1834
  134. data/extconf.rb +0 -2
  135. data/lib/bio/appl/blast/ddbj.rb +0 -131
  136. data/lib/bio/db/kegg/taxonomy.rb +0 -280
  137. data/lib/bio/io/dbget.rb +0 -194
  138. data/lib/bio/io/ddbjrest.rb +0 -344
  139. data/lib/bio/io/ddbjxml.rb +0 -458
  140. data/lib/bio/io/ebisoap.rb +0 -158
  141. data/lib/bio/io/ensembl.rb +0 -229
  142. data/lib/bio/io/higet.rb +0 -73
  143. data/lib/bio/io/keggapi.rb +0 -363
  144. data/lib/bio/io/ncbisoap.rb +0 -156
  145. data/lib/bio/io/soapwsdl.rb +0 -119
  146. data/lib/bio/shell/plugin/keggapi.rb +0 -181
  147. data/lib/bio/shell/plugin/soap.rb +0 -87
  148. data/sample/dbget +0 -37
  149. data/sample/demo_ddbjxml.rb +0 -212
  150. data/sample/demo_kegg_taxonomy.rb +0 -92
  151. data/sample/demo_keggapi.rb +0 -502
  152. data/sample/psortplot_html.rb +0 -214
  153. data/test/network/bio/io/test_ddbjrest.rb +0 -47
  154. data/test/network/bio/io/test_ensembl.rb +0 -230
  155. data/test/network/bio/io/test_soapwsdl.rb +0 -53
  156. data/test/unit/bio/io/test_ddbjxml.rb +0 -81
  157. data/test/unit/bio/io/test_ensembl.rb +0 -111
  158. data/test/unit/bio/io/test_soapwsdl.rb +0 -33
@@ -5,7 +5,6 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
  # == Example
11
10
  #
@@ -60,7 +59,7 @@ module Bio
60
59
  if /NUMBER OF TM HELIX = (\d+)/ =~ line
61
60
  @tms = $1
62
61
  elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line
63
- tmh = $1.to_i
62
+ #tmh = $1.to_i
64
63
  range = Range.new($2.to_i, $3.to_i)
65
64
  grade = $4
66
65
  seq = $5
@@ -6,7 +6,6 @@
6
6
  # Toshiaki Katayama <k@bioruby.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id:$
10
9
  #
11
10
 
12
11
  require 'open3'
@@ -856,7 +855,7 @@ module Command
856
855
  end.join('&')
857
856
  end
858
857
  when String
859
- data = URI.escape(params.strip)
858
+ raise TypeError, 'Bio::Command.make_cgi_params no longer accepts a single String as a form'
860
859
  end
861
860
  return data
862
861
  end
@@ -882,6 +881,67 @@ module Command
882
881
  return result
883
882
  end
884
883
 
884
+ # Same as:
885
+ # http = Net::HTTP.new(...); http.post(path, data, header)
886
+ # and
887
+ # it uses proxy if an environment variable (same as OpenURI.open_uri)
888
+ # is set.
889
+ # In addition, +header+ can be set.
890
+ # (Default Content-Type is application/octet-stream.
891
+ # Content-Length is automatically set by default.)
892
+ # +uri+ must be a URI object, +params+ must be a hash, and
893
+ # +header+ must be a hash.
894
+ #
895
+ # ---
896
+ # *Arguments*:
897
+ # * (required) _http_: Net::HTTP object or compatible object
898
+ # * (required) _path_: String
899
+ # * (required) _data_: String containing data
900
+ # * (optional) _header_: Hash containing header strings
901
+ # *Returns*:: (same as Net::HTTP::post)
902
+ def http_post(http, path, data, header = {})
903
+ hash = {
904
+ 'Content-Type' => 'application/octet-stream',
905
+ 'Content-Length' => data.length.to_s
906
+ }
907
+ hash.update(header)
908
+
909
+ http.post(path, data, hash)
910
+ end
911
+
912
+ # Same as:
913
+ # Net::HTTP.post(uri, params)
914
+ # and
915
+ # it uses proxy if an environment variable (same as OpenURI.open_uri)
916
+ # is set.
917
+ # In addition, +header+ can be set.
918
+ # (Default Content-Type is application/octet-stream.
919
+ # Content-Length is automatically set by default.)
920
+ # +uri+ must be a URI object, +data+ must be a String, and
921
+ # +header+ must be a hash.
922
+ #
923
+ # ---
924
+ # *Arguments*:
925
+ # * (required) _uri_: URI object or String
926
+ # * (optional) _data_: String containing data
927
+ # * (optional) _header_: Hash containing header strings
928
+ # *Returns*:: (same as Net::HTTP::post)
929
+ def post(uri, data, header = {})
930
+ unless uri.is_a?(URI)
931
+ uri = URI.parse(uri)
932
+ end
933
+
934
+ hash = {
935
+ 'Content-Type' => 'application/octet-stream',
936
+ 'Content-Length' => data.length.to_s
937
+ }
938
+ hash.update(header)
939
+
940
+ start_http(uri.host, uri.port) do |http|
941
+ http.post(uri.path, data, hash)
942
+ end
943
+ end
944
+
885
945
  end # module Command
886
946
  end # module Bio
887
947
 
@@ -108,23 +108,21 @@ class AminoAcid
108
108
  }
109
109
 
110
110
  def weight(x = nil)
111
- if x
112
- if x.length > 1
113
- total = 0.0
114
- x.each_byte do |byte|
115
- aa = byte.chr.upcase
116
- if WEIGHT[aa]
117
- total += WEIGHT[aa]
118
- else
119
- raise "Error: invalid amino acid '#{aa}'"
120
- end
111
+ return WEIGHT unless x
112
+
113
+ if x.length > 1
114
+ total = 0.0
115
+ x.each_byte do |byte|
116
+ aa = byte.chr.upcase
117
+ if WEIGHT[aa]
118
+ total += WEIGHT[aa]
119
+ else
120
+ raise "Error: invalid amino acid '#{aa}'"
121
121
  end
122
- total -= NucleicAcid.weight[:water] * (x.length - 1)
123
- else
124
- WEIGHT[x]
125
122
  end
123
+ total -= NucleicAcid.weight[:water] * (x.length - 1)
126
124
  else
127
- WEIGHT
125
+ WEIGHT[x]
128
126
  end
129
127
  end
130
128
 
@@ -237,11 +235,7 @@ class AminoAcid
237
235
 
238
236
 
239
237
  def reverse
240
- hash = Hash.new
241
- NAMES.each do |k, v|
242
- hash[v] = k
243
- end
244
- hash
238
+ @reverse ||= NAMES.invert
245
239
  end
246
240
 
247
241
  end
@@ -254,18 +248,6 @@ class AminoAcid
254
248
  extend Data
255
249
 
256
250
 
257
- private
258
-
259
-
260
- # override when used as an instance method to improve performance
261
- alias orig_reverse reverse
262
- def reverse
263
- unless @reverse
264
- @reverse = orig_reverse
265
- end
266
- @reverse
267
- end
268
-
269
251
  end
270
252
 
271
253
  end # module Bio
@@ -5,7 +5,6 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
  # == Data source
11
10
  #
@@ -127,7 +126,7 @@ class CodonTable
127
126
  # table.revtrans("A") # => ["gcg", "gct", "gca", "gcc"]
128
127
  #
129
128
  def revtrans(aa)
130
- unless @reverse
129
+ unless (defined? @reverse) && @reverse
131
130
  @reverse = {}
132
131
  @table.each do |k, v|
133
132
  @reverse[v] ||= []
@@ -11,6 +11,7 @@
11
11
 
12
12
  require 'bio/sequence'
13
13
  require 'bio/sequence/adapter'
14
+ require 'bio/sequence/dblink'
14
15
 
15
16
  # Internal use only. Normal users should not use this module.
16
17
  #
@@ -371,7 +371,7 @@ module Bio
371
371
  #probably would be better to d a class refrence to collect these informations
372
372
  @entry.bioentry_references.collect do |bio_ref|
373
373
  hash = Hash.new
374
- hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/)
374
+ hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/) if (bio_ref.reference and bio_ref.reference.authors)
375
375
 
376
376
  hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if (bio_ref.start_pos and bio_ref.end_pos)
377
377
  hash['title'] = bio_ref.reference.title
@@ -149,7 +149,7 @@ module Common
149
149
  unless @data['OS']
150
150
  os = Array.new
151
151
  fetch('OS').split(/, and|, /).each do |tmp|
152
- if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/
152
+ if tmp =~ /([A-Z][a-z]* *[\w \:\'\+\-]+\w)/
153
153
  org = $1
154
154
  tmp =~ /(\(.+\))/
155
155
  os.push({'name' => $1, 'os' => org})
@@ -267,9 +267,10 @@ class EMBL < EMBLDB
267
267
  unless @data['OS']
268
268
  os = Array.new
269
269
  tmp = fetch('OS')
270
- if /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/ =~ tmp
270
+ if /([A-Z][a-z]* *[\w \:\'\+\-]+\w) *\(([\w ]+)\)\s*\z/ =~ tmp
271
271
  org = $1
272
- os.push({'name' => $2, 'os' => $1})
272
+ name = $2
273
+ os.push({'name' => name, 'os' => org})
273
274
  else
274
275
  os.push({'name' => nil, 'os' => tmp})
275
276
  end
@@ -340,7 +341,7 @@ class EMBL < EMBLDB
340
341
  @orig['FT'].each_line do |line|
341
342
  next if line =~ /^FEATURES/
342
343
 
343
- head = line[0,20].strip # feature key (source, CDS, ...)
344
+ #head = line[0,20].strip # feature key (source, CDS, ...)
344
345
  body = line[20,60].chomp # feature value (position, /qualifier=)
345
346
  if line =~ /^FT {3}(\S+)/
346
347
  ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
@@ -491,7 +492,7 @@ class EMBL < EMBLDB
491
492
  def parse_release_version(str)
492
493
  return [ nil, nil ] unless str
493
494
  a = str.split(/[\(\,\)]/)
494
- dstr = a.shift
495
+ a.shift #date string e.g. "14-OCT-2006"
495
496
  rel = nil
496
497
  ver = nil
497
498
  a.each do |x|
@@ -126,9 +126,9 @@ module Bio::Sequence::Format::NucFormatter
126
126
  def mol_type_embl
127
127
  if mt = molecule_type then
128
128
  mt
129
- elsif f = (features or []).find { |f| f.feature == 'source' } and
130
- q = f.qualifiers.find { |q| q.qualifier == 'mol_type' } then
131
- q.value
129
+ elsif fe = (features or []).find { |f| f.feature == 'source' } and
130
+ qu = fe.qualifiers.find { |q| q.qualifier == 'mol_type' } then
131
+ qu.value
132
132
  else
133
133
  'NA'
134
134
  end
@@ -1,1455 +1,20 @@
1
1
  #
2
- # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
2
+ # = bio/db/embl/sptr.rb - Bio::SPTR is an alias of Bio::UniProtKB
3
3
  #
4
- # Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n@bioruby.org>
4
+ # Copyright:: Copyright (C) 2013 BioRuby Project
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id:$
8
- #
9
- # == Description
10
- #
11
- # Shared methods for UniProtKB/SwissProt and TrEMBL classes.
12
- #
13
- # See the SWISS-PROT document file SPECLIST.TXT or UniProtKB/SwissProt
14
- # user manual.
15
- #
16
- # == Examples
17
- #
18
- # str = File.read("p53_human.swiss")
19
- # obj = Bio::SPTR.new(str)
20
- # obj.entry_id #=> "P53_HUMAN"
21
- #
22
- # == References
23
- #
24
- # * Swiss-Prot Protein knowledgebase. TrEMBL Computer-annotated supplement
25
- # to Swiss-Prot
26
- # http://au.expasy.org/sprot/
27
- #
28
- # * UniProt
29
- # http://uniprot.org/
30
- #
31
- # * The UniProtKB/SwissProt/TrEMBL User Manual
32
- # http://www.expasy.org/sprot/userman.html
33
- #
34
-
35
7
 
36
- require 'bio/db'
37
- require 'bio/db/embl/common'
8
+ warn "Bio::SPTR is changed to an alias of Bio::UniProtKB. Please use Bio::UniProtKB. Bio::SPTR may be deprecated in the future." if $VERBOSE
38
9
 
39
10
  module Bio
40
11
 
41
- # Parser class for UniProtKB/SwissProt and TrEMBL database entry.
42
- class SPTR < EMBLDB
43
- include Bio::EMBLDB::Common
44
-
45
- @@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/
46
- @@data_class = ["STANDARD", "PRELIMINARY"]
47
-
48
- # returns a Hash of the ID line.
49
- #
50
- # returns a content (Int or String) of the ID line by a given key.
51
- # Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MODECULE_TYPE', 'SEQUENCE_LENGTH']
52
- #
53
- # === ID Line (since UniProtKB release 9.0 of 31-Oct-2006)
54
- # ID P53_HUMAN Reviewed; 393 AA.
55
- # #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."
56
- #
57
- # === Examples
58
- # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed",
59
- # "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}
60
- #
61
- # obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
62
- #
63
- #
64
- # === ID Line (older style)
65
- # ID P53_HUMAN STANDARD; PRT; 393 AA.
66
- # #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
67
- #
68
- # === Examples
69
- # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
70
- # "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
71
- #
72
- # obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
73
- #
74
- def id_line(key = nil)
75
- return id_line[key] if key
76
- return @data['ID'] if @data['ID']
77
-
78
- part = @orig['ID'].split(/ +/)
79
- if part[4].to_s.chomp == 'AA.' then
80
- # after UniProtKB release 9.0 of 31-Oct-2006
81
- # (http://www.uniprot.org/docs/sp_news.htm)
82
- molecule_type = nil
83
- sequence_length = part[3].to_i
84
- else
85
- molecule_type = part[3].sub(/;/,'')
86
- sequence_length = part[4].to_i
87
- end
88
- @data['ID'] = {
89
- 'ENTRY_NAME' => part[1],
90
- 'DATA_CLASS' => part[2].sub(/;/,''),
91
- 'MOLECULE_TYPE' => molecule_type,
92
- 'SEQUENCE_LENGTH' => sequence_length
93
- }
94
- end
95
-
96
-
97
- # returns a ENTRY_NAME in the ID line.
98
- #
99
- def entry_id
100
- id_line('ENTRY_NAME')
101
- end
102
- alias entry_name entry_id
103
- alias entry entry_id
104
-
105
-
106
- # returns a MOLECULE_TYPE in the ID line.
107
- #
108
- # A short-cut for Bio::SPTR#id_line('MOLECULE_TYPE').
109
- def molecule
110
- id_line('MOLECULE_TYPE')
111
- end
112
- alias molecule_type molecule
113
-
114
-
115
- # returns a SEQUENCE_LENGTH in the ID line.
116
- #
117
- # A short-cut for Bio::SPTR#id_line('SEQUENCE_LENGHT').
118
- def sequence_length
119
- id_line('SEQUENCE_LENGTH')
120
- end
121
- alias aalen sequence_length
122
-
123
-
124
- # Bio::EMBLDB::Common#ac -> ary
125
- # #accessions -> ary
126
- # #accession -> String (accessions.first)
127
- @@ac_regrexp = /[OPQ][0-9][A-Z0-9]{3}[0-9]/
128
-
129
-
130
-
131
- # returns a Hash of information in the DT lines.
132
- # hash keys:
133
- # ['created', 'sequence', 'annotation']
134
- #--
135
- # also Symbols acceptable (ASAP):
136
- # [:created, :sequence, :annotation]
137
- #++
138
- #
139
- # Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is
140
- # changed, and the word "annotation" is no longer used in DT lines.
141
- # Despite the change, the word "annotation" is still used for keeping
142
- # compatibility.
143
- #
144
- # returns a String of information in the DT lines by a given key.
145
- #
146
- # === DT Line; date (3/entry)
147
- # DT DD-MMM-YYY (integrated into UniProtKB/XXXXX.)
148
- # DT DD-MMM-YYY (sequence version NN)
149
- # DT DD-MMM-YYY (entry version NN)
150
- #
151
- # The format have been changed in UniProtKB release 7.0 of 07-Feb-2006.
152
- # Below is the older format.
153
- #
154
- # === Old format of DT Line; date (3/entry)
155
- # DT DD-MMM-YYY (rel. NN, Created)
156
- # DT DD-MMM-YYY (rel. NN, Last sequence update)
157
- # DT DD-MMM-YYY (rel. NN, Last annotation update)
158
- def dt(key = nil)
159
- return dt[key] if key
160
- return @data['DT'] if @data['DT']
161
-
162
- part = self.get('DT').split(/\n/)
163
- @data['DT'] = {
164
- 'created' => part[0].sub(/\w{2} /,'').strip,
165
- 'sequence' => part[1].sub(/\w{2} /,'').strip,
166
- 'annotation' => part[2].sub(/\w{2} /,'').strip
167
- }
168
- end
169
-
170
-
171
- # (private) parses DE line (description lines)
172
- # since UniProtKB release 14.0 of 22-Jul-2008
173
- #
174
- # Return array containing array.
175
- #
176
- # http://www.uniprot.org/docs/sp_news.htm
177
- def parse_DE_line_rel14(str)
178
- # Retruns if it is not the new format since Rel.14
179
- return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
180
- ret = []
181
- cur = nil
182
- str.each_line do |line|
183
- case line
184
- when /^DE (Includes|Contains)\: *$/
185
- cur = [ $1 ]
186
- ret.push cur
187
- cur = nil
188
- #subcat_and_desc = nil
189
- next
190
- when /^DE *(RecName|AltName|SubName)\: +(.*)/
191
- category = $1
192
- subcat_and_desc = $2
193
- cur = [ category ]
194
- ret.push cur
195
- when /^DE *(Flags)\: +(.*)/
196
- category = $1
197
- desc = $2
198
- flags = desc.strip.split(/\s*\;\s*/) || []
199
- cur = [ category, flags ]
200
- ret.push cur
201
- cur = nil
202
- #subcat_and_desc = nil
203
- next
204
- when /^DE *(.*)/
205
- subcat_and_desc = $1
206
- else
207
- warn "Warning: skipped DE line in unknown format: #{line.inspect}"
208
- #subcat_and_desc = nil
209
- next
210
- end
211
- case subcat_and_desc
212
- when nil
213
- # does nothing
214
- when /\A([^\=]+)\=(.*)/
215
- subcat = $1
216
- desc = $2
217
- desc.sub!(/\;\s*\z/, '')
218
- unless cur
219
- warn "Warning: unknown category in DE line: #{line.inspect}"
220
- cur = [ '' ]
221
- ret.push cur
222
- end
223
- cur.push [ subcat, desc ]
224
- else
225
- warn "Warning: skipped DE line description in unknown format: #{line.inspect}"
226
- end
227
- end
228
- ret
229
- end
230
- private :parse_DE_line_rel14
231
-
232
- # returns the proposed official name of the protein.
233
- # Returns a String.
234
- #
235
- # Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format have
236
- # been changed. The method returns the full name which is taken from
237
- # "RecName: Full=" or "SubName: Full=" line normally in the beginning of
238
- # the DE lines.
239
- # Unlike parser for old format, no special treatments for fragment or
240
- # precursor.
241
- #
242
- # For old format, the method parses the DE lines and returns the protein
243
- # name as a String.
244
- #
245
- # === DE Line; description (>=1)
246
- # "DE #{OFFICIAL_NAME} (#{SYNONYM})"
247
- # "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTEINS: #1; #2]."
248
- # OFFICIAL_NAME 1/entry
249
- # SYNONYM >=0
250
- # CONTEINS >=0
251
- def protein_name
252
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
253
- parsed_de_line = @data['DE']
254
- if parsed_de_line then
255
- # since UniProtKB release 14.0 of 22-Jul-2008
256
- name = nil
257
- parsed_de_line.each do |a|
258
- case a[0]
259
- when 'RecName', 'SubName'
260
- if name_pair = a[1..-1].find { |b| b[0] == 'Full' } then
261
- name = name_pair[1]
262
- break
263
- end
264
- end
265
- end
266
- name = name.to_s
267
- else
268
- # old format (before Rel. 13.x)
269
- name = ""
270
- if de_line = fetch('DE') then
271
- str = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
272
- name = str[/^[^(]*/].strip
273
- name << ' (Fragment)' if str =~ /fragment/i
274
- end
275
- end
276
- return name
277
- end
278
-
279
-
280
- # returns synonyms (unofficial and/or alternative names).
281
- # Returns an Array containing String objects.
282
- #
283
- # Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format have
284
- # been changed. The method returns the full or short names which are
285
- # taken from "RecName: Short=", "RecName: EC=", and AltName lines,
286
- # except after "Contains:" or "Includes:".
287
- # For keeping compatibility with old format parser, "RecName: EC=N.N.N.N"
288
- # is reported as "EC N.N.N.N".
289
- # In addition, to prevent confusion, "Allergen=" and "CD_antigen="
290
- # prefixes are added for the corresponding fields.
291
- #
292
- # For old format, the method parses the DE lines and returns synonyms.
293
- # synonyms are each placed in () following the official name on the DE line.
294
- def synonyms
295
- ary = Array.new
296
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
297
- parsed_de_line = @data['DE']
298
- if parsed_de_line then
299
- # since UniProtKB release 14.0 of 22-Jul-2008
300
- parsed_de_line.each do |a|
301
- case a[0]
302
- when 'Includes', 'Contains'
303
- break #the each loop
304
- when 'RecName', 'SubName', 'AltName'
305
- a[1..-1].each do |b|
306
- if name = b[1] and b[1] != self.protein_name then
307
- case b[0]
308
- when 'EC'
309
- name = "EC " + b[1]
310
- when 'Allergen', 'CD_antigen'
311
- name = b[0] + '=' + b[1]
312
- else
313
- name = b[1]
314
- end
315
- ary.push name
316
- end
317
- end
318
- end #case a[0]
319
- end #parsed_de_line.each
320
- else
321
- # old format (before Rel. 13.x)
322
- if de_line = fetch('DE') then
323
- line = de_line.sub(/\[.*\]/,'') # ignore stuff between [ and ]. That's the "contains" part
324
- line.scan(/\([^)]+/) do |synonym|
325
- unless synonym =~ /fragment/i then
326
- ary << synonym[1..-1].strip # index to remove the leading (
327
- end
328
- end
329
- end
330
- end
331
- return ary
332
- end
333
-
334
-
335
- # returns gene names in the GN line.
336
- #
337
- # New UniProt/SwissProt format:
338
- # * Bio::SPTR#gn -> [ <gene record>* ]
339
- # where <gene record> is:
340
- # { :name => '...',
341
- # :synonyms => [ 's1', 's2', ... ],
342
- # :loci => [ 'l1', 'l2', ... ],
343
- # :orfs => [ 'o1', 'o2', ... ]
344
- # }
345
- #
346
- # Old format:
347
- # * Bio::SPTR#gn -> Array # AND
348
- # * Bio::SPTR#gn[0] -> Array # OR
349
- #
350
- # === GN Line: Gene name(s) (>=0, optional)
351
- def gn
352
- unless @data['GN']
353
- case fetch('GN')
354
- when /Name=/,/ORFNames=/,/OrderedLocusNames=/,/Synonyms=/
355
- @data['GN'] = gn_uniprot_parser
356
- else
357
- @data['GN'] = gn_old_parser
358
- end
359
- end
360
- @data['GN']
361
- end
362
-
363
-
364
- # returns contents in the old style GN line.
365
- # === GN Line: Gene name(s) (>=0, optional)
366
- # GN HNS OR DRDX OR OSMZ OR BGLY.
367
- # GN CECA1 AND CECA2.
368
- # GN CECA1 AND (HOGE OR FUGA).
369
- #
370
- # GN NAME1 [(AND|OR) NAME]+.
371
- #
372
- # Bio::SPTR#gn -> Array # AND
373
- # #gn[0] -> Array # OR
374
- # #gene_names -> Array
375
- def gn_old_parser
376
- names = Array.new
377
- if get('GN').size > 0
378
- names = fetch('GN').sub(/\.$/,'').split(/ AND /)
379
- names.map! { |synonyms|
380
- synonyms = synonyms.gsub(/\(|\)/,'').split(/ OR /).map { |e|
381
- e.strip
382
- }
383
- }
384
- end
385
- @data['GN'] = names
386
- end
387
- private :gn_old_parser
388
-
389
- # returns contents in the structured GN line.
390
- # The new format of the GN line is:
391
- # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...];
392
- # GN ORFNames=[, ...];
393
- #
394
- # * Bio::SPTR#gn -> [ <gene record>* ]
395
- # where <gene record> is:
396
- # { :name => '...',
397
- # :synonyms => [ 's1', 's2', ... ],
398
- # :loci => [ 'l1', 'l2', ... ],
399
- # :orfs => [ 'o1', 'o2', ... ]
400
- # }
401
- def gn_uniprot_parser
402
- @data['GN'] = Array.new
403
- gn_line = fetch('GN').strip
404
- records = gn_line.split(/\s*and\s*/)
405
- records.each do |record|
406
- gene_hash = {:name => '', :synonyms => [], :loci => [], :orfs => []}
407
- record.each_line(';') do |element|
408
- case element
409
- when /Name=/ then
410
- gene_hash[:name] = $'[0..-2]
411
- when /Synonyms=/ then
412
- gene_hash[:synonyms] = $'[0..-2].split(/\s*,\s*/)
413
- when /OrderedLocusNames=/ then
414
- gene_hash[:loci] = $'[0..-2].split(/\s*,\s*/)
415
- when /ORFNames=/ then
416
- gene_hash[:orfs] = $'[0..-2].split(/\s*,\s*/)
417
- end
418
- end
419
- @data['GN'] << gene_hash
420
- end
421
- return @data['GN']
422
- end
423
- private :gn_uniprot_parser
424
-
425
-
426
- # returns a Array of gene names in the GN line.
427
- def gene_names
428
- gn # set @data['GN'] if it hasn't been already done
429
- if @data['GN'].first.class == Hash then
430
- @data['GN'].collect { |element| element[:name] }
431
- else
432
- @data['GN'].first
433
- end
434
- end
435
-
436
-
437
- # returns a String of the first gene name in the GN line.
438
- def gene_name
439
- gene_names.first
440
- end
441
-
442
-
443
- # returns a Array of Hashs or a String of the OS line when a key given.
444
- # * Bio::EMBLDB#os -> Array
445
- # [{'name' => '(Human)', 'os' => 'Homo sapiens'},
446
- # {'name' => '(Rat)', 'os' => 'Rattus norveticus'}]
447
- # * Bio::EPTR#os[0] -> Hash
448
- # {'name' => "(Human)", 'os' => 'Homo sapiens'}
449
- # * Bio::SPTR#os[0]['name'] -> "(Human)"
450
- # * Bio::EPTR#os(0) -> "Homo sapiens (Human)"
451
- #
452
- # === OS Line; organism species (>=1)
453
- # OS Genus species (name).
454
- # OS Genus species (name0) (name1).
455
- # OS Genus species (name0) (name1).
456
- # OS Genus species (name0), G s0 (name0), and G s (name0) (name1).
457
- # OS Homo sapiens (Human), and Rarrus norveticus (Rat)
458
- # OS Hippotis sp. Clark and Watts 825.
459
- # OS unknown cyperaceous sp.
460
- def os(num = nil)
461
- unless @data['OS']
462
- os = Array.new
463
- fetch('OS').split(/, and|, /).each do |tmp|
464
- if tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
465
- org = $1
466
- tmp =~ /(\(.+\))/
467
- os.push({'name' => $1, 'os' => org})
468
- else
469
- raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
470
- end
471
- end
472
- @data['OS'] = os
473
- end
474
-
475
- if num
476
- # EX. "Trifolium repens (white clover)"
477
- return "#{@data['OS'][num]['os']} #{@data['OS'][num]['name']}"
478
- else
479
- return @data['OS']
480
- end
481
- end
482
-
483
-
484
- # Bio::EMBLDB::Common#og -> Array
485
- # OG Line; organella (0 or 1/entry)
486
- # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"]
487
- # or a plasmid name (e.g. "Plasmid pBR322").
488
-
489
-
490
- # Bio::EMBLDB::Common#oc -> Array
491
- # OC Line; organism classification (>=1)
492
- # "OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;"
493
- # "OC Theileria."
494
-
495
-
496
-
497
- # returns a Hash of oraganism taxonomy cross-references.
498
- # * Bio::SPTR#ox -> Hash
499
- # {'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
500
- #
501
- # === OX Line; organism taxonomy cross-reference (>=1 per entry)
502
- # OX NCBI_TaxID=1234;
503
- # OX NCBI_TaxID=1234, 2345, 3456, 4567;
504
- def ox
505
- unless @data['OX']
506
- tmp = fetch('OX').sub(/\.$/,'').split(/;/).map { |e| e.strip }
507
- hsh = Hash.new
508
- tmp.each do |e|
509
- db,refs = e.split(/=/)
510
- hsh[db] = refs.split(/, */)
511
- end
512
- @data['OX'] = hsh
513
- end
514
- return @data['OX']
515
- end
516
-
517
- # === The OH Line;
518
- #
519
- # OH NCBI_TaxID=TaxID; HostName.
520
- # http://br.expasy.org/sprot/userman.html#OH_line
521
- def oh
522
- unless @data['OH']
523
- @data['OH'] = fetch('OH').split("\. ").map {|x|
524
- if x =~ /NCBI_TaxID=(\d+);/
525
- taxid = $1
526
- else
527
- raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
528
- $!, "\n", get('OH'), "\n"].join
529
-
530
- end
531
- if x =~ /NCBI_TaxID=\d+; (.+)/
532
- host_name = $1
533
- host_name.sub!(/\.$/, '')
534
- else
535
- host_name = nil
536
- end
537
- {'NCBI_TaxID' => taxid, 'HostName' => host_name}
538
- }
539
- end
540
- @data['OH']
541
- end
542
-
543
-
544
-
545
- # Bio::EMBLDB::Common#ref -> Array
546
- # R Lines
547
- # RN RC RP RX RA RT RL
548
-
549
- # returns contents in the R lines.
550
- # * Bio::EMBLDB::Common#ref -> [ <refernece information Hash>* ]
551
- # where <reference information Hash> is:
552
- # {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
553
- # 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
554
- #
555
- # R Lines
556
- # * RN RC RP RX RA RT RL RG
557
- def ref
558
- unless @data['R']
559
- @data['R'] = [get('R').split(/\nRN /)].flatten.map { |str|
560
- hash = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
561
- 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
562
- str = 'RN ' + str unless /^RN / =~ str
563
-
564
- str.split("\n").each do |line|
565
- if /^(R[NPXARLCTG]) (.+)/ =~ line
566
- hash[$1] += $2 + ' '
567
- else
568
- raise "Invalid format in R lines, \n[#{line}]\n"
569
- end
570
- end
571
-
572
- hash['RN'] = set_RN(hash['RN'])
573
- hash['RC'] = set_RC(hash['RC'])
574
- hash['RP'] = set_RP(hash['RP'])
575
- hash['RX'] = set_RX(hash['RX'])
576
- hash['RA'] = set_RA(hash['RA'])
577
- hash['RT'] = set_RT(hash['RT'])
578
- hash['RL'] = set_RL(hash['RL'])
579
- hash['RG'] = set_RG(hash['RG'])
580
-
581
- hash
582
- }
583
-
584
- end
585
- @data['R']
586
- end
587
-
588
- def set_RN(data)
589
- data.strip
590
- end
591
-
592
- def set_RC(data)
593
- data.scan(/([STP]\w+)=(.+);/).map { |comment|
594
- [comment[1].split(/, and |, /)].flatten.map { |text|
595
- {'Token' => comment[0], 'Text' => text}
596
- }
597
- }.flatten
598
- end
599
- private :set_RC
600
-
601
- def set_RP(data)
602
- data = data.strip
603
- data = data.sub(/\.$/, '')
604
- data.split(/, AND |, /i).map {|x|
605
- x = x.strip
606
- x = x.gsub(' ', ' ')
607
- }
608
- end
609
- private :set_RP
610
-
611
- def set_RX(data)
612
- rx = {'MEDLINE' => nil, 'PubMed' => nil, 'DOI' => nil}
613
- if data =~ /MEDLINE=(.+?);/
614
- rx['MEDLINE'] = $1
615
- end
616
- if data =~ /PubMed=(.+?);/
617
- rx['PubMed'] = $1
618
- end
619
- if data =~ /DOI=(.+?);/
620
- rx['DOI'] = $1
621
- end
622
- rx
623
- end
624
- private :set_RX
625
-
626
- def set_RA(data)
627
- data = data.sub(/; *$/, '')
628
- end
629
- private :set_RA
630
-
631
- def set_RT(data)
632
- data = data.sub(/; *$/, '')
633
- data = data.gsub(/(^"|"$)/, '')
634
- end
635
- private :set_RT
636
-
637
- def set_RL(data)
638
- data = data.strip
639
- end
640
- private :set_RL
641
-
642
- def set_RG(data)
643
- data = data.split('; ')
644
- end
645
- private :set_RG
646
-
647
-
648
-
649
- # returns Bio::Reference object from Bio::EMBLDB::Common#ref.
650
- # * Bio::EMBLDB::Common#ref -> Bio::References
651
- def references
652
- unless @data['references']
653
- ary = self.ref.map {|ent|
654
- hash = Hash.new('')
655
- ent.each {|key, value|
656
- case key
657
- when 'RA'
658
- hash['authors'] = value.split(/, /)
659
- when 'RT'
660
- hash['title'] = value
661
- when 'RL'
662
- if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
663
- hash['journal'] = $1
664
- hash['volume'] = $2
665
- hash['issue'] = $3
666
- hash['pages'] = $4
667
- hash['year'] = $5
668
- else
669
- hash['journal'] = value
670
- end
671
- when 'RX' # PUBMED, MEDLINE, DOI
672
- value.each do |tag, xref|
673
- hash[ tag.downcase ] = xref
674
- end
675
- end
676
- }
677
- Reference.new(hash)
678
- }
679
- @data['references'] = References.new(ary)
680
- end
681
- @data['references']
682
- end
683
-
684
-
685
-
686
-
687
-
688
-
689
- # === The HI line
690
- # Bio::SPTR#hi #=> hash
691
- def hi
692
- unless @data['HI']
693
- @data['HI'] = []
694
- fetch('HI').split(/\. /).each do |hlist|
695
- hash = {'Category' => '', 'Keywords' => [], 'Keyword' => ''}
696
- hash['Category'], hash['Keywords'] = hlist.split(': ')
697
- hash['Keywords'] = hash['Keywords'].split('; ')
698
- hash['Keyword'] = hash['Keywords'].pop
699
- hash['Keyword'].sub!(/\.$/, '')
700
- @data['HI'] << hash
701
- end
702
- end
703
- @data['HI']
704
- end
705
-
706
-
707
- @@cc_topics = ['PHARMACEUTICAL',
708
- 'BIOTECHNOLOGY',
709
- 'TOXIC DOSE',
710
- 'ALLERGEN',
711
- 'RNA EDITING',
712
- 'POLYMORPHISM',
713
- 'BIOPHYSICOCHEMICAL PROPERTIES',
714
- 'MASS SPECTROMETRY',
715
- 'WEB RESOURCE',
716
- 'ENZYME REGULATION',
717
- 'DISEASE',
718
- 'INTERACTION',
719
- 'DEVELOPMENTAL STAGE',
720
- 'INDUCTION',
721
- 'CAUTION',
722
- 'ALTERNATIVE PRODUCTS',
723
- 'DOMAIN',
724
- 'PTM',
725
- 'MISCELLANEOUS',
726
- 'TISSUE SPECIFICITY',
727
- 'COFACTOR',
728
- 'PATHWAY',
729
- 'SUBUNIT',
730
- 'CATALYTIC ACTIVITY',
731
- 'SUBCELLULAR LOCATION',
732
- 'FUNCTION',
733
- 'SIMILARITY']
734
- # returns contents in the CC lines.
735
- # * Bio::SPTR#cc -> Hash
736
- #
737
- # returns an object of contents in the TOPIC.
738
- # * Bio::SPTR#cc(TOPIC) -> Array w/in Hash, Hash
739
- #
740
- # returns contents of the "ALTERNATIVE PRODUCTS".
741
- # * Bio::SPTR#cc('ALTERNATIVE PRODUCTS') -> Hash
742
- # {'Event' => str,
743
- # 'Named isoforms' => int,
744
- # 'Comment' => str,
745
- # 'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
746
- #
747
- # CC -!- ALTERNATIVE PRODUCTS:
748
- # CC Event=Alternative splicing; Named isoforms=15;
749
- # ...
750
- # CC placentae isoforms. All tissues differentially splice exon 13;
751
- # CC Name=A; Synonyms=no del;
752
- # CC IsoId=P15529-1; Sequence=Displayed;
753
- #
754
- # returns contents of the "DATABASE".
755
- # * Bio::SPTR#cc('DATABASE') -> Array
756
- # [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
757
- #
758
- # CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
759
- #
760
- # returns contents of the "MASS SPECTROMETRY".
761
- # * Bio::SPTR#cc('MASS SPECTROMETRY') -> Array
762
- # [{'MW"=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
763
- #
764
- # CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
765
- #
766
- # === CC lines (>=0, optional)
767
- # CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
768
- # CC IN LIVER, KIDNEY, LUNG AND BRAIN.
769
- #
770
- # CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
771
- # CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
772
- #
773
- # See also http://www.expasy.org/sprot/userman.html#CC_line
774
- #
775
- def cc(topic = nil)
776
- unless @data['CC']
777
- cc = Hash.new
778
- comment_border= '-' * (77 - 4 + 1)
779
- dlm = /-!- /
780
-
781
- # 12KD_MYCSM has no CC lines.
782
- return cc if get('CC').size == 0
783
-
784
- cc_raw = fetch('CC')
785
-
786
- # Removing the copyright statement.
787
- cc_raw.sub!(/ *---.+---/m, '')
788
-
789
- # Not any CC Lines without the copyright statement.
790
- return cc if cc_raw == ''
791
-
792
- begin
793
- cc_raw, copyright = cc_raw.split(/#{comment_border}/)[0]
794
- cc_raw = cc_raw.sub(dlm,'')
795
- cc_raw.split(dlm).each do |tmp|
796
- tmp = tmp.strip
797
-
798
- if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
799
- key = $1
800
- body = $2
801
- body.gsub!(/- (?!AND)/,'-')
802
- body.strip!
803
- unless cc[key]
804
- cc[key] = [body]
805
- else
806
- cc[key].push(body)
807
- end
808
- else
809
- raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
810
- '', get('CC'),''].join("\n")
811
- end
812
- end
813
- rescue NameError
814
- if fetch('CC') == ''
815
- return {}
816
- else
817
- raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
818
- "\n'#{self.get('CC')}'\n", "(#{$!})"].join
819
- end
820
- rescue NoMethodError
821
- end
822
-
823
- @data['CC'] = cc
824
- end
825
-
826
-
827
- case topic
828
- when 'ALLERGEN'
829
- return @data['CC'][topic]
830
- when 'ALTERNATIVE PRODUCTS'
831
- return cc_alternative_products(@data['CC'][topic])
832
- when 'BIOPHYSICOCHEMICAL PROPERTIES'
833
- return cc_biophysiochemical_properties(@data['CC'][topic])
834
- when 'BIOTECHNOLOGY'
835
- return @data['CC'][topic]
836
- when 'CATALITIC ACTIVITY'
837
- return cc_catalytic_activity(@data['CC'][topic])
838
- when 'CAUTION'
839
- return cc_caution(@data['CC'][topic])
840
- when 'COFACTOR'
841
- return @data['CC'][topic]
842
- when 'DEVELOPMENTAL STAGE'
843
- return @data['CC'][topic].join('')
844
- when 'DISEASE'
845
- return @data['CC'][topic].join('')
846
- when 'DOMAIN'
847
- return @data['CC'][topic]
848
- when 'ENZYME REGULATION'
849
- return @data['CC'][topic].join('')
850
- when 'FUNCTION'
851
- return @data['CC'][topic].join('')
852
- when 'INDUCTION'
853
- return @data['CC'][topic].join('')
854
- when 'INTERACTION'
855
- return cc_interaction(@data['CC'][topic])
856
- when 'MASS SPECTROMETRY'
857
- return cc_mass_spectrometry(@data['CC'][topic])
858
- when 'MISCELLANEOUS'
859
- return @data['CC'][topic]
860
- when 'PATHWAY'
861
- return cc_pathway(@data['CC'][topic])
862
- when 'PHARMACEUTICAL'
863
- return @data['CC'][topic]
864
- when 'POLYMORPHISM'
865
- return @data['CC'][topic]
866
- when 'PTM'
867
- return @data['CC'][topic]
868
- when 'RNA EDITING'
869
- return cc_rna_editing(@data['CC'][topic])
870
- when 'SIMILARITY'
871
- return @data['CC'][topic]
872
- when 'SUBCELLULAR LOCATION'
873
- return cc_subcellular_location(@data['CC'][topic])
874
- when 'SUBUNIT'
875
- return @data['CC'][topic]
876
- when 'TISSUE SPECIFICITY'
877
- return @data['CC'][topic]
878
- when 'TOXIC DOSE'
879
- return @data['CC'][topic]
880
- when 'WEB RESOURCE'
881
- return cc_web_resource(@data['CC'][topic])
882
- when 'DATABASE'
883
- # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
884
- tmp = Array.new
885
- db = @data['CC']['DATABASE']
886
- return db unless db
887
-
888
- db.each do |e|
889
- db = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
890
- e.sub(/.$/,'').split(/;/).each do |line|
891
- case line
892
- when /NAME=(.+)/
893
- db['NAME'] = $1
894
- when /NOTE=(.+)/
895
- db['NOTE'] = $1
896
- when /WWW="(.+)"/
897
- db['WWW'] = $1
898
- when /FTP="(.+)"/
899
- db['FTP'] = $1
900
- end
901
- end
902
- tmp.push(db)
903
- end
904
- return tmp
905
- when nil
906
- return @data['CC']
907
- else
908
- return @data['CC'][topic]
909
- end
910
- end
911
-
912
-
913
- def cc_alternative_products(data)
914
- ap = data.join('')
915
- return ap unless ap
916
-
917
- # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
918
- tmp = {'Event' => "", 'Named isoforms' => "", 'Comment' => "",
919
- 'Variants' => []}
920
- if /Event=(.+?);/ =~ ap
921
- tmp['Event'] = $1
922
- tmp['Event'] = tmp['Event'].sub(/;/,'').split(/, /)
923
- end
924
- if /Named isoforms=(\S+?);/ =~ ap
925
- tmp['Named isoforms'] = $1
926
- end
927
- if /Comment=(.+?);/m =~ ap
928
- tmp['Comment'] = $1
929
- end
930
- ap.scan(/Name=.+?Sequence=.+?;/).each do |ent|
931
- tmp['Variants'] << cc_alternative_products_variants(ent)
932
- end
933
- return tmp
934
- end
935
- private :cc_alternative_products
936
-
937
- def cc_alternative_products_variants(data)
938
- variant = {'Name' => '', 'Synonyms' => [], 'IsoId' => [], 'Sequence' => []}
939
- data.split(/; /).map {|x| x.split(/=/) }.each do |e|
940
- case e[0]
941
- when 'Sequence', 'Synonyms', 'IsoId'
942
- e[1] = e[1].sub(/;/,'').split(/, /)
943
- end
944
- variant[e[0]] = e[1]
945
- end
946
- variant
947
- end
948
- private :cc_alternative_products_variants
949
-
950
-
951
- def cc_biophysiochemical_properties(data)
952
- data = data[0]
953
-
954
- hash = {'Absorption' => {},
955
- 'Kinetic parameters' => {},
956
- 'pH dependence' => "",
957
- 'Redox potential' => "",
958
- 'Temperature dependence' => ""}
959
- if data =~ /Absorption: Abs\(max\)=(.+?);/
960
- hash['Absorption']['Abs(max)'] = $1
961
- end
962
- if data =~ /Absorption: Abs\(max\)=.+; Note=(.+?);/
963
- hash['Absorption']['Note'] = $1
964
- end
965
- if data =~ /Kinetic parameters: KM=(.+?); Vmax=(.+?);/
966
- hash['Kinetic parameters']['KM'] = $1
967
- hash['Kinetic parameters']['Vmax'] = $2
968
- end
969
- if data =~ /Kinetic parameters: KM=.+; Vmax=.+; Note=(.+?);/
970
- hash['Kinetic parameters']['Note'] = $1
971
- end
972
- if data =~ /pH dependence: (.+?);/
973
- hash['pH dependence'] = $1
974
- end
975
- if data =~ /Redox potential: (.+?);/
976
- hash['Redox potential'] = $1
977
- end
978
- if data =~ /Temperature dependence: (.+?);/
979
- hash['Temperature dependence'] = $1
980
- end
981
- hash
982
- end
983
- private :cc_biophysiochemical_properties
984
-
985
-
986
- def cc_caution(data)
987
- data.join('')
988
- end
989
- private :cc_caution
990
-
991
-
992
- # returns conteins in a line of the CC INTERACTION section.
993
- #
994
- # CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
995
- def cc_interaction(data)
996
- str = data.join('')
997
- it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/)
998
- it.map {|ent|
999
- ent.map! {|x| x.strip }
1000
- if ent[0] =~ /^(.+):(.+)/
1001
- spac = $1
1002
- spid = $2.split(' ')[0]
1003
- optid = nil
1004
- elsif ent[0] =~ /Self/
1005
- spac = self.entry_id
1006
- spid = self.entry_id
1007
- optid = nil
1008
- end
1009
- if ent[0] =~ /^.+:.+ (.+)/
1010
- optid = $1
1011
- end
1012
-
1013
- {'SP_Ac' => spac,
1014
- 'identifier' => spid,
1015
- 'NbExp' => ent[1],
1016
- 'IntAct' => ent[2].split(', '),
1017
- 'optional_identifier' => optid}
1018
- }
1019
- end
1020
- private :cc_interaction
1021
-
1022
-
1023
- def cc_mass_spectrometry(data)
1024
- # MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
1025
- return data unless data
1026
-
1027
- data.map { |m|
1028
- mass = {'MW' => nil, 'MW_ERR' => nil, 'METHOD' => nil, 'RANGE' => nil,
1029
- 'NOTE' => nil}
1030
- m.sub(/.$/,'').split(/;/).each do |line|
1031
- case line
1032
- when /MW=(.+)/
1033
- mass['MW'] = $1
1034
- when /MW_ERR=(.+)/
1035
- mass['MW_ERR'] = $1
1036
- when /METHOD=(.+)/
1037
- mass['METHOD'] = $1
1038
- when /RANGE=(\d+-\d+)/
1039
- mass['RANGE'] = $1 # RANGE class ?
1040
- when /NOTE=(.+)/
1041
- mass['NOTE'] = $1
1042
- end
1043
- end
1044
- mass
1045
- }
1046
- end
1047
- private :cc_mass_spectrometry
1048
-
1049
-
1050
- def cc_pathway(data)
1051
- data.map {|x| x.sub(/\.$/, '') }.map {|x|
1052
- x.split(/; | and |: /)
1053
- }[0]
1054
- end
1055
- private :cc_pathway
1056
-
1057
-
1058
- def cc_rna_editing(data)
1059
- data = data.join('')
1060
- entry = {'Modified_positions' => [], 'Note' => ""}
1061
- if data =~ /Modified_positions=(.+?)(\.|;)/
1062
- entry['Modified_positions'] = $1.sub(/\.$/, '').split(', ')
1063
- else
1064
- raise ArgumentError, "Invarid CC RNA Editing lines (#{self.entry_id}):#{$!}\n#{get('CC')}"
1065
- end
1066
- if data =~ /Note=(.+)/
1067
- entry['Note'] = $1
1068
- end
1069
- entry
1070
- end
1071
- private :cc_rna_editing
1072
-
1073
-
1074
- def cc_subcellular_location(data)
1075
- data.map {|x|
1076
- x.split('. ').map {|y|
1077
- y.split('; ').map {|z|
1078
- z.sub(/\.$/, '')
1079
- }
1080
- }
1081
- }[0]
1082
- end
1083
- private :cc_subcellular_location
1084
-
1085
-
1086
- #--
1087
- # Since UniProtKB release 12.2 of 11-Sep-2007:
1088
- # CC -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress]. # Old format:
1089
- # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
1090
- #++
1091
-
1092
- def cc_web_resource(data)
1093
- data.map {|x|
1094
- entry = {'Name' => nil, 'Note' => nil, 'URL' => nil}
1095
- x.split(';').each do |y|
1096
- case y
1097
- when /(Name|Note)\=(.+)/
1098
- key = $1
1099
- val = $2.strip
1100
- entry[key] = val
1101
- when /(NAME|NOTE)\=(.+)/
1102
- key = $1.downcase.capitalize
1103
- val = $2.strip
1104
- entry[key] = val
1105
- when /URL\=\"(.+)\"/
1106
- entry['URL'] = $1.strip
1107
- end
1108
- end
1109
- entry
1110
- }
1111
- end
1112
- private :cc_web_resource
1113
-
1114
- # returns databases cross-references in the DR lines.
1115
- # * Bio::SPTR#dr -> Hash w/in Array
1116
- #
1117
- # === DR Line; defabases cross-reference (>=0)
1118
- # DR database_identifier; primary_identifier; secondary_identifier.
1119
- # a cross_ref pre one line
1120
- @@dr_database_identifier = ['EMBL','CARBBANK','DICTYDB','ECO2DBASE',
1121
- 'ECOGENE',
1122
- 'FLYBASE','GCRDB','HIV','HSC-2DPAGE','HSSP','INTERPRO','MAIZEDB',
1123
- 'MAIZE-2DPAGE','MENDEL','MGD''MIM','PDB','PFAM','PIR','PRINTS',
1124
- 'PROSITE','REBASE','AARHUS/GHENT-2DPAGE','SGD','STYGENE','SUBTILIST',
1125
- 'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN']
1126
-
1127
- # Backup Bio::EMBLDB#dr as embl_dr
1128
- alias :embl_dr :dr
1129
-
1130
- # Bio::SPTR#dr
1131
- def dr(key = nil)
1132
- unless key
1133
- embl_dr
1134
- else
1135
- (embl_dr[key] or []).map {|x|
1136
- {'Accession' => x[0],
1137
- 'Version' => x[1],
1138
- ' ' => x[2],
1139
- 'Molecular Type' => x[3]}
1140
- }
1141
- end
1142
- end
1143
-
1144
-
1145
- # Bio::EMBLDB::Common#kw - Array
1146
- # #keywords -> Array
1147
- #
1148
- # KW Line; keyword (>=1)
1149
- # KW [Keyword;]+
1150
-
1151
-
1152
- # returns contents in the feature table.
1153
- #
1154
- # == Examples
1155
- #
1156
- # sp = Bio::SPTR.new(entry)
1157
- # ft = sp.ft
1158
- # ft.class #=> Hash
1159
- # ft.keys.each do |feature_key|
1160
- # ft[feature_key].each do |feature|
1161
- # feature['From'] #=> '1'
1162
- # feature['To'] #=> '21'
1163
- # feature['Description'] #=> ''
1164
- # feature['FTId'] #=> ''
1165
- # feature['diff'] #=> []
1166
- # feature['original'] #=> [feature_key, '1', '21', '', '']
1167
- # end
1168
- # end
1169
- #
1170
- # * Bio::SPTR#ft -> Hash
1171
- # {FEATURE_KEY => [{'From' => int, 'To' => int,
1172
- # 'Description' => aStr, 'FTId' => aStr,
1173
- # 'diff' => [original_residues, changed_residues],
1174
- # 'original' => aAry }],...}
1175
- #
1176
- # returns an Array of the information about the feature_name in the feature table.
1177
- # * Bio::SPTR#ft(feature_name) -> Array of Hash
1178
- # [{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
1179
- #
1180
- # == FT Line; feature table data (>=0, optional)
1181
- #
1182
- # Col Data item
1183
- # ----- -----------------
1184
- # 1- 2 FT
1185
- # 6-13 Feature name
1186
- # 15-20 `FROM' endpoint
1187
- # 22-27 `TO' endpoint
1188
- # 35-75 Description (>=0 per key)
1189
- # ----- -----------------
1190
- #
1191
- # Note: 'FROM' and 'TO' endopoints are allowed to use non-numerial charactors
1192
- # including '<', '>' or '?'. (c.f. '<1', '?42')
1193
- #
1194
- # See also http://www.expasy.org/sprot/userman.html#FT_line
1195
- #
1196
- def ft(feature_key = nil)
1197
- return ft[feature_key] if feature_key
1198
- return @data['FT'] if @data['FT']
1199
-
1200
- table = []
1201
- begin
1202
- get('FT').split("\n").each do |line|
1203
- if line =~ /^FT \w/
1204
- feature = line.chomp.ljust(74)
1205
- table << [feature[ 5..12].strip, # Feature Name
1206
- feature[14..19].strip, # From
1207
- feature[21..26].strip, # To
1208
- feature[34..74].strip ] # Description
1209
- else
1210
- table.last << line.chomp.sub!(/^FT +/, '')
1211
- end
1212
- end
1213
-
1214
- # Joining Description lines
1215
- table = table.map { |feature|
1216
- ftid = feature.pop if feature.last =~ /FTId=/
1217
- if feature.size > 4
1218
- feature = [feature[0],
1219
- feature[1],
1220
- feature[2],
1221
- feature[3, feature.size - 3].join(" ")]
1222
- end
1223
- feature << if ftid then ftid else '' end
1224
- }
1225
-
1226
- hash = {}
1227
- table.each do |feature|
1228
- hash[feature[0]] = [] unless hash[feature[0]]
1229
- hash[feature[0]] << {
1230
- # Removing '<', '>' or '?' in FROM/TO endopoint.
1231
- 'From' => feature[1].sub(/\D/, '').to_i,
1232
- 'To' => feature[2].sub(/\D/, '').to_i,
1233
- 'Description' => feature[3],
1234
- 'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
1235
- 'diff' => [],
1236
- 'original' => feature
1237
- }
1238
-
1239
- case feature[0]
1240
- when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
1241
- case hash[feature[0]].last['Description']
1242
- when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
1243
- original_res = $1
1244
- changed_res = $2
1245
- original_res = original_res.gsub(/ /,'').strip
1246
- chenged_res = changed_res.gsub(/ /,'').strip
1247
- when /Missing/i
1248
- original_res = seq.subseq(hash[feature[0]].last['From'],
1249
- hash[feature[0]].last['To'])
1250
- changed_res = ''
1251
- end
1252
- hash[feature[0]].last['diff'] = [original_res, chenged_res]
1253
- end
1254
- end
1255
- rescue
1256
- raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
1257
- end
1258
-
1259
- @data['FT'] = hash
1260
- end
1261
-
1262
-
1263
-
1264
- # returns a Hash of conteins in the SQ lines.
1265
- # * Bio::SPTRL#sq -> hsh
1266
- #
1267
- # returns a value of a key given in the SQ lines.
1268
- # * Bio::SPTRL#sq(key) -> int or str
1269
- # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length',
1270
- # 'CRC64']
1271
- #
1272
- # === SQ Line; sequence header (1/entry)
1273
- # SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64;
1274
- # SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
1275
- #
1276
- # MW, Dalton unit.
1277
- # CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
1278
- def sq(key = nil)
1279
- unless @data['SQ']
1280
- if fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
1281
- @data['SQ'] = { 'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3 }
1282
- else
1283
- raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
1284
- end
1285
- end
1286
-
1287
- if key
1288
- case key
1289
- when /mw/, /molecular/, /weight/
1290
- @data['SQ']['MW']
1291
- when /len/, /length/, /AA/
1292
- @data['SQ']['aalen']
1293
- else
1294
- @data['SQ'][key]
1295
- end
1296
- else
1297
- @data['SQ']
1298
- end
1299
- end
1300
-
1301
-
1302
- # returns a Bio::Sequence::AA of the amino acid sequence.
1303
- # * Bio::SPTR#seq -> Bio::Sequence::AA
1304
- #
1305
- # blank Line; sequence data (>=1)
1306
- def seq
1307
- unless @data['']
1308
- @data[''] = Sequence::AA.new( fetch('').gsub(/ |\d+/,'') )
1309
- end
1310
- return @data['']
1311
- end
1312
- alias aaseq seq
1313
-
1314
- end # class SPTR
1315
-
1316
- end # module Bio
1317
-
1318
-
1319
-
1320
- =begin
1321
-
1322
- = Bio::SPTR < Bio::DB
1323
-
1324
- Class for a entry in the SWISS-PROT/TrEMBL database.
1325
-
1326
- * ((<URL:http://www.ebi.ac.uk/swissprot/>))
1327
- * ((<URL:http://www.ebi.ac.uk/trembl/>))
1328
- * ((<URL:http://www.ebi.ac.uk/sprot/userman.html>))
1329
-
1330
-
1331
- --- Bio::SPTR.new(a_sp_entry)
1332
-
1333
- === ID line (Identification)
1334
-
1335
- --- Bio::SPTR#id_line -> {'ENTRY_NAME' => str, 'DATA_CLASS' => str,
1336
- 'MOLECULE_TYPE' => str, 'SEQUENCE_LENGTH' => int }
1337
- --- Bio::SPTR#id_line(key) -> str
1338
-
1339
- key = (ENTRY_NAME|MOLECULE_TYPE|DATA_CLASS|SEQUENCE_LENGTH)
1340
-
1341
- --- Bio::SPTR#entry_id -> str
1342
- --- Bio::SPTR#molecule -> str
1343
- --- Bio::SPTR#sequence_length -> int
1344
-
1345
-
1346
- === AC lines (Accession number)
1347
-
1348
- --- Bio::SPTR#ac -> ary
1349
- --- Bio::SPTR#accessions -> ary
1350
- --- Bio::SPTR#accession -> accessions.first
1351
-
1352
-
1353
- === GN line (Gene name(s))
1354
-
1355
- --- Bio::SPTR#gn -> [ary, ...] or [{:name => str, :synonyms => [], :loci => [], :orfs => []}]
1356
- --- Bio::SPTR#gene_name -> str
1357
- --- Bio::SPTR#gene_names -> [str] or [str]
1358
-
1359
-
1360
- === DT lines (Date)
1361
-
1362
- --- Bio::SPTR#dt -> {'created' => str, 'sequence' => str, 'annotation' => str}
1363
- --- Bio::SPTR#dt(key) -> str
1364
-
1365
- key := (created|annotation|sequence)
1366
-
1367
-
1368
- === DE lines (Description)
1369
-
1370
- --- Bio::SPTR#de -> str
1371
- #definition -> str
1372
-
1373
- --- Bio::SPTR#protein_name
1374
-
1375
- Returns the proposed official name of the protein
1376
-
1377
-
1378
- --- Bio::SPTR#synonyms
1379
-
1380
- Returns an array of synonyms (unofficial names)
1381
-
1382
- === KW lines (Keyword)
1383
-
1384
- --- Bio::SPTR#kw -> ary
1385
-
1386
- === OS lines (Organism species)
1387
-
1388
- --- Bio::SPTR#os -> [{'name' => str, 'os' => str}, ...]
1389
-
1390
- === OC lines (organism classification)
1391
-
1392
- --- Bio::SPTR#oc -> ary
1393
-
1394
- === OG line (Organella)
1395
-
1396
- --- Bio::SPTR#og -> ary
1397
-
1398
- === OX line (Organism taxonomy cross-reference)
1399
-
1400
- --- Bio::SPTR#ox -> {'NCBI_TaxID' => [], ...}
1401
-
1402
- === RN RC RP RX RA RT RL RG lines (Reference)
1403
-
1404
- --- Bio::SPTR#ref -> [{'RN' => int, 'RP' => str, 'RC' => str, 'RX' => str, ''RT' => str, 'RL' => str, 'RA' => str, 'RC' => str, 'RG' => str},...]
1405
-
1406
- === DR lines (Database cross-reference)
1407
-
1408
- --- Bio::SPTR#dr -> {'EMBL' => ary, ...}
1409
-
1410
- === FT lines (Feature table data)
1411
-
1412
- --- Bio::SPTR#ft -> hsh
1413
-
1414
- === SQ lines (Sequence header and data)
1415
-
1416
- --- Bio::SPTR#sq -> {'CRC64' => str, 'MW' => int, 'aalen' => int}
1417
- --- Bio::SPTR#sq(key) -> int or str
1418
-
1419
- key := (aalen|MW|CRC64)
1420
-
1421
- --- Bio::EMBL#seq -> Bio::Sequece::AA
1422
- #aaseq -> Bio::Sequece::AA
1423
-
1424
- =end
12
+ require "bio/db/embl/uniprotkb" unless const_defined?(:UniProtKB)
1425
13
 
1426
- # Content Occurrence in an entry
1427
- # ---- --------------------------- --------------------------------
1428
- # ID - identification (begins each entry; 1 per entry)
1429
- # AC - accession number(s) (>=1 per entry)
1430
- # DT - date (3 per entry)
1431
- # DE - description (>=1 per entry)
1432
- # GN - gene name(s) (>=0 per entry; optional)
1433
- # OS - organism species (>=1 per entry)
1434
- # OG - organelle (0 or 1 per entry; optional)
1435
- # OC - organism classification (>=1 per entry)
1436
- # OX - organism taxonomy x-ref (>=1 per entry)
1437
- # OH - Organism Host
1438
- # RN - reference number (>=1 per entry)
1439
- # RP - reference positions (>=1 per entry)
1440
- # RC - reference comment(s) (>=0 per entry; optional)
1441
- # RX - reference cross-reference(s) (>=0 per entry; optional)
1442
- # RA - reference author(s) (>=1 per entry)
1443
- # RT - reference title (>=0 per entry; optional)
1444
- # RL - reference location (>=1 per entry)
1445
- # RG - reference group(s)
1446
- # CC - comments or notes (>=0 per entry; optional)
1447
- # DR - database cross-references (>=0 per entry; optional)
1448
- # KW - keywords (>=1 per entry)
1449
- # FT - feature table data (>=0 per entry; optional)
1450
- # SQ - sequence header (1 per entry)
1451
- # - (blanks) The sequence data (>=1 per entry)
1452
- # // - termination line (ends each entry; 1 per entry)
1453
- # ---- --------------------------- --------------------------------
14
+ # Bio::SPTR is changed to an alias of Bio::UniProtKB.
15
+ # Please use Bio::UniProtKB.
16
+ # Bio::SPTR may be deprecated in the future.
17
+ SPTR = UniProtKB
1454
18
 
19
+ end #module Bio
1455
20