bio 1.4.3.0001 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +39 -33
  3. data/BSDL +22 -0
  4. data/COPYING +2 -2
  5. data/COPYING.ja +36 -36
  6. data/ChangeLog +2404 -1025
  7. data/KNOWN_ISSUES.rdoc +15 -55
  8. data/README.rdoc +17 -23
  9. data/RELEASE_NOTES.rdoc +246 -183
  10. data/Rakefile +3 -2
  11. data/bin/br_biofetch.rb +29 -5
  12. data/bioruby.gemspec +15 -32
  13. data/bioruby.gemspec.erb +10 -20
  14. data/doc/ChangeLog-1.4.3 +1478 -0
  15. data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
  16. data/doc/Tutorial.rd +0 -6
  17. data/doc/Tutorial.rd.html +7 -12
  18. data/doc/Tutorial.rd.ja +960 -1064
  19. data/doc/Tutorial.rd.ja.html +977 -1067
  20. data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
  21. data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
  22. data/gemfiles/Gemfile.travis-rbx +13 -0
  23. data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
  24. data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
  25. data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
  26. data/lib/bio.rb +10 -43
  27. data/lib/bio/alignment.rb +8 -14
  28. data/lib/bio/appl/blast.rb +1 -2
  29. data/lib/bio/appl/blast/format0.rb +18 -7
  30. data/lib/bio/appl/blast/remote.rb +0 -9
  31. data/lib/bio/appl/blast/report.rb +1 -1
  32. data/lib/bio/appl/clustalw/report.rb +3 -1
  33. data/lib/bio/appl/genscan/report.rb +1 -2
  34. data/lib/bio/appl/iprscan/report.rb +1 -2
  35. data/lib/bio/appl/meme/mast.rb +4 -4
  36. data/lib/bio/appl/meme/mast/report.rb +1 -1
  37. data/lib/bio/appl/paml/codeml.rb +2 -2
  38. data/lib/bio/appl/paml/codeml/report.rb +1 -0
  39. data/lib/bio/appl/paml/common.rb +1 -1
  40. data/lib/bio/appl/sosui/report.rb +1 -2
  41. data/lib/bio/command.rb +62 -2
  42. data/lib/bio/data/aa.rb +13 -31
  43. data/lib/bio/data/codontable.rb +1 -2
  44. data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
  45. data/lib/bio/db/biosql/sequence.rb +1 -1
  46. data/lib/bio/db/embl/common.rb +1 -1
  47. data/lib/bio/db/embl/embl.rb +5 -4
  48. data/lib/bio/db/embl/format_embl.rb +3 -3
  49. data/lib/bio/db/embl/sptr.rb +9 -1444
  50. data/lib/bio/db/embl/swissprot.rb +12 -29
  51. data/lib/bio/db/embl/trembl.rb +13 -30
  52. data/lib/bio/db/embl/uniprot.rb +12 -29
  53. data/lib/bio/db/embl/uniprotkb.rb +1455 -0
  54. data/lib/bio/db/fasta.rb +17 -0
  55. data/lib/bio/db/fasta/defline.rb +1 -3
  56. data/lib/bio/db/fastq.rb +1 -1
  57. data/lib/bio/db/genbank/ddbj.rb +9 -5
  58. data/lib/bio/db/genbank/refseq.rb +11 -3
  59. data/lib/bio/db/gff.rb +3 -4
  60. data/lib/bio/db/go.rb +5 -6
  61. data/lib/bio/db/kegg/module.rb +4 -5
  62. data/lib/bio/db/kegg/pathway.rb +4 -5
  63. data/lib/bio/db/kegg/reaction.rb +1 -1
  64. data/lib/bio/db/nexus.rb +3 -2
  65. data/lib/bio/db/pdb/pdb.rb +2 -2
  66. data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
  67. data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
  68. data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
  69. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
  70. data/lib/bio/db/transfac.rb +1 -1
  71. data/lib/bio/io/das.rb +40 -41
  72. data/lib/bio/io/fastacmd.rb +0 -16
  73. data/lib/bio/io/fetch.rb +111 -55
  74. data/lib/bio/io/flatfile/buffer.rb +4 -5
  75. data/lib/bio/io/hinv.rb +2 -3
  76. data/lib/bio/io/ncbirest.rb +43 -6
  77. data/lib/bio/io/pubmed.rb +76 -81
  78. data/lib/bio/io/togows.rb +33 -10
  79. data/lib/bio/map.rb +1 -1
  80. data/lib/bio/pathway.rb +1 -1
  81. data/lib/bio/sequence/compat.rb +1 -1
  82. data/lib/bio/sequence/na.rb +63 -12
  83. data/lib/bio/shell.rb +0 -2
  84. data/lib/bio/shell/core.rb +5 -6
  85. data/lib/bio/shell/interface.rb +3 -4
  86. data/lib/bio/shell/irb.rb +1 -2
  87. data/lib/bio/shell/plugin/entry.rb +2 -3
  88. data/lib/bio/shell/plugin/seq.rb +7 -6
  89. data/lib/bio/shell/setup.rb +1 -2
  90. data/lib/bio/tree.rb +2 -2
  91. data/lib/bio/util/contingency_table.rb +0 -2
  92. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
  93. data/lib/bio/util/sirna.rb +76 -16
  94. data/lib/bio/version.rb +8 -9
  95. data/sample/benchmark_clustalw_report.rb +47 -0
  96. data/sample/biofetch.rb +248 -151
  97. data/setup.rb +6 -7
  98. data/test/data/clustalw/example1-seqnos.aln +58 -0
  99. data/test/network/bio/appl/blast/test_remote.rb +1 -15
  100. data/test/network/bio/appl/test_blast.rb +0 -12
  101. data/test/network/bio/io/test_pubmed.rb +49 -0
  102. data/test/network/bio/io/test_togows.rb +0 -1
  103. data/test/network/bio/test_command.rb +65 -2
  104. data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
  105. data/test/unit/bio/appl/blast/test_report.rb +110 -48
  106. data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
  107. data/test/unit/bio/appl/sim4/test_report.rb +46 -17
  108. data/test/unit/bio/appl/test_blast.rb +2 -2
  109. data/test/unit/bio/db/embl/test_embl.rb +0 -1
  110. data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
  111. data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
  112. data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
  113. data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
  114. data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
  115. data/test/unit/bio/db/test_fasta.rb +41 -1
  116. data/test/unit/bio/db/test_fastq.rb +14 -4
  117. data/test/unit/bio/db/test_gff.rb +2 -2
  118. data/test/unit/bio/db/test_phyloxml.rb +30 -30
  119. data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
  120. data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
  121. data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
  122. data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
  123. data/test/unit/bio/io/test_togows.rb +3 -2
  124. data/test/unit/bio/sequence/test_dblink.rb +1 -1
  125. data/test/unit/bio/sequence/test_na.rb +3 -1
  126. data/test/unit/bio/test_alignment.rb +1 -2
  127. data/test/unit/bio/test_command.rb +5 -4
  128. data/test/unit/bio/test_db.rb +4 -2
  129. data/test/unit/bio/test_pathway.rb +25 -10
  130. data/test/unit/bio/util/test_sirna.rb +22 -22
  131. metadata +656 -1430
  132. data/doc/KEGG_API.rd +0 -1843
  133. data/doc/KEGG_API.rd.ja +0 -1834
  134. data/extconf.rb +0 -2
  135. data/lib/bio/appl/blast/ddbj.rb +0 -131
  136. data/lib/bio/db/kegg/taxonomy.rb +0 -280
  137. data/lib/bio/io/dbget.rb +0 -194
  138. data/lib/bio/io/ddbjrest.rb +0 -344
  139. data/lib/bio/io/ddbjxml.rb +0 -458
  140. data/lib/bio/io/ebisoap.rb +0 -158
  141. data/lib/bio/io/ensembl.rb +0 -229
  142. data/lib/bio/io/higet.rb +0 -73
  143. data/lib/bio/io/keggapi.rb +0 -363
  144. data/lib/bio/io/ncbisoap.rb +0 -156
  145. data/lib/bio/io/soapwsdl.rb +0 -119
  146. data/lib/bio/shell/plugin/keggapi.rb +0 -181
  147. data/lib/bio/shell/plugin/soap.rb +0 -87
  148. data/sample/dbget +0 -37
  149. data/sample/demo_ddbjxml.rb +0 -212
  150. data/sample/demo_kegg_taxonomy.rb +0 -92
  151. data/sample/demo_keggapi.rb +0 -502
  152. data/sample/psortplot_html.rb +0 -214
  153. data/test/network/bio/io/test_ddbjrest.rb +0 -47
  154. data/test/network/bio/io/test_ensembl.rb +0 -230
  155. data/test/network/bio/io/test_soapwsdl.rb +0 -53
  156. data/test/unit/bio/io/test_ddbjxml.rb +0 -81
  157. data/test/unit/bio/io/test_ensembl.rb +0 -111
  158. data/test/unit/bio/io/test_soapwsdl.rb +0 -33
@@ -5,7 +5,6 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
  # == Example
11
10
  #
@@ -60,7 +59,7 @@ module Bio
60
59
  if /NUMBER OF TM HELIX = (\d+)/ =~ line
61
60
  @tms = $1
62
61
  elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line
63
- tmh = $1.to_i
62
+ #tmh = $1.to_i
64
63
  range = Range.new($2.to_i, $3.to_i)
65
64
  grade = $4
66
65
  seq = $5
@@ -6,7 +6,6 @@
6
6
  # Toshiaki Katayama <k@bioruby.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id:$
10
9
  #
11
10
 
12
11
  require 'open3'
@@ -856,7 +855,7 @@ module Command
856
855
  end.join('&')
857
856
  end
858
857
  when String
859
- data = URI.escape(params.strip)
858
+ raise TypeError, 'Bio::Command.make_cgi_params no longer accepts a single String as a form'
860
859
  end
861
860
  return data
862
861
  end
@@ -882,6 +881,67 @@ module Command
882
881
  return result
883
882
  end
884
883
 
884
+ # Same as:
885
+ # http = Net::HTTP.new(...); http.post(path, data, header)
886
+ # and
887
+ # it uses proxy if an environment variable (same as OpenURI.open_uri)
888
+ # is set.
889
+ # In addition, +header+ can be set.
890
+ # (Default Content-Type is application/octet-stream.
891
+ # Content-Length is automatically set by default.)
892
+ # +uri+ must be a URI object, +params+ must be a hash, and
893
+ # +header+ must be a hash.
894
+ #
895
+ # ---
896
+ # *Arguments*:
897
+ # * (required) _http_: Net::HTTP object or compatible object
898
+ # * (required) _path_: String
899
+ # * (required) _data_: String containing data
900
+ # * (optional) _header_: Hash containing header strings
901
+ # *Returns*:: (same as Net::HTTP::post)
902
+ def http_post(http, path, data, header = {})
903
+ hash = {
904
+ 'Content-Type' => 'application/octet-stream',
905
+ 'Content-Length' => data.length.to_s
906
+ }
907
+ hash.update(header)
908
+
909
+ http.post(path, data, hash)
910
+ end
911
+
912
+ # Same as:
913
+ # Net::HTTP.post(uri, params)
914
+ # and
915
+ # it uses proxy if an environment variable (same as OpenURI.open_uri)
916
+ # is set.
917
+ # In addition, +header+ can be set.
918
+ # (Default Content-Type is application/octet-stream.
919
+ # Content-Length is automatically set by default.)
920
+ # +uri+ must be a URI object, +data+ must be a String, and
921
+ # +header+ must be a hash.
922
+ #
923
+ # ---
924
+ # *Arguments*:
925
+ # * (required) _uri_: URI object or String
926
+ # * (optional) _data_: String containing data
927
+ # * (optional) _header_: Hash containing header strings
928
+ # *Returns*:: (same as Net::HTTP::post)
929
+ def post(uri, data, header = {})
930
+ unless uri.is_a?(URI)
931
+ uri = URI.parse(uri)
932
+ end
933
+
934
+ hash = {
935
+ 'Content-Type' => 'application/octet-stream',
936
+ 'Content-Length' => data.length.to_s
937
+ }
938
+ hash.update(header)
939
+
940
+ start_http(uri.host, uri.port) do |http|
941
+ http.post(uri.path, data, hash)
942
+ end
943
+ end
944
+
885
945
  end # module Command
886
946
  end # module Bio
887
947
 
@@ -108,23 +108,21 @@ class AminoAcid
108
108
  }
109
109
 
110
110
  def weight(x = nil)
111
- if x
112
- if x.length > 1
113
- total = 0.0
114
- x.each_byte do |byte|
115
- aa = byte.chr.upcase
116
- if WEIGHT[aa]
117
- total += WEIGHT[aa]
118
- else
119
- raise "Error: invalid amino acid '#{aa}'"
120
- end
111
+ return WEIGHT unless x
112
+
113
+ if x.length > 1
114
+ total = 0.0
115
+ x.each_byte do |byte|
116
+ aa = byte.chr.upcase
117
+ if WEIGHT[aa]
118
+ total += WEIGHT[aa]
119
+ else
120
+ raise "Error: invalid amino acid '#{aa}'"
121
121
  end
122
- total -= NucleicAcid.weight[:water] * (x.length - 1)
123
- else
124
- WEIGHT[x]
125
122
  end
123
+ total -= NucleicAcid.weight[:water] * (x.length - 1)
126
124
  else
127
- WEIGHT
125
+ WEIGHT[x]
128
126
  end
129
127
  end
130
128
 
@@ -237,11 +235,7 @@ class AminoAcid
237
235
 
238
236
 
239
237
  def reverse
240
- hash = Hash.new
241
- NAMES.each do |k, v|
242
- hash[v] = k
243
- end
244
- hash
238
+ @reverse ||= NAMES.invert
245
239
  end
246
240
 
247
241
  end
@@ -254,18 +248,6 @@ class AminoAcid
254
248
  extend Data
255
249
 
256
250
 
257
- private
258
-
259
-
260
- # override when used as an instance method to improve performance
261
- alias orig_reverse reverse
262
- def reverse
263
- unless @reverse
264
- @reverse = orig_reverse
265
- end
266
- @reverse
267
- end
268
-
269
251
  end
270
252
 
271
253
  end # module Bio
@@ -5,7 +5,6 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
  # == Data source
11
10
  #
@@ -127,7 +126,7 @@ class CodonTable
127
126
  # table.revtrans("A") # => ["gcg", "gct", "gca", "gcc"]
128
127
  #
129
128
  def revtrans(aa)
130
- unless @reverse
129
+ unless (defined? @reverse) && @reverse
131
130
  @reverse = {}
132
131
  @table.each do |k, v|
133
132
  @reverse[v] ||= []
@@ -11,6 +11,7 @@
11
11
 
12
12
  require 'bio/sequence'
13
13
  require 'bio/sequence/adapter'
14
+ require 'bio/sequence/dblink'
14
15
 
15
16
  # Internal use only. Normal users should not use this module.
16
17
  #
@@ -371,7 +371,7 @@ module Bio
371
371
  #probably would be better to d a class refrence to collect these informations
372
372
  @entry.bioentry_references.collect do |bio_ref|
373
373
  hash = Hash.new
374
- hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/)
374
+ hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/) if (bio_ref.reference and bio_ref.reference.authors)
375
375
 
376
376
  hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if (bio_ref.start_pos and bio_ref.end_pos)
377
377
  hash['title'] = bio_ref.reference.title
@@ -149,7 +149,7 @@ module Common
149
149
  unless @data['OS']
150
150
  os = Array.new
151
151
  fetch('OS').split(/, and|, /).each do |tmp|
152
- if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/
152
+ if tmp =~ /([A-Z][a-z]* *[\w \:\'\+\-]+\w)/
153
153
  org = $1
154
154
  tmp =~ /(\(.+\))/
155
155
  os.push({'name' => $1, 'os' => org})
@@ -267,9 +267,10 @@ class EMBL < EMBLDB
267
267
  unless @data['OS']
268
268
  os = Array.new
269
269
  tmp = fetch('OS')
270
- if /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/ =~ tmp
270
+ if /([A-Z][a-z]* *[\w \:\'\+\-]+\w) *\(([\w ]+)\)\s*\z/ =~ tmp
271
271
  org = $1
272
- os.push({'name' => $2, 'os' => $1})
272
+ name = $2
273
+ os.push({'name' => name, 'os' => org})
273
274
  else
274
275
  os.push({'name' => nil, 'os' => tmp})
275
276
  end
@@ -340,7 +341,7 @@ class EMBL < EMBLDB
340
341
  @orig['FT'].each_line do |line|
341
342
  next if line =~ /^FEATURES/
342
343
 
343
- head = line[0,20].strip # feature key (source, CDS, ...)
344
+ #head = line[0,20].strip # feature key (source, CDS, ...)
344
345
  body = line[20,60].chomp # feature value (position, /qualifier=)
345
346
  if line =~ /^FT {3}(\S+)/
346
347
  ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
@@ -491,7 +492,7 @@ class EMBL < EMBLDB
491
492
  def parse_release_version(str)
492
493
  return [ nil, nil ] unless str
493
494
  a = str.split(/[\(\,\)]/)
494
- dstr = a.shift
495
+ a.shift #date string e.g. "14-OCT-2006"
495
496
  rel = nil
496
497
  ver = nil
497
498
  a.each do |x|
@@ -126,9 +126,9 @@ module Bio::Sequence::Format::NucFormatter
126
126
  def mol_type_embl
127
127
  if mt = molecule_type then
128
128
  mt
129
- elsif f = (features or []).find { |f| f.feature == 'source' } and
130
- q = f.qualifiers.find { |q| q.qualifier == 'mol_type' } then
131
- q.value
129
+ elsif fe = (features or []).find { |f| f.feature == 'source' } and
130
+ qu = fe.qualifiers.find { |q| q.qualifier == 'mol_type' } then
131
+ qu.value
132
132
  else
133
133
  'NA'
134
134
  end
@@ -1,1455 +1,20 @@
1
1
  #
2
- # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
2
+ # = bio/db/embl/sptr.rb - Bio::SPTR is an alias of Bio::UniProtKB
3
3
  #
4
- # Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n@bioruby.org>
4
+ # Copyright:: Copyright (C) 2013 BioRuby Project
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id:$
8
- #
9
- # == Description
10
- #
11
- # Shared methods for UniProtKB/SwissProt and TrEMBL classes.
12
- #
13
- # See the SWISS-PROT document file SPECLIST.TXT or UniProtKB/SwissProt
14
- # user manual.
15
- #
16
- # == Examples
17
- #
18
- # str = File.read("p53_human.swiss")
19
- # obj = Bio::SPTR.new(str)
20
- # obj.entry_id #=> "P53_HUMAN"
21
- #
22
- # == References
23
- #
24
- # * Swiss-Prot Protein knowledgebase. TrEMBL Computer-annotated supplement
25
- # to Swiss-Prot
26
- # http://au.expasy.org/sprot/
27
- #
28
- # * UniProt
29
- # http://uniprot.org/
30
- #
31
- # * The UniProtKB/SwissProt/TrEMBL User Manual
32
- # http://www.expasy.org/sprot/userman.html
33
- #
34
-
35
7
 
36
- require 'bio/db'
37
- require 'bio/db/embl/common'
8
+ warn "Bio::SPTR is changed to an alias of Bio::UniProtKB. Please use Bio::UniProtKB. Bio::SPTR may be deprecated in the future." if $VERBOSE
38
9
 
39
10
  module Bio
40
11
 
41
- # Parser class for UniProtKB/SwissProt and TrEMBL database entry.
42
- class SPTR < EMBLDB
43
- include Bio::EMBLDB::Common
44
-
45
- @@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/
46
- @@data_class = ["STANDARD", "PRELIMINARY"]
47
-
48
- # returns a Hash of the ID line.
49
- #
50
- # returns a content (Int or String) of the ID line by a given key.
51
- # Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MODECULE_TYPE', 'SEQUENCE_LENGTH']
52
- #
53
- # === ID Line (since UniProtKB release 9.0 of 31-Oct-2006)
54
- # ID P53_HUMAN Reviewed; 393 AA.
55
- # #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."
56
- #
57
- # === Examples
58
- # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed",
59
- # "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}
60
- #
61
- # obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
62
- #
63
- #
64
- # === ID Line (older style)
65
- # ID P53_HUMAN STANDARD; PRT; 393 AA.
66
- # #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
67
- #
68
- # === Examples
69
- # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
70
- # "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
71
- #
72
- # obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
73
- #
74
- def id_line(key = nil)
75
- return id_line[key] if key
76
- return @data['ID'] if @data['ID']
77
-
78
- part = @orig['ID'].split(/ +/)
79
- if part[4].to_s.chomp == 'AA.' then
80
- # after UniProtKB release 9.0 of 31-Oct-2006
81
- # (http://www.uniprot.org/docs/sp_news.htm)
82
- molecule_type = nil
83
- sequence_length = part[3].to_i
84
- else
85
- molecule_type = part[3].sub(/;/,'')
86
- sequence_length = part[4].to_i
87
- end
88
- @data['ID'] = {
89
- 'ENTRY_NAME' => part[1],
90
- 'DATA_CLASS' => part[2].sub(/;/,''),
91
- 'MOLECULE_TYPE' => molecule_type,
92
- 'SEQUENCE_LENGTH' => sequence_length
93
- }
94
- end
95
-
96
-
97
- # returns a ENTRY_NAME in the ID line.
98
- #
99
- def entry_id
100
- id_line('ENTRY_NAME')
101
- end
102
- alias entry_name entry_id
103
- alias entry entry_id
104
-
105
-
106
- # returns a MOLECULE_TYPE in the ID line.
107
- #
108
- # A short-cut for Bio::SPTR#id_line('MOLECULE_TYPE').
109
- def molecule
110
- id_line('MOLECULE_TYPE')
111
- end
112
- alias molecule_type molecule
113
-
114
-
115
- # returns a SEQUENCE_LENGTH in the ID line.
116
- #
117
- # A short-cut for Bio::SPTR#id_line('SEQUENCE_LENGHT').
118
- def sequence_length
119
- id_line('SEQUENCE_LENGTH')
120
- end
121
- alias aalen sequence_length
122
-
123
-
124
- # Bio::EMBLDB::Common#ac -> ary
125
- # #accessions -> ary
126
- # #accession -> String (accessions.first)
127
- @@ac_regrexp = /[OPQ][0-9][A-Z0-9]{3}[0-9]/
128
-
129
-
130
-
131
- # returns a Hash of information in the DT lines.
132
- # hash keys:
133
- # ['created', 'sequence', 'annotation']
134
- #--
135
- # also Symbols acceptable (ASAP):
136
- # [:created, :sequence, :annotation]
137
- #++
138
- #
139
- # Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is
140
- # changed, and the word "annotation" is no longer used in DT lines.
141
- # Despite the change, the word "annotation" is still used for keeping
142
- # compatibility.
143
- #
144
- # returns a String of information in the DT lines by a given key.
145
- #
146
- # === DT Line; date (3/entry)
147
- # DT DD-MMM-YYY (integrated into UniProtKB/XXXXX.)
148
- # DT DD-MMM-YYY (sequence version NN)
149
- # DT DD-MMM-YYY (entry version NN)
150
- #
151
- # The format have been changed in UniProtKB release 7.0 of 07-Feb-2006.
152
- # Below is the older format.
153
- #
154
- # === Old format of DT Line; date (3/entry)
155
- # DT DD-MMM-YYY (rel. NN, Created)
156
- # DT DD-MMM-YYY (rel. NN, Last sequence update)
157
- # DT DD-MMM-YYY (rel. NN, Last annotation update)
158
- def dt(key = nil)
159
- return dt[key] if key
160
- return @data['DT'] if @data['DT']
161
-
162
- part = self.get('DT').split(/\n/)
163
- @data['DT'] = {
164
- 'created' => part[0].sub(/\w{2} /,'').strip,
165
- 'sequence' => part[1].sub(/\w{2} /,'').strip,
166
- 'annotation' => part[2].sub(/\w{2} /,'').strip
167
- }
168
- end
169
-
170
-
171
- # (private) parses DE line (description lines)
172
- # since UniProtKB release 14.0 of 22-Jul-2008
173
- #
174
- # Return array containing array.
175
- #
176
- # http://www.uniprot.org/docs/sp_news.htm
177
- def parse_DE_line_rel14(str)
178
- # Retruns if it is not the new format since Rel.14
179
- return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
180
- ret = []
181
- cur = nil
182
- str.each_line do |line|
183
- case line
184
- when /^DE (Includes|Contains)\: *$/
185
- cur = [ $1 ]
186
- ret.push cur
187
- cur = nil
188
- #subcat_and_desc = nil
189
- next
190
- when /^DE *(RecName|AltName|SubName)\: +(.*)/
191
- category = $1
192
- subcat_and_desc = $2
193
- cur = [ category ]
194
- ret.push cur
195
- when /^DE *(Flags)\: +(.*)/
196
- category = $1
197
- desc = $2
198
- flags = desc.strip.split(/\s*\;\s*/) || []
199
- cur = [ category, flags ]
200
- ret.push cur
201
- cur = nil
202
- #subcat_and_desc = nil
203
- next
204
- when /^DE *(.*)/
205
- subcat_and_desc = $1
206
- else
207
- warn "Warning: skipped DE line in unknown format: #{line.inspect}"
208
- #subcat_and_desc = nil
209
- next
210
- end
211
- case subcat_and_desc
212
- when nil
213
- # does nothing
214
- when /\A([^\=]+)\=(.*)/
215
- subcat = $1
216
- desc = $2
217
- desc.sub!(/\;\s*\z/, '')
218
- unless cur
219
- warn "Warning: unknown category in DE line: #{line.inspect}"
220
- cur = [ '' ]
221
- ret.push cur
222
- end
223
- cur.push [ subcat, desc ]
224
- else
225
- warn "Warning: skipped DE line description in unknown format: #{line.inspect}"
226
- end
227
- end
228
- ret
229
- end
230
- private :parse_DE_line_rel14
231
-
232
- # returns the proposed official name of the protein.
233
- # Returns a String.
234
- #
235
- # Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format have
236
- # been changed. The method returns the full name which is taken from
237
- # "RecName: Full=" or "SubName: Full=" line normally in the beginning of
238
- # the DE lines.
239
- # Unlike parser for old format, no special treatments for fragment or
240
- # precursor.
241
- #
242
- # For old format, the method parses the DE lines and returns the protein
243
- # name as a String.
244
- #
245
- # === DE Line; description (>=1)
246
- # "DE #{OFFICIAL_NAME} (#{SYNONYM})"
247
- # "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTEINS: #1; #2]."
248
- # OFFICIAL_NAME 1/entry
249
- # SYNONYM >=0
250
- # CONTEINS >=0
251
- def protein_name
252
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
253
- parsed_de_line = @data['DE']
254
- if parsed_de_line then
255
- # since UniProtKB release 14.0 of 22-Jul-2008
256
- name = nil
257
- parsed_de_line.each do |a|
258
- case a[0]
259
- when 'RecName', 'SubName'
260
- if name_pair = a[1..-1].find { |b| b[0] == 'Full' } then
261
- name = name_pair[1]
262
- break
263
- end
264
- end
265
- end
266
- name = name.to_s
267
- else
268
- # old format (before Rel. 13.x)
269
- name = ""
270
- if de_line = fetch('DE') then
271
- str = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
272
- name = str[/^[^(]*/].strip
273
- name << ' (Fragment)' if str =~ /fragment/i
274
- end
275
- end
276
- return name
277
- end
278
-
279
-
280
- # returns synonyms (unofficial and/or alternative names).
281
- # Returns an Array containing String objects.
282
- #
283
- # Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format have
284
- # been changed. The method returns the full or short names which are
285
- # taken from "RecName: Short=", "RecName: EC=", and AltName lines,
286
- # except after "Contains:" or "Includes:".
287
- # For keeping compatibility with old format parser, "RecName: EC=N.N.N.N"
288
- # is reported as "EC N.N.N.N".
289
- # In addition, to prevent confusion, "Allergen=" and "CD_antigen="
290
- # prefixes are added for the corresponding fields.
291
- #
292
- # For old format, the method parses the DE lines and returns synonyms.
293
- # synonyms are each placed in () following the official name on the DE line.
294
- def synonyms
295
- ary = Array.new
296
- @data['DE'] ||= parse_DE_line_rel14(get('DE'))
297
- parsed_de_line = @data['DE']
298
- if parsed_de_line then
299
- # since UniProtKB release 14.0 of 22-Jul-2008
300
- parsed_de_line.each do |a|
301
- case a[0]
302
- when 'Includes', 'Contains'
303
- break #the each loop
304
- when 'RecName', 'SubName', 'AltName'
305
- a[1..-1].each do |b|
306
- if name = b[1] and b[1] != self.protein_name then
307
- case b[0]
308
- when 'EC'
309
- name = "EC " + b[1]
310
- when 'Allergen', 'CD_antigen'
311
- name = b[0] + '=' + b[1]
312
- else
313
- name = b[1]
314
- end
315
- ary.push name
316
- end
317
- end
318
- end #case a[0]
319
- end #parsed_de_line.each
320
- else
321
- # old format (before Rel. 13.x)
322
- if de_line = fetch('DE') then
323
- line = de_line.sub(/\[.*\]/,'') # ignore stuff between [ and ]. That's the "contains" part
324
- line.scan(/\([^)]+/) do |synonym|
325
- unless synonym =~ /fragment/i then
326
- ary << synonym[1..-1].strip # index to remove the leading (
327
- end
328
- end
329
- end
330
- end
331
- return ary
332
- end
333
-
334
-
335
- # returns gene names in the GN line.
336
- #
337
- # New UniProt/SwissProt format:
338
- # * Bio::SPTR#gn -> [ <gene record>* ]
339
- # where <gene record> is:
340
- # { :name => '...',
341
- # :synonyms => [ 's1', 's2', ... ],
342
- # :loci => [ 'l1', 'l2', ... ],
343
- # :orfs => [ 'o1', 'o2', ... ]
344
- # }
345
- #
346
- # Old format:
347
- # * Bio::SPTR#gn -> Array # AND
348
- # * Bio::SPTR#gn[0] -> Array # OR
349
- #
350
- # === GN Line: Gene name(s) (>=0, optional)
351
- def gn
352
- unless @data['GN']
353
- case fetch('GN')
354
- when /Name=/,/ORFNames=/,/OrderedLocusNames=/,/Synonyms=/
355
- @data['GN'] = gn_uniprot_parser
356
- else
357
- @data['GN'] = gn_old_parser
358
- end
359
- end
360
- @data['GN']
361
- end
362
-
363
-
364
- # returns contents in the old style GN line.
365
- # === GN Line: Gene name(s) (>=0, optional)
366
- # GN HNS OR DRDX OR OSMZ OR BGLY.
367
- # GN CECA1 AND CECA2.
368
- # GN CECA1 AND (HOGE OR FUGA).
369
- #
370
- # GN NAME1 [(AND|OR) NAME]+.
371
- #
372
- # Bio::SPTR#gn -> Array # AND
373
- # #gn[0] -> Array # OR
374
- # #gene_names -> Array
375
- def gn_old_parser
376
- names = Array.new
377
- if get('GN').size > 0
378
- names = fetch('GN').sub(/\.$/,'').split(/ AND /)
379
- names.map! { |synonyms|
380
- synonyms = synonyms.gsub(/\(|\)/,'').split(/ OR /).map { |e|
381
- e.strip
382
- }
383
- }
384
- end
385
- @data['GN'] = names
386
- end
387
- private :gn_old_parser
388
-
389
- # returns contents in the structured GN line.
390
- # The new format of the GN line is:
391
- # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...];
392
- # GN ORFNames=[, ...];
393
- #
394
- # * Bio::SPTR#gn -> [ <gene record>* ]
395
- # where <gene record> is:
396
- # { :name => '...',
397
- # :synonyms => [ 's1', 's2', ... ],
398
- # :loci => [ 'l1', 'l2', ... ],
399
- # :orfs => [ 'o1', 'o2', ... ]
400
- # }
401
- def gn_uniprot_parser
402
- @data['GN'] = Array.new
403
- gn_line = fetch('GN').strip
404
- records = gn_line.split(/\s*and\s*/)
405
- records.each do |record|
406
- gene_hash = {:name => '', :synonyms => [], :loci => [], :orfs => []}
407
- record.each_line(';') do |element|
408
- case element
409
- when /Name=/ then
410
- gene_hash[:name] = $'[0..-2]
411
- when /Synonyms=/ then
412
- gene_hash[:synonyms] = $'[0..-2].split(/\s*,\s*/)
413
- when /OrderedLocusNames=/ then
414
- gene_hash[:loci] = $'[0..-2].split(/\s*,\s*/)
415
- when /ORFNames=/ then
416
- gene_hash[:orfs] = $'[0..-2].split(/\s*,\s*/)
417
- end
418
- end
419
- @data['GN'] << gene_hash
420
- end
421
- return @data['GN']
422
- end
423
- private :gn_uniprot_parser
424
-
425
-
426
- # returns a Array of gene names in the GN line.
427
- def gene_names
428
- gn # set @data['GN'] if it hasn't been already done
429
- if @data['GN'].first.class == Hash then
430
- @data['GN'].collect { |element| element[:name] }
431
- else
432
- @data['GN'].first
433
- end
434
- end
435
-
436
-
437
- # returns a String of the first gene name in the GN line.
438
- def gene_name
439
- gene_names.first
440
- end
441
-
442
-
443
- # returns a Array of Hashs or a String of the OS line when a key given.
444
- # * Bio::EMBLDB#os -> Array
445
- # [{'name' => '(Human)', 'os' => 'Homo sapiens'},
446
- # {'name' => '(Rat)', 'os' => 'Rattus norveticus'}]
447
- # * Bio::EPTR#os[0] -> Hash
448
- # {'name' => "(Human)", 'os' => 'Homo sapiens'}
449
- # * Bio::SPTR#os[0]['name'] -> "(Human)"
450
- # * Bio::EPTR#os(0) -> "Homo sapiens (Human)"
451
- #
452
- # === OS Line; organism species (>=1)
453
- # OS Genus species (name).
454
- # OS Genus species (name0) (name1).
455
- # OS Genus species (name0) (name1).
456
- # OS Genus species (name0), G s0 (name0), and G s (name0) (name1).
457
- # OS Homo sapiens (Human), and Rarrus norveticus (Rat)
458
- # OS Hippotis sp. Clark and Watts 825.
459
- # OS unknown cyperaceous sp.
460
- def os(num = nil)
461
- unless @data['OS']
462
- os = Array.new
463
- fetch('OS').split(/, and|, /).each do |tmp|
464
- if tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
465
- org = $1
466
- tmp =~ /(\(.+\))/
467
- os.push({'name' => $1, 'os' => org})
468
- else
469
- raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
470
- end
471
- end
472
- @data['OS'] = os
473
- end
474
-
475
- if num
476
- # EX. "Trifolium repens (white clover)"
477
- return "#{@data['OS'][num]['os']} #{@data['OS'][num]['name']}"
478
- else
479
- return @data['OS']
480
- end
481
- end
482
-
483
-
484
- # Bio::EMBLDB::Common#og -> Array
485
- # OG Line; organella (0 or 1/entry)
486
- # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"]
487
- # or a plasmid name (e.g. "Plasmid pBR322").
488
-
489
-
490
- # Bio::EMBLDB::Common#oc -> Array
491
- # OC Line; organism classification (>=1)
492
- # "OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;"
493
- # "OC Theileria."
494
-
495
-
496
-
497
- # returns a Hash of oraganism taxonomy cross-references.
498
- # * Bio::SPTR#ox -> Hash
499
- # {'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
500
- #
501
- # === OX Line; organism taxonomy cross-reference (>=1 per entry)
502
- # OX NCBI_TaxID=1234;
503
- # OX NCBI_TaxID=1234, 2345, 3456, 4567;
504
- def ox
505
- unless @data['OX']
506
- tmp = fetch('OX').sub(/\.$/,'').split(/;/).map { |e| e.strip }
507
- hsh = Hash.new
508
- tmp.each do |e|
509
- db,refs = e.split(/=/)
510
- hsh[db] = refs.split(/, */)
511
- end
512
- @data['OX'] = hsh
513
- end
514
- return @data['OX']
515
- end
516
-
517
- # === The OH Line;
518
- #
519
- # OH NCBI_TaxID=TaxID; HostName.
520
- # http://br.expasy.org/sprot/userman.html#OH_line
521
- def oh
522
- unless @data['OH']
523
- @data['OH'] = fetch('OH').split("\. ").map {|x|
524
- if x =~ /NCBI_TaxID=(\d+);/
525
- taxid = $1
526
- else
527
- raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
528
- $!, "\n", get('OH'), "\n"].join
529
-
530
- end
531
- if x =~ /NCBI_TaxID=\d+; (.+)/
532
- host_name = $1
533
- host_name.sub!(/\.$/, '')
534
- else
535
- host_name = nil
536
- end
537
- {'NCBI_TaxID' => taxid, 'HostName' => host_name}
538
- }
539
- end
540
- @data['OH']
541
- end
542
-
543
-
544
-
545
- # Bio::EMBLDB::Common#ref -> Array
546
- # R Lines
547
- # RN RC RP RX RA RT RL
548
-
549
- # returns contents in the R lines.
550
- # * Bio::EMBLDB::Common#ref -> [ <refernece information Hash>* ]
551
- # where <reference information Hash> is:
552
- # {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
553
- # 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
554
- #
555
- # R Lines
556
- # * RN RC RP RX RA RT RL RG
557
- def ref
558
- unless @data['R']
559
- @data['R'] = [get('R').split(/\nRN /)].flatten.map { |str|
560
- hash = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
561
- 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
562
- str = 'RN ' + str unless /^RN / =~ str
563
-
564
- str.split("\n").each do |line|
565
- if /^(R[NPXARLCTG]) (.+)/ =~ line
566
- hash[$1] += $2 + ' '
567
- else
568
- raise "Invalid format in R lines, \n[#{line}]\n"
569
- end
570
- end
571
-
572
- hash['RN'] = set_RN(hash['RN'])
573
- hash['RC'] = set_RC(hash['RC'])
574
- hash['RP'] = set_RP(hash['RP'])
575
- hash['RX'] = set_RX(hash['RX'])
576
- hash['RA'] = set_RA(hash['RA'])
577
- hash['RT'] = set_RT(hash['RT'])
578
- hash['RL'] = set_RL(hash['RL'])
579
- hash['RG'] = set_RG(hash['RG'])
580
-
581
- hash
582
- }
583
-
584
- end
585
- @data['R']
586
- end
587
-
588
- def set_RN(data)
589
- data.strip
590
- end
591
-
592
- def set_RC(data)
593
- data.scan(/([STP]\w+)=(.+);/).map { |comment|
594
- [comment[1].split(/, and |, /)].flatten.map { |text|
595
- {'Token' => comment[0], 'Text' => text}
596
- }
597
- }.flatten
598
- end
599
- private :set_RC
600
-
601
- def set_RP(data)
602
- data = data.strip
603
- data = data.sub(/\.$/, '')
604
- data.split(/, AND |, /i).map {|x|
605
- x = x.strip
606
- x = x.gsub(' ', ' ')
607
- }
608
- end
609
- private :set_RP
610
-
611
- def set_RX(data)
612
- rx = {'MEDLINE' => nil, 'PubMed' => nil, 'DOI' => nil}
613
- if data =~ /MEDLINE=(.+?);/
614
- rx['MEDLINE'] = $1
615
- end
616
- if data =~ /PubMed=(.+?);/
617
- rx['PubMed'] = $1
618
- end
619
- if data =~ /DOI=(.+?);/
620
- rx['DOI'] = $1
621
- end
622
- rx
623
- end
624
- private :set_RX
625
-
626
- def set_RA(data)
627
- data = data.sub(/; *$/, '')
628
- end
629
- private :set_RA
630
-
631
- def set_RT(data)
632
- data = data.sub(/; *$/, '')
633
- data = data.gsub(/(^"|"$)/, '')
634
- end
635
- private :set_RT
636
-
637
- def set_RL(data)
638
- data = data.strip
639
- end
640
- private :set_RL
641
-
642
- def set_RG(data)
643
- data = data.split('; ')
644
- end
645
- private :set_RG
646
-
647
-
648
-
649
- # returns Bio::Reference object from Bio::EMBLDB::Common#ref.
650
- # * Bio::EMBLDB::Common#ref -> Bio::References
651
- def references
652
- unless @data['references']
653
- ary = self.ref.map {|ent|
654
- hash = Hash.new('')
655
- ent.each {|key, value|
656
- case key
657
- when 'RA'
658
- hash['authors'] = value.split(/, /)
659
- when 'RT'
660
- hash['title'] = value
661
- when 'RL'
662
- if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
663
- hash['journal'] = $1
664
- hash['volume'] = $2
665
- hash['issue'] = $3
666
- hash['pages'] = $4
667
- hash['year'] = $5
668
- else
669
- hash['journal'] = value
670
- end
671
- when 'RX' # PUBMED, MEDLINE, DOI
672
- value.each do |tag, xref|
673
- hash[ tag.downcase ] = xref
674
- end
675
- end
676
- }
677
- Reference.new(hash)
678
- }
679
- @data['references'] = References.new(ary)
680
- end
681
- @data['references']
682
- end
683
-
684
-
685
-
686
-
687
-
688
-
689
- # === The HI line
690
- # Bio::SPTR#hi #=> hash
691
- def hi
692
- unless @data['HI']
693
- @data['HI'] = []
694
- fetch('HI').split(/\. /).each do |hlist|
695
- hash = {'Category' => '', 'Keywords' => [], 'Keyword' => ''}
696
- hash['Category'], hash['Keywords'] = hlist.split(': ')
697
- hash['Keywords'] = hash['Keywords'].split('; ')
698
- hash['Keyword'] = hash['Keywords'].pop
699
- hash['Keyword'].sub!(/\.$/, '')
700
- @data['HI'] << hash
701
- end
702
- end
703
- @data['HI']
704
- end
705
-
706
-
707
- @@cc_topics = ['PHARMACEUTICAL',
708
- 'BIOTECHNOLOGY',
709
- 'TOXIC DOSE',
710
- 'ALLERGEN',
711
- 'RNA EDITING',
712
- 'POLYMORPHISM',
713
- 'BIOPHYSICOCHEMICAL PROPERTIES',
714
- 'MASS SPECTROMETRY',
715
- 'WEB RESOURCE',
716
- 'ENZYME REGULATION',
717
- 'DISEASE',
718
- 'INTERACTION',
719
- 'DEVELOPMENTAL STAGE',
720
- 'INDUCTION',
721
- 'CAUTION',
722
- 'ALTERNATIVE PRODUCTS',
723
- 'DOMAIN',
724
- 'PTM',
725
- 'MISCELLANEOUS',
726
- 'TISSUE SPECIFICITY',
727
- 'COFACTOR',
728
- 'PATHWAY',
729
- 'SUBUNIT',
730
- 'CATALYTIC ACTIVITY',
731
- 'SUBCELLULAR LOCATION',
732
- 'FUNCTION',
733
- 'SIMILARITY']
734
- # returns contents in the CC lines.
735
- # * Bio::SPTR#cc -> Hash
736
- #
737
- # returns an object of contents in the TOPIC.
738
- # * Bio::SPTR#cc(TOPIC) -> Array w/in Hash, Hash
739
- #
740
- # returns contents of the "ALTERNATIVE PRODUCTS".
741
- # * Bio::SPTR#cc('ALTERNATIVE PRODUCTS') -> Hash
742
- # {'Event' => str,
743
- # 'Named isoforms' => int,
744
- # 'Comment' => str,
745
- # 'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
746
- #
747
- # CC -!- ALTERNATIVE PRODUCTS:
748
- # CC Event=Alternative splicing; Named isoforms=15;
749
- # ...
750
- # CC placentae isoforms. All tissues differentially splice exon 13;
751
- # CC Name=A; Synonyms=no del;
752
- # CC IsoId=P15529-1; Sequence=Displayed;
753
- #
754
- # returns contents of the "DATABASE".
755
- # * Bio::SPTR#cc('DATABASE') -> Array
756
- # [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
757
- #
758
- # CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
759
- #
760
- # returns contents of the "MASS SPECTROMETRY".
761
- # * Bio::SPTR#cc('MASS SPECTROMETRY') -> Array
762
- # [{'MW"=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
763
- #
764
- # CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
765
- #
766
- # === CC lines (>=0, optional)
767
- # CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
768
- # CC IN LIVER, KIDNEY, LUNG AND BRAIN.
769
- #
770
- # CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
771
- # CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
772
- #
773
- # See also http://www.expasy.org/sprot/userman.html#CC_line
774
- #
775
- def cc(topic = nil)
776
- unless @data['CC']
777
- cc = Hash.new
778
- comment_border= '-' * (77 - 4 + 1)
779
- dlm = /-!- /
780
-
781
- # 12KD_MYCSM has no CC lines.
782
- return cc if get('CC').size == 0
783
-
784
- cc_raw = fetch('CC')
785
-
786
- # Removing the copyright statement.
787
- cc_raw.sub!(/ *---.+---/m, '')
788
-
789
- # Not any CC Lines without the copyright statement.
790
- return cc if cc_raw == ''
791
-
792
- begin
793
- cc_raw, copyright = cc_raw.split(/#{comment_border}/)[0]
794
- cc_raw = cc_raw.sub(dlm,'')
795
- cc_raw.split(dlm).each do |tmp|
796
- tmp = tmp.strip
797
-
798
- if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
799
- key = $1
800
- body = $2
801
- body.gsub!(/- (?!AND)/,'-')
802
- body.strip!
803
- unless cc[key]
804
- cc[key] = [body]
805
- else
806
- cc[key].push(body)
807
- end
808
- else
809
- raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
810
- '', get('CC'),''].join("\n")
811
- end
812
- end
813
- rescue NameError
814
- if fetch('CC') == ''
815
- return {}
816
- else
817
- raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
818
- "\n'#{self.get('CC')}'\n", "(#{$!})"].join
819
- end
820
- rescue NoMethodError
821
- end
822
-
823
- @data['CC'] = cc
824
- end
825
-
826
-
827
- case topic
828
- when 'ALLERGEN'
829
- return @data['CC'][topic]
830
- when 'ALTERNATIVE PRODUCTS'
831
- return cc_alternative_products(@data['CC'][topic])
832
- when 'BIOPHYSICOCHEMICAL PROPERTIES'
833
- return cc_biophysiochemical_properties(@data['CC'][topic])
834
- when 'BIOTECHNOLOGY'
835
- return @data['CC'][topic]
836
- when 'CATALITIC ACTIVITY'
837
- return cc_catalytic_activity(@data['CC'][topic])
838
- when 'CAUTION'
839
- return cc_caution(@data['CC'][topic])
840
- when 'COFACTOR'
841
- return @data['CC'][topic]
842
- when 'DEVELOPMENTAL STAGE'
843
- return @data['CC'][topic].join('')
844
- when 'DISEASE'
845
- return @data['CC'][topic].join('')
846
- when 'DOMAIN'
847
- return @data['CC'][topic]
848
- when 'ENZYME REGULATION'
849
- return @data['CC'][topic].join('')
850
- when 'FUNCTION'
851
- return @data['CC'][topic].join('')
852
- when 'INDUCTION'
853
- return @data['CC'][topic].join('')
854
- when 'INTERACTION'
855
- return cc_interaction(@data['CC'][topic])
856
- when 'MASS SPECTROMETRY'
857
- return cc_mass_spectrometry(@data['CC'][topic])
858
- when 'MISCELLANEOUS'
859
- return @data['CC'][topic]
860
- when 'PATHWAY'
861
- return cc_pathway(@data['CC'][topic])
862
- when 'PHARMACEUTICAL'
863
- return @data['CC'][topic]
864
- when 'POLYMORPHISM'
865
- return @data['CC'][topic]
866
- when 'PTM'
867
- return @data['CC'][topic]
868
- when 'RNA EDITING'
869
- return cc_rna_editing(@data['CC'][topic])
870
- when 'SIMILARITY'
871
- return @data['CC'][topic]
872
- when 'SUBCELLULAR LOCATION'
873
- return cc_subcellular_location(@data['CC'][topic])
874
- when 'SUBUNIT'
875
- return @data['CC'][topic]
876
- when 'TISSUE SPECIFICITY'
877
- return @data['CC'][topic]
878
- when 'TOXIC DOSE'
879
- return @data['CC'][topic]
880
- when 'WEB RESOURCE'
881
- return cc_web_resource(@data['CC'][topic])
882
- when 'DATABASE'
883
- # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
884
- tmp = Array.new
885
- db = @data['CC']['DATABASE']
886
- return db unless db
887
-
888
- db.each do |e|
889
- db = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
890
- e.sub(/.$/,'').split(/;/).each do |line|
891
- case line
892
- when /NAME=(.+)/
893
- db['NAME'] = $1
894
- when /NOTE=(.+)/
895
- db['NOTE'] = $1
896
- when /WWW="(.+)"/
897
- db['WWW'] = $1
898
- when /FTP="(.+)"/
899
- db['FTP'] = $1
900
- end
901
- end
902
- tmp.push(db)
903
- end
904
- return tmp
905
- when nil
906
- return @data['CC']
907
- else
908
- return @data['CC'][topic]
909
- end
910
- end
911
-
912
-
913
- def cc_alternative_products(data)
914
- ap = data.join('')
915
- return ap unless ap
916
-
917
- # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
918
- tmp = {'Event' => "", 'Named isoforms' => "", 'Comment' => "",
919
- 'Variants' => []}
920
- if /Event=(.+?);/ =~ ap
921
- tmp['Event'] = $1
922
- tmp['Event'] = tmp['Event'].sub(/;/,'').split(/, /)
923
- end
924
- if /Named isoforms=(\S+?);/ =~ ap
925
- tmp['Named isoforms'] = $1
926
- end
927
- if /Comment=(.+?);/m =~ ap
928
- tmp['Comment'] = $1
929
- end
930
- ap.scan(/Name=.+?Sequence=.+?;/).each do |ent|
931
- tmp['Variants'] << cc_alternative_products_variants(ent)
932
- end
933
- return tmp
934
- end
935
- private :cc_alternative_products
936
-
937
- def cc_alternative_products_variants(data)
938
- variant = {'Name' => '', 'Synonyms' => [], 'IsoId' => [], 'Sequence' => []}
939
- data.split(/; /).map {|x| x.split(/=/) }.each do |e|
940
- case e[0]
941
- when 'Sequence', 'Synonyms', 'IsoId'
942
- e[1] = e[1].sub(/;/,'').split(/, /)
943
- end
944
- variant[e[0]] = e[1]
945
- end
946
- variant
947
- end
948
- private :cc_alternative_products_variants
949
-
950
-
951
- def cc_biophysiochemical_properties(data)
952
- data = data[0]
953
-
954
- hash = {'Absorption' => {},
955
- 'Kinetic parameters' => {},
956
- 'pH dependence' => "",
957
- 'Redox potential' => "",
958
- 'Temperature dependence' => ""}
959
- if data =~ /Absorption: Abs\(max\)=(.+?);/
960
- hash['Absorption']['Abs(max)'] = $1
961
- end
962
- if data =~ /Absorption: Abs\(max\)=.+; Note=(.+?);/
963
- hash['Absorption']['Note'] = $1
964
- end
965
- if data =~ /Kinetic parameters: KM=(.+?); Vmax=(.+?);/
966
- hash['Kinetic parameters']['KM'] = $1
967
- hash['Kinetic parameters']['Vmax'] = $2
968
- end
969
- if data =~ /Kinetic parameters: KM=.+; Vmax=.+; Note=(.+?);/
970
- hash['Kinetic parameters']['Note'] = $1
971
- end
972
- if data =~ /pH dependence: (.+?);/
973
- hash['pH dependence'] = $1
974
- end
975
- if data =~ /Redox potential: (.+?);/
976
- hash['Redox potential'] = $1
977
- end
978
- if data =~ /Temperature dependence: (.+?);/
979
- hash['Temperature dependence'] = $1
980
- end
981
- hash
982
- end
983
- private :cc_biophysiochemical_properties
984
-
985
-
986
- def cc_caution(data)
987
- data.join('')
988
- end
989
- private :cc_caution
990
-
991
-
992
- # returns conteins in a line of the CC INTERACTION section.
993
- #
994
- # CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
995
- def cc_interaction(data)
996
- str = data.join('')
997
- it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/)
998
- it.map {|ent|
999
- ent.map! {|x| x.strip }
1000
- if ent[0] =~ /^(.+):(.+)/
1001
- spac = $1
1002
- spid = $2.split(' ')[0]
1003
- optid = nil
1004
- elsif ent[0] =~ /Self/
1005
- spac = self.entry_id
1006
- spid = self.entry_id
1007
- optid = nil
1008
- end
1009
- if ent[0] =~ /^.+:.+ (.+)/
1010
- optid = $1
1011
- end
1012
-
1013
- {'SP_Ac' => spac,
1014
- 'identifier' => spid,
1015
- 'NbExp' => ent[1],
1016
- 'IntAct' => ent[2].split(', '),
1017
- 'optional_identifier' => optid}
1018
- }
1019
- end
1020
- private :cc_interaction
1021
-
1022
-
1023
- def cc_mass_spectrometry(data)
1024
- # MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
1025
- return data unless data
1026
-
1027
- data.map { |m|
1028
- mass = {'MW' => nil, 'MW_ERR' => nil, 'METHOD' => nil, 'RANGE' => nil,
1029
- 'NOTE' => nil}
1030
- m.sub(/.$/,'').split(/;/).each do |line|
1031
- case line
1032
- when /MW=(.+)/
1033
- mass['MW'] = $1
1034
- when /MW_ERR=(.+)/
1035
- mass['MW_ERR'] = $1
1036
- when /METHOD=(.+)/
1037
- mass['METHOD'] = $1
1038
- when /RANGE=(\d+-\d+)/
1039
- mass['RANGE'] = $1 # RANGE class ?
1040
- when /NOTE=(.+)/
1041
- mass['NOTE'] = $1
1042
- end
1043
- end
1044
- mass
1045
- }
1046
- end
1047
- private :cc_mass_spectrometry
1048
-
1049
-
1050
- def cc_pathway(data)
1051
- data.map {|x| x.sub(/\.$/, '') }.map {|x|
1052
- x.split(/; | and |: /)
1053
- }[0]
1054
- end
1055
- private :cc_pathway
1056
-
1057
-
1058
- def cc_rna_editing(data)
1059
- data = data.join('')
1060
- entry = {'Modified_positions' => [], 'Note' => ""}
1061
- if data =~ /Modified_positions=(.+?)(\.|;)/
1062
- entry['Modified_positions'] = $1.sub(/\.$/, '').split(', ')
1063
- else
1064
- raise ArgumentError, "Invarid CC RNA Editing lines (#{self.entry_id}):#{$!}\n#{get('CC')}"
1065
- end
1066
- if data =~ /Note=(.+)/
1067
- entry['Note'] = $1
1068
- end
1069
- entry
1070
- end
1071
- private :cc_rna_editing
1072
-
1073
-
1074
- def cc_subcellular_location(data)
1075
- data.map {|x|
1076
- x.split('. ').map {|y|
1077
- y.split('; ').map {|z|
1078
- z.sub(/\.$/, '')
1079
- }
1080
- }
1081
- }[0]
1082
- end
1083
- private :cc_subcellular_location
1084
-
1085
-
1086
- #--
1087
- # Since UniProtKB release 12.2 of 11-Sep-2007:
1088
- # CC -!- WEB RESOURCE: Name=ResourceName[; Note=FreeText][; URL=WWWAddress]. # Old format:
1089
- # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
1090
- #++
1091
-
1092
- def cc_web_resource(data)
1093
- data.map {|x|
1094
- entry = {'Name' => nil, 'Note' => nil, 'URL' => nil}
1095
- x.split(';').each do |y|
1096
- case y
1097
- when /(Name|Note)\=(.+)/
1098
- key = $1
1099
- val = $2.strip
1100
- entry[key] = val
1101
- when /(NAME|NOTE)\=(.+)/
1102
- key = $1.downcase.capitalize
1103
- val = $2.strip
1104
- entry[key] = val
1105
- when /URL\=\"(.+)\"/
1106
- entry['URL'] = $1.strip
1107
- end
1108
- end
1109
- entry
1110
- }
1111
- end
1112
- private :cc_web_resource
1113
-
1114
- # returns databases cross-references in the DR lines.
1115
- # * Bio::SPTR#dr -> Hash w/in Array
1116
- #
1117
- # === DR Line; defabases cross-reference (>=0)
1118
- # DR database_identifier; primary_identifier; secondary_identifier.
1119
- # a cross_ref pre one line
1120
- @@dr_database_identifier = ['EMBL','CARBBANK','DICTYDB','ECO2DBASE',
1121
- 'ECOGENE',
1122
- 'FLYBASE','GCRDB','HIV','HSC-2DPAGE','HSSP','INTERPRO','MAIZEDB',
1123
- 'MAIZE-2DPAGE','MENDEL','MGD''MIM','PDB','PFAM','PIR','PRINTS',
1124
- 'PROSITE','REBASE','AARHUS/GHENT-2DPAGE','SGD','STYGENE','SUBTILIST',
1125
- 'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN']
1126
-
1127
- # Backup Bio::EMBLDB#dr as embl_dr
1128
- alias :embl_dr :dr
1129
-
1130
- # Bio::SPTR#dr
1131
- def dr(key = nil)
1132
- unless key
1133
- embl_dr
1134
- else
1135
- (embl_dr[key] or []).map {|x|
1136
- {'Accession' => x[0],
1137
- 'Version' => x[1],
1138
- ' ' => x[2],
1139
- 'Molecular Type' => x[3]}
1140
- }
1141
- end
1142
- end
1143
-
1144
-
1145
- # Bio::EMBLDB::Common#kw - Array
1146
- # #keywords -> Array
1147
- #
1148
- # KW Line; keyword (>=1)
1149
- # KW [Keyword;]+
1150
-
1151
-
1152
- # returns contents in the feature table.
1153
- #
1154
- # == Examples
1155
- #
1156
- # sp = Bio::SPTR.new(entry)
1157
- # ft = sp.ft
1158
- # ft.class #=> Hash
1159
- # ft.keys.each do |feature_key|
1160
- # ft[feature_key].each do |feature|
1161
- # feature['From'] #=> '1'
1162
- # feature['To'] #=> '21'
1163
- # feature['Description'] #=> ''
1164
- # feature['FTId'] #=> ''
1165
- # feature['diff'] #=> []
1166
- # feature['original'] #=> [feature_key, '1', '21', '', '']
1167
- # end
1168
- # end
1169
- #
1170
- # * Bio::SPTR#ft -> Hash
1171
- # {FEATURE_KEY => [{'From' => int, 'To' => int,
1172
- # 'Description' => aStr, 'FTId' => aStr,
1173
- # 'diff' => [original_residues, changed_residues],
1174
- # 'original' => aAry }],...}
1175
- #
1176
- # returns an Array of the information about the feature_name in the feature table.
1177
- # * Bio::SPTR#ft(feature_name) -> Array of Hash
1178
- # [{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
1179
- #
1180
- # == FT Line; feature table data (>=0, optional)
1181
- #
1182
- # Col Data item
1183
- # ----- -----------------
1184
- # 1- 2 FT
1185
- # 6-13 Feature name
1186
- # 15-20 `FROM' endpoint
1187
- # 22-27 `TO' endpoint
1188
- # 35-75 Description (>=0 per key)
1189
- # ----- -----------------
1190
- #
1191
- # Note: 'FROM' and 'TO' endopoints are allowed to use non-numerial charactors
1192
- # including '<', '>' or '?'. (c.f. '<1', '?42')
1193
- #
1194
- # See also http://www.expasy.org/sprot/userman.html#FT_line
1195
- #
1196
- def ft(feature_key = nil)
1197
- return ft[feature_key] if feature_key
1198
- return @data['FT'] if @data['FT']
1199
-
1200
- table = []
1201
- begin
1202
- get('FT').split("\n").each do |line|
1203
- if line =~ /^FT \w/
1204
- feature = line.chomp.ljust(74)
1205
- table << [feature[ 5..12].strip, # Feature Name
1206
- feature[14..19].strip, # From
1207
- feature[21..26].strip, # To
1208
- feature[34..74].strip ] # Description
1209
- else
1210
- table.last << line.chomp.sub!(/^FT +/, '')
1211
- end
1212
- end
1213
-
1214
- # Joining Description lines
1215
- table = table.map { |feature|
1216
- ftid = feature.pop if feature.last =~ /FTId=/
1217
- if feature.size > 4
1218
- feature = [feature[0],
1219
- feature[1],
1220
- feature[2],
1221
- feature[3, feature.size - 3].join(" ")]
1222
- end
1223
- feature << if ftid then ftid else '' end
1224
- }
1225
-
1226
- hash = {}
1227
- table.each do |feature|
1228
- hash[feature[0]] = [] unless hash[feature[0]]
1229
- hash[feature[0]] << {
1230
- # Removing '<', '>' or '?' in FROM/TO endopoint.
1231
- 'From' => feature[1].sub(/\D/, '').to_i,
1232
- 'To' => feature[2].sub(/\D/, '').to_i,
1233
- 'Description' => feature[3],
1234
- 'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
1235
- 'diff' => [],
1236
- 'original' => feature
1237
- }
1238
-
1239
- case feature[0]
1240
- when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
1241
- case hash[feature[0]].last['Description']
1242
- when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
1243
- original_res = $1
1244
- changed_res = $2
1245
- original_res = original_res.gsub(/ /,'').strip
1246
- chenged_res = changed_res.gsub(/ /,'').strip
1247
- when /Missing/i
1248
- original_res = seq.subseq(hash[feature[0]].last['From'],
1249
- hash[feature[0]].last['To'])
1250
- changed_res = ''
1251
- end
1252
- hash[feature[0]].last['diff'] = [original_res, chenged_res]
1253
- end
1254
- end
1255
- rescue
1256
- raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
1257
- end
1258
-
1259
- @data['FT'] = hash
1260
- end
1261
-
1262
-
1263
-
1264
- # returns a Hash of conteins in the SQ lines.
1265
- # * Bio::SPTRL#sq -> hsh
1266
- #
1267
- # returns a value of a key given in the SQ lines.
1268
- # * Bio::SPTRL#sq(key) -> int or str
1269
- # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length',
1270
- # 'CRC64']
1271
- #
1272
- # === SQ Line; sequence header (1/entry)
1273
- # SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64;
1274
- # SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
1275
- #
1276
- # MW, Dalton unit.
1277
- # CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
1278
- def sq(key = nil)
1279
- unless @data['SQ']
1280
- if fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
1281
- @data['SQ'] = { 'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3 }
1282
- else
1283
- raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
1284
- end
1285
- end
1286
-
1287
- if key
1288
- case key
1289
- when /mw/, /molecular/, /weight/
1290
- @data['SQ']['MW']
1291
- when /len/, /length/, /AA/
1292
- @data['SQ']['aalen']
1293
- else
1294
- @data['SQ'][key]
1295
- end
1296
- else
1297
- @data['SQ']
1298
- end
1299
- end
1300
-
1301
-
1302
- # returns a Bio::Sequence::AA of the amino acid sequence.
1303
- # * Bio::SPTR#seq -> Bio::Sequence::AA
1304
- #
1305
- # blank Line; sequence data (>=1)
1306
- def seq
1307
- unless @data['']
1308
- @data[''] = Sequence::AA.new( fetch('').gsub(/ |\d+/,'') )
1309
- end
1310
- return @data['']
1311
- end
1312
- alias aaseq seq
1313
-
1314
- end # class SPTR
1315
-
1316
- end # module Bio
1317
-
1318
-
1319
-
1320
- =begin
1321
-
1322
- = Bio::SPTR < Bio::DB
1323
-
1324
- Class for a entry in the SWISS-PROT/TrEMBL database.
1325
-
1326
- * ((<URL:http://www.ebi.ac.uk/swissprot/>))
1327
- * ((<URL:http://www.ebi.ac.uk/trembl/>))
1328
- * ((<URL:http://www.ebi.ac.uk/sprot/userman.html>))
1329
-
1330
-
1331
- --- Bio::SPTR.new(a_sp_entry)
1332
-
1333
- === ID line (Identification)
1334
-
1335
- --- Bio::SPTR#id_line -> {'ENTRY_NAME' => str, 'DATA_CLASS' => str,
1336
- 'MOLECULE_TYPE' => str, 'SEQUENCE_LENGTH' => int }
1337
- --- Bio::SPTR#id_line(key) -> str
1338
-
1339
- key = (ENTRY_NAME|MOLECULE_TYPE|DATA_CLASS|SEQUENCE_LENGTH)
1340
-
1341
- --- Bio::SPTR#entry_id -> str
1342
- --- Bio::SPTR#molecule -> str
1343
- --- Bio::SPTR#sequence_length -> int
1344
-
1345
-
1346
- === AC lines (Accession number)
1347
-
1348
- --- Bio::SPTR#ac -> ary
1349
- --- Bio::SPTR#accessions -> ary
1350
- --- Bio::SPTR#accession -> accessions.first
1351
-
1352
-
1353
- === GN line (Gene name(s))
1354
-
1355
- --- Bio::SPTR#gn -> [ary, ...] or [{:name => str, :synonyms => [], :loci => [], :orfs => []}]
1356
- --- Bio::SPTR#gene_name -> str
1357
- --- Bio::SPTR#gene_names -> [str] or [str]
1358
-
1359
-
1360
- === DT lines (Date)
1361
-
1362
- --- Bio::SPTR#dt -> {'created' => str, 'sequence' => str, 'annotation' => str}
1363
- --- Bio::SPTR#dt(key) -> str
1364
-
1365
- key := (created|annotation|sequence)
1366
-
1367
-
1368
- === DE lines (Description)
1369
-
1370
- --- Bio::SPTR#de -> str
1371
- #definition -> str
1372
-
1373
- --- Bio::SPTR#protein_name
1374
-
1375
- Returns the proposed official name of the protein
1376
-
1377
-
1378
- --- Bio::SPTR#synonyms
1379
-
1380
- Returns an array of synonyms (unofficial names)
1381
-
1382
- === KW lines (Keyword)
1383
-
1384
- --- Bio::SPTR#kw -> ary
1385
-
1386
- === OS lines (Organism species)
1387
-
1388
- --- Bio::SPTR#os -> [{'name' => str, 'os' => str}, ...]
1389
-
1390
- === OC lines (organism classification)
1391
-
1392
- --- Bio::SPTR#oc -> ary
1393
-
1394
- === OG line (Organella)
1395
-
1396
- --- Bio::SPTR#og -> ary
1397
-
1398
- === OX line (Organism taxonomy cross-reference)
1399
-
1400
- --- Bio::SPTR#ox -> {'NCBI_TaxID' => [], ...}
1401
-
1402
- === RN RC RP RX RA RT RL RG lines (Reference)
1403
-
1404
- --- Bio::SPTR#ref -> [{'RN' => int, 'RP' => str, 'RC' => str, 'RX' => str, ''RT' => str, 'RL' => str, 'RA' => str, 'RC' => str, 'RG' => str},...]
1405
-
1406
- === DR lines (Database cross-reference)
1407
-
1408
- --- Bio::SPTR#dr -> {'EMBL' => ary, ...}
1409
-
1410
- === FT lines (Feature table data)
1411
-
1412
- --- Bio::SPTR#ft -> hsh
1413
-
1414
- === SQ lines (Sequence header and data)
1415
-
1416
- --- Bio::SPTR#sq -> {'CRC64' => str, 'MW' => int, 'aalen' => int}
1417
- --- Bio::SPTR#sq(key) -> int or str
1418
-
1419
- key := (aalen|MW|CRC64)
1420
-
1421
- --- Bio::EMBL#seq -> Bio::Sequece::AA
1422
- #aaseq -> Bio::Sequece::AA
1423
-
1424
- =end
12
+ require "bio/db/embl/uniprotkb" unless const_defined?(:UniProtKB)
1425
13
 
1426
- # Content Occurrence in an entry
1427
- # ---- --------------------------- --------------------------------
1428
- # ID - identification (begins each entry; 1 per entry)
1429
- # AC - accession number(s) (>=1 per entry)
1430
- # DT - date (3 per entry)
1431
- # DE - description (>=1 per entry)
1432
- # GN - gene name(s) (>=0 per entry; optional)
1433
- # OS - organism species (>=1 per entry)
1434
- # OG - organelle (0 or 1 per entry; optional)
1435
- # OC - organism classification (>=1 per entry)
1436
- # OX - organism taxonomy x-ref (>=1 per entry)
1437
- # OH - Organism Host
1438
- # RN - reference number (>=1 per entry)
1439
- # RP - reference positions (>=1 per entry)
1440
- # RC - reference comment(s) (>=0 per entry; optional)
1441
- # RX - reference cross-reference(s) (>=0 per entry; optional)
1442
- # RA - reference author(s) (>=1 per entry)
1443
- # RT - reference title (>=0 per entry; optional)
1444
- # RL - reference location (>=1 per entry)
1445
- # RG - reference group(s)
1446
- # CC - comments or notes (>=0 per entry; optional)
1447
- # DR - database cross-references (>=0 per entry; optional)
1448
- # KW - keywords (>=1 per entry)
1449
- # FT - feature table data (>=0 per entry; optional)
1450
- # SQ - sequence header (1 per entry)
1451
- # - (blanks) The sequence data (>=1 per entry)
1452
- # // - termination line (ends each entry; 1 per entry)
1453
- # ---- --------------------------- --------------------------------
14
+ # Bio::SPTR is changed to an alias of Bio::UniProtKB.
15
+ # Please use Bio::UniProtKB.
16
+ # Bio::SPTR may be deprecated in the future.
17
+ SPTR = UniProtKB
1454
18
 
19
+ end #module Bio
1455
20