bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/lib/bio/db/embl/common.rb
CHANGED
|
@@ -4,12 +4,15 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
5
|
# License:: LGPL
|
|
6
6
|
#
|
|
7
|
-
# $Id: common.rb,v 1.
|
|
7
|
+
# $Id: common.rb,v 1.9 2006/01/28 06:40:38 nakao Exp $
|
|
8
8
|
#
|
|
9
|
-
# ==
|
|
9
|
+
# == Description
|
|
10
10
|
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
11
|
+
# EMBL style databases class
|
|
12
|
+
#
|
|
13
|
+
# This module defines a common framework among EMBL, UniProtKB, SWISS-PROT,
|
|
14
|
+
# TrEMBL. For more details, see the documentation in each embl/*.rb
|
|
15
|
+
# libraries.
|
|
13
16
|
#
|
|
14
17
|
# EMBL style format:
|
|
15
18
|
# ID - identification (begins each entry; 1 per entry)
|
|
@@ -38,9 +41,9 @@
|
|
|
38
41
|
# bb - (blanks) sequence data (>=1 per entry)
|
|
39
42
|
# // - termination line (ends each entry; 1 per entry)
|
|
40
43
|
#
|
|
41
|
-
#
|
|
42
|
-
# == Example
|
|
44
|
+
# == Examples
|
|
43
45
|
#
|
|
46
|
+
# # Make a new parser class for EMBL style database entry.
|
|
44
47
|
# require 'bio/db/embl/common'
|
|
45
48
|
# module Bio
|
|
46
49
|
# class NEWDB < EMBLDB
|
|
@@ -48,6 +51,24 @@
|
|
|
48
51
|
# end
|
|
49
52
|
# end
|
|
50
53
|
#
|
|
54
|
+
# == References
|
|
55
|
+
#
|
|
56
|
+
# * The EMBL Nucleotide Sequence Database
|
|
57
|
+
# http://www.ebi.ac.uk/embl/
|
|
58
|
+
#
|
|
59
|
+
# * The EMBL Nucleotide Sequence Database: Users Manual
|
|
60
|
+
# http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
|
|
61
|
+
#
|
|
62
|
+
# * Swiss-Prot Protein knowledgebase. TrEMBL Computer-annotated supplement
|
|
63
|
+
# to Swiss-Prot
|
|
64
|
+
# http://au.expasy.org/sprot/
|
|
65
|
+
#
|
|
66
|
+
# * UniProt
|
|
67
|
+
# http://uniprot.org/
|
|
68
|
+
#
|
|
69
|
+
# * The UniProtKB/SwissProt/TrEMBL User Manual
|
|
70
|
+
# http://www.expasy.org/sprot/userman.html
|
|
71
|
+
#
|
|
51
72
|
#--
|
|
52
73
|
#
|
|
53
74
|
# This library is free software; you can redistribute it and/or
|
data/lib/bio/db/embl/embl.rb
CHANGED
|
@@ -5,20 +5,28 @@
|
|
|
5
5
|
# Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
|
|
6
6
|
# License:: LGPL
|
|
7
7
|
#
|
|
8
|
-
# $Id: embl.rb,v 1.
|
|
8
|
+
# $Id: embl.rb,v 1.26 2006/01/28 06:40:38 nakao Exp $
|
|
9
9
|
#
|
|
10
|
-
# ==
|
|
10
|
+
# == Description
|
|
11
11
|
#
|
|
12
|
+
# Parser class for EMBL database entry.
|
|
12
13
|
#
|
|
13
|
-
#
|
|
14
|
-
# == Example
|
|
14
|
+
# == Examples
|
|
15
15
|
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
16
|
+
# emb = Bio::EMBL.new($<.read)
|
|
17
|
+
# emb.entry_id
|
|
18
|
+
# emb.each_cds do |cds|
|
|
19
|
+
# cds # A CDS in feature table.
|
|
20
|
+
# end
|
|
21
|
+
# emb.seq #=> "ACGT..."
|
|
22
|
+
#
|
|
23
|
+
# == References
|
|
24
|
+
#
|
|
25
|
+
# * The EMBL Nucleotide Sequence Database
|
|
26
|
+
# http://www.ebi.ac.uk/embl/
|
|
27
|
+
#
|
|
28
|
+
# * The EMBL Nucleotide Sequence Database: Users Manual
|
|
29
|
+
# http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
|
|
22
30
|
#
|
|
23
31
|
#--
|
|
24
32
|
#
|
data/lib/bio/db/embl/sptr.rb
CHANGED
|
@@ -4,13 +4,32 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
5
|
# License:: LGPL
|
|
6
6
|
#
|
|
7
|
-
# $Id: sptr.rb,v 1.
|
|
7
|
+
# $Id: sptr.rb,v 1.30 2006/01/28 06:40:38 nakao Exp $
|
|
8
8
|
#
|
|
9
|
-
# ==
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# Shared methods for UniProtKB/SwissProt and TrEMBL classes.
|
|
12
|
+
#
|
|
13
|
+
# See the SWISS-PROT document file SPECLIST.TXT or UniProtKB/SwissProt
|
|
14
|
+
# user manual.
|
|
15
|
+
#
|
|
16
|
+
# == Examples
|
|
17
|
+
#
|
|
18
|
+
# str = File.read("p53_human.swiss")
|
|
19
|
+
# obj = Bio::SPTR.new(str)
|
|
20
|
+
# obj.entry_id #=> "P53_HUMAN"
|
|
21
|
+
#
|
|
22
|
+
# == References
|
|
23
|
+
#
|
|
24
|
+
# * Swiss-Prot Protein knowledgebase. TrEMBL Computer-annotated supplement
|
|
25
|
+
# to Swiss-Prot
|
|
26
|
+
# http://au.expasy.org/sprot/
|
|
10
27
|
#
|
|
11
|
-
#
|
|
28
|
+
# * UniProt
|
|
29
|
+
# http://uniprot.org/
|
|
12
30
|
#
|
|
13
|
-
#
|
|
31
|
+
# * The UniProtKB/SwissProt/TrEMBL User Manual
|
|
32
|
+
# http://www.expasy.org/sprot/userman.html
|
|
14
33
|
#
|
|
15
34
|
#--
|
|
16
35
|
#
|
|
@@ -36,7 +55,7 @@ require 'bio/db/embl/common'
|
|
|
36
55
|
|
|
37
56
|
module Bio
|
|
38
57
|
|
|
39
|
-
# Parser class for UniProtKB/SwissProt and TrEMBL database entry
|
|
58
|
+
# Parser class for UniProtKB/SwissProt and TrEMBL database entry.
|
|
40
59
|
class SPTR < EMBLDB
|
|
41
60
|
include Bio::EMBLDB::Common
|
|
42
61
|
|
|
@@ -45,17 +64,19 @@ class SPTR < EMBLDB
|
|
|
45
64
|
|
|
46
65
|
|
|
47
66
|
# returns a Hash of the ID line.
|
|
67
|
+
#
|
|
48
68
|
# returns a content (Int or String) of the ID line by a given key.
|
|
49
69
|
# Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MOLECULE_TYPE', 'SEQUENCE_LENGTH']
|
|
50
70
|
#
|
|
51
|
-
# ID Line
|
|
52
|
-
#
|
|
71
|
+
# === ID Line
|
|
72
|
+
# ID P53_HUMAN STANDARD; PRT; 393 AA.
|
|
73
|
+
# #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
|
|
74
|
+
#
|
|
75
|
+
# === Examples
|
|
76
|
+
# obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
|
|
77
|
+
#
|
|
78
|
+
# obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
|
|
53
79
|
#
|
|
54
|
-
# ENTRY_NAME := "#{X}_#{Y}"
|
|
55
|
-
# X =~ /[A-Z0-9]{1,5}/ # The protein name.
|
|
56
|
-
# Y =~ /[A-Z0-9]{1,5}/ # The biological source of the protein.
|
|
57
|
-
# MOLECULE_TYPE := 'PRT' =~ /\w{3}/
|
|
58
|
-
# SEQUENCE_LENGTH =~ /\d+ AA/
|
|
59
80
|
def id_line(key = nil)
|
|
60
81
|
unless @data['ID']
|
|
61
82
|
tmp = Hash.new
|
|
@@ -78,7 +99,6 @@ class SPTR < EMBLDB
|
|
|
78
99
|
|
|
79
100
|
# returns a ENTRY_NAME in the ID line.
|
|
80
101
|
#
|
|
81
|
-
# A short-cut for Bio::SPTR#id_line('ENTRY_NAME').
|
|
82
102
|
def entry_id
|
|
83
103
|
id_line('ENTRY_NAME')
|
|
84
104
|
end
|
|
@@ -119,10 +139,10 @@ class SPTR < EMBLDB
|
|
|
119
139
|
#
|
|
120
140
|
# returns a String of information in the DT lines by a given key.
|
|
121
141
|
#
|
|
122
|
-
# DT Line; date (3/entry)
|
|
123
|
-
#
|
|
124
|
-
#
|
|
125
|
-
#
|
|
142
|
+
# === DT Line; date (3/entry)
|
|
143
|
+
# DT DD-MMM-YYYY (rel. NN, Created)
|
|
144
|
+
# DT DD-MMM-YYYY (rel. NN, Last sequence update)
|
|
145
|
+
# DT DD-MMM-YYYY (rel. NN, Last annotation update)
|
|
126
146
|
def dt(key = nil)
|
|
127
147
|
unless @data['DT']
|
|
128
148
|
tmp = Hash.new
|
|
@@ -143,7 +163,7 @@ class SPTR < EMBLDB
|
|
|
143
163
|
|
|
144
164
|
# returns the proposed official name of the protein.
|
|
145
165
|
#
|
|
146
|
-
# DE Line; description (>=1)
|
|
166
|
+
# === DE Line; description (>=1)
|
|
147
167
|
# "DE #{OFFICIAL_NAME} (#{SYNONYM})"
|
|
148
168
|
# "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]."
|
|
149
169
|
# OFFICIAL_NAME 1/entry
|
|
@@ -192,7 +212,7 @@ class SPTR < EMBLDB
|
|
|
192
212
|
# * Bio::SPTR#gn -> Array # AND
|
|
193
213
|
# * Bio::SPTR#gn[0] -> Array # OR
|
|
194
214
|
#
|
|
195
|
-
# GN Line: Gene name(s) (>=0, optional)
|
|
215
|
+
# === GN Line: Gene name(s) (>=0, optional)
|
|
196
216
|
def gn
|
|
197
217
|
return @data['GN'] if @data['GN']
|
|
198
218
|
|
|
@@ -205,7 +225,7 @@ class SPTR < EMBLDB
|
|
|
205
225
|
end
|
|
206
226
|
|
|
207
227
|
# returns contents in the old style GN line.
|
|
208
|
-
# GN Line: Gene name(s) (>=0, optional)
|
|
228
|
+
# === GN Line: Gene name(s) (>=0, optional)
|
|
209
229
|
# GN HNS OR DRDX OR OSMZ OR BGLY.
|
|
210
230
|
# GN CECA1 AND CECA2.
|
|
211
231
|
# GN CECA1 AND (HOGE OR FUGA).
|
|
@@ -292,7 +312,7 @@ class SPTR < EMBLDB
|
|
|
292
312
|
# * Bio::SPTR#os[0]['name'] -> "(Human)"
|
|
293
313
|
# * Bio::SPTR#os(0) -> "Homo sapiens (Human)"
|
|
294
314
|
#
|
|
295
|
-
# OS Line; organism species (>=1)
|
|
315
|
+
# === OS Line; organism species (>=1)
|
|
296
316
|
# OS Genus species (name).
|
|
297
317
|
# OS Genus species (name0) (name1).
|
|
298
318
|
# OS Genus species (name0) (name1).
|
|
@@ -337,9 +357,9 @@ class SPTR < EMBLDB
|
|
|
337
357
|
|
|
338
358
|
# returns a Hash of organism taxonomy cross-references.
|
|
339
359
|
# * Bio::SPTR#ox -> Hash
|
|
340
|
-
#
|
|
360
|
+
# {'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
|
|
341
361
|
#
|
|
342
|
-
# OX Line; organism taxonomy cross-reference (>=1 per entry)
|
|
362
|
+
# === OX Line; organism taxonomy cross-reference (>=1 per entry)
|
|
343
363
|
# OX NCBI_TaxID=1234;
|
|
344
364
|
# OX NCBI_TaxID=1234, 2345, 3456, 4567;
|
|
345
365
|
def ox
|
|
@@ -368,43 +388,43 @@ class SPTR < EMBLDB
|
|
|
368
388
|
'SIMILARITY','SUBCELLULAR LOCATION','SUBUNIT','TISSUE SPECIFICITY']
|
|
369
389
|
# returns contents in the CC lines.
|
|
370
390
|
# * Bio::SPTR#cc -> Hash
|
|
371
|
-
|
|
372
|
-
#
|
|
373
|
-
# returns an Array of contents in the TOPIC string.
|
|
391
|
+
#
|
|
392
|
+
# returns an object of contents in the TOPIC.
|
|
374
393
|
# * Bio::SPTR#cc(TOPIC) -> Array w/in Hash, Hash
|
|
375
394
|
#
|
|
376
395
|
# returns contents of the "ALTERNATIVE PRODUCTS".
|
|
377
396
|
# * Bio::SPTR#cc('ALTERNATIVE PRODUCTS') -> Hash
|
|
378
|
-
#
|
|
379
|
-
#
|
|
380
|
-
#
|
|
381
|
-
#
|
|
397
|
+
# {'Event' => str,
|
|
398
|
+
# 'Named isoforms' => int,
|
|
399
|
+
# 'Comment' => str,
|
|
400
|
+
# 'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
|
|
382
401
|
#
|
|
383
|
-
#
|
|
384
|
-
#
|
|
385
|
-
#
|
|
386
|
-
#
|
|
387
|
-
#
|
|
388
|
-
#
|
|
402
|
+
# CC -!- ALTERNATIVE PRODUCTS:
|
|
403
|
+
# CC Event=Alternative splicing; Named isoforms=15;
|
|
404
|
+
# ...
|
|
405
|
+
# CC placentae isoforms. All tissues differentially splice exon 13;
|
|
406
|
+
# CC Name=A; Synonyms=no del;
|
|
407
|
+
# CC IsoId=P15529-1; Sequence=Displayed;
|
|
389
408
|
#
|
|
390
409
|
# returns contents of the "DATABASE".
|
|
391
410
|
# * Bio::SPTR#cc('DATABASE') -> Array
|
|
392
|
-
#
|
|
411
|
+
# [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
|
|
393
412
|
#
|
|
394
|
-
#
|
|
413
|
+
# CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
|
|
395
414
|
#
|
|
396
415
|
# returns contents of the "MASS SPECTROMETRY".
|
|
397
416
|
# * Bio::SPTR#cc('MASS SPECTROMETRY') -> Array
|
|
398
|
-
#
|
|
417
|
+
# [{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
|
|
399
418
|
#
|
|
400
|
-
#
|
|
419
|
+
# CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
|
|
420
|
+
#
|
|
421
|
+
# === CC lines (>=0, optional)
|
|
422
|
+
# CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
|
|
423
|
+
# CC IN LIVER, KIDNEY, LUNG AND BRAIN.
|
|
424
|
+
#
|
|
425
|
+
# CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
|
|
426
|
+
# CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
|
|
401
427
|
#
|
|
402
|
-
# CC lines (>=0, optional)
|
|
403
|
-
# CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
|
|
404
|
-
# CC IN LIVER, KIDNEY, LUNG AND BRAIN.
|
|
405
|
-
#
|
|
406
|
-
# CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
|
|
407
|
-
# CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
|
|
408
428
|
def cc(tag = nil)
|
|
409
429
|
unless @data['CC']
|
|
410
430
|
cc = Hash.new
|
|
@@ -541,7 +561,7 @@ class SPTR < EMBLDB
|
|
|
541
561
|
|
|
542
562
|
# returns contents in a line of the CC INTERACTION section.
|
|
543
563
|
#
|
|
544
|
-
#
|
|
564
|
+
# CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
|
|
545
565
|
def cc_interaction_parse(str)
|
|
546
566
|
it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/)
|
|
547
567
|
it.map {|ent|
|
|
@@ -555,9 +575,9 @@ class SPTR < EMBLDB
|
|
|
555
575
|
# returns databases cross-references in the DR lines.
|
|
556
576
|
# * Bio::EMBLDB#dr -> Hash w/in Array
|
|
557
577
|
#
|
|
558
|
-
# DR Line; defabases cross-reference (>=0)
|
|
559
|
-
#
|
|
560
|
-
#
|
|
578
|
+
# === DR Line; database cross-reference (>=0)
|
|
579
|
+
# DR database_identifier; primary_identifier; secondary_identifier.
|
|
580
|
+
# one cross_ref per line
|
|
561
581
|
@@dr_database_identifier = ['EMBL','CARBBANK','DICTYDB','ECO2DBASE',
|
|
562
582
|
'ECOGENE',
|
|
563
583
|
'FLYBASE','GCRDB','HIV','HSC-2DPAGE','HSSP','INTERPRO','MAIZEDB',
|
|
@@ -574,23 +594,23 @@ class SPTR < EMBLDB
|
|
|
574
594
|
|
|
575
595
|
# returns contents in the feature table.
|
|
576
596
|
# * Bio::SPTR#ft -> Hash
|
|
577
|
-
#
|
|
578
|
-
#
|
|
597
|
+
# {'feature_name' => [{'From' => str, 'To' => str,
|
|
598
|
+
# 'Description' => str, 'FTId' => str}],...}
|
|
579
599
|
#
|
|
580
600
|
# returns an Array of the information about the feature_name in the feature table.
|
|
581
601
|
# * Bio::SPTR#ft(feature_name) -> Array of Hash
|
|
582
|
-
#
|
|
583
|
-
#
|
|
584
|
-
# FT Line; feature table data (>=0, optional)
|
|
585
|
-
#
|
|
586
|
-
#
|
|
587
|
-
#
|
|
588
|
-
#
|
|
589
|
-
#
|
|
590
|
-
#
|
|
591
|
-
#
|
|
592
|
-
#
|
|
593
|
-
#
|
|
602
|
+
# [{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
|
|
603
|
+
#
|
|
604
|
+
# === FT Line; feature table data (>=0, optional)
|
|
605
|
+
#
|
|
606
|
+
# Col Data item
|
|
607
|
+
# ----- -----------------
|
|
608
|
+
# 1- 2 FT
|
|
609
|
+
# 6-13 Feature name
|
|
610
|
+
# 15-20 `FROM' endpoint
|
|
611
|
+
# 22-27 `TO' endpoint
|
|
612
|
+
# 35-75 Description (>=0 per key)
|
|
613
|
+
# ----- -----------------
|
|
594
614
|
def ft(feature_name = nil)
|
|
595
615
|
unless @data['FT']
|
|
596
616
|
table = Hash.new()
|
|
@@ -692,9 +712,9 @@ class SPTR < EMBLDB
|
|
|
692
712
|
# * Bio::SPTR#sq(key) -> int or str
|
|
693
713
|
# * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 'CRC64']
|
|
694
714
|
#
|
|
695
|
-
# SQ Line; sequence header (1/entry)
|
|
696
|
-
#
|
|
697
|
-
#
|
|
715
|
+
# === SQ Line; sequence header (1/entry)
|
|
716
|
+
# SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64;
|
|
717
|
+
# SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
|
|
698
718
|
#
|
|
699
719
|
# MW, Dalton unit.
|
|
700
720
|
# CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
|
|
@@ -1,7 +1,34 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/embl/swissprot.rb - SwissProt database class
|
|
2
|
+
# = bio/db/embl/swissprot.rb - SwissProt database class
|
|
3
3
|
#
|
|
4
|
-
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
# $Id: swissprot.rb,v 1.4 2006/01/28 06:40:38 nakao Exp $
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# Name space for SwissProt specific methods.
|
|
12
|
+
#
|
|
13
|
+
# SwissProt (before UniProtKB/SwissProt) specific methods are defined in
|
|
14
|
+
# this class. Shared methods for UniProtKB/SwissProt and TrEMBL classes
|
|
15
|
+
# are defined in Bio::SPTR class.
|
|
16
|
+
#
|
|
17
|
+
# == Examples
|
|
18
|
+
#
|
|
19
|
+
# str = File.read("p53_human.swiss")
|
|
20
|
+
# obj = Bio::SwissProt.new(str)
|
|
21
|
+
# obj.entry_id #=> "P53_HUMAN"
|
|
22
|
+
#
|
|
23
|
+
# == References
|
|
24
|
+
#
|
|
25
|
+
# * Swiss-Prot Protein knowledgebase
|
|
26
|
+
# http://au.expasy.org/sprot/
|
|
27
|
+
#
|
|
28
|
+
# * Swiss-Prot Protein Knowledgebase User Manual
|
|
29
|
+
# http://au.expasy.org/sprot/userman.html
|
|
30
|
+
#
|
|
31
|
+
#--
|
|
5
32
|
#
|
|
6
33
|
# This library is free software; you can redistribute it and/or
|
|
7
34
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -17,13 +44,15 @@
|
|
|
17
44
|
# License along with this library; if not, write to the Free Software
|
|
18
45
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
46
|
#
|
|
20
|
-
|
|
47
|
+
#++
|
|
21
48
|
#
|
|
22
49
|
|
|
23
50
|
require 'bio/db/embl/sptr'
|
|
24
51
|
|
|
25
52
|
module Bio
|
|
26
53
|
|
|
54
|
+
# Parser class for SwissProt database entry.
|
|
55
|
+
# See also Bio::SPTR class.
|
|
27
56
|
class SwissProt < SPTR
|
|
28
57
|
# Nothing to do (SwissProt format is abstracted in SPTR)
|
|
29
58
|
end
|
data/lib/bio/db/embl/trembl.rb
CHANGED
|
@@ -1,7 +1,34 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/embl/trembl.rb - TrEMBL database class
|
|
2
|
+
# = bio/db/embl/trembl.rb - TrEMBL database class
|
|
3
3
|
#
|
|
4
|
-
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
# $Id: trembl.rb,v 1.4 2006/01/28 06:40:38 nakao Exp $
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# Name space for TrEMBL specific methods.
|
|
12
|
+
#
|
|
13
|
+
# TrEMBL specific methods are defined in this class.
|
|
14
|
+
# Shared methods for UniProtKB/SwissProt and TrEMBL classes are
|
|
15
|
+
# defined in Bio::SPTR class.
|
|
16
|
+
#
|
|
17
|
+
# == Examples
|
|
18
|
+
#
|
|
19
|
+
# str = File.read("Q2UNG2_ASPOR.trembl")
|
|
20
|
+
# obj = Bio::TrEMBL.new(str)
|
|
21
|
+
# obj.entry_id #=> "Q2UNG2_ASPOR"
|
|
22
|
+
#
|
|
23
|
+
# == References
|
|
24
|
+
#
|
|
25
|
+
# * TrEMBL Computer-annotated supplement to Swiss-Prot
|
|
26
|
+
# http://au.expasy.org/sprot/
|
|
27
|
+
#
|
|
28
|
+
# * TrEMBL Computer-annotated supplement to Swiss-Prot User Manual
|
|
29
|
+
# http://au.expasy.org/sprot/userman.html
|
|
30
|
+
#
|
|
31
|
+
#--
|
|
5
32
|
#
|
|
6
33
|
# This library is free software; you can redistribute it and/or
|
|
7
34
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -17,13 +44,15 @@
|
|
|
17
44
|
# License along with this library; if not, write to the Free Software
|
|
18
45
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
46
|
#
|
|
20
|
-
|
|
47
|
+
#++
|
|
21
48
|
#
|
|
22
49
|
|
|
23
50
|
require 'bio/db/embl/sptr'
|
|
24
51
|
|
|
25
52
|
module Bio
|
|
26
53
|
|
|
54
|
+
# Parser class for TrEMBL database entry.
|
|
55
|
+
# See also Bio::SPTR class.
|
|
27
56
|
class TrEMBL < SPTR
|
|
28
57
|
# Nothing to do (TrEMBL format is abstracted in SPTR)
|
|
29
58
|
end
|