bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/db/embl/sptr.rb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2001-
|
|
5
|
-
# License::
|
|
4
|
+
# Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id: sptr.rb,v 1.
|
|
7
|
+
# $Id: sptr.rb,v 1.36 2007/04/05 23:35:40 trevor Exp $
|
|
8
8
|
#
|
|
9
9
|
# == Description
|
|
10
10
|
#
|
|
@@ -31,24 +31,7 @@
|
|
|
31
31
|
# * The UniProtKB/SwissProt/TrEMBL User Manual
|
|
32
32
|
# http://www.expasy.org/sprot/userman.html
|
|
33
33
|
#
|
|
34
|
-
|
|
35
|
-
#
|
|
36
|
-
# This library is free software; you can redistribute it and/or
|
|
37
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
38
|
-
# License as published by the Free Software Foundation; either
|
|
39
|
-
# version 2 of the License, or (at your option) any later version.
|
|
40
|
-
#
|
|
41
|
-
# This library is distributed in the hope that it will be useful,
|
|
42
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
43
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
44
|
-
# Lesser General Public License for more details.
|
|
45
|
-
#
|
|
46
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
47
|
-
# License along with this library; if not, write to the Free Software
|
|
48
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
49
|
-
#
|
|
50
|
-
#++
|
|
51
|
-
#
|
|
34
|
+
|
|
52
35
|
|
|
53
36
|
require 'bio/db'
|
|
54
37
|
require 'bio/db/embl/common'
|
|
@@ -62,7 +45,6 @@ class SPTR < EMBLDB
|
|
|
62
45
|
@@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/
|
|
63
46
|
@@data_class = ["STANDARD", "PRELIMINARY"]
|
|
64
47
|
|
|
65
|
-
|
|
66
48
|
# returns a Hash of the ID line.
|
|
67
49
|
#
|
|
68
50
|
# returns a content (Int or String) of the ID line by a given key.
|
|
@@ -73,30 +55,25 @@ class SPTR < EMBLDB
|
|
|
73
55
|
# #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
|
|
74
56
|
#
|
|
75
57
|
# === Examples
|
|
76
|
-
# obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
|
|
58
|
+
# obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
|
|
59
|
+
# "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
|
|
77
60
|
#
|
|
78
61
|
# obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
|
|
79
62
|
#
|
|
80
63
|
def id_line(key = nil)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if key
|
|
92
|
-
@data['ID'][key] # String/Int
|
|
93
|
-
else
|
|
94
|
-
@data['ID'] # Hash
|
|
95
|
-
end
|
|
64
|
+
return id_line[key] if key
|
|
65
|
+
return @data['ID'] if @data['ID']
|
|
66
|
+
|
|
67
|
+
part = @orig['ID'].split(/ +/)
|
|
68
|
+
@data['ID'] = {
|
|
69
|
+
'ENTRY_NAME' => part[1],
|
|
70
|
+
'DATA_CLASS' => part[2].sub(/;/,''),
|
|
71
|
+
'MOLECULE_TYPE' => part[3].sub(/;/,''),
|
|
72
|
+
'SEQUENCE_LENGTH' => part[4].to_i
|
|
73
|
+
}
|
|
96
74
|
end
|
|
97
75
|
|
|
98
76
|
|
|
99
|
-
|
|
100
77
|
# returns a ENTRY_NAME in the ID line.
|
|
101
78
|
#
|
|
102
79
|
def entry_id
|
|
@@ -144,20 +121,15 @@ class SPTR < EMBLDB
|
|
|
144
121
|
# DT DD-MMM-YYY (rel. NN, Last sequence update)
|
|
145
122
|
# DT DD-MMM-YYY (rel. NN, Last annotation update)
|
|
146
123
|
def dt(key = nil)
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
if key
|
|
157
|
-
@data['DT'][key]
|
|
158
|
-
else
|
|
159
|
-
@data['DT']
|
|
160
|
-
end
|
|
124
|
+
return dt[key] if key
|
|
125
|
+
return @data['DT'] if @data['DT']
|
|
126
|
+
|
|
127
|
+
part = self.get('DT').split(/\n/)
|
|
128
|
+
@data['DT'] = {
|
|
129
|
+
'created' => part[0].sub(/\w{2} /,'').strip,
|
|
130
|
+
'sequence' => part[1].sub(/\w{2} /,'').strip,
|
|
131
|
+
'annotation' => part[2].sub(/\w{2} /,'').strip
|
|
132
|
+
}
|
|
161
133
|
end
|
|
162
134
|
|
|
163
135
|
|
|
@@ -214,16 +186,18 @@ class SPTR < EMBLDB
|
|
|
214
186
|
#
|
|
215
187
|
# === GN Line: Gene name(s) (>=0, optional)
|
|
216
188
|
def gn
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
189
|
+
unless @data['GN']
|
|
190
|
+
case fetch('GN')
|
|
191
|
+
when /Name=/,/ORFNames=/
|
|
192
|
+
@data['GN'] = gn_uniprot_parser
|
|
193
|
+
else
|
|
194
|
+
@data['GN'] = gn_old_parser
|
|
195
|
+
end
|
|
224
196
|
end
|
|
197
|
+
@data['GN']
|
|
225
198
|
end
|
|
226
199
|
|
|
200
|
+
|
|
227
201
|
# returns contents in the old style GN line.
|
|
228
202
|
# === GN Line: Gene name(s) (>=0, optional)
|
|
229
203
|
# GN HNS OR DRDX OR OSMZ OR BGLY.
|
|
@@ -245,7 +219,7 @@ class SPTR < EMBLDB
|
|
|
245
219
|
}
|
|
246
220
|
}
|
|
247
221
|
end
|
|
248
|
-
|
|
222
|
+
@data['GN'] = names
|
|
249
223
|
end
|
|
250
224
|
private :gn_old_parser
|
|
251
225
|
|
|
@@ -318,11 +292,13 @@ class SPTR < EMBLDB
|
|
|
318
292
|
# OS Genus species (name0) (name1).
|
|
319
293
|
# OS Genus species (name0), G s0 (name0), and G s (name0) (name1).
|
|
320
294
|
# OS Homo sapiens (Human), and Rarrus norveticus (Rat)
|
|
295
|
+
# OS Hippotis sp. Clark and Watts 825.
|
|
296
|
+
# OS unknown cyperaceous sp.
|
|
321
297
|
def os(num = nil)
|
|
322
298
|
unless @data['OS']
|
|
323
299
|
os = Array.new
|
|
324
300
|
fetch('OS').split(/, and|, /).each do |tmp|
|
|
325
|
-
if tmp =~ /(
|
|
301
|
+
if tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
|
|
326
302
|
org = $1
|
|
327
303
|
tmp =~ /(\(.+\))/
|
|
328
304
|
os.push({'name' => $1, 'os' => org})
|
|
@@ -375,17 +351,224 @@ class SPTR < EMBLDB
|
|
|
375
351
|
return @data['OX']
|
|
376
352
|
end
|
|
377
353
|
|
|
354
|
+
# === The OH Line;
|
|
355
|
+
#
|
|
356
|
+
# OH NCBI_TaxID=TaxID; HostName.
|
|
357
|
+
# http://br.expasy.org/sprot/userman.html#OH_line
|
|
358
|
+
def oh
|
|
359
|
+
unless @data['OH']
|
|
360
|
+
@data['OH'] = fetch('OH').split("\. ").map {|x|
|
|
361
|
+
if x =~ /NCBI_TaxID=(\d+);/
|
|
362
|
+
taxid = $1
|
|
363
|
+
else
|
|
364
|
+
raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
|
|
365
|
+
$!, "\n", get('OH'), "\n"].join
|
|
366
|
+
|
|
367
|
+
end
|
|
368
|
+
if x =~ /NCBI_TaxID=\d+; (.+)/
|
|
369
|
+
host_name = $1
|
|
370
|
+
host_name.sub!(/\.$/, '')
|
|
371
|
+
else
|
|
372
|
+
host_name = nil
|
|
373
|
+
end
|
|
374
|
+
{'NCBI_TaxID' => taxid, 'HostName' => host_name}
|
|
375
|
+
}
|
|
376
|
+
end
|
|
377
|
+
@data['OH']
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
|
|
378
381
|
|
|
379
382
|
# Bio::EMBLDB::Common#ref -> Array
|
|
380
383
|
# R Lines
|
|
381
384
|
# RN RC RP RX RA RT RL
|
|
382
385
|
|
|
386
|
+
# returns contents in the R lines.
|
|
387
|
+
# * Bio::EMBLDB::Common#ref -> [ <refernece information Hash>* ]
|
|
388
|
+
# where <reference information Hash> is:
|
|
389
|
+
# {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
|
|
390
|
+
# 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
|
|
391
|
+
#
|
|
392
|
+
# R Lines
|
|
393
|
+
# * RN RC RP RX RA RT RL RG
|
|
394
|
+
def ref
|
|
395
|
+
unless @data['R']
|
|
396
|
+
@data['R'] = [get('R').split(/\nRN /)].flatten.map { |str|
|
|
397
|
+
hash = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
|
|
398
|
+
'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
|
|
399
|
+
str = 'RN ' + str unless /^RN / =~ str
|
|
400
|
+
|
|
401
|
+
str.split("\n").each do |line|
|
|
402
|
+
if /^(R[NPXARLCTG]) (.+)/ =~ line
|
|
403
|
+
hash[$1] += $2 + ' '
|
|
404
|
+
else
|
|
405
|
+
raise "Invalid format in R lines, \n[#{line}]\n"
|
|
406
|
+
end
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
hash['RN'] = set_RN(hash['RN'])
|
|
410
|
+
hash['RC'] = set_RC(hash['RC'])
|
|
411
|
+
hash['RP'] = set_RP(hash['RP'])
|
|
412
|
+
hash['RX'] = set_RX(hash['RX'])
|
|
413
|
+
hash['RA'] = set_RA(hash['RA'])
|
|
414
|
+
hash['RT'] = set_RT(hash['RT'])
|
|
415
|
+
hash['RL'] = set_RL(hash['RL'])
|
|
416
|
+
hash['RG'] = set_RG(hash['RG'])
|
|
417
|
+
|
|
418
|
+
hash
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
end
|
|
422
|
+
@data['R']
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
def set_RN(data)
|
|
426
|
+
data.strip
|
|
427
|
+
end
|
|
428
|
+
|
|
429
|
+
def set_RC(data)
|
|
430
|
+
data.scan(/([STP]\w+)=(.+);/).map { |comment|
|
|
431
|
+
[comment[1].split(/, and |, /)].flatten.map { |text|
|
|
432
|
+
{'Token' => comment[0], 'Text' => text}
|
|
433
|
+
}
|
|
434
|
+
}.flatten
|
|
435
|
+
end
|
|
436
|
+
private :set_RC
|
|
437
|
+
|
|
438
|
+
def set_RP(data)
|
|
439
|
+
data = data.strip
|
|
440
|
+
data = data.sub(/\.$/, '')
|
|
441
|
+
data.split(/, AND |, /i).map {|x|
|
|
442
|
+
x = x.strip
|
|
443
|
+
x = x.gsub(' ', ' ')
|
|
444
|
+
}
|
|
445
|
+
end
|
|
446
|
+
private :set_RP
|
|
383
447
|
|
|
384
|
-
|
|
385
|
-
'
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
448
|
+
def set_RX(data)
|
|
449
|
+
rx = {'MEDLINE' => nil, 'PubMed' => nil, 'DOI' => nil}
|
|
450
|
+
if data =~ /MEDLINE=(.+?);/
|
|
451
|
+
rx['MEDLINE'] = $1
|
|
452
|
+
end
|
|
453
|
+
if data =~ /PubMed=(.+?);/
|
|
454
|
+
rx['PubMed'] = $1
|
|
455
|
+
end
|
|
456
|
+
if data =~ /DOI=(.+?);/
|
|
457
|
+
rx['DOI'] = $1
|
|
458
|
+
end
|
|
459
|
+
rx
|
|
460
|
+
end
|
|
461
|
+
private :set_RX
|
|
462
|
+
|
|
463
|
+
def set_RA(data)
|
|
464
|
+
data = data.sub(/; *$/, '')
|
|
465
|
+
end
|
|
466
|
+
private :set_RA
|
|
467
|
+
|
|
468
|
+
def set_RT(data)
|
|
469
|
+
data = data.sub(/; *$/, '')
|
|
470
|
+
data = data.gsub(/(^"|"$)/, '')
|
|
471
|
+
end
|
|
472
|
+
private :set_RT
|
|
473
|
+
|
|
474
|
+
def set_RL(data)
|
|
475
|
+
data = data.strip
|
|
476
|
+
end
|
|
477
|
+
private :set_RL
|
|
478
|
+
|
|
479
|
+
def set_RG(data)
|
|
480
|
+
data = data.split('; ')
|
|
481
|
+
end
|
|
482
|
+
private :set_RG
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
# returns Bio::Reference object from Bio::EMBLDB::Common#ref.
|
|
487
|
+
# * Bio::EMBLDB::Common#ref -> Bio::References
|
|
488
|
+
def references
|
|
489
|
+
unless @data['references']
|
|
490
|
+
ary = self.ref.map {|ent|
|
|
491
|
+
hash = Hash.new('')
|
|
492
|
+
ent.each {|key, value|
|
|
493
|
+
case key
|
|
494
|
+
when 'RA'
|
|
495
|
+
hash['authors'] = value.split(/, /)
|
|
496
|
+
when 'RT'
|
|
497
|
+
hash['title'] = value
|
|
498
|
+
when 'RL'
|
|
499
|
+
if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
|
|
500
|
+
hash['journal'] = $1
|
|
501
|
+
hash['volume'] = $2
|
|
502
|
+
hash['issue'] = $3
|
|
503
|
+
hash['pages'] = $4
|
|
504
|
+
hash['year'] = $5
|
|
505
|
+
else
|
|
506
|
+
hash['journal'] = value
|
|
507
|
+
end
|
|
508
|
+
when 'RX' # PUBMED, MEDLINE
|
|
509
|
+
value.split('.').each {|item|
|
|
510
|
+
tag, xref = item.split(/; /).map {|i| i.strip }
|
|
511
|
+
hash[ tag.downcase ] = xref
|
|
512
|
+
}
|
|
513
|
+
end
|
|
514
|
+
}
|
|
515
|
+
Reference.new(hash)
|
|
516
|
+
}
|
|
517
|
+
@data['references'] = References.new(ary)
|
|
518
|
+
end
|
|
519
|
+
@data['references']
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
# === The HI line
|
|
528
|
+
# Bio::SPTR#hi #=> hash
|
|
529
|
+
def hi
|
|
530
|
+
unless @data['HI']
|
|
531
|
+
@data['HI'] = []
|
|
532
|
+
fetch('HI').split(/\. /).each do |hlist|
|
|
533
|
+
hash = {'Category' => '', 'Keywords' => [], 'Keyword' => ''}
|
|
534
|
+
hash['Category'], hash['Keywords'] = hlist.split(': ')
|
|
535
|
+
hash['Keywords'] = hash['Keywords'].split('; ')
|
|
536
|
+
hash['Keyword'] = hash['Keywords'].pop
|
|
537
|
+
hash['Keyword'].sub!(/\.$/, '')
|
|
538
|
+
@data['HI'] << hash
|
|
539
|
+
end
|
|
540
|
+
end
|
|
541
|
+
@data['HI']
|
|
542
|
+
end
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
@@cc_topics = ['PHARMACEUTICAL',
|
|
546
|
+
'BIOTECHNOLOGY',
|
|
547
|
+
'TOXIC DOSE',
|
|
548
|
+
'ALLERGEN',
|
|
549
|
+
'RNA EDITING',
|
|
550
|
+
'POLYMORPHISM',
|
|
551
|
+
'BIOPHYSICOCHEMICAL PROPERTIES',
|
|
552
|
+
'MASS SPECTROMETRY',
|
|
553
|
+
'WEB RESOURCE',
|
|
554
|
+
'ENZYME REGULATION',
|
|
555
|
+
'DISEASE',
|
|
556
|
+
'INTERACTION',
|
|
557
|
+
'DEVELOPMENTAL STAGE',
|
|
558
|
+
'INDUCTION',
|
|
559
|
+
'CAUTION',
|
|
560
|
+
'ALTERNATIVE PRODUCTS',
|
|
561
|
+
'DOMAIN',
|
|
562
|
+
'PTM',
|
|
563
|
+
'MISCELLANEOUS',
|
|
564
|
+
'TISSUE SPECIFICITY',
|
|
565
|
+
'COFACTOR',
|
|
566
|
+
'PATHWAY',
|
|
567
|
+
'SUBUNIT',
|
|
568
|
+
'CATALYTIC ACTIVITY',
|
|
569
|
+
'SUBCELLULAR LOCATION',
|
|
570
|
+
'FUNCTION',
|
|
571
|
+
'SIMILARITY']
|
|
389
572
|
# returns contents in the CC lines.
|
|
390
573
|
# * Bio::SPTR#cc -> Hash
|
|
391
574
|
#
|
|
@@ -425,27 +608,44 @@ class SPTR < EMBLDB
|
|
|
425
608
|
# CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
|
|
426
609
|
# CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
|
|
427
610
|
#
|
|
428
|
-
|
|
611
|
+
# See also http://www.expasy.org/sprot/userman.html#CC_line
|
|
612
|
+
#
|
|
613
|
+
def cc(topic = nil)
|
|
429
614
|
unless @data['CC']
|
|
430
615
|
cc = Hash.new
|
|
431
|
-
|
|
616
|
+
comment_border= '-' * (77 - 4 + 1)
|
|
432
617
|
dlm = /-!- /
|
|
433
618
|
|
|
434
|
-
|
|
619
|
+
# 12KD_MYCSM has no CC lines.
|
|
620
|
+
return cc if get('CC').size == 0
|
|
621
|
+
|
|
622
|
+
cc_raw = fetch('CC')
|
|
623
|
+
|
|
624
|
+
# Removing the copyright statement.
|
|
625
|
+
cc_raw.sub!(/ *---.+---/m, '')
|
|
626
|
+
|
|
627
|
+
# Not any CC Lines without the copyright statement.
|
|
628
|
+
return cc if cc_raw == ''
|
|
435
629
|
|
|
436
630
|
begin
|
|
437
|
-
|
|
631
|
+
cc_raw, copyright = cc_raw.split(/#{comment_border}/)[0]
|
|
632
|
+
cc_raw = cc_raw.sub(dlm,'')
|
|
633
|
+
cc_raw.split(dlm).each do |tmp|
|
|
634
|
+
tmp = tmp.strip
|
|
635
|
+
|
|
438
636
|
if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
|
|
439
637
|
key = $1
|
|
440
|
-
body = $2
|
|
638
|
+
body = $2
|
|
639
|
+
body.gsub!(/- (?!AND)/,'-')
|
|
640
|
+
body.strip!
|
|
441
641
|
unless cc[key]
|
|
442
642
|
cc[key] = [body]
|
|
443
643
|
else
|
|
444
644
|
cc[key].push(body)
|
|
445
645
|
end
|
|
446
646
|
else
|
|
447
|
-
raise ["Error: [#{entry_id}]: CC Lines", '',
|
|
448
|
-
|
|
647
|
+
raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
|
|
648
|
+
'', get('CC'),''].join("\n")
|
|
449
649
|
end
|
|
450
650
|
end
|
|
451
651
|
rescue NameError
|
|
@@ -461,29 +661,62 @@ class SPTR < EMBLDB
|
|
|
461
661
|
@data['CC'] = cc
|
|
462
662
|
end
|
|
463
663
|
|
|
464
|
-
case tag
|
|
465
|
-
when 'ALTERNATIVE PRODUCTS'
|
|
466
|
-
ap = @data['CC']['ALTERNATIVE PRODUCTS'].to_s
|
|
467
|
-
return ap unless ap
|
|
468
|
-
|
|
469
|
-
# Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
|
|
470
|
-
tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil, 'Variants' => []}
|
|
471
|
-
|
|
472
|
-
if /Event=(.+?);/ =~ ap
|
|
473
|
-
tmp['Event'] = $1
|
|
474
|
-
end
|
|
475
|
-
if /Named isoforms=(\S+?);/ =~ ap
|
|
476
|
-
tmp['Named isoforms'] = $1
|
|
477
|
-
end
|
|
478
|
-
if /Comment=(.+?);/m =~ ap
|
|
479
|
-
tmp['Comment'] = $1
|
|
480
|
-
end
|
|
481
|
-
ap.scan(/Name=.+?Sequence=.+?;/).each do |ent|
|
|
482
|
-
tmp['Variants'] << cc_ap_variants_parse(ent)
|
|
483
|
-
end
|
|
484
|
-
return tmp
|
|
485
|
-
|
|
486
664
|
|
|
665
|
+
case topic
|
|
666
|
+
when 'ALLERGEN'
|
|
667
|
+
return @data['CC'][topic]
|
|
668
|
+
when 'ALTERNATIVE PRODUCTS'
|
|
669
|
+
return cc_alternative_products(@data['CC'][topic])
|
|
670
|
+
when 'BIOPHYSICOCHEMICAL PROPERTIES'
|
|
671
|
+
return cc_biophysiochemical_properties(@data['CC'][topic])
|
|
672
|
+
when 'BIOTECHNOLOGY'
|
|
673
|
+
return @data['CC'][topic]
|
|
674
|
+
when 'CATALITIC ACTIVITY'
|
|
675
|
+
return cc_catalytic_activity(@data['CC'][topic])
|
|
676
|
+
when 'CAUTION'
|
|
677
|
+
return cc_caution(@data['CC'][topic])
|
|
678
|
+
when 'COFACTOR'
|
|
679
|
+
return @data['CC'][topic]
|
|
680
|
+
when 'DEVELOPMENTAL STAGE'
|
|
681
|
+
return @data['CC'][topic].to_s
|
|
682
|
+
when 'DISEASE'
|
|
683
|
+
return @data['CC'][topic].to_s
|
|
684
|
+
when 'DOMAIN'
|
|
685
|
+
return @data['CC'][topic]
|
|
686
|
+
when 'ENZYME REGULATION'
|
|
687
|
+
return @data['CC'][topic].to_s
|
|
688
|
+
when 'FUNCTION'
|
|
689
|
+
return @data['CC'][topic].to_s
|
|
690
|
+
when 'INDUCTION'
|
|
691
|
+
return @data['CC'][topic].to_s
|
|
692
|
+
when 'INTERACTION'
|
|
693
|
+
return cc_interaction(@data['CC'][topic])
|
|
694
|
+
when 'MASS SPECTROMETRY'
|
|
695
|
+
return cc_mass_spectrometry(@data['CC'][topic])
|
|
696
|
+
when 'MISCELLANEOUS'
|
|
697
|
+
return @data['CC'][topic]
|
|
698
|
+
when 'PATHWAY'
|
|
699
|
+
return cc_pathway(@data['CC'][topic])
|
|
700
|
+
when 'PHARMACEUTICAL'
|
|
701
|
+
return @data['CC'][topic]
|
|
702
|
+
when 'POLYMORPHISM'
|
|
703
|
+
return @data['CC'][topic]
|
|
704
|
+
when 'PTM'
|
|
705
|
+
return @data['CC'][topic]
|
|
706
|
+
when 'RNA EDITING'
|
|
707
|
+
return cc_rna_editing(@data['CC'][topic])
|
|
708
|
+
when 'SIMILARITY'
|
|
709
|
+
return @data['CC'][topic]
|
|
710
|
+
when 'SUBCELLULAR LOCATION'
|
|
711
|
+
return cc_subcellular_location(@data['CC'][topic])
|
|
712
|
+
when 'SUBUNIT'
|
|
713
|
+
return @data['CC'][topic]
|
|
714
|
+
when 'TISSUE SPECIFICITY'
|
|
715
|
+
return @data['CC'][topic]
|
|
716
|
+
when 'TOXIC DOSE'
|
|
717
|
+
return @data['CC'][topic]
|
|
718
|
+
when 'WEB RESOURCE'
|
|
719
|
+
return cc_web_resource(@data['CC'][topic])
|
|
487
720
|
when 'DATABASE'
|
|
488
721
|
# DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
|
|
489
722
|
tmp = Array.new
|
|
@@ -507,73 +740,208 @@ class SPTR < EMBLDB
|
|
|
507
740
|
tmp.push(db)
|
|
508
741
|
end
|
|
509
742
|
return tmp
|
|
510
|
-
|
|
511
|
-
when 'MASS SPECTOROMETRY'
|
|
512
|
-
# MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
|
|
513
|
-
tmp = Array.new
|
|
514
|
-
ms = @data['CC']['MASS SPECTOROMETRY']
|
|
515
|
-
return ms unless ms
|
|
516
|
-
|
|
517
|
-
ms.each do |m|
|
|
518
|
-
mass = {'MW'=>nil,'MW_ERR'=>nil,'METHOD'=>nil,'RANGE'=>nil}
|
|
519
|
-
m.sub(/.$/,'').split(/;/).each do |line|
|
|
520
|
-
case line
|
|
521
|
-
when /MW=(.+)/
|
|
522
|
-
mass['MW'] = $1.to_f
|
|
523
|
-
when /MW_ERR=(.+)/
|
|
524
|
-
mass['MW_ERR'] = $1.to_f
|
|
525
|
-
when /METHOD="(.+)"/
|
|
526
|
-
mass['METHOD'] = $1.to_s
|
|
527
|
-
when /RANGE="(\d+-\d+)"/
|
|
528
|
-
mass['RANGE'] = $1 # RANGE class ?
|
|
529
|
-
end
|
|
530
|
-
end
|
|
531
|
-
tmp.push(mass)
|
|
532
|
-
end
|
|
533
|
-
return tmp
|
|
534
|
-
|
|
535
|
-
when 'INTERACTION'
|
|
536
|
-
return cc_interaction_parse(@data['CC']['INTERACTION'].to_s)
|
|
537
|
-
|
|
538
743
|
when nil
|
|
539
744
|
return @data['CC']
|
|
540
|
-
|
|
541
745
|
else
|
|
542
|
-
return @data['CC'][
|
|
746
|
+
return @data['CC'][topic]
|
|
543
747
|
end
|
|
544
748
|
end
|
|
545
749
|
|
|
546
750
|
|
|
751
|
+
# Parses the body of a CC "ALTERNATIVE PRODUCTS" topic.
#
# data:: Array of comment-body Strings for the topic, a single String,
#        or nil when the entry has no such topic.
#
# Returns a Hash with the keys
#   'Event'          => Array of event names ("" when absent)
#   'Named isoforms' => String ("" when absent)
#   'Comment'        => String ("" when absent)
#   'Variants'       => Array of Hashes, one per "Name=...Sequence=...;"
#                       group, built by cc_alternative_products_variants
def cc_alternative_products(data)
  # Array#to_s only concatenates the elements on Ruby 1.8; on later versions
  # it returns the inspect form (brackets, escaped quotes).  Join explicitly
  # so the patterns below always see the raw comment text.  Array(nil) is
  # [], so a missing topic still yields the empty skeleton.
  ap = Array(data).join

  # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequence]+
  tmp = {'Event' => "", 'Named isoforms' => "", 'Comment' => "",
         'Variants' => []}
  if /Event=(.+?);/ =~ ap
    # Several events may be listed, separated by ", ".
    tmp['Event'] = $1.sub(/;/,'').split(/, /)
  end
  if /Named isoforms=(\S+?);/ =~ ap
    tmp['Named isoforms'] = $1
  end
  # /m lets a comment run across line breaks up to the first ';'.
  if /Comment=(.+?);/m =~ ap
    tmp['Comment'] = $1
  end
  ap.scan(/Name=.+?Sequence=.+?;/).each do |ent|
    tmp['Variants'] << cc_alternative_products_variants(ent)
  end
  return tmp
end
|
|
773
|
+
private :cc_alternative_products
|
|
547
774
|
|
|
548
|
-
def
|
|
549
|
-
|
|
550
|
-
|
|
775
|
+
# Turns one "Name=...; Synonyms=...; IsoId=...; Sequence=...;" group of an
# ALTERNATIVE PRODUCTS comment into a Hash.
#
# data:: String holding a single isoform group.
#
# Returns a Hash keyed by the field names found in +data+.  'Sequence',
# 'Synonyms' and 'IsoId' values are Arrays (split on ", "); every other
# value is the raw String after the '='.  Fields missing from +data+ keep
# the defaults ('' for 'Name', [] for the list fields).
def cc_alternative_products_variants(data)
  list_fields = ['Sequence', 'Synonyms', 'IsoId']
  variant = {'Name' => '', 'Synonyms' => [], 'IsoId' => [], 'Sequence' => []}

  data.split(/; /).each do |field|
    parts = field.split(/=/)
    key   = parts[0]
    value = parts[1]
    if list_fields.include?(key)
      # Drop the terminating ';' before splitting the comma-separated list.
      value = value.sub(/;/,'').split(/, /)
    end
    variant[key] = value
  end

  variant
end
|
|
786
|
+
private :cc_alternative_products_variants
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
# Parses the body of a CC "BIOPHYSICOCHEMICAL PROPERTIES" topic.
#
# data:: Array of comment-body Strings for the topic (only the first element
#        is examined), or nil when the entry has no such topic.
#
# Returns a Hash with the keys
#   'Absorption'             => Hash, may hold 'Abs(max)' and 'Note'
#   'Kinetic parameters'     => Hash, may hold 'KM', 'Vmax' and 'Note'
#   'pH dependence'          => String ("" when absent)
#   'Redox potential'        => String ("" when absent)
#   'Temperature dependence' => String ("" when absent)
def cc_biophysiochemical_properties(data)
  # A missing topic arrives as nil; treat it as empty text so the caller
  # gets the empty skeleton instead of a NoMethodError from nil[0].
  data = Array(data).first.to_s

  hash = {'Absorption' => {},
          'Kinetic parameters' => {},
          'pH dependence' => "",
          'Redox potential' => "",
          'Temperature dependence' => ""}
  if data =~ /Absorption: Abs\(max\)=(.+?);/
    hash['Absorption']['Abs(max)'] = $1
  end
  if data =~ /Absorption: Abs\(max\)=.+; Note=(.+?);/
    hash['Absorption']['Note'] = $1
  end
  # KM and Vmax are only captured when both are present, in this order.
  if data =~ /Kinetic parameters: KM=(.+?); Vmax=(.+?);/
    hash['Kinetic parameters']['KM'] = $1
    hash['Kinetic parameters']['Vmax'] = $2
  end
  if data =~ /Kinetic parameters: KM=.+; Vmax=.+; Note=(.+?);/
    hash['Kinetic parameters']['Note'] = $1
  end
  if data =~ /pH dependence: (.+?);/
    hash['pH dependence'] = $1
  end
  if data =~ /Redox potential: (.+?);/
    hash['Redox potential'] = $1
  end
  if data =~ /Temperature dependence: (.+?);/
    hash['Temperature dependence'] = $1
  end
  hash
end
|
|
821
|
+
private :cc_biophysiochemical_properties
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
# Returns the text of a CC "CAUTION" topic as a single String.
#
# data:: Array of comment-body Strings for the topic, a single String,
#        or nil when the entry has no such topic ("" is returned then).
def cc_caution(data)
  # Array#to_s concatenates the elements only on Ruby 1.8; later versions
  # return the inspect form.  Join explicitly to get the plain text.
  Array(data).join
end
|
|
559
|
-
private :
|
|
827
|
+
private :cc_caution
|
|
560
828
|
|
|
561
829
|
|
|
562
830
|
# returns the contents of a line of the CC INTERACTION section.
|
|
563
831
|
#
|
|
564
832
|
# CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
|
|
565
|
-
def
|
|
833
|
+
def cc_interaction(data)
  # data is the Array of comment-body Strings of the INTERACTION topic (or
  # nil).  Returns an Array with one Hash per interaction:
  #   'SP_Ac'               => accession before the ':' (own entry_id for "Self")
  #   'identifier'          => first word after the ':' (own entry_id for "Self")
  #   'NbExp'               => String
  #   'IntAct'              => Array of IntAct identifiers
  #   'optional_identifier' => text after the identifier, or nil
  #
  # Array#to_s concatenates the elements only on Ruby 1.8; later versions
  # return the inspect form, which would leak '["' into the first accession.
  # Join explicitly instead.
  str = Array(data).join
  it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/)
  it.map {|ent|
    ent.map! {|x| x.strip }
    if ent[0] =~ /^(.+):(.+)/
      spac = $1
      spid = $2.split(' ')[0]
      optid = nil
    elsif ent[0] =~ /Self/
      # Self-interaction: both fields refer to this entry.
      spac = self.entry_id
      spid = self.entry_id
      optid = nil
    end
    # "ACC:ID extra" carries an optional trailing identifier.
    if ent[0] =~ /^.+:.+ (.+)/
      optid = $1
    end

    {'SP_Ac' => spac,
     'identifier' => spid,
     'NbExp' => ent[1],
     'IntAct' => ent[2].split(', '),
     'optional_identifier' => optid}
  }
end
|
|
858
|
+
private :cc_interaction
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
# Parses the bodies of CC "MASS SPECTROMETRY" topics.
#
#   MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
#
# data:: Array of comment-body Strings, or nil/false (returned unchanged).
#
# Returns an Array with one Hash per element of +data+, holding the keys
# 'MW', 'MW_ERR', 'METHOD', 'RANGE' and 'NOTE'; values are Strings, or nil
# for fields that do not occur.
def cc_mass_spectrometry(data)
  return data unless data

  # Tried in this order for every ';'-separated field; the first pattern
  # that matches decides which key is filled.
  field_patterns = [['MW',     /MW=(.+)/],
                    ['MW_ERR', /MW_ERR=(.+)/],
                    ['METHOD', /METHOD=(.+)/],
                    ['RANGE',  /RANGE=(\d+-\d+)/],
                    ['NOTE',   /NOTE=(.+)/]]

  data.map do |record|
    parsed = {'MW' => nil, 'MW_ERR' => nil, 'METHOD' => nil, 'RANGE' => nil,
              'NOTE' => nil}
    # Strip one trailing character (the closing period in well-formed
    # input), then cut the record at every ';'.
    record.sub(/.$/,'').split(/;/).each do |field|
      field_patterns.each do |name, pattern|
        hit = pattern.match(field)
        next unless hit
        parsed[name] = hit[1]
        break
      end
    end
    parsed
  end
end
|
|
885
|
+
private :cc_mass_spectrometry
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
# Splits the first CC "PATHWAY" comment into its components.
#
# data:: Array of comment-body Strings for the topic, or nil when the entry
#        has no such topic.
#
# Returns an Array of Strings obtained by removing the final period of the
# first comment and splitting it on "; ", " and " or ": ".  Returns nil when
# +data+ is nil or empty.
def cc_pathway(data)
  # A missing topic arrives as nil; hand it back instead of raising
  # NoMethodError (same guard as cc_mass_spectrometry).
  return data unless data

  # Only the first comment is reported, so only that one is parsed.
  first = data[0]
  return nil unless first
  first.sub(/\.$/, '').split(/; | and |: /)
end
|
|
893
|
+
private :cc_pathway
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
# Parses the body of a CC "RNA EDITING" topic.
#
# data:: Array of comment-body Strings for the topic, a single String, or nil.
#
# Returns a Hash with
#   'Modified_positions' => Array of position Strings
#   'Note'               => String ("" when absent)
#
# Raises ArgumentError when no "Modified_positions=" field is found.
def cc_rna_editing(data)
  # Array#to_s concatenates the elements only on Ruby 1.8; later versions
  # return the inspect form, which would leak '"]' into the Note.  Join
  # explicitly instead.
  data = Array(data).join
  entry = {'Modified_positions' => [], 'Note' => ""}
  if data =~ /Modified_positions=(.+?)(\.|;)/
    entry['Modified_positions'] = $1.sub(/\.$/, '').split(', ')
  else
    raise ArgumentError, "Invalid CC RNA Editing lines (#{self.entry_id}):#{$!}\n#{get('CC')}"
  end
  # The note runs to the end of the line, including its final period.
  if data =~ /Note=(.+)/
    entry['Note'] = $1
  end
  entry
end
|
|
909
|
+
private :cc_rna_editing
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
# Splits the first CC "SUBCELLULAR LOCATION" comment into sentences and
# their ';'-separated parts.
#
# data:: Array of comment-body Strings for the topic, or nil when the entry
#        has no such topic.
#
# Returns an Array of Arrays of Strings: one inner Array per ". "-separated
# sentence of the first comment, holding its "; "-separated parts with a
# trailing period removed.  Returns nil when +data+ is nil or empty.
def cc_subcellular_location(data)
  # A missing topic arrives as nil; hand it back instead of raising
  # NoMethodError.
  return data unless data

  # Only the first comment is reported, so only that one is parsed.
  first = data[0]
  return nil unless first
  first.split('. ').map {|sentence|
    sentence.split('; ').map {|part|
      part.sub(/\.$/, '')
    }
  }
end
|
|
921
|
+
private :cc_subcellular_location
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
# CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
|
|
925
|
+
def cc_web_resource(data)
  # data is the Array of comment-body Strings of the WEB RESOURCE topic, or
  # nil when the entry has no such topic.  Returns an Array with one Hash
  # per comment holding 'NAME', 'NOTE' and 'URL' (nil for missing fields),
  # or nil when data is nil.
  #
  # A missing topic arrives as nil; hand it back instead of raising
  # NoMethodError.
  return data unless data

  data.map {|x|
    entry = {'NAME' => nil, 'NOTE' => nil, 'URL' => nil}
    x.split(';').each do |y|
      case y
      when /NAME=(.+)/
        entry['NAME'] = $1.strip
      when /NOTE=(.+)/
        entry['NOTE'] = $1.strip
      when /URL="(.+)"/
        # The address is taken from inside the double quotes.
        entry['URL'] = $1.strip
      end
    end
    entry
  }
end
|
|
573
|
-
|
|
941
|
+
|
|
574
942
|
|
|
575
943
|
# returns databases cross-references in the DR lines.
|
|
576
|
-
# * Bio::
|
|
944
|
+
# * Bio::SPTR#dr -> Hash w/in Array
|
|
577
945
|
#
|
|
578
946
|
# === DR Line; database cross-references (>=0)
|
|
579
947
|
# DR database_identifier; primary_identifier; secondary_identifier.
|
|
@@ -585,6 +953,24 @@ class SPTR < EMBLDB
|
|
|
585
953
|
'PROSITE','REBASE','AARHUS/GHENT-2DPAGE','SGD','STYGENE','SUBTILIST',
|
|
586
954
|
'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN']
|
|
587
955
|
|
|
956
|
+
# Backup Bio::EMBLDB#dr as embl_dr
|
|
957
|
+
alias :embl_dr :dr
|
|
958
|
+
|
|
959
|
+
# Bio::SPTR#dr
|
|
960
|
+
# Without an argument, returns whatever embl_dr returns.
#
# With a database name +key+, looks the name up in embl_dr's result and
# returns an Array of Hashes, one per cross-reference, built from the first
# four fields of each record.
def dr(key = nil)
  return embl_dr unless key

  embl_dr[key].map do |fields|
    record = {'Accession' => fields[0],
              'Version' => fields[1],
              ' ' => fields[2],
              'Molecular Type' => fields[3]}
    record
  end
end
|
|
972
|
+
|
|
973
|
+
|
|
588
974
|
# Bio::EMBLDB::Common#kw - Array
|
|
589
975
|
# #keywords -> Array
|
|
590
976
|
#
|
|
@@ -592,10 +978,29 @@ class SPTR < EMBLDB
|
|
|
592
978
|
# KW [Keyword;]+
|
|
593
979
|
|
|
594
980
|
|
|
595
|
-
# returns
|
|
981
|
+
# returns contents in the feature table.
|
|
982
|
+
#
|
|
983
|
+
# == Examples
|
|
984
|
+
#
|
|
985
|
+
# sp = Bio::SPTR.new(entry)
|
|
986
|
+
# ft = sp.ft
|
|
987
|
+
# ft.class #=> Hash
|
|
988
|
+
# ft.keys.each do |feature_key|
|
|
989
|
+
# ft[feature_key].each do |feature|
|
|
990
|
+
# feature['From'] #=> '1'
|
|
991
|
+
# feature['To'] #=> '21'
|
|
992
|
+
# feature['Description'] #=> ''
|
|
993
|
+
# feature['FTId'] #=> ''
|
|
994
|
+
# feature['diff'] #=> []
|
|
995
|
+
# feature['original'] #=> [feature_key, '1', '21', '', '']
|
|
996
|
+
# end
|
|
997
|
+
# end
|
|
998
|
+
#
|
|
596
999
|
# * Bio::SPTR#ft -> Hash
|
|
597
|
-
# {
|
|
598
|
-
#
|
|
1000
|
+
# {FEATURE_KEY => [{'From' => int, 'To' => int,
|
|
1001
|
+
# 'Description' => aStr, 'FTId' => aStr,
|
|
1002
|
+
# 'diff' => [original_residues, changed_residues],
|
|
1003
|
+
# 'original' => aAry }],...}
|
|
599
1004
|
#
|
|
600
1005
|
# returns an Array of the information about the feature_name in the feature table.
|
|
601
1006
|
# * Bio::SPTR#ft(feature_name) -> Array of Hash
|
|
@@ -611,106 +1016,87 @@ class SPTR < EMBLDB
|
|
|
611
1016
|
# 22-27 `TO' endpoint
|
|
612
1017
|
# 35-75 Description (>=0 per key)
|
|
613
1018
|
# ----- -----------------
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
1019
|
+
#
|
|
1020
|
+
# Note: 'FROM' and 'TO' endpoints are allowed to use non-numerical characters
|
|
1021
|
+
# including '<', '>' or '?'. (c.f. '<1', '?42')
|
|
1022
|
+
#
|
|
1023
|
+
# See also http://www.expasy.org/sprot/userman.html#FT_line
|
|
1024
|
+
#
|
|
1025
|
+
def ft(feature_key = nil)
  # With a feature key, return only that key's entries from the full table.
  return ft[feature_key] if feature_key
  # The parsed table is cached in @data['FT'].
  return @data['FT'] if @data['FT']

  table = []
  begin
    get('FT').split("\n").each do |line|
      # A line that opens a feature has the key at index 5, i.e. "FT", three
      # blanks, then the key (see the slices below).  Anything else is
      # handled as a continuation of the previous feature.
      if line =~ /^FT {3}\w/
        feature = line.chomp.ljust(74)
        table << [feature[ 5..12].strip,   # Feature Name
                  feature[14..19].strip,   # From
                  feature[21..26].strip,   # To
                  feature[34..74].strip ]  # Description
      else
        table.last << line.chomp.sub!(/^FT +/, '')
      end
    end

    # Joining Description lines
    table = table.map { |feature|
      # A trailing "/FTId=..." continuation is kept apart from the text.
      ftid = feature.pop if feature.last =~ /FTId=/
      if feature.size > 4
        feature = [feature[0],
                   feature[1],
                   feature[2],
                   feature[3, feature.size - 3].join(" ")]
      end
      feature << if ftid then ftid else '' end
    }

    hash = {}
    table.each do |feature|
      hash[feature[0]] = [] unless hash[feature[0]]
      hash[feature[0]] << {
        # Removing '<', '>' or '?' in FROM/TO endpoint.
        'From' => feature[1].sub(/\D/, '').to_i,
        'To' => feature[2].sub(/\D/, '').to_i,
        'Description' => feature[3],
        'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
        'diff' => [],
        'original' => feature
      }

      case feature[0]
      when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
        # Both stay nil when the description matches neither pattern.
        original_res = nil
        changed_res = nil
        case hash[feature[0]].last['Description']
        when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
          # "ORIGINAL -> CHANGED": blanks inside the residues are removed.
          original_res = $1.gsub(/ /,'').strip
          changed_res = $2.gsub(/ /,'').strip
        when /Missing/i
          # Deleted region: the original residues come from the sequence
          # itself and the replacement is empty.
          original_res = seq.subseq(hash[feature[0]].last['From'],
                                    hash[feature[0]].last['To'])
          changed_res = ''
        end
        # One variable name for both branches, so the '' set for "Missing"
        # features actually reaches 'diff'.
        hash[feature[0]].last['diff'] = [original_res, changed_res]
      end
    end
  rescue
    raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
  end

  @data['FT'] = hash
end
|
|
706
1090
|
|
|
707
1091
|
|
|
1092
|
+
|
|
708
1093
|
# returns a Hash of the contents of the SQ lines.
|
|
709
1094
|
# * Bio::SPTRL#sq -> hsh
|
|
710
1095
|
#
|
|
711
1096
|
# returns a value of a key given in the SQ lines.
|
|
712
1097
|
# * Bio::SPTRL#sq(key) -> int or str
|
|
713
|
-
# * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length',
|
|
1098
|
+
# * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length',
|
|
1099
|
+
# 'CRC64']
|
|
714
1100
|
#
|
|
715
1101
|
# === SQ Line; sequence header (1/entry)
|
|
716
1102
|
# SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64;
|
|
@@ -759,84 +1145,6 @@ end # class SPTR
|
|
|
759
1145
|
end # module Bio
|
|
760
1146
|
|
|
761
1147
|
|
|
762
|
-
if __FILE__ == $0
|
|
763
|
-
# Usage: ruby __FILE__ uniprot_sprot.dat
|
|
764
|
-
# Usage: ruby __FILE__ uniprot_sprot.dat | egrep '^RuntimeError'
|
|
765
|
-
|
|
766
|
-
begin
|
|
767
|
-
require 'pp'
|
|
768
|
-
alias pp p
|
|
769
|
-
rescue LoadError
|
|
770
|
-
end
|
|
771
|
-
|
|
772
|
-
def cmd(cmd, tag = nil, ent = $ent)
|
|
773
|
-
puts " ==> #{cmd} "
|
|
774
|
-
puts Bio::SPTR.new(ent).get(tag) if tag
|
|
775
|
-
begin
|
|
776
|
-
p eval(cmd)
|
|
777
|
-
rescue RuntimeError
|
|
778
|
-
puts "RuntimeError(#{Bio::SPTR.new($ent).entry_id})}: #{$!} "
|
|
779
|
-
end
|
|
780
|
-
puts
|
|
781
|
-
end
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
while $ent = $<.gets(Bio::SPTR::RS)
|
|
785
|
-
|
|
786
|
-
cmd "Bio::SPTR.new($ent).entry_id"
|
|
787
|
-
|
|
788
|
-
cmd "Bio::SPTR.new($ent).id_line", 'ID'
|
|
789
|
-
cmd "Bio::SPTR.new($ent).entry"
|
|
790
|
-
cmd "Bio::SPTR.new($ent).entry_name"
|
|
791
|
-
cmd "Bio::SPTR.new($ent).molecule"
|
|
792
|
-
cmd "Bio::SPTR.new($ent).sequence_length"
|
|
793
|
-
|
|
794
|
-
cmd "Bio::SPTR.new($ent).ac", 'AC'
|
|
795
|
-
cmd "Bio::SPTR.new($ent).accession"
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
cmd "Bio::SPTR.new($ent).gn", 'GN'
|
|
799
|
-
cmd "Bio::SPTR.new($ent).gene_name"
|
|
800
|
-
cmd "Bio::SPTR.new($ent).gene_names"
|
|
801
|
-
|
|
802
|
-
cmd "Bio::SPTR.new($ent).dt", "DT"
|
|
803
|
-
['created','annotation','sequence'].each do |key|
|
|
804
|
-
cmd "Bio::SPTR.new($ent).dt('#{key}')"
|
|
805
|
-
end
|
|
806
|
-
|
|
807
|
-
cmd "Bio::SPTR.new($ent).de", 'DE'
|
|
808
|
-
cmd "Bio::SPTR.new($ent).definition"
|
|
809
|
-
cmd "Bio::SPTR.new($ent).protein_name"
|
|
810
|
-
cmd "Bio::SPTR.new($ent).synonyms"
|
|
811
|
-
|
|
812
|
-
cmd "Bio::SPTR.new($ent).kw", 'KW'
|
|
813
|
-
|
|
814
|
-
cmd "Bio::SPTR.new($ent).os", 'OS'
|
|
815
|
-
|
|
816
|
-
cmd "Bio::SPTR.new($ent).oc", 'OC'
|
|
817
|
-
|
|
818
|
-
cmd "Bio::SPTR.new($ent).og", 'OG'
|
|
819
|
-
|
|
820
|
-
cmd "Bio::SPTR.new($ent).ox", 'OX'
|
|
821
|
-
|
|
822
|
-
cmd "Bio::SPTR.new($ent).ref", 'R'
|
|
823
|
-
|
|
824
|
-
cmd "Bio::SPTR.new($ent).cc", 'CC'
|
|
825
|
-
cmd "Bio::SPTR.new($ent).cc('ALTERNATIVE PRODUCTS')"
|
|
826
|
-
cmd "Bio::SPTR.new($ent).cc('DATABASE')"
|
|
827
|
-
cmd "Bio::SPTR.new($ent).cc('MASS SPECTOMETRY')"
|
|
828
|
-
|
|
829
|
-
cmd "Bio::SPTR.new($ent).dr", 'DR'
|
|
830
|
-
|
|
831
|
-
cmd "Bio::SPTR.new($ent).ft", 'FT'
|
|
832
|
-
cmd "Bio::SPTR.new($ent).ft['DOMAIN']"
|
|
833
|
-
|
|
834
|
-
cmd "Bio::SPTR.new($ent).sq", "SQ"
|
|
835
|
-
cmd "Bio::SPTR.new($ent).seq"
|
|
836
|
-
end
|
|
837
|
-
|
|
838
|
-
end
|
|
839
|
-
|
|
840
1148
|
|
|
841
1149
|
=begin
|
|
842
1150
|
|
|
@@ -955,6 +1263,7 @@ Class for a entry in the SWISS-PROT/TrEMBL database.
|
|
|
955
1263
|
# OG - organelle (0 or 1 per entry; optional)
|
|
956
1264
|
# OC - organism classification (>=1 per entry)
|
|
957
1265
|
# OX - organism taxonomy x-ref (>=1 per entry)
|
|
1266
|
+
# OH - Organism Host
|
|
958
1267
|
# RN - reference number (>=1 per entry)
|
|
959
1268
|
# RP - reference positions (>=1 per entry)
|
|
960
1269
|
# RC - reference comment(s) (>=0 per entry; optional)
|
|
@@ -962,6 +1271,7 @@ Class for a entry in the SWISS-PROT/TrEMBL database.
|
|
|
962
1271
|
# RA - reference author(s) (>=1 per entry)
|
|
963
1272
|
# RT - reference title (>=0 per entry; optional)
|
|
964
1273
|
# RL - reference location (>=1 per entry)
|
|
1274
|
+
# RG - reference group(s)
|
|
965
1275
|
# CC - comments or notes (>=0 per entry; optional)
|
|
966
1276
|
# DR - database cross-references (>=0 per entry; optional)
|
|
967
1277
|
# KW - keywords (>=1 per entry)
|