bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/location.rb
CHANGED
|
@@ -1,262 +1,46 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/location.rb - Locations/Location class (GenBank location format)
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2001, 2005
|
|
5
|
-
#
|
|
6
|
-
# License::
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2005 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
6
|
+
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id: location.rb,v 0.
|
|
8
|
+
# $Id: location.rb,v 0.28 2007/04/05 23:35:39 trevor Exp $
|
|
9
9
|
#
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
#
|
|
14
|
-
# According to the GenBank manual 'gbrel.txt', I classified position notations
|
|
15
|
-
# into 10 patterns - (A) to (J).
|
|
16
|
-
#
|
|
17
|
-
# 3.4.12.2 Feature Location
|
|
18
|
-
#
|
|
19
|
-
# The second column of the feature descriptor line designates the
|
|
20
|
-
# location of the feature in the sequence. The location descriptor
|
|
21
|
-
# begins at position 22. Several conventions are used to indicate
|
|
22
|
-
# sequence location.
|
|
23
|
-
#
|
|
24
|
-
# Base numbers in location descriptors refer to numbering in the entry,
|
|
25
|
-
# which is not necessarily the same as the numbering scheme used in the
|
|
26
|
-
# published report. The first base in the presented sequence is numbered
|
|
27
|
-
# base 1. Sequences are presented in the 5 to 3 direction.
|
|
28
|
-
#
|
|
29
|
-
# Location descriptors can be one of the following:
|
|
30
|
-
#
|
|
31
|
-
# (A) 1. A single base;
|
|
32
|
-
#
|
|
33
|
-
# (B) 2. A contiguous span of bases;
|
|
34
|
-
#
|
|
35
|
-
# (C) 3. A site between two bases;
|
|
36
|
-
#
|
|
37
|
-
# (D) 4. A single base chosen from a range of bases;
|
|
38
|
-
#
|
|
39
|
-
# (E) 5. A single base chosen from among two or more specified bases;
|
|
40
|
-
#
|
|
41
|
-
# (F) 6. A joining of sequence spans;
|
|
42
|
-
#
|
|
43
|
-
# (G) 7. A reference to an entry other than the one to which the feature
|
|
44
|
-
# belongs (i.e., a remote entry), followed by a location descriptor
|
|
45
|
-
# referring to the remote sequence;
|
|
46
|
-
#
|
|
47
|
-
# (H) 8. A literal sequence (a string of bases enclosed in quotation marks).
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
# (C) A site between two residues, such as an endonuclease cleavage site, is
|
|
51
|
-
# indicated by listing the two bases separated by a carat (e.g., 23^24).
|
|
52
|
-
#
|
|
53
|
-
# (D) A single residue chosen from a range of residues is indicated by the
|
|
54
|
-
# number of the first and last bases in the range separated by a single
|
|
55
|
-
# period (e.g., 23.79). The symbols < and > indicate that the end point
|
|
56
|
-
# (I) of the range is beyond the specified base number.
|
|
57
|
-
#
|
|
58
|
-
# (B) A contiguous span of bases is indicated by the number of the first and
|
|
59
|
-
# last bases in the range separated by two periods (e.g., 23..79). The
|
|
60
|
-
# (I) symbols < and > indicate that the end point of the range is beyond the
|
|
61
|
-
# specified base number. Starting and ending positions can be indicated
|
|
62
|
-
# by base number or by one of the operators described below.
|
|
63
|
-
#
|
|
64
|
-
# Operators are prefixes that specify what must be done to the indicated
|
|
65
|
-
# sequence to locate the feature. The following are the operators
|
|
66
|
-
# available, along with their most common format and a description.
|
|
67
|
-
#
|
|
68
|
-
# (J) complement (location): The feature is complementary to the location
|
|
69
|
-
# indicated. Complementary strands are read 5 to 3.
|
|
70
|
-
#
|
|
71
|
-
# (F) join (location, location, .. location): The indicated elements should
|
|
72
|
-
# be placed end to end to form one contiguous sequence.
|
|
73
|
-
#
|
|
74
|
-
# (F) order (location, location, .. location): The elements are found in the
|
|
75
|
-
# specified order in the 5 to 3 direction, but nothing is implied about
|
|
76
|
-
# the rationality of joining them.
|
|
77
|
-
#
|
|
78
|
-
# (F) group (location, location, .. location): The elements are related and
|
|
79
|
-
# should be grouped together, but no order is implied.
|
|
80
|
-
#
|
|
81
|
-
# (E) one-of (location, location, .. location): The element can be any one,
|
|
82
|
-
# but only one, of the items listed.
|
|
83
|
-
#
|
|
84
|
-
# === Reduction strategy of the position notations
|
|
85
|
-
#
|
|
86
|
-
# (A) Location n
|
|
87
|
-
#
|
|
88
|
-
# (B) Location n..m
|
|
89
|
-
#
|
|
90
|
-
# (C) Location n^m
|
|
91
|
-
#
|
|
92
|
-
# (D) (n.m) => Location n
|
|
93
|
-
#
|
|
94
|
-
# (E) one-of(n,m,..) => Location n
|
|
95
|
-
# one-of(n..m,..) => Location n..m
|
|
96
|
-
#
|
|
97
|
-
# (F) order(loc,loc,..) => join(loc, loc,..)
|
|
98
|
-
# group(loc,loc,..) => join(loc, loc,..)
|
|
99
|
-
# join(loc,loc,..) => Sequence
|
|
100
|
-
#
|
|
101
|
-
# (G) ID:loc => Location with ID
|
|
102
|
-
#
|
|
103
|
-
# (H) "atgc" => Location only with Sequence
|
|
104
|
-
#
|
|
105
|
-
# (I) <n => Location n with lt flag
|
|
106
|
-
# >n => Location n with gt flag
|
|
107
|
-
# <n..m => Location n..m with lt flag
|
|
108
|
-
# n..>m => Location n..m with gt flag
|
|
109
|
-
# <n..>m => Location n..m with lt, gt flag
|
|
110
|
-
#
|
|
111
|
-
# (J) complement(loc) => Sequence
|
|
112
|
-
#
|
|
113
|
-
# (K) replace(loc, str) => Location with replacement Sequence
|
|
114
|
-
#
|
|
115
|
-
# === GenBank location examples
|
|
116
|
-
#
|
|
117
|
-
# (C) n^m
|
|
118
|
-
#
|
|
119
|
-
# * [AB015179] 754^755
|
|
120
|
-
# * [AF179299] complement(53^54)
|
|
121
|
-
# * [CELXOL1ES] replace(4480^4481,"")
|
|
122
|
-
# * [ECOUW87] replace(4792^4793,"a")
|
|
123
|
-
# * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
|
|
124
|
-
#
|
|
125
|
-
# (D) (n.m)
|
|
126
|
-
#
|
|
127
|
-
# * [HACSODA] 157..(800.806)
|
|
128
|
-
# * [HALSODB] (67.68)..(699.703)
|
|
129
|
-
# * [AP001918] (45934.45974)..46135
|
|
130
|
-
# * [BACSPOJ] <180..(731.761)
|
|
131
|
-
# * [BBU17998] (88.89)..>1122
|
|
132
|
-
# * [ECHTGA] complement((1700.1708)..(1715.1721))
|
|
133
|
-
# * [ECPAP17] complement(<22..(255.275))
|
|
134
|
-
# * [LPATOVGNS] complement((64.74)..1525)
|
|
135
|
-
# * [PIP404CG] join((8298.8300)..10206,1..855)
|
|
136
|
-
# * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
|
|
137
|
-
# * [HUMMIC2A] replace((651.655)..(651.655),"")
|
|
138
|
-
# * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
|
|
139
|
-
#
|
|
140
|
-
# (E) one-of
|
|
141
|
-
#
|
|
142
|
-
# * [ECU17136] one-of(898,900)..983
|
|
143
|
-
# * [CELCYT1A] one-of(5971..6308,5971..6309)
|
|
144
|
-
# * [DMU17742] 8050..one-of(10731,10758,10905,11242)
|
|
145
|
-
# * [PFU27807] one-of(623,627,632)..one-of(628,633,637)
|
|
146
|
-
# * [BTBAINH1] one-of(845,953,963,1078,1104)..1354
|
|
147
|
-
# * [ATU39449] join(one-of(969..1094,970..1094,995..1094,1018..1094),1518..1587,1726..2119,2220..2833,2945..3215)
|
|
148
|
-
#
|
|
149
|
-
# (F) join, order, group
|
|
150
|
-
#
|
|
151
|
-
# * [AB037374S2] join(AB037374.1:1..177,1..807)
|
|
152
|
-
# * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
|
|
153
|
-
# * [ASNOS11] join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128)
|
|
154
|
-
#
|
|
155
|
-
# * [AARPOB2] order(AF194507.1:<1..510,1..>871)
|
|
156
|
-
# * [AF006691] order(912..1918,20410..21416)
|
|
157
|
-
# * [AF024666] order(complement(18919..19224),complement(13965..14892))
|
|
158
|
-
# * [AF264948] order(27066..27076,27089..27099,27283..27314,27330..27352)
|
|
159
|
-
# * [D63363] order(3..26,complement(964..987))
|
|
160
|
-
# * [ECOCURLI2] order(complement(1009..>1260),complement(AF081827.1:<1..177))
|
|
161
|
-
# * [S72388S2] order(join(S72388.1:757..911,S72388.1:609..1542),1..>139)
|
|
162
|
-
# * [HEYRRE07] order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133))
|
|
163
|
-
# * [COL11A1G34] order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611)
|
|
164
|
-
#
|
|
165
|
-
# group() are found in the COMMENT field only (in GenBank 122.0)
|
|
166
|
-
#
|
|
167
|
-
# gbpat2.seq: FT repeat_region group(598..606,611..619)
|
|
168
|
-
# gbpat2.seq: FT repeat_region group(8..16,1457..1464).
|
|
169
|
-
# gbpat2.seq: FT variation group(t1,t2)
|
|
170
|
-
# gbpat2.seq: FT variation group(t1,t3)
|
|
171
|
-
# gbpat2.seq: FT variation group(t1,t2,t3)
|
|
172
|
-
# gbpat2.seq: FT repeat_region group(11..202,203..394)
|
|
173
|
-
# gbpri9.seq:COMMENT Residues reported = 'group(1..2145);'.
|
|
174
|
-
#
|
|
175
|
-
# (G) ID:location
|
|
176
|
-
#
|
|
177
|
-
# * [AARPOB2] order(AF194507.1:<1..510,1..>871)
|
|
178
|
-
# * [AF178221S4] join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90)
|
|
179
|
-
# * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
|
|
180
|
-
# * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
|
|
181
|
-
# * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
|
|
182
|
-
#
|
|
183
|
-
# (I) <, >
|
|
184
|
-
#
|
|
185
|
-
# * [A5U48871] <1..>318
|
|
186
|
-
# * [AA23SRRNP] <1..388
|
|
187
|
-
# * [AA23SRRNP] 503..>1010
|
|
188
|
-
# * [AAM5961] complement(<1..229)
|
|
189
|
-
# * [AAM5961] complement(5231..>5598)
|
|
190
|
-
# * [AF043934] join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843)
|
|
191
|
-
# * [BACSPOJ] <180..(731.761)
|
|
192
|
-
# * [BBU17998] (88.89)..>1122
|
|
193
|
-
# * [AARPOB2] order(AF194507.1:<1..510,1..>871)
|
|
194
|
-
# * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
|
|
195
|
-
#
|
|
196
|
-
# (J) complement
|
|
197
|
-
#
|
|
198
|
-
# * [AF179299] complement(53^54) <= hoge insertion site etc.
|
|
199
|
-
# * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
|
|
200
|
-
# * [AF209868S2] order(complement(1..>308),complement(AF209868.1:75..336))
|
|
201
|
-
# * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
|
|
202
|
-
# * [CPPLCG] complement(<1..(1093.1098))
|
|
203
|
-
# * [D63363] order(3..26,complement(964..987))
|
|
204
|
-
# * [ECHTGA] complement((1700.1708)..(1715.1721))
|
|
205
|
-
# * [ECOUXW] order(complement(1658..1663),complement(1636..1641))
|
|
206
|
-
# * [LPATOVGNS] complement((64.74)..1525)
|
|
207
|
-
# * [AF129075] complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403))
|
|
208
|
-
# * [ZFDYST2] join(AF137145.1:<1..18,complement(<1..99))
|
|
209
|
-
#
|
|
210
|
-
# (K) replace
|
|
211
|
-
#
|
|
212
|
-
# * [CSU27710] replace(64,"A")
|
|
213
|
-
# * [CELXOL1ES] replace(5256,"t")
|
|
214
|
-
# * [ANICPC] replace(1..468,"")
|
|
215
|
-
# * [CSU27710] replace(67..68,"GC")
|
|
216
|
-
# * [CELXOL1ES] replace(4480^4481,"") <= ? only one case in GenBank 122.0
|
|
217
|
-
# * [ECOUW87] replace(4792^4793,"a")
|
|
218
|
-
# * [CEU34893] replace(1..22,"ggttttaacccagttactcaag")
|
|
219
|
-
# * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
|
|
220
|
-
# * [MBDR3S1] replace(1400..>9281,"")
|
|
221
|
-
# * [HUMMHDPB1F] replace(complement(36..37),"ttc")
|
|
222
|
-
# * [HUMMIC2A] replace((651.655)..(651.655),"")
|
|
223
|
-
# * [LEIMDRPGP] replace(1..1554,"L01572")
|
|
224
|
-
# * [TRBND3] replace(376..395,"atttgtgtgtggtaatta")
|
|
225
|
-
# * [TRBND3] replace(376..395,"atttgtgtgggtaatttta")
|
|
226
|
-
# * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta")
|
|
227
|
-
# * [TRBND3] replace(376..395,"atgtgtggtgaatta")
|
|
228
|
-
# * [TRBND3] replace(376..395,"atgtgtgtggtaatta")
|
|
229
|
-
# * [TRBND3] replace(376..395,"gatttgttgtggtaatttta")
|
|
230
|
-
# * [MSU09460] replace(193, <= replace(193, "t")
|
|
231
|
-
# * [HUMMAGE12X] replace(3002..3003, <= replace(3002..3003, "GC")
|
|
232
|
-
# * [ADR40FIB] replace(510..520, <= replace(510..520, "taatcctaccg")
|
|
233
|
-
# * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
|
|
234
|
-
#
|
|
235
|
-
#--
|
|
10
|
+
|
|
11
|
+
module Bio
|
|
12
|
+
|
|
13
|
+
# == Description
|
|
236
14
|
#
|
|
237
|
-
#
|
|
238
|
-
#
|
|
239
|
-
#
|
|
240
|
-
# version 2 of the License, or (at your option) any later version.
|
|
15
|
+
# The Bio::Location class describes the position of a genomic locus.
|
|
16
|
+
# Typically, Bio::Location objects are created automatically when the
|
|
17
|
+
# user creates a Bio::Locations object, instead of being initialized directly.
|
|
241
18
|
#
|
|
242
|
-
#
|
|
243
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
244
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
245
|
-
# Lesser General Public License for more details.
|
|
19
|
+
# == Usage
|
|
246
20
|
#
|
|
247
|
-
#
|
|
248
|
-
#
|
|
249
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
# location = Bio::Location.new('500..550')
|
|
22
|
+
# puts "start=" + location.from.to_s + ";end=" + location.to.to_s
|
|
250
23
|
#
|
|
251
|
-
|
|
24
|
+
# #, or better: through Bio::Locations
|
|
25
|
+
# locations = Bio::Locations.new('500..550')
|
|
26
|
+
# locations.each do |location|
|
|
27
|
+
# puts "start=" + location.from.to_s + ";end=" + location.to.to_s
|
|
28
|
+
# end
|
|
252
29
|
#
|
|
253
|
-
|
|
254
|
-
module Bio
|
|
255
|
-
|
|
256
30
|
class Location
|
|
257
31
|
|
|
258
|
-
|
|
259
|
-
|
|
32
|
+
include Comparable
|
|
33
|
+
|
|
34
|
+
# Parses a 'location' segment, which can be 'ID:' + ('n' or 'n..m' or 'n^m'
|
|
35
|
+
# or "seq") with '<' or '>', and returns a Bio::Location object.
|
|
36
|
+
#
|
|
37
|
+
# location = Bio::Location.new('500..550')
|
|
38
|
+
#
|
|
39
|
+
# ---
|
|
40
|
+
# *Arguments*:
|
|
41
|
+
# * (required) _str_: GenBank style position string (see Bio::Locations
|
|
42
|
+
# documentation)
|
|
43
|
+
# *Returns*:: the Bio::Location object
|
|
260
44
|
def initialize(location = nil)
|
|
261
45
|
|
|
262
46
|
if location
|
|
@@ -273,7 +57,7 @@ class Location
|
|
|
273
57
|
|
|
274
58
|
# s : start base, e : end base => from, to
|
|
275
59
|
case location
|
|
276
|
-
when /^[<>]?(\d+)$/
|
|
60
|
+
when /^[<>]?(\d+)$/ # (A, I) n
|
|
277
61
|
s = e = $1.to_i
|
|
278
62
|
when /^[<>]?(\d+)\.\.[<>]?(\d+)$/ # (B, I) n..m
|
|
279
63
|
s = $1.to_i
|
|
@@ -310,66 +94,252 @@ class Location
|
|
|
310
94
|
|
|
311
95
|
attr_accessor :from, :to, :strand, :sequence, :lt, :gt, :xref_id
|
|
312
96
|
|
|
313
|
-
#
|
|
97
|
+
# Complements the sequence (i.e. alternates the strand).
|
|
98
|
+
# ---
|
|
99
|
+
# *Returns*:: the Bio::Location object
|
|
314
100
|
def complement
|
|
315
101
|
@strand *= -1
|
|
316
102
|
self # return Location object
|
|
317
103
|
end
|
|
318
104
|
|
|
319
|
-
#
|
|
105
|
+
# Replaces the sequence of the location.
|
|
106
|
+
# ---
|
|
107
|
+
# *Arguments*:
|
|
108
|
+
# * (required) _sequence_: sequence to be used to replace the sequence
|
|
109
|
+
# at the location
|
|
110
|
+
# *Returns*:: the Bio::Location object
|
|
320
111
|
def replace(sequence)
|
|
321
|
-
@sequence
|
|
112
|
+
@sequence = sequence.downcase
|
|
322
113
|
self # return Location object
|
|
323
114
|
end
|
|
324
115
|
|
|
325
|
-
# Returns
|
|
116
|
+
# Returns the range (from..to) of the location as a Range object.
|
|
326
117
|
def range
|
|
327
118
|
@from..@to
|
|
328
119
|
end
|
|
329
120
|
|
|
330
|
-
|
|
121
|
+
# Check where a Bio::Location object is located compared to another
|
|
122
|
+
# Bio::Location object (mainly to facilitate the use of Comparable).
|
|
123
|
+
# A location A is upstream of location B if the start position of
|
|
124
|
+
# location A is smaller than the start position of location B. If
|
|
125
|
+
# they're the same, the end positions are checked.
|
|
126
|
+
# ---
|
|
127
|
+
# *Arguments*:
|
|
128
|
+
# * (required) _other location_: a Bio::Location object
|
|
129
|
+
# *Returns*::
|
|
130
|
+
# * -1 if self < other location
|
|
131
|
+
# * 1 if self > other location
|
|
132
|
+
# * 0 if both locations are the same
|
|
133
|
+
# * nil if the argument is not a Bio::Location object
|
|
134
|
+
def <=>(other)
|
|
135
|
+
if ! other.kind_of?(Bio::Location)
|
|
136
|
+
return nil
|
|
137
|
+
end
|
|
331
138
|
|
|
139
|
+
if @from.to_f < other.from.to_f
|
|
140
|
+
return -1
|
|
141
|
+
elsif @from.to_f > other.from.to_f
|
|
142
|
+
return 1
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
if @to.to_f < other.to.to_f
|
|
146
|
+
return -1
|
|
147
|
+
elsif @to.to_f > other.to.to_f
|
|
148
|
+
return 1
|
|
149
|
+
end
|
|
150
|
+
return 0
|
|
151
|
+
end
|
|
332
152
|
|
|
153
|
+
end # Location
|
|
154
|
+
|
|
155
|
+
# == Description
|
|
156
|
+
#
|
|
157
|
+
# The Bio::Locations class is a container for Bio::Location objects:
|
|
158
|
+
# creating a Bio::Locations object (based on a GenBank style position string)
|
|
159
|
+
# will spawn an array of Bio::Location objects.
|
|
160
|
+
#
|
|
161
|
+
# == Usage
|
|
162
|
+
#
|
|
163
|
+
# locations = Bio::Locations.new('join(complement(500..550), 600..625)')
|
|
164
|
+
# locations.each do |loc|
|
|
165
|
+
# puts "class = " + loc.class.to_s
|
|
166
|
+
# puts "range = #{loc.from}..#{loc.to} (strand = #{loc.strand})"
|
|
167
|
+
# end
|
|
168
|
+
# # Output would be:
|
|
169
|
+
# # class = Bio::Location
|
|
170
|
+
# # range = 500..550 (strand = -1)
|
|
171
|
+
# # class = Bio::Location
|
|
172
|
+
# # range = 600..625 (strand = 1)
|
|
173
|
+
#
|
|
174
|
+
# # For the following three location strings, print the span and range
|
|
175
|
+
# ['one-of(898,900)..983',
|
|
176
|
+
# 'one-of(5971..6308,5971..6309)',
|
|
177
|
+
# '8050..one-of(10731,10758,10905,11242)'].each do |loc|
|
|
178
|
+
# location = Bio::Locations.new(loc)
|
|
179
|
+
# puts location.span
|
|
180
|
+
# puts location.range
|
|
181
|
+
# end
|
|
182
|
+
#
|
|
183
|
+
# === GenBank location descriptor classification
|
|
184
|
+
#
|
|
185
|
+
# ==== Definition of the position notation of the GenBank location format
|
|
186
|
+
#
|
|
187
|
+
# According to the GenBank manual 'gbrel.txt', position notations were
|
|
188
|
+
# classified into 10 patterns - (A) to (J).
|
|
189
|
+
#
|
|
190
|
+
# 3.4.12.2 Feature Location
|
|
191
|
+
#
|
|
192
|
+
# The second column of the feature descriptor line designates the
|
|
193
|
+
# location of the feature in the sequence. The location descriptor
|
|
194
|
+
# begins at position 22. Several conventions are used to indicate
|
|
195
|
+
# sequence location.
|
|
196
|
+
#
|
|
197
|
+
# Base numbers in location descriptors refer to numbering in the entry,
|
|
198
|
+
# which is not necessarily the same as the numbering scheme used in the
|
|
199
|
+
# published report. The first base in the presented sequence is numbered
|
|
200
|
+
# base 1. Sequences are presented in the 5 to 3 direction.
|
|
201
|
+
#
|
|
202
|
+
# Location descriptors can be one of the following:
|
|
203
|
+
#
|
|
204
|
+
# (A) 1. A single base;
|
|
205
|
+
#
|
|
206
|
+
# (B) 2. A contiguous span of bases;
|
|
207
|
+
#
|
|
208
|
+
# (C) 3. A site between two bases;
|
|
209
|
+
#
|
|
210
|
+
# (D) 4. A single base chosen from a range of bases;
|
|
211
|
+
#
|
|
212
|
+
# (E) 5. A single base chosen from among two or more specified bases;
|
|
213
|
+
#
|
|
214
|
+
# (F) 6. A joining of sequence spans;
|
|
215
|
+
#
|
|
216
|
+
# (G) 7. A reference to an entry other than the one to which the feature
|
|
217
|
+
# belongs (i.e., a remote entry), followed by a location descriptor
|
|
218
|
+
# referring to the remote sequence;
|
|
219
|
+
#
|
|
220
|
+
# (H) 8. A literal sequence (a string of bases enclosed in quotation marks).
|
|
221
|
+
#
|
|
222
|
+
# ==== Description commented with pattern IDs.
|
|
223
|
+
#
|
|
224
|
+
# (C) A site between two residues, such as an endonuclease cleavage site, is
|
|
225
|
+
# indicated by listing the two bases separated by a carat (e.g., 23^24).
|
|
226
|
+
#
|
|
227
|
+
# (D) A single residue chosen from a range of residues is indicated by the
|
|
228
|
+
# number of the first and last bases in the range separated by a single
|
|
229
|
+
# period (e.g., 23.79). The symbols < and > indicate that the end point
|
|
230
|
+
# (I) of the range is beyond the specified base number.
|
|
231
|
+
#
|
|
232
|
+
# (B) A contiguous span of bases is indicated by the number of the first and
|
|
233
|
+
# last bases in the range separated by two periods (e.g., 23..79). The
|
|
234
|
+
# (I) symbols < and > indicate that the end point of the range is beyond the
|
|
235
|
+
# specified base number. Starting and ending positions can be indicated
|
|
236
|
+
# by base number or by one of the operators described below.
|
|
237
|
+
#
|
|
238
|
+
# Operators are prefixes that specify what must be done to the indicated
|
|
239
|
+
# sequence to locate the feature. The following are the operators
|
|
240
|
+
# available, along with their most common format and a description.
|
|
241
|
+
#
|
|
242
|
+
# (J) complement (location): The feature is complementary to the location
|
|
243
|
+
# indicated. Complementary strands are read 5 to 3.
|
|
244
|
+
#
|
|
245
|
+
# (F) join (location, location, .. location): The indicated elements should
|
|
246
|
+
# be placed end to end to form one contiguous sequence.
|
|
247
|
+
#
|
|
248
|
+
# (F) order (location, location, .. location): The elements are found in the
|
|
249
|
+
# specified order in the 5 to 3 direction, but nothing is implied about
|
|
250
|
+
# the rationality of joining them.
|
|
251
|
+
#
|
|
252
|
+
# (F) group (location, location, .. location): The elements are related and
|
|
253
|
+
# should be grouped together, but no order is implied.
|
|
254
|
+
#
|
|
255
|
+
# (E) one-of (location, location, .. location): The element can be any one,
|
|
256
|
+
# but only one, of the items listed.
|
|
257
|
+
#
|
|
258
|
+
# === Reduction strategy of the position notations
|
|
259
|
+
#
|
|
260
|
+
# * (A) Location n
|
|
261
|
+
# * (B) Location n..m
|
|
262
|
+
# * (C) Location n^m
|
|
263
|
+
# * (D) (n.m) => Location n
|
|
264
|
+
# * (E)
|
|
265
|
+
# * one-of(n,m,..) => Location n
|
|
266
|
+
# * one-of(n..m,..) => Location n..m
|
|
267
|
+
# * (F)
|
|
268
|
+
# * order(loc,loc,..) => join(loc, loc,..)
|
|
269
|
+
# * group(loc,loc,..) => join(loc, loc,..)
|
|
270
|
+
# * join(loc,loc,..) => Sequence
|
|
271
|
+
# * (G) ID:loc => Location with ID
|
|
272
|
+
# * (H) "atgc" => Location only with Sequence
|
|
273
|
+
# * (I)
|
|
274
|
+
# * <n => Location n with lt flag
|
|
275
|
+
# * >n => Location n with gt flag
|
|
276
|
+
# * <n..m => Location n..m with lt flag
|
|
277
|
+
# * n..>m => Location n..m with gt flag
|
|
278
|
+
# * <n..>m => Location n..m with lt, gt flag
|
|
279
|
+
# * (J) complement(loc) => Sequence
|
|
280
|
+
# * (K) replace(loc, str) => Location with replacement Sequence
|
|
281
|
+
#
|
|
333
282
|
class Locations
|
|
334
283
|
|
|
335
284
|
include Enumerable
|
|
336
285
|
|
|
337
|
-
#
|
|
338
|
-
# which contains a list of Location objects.
|
|
286
|
+
# Parses a GenBank style position string and returns a Bio::Locations
|
|
287
|
+
# object, which contains a list of Bio::Location objects.
|
|
288
|
+
#
|
|
289
|
+
# locations = Bio::Locations.new('join(complement(500..550), 600..625)')
|
|
290
|
+
#
|
|
291
|
+
# ---
|
|
292
|
+
# *Arguments*:
|
|
293
|
+
# * (required) _str_: GenBank style position string
|
|
294
|
+
# *Returns*:: Bio::Locations object
|
|
339
295
|
def initialize(position)
|
|
340
296
|
if position.is_a? Array
|
|
341
297
|
@locations = position
|
|
342
298
|
else
|
|
343
299
|
position = gbl_cleanup(position) # preprocessing
|
|
344
|
-
@locations = gbl_pos2loc(position) # create an Array of Location
|
|
300
|
+
@locations = gbl_pos2loc(position) # create an Array of Bio::Location objects
|
|
345
301
|
end
|
|
346
302
|
end
|
|
303
|
+
|
|
304
|
+
# An Array of Bio::Location objects
|
|
347
305
|
attr_accessor :locations
|
|
348
306
|
|
|
349
|
-
#
|
|
307
|
+
# Evaluates equality of two Bio::Locations objects.
|
|
308
|
+
def equals?(other)
|
|
309
|
+
if ! other.kind_of?(Bio::Locations)
|
|
310
|
+
return nil
|
|
311
|
+
end
|
|
312
|
+
if self.sort == other.sort
|
|
313
|
+
return true
|
|
314
|
+
else
|
|
315
|
+
return false
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
# Iterates on each Bio::Location object.
|
|
350
320
|
def each
|
|
351
321
|
@locations.each do |x|
|
|
352
322
|
yield(x)
|
|
353
323
|
end
|
|
354
324
|
end
|
|
355
325
|
|
|
356
|
-
# Returns nth Location object.
|
|
326
|
+
# Returns nth Bio::Location object.
|
|
357
327
|
def [](n)
|
|
358
328
|
@locations[n]
|
|
359
329
|
end
|
|
360
330
|
|
|
361
|
-
# Returns first Location object.
|
|
331
|
+
# Returns first Bio::Location object.
|
|
362
332
|
def first
|
|
363
333
|
@locations.first
|
|
364
334
|
end
|
|
365
335
|
|
|
366
|
-
# Returns last Location object.
|
|
336
|
+
# Returns last Bio::Location object.
|
|
367
337
|
def last
|
|
368
338
|
@locations.last
|
|
369
339
|
end
|
|
370
340
|
|
|
371
341
|
# Returns an Array containing overall min and max position [min, max]
|
|
372
|
-
# of this Locations object.
|
|
342
|
+
# of this Bio::Locations object.
|
|
373
343
|
def span
|
|
374
344
|
span_min = @locations.min { |a,b| a.from <=> b.from }
|
|
375
345
|
span_max = @locations.max { |a,b| a.to <=> b.to }
|
|
@@ -396,9 +366,22 @@ class Locations
|
|
|
396
366
|
end
|
|
397
367
|
alias size length
|
|
398
368
|
|
|
399
|
-
#
|
|
400
|
-
#
|
|
401
|
-
#
|
|
369
|
+
# Converts absolute position in the whole of the DNA sequence to relative
|
|
370
|
+
# position in the locus.
|
|
371
|
+
#
|
|
372
|
+
# This method can for example be used to relate positions in a DNA-sequence
|
|
373
|
+
# with those in RNA. In this use, the optional ':aa'-flag returns the
|
|
374
|
+
# position of the associated amino-acid rather than the nucleotide.
|
|
375
|
+
#
|
|
376
|
+
# loc = Bio::Locations.new('complement(12838..13533)')
|
|
377
|
+
# puts loc.relative(13524) # => 10
|
|
378
|
+
# puts loc.relative(13506, :aa) # => 10
|
|
379
|
+
#
|
|
380
|
+
# ---
|
|
381
|
+
# *Arguments*:
|
|
382
|
+
# * (required) _position_: nucleotide position within the whole of the sequence
|
|
383
|
+
# * _:aa_: flag that makes the method return the position in amino acid coordinates
|
|
384
|
+
# *Returns*:: position within the location
|
|
402
385
|
def relative(n, type = nil)
|
|
403
386
|
case type
|
|
404
387
|
when :location
|
|
@@ -414,18 +397,23 @@ class Locations
|
|
|
414
397
|
end
|
|
415
398
|
end
|
|
416
399
|
|
|
417
|
-
#
|
|
418
|
-
#
|
|
419
|
-
#
|
|
420
|
-
#
|
|
421
|
-
#
|
|
400
|
+
# Converts relative position in the locus to position in the whole of the
|
|
401
|
+
# DNA sequence.
|
|
402
|
+
#
|
|
403
|
+
# This method can for example be used to relate positions in a DNA-sequence
|
|
404
|
+
# with those in RNA. In this use, the optional ':aa'-flag returns the
|
|
405
|
+
# position of the associated amino-acid rather than the nucleotide.
|
|
422
406
|
#
|
|
423
|
-
#
|
|
424
|
-
#
|
|
425
|
-
#
|
|
426
|
-
# loc.absolute(10, :aa) #=> 13506 (rel2abs)
|
|
427
|
-
# loc.relative(13506, :aa) #=> 10 (abs2rel)
|
|
407
|
+
# loc = Bio::Locations.new('complement(12838..13533)')
|
|
408
|
+
# puts loc.absolute(10) # => 13524
|
|
409
|
+
# puts loc.absolute(10, :aa) # => 13506
|
|
428
410
|
#
|
|
411
|
+
# ---
|
|
412
|
+
# *Arguments*:
|
|
413
|
+
# * (required) _position_: nucleotide position within locus
|
|
414
|
+
# * _:aa_: flag to be used if _position_ is an amino acid position rather than
|
|
415
|
+
# a nucleotide position
|
|
416
|
+
# *Returns*:: position within the whole of the sequence
|
|
429
417
|
def absolute(n, type = nil)
|
|
430
418
|
case type
|
|
431
419
|
when :location
|
|
@@ -452,9 +440,9 @@ class Locations
|
|
|
452
440
|
# <match> $1 ( $2 $3 not )
|
|
453
441
|
position.gsub!(/(\.{2})?\(?([<>\d]+)\.([<>\d]+)(?!:)\)?/) do |match|
|
|
454
442
|
if $1
|
|
455
|
-
$1 + $3
|
|
443
|
+
$1 + $3 # ..(n.m) => ..m
|
|
456
444
|
else
|
|
457
|
-
$2
|
|
445
|
+
$2 # (?n.m)? => n
|
|
458
446
|
end
|
|
459
447
|
end
|
|
460
448
|
|
|
@@ -462,9 +450,9 @@ class Locations
|
|
|
462
450
|
# <match> .. one-of ($2 ,$3 )
|
|
463
451
|
position.gsub!(/(\.{2})?one-of\(([^,]+),([^)]+)\)/) do |match|
|
|
464
452
|
if $1
|
|
465
|
-
$1 + $3.gsub(/.*,(.*)/, '\1')
|
|
453
|
+
$1 + $3.gsub(/.*,(.*)/, '\1') # ..one-of(n,m) => ..m
|
|
466
454
|
else
|
|
467
|
-
$2
|
|
455
|
+
$2 # one-of(n,m) => n
|
|
468
456
|
end
|
|
469
457
|
end
|
|
470
458
|
|
|
@@ -514,7 +502,7 @@ class Locations
|
|
|
514
502
|
ary << gbl_pos2loc(position)
|
|
515
503
|
end
|
|
516
504
|
|
|
517
|
-
when /^complement\((.*)\)$/
|
|
505
|
+
when /^complement\((.*)\)$/ # (J) complement()
|
|
518
506
|
position = $1
|
|
519
507
|
gbl_pos2loc(position).reverse_each do |location|
|
|
520
508
|
ary << location.complement
|
|
@@ -579,17 +567,144 @@ class Locations
|
|
|
579
567
|
end
|
|
580
568
|
end
|
|
581
569
|
end
|
|
582
|
-
return nil
|
|
570
|
+
return nil # out of range
|
|
583
571
|
end
|
|
584
572
|
|
|
585
|
-
end #
|
|
573
|
+
end # Locations
|
|
586
574
|
|
|
587
|
-
end #
|
|
575
|
+
end # Bio
|
|
588
576
|
|
|
589
577
|
|
|
578
|
+
|
|
579
|
+
# === GenBank location examples
|
|
580
|
+
#
|
|
581
|
+
# (C) n^m
|
|
582
|
+
#
|
|
583
|
+
# * [AB015179] 754^755
|
|
584
|
+
# * [AF179299] complement(53^54)
|
|
585
|
+
# * [CELXOL1ES] replace(4480^4481,"")
|
|
586
|
+
# * [ECOUW87] replace(4792^4793,"a")
|
|
587
|
+
# * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
|
|
588
|
+
#
|
|
589
|
+
# (D) (n.m)
|
|
590
|
+
#
|
|
591
|
+
# * [HACSODA] 157..(800.806)
|
|
592
|
+
# * [HALSODB] (67.68)..(699.703)
|
|
593
|
+
# * [AP001918] (45934.45974)..46135
|
|
594
|
+
# * [BACSPOJ] <180..(731.761)
|
|
595
|
+
# * [BBU17998] (88.89)..>1122
|
|
596
|
+
# * [ECHTGA] complement((1700.1708)..(1715.1721))
|
|
597
|
+
# * [ECPAP17] complement(<22..(255.275))
|
|
598
|
+
# * [LPATOVGNS] complement((64.74)..1525)
|
|
599
|
+
# * [PIP404CG] join((8298.8300)..10206,1..855)
|
|
600
|
+
# * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
|
|
601
|
+
# * [HUMMIC2A] replace((651.655)..(651.655),"")
|
|
602
|
+
# * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
|
|
603
|
+
#
|
|
604
|
+
# (E) one-of
|
|
605
|
+
#
|
|
606
|
+
# * [ECU17136] one-of(898,900)..983
|
|
607
|
+
# * [CELCYT1A] one-of(5971..6308,5971..6309)
|
|
608
|
+
# * [DMU17742] 8050..one-of(10731,10758,10905,11242)
|
|
609
|
+
# * [PFU27807] one-of(623,627,632)..one-of(628,633,637)
|
|
610
|
+
# * [BTBAINH1] one-of(845,953,963,1078,1104)..1354
|
|
611
|
+
# * [ATU39449] join(one-of(969..1094,970..1094,995..1094,1018..1094),1518..1587,1726..2119,2220..2833,2945..3215)
|
|
612
|
+
#
|
|
613
|
+
# (F) join, order, group
|
|
614
|
+
#
|
|
615
|
+
# * [AB037374S2] join(AB037374.1:1..177,1..807)
|
|
616
|
+
# * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
|
|
617
|
+
# * [ASNOS11] join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128)
|
|
618
|
+
#
|
|
619
|
+
# * [AARPOB2] order(AF194507.1:<1..510,1..>871)
|
|
620
|
+
# * [AF006691] order(912..1918,20410..21416)
|
|
621
|
+
# * [AF024666] order(complement(18919..19224),complement(13965..14892))
|
|
622
|
+
# * [AF264948] order(27066..27076,27089..27099,27283..27314,27330..27352)
|
|
623
|
+
# * [D63363] order(3..26,complement(964..987))
|
|
624
|
+
# * [ECOCURLI2] order(complement(1009..>1260),complement(AF081827.1:<1..177))
|
|
625
|
+
# * [S72388S2] order(join(S72388.1:757..911,S72388.1:609..1542),1..>139)
|
|
626
|
+
# * [HEYRRE07] order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133))
|
|
627
|
+
# * [COL11A1G34] order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611)
|
|
628
|
+
#
|
|
629
|
+
# group() are found in the COMMENT field only (in GenBank 122.0)
|
|
630
|
+
#
|
|
631
|
+
# gbpat2.seq: FT repeat_region group(598..606,611..619)
|
|
632
|
+
# gbpat2.seq: FT repeat_region group(8..16,1457..1464).
|
|
633
|
+
# gbpat2.seq: FT variation group(t1,t2)
|
|
634
|
+
# gbpat2.seq: FT variation group(t1,t3)
|
|
635
|
+
# gbpat2.seq: FT variation group(t1,t2,t3)
|
|
636
|
+
# gbpat2.seq: FT repeat_region group(11..202,203..394)
|
|
637
|
+
# gbpri9.seq:COMMENT Residues reported = 'group(1..2145);'.
|
|
638
|
+
#
|
|
639
|
+
# (G) ID:location
|
|
640
|
+
#
|
|
641
|
+
# * [AARPOB2] order(AF194507.1:<1..510,1..>871)
|
|
642
|
+
# * [AF178221S4] join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90)
|
|
643
|
+
# * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
|
|
644
|
+
# * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
|
|
645
|
+
# * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
|
|
646
|
+
#
|
|
647
|
+
# (I) <, >
|
|
648
|
+
#
|
|
649
|
+
# * [A5U48871] <1..>318
|
|
650
|
+
# * [AA23SRRNP] <1..388
|
|
651
|
+
# * [AA23SRRNP] 503..>1010
|
|
652
|
+
# * [AAM5961] complement(<1..229)
|
|
653
|
+
# * [AAM5961] complement(5231..>5598)
|
|
654
|
+
# * [AF043934] join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843)
|
|
655
|
+
# * [BACSPOJ] <180..(731.761)
|
|
656
|
+
# * [BBU17998] (88.89)..>1122
|
|
657
|
+
# * [AARPOB2] order(AF194507.1:<1..510,1..>871)
|
|
658
|
+
# * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
|
|
659
|
+
#
|
|
660
|
+
# (J) complement
|
|
661
|
+
#
|
|
662
|
+
# * [AF179299] complement(53^54) <= hoge insertion site etc.
|
|
663
|
+
# * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
|
|
664
|
+
# * [AF209868S2] order(complement(1..>308),complement(AF209868.1:75..336))
|
|
665
|
+
# * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
|
|
666
|
+
# * [CPPLCG] complement(<1..(1093.1098))
|
|
667
|
+
# * [D63363] order(3..26,complement(964..987))
|
|
668
|
+
# * [ECHTGA] complement((1700.1708)..(1715.1721))
|
|
669
|
+
# * [ECOUXW] order(complement(1658..1663),complement(1636..1641))
|
|
670
|
+
# * [LPATOVGNS] complement((64.74)..1525)
|
|
671
|
+
# * [AF129075] complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403))
|
|
672
|
+
# * [ZFDYST2] join(AF137145.1:<1..18,complement(<1..99))
|
|
673
|
+
#
|
|
674
|
+
# (K) replace
|
|
675
|
+
#
|
|
676
|
+
# * [CSU27710] replace(64,"A")
|
|
677
|
+
# * [CELXOL1ES] replace(5256,"t")
|
|
678
|
+
# * [ANICPC] replace(1..468,"")
|
|
679
|
+
# * [CSU27710] replace(67..68,"GC")
|
|
680
|
+
# * [CELXOL1ES] replace(4480^4481,"") <= ? only one case in GenBank 122.0
|
|
681
|
+
# * [ECOUW87] replace(4792^4793,"a")
|
|
682
|
+
# * [CEU34893] replace(1..22,"ggttttaacccagttactcaag")
|
|
683
|
+
# * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
|
|
684
|
+
# * [MBDR3S1] replace(1400..>9281,"")
|
|
685
|
+
# * [HUMMHDPB1F] replace(complement(36..37),"ttc")
|
|
686
|
+
# * [HUMMIC2A] replace((651.655)..(651.655),"")
|
|
687
|
+
# * [LEIMDRPGP] replace(1..1554,"L01572")
|
|
688
|
+
# * [TRBND3] replace(376..395,"atttgtgtgtggtaatta")
|
|
689
|
+
# * [TRBND3] replace(376..395,"atttgtgtgggtaatttta")
|
|
690
|
+
# * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta")
|
|
691
|
+
# * [TRBND3] replace(376..395,"atgtgtggtgaatta")
|
|
692
|
+
# * [TRBND3] replace(376..395,"atgtgtgtggtaatta")
|
|
693
|
+
# * [TRBND3] replace(376..395,"gatttgttgtggtaatttta")
|
|
694
|
+
# * [MSU09460] replace(193, <= replace(193, "t")
|
|
695
|
+
# * [HUMMAGE12X] replace(3002..3003, <= replace(3002..3003, "GC")
|
|
696
|
+
# * [ADR40FIB] replace(510..520, <= replace(510..520, "taatcctaccg")
|
|
697
|
+
# * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
|
|
698
|
+
#
|
|
699
|
+
|
|
590
700
|
if __FILE__ == $0
|
|
591
701
|
puts "Test new & span methods"
|
|
592
702
|
[
|
|
703
|
+
'450',
|
|
704
|
+
'500..600',
|
|
705
|
+
'join(500..550, 600..625)',
|
|
706
|
+
'complement(join(500..550, 600..625))',
|
|
707
|
+
'join(complement(500..550), 600..625)',
|
|
593
708
|
'754^755',
|
|
594
709
|
'complement(53^54)',
|
|
595
710
|
'replace(4792^4793,"a")',
|
|
@@ -617,9 +732,14 @@ if __FILE__ == $0
|
|
|
617
732
|
'<200001..<318389',
|
|
618
733
|
].each do |pos|
|
|
619
734
|
p pos
|
|
620
|
-
p Bio::Locations.new(pos)
|
|
621
|
-
p Bio::Locations.new(pos).
|
|
622
|
-
p Bio::Locations.new(pos)
|
|
735
|
+
# p Bio::Locations.new(pos)
|
|
736
|
+
# p Bio::Locations.new(pos).span
|
|
737
|
+
# p Bio::Locations.new(pos).range
|
|
738
|
+
Bio::Locations.new(pos).each do |location|
|
|
739
|
+
puts "class=" + location.class.to_s
|
|
740
|
+
puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s
|
|
741
|
+
end
|
|
742
|
+
|
|
623
743
|
end
|
|
624
744
|
|
|
625
745
|
puts "Test rel2abs/abs2rel method"
|
|
@@ -646,5 +766,7 @@ if __FILE__ == $0
|
|
|
646
766
|
print "pos : "; p pos
|
|
647
767
|
print "`- loc[1] : "; p loc[1]
|
|
648
768
|
print " `- range : "; p loc[1].range
|
|
769
|
+
|
|
770
|
+
puts Bio::Location.new('5').<=>(Bio::Location.new('3'))
|
|
649
771
|
end
|
|
650
772
|
|