bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/db/rebase.rb
CHANGED
|
@@ -1,19 +1,33 @@
|
|
|
1
1
|
#
|
|
2
|
-
#
|
|
2
|
+
# bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
|
|
3
3
|
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
4
|
+
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
|
|
5
|
+
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
|
|
6
|
+
# License:: The Ruby License
|
|
6
7
|
#
|
|
7
|
-
# $Id: rebase.rb,v 1.
|
|
8
|
+
# $Id: rebase.rb,v 1.8 2007/04/05 23:35:40 trevor Exp $
|
|
8
9
|
#
|
|
10
|
+
|
|
11
|
+
autoload :YAML, 'yaml'
|
|
12
|
+
|
|
13
|
+
module Bio #:nodoc:
|
|
14
|
+
|
|
15
|
+
autoload :Reference, 'bio/reference'
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
|
|
9
19
|
#
|
|
10
|
-
#
|
|
20
|
+
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
|
|
21
|
+
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
|
|
22
|
+
# License:: The Ruby License
|
|
23
|
+
#
|
|
24
|
+
#
|
|
25
|
+
# = Description
|
|
11
26
|
#
|
|
12
27
|
# Bio::REBASE provides utilities for interacting with REBASE data in EMBOSS
|
|
13
28
|
# format. REBASE is the Restriction Enzyme Database, more information
|
|
14
29
|
# can be found here:
|
|
15
30
|
#
|
|
16
|
-
|
|
17
31
|
# * http://rebase.neb.com
|
|
18
32
|
#
|
|
19
33
|
# EMBOSS formatted files located at:
|
|
@@ -30,9 +44,9 @@
|
|
|
30
44
|
# % wget ftp://ftp.neb.com/pub/rebase/emboss*
|
|
31
45
|
#
|
|
32
46
|
#
|
|
33
|
-
#
|
|
47
|
+
# = Usage
|
|
34
48
|
#
|
|
35
|
-
# require 'bio
|
|
49
|
+
# require 'bio'
|
|
36
50
|
# require 'pp'
|
|
37
51
|
#
|
|
38
52
|
# enz = File.read('emboss_e')
|
|
@@ -65,6 +79,7 @@
|
|
|
65
79
|
# rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
|
|
66
80
|
#
|
|
67
81
|
# pp rebase.enzymes[0..4] # ["AarI", "AasI", "AatI", "AatII", "Acc16I"]
|
|
82
|
+
# pp rebase.enzyme_name?('aasi') # true
|
|
68
83
|
# pp rebase['AarI'].pattern # "CACCTGC"
|
|
69
84
|
# pp rebase['AarI'].blunt? # false
|
|
70
85
|
# pp rebase['AarI'].organism # "Arthrobacter aurescens SS2-322"
|
|
@@ -92,37 +107,11 @@
|
|
|
92
107
|
# rebase.each do |name, info|
|
|
93
108
|
# pp "#{name}: #{info.methylation}" unless info.methylation.empty?
|
|
94
109
|
# end
|
|
95
|
-
#
|
|
96
|
-
#
|
|
97
|
-
#--
|
|
98
|
-
#
|
|
99
|
-
# This library is free software; you can redistribute it and/or
|
|
100
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
101
|
-
# License as published by the Free Software Foundation; either
|
|
102
|
-
# version 2 of the License, or (at your option) any later version.
|
|
103
110
|
#
|
|
104
|
-
# This library is distributed in the hope that it will be useful,
|
|
105
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
106
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
107
|
-
# Lesser General Public License for more details.
|
|
108
|
-
#
|
|
109
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
110
|
-
# License along with this library; if not, write to the Free Software
|
|
111
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
112
|
-
#
|
|
113
|
-
#++
|
|
114
|
-
#
|
|
115
|
-
|
|
116
|
-
autoload :YAML, 'yaml'
|
|
117
|
-
|
|
118
|
-
module Bio
|
|
119
|
-
|
|
120
|
-
autoload :Reference, 'reference'
|
|
121
|
-
|
|
122
111
|
|
|
123
112
|
class REBASE
|
|
124
113
|
|
|
125
|
-
class DynamicMethod_Hash < Hash
|
|
114
|
+
class DynamicMethod_Hash < Hash #:nodoc:
|
|
126
115
|
# Define a writer or reader
|
|
127
116
|
# * Allows hash[:key]= to be accessed like hash.key=
|
|
128
117
|
# * Allows hash[:key] to be accessed like hash.key
|
|
@@ -142,7 +131,7 @@ class REBASE
|
|
|
142
131
|
end
|
|
143
132
|
end
|
|
144
133
|
|
|
145
|
-
class EnzymeEntry < DynamicMethod_Hash
|
|
134
|
+
class EnzymeEntry < DynamicMethod_Hash #:nodoc:
|
|
146
135
|
@@supplier_data = {}
|
|
147
136
|
def self.supplier_data=(d); @@supplier_data = d; end
|
|
148
137
|
|
|
@@ -153,23 +142,39 @@ class REBASE
|
|
|
153
142
|
end
|
|
154
143
|
end
|
|
155
144
|
|
|
145
|
+
# Calls _block_ once for each element in <tt>@data</tt> hash, passing that element as a parameter.
|
|
146
|
+
#
|
|
147
|
+
# ---
|
|
148
|
+
# *Arguments*
|
|
149
|
+
# * Accepts a block
|
|
150
|
+
# *Returns*:: results of _block_ operations
|
|
156
151
|
def each
|
|
157
|
-
@data.each { |
|
|
152
|
+
@data.each { |item| yield item }
|
|
158
153
|
end
|
|
159
154
|
|
|
160
155
|
# Make the instantiated class act like a Hash on @data
|
|
161
156
|
# Does the equivalent and more of this:
|
|
162
157
|
# def []( key ); @data[ key ]; end
|
|
163
158
|
# def size; @data.size; end
|
|
164
|
-
def method_missing(method_id, *args)
|
|
159
|
+
def method_missing(method_id, *args) #:nodoc:
|
|
165
160
|
self.class.class_eval do
|
|
166
161
|
define_method(method_id) { |a| Hash.instance_method(method_id).bind(@data).call(a) }
|
|
167
162
|
end
|
|
168
163
|
Hash.instance_method(method_id).bind(@data).call(*args)
|
|
169
164
|
end
|
|
170
165
|
|
|
171
|
-
#
|
|
166
|
+
# Constructor
|
|
167
|
+
#
|
|
168
|
+
# ---
|
|
169
|
+
# *Arguments*
|
|
170
|
+
# * +enzyme_lines+: (_required_) contents of EMBOSS formatted enzymes file
|
|
171
|
+
# * +reference_lines+: (_optional_) contents of EMBOSS formatted references file
|
|
172
|
+
# * +supplier_lines+: (_optional_) contents of EMBOSS formatted suppliers files
|
|
173
|
+
# * +yaml+: (_optional_, _default_ +false+) enzyme_lines, reference_lines, and supplier_lines are read as YAML if set to true
|
|
174
|
+
# *Returns*:: Bio::REBASE
|
|
172
175
|
def initialize( enzyme_lines, reference_lines = nil, supplier_lines = nil, yaml = false )
|
|
176
|
+
# All your REBASE are belong to us.
|
|
177
|
+
|
|
173
178
|
if yaml
|
|
174
179
|
@enzyme_data = enzyme_lines
|
|
175
180
|
@reference_data = reference_lines
|
|
@@ -185,24 +190,57 @@ class REBASE
|
|
|
185
190
|
end
|
|
186
191
|
|
|
187
192
|
# List the enzymes available
|
|
193
|
+
#
|
|
194
|
+
# ---
|
|
195
|
+
# *Arguments*
|
|
196
|
+
# * _none_
|
|
197
|
+
# *Returns*:: +Array+ sorted enzyme names
|
|
188
198
|
def enzymes
|
|
189
199
|
@data.keys.sort
|
|
190
200
|
end
|
|
201
|
+
|
|
202
|
+
# Check if supplied name is the name of an available enzyme
|
|
203
|
+
#
|
|
204
|
+
# ---
|
|
205
|
+
# *Arguments*
|
|
206
|
+
# * +name+: Enzyme name
|
|
207
|
+
# *Returns*:: +true/false+
|
|
208
|
+
def enzyme_name?(name)
|
|
209
|
+
enzymes.each do |e|
|
|
210
|
+
return true if e.downcase == name.downcase
|
|
211
|
+
end
|
|
212
|
+
return false
|
|
213
|
+
end
|
|
191
214
|
|
|
192
215
|
# Save the current data
|
|
193
216
|
# rebase.save_yaml( 'enz.yaml' )
|
|
194
217
|
# rebase.save_yaml( 'enz.yaml', 'ref.yaml' )
|
|
195
218
|
# rebase.save_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
|
|
219
|
+
#
|
|
220
|
+
# ---
|
|
221
|
+
# *Arguments*
|
|
222
|
+
# * +f_enzyme+: (_required_) Filename to save YAML formatted output of enzyme data
|
|
223
|
+
# * +f_reference+: (_optional_) Filename to save YAML formatted output of reference data
|
|
224
|
+
# * +f_supplier+: (_optional_) Filename to save YAML formatted output of supplier data
|
|
225
|
+
# *Returns*:: nothing
|
|
196
226
|
def save_yaml( f_enzyme, f_reference=nil, f_supplier=nil )
|
|
197
227
|
File.open(f_enzyme, 'w') { |f| f.puts YAML.dump(@enzyme_data) }
|
|
198
228
|
File.open(f_reference, 'w') { |f| f.puts YAML.dump(@reference_data) } if f_reference
|
|
199
229
|
File.open(f_supplier, 'w') { |f| f.puts YAML.dump(@supplier_data) } if f_supplier
|
|
230
|
+
return
|
|
200
231
|
end
|
|
201
232
|
|
|
202
233
|
# Read REBASE EMBOSS-formatted files
|
|
203
234
|
# rebase = Bio::REBASE.read( 'emboss_e' )
|
|
204
235
|
# rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r' )
|
|
205
236
|
# rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r', 'emboss_s' )
|
|
237
|
+
#
|
|
238
|
+
# ---
|
|
239
|
+
# *Arguments*
|
|
240
|
+
# * +f_enzyme+: (_required_) Filename to read enzyme data
|
|
241
|
+
# * +f_reference+: (_optional_) Filename to read reference data
|
|
242
|
+
# * +f_supplier+: (_optional_) Filename to read supplier data
|
|
243
|
+
# *Returns*:: Bio::REBASE object
|
|
206
244
|
def self.read( f_enzyme, f_reference=nil, f_supplier=nil )
|
|
207
245
|
e = IO.readlines(f_enzyme)
|
|
208
246
|
r = f_reference ? IO.readlines(f_reference) : nil
|
|
@@ -214,6 +252,13 @@ class REBASE
|
|
|
214
252
|
# rebase = Bio::REBASE.load_yaml( 'enz.yaml' )
|
|
215
253
|
# rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml' )
|
|
216
254
|
# rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
|
|
255
|
+
#
|
|
256
|
+
# ---
|
|
257
|
+
# *Arguments*
|
|
258
|
+
# * +f_enzyme+: (_required_) Filename to read YAML-formatted enzyme data
|
|
259
|
+
# * +f_reference+: (_optional_) Filename to read YAML-formatted reference data
|
|
260
|
+
# * +f_supplier+: (_optional_) Filename to read YAML-formatted supplier data
|
|
261
|
+
# *Returns*:: Bio::REBASE object
|
|
217
262
|
def self.load_yaml( f_enzyme, f_reference=nil, f_supplier=nil )
|
|
218
263
|
e = YAML.load_file(f_enzyme)
|
|
219
264
|
r = f_reference ? YAML.load_file(f_reference) : nil
|
|
@@ -409,5 +454,4 @@ class REBASE
|
|
|
409
454
|
end
|
|
410
455
|
|
|
411
456
|
end # REBASE
|
|
412
|
-
|
|
413
457
|
end # Bio
|
data/lib/bio/db/soft.rb
ADDED
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/soft.rb - Interface for SOFT formatted files
|
|
3
|
+
#
|
|
4
|
+
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
|
|
5
|
+
# Copyright:: Copyright (c) 2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# $Id: soft.rb,v 1.2 2007/04/05 23:35:40 trevor Exp $
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
module Bio #:nodoc:
|
|
12
|
+
|
|
13
|
+
#
|
|
14
|
+
# bio/db/soft.rb - Interface for SOFT formatted files
|
|
15
|
+
#
|
|
16
|
+
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
|
|
17
|
+
# Copyright:: Copyright (c) 2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
|
|
18
|
+
# License:: The Ruby License
|
|
19
|
+
#
|
|
20
|
+
#
|
|
21
|
+
# = Description
|
|
22
|
+
#
|
|
23
|
+
# "SOFT (Simple Omnibus in Text Format) is a compact, simple, line-based,
|
|
24
|
+
# ASCII text format that incorporates experimental data and metadata."
|
|
25
|
+
# -- <em>GEO, National Center for Biotechnology Information</em>
|
|
26
|
+
#
|
|
27
|
+
# The Bio::SOFT module reads SOFT Series or Platform formatted files that
|
|
28
|
+
# contain information
|
|
29
|
+
# describing one database, one series, one platform, and many samples (GEO
|
|
30
|
+
# accessions). The data from the file can then be viewed with Ruby methods.
|
|
31
|
+
#
|
|
32
|
+
# Bio::SOFT also supports the reading of SOFT DataSet files which contain
|
|
33
|
+
# one database, one dataset, and many subsets.
|
|
34
|
+
#
|
|
35
|
+
# Format specification is located here:
|
|
36
|
+
# * http://www.ncbi.nlm.nih.gov/projects/geo/info/soft2.html#SOFTformat
|
|
37
|
+
#
|
|
38
|
+
# SOFT data files may be directly downloaded here:
|
|
39
|
+
# * ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT
|
|
40
|
+
#
|
|
41
|
+
# NCBI's Gene Expression Omnibus (GEO) is here:
|
|
42
|
+
# * http://www.ncbi.nlm.nih.gov/geo
|
|
43
|
+
#
|
|
44
|
+
# = Usage
|
|
45
|
+
#
|
|
46
|
+
# If an attribute has more than one value then the values are stored in an
|
|
47
|
+
# Array of String objects. Otherwise the attribute is stored as a String.
|
|
48
|
+
#
|
|
49
|
+
# The platform and each sample may contain a table of data. A dataset from a
|
|
50
|
+
# DataSet file may also contain a table.
|
|
51
|
+
#
|
|
52
|
+
# Attributes are dynamically created based on the data in the file.
|
|
53
|
+
# Predefined keys have not been created in advance due to the variability of
|
|
54
|
+
# SOFT files in-the-wild.
|
|
55
|
+
#
|
|
56
|
+
# Keys are generally stored as Symbols. Keys for samples and
|
|
57
|
+
# table headings may alternatively be accessed with Strings.
|
|
58
|
+
# The names of samples (geo accessions) are case sensitive. Table headers
|
|
59
|
+
# are case insensitive.
|
|
60
|
+
#
|
|
61
|
+
# require 'bio'
|
|
62
|
+
#
|
|
63
|
+
# lines = IO.readlines('GSE3457_family.soft')
|
|
64
|
+
# soft = Bio::SOFT.new(lines)
|
|
65
|
+
#
|
|
66
|
+
# soft.platform[:geo_accession] # => "GPL2092"
|
|
67
|
+
# soft.platform[:organism] # => "Populus"
|
|
68
|
+
# soft.platform[:contributor] # => ["Jingyi,,Li", "Olga,,Shevchenko", "Steve,H,Strauss", "Amy,M,Brunner"]
|
|
69
|
+
# soft.platform[:data_row_count] # => "240"
|
|
70
|
+
# soft.platform.keys.sort {|a,b| a.to_s <=> b.to_s}[0..2] # => [:contact_address, :contact_city, :contact_country]
|
|
71
|
+
# soft.platform[:"contact_zip/postal_code"] # => "97331"
|
|
72
|
+
# soft.platform[:table].header # => ["ID", "GB_ACC", "SPOT_ID", "Function/Family", "ORGANISM", "SEQUENCE"]
|
|
73
|
+
# soft.platform[:table].header_description # => {"ORGANISM"=>"sequence sources", "SEQUENCE"=>"oligo sequence used", "Function/Family"=>"gene functions and family", "ID"=>"", "SPOT_ID"=>"", "GB_ACC"=>"Gene bank accession number"}
|
|
74
|
+
# soft.platform[:table].rows.size # => 240
|
|
75
|
+
# soft.platform[:table].rows[5] # => ["A039P68U", "AI163321", "", "TF, flowering protein CONSTANS", "P. tremula x P. tremuloides", "AGAAAATTCGATATACTGTCCGTAAAGAGGTAGCACTTAGAATGCAACGGAATAAAGGGCAGTTCACCTC"]
|
|
76
|
+
# soft.platform[:table].rows[5][4] # => "P. tremula x P. tremuloides"
|
|
77
|
+
# soft.platform[:table].rows[5][:organism] # => "P. tremula x P. tremuloides"
|
|
78
|
+
# soft.platform[:table].rows[5]['ORGANISM'] # => "P. tremula x P. tremuloides"
|
|
79
|
+
#
|
|
80
|
+
# soft.series[:geo_accession] # => "GSE3457"
|
|
81
|
+
# soft.series[:contributor] # => ["Jingyi,,Li", "Olga,,Shevchenko", "Ove,,Nilsson", "Steve,H,Strauss", "Amy,M,Brunner"]
|
|
82
|
+
# soft.series[:platform_id] # => "GPL2092"
|
|
83
|
+
# soft.series[:sample_id].size # => 74
|
|
84
|
+
# soft.series[:sample_id][0..4] # => ["GSM77557", "GSM77558", "GSM77559", "GSM77560", "GSM77561"]
|
|
85
|
+
#
|
|
86
|
+
# soft.database[:name] # => "Gene Expression Omnibus (GEO)"
|
|
87
|
+
# soft.database[:ref] # => "Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6"
|
|
88
|
+
# soft.database[:institute] # => "NCBI NLM NIH"
|
|
89
|
+
#
|
|
90
|
+
# soft.samples.size # => 74
|
|
91
|
+
# soft.samples[:GSM77600][:series_id] # => "GSE3457"
|
|
92
|
+
# soft.samples['GSM77600'][:series_id] # => "GSE3457"
|
|
93
|
+
# soft.samples[:GSM77600][:platform_id] # => "GPL2092"
|
|
94
|
+
# soft.samples[:GSM77600][:type] # => "RNA"
|
|
95
|
+
# soft.samples[:GSM77600][:title] # => "jst2b2"
|
|
96
|
+
# soft.samples[:GSM77600][:table].header # => ["ID_REF", "VALUE"]
|
|
97
|
+
# soft.samples[:GSM77600][:table].header_description # => {"ID_REF"=>"", "VALUE"=>"normalized signal intensities"}
|
|
98
|
+
# soft.samples[:GSM77600][:table].rows.size # => 217
|
|
99
|
+
# soft.samples[:GSM77600][:table].rows[5] # => ["A039P68U", "8.19"]
|
|
100
|
+
# soft.samples[:GSM77600][:table].rows[5][0] # => "A039P68U"
|
|
101
|
+
# soft.samples[:GSM77600][:table].rows[5][:id_ref] # => "A039P68U"
|
|
102
|
+
# soft.samples[:GSM77600][:table].rows[5]['ID_REF'] # => "A039P68U"
|
|
103
|
+
#
|
|
104
|
+
#
|
|
105
|
+
# lines = IO.readlines('GDS100.soft')
|
|
106
|
+
# soft = Bio::SOFT.new(lines)
|
|
107
|
+
#
|
|
108
|
+
# soft.database[:name] # => "Gene Expression Omnibus (GEO)"
|
|
109
|
+
# soft.database[:ref] # => "Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6"
|
|
110
|
+
# soft.database[:institute] # => "NCBI NLM NIH"
|
|
111
|
+
#
|
|
112
|
+
# soft.subsets.size # => 8
|
|
113
|
+
# soft.subsets.keys # => ["GDS100_1", "GDS100_2", "GDS100_3", "GDS100_4", "GDS100_5", "GDS100_6", "GDS100_7", "GDS100_8"]
|
|
114
|
+
# soft.subsets[:GDS100_7] # => {:dataset_id=>"GDS100", :type=>"time", :sample_id=>"GSM548,GSM543", :description=>"60 minute"}
|
|
115
|
+
# soft.subsets['GDS100_7'][:sample_id] # => "GSM548,GSM543"
|
|
116
|
+
# soft.subsets[:GDS100_7][:sample_id] # => "GSM548,GSM543"
|
|
117
|
+
# soft.subsets[:GDS100_7][:dataset_id] # => "GDS100"
|
|
118
|
+
#
|
|
119
|
+
# soft.dataset[:order] # => "none"
|
|
120
|
+
# soft.dataset[:sample_organism] # => "Escherichia coli"
|
|
121
|
+
# soft.dataset[:table].header # => ["ID_REF", "IDENTIFIER", "GSM549", "GSM542", "GSM543", "GSM547", "GSM544", "GSM545", "GSM546", "GSM548"]
|
|
122
|
+
# soft.dataset[:table].rows.size # => 5764
|
|
123
|
+
# soft.dataset[:table].rows[5] # => ["6", "EMPTY", "0.097", "0.217", "0.242", "0.067", "0.104", "0.162", "0.104", "0.154"]
|
|
124
|
+
# soft.dataset[:table].rows[5][4] # => "0.242"
|
|
125
|
+
# soft.dataset[:table].rows[5][:gsm549] # => "0.097"
|
|
126
|
+
# soft.dataset[:table].rows[5][:GSM549] # => "0.097"
|
|
127
|
+
# soft.dataset[:table].rows[5]['GSM549'] # => "0.097"
|
|
128
|
+
#
|
|
129
|
+
class SOFT
  # Parser and in-memory representation of NCBI GEO SOFT formatted text
  # (platform/series/sample "family" files as well as DataSet files).
  #
  # After construction the parsed entities are available through the
  # accessors below. Entity attributes are stored under Symbol keys with
  # the entity prefix removed (e.g. "!Sample_title = x" becomes
  # sample[:title] == "x"); an attribute that occurs more than once is
  # collected into an Array of values in order of appearance.
  attr_accessor :database
  attr_accessor :series, :platform, :samples
  attr_accessor :dataset, :subsets

  LINE_TYPE_ENTITY_INDICATOR = '^'
  LINE_TYPE_ENTITY_ATTRIBUTE = '!'
  LINE_TYPE_TABLE_HEADER = '#'
  # data table row defined by absence of line type character

  TABLE_COLUMN_DELIMITER = "\t"

  # Entity indicators under which a data table may appear.
  TABLE_ENTITIES = ['SAMPLE', 'PLATFORM', 'DATASET'].freeze

  # Constructor
  #
  # ---
  # *Arguments*
  # * +lines+: (_optional_) contents of SOFT formatted file. May be an
  #   Array of lines, or any object responding to +each_line+ (String,
  #   IO, StringIO). When +nil+, an empty object is created.
  # *Returns*:: Bio::SOFT
  def initialize(lines=nil)
    @database = Database.new

    @series = Series.new
    @platform = Platform.new
    @samples = Samples.new

    @dataset = Dataset.new
    @subsets = Subsets.new

    process(lines)
  end

  # Classes for Platform and Series files

  # Hash keyed by String names that also accepts Symbols on lookup.
  class Samples < Hash #:nodoc:
    def [](x)
      x = x.to_s if x.kind_of?( Symbol )
      super(x)
    end
  end

  class Entity < Hash #:nodoc:
  end

  class Sample < Entity #:nodoc:
  end

  class Platform < Entity #:nodoc:
  end

  class Series < Entity #:nodoc:
  end

  # Classes for DataSet files

  class Subsets < Samples #:nodoc:
  end

  class Subset < Entity #:nodoc:
  end

  class Dataset < Entity #:nodoc:
  end

  # Classes important for all types

  class Database < Entity #:nodoc:
  end

  # Tab-delimited data table: column descriptions, a header row and rows.
  class Table #:nodoc:
    attr_accessor :header
    attr_accessor :header_description
    attr_accessor :rows

    class Header < Array #:nodoc:
      # @column_index contains column name => numerical index of column
      # (names are stored as downcased Symbols).
      attr_accessor :column_index

      def initialize
        super()
        @column_index = {}
      end
    end

    class Row < Array #:nodoc:
      attr_accessor :header_object

      def initialize( n, header_object=nil )
        @header_object = header_object
        super(n)
      end

      # Element access by position (Integer) or by column name.
      # A column name may be a String or Symbol in any letter case.
      #
      # Raises IndexError when the name is not a known column, and
      # NoMethodError when the row has no header object attached.
      def [](x)
        # Integer (not Fixnum): Fixnum was folded into Integer in Ruby 2.4
        # and the constant no longer exists in current Ruby versions.
        return super(x) if x.kind_of?( Integer )

        begin
          name = x.to_s.downcase.to_sym
          position = @header_object.column_index[name]
        rescue NoMethodError
          unless @header_object
            $stderr.puts "Table::Row @header_object undefined!"
          end
          raise
        end

        unless position.kind_of?( Integer )
          raise IndexError, "#{name.inspect} is not a valid index. Contents of @header_object.column_index: #{@header_object.column_index.inspect}"
        end
        super(position)
      end
    end

    def initialize()
      @header_description = {}
      @header = Header.new
      @rows = []
    end

    # Sets the header from a tab-delimited line and builds the
    # name => position index. Raises if a header is already defined.
    def add_header( line )
      raise "Can only define one header" unless @header.empty?
      # concat (rather than assignment) keeps @header a Header instance
      @header.concat( parse_row( line ) )
      @header.each_with_index do |key, i|
        @header.column_index[key.downcase.to_sym] = i
      end
    end

    # Appends one data row parsed from a tab-delimited line.
    def add_row( line )
      @rows << Row.new( parse_row( line ), @header )
    end

    # The first line given to a table is its header; later lines are rows.
    def add_header_or_row( line )
      @header.empty? ? add_header( line ) : add_row( line )
    end

    protected
    def parse_row( line )
      line.split( TABLE_COLUMN_DELIMITER )
    end
  end

  #########
  protected
  #########

  # Parses SOFT content line by line and fills the entity objects.
  #
  # +lines+ may be nil (nothing is parsed), an Array of lines, or an
  # object responding to +each_line+. The given lines are not modified.
  # Raises RuntimeError (with the 1-based line number where available)
  # on malformed input.
  def process(lines)
    return if lines.nil?
    lines = lines.each_line if lines.respond_to?( :each_line )

    current_indicator = nil
    current_class_accessor = nil
    in_table = false

    lines.each_with_index do |raw_line, line_number|
      # Non-mutating strip: callers' strings may be frozen or reused.
      line = raw_line.to_s.strip
      next if line.empty?

      case line[0, 1]
      when LINE_TYPE_ENTITY_INDICATOR
        current_indicator, value = split_label_value_in( line[1..-1] )

        case current_indicator
        when 'DATABASE'
          current_class_accessor = @database
        when 'DATASET'
          current_class_accessor = @dataset
        when 'PLATFORM'
          current_class_accessor = @platform
        when 'SERIES'
          current_class_accessor = @series
        when 'SAMPLE'
          @samples[value] = Sample.new
          current_class_accessor = @samples[value]
        when 'SUBSET'
          @subsets[value] = Subset.new
          current_class_accessor = @subsets[value]
        else
          custom_raise( line_number, error_msg(40, line) )
        end

      when LINE_TYPE_ENTITY_ATTRIBUTE
        custom_raise( line_number, error_msg(30) ) if current_indicator.nil?

        # Handle lines such as '!platform_table_begin' and '!platform_table_end'
        if in_table
          next if line =~ %r{table_begin}
          if line =~ %r{table_end}
            in_table = false
            next
          end
        elsif line =~ %r{\A!\w*table_begin\z}
          # Table that was not preceded by any '#' column description.
          unless TABLE_ENTITIES.include?( current_indicator )
            custom_raise( line_number, error_msg(20, current_indicator.inspect) )
          end
          in_table = true
          current_class_accessor[:table] ||= Table.new
          next
        end

        key, value = split_label_value_in( line, true )
        key_s = key.to_sym

        if current_class_accessor.include?( key_s )
          # Repeated attribute: promote the stored value to an Array.
          if current_class_accessor[ key_s ].class != Array
            current_class_accessor[ key_s ] = [ current_class_accessor[ key_s ] ]
          end
          current_class_accessor[ key_s ] << value
        else
          current_class_accessor[ key_s ] = value
        end

      when LINE_TYPE_TABLE_HEADER
        unless TABLE_ENTITIES.include?( current_indicator )
          custom_raise( line_number, error_msg(20, current_indicator.inspect) )
        end

        in_table = true # may be redundant, computationally not worth checking

        # We only expect one table per platform, sample or dataset
        current_class_accessor[:table] ||= Table.new
        key, value = split_label_value_in( line )
        # key[1..-1] -- Remove first character which is the LINE_TYPE_TABLE_HEADER
        current_class_accessor[:table].header_description[ key[1..-1] ] = value

      else
        # Type: No line type - should be a row in a table.
        if current_indicator.nil? || !in_table
          custom_raise( line_number, error_msg(10) )
        end
        current_class_accessor[:table].add_header_or_row( line )
      end
    end
  end

  # Returns the text of error message number +i+ (10, 20, 30 or 40);
  # +extra_info+ is interpolated into messages 20 and 40.
  # Raises IndexError for an unknown message number.
  def error_msg( i, extra_info=nil )
    case i
    when 10
      x = ["Lines without line-type characters are rows in a table, but",
           "a line containing an entity indicator such as",
           "\"#{LINE_TYPE_ENTITY_INDICATOR}SAMPLE\",",
           "\"#{LINE_TYPE_ENTITY_INDICATOR}PLATFORM\",",
           "or \"#{LINE_TYPE_ENTITY_INDICATOR}DATASET\" has not been",
           "previously encountered or it does not appear that this line is",
           "in a table."]
    when 20
      # tables are allowed inside samples, platforms and datasets
      x = ["Tables are only allowed inside SAMPLE, PLATFORM and DATASET.",
           "Current table information found inside #{extra_info}."]
    when 30
      x = ["Entity attribute line (\"#{LINE_TYPE_ENTITY_ATTRIBUTE}\")",
           "found before entity indicator line (\"#{LINE_TYPE_ENTITY_INDICATOR}\")"]
    when 40
      x = ["Unknown entity indicator. Must be DATABASE, SAMPLE, PLATFORM,",
           "SERIES, DATASET, or SUBSET."]
      x << "Offending line: #{extra_info.inspect}." if extra_info
    else
      raise IndexError, "Unknown error message requested."
    end

    x.join(" ")
  end

  # Raises RuntimeError whose message carries the 1-based input line number.
  def custom_raise( line_number_with_0_based_indexing, msg )
    raise ["Error processing input line: #{line_number_with_0_based_indexing+1}",
           msg].join("\t")
  end

  # Splits "label = value" at the first "=" (surrounding whitespace is
  # dropped) and returns [label, value].
  #
  # When +shift_key+ is true, everything up to and including the first
  # underscore of the label is removed ("!Sample_title" => "title").
  # Raises RuntimeError when the line has no "=" or, with +shift_key+,
  # when the label has no underscore.
  def split_label_value_in( line, shift_key=false )
    match = %r{\s*=\s*}.match( line.to_s )
    key, value = match ? [match.pre_match, match.post_match] : [nil, nil]

    # Keep only what follows the first underscore; nil if there is none.
    key = key[%r{_(.*)}m, 1] if shift_key && key

    if key.nil? || value.nil?
      raise "Unable to split line into label and value: #{line.inspect}"
    end

    [key, value]
  end

end # SOFT
|
|
404
|
+
end # Bio
|