RubyGems - bio - Versions diffs - 0.7.0 - Mend

bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (201) hide show

data/bin/bioruby +107 -0
data/bin/br_biofetch.rb +59 -0
data/bin/br_bioflat.rb +294 -0
data/bin/br_biogetseq.rb +57 -0
data/bin/br_pmfetch.rb +431 -0
data/doc/BioRuby.rd.ja +225 -0
data/doc/Changes-0.7.rd +236 -0
data/doc/Design.rd.ja +341 -0
data/doc/KEGG_API.rd +1437 -0
data/doc/KEGG_API.rd.ja +1399 -0
data/doc/TODO.rd.ja +138 -0
data/doc/Tutorial.rd +1138 -0
data/doc/Tutorial.rd.ja +2110 -0
data/etc/bioinformatics/seqdatabase.ini +210 -0
data/lib/bio.rb +256 -0
data/lib/bio/alignment.rb +1906 -0
data/lib/bio/appl/bl2seq/report.rb +350 -0
data/lib/bio/appl/blast.rb +269 -0
data/lib/bio/appl/blast/format0.rb +1402 -0
data/lib/bio/appl/blast/format8.rb +95 -0
data/lib/bio/appl/blast/report.rb +652 -0
data/lib/bio/appl/blast/rexml.rb +151 -0
data/lib/bio/appl/blast/wublast.rb +553 -0
data/lib/bio/appl/blast/xmlparser.rb +222 -0
data/lib/bio/appl/blat/report.rb +392 -0
data/lib/bio/appl/clustalw.rb +191 -0
data/lib/bio/appl/clustalw/report.rb +154 -0
data/lib/bio/appl/emboss.rb +68 -0
data/lib/bio/appl/fasta.rb +262 -0
data/lib/bio/appl/fasta/format10.rb +428 -0
data/lib/bio/appl/fasta/format6.rb +37 -0
data/lib/bio/appl/genscan/report.rb +570 -0
data/lib/bio/appl/hmmer.rb +129 -0
data/lib/bio/appl/hmmer/report.rb +556 -0
data/lib/bio/appl/mafft.rb +222 -0
data/lib/bio/appl/mafft/report.rb +119 -0
data/lib/bio/appl/psort.rb +555 -0
data/lib/bio/appl/psort/report.rb +473 -0
data/lib/bio/appl/sim4.rb +134 -0
data/lib/bio/appl/sim4/report.rb +501 -0
data/lib/bio/appl/sosui/report.rb +166 -0
data/lib/bio/appl/spidey/report.rb +604 -0
data/lib/bio/appl/targetp/report.rb +283 -0
data/lib/bio/appl/tmhmm/report.rb +238 -0
data/lib/bio/command.rb +166 -0
data/lib/bio/data/aa.rb +354 -0
data/lib/bio/data/codontable.rb +740 -0
data/lib/bio/data/na.rb +226 -0
data/lib/bio/db.rb +340 -0
data/lib/bio/db/aaindex.rb +280 -0
data/lib/bio/db/embl/common.rb +332 -0
data/lib/bio/db/embl/embl.rb +446 -0
data/lib/bio/db/embl/sptr.rb +954 -0
data/lib/bio/db/embl/swissprot.rb +32 -0
data/lib/bio/db/embl/trembl.rb +31 -0
data/lib/bio/db/embl/uniprot.rb +32 -0
data/lib/bio/db/fantom.rb +604 -0
data/lib/bio/db/fasta.rb +869 -0
data/lib/bio/db/genbank/common.rb +299 -0
data/lib/bio/db/genbank/ddbj.rb +34 -0
data/lib/bio/db/genbank/genbank.rb +354 -0
data/lib/bio/db/genbank/genpept.rb +73 -0
data/lib/bio/db/genbank/refseq.rb +31 -0
data/lib/bio/db/gff.rb +106 -0
data/lib/bio/db/go.rb +497 -0
data/lib/bio/db/kegg/brite.rb +51 -0
data/lib/bio/db/kegg/cell.rb +88 -0
data/lib/bio/db/kegg/compound.rb +130 -0
data/lib/bio/db/kegg/enzyme.rb +125 -0
data/lib/bio/db/kegg/expression.rb +173 -0
data/lib/bio/db/kegg/genes.rb +293 -0
data/lib/bio/db/kegg/genome.rb +362 -0
data/lib/bio/db/kegg/glycan.rb +213 -0
data/lib/bio/db/kegg/keggtab.rb +418 -0
data/lib/bio/db/kegg/kgml.rb +299 -0
data/lib/bio/db/kegg/ko.rb +178 -0
data/lib/bio/db/kegg/reaction.rb +97 -0
data/lib/bio/db/litdb.rb +131 -0
data/lib/bio/db/medline.rb +317 -0
data/lib/bio/db/nbrf.rb +199 -0
data/lib/bio/db/pdb.rb +38 -0
data/lib/bio/db/pdb/atom.rb +60 -0
data/lib/bio/db/pdb/chain.rb +117 -0
data/lib/bio/db/pdb/model.rb +106 -0
data/lib/bio/db/pdb/pdb.rb +1682 -0
data/lib/bio/db/pdb/residue.rb +122 -0
data/lib/bio/db/pdb/utils.rb +234 -0
data/lib/bio/db/prosite.rb +616 -0
data/lib/bio/db/rebase.rb +417 -0
data/lib/bio/db/transfac.rb +387 -0
data/lib/bio/feature.rb +201 -0
data/lib/bio/io/brdb.rb +103 -0
data/lib/bio/io/das.rb +471 -0
data/lib/bio/io/dbget.rb +212 -0
data/lib/bio/io/ddbjxml.rb +614 -0
data/lib/bio/io/fastacmd.rb +123 -0
data/lib/bio/io/fetch.rb +114 -0
data/lib/bio/io/flatfile.rb +496 -0
data/lib/bio/io/flatfile/bdb.rb +266 -0
data/lib/bio/io/flatfile/index.rb +1308 -0
data/lib/bio/io/flatfile/indexer.rb +778 -0
data/lib/bio/io/higet.rb +92 -0
data/lib/bio/io/keggapi.rb +863 -0
data/lib/bio/io/pubmed.rb +189 -0
data/lib/bio/io/registry.rb +308 -0
data/lib/bio/io/soapwsdl.rb +114 -0
data/lib/bio/io/sql.rb +428 -0
data/lib/bio/location.rb +650 -0
data/lib/bio/pathway.rb +991 -0
data/lib/bio/reference.rb +308 -0
data/lib/bio/sequence.rb +593 -0
data/lib/bio/shell.rb +51 -0
data/lib/bio/shell/core.rb +512 -0
data/lib/bio/shell/plugin/codon.rb +228 -0
data/lib/bio/shell/plugin/entry.rb +85 -0
data/lib/bio/shell/plugin/flatfile.rb +119 -0
data/lib/bio/shell/plugin/keggapi.rb +187 -0
data/lib/bio/shell/plugin/midi.rb +448 -0
data/lib/bio/shell/plugin/obda.rb +63 -0
data/lib/bio/shell/plugin/seq.rb +238 -0
data/lib/bio/shell/session.rb +214 -0
data/lib/bio/util/color_scheme.rb +214 -0
data/lib/bio/util/color_scheme/buried.rb +78 -0
data/lib/bio/util/color_scheme/helix.rb +78 -0
data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
data/lib/bio/util/color_scheme/strand.rb +78 -0
data/lib/bio/util/color_scheme/taylor.rb +69 -0
data/lib/bio/util/color_scheme/turn.rb +78 -0
data/lib/bio/util/color_scheme/zappo.rb +69 -0
data/lib/bio/util/contingency_table.rb +337 -0
data/lib/bio/util/sirna.rb +306 -0
data/lib/bioruby.rb +34 -0
data/sample/biofetch.rb +475 -0
data/sample/color_scheme_na.rb +99 -0
data/sample/dbget +37 -0
data/sample/fasta2tab.rb +99 -0
data/sample/fsplit.rb +51 -0
data/sample/gb2fasta.rb +31 -0
data/sample/gb2tab.rb +325 -0
data/sample/gbtab2mysql.rb +161 -0
data/sample/genes2nuc.rb +33 -0
data/sample/genes2pep.rb +33 -0
data/sample/genes2tab.rb +81 -0
data/sample/genome2rb.rb +29 -0
data/sample/genome2tab.rb +76 -0
data/sample/goslim.rb +311 -0
data/sample/gt2fasta.rb +47 -0
data/sample/pmfetch.rb +42 -0
data/sample/pmsearch.rb +42 -0
data/sample/psortplot_html.rb +222 -0
data/sample/ssearch2tab.rb +96 -0
data/sample/tdiary.rb +158 -0
data/sample/tfastx2tab.rb +100 -0
data/sample/vs-genes.rb +212 -0
data/test/data/SOSUI/sample.report +11 -0
data/test/data/TMHMM/sample.report +21 -0
data/test/data/blast/eco:b0002.faa +15 -0
data/test/data/blast/eco:b0002.faa.m0 +128 -0
data/test/data/blast/eco:b0002.faa.m7 +65 -0
data/test/data/blast/eco:b0002.faa.m8 +1 -0
data/test/data/embl/AB090716.embl +65 -0
data/test/data/genscan/sample.report +63 -0
data/test/data/prosite/prosite.dat +2233 -0
data/test/data/refseq/nm_126355.entret +64 -0
data/test/data/uniprot/p53_human.uniprot +1456 -0
data/test/runner.rb +10 -0
data/test/unit/bio/appl/blast/test_report.rb +427 -0
data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
data/test/unit/bio/appl/genscan/test_report.rb +195 -0
data/test/unit/bio/appl/sosui/test_report.rb +94 -0
data/test/unit/bio/appl/targetp/test_report.rb +159 -0
data/test/unit/bio/appl/test_blast.rb +159 -0
data/test/unit/bio/appl/test_fasta.rb +142 -0
data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
data/test/unit/bio/data/test_aa.rb +103 -0
data/test/unit/bio/data/test_codontable.rb +120 -0
data/test/unit/bio/data/test_na.rb +89 -0
data/test/unit/bio/db/embl/test_common.rb +130 -0
data/test/unit/bio/db/embl/test_embl.rb +227 -0
data/test/unit/bio/db/embl/test_sptr.rb +268 -0
data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
data/test/unit/bio/db/kegg/test_genes.rb +58 -0
data/test/unit/bio/db/test_fasta.rb +263 -0
data/test/unit/bio/db/test_gff.rb +140 -0
data/test/unit/bio/db/test_prosite.rb +1450 -0
data/test/unit/bio/io/test_ddbjxml.rb +87 -0
data/test/unit/bio/io/test_soapwsdl.rb +45 -0
data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
data/test/unit/bio/test_alignment.rb +1028 -0
data/test/unit/bio/test_command.rb +71 -0
data/test/unit/bio/test_db.rb +109 -0
data/test/unit/bio/test_feature.rb +128 -0
data/test/unit/bio/test_location.rb +51 -0
data/test/unit/bio/test_pathway.rb +485 -0
data/test/unit/bio/test_sequence.rb +386 -0
data/test/unit/bio/test_shell.rb +31 -0
data/test/unit/bio/util/test_color_scheme.rb +45 -0
data/test/unit/bio/util/test_contingency_table.rb +106 -0
data/test/unit/bio/util/test_sirna.rb +258 -0
metadata +295 -0

data/sample/gbtab2mysql.rb ADDED Viewed

@@ -0,0 +1,161 @@
+#!/usr/bin/env ruby
+#
+# gbtab2mysql.rb - load tab delimited GenBank data files into MySQL
+#
+#   Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+#  $Id: gbtab2mysql.rb,v 1.3 2002/06/25 19:30:26 k Exp $
+#
+require 'dbi'
+# Column and key definitions for the entry table; interpolated into the
+# CREATE TABLE statements built by create_table and merge_table.
+$schema_ent = <<SCHEMA_ENT
+	id		varchar(16)	NOT NULL PRIMARY KEY,
+	nalen		integer,
+	strand		varchar(5),
+	natype		varchar(5),
+	circular	varchar(10),
+	division	varchar(5),
+	date		varchar(12),
+	definition	varchar(255),
+	accession	varchar(30),
+	versions	varchar(30),
+	keywords	varchar(255),
+	segment		varchar(255),
+	source		varchar(255),
+	organism	varchar(255),
+	taxonomy	varchar(255),
+	comment		text,
+	basecount	varchar(255),
+	origin		varchar(255),
+	KEY (nalen),
+	KEY (division),
+	KEY (accession),
+	KEY (organism),
+	KEY (taxonomy)
+SCHEMA_ENT
+# Column and key definitions for the "ft" (feature) tables.
+$schema_ft = <<SCHEMA_FT
+	id		varchar(16)	NOT NULL,
+	num		integer,
+	feature		varchar(30),
+	position	text,
+	span_min	integer,
+	span_max	integer,
+	qualifier	varchar(30),
+	value		text,
+	KEY (id),
+	KEY (num),
+	KEY (feature),
+	KEY (span_min),
+	KEY (span_max),
+	KEY (qualifier)
+SCHEMA_FT
+# Column and key definitions for the "ref" (reference) tables.
+$schema_ref = <<SCHEMA_REF
+	id		varchar(16)	NOT NULL,
+	num		integer,
+	authors		text,
+	title		text,
+	journal		text,
+	medline		varchar(255),
+	pubmed		varchar(255),
+	KEY (id),
+	KEY (medline),
+	KEY (pubmed)
+SCHEMA_REF
+# Column and key definitions for the "seq" (sequence) tables.
+$schema_seq = <<SCHEMA_SEQ
+	id		varchar(16)	NOT NULL,
+	num		integer,
+	naseq		mediumtext,
+	KEY (id)
+SCHEMA_SEQ
+def create_table(dbh, table)
+  $stderr.puts("create tables on #{table}") if $DEBUG
+  query = "CREATE TABLE IF NOT EXISTS #{table} ( #{$schema_ent} )"
+  dbh.execute(query)
+  query = "CREATE TABLE IF NOT EXISTS #{table}ft ( #{$schema_ft} )"
+  dbh.execute(query)
+  query = "CREATE TABLE IF NOT EXISTS #{table}ref ( #{$schema_ref} )"
+  dbh.execute(query)
+  query = "CREATE TABLE IF NOT EXISTS #{table}seq ( #{$schema_seq} )"
+  dbh.execute(query)
+end
+def load_tab(dbh, base, table)
+  $stderr.puts("load #{base} into #{table}") if $DEBUG
+  query = "LOAD DATA LOCAL INFILE '#{base}.seq.ent.tab' INTO TABLE #{table}"
+  dbh.execute(query)
+  query = "LOAD DATA LOCAL INFILE '#{base}.seq.ft.tab' INTO TABLE #{table}ft"
+  dbh.execute(query)
+  query = "LOAD DATA LOCAL INFILE '#{base}.seq.ref.tab' INTO TABLE #{table}ref"
+  dbh.execute(query)
+  query = "LOAD DATA LOCAL INFILE '#{base}.seq.seq.tab' INTO TABLE #{table}seq"
+  dbh.execute(query)
+end
+def merge_table(dbh, tables)
+  query = "CREATE TABLE IF NOT EXISTS ent ( #{$schema_ent} )" +
+		" TYPE=MERGE UNION=( #{tables.join(', ')} )"
+  dbh.execute(query)
+  query = "CREATE TABLE IF NOT EXISTS ft ( #{$schema_ft} )" +
+		" TYPE=MERGE UNION=( #{tables.join('ft, ') + 'ft' } )"
+  dbh.execute(query)
+  query = "CREATE TABLE IF NOT EXISTS ref ( #{$schema_ref} )" +
+		" TYPE=MERGE UNION=( #{tables.join('ref, ') + 'ref' } )"
+  dbh.execute(query)
+  query = "CREATE TABLE IF NOT EXISTS seq ( #{$schema_seq} )" +
+		" TYPE=MERGE UNION=( #{tables.join('seq, ') + 'seq' } )"
+  dbh.execute(query)
+end
+$stderr.puts Time.now
+DBI.connect('dbi:Mysql:genbank:localhost', 'root') do |dbh|
+  tables = Array.new
+  Dir.glob("*.seq").sort.each do |gbk|
+    base = File.basename(gbk, '.seq')
+    div = base[/gb.../]
+    num = base[/\d+/].to_i
+    table = div
+    table = "%s%d" % [ div, (num - 1) / 20 + 1 ] if num > 20
+    unless dbh.tables.include?(table)
+      create_table(dbh, table)
+      tables.push(table)
+    end
+    load_tab(dbh, base, table)
+  end
+  merge_table(dbh, tables)
+end
+$stderr.puts Time.now

data/sample/genes2nuc.rb ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+#
+# genes2nuc.rb - convert KEGG/GENES entry into FASTA format (nuc)
+#
+#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  $Id: genes2nuc.rb,v 0.4 2002/06/23 20:21:56 k Exp $
+#
+require 'bio/db/kegg/genes'
+require 'bio/extend'
+include Bio
+while gets(KEGG::GENES::DELIMITER)
+  genes = KEGG::GENES.new($_)
+  next if genes.nalen == 0
+  puts ">#{genes.entry_id}  #{genes.definition}"
+  puts genes.naseq.fold(60+12, 12)
+end

data/sample/genes2pep.rb ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+#
+# genes2pep.rb - convert KEGG/GENES entry into FASTA format (pep)
+#
+#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  $Id: genes2pep.rb,v 0.4 2002/06/23 20:21:56 k Exp $
+#
+require 'bio/db/kegg/genes'
+require 'bio/extend'
+include Bio
+while gets(KEGG::GENES::DELIMITER)
+  genes = KEGG::GENES.new($_)
+  next if genes.aalen == 0
+  puts ">#{genes.entry_id}  #{genes.definition}"
+  puts genes.aaseq.fold(60+12, 12)
+end

data/sample/genes2tab.rb ADDED Viewed

@@ -0,0 +1,81 @@
+#!/usr/bin/env ruby
+#
+# genes2tab.rb - convert KEGG/GENES into tab delimited data for MySQL
+#
+#  Usage:
+#
+#    % genes2tab.rb /bio/db/kegg/genes/e.coli > genes_eco.tab
+#
+#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  $Id: genes2tab.rb,v 0.5 2002/06/23 20:21:56 k Exp $
+#
+require 'bio/db/kegg/genes'
+include Bio
+while entry = gets(KEGG::GENES::DELIMITER)
+  genes = KEGG::GENES.new(entry)
+  db = genes.dblinks.inspect
+  if genes.codon_usage.length == 64
+    cu = genes.codon_usage.join(' ')
+  else
+    cu = '\N'
+  end
+  ary = [
+    genes.entry_id,
+    genes.division,
+    genes.organism,
+    genes.name,
+    genes.definition,
+    genes.keggclass,
+    genes.position,
+    db,
+    cu,
+    genes.aalen,
+    genes.aaseq,
+    genes.nalen,
+    genes.naseq
+  ]
+  puts ary.join("\t")
+end
+=begin
+CREATE DATABASE IF NOT EXISTS db_name;
+CREATE TABLE IF NOT EXISTS db_name.genes (
+	id		varchar(30)	not NULL,	# ENTRY ID
+	division	varchar(30),			# CDS, tRNA etc.
+	organism	varchar(255),
+	gene		varchar(255),
+	definition	varchar(255),
+	keggclass	varchar(255),
+	position	varchar(255),
+	dblinks		varchar(255),
+	codon_usage	text,
+	aalen		integer,
+	aaseq		text,
+	nalen		integer,
+	naseq		text
+);
+LOAD DATA LOCAL INFILE 'genes.tab' INTO TABLE db_name.genes;
+=end

data/sample/genome2rb.rb ADDED Viewed

@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+#
+# genome2rb.rb - used to generate contents of the bio/data/keggorg.rb
+#
+#  Usage:
+#
+#    % genome2rb.rb genome | sort
+#
+#   Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  $Id: genome2rb.rb,v 1.1 2002/03/04 08:14:45 katayama Exp $
+#
+require 'bio'
+Bio::FlatFile.new(Bio::KEGG::GENOME,ARGF).each do |x|
+  puts "    '#{x.entry_id}' => [ '#{x.name}', '#{x.definition}' ],"
+end

data/sample/genome2tab.rb ADDED Viewed

@@ -0,0 +1,76 @@
+#!/usr/bin/env ruby
+#
+# genome2tab.rb - convert KEGG/GENOME into tab delimited data for MySQL
+#
+#  Usage:
+#
+#    % genome2tab.rb /bio/db/kegg/genome/genome > genome.tab
+#
+#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  $Id: genome2tab.rb,v 0.5 2002/06/23 20:21:56 k Exp $
+#
+require 'bio/db/kegg/genome'
+include Bio
+while entry = gets(KEGG::GENOME::DELIMITER)
+  genome = KEGG::GENOME.new(entry)
+  ref = genome.references.inspect
+  chr = genome.chromosomes.inspect
+  ary = [
+    genome.entry_id,
+    genome.name,
+    genome.definition,
+    genome.taxid,
+    genome.taxonomy,
+    genome.comment,
+    ref,
+    chr,
+    genome.nalen,
+    genome.num_gene,
+    genome.num_rna,
+    genome.gc,
+    genome.genomemap,
+  ]
+  puts ary.join("\t")
+end
+=begin
+CREATE DATABASE IF NOT EXISTS db_name;
+CREATE TABLE IF NOT EXISTS db_name.genome (
+	id		varchar(30)	not NULL,
+	name		varchar(80),
+	definition	varchar(255),
+	taxid		varchar(30),
+	taxonomy	varchar(255),
+	comment		varchar(255),
+	reference	text,
+	chromosome	text,
+	nalen		integer,
+	num_gene	integer,
+	num_rna		integer,
+	gc		float,
+	genomemap	varchar(30)
+);
+LOAD DATA LOCAL INFILE 'genome.tab' INTO TABLE db_name.genome;
+=end

data/sample/goslim.rb ADDED Viewed

@@ -0,0 +1,311 @@
+#!/usr/bin/env ruby
+#
+# goslim.rb - making a GO slim histogram
+#
+#  Usage:
+#
+#    % goslim.rb -p process.ontology -f function.ontology \
+#       -c component.ontology -s goslim_goa.2002 -g gene_association.mgi \
+#       -o mgi -r
+#    % R < mgi.R
+#    % gv mgi.pdf
+#
+#   Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  $Id: goslim.rb,v 1.3 2003/05/13 10:45:42 n Exp $
+#
+SCRIPT_VERSION = '$Id: goslim.rb,v 1.3 2003/05/13 10:45:42 n Exp $'
+# Help text printed by the -h/--help handling further down.  Backslashes are
+# doubled so that the shell line continuations and the \d of the regexp are
+# printed literally instead of being consumed as string escapes.
+USAGE = "#{__FILE__} - GO slim
+Usage:
+  #{__FILE__} -p process.ontology -f function.ontology \\
+     -c component.ontology -g gene_association.mgi -s goslim_goa.2002 \\
+     -o goslim.uniqued.out -r
+  #{__FILE__} -p process.ontology -f function.ontology \\
+     -c component.ontology -l gene_association.list -s goslim_goa.2002 \\
+     -o mgi.out -r
+  #{__FILE__} -p process.ontology -f function.ontology \\
+     -c component.ontology -g gene_association.mgi -s goslim_goa.2002 >\\
+     go_goslit.paired.list
+Options:
+ -p,--process <go/ontology/process.ontology>
+ -f,--function <go/ontology/function.ontology>
+ -c,--component <go/ontology/component.ontology>
+ -g,--ga <go/gene-associations/gene_association.someone>
+ -l,--galist <a GO_ID list>
+ -s,--goslim <go/GO_slim/goslim_someone>
+ -o,--output <file_name> -- output file name.
+ -r,--r_script -- Writing a R script in <file_name>.R to plot a barplot.
+ -h,--help
+ -v,--version
+Format:
+  GO ID list: /^GO:\\d{7}/ for each line
+Mitsuteru C. Nakao <n@bioruby.org>
+"
+require 'getoptlong'
+parser = GetoptLong.new
+parser.set_options(
+		   ['--process',   '-p', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--function',  '-f', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--component', '-c', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--ga',        '-g', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--galist',    '-l', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--goslim',    '-s', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--output',    '-o', GetoptLong::REQUIRED_ARGUMENT],
+		   ['--r_script',  '-r', GetoptLong::NO_ARGUMENT],
+		   ['--help',      '-h', GetoptLong::NO_ARGUMENT],
+		   ['--version',   '-v', GetoptLong::NO_ARGUMENT])
+begin
+  parser.each_option do |name, arg|
+    eval "$OPT_#{name.sub(/^--/, '').gsub(/-/, '_').upcase} = '#{arg}'"
+  end
+rescue
+  exit(1)
+end
+if $OPT_VERSION
+  puts SCRIPT_VERSION
+  exit(0)
+end
+if $OPT_HELP or !($OPT_PROCESS or $OPT_FUNCTION or $OPT_COMPONENT or
+		  ($OPT_GA or $OPT_GALIST))
+  puts USAGE
+  exit(0)
+end
+# subroutines
+def slim2r(datname)
+  tmp = "# usage: % R --vanilla < #{datname}.R
+data <- read.delim2('#{datname}')
+dat <- data$count
+names(dat) <- paste(data$GO.Term, dat)
+# set graphc format
+pdf('#{datname}.pdf')
+#postscript('#{datname}.ps')
+# outside margins
+par(mai = c(1,2.8,1,0.7))
+barplot(dat,
+        cex.names = 0.6,  # row names font size
+        las = 2,          # set horizontal row names
+        horiz = T,        # set horizontal
+        main = 'GO slim', # main title
+        # set color schema, proc, blue(3); func, red(2); comp, green(4)
+        col = cbind(c(data$aspect == 'process'),
+		    c(data$aspect == 'function'),
+                    c(data$aspect == 'component')) %*% c(4,2,3)) # color
+dev.off()
+"
+end
+# build GOslim uniqued list
+def slim(ontology, slim_ids, tmp, ga, aspect)
+  tmp[aspect] = Hash.new(0)
+  slim_ids.each {|slim_id|
+    term = ontology.goid2term(slim_id)
+    if term
+      tmp[aspect][term] = 0
+    else
+      next
+    end
+    ga.each {|gaid|
+      begin
+	res = ontology.bfs_shortest_path(slim_id, gaid)
+	tmp[aspect][term] += 1 if res[0]
+      rescue NameError
+	$stderr.puts "Warnning: GO:#{slim_id} (#{term}) doesn't exist in the #{aspect}.ontology."
+	tmp[aspect].delete(term)
+	break
+      end
+    }
+  }
+end
+# build GO-GOslim uniqued list
+def slim2(ontology, slim_ids, tmp, ga, aspect)
+  tmp[aspect] = Hash.new
+  slim_ids.each {|slim_id|
+    term = ontology.goid2term(slim_id)
+    if term
+      begin
+	unless tmp[aspect][term]['GOslim'].index(slim_id)
+	  tmp[aspect][term]['GOslim'] << slim_id
+	end
+      rescue NameError
+	tmp[aspect][term] = {'GOslim'=>[slim_id], 'GO'=>[]}
+      end
+    else
+      next
+    end
+    ga.each {|gaid|
+      begin
+	res = ontology.bfs_shortest_path(slim_id, gaid)
+	tmp[aspect][term]['GO'] << gaid if res[0]
+      rescue NameError
+	break
+      end
+    }
+  }
+end
+#
+# main
+#
+require 'bio/db/go'
+aspects = ['process', 'function', 'component']
+rootids = {
+  'process'   => '0008150',
+  'function'  => '0003674',
+  'component' => '0005575'}
+# files open
+ios = {}
+files = {
+  'process'   => $OPT_PROCESS,
+  'function'  => $OPT_FUNCTION,
+  'component' => $OPT_COMPONENT,
+  'ga'   => $OPT_GA,            # gene-association
+  'list' => $OPT_GALIST,        # gene-association list
+  'slim' => $OPT_GOSLIM}        # GO slim
+files.each {|k, file_name|
+  next if file_name == nil
+  ios[k] = File.open(file_name)
+}
+if $OPT_OUTPUT
+  ios['output']   = File.new($OPT_OUTPUT, "w+")
+  ios['r_script'] = File.new("#{$OPT_OUTPUT}.R", "w+")
+else
+  ios['r_script'] = ios['output'] = $stdout
+end
+# start
+# ontology: parse the three aspect ontologies
+ontology = {}
+aspects.each {|aspect|
+  ontology[aspect] = Bio::GO::Ontology.new(ios[aspect].read)
+}
+# GO slim
+goslim = Bio::GO::Ontology.new(ios['slim'].read)
+# assign an aspect to the terms in the GO slim.
+# The block form gives every aspect its own array.  Hash.new([]) would hand
+# the same array to all aspects (and never store it under a key), so each
+# aspect would see the slim IDs of all aspects, possibly several times.
+slim_ids = Hash.new {|hash, aspect| hash[aspect] = [] }
+goslim.to_list.map {|ent| ent.node }.flatten.uniq.each {|goid|
+  rootids.each {|aspect, rootid|
+    begin
+      # only called to find out whether the lookup raises; result unused
+      ontology[aspect].bfs_shortest_path(rootid, goid)
+      slim_ids[aspect] << goid
+    rescue NameError
+      $stderr.puts "Error: (#{rootid}, #{goid})"
+    end
+  }
+}
+# gene-associations: collect the GO IDs either from a gene association file
+# (-g) or from a plain list with one "GO:nnnnnnn" per line (-l)
+ga_ids = []
+if $OPT_GA
+  ga_ids = Bio::GO::GeneAssociation.parser(ios['ga'].read).map {|ent| ent.goid }
+elsif $OPT_GALIST
+  ios['list'].each_line do |line|
+    # keep the seven digits only, without the "GO:" prefix
+    ga_ids << $1 if /^GO:(\d{7})/ =~ line
+  end
+else
+  puts "Error: -l or -g options"
+  exit
+end
+# count number
+count = Hash.new(0)
+aspects.each {|aspect|
+  slim2(ontology[aspect], slim_ids[aspect], count, ga_ids, aspect)
+}
+# output
+if $OPT_R_SCRIPT and $OPT_OUTPUT
+  tmp = [['aspect', 'count', 'GO Term'].join("\t")]
+else
+  tmp = [['aspect', 'GO ID', 'GOslim Term', 'GOslim ID'].join("\t")]
+end
+['component','function','process'].each {|aspect|
+  count[aspect].sort {|a, b| b[1]['GO'].size <=> a[1]['GO'].size }.each {|term, value|
+    next if term == ""
+    if $OPT_R_SCRIPT and $OPT_OUTPUT
+      tmp << [aspect, value['GO'].size, term].join("\t")
+    else
+      value['GO'].each {|goid|
+	tmp << [aspect, "GO:#{goid}", term,
+	  value['GOslim'].map {|e| "GO:#{e}" }.join(' ')].join("\t")
+      }
+    end
+  }
+}
+ios['output'].puts tmp.join("\n")
+if $OPT_R_SCRIPT and $OPT_OUTPUT
+  ios['r_script'].puts slim2r($OPT_OUTPUT)
+end
+#