bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
data/lib/bio/db/kegg/genome.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2001, 2002, 2007 Toshiaki Katayama <k@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'bio/db'
|
|
@@ -19,6 +19,8 @@ class KEGG
|
|
|
19
19
|
# == References
|
|
20
20
|
#
|
|
21
21
|
# * ftp://ftp.genome.jp/pub/kegg/genomes/genome
|
|
22
|
+
# * http://www.genome.jp/dbget-bin/www_bfind?genome
|
|
23
|
+
# * http://www.genome.jp/kegg/catalog/org_list.html
|
|
22
24
|
#
|
|
23
25
|
class GENOME < KEGGDB
|
|
24
26
|
|
|
@@ -199,43 +201,3 @@ end # GENOME
|
|
|
199
201
|
end # KEGG
|
|
200
202
|
end # Bio
|
|
201
203
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
if __FILE__ == $0
|
|
205
|
-
|
|
206
|
-
begin
|
|
207
|
-
require 'pp'
|
|
208
|
-
def p(arg); pp(arg); end
|
|
209
|
-
rescue LoadError
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
require 'bio/io/flatfile'
|
|
213
|
-
|
|
214
|
-
ff = Bio::FlatFile.new(Bio::KEGG::GENOME, ARGF)
|
|
215
|
-
|
|
216
|
-
ff.each do |genome|
|
|
217
|
-
|
|
218
|
-
puts "### Tags"
|
|
219
|
-
p genome.tags
|
|
220
|
-
|
|
221
|
-
[
|
|
222
|
-
%w( ENTRY entry_id ),
|
|
223
|
-
%w( NAME name ),
|
|
224
|
-
%w( DEFINITION definition ),
|
|
225
|
-
%w( TAXONOMY taxonomy taxid lineage ),
|
|
226
|
-
%w( REFERENCE references ),
|
|
227
|
-
%w( CHROMOSOME chromosomes ),
|
|
228
|
-
%w( PLASMID plasmids ),
|
|
229
|
-
%w( STATISTICS statistics nalen num_gene num_rna ),
|
|
230
|
-
].each do |x|
|
|
231
|
-
puts "### " + x.shift
|
|
232
|
-
x.each do |m|
|
|
233
|
-
p genome.send(m)
|
|
234
|
-
end
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
end
|
|
240
|
-
|
|
241
|
-
|
data/lib/bio/db/kegg/glycan.rb
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'bio/db'
|
|
11
|
+
require 'bio/db/kegg/common'
|
|
11
12
|
|
|
12
13
|
module Bio
|
|
13
14
|
class KEGG
|
|
@@ -17,6 +18,21 @@ class GLYCAN < KEGGDB
|
|
|
17
18
|
DELIMITER = RS = "\n///\n"
|
|
18
19
|
TAGSIZE = 12
|
|
19
20
|
|
|
21
|
+
include Common::DblinksAsHash
|
|
22
|
+
# Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
|
|
23
|
+
def dblinks_as_hash; super; end if false #dummy for RDoc
|
|
24
|
+
alias dblinks dblinks_as_hash
|
|
25
|
+
|
|
26
|
+
include Common::PathwaysAsHash
|
|
27
|
+
# Returns a Hash of the pathway ID and name in PATHWAY field.
|
|
28
|
+
def pathways_as_hash; super; end if false #dummy for RDoc
|
|
29
|
+
alias pathways pathways_as_hash
|
|
30
|
+
|
|
31
|
+
include Common::OrthologsAsHash
|
|
32
|
+
# Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
|
|
33
|
+
def orthologs_as_hash; super; end if false #dummy for RDoc
|
|
34
|
+
alias orthologs orthologs_as_hash
|
|
35
|
+
|
|
20
36
|
def initialize(entry)
|
|
21
37
|
super(entry, TAGSIZE)
|
|
22
38
|
end
|
|
@@ -73,7 +89,7 @@ class GLYCAN < KEGGDB
|
|
|
73
89
|
end
|
|
74
90
|
|
|
75
91
|
# PATHWAY
|
|
76
|
-
def
|
|
92
|
+
def pathways_as_strings
|
|
77
93
|
lines_fetch('PATHWAY')
|
|
78
94
|
end
|
|
79
95
|
|
|
@@ -91,7 +107,7 @@ class GLYCAN < KEGGDB
|
|
|
91
107
|
end
|
|
92
108
|
|
|
93
109
|
# ORTHOLOGY
|
|
94
|
-
def
|
|
110
|
+
def orthologs_as_strings
|
|
95
111
|
unless @data['ORTHOLOGY']
|
|
96
112
|
@data['ORTHOLOGY'] = lines_fetch('ORTHOLOGY')
|
|
97
113
|
end
|
|
@@ -126,7 +142,7 @@ class GLYCAN < KEGGDB
|
|
|
126
142
|
end
|
|
127
143
|
|
|
128
144
|
# DBLINKS
|
|
129
|
-
def
|
|
145
|
+
def dblinks_as_strings
|
|
130
146
|
unless @data['DBLINKS']
|
|
131
147
|
@data['DBLINKS'] = lines_fetch('DBLINKS')
|
|
132
148
|
end
|
|
@@ -143,24 +159,3 @@ end # GLYCAN
|
|
|
143
159
|
end # KEGG
|
|
144
160
|
end # Bio
|
|
145
161
|
|
|
146
|
-
|
|
147
|
-
if __FILE__ == $0
|
|
148
|
-
entry = ARGF.read # gl:G00024
|
|
149
|
-
gl = Bio::KEGG::GLYCAN.new(entry)
|
|
150
|
-
p gl.entry_id
|
|
151
|
-
p gl.name
|
|
152
|
-
p gl.composition
|
|
153
|
-
p gl.mass
|
|
154
|
-
p gl.keggclass
|
|
155
|
-
p gl.bindings
|
|
156
|
-
p gl.compounds
|
|
157
|
-
p gl.reactions
|
|
158
|
-
p gl.pathways
|
|
159
|
-
p gl.enzymes
|
|
160
|
-
p gl.orthologs
|
|
161
|
-
p gl.references
|
|
162
|
-
p gl.dblinks
|
|
163
|
-
p gl.kcf
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
|
data/lib/bio/db/kegg/orthology.rb
CHANGED
|
@@ -5,10 +5,11 @@
|
|
|
5
5
|
# Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
|
|
11
11
|
require 'bio/db'
|
|
12
|
+
require 'bio/db/kegg/common'
|
|
12
13
|
|
|
13
14
|
module Bio
|
|
14
15
|
class KEGG
|
|
@@ -27,6 +28,16 @@ class ORTHOLOGY < KEGGDB
|
|
|
27
28
|
DELIMITER = RS = "\n///\n"
|
|
28
29
|
TAGSIZE = 12
|
|
29
30
|
|
|
31
|
+
include Common::DblinksAsHash
|
|
32
|
+
# Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
|
|
33
|
+
def dblinks_as_hash; super; end if false #dummy for RDoc
|
|
34
|
+
alias dblinks dblinks_as_hash
|
|
35
|
+
|
|
36
|
+
include Common::GenesAsHash
|
|
37
|
+
# Returns a Hash of the organism ID and an Array of entry IDs in GENES field.
|
|
38
|
+
def genes_as_hash; super; end if false #dummy for RDoc
|
|
39
|
+
alias genes genes_as_hash
|
|
40
|
+
|
|
30
41
|
# Reads a flat file format entry of the KO database.
|
|
31
42
|
def initialize(entry)
|
|
32
43
|
super(entry, TAGSIZE)
|
|
@@ -68,69 +79,18 @@ class ORTHOLOGY < KEGGDB
|
|
|
68
79
|
end
|
|
69
80
|
|
|
70
81
|
# Returns an Array of a database name and entry IDs in DBLINKS field.
|
|
71
|
-
def
|
|
72
|
-
|
|
73
|
-
@data['DBLINKS'] = lines_fetch('DBLINKS')
|
|
74
|
-
end
|
|
75
|
-
@data['DBLINKS']
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
# Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
|
|
79
|
-
def dblinks_as_hash
|
|
80
|
-
hash = {}
|
|
81
|
-
dblinks.each do |line|
|
|
82
|
-
name, *list = line.split(/\s+/)
|
|
83
|
-
db = name.downcase.sub(/:/, '')
|
|
84
|
-
hash[db] = list
|
|
85
|
-
end
|
|
86
|
-
return hash
|
|
82
|
+
def dblinks_as_strings
|
|
83
|
+
lines_fetch('DBLINKS')
|
|
87
84
|
end
|
|
88
85
|
|
|
89
86
|
# Returns an Array of the organism ID and entry IDs in GENES field.
|
|
90
|
-
def
|
|
91
|
-
|
|
92
|
-
@data['GENES'] = lines_fetch('GENES')
|
|
93
|
-
end
|
|
94
|
-
@data['GENES']
|
|
87
|
+
def genes_as_strings
|
|
88
|
+
lines_fetch('GENES')
|
|
95
89
|
end
|
|
96
90
|
|
|
97
|
-
# Returns a Hash of the organism ID and an Array of entry IDs in GENES field.
|
|
98
|
-
def genes_as_hash
|
|
99
|
-
hash = {}
|
|
100
|
-
genes.each do |line|
|
|
101
|
-
name, *list = line.split(/\s+/)
|
|
102
|
-
org = name.downcase.sub(/:/, '')
|
|
103
|
-
genes = list.map {|x| x.sub(/\(.*\)/, '')}
|
|
104
|
-
#names = list.map {|x| x.scan(/.*\((.*)\)/)}
|
|
105
|
-
hash[org] = genes
|
|
106
|
-
end
|
|
107
|
-
return hash
|
|
108
|
-
end
|
|
109
|
-
|
|
110
91
|
end # ORTHOLOGY
|
|
111
92
|
|
|
112
93
|
end # KEGG
|
|
113
94
|
end # Bio
|
|
114
95
|
|
|
115
96
|
|
|
116
|
-
|
|
117
|
-
if __FILE__ == $0
|
|
118
|
-
|
|
119
|
-
require 'bio/io/fetch'
|
|
120
|
-
|
|
121
|
-
flat = Bio::Fetch.query('ko', 'K00001')
|
|
122
|
-
entry = Bio::KEGG::ORTHOLOGY.new(flat)
|
|
123
|
-
|
|
124
|
-
p entry.entry_id
|
|
125
|
-
p entry.name
|
|
126
|
-
p entry.names
|
|
127
|
-
p entry.definition
|
|
128
|
-
p entry.keggclass
|
|
129
|
-
p entry.keggclasses
|
|
130
|
-
p entry.pathways
|
|
131
|
-
p entry.dblinks
|
|
132
|
-
p entry.genes
|
|
133
|
-
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
|
data/lib/bio/db/kegg/reaction.rb
CHANGED
|
@@ -2,12 +2,15 @@
|
|
|
2
2
|
# = bio/db/kegg/reaction.rb - KEGG REACTION database class
|
|
3
3
|
#
|
|
4
4
|
# Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
|
|
5
6
|
# License:: The Ruby License
|
|
6
7
|
#
|
|
7
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
8
9
|
#
|
|
9
10
|
|
|
10
11
|
require 'bio/db'
|
|
12
|
+
require 'bio/db/kegg/common'
|
|
13
|
+
require 'enumerator'
|
|
11
14
|
|
|
12
15
|
module Bio
|
|
13
16
|
class KEGG
|
|
@@ -17,44 +20,100 @@ class REACTION < KEGGDB
|
|
|
17
20
|
DELIMITER = RS = "\n///\n"
|
|
18
21
|
TAGSIZE = 12
|
|
19
22
|
|
|
23
|
+
include Common::PathwaysAsHash
|
|
24
|
+
# Returns a Hash of the pathway ID and name in PATHWAY field.
|
|
25
|
+
def pathways_as_hash; super; end if false #dummy for RDoc
|
|
26
|
+
alias pathways pathways_as_hash
|
|
27
|
+
|
|
28
|
+
include Common::OrthologsAsHash
|
|
29
|
+
# Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
|
|
30
|
+
def orthologs_as_hash; super; end if false #dummy for RDoc
|
|
31
|
+
alias orthologs orthologs_as_hash
|
|
32
|
+
|
|
33
|
+
# Creates a new Bio::KEGG::REACTION object.
|
|
34
|
+
# ---
|
|
35
|
+
# *Arguments*:
|
|
36
|
+
# * (required) _entry_: (String) single entry as a string
|
|
37
|
+
# *Returns*:: Bio::KEGG::REACTION object
|
|
20
38
|
def initialize(entry)
|
|
21
39
|
super(entry, TAGSIZE)
|
|
22
40
|
end
|
|
23
41
|
|
|
24
|
-
# ENTRY
|
|
42
|
+
# ID of the entry, described in the ENTRY line.
|
|
43
|
+
# ---
|
|
44
|
+
# *Returns*:: String
|
|
25
45
|
def entry_id
|
|
26
46
|
field_fetch('ENTRY')[/\S+/]
|
|
27
47
|
end
|
|
28
48
|
|
|
29
|
-
# NAME
|
|
49
|
+
# Name of the reaction, described in the NAME line.
|
|
50
|
+
# ---
|
|
51
|
+
# *Returns*:: String
|
|
30
52
|
def name
|
|
31
|
-
field_fetch('NAME')
|
|
53
|
+
field_fetch('NAME')
|
|
32
54
|
end
|
|
33
55
|
|
|
34
|
-
# DEFINITION
|
|
56
|
+
# Definition of the reaction, described in the DEFINITION line.
|
|
57
|
+
# ---
|
|
58
|
+
# *Returns*:: String
|
|
35
59
|
def definition
|
|
36
60
|
field_fetch('DEFINITION')
|
|
37
61
|
end
|
|
38
62
|
|
|
39
|
-
# EQUATION
|
|
63
|
+
# Chemical equation, described in the EQUATION line.
|
|
64
|
+
# ---
|
|
65
|
+
# *Returns*:: String
|
|
40
66
|
def equation
|
|
41
67
|
field_fetch('EQUATION')
|
|
42
68
|
end
|
|
43
69
|
|
|
44
|
-
# RPAIR
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
70
|
+
# KEGG RPAIR (ReactantPair) information, described in the RPAIR lines.
|
|
71
|
+
# ---
|
|
72
|
+
# *Returns*:: Array containing String
|
|
73
|
+
def rpairs_as_strings
|
|
74
|
+
lines_fetch('RPAIR')
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# KEGG RPAIR (ReactantPair) information, described in the RPAIR lines.
|
|
78
|
+
# Returns a hash of RPair IDs and [ name, type ] informations, for example,
|
|
79
|
+
# { "RP12733" => [ "C00022_C00900", "trans" ],
|
|
80
|
+
# "RP05698" => [ "C00011_C00022", "leave" ],
|
|
81
|
+
# "RP00440" => [ "C00022_C00900", "main" ]
|
|
82
|
+
# }
|
|
83
|
+
# ---
|
|
84
|
+
# *Returns*:: Hash
|
|
85
|
+
def rpairs_as_hash
|
|
86
|
+
unless defined? @rpairs_as_hash
|
|
87
|
+
rps = {}
|
|
88
|
+
rpairs_as_strings.each do |line|
|
|
89
|
+
namespace, entry_id, name, rptype = line.split(/\s+/)
|
|
90
|
+
rps[entry_id] = [ name, rptype ]
|
|
91
|
+
end
|
|
92
|
+
@rpairs_as_hash = rps
|
|
48
93
|
end
|
|
49
|
-
@
|
|
94
|
+
@rpairs_as_hash
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
alias rpairs rpairs_as_hash
|
|
98
|
+
|
|
99
|
+
# Returns the content of the RPAIR entry as tokens
|
|
100
|
+
# (RPair signature, RPair ID, , RPair type).
|
|
101
|
+
# ---
|
|
102
|
+
# *Returns*:: Array containing String
|
|
103
|
+
def rpairs_as_tokens
|
|
104
|
+
fetch('RPAIR').split(/\s+/)
|
|
50
105
|
end
|
|
51
106
|
|
|
52
|
-
# PATHWAY
|
|
53
|
-
|
|
54
|
-
|
|
107
|
+
# Pathway information, described in the PATHWAY lines.
|
|
108
|
+
# ---
|
|
109
|
+
# *Returns*:: Array containing String
|
|
110
|
+
def pathways_as_strings
|
|
111
|
+
lines_fetch('PATHWAY')
|
|
55
112
|
end
|
|
56
113
|
|
|
57
|
-
# ENZYME
|
|
114
|
+
# Enzymes described in the ENZYME line.
|
|
115
|
+
# ---
|
|
116
|
+
# *Returns*:: Array containing String
|
|
58
117
|
def enzymes
|
|
59
118
|
unless @data['ENZYME']
|
|
60
119
|
@data['ENZYME'] = fetch('ENZYME').scan(/\S+/)
|
|
@@ -62,21 +121,15 @@ class REACTION < KEGGDB
|
|
|
62
121
|
@data['ENZYME']
|
|
63
122
|
end
|
|
64
123
|
|
|
124
|
+
# Orthologs described in the ORTHOLOGY lines.
|
|
125
|
+
# ---
|
|
126
|
+
# *Returns*:: Array containing String
|
|
127
|
+
def orthologs_as_strings
|
|
128
|
+
lines_fetch('ORTHOLOGY')
|
|
129
|
+
end
|
|
130
|
+
|
|
65
131
|
end # REACTION
|
|
66
132
|
|
|
67
133
|
end # KEGG
|
|
68
134
|
end # Bio
|
|
69
135
|
|
|
70
|
-
|
|
71
|
-
if __FILE__ == $0
|
|
72
|
-
entry = ARGF.read
|
|
73
|
-
rn = Bio::KEGG::REACTION.new(entry)
|
|
74
|
-
p rn.entry_id
|
|
75
|
-
p rn.name
|
|
76
|
-
p rn.definition
|
|
77
|
-
p rn.equation
|
|
78
|
-
p rn.rpairs
|
|
79
|
-
p rn.pathways
|
|
80
|
-
p rn.enzymes
|
|
81
|
-
end
|
|
82
|
-
|
data/lib/bio/db/kegg/taxonomy.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
module Bio
|
|
@@ -278,54 +278,3 @@ end # Taxonomy
|
|
|
278
278
|
end # KEGG
|
|
279
279
|
end # Bio
|
|
280
280
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
if __FILE__ == $0
|
|
284
|
-
|
|
285
|
-
# Usage:
|
|
286
|
-
# % wget ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
|
|
287
|
-
# % ruby taxonomy.rb taxonomy | less -S
|
|
288
|
-
|
|
289
|
-
taxonomy = ARGV.shift
|
|
290
|
-
org_list = ARGV.shift || nil
|
|
291
|
-
|
|
292
|
-
if org_list
|
|
293
|
-
orgs = File.readlines(org_list).map{|x| x.strip}
|
|
294
|
-
else
|
|
295
|
-
orgs = nil
|
|
296
|
-
end
|
|
297
|
-
|
|
298
|
-
tree = Bio::KEGG::Taxonomy.new(taxonomy, orgs)
|
|
299
|
-
|
|
300
|
-
puts ">>> tree - original"
|
|
301
|
-
puts tree
|
|
302
|
-
|
|
303
|
-
puts ">>> tree - after compact"
|
|
304
|
-
tree.compact
|
|
305
|
-
puts tree
|
|
306
|
-
|
|
307
|
-
puts ">>> tree - after reduce"
|
|
308
|
-
tree.reduce
|
|
309
|
-
puts tree
|
|
310
|
-
|
|
311
|
-
puts ">>> path - sorted"
|
|
312
|
-
tree.path.sort.each do |path|
|
|
313
|
-
puts path.join("/")
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
puts ">>> group : orgs"
|
|
317
|
-
tree.dfs(tree.root) do |parent, children|
|
|
318
|
-
if orgs = tree.organisms(parent)
|
|
319
|
-
puts "#{parent.ljust(30)} (#{orgs.size})\t#{orgs.join(', ')}"
|
|
320
|
-
end
|
|
321
|
-
end
|
|
322
|
-
|
|
323
|
-
puts ">>> group : subgroups"
|
|
324
|
-
tree.dfs_with_level(tree.root) do |parent, children, level|
|
|
325
|
-
subgroups = children.keys.sort
|
|
326
|
-
indent = " " * level
|
|
327
|
-
label = "#{indent} #{level} #{parent}"
|
|
328
|
-
puts "#{label.ljust(35)}\t#{subgroups.join(', ')}"
|
|
329
|
-
end
|
|
330
|
-
|
|
331
|
-
end
|