bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/fastq/fastq_to_biosequence.rb - Bio::Fastq to Bio::Sequence adapter module
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
require 'bio/sequence'
|
|
10
|
+
require 'bio/sequence/adapter'
|
|
11
|
+
|
|
12
|
+
# Internal use only. Normal users should not use this module.
|
|
13
|
+
#
|
|
14
|
+
# Bio::Fastq to Bio::Sequence adapter module.
|
|
15
|
+
# It is internally used in Bio::Fastq#to_biosequence.
|
|
16
|
+
#
|
|
17
|
+
module Bio::Sequence::Adapter::Fastq

  extend Bio::Sequence::Adapter

  private

  # Entries declared by name only (no block is given to the macro).
  # NOTE(review): def_biosequence_adapter comes from Bio::Sequence::Adapter,
  # which is not visible here; presumably each entry forwards the
  # same-named attribute of the wrapped object -- confirm there.
  def_biosequence_adapter :seq

  def_biosequence_adapter :entry_id

  # primary accession: the block answers with the entry_id of the object
  # it is handed.
  def_biosequence_adapter(:primary_accession) { |orig| orig.entry_id }

  def_biosequence_adapter :definition

  # Quality-related entries.
  def_biosequence_adapter :quality_scores

  def_biosequence_adapter :quality_score_type

  def_biosequence_adapter :error_probabilities

end #module Bio::Sequence::Adapter::Fastq
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/fastq/format_fastq.rb - FASTQ format generator
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
require 'bio/db/fastq'
|
|
10
|
+
require 'bio/sequence/format'
|
|
11
|
+
|
|
12
|
+
module Bio::Sequence::Format::Formatter
|
|
13
|
+
|
|
14
|
+
# INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
|
|
15
|
+
#
|
|
16
|
+
# FASTQ format output class for Bio::Sequence.
|
|
17
|
+
#
|
|
18
|
+
# The default FASTQ format is fastq-sanger.
|
|
19
|
+
class Fastq < Bio::Sequence::Format::FormatterBase
|
|
20
|
+
|
|
21
|
+
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
|
|
22
|
+
#
|
|
23
|
+
# Creates a new Fastq format generator object from the sequence.
|
|
24
|
+
#
|
|
25
|
+
# ---
|
|
26
|
+
# *Arguments*:
|
|
27
|
+
# * _sequence_: Bio::Sequence object
|
|
28
|
+
# * (optional) :repeat_title => (true or false) if true, repeating title in the "+" line; if not true, "+" only (default false)
|
|
29
|
+
# * (optional) :width => _width_: (Fixnum) width to wrap sequence and quality lines; nil to prevent wrapping (default 70)
|
|
30
|
+
# * (optional) :title => _title_: (String) completely replaces title line with the _title_ (default nil)
|
|
31
|
+
# * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities; false or nil means the lowest score, true means the highest score (default nil)
|
|
32
|
+
def initialize; end if false # dummy for RDoc: the "if false" modifier means this definition is never evaluated; it only gives RDoc a method to attach the comment above to
|
|
33
|
+
|
|
34
|
+
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
|
|
35
|
+
#
|
|
36
|
+
# Output the FASTQ format string of the sequence.
|
|
37
|
+
#
|
|
38
|
+
# Currently, this method is used in Bio::Sequence#output like so,
|
|
39
|
+
#
|
|
40
|
+
# s = Bio::Sequence.new('atgc')
|
|
41
|
+
# puts s.output(:fastq_sanger)
|
|
42
|
+
# ---
|
|
43
|
+
# *Returns*:: String object
|
|
44
|
+
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Builds the FASTQ text of @sequence.
#
# Options read from @options:
# * :title        - replaces the whole title line when given
# * :width        - wrap width for the sequence and quality lines
#                   (70 when the key is absent, nil disables wrapping)
# * :repeat_title - when true the title is repeated after "+"
# * :default_score - passed through to fastq_quality_string
#
# When no :title is given the title is built from the definition.
# The entry id (or "accession.version" when the entry id is nil) is
# prepended unless the definition already starts with it as a whole
# word.  A definition that is exactly the entry id, or an empty
# definition, yields just the entry id (no duplicated id and no
# trailing blank).
#
# An empty sequence is written as an empty line, with or without
# wrapping, so the record always has four parts.
# ---
# *Returns*:: String object
def output
  title = @options[:title]
  width = @options.key?(:width) ? @options[:width] : 70
  seq = @sequence.seq.to_s
  entry_id = @sequence.entry_id ||
    "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
  definition = @sequence.definition

  unless title
    title = definition.to_s
    id_len = entry_id.length
    if title.empty?
      title = entry_id
    else
      # The id counts as present only when it is followed by
      # whitespace or by the end of the string ("id1" must not match
      # a definition starting with "id10").
      has_id = title[0, id_len] == entry_id &&
        (title.length == id_len || /\s/ =~ title[id_len, 1])
      title = "#{entry_id} #{title}" unless has_id
    end
  end

  title2 = @options[:repeat_title] ? title : ''
  qstr = fastq_quality_string(seq, @options[:default_score])

  # One line-breaking routine shared by the sequence and quality
  # parts; the wrapping pattern is compiled once.
  wrap =
    if width
      pattern = Regexp.new(".{1,#{width}}")
      lambda { |str| str.empty? ? "\n" : str.gsub(pattern, "\\0\n") }
    else
      lambda { |str| str + "\n" }
    end

  "@#{title}\n" + wrap.call(seq) + "+#{title2}\n" + wrap.call(qstr)
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
# Format data object used by this formatter: the FASTQ_SANGER
# instance.  The Fastq_solexa and Fastq_illumina subclasses in this
# file redefine this method to pick their own variant.
def fastq_format_data
  format_data = Bio::Fastq::FormatData
  format_data::FASTQ_SANGER.instance
end
|
|
79
|
+
|
|
80
|
+
# Builds the quality string for +seq+.
#
# The scores come from fastq_quality_scores.  The list is then made
# exactly as long as the sequence before it is encoded with
# fastq_format_data.scores2str:
#
# * too short: padded with +default_score+.  true selects the upper
#   end of the format's score_range, nil or false the lower end, and
#   any other value is used as the score itself.
# * too long: cut to the sequence length, so the quality line can
#   never be longer than the sequence line.
# ---
# *Arguments*:
# * _seq_: (String) sequence text
# * _default_score_: (Integer, true, false or nil) padding score
# *Returns*:: String object
def fastq_quality_string(seq, default_score)
  len = seq.length
  sc = fastq_quality_scores(seq)
  if sc.size < len
    fill =
      case default_score
      when true
        # when true, the highest score
        fastq_format_data.score_range.end
      when nil, false
        # when false or nil, the lowest score
        fastq_format_data.score_range.begin
      else
        default_score
      end
    sc = sc + ([ fill ] * (len - sc.size))
  elsif sc.size > len
    sc = sc[0, len]
  end
  fastq_format_data.scores2str(sc)
end
|
|
94
|
+
|
|
95
|
+
# Picks the quality scores to write for +seq+, in the score type of
# fastq_format_data.  The result never has more elements than the
# sequence has characters; it may have fewer (or none) when the
# sequence object does not carry enough information.
#
# Sources are tried in this order:
# 1. quality_scores already in the target type and covering the
#    whole sequence;
# 2. error_probabilities covering the whole sequence (via p2q);
# 3. quality_scores covering the whole sequence in the other of
#    :phred / :solexa (converted; a nil type is regarded as :phred);
# 4. whichever of quality_scores / error_probabilities covers the
#    larger part of the sequence;
# 5. otherwise an empty array.
# ---
# *Arguments*:
# * _seq_: (String) sequence text
# *Returns*:: Array of scores
def fastq_quality_scores(seq)
  len = seq.length
  return [] if len <= 0
  fmt = fastq_format_data
  target = fmt.quality_score_type

  # checks quality_scores
  qsc = @sequence.quality_scores
  qsc_type = @sequence.quality_score_type
  if qsc && qsc_type && qsc_type == target && qsc.size >= len
    # sliced like every other branch, so surplus scores are dropped
    return qsc[0, len]
  end

  # checks error_probabilities
  ep = @sequence.error_probabilities
  if ep && ep.size >= len
    return fmt.p2q(ep[0, len])
  end

  # If quality score type of the sequence is nil, regarded as :phred.
  qsc_type ||= :phred

  # checks if scores can be converted
  if qsc && qsc.size >= len
    case [ qsc_type, target ]
    when [ :phred, :solexa ]
      return fmt.convert_scores_from_phred_to_solexa(qsc[0, len])
    when [ :solexa, :phred ]
      return fmt.convert_scores_from_solexa_to_phred(qsc[0, len])
    end
  end

  # scores of an unknown type cannot be used at all
  unless qsc_type == :phred || qsc_type == :solexa
    qsc_type = nil
    qsc = nil
  end

  # compares how much of the sequence each source covers
  qsc_cov = qsc ? qsc.size.quo(len) : 0
  ep_cov = ep ? ep.size.quo(len) : 0
  if qsc_cov > ep_cov
    # qsc can still be longer than the sequence here (nil score type
    # with a matching target), hence the slice
    usable = qsc[0, len]
    case [ qsc_type, target ]
    when [ :phred, :phred ], [ :solexa, :solexa ]
      return usable
    when [ :phred, :solexa ]
      return fmt.convert_scores_from_phred_to_solexa(usable)
    when [ :solexa, :phred ]
      return fmt.convert_scores_from_solexa_to_phred(usable)
    end
  elsif ep_cov > qsc_cov
    # ep is shorter than the sequence at this point
    return fmt.p2q(ep)
  end

  # if no information, returns empty array
  []
end
|
|
154
|
+
end #class Fastq
|
|
155
|
+
|
|
156
|
+
# class Fastq_sanger is the same as the Fastq class.
|
|
157
|
+
Fastq_sanger = Fastq
|
|
158
|
+
|
|
159
|
+
# Same formatter as Fastq, except that the format data object is the
# FASTQ_SOLEXA instance.
class Fastq_solexa < Fastq

  # Format data object used by this formatter.
  def fastq_format_data
    Bio::Fastq::FormatData::FASTQ_SOLEXA.instance
  end
  private :fastq_format_data

end #class Fastq_solexa
|
|
165
|
+
|
|
166
|
+
# Same formatter as Fastq, except that the format data object is the
# FASTQ_ILLUMINA instance.
class Fastq_illumina < Fastq

  # Format data object used by this formatter.
  def fastq_format_data
    Bio::Fastq::FormatData::FASTQ_ILLUMINA.instance
  end
  private :fastq_format_data

end #class Fastq_illumina
|
|
172
|
+
|
|
173
|
+
end #module Bio::Sequence::Format::Formatter
|
|
174
|
+
|
|
175
|
+
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2000-2005 Toshiaki Katayama <k@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'date'
|
|
@@ -163,88 +163,3 @@ class GenBank < NCBIDB
|
|
|
163
163
|
end # GenBank
|
|
164
164
|
end # Bio
|
|
165
165
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
if __FILE__ == $0
|
|
169
|
-
|
|
170
|
-
begin
|
|
171
|
-
require 'pp'
|
|
172
|
-
alias p pp
|
|
173
|
-
rescue LoadError
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
puts "### GenBank"
|
|
177
|
-
if ARGV.size > 0
|
|
178
|
-
gb = Bio::GenBank.new(ARGF.read)
|
|
179
|
-
else
|
|
180
|
-
require 'bio/io/fetch'
|
|
181
|
-
gb = Bio::GenBank.new(Bio::Fetch.query('gb', 'LPATOVGNS'))
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
puts "## LOCUS"
|
|
185
|
-
puts "# GenBank.locus"
|
|
186
|
-
p gb.locus
|
|
187
|
-
puts "# GenBank.entry_id"
|
|
188
|
-
p gb.entry_id
|
|
189
|
-
puts "# GenBank.nalen"
|
|
190
|
-
p gb.nalen
|
|
191
|
-
puts "# GenBank.strand"
|
|
192
|
-
p gb.strand
|
|
193
|
-
puts "# GenBank.natype"
|
|
194
|
-
p gb.natype
|
|
195
|
-
puts "# GenBank.circular"
|
|
196
|
-
p gb.circular
|
|
197
|
-
puts "# GenBank.division"
|
|
198
|
-
p gb.division
|
|
199
|
-
puts "# GenBank.date"
|
|
200
|
-
p gb.date
|
|
201
|
-
|
|
202
|
-
puts "## DEFINITION"
|
|
203
|
-
p gb.definition
|
|
204
|
-
|
|
205
|
-
puts "## ACCESSION"
|
|
206
|
-
p gb.accession
|
|
207
|
-
|
|
208
|
-
puts "## VERSION"
|
|
209
|
-
p gb.versions
|
|
210
|
-
p gb.version
|
|
211
|
-
p gb.gi
|
|
212
|
-
|
|
213
|
-
puts "## NID"
|
|
214
|
-
p gb.nid
|
|
215
|
-
|
|
216
|
-
puts "## KEYWORDS"
|
|
217
|
-
p gb.keywords
|
|
218
|
-
|
|
219
|
-
puts "## SEGMENT"
|
|
220
|
-
p gb.segment
|
|
221
|
-
|
|
222
|
-
puts "## SOURCE"
|
|
223
|
-
p gb.source
|
|
224
|
-
p gb.common_name
|
|
225
|
-
p gb.vernacular_name
|
|
226
|
-
p gb.organism
|
|
227
|
-
p gb.taxonomy
|
|
228
|
-
|
|
229
|
-
puts "## REFERENCE"
|
|
230
|
-
p gb.references
|
|
231
|
-
|
|
232
|
-
puts "## COMMENT"
|
|
233
|
-
p gb.comment
|
|
234
|
-
|
|
235
|
-
puts "## FEATURES"
|
|
236
|
-
p gb.features
|
|
237
|
-
|
|
238
|
-
puts "## BASE COUNT"
|
|
239
|
-
p gb.basecount
|
|
240
|
-
p gb.basecount('a')
|
|
241
|
-
p gb.basecount('A')
|
|
242
|
-
|
|
243
|
-
puts "## ORIGIN"
|
|
244
|
-
p gb.origin
|
|
245
|
-
p gb.naseq
|
|
246
|
-
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
data/lib/bio/db/gff.rb
CHANGED
|
@@ -1827,20 +1827,3 @@ module Bio
|
|
|
1827
1827
|
|
|
1828
1828
|
end # module Bio
|
|
1829
1829
|
|
|
1830
|
-
|
|
1831
|
-
if __FILE__ == $0
|
|
1832
|
-
begin
|
|
1833
|
-
require 'pp'
|
|
1834
|
-
alias p pp
|
|
1835
|
-
rescue LoadError
|
|
1836
|
-
end
|
|
1837
|
-
|
|
1838
|
-
this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
|
|
1839
|
-
this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
|
|
1840
|
-
this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
|
|
1841
|
-
this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
|
|
1842
|
-
this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
|
|
1843
|
-
this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
|
|
1844
|
-
this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
|
|
1845
|
-
p Bio::GFF.new(this_gff)
|
|
1846
|
-
end
|
data/lib/bio/db/go.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
# == Gene Ontology
|
|
11
11
|
#
|
|
@@ -89,7 +89,7 @@ class GO
|
|
|
89
89
|
stack = []
|
|
90
90
|
adj_list = []
|
|
91
91
|
|
|
92
|
-
str.
|
|
92
|
+
str.each_line {|line|
|
|
93
93
|
if /^!(.+?):\s+(\S.+)$/ =~ line # Parsing head lines
|
|
94
94
|
tag = $1
|
|
95
95
|
value = $2
|
|
@@ -198,13 +198,13 @@ class GO
|
|
|
198
198
|
# Block is acceptable.
|
|
199
199
|
def self.parser(str)
|
|
200
200
|
if block_given?
|
|
201
|
-
str.
|
|
201
|
+
str.each_line(DELIMITER) {|line|
|
|
202
202
|
next if /^!/ =~ line
|
|
203
203
|
yield GeneAssociation.new(line)
|
|
204
204
|
}
|
|
205
205
|
else
|
|
206
206
|
galist = []
|
|
207
|
-
str.
|
|
207
|
+
str.each_line(DELIMITER) {|line|
|
|
208
208
|
next if /^!/ =~ line
|
|
209
209
|
galist << GeneAssociation.new(line)
|
|
210
210
|
}
|
|
@@ -411,71 +411,3 @@ end # module Bio
|
|
|
411
411
|
|
|
412
412
|
|
|
413
413
|
|
|
414
|
-
if __FILE__ == $0
|
|
415
|
-
|
|
416
|
-
require 'net/http'
|
|
417
|
-
|
|
418
|
-
def wget(url)
|
|
419
|
-
if /http:\/\/(.+?)\// =~ url
|
|
420
|
-
host = $1
|
|
421
|
-
path = url[(url.index(host) + host.size)..url.size]
|
|
422
|
-
else
|
|
423
|
-
raise ArgumentError, "Invalid URL\n#{url}"
|
|
424
|
-
end
|
|
425
|
-
|
|
426
|
-
result = Net::HTTP.new(host).get(path).body
|
|
427
|
-
end
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
go_c_url = 'http://www.geneontology.org/ontology/component.ontology'
|
|
432
|
-
ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz'
|
|
433
|
-
e2g_url = 'http://www.geneontology.org/external2go/spkw2go'
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
puts "\n #==> Bio::GO::Ontology"
|
|
438
|
-
p go_c_url
|
|
439
|
-
component_ontology = wget(go_c_url)
|
|
440
|
-
comp = Bio::GO::Ontology.new(component_ontology)
|
|
441
|
-
|
|
442
|
-
[['0003673', '0005632'],
|
|
443
|
-
['0003673', '0005619'],
|
|
444
|
-
['0003673', '0004649']].each {|pair|
|
|
445
|
-
puts
|
|
446
|
-
p pair
|
|
447
|
-
p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }]
|
|
448
|
-
puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])"
|
|
449
|
-
p comp.bfs_shortest_path(pair[0], pair[1])
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
puts "\n #==> Bio::GO::External2go"
|
|
454
|
-
p e2g_url
|
|
455
|
-
spkw2go = Bio::GO::External2go.new(wget(e2g_url))
|
|
456
|
-
|
|
457
|
-
puts "\n #==> spkw2go.db"
|
|
458
|
-
p spkw2go.db
|
|
459
|
-
|
|
460
|
-
puts "\n #==> spkw2go[1]"
|
|
461
|
-
p spkw2go[1]
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
require 'zlib'
|
|
466
|
-
puts "\n #==> Bio::GO::GeenAssociation"
|
|
467
|
-
p ga_url
|
|
468
|
-
ga = Zlib::Inflate.inflate(wget(ga_url))
|
|
469
|
-
ga = Bio::GO::GeneAssociation.parser(ga)
|
|
470
|
-
|
|
471
|
-
puts "\n #==> ga.size"
|
|
472
|
-
p ga.size
|
|
473
|
-
|
|
474
|
-
puts "\n #==> ga[100]"
|
|
475
|
-
p ga[100]
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/kegg/common.rb - Common methods for KEGG database classes
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003-2007 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
|
|
6
|
+
# Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
#
|
|
9
|
+
#
|
|
10
|
+
#
|
|
11
|
+
# == Description
|
|
12
|
+
#
|
|
13
|
+
# Note that the modules in this file are intended to be Bio::KEGG::*
|
|
14
|
+
# internal use only.
|
|
15
|
+
#
|
|
16
|
+
# This file contains modules that implement methods commonly used from
|
|
17
|
+
# KEGG database parser classes.
|
|
18
|
+
#
|
|
19
|
+
|
|
20
|
+
module Bio
|
|
21
|
+
class KEGG
|
|
22
|
+
|
|
23
|
+
# Namespace for methods commonly used in the Bio::KEGG::* classes.
|
|
24
|
+
module Common
|
|
25
|
+
|
|
26
|
+
# The module providing dblinks_as_hash method.
|
|
27
|
+
#
|
|
28
|
+
# Bio::KEGG::* internal use only.
|
|
29
|
+
module DblinksAsHash

  # Returns a Hash of the DB name and an Array of entry IDs in
  # DBLINKS field.
  #
  # Each string from dblinks_as_strings is split at the first ":"
  # (plus any following whitespace) into the DB name and the ID list;
  # the ID list is then split on whitespace.  A line that has no ":"
  # maps its text to an empty Array, the same result as a line with a
  # ":" but no IDs.  Empty lines are skipped.
  #
  # The Hash is built on the first call and kept in
  # @dblinks_as_hash; later calls return that same object.
  # ---
  # *Returns*:: Hash (String => Array of String)
  def dblinks_as_hash
    unless defined? @dblinks_as_hash
      hash = {}
      dblinks_as_strings.each do |line|
        db, ids = line.split(/\:\s*/, 2)
        next if db.nil?   # empty line: nothing to record
        # ids is nil when the line has no ":" separator
        hash[db] = ids.to_s.split(/\s+/)
      end
      @dblinks_as_hash = hash
    end
    @dblinks_as_hash
  end

end #module DblinksAsHash
|
|
46
|
+
|
|
47
|
+
# The module providing pathways_as_hash method.
|
|
48
|
+
#
|
|
49
|
+
# Bio::KEGG::* internal use only.
|
|
50
|
+
module PathwaysAsHash

  # Returns a Hash of the pathway ID and name in PATHWAY field.
  #
  # Every string from pathways_as_strings is cut on whitespace into
  # at most three pieces; the second piece becomes the key and the
  # third piece the value (the first piece is not used).  The Hash is
  # stored in @pathways_as_hash on the first call and returned as-is
  # afterwards.
  def pathways_as_hash
    return @pathways_as_hash if defined? @pathways_as_hash
    table = {}
    pathways_as_strings.each do |entry|
      fields = entry.split(/\s+/, 3)
      table[fields[1]] = fields[2]
    end
    @pathways_as_hash = table
  end

end #module PathwaysAsHash
|
|
65
|
+
|
|
66
|
+
# This module provides orthologs_as_hash method.
|
|
67
|
+
#
|
|
68
|
+
# Bio::KEGG::* internal use only.
|
|
69
|
+
module OrthologsAsHash

  # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
  #
  # Every string from orthologs_as_strings is cut on whitespace into
  # at most three pieces; the second piece is used as the key and the
  # third piece as the value (the first piece is ignored).
  #
  # The Hash is built on the first call and kept in
  # @orthologs_as_hash; later calls return that same object.
  # ---
  # *Returns*:: Hash (String => String)
  def orthologs_as_hash
    unless defined? @orthologs_as_hash
      kos = {}
      orthologs_as_strings.each do |ko|
        _sign, entry_id, definition = ko.split(/\s+/, 3)
        kos[entry_id] = definition
      end
      @orthologs_as_hash = kos
    end
    @orthologs_as_hash
  end

end #module OrthologsAsHash
|
|
85
|
+
|
|
86
|
+
# This module provides genes_as_hash method.
|
|
87
|
+
#
|
|
88
|
+
# Bio::KEGG::* internal use only.
|
|
89
|
+
module GenesAsHash

  # Returns a Hash of the organism ID and an Array of entry IDs in
  # GENES field.
  #
  # Every string from genes_as_strings is cut on whitespace.  The
  # first piece, lower-cased and with its first ":" removed, is the
  # key; the remaining pieces, each with a parenthesized part
  # stripped, form the value.  The Hash is stored in @genes_as_hash
  # on the first call and returned as-is afterwards.
  def genes_as_hash
    return @genes_as_hash if defined? @genes_as_hash
    table = {}
    genes_as_strings.each do |row|
      fields = row.split(/\s+/)
      organism = fields.shift.downcase.sub(/:/, '')
      table[organism] = fields.map { |gene| gene.sub(/\(.*\)/, '') }
    end
    @genes_as_hash = table
  end

end #module GenesAsHash
|
|
108
|
+
|
|
109
|
+
end #module Common
|
|
110
|
+
end #class KEGG
|
|
111
|
+
end #module Bio
|
|
112
|
+
|