bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/sanger_chromatogram/chromatogram.rb - Sanger Chromatogram class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
# $Id:$
|
|
8
|
+
#
|
|
9
|
+
require 'bio/sequence/adapter'
|
|
10
|
+
module Bio
|
|
11
|
+
# == Description
|
|
12
|
+
#
|
|
13
|
+
# This is the Superclass for the Abif and Scf classes that allow importing of the common scf
|
|
14
|
+
# and abi sequence chromatogram formats
|
|
15
|
+
# The following attributes are Common to both the Abif and Scf subclasses
|
|
16
|
+
#
|
|
17
|
+
# * *chromatogram_type* (String): This is extracted from the chromatogram file itself and will
|
|
18
|
+
# probably be either .scf or ABIF for Scf and Abif files respectively.
|
|
19
|
+
# * *version* (String): The version of the Scf or Abif file
|
|
20
|
+
# * *sequence* (String): the sequence contained within the chromatogram as a string.
|
|
21
|
+
# * *qualities* (Array): the quality scores of each base as an array of integers. These will
|
|
22
|
+
# probably be phred scores.
|
|
23
|
+
# * *peak_indices* (Array): if the sequence traces contained within the chromatogram are imagined
|
|
24
|
+
# as being plotted on an x,y graph, the peak indices are the x positions of the peaks that
|
|
25
|
+
# represent the nucleotide bases found in the sequence from the chromatogram. For example if
|
|
26
|
+
# the peak_indices are [16,24,37,49 ....] and the sequence is AGGT...., at position 16 the
|
|
27
|
+
# traces in the chromatogram were base-called as an A, position 24 a G, position 37 a G,
|
|
28
|
+
# position 49 a T etc
|
|
29
|
+
# * *atrace*, *ctrace*, *gtrace*, *ttrace* (Array): If the sequence traces contained within
|
|
30
|
+
# the chromatogram are imagined as being plotted on an x,y graph, these attributes are arrays of
|
|
31
|
+
# y positions for each of the 4 nucleotide bases along the length of the x axis. If these were
|
|
32
|
+
# plotted joined by lines of different colours then the resulting graph should look like the
|
|
33
|
+
# original chromatogram file when viewed in a chromatogram viewer such as Chromas, 4Peaks or
|
|
34
|
+
# FinchTV.
|
|
35
|
+
# * *dye_mobility* (String): The mobility of the dye used when sequencing. This can influence the
|
|
36
|
+
# base calling
|
|
37
|
+
#
|
|
38
|
+
# == Usage
|
|
39
|
+
# filename = "path/to/sequence_chromatogram_file"
|
|
40
|
+
#
|
|
41
|
+
# for Abif files
|
|
42
|
+
# chromatogram_ff = Bio::Abif.open(filename)
|
|
43
|
+
# for Scf files
|
|
44
|
+
# chromatogram_ff = Bio::Scf.open(filename)
|
|
45
|
+
#
|
|
46
|
+
# chromatogram = chromatogram_ff.next_entry
|
|
47
|
+
# chromatogram.to_seq # => returns a Bio::Sequence object
|
|
48
|
+
# chromatogram.sequence # => returns the sequence contained within the chromatogram as a string
|
|
49
|
+
# chromatogram.qualities # => returns an array of quality values for each base
|
|
50
|
+
# chromatogram.atrace # => returns an array of the a trace y positions
|
|
51
|
+
#
|
|
52
|
+
class SangerChromatogram
  # The type of chromatogram file: ".scf" for Scf files and "ABIF" for Abif files (String)
  attr_accessor :chromatogram_type
  # The version of the Scf or Abif file (String)
  attr_accessor :version
  # The sequence contained within the chromatogram (String)
  attr_accessor :sequence
  # An array of quality scores for each base in the sequence (Array)
  attr_accessor :qualities
  # An array of 'x' positions (see description) on the trace where the bases
  # occur/have been called (Array)
  attr_accessor :peak_indices
  # An array of 'y' positions (see description) for the 'A' trace from the chromatogram (Array)
  attr_accessor :atrace
  # An array of 'y' positions (see description) for the 'C' trace from the chromatogram (Array)
  attr_accessor :ctrace
  # An array of 'y' positions (see description) for the 'G' trace from the chromatogram (Array)
  attr_accessor :gtrace
  # An array of 'y' positions (see description) for the 'T' trace from the chromatogram (Array)
  attr_accessor :ttrace
  # The mobility of the dye used when sequencing (String)
  attr_accessor :dye_mobility

  # Opens +filename+ by delegating to Bio::FlatFile.open, passing the
  # receiving class (e.g. a subclass) as the first argument.
  # ---
  # *Arguments*:
  # * (required) _filename_: path of the chromatogram file
  # *Returns*:: whatever Bio::FlatFile.open returns
  def self.open(filename)
    Bio::FlatFile.open(self, filename)
  end

  # Returns a Bio::Sequence::NA object based on the sequence from the chromatogram
  def seq
    Bio::Sequence::NA.new(@sequence)
  end

  # Returns a Bio::Sequence object based on the sequence from the chromatogram
  def to_biosequence
    Bio::Sequence.adapter(self, Bio::Sequence::Adapter::SangerChromatogram)
  end
  alias :to_seq :to_biosequence

  # Returns the sequence from the chromatogram as a string
  def sequence_string
    @sequence
  end

  # Reverses and complements the current chromatogram object in place,
  # including its sequence, traces, peak indices and qualities.
  #
  # The four traces must be set. Per-base qualities (@aqual etc.), the
  # overall qualities and the peak indices are only processed when present.
  # ---
  # *Returns*:: the new (reverse-complemented) sequence string
  def complement!
    # Swap each trace with the trace of its complementary base and reverse
    # both, so that position 0 becomes the last position.
    @atrace, @ttrace = @ttrace.reverse, @atrace.reverse
    @ctrace, @gtrace = @gtrace.reverse, @ctrace.reverse

    # Same swap for the per-base qualities, if they exist.
    unless @aqual.nil?
      @aqual, @tqual = @tqual.reverse, @aqual.reverse
      @cqual, @gqual = @gqual.reverse, @cqual.reverse
    end

    # Reverse the called-base qualities. Guarded, because an object without
    # quality data should not make the whole operation fail.
    @qualities = @qualities.reverse unless @qualities.nil?

    # Mirror the peak positions along the trace and restore ascending order.
    # NOTE(review): for 0-based indices the mirrored position would be
    # size - 1 - index; the existing size - index arithmetic is kept as is.
    # Confirm the intended indexing before changing it.
    unless @peak_indices.nil?
      trace_length = @atrace.size
      @peak_indices = @peak_indices.map { |index| trace_length - index }.reverse
    end

    # Reverse-complement the (lower case, IUPAC) sequence.
    @sequence = @sequence.reverse.tr('atgcnrykmswbvdh','tacgnyrmkswvbhd')
  end

  # Returns a new chromatogram object of the same class as the receiver in
  # which the sequence, traces and qualities have all been reversed and
  # complemented. The receiver is left unchanged.
  def complement
    chromatogram = self.dup
    chromatogram.complement!
    return chromatogram
  end
end
|
|
133
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb - Bio::SangerChromatogram to Bio::Sequence adapter module
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
# $Id:$
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
require 'bio/sequence'
|
|
11
|
+
require 'bio/sequence/adapter'
|
|
12
|
+
|
|
13
|
+
# Internal use only. Normal users should not use this module.
|
|
14
|
+
#
|
|
15
|
+
# Bio::SangerChromatogram to Bio::Sequence adapter module.
|
|
16
|
+
# It is internally used in Bio::SangerChromatogram#to_biosequence.
|
|
17
|
+
#
|
|
18
|
+
module Bio::Sequence::Adapter::SangerChromatogram

  extend Bio::Sequence::Adapter

  private

  # sequence: registered by name only, without a custom block
  def_biosequence_adapter :seq

  # primary accession: taken from the version attribute of the original
  # chromatogram object
  def_biosequence_adapter(:primary_accession) { |orig| orig.version }

end #module Bio::Sequence::Adapter::SangerChromatogram
|
|
32
|
+
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/sanger_chromatogram/scf.rb - Scf class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
require 'bio/db/sanger_chromatogram/chromatogram'
|
|
9
|
+
|
|
10
|
+
module Bio
|
|
11
|
+
# == Description
|
|
12
|
+
#
|
|
13
|
+
# This class inherits from the SangerChromatogram superclass. It captures the information contained
|
|
14
|
+
# within an scf format chromatogram file generated by DNA sequencing. See the SangerChromatogram class
|
|
15
|
+
# for usage
|
|
16
|
+
class Scf < SangerChromatogram
  # sequence attributes

  # The quality of each base at each position along the length of the sequence is captured
  # by the nqual attributes where n is one of a, c, g or t. Generally the quality will be
  # high for the base that is called at a particular position and low for all the other bases.
  # However at positions of poor sequence quality, more than one base may have similar top scores.
  # By analysing the nqual attributes it may be possible to determine if the base calling was
  # correct or not.

  # The quality of the A base at each sequence position
  attr_accessor :aqual
  # The quality of the C base at each sequence position
  attr_accessor :cqual
  # The quality of the G base at each sequence position
  attr_accessor :gqual
  # The quality of the T base at each sequence position
  attr_accessor :tqual
  # A hash of extra information extracted from the chromatogram file
  attr_accessor :comments

  # Parses the raw content of an scf file.
  # See SangerChromatogram class for how to create an Scf object and its usage.
  # ---
  # *Arguments*:
  # * (required) _string_: the whole content of the scf file (String)
  def initialize(string)
    header = string.slice(0,128)
    # Read in header info: a 4 character type tag, eight 32 bit big-endian
    # integers, a 4 character version, two more integers and the spare
    # words. Only the first spare word ends up in @header_spare because
    # there are fewer assignment targets than unpacked values.
    @chromatogram_type, @samples, @sample_offset, @bases, @bases_left_clip, @bases_right_clip, @bases_offset, @comment_size, @comments_offset, @version, @sample_size, @code_set, @header_spare = header.unpack("a4 NNNNNNNN a4 NN N20")
    get_traces(string)
    get_bases_peakIndices_and_qualities(string)
    get_comments(string)
    # Fall back to a placeholder when the file carries no DYEP comment.
    @dye_mobility = @comments["DYEP"] || "Unknown"
  end

  private

  # Fills @atrace, @ctrace, @gtrace and @ttrace from the sample section of
  # the file. Nothing is set for versions other than "3.00" and "2.00".
  def get_traces(string)
    if @version == "3.00"
      # Version 3: the four traces are stored one after the other (a, c, g,
      # t) and every sample is stored as a delta.
      offset = @sample_offset
      length = @samples * @sample_size
      # 2 byte samples are read as big-endian shorts, 1 byte samples as chars
      byte = (@sample_size == 2) ? "n" : "c"
      for base in ["a" , "c" , "g" , "t"]
        trace_read = string.slice(offset,length).unpack("#{byte}#{@samples}")
        # convert offsets: values above 30000 are taken to be negative deltas
        trace_read.map! { |value| value > 30000 ? value - 65536 : value }
        # For 8-bit data we need to emulate a signed/unsigned
        # cast that is implicit in the C implementations.....
        if @sample_size == 1
          trace_read.map! { |value| value < 0 ? value + 256 : value }
        end
        trace_read = convert_deltas_to_values(trace_read)
        self.instance_variable_set("@#{base}trace", trace_read)
        offset += length
      end
    elsif @version == "2.00"
      # Version 2: samples are interleaved as a, c, g, t for every position.
      @atrace = []
      @ctrace = []
      @gtrace = []
      @ttrace = []
      offset = @sample_offset
      length = @samples * @sample_size * 4
      # The data is stored in 1 byte as an unsigned byte or in 2 bytes as an
      # unsigned short, hence "C" (not the signed "c") for 1 byte samples.
      byte = (@sample_size == 2) ? "n" : "C"
      trace_read = string.slice(offset,length).unpack("#{byte}#{@samples*4}")
      (0..(@samples-1)*4).step(4) do |offset2|
        @atrace << trace_read[offset2]
        @ctrace << trace_read[offset2+1]
        @gtrace << trace_read[offset2+2]
        @ttrace << trace_read[offset2+3]
      end
    end
  end

  # Fills @peak_indices, @aqual, @cqual, @gqual, @tqual, @sequence and
  # @qualities from the bases section of the file.
  def get_bases_peakIndices_and_qualities(string)
    if @version == "3.00"
      # now go and get the peak index information
      offset = @bases_offset
      length = @bases * 4
      get_v3_peak_indices(string,offset,length)

      # now go and get the accuracy information
      offset += length
      get_v3_accuracies(string,offset,length)

      # OK, now go and get the base information.
      offset += length
      length = @bases
      get_v3_sequence(string,offset,length)

      # combine accuracies to get quality scores
      @qualities = convert_accuracies_to_qualities
    elsif @version == "2.00"
      @peak_indices = []
      @aqual = []
      @cqual = []
      @gqual = []
      @tqual = []
      @qualities = []
      @sequence = ""
      # now go and get the base information: one 12 byte record per base
      offset = @bases_offset
      length = @bases * 12
      all_bases_info = string.slice(offset,length)

      (0..length-1).step(12) do |offset2|
        # record layout: peak index, a/c/g/t accuracies, called base, 3 spare bytes
        base_info = all_bases_info.slice(offset2,12).unpack("N C C C C a C3")
        called_base = base_info[5].downcase
        @peak_indices << base_info[0]
        @aqual << base_info[1]
        @cqual << base_info[2]
        @gqual << base_info[3]
        @tqual << base_info[4]
        @sequence << called_base
        # the quality of a position is the accuracy of the base that was called
        @qualities << case called_base
                      when "a" then base_info[1]
                      when "c" then base_info[2]
                      when "g" then base_info[3]
                      when "t" then base_info[4]
                      else 0
                      end
      end
    end
  end

  # Reads length/4 32 bit big-endian peak indices starting at offset.
  def get_v3_peak_indices(string,offset,length)
    @peak_indices = string.slice(offset,length).unpack("N#{length/4}")
  end

  # Reads the four consecutive accuracy arrays (a, c, g, t), each length/4
  # unsigned bytes long, into @aqual, @cqual, @gqual and @tqual.
  def get_v3_accuracies(string,offset,length)
    qualities = string.slice(offset,length)
    qual_length = length/4
    qual_offset = 0
    for base in ["a" , "c" , "g" , "t"]
      self.instance_variable_set("@#{base}qual",qualities.slice(qual_offset,qual_length).unpack("C#{qual_length}"))
      qual_offset += qual_length
    end
  end

  # Reads the called bases (length characters) as a lower case string.
  def get_v3_sequence(string,offset,length)
    @sequence = string.slice(offset,length).unpack("a#{length}").join('').downcase
  end

  # Turns the delta values of a version 3 trace back into sample values by
  # applying a running sum over the array twice. The array is modified in
  # place and returned.
  def convert_deltas_to_values(trace_read)
    2.times do
      p_sample = 0
      for sample_num in (0..trace_read.size-1)
        trace_read[sample_num] = trace_read[sample_num] + p_sample
        p_sample = trace_read[sample_num]
      end
    end
    return trace_read
  end

  # Builds the per-position quality array: for every called base the
  # accuracy of that base is used, and 0 for anything other than a, c, g, t.
  def convert_accuracies_to_qualities
    qualities = Array.new
    for base_pos in (0..@sequence.length-1)
      case sequence.slice(base_pos,1)
      when "a"
        qualities << @aqual[base_pos]
      when "c"
        qualities << @cqual[base_pos]
      when "g"
        qualities << @gqual[base_pos]
      when "t"
        qualities << @tqual[base_pos]
      else
        qualities << 0
      end
    end
    return qualities
  end

  # Fills the @comments hash from the comment section of the file, which is
  # read as newline separated KEY=value lines. Lines that do not have this
  # form are ignored, and a missing comment section gives an empty hash.
  def get_comments(string)
    @comments = Hash.new
    comment_string = string.slice(@comments_offset,@comment_size)
    # slice returns nil when the offset lies beyond the end of the data
    return @comments if comment_string.nil?
    comment_string.gsub(/\0/, "").split("\n").each do |comment|
      # only store real KEY=value pairs; a failed match would otherwise
      # add a nil => nil entry
      if comment =~ /(\w+)=(.*)/
        @comments[$1] = $2
      end
    end
    @comments
  end
end
|
|
210
|
+
end
|
data/lib/bio/io/das.rb
CHANGED
|
@@ -415,47 +415,3 @@ end
|
|
|
415
415
|
|
|
416
416
|
end # module Bio
|
|
417
417
|
|
|
418
|
-
|
|
419
|
-
if __FILE__ == $0
|
|
420
|
-
|
|
421
|
-
# begin
|
|
422
|
-
# require 'pp'
|
|
423
|
-
# alias p pp
|
|
424
|
-
# rescue LoadError
|
|
425
|
-
# end
|
|
426
|
-
|
|
427
|
-
puts "### WormBase"
|
|
428
|
-
wormbase = Bio::DAS.new('http://www.wormbase.org/db/')
|
|
429
|
-
|
|
430
|
-
puts ">>> test get_dsn"
|
|
431
|
-
p wormbase.get_dsn
|
|
432
|
-
|
|
433
|
-
puts ">>> create segment obj Bio::DAS::SEGMENT.region('I', 1, 1000)"
|
|
434
|
-
seg = Bio::DAS::SEGMENT.region('I', 1, 1000)
|
|
435
|
-
p seg
|
|
436
|
-
|
|
437
|
-
puts ">>> test get_dna"
|
|
438
|
-
p wormbase.get_dna('elegans', seg)
|
|
439
|
-
|
|
440
|
-
puts "### test get_features"
|
|
441
|
-
p wormbase.get_features('elegans', seg)
|
|
442
|
-
|
|
443
|
-
puts "### KEGG DAS"
|
|
444
|
-
kegg_das = Bio::DAS.new("http://das.hgc.jp/cgi-bin/")
|
|
445
|
-
|
|
446
|
-
dsn_list = kegg_das.get_dsn
|
|
447
|
-
org_list = dsn_list.collect {|x| x.source}
|
|
448
|
-
|
|
449
|
-
puts ">>> dsn : entry_points"
|
|
450
|
-
org_list.each do |org|
|
|
451
|
-
print "#{org} : "
|
|
452
|
-
list = kegg_das.get_entry_points(org)
|
|
453
|
-
list.segments.each do |seg|
|
|
454
|
-
print " #{seg.entry_id}"
|
|
455
|
-
end
|
|
456
|
-
puts
|
|
457
|
-
end
|
|
458
|
-
|
|
459
|
-
end
|
|
460
|
-
|
|
461
|
-
|
data/lib/bio/io/ddbjxml.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
|
|
11
11
|
require 'bio/io/soapwsdl'
|
|
@@ -456,183 +456,3 @@ end # XML
|
|
|
456
456
|
end # DDBJ
|
|
457
457
|
end # Bio
|
|
458
458
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
if __FILE__ == $0
|
|
462
|
-
|
|
463
|
-
begin
|
|
464
|
-
require 'pp'
|
|
465
|
-
alias p pp
|
|
466
|
-
rescue LoadError
|
|
467
|
-
end
|
|
468
|
-
|
|
469
|
-
puts ">>> Bio::DDBJ::XML::Blast"
|
|
470
|
-
serv = Bio::DDBJ::XML::Blast.new
|
|
471
|
-
# serv.log = STDERR
|
|
472
|
-
|
|
473
|
-
query = "MSSRIARALALVVTLLHLTRLALSTCPAACHCPLEAPKCAPGVGLVRDGCGCCKVCAKQL"
|
|
474
|
-
|
|
475
|
-
puts "### searchSimple('blastp', 'SWISS', query)"
|
|
476
|
-
puts serv.searchSimple('blastp', 'SWISS', query)
|
|
477
|
-
|
|
478
|
-
puts "### searchParam('tblastn', 'ddbjvrl', query, '-m 8')"
|
|
479
|
-
puts serv.searchParam('tblastn', 'ddbjvrl', query, '-m 8')
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
puts ">>> Bio::DDBJ::XML::ClustalW"
|
|
483
|
-
serv = Bio::DDBJ::XML::ClustalW.new
|
|
484
|
-
|
|
485
|
-
query = <<END
|
|
486
|
-
> RABSTOUT rabbit Guinness receptor
|
|
487
|
-
LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
|
|
488
|
-
ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC
|
|
489
|
-
> MUSNOSE mouse nose drying factor
|
|
490
|
-
mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
|
|
491
|
-
fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfdv
|
|
492
|
-
> HSHEAVEN human Guinness receptor repeat
|
|
493
|
-
mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
|
|
494
|
-
fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
|
|
495
|
-
mhkmmhkgmkhmhgmhmhg lhmkmhlkmgkhmgkmk ytytytryrwtqtqwtwyt
|
|
496
|
-
fdgfdsgafdagfdgfsag dfavdfdvgavfsvfgv dfsvdgvagvfdv
|
|
497
|
-
END
|
|
498
|
-
|
|
499
|
-
puts "### analyzeSimple(query)"
|
|
500
|
-
puts serv.analyzeSimple(query)
|
|
501
|
-
|
|
502
|
-
puts "### analyzeParam(query, '-align -matrix=blosum')"
|
|
503
|
-
puts serv.analyzeParam(query, '-align -matrix=blosum')
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
puts ">>> Bio::DDBJ::XML::DDBJ"
|
|
507
|
-
serv = Bio::DDBJ::XML::DDBJ.new
|
|
508
|
-
|
|
509
|
-
puts "### getFFEntry('AB000050')"
|
|
510
|
-
puts serv.getFFEntry('AB000050')
|
|
511
|
-
|
|
512
|
-
puts "### getXMLEntry('AB000050')"
|
|
513
|
-
puts serv.getXMLEntry('AB000050')
|
|
514
|
-
|
|
515
|
-
puts "### getFeatureInfo('AB000050', 'cds')"
|
|
516
|
-
puts serv.getFeatureInfo('AB000050', 'cds')
|
|
517
|
-
|
|
518
|
-
puts "### getAllFeatures('AB000050')"
|
|
519
|
-
puts serv.getAllFeatures('AB000050')
|
|
520
|
-
|
|
521
|
-
puts "### getRelatedFeatures('AL121903', '59000', '64000')"
|
|
522
|
-
puts serv.getRelatedFeatures('AL121903', '59000', '64000')
|
|
523
|
-
|
|
524
|
-
puts "### getRelatedFeaturesSeq('AL121903', '59000', '64000')"
|
|
525
|
-
puts serv.getRelatedFeaturesSeq('AL121903', '59000', '64000')
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
puts ">>> Bio::DDBJ::XML::Fasta"
|
|
529
|
-
serv = Bio::DDBJ::XML::Fasta.new
|
|
530
|
-
|
|
531
|
-
query = ">Test\nMSDGAVQPDG GQPAVRNERA TGSGNGSGGG GGGGSGGVGI"
|
|
532
|
-
|
|
533
|
-
puts "### searchSimple('fasta34', 'PDB', query)"
|
|
534
|
-
puts serv.searchSimple('fasta34', 'PDB', query)
|
|
535
|
-
|
|
536
|
-
query = ">Test\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
|
|
537
|
-
|
|
538
|
-
puts "### searchParam('fastx34_t', 'PDB', query, '-n')"
|
|
539
|
-
puts serv.searchParam('fastx34_t', 'PDB', query, '-n')
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
puts ">>> Bio::DDBJ::XML::GetEntry"
|
|
543
|
-
serv = Bio::DDBJ::XML::GetEntry.new
|
|
544
|
-
|
|
545
|
-
puts "### getDDBJEntry('AB000050')"
|
|
546
|
-
puts serv.getDDBJEntry('AB000050')
|
|
547
|
-
|
|
548
|
-
puts "### getPDBEntry('1AAR')"
|
|
549
|
-
puts serv. getPDBEntry('1AAR')
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
puts ">>> Bio::DDBJ::XML::Gib"
|
|
553
|
-
serv = Bio::DDBJ::XML::Gib.new
|
|
554
|
-
|
|
555
|
-
puts "### getOrganismList"
|
|
556
|
-
puts serv.getOrganismList
|
|
557
|
-
|
|
558
|
-
puts "### getChIDList"
|
|
559
|
-
puts serv.getChIDList
|
|
560
|
-
|
|
561
|
-
puts "### getOrganismNameFromChid('Sent_CT18:')"
|
|
562
|
-
puts serv.getOrganismNameFromChid('Sent_CT18:')
|
|
563
|
-
|
|
564
|
-
puts "### getChIDFromOrganismName('Aquifex aeolicus VF5')"
|
|
565
|
-
puts serv.getChIDFromOrganismName('Aquifex aeolicus VF5')
|
|
566
|
-
|
|
567
|
-
puts "### getAccession('Ecol_K12_MG1655:')"
|
|
568
|
-
puts serv.getAccession('Ecol_K12_MG1655:')
|
|
569
|
-
|
|
570
|
-
puts "### getPieceNumber('Mgen_G37:')"
|
|
571
|
-
puts serv.getPieceNumber('Mgen_G37:')
|
|
572
|
-
|
|
573
|
-
puts "### getDivision('Mgen_G37:')"
|
|
574
|
-
puts serv.getDivision('Mgen_G37:')
|
|
575
|
-
|
|
576
|
-
puts "### getType('Mgen_G37:')"
|
|
577
|
-
puts serv.getType('Mgen_G37:')
|
|
578
|
-
|
|
579
|
-
puts "### getCDS('Aaeo_VF5:ece1')"
|
|
580
|
-
puts serv.getCDS('Aaeo_VF5:ece1')
|
|
581
|
-
|
|
582
|
-
puts "### getFlatFile('Nost_PCC7120:pCC7120zeta')"
|
|
583
|
-
puts serv.getFlatFile('Nost_PCC7120:pCC7120zeta')
|
|
584
|
-
|
|
585
|
-
puts "### getFastaFile('Nost_PCC7120:pCC7120zeta')"
|
|
586
|
-
puts serv.getFastaFile('Nost_PCC7120:pCC7120zeta', 'cdsaa')
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
puts ">>> Bio::DDBJ::XML::Gtop"
|
|
590
|
-
serv = Bio::DDBJ::XML::Gtop.new
|
|
591
|
-
|
|
592
|
-
puts "### getOrganismList"
|
|
593
|
-
puts serv.getOrganismList
|
|
594
|
-
|
|
595
|
-
puts "### getMasterInfo"
|
|
596
|
-
puts serv.getMasterInfo('thrA', 'ecol0')
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
# puts ">>> Bio::DDBJ::XML::PML"
|
|
600
|
-
# serv = Bio::DDBJ::XML::PML.new
|
|
601
|
-
#
|
|
602
|
-
# puts "### getVariation('1')"
|
|
603
|
-
# puts serv.getVariation('1')
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
puts ">>> Bio::DDBJ::XML::SRS"
|
|
607
|
-
serv = Bio::DDBJ::XML::SRS.new
|
|
608
|
-
|
|
609
|
-
puts "### searchSimple('[pathway-des:sugar]')"
|
|
610
|
-
puts serv.searchSimple('[pathway-des:sugar]')
|
|
611
|
-
|
|
612
|
-
puts "### searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')"
|
|
613
|
-
puts serv.searchParam('[swissprot-des:cohesin]', '-f seq -sf fasta')
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
puts ">>> Bio::DDBJ::XML::TxSearch"
|
|
617
|
-
serv = Bio::DDBJ::XML::TxSearch.new
|
|
618
|
-
|
|
619
|
-
puts "### searchSimple('*coli')"
|
|
620
|
-
puts serv.searchSimple('*coli')
|
|
621
|
-
|
|
622
|
-
puts "### searchSimple('*tardigrada*')"
|
|
623
|
-
puts serv.searchSimple('*tardigrada*')
|
|
624
|
-
|
|
625
|
-
puts "### getTxId('Escherichia coli')"
|
|
626
|
-
puts serv.getTxId('Escherichia coli')
|
|
627
|
-
|
|
628
|
-
puts "### getTxName('562')"
|
|
629
|
-
puts serv.getTxName('562')
|
|
630
|
-
|
|
631
|
-
query = "Campylobacter coli\nEscherichia coli"
|
|
632
|
-
rank = "family\ngenus"
|
|
633
|
-
|
|
634
|
-
puts "### searchLineage(query, rank, 'Bacteria')"
|
|
635
|
-
puts serv.searchLineage(query, rank, 'Bacteria')
|
|
636
|
-
|
|
637
|
-
end
|
|
638
|
-
|