bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
|
@@ -9,10 +9,12 @@
|
|
|
9
9
|
# $Id:$
|
|
10
10
|
#
|
|
11
11
|
|
|
12
|
+
# loading helper routine for testing bioruby
|
|
12
13
|
require 'pathname'
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
|
15
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
|
15
16
|
|
|
17
|
+
# libraries needed for the tests
|
|
16
18
|
require 'test/unit'
|
|
17
19
|
require 'bio'
|
|
18
20
|
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/db/sanger_chromatogram/test_abif.rb - Unit test for Bio::Abif
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
# loading helper routine for testing bioruby
|
|
9
|
+
require 'pathname'
|
|
10
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
|
11
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
|
12
|
+
|
|
13
|
+
# libraries needed for the tests
|
|
14
|
+
require 'test/unit'
|
|
15
|
+
require 'bio/db/sanger_chromatogram/chromatogram'
|
|
16
|
+
require 'bio/db/sanger_chromatogram/abif'
|
|
17
|
+
|
|
18
|
+
module Bio
|
|
19
|
+
|
|
20
|
+
module TestAbifData
|
|
21
|
+
DataPath = Pathname.new(File.join(BioRubyTestDataPath,
|
|
22
|
+
'sanger_chromatogram')).cleanpath.to_s
|
|
23
|
+
def self.abif
|
|
24
|
+
File.read(File.join(DataPath, 'test_chromatogram_abif.ab1'))
|
|
25
|
+
end
|
|
26
|
+
end #module TestAbifData
|
|
27
|
+
|
|
28
|
+
class TestAbif < Test::Unit::TestCase
|
|
29
|
+
|
|
30
|
+
Abif_sequence = "nnnnnnnnnnnttggttggttcgctataaaaactcttattttggataatttgtttagctgttgcaatataaattgacccatttaatttataaattggattctcgttgcaataaatttccagatcctgaaaaagctctggcttaaccaaattgccttggctatcaatgcttctacaccaagaaggctttaaagagataggactaactgaaacgacactttttcccgttgcttgatgtatttcaacagcatgtcttatggtttctggcttcctgaatggagaagttggttgtaaaagcaatacactgtcaaaaaaaacctccatttgctgaaacttaaacaggaggtcaataacagtatgaatcacatccgaagtatccgtggctaaatcttccgatcttagccaaggtactgaagccccatattgaacn".freeze
|
|
31
|
+
Abif_RC_sequence = "ngttcaatatggggcttcagtaccttggctaagatcggaagatttagccacggatacttcggatgtgattcatactgttattgacctcctgtttaagtttcagcaaatggaggttttttttgacagtgtattgcttttacaaccaacttctccattcaggaagccagaaaccataagacatgctgttgaaatacatcaagcaacgggaaaaagtgtcgtttcagttagtcctatctctttaaagccttcttggtgtagaagcattgatagccaaggcaatttggttaagccagagctttttcaggatctggaaatttattgcaacgagaatccaatttataaattaaatgggtcaatttatattgcaacagctaaacaaattatccaaaataagagtttttatagcgaaccaaccaannnnnnnnnnn".freeze
|
|
32
|
+
|
|
33
|
+
Abif_first_10_peak_indices = [3, 16,38,61,66,91,105,115,138,151].freeze
|
|
34
|
+
Abif_last_10_peak_indices = [5070,5081,5094,5107,5120,5133,5145,5157,5169,5182].freeze
|
|
35
|
+
|
|
36
|
+
Abif_atrace_size = 5236
|
|
37
|
+
|
|
38
|
+
Abif_RC_first_10_peak_indices = Abif_last_10_peak_indices.collect{|index| Abif_atrace_size - index}.reverse.freeze
|
|
39
|
+
Abif_RC_last_10_peak_indices = Abif_first_10_peak_indices.collect{|index| Abif_atrace_size - index}.reverse.freeze
|
|
40
|
+
|
|
41
|
+
def setup
|
|
42
|
+
@abi = Abif.new(TestAbifData.abif)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def test_seq
|
|
46
|
+
assert_equal(Abif_sequence, @abi.seq.to_s)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_to_biosequence
|
|
50
|
+
assert_equal(Abif_sequence, @abi.to_biosequence.to_s)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def test_complement
|
|
54
|
+
@RC_chromatogram = @abi.complement
|
|
55
|
+
# check reverse complemented sequence
|
|
56
|
+
assert_equal(Abif_RC_sequence, @RC_chromatogram.sequence)
|
|
57
|
+
# check reverse complemented peak indices
|
|
58
|
+
assert_equal(Abif_RC_first_10_peak_indices,
|
|
59
|
+
@RC_chromatogram.peak_indices.slice(0,10))
|
|
60
|
+
assert_equal(Abif_RC_last_10_peak_indices,
|
|
61
|
+
@RC_chromatogram.peak_indices.slice(-10..-1))
|
|
62
|
+
# check reverse complemented traces
|
|
63
|
+
assert_equal(@abi.atrace.slice(0,10),
|
|
64
|
+
@RC_chromatogram.ttrace.slice(-10..-1).reverse)
|
|
65
|
+
assert_equal(@abi.ctrace.slice(0,10),
|
|
66
|
+
@RC_chromatogram.gtrace.slice(-10..-1).reverse)
|
|
67
|
+
assert_equal(@abi.gtrace.slice(0,10),
|
|
68
|
+
@RC_chromatogram.ctrace.slice(-10..-1).reverse)
|
|
69
|
+
assert_equal(@abi.ttrace.slice(0,10),
|
|
70
|
+
@RC_chromatogram.atrace.slice(-10..-1).reverse)
|
|
71
|
+
|
|
72
|
+
assert_equal(@abi.qualities.slice(0,10),
|
|
73
|
+
@RC_chromatogram.qualities.slice(-10..-1).reverse)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/db/sanger_chromatogram/test_scf.rb - Unit test for Bio::Scf
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
# loading helper routine for testing bioruby
|
|
9
|
+
require 'pathname'
|
|
10
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
|
11
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
|
12
|
+
|
|
13
|
+
# libraries needed for the tests
|
|
14
|
+
require 'test/unit'
|
|
15
|
+
require 'bio/db/sanger_chromatogram/scf'
|
|
16
|
+
|
|
17
|
+
module Bio
|
|
18
|
+
|
|
19
|
+
module TestScfData
|
|
20
|
+
DataPath = Pathname.new(File.join(BioRubyTestDataPath,
|
|
21
|
+
'sanger_chromatogram')).cleanpath.to_s
|
|
22
|
+
def self.scf_version_2
|
|
23
|
+
File.read(File.join(DataPath, 'test_chromatogram_scf_v2.scf'))
|
|
24
|
+
end
|
|
25
|
+
def self.scf_version_3
|
|
26
|
+
File.read(File.join(DataPath, 'test_chromatogram_scf_v3.scf'))
|
|
27
|
+
end
|
|
28
|
+
end #module TestScfData
|
|
29
|
+
|
|
30
|
+
module TestScf_common
|
|
31
|
+
Scf_sequence = "attaacgtaaaaggtttggttggttcgctataaaaactcttattttggataatttgtttagctgttgcaatataaattgacccatttaatttataaattggattctcgttgcaataaatttccagatcctgaaaaagctctggcttaaccaaattgccttggctatcaatgcttctacaccaagaaggctttaaagagataggactaactgaaacgacactttttcccgttgcttgatgtatttcaacagcatgtcttatggtttctggcttcctgaatggagaagttggttgtaaaagcaatacactgtcaaaaaaaacctccatttgctgaaacttaaacaggaggtcaataacagtatgaatcacatccgaagtatccgtggctaaatcttccgatcttagccaaggtactgaagccccatattgaacggann".freeze
|
|
32
|
+
Scf_RC_sequence = "nntccgttcaatatggggcttcagtaccttggctaagatcggaagatttagccacggatacttcggatgtgattcatactgttattgacctcctgtttaagtttcagcaaatggaggttttttttgacagtgtattgcttttacaaccaacttctccattcaggaagccagaaaccataagacatgctgttgaaatacatcaagcaacgggaaaaagtgtcgtttcagttagtcctatctctttaaagccttcttggtgtagaagcattgatagccaaggcaatttggttaagccagagctttttcaggatctggaaatttattgcaacgagaatccaatttataaattaaatgggtcaatttatattgcaacagctaaacaaattatccaaaataagagtttttatagcgaaccaaccaaaccttttacgttaat".freeze
|
|
33
|
+
|
|
34
|
+
Scf_first_10_peak_indices = [16,24,37,49,64,64,80,92,103,113].freeze
|
|
35
|
+
Scf_last_10_peak_indices = [5120,5132,5145,5157,5169,5182,5195,5207,5219,5231].freeze
|
|
36
|
+
|
|
37
|
+
Scf_atrace_size = 5236
|
|
38
|
+
|
|
39
|
+
Scf_RC_first_10_peak_indices = Scf_last_10_peak_indices.collect{|index| Scf_atrace_size - index}.reverse.freeze
|
|
40
|
+
Scf_RC_last_10_peak_indices = Scf_first_10_peak_indices.collect{|index| Scf_atrace_size - index}.reverse.freeze
|
|
41
|
+
|
|
42
|
+
def test_seq
|
|
43
|
+
assert_equal(Scf_sequence, @scf.seq.to_s)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_to_biosequence
|
|
47
|
+
assert_equal(Scf_sequence, @scf.to_biosequence.to_s)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def test_complement
|
|
51
|
+
@RC_chromatogram = @scf.complement
|
|
52
|
+
# check reverse complemented sequence
|
|
53
|
+
assert_equal(Scf_RC_sequence, @RC_chromatogram.sequence)
|
|
54
|
+
# check reverse complemented peak indices
|
|
55
|
+
assert_equal(Scf_RC_first_10_peak_indices,
|
|
56
|
+
@RC_chromatogram.peak_indices.slice(0,10))
|
|
57
|
+
assert_equal(Scf_RC_last_10_peak_indices,
|
|
58
|
+
@RC_chromatogram.peak_indices.slice(-10..-1))
|
|
59
|
+
# check reverse complemented traces
|
|
60
|
+
assert_equal(@scf.atrace.slice(0,10),
|
|
61
|
+
@RC_chromatogram.ttrace.slice(-10..-1).reverse)
|
|
62
|
+
assert_equal(@scf.ctrace.slice(0,10),
|
|
63
|
+
@RC_chromatogram.gtrace.slice(-10..-1).reverse)
|
|
64
|
+
assert_equal(@scf.gtrace.slice(0,10),
|
|
65
|
+
@RC_chromatogram.ctrace.slice(-10..-1).reverse)
|
|
66
|
+
assert_equal(@scf.ttrace.slice(0,10),
|
|
67
|
+
@RC_chromatogram.atrace.slice(-10..-1).reverse)
|
|
68
|
+
# check reverse complemented individual and combined qualities
|
|
69
|
+
#if @RC_chromatogram.chromatogram_type == ".scf"
|
|
70
|
+
assert_equal(@scf.aqual.slice(0,10),
|
|
71
|
+
@RC_chromatogram.tqual.slice(-10..-1).reverse)
|
|
72
|
+
assert_equal(@scf.cqual.slice(0,10),
|
|
73
|
+
@RC_chromatogram.gqual.slice(-10..-1).reverse)
|
|
74
|
+
assert_equal(@scf.gqual.slice(0,10),
|
|
75
|
+
@RC_chromatogram.cqual.slice(-10..-1).reverse)
|
|
76
|
+
assert_equal(@scf.tqual.slice(0,10),
|
|
77
|
+
@RC_chromatogram.aqual.slice(-10..-1).reverse)
|
|
78
|
+
#end
|
|
79
|
+
assert_equal(@scf.qualities.slice(0,10),
|
|
80
|
+
@RC_chromatogram.qualities.slice(-10..-1).reverse)
|
|
81
|
+
end
|
|
82
|
+
end #module TestScf_common
|
|
83
|
+
|
|
84
|
+
class TestScf_version_2 < Test::Unit::TestCase
|
|
85
|
+
include TestScf_common
|
|
86
|
+
def setup
|
|
87
|
+
@scf = Scf.new(TestScfData.scf_version_2)
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
class TestScf_version_3 < Test::Unit::TestCase
|
|
92
|
+
include TestScf_common
|
|
93
|
+
def setup
|
|
94
|
+
@scf = Scf.new(TestScfData.scf_version_3)
|
|
95
|
+
end
|
|
96
|
+
end #class TestScf_version_3
|
|
97
|
+
|
|
98
|
+
end #module Bio
|
|
@@ -5,21 +5,21 @@
|
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
|
|
11
|
+
# loading helper routine for testing bioruby
|
|
11
12
|
require 'pathname'
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
|
|
14
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
|
14
15
|
|
|
16
|
+
# libraries needed for the tests
|
|
15
17
|
require 'test/unit'
|
|
16
|
-
require 'bio/io/fetch'
|
|
17
18
|
require 'bio/db/aaindex'
|
|
18
19
|
|
|
19
20
|
module Bio
|
|
20
21
|
class DataAAindex
|
|
21
|
-
|
|
22
|
-
TestDataAAindex = Pathname.new(File.join(bioruby_root, 'test', 'data', 'aaindex')).cleanpath.to_s
|
|
22
|
+
TestDataAAindex = Pathname.new(File.join(BioRubyTestDataPath, 'aaindex')).cleanpath.to_s
|
|
23
23
|
|
|
24
24
|
def self.aax1
|
|
25
25
|
File.read(File.join(TestDataAAindex, "PRAM900102"))
|
|
@@ -4,13 +4,15 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
+
# loading helper routine for testing bioruby
|
|
10
11
|
require 'pathname'
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
|
|
13
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
|
13
14
|
|
|
15
|
+
# libraries needed for the tests
|
|
14
16
|
require 'test/unit'
|
|
15
17
|
require 'bio/db/fasta'
|
|
16
18
|
|
|
@@ -193,49 +195,6 @@ END
|
|
|
193
195
|
end # class TestFastaFormat
|
|
194
196
|
|
|
195
197
|
|
|
196
|
-
class TestFastaNumericFormat < Test::Unit::TestCase
|
|
197
|
-
|
|
198
|
-
def setup
|
|
199
|
-
text =<<END
|
|
200
|
-
>CRA3575282.F
|
|
201
|
-
24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26
|
|
202
|
-
32 29 29 25
|
|
203
|
-
END
|
|
204
|
-
@obj = Bio::FastaNumericFormat.new(text)
|
|
205
|
-
end
|
|
206
|
-
|
|
207
|
-
def test_entry
|
|
208
|
-
assert_equal(">CRA3575282.F\n24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26 \n32 29 29 25\n", @obj.entry)
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
def test_entry_id
|
|
212
|
-
assert_equal('CRA3575282.F', @obj.entry_id)
|
|
213
|
-
end
|
|
214
|
-
|
|
215
|
-
def test_definition
|
|
216
|
-
assert_equal('CRA3575282.F', @obj.definition)
|
|
217
|
-
end
|
|
218
|
-
|
|
219
|
-
def test_data
|
|
220
|
-
data = [24, 15, 23, 29, 20, 13, 20, 21, 21, 23, 22, 25, 13, 22, 17, 15, 25, 27, 32, 26, 32, 29, 29, 25]
|
|
221
|
-
assert_equal(data, @obj.data)
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
def test_length
|
|
225
|
-
assert_equal(24, @obj.length)
|
|
226
|
-
end
|
|
227
|
-
|
|
228
|
-
def test_each
|
|
229
|
-
assert(@obj.each {|x| })
|
|
230
|
-
end
|
|
231
|
-
|
|
232
|
-
def test_arg
|
|
233
|
-
assert(@obj[0], '')
|
|
234
|
-
assert(@obj[-1], '')
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
end # class TestFastaFormatNumeric
|
|
239
198
|
|
|
240
199
|
|
|
241
200
|
class TestFastaDefinition < Test::Unit::TestCase
|
|
@@ -0,0 +1,829 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/db/test_fastq.rb - Unit test for Bio::Fastq
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# $Id:$
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
# loading helper routine for testing bioruby
|
|
12
|
+
require 'pathname'
|
|
13
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
|
|
14
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
|
15
|
+
|
|
16
|
+
# libraries needed for the tests
|
|
17
|
+
require 'test/unit'
|
|
18
|
+
require 'bio/io/flatfile'
|
|
19
|
+
require 'bio/db/fastq'
|
|
20
|
+
|
|
21
|
+
module Bio
|
|
22
|
+
module TestFastq
|
|
23
|
+
|
|
24
|
+
TestFastqDataDir = Pathname.new(File.join(BioRubyTestDataPath,
|
|
25
|
+
'fastq')).cleanpath.to_s
|
|
26
|
+
|
|
27
|
+
# A module providing methods to compare float arrays
|
|
28
|
+
module FloatArrayComparison
|
|
29
|
+
private
|
|
30
|
+
def float_array_equivalent?(expected, actual, *arg)
|
|
31
|
+
assert_equal(expected.size, actual.size, *arg)
|
|
32
|
+
dt = Float::EPSILON * 1024
|
|
33
|
+
(0...(expected.size)).each do |i|
|
|
34
|
+
e = expected[i]
|
|
35
|
+
a = actual[i]
|
|
36
|
+
#assert_equal(e, a)
|
|
37
|
+
assert_in_delta(e, a, e.abs * dt)
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end #module FloatArrayComparison
|
|
41
|
+
|
|
42
|
+
# Tests using 'longreads_original_sanger.fastq'
|
|
43
|
+
class TestFastq_longreads_original_sanger < Test::Unit::TestCase
|
|
44
|
+
include FloatArrayComparison
|
|
45
|
+
|
|
46
|
+
SEQS =
|
|
47
|
+
[
|
|
48
|
+
'tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGA
|
|
49
|
+
AGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca
|
|
50
|
+
aatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcagg
|
|
51
|
+
tttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca
|
|
52
|
+
tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaaga
|
|
53
|
+
agatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtc
|
|
54
|
+
gaggaactgccaacggacgacacagggagtagnnn',
|
|
55
|
+
'tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCT
|
|
56
|
+
ACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgaga
|
|
57
|
+
ctgccaaggcacncagggataggnn',
|
|
58
|
+
'tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATA
|
|
59
|
+
TGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGC
|
|
60
|
+
GATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATA
|
|
61
|
+
ATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTG
|
|
62
|
+
CCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGC
|
|
63
|
+
CAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgc
|
|
64
|
+
caaggcacacaggggataggnn',
|
|
65
|
+
'tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGT
|
|
66
|
+
CCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTT
|
|
67
|
+
GCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCA
|
|
68
|
+
TTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCA
|
|
69
|
+
TTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTG
|
|
70
|
+
GCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaa
|
|
71
|
+
ggcacacagggggatagggnn',
|
|
72
|
+
'tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCG
|
|
73
|
+
ACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACG
|
|
74
|
+
TCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTG
|
|
75
|
+
ATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttanca
|
|
76
|
+
acaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccgg
|
|
77
|
+
acccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggtttttt
|
|
78
|
+
cggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagg
|
|
79
|
+
gaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaagg
|
|
80
|
+
ggggggaagtaggngnnnnnnnnnnnn',
|
|
81
|
+
'tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTAT
|
|
82
|
+
CGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATAC
|
|
83
|
+
CGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATT
|
|
84
|
+
TAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnn
|
|
85
|
+
nnnnnnnnnnnnnnnnnn',
|
|
86
|
+
'tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTAC
|
|
87
|
+
AAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTT
|
|
88
|
+
TTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGC
|
|
89
|
+
TGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGA
|
|
90
|
+
CCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGC
|
|
91
|
+
AAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTG
|
|
92
|
+
TGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAA
|
|
93
|
+
AGctgagactgccaaggcacacaggggataggn',
|
|
94
|
+
'tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGC
|
|
95
|
+
ATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATT
|
|
96
|
+
TCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGT
|
|
97
|
+
ATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATC
|
|
98
|
+
TCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCA
|
|
99
|
+
AAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTT
|
|
100
|
+
ACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn',
|
|
101
|
+
'tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATT
|
|
102
|
+
TTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTT
|
|
103
|
+
AATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTT
|
|
104
|
+
GAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATT
|
|
105
|
+
TTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagg
|
|
106
|
+
gggataggn',
|
|
107
|
+
'tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATA
|
|
108
|
+
ACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAA
|
|
109
|
+
AACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACA
|
|
110
|
+
TTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTA
|
|
111
|
+
TTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCC
|
|
112
|
+
ATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATAC
|
|
113
|
+
AGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggat
|
|
114
|
+
aggn'
|
|
115
|
+
].collect { |x| x.gsub(/\s/, '').freeze }.freeze
|
|
116
|
+
|
|
117
|
+
IDLINES =
|
|
118
|
+
[
|
|
119
|
+
'FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95]',
|
|
120
|
+
'FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]',
|
|
121
|
+
'FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346]',
|
|
122
|
+
'FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343]',
|
|
123
|
+
'FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208]',
|
|
124
|
+
'FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193]',
|
|
125
|
+
'FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418]',
|
|
126
|
+
'FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374]',
|
|
127
|
+
'FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273]',
|
|
128
|
+
'FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389]',
|
|
129
|
+
].collect { |x| x.freeze }.freeze
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
ENTRY_IDS = [ 'FSRRS4401BE7HA',
|
|
133
|
+
'FSRRS4401BRRTC',
|
|
134
|
+
'FSRRS4401B64ST',
|
|
135
|
+
'FSRRS4401EJ0YH',
|
|
136
|
+
'FSRRS4401BK0IB',
|
|
137
|
+
'FSRRS4401ARCCB',
|
|
138
|
+
'FSRRS4401CM938',
|
|
139
|
+
'FSRRS4401EQLIK',
|
|
140
|
+
'FSRRS4401AOV6A',
|
|
141
|
+
'FSRRS4401EG0ZW'
|
|
142
|
+
].collect { |x| x.freeze }.freeze
|
|
143
|
+
|
|
144
|
+
QUALITY_STRINGS =
|
|
145
|
+
[ <<'_0_', <<'_1_', <<'_2_', <<'_3_', <<'_4_', <<'_5_', <<'_6_', <<'_7_', <<'_8_', <<'_9_' ].collect { |x| x.delete("\r\n").freeze }.freeze
|
|
146
|
+
FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIII
|
|
147
|
+
IHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF
|
|
148
|
+
D???:3104/76=:5...4.3,,,366////4<ABBAAA=
|
|
149
|
+
CCFDDDDDDDD:666CDFFFF=<ABA=;:333111<===9
|
|
150
|
+
9;B889FFFFFFDDBDBDDD=8844231..,,,-,,,,,,
|
|
151
|
+
,,1133..---17111,,,,,22555131121.--.,333
|
|
152
|
+
11,.,,3--,,.,,--,3511123..--!,,,,--,----
|
|
153
|
+
9,,,,8=,,-,,,-,,,,---26:9:5-..1,,,,11//,
|
|
154
|
+
,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.
|
|
155
|
+
030000,,,044400036;96662.//;7><;!!!
|
|
156
|
+
_0_
|
|
157
|
+
FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=5
|
|
158
|
+
55:BBBBB@@?8:8<?<89898<84442;==3,,,514,,
|
|
159
|
+
,11,,,.,,21777555513,..--1115758.//34488
|
|
160
|
+
><<;;;;9944/!/4,,,57855!!
|
|
161
|
+
_1_
|
|
162
|
+
IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIII
|
|
163
|
+
IIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIII
|
|
164
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EI
|
|
165
|
+
CE::338=/----,8=>>??:2-////7>CEEIEIHHHII
|
|
166
|
+
IIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHI
|
|
167
|
+
IIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIII
|
|
168
|
+
IIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHI
|
|
169
|
+
IIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIII
|
|
170
|
+
IIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIH
|
|
171
|
+
H????EIIIFF999;EIIBB!!
|
|
172
|
+
_2_
|
|
173
|
+
IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIH
|
|
174
|
+
HHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
175
|
+
IIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>
|
|
176
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7I
|
|
177
|
+
IIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIII
|
|
178
|
+
IIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIII
|
|
179
|
+
BBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
|
180
|
+
IIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIII
|
|
181
|
+
GGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??
|
|
182
|
+
>>?EFEE?/////;:80--!!
|
|
183
|
+
_3_
|
|
184
|
+
FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIII
|
|
185
|
+
IIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<<
|
|
186
|
+
88;?AABDDC???DDAAAADA666D?DDD=====AA>?>>
|
|
187
|
+
<<<=<11188<<???AA?9555=ABBB@@?=>>?@@1114
|
|
188
|
+
2::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=//
|
|
189
|
+
//8,--111111!23--/24!37:6666<;822/..4!46
|
|
190
|
+
521177553.-.23!231121112,,-,,211==5-----
|
|
191
|
+
-,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,,
|
|
192
|
+
,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22--
|
|
193
|
+
---//----55//**/--22--**,,,,**,,,,,,.1.,
|
|
194
|
+
*,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,
|
|
195
|
+
,),,,,,**//.),,,///,,,,,,,,,,,.))33---,,
|
|
196
|
+
,,,,,,,,(0,,,!.!!!!!!!!!!!!
|
|
197
|
+
_4_
|
|
198
|
+
FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;663
|
|
199
|
+
22366762243348<<=??4445::>ABAAA@<<==B=:5
|
|
200
|
+
55:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA=
|
|
201
|
+
=88880004><<<99688;889<889?BBBBA=???DDBB
|
|
202
|
+
B@@??88889---237771,,,,,,,,--1152<<00158
|
|
203
|
+
A@><<<<<43277711,,,--37===75,----34666!!
|
|
204
|
+
!!!!!!!!!!!!!!!!!!
|
|
205
|
+
_5_
|
|
206
|
+
IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666I
|
|
207
|
+
IIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIII
|
|
208
|
+
I@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI::///
|
|
209
|
+
//7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIII
|
|
210
|
+
IIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIII
|
|
211
|
+
IIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@7
|
|
212
|
+
77772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIII
|
|
213
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIII
|
|
214
|
+
IIIIIIIII==?==IIIII???=;I63DDD82--,,,38=
|
|
215
|
+
=::----,,---+++33066;@6380008/:889<:BGII
|
|
216
|
+
IIIIIIIFE<?F5500-----5:;;;:>?@C<<7999EEE
|
|
217
|
+
EEE@@@@EEEEE!
|
|
218
|
+
_6_
|
|
219
|
+
III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHH
|
|
220
|
+
HIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII
|
|
221
|
+
@@@@IIIIEIE111100----22?=8---:-------,,,
|
|
222
|
+
,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122
|
|
223
|
+
000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//
|
|
224
|
+
-,,21??<5-002=6FBB?:9<=11/4444//-//77??G
|
|
225
|
+
EIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIII
|
|
226
|
+
IIIIIIIIIIIIIIIIIEE1//--822;----.777@EII
|
|
227
|
+
IIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIII
|
|
228
|
+
I994227775555AE;IEEEEEIIIII??9755>@==:3,
|
|
229
|
+
,,,,33336!!
|
|
230
|
+
_7_
|
|
231
|
+
IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIII
|
|
232
|
+
IIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B
|
|
233
|
+
@@D66445555<<<GII>>AAIIIIIIII;;;::III???
|
|
234
|
+
CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE?
|
|
235
|
+
EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHH
|
|
236
|
+
HIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97
|
|
237
|
+
771119:EAAADDBD7777=/111122DA@@B68;;;I8H
|
|
238
|
+
HIIIII;;;;?>IECCCB/////;745=!
|
|
239
|
+
_8_
|
|
240
|
+
IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;
|
|
241
|
+
;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIII
|
|
242
|
+
IIIIIIIIIIIIIEE94442244@@666CC<<BDDA=---
|
|
243
|
+
--2<,,,,659//00===8CIII;>>==HH;;IIIIIICC
|
|
244
|
+
@@???III@@@@IC?666HIDDCI?B??CC<EE11111B4
|
|
245
|
+
BDDCB;=@B777>////-=323?423,,,/=1,,,,-:4E
|
|
246
|
+
;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99
|
|
247
|
+
988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIII
|
|
248
|
+
II?????IIIIIIIIIIICAC;55539EIIIIIIIIIIII
|
|
249
|
+
IIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFI
|
|
250
|
+
II;;;977FFCCC@24449?FDD!
|
|
251
|
+
_9_
|
|
252
|
+
|
|
253
|
+
# Numeric scores for each quality string: the byte value of every character
# minus 33.
QUALITY_SCORES = QUALITY_STRINGS.collect { |quality_string|
  byte_codes = quality_string.unpack('C*')
  byte_codes.collect { |code| code - 33 }.freeze
}.freeze
|
|
256
|
+
|
|
257
|
+
# Error probability for every score q in QUALITY_SCORES, computed as
# 10 ** (-q / 10.0).
ERROR_PROBABILITIES = QUALITY_SCORES.collect { |score_list|
  probabilities = score_list.collect { |q| 10 ** (- q / 10.0) }
  probabilities.freeze
}.freeze
|
|
260
|
+
|
|
261
|
+
# Opens 'longreads_original_sanger.fastq' from the FASTQ test data directory
# as a Bio::FlatFile of Bio::Fastq entries; every test reads from @ff.
def setup
  fn = File.join(TestFastqDataDir, 'longreads_original_sanger.fastq')
  @ff = Bio::FlatFile.open(Bio::Fastq, fn)
end

# Closes the flat file opened by setup so that one file handle is not left
# open for each test of this class.
def teardown
  @ff.close if @ff
end
|
|
265
|
+
|
|
266
|
+
# Every entry must validate, and the file must be fully consumed afterwards.
def test_validate_format
  @ff.each { |entry| assert(entry.validate_format) }
  assert(@ff.eof?)
end
|
|
272
|
+
|
|
273
|
+
# validate_format must succeed for every entry and must leave the array
# passed to it empty.
def test_validate_format_with_array
  @ff.each do |entry|
    problems = []
    assert(entry.validate_format(problems))
    assert(problems.empty?)
  end
end
|
|
280
|
+
|
|
281
|
+
# Entry definitions must match IDLINES, in order, with none left over.
def test_definition
  expected = IDLINES.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.definition) }
  assert(expected.empty?)
end
|
|
288
|
+
|
|
289
|
+
# Entry IDs must match ENTRY_IDS, in order, with none left over.
def test_entry_id
  expected = ENTRY_IDS.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.entry_id) }
  assert(expected.empty?)
end
|
|
296
|
+
|
|
297
|
+
# Raw sequence strings must match SEQS, in order, with none left over.
def test_sequence_string
  expected = SEQS.dup
  @ff.each do |entry|
    wanted = expected.shift
    assert_equal(wanted, entry.sequence_string)
  end
  assert(expected.empty?)
end
|
|
305
|
+
|
|
306
|
+
# seq of each entry must equal a Bio::Sequence::Generic built from the
# corresponding SEQS string.
def test_seq
  expected = SEQS.collect { |str| Bio::Sequence::Generic.new(str) }
  @ff.each do |entry|
    wanted = expected.shift
    assert_equal(wanted, entry.seq)
  end
  assert(expected.empty?)
end
|
|
314
|
+
|
|
315
|
+
# naseq of each entry must equal a Bio::Sequence::NA built from the
# corresponding SEQS string.
def test_naseq
  expected = SEQS.collect { |str| Bio::Sequence::NA.new(str) }
  @ff.each do |entry|
    wanted = expected.shift
    assert_equal(wanted, entry.naseq)
  end
  assert(expected.empty?)
end
|
|
323
|
+
|
|
324
|
+
# nalen of each entry must equal the length of a Bio::Sequence::NA built
# from the corresponding SEQS string.
def test_nalen
  expected = SEQS.collect { |str| Bio::Sequence::NA.new(str).length }
  @ff.each do |entry|
    wanted = expected.shift
    assert_equal(wanted, entry.nalen)
  end
  assert(expected.empty?)
end
|
|
332
|
+
|
|
333
|
+
# Quality strings must match QUALITY_STRINGS, in order, with none left over.
def test_quality_string
  expected = QUALITY_STRINGS.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.quality_string) }
  assert(expected.empty?)
end
|
|
340
|
+
|
|
341
|
+
# Quality scores must match QUALITY_SCORES, in order, with none left over.
def test_quality_scores
  expected = QUALITY_SCORES.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.quality_scores) }
  assert(expected.empty?)
end
|
|
348
|
+
|
|
349
|
+
# Error probabilities of each entry must be float-equivalent to the
# corresponding ERROR_PROBABILITIES array, with none left over.
def test_error_probabilities
  expected = ERROR_PROBABILITIES.dup
  @ff.each do |entry|
    float_array_equivalent?(expected.shift, entry.error_probabilities)
  end
  assert(expected.empty?)
end
|
|
357
|
+
|
|
358
|
+
# Converts each entry with to_biosequence (which must not raise) and checks
# the sequence, definition, entry ID, quality score type (:phred), quality
# scores and error probabilities of the result against the expected
# constants at the same index.
def test_to_biosequence
  @ff.each_with_index do |entry, idx|
    bioseq = nil
    assert_nothing_raised { bioseq = entry.to_biosequence }
    expected_seq = Bio::Sequence::Generic.new(SEQS[idx])
    assert_equal(expected_seq, bioseq.seq)
    assert_equal(IDLINES[idx], bioseq.definition)
    assert_equal(ENTRY_IDS[idx], bioseq.entry_id)
    assert_equal(:phred, bioseq.quality_score_type)
    assert_equal(QUALITY_SCORES[idx], bioseq.quality_scores)
    float_array_equivalent?(ERROR_PROBABILITIES[idx],
                            bioseq.error_probabilities)
  end
end
|
|
371
|
+
|
|
372
|
+
# Writes each entry back out as :fastq_sanger (title repeated, width 80) and
# checks that the text equals the raw entry read from the file, and that
# re-parsing the text gives the same sequence string, quality string,
# definition, quality scores and error probabilities.
def test_roundtrip
  @ff.each_with_index do |entry, _idx|
    raw_text = @ff.entry_raw
    bioseq = entry.to_biosequence
    output_options = { :repeat_title => true, :width => 80 }
    rendered = bioseq.output(:fastq_sanger, output_options)
    assert_equal(raw_text, rendered)
    reparsed = Bio::Fastq.new(rendered)
    assert_equal(entry.sequence_string, reparsed.sequence_string)
    assert_equal(entry.quality_string, reparsed.quality_string)
    assert_equal(entry.definition, reparsed.definition)
    assert_equal(entry.quality_scores, reparsed.quality_scores)
    float_array_equivalent?(entry.error_probabilities,
                            reparsed.error_probabilities)
  end
end
|
|
388
|
+
|
|
389
|
+
end #class TestFastq_longreads_original_sanger
|
|
390
|
+
|
|
391
|
+
# common methods to read *_full_range_as_*.fastq and test quality scores
|
|
392
|
+
# and error probabilities
|
|
393
|
+
module TestFastq_full_range
|
|
394
|
+
include FloatArrayComparison
|
|
395
|
+
|
|
396
|
+
private
|
|
397
|
+
# Reads all Bio::Fastq entries of the named file in the FASTQ test data
# directory, assigns the given format to each entry and returns the entries.
def read_file(fn, format)
  full_path = File.join(TestFastqDataDir, fn)
  records = Bio::FlatFile.open(Bio::Fastq, full_path) do |flatfile|
    flatfile.to_a
  end
  records.each do |record|
    record.format = format
  end
  records
end
|
|
403
|
+
|
|
404
|
+
# Identity conversion: returns the scores of the range unchanged, as an array.
def scores_through(range)
  range.collect { |score| score }
end
|
|
407
|
+
|
|
408
|
+
# Converts each score q of the range with
#   (10 * log10(10 ** (q / 10.0) - 1)).round
# and limits the result to -5..62. Scores for which 10 ** (q / 10.0) - 1 is
# not positive map to -5. Returns an array.
def scores_phred2solexa(range)
  lower = -5
  upper = 62
  range.collect do |phred|
    odds = 10 ** (phred / 10.0) - 1
    # log10 is undefined for non-positive values: use the lower bound
    next lower if odds <= 0
    rounded = (10 * Math.log10(odds)).round
    [[rounded, lower].max, upper].min
  end
end
|
|
428
|
+
|
|
429
|
+
# Limits each score of the range to 0..62 and returns the results as an array.
def scores_phred2illumina(range)
  lower = 0
  upper = 62
  range.collect { |q| [[q, lower].max, upper].min }
end
|
|
443
|
+
|
|
444
|
+
# Limits each score of the range to 0..93 and returns the results as an array.
def scores_phred2sanger(range)
  lower = 0
  upper = 93
  range.collect { |q| [[q, lower].max, upper].min }
end
|
|
458
|
+
|
|
459
|
+
# Converts each score q of the range with
#   (10 * log10(10 ** (q / 10.0) + 1)).round
# and returns the results as an array.
def scores_solexa2phred(range)
  range.collect do |solexa|
    (10 * Math.log10(10 ** (solexa / 10.0) + 1)).round
  end
end
|
|
466
|
+
|
|
467
|
+
# Applies scores_solexa2phred to the range, then scores_phred2sanger to the
# result.
def scores_solexa2sanger(range)
  phred_scores = scores_solexa2phred(range)
  scores_phred2sanger(phred_scores)
end
|
|
470
|
+
|
|
471
|
+
# Applies scores_solexa2phred to the range, then scores_phred2illumina to the
# result.
def scores_solexa2illumina(range)
  phred_scores = scores_solexa2phred(range)
  scores_phred2illumina(phred_scores)
end
|
|
474
|
+
|
|
475
|
+
# Reads the file with the given format and checks that the first entry has
# exactly the given quality scores and the second entry has them reversed.
def common_test_quality_scores(scores, filename, format)
  forward, backward = read_file(filename, format)
  assert_equal(scores, forward.quality_scores)
  assert_equal(scores.reverse, backward.quality_scores)
end
|
|
480
|
+
|
|
481
|
+
# Reads the file with the given format and checks that the error
# probabilities of the first entry are float-equivalent to the given array,
# and those of the second entry to the reversed array.
def common_test_error_probabilities(probabilities, filename, format)
  forward, backward = read_file(filename, format)
  float_array_equivalent?(probabilities, forward.error_probabilities)
  float_array_equivalent?(probabilities.reverse, backward.error_probabilities)
end
|
|
488
|
+
|
|
489
|
+
# Reads the file with the given format and checks that validate_format is
# true for the first and for the second entry.
def common_test_validate_format(filename, format)
  first_entry, second_entry = read_file(filename, format)
  assert(first_entry.validate_format)
  assert(second_entry.validate_format)
end
|
|
494
|
+
|
|
495
|
+
# Maps each score q to 10 ** (-q / 10.0) and returns the results as an array.
def phred_q2p(scores)
  scores.map do |quality|
    10 ** (-quality / 10.0)
  end
end
|
|
498
|
+
|
|
499
|
+
# Maps each score q to t / (1.0 + t) with t = 10 ** (-q / 10.0) and returns
# the results as an array.
def solexa_q2p(scores)
  scores.map do |quality|
    ratio = 10 ** (-quality / 10.0)
    ratio / (1.0 + ratio)
  end
end
|
|
505
|
+
|
|
506
|
+
public
|
|
507
|
+
# Runs common_test_validate_format on the three files named by the including
# class, each with its matching format name.
def test_validate_format
  klass = self.class
  common_test_validate_format(klass::FILENAME_AS_SANGER, 'fastq-sanger')
  common_test_validate_format(klass::FILENAME_AS_SOLEXA, 'fastq-solexa')
  common_test_validate_format(klass::FILENAME_AS_ILLUMINA, 'fastq-illumina')
end
|
|
515
|
+
|
|
516
|
+
# Quality scores of the "as sanger" file must equal RANGE converted with
# scores_to_sanger.
def test_quality_scores_as_sanger
  expected = scores_to_sanger(self.class::RANGE)
  common_test_quality_scores(expected,
                             self.class::FILENAME_AS_SANGER,
                             'fastq-sanger')
end
|
|
522
|
+
|
|
523
|
+
# Error probabilities of the "as sanger" file must match phred_q2p applied
# to RANGE converted with scores_to_sanger.
def test_error_probabilities_as_sanger
  converted = scores_to_sanger(self.class::RANGE)
  expected = phred_q2p(converted)
  common_test_error_probabilities(expected,
                                  self.class::FILENAME_AS_SANGER,
                                  'fastq-sanger')
end
|
|
530
|
+
|
|
531
|
+
# Quality scores of the "as solexa" file must equal RANGE converted with
# scores_to_solexa.
def test_quality_scores_as_solexa
  expected = scores_to_solexa(self.class::RANGE)
  common_test_quality_scores(expected,
                             self.class::FILENAME_AS_SOLEXA,
                             'fastq-solexa')
end
|
|
537
|
+
|
|
538
|
+
# Error probabilities of the "as solexa" file must match solexa_q2p applied
# to RANGE converted with scores_to_solexa.
def test_error_probabilities_as_solexa
  converted = scores_to_solexa(self.class::RANGE)
  expected = solexa_q2p(converted)
  common_test_error_probabilities(expected,
                                  self.class::FILENAME_AS_SOLEXA,
                                  'fastq-solexa')
end
|
|
545
|
+
|
|
546
|
+
# Quality scores of the "as illumina" file must equal RANGE converted with
# scores_to_illumina.
def test_quality_scores_as_illumina
  expected = scores_to_illumina(self.class::RANGE)
  common_test_quality_scores(expected,
                             self.class::FILENAME_AS_ILLUMINA,
                             'fastq-illumina')
end
|
|
552
|
+
|
|
553
|
+
# Error probabilities of the "as illumina" file must match phred_q2p applied
# to RANGE converted with scores_to_illumina.
def test_error_probabilities_as_illumina
  converted = scores_to_illumina(self.class::RANGE)
  expected = phred_q2p(converted)
  common_test_error_probabilities(expected,
                                  self.class::FILENAME_AS_ILLUMINA,
                                  'fastq-illumina')
end
|
|
560
|
+
end #module TestFastq_full_range
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
# Runs the shared TestFastq_full_range tests on the sanger_full_range_as_*
# files. Expected scores are derived from RANGE (0..93) through the
# scores_to_* aliases below.
class TestFastq_sanger_full_range < Test::Unit::TestCase
  include TestFastq_full_range

  RANGE = 0..93
  FILENAME_AS_SANGER = 'sanger_full_range_as_sanger.fastq'
  FILENAME_AS_SOLEXA = 'sanger_full_range_as_solexa.fastq'
  FILENAME_AS_ILLUMINA = 'sanger_full_range_as_illumina.fastq'

  # conversions used by the shared tests to build the expected scores
  alias scores_to_sanger scores_through
  alias scores_to_solexa scores_phred2solexa
  alias scores_to_illumina scores_phred2illumina
end #class TestFastq_sanger_full_range
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
# Runs the shared TestFastq_full_range tests on the solexa_full_range_as_*
# files. Expected scores are derived from RANGE (-5..62) through the
# scores_to_* aliases below.
class TestFastq_solexa_full_range < Test::Unit::TestCase
  include TestFastq_full_range

  RANGE = (-5)..62
  FILENAME_AS_SANGER = 'solexa_full_range_as_sanger.fastq'
  FILENAME_AS_SOLEXA = 'solexa_full_range_as_solexa.fastq'
  FILENAME_AS_ILLUMINA = 'solexa_full_range_as_illumina.fastq'

  # conversions used by the shared tests to build the expected scores
  alias scores_to_sanger scores_solexa2sanger
  alias scores_to_solexa scores_through
  alias scores_to_illumina scores_solexa2illumina
end #class TestFastq_solexa_full_range
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
# Runs the shared TestFastq_full_range tests on the illumina_full_range_as_*
# files. Expected scores are derived from RANGE (0..62) through the
# scores_to_* aliases below.
class TestFastq_illumina_full_range < Test::Unit::TestCase
  include TestFastq_full_range

  RANGE = 0..62
  FILENAME_AS_SANGER = 'illumina_full_range_as_sanger.fastq'
  FILENAME_AS_SOLEXA = 'illumina_full_range_as_solexa.fastq'
  FILENAME_AS_ILLUMINA = 'illumina_full_range_as_illumina.fastq'

  # conversions used by the shared tests to build the expected scores
  alias scores_to_sanger scores_phred2sanger
  alias scores_to_solexa scores_phred2solexa
  alias scores_to_illumina scores_through
end #class TestFastq_illumina_full_range
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
# common methods for testing error_*.fastq
|
|
606
|
+
module TestFastq_error

  # Defaults; including test cases define their own values.
  FILENAME = nil
  PRE_SKIP = 2
  POST_SKIP = 2
  ERRORS = []

  # Reads the next entry from ff and checks that validate_format returns
  # false and reports exactly the errors listed in ERRORS of the including
  # class: same count, and for each position the same class (kind_of) and
  # the same message.
  def do_test_validate_format(ff)
    entry = ff.next_entry
    reported = []
    assert_equal(false, entry.validate_format(reported))
    assert_equal(self.class::ERRORS.size, reported.size)
    self.class::ERRORS.each_with_index do |wanted, idx|
      found = reported[idx]
      assert_kind_of(wanted.class, found)
      assert_equal(wanted.message, found.message)
    end
  end
  private :do_test_validate_format

  # Opens FILENAME of the including class, skips PRE_SKIP entries, runs
  # do_test_validate_format on the following entry, reads POST_SKIP more
  # entries and finally expects the end of the file.
  def test_validate_format
    klass = self.class
    path = File.join(TestFastqDataDir, klass::FILENAME)
    Bio::FlatFile.open(Bio::Fastq, path) do |ff|
      klass::PRE_SKIP.times { ff.next_entry }
      do_test_validate_format(ff)
      klass::POST_SKIP.times { ff.next_entry }
      assert(ff.eof?)
    end
  end
end #module TestFastq_error
|
|
637
|
+
|
|
638
|
+
# Shared TestFastq_error checks on 'error_diff_ids.fastq': two entries are
# skipped, the next one must fail validate_format with exactly ERRORS, then
# two more entries are read before end of file.
class TestFastq_error_diff_ids < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_diff_ids.fastq'
  PRE_SKIP = 2
  POST_SKIP = 2
  ERRORS = [ Bio::Fastq::Error::Diff_ids.new ]
end #class TestFastq_error_diff_ids
|
|
646
|
+
|
|
647
|
+
# Shared TestFastq_error checks on 'error_double_qual.fastq': two entries are
# skipped, the next one must fail validate_format with exactly ERRORS, then
# two more entries are read before end of file.
class TestFastq_error_double_qual < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_double_qual.fastq'
  PRE_SKIP = 2
  POST_SKIP = 2
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ]
end #class TestFastq_error_double_qual
|
|
655
|
+
|
|
656
|
+
# Shared TestFastq_error checks on 'error_double_seq.fastq': three entries
# are skipped, the next one must fail validate_format with exactly ERRORS,
# and end of file is expected right after it.
class TestFastq_error_double_seq < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_double_seq.fastq'
  PRE_SKIP = 3
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ]
end #class TestFastq_error_double_seq
|
|
664
|
+
|
|
665
|
+
# Shared TestFastq_error checks on 'error_long_qual.fastq': three entries are
# skipped, the next one must fail validate_format with exactly ERRORS, then
# one more entry is read before end of file.
class TestFastq_error_long_qual < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_long_qual.fastq'
  PRE_SKIP = 3
  POST_SKIP = 1
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ]
end #class TestFastq_error_long_qual
|
|
673
|
+
|
|
674
|
+
# Checks 'error_no_qual.fastq' with a custom entry check: no entries are
# skipped before or after it.
class TestFastq_error_no_qual < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_no_qual.fastq'
  PRE_SKIP = 0
  POST_SKIP = 0

  private

  # Reads three entries from ff; validate_format of each must report exactly
  # one error: Long_qual for the first two entries, Short_qual for the third.
  def do_test_validate_format(ff)
    expected_kinds = [ Bio::Fastq::Error::Long_qual,
                       Bio::Fastq::Error::Long_qual,
                       Bio::Fastq::Error::Short_qual ]
    expected_kinds.each do |kind|
      entry = ff.next_entry
      reported = []
      entry.validate_format(reported)
      assert_equal(1, reported.size)
      assert_kind_of(kind, reported[0])
    end
  end
end #class TestFastq_error_no_qual
|
|
699
|
+
|
|
700
|
+
# Shared TestFastq_error checks on 'error_qual_del.fastq': three entries are
# skipped, the next one must fail validate_format with exactly ERRORS, then
# one more entry is read before end of file.
class TestFastq_error_qual_del < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_del.fastq'
  PRE_SKIP = 3
  POST_SKIP = 1
  # NOTE(review): 12 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(12) ]
end #class TestFastq_error_qual_del
|
|
708
|
+
|
|
709
|
+
# Shared TestFastq_error checks on 'error_qual_escape.fastq': four entries
# are skipped, the next one must fail validate_format with exactly ERRORS,
# and end of file is expected right after it.
class TestFastq_error_qual_escape < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_escape.fastq'
  PRE_SKIP = 4
  POST_SKIP = 0
  # NOTE(review): 7 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(7) ]
end #class TestFastq_error_qual_escape
|
|
717
|
+
|
|
718
|
+
# Shared TestFastq_error checks on 'error_qual_null.fastq': the first entry
# must fail validate_format with exactly ERRORS, then four more entries are
# read before end of file.
class TestFastq_error_qual_null < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_null.fastq'
  PRE_SKIP = 0
  POST_SKIP = 4
  # NOTE(review): 3 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(3) ]
end #class TestFastq_error_qual_null
|
|
726
|
+
|
|
727
|
+
# Shared TestFastq_error checks on 'error_qual_space.fastq': three entries
# are skipped, the next one must fail validate_format with exactly ERRORS,
# then one more entry is read before end of file.
class TestFastq_error_qual_space < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_space.fastq'
  PRE_SKIP = 3
  POST_SKIP = 1
  # NOTE(review): 18 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(18) ]
end #class TestFastq_error_qual_space
|
|
735
|
+
|
|
736
|
+
# Shared TestFastq_error checks on 'error_qual_tab.fastq': four entries are
# skipped, the next one must fail validate_format with exactly ERRORS, and
# end of file is expected right after it.
class TestFastq_error_qual_tab < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_tab.fastq'
  PRE_SKIP = 4
  POST_SKIP = 0
  # NOTE(review): 10 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(10) ]
end #class TestFastq_error_qual_tab
|
|
744
|
+
|
|
745
|
+
# Shared TestFastq_error checks on 'error_qual_unit_sep.fastq': two entries
# are skipped, the next one must fail validate_format with exactly ERRORS,
# then two more entries are read before end of file.
class TestFastq_error_qual_unit_sep < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_unit_sep.fastq'
  PRE_SKIP = 2
  POST_SKIP = 2
  # NOTE(review): 5 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(5) ]
end #class TestFastq_error_qual_unit_sep
|
|
753
|
+
|
|
754
|
+
# Shared TestFastq_error checks on 'error_qual_vtab.fastq': the first entry
# must fail validate_format with exactly ERRORS, then four more entries are
# read before end of file.
class TestFastq_error_qual_vtab < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_qual_vtab.fastq'
  PRE_SKIP = 0
  POST_SKIP = 4
  # NOTE(review): 10 is presumably the position of the offending quality
  # character -- confirm against Bio::Fastq::Error::Qual_char.
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(10) ]
end #class TestFastq_error_qual_vtab
|
|
762
|
+
|
|
763
|
+
# Shared TestFastq_error checks on 'error_short_qual.fastq': two entries are
# skipped, the next one must fail validate_format with exactly ERRORS, then
# one more entry is read before end of file.
class TestFastq_error_short_qual < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_short_qual.fastq'
  PRE_SKIP = 2
  POST_SKIP = 1
  # NOTE(review): the expected error is Long_qual although the file name
  # says "short_qual" -- confirm this is intended.
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ]
end #class TestFastq_error_short_qual
|
|
771
|
+
|
|
772
|
+
# Checks 'error_spaces.fastq' with a custom entry check: no entries are
# skipped before or after it.
class TestFastq_error_spaces < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_spaces.fastq'
  PRE_SKIP = 0
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::Seq_char.new(9),
             Bio::Fastq::Error::Seq_char.new(20),
             Bio::Fastq::Error::Qual_char.new(9),
             Bio::Fastq::Error::Qual_char.new(20)
           ]

  private

  # Reads five entries from ff; validate_format of each must report exactly
  # the errors listed in ERRORS: same count, and for each position the same
  # class (kind_of) and the same message.
  #
  # The expected count is taken from ERRORS (four entries here) instead of a
  # separate literal, so the count check cannot drift from the list.
  def do_test_validate_format(ff)
    5.times do
      e = ff.next_entry
      a = []
      e.validate_format(a)
      assert_equal(self.class::ERRORS.size, a.size)
      self.class::ERRORS.each do |ex|
        obj = a.shift
        assert_kind_of(ex.class, obj)
        assert_equal(ex.message, obj.message)
      end
    end
  end
end #class TestFastq_error_spaces
|
|
799
|
+
|
|
800
|
+
# Same checks as TestFastq_error_spaces (inherited), run against
# 'error_tabs.fastq'.
class TestFastq_error_tabs < TestFastq_error_spaces
  FILENAME = 'error_tabs.fastq'
end #class TestFastq_error_tabs
|
|
803
|
+
|
|
804
|
+
# Shared TestFastq_error checks on 'error_trunc_at_plus.fastq': four entries
# are skipped, the next one must fail validate_format with exactly ERRORS,
# and end of file is expected right after it.
class TestFastq_error_trunc_at_plus < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_trunc_at_plus.fastq'
  PRE_SKIP = 4
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::No_qual.new ]
end #class TestFastq_error_trunc_at_plus
|
|
812
|
+
|
|
813
|
+
# Same checks and expectations as TestFastq_error_trunc_at_plus (inherited),
# run against 'error_trunc_at_qual.fastq'.
class TestFastq_error_trunc_at_qual < TestFastq_error_trunc_at_plus
  FILENAME = 'error_trunc_at_qual.fastq'
end #class TestFastq_error_trunc_at_qual
|
|
816
|
+
|
|
817
|
+
# Shared TestFastq_error checks on 'error_trunc_at_seq.fastq': four entries
# are skipped, the next one must fail validate_format with exactly ERRORS,
# and end of file is expected right after it.
class TestFastq_error_trunc_at_seq < Test::Unit::TestCase
  include TestFastq_error

  FILENAME = 'error_trunc_at_seq.fastq'
  PRE_SKIP = 4
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::No_qual.new ]
end #class TestFastq_error_trunc_at_seq
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
end #module TestFastq
|
|
828
|
+
end #module Bio
|
|
829
|
+
|