bio 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
@@ -9,10 +9,12 @@
|
|
9
9
|
# $Id:$
|
10
10
|
#
|
11
11
|
|
12
|
+
# loading helper routine for testing bioruby
|
12
13
|
require 'pathname'
|
13
|
-
|
14
|
-
|
14
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
15
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
15
16
|
|
17
|
+
# libraries needed for the tests
|
16
18
|
require 'test/unit'
|
17
19
|
require 'bio'
|
18
20
|
|
@@ -0,0 +1,76 @@
|
|
1
|
+
#
|
2
|
+
# test/unit/bio/db/sanger_chromatogram/test_abif.rb - Unit test for Bio::Abif
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
# loading helper routine for testing bioruby
|
9
|
+
require 'pathname'
|
10
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
11
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
12
|
+
|
13
|
+
# libraries needed for the tests
|
14
|
+
require 'test/unit'
|
15
|
+
require 'bio/db/sanger_chromatogram/chromatogram'
|
16
|
+
require 'bio/db/sanger_chromatogram/abif'
|
17
|
+
|
18
|
+
module Bio
|
19
|
+
|
20
|
+
module TestAbifData
|
21
|
+
DataPath = Pathname.new(File.join(BioRubyTestDataPath,
|
22
|
+
'sanger_chromatogram')).cleanpath.to_s
|
23
|
+
def self.abif
|
24
|
+
File.read(File.join(DataPath, 'test_chromatogram_abif.ab1'))
|
25
|
+
end
|
26
|
+
end #module TestAbifData
|
27
|
+
|
28
|
+
class TestAbif < Test::Unit::TestCase
|
29
|
+
|
30
|
+
Abif_sequence = "nnnnnnnnnnnttggttggttcgctataaaaactcttattttggataatttgtttagctgttgcaatataaattgacccatttaatttataaattggattctcgttgcaataaatttccagatcctgaaaaagctctggcttaaccaaattgccttggctatcaatgcttctacaccaagaaggctttaaagagataggactaactgaaacgacactttttcccgttgcttgatgtatttcaacagcatgtcttatggtttctggcttcctgaatggagaagttggttgtaaaagcaatacactgtcaaaaaaaacctccatttgctgaaacttaaacaggaggtcaataacagtatgaatcacatccgaagtatccgtggctaaatcttccgatcttagccaaggtactgaagccccatattgaacn".freeze
|
31
|
+
Abif_RC_sequence = "ngttcaatatggggcttcagtaccttggctaagatcggaagatttagccacggatacttcggatgtgattcatactgttattgacctcctgtttaagtttcagcaaatggaggttttttttgacagtgtattgcttttacaaccaacttctccattcaggaagccagaaaccataagacatgctgttgaaatacatcaagcaacgggaaaaagtgtcgtttcagttagtcctatctctttaaagccttcttggtgtagaagcattgatagccaaggcaatttggttaagccagagctttttcaggatctggaaatttattgcaacgagaatccaatttataaattaaatgggtcaatttatattgcaacagctaaacaaattatccaaaataagagtttttatagcgaaccaaccaannnnnnnnnnn".freeze
|
32
|
+
|
33
|
+
Abif_first_10_peak_indices = [3, 16,38,61,66,91,105,115,138,151].freeze
|
34
|
+
Abif_last_10_peak_indices = [5070,5081,5094,5107,5120,5133,5145,5157,5169,5182].freeze
|
35
|
+
|
36
|
+
Abif_atrace_size = 5236
|
37
|
+
|
38
|
+
Abif_RC_first_10_peak_indices = Abif_last_10_peak_indices.collect{|index| Abif_atrace_size - index}.reverse.freeze
|
39
|
+
Abif_RC_last_10_peak_indices = Abif_first_10_peak_indices.collect{|index| Abif_atrace_size - index}.reverse.freeze
|
40
|
+
|
41
|
+
def setup
|
42
|
+
@abi = Abif.new(TestAbifData.abif)
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_seq
|
46
|
+
assert_equal(Abif_sequence, @abi.seq.to_s)
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_to_biosequence
|
50
|
+
assert_equal(Abif_sequence, @abi.to_biosequence.to_s)
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_complement
|
54
|
+
@RC_chromatogram = @abi.complement
|
55
|
+
# check reverse complemented sequence
|
56
|
+
assert_equal(Abif_RC_sequence, @RC_chromatogram.sequence)
|
57
|
+
# check reverse complemented peak indices
|
58
|
+
assert_equal(Abif_RC_first_10_peak_indices,
|
59
|
+
@RC_chromatogram.peak_indices.slice(0,10))
|
60
|
+
assert_equal(Abif_RC_last_10_peak_indices,
|
61
|
+
@RC_chromatogram.peak_indices.slice(-10..-1))
|
62
|
+
# check reverse complemented traces
|
63
|
+
assert_equal(@abi.atrace.slice(0,10),
|
64
|
+
@RC_chromatogram.ttrace.slice(-10..-1).reverse)
|
65
|
+
assert_equal(@abi.ctrace.slice(0,10),
|
66
|
+
@RC_chromatogram.gtrace.slice(-10..-1).reverse)
|
67
|
+
assert_equal(@abi.gtrace.slice(0,10),
|
68
|
+
@RC_chromatogram.ctrace.slice(-10..-1).reverse)
|
69
|
+
assert_equal(@abi.ttrace.slice(0,10),
|
70
|
+
@RC_chromatogram.atrace.slice(-10..-1).reverse)
|
71
|
+
|
72
|
+
assert_equal(@abi.qualities.slice(0,10),
|
73
|
+
@RC_chromatogram.qualities.slice(-10..-1).reverse)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#
|
2
|
+
# test/unit/bio/db/sanger_chromatogram/test_scf.rb - Unit test for Bio::Scf
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
# loading helper routine for testing bioruby
|
9
|
+
require 'pathname'
|
10
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
11
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
12
|
+
|
13
|
+
# libraries needed for the tests
|
14
|
+
require 'test/unit'
|
15
|
+
require 'bio/db/sanger_chromatogram/scf'
|
16
|
+
|
17
|
+
module Bio
|
18
|
+
|
19
|
+
module TestScfData
|
20
|
+
DataPath = Pathname.new(File.join(BioRubyTestDataPath,
|
21
|
+
'sanger_chromatogram')).cleanpath.to_s
|
22
|
+
def self.scf_version_2
|
23
|
+
File.read(File.join(DataPath, 'test_chromatogram_scf_v2.scf'))
|
24
|
+
end
|
25
|
+
def self.scf_version_3
|
26
|
+
File.read(File.join(DataPath, 'test_chromatogram_scf_v3.scf'))
|
27
|
+
end
|
28
|
+
end #module TestScfData
|
29
|
+
|
30
|
+
module TestScf_common
|
31
|
+
Scf_sequence = "attaacgtaaaaggtttggttggttcgctataaaaactcttattttggataatttgtttagctgttgcaatataaattgacccatttaatttataaattggattctcgttgcaataaatttccagatcctgaaaaagctctggcttaaccaaattgccttggctatcaatgcttctacaccaagaaggctttaaagagataggactaactgaaacgacactttttcccgttgcttgatgtatttcaacagcatgtcttatggtttctggcttcctgaatggagaagttggttgtaaaagcaatacactgtcaaaaaaaacctccatttgctgaaacttaaacaggaggtcaataacagtatgaatcacatccgaagtatccgtggctaaatcttccgatcttagccaaggtactgaagccccatattgaacggann".freeze
|
32
|
+
Scf_RC_sequence = "nntccgttcaatatggggcttcagtaccttggctaagatcggaagatttagccacggatacttcggatgtgattcatactgttattgacctcctgtttaagtttcagcaaatggaggttttttttgacagtgtattgcttttacaaccaacttctccattcaggaagccagaaaccataagacatgctgttgaaatacatcaagcaacgggaaaaagtgtcgtttcagttagtcctatctctttaaagccttcttggtgtagaagcattgatagccaaggcaatttggttaagccagagctttttcaggatctggaaatttattgcaacgagaatccaatttataaattaaatgggtcaatttatattgcaacagctaaacaaattatccaaaataagagtttttatagcgaaccaaccaaaccttttacgttaat".freeze
|
33
|
+
|
34
|
+
Scf_first_10_peak_indices = [16,24,37,49,64,64,80,92,103,113].freeze
|
35
|
+
Scf_last_10_peak_indices = [5120,5132,5145,5157,5169,5182,5195,5207,5219,5231].freeze
|
36
|
+
|
37
|
+
Scf_atrace_size = 5236
|
38
|
+
|
39
|
+
Scf_RC_first_10_peak_indices = Scf_last_10_peak_indices.collect{|index| Scf_atrace_size - index}.reverse.freeze
|
40
|
+
Scf_RC_last_10_peak_indices = Scf_first_10_peak_indices.collect{|index| Scf_atrace_size - index}.reverse.freeze
|
41
|
+
|
42
|
+
def test_seq
|
43
|
+
assert_equal(Scf_sequence, @scf.seq.to_s)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_to_biosequence
|
47
|
+
assert_equal(Scf_sequence, @scf.to_biosequence.to_s)
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_complement
|
51
|
+
@RC_chromatogram = @scf.complement
|
52
|
+
# check reverse complemented sequence
|
53
|
+
assert_equal(Scf_RC_sequence, @RC_chromatogram.sequence)
|
54
|
+
# check reverse complemented peak indices
|
55
|
+
assert_equal(Scf_RC_first_10_peak_indices,
|
56
|
+
@RC_chromatogram.peak_indices.slice(0,10))
|
57
|
+
assert_equal(Scf_RC_last_10_peak_indices,
|
58
|
+
@RC_chromatogram.peak_indices.slice(-10..-1))
|
59
|
+
# check reverse complemented traces
|
60
|
+
assert_equal(@scf.atrace.slice(0,10),
|
61
|
+
@RC_chromatogram.ttrace.slice(-10..-1).reverse)
|
62
|
+
assert_equal(@scf.ctrace.slice(0,10),
|
63
|
+
@RC_chromatogram.gtrace.slice(-10..-1).reverse)
|
64
|
+
assert_equal(@scf.gtrace.slice(0,10),
|
65
|
+
@RC_chromatogram.ctrace.slice(-10..-1).reverse)
|
66
|
+
assert_equal(@scf.ttrace.slice(0,10),
|
67
|
+
@RC_chromatogram.atrace.slice(-10..-1).reverse)
|
68
|
+
# check reverse complemented individual and combined qualities
|
69
|
+
#if @RC_chromatogram.chromatogram_type == ".scf"
|
70
|
+
assert_equal(@scf.aqual.slice(0,10),
|
71
|
+
@RC_chromatogram.tqual.slice(-10..-1).reverse)
|
72
|
+
assert_equal(@scf.cqual.slice(0,10),
|
73
|
+
@RC_chromatogram.gqual.slice(-10..-1).reverse)
|
74
|
+
assert_equal(@scf.gqual.slice(0,10),
|
75
|
+
@RC_chromatogram.cqual.slice(-10..-1).reverse)
|
76
|
+
assert_equal(@scf.tqual.slice(0,10),
|
77
|
+
@RC_chromatogram.aqual.slice(-10..-1).reverse)
|
78
|
+
#end
|
79
|
+
assert_equal(@scf.qualities.slice(0,10),
|
80
|
+
@RC_chromatogram.qualities.slice(-10..-1).reverse)
|
81
|
+
end
|
82
|
+
end #module TestScf_common
|
83
|
+
|
84
|
+
class TestScf_version_2 < Test::Unit::TestCase
|
85
|
+
include TestScf_common
|
86
|
+
def setup
|
87
|
+
@scf = Scf.new(TestScfData.scf_version_2)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
class TestScf_version_3 < Test::Unit::TestCase
|
92
|
+
include TestScf_common
|
93
|
+
def setup
|
94
|
+
@scf = Scf.new(TestScfData.scf_version_3)
|
95
|
+
end
|
96
|
+
end #class TestScf_version_3
|
97
|
+
|
98
|
+
end #module Bio
|
@@ -5,21 +5,21 @@
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id
|
8
|
+
# $Id:$
|
9
9
|
#
|
10
10
|
|
11
|
+
# loading helper routine for testing bioruby
|
11
12
|
require 'pathname'
|
12
|
-
|
13
|
-
|
13
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
|
14
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
14
15
|
|
16
|
+
# libraries needed for the tests
|
15
17
|
require 'test/unit'
|
16
|
-
require 'bio/io/fetch'
|
17
18
|
require 'bio/db/aaindex'
|
18
19
|
|
19
20
|
module Bio
|
20
21
|
class DataAAindex
|
21
|
-
|
22
|
-
TestDataAAindex = Pathname.new(File.join(bioruby_root, 'test', 'data', 'aaindex')).cleanpath.to_s
|
22
|
+
TestDataAAindex = Pathname.new(File.join(BioRubyTestDataPath, 'aaindex')).cleanpath.to_s
|
23
23
|
|
24
24
|
def self.aax1
|
25
25
|
File.read(File.join(TestDataAAindex, "PRAM900102"))
|
@@ -4,13 +4,15 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id
|
7
|
+
# $Id:$
|
8
8
|
#
|
9
9
|
|
10
|
+
# loading helper routine for testing bioruby
|
10
11
|
require 'pathname'
|
11
|
-
|
12
|
-
|
12
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
|
13
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
13
14
|
|
15
|
+
# libraries needed for the tests
|
14
16
|
require 'test/unit'
|
15
17
|
require 'bio/db/fasta'
|
16
18
|
|
@@ -193,49 +195,6 @@ END
|
|
193
195
|
end # class TestFastaFormat
|
194
196
|
|
195
197
|
|
196
|
-
class TestFastaNumericFormat < Test::Unit::TestCase
|
197
|
-
|
198
|
-
def setup
|
199
|
-
text =<<END
|
200
|
-
>CRA3575282.F
|
201
|
-
24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26
|
202
|
-
32 29 29 25
|
203
|
-
END
|
204
|
-
@obj = Bio::FastaNumericFormat.new(text)
|
205
|
-
end
|
206
|
-
|
207
|
-
def test_entry
|
208
|
-
assert_equal(">CRA3575282.F\n24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26 \n32 29 29 25\n", @obj.entry)
|
209
|
-
end
|
210
|
-
|
211
|
-
def test_entry_id
|
212
|
-
assert_equal('CRA3575282.F', @obj.entry_id)
|
213
|
-
end
|
214
|
-
|
215
|
-
def test_definition
|
216
|
-
assert_equal('CRA3575282.F', @obj.definition)
|
217
|
-
end
|
218
|
-
|
219
|
-
def test_data
|
220
|
-
data = [24, 15, 23, 29, 20, 13, 20, 21, 21, 23, 22, 25, 13, 22, 17, 15, 25, 27, 32, 26, 32, 29, 29, 25]
|
221
|
-
assert_equal(data, @obj.data)
|
222
|
-
end
|
223
|
-
|
224
|
-
def test_length
|
225
|
-
assert_equal(24, @obj.length)
|
226
|
-
end
|
227
|
-
|
228
|
-
def test_each
|
229
|
-
assert(@obj.each {|x| })
|
230
|
-
end
|
231
|
-
|
232
|
-
def test_arg
|
233
|
-
assert(@obj[0], '')
|
234
|
-
assert(@obj[-1], '')
|
235
|
-
end
|
236
|
-
|
237
|
-
|
238
|
-
end # class TestFastaFormatNumeric
|
239
198
|
|
240
199
|
|
241
200
|
class TestFastaDefinition < Test::Unit::TestCase
|
@@ -0,0 +1,829 @@
|
|
1
|
+
#
|
2
|
+
# test/unit/bio/db/test_fastq.rb - Unit test for Bio::Fastq
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
# loading helper routine for testing bioruby
|
12
|
+
require 'pathname'
|
13
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
|
14
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
15
|
+
|
16
|
+
# libraries needed for the tests
|
17
|
+
require 'test/unit'
|
18
|
+
require 'bio/io/flatfile'
|
19
|
+
require 'bio/db/fastq'
|
20
|
+
|
21
|
+
module Bio
|
22
|
+
module TestFastq
|
23
|
+
|
24
|
+
TestFastqDataDir = Pathname.new(File.join(BioRubyTestDataPath,
|
25
|
+
'fastq')).cleanpath.to_s
|
26
|
+
|
27
|
+
# A module providing methods to compare float arrays
|
28
|
+
module FloatArrayComparison
|
29
|
+
private
|
30
|
+
def float_array_equivalent?(expected, actual, *arg)
|
31
|
+
assert_equal(expected.size, actual.size, *arg)
|
32
|
+
dt = Float::EPSILON * 1024
|
33
|
+
(0...(expected.size)).each do |i|
|
34
|
+
e = expected[i]
|
35
|
+
a = actual[i]
|
36
|
+
#assert_equal(e, a)
|
37
|
+
assert_in_delta(e, a, e.abs * dt)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end #module FloatArrayComparison
|
41
|
+
|
42
|
+
# Tests using 'longreads_original_sanger.fastq'
|
43
|
+
class TestFastq_longreads_original_sanger < Test::Unit::TestCase
|
44
|
+
include FloatArrayComparison
|
45
|
+
|
46
|
+
SEQS =
|
47
|
+
[
|
48
|
+
'tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGA
|
49
|
+
AGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca
|
50
|
+
aatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcagg
|
51
|
+
tttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca
|
52
|
+
tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaaga
|
53
|
+
agatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtc
|
54
|
+
gaggaactgccaacggacgacacagggagtagnnn',
|
55
|
+
'tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCT
|
56
|
+
ACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgaga
|
57
|
+
ctgccaaggcacncagggataggnn',
|
58
|
+
'tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATA
|
59
|
+
TGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGC
|
60
|
+
GATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATA
|
61
|
+
ATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTG
|
62
|
+
CCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGC
|
63
|
+
CAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgc
|
64
|
+
caaggcacacaggggataggnn',
|
65
|
+
'tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGT
|
66
|
+
CCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTT
|
67
|
+
GCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCA
|
68
|
+
TTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCA
|
69
|
+
TTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTG
|
70
|
+
GCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaa
|
71
|
+
ggcacacagggggatagggnn',
|
72
|
+
'tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCG
|
73
|
+
ACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACG
|
74
|
+
TCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTG
|
75
|
+
ATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttanca
|
76
|
+
acaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccgg
|
77
|
+
acccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggtttttt
|
78
|
+
cggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagg
|
79
|
+
gaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaagg
|
80
|
+
ggggggaagtaggngnnnnnnnnnnnn',
|
81
|
+
'tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTAT
|
82
|
+
CGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATAC
|
83
|
+
CGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATT
|
84
|
+
TAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnn
|
85
|
+
nnnnnnnnnnnnnnnnnn',
|
86
|
+
'tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTAC
|
87
|
+
AAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTT
|
88
|
+
TTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGC
|
89
|
+
TGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGA
|
90
|
+
CCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGC
|
91
|
+
AAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTG
|
92
|
+
TGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAA
|
93
|
+
AGctgagactgccaaggcacacaggggataggn',
|
94
|
+
'tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGC
|
95
|
+
ATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATT
|
96
|
+
TCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGT
|
97
|
+
ATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATC
|
98
|
+
TCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCA
|
99
|
+
AAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTT
|
100
|
+
ACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn',
|
101
|
+
'tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATT
|
102
|
+
TTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTT
|
103
|
+
AATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTT
|
104
|
+
GAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATT
|
105
|
+
TTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagg
|
106
|
+
gggataggn',
|
107
|
+
'tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATA
|
108
|
+
ACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAA
|
109
|
+
AACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACA
|
110
|
+
TTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTA
|
111
|
+
TTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCC
|
112
|
+
ATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATAC
|
113
|
+
AGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggat
|
114
|
+
aggn'
|
115
|
+
].collect { |x| x.gsub(/\s/, '').freeze }.freeze
|
116
|
+
|
117
|
+
IDLINES =
|
118
|
+
[
|
119
|
+
'FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95]',
|
120
|
+
'FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]',
|
121
|
+
'FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346]',
|
122
|
+
'FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343]',
|
123
|
+
'FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208]',
|
124
|
+
'FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193]',
|
125
|
+
'FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418]',
|
126
|
+
'FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374]',
|
127
|
+
'FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273]',
|
128
|
+
'FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389]',
|
129
|
+
].collect { |x| x.freeze }.freeze
|
130
|
+
|
131
|
+
|
132
|
+
ENTRY_IDS = [ 'FSRRS4401BE7HA',
|
133
|
+
'FSRRS4401BRRTC',
|
134
|
+
'FSRRS4401B64ST',
|
135
|
+
'FSRRS4401EJ0YH',
|
136
|
+
'FSRRS4401BK0IB',
|
137
|
+
'FSRRS4401ARCCB',
|
138
|
+
'FSRRS4401CM938',
|
139
|
+
'FSRRS4401EQLIK',
|
140
|
+
'FSRRS4401AOV6A',
|
141
|
+
'FSRRS4401EG0ZW'
|
142
|
+
].collect { |x| x.freeze }.freeze
|
143
|
+
|
144
|
+
QUALITY_STRINGS =
|
145
|
+
[ <<'_0_', <<'_1_', <<'_2_', <<'_3_', <<'_4_', <<'_5_', <<'_6_', <<'_7_', <<'_8_', <<'_9_' ].collect { |x| x.delete("\r\n").freeze }.freeze
|
146
|
+
FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIII
|
147
|
+
IHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF
|
148
|
+
D???:3104/76=:5...4.3,,,366////4<ABBAAA=
|
149
|
+
CCFDDDDDDDD:666CDFFFF=<ABA=;:333111<===9
|
150
|
+
9;B889FFFFFFDDBDBDDD=8844231..,,,-,,,,,,
|
151
|
+
,,1133..---17111,,,,,22555131121.--.,333
|
152
|
+
11,.,,3--,,.,,--,3511123..--!,,,,--,----
|
153
|
+
9,,,,8=,,-,,,-,,,,---26:9:5-..1,,,,11//,
|
154
|
+
,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.
|
155
|
+
030000,,,044400036;96662.//;7><;!!!
|
156
|
+
_0_
|
157
|
+
FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=5
|
158
|
+
55:BBBBB@@?8:8<?<89898<84442;==3,,,514,,
|
159
|
+
,11,,,.,,21777555513,..--1115758.//34488
|
160
|
+
><<;;;;9944/!/4,,,57855!!
|
161
|
+
_1_
|
162
|
+
IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIII
|
163
|
+
IIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIII
|
164
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EI
|
165
|
+
CE::338=/----,8=>>??:2-////7>CEEIEIHHHII
|
166
|
+
IIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHI
|
167
|
+
IIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIII
|
168
|
+
IIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHI
|
169
|
+
IIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIII
|
170
|
+
IIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIH
|
171
|
+
H????EIIIFF999;EIIBB!!
|
172
|
+
_2_
|
173
|
+
IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIH
|
174
|
+
HHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
175
|
+
IIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>
|
176
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7I
|
177
|
+
IIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIII
|
178
|
+
IIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIII
|
179
|
+
BBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
180
|
+
IIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIII
|
181
|
+
GGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??
|
182
|
+
>>?EFEE?/////;:80--!!
|
183
|
+
_3_
|
184
|
+
FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIII
|
185
|
+
IIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<<
|
186
|
+
88;?AABDDC???DDAAAADA666D?DDD=====AA>?>>
|
187
|
+
<<<=<11188<<???AA?9555=ABBB@@?=>>?@@1114
|
188
|
+
2::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=//
|
189
|
+
//8,--111111!23--/24!37:6666<;822/..4!46
|
190
|
+
521177553.-.23!231121112,,-,,211==5-----
|
191
|
+
-,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,,
|
192
|
+
,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22--
|
193
|
+
---//----55//**/--22--**,,,,**,,,,,,.1.,
|
194
|
+
*,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,
|
195
|
+
,),,,,,**//.),,,///,,,,,,,,,,,.))33---,,
|
196
|
+
,,,,,,,,(0,,,!.!!!!!!!!!!!!
|
197
|
+
_4_
|
198
|
+
FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;663
|
199
|
+
22366762243348<<=??4445::>ABAAA@<<==B=:5
|
200
|
+
55:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA=
|
201
|
+
=88880004><<<99688;889<889?BBBBA=???DDBB
|
202
|
+
B@@??88889---237771,,,,,,,,--1152<<00158
|
203
|
+
A@><<<<<43277711,,,--37===75,----34666!!
|
204
|
+
!!!!!!!!!!!!!!!!!!
|
205
|
+
_5_
|
206
|
+
IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666I
|
207
|
+
IIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIII
|
208
|
+
I@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI::///
|
209
|
+
//7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIII
|
210
|
+
IIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIII
|
211
|
+
IIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@7
|
212
|
+
77772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIII
|
213
|
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIII
|
214
|
+
IIIIIIIII==?==IIIII???=;I63DDD82--,,,38=
|
215
|
+
=::----,,---+++33066;@6380008/:889<:BGII
|
216
|
+
IIIIIIIFE<?F5500-----5:;;;:>?@C<<7999EEE
|
217
|
+
EEE@@@@EEEEE!
|
218
|
+
_6_
|
219
|
+
III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHH
|
220
|
+
HIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII
|
221
|
+
@@@@IIIIEIE111100----22?=8---:-------,,,
|
222
|
+
,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122
|
223
|
+
000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//
|
224
|
+
-,,21??<5-002=6FBB?:9<=11/4444//-//77??G
|
225
|
+
EIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIII
|
226
|
+
IIIIIIIIIIIIIIIIIEE1//--822;----.777@EII
|
227
|
+
IIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIII
|
228
|
+
I994227775555AE;IEEEEEIIIII??9755>@==:3,
|
229
|
+
,,,,33336!!
|
230
|
+
_7_
|
231
|
+
IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIII
|
232
|
+
IIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B
|
233
|
+
@@D66445555<<<GII>>AAIIIIIIII;;;::III???
|
234
|
+
CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE?
|
235
|
+
EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHH
|
236
|
+
HIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97
|
237
|
+
771119:EAAADDBD7777=/111122DA@@B68;;;I8H
|
238
|
+
HIIIII;;;;?>IECCCB/////;745=!
|
239
|
+
_8_
|
240
|
+
IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;
|
241
|
+
;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIII
|
242
|
+
IIIIIIIIIIIIIEE94442244@@666CC<<BDDA=---
|
243
|
+
--2<,,,,659//00===8CIII;>>==HH;;IIIIIICC
|
244
|
+
@@???III@@@@IC?666HIDDCI?B??CC<EE11111B4
|
245
|
+
BDDCB;=@B777>////-=323?423,,,/=1,,,,-:4E
|
246
|
+
;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99
|
247
|
+
988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIII
|
248
|
+
II?????IIIIIIIIIIICAC;55539EIIIIIIIIIIII
|
249
|
+
IIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFI
|
250
|
+
II;;;977FFCCC@24449?FDD!
|
251
|
+
_9_
|
252
|
+
|
253
|
+
# Expected quality scores: the byte value of each quality character minus 33.
QUALITY_SCORES = QUALITY_STRINGS.map do |quality_string|
  quality_string.unpack('C*').map { |byte| byte - 33 }.freeze
end.freeze

# Expected error probabilities computed from the scores as 10 ** (-Q / 10).
ERROR_PROBABILITIES = QUALITY_SCORES.map do |scores|
  scores.map { |q| 10 ** (- q / 10.0) }.freeze
end.freeze
|
260
|
+
|
261
|
+
# Opens the test data file as a Bio::FlatFile of Bio::Fastq entries.
# The stream is kept in @ff for the test methods and closed in #teardown.
def setup
  fn = File.join(TestFastqDataDir, 'longreads_original_sanger.fastq')
  @ff = Bio::FlatFile.open(Bio::Fastq, fn)
end

# Closes the stream opened by #setup so that each test does not leak an
# open file handle.
def teardown
  @ff.close if @ff
end
|
265
|
+
|
266
|
+
# Every entry must pass format validation, and iterating must leave the
# stream at end of file.
def test_validate_format
  @ff.each { |entry| assert(entry.validate_format) }
  assert(@ff.eof?)
end
|
272
|
+
|
273
|
+
# When an array is given to validate_format, valid entries must leave it
# empty.
def test_validate_format_with_array
  @ff.each do |entry|
    errors = []
    assert(entry.validate_format(errors))
    assert(errors.empty?)
  end
end
|
280
|
+
|
281
|
+
# Entries must yield the definition lines of IDLINES, in order, with none
# left over.
def test_definition
  expected = IDLINES.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.definition) }
  assert(expected.empty?)
end
|
288
|
+
|
289
|
+
# Entries must yield the identifiers of ENTRY_IDS, in order, with none
# left over.
def test_entry_id
  expected = ENTRY_IDS.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.entry_id) }
  assert(expected.empty?)
end
|
296
|
+
|
297
|
+
# The raw sequence string of each entry must equal the matching SEQS item.
def test_sequence_string
  expected = SEQS.dup
  @ff.each do |entry|
    want = expected.shift
    assert_equal(want, entry.sequence_string)
  end
  assert(expected.empty?)
end
|
305
|
+
|
306
|
+
# Entry#seq must equal a Bio::Sequence::Generic built from the matching
# SEQS item.
def test_seq
  expected = SEQS.map { |str| Bio::Sequence::Generic.new(str) }
  @ff.each do |entry|
    want = expected.shift
    assert_equal(want, entry.seq)
  end
  assert(expected.empty?)
end
|
314
|
+
|
315
|
+
# Entry#naseq must equal a Bio::Sequence::NA built from the matching SEQS
# item.
def test_naseq
  expected = SEQS.map { |str| Bio::Sequence::NA.new(str) }
  @ff.each do |entry|
    want = expected.shift
    assert_equal(want, entry.naseq)
  end
  assert(expected.empty?)
end
|
323
|
+
|
324
|
+
# Entry#nalen must equal the length of a Bio::Sequence::NA built from the
# matching SEQS item.
def test_nalen
  expected = SEQS.map { |str| Bio::Sequence::NA.new(str).length }
  @ff.each do |entry|
    want = expected.shift
    assert_equal(want, entry.nalen)
  end
  assert(expected.empty?)
end
|
332
|
+
|
333
|
+
# Entries must yield the quality strings of QUALITY_STRINGS, in order.
def test_quality_string
  expected = QUALITY_STRINGS.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.quality_string) }
  assert(expected.empty?)
end
|
340
|
+
|
341
|
+
# Entries must yield the score arrays of QUALITY_SCORES, in order.
def test_quality_scores
  expected = QUALITY_SCORES.dup
  @ff.each { |entry| assert_equal(expected.shift, entry.quality_scores) }
  assert(expected.empty?)
end
|
348
|
+
|
349
|
+
# Error probabilities of each entry must match ERROR_PROBABILITIES within
# the floating-point tolerance of float_array_equivalent?.
def test_error_probabilities
  expected = ERROR_PROBABILITIES.dup
  @ff.each do |entry|
    float_array_equivalent?(expected.shift, entry.error_probabilities)
  end
  assert(expected.empty?)
end
|
357
|
+
|
358
|
+
# Converting an entry with to_biosequence must not raise, and the result
# must carry the expected sequence, definition, id, score type, scores
# and error probabilities.
def test_to_biosequence
  @ff.each_with_index do |entry, idx|
    bioseq = nil
    assert_nothing_raised { bioseq = entry.to_biosequence }
    assert_equal(Bio::Sequence::Generic.new(SEQS[idx]), bioseq.seq)
    assert_equal(IDLINES[idx], bioseq.definition)
    assert_equal(ENTRY_IDS[idx], bioseq.entry_id)
    assert_equal(:phred, bioseq.quality_score_type)
    assert_equal(QUALITY_SCORES[idx], bioseq.quality_scores)
    float_array_equivalent?(ERROR_PROBABILITIES[idx], bioseq.error_probabilities)
  end
end
|
371
|
+
|
372
|
+
# Writing an entry back as :fastq_sanger (title repeated, width 80) must
# reproduce the raw text read from the file, and re-parsing that text must
# give an equivalent entry.
def test_roundtrip
  @ff.each_with_index do |entry, _index|
    raw_text = @ff.entry_raw
    bioseq = entry.to_biosequence
    output_options = { :repeat_title => true, :width => 80 }
    written = bioseq.output(:fastq_sanger, output_options)
    assert_equal(raw_text, written)

    reparsed = Bio::Fastq.new(written)
    assert_equal(entry.sequence_string, reparsed.sequence_string)
    assert_equal(entry.quality_string, reparsed.quality_string)
    assert_equal(entry.definition, reparsed.definition)
    assert_equal(entry.quality_scores, reparsed.quality_scores)
    float_array_equivalent?(entry.error_probabilities, reparsed.error_probabilities)
  end
end
|
388
|
+
|
389
|
+
end #class TestFastq_longreads_original_sanger
|
390
|
+
|
391
|
+
# common methods to read *_full_range_as_*.fastq and test quality scores
|
392
|
+
# and error probabilities
|
393
|
+
module TestFastq_full_range
|
394
|
+
include FloatArrayComparison
|
395
|
+
|
396
|
+
private
|
397
|
+
# Reads all Bio::Fastq entries of a file under TestFastqDataDir and sets
# the given format on each of them.
#
# fn::     file name relative to TestFastqDataDir
# format:: value assigned to each entry through format=
#
# Returns the array of entries.
def read_file(fn, format)
  full_path = File.join(TestFastqDataDir, fn)
  all = Bio::FlatFile.open(Bio::Fastq, full_path) { |flatfile| flatfile.to_a }
  all.each { |entry| entry.format = format }
  all
end
|
403
|
+
|
404
|
+
# Identity conversion: returns the scores of the range unchanged, as an
# array.
def scores_through(range)
  Array(range)
end
|
407
|
+
|
408
|
+
# Converts each score q of the range with
#   10 * log10(10 ** (q / 10) - 1)
# rounded to an integer and limited to -5..62.  When the argument of the
# logarithm is not positive, the lower limit -5 is used.
#
# Returns an array of integers.
def scores_phred2solexa(range)
  floor = -5
  ceiling = 62
  range.map do |q|
    odds = 10 ** (q / 10.0) - 1
    next floor if odds <= 0

    converted = (10 * Math.log10(odds)).round
    if converted < floor
      floor
    elsif converted > ceiling
      ceiling
    else
      converted
    end
  end
end
|
428
|
+
|
429
|
+
# Limits each score of the range to 0..62.
#
# Returns an array of scores.
def scores_phred2illumina(range)
  lowest = 0
  highest = 62
  range.map do |q|
    next lowest if q < lowest
    next highest if q > highest

    q
  end
end
|
443
|
+
|
444
|
+
# Limits each score of the range to 0..93.
#
# Returns an array of scores.
def scores_phred2sanger(range)
  lowest = 0
  highest = 93
  range.map do |q|
    next lowest if q < lowest
    next highest if q > highest

    q
  end
end
|
458
|
+
|
459
|
+
# Converts each score q of the range with
#   10 * log10(10 ** (q / 10) + 1)
# rounded to an integer.
#
# Returns an array of integers.
def scores_solexa2phred(range)
  range.map { |q| (10 * Math.log10(10 ** (q / 10.0) + 1)).round }
end
|
466
|
+
|
467
|
+
# Chains scores_solexa2phred and scores_phred2sanger.
def scores_solexa2sanger(range)
  phred = scores_solexa2phred(range)
  scores_phred2sanger(phred)
end
|
470
|
+
|
471
|
+
# Chains scores_solexa2phred and scores_phred2illumina.
def scores_solexa2illumina(range)
  phred = scores_solexa2phred(range)
  scores_phred2illumina(phred)
end
|
474
|
+
|
475
|
+
# Reads the file in the given format and asserts that the first entry has
# the given scores and the second entry has them in reverse order.
def common_test_quality_scores(scores, filename, format)
  first, second = read_file(filename, format)
  assert_equal(scores, first.quality_scores)
  assert_equal(scores.reverse, second.quality_scores)
end
|
480
|
+
|
481
|
+
# Reads the file in the given format and asserts that the first entry has
# the given error probabilities and the second entry has them in reverse
# order, within the tolerance of float_array_equivalent?.
def common_test_error_probabilities(probabilities, filename, format)
  first, second = read_file(filename, format)
  float_array_equivalent?(probabilities, first.error_probabilities)
  float_array_equivalent?(probabilities.reverse, second.error_probabilities)
end
|
488
|
+
|
489
|
+
# Reads the file in the given format and asserts that its first two
# entries pass format validation.
def common_test_validate_format(filename, format)
  first, second = read_file(filename, format)
  assert(first.validate_format)
  assert(second.validate_format)
end
|
494
|
+
|
495
|
+
# Maps each score q to the probability 10 ** (-q / 10).
def phred_q2p(scores)
  scores.map do |q|
    exponent = -q / 10.0
    10 ** exponent
  end
end
|
498
|
+
|
499
|
+
# Maps each score q to the probability t / (1 + t), where
# t = 10 ** (-q / 10).
def solexa_q2p(scores)
  scores.map do |q|
    ratio = 10 ** (-q / 10.0)
    ratio / (1.0 + ratio)
  end
end
|
505
|
+
|
506
|
+
public
|
507
|
+
# The three files named by the including class must each validate when
# read in the matching format.
def test_validate_format
  klass = self.class
  common_test_validate_format(klass::FILENAME_AS_SANGER, 'fastq-sanger')
  common_test_validate_format(klass::FILENAME_AS_SOLEXA, 'fastq-solexa')
  common_test_validate_format(klass::FILENAME_AS_ILLUMINA, 'fastq-illumina')
end
|
515
|
+
|
516
|
+
# Quality scores read as 'fastq-sanger' must equal RANGE converted by
# scores_to_sanger.
def test_quality_scores_as_sanger
  klass = self.class
  expected = scores_to_sanger(klass::RANGE)
  common_test_quality_scores(expected, klass::FILENAME_AS_SANGER, 'fastq-sanger')
end
|
522
|
+
|
523
|
+
# Error probabilities read as 'fastq-sanger' must equal phred_q2p applied
# to RANGE converted by scores_to_sanger.
def test_error_probabilities_as_sanger
  klass = self.class
  expected = phred_q2p(scores_to_sanger(klass::RANGE))
  common_test_error_probabilities(expected, klass::FILENAME_AS_SANGER, 'fastq-sanger')
end
|
530
|
+
|
531
|
+
# Quality scores read as 'fastq-solexa' must equal RANGE converted by
# scores_to_solexa.
def test_quality_scores_as_solexa
  klass = self.class
  expected = scores_to_solexa(klass::RANGE)
  common_test_quality_scores(expected, klass::FILENAME_AS_SOLEXA, 'fastq-solexa')
end
|
537
|
+
|
538
|
+
# Error probabilities read as 'fastq-solexa' must equal solexa_q2p applied
# to RANGE converted by scores_to_solexa.
def test_error_probabilities_as_solexa
  klass = self.class
  expected = solexa_q2p(scores_to_solexa(klass::RANGE))
  common_test_error_probabilities(expected, klass::FILENAME_AS_SOLEXA, 'fastq-solexa')
end
|
545
|
+
|
546
|
+
# Quality scores read as 'fastq-illumina' must equal RANGE converted by
# scores_to_illumina.
def test_quality_scores_as_illumina
  klass = self.class
  expected = scores_to_illumina(klass::RANGE)
  common_test_quality_scores(expected, klass::FILENAME_AS_ILLUMINA, 'fastq-illumina')
end
|
552
|
+
|
553
|
+
# Error probabilities read as 'fastq-illumina' must equal phred_q2p
# applied to RANGE converted by scores_to_illumina.
def test_error_probabilities_as_illumina
  klass = self.class
  expected = phred_q2p(scores_to_illumina(klass::RANGE))
  common_test_error_probabilities(expected, klass::FILENAME_AS_ILLUMINA, 'fastq-illumina')
end
|
560
|
+
end #module TestFastq_full_range
|
561
|
+
|
562
|
+
|
563
|
+
# Runs the TestFastq_full_range tests on the sanger_full_range_* files,
# whose scores cover 0..93.
class TestFastq_sanger_full_range < Test::Unit::TestCase
  include TestFastq_full_range

  RANGE = 0..93
  # File names are frozen like the other constants in this file.
  FILENAME_AS_SANGER = 'sanger_full_range_as_sanger.fastq'.freeze
  FILENAME_AS_SOLEXA = 'sanger_full_range_as_solexa.fastq'.freeze
  FILENAME_AS_ILLUMINA = 'sanger_full_range_as_illumina.fastq'.freeze

  # Conversions used to compute the expected scores for each target format.
  alias scores_to_sanger scores_through
  alias scores_to_solexa scores_phred2solexa
  alias scores_to_illumina scores_phred2illumina
end #class TestFastq_sanger_full_range
|
575
|
+
|
576
|
+
|
577
|
+
# Runs the TestFastq_full_range tests on the solexa_full_range_* files,
# whose scores cover -5..62.
class TestFastq_solexa_full_range < Test::Unit::TestCase
  include TestFastq_full_range

  RANGE = (-5)..62
  # File names are frozen like the other constants in this file.
  FILENAME_AS_SANGER = 'solexa_full_range_as_sanger.fastq'.freeze
  FILENAME_AS_SOLEXA = 'solexa_full_range_as_solexa.fastq'.freeze
  FILENAME_AS_ILLUMINA = 'solexa_full_range_as_illumina.fastq'.freeze

  # Conversions used to compute the expected scores for each target format.
  alias scores_to_sanger scores_solexa2sanger
  alias scores_to_solexa scores_through
  alias scores_to_illumina scores_solexa2illumina
end #class TestFastq_solexa_full_range
|
589
|
+
|
590
|
+
|
591
|
+
# Runs the TestFastq_full_range tests on the illumina_full_range_* files,
# whose scores cover 0..62.
class TestFastq_illumina_full_range < Test::Unit::TestCase
  include TestFastq_full_range

  RANGE = 0..62
  # File names are frozen like the other constants in this file.
  FILENAME_AS_SANGER = 'illumina_full_range_as_sanger.fastq'.freeze
  FILENAME_AS_SOLEXA = 'illumina_full_range_as_solexa.fastq'.freeze
  FILENAME_AS_ILLUMINA = 'illumina_full_range_as_illumina.fastq'.freeze

  # Conversions used to compute the expected scores for each target format.
  alias scores_to_sanger scores_phred2sanger
  alias scores_to_solexa scores_phred2solexa
  alias scores_to_illumina scores_through
end #class TestFastq_illumina_full_range
|
603
|
+
|
604
|
+
|
605
|
+
# common methods for testing error_*.fastq
|
606
|
+
module TestFastq_error

  # Defaults; including classes override these constants.
  # FILENAME::  data file name relative to TestFastqDataDir
  # PRE_SKIP::  number of entries read before the entry under test
  # POST_SKIP:: number of entries read after it
  # ERRORS::    error objects the entry under test is expected to report
  FILENAME = nil
  PRE_SKIP = 2
  POST_SKIP = 2
  # frozen so that the shared default cannot be modified by accident
  ERRORS = [].freeze

  # Reads the next entry from ff and asserts that validate_format returns
  # false and reports errors matching self.class::ERRORS, in order, by
  # class and message.
  def do_test_validate_format(ff)
    entry = ff.next_entry
    found = []
    assert_equal(false, entry.validate_format(found))
    expected = self.class::ERRORS
    assert_equal(expected.size, found.size)
    expected.each_with_index do |ex, i|
      assert_kind_of(ex.class, found[i])
      assert_equal(ex.message, found[i].message)
    end
  end
  private :do_test_validate_format

  # Opens the data file, reads PRE_SKIP entries, checks the next one with
  # do_test_validate_format, reads POST_SKIP more entries and asserts that
  # the end of the file has been reached.
  def test_validate_format
    path = File.join(TestFastqDataDir, self.class::FILENAME)
    Bio::FlatFile.open(Bio::Fastq, path) do |ff|
      self.class::PRE_SKIP.times { ff.next_entry }
      do_test_validate_format(ff)
      self.class::POST_SKIP.times { ff.next_entry }
      assert(ff.eof?)
    end
  end
end #module TestFastq_error
|
637
|
+
|
638
|
+
# 'error_diff_ids.fastq': the third entry must report one Diff_ids error.
class TestFastq_error_diff_ids < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_diff_ids.fastq'.freeze
  PRE_SKIP = 2
  POST_SKIP = 2
  ERRORS = [ Bio::Fastq::Error::Diff_ids.new ].freeze
end #class TestFastq_error_diff_ids
|
646
|
+
|
647
|
+
# 'error_double_qual.fastq': the third entry must report one Long_qual
# error.
class TestFastq_error_double_qual < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_double_qual.fastq'.freeze
  PRE_SKIP = 2
  POST_SKIP = 2
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ].freeze
end #class TestFastq_error_double_qual
|
655
|
+
|
656
|
+
# 'error_double_seq.fastq': the fourth (last) entry must report one
# Long_qual error.
class TestFastq_error_double_seq < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_double_seq.fastq'.freeze
  PRE_SKIP = 3
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ].freeze
end #class TestFastq_error_double_seq
|
664
|
+
|
665
|
+
# 'error_long_qual.fastq': the fourth entry must report one Long_qual
# error.
class TestFastq_error_long_qual < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_long_qual.fastq'.freeze
  PRE_SKIP = 3
  POST_SKIP = 1
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ].freeze
end #class TestFastq_error_long_qual
|
673
|
+
|
674
|
+
# 'error_no_qual.fastq': three entries are read in a row, each expected to
# report exactly one error.
class TestFastq_error_no_qual < Test::Unit::TestCase
  include TestFastq_error

  # frozen like the other constants in this file
  FILENAME = 'error_no_qual.fastq'.freeze
  PRE_SKIP = 0
  POST_SKIP = 0

  private

  # Overrides TestFastq_error#do_test_validate_format: reads three entries
  # from ff; the first two must each report a single Long_qual error and
  # the third a single Short_qual error.
  def do_test_validate_format(ff)
    [ Bio::Fastq::Error::Long_qual,
      Bio::Fastq::Error::Long_qual,
      Bio::Fastq::Error::Short_qual ].each do |error_class|
      entry = ff.next_entry
      found = []
      entry.validate_format(found)
      assert_equal(1, found.size)
      assert_kind_of(error_class, found[0])
    end
  end
end #class TestFastq_error_no_qual
|
699
|
+
|
700
|
+
# 'error_qual_del.fastq': the fourth entry must report Qual_char.new(12).
class TestFastq_error_qual_del < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_del.fastq'.freeze
  PRE_SKIP = 3
  POST_SKIP = 1
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(12) ].freeze
end #class TestFastq_error_qual_del
|
708
|
+
|
709
|
+
# 'error_qual_escape.fastq': the fifth (last) entry must report
# Qual_char.new(7).
class TestFastq_error_qual_escape < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_escape.fastq'.freeze
  PRE_SKIP = 4
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(7) ].freeze
end #class TestFastq_error_qual_escape
|
717
|
+
|
718
|
+
# 'error_qual_null.fastq': the first entry must report Qual_char.new(3).
class TestFastq_error_qual_null < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_null.fastq'.freeze
  PRE_SKIP = 0
  POST_SKIP = 4
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(3) ].freeze
end #class TestFastq_error_qual_null
|
726
|
+
|
727
|
+
# 'error_qual_space.fastq': the fourth entry must report
# Qual_char.new(18).
class TestFastq_error_qual_space < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_space.fastq'.freeze
  PRE_SKIP = 3
  POST_SKIP = 1
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(18) ].freeze
end #class TestFastq_error_qual_space
|
735
|
+
|
736
|
+
# 'error_qual_tab.fastq': the fifth (last) entry must report
# Qual_char.new(10).
class TestFastq_error_qual_tab < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_tab.fastq'.freeze
  PRE_SKIP = 4
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(10) ].freeze
end #class TestFastq_error_qual_tab
|
744
|
+
|
745
|
+
# 'error_qual_unit_sep.fastq': the third entry must report
# Qual_char.new(5).
class TestFastq_error_qual_unit_sep < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_unit_sep.fastq'.freeze
  PRE_SKIP = 2
  POST_SKIP = 2
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(5) ].freeze
end #class TestFastq_error_qual_unit_sep
|
753
|
+
|
754
|
+
# 'error_qual_vtab.fastq': the first entry must report Qual_char.new(10).
class TestFastq_error_qual_vtab < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_qual_vtab.fastq'.freeze
  PRE_SKIP = 0
  POST_SKIP = 4
  ERRORS = [ Bio::Fastq::Error::Qual_char.new(10) ].freeze
end #class TestFastq_error_qual_vtab
|
762
|
+
|
763
|
+
# 'error_short_qual.fastq': the third entry is expected to report one
# Long_qual error.
class TestFastq_error_short_qual < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_short_qual.fastq'.freeze
  PRE_SKIP = 2
  POST_SKIP = 1
  ERRORS = [ Bio::Fastq::Error::Long_qual.new ].freeze
end #class TestFastq_error_short_qual
|
771
|
+
|
772
|
+
# 'error_spaces.fastq': five entries are read in a row, each expected to
# report the same list of Seq_char / Qual_char errors.
class TestFastq_error_spaces < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_spaces.fastq'.freeze
  PRE_SKIP = 0
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::Seq_char.new(9),
             Bio::Fastq::Error::Seq_char.new(20),
             Bio::Fastq::Error::Qual_char.new(9),
             Bio::Fastq::Error::Qual_char.new(20)
           ].freeze

  private

  # Overrides TestFastq_error#do_test_validate_format: reads five entries
  # from ff and asserts that each reports errors matching
  # self.class::ERRORS, in order, by class and message.
  def do_test_validate_format(ff)
    expected = self.class::ERRORS
    5.times do
      entry = ff.next_entry
      found = []
      entry.validate_format(found)
      # derive the count from ERRORS instead of repeating a literal 4
      assert_equal(expected.size, found.size)
      expected.each_with_index do |ex, i|
        assert_kind_of(ex.class, found[i])
        assert_equal(ex.message, found[i].message)
      end
    end
  end
end #class TestFastq_error_spaces
|
799
|
+
|
800
|
+
# Same checks as TestFastq_error_spaces, run on 'error_tabs.fastq'.
class TestFastq_error_tabs < TestFastq_error_spaces
  # frozen like the other constants in this file
  FILENAME = 'error_tabs.fastq'.freeze
end #class TestFastq_error_tabs
|
803
|
+
|
804
|
+
# 'error_trunc_at_plus.fastq': the fifth (last) entry must report one
# No_qual error.
class TestFastq_error_trunc_at_plus < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_trunc_at_plus.fastq'.freeze
  PRE_SKIP = 4
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::No_qual.new ].freeze
end #class TestFastq_error_trunc_at_plus
|
812
|
+
|
813
|
+
# Same checks as TestFastq_error_trunc_at_plus, run on
# 'error_trunc_at_qual.fastq'.
class TestFastq_error_trunc_at_qual < TestFastq_error_trunc_at_plus
  # frozen like the other constants in this file
  FILENAME = 'error_trunc_at_qual.fastq'.freeze
end #class TestFastq_error_trunc_at_qual
|
816
|
+
|
817
|
+
# 'error_trunc_at_seq.fastq': the fifth (last) entry must report one
# No_qual error.
class TestFastq_error_trunc_at_seq < Test::Unit::TestCase
  include TestFastq_error

  # constants are frozen like the other constants in this file
  FILENAME = 'error_trunc_at_seq.fastq'.freeze
  PRE_SKIP = 4
  POST_SKIP = 0
  ERRORS = [ Bio::Fastq::Error::No_qual.new ].freeze
end #class TestFastq_error_trunc_at_seq
|
825
|
+
|
826
|
+
|
827
|
+
end #module TestFastq
|
828
|
+
end #module Bio
|
829
|
+
|