bioroebe 0.10.80 → 0.12.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3946 -2817
- data/bin/bioroebe +13 -2
- data/bin/bioroebe_hash +7 -0
- data/bin/codon_to_aminoacid +6 -4
- data/bin/compacter +7 -0
- data/bin/plain_palindrome +7 -0
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +3918 -2793
- data/doc/quality_control/commandline_applications.md +3 -3
- data/doc/statistics/statistics.md +7 -7
- data/doc/todo/bioroebe_GUI_todo.md +19 -14
- data/doc/todo/bioroebe_java_todo.md +22 -0
- data/doc/todo/bioroebe_todo.md +2075 -2620
- data/lib/bioroebe/C++/DNA.cpp +69 -0
- data/lib/bioroebe/C++/RNA.cpp +58 -0
- data/lib/bioroebe/C++/sequence.cpp +35 -0
- data/lib/bioroebe/abstract/README.md +1 -0
- data/lib/bioroebe/abstract/features.rb +29 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
- data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
- data/lib/bioroebe/base/base_module/base_module.rb +36 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +13 -9
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +24 -19
- data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
- data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
- data/lib/bioroebe/base/commandline_application/reset.rb +5 -3
- data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +42 -0
- data/lib/bioroebe/base/misc.rb +35 -0
- data/lib/bioroebe/base/prototype/misc.rb +15 -9
- data/lib/bioroebe/base/prototype/reset.rb +10 -0
- data/lib/bioroebe/cleave_and_digest/digestion.rb +10 -2
- data/lib/bioroebe/cleave_and_digest/trypsin.rb +104 -50
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
- data/lib/bioroebe/codons/codons.rb +1 -1
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +208 -59
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
- data/lib/bioroebe/codons/show_codon_tables.rb +8 -3
- data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
- data/lib/bioroebe/colours/rev.rb +4 -1
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/database_constants.rb +1 -1
- data/lib/bioroebe/constants/files_and_directories.rb +31 -4
- data/lib/bioroebe/constants/misc.rb +20 -0
- data/lib/bioroebe/constants/nucleotides.rb +7 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +109 -39
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/cpp +1 -0
- data/lib/bioroebe/crystal/README.md +2 -0
- data/lib/bioroebe/crystal/to_rna.cr +19 -0
- data/lib/bioroebe/data/README.md +11 -8
- data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
- data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
- data/lib/bioroebe/data/fasta/human/README.md +2 -0
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +1 -1
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
- data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
- data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1518 -7
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
- data/lib/bioroebe/genome/README.md +4 -0
- data/lib/bioroebe/genome/genome.rb +130 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
- data/lib/bioroebe/gui/gtk +1 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +106 -137
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +27 -61
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +1 -1
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +1 -2
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +1 -2
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +46 -29
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +77 -52
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +100 -23
- data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +1 -2
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +1 -2
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +43 -30
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +1 -2
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +120 -73
- data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +19 -20
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +20 -13
- data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +1 -2
- data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +97 -22
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +3 -73
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +1 -2
- data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +1 -2
- data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +1 -2
- data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +1 -2
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.class +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.java +104 -0
- data/lib/bioroebe/gui/javafx/bioroebe.jar +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe.mf +1 -0
- data/lib/bioroebe/gui/javafx/module-info.class +0 -0
- data/lib/bioroebe/gui/javafx/module-info.java +5 -0
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
- data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +166 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
- data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
- data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +94 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
- data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
- data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
- data/lib/bioroebe/images/images.html +29845 -0
- data/lib/bioroebe/java/README.md +5 -0
- data/lib/bioroebe/java/bioroebe/AllInOne.java +1 -0
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Base.java +39 -5
- data/lib/bioroebe/java/bioroebe/IsPalindrome.java +23 -5
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +0 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +28 -3
- data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToCamelcase.java +16 -4
- data/lib/bioroebe/java/bioroebe/ToRNA.java +43 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +6 -0
- data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/BisulfiteTreatment.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Codons.class → src/Codons.class} +0 -0
- data/lib/bioroebe/java/bioroebe/src/Codons.java +35 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.java +101 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.class → src/Esystem.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.java → src/Esystem.java} +6 -1
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/GenerateRandomDnaSequence.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/GenerateRandomDnaSequence.java} +8 -2
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.java +56 -0
- data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/RemoveFile.java} +10 -4
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/RemoveNumbers.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/RemoveNumbers.java} +1 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.java +75 -0
- data/lib/bioroebe/misc/ruler.rb +11 -2
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +59 -18
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
- data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
- data/lib/bioroebe/parsers/gff.rb +1 -9
- data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
- data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
- data/lib/bioroebe/project/project.rb +1 -1
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
- data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +33 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/convert_dna_to_aminoacid_sequence.py +137 -0
- data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
- data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
- data/lib/bioroebe/python/toplevel_methods/palindromes.py +52 -0
- data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
- data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
- data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
- data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
- data/lib/bioroebe/requires/require_cleave_and_digest.rb +3 -1
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
- data/lib/bioroebe/sequence/alignment.rb +14 -4
- data/lib/bioroebe/sequence/dna.rb +1 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/protein.rb +105 -3
- data/lib/bioroebe/sequence/rna.rb +220 -0
- data/lib/bioroebe/sequence/sequence.rb +128 -40
- data/lib/bioroebe/shell/menu.rb +3815 -3696
- data/lib/bioroebe/shell/misc.rb +9019 -3133
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +1137 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
- data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
- data/lib/bioroebe/taxonomy/constants.rb +4 -3
- data/lib/bioroebe/taxonomy/edit.rb +2 -1
- data/lib/bioroebe/taxonomy/help/help.rb +10 -10
- data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
- data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
- data/lib/bioroebe/taxonomy/info/info.rb +18 -11
- data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
- data/lib/bioroebe/taxonomy/interactive.rb +140 -104
- data/lib/bioroebe/taxonomy/menu.rb +27 -18
- data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
- data/lib/bioroebe/taxonomy/shared.rb +1 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
- data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
- data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
- data/lib/bioroebe/toplevel_methods/misc.rb +175 -11
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +75 -47
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +3 -3
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
- data/lib/bioroebe/utility_scripts/compacter/compacter.rb +251 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
- data/lib/bioroebe/utility_scripts/permutations.rb +36 -9
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/www/embeddable_interface.rb +121 -58
- data/lib/bioroebe/www/sinatra/sinatra.rb +186 -71
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
- data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +17 -17
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
- data/lib/bioroebe/yaml/genomes/README.md +3 -4
- data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +5 -0
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +57 -57
- data/spec/README.md +6 -0
- data/spec/project_wide_specification/classes.md +5 -0
- metadata +107 -70
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
- data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
- data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
- data/lib/bioroebe/utility_scripts/compacter.rb +0 -131
- /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/BisulfiteTreatment.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/RemoveFile.class} +0 -0
@@ -1,2901 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
# Encoding: UTF-8
|
3
|
-
# frozen_string_literal: true
|
4
|
-
# =========================================================================== #
|
5
|
-
# require 'bioroebe/shell/show_report_and_display.rb'
|
6
|
-
# =========================================================================== #
|
7
|
-
module Bioroebe
|
8
|
-
|
9
|
-
class Shell < ::Bioroebe::CommandlineApplication
|
10
|
-
|
11
|
-
require 'bioroebe/shell/search.rb'
|
12
|
-
require 'bioroebe/codons/show_codon_usage.rb'
|
13
|
-
require 'bioroebe/codons/show_this_codon_table.rb'
|
14
|
-
require 'bioroebe/count/count_amount_of_aminoacids.rb'
|
15
|
-
|
16
|
-
# ========================================================================= #
|
17
|
-
# === report_main_sequence
|
18
|
-
#
|
19
|
-
# We will call dna_with_ends() here in this method. The argument colourize will
|
20
|
-
# determine whether we will colourize the DNA strand or not.
|
21
|
-
#
|
22
|
-
# Invocation examples:
|
23
|
-
#
|
24
|
-
# report_main_sequence(::Bioroebe.start_codon?)
|
25
|
-
# report_main_sequence(:start_codon) # ← is the same as the ^^^ above
|
26
|
-
# report_main_sequence(:stop_codon) # ← Colourize the stop-codons.
|
27
|
-
#
|
28
|
-
# ========================================================================= #
|
29
|
-
def report_main_sequence(
    colourize = nil,
    input     = dna_sequence_as_string?
  )
  # ======================================================================= #
  # Report the given nucleotide sequence, optionally with colourized parts.
  #
  # colourize: nil, one of the Symbols handled in the case/when menu below,
  #            or any other value, which is handed on unchanged as
  #            :colourize_this_subsequence.
  # input:     the sequence to report; defaults to dna_sequence_as_string?.
  #
  # The :stop_codon_in_frame1 variant prints the line itself and returns
  # nil; every other variant delegates to the object returned by
  # show_nucleotide_sequence?.
  # ======================================================================= #
  case colourize
  # ======================================================================= #
  # === :stop_codon
  #
  # We attempt to colourize the stop-codons via this method.
  # ======================================================================= #
  when :stop_codon
    colourize = stop_codons?
  # ======================================================================= #
  # === :stop_codon_in_frame1
  # ======================================================================= #
  when :stop_codon_in_frame1
    coloured_sequence = remove_trailing_escape_code(
      colour_for_nucleotides(''.dup).dup
    ).dup
    # ===================================================================== #
    # Walk through the input in triplets. /.{1,3}/ is used rather than
    # /.../ so that a trailing, incomplete codon (1 or 2 nucleotides) is
    # still shown rather than silently dropped from the output.
    # NOTE(review): assumes is_a_stop_codon? is false for such a fragment.
    # ===================================================================== #
    input.scan(/.{1,3}/).each { |codon|
      if is_a_stop_codon?(codon)
        coloured_sequence << (
          colour_for_stop_codon(codon.dup).dup +
          remove_trailing_escape_code(colour_for_nucleotides)
        )
      else
        coloured_sequence << codon
      end
    }
    e(
      padding? +
      rev +
      leading_five_prime +
      coloured_sequence +
      rev +
      trailing_three_prime
    )
    return
  # ======================================================================= #
  # === :start_codon
  # ======================================================================= #
  when :start_codon # Instruction to use a start codon here.
    colourize = start_codon?
  # ======================================================================= #
  # === :start_and_stop_codon
  # ======================================================================= #
  when :start_and_stop_codon
    colourize = [start_codon?, stop_codons?]
  end
  # ======================================================================= #
  # The old code was:
  #   erev padding?+
  #        dna_with_ends(input, colourize) { :honour_coding_area_if_it_exists } # The dna_with_ends() method can deal with Arrays.
  # This is now mostly ported (April 2020), but the :honour_coding_area_if_it_exists
  # is not yet ported, so the above code will remain as-is, for the time
  # being.
  # ======================================================================= #
  show_nucleotide_sequence?.report_this_sequence(input) {
    {
      padding_to_use:             padding?,
      colourize_this_subsequence: colourize
    }
  }
end
alias show_main_string         report_main_sequence # === show_main_string
alias show_main_sequence       report_main_sequence # === show_main_sequence
alias show_colourized_sequence report_main_sequence # === show_colourized_sequence
alias show_dna_sequence        report_main_sequence # === show_dna_sequence
|
95
|
-
|
96
|
-
# ========================================================================= #
|
97
|
-
# === show_composition
|
98
|
-
#
|
99
|
-
# This method will analyse the DNA string composition.
|
100
|
-
#
|
101
|
-
# Invocation example:
|
102
|
-
#
|
103
|
-
# scompo
|
104
|
-
#
|
105
|
-
# ========================================================================= #
|
106
|
-
def show_composition(
    i = dna_string?
  )
  # ======================================================================= #
  # Report the nucleotide composition of the given sequence: first the
  # absolute numbers, then the derived percentages. The counts come from
  # ::Bioroebe::CountAmountOfNucleotides.show_composition(i), which is
  # iterated here via each_pair (nucleotide => count).
  # ======================================================================= #
  length = i.size
  report_size_of_main_string
  hash = ::Bioroebe::CountAmountOfNucleotides.show_composition(i) # bl count_nucleotides
  erev 'Showing how many of the '+steelblue('four nucleotides')+rev+
       ' are in that sequence (absolute numbers):'
  print ' '
  absolute_numbers = hash.each_pair.map { |nucleotide, n_times|
    "#{nucleotide}: #{lightslategray(n_times.to_s)}#{rev}"
  }
  e absolute_numbers.join(', ')
  erev "The respective frequencies derived from these absolute "\
       "numbers, #{steelblue('in percent')}#{rev}"\
       ", are:"
  print ' '
  hash.each_pair { |nucleotide, n_times|
    # ===================================================================== #
    # Guard against an empty sequence: dividing by a length of zero would
    # otherwise lead to "NaN" being reported as percentage value.
    # ===================================================================== #
    percentage =
      if length.zero?
        0.0
      else
        (n_times.to_f * 100 / length).round(2)
      end.to_s
    print "#{rev}#{nucleotide}: #{orange(percentage)}#{rev}% "
  }
  erev
end
|
129
|
-
|
130
|
-
# ========================================================================= #
|
131
|
-
# === show_codon_usage
|
132
|
-
#
|
133
|
-
# This shows the codon usage of the string.
|
134
|
-
# ========================================================================= #
|
135
|
-
def show_codon_usage(
    i = dna_sequence_as_string?
  )
  # ======================================================================= #
  # Array input is normalized first: an empty Array falls back to the
  # main DNA sequence, any other Array is flattened, stripped of nil
  # entries and joined. The resulting input is then handed to
  # ::Bioroebe::ShowCodonUsage.
  # ======================================================================= #
  if i.is_a?(Array)
    i = i.empty? ? dna_sequence_as_string? : i.flatten.compact.join
  end
  ::Bioroebe::ShowCodonUsage.new(i)
end
|
147
|
-
|
148
|
-
# ========================================================================= #
|
149
|
-
# === show_all_codon_tables
|
150
|
-
#
|
151
|
-
# We used to tap into the Bio::CodonTable here for this part.
|
152
|
-
#
|
153
|
-
# But since some time, we no longer depend on this part - we
|
154
|
-
# have made available all of this in yaml files.
|
155
|
-
#
|
156
|
-
# The argument to this method can either be:
|
157
|
-
#
|
158
|
-
# :everything
|
159
|
-
# :only_names
|
160
|
-
#
|
161
|
-
# The first one is the default. This means that we will show everything.
|
162
|
-
#
|
163
|
-
# The second version is useful if you only want to report the names
|
164
|
-
# of the codon table in question. Several aliases exist for the
|
165
|
-
# second invocation.
|
166
|
-
# ========================================================================= #
|
167
|
-
def show_all_codon_tables(
    show_what = :everything
  )
  # ======================================================================= #
  # Load the ShowCodonTables class on demand, then instantiate it with
  # show_what (such as :everything or :only_names), framed by two e calls.
  # ======================================================================= #
  require 'bioroebe/codons/show_codon_tables.rb' unless Bioroebe.const_defined?(:ShowCodonTables)
  e
  ::Bioroebe::ShowCodonTables.new(show_what)
  e
end
|
177
|
-
|
178
|
-
# ========================================================================= #
|
179
|
-
# === report_n_start_codons
|
180
|
-
#
|
181
|
-
# Use this method to count how many ATG codons we have. We will honour
|
182
|
-
# the default start_codon in use.
|
183
|
-
#
|
184
|
-
# The third argument determines which reading frame is to be used. By
|
185
|
-
# default, the method will use the first reading frame.
|
186
|
-
# ========================================================================= #
|
187
|
-
def report_n_start_codons(
    this_string             = string?,
    use_this_as_start_codon = ::Bioroebe.start_codon?, # Use the proper start codon.
    in_which_frame          = :frame1
  )
  # ======================================================================= #
  # Report how many start codons are found in the given reading frame.
  #
  # this_string:             the sequence to scan; it is upcased first.
  # use_this_as_start_codon: the codon to count, such as 'ATG'.
  # in_which_frame:          :frame1, :frame2 or :frame3.
  #
  # A block may be given; if it yields something that begins with
  # "frame" then this will be used as the reading frame instead.
  # ======================================================================= #
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    case yielded
    when /^frame/
      in_which_frame = yielded.to_sym
    end
  end
  # ======================================================================= #
  # The following can be invoked via:
  #   n_ORF? frame1
  #
  # Each known frame maps onto [offset into the sequence, display name].
  # ======================================================================= #
  known_frames = {
    frame1: [0, 'frame 1'],
    frame2: [1, 'frame 2'],
    frame3: [2, 'frame 3']
  }
  sequence = this_string.upcase
  if known_frames.key?(in_which_frame)
    offset, in_which_frame = known_frames[in_which_frame]
    # ===================================================================== #
    # Only count codons that are in the requested frame: skip the leading
    # offset nucleotides, then compare the sequence triplet by triplet.
    # ===================================================================== #
    in_frame_codons = (sequence[offset..-1] || '').scan(/.../)
    n_start_codons = in_frame_codons.count(use_this_as_start_codon).to_s
  else
    # ===================================================================== #
    # Unknown frame designation: retain the old behaviour and count every
    # occurrence, irrespective of the reading frame.
    # ===================================================================== #
    n_start_codons = sequence.scan(/#{use_this_as_start_codon}/).size.to_s
  end
  trailing_message = " Initiation Codons "\
                     "(in #{orangered(in_which_frame)}#{rev})."
  erev "Our main string has #{sfancy(n_start_codons)}#{rev}"\
       " #{simp(use_this_as_start_codon)}#{rev} ("\
       "#{use_this_as_start_codon.tr('T','U')})"+
       trailing_message
  coding_area = coding_area?
  if coding_area # This has been user-supplied in that case.
    positions = coding_area.to_s.split('..')
    erev 'However had, only the nucleotides from position'
    erev "#{sfancy(positions.first.to_s)}#{rev}"\
         " to position #{sfancy(positions.last.to_s)}"\
         "#{rev} will be colourized."
  end
end
|
231
|
-
|
232
|
-
# ========================================================================= #
|
233
|
-
# === show_human_genome_version
|
234
|
-
#
|
235
|
-
# Use this method to show the most current human genome version.
|
236
|
-
# ========================================================================= #
|
237
|
-
def show_human_genome_version
  # ======================================================================= #
  # Fetch the ensembl page for Homo sapiens and report the genome
  # assembly version found on it, together with the URL that was queried.
  #
  # open-uri provides URI.open(); it is loaded lazily here, so that the
  # method does not depend on another file having required it already.
  # ======================================================================= #
  require 'open-uri'
  remote_url = 'https://www.ensembl.org/Homo_sapiens/Info/Index'
  # ======================================================================= #
  # The block form ensures that the remote handle is closed again.
  # ======================================================================= #
  dataset = URI.open(remote_url, &:read)
  use_this_regex = /Genome assembly: (.{1,11}\.p\d+) <small>/ # See: https://rubular.com/r/DD5FhaPs3b
  human_genome_version = dataset.scan(use_this_regex).flatten.first.to_s
  if human_genome_version.empty?
    # ===================================================================== #
    # The regex did not match, e. g. because the remote page layout has
    # changed. Say so, rather than reporting an empty version.
    # ===================================================================== #
    erev 'The most current human genome version could not be determined.'
  else
    erev "The most current human genome version is: "\
         "#{sfancy(human_genome_version)}"
  end
  erev "The URL that was used to query this has been: "\
       "#{steelblue(remote_url)}"
end
|
249
|
-
|
250
|
-
# ========================================================================= #
|
251
|
-
# === show_oligo_length_three
|
252
|
-
#
|
253
|
-
# We align in chunks of three and tell the user how often we can find
|
254
|
-
# these individual codons.
|
255
|
-
#
|
256
|
-
# Invocation example:
|
257
|
-
#
|
258
|
-
# random 99; oligo_3
|
259
|
-
#
|
260
|
-
# ========================================================================= #
|
261
|
-
def show_oligo_length_three(
    sequence = dna_sequence_object?
  )
  # ======================================================================= #
  # For every combination of three entries from ::Bioroebe.dna?, report
  # how many times String#scan finds that triplet in the upcased sequence.
  # ======================================================================= #
  upcased_sequence = sequence.upcase # This is the sequence that will be scanned.
  nucleotides = ::Bioroebe.dna? # This is equal to A, T, C and G.
  erev 'We will align the nucleotides in chunks of 3 and show their '\
       'frequency.'
  nucleotides.each do |first_nucleotide|
    nucleotides.each do |second_nucleotide|
      nucleotides.each do |third_nucleotide|
        triplet = first_nucleotide + second_nucleotide + third_nucleotide
        n_matches = upcased_sequence.scan(triplet).size
        erev triplet + ' ' + n_matches.to_s
      end
    end
  end
end
|
277
|
-
|
278
|
-
# ========================================================================= #
|
279
|
-
# === show_oligo_length_two
|
280
|
-
#
|
281
|
-
# Show all oligo of length two.
|
282
|
-
# ========================================================================= #
|
283
|
-
def show_oligo_length_two(
    string = string?
  )
  # ======================================================================= #
  # For every combination of two entries from ::Bioroebe.dna?, report
  # how many times String#scan finds that pair in the upcased input.
  # ======================================================================= #
  upcased_sequence = string.upcase # Always work on the upcased variant.
  nucleotides = ::Bioroebe.dna?
  nucleotides.each do |first_nucleotide|
    nucleotides.each do |second_nucleotide|
      pair = "#{first_nucleotide}#{second_nucleotide}"
      n_matches = upcased_sequence.scan(pair).size
      erev pair + ' ' + n_matches.to_s
    end
  end
end
|
295
|
-
|
296
|
-
# ========================================================================= #
|
297
|
-
# === show_position_for_the_main_sequence
|
298
|
-
# ========================================================================= #
|
299
|
-
def show_position_for_the_main_sequence
  # ======================================================================= #
  # Show the main sequence in chunks of up to 25 characters. Each chunk
  # is followed by a second line that contains the 1-based position of
  # every character (padded to a width of 4), and then by an empty line.
  # ======================================================================= #
  index_position = 1
  sequence?.scan(/.{1,25}/).each { |chunk|
    erev chunk.chars.join(' ')
    start = index_position
    index_position += chunk.size
    # ===================================================================== #
    # The position line is built via map/join rather than by appending
    # to a '' literal: this file uses frozen string literals, so a
    # literal can not be modified via << without raising a FrozenError.
    # ===================================================================== #
    second_line = (start...index_position).map { |position|
      position.to_s.ljust(4)
    }.join
    erev cadetblue(second_line)+rev
    e
  }
end
|
316
|
-
|
317
|
-
# ========================================================================= #
|
318
|
-
# === report_this_input_was_not_found
|
319
|
-
#
|
320
|
-
# This method is used to notify the user that a certain input was
|
321
|
-
# not found.
|
322
|
-
# ========================================================================= #
|
323
|
-
# Tell the user that the given input was not understood.
#
# i - the offending input. Nothing is reported for nil or for any
#     object that answers true to empty? (such as '' or []). Every
#     other object is shown via its to_s representation, so input
#     without an empty? method (e.g. an Integer) no longer raises.
def report_this_input_was_not_found(
    i = ''
  )
  return if i.nil?
  return if i.respond_to?(:empty?) && i.empty?
  erev "Input `#{sfancy(i.to_s)}#{rev}` was not "\
       "found to be a valid input for the BioShell."
end
|
331
|
-
|
332
|
-
# ========================================================================= #
|
333
|
-
# === show_local_sequences
|
334
|
-
#
|
335
|
-
# This method will show the available local sequences.
|
336
|
-
# ========================================================================= #
|
337
|
-
# List the fasta files returned by return_fasta_files_in_the_log_directory
# as a numbered list (basename only), or state that none were found.
def show_local_sequences
  fasta_files = return_fasta_files_in_the_log_directory
  if fasta_files.empty?
    erev 'No local fasta sequences could be found.'
  else
    e
    erev 'The following local sequences were found in '\
         'the main log'
    erev "directory (#{sdir(log_dir?)}#{rev})."
    e
    # All numbers are right-aligned to the width of the largest one.
    number_width = fasta_files.size.to_s.size
    fasta_files.each.with_index(1) do |fasta_file, number|
      erev "#{padding?}(#{number.to_s.rjust(number_width)}) #{rev}"\
           "#{sfile(File.basename(fasta_file))}#{rev}"
    end
    e
  end
end
|
355
|
-
|
356
|
-
# ========================================================================= #
|
357
|
-
# === show_nucleotide_sequence?
|
358
|
-
# ========================================================================= #
|
359
|
-
# Return whatever is stored under the :show_nucleotide_sequence key of
# @internal_hash.
def show_nucleotide_sequence?
  @internal_hash[:show_nucleotide_sequence]
end
alias display_nucleotide_object? show_nucleotide_sequence? # === display_nucleotide_object?
|
362
|
-
|
363
|
-
# ========================================================================= #
|
364
|
-
# === show_sequence_with_a_ruler
|
365
|
-
#
|
366
|
-
# This will show the main sequence together with a "ruler" on top.
|
367
|
-
#
|
368
|
-
# The first argument specifies how many nucleotides are to be displayed
|
369
|
-
# per given line.
|
370
|
-
#
|
371
|
-
# This method can also be called in this way:
|
372
|
-
#
|
373
|
-
# show_sequence_with_a_ruler { :without_colours }
|
374
|
-
#
|
375
|
-
# This will skip showing the ruler.
|
376
|
-
# ========================================================================= #
|
377
|
-
# Split a sequence into chunks of n characters and hand every chunk to
# show_nucleotide_sequence?.display_with_prior_formatting, followed by
# an empty line.
#
# group_together_n_nucleotides - chunk size. May be an Integer, a
#                                numeric String, or an Array whose
#                                first element is one of these.
#                                :default, nil, empty input and
#                                anything that does not yield a
#                                positive Integer select 70.
# use_this_sequence            - the sequence to display (converted
#                                via to_s).
#
# The value returned by an optional block selects the ruler type that
# is passed on to display_with_prior_formatting; only :without_colours
# is recognized, everything else keeps the default :show_ruler.
def show_sequence_with_a_ruler(
    group_together_n_nucleotides = :default,
    use_this_sequence            = main_sequence?
  )
  if group_together_n_nucleotides.is_a?(Array)
    group_together_n_nucleotides = group_together_n_nucleotides.first
  end
  if group_together_n_nucleotides.is_a?(String)
    group_together_n_nucleotides = group_together_n_nucleotides.to_i
  end
  # A chunk size that is not a positive Integer can not be used to
  # build the regex below, so fall back to the default in that case.
  unless group_together_n_nucleotides.is_a?(Integer) &&
         group_together_n_nucleotides.positive?
    group_together_n_nucleotides = 70
  end
  e
  e "Displaying the main sequence (length: #{use_this_sequence.to_s.size}) "\
    "in a chunk of #{slateblue(group_together_n_nucleotides.to_s)}#{rev}"\
    " nucleotides/\naminoacids next."
  e
  use_this_sequence = use_this_sequence.to_s
  chunks = use_this_sequence.split(
    /(.{#{group_together_n_nucleotides}})/
  ).reject(&:empty?)
  use_this_ruler_type = :show_ruler # Note that :show_ruler is the default.
  if block_given?
    case yield
    when :without_colours
      use_this_ruler_type = :without_colours
    end
  end
  chunks.each { |sequence|
    show_nucleotide_sequence?.display_with_prior_formatting(sequence) {
      use_this_ruler_type
    }
    e
  }
end
|
430
|
-
|
431
|
-
# ========================================================================= #
|
432
|
-
# === dna_with_ends
|
433
|
-
#
|
434
|
-
# Display DNA with proper ends.
|
435
|
-
#
|
436
|
-
# The first argument should be the string that we will colourize.
|
437
|
-
#
|
438
|
-
# If the second argument is given (`optional_colourize`), then this
|
439
|
-
# method will colourize the sequence at certain positions. This
|
440
|
-
# can be useful to display, for instance, restriction-sites.
|
441
|
-
# ========================================================================= #
|
442
|
-
# Return the given sequence wrapped between leading_five_prime and
# trailing_three_prime, optionally with parts of it colourized.
#
# i                    - the sequence to display. It is never modified;
#                        all work happens on a private copy.
# optional_colourize   - a String or an Array of Strings to highlight.
# colourize_everything - only consulted when optional_colourize is
#                        neither a String nor an Array: true highlights
#                        every occurrence, 1 only the first one.
#
# Without a block the whole sequence is passed through sfancy. With a
# block that returns :honour_coding_area_if_it_exists the entries of
# optional_colourize are highlighted - restricted to the stored coding
# area (@internal_hash[:coding_area], a 1-based "start..end" value) if
# there is one. Any other block value leaves the sequence uncoloured.
def dna_with_ends(
    i                    = dna_sequence_as_string?,
    optional_colourize   = nil,
    colourize_everything = true
  )
  # Copy first: upcase!/gsub! below must not alter the caller's String.
  i = i.to_s.dup
  i.upcase! if config?.respond_to?(:upcase_nucleotides) && config?.upcase_nucleotides
  optional_colourize = [optional_colourize] if optional_colourize.is_a? String
  if block_given?
    case yield
    when :honour_coding_area_if_it_exists
      if optional_colourize && @internal_hash[:coding_area]
        coding_area = @internal_hash[:coding_area].to_s
        # We deduct 1 because ruby Strings are indexed starting at 0.
        start_position = coding_area.split('..').first.to_i - 1
        end_position   = coding_area.split('..').last.to_i - 1
        start_position = 0 if start_position < 0
        # i[0, n] is empty for n == 0, whereas i[0..(n-1)] would turn
        # into i[0..-1] and thus duplicate the whole sequence.
        leading_part     = i[0, start_position].to_s
        internal_segment = i[start_position..end_position].to_s.dup
        trailing_part    = i[(end_position + 1)..-1].to_s # nil past the end.
        Array(optional_colourize).flatten.each { |inner_entry|
          internal_segment.gsub!(inner_entry, yellow+inner_entry+rev)
        }
        i = leading_part + internal_segment + trailing_part
      elsif optional_colourize
        if optional_colourize.is_a? Array
          # Apply all entries given in the Array.
          optional_colourize.flatten.each { |inner_entry|
            i.gsub!(inner_entry, colour_for_stop_codon(inner_entry)+rev)
          }
        else
          # Make sure that we have a String past this point.
          optional_colourize = optional_colourize.to_s
          if colourize_everything == true
            i.gsub!(optional_colourize, colour_for_stop_codon(optional_colourize)+rev)
          elsif colourize_everything == 1
            i.sub!(optional_colourize, colour_for_stop_codon(optional_colourize)+rev)
          end
        end
      end
    end
  else
    i = "#{sfancy(i)}#{rev}"
  end
  # Report the DNA sequence with leading 5' and trailing 3' end.
  return "#{leading_five_prime}#{i}#{trailing_three_prime}"
end
|
511
|
-
|
512
|
-
require 'bioroebe/toplevel_methods/matches.rb'
|
513
|
-
# ========================================================================= #
|
514
|
-
# === report_the_first_atg
|
515
|
-
#
|
516
|
-
# This method will simply report the first ATG codon.
|
517
|
-
# ========================================================================= #
|
518
|
-
# Report the position of the first match of start_codon? within the
# main DNA sequence, followed by up to 100 nucleotides starting at
# that position. If there is no match at all this is reported instead
# of failing on an empty result.
def report_the_first_atg
  dna_sequence = dna_sequence_object_as_string?
  array_matches = ::Bioroebe.return_all_substring_matches(
    dna_sequence, start_codon?
  )
  first_match = Array(array_matches).first
  if first_match.nil?
    erev 'No ATG could be found in this sequence.'
    return
  end
  # The first element of a match is used as the start position; it is
  # treated as 1-based below, hence the "- 1".
  start_position = Array(first_match).first
  erev 'The first ATG can be found at position '+
       simp(start_position.to_s)+rev+'.'
  erev 'We will next show the first 100 nucleotides, starting from this:'
  report_five_prime_three_prime(
    dna_sequence_object?[start_position-1, 100]
  )
end
|
531
|
-
|
532
|
-
# ========================================================================= #
|
533
|
-
# === show_aminoacid_sequence
|
534
|
-
#
|
535
|
-
# To show the aminoacid sequence, do:
|
536
|
-
# show_aa
|
537
|
-
# ========================================================================= #
|
538
|
-
# Print the aminoacid sequence, prefixed with the current padding.
def show_aminoacid_sequence
  padded_sequence = padding? + aminoacid_sequence?
  erev(padded_sequence)
end
|
542
|
-
|
543
|
-
# ========================================================================= #
|
544
|
-
# === show_dna_string (show string tag, show tag)
|
545
|
-
#
|
546
|
-
# Use this method to show the @sequence, or another string of your
|
547
|
-
# choosing, if you pass it to the method.
|
548
|
-
#
|
549
|
-
# You can also invoke this method with something like this:
|
550
|
-
#
|
551
|
-
# show_string { :with_colourized_separator }
|
552
|
-
#
|
553
|
-
# This means that we will use '|' separators that are colourized.
|
554
|
-
# ========================================================================= #
|
555
|
-
# Display a DNA string via display_nucleotide_sequence when mode? is
# :dna; in every other mode show_aminoacid_sequence is called instead.
#
# this_string              - the sequence to show; nil falls back to
#                            dna_string?. An empty value leads to
#                            report_that_a_string_must_be_assigned_first.
# truncate_too_long_result - a truthy value shortens sequences longer
#                            than TRUNCATE_AT_N_ELEMENTS and appends a
#                            warning. :do_not_truncate and
#                            :do_not_truncate_and_do_not_show_leader_and_trailer
#                            both disable the truncation.
#
# A block returning :with_colourized_separator inserts a colourized
# '|' after every third character.
#
# NOTE(review): no 5'/3' leader or trailer is added in this method; the
# sequence is handed to display_nucleotide_sequence as it is.
def show_dna_string(
    this_string              = dna_string?,
    truncate_too_long_result = do_truncate?
  )
  case truncate_too_long_result
  when :do_not_truncate,
       :do_not_truncate_and_do_not_show_leader_and_trailer
    # Symbols are truthy, so they have to be mapped to false explicitly
    # or the sequence would be truncated against the caller's request.
    truncate_too_long_result = false
  end
  truncate_at_n_elements = TRUNCATE_AT_N_ELEMENTS
  this_string = dna_string? if this_string.nil? && dna_string?
  if this_string.to_s.empty?
    report_that_a_string_must_be_assigned_first
  elsif mode? == :dna
    if truncate_too_long_result && (this_string.size > truncate_at_n_elements)
      this_string =
        this_string[0, truncate_at_n_elements]+
        swarn(' [TRUNCATED as the sequence '\
              "is longer than #{truncate_at_n_elements} nucleotides]")
    end
    # The string is displayed without upcasing it.
    if block_given?
      case yield
      when :with_colourized_separator
        str = ''.dup
        this_string.split(//).each_with_index { |char, index|
          str << char
          str << paleturquoise('|')+sfancy if (index + 1) % 3 == 0
        }
        this_string = str
      end
    end
    display_nucleotide_sequence(this_string)
  else # Else use the aminoacid mode.
    show_aminoacid_sequence
  end
end
alias show_main_string       show_dna_string # === show_main_string
alias report_sequence        show_dna_string # === report_sequence
alias show_sequence          show_dna_string # === show_sequence
alias show_main_dna_sequence show_dna_string # === show_main_dna_sequence
alias show_string            show_dna_string # === show_string
|
623
|
-
|
624
|
-
# ========================================================================= #
|
625
|
-
# === report_size_of_main_string
|
626
|
-
# ========================================================================= #
|
627
|
-
# Report the size of the given sequence.
#
# i              - the object whose size is reported; nil or an empty
#                  Array is replaced by dna_sequence_object?.
# type_of_string - a label placed directly in front of the word
#                  "string" in the message.
def report_size_of_main_string(
    i              = dna_sequence_object?,
    type_of_string = 'main '
  )
  i = dna_sequence_object? if i.nil?
  i = dna_sequence_object? if i.is_a?(Array) && i.empty?
  erev "The #{type_of_string}string has #{sfancy(i.size.to_s)}"\
       "#{rev} #{nucleotides_or_aminoacids?}."
end
alias report_length_of_the_dna_string report_size_of_main_string # === report_length_of_the_dna_string
alias report_size_of_this_sequence    report_size_of_main_string # === report_size_of_this_sequence
|
637
|
-
|
638
|
-
# ========================================================================= #
|
639
|
-
# === show_editor_in_use
|
640
|
-
# ========================================================================= #
|
641
|
-
# Print the value of the MAIN_EDITOR constant.
def show_editor_in_use
  e(MAIN_EDITOR)
end
|
644
|
-
|
645
|
-
# ========================================================================= #
|
646
|
-
# === show_welcome_message
|
647
|
-
#
|
648
|
-
# Show a little welcome message on startup. This can be disabled of
|
649
|
-
# course.
|
650
|
-
# ========================================================================= #
|
651
|
-
# Print a two-line welcome message (version, date of the last update
# and a hint about the help command) unless silent_startup? is true.
def show_welcome_message
  return if silent_startup?
  erev "Welcome to the Bioroebe::Shell Version #{sfancy(version?.to_s)}"\
       "#{rev}, last updated: #{simp(::Bioroebe.last_updated?)}#{rev}."
  erev "Type \"#{sfancy('help')}#{rev}\" to get some help."
end
|
662
|
-
|
663
|
-
# ========================================================================= #
|
664
|
-
# === show_the_weight_of_the_four_individual_nucleotides
|
665
|
-
# ========================================================================= #
|
666
|
-
# Print one line for each of A, T, C and G: the value of the matching
# query method (adenin?, thymin?, cytosin?, guanin?) right-aligned to
# ten characters, followed by the colourized weight_of_* value. The
# four lines are surrounded by empty lines.
def show_the_weight_of_the_four_individual_nucleotides
  e
  erev " A: #{adenin?.rjust(10)} #{palevioletred(weight_of_adenin?)}"
  erev " T: #{thymin?.rjust(10)} #{palevioletred(weight_of_thymin?)}"
  erev " C: #{cytosin?.rjust(10)} #{palevioletred(weight_of_cytosin?)}"
  erev " G: #{guanin?.rjust(10)} #{palevioletred(weight_of_guanin?)}"
  e
end
|
678
|
-
|
679
|
-
# ========================================================================= #
|
680
|
-
# === show_this_subsequence
|
681
|
-
#
|
682
|
-
# Sometimes we want to show a subsequence. This method helps us to do
|
683
|
-
# so, too.
|
684
|
-
#
|
685
|
-
# The input may be "tainted", e. g. be a String like "12,345" or
|
686
|
-
# "12.345", so this method will have to eliminate the ',' and '.'
|
687
|
-
# characters as well, before converting this String into an
|
688
|
-
# Integer. (It must be an Integer because nucleotide counting
|
689
|
-
# can logically not be a Float.)
|
690
|
-
#
|
691
|
-
# Usage example:
|
692
|
-
#
|
693
|
-
# random 99; [22..33]
|
694
|
-
#
|
695
|
-
# ========================================================================= #
|
696
|
-
# Show the part of a sequence between two positions.
#
# start_position        - first position; ',' and '.' are removed
#                         before the value is converted to an Integer.
#                         Values below 1 are raised to 1.
# end_position          - last position, sanitized the same way and
#                         limited to the size of the sequence.
# work_on_this_sequence - the object that is asked for the subsequence
#                         via its start_end method.
def show_this_subsequence(
    start_position        = 1,
    end_position          = 10,
    work_on_this_sequence = dna_sequence_object?
  )
  start_position = start_position.to_s.delete(',.').to_i
  end_position   = end_position.to_s.delete(',.').to_i
  if start_position < 1
    erev 'The minimum for the start-position must be 1, so this'
    erev "is now treated as one rather than #{start_position}."
    start_position = 1
  end
  if end_position > work_on_this_sequence.size
    erev "The sequence is #{slateblue('too long')}#{rev} ("\
         "#{crimson('end_position')}#{rev} is "\
         "at #{sfancy(end_position.to_s)}#{rev}, "\
         "#{nucleotides_or_aminoacids?} sequence length "\
         "was: #{sfancy(work_on_this_sequence.size.to_s)}"\
         "#{rev})."
    erev "It will be limited next to "\
         "#{sfancy(work_on_this_sequence.size.to_s)}#{rev} in length."
    end_position = work_on_this_sequence.size
  end
  sequence = work_on_this_sequence.start_end(start_position, end_position)
  unless sequence
    return erev('This subsequence appears to be invalid '\
                "(start: #{start_position}, end: #{end_position})")
  end
  size = sequence.size.to_s
  # Only some sequence objects can tell what they consist of.
  kind_of_sequence =
    if work_on_this_sequence.respond_to?(:nucleotides_or_aminoacids?)
      work_on_this_sequence.nucleotides_or_aminoacids?.to_s
    else
      ''
    end
  erev "Next showing a subsequence, #{kind_of_sequence} "\
       "#{olive(start_position.to_s)}#{rev} to "\
       "#{olive(end_position.to_s)}#{rev}"\
       " (including #{olive(start_position.to_s)}"\
       "#{rev} and #{olive(end_position.to_s)}#{rev})."
  erev "The length of the fragment will be #{simp(size)}#{rev}"\
       " #{kind_of_sequence}."
  report_this_dna_sequence_with_proper_trailer_and_leader(sequence) { :try_to_colourize_start_codon }
end
|
746
|
-
|
747
|
-
# ========================================================================= #
|
748
|
-
# === report_where_the_home_directory_can_be_found
|
749
|
-
# ========================================================================= #
|
750
|
-
# Print the location of the "home" (log) directory.
#
# i - the directory to report.
def report_where_the_home_directory_can_be_found(
    i = log_dir?
  )
  erev(
    'The "home" directory (actually called the log directory) '\
    'can be found here:'
  )
  e
  e(" #{sdir(i)}")
  e
end
|
759
|
-
|
760
|
-
# ========================================================================= #
|
761
|
-
# === show_both_dna_strands
|
762
|
-
# ========================================================================= #
|
763
|
-
# Show the main sequence first, then the complement of string?, the
# latter including the prime ends.
def show_both_dna_strands
  show_main_sequence
  show_complement(string?, :include_prime_ends)
end
alias show_double_strand show_both_dna_strands # === show_double_strand
|
767
|
-
|
768
|
-
# ========================================================================= #
|
769
|
-
# === show_codon_piped_sequence
|
770
|
-
# ========================================================================= #
|
771
|
-
# Display the default nucleotide sequence, requesting the :piped
# output of display_nucleotide_sequence.
def show_codon_piped_sequence
  display_nucleotide_sequence(:default) do
    :piped
  end
end
|
776
|
-
|
777
|
-
# ========================================================================= #
|
778
|
-
# === show (show tag)
|
779
|
-
#
|
780
|
-
# Bundle together some show-related methods.
|
781
|
-
# ========================================================================= #
|
782
|
-
# Dispatch to one of the show_* methods.
#
# i - what to show. An Array is joined with ' ' and stripped first.
#     Recognized are the codon table, the blosum matrix and - for
#     empty or nil input - the DNA string. Anything else is ignored.
def show(i)
  i = i.join(' ').strip if i.is_a?(Array)
  if ['codon_table', 'codon', 'codon table'].include?(i)
    show_codon_table
  elsif ['blosum', 'blosum matrix', 'blosum_matrix'].include?(i)
    show_blosum_matrix
  elsif i.nil? || '' == i # Empty or nil.
    show_dna_string
  end
end
|
793
|
-
|
794
|
-
# ========================================================================= #
|
795
|
-
# === display_nucleotide_sequence
|
796
|
-
#
|
797
|
-
# Consistently use this method whenever you wish to display a
|
798
|
-
# nucleotide sequence.
|
799
|
-
# ========================================================================= #
|
800
|
-
# Hand a sequence to show_nucleotide_sequence?.report_this_sequence.
#
# this_sequence - the sequence to display; :default selects
#                 dna_sequence_object?.
#
# A block returning :piped or :show_piped turns on the piped output.
# The receiver is given a Hash with the keys :padding_to_use and
# :show_piped_output through a block.
def display_nucleotide_sequence(
    this_sequence = dna_sequence_object?,
    &block
  )
  if this_sequence.is_a?(Symbol) && this_sequence == :default
    this_sequence = dna_sequence_object?
  end
  use_piped_output = block_given? && %i[piped show_piped].include?(yield)
  options = {
    padding_to_use:    padding?,
    show_piped_output: use_piped_output
  }
  show_nucleotide_sequence?.report_this_sequence(this_sequence) { options }
end
alias display_this_nucleotide_sequence display_nucleotide_sequence # === display_this_nucleotide_sequence
alias display_this_sequence            display_nucleotide_sequence # === display_this_sequence
alias show_this_sequence               display_nucleotide_sequence # === show_this_sequence
|
825
|
-
|
826
|
-
# ========================================================================= #
|
827
|
-
# === report_how_many_aminoacids_we_have
|
828
|
-
#
|
829
|
-
# This method will report how many aminoacids we have assigned.
|
830
|
-
# ========================================================================= #
|
831
|
-
# Report the number of aminoacids: the size of aminoacids? if that is
# set, otherwise a third of the size of the DNA sequence (rounded down).
def report_how_many_aminoacids_we_have
  n_aminoacids = (
    aminoacids? ? aminoacids?.size : dna_sequence_object?.size / 3.0
  ).to_i
  erev "This sequence has #{simp(n_aminoacids.to_s)}#{rev} aminoacids."
end
|
840
|
-
|
841
|
-
# ========================================================================= #
|
842
|
-
# === show_chromosome_table
|
843
|
-
# ========================================================================= #
|
844
|
-
# Print the key/value pairs stored in the YAML file
# FILE_CHROMOSOME_NUMBERS as a two-column table. If the file does not
# exist, no_file_exists_at is called instead.
def show_chromosome_table
  name_column_width = 16
  erev "Chromosome Table from file #{sfile(FILE_CHROMOSOME_NUMBERS)}#{rev}"
  unless File.exist?(FILE_CHROMOSOME_NUMBERS)
    return no_file_exists_at(FILE_CHROMOSOME_NUMBERS)
  end
  chromosome_numbers = YAML.load_file(FILE_CHROMOSOME_NUMBERS)
  e
  chromosome_numbers.each_pair do |name, n_chromosomes|
    erev " #{name.ljust(name_column_width)} "\
         "#{steelblue(n_chromosomes.to_s.rjust(3))}"
  end
  e
end
|
860
|
-
|
861
|
-
# ========================================================================= #
|
862
|
-
# === report_everything_about_this_amino_acid
|
863
|
-
#
|
864
|
-
# Use this method to report everything about any particular amino acid.
|
865
|
-
# ========================================================================= #
|
866
|
-
# Report the residue, the possible codons and the molecular mass of an
# amino acid.
#
# i - the name of the amino acid, or an Array of such names (each entry
#     is reported on its own). Any '?' is removed from a copy of the
#     name; the caller's String is left untouched, so frozen Strings
#     are accepted as well.
def report_everything_about_this_amino_acid(i)
  if i.is_a? Array
    return i.each { |entry| report_everything_about_this_amino_acid(entry) }
  end
  i = i.to_s.delete('?') # Non-destructive, unlike delete!.
  erev 'It seems as if we did find an Amino Acid ('+simp(i)+rev+
       '). Its characteristic residue (R) is:'+N+N
  # Names that are unknown to AMINO_ACIDS_RESTE (the comments of this
  # file mention german names such as "glycin") are mapped onto the
  # downcased english name via the one-letter code.
  if !AMINO_ACIDS_RESTE.key?(i) && AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER.key?(i)
    one_letter = AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER[i]
    i = AMINO_ACIDS_ENGLISH[one_letter].downcase
  end
  residue = AMINO_ACIDS_RESTE[i.downcase].to_s
  efancy " #{residue}#{N}"
  erev 'The codons coding for the aminoacid '+simp(i)+rev+' are:'
  e
  e ' '+mediumturquoise(
    ::Bioroebe::PossibleCodonsForThisAminoacid.new(i).pretty_result
  )
  e
  molecular_mass_of(i, 2) # The 2 says to round to 2 digits.
end
|
894
|
-
|
895
|
-
# ========================================================================= #
|
896
|
-
# === report_five_prime_three_prime
|
897
|
-
# ========================================================================= #
|
898
|
-
# Print the result of dna_with_ends for the given sequence.
def report_five_prime_three_prime(i)
  erev(dna_with_ends(i))
end
|
901
|
-
|
902
|
-
# ========================================================================= #
|
903
|
-
# === show_startup_information
|
904
|
-
#
|
905
|
-
# This method here will usually be shown only once, on an initial startup
|
906
|
-
# of the Bioroebe::Shell. Afterwards, it will no longer be shown at all.
|
907
|
-
#
|
908
|
-
# Note that showing this can be disabled.
|
909
|
-
# ========================================================================= #
|
910
|
-
# Print the introduction for first-time users: a short greeting, the
# components worth looking at first, how to show this text again, the
# URL of the documentation and a contact email address.
def show_startup_information
  e
  erev "This seems to be the first time that you are using the "\
       "#{olivedrab('Bioroebe::Shell')}#{rev}, at the least on"
  erev 'this computer.'
  e
  erev 'It is recommended to have a look at the following components first:'
  e
  [' help', ' random', ' assign', ' complement'].each do |component|
    efancy component
  end
  e
  erev 'If you want to show this intro-menu again, do:'
  e
  efancy ' show-intro'
  e
  erev 'You can also see more documentation at:'
  e
  e " #{slateblue(URL_TO_THE_DOCUMENTATION)}"
  e
  erev 'If you feel that something is missing or incorrect, feel '\
       'free to send an email to:'
  e
  efancy " #{EMAIL}"
  e
end
|
937
|
-
|
938
|
-
require 'bioroebe/colours/colourize_sequence.rb'
|
939
|
-
# ========================================================================= #
|
940
|
-
# === report_colourized_sequence
|
941
|
-
#
|
942
|
-
# This method will use the new class ColourizeSequence, rather than
|
943
|
-
# the old internal way.
|
944
|
-
#
|
945
|
-
# In the long run, it may be best to transition all of the Bioroebe::Shell
|
946
|
-
# into the new class - but for now, we will use a hybrid system.
|
947
|
-
#
|
948
|
-
# To invoke this method, try:
|
949
|
-
#
|
950
|
-
# start_and_stop?
|
951
|
-
#
|
952
|
-
# ========================================================================= #
|
953
|
-
# Let ColourizeSequence.return_sequence process the main DNA sequence
# and display the result via show_nucleotide_sequence?, followed by an
# empty line.
#
# colourize_what - handed to ColourizeSequence through a block.
def report_colourized_sequence(
    colourize_what = :start_and_stop_codon
  )
  colourized_sequence = ColourizeSequence.return_sequence(
    dna_sequence_object?
  ) { colourize_what }
  show_nucleotide_sequence?.display(colourized_sequence)
  e
end
|
960
|
-
|
961
|
-
# ========================================================================= #
|
962
|
-
# === show_complement
|
963
|
-
#
|
964
|
-
# If the second argument is true, we pad via 5' and 3'.
|
965
|
-
#
|
966
|
-
# As of Feb 2015, we will try with leading padding as well.
|
967
|
-
# ========================================================================= #
|
968
|
-
# Print the complement of a sequence.
#
# i                       - the sequence; nil falls back to dna_string?
#                           and an Array is joined into one String.
# also_include_prime_ends - if truthy (:show_leading_primes and
#                           :include_prime_ends count as true) the
#                           output is padded and wrapped between
#                           leading_3_prime and trailing_5_prime.
def show_complement(
    i                       = dna_string?,
    also_include_prime_ends = false
  )
  if %i[show_leading_primes include_prime_ends].include?(also_include_prime_ends)
    also_include_prime_ends = true
  end
  i = dna_string? if i.nil?
  i = i.join('') if i.is_a?(Array)
  if also_include_prime_ends
    erev "#{padding?}#{rev}#{leading_3_prime}"\
         "#{sfancy(complement(i))}#{rev}#{trailing_5_prime}"
  else
    erev complement(i)
  end
end
|
991
|
-
|
992
|
-
# ========================================================================= #
|
993
|
-
# === show_position_of_sequence
|
994
|
-
#
|
995
|
-
# This currently works only for Amino Acids - at the least I have tested
|
996
|
-
# it only on aminoacids so far, and not on DNA/RNA.
|
997
|
-
# ========================================================================= #
|
998
|
-
# Print a sequence in rows of chunk_size characters; below every
# complete row the 1-based positions of its characters are shown. The
# positions of a trailing incomplete row are printed separately at the
# very end.
#
# i          - the sequence to display (converted via to_s).
# chunk_size - how many characters to display per row. Values that are
#              not positive fall back to 10, which avoids a division
#              by zero in the row check.
def show_position_of_sequence(
    i          = aa_sequence?,
    chunk_size = 10 # How many chunks to display per row.
  )
  chunk_size = chunk_size.to_i
  chunk_size = 10 unless chunk_size.positive?
  array = i.to_s.chars
  # Both buffers are appended to, so they must not be (possibly frozen)
  # String literals.
  display_string = ''.dup
  index_string   = ''.dup
  # The loop deliberately runs one step past the last character; that
  # extra step only appends padding and may close the final row.
  0.upto(array.size) { |index|
    display_string << array[index].to_s.rjust(2) << ' '
    unless array.size == index
      index_string << palevioletred((index + 1).to_s.rjust(2)+' ')
    end
    if index % chunk_size == (chunk_size - 1)
      display_string << N
      display_string << index_string << rev << N << N
      index_string = ''.dup
    end
  }
  erev display_string # Report it finally.
  erev index_string
end
|
1019
|
-
|
1020
|
-
# ========================================================================= #
|
1021
|
-
# === show_alu_sequence
|
1022
|
-
#
|
1023
|
-
# Invoke this method by doing something like:
|
1024
|
-
#
|
1025
|
-
# alu_sequence?
|
1026
|
-
#
|
1027
|
-
# ========================================================================= #
|
1028
|
-
# Parse FILE_ALU_ELEMENTS via ::Bioroebe.parse_fasta and print the
# fasta sequence together with its length.
def show_alu_sequence
  alu_sequence = ::Bioroebe.parse_fasta(FILE_ALU_ELEMENTS).fasta_sequence
  erev "The ALU sequence in humans may be this (length: "\
       "#{sfancy(alu_sequence.size.to_s)}#{rev}):"
  erev " #{simp(alu_sequence)}"
end
|
1035
|
-
|
1036
|
-
# ========================================================================= #
|
1037
|
-
# === show_possible_codons_for_this_aminoacid
|
1038
|
-
# ========================================================================= #
|
1039
|
-
# Look up the possible codons for the given aminoacid, restricted to
# the four standard nucleotide letters, append them to
# @array_aminoacid_sequence and return them.
def show_possible_codons_for_this_aminoacid(i)
  PossibleCodonsForThisAminoacid[
    i, :use_only_the_four_standard_nucleotide_letters
  ].tap { |codons| @array_aminoacid_sequence << codons }
end
|
1045
|
-
|
1046
|
-
# ========================================================================= #
|
1047
|
-
# === show_date
|
1048
|
-
# ========================================================================= #
|
1049
|
-
def show_date
  # Report the current date in the dd.mm.yyyy format.
  todays_date = Time.now.strftime('%d.%m.%Y')
  erev(todays_date)
end
|
1052
|
-
|
1053
|
-
# ========================================================================= #
|
1054
|
-
# === show_taxid
|
1055
|
-
#
|
1056
|
-
# This method will show the particular TaxID, using the NCBI taxonomy
|
1057
|
-
# database.
|
1058
|
-
#
|
1059
|
-
# The tax-id 9606 is "Homo sapiens".
|
1060
|
-
# ========================================================================= #
|
1061
|
-
def show_taxid(id = 9606)
  # Lazily load open-uri; it is only needed when this method is invoked.
  require 'open-uri'
  id = 9606 if id.nil? # 9606 is "Homo sapiens".
  id = id.to_s
  url = 'http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id='+id+'&lvl=0'
  erev 'The remote URL is: '+sfancy(url)
  # ======================================================================= #
  # URI.open is used because Kernel#open does not open remote URLs on
  # modern ruby versions. The block form closes the handle again.
  # ======================================================================= #
  webpage = URI.open(url, &:read)
  # ======================================================================= #
  # Capture whatever organism name is inside the <h2> heading, rather
  # than only the literal string "Homo sapiens". Otherwise every TaxID
  # other than 9606 would be reported with an empty name.
  # ======================================================================= #
  regex = /^<table width="100%"><tr><td valign="top"><h2>([^<]+)<\/h2>/
  name_of_the_organism = webpage[regex, 1].to_s.dup
  erev 'The TaxID of '+simp(id)+rev+' corresponds to `'+
       sfancy(name_of_the_organism)+rev+'`.'
end
|
1073
|
-
|
1074
|
-
# ========================================================================= #
|
1075
|
-
# === show_nucleotides_table
|
1076
|
-
#
|
1077
|
-
# Use this method to show the nucleotides table - their formula and
|
1078
|
-
# the molecular mass.
|
1079
|
-
# ========================================================================= #
|
1080
|
-
def show_nucleotides_table
  # Only these entries of the nucleotides.yml dataset are displayed.
  wanted_nucleotides = %w( Adenin Cytosin Guanin Thymin )
  YAML.load_file(FILE_NUCLEOTIDES).each_pair {|name, chemical_formula|
    next unless wanted_nucleotides.include? name
    # ===================================================================== #
    # Let ChemistryParadise calculate the mass, rounded to two digits.
    # ===================================================================== #
    calculator = ChemistryParadise::CalculateAtomicMass.new(chemical_formula, :do_not_report)
    molecular_mass = calculator.masse?.to_f.round(2)
    e name.to_s.ljust(8)+' -> '+chemical_formula.to_s.rjust(8)+
      rev+' (Molecular mass: '+simp(molecular_mass.to_s)+')'+rev
  }
end
|
1097
|
-
|
1098
|
-
# ========================================================================= #
|
1099
|
-
# === show_ori_sequences
|
1100
|
-
#
|
1101
|
-
# The DnaA box is: TTATC[CA]A[CA]A
|
1102
|
-
# ========================================================================= #
|
1103
|
-
def show_ori_sequences
  erev 'The DnaA box has this consensus sequence: '+
       sfancy("5'-TTATC[CA]A[CA]A-3'")
  # ======================================================================= #
  # Search for two concrete variants of the consensus sequence. The
  # result of the last search is the return value.
  # ======================================================================= #
  %w( TTATCCACA TTATCAAAA ).map {|dnaa_box|
    erev 'Searching for '+dnaa_box
    try_to_find_restriction_enzymes_for(dnaa_box)
  }.last
end
|
1113
|
-
|
1114
|
-
# ========================================================================= #
|
1115
|
-
# === show_segments
|
1116
|
-
#
|
1117
|
-
# This method will show the DNA segments via a R-compatible way.
|
1118
|
-
#
|
1119
|
-
# Usage example:
|
1120
|
-
#
|
1121
|
-
# set AAAATGCAGTAACCCATGCCC; show_segments
|
1122
|
-
#
|
1123
|
-
# ========================================================================= #
|
1124
|
-
def show_segments
  start_codons = ::Bioroebe.scan_this_input_for_startcodons(dna_sequence_object?)
  erev ' start end width'
  # Row numbers start at 1; every start codon spans exactly 3 nucleotides.
  start_codons.each.with_index(1) {|match, row_number|
    first_position = match.first
    last_position  = first_position+2
    codon          = match.last.first
    erev ' ['+row_number.to_s+'] '+first_position.to_s.rjust(5)+' '+
         last_position.to_s.rjust(5)+' '+'3'.rjust(4)+' ['+codon.downcase+']'
  }
end
|
1135
|
-
|
1136
|
-
require 'bioroebe/toplevel_methods/aminoacids_and_proteins.rb'
|
1137
|
-
# ========================================================================= #
|
1138
|
-
# === show_possible_phosphorylation_sites
|
1139
|
-
#
|
1140
|
-
# This method will find all possible phosphorylation sites in any
|
1141
|
-
# given target sequence. It will also identify the aminoacids that
|
1142
|
-
# can be phosphorylated.
|
1143
|
-
#
|
1144
|
-
# To test this, try:
|
1145
|
-
#
|
1146
|
-
# random 250; P?
|
1147
|
-
#
|
1148
|
-
# ========================================================================= #
|
1149
|
-
def show_possible_phosphorylation_sites(i = aminoacid_sequence?)
  dna = dna_sequence_object?
  # ======================================================================= #
  # Collect the codons of the three aminoacids that can be
  # phosphorylated: serine, tyrosine and threonine.
  # ======================================================================= #
  codons = [:serine, :tyrosine, :threonine].map {|aminoacid|
    ::Bioroebe.codons_for?(aminoacid)
  }.flatten
  # ======================================================================= #
  # === Convert a trailing Y (pyrimidine) into the concrete T and C next
  #
  # Duplicates are removed so that no codon is counted twice.
  # ======================================================================= #
  codons = codons.map {|codon|
    if codon.end_with? 'Y'
      [codon.sub(/Y$/,'T'), codon.sub(/Y$/,'C')]
    else
      codon
    end
  }.flatten.uniq
  # ======================================================================= #
  # Count all hits (in any frame) and remember which codons occurred.
  # Each codon is scanned for only once.
  # ======================================================================= #
  all_codons_found_in_the_sequence = []
  n_phosphorylation_sites = 0
  codons.each {|codon|
    n_hits = dna.scan(/#{codon}/).size
    next if n_hits.zero?
    n_phosphorylation_sites += n_hits
    all_codons_found_in_the_sequence << codon
  }
  # Use the singular form only for exactly one site.
  singular_or_plural = (n_phosphorylation_sites == 1) ? 'site' : 'sites'
  erev 'In this sequence, we have found '+simp(n_phosphorylation_sites.to_s)+rev+
       ' possible phosphorylation '+singular_or_plural+', using all '\
       '3 possible frames.'
  e
  erev 'In particular, these '+all_codons_found_in_the_sequence.size.to_s+
       ' different codons were found: '
  e
  erev ' '+simp(all_codons_found_in_the_sequence.join('/'))+rev
  e
  erev 'For the first frame, the start positions are these:'
  e
  # ======================================================================= #
  # === Find the start positions for frame 1 next
  #
  # The n-th triplet (counting from zero) starts at DNA position n*3+1
  # and encodes the aminoacid at protein position n+1.
  # ======================================================================= #
  all_triplets = dna.scan(/.../)
  matching_indices = all_triplets.each_index.select {|index|
    all_codons_found_in_the_sequence.include? all_triplets[index]
  }
  dna_positions     = matching_indices.map {|index| (index * 3)+1 }
  protein_positions = matching_indices.map {|index| index+1 }
  erev ' DNA: '+simp(dna_positions.join('/'))+rev
  erev ' Protein: '+simp(protein_positions.join('/'))+rev
  # ======================================================================= #
  # Now colourize the matching codons, but only in the first frame.
  # ======================================================================= #
  new_colourized_dna_sequence = all_triplets.map {|codon|
    codon = swarn(codon) if all_codons_found_in_the_sequence.include? codon
    codon+rev
  }.join
  e
  erev 'The DNA sequence with possible phosphorylation sites is:'
  e
  erev left_padding?+leading_five_prime+new_colourized_dna_sequence+trailing_three_prime
  e
  erev 'The Aminoacid sequence with possible phosphorylation sites is:'
  e
  erev ' '+
       ::Bioroebe.colourize_aa(i, ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED).to_s
  e
end
|
1231
|
-
|
1232
|
-
# ========================================================================= #
|
1233
|
-
# === show_molweight
|
1234
|
-
# ========================================================================= #
|
1235
|
-
def show_molweight(use_cliner = true)
  # The weights are reported in this fixed order; any additional
  # entries beyond the fourth one are ignored.
  labels = ['Adenine: ', 'Thymine: ', 'Guanine: ', 'Cytosine: ']
  cliner if use_cliner
  MolecularWeightOfNucleotides.weights.each_with_index {|weight, position|
    label = labels[position]
    next if label.nil?
    erev label+sfancy(weight.to_s)+rev
  }
  cliner if use_cliner
end
|
1250
|
-
|
1251
|
-
# ========================================================================= #
|
1252
|
-
# === show_weight_of_this_nucleotide
|
1253
|
-
#
|
1254
|
-
# Use this method to show the total weight of a specific nucleotide.
|
1255
|
-
#
|
1256
|
-
# Usage examples:
|
1257
|
-
#
|
1258
|
-
# weight? U
|
1259
|
-
# weight? T
|
1260
|
-
# weight? Adenine
|
1261
|
-
#
|
1262
|
-
# ========================================================================= #
|
1263
|
-
def show_weight_of_this_nucleotide(i)
  i = i.to_s
  if i.empty?
    erev 'Please supply a nucleotide, such as "Adenine" or "A".'
    erev 'Note that the short variant is preferred.'
    return
  end
  # Only the first character is relevant, e. g. "Adenine" becomes "A".
  i = i[0,1] if i.size > 1
  yaml_file = FILE_NUCLEOTIDES_WEIGHT
  unless File.exist?(yaml_file)
    return ewarn('We did not find a required file at '+sfile(yaml_file)+rev+'.')
  end
  # ======================================================================= #
  # Build a lookup table that is keyed by the first character of
  # every entry found in the yaml file.
  # ======================================================================= #
  lookup_table = YAML.load_file(yaml_file).each_with_object({}) {|(name, value), hash|
    hash[name[0,1]] = value
  }
  unless lookup_table.has_key?(i)
    return erev('The key `'+sfancy(i)+rev+'` was not found.')
  end
  erev 'The weight of '+sfancy(i)+rev+' is: '+
       sfancy(
         ChemistryParadise.atomic_mass_of(lookup_table[i])
       )
end
|
1290
|
-
|
1291
|
-
# ========================================================================= #
|
1292
|
-
# === show_todo_file
|
1293
|
-
# ========================================================================= #
|
1294
|
-
def show_todo_file
  # Display the todo file of the bioroebe project via cat().
  path_to_the_todo_file = '$RUBY_SRC/bioroebe/doc/TODO_FOR_THE_BIOROEBE_PROJECT.md'
  cat(path_to_the_todo_file)
end
|
1297
|
-
|
1298
|
-
# ========================================================================= #
|
1299
|
-
# === report_where_the_pdf_tutorial_can_be_found
|
1300
|
-
#
|
1301
|
-
# Do notify the user where to find the .pdf tutorial.
|
1302
|
-
# ========================================================================= #
|
1303
|
-
def report_where_the_pdf_tutorial_can_be_found
  # The remote file carries the same name as the local tutorial file.
  remote_url = 'http://shevegen.square7.ch/'+File.basename(FILE_BIOROEBE_TUTORIAL)
  erev 'You can find the tutorial here:'
  e
  erev(' '+simp(remote_url)+rev)
  e
end
|
1310
|
-
|
1311
|
-
# ========================================================================= #
|
1312
|
-
# === show_directory_content
|
1313
|
-
# ========================================================================= #
|
1314
|
-
def show_directory_content(of_this_dir = '*')
  # ======================================================================= #
  # Build the glob pattern without modifying the argument of the caller.
  # The old in-place prepend failed for frozen strings and changed
  # the string object of the caller.
  # ======================================================================= #
  glob_pattern = of_this_dir.include?('*') ? of_this_dir : '*'+of_this_dir
  cliner {
    # Entries are numbered starting at 1; directories get a trailing '/'.
    Dir[glob_pattern].sort.each.with_index(1) {|entry, index|
      entry = entry+'/' if File.directory?(entry)
      erev(index.to_s.rjust(2)+') '+entry)
    }
  }
end
|
1324
|
-
|
1325
|
-
require 'bioroebe/protein_structure/alpha_helix.rb'
|
1326
|
-
# ========================================================================= #
|
1327
|
-
# === show_length_of_alpha_helix
|
1328
|
-
# ========================================================================= #
|
1329
|
-
def show_length_of_alpha_helix(i)
  # Delegate the calculation to Bioroebe::AlphaHelix and report it.
  length_of_the_helix = ::Bioroebe::AlphaHelix.length?(i)
  erev(length_of_the_helix)
end
|
1332
|
-
|
1333
|
-
# ========================================================================= #
|
1334
|
-
# === show_and_calculate_weight_of_dna_string
|
1335
|
-
# ========================================================================= #
|
1336
|
-
def show_and_calculate_weight_of_dna_string(
    i = dna_sequence_object?
  )
  # ======================================================================= #
  # Fall back to the main DNA sequence if no usable input was given.
  # The Array-check must be sent to i (not to self), otherwise an empty
  # Array would never trigger the fallback.
  # ======================================================================= #
  i = dna_sequence_object? if i.nil? or (i.is_a?(Array) and i.empty?)
  # A non-empty Array is treated as the fragments of one sequence.
  i = i.join if i.is_a? Array
  sum = 0
  i.upcase.chars.each {|nucleotide|
    weight = case nucleotide
             when 'A'
               weight_of_adenin?
             when 'T'
               weight_of_thymin?
             when 'C'
               weight_of_cytosin?
             when 'G'
               weight_of_guanin?
             end
    sum += weight.to_f # Unknown characters contribute 0.0.
  }
  # ======================================================================= #
  # Round the sum properly here.
  # ======================================================================= #
  sum = sum.round(2)
  erev 'The weight of this nucleotide sequence is: '+
       simp(sum.to_s)+rev+' Dalton.'
end
|
1362
|
-
|
1363
|
-
# ========================================================================= #
|
1364
|
-
# === show_name_of_the_gene
|
1365
|
-
# ========================================================================= #
|
1366
|
-
def show_name_of_the_gene
  # The name is stored on the main sequence object.
  name_of_the_gene = sequence_object?.name_of_gene
  erev('The name of the gene at hand is: '+sfancy(name_of_the_gene))
end
|
1370
|
-
|
1371
|
-
# ========================================================================= #
|
1372
|
-
# === show_agarose_table
|
1373
|
-
#
|
1374
|
-
# This method will simply show common agarose concentrations.
|
1375
|
-
# ========================================================================= #
|
1376
|
-
def show_agarose_table
  concentrations = load_bioroebe_yaml_file(:agarose)
  e
  e 'Agarose concentrations:'
  e
  # One line per gel concentration and the kb range it can separate.
  concentrations.each_pair {|percentage, kb_range|
    coloured_percentage = simp(percentage.to_s+'%')
    erev ' A concentration of '+coloured_percentage+rev+
         ' will separate DNA fragments between '+sfancy(kb_range)+rev+' kb.'
  }
  e
end
|
1387
|
-
|
1388
|
-
# ========================================================================= #
|
1389
|
-
# === start_codon?
|
1390
|
-
# ========================================================================= #
|
1391
|
-
def start_codon?
  # Simply delegate towards the toplevel Bioroebe API.
  return ::Bioroebe.start_codon?
end
|
1394
|
-
|
1395
|
-
# ========================================================================= #
|
1396
|
-
# === stop_codons?
|
1397
|
-
# ========================================================================= #
|
1398
|
-
def stop_codons?
  # Simply delegate towards the toplevel Bioroebe API.
  return ::Bioroebe.stop_codons?
end
|
1401
|
-
|
1402
|
-
# ========================================================================= #
|
1403
|
-
# === show_all_dmp_files
|
1404
|
-
#
|
1405
|
-
# Show all .dmp files here.
|
1406
|
-
# ========================================================================= #
|
1407
|
-
def show_all_dmp_files
  # show_directory_content() will turn this suffix into a glob pattern.
  suffix_of_dmp_files = '.dmp'
  show_directory_content(suffix_of_dmp_files)
end
|
1410
|
-
|
1411
|
-
# ========================================================================= #
|
1412
|
-
# === show_and_calculate_weight_of_dna_string_or_aminoacid_sequence
|
1413
|
-
# ========================================================================= #
|
1414
|
-
def show_and_calculate_weight_of_dna_string_or_aminoacid_sequence(
    i = dna_sequence_object?
  )
  # Try the main DNA sequence again if no input was given.
  i = dna_sequence_object? if i.nil? and dna_sequence_object?
  # ======================================================================= #
  # Everything that is not an aminoacid is handled as a DNA string.
  # ======================================================================= #
  unless ::Bioroebe.is_aminoacid?(i)
    return show_and_calculate_weight_of_dna_string(i)
  end
  # ======================================================================= #
  # Replace the input with the one-letter code next.
  #
  # NOTE(review): this relies on AMINO_ACIDS_ENGLISH responding to
  # .reverse and on the result being indexable by i. For a plain Hash,
  # .invert would be expected instead - confirm against the definition
  # of that constant.
  # ======================================================================= #
  one_letter_code = AMINO_ACIDS_ENGLISH.reverse[i]
  # ======================================================================= #
  # Obtain the mass of this aminoacid.
  # ======================================================================= #
  average_mass = AMINO_ACIDS_AVERAGE_MASS_TABLE[one_letter_code]
  erev 'The weight of this aminoacid is: '+
       simp(average_mass.to_s)+rev+' Dalton.'
end
|
1438
|
-
|
1439
|
-
# ========================================================================= #
|
1440
|
-
# === show_t_phages
|
1441
|
-
# ========================================================================= #
|
1442
|
-
def show_t_phages
  phages = YAML.load_file(
    ::Bioroebe.yaml_dir?+'viruses/ecoli_phages.yml'
  )
  # ======================================================================= #
  # The columns that follow the name of the phage, together with the
  # width of each column: plaque size (small, medium or large), head
  # diameter, tail length, latent period and burst size.
  # ======================================================================= #
  columns = [
    ['plaque_size',   13],
    ['head',          15],
    ['tail',          13],
    ['latent_period', 15],
    ['burst_size',    12]
  ]
  # ======================================================================= #
  # Next, display that as a table.
  # ======================================================================= #
  erev 'Name of Phage | Plaque Size | Head diameter | tail length | latent period | burst size'
  cliner length: 88
  phages.each_pair {|name_of_phage, properties|
    print '|',name_of_phage.to_s.center(13),'|'
    columns.each {|key, width|
      print properties[key].to_s.center(width),'|'
    }
    e
    cliner length: 88
  }
end
|
1473
|
-
|
1474
|
-
# ========================================================================= #
|
1475
|
-
# === show_html_colours
|
1476
|
-
# ========================================================================= #
|
1477
|
-
def show_html_colours
  # A short header, followed by the listing of the Colours project.
  e 'The available HTML colours are:'
  e
  ::Colours.show_html_colours
  e
end
|
1481
|
-
|
1482
|
-
# ========================================================================= #
|
1483
|
-
# === show_restriction_table
|
1484
|
-
#
|
1485
|
-
# This method will show a restriction table, that is, a table with
|
1486
|
-
# some different restriction enzymes.
|
1487
|
-
#
|
1488
|
-
# To invoke this method, do:
|
1489
|
-
#
|
1490
|
-
# show_restriction_table
|
1491
|
-
#
|
1492
|
-
# ========================================================================= #
|
1493
|
-
def show_restriction_table
  most_ljust = 20
  horizontal_bar = '---------------------------------------------------------'
  # One row per cutter: the label, one example enzyme and its target site.
  rows = [
    [' 4-cutter', 'ChaI',  'GATC'],
    [' 5-cutter', 'FmuI',  'GGNCC'],
    [' 6-cutter', 'EcoRI', 'GAATTC'],
    [' 7-cutter', 'PfoI',  'TCCNGGA'],
    [' 8-cutter', 'PacI',  'TTAATTAA']
  ]
  erev 'Showing a few different cutters (4,5,6,7,8) in table format next:'
  erev horizontal_bar
  rows.each {|label, enzyme, target_site|
    e peru(label.ljust(most_ljust))+' | '+orange(enzyme.ljust(10))+' | '+
      olivedrab(target_site.ljust(10))
  }
  erev horizontal_bar
end
|
1509
|
-
|
1510
|
-
# ========================================================================= #
|
1511
|
-
# === show_numbered_nucleotide_positions
|
1512
|
-
#
|
1513
|
-
# This method will show "numbered" nucleotide positions such as:
|
1514
|
-
#
|
1515
|
-
# 1234567890123456789012345
|
1516
|
-
# ATGCAGGTCATCAGTCAGTCAGTCA
|
1517
|
-
#
|
1518
|
-
# ========================================================================= #
|
1519
|
-
def show_numbered_nucleotide_positions
  # Split the sequence into lines of at most 40 nucleotides each.
  lines = sequence?.string?.chars.each_slice(40).map(&:join)
  lines.each {|line|
    # ===================================================================== #
    # The ruler restarts for every line and shows the last digit of
    # the position only, so every tenth position is displayed as 0.
    # ===================================================================== #
    ruler = (1..line.chars.size).map {|position| (position % 10).to_s }.join
    e lightsteelblue(ruler)
    erev line
  }
end
|
1538
|
-
|
1539
|
-
# ========================================================================= #
|
1540
|
-
# === show_fastq_quality_score_table
|
1541
|
-
# ========================================================================= #
|
1542
|
-
def show_fastq_quality_score_table
  yaml_file = FILE_FASTQ_QUALITY_SCHEMES
  # Nothing is reported if the yaml file does not exist.
  return unless File.exist? yaml_file
  # The label that is shown for each property of a quality scheme.
  properties = [
    [' Ascii character range: ', 'ascii_character_range'],
    [' Offset: ',                'offset'],
    [' Quality score type: ',    'quality_score_type'],
    [' Quality score range: ',   'quality_score_range']
  ]
  dataset = YAML.load_file(yaml_file)
  dataset.keys.each {|name_of_the_scheme|
    e sfancy(name_of_the_scheme+':')
    e
    scheme = dataset[name_of_the_scheme]
    properties.each {|label, key|
      erev label+seagreen(scheme[key].to_s)
    }
    e
  }
  e
end
|
1563
|
-
|
1564
|
-
# ========================================================================= #
|
1565
|
-
# === report_the_protein_weight
|
1566
|
-
# ========================================================================= #
|
1567
|
-
def report_the_protein_weight
  sequence = aminoacid_sequence?
  if sequence.include? '*'
    erev 'Note that this aminoacid sequence has a stop codon, denoted by the *:'
    e
    erev ' '+sfancy(sequence)+rev
    e
    erev 'Since a stop codon is not translated into an aminoacid'
    erev 'it makes little sense to include it into the weight-calculation.'
    erev 'Thus, we will use only the part up to the first * token.'
    # ===================================================================== #
    # Keep everything in front of the first '*'. A start/length slice is
    # used because a range ending at (index - 1) becomes 0 .. -1 when the
    # '*' is the very first character, which would keep the whole
    # sequence, including the stop token.
    # ===================================================================== #
    sequence = sequence[0, sequence.index('*')]
  end
  sum = ::Bioroebe.amino_acid_average_mass(sequence)
  e 'The total weight of these '+simp(sequence.size.to_s)+rev+
    ' aminoacids is: '+sfancy(sum.to_f.round(2).to_s)+rev+
    ' Dalton'
end
|
1584
|
-
|
1585
|
-
# ========================================================================= #
|
1586
|
-
# === report_all_stop_codons
|
1587
|
-
#
|
1588
|
-
# This method will report all stop codons in the given sequence.
|
1589
|
-
#
|
1590
|
-
# We will not modify the input given to this method.
|
1591
|
-
#
|
1592
|
-
# The three stop codons, in RNA, are:
|
1593
|
-
#
|
1594
|
-
# UGA
|
1595
|
-
# UAG
|
1596
|
-
# UAA
|
1597
|
-
#
|
1598
|
-
# ========================================================================= #
|
1599
|
-
def report_all_stop_codons(
    i = dna_sequence_object?
  )
  # ======================================================================= #
  # Work on an upcased copy, so that the input given to this method is
  # not modified. This also allows frozen strings as input.
  # ======================================================================= #
  sequence = i.upcase
  erev 'Our input sequence has '+simp(sequence.size.to_s)+rev+' nucleotides.'
  # ======================================================================= #
  # The three stop codons are given in their RNA notation; for DNA
  # the U is replaced with T.
  # ======================================================================= #
  %w( UGA UAG UAA ).each {|stop_codon|
    stop_codon = stop_codon.tr('U','T') if is_dna?
    n_matches = sequence.scan(/#{stop_codon}/).size
    erev 'We did find '+
         simp(n_matches.to_s.rjust(2))+rev+' '+stop_codon+' stop codons.'
  }
end
|
1619
|
-
|
1620
|
-
# ========================================================================= #
|
1621
|
-
# === determine_and_report_all_stop_codons
|
1622
|
-
# ========================================================================= #
|
1623
|
-
def determine_and_report_all_stop_codons
  dna_sequence = dna_sequence_object?
  erev 'Because 3 different stop codons exist, we have to do '+
       slateblue('3 runs')+rev+'.'
  # One run per stop codon; only the last match of each is reported.
  stop_codons?.each {|this_stop_codon|
    matches = ::Bioroebe.return_all_substring_matches(
      dna_sequence, this_stop_codon
    )
    if matches.empty?
      erev 'No match has been found.'
      next
    end
    position_of_the_last_match = matches.last.first
    erev 'For the stop codon '+sfancy(this_stop_codon)+rev+' the last codon'
    erev 'occurrs at position '+simp(position_of_the_last_match.to_s)+rev+'.'
  }
end
|
1640
|
-
|
1641
|
-
# ========================================================================= #
|
1642
|
-
# === show_seq_1
|
1643
|
-
# ========================================================================= #
|
1644
|
-
def show_seq_1(i = seq1?)
  # Display the sequence flanked by the 5' and 3' markers.
  five_prime_part = padding?+leading_five_prime
  coloured_sequence = sfancy(i)
  erev(five_prime_part+coloured_sequence+rev+trailing_three_prime)
end
|
1648
|
-
|
1649
|
-
# ========================================================================= #
|
1650
|
-
# === show_seq_2
|
1651
|
-
# ========================================================================= #
|
1652
|
-
def show_seq_2(i = seq2?)
  # Display the sequence flanked by the 5' and 3' markers.
  five_prime_part = padding?+leading_five_prime
  coloured_sequence = sfancy(i)
  erev(five_prime_part+coloured_sequence+rev+trailing_three_prime)
end
|
1656
|
-
|
1657
|
-
# ========================================================================= #
|
1658
|
-
# === show_seq_3
|
1659
|
-
# ========================================================================= #
|
1660
|
-
def show_seq_3(i = seq3?)
  # Display the sequence flanked by the 5' and 3' markers.
  five_prime_part = padding?+leading_five_prime
  coloured_sequence = sfancy(i)
  erev(five_prime_part+coloured_sequence+rev+trailing_three_prime)
end
|
1664
|
-
|
1665
|
-
# ========================================================================= #
|
1666
|
-
# === show_seq_4
|
1667
|
-
# ========================================================================= #
|
1668
|
-
def show_seq_4
  # Display the fourth sequence flanked by the 5' and 3' markers.
  five_prime_part = padding?+leading_five_prime
  coloured_sequence = sfancy(seq4?)
  erev(five_prime_part+coloured_sequence+rev+trailing_three_prime)
end
|
1671
|
-
|
1672
|
-
# ========================================================================= #
|
1673
|
-
# === show_seq_5
|
1674
|
-
# ========================================================================= #
|
1675
|
-
def show_seq_5
  # Display the fifth sequence flanked by the 5' and 3' markers.
  five_prime_part = padding?+leading_five_prime
  coloured_sequence = sfancy(seq5?)
  erev(five_prime_part+coloured_sequence+rev+trailing_three_prime)
end
|
1678
|
-
|
1679
|
-
# ========================================================================= #
|
1680
|
-
# === show_seq_6
|
1681
|
-
# ========================================================================= #
|
1682
|
-
def show_seq_6
  # Display the sixth sequence flanked by the 5' and 3' markers.
  five_prime_part = padding?+leading_five_prime
  coloured_sequence = sfancy(seq6?)
  erev(five_prime_part+coloured_sequence+rev+trailing_three_prime)
end
|
1685
|
-
|
1686
|
-
# ========================================================================= #
|
1687
|
-
# === show_start_and_stop_codons
|
1688
|
-
#
|
1689
|
-
# This will show BOTH start and stop codons, in different colours.
|
1690
|
-
#
|
1691
|
-
# Since start codons may be more important, we will first locate
|
1692
|
-
# and colourize them, and afterwards, will also colourize the
|
1693
|
-
# stop codons.
|
1694
|
-
# ========================================================================= #
|
1695
|
-
def show_start_and_stop_codons
  sequence    = string?
  start_codon = ::Bioroebe.start_codon?
  stop_codons = ::Bioroebe.stop_codons?
  # ======================================================================= #
  # Colourize a copy of the sequence. The non-destructive gsub is used
  # so that no colour escape codes end up in the string object that
  # string? returned.
  #
  # Start codons are colourized first, the stop codons afterwards.
  # ======================================================================= #
  colourized = sequence.gsub(
    /(#{start_codon})/, yellow + '\\1' + colour_for_nucleotide
  )
  stop_codons.each {|stop_codon|
    colourized = colourized.gsub(
      /(#{stop_codon})/, salmon('\\1') + colour_for_nucleotide
    )
  }
  erev 'Start codon: ' + yellow + start_codon + rev
  listing_of_stop_codons = stop_codons.join(', ').strip
  if listing_of_stop_codons.end_with? ','
    listing_of_stop_codons = listing_of_stop_codons.chop
  end
  # ======================================================================= #
  # Show the stop codons that we will use:
  # ======================================================================= #
  erev 'Stop codons: ' + salmon(listing_of_stop_codons) + rev
  erev dna_padding(colourized)
end
|
1712
|
-
|
1713
|
-
# ========================================================================= #
|
1714
|
-
# === report_when_the_bioroebe_project_was_last_updated
|
1715
|
-
# ========================================================================= #
|
1716
|
-
def report_when_the_bioroebe_project_was_last_updated
  headline = 'The Bioroebe-Project was last updated on: '+
             slateblue(LAST_UPDATE)+rev
  # The time difference is in seconds; convert it into days.
  n_seconds = Time.now - Time.parse(LAST_UPDATE)
  n_days_difference = (n_seconds/60/60/24).round(2).to_s
  erev headline+' (~'+n_days_difference+' days ago)'
end
|
1724
|
-
|
1725
|
-
# ========================================================================= #
|
1726
|
-
# === show_information_about_the_gff_format
|
1727
|
-
# ========================================================================= #
|
1728
|
-
def show_information_about_the_gff_format
  # ======================================================================= #
  # Every field of the .gff format, together with the lines that
  # describe it. The fields are shown in this order.
  # ======================================================================= #
  fields = [
    ['seqname:', [
      'This is the name of the chromosome or scaffold; chromosome names',
      'can be given with or without the "chr" prefix.',
      'Important note: the seqname must be one used within Ensembl, ',
      'i.e. a standard chromosome name or an Ensembl identifier such as a',
      'scaffold ID, without any additional content such as species or',
      'assembly. See the example GFF output below.'
    ]],
    ['source:', [
      'Name of the program that generated this feature, or ',
      'the data source (database or project name)'
    ]],
    ['feature:', [
      'feature type name, e.g. Gene, Variation, Similarity'
    ]],
    ['start:', [
      'Start position of the feature, with sequence numbering starting at 1.'
    ]],
    ['end:', [
      'End position of the feature, with sequence numbering starting at 1.'
    ]],
    ['score:', [
      'A floating point value.'
    ]],
    ['strand:', [
      'defined as + (forward) or - (reverse).'
    ]],
    ['frame:', [
      " - One of '0', '1' or '2'. '0' indicates that the first base ",
      "of the feature is the first base of a codon, '1' that the second ",
      "base is the first base of a codon, and so on."
    ]],
    ['attribute:', [
      'A semicolon-separated list of tag-value pairs, providing ',
      'additional information about each feature.'
    ]]
  ]
  erev 'Fields must be tab-separated in the .gff format.'
  e
  erev 'All but the final field in each feature line must'
  erev 'contain a value; "empty" columns should be denoted with a "."'
  # Each field is separated from the previous content by an empty line.
  fields.each {|name_of_the_field, description|
    e
    egold name_of_the_field
    description.each {|line| erev line }
  }
  e
end
|
1772
|
-
|
1773
|
-
# ========================================================================= #
|
1774
|
-
# === show_header_of_this_pdb_file
|
1775
|
-
# ========================================================================= #
|
1776
|
-
def show_header_of_this_pdb_file(i)
  # Print the first two lines of the file at path +i+, each with its leading
  # record name (the first whitespace-separated token) removed.
  #
  # Only two lines are needed, so the file is streamed rather than slurped.
  # Files with fewer than two lines, or with blank leading lines, yield
  # empty strings instead of raising.
  first, second = File.foreach(i).first(2).map { |line|
    line.split(' ').drop(1).join(' ').strip
  }
  erev first.to_s
  erev ' '+second.to_s
end
|
1783
|
-
|
1784
|
-
# ========================================================================= #
|
1785
|
-
# === show_useful_URLs
|
1786
|
-
#
|
1787
|
-
# This method will simply show some important, bioinformatics related
|
1788
|
-
# URLs. In particular URLs that may be important for bioinformatics
|
1789
|
-
# related tasks, e. g. NCBI, GenBank and so forth.
|
1790
|
-
# ========================================================================= #
|
1791
|
-
def show_useful_URLs
  # Label / lookup-key pairs, printed in this order. The key is handed
  # to obtain_url_for() to resolve the actual URL.
  labelled_keys = [
    ['NCBI: ',    :ncbi],
    ['GenBank: ', :genbank],
    ['PDB: ',     :pdb],
    ['Prosite: ', :prosite]
  ]
  e
  labelled_keys.each { |label, key|
    erev label+sfancy(obtain_url_for(key))
  }
  e
end
|
1799
|
-
|
1800
|
-
# ========================================================================= #
|
1801
|
-
# === show_header_of
|
1802
|
-
# ========================================================================= #
|
1803
|
-
def show_header_of(i)
  # An Array is handled by recursing into every entry.
  return i.each { |path| show_header_of(path) } if i.is_a?(Array)
  unless File.exist?(i)
    erev "No file exists at `#{sfile(i)}#{rev}`."
    return
  end
  # Only .pdb files are supported right now; any other file type is
  # silently ignored. Regexp#=== mirrors a case/when comparison.
  show_header_of_this_pdb_file(i) if /\.pdb$/ === i
end
|
1820
|
-
|
1821
|
-
# ========================================================================= #
|
1822
|
-
# === show_GFP_sequence (gfp tag)
|
1823
|
-
#
|
1824
|
-
# This method will show the GFP sequence, on the DNA level.
|
1825
|
-
# ========================================================================= #
|
1826
|
-
def show_GFP_sequence
  # Print the 5' header followed by the default GFP DNA sequence.
  header = return_five_prime_header
  erev header+return_default_GFP_sequence
end
|
1830
|
-
|
1831
|
-
# ========================================================================= #
|
1832
|
-
# === return_default_GFP_sequence
|
1833
|
-
# ========================================================================= #
|
1834
|
-
def return_default_GFP_sequence(path_to_the_file = FILE_GFP_SEQUENCE)
  # Parse the FASTA file (the block passes :be_quiet to Fasta.new) and
  # hand back the sequence it contains.
  fasta = Fasta.new(path_to_the_file) { :be_quiet }
  fasta.return_sequence
end
|
1839
|
-
|
1840
|
-
# ========================================================================= #
|
1841
|
-
# === try_to_show_the_configuration
|
1842
|
-
# ========================================================================= #
|
1843
|
-
def try_to_show_the_configuration
  # Show the configuration object, if it knows how to display itself.
  @config.show_config if @config.respond_to?(:show_config)
  # Then report whether cd-aliases will be expanded.
  answer = verbose_truth(use_expand_cd_aliases?).to_s
  erev 'Will we use class Rcfiles::DirectoryAliases: '+simp(answer)
end
|
1850
|
-
|
1851
|
-
require 'bioroebe/aminoacids/aminoacids_mass_table.rb'
|
1852
|
-
# ========================================================================= #
|
1853
|
-
# === show_aminoacids_mass_table
|
1854
|
-
#
|
1855
|
-
# This shows the weight of the aminoacids, in a table-layout.
|
1856
|
-
# ========================================================================= #
|
1857
|
-
def show_aminoacids_mass_table
  # First report which file backs the table, then display the table
  # itself using the current padding.
  mass_table = AminoacidsMassTable
  mass_table.report_which_file_is_used
  mass_table.show(padding?) # bl aminoacids_mass_table.rb
end
alias aminoacid_table_overview show_aminoacids_mass_table # === aminoacid_table_overview
|
1861
|
-
|
1862
|
-
require 'bioroebe/utility_scripts/pathways.rb'
|
1863
|
-
# ========================================================================= #
|
1864
|
-
# === show_all_pathways
|
1865
|
-
#
|
1866
|
-
# Simply show all Pathways.
|
1867
|
-
# ========================================================================= #
|
1868
|
-
def show_all_pathways
  # Pure delegation to the toplevel Pathways module.
  pathways = ::Bioroebe::Pathways
  pathways.show_all_pathways
end
|
1871
|
-
|
1872
|
-
# ========================================================================= #
|
1873
|
-
# === show_sequence_in_splitted_form
|
1874
|
-
#
|
1875
|
-
# We will show the main DNA sequence in a three-letter splitted form.
|
1876
|
-
#
|
1877
|
-
# You can optionally use an argument, the first argument, a number. By
|
1878
|
-
# default this is 3, so we will split into chunks of 3.
|
1879
|
-
#
|
1880
|
-
# The second argument says which token we will use for rejoining. It
|
1881
|
-
# defaults to ' ' so the nucleotides will be rejoined via ' ', but
|
1882
|
-
# you can also use another token such as '-', which may lead to a
|
1883
|
-
# String such as 'ATG-CGA-ACC' and so forth.
|
1884
|
-
# ========================================================================= #
|
1885
|
-
def show_sequence_in_splitted_form(
    how_many                     = 3,
    use_this_token_for_rejoining = ' ' # <- Which token to use for the re-joining action.
  )
  # nil and :default both select the default chunk size.
  how_many = 3 if [nil, :default].include?(how_many)
  # One "." per character of a chunk, e.g. /.../ for chunks of three.
  chunk_regex = Regexp.new('.' * how_many.to_i)
  return erev('Please first "assign" a sequence.') if string?.empty?
  if block_given?
    options = yield
    # =================================================================== #
    # === :use_this_token
    #
    # A Hash yielded by the block may override the rejoin-token.
    # =================================================================== #
    if options.is_a?(Hash) && options.has_key?(:use_this_token)
      use_this_token_for_rejoining = options.delete(:use_this_token)
    end
  end
  chunks = string?.to_s.scan(chunk_regex).map { |chunk|
    # =================================================================== #
    # Colourize start codons and stop codons; everything else is
    # passed through unchanged.
    # =================================================================== #
    if is_this_a_start_codon?(chunk)
      mediumseagreen(chunk)+return_colour_for_nucleotides
    elsif is_this_a_stop_codon?(chunk)
      mediumorchid(chunk)+return_colour_for_nucleotides
    else
      chunk
    end
  }
  rejoined = chunks.join(use_this_token_for_rejoining)
  # ===================================================================== #
  # Finally show the sequence.
  # ===================================================================== #
  erev left_padding?+
       five_prime+
       return_colour_for_nucleotides+
       rejoined+
       rev+
       three_prime
end
|
1936
|
-
|
1937
|
-
# ========================================================================= #
|
1938
|
-
# === show_disulfides
|
1939
|
-
#
|
1940
|
-
# Show the (possible) disulfide positions in a protein.
|
1941
|
-
# ========================================================================= #
|
1942
|
-
def show_disulfides
  sequence = aminoacid_sequence?
  # Without any cysteine ('C') there is nothing further to report.
  unless sequence.include?('C')
    return e('This aminoacid sequence has no cystein. Thus, '\
             'there can not be any disulfide bonds.')
  end
  n_cysteines = sequence.count('C')
  erev "This aminoacid sequence has #{steelblue(n_cysteines.to_s)}#{rev} cysteines."
  # A disulfide bond needs two cysteines.
  return unless n_cysteines > 1
  erev 'Thus, there could be disulfide bonds. '+
       gold(cheerful_person)+rev
  show_sequence_with_a_ruler(:default, sequence)
  erev 'The positions of cysteines are at:'
  # Positions are reported 1-based.
  sequence.chars.each.with_index(1) { |aminoacid, position|
    next unless aminoacid == 'C'
    erev 'Position: '+steelblue(position.to_s.rjust(3))
  }
end
|
1963
|
-
|
1964
|
-
# ========================================================================= #
|
1965
|
-
# === show_aminoacids_residues
|
1966
|
-
# ========================================================================= #
|
1967
|
-
def show_aminoacids_residues
  erev 'The aminoacid residues are:'
  e
  ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS.each do |long_name|
    # The residue table is keyed by the downcased long name.
    residue = AMINO_ACIDS_RESTE[long_name.downcase]
    erev long_name.ljust(14)+': '+simp(residue)
  end
  e
end
|
1974
|
-
|
1975
|
-
# ========================================================================= #
|
1976
|
-
# === show_hint_how_to_use_the_local_sequences
|
1977
|
-
#
|
1978
|
-
# Show a hint for the user.
|
1979
|
-
# ========================================================================= #
|
1980
|
-
def show_hint_how_to_use_the_local_sequences
  # The hint only makes sense when at least one FASTA file exists in
  # the log directory.
  return if return_fasta_files_in_the_log_directory.empty?
  erev 'You can load up any of these sequences by issuing:'
  e
  erev ' use_this_fasta 1 # for file number 1'
  e
end
|
1988
|
-
|
1989
|
-
# ========================================================================= #
|
1990
|
-
# === colour_for_stop_codon
|
1991
|
-
# ========================================================================= #
|
1992
|
-
def colour_for_stop_codon(i)
  # Stop codons are shown in orange.
  orange(i)
end
|
1995
|
-
|
1996
|
-
# ========================================================================= #
|
1997
|
-
# === colour_for_nucleotide
|
1998
|
-
# ========================================================================= #
|
1999
|
-
def colour_for_nucleotide(i = '')
  # Nucleotides are shown in royalblue.
  royalblue(i)
end
alias colour_for_nucleotides colour_for_nucleotide # === colour_for_nucleotides
|
2002
|
-
|
2003
|
-
# ========================================================================= #
|
2004
|
-
# === report_this_dna_sequence_with_proper_trailer_and_leader
|
2005
|
-
# ========================================================================= #
|
2006
|
-
def report_this_dna_sequence_with_proper_trailer_and_leader(i)
  # Print the DNA sequence +i+, colourized, between the 5' leader and
  # the 3' trailer.
  #
  # A block returning :try_to_colourize_start_codon requests that a
  # leading start codon be highlighted.
  #
  # String#to_s returns the receiver itself, so work on a private,
  # unfrozen copy: the start codon is replaced in place below and this
  # must neither alter the caller's String nor fail on a frozen one.
  i = i.to_s.dup
  if block_given?
    case yield
    when :try_to_colourize_start_codon
      # =================================================================== #
      # We will try to colourize the start codon here.
      # =================================================================== #
      if i.start_with?(start_codon?)
        i[0, 3] = cyan(i[0, 3])+return_colour_for_nucleotides
      end
    end
  end
  colourized_dna_sequence = remove_trailing_escape_code(
    colourize_this_dna_sequence(i)
  )
  erev left_pad?+
       leading_5_prime+
       colourized_dna_sequence+
       rev+
       trailing_3_prime
end
|
2030
|
-
|
2031
|
-
# ========================================================================= #
|
2032
|
-
# === show_hydropathy_table
|
2033
|
-
#
|
2034
|
-
# Show the hydropathy table.
|
2035
|
-
# ========================================================================= #
|
2036
|
-
def show_hydropathy_table
  e
  HYDROPATHY_TABLE.each_pair do |aminoacid_one_letter, hydropathy_value|
    # Right-align the value in a column of width 4.
    aligned_value = hydropathy_value.to_s.rjust(4)
    e ' '+sfancy(aminoacid_one_letter)+' | '+simp(aligned_value)
  end
  e
end
|
2043
|
-
|
2044
|
-
# ========================================================================= #
|
2045
|
-
# === show_known_nls_sequences
|
2046
|
-
#
|
2047
|
-
# This Wikipedia page may be useful:
|
2048
|
-
# http://en.wikipedia.org/wiki/Nuclear_localization_sequence
|
2049
|
-
# ========================================================================= #
|
2050
|
-
def show_known_nls_sequences
  column_width = 36 # Width of the (left-aligned) name column.
  erev 'These NLS sequences are known:'+N+N
  NUCLEAR_LOCALIZATION_SEQUENCES.each_pair do |name, sequence|
    e sfancy(name.ljust(column_width))+' '+sequence
  end
end
|
2057
|
-
|
2058
|
-
# ========================================================================= #
|
2059
|
-
# === report_mode
|
2060
|
-
# ========================================================================= #
|
2061
|
-
def report_mode
  # Print whatever mode? currently returns.
  erev(mode?)
end
|
2064
|
-
|
2065
|
-
# ========================================================================= #
|
2066
|
-
# === show_reste
|
2067
|
-
#
|
2068
|
-
# This will show the residues of the various amino acids.
|
2069
|
-
# ========================================================================= #
|
2070
|
-
def show_reste
  e
  AMINO_ACIDS_RESTE.each_pair do |aminoacid, residue|
    erev ' '+aminoacid.ljust(14)+' -> '+sfancy(residue)
  end
  e
end
|
2075
|
-
|
2076
|
-
require 'bioroebe/string_matching/simple_string_comparer.rb'
|
2077
|
-
# ========================================================================= #
|
2078
|
-
# === show_sixpack_alignment
|
2079
|
-
#
|
2080
|
-
# We will feed some input to class Bioroebe::SimpleStringComparer.
|
2081
|
-
# ========================================================================= #
|
2082
|
-
def show_sixpack_alignment(
    i = dna_sequence_object?
  )
  # Interactively ask for two sequences on $stdin and compare them via
  # class Bioroebe::SimpleStringComparer.
  #
  # NOTE(review): the argument +i+ is not used by this method; it is
  # kept so that existing callers continue to work.
  #
  # IO#gets returns nil at end-of-input (closed or piped $stdin), hence
  # the .to_s before .chomp - an exhausted input counts as an empty
  # sequence rather than raising NoMethodError.
  erev 'Input sequence 1:'
  string1 = $stdin.gets.to_s.chomp
  erev 'Input sequence 2:'
  string2 = $stdin.gets.to_s.chomp
  # ======================================================================= #
  # Delegate into class SimpleStringComparer next.
  # ======================================================================= #
  comparer = ::Bioroebe::SimpleStringComparer.new(:dont_run_yet) # bl $BIOROEBE/string_matching/simple_string_comparer.rb
  comparer.set_main_alignment_token_to '|'
  comparer.string1 = string1
  comparer.string2 = string2
  comparer.compare
end
|
2098
|
-
|
2099
|
-
# ========================================================================= #
|
2100
|
-
# === show_average_weight_of_a_nucleotide
|
2101
|
-
#
|
2102
|
-
# The formula was obtained from the following website:
|
2103
|
-
#
|
2104
|
-
# http://www.biophp.org/minitools/useful_formulas/demo.php
|
2105
|
-
#
|
2106
|
-
# ========================================================================= #
|
2107
|
-
def show_average_weight_of_a_nucleotide
  # Average weights, in Dalton, as reported to the user.
  double_stranded = sfancy('660')
  erev 'The average molecular weight (MW) of dsDNA is '+double_stranded+' Da.'
  single_stranded = sfancy('330')
  erev 'The average molecular weight (MW) of ssDNA is '+single_stranded+' Da.'
end
|
2111
|
-
|
2112
|
-
# ========================================================================= #
|
2113
|
-
# === show_config_dir
|
2114
|
-
#
|
2115
|
-
# This method will show the configuration directory.
|
2116
|
-
# ========================================================================= #
|
2117
|
-
def show_config_dir
  # The configuration directory sits next to this very file.
  config_dir = "#{File.dirname(__FILE__)}/configuration/"
  erev 'The configuration directory for the Bioroebe::Shell is at:'
  erev ' `'+sfile(config_dir)+rev+'`'
end
|
2122
|
-
|
2123
|
-
# ========================================================================= #
|
2124
|
-
# === show_last_downloaded_file
|
2125
|
-
# ========================================================================= #
|
2126
|
-
def show_last_downloaded_file
  downloads = @array_all_downloads
  return erev('We have not yet downloaded any file.') if downloads.empty?
  # The most recent download is the last entry.
  erev 'The last downloaded data was: '+
       sfancy(downloads.last)
end
|
2134
|
-
|
2135
|
-
# ========================================================================= #
|
2136
|
-
# === show_jumper_directories
|
2137
|
-
# ========================================================================= #
|
2138
|
-
def show_jumper_directories
  jumper_directories = @internal_hash[:array_jumper_directories]
  return erev('No jumper directory has been assigned yet.') if jumper_directories.empty?
  erev 'The available jumper directories are:'
  pp jumper_directories
end
|
2146
|
-
|
2147
|
-
# ========================================================================= #
|
2148
|
-
# === show_save_file
|
2149
|
-
# ========================================================================= #
|
2150
|
-
def show_save_file
  # Tell the user where data is stored, and how to redirect the
  # storing into the current directory instead.
  target_file = sfile(save_file?)
  erev 'We will store into the file '+target_file+rev+'.'
  erev 'If you wish to instead store into the current directory,'
  erev 'input "save_here".'
end
|
2155
|
-
|
2156
|
-
# ========================================================================= #
|
2157
|
-
# === show_sigma_tutorial
|
2158
|
-
#
|
2159
|
-
# This method tells the user a bit about the sigma factors.
|
2160
|
-
# ========================================================================= #
|
2161
|
-
def show_sigma_tutorial
  # Every inner Array is one paragraph; each of its entries is printed
  # on a line of its own and the paragraph is followed by an empty line.
  paragraphs = [
    ['This subsection contains some information about Sigmafactors.'],
    ['A sigma factor a protein needed for initiation of RNA synthesis.'],
    ['It is a bacterial transcription initiation factor.'],
    ['It will enable the specific binding of RNA polymerase to gene promoters.'],
    ['Sigma factors vary, which allows the bacterial cell to respond to',
     'different environmental signals.'],
    ['Every molecule of RNA polymerase holoenzyme will contain only one sigma factor.'],
    ['The number of sigma factors varies between bacterial species.'],
    ['E. coli has seven sigma factors.'],
    ['Sigma factors are distinguished by their characteristic molecular weights.'],
    ['For instance, sigma-70 refers to the sigma factor with a molecular weight of 70 kDa.'],
    ['Once initiation of RNA transcription is complete, the sigma',
     'factor can leave the complex.']
  ]
  paragraphs.each do |lines|
    lines.each { |line| erev line }
    e
  end
  # The closing pointer towards NCBI has no trailing empty line.
  erev 'Sigmafactor rpoD 70 can be found here:'
  e ' '+simp('http://www.ncbi.nlm.nih.gov/gene/947567')
end
|
2192
|
-
|
2193
|
-
# ========================================================================= #
|
2194
|
-
# === show_last_input
|
2195
|
-
#
|
2196
|
-
# sli can be used as command to access this method.
|
2197
|
-
# ========================================================================= #
|
2198
|
-
def show_last_input
  if readline_is_available?
    history = Readline::HISTORY
    # Show the newest history entry, then remove it from the history.
    e sfancy(history[-1])
    history.pop
  end
  last_input = sfancy(@user_input)
  e "The last user input was: #{last_input}"
end
|
2205
|
-
|
2206
|
-
# ========================================================================= #
|
2207
|
-
# === show_mnemo
|
2208
|
-
#
|
2209
|
-
# A little helper-method to memorize things.
|
2210
|
-
# ========================================================================= #
|
2211
|
-
def show_mnemo
  # Enzyme classes paired with the reaction type they catalyze; the
  # class name is shown colourized, the description in the default colour.
  enzyme_classes = [
    ['Oxidoreduktasen:', ' Oxidations-Reduktions-Reaktionen'],
    ['Transferasen:',    ' Übertragung funktioneller Gruppen'],
    ['Hydrolasen:',      ' Hydrolasereaktionen'],
    ['Lyasen:',          ' Eliminierung von Gruppen unter Ausbildung von Doppelbindungen'],
    ['Isomerasen:',      ' Isomerisierungen'],
    ['Ligasen:',         ' ATP-hydrolytic formation of bonds']
  ]
  e
  erev 'Amino Acids with negatively charged side groups: -'
  e sfancy(' D E')
  erev 'Amino Acids with positive charged side groups: +'
  e sfancy(' K R H')
  e
  enzyme_classes.each do |class_name, description|
    e sfancy(class_name)+rev+description
  end
  e
end
|
2227
|
-
|
2228
|
-
# ========================================================================= #
|
2229
|
-
# === show_histone_table
|
2230
|
-
# ========================================================================= #
|
2231
|
-
def show_histone_table
  # Header line first, then one row per histone.
  table_rows = [
    'Histone | number of residues | mass in kDa | n% Arginine | n% Lysine',
    ' H1 215 23.0 1 29',
    ' H2A 129 14.0 9 11',
    ' H2B 125 13.8 6 16',
    ' H3 135 15.3 13 10',
    ' H4 102 11.3 14 11'
  ]
  erev 'The following table will show Calf Thymus Histones:'
  e
  table_rows.each { |row| erev row }
  e
end
|
2242
|
-
|
2243
|
-
# ========================================================================= #
|
2244
|
-
# === show_average_weight_of_an_aminoacid
|
2245
|
-
#
|
2246
|
-
# Show the average weight for an aminoacid that is part of a protein.
|
2247
|
-
# ========================================================================= #
|
2248
|
-
def show_average_weight_of_an_aminoacid
  # Average weight, in Dalton, as reported to the user.
  average_weight = sfancy('110')
  erev 'The average molecular weight (MW) of an amino acid is '+average_weight+' Da.'
end
|
2252
|
-
|
2253
|
-
# ========================================================================= #
|
2254
|
-
# === show_first_orf
|
2255
|
-
#
|
2256
|
-
# This will show the first ORF.
|
2257
|
-
#
|
2258
|
-
# Invocation example:
|
2259
|
-
#
|
2260
|
-
# show_first_orf
|
2261
|
-
#
|
2262
|
-
# ========================================================================= #
|
2263
|
-
def show_first_orf(
    of_this_sequence = dna_sequence_object?
  )
  sequence = of_this_sequence
  # Every possible start codon is tried in turn and reported on.
  return_all_possible_start_codons.each do |codon|
    unless sequence.include?(codon)
      erev 'Not found the codon '+simp(codon)+rev+'.'
      next
    end
    # Show everything from the first occurrence of the codon onwards;
    # the start position is reported 1-based.
    position  = sequence.index(codon)
    remainder = sequence[position..-1]
    e rev+padding?+leading_5_prime+sfancy(remainder)+
      rev+trailing_3_prime+' (Start position at nucleotide: '+
      orange((position+1).to_s)+rev+')'
  end
end
|
2279
|
-
|
2280
|
-
# ========================================================================= #
|
2281
|
-
# === show_available_vectors
|
2282
|
-
# ========================================================================= #
|
2283
|
-
def show_available_vectors
  erev 'We will next try to show the available vectors.'
  erev 'For now, these are all file names that start with the '\
       'the prefix '+orange('vector_')+rev+'.'
  vectors = return_available_vectors # Defined in bioroebe/shell.rb
  return erev('No vector-sequence was found.') if vectors.empty?
  erev 'We found at the least one entry.'
  print ' '
  pp vectors
  # The first vector found becomes the second sequence of the shell.
  erev 'Assigning the first one to the second sequence.'
  set_sequence_2(Bioroebe::Sequence.sequence_from_file(vectors.first))
  erev 'You can feedback this sequence via:'
  e
  erev ' seq2?'
  e
end
|
2302
|
-
|
2303
|
-
# ========================================================================= #
|
2304
|
-
# === report_current_genbank_version
|
2305
|
-
#
|
2306
|
-
# You can use this method to report the current genbank version.
|
2307
|
-
# ========================================================================= #
|
2308
|
-
def report_current_genbank_version(
    optional_arguments = nil
  )
  # Fetch the GenBank statistics page and report the release number and
  # release date found in the last row of its statistics table.
  #
  # Pass :also_report_the_URL to additionally show the URL that is queried.
  remote_url = 'https://www.ncbi.nlm.nih.gov/genbank/statistics/'
  case optional_arguments
  when :also_report_the_URL
    erev 'We will obtain the latest Genbank version from the URL:'
    e
    erev "  #{simp(remote_url)}"
    e
  end
  # The block form closes the underlying stream as soon as the body
  # has been read.
  remote_dataset = URI.open(remote_url) { |io| io.read }.split(N)
  # ======================================================================= #
  # For the following Regex, see this link:
  #
  #   https://rubular.com/r/XC97c7i6sR
  #
  # ======================================================================= #
  regex_to_use =
    /<td>(\d{1,3})<\/td><td>(.{1,3}\s{1,3}\d{4})<\/td><td>\d+<\/td><td>\d+<\/td><td>\d+<\/td><td>\d+<\/td><\/tr><\/tbody><\/table>$/
  # Collect the lines of the statistics table, from its opening line up
  # to and including the line carrying </table>, into one String.
  table = ''.dup
  is_open = false
  remote_dataset.each {|line|
    if line.include? '<table id="stats_table" summary="GENBANK AND WGS'
      table << line
      is_open = true
    else
      table << line if is_open
      is_open = false if line.include? '</table>'
    end
  }
  match = regex_to_use.match(table)
  # The layout of the remote page may change at any time; say so
  # explicitly rather than reporting an empty version.
  if match.nil?
    erev 'Could not determine the current Genbank version from '+
         simp(remote_url)+rev+'.'
    return
  end
  version        = match[1]
  month_and_year = match[2]
  erev 'The current Genbank version is: '+simp(version)+
       rev+' (released on '+simp(month_and_year)+rev+')'
end
|
2349
|
-
|
2350
|
-
# ========================================================================= #
|
2351
|
-
# === show_copyright_clause
|
2352
|
-
#
|
2353
|
-
# This method will simply show the licence used for the project.
|
2354
|
-
#
|
2355
|
-
# This has to be updated manually, though; and since the licence
|
2356
|
-
# may change one day, I will keep track when this method has been
|
2357
|
-
# last modified, which is on the 28.04.2020 (28th April, 2020).
|
2358
|
-
# ========================================================================= #
|
2359
|
-
def show_copyright_clause
  # The licence notice, line by line; nil stands for an empty line.
  licence_notice = [
    nil,
    'This project is free software, licensed under the LGPL-2.0 license.',
    'No "any later clause"; LGPL-2.0 applies to it.',
    nil,
    ' Copyright: Robert A. Heiler (2010-2020 and later)',
    nil,
    'The biomart component is licensed under the MIT license and is',
    'written by Darren Oakley. The MIT license is retained for the',
    'Biomart component.',
    nil,
    '(Note that the bioroebe project used to be under the GPL licence',
    'before some time; see the homepage of this gem for the explanation'
  ]
  licence_notice.each { |line| line.nil? ? e : erev(line) }
  erev 'as to why a switch occurred towards LGPL.)'
end
|
2374
|
-
|
2375
|
-
# ========================================================================= #
|
2376
|
-
# === report_n_proteins_registered_in_swiss_prot
|
2377
|
-
#
|
2378
|
-
# This method will report how many proteins are registered in swiss-prot.
|
2379
|
-
#
|
2380
|
-
# Invoke this method like so:
|
2381
|
-
#
|
2382
|
-
# swiss-prot?
|
2383
|
-
#
|
2384
|
-
# ========================================================================= #
|
2385
|
-
def report_n_proteins_registered_in_swiss_prot
  # Report the number of sequence entries listed on the Swiss-Prot
  # release statistics page.
  regex_to_use = /contains (\d+) sequence entries/ # See: http://rubular.com/r/Bl9tHfheEx
  url = 'https://web.expasy.org/docs/relnotes/relstat.html'
  # Kernel#open no longer opens URLs on Ruby 3.0 and newer; URI.open
  # (open-uri) has to be used instead. The block form closes the
  # stream once the body has been read.
  dataset = URI.open(url) { |io| io.read }
  # Empty String when the page does not contain the expected phrase.
  n_registered_proteins = dataset[regex_to_use, 1].to_s
  erev 'There are '+simp(n_registered_proteins)+rev+' registered '\
       'proteins in the Swiss-Prot database.'
  erev "The URL used to determine this was: "\
       "#{simp(url)}"
end
|
2396
|
-
|
2397
|
-
|
2398
|
-
# ========================================================================= #
|
2399
|
-
# === report_whether_readline_is_available
|
2400
|
-
# ========================================================================= #
|
2401
|
-
def report_whether_readline_is_available
  # Readline counts as available when the toplevel constant exists.
  readline_defined = Object.const_defined?(:Readline)
  erev 'Is readline available? '+slateblue(verbose_truth(readline_defined))
end
|
2409
|
-
|
2410
|
-
require 'bioroebe/dotplots/advanced_dotplot.rb'
|
2411
|
-
# ========================================================================= #
|
2412
|
-
# === show_2D_dotplot
|
2413
|
-
# ========================================================================= #
|
2414
|
-
def show_2D_dotplot(
    string1 = nil, string2 = nil
  )
  # Create a Bioroebe::AdvancedDotplot for the two given strings. When
  # neither string is given, both are asked for interactively on $stdin.
  if string1.nil? && string2.nil?
    erev 'You want to use a dotplot.'
    # IO#gets returns nil at end-of-input, hence the .to_s before
    # .chomp - an exhausted input counts as an empty String.
    erev 'Please provide the first string, which will be on the left side:'
    string1 = $stdin.gets.to_s.chomp
    erev 'Please provide the second string, which will be on the top side:'
    string2 = $stdin.gets.to_s.chomp
  end
  ::Bioroebe::AdvancedDotplot.new(string1, string2)
end
|
2426
|
-
|
2427
|
-
# ========================================================================= #
|
2428
|
-
# === show_reverse_dna_string
|
2429
|
-
#
|
2430
|
-
# This method will simply show the DNA sequence reversed.
|
2431
|
-
# ========================================================================= #
|
2432
|
-
def show_reverse_dna_string
  # Padding and 5' leader come first, then the (colourized) reversed
  # sequence, then the 3' trailer in the default colour.
  prefix   = padding?+leading_five_prime
  reversed = sfancy(return_reverse_dna_string)
  erev prefix+reversed+rev+trailing_three_prime
end
|
2439
|
-
|
2440
|
-
# ========================================================================= #
|
2441
|
-
# === show_download_dir
|
2442
|
-
# ========================================================================= #
|
2443
|
-
def show_download_dir
  # Print the download directory known to the toplevel Bioroebe module.
  erev(::Bioroebe.download_directory?)
end
|
2446
|
-
|
2447
|
-
# ========================================================================= #
|
2448
|
-
# === show_this_sequence_padded
|
2449
|
-
#
|
2450
|
-
# Usage example:
|
2451
|
-
#
|
2452
|
-
# show_this_sequence_padded ATGACTTAGCCACAACTGCATGCATATGCATGACTGACT
|
2453
|
-
#
|
2454
|
-
# ========================================================================= #
|
2455
|
-
def show_this_sequence_padded(
    i = dna_sequence_object?
  )
  # Print the given sequence in lines of at most 80 characters each.
  #
  # +i+ may be a sequence or an Array of sequence fragments; the
  # fragments of an Array are joined. An empty Array means "use the
  # main sequence".
  if i.is_a?(Array)
    # Build a new Array for the fallback rather than appending to the
    # Array that was passed in by the caller.
    i = [dna_sequence_object?] if i.empty?
    i = i.join
  end
  # ======================================================================= #
  # Split the sequence into chunks of up to 80 characters; an empty
  # sequence yields no chunk at all.
  # ======================================================================= #
  i.scan(/.{1,80}/).each {|entry|
    erev entry
  }
end
|
2472
|
-
|
2473
|
-
require 'bioroebe/enzymes/restriction_enzymes_file.rb'
|
2474
|
-
# ========================================================================= #
|
2475
|
-
# === show_all_yaml_files
|
2476
|
-
#
|
2477
|
-
# We show which yaml files we will use here.
|
2478
|
-
# ========================================================================= #
|
2479
|
-
def show_all_yaml_files
  # Right now only the restriction-enzymes file is reported.
  restriction_enzymes_file = sfile(::Bioroebe.restriction_enzymes_file)
  erev 'The file that holds our restriction enzymes can be found here:'
  e
  erev " #{restriction_enzymes_file}"
  e
end
|
2485
|
-
|
2486
|
-
# ========================================================================= #
|
2487
|
-
# === show_resources_about_the_horseradish_peroxidase
|
2488
|
-
# ========================================================================= #
|
2489
|
-
def show_resources_about_the_horseradish_peroxidase
  # NCBI gene search, the gene entry itself, and the FASTA download.
  gene_search = 'https://www.ncbi.nlm.nih.gov/gene/?term=%22Horseradish+Peroxidase%22'
  gene_entry  = 'https://www.ncbi.nlm.nih.gov/gene/836533'
  fasta_entry = 'Fasta: https://www.ncbi.nlm.nih.gov/nuccore/NC_003076.8?report=fasta&from=25659257&to=25661007&strand=true'
  e gene_search
  e gene_entry
  e fasta_entry
end
|
2494
|
-
|
2495
|
-
# ========================================================================= #
|
2496
|
-
# === report_whether_we_will_make_use_of_expand_cd_aliases
|
2497
|
-
# ========================================================================= #
|
2498
|
-
def report_whether_we_will_make_use_of_expand_cd_aliases
  # Look up the current use_expand_cd_aliases? setting in
  # Bioroebe::VerboseTruth and hand the result to erev().
  verbose_answer = Bioroebe::VerboseTruth[use_expand_cd_aliases?]
  erev(verbose_answer)
end
|
2501
|
-
|
2502
|
-
# ========================================================================= #
|
2503
|
-
# === report_useful_packages_installed
|
2504
|
-
#
|
2505
|
-
# This aggregate method can be used to report versions that may be
|
2506
|
-
# installed on the given system, e. g. science-based projects and
|
2507
|
-
# similar variants.
|
2508
|
-
# ========================================================================= #
|
2509
|
-
def report_useful_packages_installed
  # Aggregate reporter: ViennaRNA is checked first, bedtools second.
  try_to_report_the_version_of_viennarna()
  try_to_report_the_version_of_bedtools()
end
|
2513
|
-
|
2514
|
-
# ========================================================================= #
|
2515
|
-
# === try_to_report_the_version_of_viennarna
|
2516
|
-
#
|
2517
|
-
# This method can be used to see the version of ViennaRNA, if it is
|
2518
|
-
# installed at all.
|
2519
|
-
# ========================================================================= #
|
2520
|
-
def try_to_report_the_version_of_viennarna
  # Report the version printed by "RNAplfold --version", or tell the user
  # that ViennaRNA is not available.
  #
  # The command is run through the shell (stderr merged into stdout), so a
  # missing binary shows up as a shell error message rather than as an
  # exception. bash prints "command not found", whereas dash and busybox
  # sh print only "not found" - hence the shorter text is matched, which
  # covers both variants.
  result = `RNAplfold --version 2>&1`
  if result.include? 'not found'
    e
    erev 'ViennaRNA does not appear to be installed / available.'
    e
    if is_on_roebe?
      erev 'You may be able to install it via:'
      e
      erev ' rbt viennarna'
      e
    end
  else
    # Drop the program name so that only the version string remains.
    version = result.sub(/RNAplfold/, '').strip
    erev 'The version of ViennaRNA is: '+
         orange(version)+rev
  end
end
|
2538
|
-
|
2539
|
-
# ========================================================================= #
|
2540
|
-
# === report_current_working_directory
|
2541
|
-
# ========================================================================= #
|
2542
|
-
def report_current_working_directory
  # The directory comes from return_working_directory and is passed
  # through sdir() before being shown.
  erev 'We are in the directory:'
  current_directory = return_working_directory
  erev " #{sdir(current_directory)}"
end
|
2546
|
-
|
2547
|
-
# ========================================================================= #
|
2548
|
-
# === report_which_yaml_engine_is_in_use
|
2549
|
-
# ========================================================================= #
|
2550
|
-
def report_which_yaml_engine_is_in_use
  # The engine name is obtained from ::Bioroebe.use_which_yaml_engine? and
  # passed through sfancy(); rev is appended after it.
  engine_in_use = sfancy(::Bioroebe.use_which_yaml_engine?)
  erev "The yaml engine in use is: #{engine_in_use}#{rev}"
end
|
2555
|
-
|
2556
|
-
begin
|
2557
|
-
require 'directory_paradise'
|
2558
|
-
rescue LoadError; end
|
2559
|
-
# ========================================================================= #
|
2560
|
-
# === show_file_listing
|
2561
|
-
#
|
2562
|
-
# Make use of DirectoryParadise::Report to show the content of a directory.
|
2563
|
-
#
|
2564
|
-
# To invoke this method from within the Bioroebe::Shell, do:
|
2565
|
-
#
|
2566
|
-
# ll
|
2567
|
-
#
|
2568
|
-
# ========================================================================= #
|
2569
|
-
def show_file_listing(
    from_this_directory = Dir.pwd
  )
  # Show a listing of from_this_directory (default: the current working
  # directory) via DirectoryParadise::Report.
  #
  # The directory_paradise gem is optional - this file only requires it
  # inside a "rescue LoadError" clause. Without the guard below, a missing
  # gem would surface here as a NameError, so we notify the user and
  # return early instead.
  unless Object.const_defined?(:DirectoryParadise)
    erev 'The directory_paradise gem is not available - can not '\
         'show the file listing.'
    return
  end
  # :dont_run_yet is passed so that the report can be configured before
  # .run is called explicitly.
  report = DirectoryParadise::Report.new(from_this_directory, :dont_run_yet)
  report.dont_report_total_filesize
  report.disable_colours unless use_colours?
  report.run
end
|
2577
|
-
|
2578
|
-
# ========================================================================= #
|
2579
|
-
# === try_to_report_the_version_of_bedtools
|
2580
|
-
# ========================================================================= #
|
2581
|
-
def try_to_report_the_version_of_bedtools
  # Report the version printed by "bedtools --version", or tell the user
  # that the bedtools are not available.
  #
  # The command is run through the shell (stderr merged into stdout), so a
  # missing binary shows up as a shell error message. bash prints
  # "command not found", whereas dash and busybox sh print only
  # "not found" - hence the shorter text is matched.
  result = `bedtools --version 2>&1`
  if result.include? 'not found'
    e
    erev 'The bedtools do not appear to be installed / available.'
    e
    if is_on_roebe?
      erev 'You may be able to install it via:'
      e
      erev ' rbt bedtools'
      e
    end
  else
    # Drop the program name and only the leading "v" prefix; any other
    # "v" in the version string (e. g. in "-dev") is kept.
    version = result.sub(/bedtools/, '').strip.sub(/\Av/, '')
    erev "The version of bedtools is: "\
         "#{orange(version)}#{rev}"
  end
end
|
2599
|
-
|
2600
|
-
# ========================================================================= #
|
2601
|
-
# === three_to_one
|
2602
|
-
#
|
2603
|
-
# This method will translate, and output, a three-letter aminoacid
|
2604
|
-
# into the corresponding single-letter code.
|
2605
|
-
#
|
2606
|
-
# Invocation example:
|
2607
|
-
#
|
2608
|
-
# three_to_one Thr Thr Glu Ala Val Glu Ser Thr Val Ala Thr Leu Glu Asp Ser # => T T E A V E S T V A T L E D S
|
2609
|
-
# 3to1 ARG-ALA-SER-LEU-PHE-TRP-LYS-HIS-ASN-SER-VAL-LEU-ILE-VAL-PRO
|
2610
|
-
#
|
2611
|
-
# ========================================================================= #
|
2612
|
-
def three_to_one(i)
  # An Array is first joined with '-' into one String; the conversion
  # itself is done by ::Bioroebe.three_to_one().
  i = i.join('-').strip if i.is_a? Array
  one_letter_code = ::Bioroebe.three_to_one(i).strip
  e one_letter_code
end
|
2618
|
-
|
2619
|
-
require 'bioroebe/codons/codons.rb'
|
2620
|
-
# ========================================================================= #
|
2621
|
-
# === show_codons_of_this_aminoacid_or_show_kazusa_codon
|
2622
|
-
#
|
2623
|
-
# This method can be used to output which aminoacid a specific codon
|
2624
|
-
# codes for.
|
2625
|
-
#
|
2626
|
-
# The input to this method should be a specific codon, such as ATG or
|
2627
|
-
# GGC and so forth.
|
2628
|
-
#
|
2629
|
-
# If no input is provided, we will instead show the webpage of
|
2630
|
-
# kazusa.
|
2631
|
-
#
|
2632
|
-
# Invocation examples:
|
2633
|
-
#
|
2634
|
-
# codon? ATG # => M
|
2635
|
-
# codon? AUG # => M
|
2636
|
-
#
|
2637
|
-
# ========================================================================= #
|
2638
|
-
def show_codons_of_this_aminoacid_or_show_kazusa_codon(i = nil)
  i = i.first if i.is_a? Array # Only the first entry is used.
  unless i
    # ===================================================================== #
    # No input was provided: point the user at the kazusa webpage.
    # ===================================================================== #
    kazusa_url = simp('http://www.kazusa.or.jp/codon/')
    return erev("The URL is at: #{kazusa_url}")
  end
  # ======================================================================= #
  # The user provided a codon: show the result of the codon-to-aminoacid
  # lookup for it.
  # ======================================================================= #
  e ::Bioroebe.codon_to_aminoacid(i)
end
|
2652
|
-
|
2653
|
-
# ========================================================================= #
|
2654
|
-
# === return_reverse_dna_string
|
2655
|
-
# ========================================================================= #
|
2656
|
-
def return_reverse_dna_string
  # Note that it is the result of complement_sequence? that is reversed.
  complement = complement_sequence?
  complement.reverse
end
|
2659
|
-
|
2660
|
-
# ========================================================================= #
|
2661
|
-
# === showorf (showorf tag)
|
2662
|
-
#
|
2663
|
-
# Use this method to show the open reading frame of a given sequence.
|
2664
|
-
#
|
2665
|
-
# We can also use it to selectively show a certain frame, such as
|
2666
|
-
# frame2. See class Bioroebe::ShowOrf for this.
|
2667
|
-
#
|
2668
|
-
# Note that in May 2020 (10.05.2020) class Bioroebe::ShowOrf here
|
2669
|
-
# was replaced with class Bioroebe::DisplayOpenReadingFrames (see display_open_reading_frames).
|
2670
|
-
# ========================================================================= #
|
2671
|
-
def showorf(
    i = dna_sequence_object?,
    show_how_many_frames = :show_three_frames
  )
  # Fall back to the current DNA sequence object when nil or an empty
  # Array was given.
  i = dna_sequence_object? if i.nil?
  i = dna_sequence_object? if i.is_a?(Array) && i.empty?
  # The frame selection is handed over as the return value of the block.
  display_open_reading_frames(i) do
    show_how_many_frames
  end
end
|
2679
|
-
|
2680
|
-
# ========================================================================= #
|
2681
|
-
# === display_open_reading_frames
|
2682
|
-
#
|
2683
|
-
# Invocation example:
|
2684
|
-
#
|
2685
|
-
# display_open_reading_frames ATGAGCAAGGCCGACTACGAGAAG
|
2686
|
-
#
|
2687
|
-
# ========================================================================= #
|
2688
|
-
def display_open_reading_frames(
    i = dna_sequence_object?, &block
  )
  # Delegate to class Bioroebe::DisplayOpenReadingFrames. The given block
  # is forwarded as-is.
  #
  # Only the first entry of an Array is used; nil or empty input falls
  # back to dna_sequence_object?.
  i = i.first if i.is_a? Array
  i = dna_sequence_object? if i.nil?
  # Ask for emptiness only if the object supports it: the fallback above
  # may itself have returned nil, and calling nil.empty? would raise a
  # NoMethodError.
  i = dna_sequence_object? if i.respond_to?(:empty?) && i.empty?
  # Loaded lazily, only when this method is actually used.
  require 'bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb'
  ::Bioroebe::DisplayOpenReadingFrames.new(i, &block)
end
|
2697
|
-
|
2698
|
-
require 'bioroebe/fasta_and_fastq/show_fasta_headers.rb'
|
2699
|
-
# ========================================================================= #
|
2700
|
-
# === show_fasta_headers
|
2701
|
-
#
|
2702
|
-
# Just show the fasta headers.
|
2703
|
-
# ========================================================================= #
|
2704
|
-
def show_fasta_headers(i)
  # All work is done by class Bioroebe::ShowFastaHeaders; the new
  # instance is returned.
  ::Bioroebe::ShowFastaHeaders.new(i)
end
|
2707
|
-
|
2708
|
-
# ========================================================================= #
|
2709
|
-
# === show_commandline_options
|
2710
|
-
#
|
2711
|
-
# Show the available commandline options.
|
2712
|
-
#
|
2713
|
-
# To invoke this method from the commandline, do:
|
2714
|
-
#
|
2715
|
-
# bioroebe --help
|
2716
|
-
#
|
2717
|
-
# ========================================================================= #
|
2718
|
-
def show_commandline_options
  # One entry per commandline option; each is shown via ecomment().
  option_descriptions = [
    ' --silent # perform a silent startup',
    ' --sequence # use this nucleotide sequence on '\
    'startup; can be a number too such as 150',
    ' --n_fasta_entries # report how many fasta '\
    'entries are in this directory',
    ' --disable-opn # permanently disable opn',
    ' --random-aminoacids=33 # "generate" 33 random amino acids and display them',
    ' --n-aminoacids=33 # an alias to the ^^^ above',
    ' --protein-to-dna # convert protein-aminoacid '\
    'sequence back to DNA'
  ]
  e
  option_descriptions.each { |description| ecomment(description) }
  e
  exit # The program terminates after the options were shown.
end
|
2733
|
-
|
2734
|
-
# ========================================================================= #
|
2735
|
-
# === show_codon_table
|
2736
|
-
# ========================================================================= #
|
2737
|
-
def show_codon_table(i = nil)
  # Delegate to class ShowThisCodonTable and return the new instance.
  #
  # An empty Array defaults to table 1, the vertebrate codon table. A new
  # Array is assigned rather than appending to the given one, so that the
  # caller's Array is left untouched (and a frozen Array is accepted).
  i = [1] if i.is_a?(Array) && i.empty?
  ShowThisCodonTable.new(i)
end
|
2743
|
-
|
2744
|
-
# ========================================================================= #
|
2745
|
-
# === show_rna_sequence
|
2746
|
-
#
|
2747
|
-
# Use this method to convert a given sequence to RNA.
|
2748
|
-
# ========================================================================= #
|
2749
|
-
def show_rna_sequence(
    i = sequence_object?.to_rna
  )
  # Display the given sequence as RNA, that is with every 'T' replaced by
  # 'U'. Defaults to sequence_object?.to_rna when no input (or nil) is
  # given.
  i = sequence_object?.to_rna if i.nil?
  i = i.to_str if i.respond_to? :to_str
  # The non-mutating tr() is used on purpose: to_str returns the very same
  # object for a String, so tr!() would modify the caller's String in
  # place and raise a FrozenError for frozen input.
  i = i.tr('T','U') if i.include? 'T'
  # The padding is handed to display() as a Hash returned from the block.
  display_nucleotide_object?.display(i) {{ use_this_as_padding: lpad? }}
end
|
2759
|
-
|
2760
|
-
# ========================================================================= #
|
2761
|
-
# === report_size_of
|
2762
|
-
# ========================================================================= #
|
2763
|
-
def report_size_of(
    i = nil
  )
  # Without input, the current DNA sequence object is used.
  i = dna_sequence_object? if i.nil?
  # Still nothing usable: report the size of the main string instead.
  return report_size_of_main_string unless i
  erev "This sequence contains #{sfancy(i.size.to_s)}#{rev} nucleotides."
end
|
2775
|
-
|
2776
|
-
# ========================================================================= #
|
2777
|
-
# === display_glycolysis_pathway
|
2778
|
-
#
|
2779
|
-
# This method will show the glycolysis Pathway.
|
2780
|
-
# ========================================================================= #
|
2781
|
-
def display_glycolysis_pathway
  steps = Pathways.glycolysis_pathway # Obtain the glycolysis pathway, as Array.
  # Prefer Display if that constant is available at toplevel.
  return Display.display(steps, ')') if Object.const_defined?(:Display)
  # Otherwise fall back to a plain list.
  steps.each { |step| e ' - '+step }
end
|
2789
|
-
|
2790
|
-
# ========================================================================= #
|
2791
|
-
# === show_the_weight_of_some_common_proteins
|
2792
|
-
# ========================================================================= #
|
2793
|
-
def show_the_weight_of_some_common_proteins(
    use_this_file = FILE_WEIGHT_OF_COMMON_PROTEINS
  )
  erev 'Showing the weight of some common proteins next (in kDa):'
  e
  File.readlines(use_this_file).each { |line|
    # Only lines that contain ' # ' are shown.
    next unless line.include? ' # '
    # Everything before the first ':' is the name; the remaining parts are
    # joined via ' ' to form the weight.
    protein_name, *remainder = line.split(':')
    weight = remainder.join(' ').strip
    erev " #{(protein_name+':').ljust(25)} "\
         "#{lightblue((weight.to_s+' kDa').rjust(12))}"
  }
  e
end
|
2810
|
-
|
2811
|
-
# ========================================================================= #
|
2812
|
-
# === show_protein_composition
|
2813
|
-
#
|
2814
|
-
# Delegate towards class CountAmountOfAminoacids
|
2815
|
-
# ========================================================================= #
|
2816
|
-
def show_protein_composition(i)
  # All work is done by class Bioroebe::CountAmountOfAminoacids; the new
  # instance is returned.
  ::Bioroebe::CountAmountOfAminoacids.new(i)
end
|
2819
|
-
|
2820
|
-
# ========================================================================= #
|
2821
|
-
# === show_all_deducible_aminoacid_sequences
|
2822
|
-
#
|
2823
|
-
# Note that if the string is too short, we won't display the other frames.
|
2824
|
-
#
|
2825
|
-
# If the third argument, `show_translations_aligned`, is set to
|
2826
|
-
# true then we will additionally display all 3 frames aligned
|
2827
|
-
# one to another.
|
2828
|
-
#
|
2829
|
-
# Usage example:
|
2830
|
-
#
|
2831
|
-
# toproteins AUG
|
2832
|
-
# toproteins AUGAUGUUGAAU
|
2833
|
-
# toproteins AUG-AUG-UUG-AAA-GGU-CGC-AAU-STOP
|
2834
|
-
#
|
2835
|
-
# ========================================================================= #
|
2836
|
-
def show_all_deducible_aminoacid_sequences(
    i = dna_sequence_as_string?,
    also_show_numbers = true,
    show_translations_aligned = true
  )
  # ======================================================================= #
  # Normalize the input first: nil or an empty Array fall back to the
  # current DNA sequence, an Array is joined, and all '-' are removed.
  # ======================================================================= #
  i = dna_sequence_as_string? if i.is_a?(Array) && i.empty?
  i = dna_sequence_as_string? if i.nil?
  i = i.join(' ').strip if i.is_a? Array
  i = i.to_s.delete('-') # to_s avoids nil-operations; delete() returns a copy.
  if i.empty? # This means that the user has not yet assigned a DNA sequence.
    erev 'Please assign some DNA sequence. You can also randomly generate'
    erev 'a new sequence via "random".'
    return
  end
  # ======================================================================= #
  # === Frame 1
  # ======================================================================= #
  cliner
  erev "#{N}The amino acid sequence for #{sfancy('Frame 1')}#{rev} is: "
  e
  frame1 = translate_dna_into_aminoacid(i).to_s
  erev ' '+frame1+N+N
  verbose_report_numbered_amino_acid_sequence(frame1) if also_show_numbers
  cliner
  # ======================================================================= #
  # The other two frames are only shown for more than two characters.
  # ======================================================================= #
  return unless i.size > 2
  # ======================================================================= #
  # === Frame 2
  # ======================================================================= #
  erev "#{N}#{N}The amino acid sequence for #{sfancy('Frame 2')}#{rev} is: "
  e
  frame2 = translate_dna_into_aminoacid_frame2(i)
  erev ' '+frame2+N+N
  verbose_report_numbered_amino_acid_sequence(frame2, '2') if also_show_numbers
  cliner
  e
  # ======================================================================= #
  # === Frame 3
  # ======================================================================= #
  erev "#{N}#{N}The amino acid sequence for #{sfancy('Frame 3')}#{rev} is: "
  e
  frame3 = translate_dna_into_aminoacid_frame3(i)
  erev ' '+frame3+N+N
  verbose_report_numbered_amino_acid_sequence(frame3, '3') if also_show_numbers
  e
  cliner
  # ======================================================================= #
  # Optionally show the frames aligned, via showorf().
  # ======================================================================= #
  showorf(i) if show_translations_aligned
end
|
2889
|
-
|
2890
|
-
# ========================================================================= #
|
2891
|
-
# === show_blosum_matrix
|
2892
|
-
#
|
2893
|
-
# Delegate towards Bioroebe::Blosum here, and invoke its .show_matrix() method.
|
2894
|
-
# ========================================================================= #
|
2895
|
-
def show_blosum_matrix
  erev('Showing the blosum matrix next:')
  # Loaded lazily, only when the matrix is actually requested.
  require('bioroebe/blosum/blosum.rb')
  Bioroebe::Blosum.show_matrix
end
|
2900
|
-
|
2901
|
-
end; end
|