bioroebe 0.12.24 → 0.13.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.md +7 -8
- data/README.md +566 -354
- data/bin/all_positions_of_this_nucleotide +1 -1
- data/bin/aminoacid_frequencies +1 -1
- data/bin/automatically_rename_this_fasta_file +1 -1
- data/bin/base_composition +1 -1
- data/bin/batch_create_windows_executables +1 -1
- data/bin/bioroebe +12 -1
- data/bin/bioroebe_cat +7 -0
- data/bin/calculate_exponential_growth +7 -0
- data/bin/calculate_n50_value +1 -1
- data/bin/calculate_the_frequencies_of_this_species +7 -0
- data/bin/chunked_display +1 -1
- data/bin/codon_frequency +1 -1
- data/bin/codon_to_aminoacid +1 -1
- data/bin/colourize_this_fasta_sequence +1 -1
- data/bin/complementary_dna_strand +1 -1
- data/bin/complementary_rna_strand +1 -1
- data/bin/consensus_sequence +1 -1
- data/bin/dna_to_rna +1 -1
- data/bin/downcase_chunked_display +1 -1
- data/bin/download_this_pdb +1 -1
- data/bin/fasta_index +1 -1
- data/bin/fetch_data_from_uniprot +1 -1
- data/bin/filter_away_invalid_nucleotides +1 -1
- data/bin/find_substring +1 -1
- data/bin/input_as_dna +1 -1
- data/bin/is_palindrome +1 -1
- data/bin/leading_five_prime +1 -1
- data/bin/longest_ORF +1 -1
- data/bin/longest_substring +1 -1
- data/bin/open_reading_frames +1 -1
- data/bin/partner_nucleotide +1 -1
- data/bin/plain_palindrome +1 -1
- data/bin/random_dna_sequence +1 -1
- data/bin/random_sequence +1 -1
- data/bin/raw_hamming_distance +1 -1
- data/bin/return_longest_substring_via_LCS_algorithm +1 -1
- data/bin/reverse_sequence +1 -1
- data/bin/short_aminoacid_letter_from_long_aminoacid_name +1 -1
- data/bin/show_atomic_composition +1 -1
- data/bin/show_fasta_header +1 -1
- data/bin/show_nucleotide_sequence +1 -1
- data/bin/show_this_dna_sequence +1 -1
- data/bin/show_time_now +7 -0
- data/bin/sort_aminoacid_based_on_its_hydrophobicity +1 -1
- data/bin/strict_filter_away_invalid_aminoacids +1 -1
- data/{lib/bioroebe/base/reset.rb → bin/three_delimiter} +9 -6
- data/bin/three_to_one +1 -1
- data/bin/to_rna +1 -1
- data/bin/trailing_three_prime +1 -1
- data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +1 -1
- data/bioroebe.gemspec +6 -7
- data/doc/README.gen +534 -322
- data/doc/blosum/blosum.md +4 -0
- data/doc/compatibility/BIO_PHP.md +20 -18
- data/doc/compatibility/README.md +2 -3
- data/doc/compatibility/emboss.md +5 -3
- data/doc/{extensive_usage_example.md → extensive_usage_example/extensive_usage_example.md} +4 -2
- data/doc/{instructions_for_the_taxonomy_subproject.md → instructions_for_the_taxonomy_subproject/instructions_for_the_taxonomy_subproject.md} +36 -33
- data/doc/{legacy_paths.md → legacy_paths/legacy_paths.md} +3 -3
- data/doc/statistics/statistics.md +12 -10
- data/doc/todo/bioroebe_GUI_todo.md +6 -1
- data/doc/todo/bioroebe_java_todo.md +3 -2
- data/doc/todo/bioroebe_todo.md +328 -310
- data/doc/{using_biomart.md → using_biomart/using_biomart.md} +7 -3
- data/lib/bioroebe/abstract/features.rb +0 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -1
- data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +3 -1
- data/lib/bioroebe/aminoacids/codon_percentage.rb +18 -10
- data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +5 -2
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +90 -64
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -3
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +2 -2
- data/lib/bioroebe/annotations/create_annotation_format.rb +2 -2
- data/lib/bioroebe/base/base.rb +101 -6
- data/lib/bioroebe/base/base_module/base_module.rb +9 -1
- data/lib/bioroebe/base/colours.rb +3 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +80 -44
- data/lib/bioroebe/base/commandline_application/README.md +1 -1
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +661 -22
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +2 -1
- data/lib/bioroebe/base/infer_the_namespace_module/infer_the_namespace_module.rb +37 -0
- data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +1 -6
- data/lib/bioroebe/base/prototype/prototype.rb +155 -14
- data/lib/bioroebe/biomart/attribute.rb +1 -1
- data/lib/bioroebe/biomart/biomart.rb +8 -9
- data/lib/bioroebe/biomart/server.rb +1 -1
- data/lib/bioroebe/blosum/blosum.rb +2 -2
- data/lib/bioroebe/calculate/calculate_blosum_score.rb +5 -3
- data/lib/bioroebe/calculate/calculate_gc_content.rb +1 -1
- data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +5 -3
- data/lib/bioroebe/calculate/calculate_melting_temperature.rb +2 -10
- data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +6 -15
- data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +4 -2
- data/lib/bioroebe/cell/cell.rb +3 -2
- data/lib/bioroebe/cell/specialized_cells/B_cell.rb +60 -0
- data/lib/bioroebe/cell/specialized_cells/Macrophage.rb +60 -0
- data/lib/bioroebe/cell/specialized_cells/README.md +5 -0
- data/lib/bioroebe/cell/specialized_cells/T_cell.rb +60 -0
- data/lib/bioroebe/cleave_and_digest/cleave.rb +3 -1
- data/lib/bioroebe/cleave_and_digest/digestion.rb +1 -1
- data/lib/bioroebe/codon_tables/frequencies/10090_Mus_musculus.yml +93 -0
- data/lib/bioroebe/codon_tables/frequencies/107243_Thlaspi_caerulescens.yml +72 -0
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -2
- data/lib/bioroebe/codons/codon_table.rb +10 -2
- data/lib/bioroebe/codons/codons.rb +3 -3
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +18 -15
- data/lib/bioroebe/codons/determine_optimal_codons.rb +1 -1
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +4 -2
- data/lib/bioroebe/codons/show_codon_tables.rb +1 -1
- data/lib/bioroebe/codons/show_codon_usage.rb +1 -2
- data/lib/bioroebe/codons/show_this_codon_table.rb +2 -2
- data/lib/bioroebe/codons/start_codons.rb +7 -3
- data/lib/bioroebe/colours/colour_schemes/README.md +1 -1
- data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +3 -3
- data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +3 -3
- data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +4 -3
- data/lib/bioroebe/colours/colour_schemes/helix.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/score.rb +13 -2
- data/lib/bioroebe/colours/colour_schemes/strand.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/turn.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/zappo.rb +1 -1
- data/lib/bioroebe/{toplevel_methods/colourize_related_methods.rb → colours/colourize_related_code.rb} +1 -3
- data/lib/bioroebe/colours/colourize_sequence.rb +3 -1
- data/lib/bioroebe/colours/colours.rb +172 -15
- data/lib/bioroebe/configuration/configuration.rb +1 -1
- data/lib/bioroebe/constants/GUIs.rb +2 -2
- data/lib/bioroebe/constants/constants.rb +1349 -0
- data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +8 -13
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +9 -3
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +11 -10
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +1 -1
- data/lib/bioroebe/count/count_at.rb +2 -1
- data/lib/bioroebe/databases/download_taxonomy_database.rb +1 -1
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +2 -2
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +2 -2
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +2 -2
- data/lib/bioroebe/electron_microscopy/flipy.rb +2 -2
- data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +3 -11
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +6 -6
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +6 -6
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +2 -2
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +1 -1
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +1 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +4 -3
- data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +1 -1
- data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +4 -3
- data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +3 -3
- data/lib/bioroebe/ext/main.cpp +0 -1
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +3 -3
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +8 -14
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +37 -11
- data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +2 -2
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +5 -13
- data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +3 -6
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +3 -3
- data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +20 -11
- data/lib/bioroebe/genome/genome.rb +1 -1
- data/lib/bioroebe/genomes/genome_pattern.rb +17 -16
- data/lib/bioroebe/genomes/genome_retriever.rb +4 -2
- data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +10 -13
- data/lib/bioroebe/gui/universal_widgets/alignment/alignment.rb +557 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/aminoacid_composition.rb +498 -198
- data/lib/bioroebe/gui/universal_widgets/anti_sense_strand/anti_sense_strand.rb +665 -0
- data/lib/bioroebe/gui/universal_widgets/blosum_matrix_viewer/blosum_matrix_viewer.rb +329 -0
- data/lib/bioroebe/gui/universal_widgets/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +423 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/controller/controller.rb +170 -118
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +277 -215
- data/lib/bioroebe/gui/{shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb → universal_widgets/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb} +297 -107
- data/lib/bioroebe/gui/universal_widgets/fasta_table_widget/fasta_table_widget.rb +643 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/format_converter/format_converter.rb +236 -164
- data/lib/bioroebe/gui/universal_widgets/gene/gene.rb +278 -0
- data/lib/bioroebe/gui/universal_widgets/hamming_distance/hamming_distance.rb +646 -0
- data/lib/bioroebe/gui/{shared_code/levensthein_distance/levensthein_distance_module.rb → universal_widgets/levensthein_distance/levensthein_distance.rb} +313 -88
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/nucleotide_analyser/nucleotide_analyser.rb +281 -189
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/parse_pdb_file/parse_pdb_file.rb +265 -149
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/primer_design_widget/primer_design_widget.rb +337 -263
- data/lib/bioroebe/gui/universal_widgets/protein_to_DNA/protein_to_DNA.rb +408 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/random_sequence/random_sequence.rb +245 -187
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.rb +207 -137
- data/lib/bioroebe/gui/universal_widgets/shell/shell.rb +288 -0
- data/lib/bioroebe/gui/{gtk3/show_codon_table/misc.rb → universal_widgets/show_codon_table/show_codon_table.rb} +290 -110
- data/lib/bioroebe/gui/{shared_code/show_codon_usage/show_codon_usage_module.rb → universal_widgets/show_codon_usage/show_codon_usage.rb} +228 -47
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/sizeseq/sizeseq.rb +151 -69
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/three_to_one/three_to_one.rb +190 -127
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.rb +211 -152
- data/lib/bioroebe/images/images.html +953 -1170
- data/lib/bioroebe/images/misc/README.md +6 -0
- data/lib/bioroebe/images/misc/activation.avif +0 -0
- data/lib/bioroebe/images/misc/inhibition.avif +0 -0
- data/lib/bioroebe/images/misc/small_virus_logo.avif +0 -0
- data/lib/bioroebe/{constants/base_directory.rb → log_directory/log_directory.rb} +79 -59
- data/lib/bioroebe/matplotlib/matplotlib_generator.rb +1 -1
- data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +1 -1
- data/lib/bioroebe/misc/ruler.rb +5 -5
- data/lib/bioroebe/misc/useful_formulas.rb +3 -3
- data/lib/bioroebe/ncbi/efetch.rb +1 -2
- data/lib/bioroebe/ngs/phred_quality_score_table.rb +3 -3
- data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +3 -6
- data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +3 -3
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +6 -10
- data/lib/bioroebe/nucleotides/{show_nucleotide_sequence.rb → show_nucleotide_sequence/show_nucleotide_sequence.rb} +377 -255
- data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +1 -1
- data/lib/bioroebe/palindromes/palindrome_finder.rb +1 -1
- data/lib/bioroebe/palindromes/palindrome_generator.rb +2 -10
- data/lib/bioroebe/parsers/biolang_parser.rb +1 -1
- data/lib/bioroebe/parsers/blosum_parser.rb +14 -19
- data/lib/bioroebe/parsers/genbank_parser.rb +2 -6
- data/lib/bioroebe/parsers/gff.rb +9 -9
- data/lib/bioroebe/parsers/parse_embl.rb +2 -6
- data/lib/bioroebe/parsers/stride_parser.rb +4 -12
- data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +2 -2
- data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +6 -3
- data/lib/bioroebe/patterns/profile_pattern.rb +2 -2
- data/lib/bioroebe/patterns/rgg_scanner.rb +4 -2
- data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/alpha_helix.rb +2 -2
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/download_this_pdb.rb +2 -3
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/fetch_fasta_sequence_from_pdb.rb +4 -4
- data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/helical_wheel.rb +2 -2
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_mmCIF_file.rb +1 -1
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_pdb_file.rb +3 -3
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/report_secondary_structures_from_this_pdb_file.rb +3 -3
- data/lib/bioroebe/project/project.rb +3 -1
- data/lib/bioroebe/raw_sequence/README.md +8 -8
- data/lib/bioroebe/raw_sequence/raw_sequence.rb +11 -2
- data/lib/bioroebe/regexes/regexes.rb +1 -2
- data/lib/bioroebe/requires/commandline_application.rb +3 -1
- data/lib/bioroebe/requires/require_all_pdb_files.rb +1 -1
- data/lib/bioroebe/requires/require_all_taxonomy_files.rb +1 -1
- data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +10 -0
- data/lib/bioroebe/requires/require_colours.rb +1 -1
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +5 -7
- data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +1 -1
- data/lib/bioroebe/requires/require_the_constants.rb +2 -14
- data/lib/bioroebe/requires/require_yaml.rb +7 -5
- data/lib/bioroebe/sequence/alignment.rb +1 -1
- data/lib/bioroebe/sequence/dna.rb +4 -2
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +22 -8
- data/lib/bioroebe/sequence/protein.rb +2 -2
- data/lib/bioroebe/sequence/reverse_complement.rb +3 -3
- data/lib/bioroebe/sequence/rna.rb +9 -8
- data/lib/bioroebe/sequence/sequence.rb +3 -3
- data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +0 -0
- data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +0 -0
- data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +0 -0
- data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -1
- data/lib/bioroebe/shell/help/class.rb +68 -19
- data/lib/bioroebe/shell/menu.rb +5244 -5322
- data/lib/bioroebe/shell/{readline/readline.rb → readline.rb} +1 -3
- data/lib/bioroebe/shell/shell.rb +11240 -453
- data/lib/bioroebe/siRNA/siRNA.rb +3 -3
- data/lib/bioroebe/{gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb → sinatra/sinatra_interface.rb} +28 -19
- data/lib/bioroebe/{www/sinatra/sinatra.rb → sinatra/sinatra_wrapper.rb} +731 -754
- data/lib/bioroebe/string_matching/find_longest_substring.rb +2 -10
- data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +4 -14
- data/lib/bioroebe/string_matching/hamming_distance.rb +11 -10
- data/lib/bioroebe/string_matching/levensthein.rb +5 -17
- data/lib/bioroebe/string_matching/simple_string_comparer.rb +48 -4
- data/lib/bioroebe/string_matching/smith_waterman.rb +11 -6
- data/lib/bioroebe/svg/glyph.rb +4 -1
- data/lib/bioroebe/svg/mini_feature.rb +1 -1
- data/lib/bioroebe/svg/page.rb +18 -7
- data/lib/bioroebe/svg/svgee.rb +22 -13
- data/lib/bioroebe/svg/track.rb +20 -4
- data/lib/bioroebe/taxonomy/chart.rb +2 -2
- data/lib/bioroebe/taxonomy/class_methods.rb +5 -6
- data/lib/bioroebe/taxonomy/constants.rb +1 -1
- data/lib/bioroebe/taxonomy/info/info.rb +1 -1
- data/lib/bioroebe/taxonomy/info/is_dna.rb +1 -1
- data/lib/bioroebe/taxonomy/interactive.rb +1 -2
- data/lib/bioroebe/taxonomy/menu.rb +1 -1
- data/lib/bioroebe/taxonomy/node.rb +1 -1
- data/lib/bioroebe/taxonomy/parse_fasta.rb +4 -2
- data/lib/bioroebe/taxonomy/shared.rb +5 -4
- data/lib/bioroebe/taxonomy/taxonomy.rb +2 -4
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +3 -45
- data/lib/bioroebe/toplevel_methods/{is_on_roebe.rb → roebe.rb} +1 -11
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +6 -12
- data/lib/bioroebe/toplevel_methods/toplevel_methods.rb +5568 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +4 -3
- data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +2 -2
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +16 -9
- data/lib/bioroebe/utility_scripts/compacter/compacter.rb +4 -2
- data/lib/bioroebe/utility_scripts/compare_these_two_sequences_via_blosum.rb +119 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +11 -9
- data/lib/bioroebe/utility_scripts/{consensus_sequence.rb → consensus_sequence/consensus_sequence.rb} +13 -4
- data/lib/bioroebe/utility_scripts/{create_batch_entrez_file.rb → create_batch_entrez_file/create_batch_entrez_file.rb} +5 -5
- data/lib/bioroebe/utility_scripts/{determine_antigenic_areas.rb → determine_antigenic_areas/determine_antigenic_areas.rb} +5 -5
- data/lib/bioroebe/utility_scripts/{determine_missing_nucleotides_percentage.rb → determine_missing_nucleotides_percentage/determine_missing_nucleotides_percentage.rb} +16 -15
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +7 -7
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +1 -1
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +2 -0
- data/lib/bioroebe/utility_scripts/{dot_alignment.rb → dot_alignment/dot_alignment.rb} +3 -3
- data/lib/bioroebe/utility_scripts/{download_files_from_rebase.rb → download_files_from_rebase/download_files_from_rebase.rb} +5 -5
- data/lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb +269 -0
- data/lib/bioroebe/utility_scripts/find_gene.rb +4 -2
- data/lib/bioroebe/utility_scripts/{mirror_repeat.rb → mirror_repeat/mirror_repeat.rb} +5 -5
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +3 -3
- data/lib/bioroebe/utility_scripts/{parse_taxonomy.rb → parse_taxonomy/parse_taxonomy.rb} +15 -6
- data/lib/bioroebe/utility_scripts/{pathways.rb → pathways/pathways.rb} +4 -3
- data/lib/bioroebe/utility_scripts/{permutations.rb → permutations/permutations.rb} +3 -3
- data/lib/bioroebe/utility_scripts/punnet/punnet.rb +4 -2
- data/lib/bioroebe/utility_scripts/{show_this_dna_sequence.rb → show_this_dna_sequence/show_this_dna_sequence.rb} +1 -1
- data/lib/bioroebe/utility_scripts/showorf/showorf.rb +406 -10
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/viennarna/rnafold_wrapper.rb +5 -13
- data/lib/bioroebe/virus/individual_viruses/README.md +15 -0
- data/lib/bioroebe/virus/individual_viruses/tobacco_mosaic_virus.rb +40 -0
- data/lib/bioroebe/virus/virus.rb +76 -0
- data/lib/bioroebe/www/bioroebe.cgi +4 -3
- data/lib/bioroebe/www/embeddable_interface.rb +85 -49
- data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +6 -6
- data/lib/bioroebe/yaml/antisense/antisense.yml +2 -0
- data/lib/bioroebe/yaml/blosum/blosum50.yml +6 -0
- data/lib/bioroebe/yaml/blosum/blosum90.yml +2 -1
- data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +2 -2
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
- data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +7 -6
- data/lib/bioroebe/yaml/humans/human_chromosomes.yml +3 -3
- data/lib/bioroebe/yaml/mRNA/mRNA.yml +1 -5
- data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +1 -0
- data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +2 -1
- data/lib/bioroebe/yaml/promoters/35S.yml +3 -1
- data/lib/bioroebe/yaml/proteases/proteases.yml +3 -1
- data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -1
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +7 -7
- data/spec/testing_toplevel_method_editor.rb +1 -1
- data/spec/testing_toplevel_method_verbose.rb +1 -1
- data/test/testing_dna_to_rna_conversion.rb +1 -1
- metadata +127 -235
- data/doc/blosum.md +0 -5
- data/lib/bioroebe/base/commandline_application/aminoacids.rb +0 -33
- data/lib/bioroebe/base/commandline_application/directory.rb +0 -33
- data/lib/bioroebe/base/commandline_application/extract.rb +0 -22
- data/lib/bioroebe/base/commandline_application/misc.rb +0 -502
- data/lib/bioroebe/base/commandline_application/opn.rb +0 -47
- data/lib/bioroebe/base/commandline_application/reset.rb +0 -42
- data/lib/bioroebe/base/commandline_application/warnings.rb +0 -36
- data/lib/bioroebe/base/commandline_application/write_what_into.rb +0 -29
- data/lib/bioroebe/base/initialize.rb +0 -18
- data/lib/bioroebe/base/misc.rb +0 -129
- data/lib/bioroebe/base/namespace.rb +0 -16
- data/lib/bioroebe/base/prototype/e_and_ee.rb +0 -24
- data/lib/bioroebe/base/prototype/misc.rb +0 -114
- data/lib/bioroebe/base/prototype/mkdir.rb +0 -20
- data/lib/bioroebe/base/prototype/reset.rb +0 -36
- data/lib/bioroebe/colours/misc_colours.rb +0 -80
- data/lib/bioroebe/colours/rev.rb +0 -44
- data/lib/bioroebe/colours/sdir.rb +0 -21
- data/lib/bioroebe/colours/sfancy.rb +0 -21
- data/lib/bioroebe/colours/sfile.rb +0 -21
- data/lib/bioroebe/colours/simp.rb +0 -21
- data/lib/bioroebe/colours/swarn.rb +0 -29
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +0 -147
- data/lib/bioroebe/constants/carriage_return.rb +0 -14
- data/lib/bioroebe/constants/codon_tables.rb +0 -77
- data/lib/bioroebe/constants/database_constants.rb +0 -107
- data/lib/bioroebe/constants/files_and_directories.rb +0 -606
- data/lib/bioroebe/constants/misc.rb +0 -209
- data/lib/bioroebe/constants/newline.rb +0 -14
- data/lib/bioroebe/constants/nucleotides.rb +0 -121
- data/lib/bioroebe/constants/regex.rb +0 -28
- data/lib/bioroebe/constants/roebe.rb +0 -38
- data/lib/bioroebe/constants/row_terminator.rb +0 -16
- data/lib/bioroebe/constants/tabulator.rb +0 -14
- data/lib/bioroebe/constants/unicode.rb +0 -12
- data/lib/bioroebe/constants/urls.rb +0 -50
- data/lib/bioroebe/gui/gtk +0 -1
- data/lib/bioroebe/gui/gtk3/README.md +0 -2
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +0 -306
- data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +0 -29
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -195
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -105
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -188
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +0 -322
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +0 -181
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +0 -383
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +0 -174
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +0 -181
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +0 -101
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +0 -145
- data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +0 -23
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +0 -165
- data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +0 -166
- data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -82
- data/lib/bioroebe/gui/libui/README.md +0 -4
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +0 -116
- data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -112
- data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -60
- data/lib/bioroebe/gui/libui/controller/controller.rb +0 -116
- data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +0 -161
- data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -76
- data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +0 -135
- data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +0 -118
- data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +0 -115
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +0 -190
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +0 -134
- data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +0 -89
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +0 -113
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +0 -102
- data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +0 -94
- data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +0 -216
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +0 -192
- data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +0 -72
- data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +0 -206
- data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -140
- data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +0 -262
- data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +0 -243
- data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +0 -199
- data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +0 -519
- data/lib/bioroebe/shell/colours/colours.rb +0 -235
- data/lib/bioroebe/shell/help/help.rb +0 -25
- data/lib/bioroebe/shell/misc.rb +0 -10227
- data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +0 -56
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +0 -722
- data/lib/bioroebe/toplevel_methods/atomic_composition.rb +0 -198
- data/lib/bioroebe/toplevel_methods/base_composition.rb +0 -121
- data/lib/bioroebe/toplevel_methods/blast.rb +0 -153
- data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +0 -57
- data/lib/bioroebe/toplevel_methods/cat.rb +0 -71
- data/lib/bioroebe/toplevel_methods/chunked_display.rb +0 -92
- data/lib/bioroebe/toplevel_methods/cliner.rb +0 -81
- data/lib/bioroebe/toplevel_methods/complement.rb +0 -58
- data/lib/bioroebe/toplevel_methods/convert_global_env.rb +0 -39
- data/lib/bioroebe/toplevel_methods/databases.rb +0 -73
- data/lib/bioroebe/toplevel_methods/delimiter.rb +0 -19
- data/lib/bioroebe/toplevel_methods/digest.rb +0 -81
- data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +0 -146
- data/lib/bioroebe/toplevel_methods/e.rb +0 -20
- data/lib/bioroebe/toplevel_methods/editor.rb +0 -21
- data/lib/bioroebe/toplevel_methods/esystem.rb +0 -22
- data/lib/bioroebe/toplevel_methods/exponential_growth.rb +0 -74
- data/lib/bioroebe/toplevel_methods/extract.rb +0 -56
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +0 -269
- data/lib/bioroebe/toplevel_methods/frequencies.rb +0 -99
- data/lib/bioroebe/toplevel_methods/hamming_distance.rb +0 -60
- data/lib/bioroebe/toplevel_methods/infer.rb +0 -66
- data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +0 -101
- data/lib/bioroebe/toplevel_methods/levensthein.rb +0 -63
- data/lib/bioroebe/toplevel_methods/log_directory.rb +0 -109
- data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +0 -55
- data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +0 -88
- data/lib/bioroebe/toplevel_methods/matches.rb +0 -259
- data/lib/bioroebe/toplevel_methods/misc.rb +0 -596
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +0 -787
- data/lib/bioroebe/toplevel_methods/number_of_clones.rb +0 -63
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +0 -79
- data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +0 -236
- data/lib/bioroebe/toplevel_methods/opn.rb +0 -34
- data/lib/bioroebe/toplevel_methods/palindromes.rb +0 -155
- data/lib/bioroebe/toplevel_methods/parse.rb +0 -59
- data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +0 -68
- data/lib/bioroebe/toplevel_methods/rds.rb +0 -24
- data/lib/bioroebe/toplevel_methods/remove.rb +0 -86
- data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +0 -35
- data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +0 -68
- data/lib/bioroebe/toplevel_methods/rna_splicing.rb +0 -73
- data/lib/bioroebe/toplevel_methods/rnalfold.rb +0 -69
- data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +0 -116
- data/lib/bioroebe/toplevel_methods/shuffleseq.rb +0 -37
- data/lib/bioroebe/toplevel_methods/statistics.rb +0 -53
- data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +0 -62
- data/lib/bioroebe/toplevel_methods/three_delimiter.rb +0 -34
- data/lib/bioroebe/toplevel_methods/time_and_date.rb +0 -53
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +0 -31
- data/lib/bioroebe/toplevel_methods/truncate.rb +0 -48
- data/lib/bioroebe/toplevel_methods/url.rb +0 -36
- data/lib/bioroebe/toplevel_methods/verbose.rb +0 -59
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -31
- data/lib/bioroebe/utility_scripts/showorf/help.rb +0 -33
- data/lib/bioroebe/utility_scripts/showorf/initialize.rb +0 -52
- data/lib/bioroebe/utility_scripts/showorf/menu.rb +0 -68
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +0 -36
- data/lib/bioroebe/utility_scripts/showorf/run.rb +0 -152
- data/lib/bioroebe/utility_scripts/showorf/show.rb +0 -97
- /data/doc/{german_names_for_the_aminoacids.md → german_names_for_the_aminoacids/german_names_for_the_aminoacids.md} +0 -0
- /data/doc/{pdb_ATOM_entry.md → pdb_ATOM_entry/pdb_ATOM_entry.md} +0 -0
- /data/doc/{resources.md → resources/resources.md} +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/customized_dialog.rb +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/anti_sense_strand/anti_sense_strand.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/hamming_distance/hamming_distance.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/levensthein_distance/levensthein_distance.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/protein_to_DNA/protein_to_DNA.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.config +0 -0
- /data/lib/bioroebe/yaml/{base_composition_of_dna.yml → base_composition_of_dna/base_composition_of_dna.yml} +0 -0
- /data/lib/bioroebe/yaml/{nuclear_localization_sequences.yml → nuclear_localization_sequences/nuclear_localization_sequences.yml} +0 -0
- /data/lib/bioroebe/yaml/{talens.yml → talens/talens.yml} +0 -0
@@ -1,787 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
# Encoding: UTF-8
|
3
|
-
# frozen_string_literal: true
|
4
|
-
# =========================================================================== #
|
5
|
-
# This file will gather "dna-related" code that can reside on the
|
6
|
-
# toplevel. For example, Bioroebe.generate_random_dna_sequence()
|
7
|
-
# will reside in this file here.
|
8
|
-
# =========================================================================== #
|
9
|
-
# require 'bioroebe/toplevel_methods/nucleotides.rb'
|
10
|
-
# Bioroebe.to_rna
|
11
|
-
# Bioroebe.generate_random_dna_sequence(ARGV)
|
12
|
-
# Bioroebe.complementary_dna_strand('ATCATCATC') # => "TAGTAGTAG"
|
13
|
-
# Bioroebe.filter_away_invalid_nucleotides(ARGV)
|
14
|
-
# Bioroebe.return_all_positions_of_this_nucleotide(ARGV)
|
15
|
-
# Bioroebe.partner_nucleotide(ARGV)
|
16
|
-
# =========================================================================== #
|
17
|
-
module Bioroebe
|
18
|
-
|
19
|
-
require 'bioroebe/constants/nucleotides.rb'
|
20
|
-
require 'bioroebe/toplevel_methods/e.rb'
|
21
|
-
|
22
|
-
# ========================================================================= #
# === Bioroebe.can_base_pair_with?
#
# Returns true if the partner nucleotide of `a`, as reported by
# Bioroebe.partner_nucleotide(), is equal to `b`; false otherwise.
#
# Note that only `a` is normalized (upcased) by partner_nucleotide();
# `b` is compared as-is.
#
# Usage example:
#
#   Bioroebe.can_base_pair_with?('A','T') # => true
#   Bioroebe.can_base_pair_with?('A','G') # => false
#
# ========================================================================= #
def self.can_base_pair_with?(a, b)
  ::Bioroebe.partner_nucleotide(a) == b
end
|
34
|
-
|
35
|
-
# ========================================================================= #
# === Bioroebe.partner_nucleotide
#
# This small "table" returns the DNA nucleotide that can base-pair with
# the given input nucleotide.
#
# The input may be a single-letter String (any case) or an Array, in
# which case only the first entry is considered. The input is never
# modified. 'U' is treated like 'T' and thus yields 'A'.
#
# Returns nil for an unknown nucleotide; a nil/false input is returned
# unchanged.
#
# Since this is a method call it may be a bit slow if you have to invoke
# it repeatedly. In that case consider using the Hash returned by
# Bioroebe.partner_nucleotide_hash() instead.
#
# Usage example:
#
#   Bioroebe.partner_nucleotide('A') # => "T"
#   Bioroebe.should_match_to?('T')   # => "A"
#
# ========================================================================= #
def self.partner_nucleotide(i)
  i = i.first if i.is_a? Array # We only handle the first entry in an Array.
  return i unless i # Pass nil (or false) straight through.
  case i.to_s.upcase # Non-destructive: the caller's String stays untouched.
  when 'A'
    'T'
  when 'T', 'U'
    'A'
  when 'G'
    'C'
  when 'C'
    'G'
  end # Anything else yields nil, which denotes an illegal value.
end; self.instance_eval { alias complementary_nucleotide partner_nucleotide } # === Bioroebe.complementary_nucleotide
self.instance_eval { alias return_dna_match partner_nucleotide } # === Bioroebe.return_dna_match
self.instance_eval { alias should_match_to? partner_nucleotide } # === Bioroebe.should_match_to?
|
75
|
-
|
76
|
-
# ========================================================================= #
# === Bioroebe.contains_an_inverted_repeat?
#
# We assume an inverted repeat to exist if at the least 2 consecutive
# nucleotides of the first half can base-pair with the reversed second
# half, so a total of 4 matching nucleotides. This assumption may not
# necessarily be correct and may have to be fine-tuned at a later time.
#
# The input is split in the middle; for an odd length the second half
# is one nucleotide longer. Returns true or false.
#
# For testing purpose, the sequence 'TTACGAAAAAACGTAA' can be used.
# ========================================================================= #
def self.contains_an_inverted_repeat?(
    i = 'TTACGAAAAAACGTAA' # This is in the 5'→3' direction.
  )
  half = i.size / 2
  left_side = i[0, half]
  # Work via the reverse sequence of the second half, so that position n
  # of the left side faces position n counted from the 3' end.
  right_side_reversed = i[half .. -1].reverse
  longest_stretch = 0
  current_stretch = 0
  left_side.each_char.with_index {|this_nucleotide, index|
    if can_base_pair_with?(right_side_reversed[index], this_nucleotide)
      current_stretch += 1
      longest_stretch = current_stretch if current_stretch > longest_stretch
    else
      current_stretch = 0 # The stretch was interrupted; start anew.
    end
  }
  longest_stretch >= 2
end
|
117
|
-
|
118
|
-
# ========================================================================= #
# === Bioroebe.complementary_rna_strand
#
# This method will return the corresponding (complementary) RNA strand.
#
# Every character is looked up in the Hash returned by
# Bioroebe.partner_nucleotide_hash(); afterwards every 'T' in the
# result is replaced by 'U'. Characters without an entry in that Hash
# are dropped from the result. If an Array is given, only its first
# entry is used.
#
# Usage example:
#
#   Bioroebe.complementary_rna_strand('ATCATCATC') # => "UAGUAGUAG"
#
# ========================================================================= #
def self.complementary_rna_strand(i)
  i = i.first if i.is_a? Array
  lookup_table = partner_nucleotide_hash
  i.each_char.map {|nucleotide| lookup_table[nucleotide] }.join.tr('T','U')
end; self.instance_eval { alias complementary_rna complementary_rna_strand } # === Bioroebe.complementary_rna
|
136
|
-
|
137
|
-
# ========================================================================= #
# === Bioroebe.is_a_purine?
#
# This method will return true if the given input is a purine, and
# false otherwise.
#
# The argument should be a single uppercase letter (a String) such as
# 'A' or 'G'. If an Array is given, only its first entry is checked.
# The comparison is case-sensitive.
#
# In nucleic acids, two types of nucleobases are purine derivatives
# and would, thus, return true via this method:
#
#   - adenine (A)
#   - guanine (G)
#
# ========================================================================= #
def self.is_a_purine?(
    i = 'A'
  )
  i = i.first if i.is_a? Array
  %w( A G ).include?(i) # T, C and U (and anything else) yield false.
end
|
164
|
-
|
165
|
-
# ========================================================================= #
# === Bioroebe.is_a_pyrimidine?
#
# This method will return true if the given input is a pyrimidine, and
# false otherwise.
#
# The argument should be a single uppercase letter (a String). If an
# Array is given, only its first entry is checked. The comparison is
# case-sensitive.
#
# In DNA and RNA we may find these pyrimidine derivatives:
#
#   cytosine (C), thymine (T), and uracil (U)
#
# URL for explanations is at:
#
#   https://en.wikipedia.org/wiki/Pyrimidine
#
# ========================================================================= #
def self.is_a_pyrimidine?(
    i = 'C'
  )
  i = i.first if i.is_a? Array
  %w( C T U ).include?(i) # A and G (and anything else) yield false.
end
|
191
|
-
|
192
|
-
# ========================================================================= #
# === Bioroebe.partner_nucleotide_hash
#
# This method will return the constant HASH_DNA_NUCLEOTIDES, a Hash,
# which should be faster for repeated lookups than calling
# Bioroebe.partner_nucleotide() each time.
#
# The very same object is returned on every call, so callers should
# not modify it.
# ========================================================================= #
def self.partner_nucleotide_hash
  HASH_DNA_NUCLEOTIDES
end
|
200
|
-
|
201
|
-
# ========================================================================= #
# === Bioroebe.nucleotide_permutations
#
# This method will return all words of a given length that can be built
# from the given nucleotides (with repetition), as a sorted Array
# without duplicates - e. g. "AA", "AC", "AG", "AT" and so forth for a
# level of 2.
#
# The first argument, level, is the length of each word. A level of 2
# yields all 16 dinucleotides, a level of 3 all 64 trinucleotides and
# so forth. A level smaller than 1 yields an empty Array.
#
# The second argument is the alphabet to use, as an Array of Strings.
#
# Since as of August 2019, this method can also read from a local
# file - this was added to specifically solve a problem of the
# ROSALIND challenges. In this case the first argument is the path to
# that file; its first line holds the space-separated alphabet and
# its last line holds the level. The task can be seen here:
#
#   https://rosalind.info/problems/lexf/
#
# Usage example:
#
#   Bioroebe.nucleotide_permutations
#
# ========================================================================= #
def self.nucleotide_permutations(
    level       = 2,
    nucleotides = DNA_NUCLEOTIDES # => ["A", "T", "G", "C"]
  )
  if level and File.file?(level.to_s)
    lines = File.read(level.to_s).strip.split("\n")
    nucleotides = lines.first.to_s.strip.split(' ')
    level = lines.last
  end
  level = level.to_i # We need an Integer past this point.
  return [] if level < 1
  nucleotides.repeated_permutation(level).map(&:join).uniq.sort
end
|
245
|
-
|
246
|
-
# ========================================================================= #
# === Bioroebe.return_all_positions_of_this_nucleotide
#
# This method will return all positions of a given nucleotide within a
# larger sequence, as an Array of Integers. The positions start to
# count at 1, as is customary for nucleotides.
#
# If the second argument is nil then 'U' is searched for. If the first
# argument is nil (or false) then nil is returned.
#
# For example: if the input String is 'AUGCUUCAGAAAGGUCUUACG' and we
# search for 'U' then this method must return an Array that holds
# [2, 5, 6, 15, 17, 18].
# ========================================================================= #
def self.return_all_positions_of_this_nucleotide(
    input_string    = 'AUGCUUCAGAAAGGUCUUACG',
    this_nucleotide = 'U'
  )
  this_nucleotide = 'U' if this_nucleotide.nil? # Set the default.
  return nil unless input_string
  positions = []
  input_string.each_char.with_index(1) {|nucleotide, position|
    positions << position if nucleotide == this_nucleotide
  }
  positions
end
|
272
|
-
|
273
|
-
# ========================================================================= #
# === Bioroebe.only_nucleotides?
#
# This filter-method will return true if every character of the given
# input is a key of the constant HASH_DNA_NUCLEOTIDES, and false
# otherwise. An Array is joined into one String first. An empty input
# yields true.
#
# Note that, despite its name, HASH_DNA_NUCLEOTIDES is said to include
# RNA as well (NOTE: the constant is defined elsewhere - confirm there).
#
# Usage examples:
#
#   Bioroebe.only_nucleotides? 'ATGCG'  # => true
#   Bioroebe.only_nucleotides? 'ATGCGi' # => false
#
# ========================================================================= #
def self.only_nucleotides?(i)
  i = i.join.strip if i.is_a? Array
  i.each_char.all? {|character| HASH_DNA_NUCLEOTIDES.key?(character) }
end
|
296
|
-
|
297
|
-
# ========================================================================= #
# === Bioroebe.is_reverse_complement?
#
# Returns true if Bioroebe.complement() of the given sequence is equal
# to the reversed sequence - in other words, if the sequence is
# identical to its own reverse complement.
#
# This method was specifically added to solve a problem at Rosalind.
#
# Usage example:
#
#   Bioroebe.is_reverse_complement?('GCATGC')  # => true
#   Bioroebe.is_reverse_complement?('GCATGCA') # => false
#
# ========================================================================= #
def self.is_reverse_complement?(i)
  ::Bioroebe.complement(i) == i.reverse
end
|
311
|
-
|
312
|
-
# ========================================================================= #
# === Bioroebe.generate_random_dna_sequence
#
# This method will "generate" a random DNA sequence and return it as
# a String.
#
# The first argument is the desired length. It may be an Integer, a
# String, the Symbol :default (meaning 250) or an Array, which will
# be joined first (this is how ARGV is handled).
#
# The second argument is optional and may be:
#
#   - an empty Hash (default): every nucleotide of
#     Bioroebe::DNA_NUCLEOTIDES is equally likely
#   - a Hash with the keys :A, :T, :C and :G, specifying the percentage
#     likelihood for each nucleotide. Missing keys count as 0. The
#     values should add up to 100; if they add up to less, then a
#     message is shown for every draw that matched no nucleotide and
#     the result will be shorter than requested.
#   - a String that acts as a pool: its characters are shuffled and
#     consumed one by one; the pool is refilled whenever it ran empty.
#     An empty String yields an empty result.
#
# Usage examples:
#
#   Bioroebe.random_dna 15 # => "TTGGTAAGCTCTTTA"
#   Bioroebe.random_dna 25 # => "TTAGCACAAGCATGGACGGACCAGA"
#   Bioroebe.random_dna(50, { A: 10, T: 10, C: 10, G: 70}) # => "GGGGTGGGGAGGGTATGCGGAGGAAGGGCGGGAAGGGCGGGGGCTGGGCG"
#   Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "TGAGGGGGGGGGTGGGAGGG"
#   Bioroebe.random_dna(20, 'ATGGGGGGGG') # => "GGTAGGGGGGGGTAGGGGGG"
#
# ========================================================================= #
def self.generate_random_dna_sequence(
    i                                  = ARGV,
    optional_hash_with_the_frequencies = {} # ← This may be a String too, mind you.
  )
  result = ''.dup # This String will be returned by this method.
  i = i.join.strip if i.is_a? Array
  i = 250 if i == :default
  i = i.to_i # This is "n times".
  if optional_hash_with_the_frequencies.is_a? String
    # ===================================================================== #
    # First handle the case where the user passed a String:
    # ===================================================================== #
    pool = []
    i.times {
      pool = optional_hash_with_the_frequencies.chars.shuffle if pool.empty?
      break if pool.empty? # An empty String can never yield a nucleotide.
      result << pool.pop
    }
  elsif optional_hash_with_the_frequencies.empty?
    # ===================================================================== #
    # This is the default clause.
    # ===================================================================== #
    nucleotides = Bioroebe::DNA_NUCLEOTIDES # The four DNA nucleotides.
    i.times { result << nucleotides.sample }
  else
    # ===================================================================== #
    # Else, the user wants to use a frequency hash. We compute the
    # cumulative upper bounds once, in the order A, T, C, G.
    # ===================================================================== #
    hash = optional_hash_with_the_frequencies
    upper_bound_for_A = (hash[:A] || 0)
    upper_bound_for_T = upper_bound_for_A + (hash[:T] || 0)
    upper_bound_for_C = upper_bound_for_T + (hash[:C] || 0)
    upper_bound_for_G = upper_bound_for_C + (hash[:G] || 0)
    i.times {
      percentage = rand(100)+1 # A number between 1 and 100.
      match =
        if    percentage <= upper_bound_for_A then 'A'
        elsif percentage <= upper_bound_for_T then 'T'
        elsif percentage <= upper_bound_for_C then 'C'
        elsif percentage <= upper_bound_for_G then 'G'
        end
      if match
        result << match
      else
        e 'Not found a match for '+percentage.to_s
      end
    }
  end
  result
end; self.instance_eval { alias random_dna generate_random_dna_sequence } # === Bioroebe.random_dna
self.instance_eval { alias generate_dna generate_random_dna_sequence } # === Bioroebe.generate_dna
self.instance_eval { alias create_random_dna_sequence generate_random_dna_sequence } # === Bioroebe.create_random_dna_sequence
self.instance_eval { alias create_random_dna generate_random_dna_sequence } # === Bioroebe.create_random_dna
|
401
|
-
|
402
|
-
# ========================================================================= #
# === Bioroebe.filter_away_invalid_nucleotides
#
# This method can be used to filter away invalid nucleotides. An "invalid"
# nucleotide is, for example, if you work with DNA sequences, any character
# that is not allowed to be part of DNA. For example, Uracil, which can
# be found (almost exclusively) only in RNA.
#
# The given input is upcased first; then every character that is not
# A, T, C or G (or U, if uracil shall be preserved) is removed. This
# includes whitespace, digits and punctuation. The input itself is
# not modified; a new String is returned.
#
# The second argument may be true/false or one of the Symbols
# :preserve_uracil and :preserve_nothing.
#
# Usage example:
#
#   Bioroebe.filter_away_invalid_nucleotides 'ATGCCGGAGGAGANNN' # => "ATGCCGGAGGAGA"
#
# ========================================================================= #
def self.filter_away_invalid_nucleotides(
    i,
    preserve_uracil = false
  )
  i = i.join(' ').strip if i.is_a? Array
  case preserve_uracil
  when :preserve_uracil
    preserve_uracil = true
  when :preserve_nothing
    preserve_uracil = false
  end
  allowed_nucleotides = preserve_uracil ? 'ATCGU' : 'ATCG'
  # The leading ^ negates the set: delete everything that is not allowed.
  i.to_s.upcase.delete('^'+allowed_nucleotides)
end
|
439
|
-
|
440
|
-
# ========================================================================= #
# === Bioroebe.input_as_dna
#
# This method will only accept input that is DNA, that is, the short
# letter variant (thus, A, T, C or G). Any other input will be
# stripped away, aka discarded, so this method acts as a filter -
# a forward-filter for DNA.
#
# If an Array is given then only its first entry is used. The input
# itself is not modified: the upcasing and filtering is delegated to
# Bioroebe.filter_away_invalid_nucleotides(), which returns a new
# String.
#
# Usage example:
#
#   Bioroebe.input_as_dna 'UUTGAGGACCA' # => "TGAGGACCA"
#
# ========================================================================= #
def self.input_as_dna(i)
  i = i.first if i.is_a? Array
  # ======================================================================= #
  # The next method is defined in this file here.
  # ======================================================================= #
  filter_away_invalid_nucleotides(i, :preserve_nothing)
end
|
467
|
-
|
468
|
-
# ========================================================================= #
# === Bioroebe.to_rna
#
# This method will convert DNA into RNA, by replacing every (uppercase)
# 'T' with 'U'. A new String is returned; the input is not modified.
#
# If an Array is given then only its first entry is used. If the input
# contains a '.' and refers to an existing local file, then the content
# of that file is converted instead.
#
# The second argument, upcase_me, is currently not used by this method;
# it is retained so that existing callers keep on working.
#
# Usage example:
#
#   Bioroebe.to_rna 'ATGACCG' # => "AUGACCG"
#
# ========================================================================= #
def self.to_rna(
    i         = nil,
    upcase_me = true
  )
  i = i.first if i.is_a? Array
  i = i.to_s
  if i.include?('.') and File.file?(i) # Assume input such as 'foobar.md'.
    i = File.read(i)
  end
  i.tr('T','U')
end
|
492
|
-
|
493
|
-
# ========================================================================= #
# === Bioroebe.to_dna
#
# This method will essentially replace all "U" with "T", from the given
# input argument (which can be a String or an Array; of an Array only
# the first entry is used).
#
# The input is passed through Bioroebe.filter_away_invalid_nucleotides()
# first, which upcases it and removes invalid characters while
# preserving uracil. The result is thus always uppercase.
#
# The second argument, upcase_me, has no effect on the result since
# the filter step already upcases; it is retained so that existing
# callers keep on working.
#
# To test this method, do:
#
#   Bioroebe.to_dna 'ACCACACCAUUUCCCAUGGGUGUGUGG' # => "ACCACACCATTTCCCATGGGTGTGTGG"
#
# ========================================================================= #
def self.to_dna(
    i         = nil,
    upcase_me = true
  )
  i = i.first if i.is_a? Array
  filtered = filter_away_invalid_nucleotides(i.to_s, :preserve_uracil) # A module-method.
  filtered.tr('U','T') # Sanitize all U into T.
end
|
519
|
-
|
520
|
-
# ========================================================================= #
# === Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies
#
# Generates a random nucleotide sequence of the given length, where
# each nucleotide is drawn according to the given frequencies.
#
# The first argument denotes how many nucleotides to use; the Symbol
# :default means 500.
#
# The second argument to this method should be a Hash with the keys
# :A, :C, :G and :T. Missing keys count as 0. The values are treated
# as relative weights, so the result always has the requested length
# even if the values do not add up to exactly 1.0. If all weights are
# 0 then an empty String is returned.
#
# The default output may be a String such as this one here:
#
#   AACTGAACATTTTAGGAGATATCAAGACCCTCTGATTCTCAAGGAATAATTAGCTAATTT
#
# Usage example:
#
#   Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies(:default, { A: 0.25, C: 0.25, G: 0.25, T: 0.25 })
#
# ========================================================================= #
def self.generate_nucleotide_sequence_based_on_these_frequencies(
    n_nucleotides    = 1061, # Denote how many nucleotides to use.
    hash_frequencies = {
      A: 0.3191430,
      C: 0.2086633,
      G: 0.2580345,
      T: 0.2141593
    }
  )
  n_nucleotides = 500 if n_nucleotides == :default
  result = ''.dup
  # ======================================================================= #
  # Cumulative upper bounds, in the order A, C, G, T.
  # ======================================================================= #
  upper_bound_for_A = (hash_frequencies[:A] || 0)
  upper_bound_for_C = upper_bound_for_A + (hash_frequencies[:C] || 0)
  upper_bound_for_G = upper_bound_for_C + (hash_frequencies[:G] || 0)
  total             = upper_bound_for_G + (hash_frequencies[:T] || 0)
  return result unless total > 0
  n_nucleotides.to_i.times {
    use_this_number = rand * total # A Float in the range 0 ... total.
    result <<
      if    use_this_number < upper_bound_for_A then 'A'
      elsif use_this_number < upper_bound_for_C then 'C'
      elsif use_this_number < upper_bound_for_G then 'G'
      else 'T'
      end
  }
  result
end; self.instance_eval { alias generate_nucleotide_sequence_based_on_frequencies generate_nucleotide_sequence_based_on_these_frequencies } # === Bioroebe.generate_nucleotide_sequence_based_on_frequencies
|
569
|
-
|
570
|
-
# ========================================================================= #
# === Bioroebe.n_transversions
#
# Counts the transversions between two Strings, compared position by
# position: a transversion is a position where a pyrimidine faces a
# purine, or a purine faces a pyrimidine.
#
# Only the positions of string1 are looked at; equal characters and
# characters that are neither purine nor pyrimidine are not counted.
#
# The method will return an Integer value.
# ========================================================================= #
def self.n_transversions(
    string1 = 'ATGAAA',
    string2 = 'ATGCTG'
  )
  string1.chars.zip(string2.chars).count {|char1, char2|
    # Equal, so it can not be a transition or transversion.
    next false if char1 == char2
    (is_a_pyrimidine?(char1) and is_a_purine?(char2)) or
    (is_a_purine?(char1) and is_a_pyrimidine?(char2))
  }
end
|
597
|
-
|
598
|
-
# ========================================================================= #
# === Bioroebe.n_transitions
#
# In genetics, a transition is a point mutation that changes a purine
# nucleotide to another purine (A ←→ G) or a pyrimidine nucleotide
# to another pyrimidine (C ←→ T).
#
# The two Strings are compared position by position; only the
# positions of string1 are looked at. Equal characters are not counted.
#
# The method Bioroebe.n_transitions will return an Integer value.
#
# It expects two Strings as arguments.
#
# Usage example:
#
#   Bioroebe.n_transitions('ATGAAAAACA', 'ATGCTGATGG') # => 2
#
# ========================================================================= #
def self.n_transitions(
    string1 = 'ATGAAA',
    string2 = 'ATGCTG'
  )
  string1.chars.zip(string2.chars).count {|char1, char2|
    # Equal, so it can not be a transition or transversion.
    next false if char1 == char2
    (is_a_pyrimidine?(char1) and is_a_pyrimidine?(char2)) or
    (is_a_purine?(char1) and is_a_purine?(char2))
  }
end
|
638
|
-
|
639
|
-
# ========================================================================= #
# === Bioroebe.transitions_to_transversions_ratio
#
# This method will calculate the transition-to-transversion ratio
# between two Strings of equal length, rounded to the given number
# of positions (third argument; 11 by default).
#
# The second argument, called `string2`, can be nil, in which case
# we may re-set it to a value if the input to string1 is a file. The
# reason for this is that we may want to read both string1 and
# string2 from a file, if available. If it is a file then it is
# typically assumed to be a FASTA (.fasta or .fa) file; it is parsed
# via Bioroebe::ParseFasta and, if it holds more than one sequence,
# the first two sequences are used.
#
# Note that the ratio is a Float division; without any transversion
# the result is thus not a finite number.
#
# Usage examples:
#
#   dataset = Bioroebe.transitions_to_transversions_ratio('/XXX.txt')
#   dataset = Bioroebe.transitions_to_transversions_ratio('/TRANSITIONS.txt')
#
# ========================================================================= #
def self.transitions_to_transversions_ratio(
    string1, string2 = nil, round_to_n_positions = 11
  )
  if File.file? string1
    # ===================================================================== #
    # We can read in fasta-data here. The parser is only loaded when
    # it is really needed.
    # ===================================================================== #
    require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
    sequences = ::Bioroebe::ParseFasta.new(string1) { :be_quiet }.values
    if sequences.size > 1
      string1 = sequences[0]
      string2 = sequences[1]
    end
  end
  transitions   = n_transitions(string1, string2).to_f
  transversions = n_transversions(string1, string2).to_f
  (transitions / transversions).round(round_to_n_positions)
end
|
676
|
-
|
677
|
-
# ========================================================================= #
# === Bioroebe.generate_random_rna_sequence
#
# Returns a random RNA sequence, as a String, where every nucleotide
# is sampled from Bioroebe::RNA_NUCLEOTIDES.
#
# The input-argument should be a number, an Integer, such as 10. It
# denotes the length of the sequence. An Array (such as ARGV) is
# joined via ' ' first and then converted into an Integer.
#
# Usage example:
#
#   Bioroebe.generate_random_rna_sequence(10)
#
# ========================================================================= #
def self.generate_random_rna_sequence(i = ARGV)
  i = i.join(' ').strip if i.is_a? Array
  allowed_nucleotides = Bioroebe::RNA_NUCLEOTIDES # The allowed RNA-nucleotides.
  result = ''.dup
  i.to_s.to_i.times { result << allowed_nucleotides.sample }
  result
end
|
698
|
-
|
699
|
-
end
|
700
|
-
|
701
|
-
if __FILE__ == $PROGRAM_NAME
|
702
|
-
require 'colours/autoinclude'
|
703
|
-
e
|
704
|
-
e 'Next testing whether the following is a purine or a pyrimidine:'
|
705
|
-
e
|
706
|
-
e 'A T C G U'
|
707
|
-
e
|
708
|
-
p Bioroebe.is_a_purine?('A')
|
709
|
-
p Bioroebe.is_a_purine?('T')
|
710
|
-
p Bioroebe.is_a_purine?('C')
|
711
|
-
p Bioroebe.is_a_purine?('G')
|
712
|
-
p Bioroebe.is_a_purine?('U')
|
713
|
-
p Bioroebe.is_a_pyrimidine?('A')
|
714
|
-
p Bioroebe.is_a_pyrimidine?('T')
|
715
|
-
p Bioroebe.is_a_pyrimidine?('C')
|
716
|
-
p Bioroebe.is_a_pyrimidine?('G')
|
717
|
-
p Bioroebe.is_a_pyrimidine?('U')
|
718
|
-
|
719
|
-
pp Bioroebe.return_all_positions_of_this_nucleotide(ARGV.first, ARGV[1])
|
720
|
-
if ARGV.empty?
|
721
|
-
puts Bioroebe.partner_nucleotide('A')
|
722
|
-
else
|
723
|
-
puts Bioroebe.partner_nucleotide(ARGV)
|
724
|
-
end
|
725
|
-
|
726
|
-
_ = 'ATGCG'; print 'Does '+_+' contain only nucleotides? '; p Bioroebe.only_nucleotides? _ # => true
|
727
|
-
_ = 'ATGCGi'; print 'Does '+_+' contain only nucleotides? '; p Bioroebe.only_nucleotides? _ # => false
|
728
|
-
|
729
|
-
e
|
730
|
-
e 'Next handling Bioroebe.nucleotide_permutations()'
|
731
|
-
e
|
732
|
-
if ARGV.empty?
|
733
|
-
pp Bioroebe.nucleotide_permutations
|
734
|
-
pp Bioroebe.nucleotide_permutations(3)
|
735
|
-
else
|
736
|
-
result = Bioroebe.nucleotide_permutations(ARGV.first)
|
737
|
-
result.each {|entry|
|
738
|
-
puts entry
|
739
|
-
}
|
740
|
-
end
|
741
|
-
|
742
|
-
Bioroebe.generate_random_dna_sequence(ARGV)
|
743
|
-
|
744
|
-
input_sequence = 'BBBATGCCGGAGGAGANNN'
|
745
|
-
|
746
|
-
e
|
747
|
-
e Colours.rev+'The input sequence used was:'
|
748
|
-
e
|
749
|
-
e ' '+Colours.steelblue(input_sequence)
|
750
|
-
e
|
751
|
-
e 'The output sequence is:'
|
752
|
-
e
|
753
|
-
e ' '+Colours.steelblue(Bioroebe.filter_away_invalid_nucleotides(input_sequence))
|
754
|
-
e # filter_away_invalid_nucleotides
|
755
|
-
e
|
756
|
-
e Bioroebe.complementary_dna_strand(ARGV)
|
757
|
-
e
|
758
|
-
e Bioroebe.generate_nucleotide_sequence_based_on_these_frequencies
|
759
|
-
e
|
760
|
-
|
761
|
-
# ========================================================================= #
|
762
|
-
# The code here can be used as test the transitions-and-transversions
|
763
|
-
# functionality.
|
764
|
-
# ========================================================================= #
|
765
|
-
alias e puts
|
766
|
-
e 'The two strings that are to be compared, are:'
|
767
|
-
e
|
768
|
-
e ' ATGAAA'
|
769
|
-
e ' ATGCTG'
|
770
|
-
e
|
771
|
-
e 'n transition events?: '+Bioroebe.n_transitions.to_s
|
772
|
-
e 'n transversion events?: '+Bioroebe.n_transversions.to_s
|
773
|
-
e
|
774
|
-
string1 = 'GCAACGCACAACGAAAACCCTTAGGGACTGGATTATTTCGTGATCGTTGTAGTTATTGGAAGTACGGGCATCAACCCAGTT'
|
775
|
-
string2 = 'TTATCTGACAAAGAAAGCCGTCAACGGCTGGATAATTTCGCGATCGTGCTGGTTACTGGCGGTACGAGTGTTCCTTTGGGT'
|
776
|
-
e string1
|
777
|
-
e string2
|
778
|
-
e
|
779
|
-
e 'The ratio is:'
|
780
|
-
e
|
781
|
-
e " #{Bioroebe.transitions_to_transversions_ratio(string1, string2).to_s}"
|
782
|
-
e
|
783
|
-
end # returnallpositionsofthisnucleotide AUGCUUCAGAAAGGUCUUACG # => [2, 5, 6, 15, 17, 18]
|
784
|
-
# returnallpositionsofthisnucleotide AUGCUUCAGAGGGGUCUUACG G # => [3, 9, 11, 12, 13, 14, 21]
|
785
|
-
# nucleotide_permutations
|
786
|
-
# nucleotide_permutations 2
|
787
|
-
# nucleotide_permutations /PERMUTATIONS.md
|