bioroebe 0.12.24 → 0.13.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.md +7 -8
- data/README.md +566 -354
- data/bin/all_positions_of_this_nucleotide +1 -1
- data/bin/aminoacid_frequencies +1 -1
- data/bin/automatically_rename_this_fasta_file +1 -1
- data/bin/base_composition +1 -1
- data/bin/batch_create_windows_executables +1 -1
- data/bin/bioroebe +12 -1
- data/bin/bioroebe_cat +7 -0
- data/bin/calculate_exponential_growth +7 -0
- data/bin/calculate_n50_value +1 -1
- data/bin/calculate_the_frequencies_of_this_species +7 -0
- data/bin/chunked_display +1 -1
- data/bin/codon_frequency +1 -1
- data/bin/codon_to_aminoacid +1 -1
- data/bin/colourize_this_fasta_sequence +1 -1
- data/bin/complementary_dna_strand +1 -1
- data/bin/complementary_rna_strand +1 -1
- data/bin/consensus_sequence +1 -1
- data/bin/dna_to_rna +1 -1
- data/bin/downcase_chunked_display +1 -1
- data/bin/download_this_pdb +1 -1
- data/bin/fasta_index +1 -1
- data/bin/fetch_data_from_uniprot +1 -1
- data/bin/filter_away_invalid_nucleotides +1 -1
- data/bin/find_substring +1 -1
- data/bin/input_as_dna +1 -1
- data/bin/is_palindrome +1 -1
- data/bin/leading_five_prime +1 -1
- data/bin/longest_ORF +1 -1
- data/bin/longest_substring +1 -1
- data/bin/open_reading_frames +1 -1
- data/bin/partner_nucleotide +1 -1
- data/bin/plain_palindrome +1 -1
- data/bin/random_dna_sequence +1 -1
- data/bin/random_sequence +1 -1
- data/bin/raw_hamming_distance +1 -1
- data/bin/return_longest_substring_via_LCS_algorithm +1 -1
- data/bin/reverse_sequence +1 -1
- data/bin/short_aminoacid_letter_from_long_aminoacid_name +1 -1
- data/bin/show_atomic_composition +1 -1
- data/bin/show_fasta_header +1 -1
- data/bin/show_nucleotide_sequence +1 -1
- data/bin/show_this_dna_sequence +1 -1
- data/bin/show_time_now +7 -0
- data/bin/sort_aminoacid_based_on_its_hydrophobicity +1 -1
- data/bin/strict_filter_away_invalid_aminoacids +1 -1
- data/{lib/bioroebe/base/reset.rb → bin/three_delimiter} +9 -6
- data/bin/three_to_one +1 -1
- data/bin/to_rna +1 -1
- data/bin/trailing_three_prime +1 -1
- data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +1 -1
- data/bioroebe.gemspec +6 -7
- data/doc/README.gen +534 -322
- data/doc/blosum/blosum.md +4 -0
- data/doc/compatibility/BIO_PHP.md +20 -18
- data/doc/compatibility/README.md +2 -3
- data/doc/compatibility/emboss.md +5 -3
- data/doc/{extensive_usage_example.md → extensive_usage_example/extensive_usage_example.md} +4 -2
- data/doc/{instructions_for_the_taxonomy_subproject.md → instructions_for_the_taxonomy_subproject/instructions_for_the_taxonomy_subproject.md} +36 -33
- data/doc/{legacy_paths.md → legacy_paths/legacy_paths.md} +3 -3
- data/doc/statistics/statistics.md +12 -10
- data/doc/todo/bioroebe_GUI_todo.md +6 -1
- data/doc/todo/bioroebe_java_todo.md +3 -2
- data/doc/todo/bioroebe_todo.md +328 -310
- data/doc/{using_biomart.md → using_biomart/using_biomart.md} +7 -3
- data/lib/bioroebe/abstract/features.rb +0 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -1
- data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +3 -1
- data/lib/bioroebe/aminoacids/codon_percentage.rb +18 -10
- data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +5 -2
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +90 -64
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -3
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +2 -2
- data/lib/bioroebe/annotations/create_annotation_format.rb +2 -2
- data/lib/bioroebe/base/base.rb +101 -6
- data/lib/bioroebe/base/base_module/base_module.rb +9 -1
- data/lib/bioroebe/base/colours.rb +3 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +80 -44
- data/lib/bioroebe/base/commandline_application/README.md +1 -1
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +661 -22
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +2 -1
- data/lib/bioroebe/base/infer_the_namespace_module/infer_the_namespace_module.rb +37 -0
- data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +1 -6
- data/lib/bioroebe/base/prototype/prototype.rb +155 -14
- data/lib/bioroebe/biomart/attribute.rb +1 -1
- data/lib/bioroebe/biomart/biomart.rb +8 -9
- data/lib/bioroebe/biomart/server.rb +1 -1
- data/lib/bioroebe/blosum/blosum.rb +2 -2
- data/lib/bioroebe/calculate/calculate_blosum_score.rb +5 -3
- data/lib/bioroebe/calculate/calculate_gc_content.rb +1 -1
- data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +5 -3
- data/lib/bioroebe/calculate/calculate_melting_temperature.rb +2 -10
- data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +6 -15
- data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +4 -2
- data/lib/bioroebe/cell/cell.rb +3 -2
- data/lib/bioroebe/cell/specialized_cells/B_cell.rb +60 -0
- data/lib/bioroebe/cell/specialized_cells/Macrophage.rb +60 -0
- data/lib/bioroebe/cell/specialized_cells/README.md +5 -0
- data/lib/bioroebe/cell/specialized_cells/T_cell.rb +60 -0
- data/lib/bioroebe/cleave_and_digest/cleave.rb +3 -1
- data/lib/bioroebe/cleave_and_digest/digestion.rb +1 -1
- data/lib/bioroebe/codon_tables/frequencies/10090_Mus_musculus.yml +93 -0
- data/lib/bioroebe/codon_tables/frequencies/107243_Thlaspi_caerulescens.yml +72 -0
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -2
- data/lib/bioroebe/codons/codon_table.rb +10 -2
- data/lib/bioroebe/codons/codons.rb +3 -3
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +18 -15
- data/lib/bioroebe/codons/determine_optimal_codons.rb +1 -1
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +4 -2
- data/lib/bioroebe/codons/show_codon_tables.rb +1 -1
- data/lib/bioroebe/codons/show_codon_usage.rb +1 -2
- data/lib/bioroebe/codons/show_this_codon_table.rb +2 -2
- data/lib/bioroebe/codons/start_codons.rb +7 -3
- data/lib/bioroebe/colours/colour_schemes/README.md +1 -1
- data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +3 -3
- data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +3 -3
- data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +4 -3
- data/lib/bioroebe/colours/colour_schemes/helix.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/score.rb +13 -2
- data/lib/bioroebe/colours/colour_schemes/strand.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/turn.rb +3 -1
- data/lib/bioroebe/colours/colour_schemes/zappo.rb +1 -1
- data/lib/bioroebe/{toplevel_methods/colourize_related_methods.rb → colours/colourize_related_code.rb} +1 -3
- data/lib/bioroebe/colours/colourize_sequence.rb +3 -1
- data/lib/bioroebe/colours/colours.rb +172 -15
- data/lib/bioroebe/configuration/configuration.rb +1 -1
- data/lib/bioroebe/constants/GUIs.rb +2 -2
- data/lib/bioroebe/constants/constants.rb +1349 -0
- data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +8 -13
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +9 -3
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +11 -10
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +1 -1
- data/lib/bioroebe/count/count_at.rb +2 -1
- data/lib/bioroebe/databases/download_taxonomy_database.rb +1 -1
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +2 -2
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +2 -2
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +2 -2
- data/lib/bioroebe/electron_microscopy/flipy.rb +2 -2
- data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +3 -11
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +6 -6
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +6 -6
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +2 -2
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +1 -1
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +1 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +4 -3
- data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +1 -1
- data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +4 -3
- data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +3 -3
- data/lib/bioroebe/ext/main.cpp +0 -1
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +3 -3
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +8 -14
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +37 -11
- data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +2 -2
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +5 -13
- data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -1
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +3 -6
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +3 -3
- data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +20 -11
- data/lib/bioroebe/genome/genome.rb +1 -1
- data/lib/bioroebe/genomes/genome_pattern.rb +17 -16
- data/lib/bioroebe/genomes/genome_retriever.rb +4 -2
- data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +10 -13
- data/lib/bioroebe/gui/universal_widgets/alignment/alignment.rb +557 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/aminoacid_composition.rb +498 -198
- data/lib/bioroebe/gui/universal_widgets/anti_sense_strand/anti_sense_strand.rb +665 -0
- data/lib/bioroebe/gui/universal_widgets/blosum_matrix_viewer/blosum_matrix_viewer.rb +329 -0
- data/lib/bioroebe/gui/universal_widgets/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +423 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/controller/controller.rb +170 -118
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +277 -215
- data/lib/bioroebe/gui/{shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb → universal_widgets/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb} +297 -107
- data/lib/bioroebe/gui/universal_widgets/fasta_table_widget/fasta_table_widget.rb +643 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/format_converter/format_converter.rb +236 -164
- data/lib/bioroebe/gui/universal_widgets/gene/gene.rb +278 -0
- data/lib/bioroebe/gui/universal_widgets/hamming_distance/hamming_distance.rb +646 -0
- data/lib/bioroebe/gui/{shared_code/levensthein_distance/levensthein_distance_module.rb → universal_widgets/levensthein_distance/levensthein_distance.rb} +313 -88
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/nucleotide_analyser/nucleotide_analyser.rb +281 -189
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/parse_pdb_file/parse_pdb_file.rb +265 -149
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/primer_design_widget/primer_design_widget.rb +337 -263
- data/lib/bioroebe/gui/universal_widgets/protein_to_DNA/protein_to_DNA.rb +408 -0
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/random_sequence/random_sequence.rb +245 -187
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.rb +207 -137
- data/lib/bioroebe/gui/universal_widgets/shell/shell.rb +288 -0
- data/lib/bioroebe/gui/{gtk3/show_codon_table/misc.rb → universal_widgets/show_codon_table/show_codon_table.rb} +290 -110
- data/lib/bioroebe/gui/{shared_code/show_codon_usage/show_codon_usage_module.rb → universal_widgets/show_codon_usage/show_codon_usage.rb} +228 -47
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/sizeseq/sizeseq.rb +151 -69
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/three_to_one/three_to_one.rb +190 -127
- data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.rb +211 -152
- data/lib/bioroebe/images/images.html +953 -1170
- data/lib/bioroebe/images/misc/README.md +6 -0
- data/lib/bioroebe/images/misc/activation.avif +0 -0
- data/lib/bioroebe/images/misc/inhibition.avif +0 -0
- data/lib/bioroebe/images/misc/small_virus_logo.avif +0 -0
- data/lib/bioroebe/{constants/base_directory.rb → log_directory/log_directory.rb} +79 -59
- data/lib/bioroebe/matplotlib/matplotlib_generator.rb +1 -1
- data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +1 -1
- data/lib/bioroebe/misc/ruler.rb +5 -5
- data/lib/bioroebe/misc/useful_formulas.rb +3 -3
- data/lib/bioroebe/ncbi/efetch.rb +1 -2
- data/lib/bioroebe/ngs/phred_quality_score_table.rb +3 -3
- data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +3 -6
- data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +3 -3
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +6 -10
- data/lib/bioroebe/nucleotides/{show_nucleotide_sequence.rb → show_nucleotide_sequence/show_nucleotide_sequence.rb} +377 -255
- data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +1 -1
- data/lib/bioroebe/palindromes/palindrome_finder.rb +1 -1
- data/lib/bioroebe/palindromes/palindrome_generator.rb +2 -10
- data/lib/bioroebe/parsers/biolang_parser.rb +1 -1
- data/lib/bioroebe/parsers/blosum_parser.rb +14 -19
- data/lib/bioroebe/parsers/genbank_parser.rb +2 -6
- data/lib/bioroebe/parsers/gff.rb +9 -9
- data/lib/bioroebe/parsers/parse_embl.rb +2 -6
- data/lib/bioroebe/parsers/stride_parser.rb +4 -12
- data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +2 -2
- data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +6 -3
- data/lib/bioroebe/patterns/profile_pattern.rb +2 -2
- data/lib/bioroebe/patterns/rgg_scanner.rb +4 -2
- data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/alpha_helix.rb +2 -2
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/download_this_pdb.rb +2 -3
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/fetch_fasta_sequence_from_pdb.rb +4 -4
- data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/helical_wheel.rb +2 -2
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_mmCIF_file.rb +1 -1
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_pdb_file.rb +3 -3
- data/lib/bioroebe/{pdb → pdb_and_protein_structure}/report_secondary_structures_from_this_pdb_file.rb +3 -3
- data/lib/bioroebe/project/project.rb +3 -1
- data/lib/bioroebe/raw_sequence/README.md +8 -8
- data/lib/bioroebe/raw_sequence/raw_sequence.rb +11 -2
- data/lib/bioroebe/regexes/regexes.rb +1 -2
- data/lib/bioroebe/requires/commandline_application.rb +3 -1
- data/lib/bioroebe/requires/require_all_pdb_files.rb +1 -1
- data/lib/bioroebe/requires/require_all_taxonomy_files.rb +1 -1
- data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +10 -0
- data/lib/bioroebe/requires/require_colours.rb +1 -1
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +5 -7
- data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +1 -1
- data/lib/bioroebe/requires/require_the_constants.rb +2 -14
- data/lib/bioroebe/requires/require_yaml.rb +7 -5
- data/lib/bioroebe/sequence/alignment.rb +1 -1
- data/lib/bioroebe/sequence/dna.rb +4 -2
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +22 -8
- data/lib/bioroebe/sequence/protein.rb +2 -2
- data/lib/bioroebe/sequence/reverse_complement.rb +3 -3
- data/lib/bioroebe/sequence/rna.rb +9 -8
- data/lib/bioroebe/sequence/sequence.rb +3 -3
- data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +0 -0
- data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +0 -0
- data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +0 -0
- data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -1
- data/lib/bioroebe/shell/help/class.rb +68 -19
- data/lib/bioroebe/shell/menu.rb +5244 -5322
- data/lib/bioroebe/shell/{readline/readline.rb → readline.rb} +1 -3
- data/lib/bioroebe/shell/shell.rb +11240 -453
- data/lib/bioroebe/siRNA/siRNA.rb +3 -3
- data/lib/bioroebe/{gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb → sinatra/sinatra_interface.rb} +28 -19
- data/lib/bioroebe/{www/sinatra/sinatra.rb → sinatra/sinatra_wrapper.rb} +731 -754
- data/lib/bioroebe/string_matching/find_longest_substring.rb +2 -10
- data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +4 -14
- data/lib/bioroebe/string_matching/hamming_distance.rb +11 -10
- data/lib/bioroebe/string_matching/levensthein.rb +5 -17
- data/lib/bioroebe/string_matching/simple_string_comparer.rb +48 -4
- data/lib/bioroebe/string_matching/smith_waterman.rb +11 -6
- data/lib/bioroebe/svg/glyph.rb +4 -1
- data/lib/bioroebe/svg/mini_feature.rb +1 -1
- data/lib/bioroebe/svg/page.rb +18 -7
- data/lib/bioroebe/svg/svgee.rb +22 -13
- data/lib/bioroebe/svg/track.rb +20 -4
- data/lib/bioroebe/taxonomy/chart.rb +2 -2
- data/lib/bioroebe/taxonomy/class_methods.rb +5 -6
- data/lib/bioroebe/taxonomy/constants.rb +1 -1
- data/lib/bioroebe/taxonomy/info/info.rb +1 -1
- data/lib/bioroebe/taxonomy/info/is_dna.rb +1 -1
- data/lib/bioroebe/taxonomy/interactive.rb +1 -2
- data/lib/bioroebe/taxonomy/menu.rb +1 -1
- data/lib/bioroebe/taxonomy/node.rb +1 -1
- data/lib/bioroebe/taxonomy/parse_fasta.rb +4 -2
- data/lib/bioroebe/taxonomy/shared.rb +5 -4
- data/lib/bioroebe/taxonomy/taxonomy.rb +2 -4
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +3 -45
- data/lib/bioroebe/toplevel_methods/{is_on_roebe.rb → roebe.rb} +1 -11
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +6 -12
- data/lib/bioroebe/toplevel_methods/toplevel_methods.rb +5568 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +4 -3
- data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +2 -2
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +16 -9
- data/lib/bioroebe/utility_scripts/compacter/compacter.rb +4 -2
- data/lib/bioroebe/utility_scripts/compare_these_two_sequences_via_blosum.rb +119 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +11 -9
- data/lib/bioroebe/utility_scripts/{consensus_sequence.rb → consensus_sequence/consensus_sequence.rb} +13 -4
- data/lib/bioroebe/utility_scripts/{create_batch_entrez_file.rb → create_batch_entrez_file/create_batch_entrez_file.rb} +5 -5
- data/lib/bioroebe/utility_scripts/{determine_antigenic_areas.rb → determine_antigenic_areas/determine_antigenic_areas.rb} +5 -5
- data/lib/bioroebe/utility_scripts/{determine_missing_nucleotides_percentage.rb → determine_missing_nucleotides_percentage/determine_missing_nucleotides_percentage.rb} +16 -15
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +7 -7
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +1 -1
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +2 -0
- data/lib/bioroebe/utility_scripts/{dot_alignment.rb → dot_alignment/dot_alignment.rb} +3 -3
- data/lib/bioroebe/utility_scripts/{download_files_from_rebase.rb → download_files_from_rebase/download_files_from_rebase.rb} +5 -5
- data/lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb +269 -0
- data/lib/bioroebe/utility_scripts/find_gene.rb +4 -2
- data/lib/bioroebe/utility_scripts/{mirror_repeat.rb → mirror_repeat/mirror_repeat.rb} +5 -5
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +3 -3
- data/lib/bioroebe/utility_scripts/{parse_taxonomy.rb → parse_taxonomy/parse_taxonomy.rb} +15 -6
- data/lib/bioroebe/utility_scripts/{pathways.rb → pathways/pathways.rb} +4 -3
- data/lib/bioroebe/utility_scripts/{permutations.rb → permutations/permutations.rb} +3 -3
- data/lib/bioroebe/utility_scripts/punnet/punnet.rb +4 -2
- data/lib/bioroebe/utility_scripts/{show_this_dna_sequence.rb → show_this_dna_sequence/show_this_dna_sequence.rb} +1 -1
- data/lib/bioroebe/utility_scripts/showorf/showorf.rb +406 -10
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/viennarna/rnafold_wrapper.rb +5 -13
- data/lib/bioroebe/virus/individual_viruses/README.md +15 -0
- data/lib/bioroebe/virus/individual_viruses/tobacco_mosaic_virus.rb +40 -0
- data/lib/bioroebe/virus/virus.rb +76 -0
- data/lib/bioroebe/www/bioroebe.cgi +4 -3
- data/lib/bioroebe/www/embeddable_interface.rb +85 -49
- data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +6 -6
- data/lib/bioroebe/yaml/antisense/antisense.yml +2 -0
- data/lib/bioroebe/yaml/blosum/blosum50.yml +6 -0
- data/lib/bioroebe/yaml/blosum/blosum90.yml +2 -1
- data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +2 -2
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
- data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +7 -6
- data/lib/bioroebe/yaml/humans/human_chromosomes.yml +3 -3
- data/lib/bioroebe/yaml/mRNA/mRNA.yml +1 -5
- data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +1 -0
- data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +2 -1
- data/lib/bioroebe/yaml/promoters/35S.yml +3 -1
- data/lib/bioroebe/yaml/proteases/proteases.yml +3 -1
- data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -1
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +7 -7
- data/spec/testing_toplevel_method_editor.rb +1 -1
- data/spec/testing_toplevel_method_verbose.rb +1 -1
- data/test/testing_dna_to_rna_conversion.rb +1 -1
- metadata +127 -235
- data/doc/blosum.md +0 -5
- data/lib/bioroebe/base/commandline_application/aminoacids.rb +0 -33
- data/lib/bioroebe/base/commandline_application/directory.rb +0 -33
- data/lib/bioroebe/base/commandline_application/extract.rb +0 -22
- data/lib/bioroebe/base/commandline_application/misc.rb +0 -502
- data/lib/bioroebe/base/commandline_application/opn.rb +0 -47
- data/lib/bioroebe/base/commandline_application/reset.rb +0 -42
- data/lib/bioroebe/base/commandline_application/warnings.rb +0 -36
- data/lib/bioroebe/base/commandline_application/write_what_into.rb +0 -29
- data/lib/bioroebe/base/initialize.rb +0 -18
- data/lib/bioroebe/base/misc.rb +0 -129
- data/lib/bioroebe/base/namespace.rb +0 -16
- data/lib/bioroebe/base/prototype/e_and_ee.rb +0 -24
- data/lib/bioroebe/base/prototype/misc.rb +0 -114
- data/lib/bioroebe/base/prototype/mkdir.rb +0 -20
- data/lib/bioroebe/base/prototype/reset.rb +0 -36
- data/lib/bioroebe/colours/misc_colours.rb +0 -80
- data/lib/bioroebe/colours/rev.rb +0 -44
- data/lib/bioroebe/colours/sdir.rb +0 -21
- data/lib/bioroebe/colours/sfancy.rb +0 -21
- data/lib/bioroebe/colours/sfile.rb +0 -21
- data/lib/bioroebe/colours/simp.rb +0 -21
- data/lib/bioroebe/colours/swarn.rb +0 -29
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +0 -147
- data/lib/bioroebe/constants/carriage_return.rb +0 -14
- data/lib/bioroebe/constants/codon_tables.rb +0 -77
- data/lib/bioroebe/constants/database_constants.rb +0 -107
- data/lib/bioroebe/constants/files_and_directories.rb +0 -606
- data/lib/bioroebe/constants/misc.rb +0 -209
- data/lib/bioroebe/constants/newline.rb +0 -14
- data/lib/bioroebe/constants/nucleotides.rb +0 -121
- data/lib/bioroebe/constants/regex.rb +0 -28
- data/lib/bioroebe/constants/roebe.rb +0 -38
- data/lib/bioroebe/constants/row_terminator.rb +0 -16
- data/lib/bioroebe/constants/tabulator.rb +0 -14
- data/lib/bioroebe/constants/unicode.rb +0 -12
- data/lib/bioroebe/constants/urls.rb +0 -50
- data/lib/bioroebe/gui/gtk +0 -1
- data/lib/bioroebe/gui/gtk3/README.md +0 -2
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +0 -306
- data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +0 -29
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -195
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -105
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -188
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +0 -322
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +0 -181
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +0 -383
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +0 -174
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +0 -181
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +0 -101
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +0 -145
- data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +0 -23
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +0 -165
- data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +0 -166
- data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -82
- data/lib/bioroebe/gui/libui/README.md +0 -4
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +0 -116
- data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -112
- data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -60
- data/lib/bioroebe/gui/libui/controller/controller.rb +0 -116
- data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +0 -161
- data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -76
- data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +0 -135
- data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +0 -118
- data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +0 -115
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +0 -190
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +0 -134
- data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +0 -89
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +0 -113
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +0 -102
- data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +0 -94
- data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +0 -216
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +0 -192
- data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +0 -72
- data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +0 -206
- data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -140
- data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +0 -262
- data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +0 -243
- data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +0 -199
- data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +0 -519
- data/lib/bioroebe/shell/colours/colours.rb +0 -235
- data/lib/bioroebe/shell/help/help.rb +0 -25
- data/lib/bioroebe/shell/misc.rb +0 -10227
- data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +0 -56
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +0 -722
- data/lib/bioroebe/toplevel_methods/atomic_composition.rb +0 -198
- data/lib/bioroebe/toplevel_methods/base_composition.rb +0 -121
- data/lib/bioroebe/toplevel_methods/blast.rb +0 -153
- data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +0 -57
- data/lib/bioroebe/toplevel_methods/cat.rb +0 -71
- data/lib/bioroebe/toplevel_methods/chunked_display.rb +0 -92
- data/lib/bioroebe/toplevel_methods/cliner.rb +0 -81
- data/lib/bioroebe/toplevel_methods/complement.rb +0 -58
- data/lib/bioroebe/toplevel_methods/convert_global_env.rb +0 -39
- data/lib/bioroebe/toplevel_methods/databases.rb +0 -73
- data/lib/bioroebe/toplevel_methods/delimiter.rb +0 -19
- data/lib/bioroebe/toplevel_methods/digest.rb +0 -81
- data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +0 -146
- data/lib/bioroebe/toplevel_methods/e.rb +0 -20
- data/lib/bioroebe/toplevel_methods/editor.rb +0 -21
- data/lib/bioroebe/toplevel_methods/esystem.rb +0 -22
- data/lib/bioroebe/toplevel_methods/exponential_growth.rb +0 -74
- data/lib/bioroebe/toplevel_methods/extract.rb +0 -56
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +0 -269
- data/lib/bioroebe/toplevel_methods/frequencies.rb +0 -99
- data/lib/bioroebe/toplevel_methods/hamming_distance.rb +0 -60
- data/lib/bioroebe/toplevel_methods/infer.rb +0 -66
- data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +0 -101
- data/lib/bioroebe/toplevel_methods/levensthein.rb +0 -63
- data/lib/bioroebe/toplevel_methods/log_directory.rb +0 -109
- data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +0 -55
- data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +0 -88
- data/lib/bioroebe/toplevel_methods/matches.rb +0 -259
- data/lib/bioroebe/toplevel_methods/misc.rb +0 -596
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +0 -787
- data/lib/bioroebe/toplevel_methods/number_of_clones.rb +0 -63
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +0 -79
- data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +0 -236
- data/lib/bioroebe/toplevel_methods/opn.rb +0 -34
- data/lib/bioroebe/toplevel_methods/palindromes.rb +0 -155
- data/lib/bioroebe/toplevel_methods/parse.rb +0 -59
- data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +0 -68
- data/lib/bioroebe/toplevel_methods/rds.rb +0 -24
- data/lib/bioroebe/toplevel_methods/remove.rb +0 -86
- data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +0 -35
- data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +0 -68
- data/lib/bioroebe/toplevel_methods/rna_splicing.rb +0 -73
- data/lib/bioroebe/toplevel_methods/rnalfold.rb +0 -69
- data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +0 -116
- data/lib/bioroebe/toplevel_methods/shuffleseq.rb +0 -37
- data/lib/bioroebe/toplevel_methods/statistics.rb +0 -53
- data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +0 -62
- data/lib/bioroebe/toplevel_methods/three_delimiter.rb +0 -34
- data/lib/bioroebe/toplevel_methods/time_and_date.rb +0 -53
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +0 -31
- data/lib/bioroebe/toplevel_methods/truncate.rb +0 -48
- data/lib/bioroebe/toplevel_methods/url.rb +0 -36
- data/lib/bioroebe/toplevel_methods/verbose.rb +0 -59
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -31
- data/lib/bioroebe/utility_scripts/showorf/help.rb +0 -33
- data/lib/bioroebe/utility_scripts/showorf/initialize.rb +0 -52
- data/lib/bioroebe/utility_scripts/showorf/menu.rb +0 -68
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +0 -36
- data/lib/bioroebe/utility_scripts/showorf/run.rb +0 -152
- data/lib/bioroebe/utility_scripts/showorf/show.rb +0 -97
- /data/doc/{german_names_for_the_aminoacids.md → german_names_for_the_aminoacids/german_names_for_the_aminoacids.md} +0 -0
- /data/doc/{pdb_ATOM_entry.md → pdb_ATOM_entry/pdb_ATOM_entry.md} +0 -0
- /data/doc/{resources.md → resources/resources.md} +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/customized_dialog.rb +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/anti_sense_strand/anti_sense_strand.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/hamming_distance/hamming_distance.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/levensthein_distance/levensthein_distance.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/protein_to_DNA/protein_to_DNA.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.config +0 -0
- /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.config +0 -0
- /data/lib/bioroebe/yaml/{base_composition_of_dna.yml → base_composition_of_dna/base_composition_of_dna.yml} +0 -0
- /data/lib/bioroebe/yaml/{nuclear_localization_sequences.yml → nuclear_localization_sequences/nuclear_localization_sequences.yml} +0 -0
- /data/lib/bioroebe/yaml/{talens.yml → talens/talens.yml} +0 -0
@@ -0,0 +1,1349 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# This file here also contains data particular to my home setup, the system
|
6
|
+
# I use at home. It is thus not hugely relevant to other people, but
|
7
|
+
# simplifies the job for me whenever I work on bio-related stuff in
|
8
|
+
# general.
|
9
|
+
#
|
10
|
+
# This file may contain some handy remote URLs, as constants, such as
|
11
|
+
# NCBI_GENE; we could also use local "URLs", although this is probably
|
12
|
+
# no longer necessary.
|
13
|
+
#
|
14
|
+
# This .rb file used to contain several hardcoded constants. Since as of
|
15
|
+
# September 2019 this older approach was slightly deprecated, mostly because
|
16
|
+
# hardcoded constants do not allow for as much flexibility as method calls
|
17
|
+
# do. Even if a method call is slightly slower, I found that in the long
|
18
|
+
# run it greatly makes the resulting code easier to maintain, rather than
|
19
|
+
# having to deal with hardcoded constants that may lead to inflexibility.
|
20
|
+
# This happened to me in a restricted university setting, so I don't quite
|
21
|
+
# fancy hardcoded constants anymore as much - at the least not for
|
22
|
+
# important variables.
|
23
|
+
# =========================================================================== #
|
24
|
+
# require 'bioroebe/constants/constants.rb'
|
25
|
+
# Bioroebe.load_and_return_the_restriction_enzymes
|
26
|
+
# Bioroebe.blosum_directory?
|
27
|
+
# Bioroebe.filter_away_invalid_aminoacids
|
28
|
+
# =========================================================================== #
|
29
|
+
module Bioroebe
|
30
|
+
|
31
|
+
require 'bioroebe/toplevel_methods/roebe.rb'
|
32
|
+
require 'bioroebe/log_directory/log_directory.rb'
|
33
|
+
|
34
|
+
# ========================================================================= #
|
35
|
+
# === array_failsave_requires
|
36
|
+
#
|
37
|
+
# Next we do some failsafe requires for the Bioroebe::Shell component
|
38
|
+
# specifically.
|
39
|
+
# ========================================================================= #
|
40
|
+
array_failsave_requires = %w( convert_global_env )
array_failsave_requires.each do |project|
  begin
    require project
  rescue LoadError
    # Silent rescue: a project that cannot be loaded is simply ignored.
  end
end
|
48
|
+
|
49
|
+
# ========================================================================= #
|
50
|
+
# === Bioroebe::N
|
51
|
+
# ========================================================================= #
|
52
|
+
N = "\n"
|
53
|
+
|
54
|
+
# ========================================================================= #
|
55
|
+
# === Bioroebe::R
|
56
|
+
# ========================================================================= #
|
57
|
+
R = "\r"
|
58
|
+
|
59
|
+
# ========================================================================= #
|
60
|
+
# === TABULATOR
|
61
|
+
# ========================================================================= #
|
62
|
+
TABULATOR = "\t"
|
63
|
+
|
64
|
+
# ========================================================================= #
|
65
|
+
# === Bioroebe::ROW_TERMINATOR
|
66
|
+
#
|
67
|
+
# This constant is not often in use, though.
|
68
|
+
# ========================================================================= #
|
69
|
+
ROW_TERMINATOR = "#{TABULATOR}|\n"
|
70
|
+
|
71
|
+
# ========================================================================= #
|
72
|
+
# === E_BOX
|
73
|
+
#
|
74
|
+
# This constant was disabled in December 2020. It may have to be moved
|
75
|
+
# to .rb files that depend on it, if we still actually need it. If
|
76
|
+
# not then this comment will be removed at a later point anyway.
|
77
|
+
# ========================================================================= #
|
78
|
+
# E_BOX = 'CACGTG' # The "E Box".
|
79
|
+
|
80
|
+
# ========================================================================= #
|
81
|
+
# === BE_VERBOSE
|
82
|
+
# ========================================================================= #
|
83
|
+
BE_VERBOSE = true
|
84
|
+
|
85
|
+
# ========================================================================= #
|
86
|
+
# === TOKEN (TOKEN tag)
|
87
|
+
# ========================================================================= #
|
88
|
+
TOKEN = '|'
|
89
|
+
# ========================================================================= #
|
90
|
+
# === ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED
|
91
|
+
#
|
92
|
+
# Just list the aminoacids that can typically be phosphorylated.
|
93
|
+
# ========================================================================= #
|
94
|
+
ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED = %w(
|
95
|
+
S Y T
|
96
|
+
)
|
97
|
+
|
98
|
+
# ========================================================================= #
|
99
|
+
# === ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS
|
100
|
+
#
|
101
|
+
# We have to keep the long names for the amino acids in one
|
102
|
+
# constant, so that we can do queries later on.
|
103
|
+
# ========================================================================= #
|
104
|
+
ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS = (%w(
|
105
|
+
Alanine
|
106
|
+
Arginine
|
107
|
+
Asparagine
|
108
|
+
Cysteine
|
109
|
+
Glutamine
|
110
|
+
Glycine
|
111
|
+
Histidine
|
112
|
+
Isoleucine
|
113
|
+
Leucine
|
114
|
+
Lysine
|
115
|
+
Methionine
|
116
|
+
Phenylalanine
|
117
|
+
Proline
|
118
|
+
Serine
|
119
|
+
Threonine
|
120
|
+
Tryptophane
|
121
|
+
Tyrosine
|
122
|
+
Valine
|
123
|
+
) << 'Aspartic acid' << 'Glutamic acid').sort
|
124
|
+
|
125
|
+
# ========================================================================= #
|
126
|
+
# === POSSIBLE_AMINO_ACIDS
|
127
|
+
#
|
128
|
+
# Which Aminoacids are possible/allowed? We will list them here:
|
129
|
+
#
|
130
|
+
# ACDEFGHIKLMNPQRSTVWY
|
131
|
+
#
|
132
|
+
# Note that this is distinct from the constant AMINO_ACIDS, which is
|
133
|
+
# instead loaded from a local .yml file. This constant includes
|
134
|
+
# all the 20 canonical aminoacids, whereas AMINO_ACIDS may also
|
135
|
+
# include pyrrolysine and selenocysteine.
|
136
|
+
# ========================================================================= #
|
137
|
+
POSSIBLE_AMINO_ACIDS = 'ACDEFGHIKLMNPQRSTVWY'
|
138
|
+
TWENTY_CANONICAL_AMINOACIDS = POSSIBLE_AMINO_ACIDS # === TWENTY_CANONICAL_AMINOACIDS
|
139
|
+
|
140
|
+
# ========================================================================= #
|
141
|
+
# === ARRAY_AMINO_ACIDS_ALPHABET
|
142
|
+
#
|
143
|
+
# This keeps an Array with all aminoacids, in one-letter format.
|
144
|
+
#
|
145
|
+
# So it is equivalent to:
|
146
|
+
#
|
147
|
+
# ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
|
148
|
+
#
|
149
|
+
# ========================================================================= #
|
150
|
+
ARRAY_AMINO_ACIDS_ALPHABET = POSSIBLE_AMINO_ACIDS.chars
|
151
|
+
|
152
|
+
# ========================================================================= #
|
153
|
+
# === Bioroebe.all_aminoacids?
|
154
|
+
#
|
155
|
+
# This method will return all available aminoacids.
|
156
|
+
#
|
157
|
+
# Example:
|
158
|
+
#
|
159
|
+
# Bioroebe.all_aminoacids? # => ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
|
160
|
+
#
|
161
|
+
# ========================================================================= #
|
162
|
+
def self.all_aminoacids?
  # Hands back the ARRAY_AMINO_ACIDS_ALPHABET constant itself; no copy
  # of the Array is made here.
  return ARRAY_AMINO_ACIDS_ALPHABET
end
|
165
|
+
|
166
|
+
# ========================================================================= #
|
167
|
+
# === Bioroebe.filter_away_invalid_aminoacids
|
168
|
+
#
|
169
|
+
# Usage example:
|
170
|
+
#
|
171
|
+
# Bioroebe.filter_away_invalid_aminoacids('ATMÜ') # => "ATM"
|
172
|
+
#
|
173
|
+
# ========================================================================= #
|
174
|
+
def self.filter_away_invalid_aminoacids(i)
  permitted_letters = all_aminoacids?
  # Walk over the input character by character and keep only those
  # characters that are listed among the permitted one-letter codes.
  # The comparison is an exact String match; everything else is dropped.
  kept_letters = i.each_char.select { |letter|
    permitted_letters.include?(letter)
  }
  kept_letters.join
end
|
178
|
+
|
179
|
+
# ========================================================================= #
|
180
|
+
# === all_aminoacids?
|
181
|
+
# ========================================================================= #
|
182
|
+
def all_aminoacids?
  # Instance-level convenience: forwards to the module-level method.
  return ::Bioroebe.all_aminoacids?
end
|
185
|
+
|
186
|
+
# ========================================================================= #
|
187
|
+
# === VERTICAL_UNICODE_BAR
|
188
|
+
# ========================================================================= #
|
189
|
+
VERTICAL_UNICODE_BAR = '|'
|
190
|
+
|
191
|
+
# ========================================================================= #
|
192
|
+
# === AMINOACID_FAMILIES
|
193
|
+
# ========================================================================= #
|
194
|
+
AMINOACID_FAMILIES = {
|
195
|
+
'citratzyklus' => {
|
196
|
+
# Alpha-Ketoglutarat: EPQR
|
197
|
+
'alpha-ketoglutarat' => %w( E P Q R ),
|
198
|
+
# Oxalacetat: DMN-KTI
|
199
|
+
'oxalacetat' => %w( D N K M T I ),
|
200
|
+
},
|
201
|
+
'glykolyse' => {
|
202
|
+
'pyruvat' => %w( A V L ), # AVL
|
203
|
+
'3-phosphoglycerinsäure' => %w( S G C ), # SGC
|
204
|
+
},
|
205
|
+
'chorismat' => {
|
206
|
+
'aromatische_familie' => %w( F Y W ) # FYW
|
207
|
+
},
|
208
|
+
'ribose-5-p' => {
|
209
|
+
'histidinol' => %w( H ) # Histidine.
|
210
|
+
},
|
211
|
+
}
|
212
|
+
|
213
|
+
# ========================================================================= #
|
214
|
+
# === Bioroebe.aminoacid_families?
|
215
|
+
#
|
216
|
+
# Feedback which aminoacid-families we know of.
|
217
|
+
#
|
218
|
+
# Usage example:
|
219
|
+
#
|
220
|
+
# pp Bioroebe.aminoacid_families?; ''
|
221
|
+
#
|
222
|
+
# ========================================================================= #
|
223
|
+
def self.aminoacid_families?
  # Returns the nested AMINOACID_FAMILIES Hash as-is (not a copy).
  return AMINOACID_FAMILIES
end
class << self
  alias aa_families? aminoacid_families? # === Bioroebe.aa_families?
end
|
226
|
+
|
227
|
+
# ========================================================================= #
|
228
|
+
# === VALID_WAYS_TO_EXIT
|
229
|
+
#
|
230
|
+
# All ways to exit will be recorded here.
|
231
|
+
#
|
232
|
+
# If you need to use more ways, simply append to this Array.
|
233
|
+
#
|
234
|
+
# This constant may have to be moved into the bio-shell part eventually.
|
235
|
+
# ========================================================================= #
|
236
|
+
VALID_WAYS_TO_EXIT = %w(
|
237
|
+
quit q exit qq :q qt
|
238
|
+
bye
|
239
|
+
rda
|
240
|
+
r2
|
241
|
+
tq
|
242
|
+
sq
|
243
|
+
exit_program
|
244
|
+
exitprogram
|
245
|
+
)
|
246
|
+
|
247
|
+
# ========================================================================= #
|
248
|
+
# === NAMES_ENTRIES
|
249
|
+
#
|
250
|
+
# This used to belong to the Taxonomy submodule.
|
251
|
+
# ========================================================================= #
|
252
|
+
NAMES_ENTRIES = 'names.sql'
|
253
|
+
|
254
|
+
# ========================================================================= #
|
255
|
+
# === NODES_ENTRIES
|
256
|
+
#
|
257
|
+
# This used to belong to the Taxonomy submodule.
|
258
|
+
# ========================================================================= #
|
259
|
+
NODES_ENTRIES = 'nodes.sql'
|
260
|
+
|
261
|
+
# ========================================================================= #
|
262
|
+
# === FASTA_ENTRIES
|
263
|
+
#
|
264
|
+
# This used to belong to the Taxonomy submodule.
|
265
|
+
# ========================================================================= #
|
266
|
+
FASTA_ENTRIES = 'fasta.sql'
|
267
|
+
|
268
|
+
# ========================================================================= #
|
269
|
+
# === SHALL_WE_LOG_LAST_UPDATE
|
270
|
+
#
|
271
|
+
# This constant exists specifically for the taxonomy-component of
|
272
|
+
# the Bioroebe project.
|
273
|
+
# ========================================================================= #
|
274
|
+
SHALL_WE_LOG_LAST_UPDATE = true
|
275
|
+
|
276
|
+
# ========================================================================= #
|
277
|
+
# === Bioroebe.taxonomy_ncbi_database_last_update_log_file
|
278
|
+
#
|
279
|
+
# This method is used specifically for the taxonomy-component of the
|
280
|
+
# Bioroebe project.
|
281
|
+
# ========================================================================= #
|
282
|
+
def self.taxonomy_ncbi_database_last_update_log_file
  # The file name is appended directly to whatever Bioroebe.log_dir?
  # returns; no path separator is inserted here.
  log_directory = ::Bioroebe.log_dir?
  "#{log_directory}taxonomy_ncbi_database_last_update.log"
end
|
285
|
+
|
286
|
+
# ========================================================================= #
|
287
|
+
# === NAME_OF_BIO_SHELL
|
288
|
+
#
|
289
|
+
# This constant can be used as the default prompt for the bioshell
|
290
|
+
# component.
|
291
|
+
# ========================================================================= #
|
292
|
+
NAME_OF_BIO_SHELL = 'BIO SHELL> '
|
293
|
+
|
294
|
+
# ========================================================================= #
|
295
|
+
# === DEFAULT_DNA_SEQUENCE
|
296
|
+
#
|
297
|
+
# This is a default "test" DNA sequence, in the sense that it can be
|
298
|
+
# used to quickly test functionality within the bioroebe project.
|
299
|
+
#
|
300
|
+
# It was added in May 2020, but it may be that we have to remove
|
301
|
+
# it at a later time, or move it into a separate .yml file. For
|
302
|
+
# the time being, though, it will reside here.
|
303
|
+
# ========================================================================= #
|
304
|
+
DEFAULT_DNA_SEQUENCE =
|
305
|
+
'CGGCCCGATTTGGGTTTCGGAGCGATCGAAATACCAGCACTACCATGAATTCTAT'\
|
306
|
+
'ATGGCTGCCGTTCACAGCCTTAATTTTAGGCTTTCCACCTGATCACTCTTTAATC'\
|
307
|
+
'TCCATTGTTTCTGGTACGCAGAAATTGACGCTTCCCATTCATTCACGGCTAAAAT'\
|
308
|
+
'CAAGGATTCCACCAGAATCGCGGGCCGCGTGGGTGCGCCGTCGACCTCCTCGGCC'\
|
309
|
+
'AAATAAGAACGGGCAGGTAAGAGACTAGGGTACTCAAGAT'
|
310
|
+
|
311
|
+
# ========================================================================= #
|
312
|
+
# === DEFAULT_LENGTH_FOR_DNA
|
313
|
+
#
|
314
|
+
# How long our DNA-generated strings should be by default.
|
315
|
+
#
|
316
|
+
# This may be used by some scripts, so it provides a default value
|
317
|
+
# for use in these scripts.
|
318
|
+
#
|
319
|
+
# 150 nucleotides are the current default.
|
320
|
+
# ========================================================================= #
|
321
|
+
DEFAULT_LENGTH_FOR_DNA = 150
|
322
|
+
|
323
|
+
# ========================================================================= #
|
324
|
+
# === FIELD_TERMINATOR
|
325
|
+
# ========================================================================= #
|
326
|
+
FIELD_TERMINATOR = "#{TABULATOR}|#{TABULATOR}"
|
327
|
+
MAIN_DELIMITER = DELIMITER = FIELD_TERMINATOR # An alias to the above.
|
328
|
+
|
329
|
+
# ========================================================================= #
|
330
|
+
# === USERS_X
|
331
|
+
# ========================================================================= #
|
332
|
+
USERS_X = '/home/x/'
|
333
|
+
HOME_DIRECTORY_OF_USER_X = USERS_X # === HOME_DIRECTORY_OF_USER_X
|
334
|
+
|
335
|
+
# ========================================================================= #
|
336
|
+
# === RUBY_SRC
|
337
|
+
#
|
338
|
+
# This constant is only useful on my home directory. Most other users
|
339
|
+
# will not need it, ever.
|
340
|
+
# ========================================================================= #
|
341
|
+
RUBY_SRC =
|
342
|
+
"#{USERS_X}programming/ruby/src/"
|
343
|
+
|
344
|
+
# ========================================================================= #
|
345
|
+
# === BIOROEBE_AT_HOME
|
346
|
+
# ========================================================================= #
|
347
|
+
BIOROEBE_AT_HOME = "#{RUBY_SRC}bioroebe/lib/bioroebe/"
|
348
|
+
|
349
|
+
# ========================================================================= #
|
350
|
+
# === LOCALHOST
|
351
|
+
# ========================================================================= #
|
352
|
+
LOCALHOST = 'http://localhost/'
|
353
|
+
|
354
|
+
# ========================================================================= #
|
355
|
+
# === PATH_TO_THE_RELION_BINARY
|
356
|
+
#
|
357
|
+
# This constant can be set to determine where relion resides. It is
|
358
|
+
# mostly an ad-hoc constant.
|
359
|
+
# ========================================================================= #
|
360
|
+
PATH_TO_THE_RELION_BINARY =
|
361
|
+
'/opt/RELION/relion-1.3/bin/relion'
|
362
|
+
|
363
|
+
# ========================================================================= #
|
364
|
+
# === ARRAY_REGISTERED_ACTIONS
|
365
|
+
#
|
366
|
+
# ARRAY_REGISTERED_ACTIONS becomes @registered_actions.
|
367
|
+
# ========================================================================= #
|
368
|
+
ARRAY_REGISTERED_ACTIONS = %w(
|
369
|
+
to_rna
|
370
|
+
to_dna
|
371
|
+
rest
|
372
|
+
pubmed
|
373
|
+
blosum
|
374
|
+
restriction
|
375
|
+
translate
|
376
|
+
quit
|
377
|
+
shorten_aminoacid
|
378
|
+
)
|
379
|
+
|
380
|
+
# ========================================================================= #
|
381
|
+
# === FILE_BIO_LANG
|
382
|
+
# ========================================================================= #
|
383
|
+
FILE_BIO_LANG =
|
384
|
+
"#{USERS_X}data/personal/yaml/bio_lang/bio_lang.md"
|
385
|
+
|
386
|
+
# ========================================================================= #
|
387
|
+
# === AC_ELEMENT
|
388
|
+
# ========================================================================= #
|
389
|
+
# AC_ELEMENT = 4_565 # n nucleotides found in the Ac element of Zea mays.
|
390
|
+
|
391
|
+
# ========================================================================= #
|
392
|
+
# === BIOSHELL_FILE
|
393
|
+
# ========================================================================= #
|
394
|
+
if Object.const_defined?(:ConvertGlobalEnv)
  # BIOSHELL_FILE is only defined when the ConvertGlobalEnv constant
  # is available at this point.
  BIOSHELL_FILE = ConvertGlobalEnv['$BIORUBY/shell/shell.rb']
end
|
397
|
+
|
398
|
+
# ========================================================================= #
|
399
|
+
# === EMAIL
|
400
|
+
#
|
401
|
+
# My email address - not too terribly useful for other people,
|
402
|
+
# but nonetheless it may be useful to display it, in particular
|
403
|
+
# for GUI-related components of the bioroebe-project and simple
|
404
|
+
# feedback in the long run.
|
405
|
+
# ========================================================================= #
|
406
|
+
EMAIL = 'shevy@inbox.lt'
|
407
|
+
|
408
|
+
# ========================================================================= #
|
409
|
+
# === REGEX_FOR_N_GLYCOSYLATION_PATTERN
|
410
|
+
#
|
411
|
+
# See rubular at:
|
412
|
+
#
|
413
|
+
# https://rubular.com/r/D95Cq7oR5x
|
414
|
+
#
|
415
|
+
# ========================================================================= #
|
416
|
+
REGEX_FOR_N_GLYCOSYLATION_PATTERN =
|
417
|
+
/(?=(N[^P][ST][^P]))/
|
418
|
+
|
419
|
+
# ========================================================================= #
|
420
|
+
# === REGEX_PROSITE_FOR_ANY_AMINOACID
|
421
|
+
# ========================================================================= #
|
422
|
+
REGEX_PROSITE_FOR_ANY_AMINOACID =
|
423
|
+
/x\((\d+)\)/
|
424
|
+
|
425
|
+
# ========================================================================= #
|
426
|
+
# === Bioroebe::STOP_CODONS
|
427
|
+
#
|
428
|
+
# The STOP codons that can be found in Humans, in RNA format.
|
429
|
+
# ========================================================================= #
|
430
|
+
STOP_CODONS = %w(
|
431
|
+
UAA UAG UGA
|
432
|
+
)
|
433
|
+
|
434
|
+
# ========================================================================= #
|
435
|
+
# === Bioroebe::RNA_NUCLEOTIDES
|
436
|
+
#
|
437
|
+
# This will refer to an Array including all four RNA nucleotides,
|
438
|
+
# that is A, U, G and C.
|
439
|
+
# ========================================================================= #
|
440
|
+
RNA_NUCLEOTIDES = %w( A U G C )
|
441
|
+
ALLOWED_RNA_NUCLEOTIDES = RNA_NUCLEOTIDES # === ALLOWED_RNA_NUCLEOTIDES
|
442
|
+
|
443
|
+
# ========================================================================= #
|
444
|
+
# === Bioroebe::POSSIBLE_RNA_NUCLEOTIDES
|
445
|
+
#
|
446
|
+
# This is a bit different to RNA_NUCLEOTIDES in that N is also a part
|
447
|
+
# of it. It is not entirely clear whether this array here is kept,
|
448
|
+
# though.
|
449
|
+
# ========================================================================= #
|
450
|
+
POSSIBLE_RNA_NUCLEOTIDES = %w(
|
451
|
+
A U G C N
|
452
|
+
)
|
453
|
+
|
454
|
+
# ========================================================================= #
|
455
|
+
# === Bioroebe::DNA_NUCLEOTIDES
|
456
|
+
#
|
457
|
+
# This is the variant without N.
|
458
|
+
# ========================================================================= #
|
459
|
+
DNA_NUCLEOTIDES = %w( A T G C )
|
460
|
+
|
461
|
+
# ========================================================================= #
|
462
|
+
# === Bioroebe::HASH_DNA_NUCLEOTIDES
|
463
|
+
#
|
464
|
+
# Since as of 20.04.2014, Uracil is also part of this Hash. While this
|
465
|
+
# is, strictly speaking, not absolutely correct, it does simplify some
|
466
|
+
# downstream code. However, this may possibly be re-evaluated in
|
467
|
+
# the future.
|
468
|
+
#
|
469
|
+
# This Hash may be helpful when the user wishes to find a complement to
|
470
|
+
# a nucleotide. There is a method that does the same, but this Hash
|
471
|
+
# should be faster than a method call, so use it in particular if
|
472
|
+
# you need to focus more on speed.
|
473
|
+
# ========================================================================= #
|
474
|
+
HASH_DNA_NUCLEOTIDES = {
|
475
|
+
'A' => 'T',
|
476
|
+
'T' => 'A',
|
477
|
+
'G' => 'C',
|
478
|
+
'C' => 'G',
|
479
|
+
'U' => 'A'
|
480
|
+
}
|
481
|
+
|
482
|
+
# ========================================================================= #
|
483
|
+
# === Bioroebe.this_nucleotide_pairs_with?
|
484
|
+
# ========================================================================= #
|
485
|
+
def self.this_nucleotide_pairs_with?(i)
  # Normalize the input to an upcased String first, so that input such
  # as :a or 'a' is looked up under the key 'A'.
  lookup_key = i.to_s.upcase
  # Keys that are not part of HASH_DNA_NUCLEOTIDES yield nil.
  return HASH_DNA_NUCLEOTIDES[lookup_key]
end
|
488
|
+
|
489
|
+
# ========================================================================= #
|
490
|
+
# === POSSIBLE_DNA_NUCLEOTIDES
|
491
|
+
#
|
492
|
+
# This constant will keep all possible DNA nucleotides.
|
493
|
+
#
|
494
|
+
# N is also a valid entry, 'Yarrowia_lipolytica_genome.fa' includes it.
|
495
|
+
# Other than that:
|
496
|
+
#
|
497
|
+
# Only these sequences are allowed in DNA.
|
498
|
+
#
|
499
|
+
# To scope to this, do:
|
500
|
+
#
|
501
|
+
# Bioroebe::POSSIBLE_DNA_NUCLEOTIDES
|
502
|
+
#
|
503
|
+
# ========================================================================= #
|
504
|
+
POSSIBLE_DNA_NUCLEOTIDES = %w(
|
505
|
+
A T G C N
|
506
|
+
); ARRAY_VALID_DNA_SEQUENCES = POSSIBLE_DNA_NUCLEOTIDES # === ARRAY_VALID_DNA_SEQUENCES
|
507
|
+
|
508
|
+
# ========================================================================= #
|
509
|
+
# === Bioroebe.return_random_nucleotide
|
510
|
+
#
|
511
|
+
# This method will randomly return either 'A', 'T', 'C' or 'G'.
|
512
|
+
# ========================================================================= #
|
513
|
+
def self.return_random_nucleotide
  # Drop the placeholder 'N' first, so that only A, T, G and C can be drawn.
  candidates = POSSIBLE_DNA_NUCLEOTIDES.reject { |nucleotide| nucleotide == 'N' }
  candidates.sample
end
class << self
  alias random_nucleotide return_random_nucleotide # === Bioroebe.random_nucleotide
end
|
516
|
+
|
517
|
+
# ========================================================================= #
|
518
|
+
# === Bioroebe.allowed_dna_nucleotides?
|
519
|
+
#
|
520
|
+
# This will return an Array with valid DNA nucleotides.
|
521
|
+
# ========================================================================= #
|
522
|
+
def self.allowed_dna_nucleotides?
  # A new Array is built on every call; it holds every entry of
  # POSSIBLE_DNA_NUCLEOTIDES except for 'N'.
  return POSSIBLE_DNA_NUCLEOTIDES.reject { |nucleotide| nucleotide == 'N' }
end
class << self
  alias dna? allowed_dna_nucleotides? # === Bioroebe.dna?
end
|
525
|
+
|
526
|
+
# ========================================================================= #
|
527
|
+
# === Bioroebe.return_DNA_nucleotides
|
528
|
+
# ========================================================================= #
|
529
|
+
def self.return_DNA_nucleotides
  # Returns the DNA_NUCLEOTIDES constant itself, not a copy.
  DNA_NUCLEOTIDES
end
|
532
|
+
|
533
|
+
# ========================================================================= #
|
534
|
+
# === FTP_NCBI_TAXONOMY_DATABASE
|
535
|
+
#
|
536
|
+
# This constant refers to the taxonomy-database from NCBI. This is the
|
537
|
+
# file that can be downloaded from the NCBI homepage (actually, the
|
538
|
+
# ftp-listing).
|
539
|
+
#
|
540
|
+
# Take note that this database, in .tar.gz format, is about 50 MB in
|
541
|
+
# size or even larger these days. So only download it if you really
|
542
|
+
# need it locally.
|
543
|
+
# ========================================================================= #
|
544
|
+
FTP_NCBI_TAXONOMY_DATABASE =
|
545
|
+
'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
|
546
|
+
|
547
|
+
# ========================================================================= #
|
548
|
+
# === URL_TO_TAXONOMY_ARCHIVE
|
549
|
+
#
|
550
|
+
# An "alias" to the above ^^^ constant.
|
551
|
+
# ========================================================================= #
|
552
|
+
URL_TO_TAXONOMY_ARCHIVE = FTP_NCBI_TAXONOMY_DATABASE
|
553
|
+
|
554
|
+
# ========================================================================= #
|
555
|
+
# === NCBI_NUCCORE
|
556
|
+
# ========================================================================= #
|
557
|
+
NCBI_NUCCORE =
|
558
|
+
'https://www.ncbi.nlm.nih.gov/nuccore/'
|
559
|
+
|
560
|
+
# ========================================================================= #
|
561
|
+
# === NCBI_GENE
|
562
|
+
# ========================================================================= #
|
563
|
+
NCBI_GENE =
|
564
|
+
'https://www.ncbi.nlm.nih.gov/gene/'
|
565
|
+
|
566
|
+
# ========================================================================= #
|
567
|
+
# === FILE_HYDROPATHY_TABLE
|
568
|
+
# ========================================================================= #
|
569
|
+
FILE_HYDROPATHY_TABLE =
|
570
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}hydropathy_table.yml"
|
571
|
+
|
572
|
+
# ========================================================================= #
|
573
|
+
# === FILE_NUCLEAR_LOCALIZATION_SEQUENCES
|
574
|
+
# ========================================================================= #
|
575
|
+
FILE_NUCLEAR_LOCALIZATION_SEQUENCES =
|
576
|
+
"#{project_yaml_directory?}nuclear_localization_sequences.yml"
|
577
|
+
|
578
|
+
# ========================================================================= #
|
579
|
+
# === FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS
|
580
|
+
# ========================================================================= #
|
581
|
+
FILE_DEFAULT_COLOURS_FOR_THE_AMINOACIDS =
|
582
|
+
"#{project_yaml_directory?}configuration/default_colours_for_the_aminoacids.yml"
|
583
|
+
|
584
|
+
# ========================================================================= #
|
585
|
+
# === FILE_BROWSER
|
586
|
+
# ========================================================================= #
|
587
|
+
FILE_BROWSER =
|
588
|
+
"#{project_yaml_directory?}configuration/browser.yml"
|
589
|
+
|
590
|
+
# ========================================================================= #
|
591
|
+
# === FILE_AMINOACIDS_MOLECULAR_FORMULA
|
592
|
+
# ========================================================================= #
|
593
|
+
FILE_AMINOACIDS_MOLECULAR_FORMULA =
|
594
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
|
595
|
+
|
596
|
+
# ========================================================================= #
|
597
|
+
# === FILE_AMINOACIDS_THREE_TO_ONE
|
598
|
+
# ========================================================================= #
|
599
|
+
FILE_AMINOACIDS_THREE_TO_ONE =
|
600
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"
|
601
|
+
|
602
|
+
# ========================================================================= #
|
603
|
+
# === FILE_WEIGHT_OF_COMMON_PROTEINS
|
604
|
+
# ========================================================================= #
|
605
|
+
FILE_WEIGHT_OF_COMMON_PROTEINS =
|
606
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}weight_of_common_proteins.yml"
|
607
|
+
|
608
|
+
# ========================================================================= #
|
609
|
+
# === FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER
|
610
|
+
# ========================================================================= #
|
611
|
+
FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER =
|
612
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_long_name_to_one_letter.yml"
|
613
|
+
|
614
|
+
# ========================================================================= #
|
615
|
+
# === FILE_AMINO_ACIDS_MOLECULAR_FORMULA
|
616
|
+
# ========================================================================= #
|
617
|
+
FILE_AMINO_ACIDS_MOLECULAR_FORMULA =
|
618
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_molecular_formula.yml"
|
619
|
+
|
620
|
+
# ========================================================================= #
|
621
|
+
# === FILE_AMINO_ACIDS_MASS_TABLE
|
622
|
+
#
|
623
|
+
# bl $BIOROEBE_YAML/aminoacids/amino_acids_monoisotopic_mass_table.yml
|
624
|
+
#
|
625
|
+
# ========================================================================= #
|
626
|
+
FILE_AMINO_ACIDS_MASS_TABLE =
|
627
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_monoisotopic_mass_table.yml"
|
628
|
+
|
629
|
+
# ========================================================================= #
|
630
|
+
# === FILE_AMINO_ACIDS
|
631
|
+
# ========================================================================= #
|
632
|
+
FILE_AMINO_ACIDS =
|
633
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids.yml"
|
634
|
+
|
635
|
+
# ========================================================================= #
|
636
|
+
# === Bioroebe.file_amino_acids
|
637
|
+
# ========================================================================= #
|
638
|
+
def self.file_amino_acids
  # Method-based accessor for the FILE_AMINO_ACIDS constant.
  return FILE_AMINO_ACIDS
end
|
641
|
+
|
642
|
+
# ========================================================================= #
|
643
|
+
# === FILE_AMINO_ACIDS_ABBREVIATIONS
|
644
|
+
# ========================================================================= #
|
645
|
+
FILE_AMINO_ACIDS_ABBREVIATIONS =
|
646
|
+
"#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_abbreviations.yml"
|
647
|
+
|
648
|
+
# ========================================================================= #
|
649
|
+
# === Bioroebe.file_amino_acids_abbreviations
|
650
|
+
# ========================================================================= #
|
651
|
+
def self.file_amino_acids_abbreviations
  # Method-based accessor for the FILE_AMINO_ACIDS_ABBREVIATIONS constant.
  return FILE_AMINO_ACIDS_ABBREVIATIONS
end
|
654
|
+
|
655
|
+
# ========================================================================= #
|
656
|
+
# === AMINO_ACIDS_ABBREVIATIONS
|
657
|
+
# ========================================================================= #
|
658
|
+
# Load the abbreviations dataset from FILE_AMINO_ACIDS_ABBREVIATIONS.
#
# The constant is always defined: it falls back to an empty Hash when
# the file does not exist or when the file contains invalid YAML (in
# the latter case the parse error is printed via pp). Previously the
# constant stayed undefined when the file was missing, which led to a
# NameError as soon as it was referenced.
AMINO_ACIDS_ABBREVIATIONS =
  if File.exist?(FILE_AMINO_ACIDS_ABBREVIATIONS)
    begin
      # $BIOROEBE/amino_acids_abbreviations.yml
      YAML.load_file(FILE_AMINO_ACIDS_ABBREVIATIONS)
    rescue Psych::SyntaxError => error
      pp error
      {}
    end
  else
    {}
  end
|
667
|
+
|
668
|
+
# ========================================================================= #
|
669
|
+
# === DIRECTORY_CODON_TABLES_FREQUENCIES
|
670
|
+
#
|
671
|
+
# This constant may point to a directory such as:
|
672
|
+
#
|
673
|
+
# /home/Programs/Ruby/2.7.0/lib/ruby/site_ruby/2.7.0/bioroebe/codon_tables/frequencies/
|
674
|
+
#
|
675
|
+
# ========================================================================= #
|
676
|
+
DIRECTORY_CODON_TABLES_FREQUENCIES =
|
677
|
+
"#{CODON_TABLES_DIRECTORY}frequencies/"
|
678
|
+
|
679
|
+
# ========================================================================= #
|
680
|
+
# === Bioroebe.directory_frequencies?
|
681
|
+
#
|
682
|
+
# Preferentially use this method past the year 2022 - it is a tiny bit
|
683
|
+
# more flexible than the above constant.
|
684
|
+
# ========================================================================= #
|
685
|
+
# Builds the path of the "frequencies/" subdirectory.
#
# codon_tables_directory - base directory; its String form is used
#                          verbatim as the prefix (no separator is added).
#                          Defaults to CODON_TABLES_DIRECTORY.
#
# Returns a new String: the prefix followed by "frequencies/".
def self.directory_frequencies?(
    codon_tables_directory = CODON_TABLES_DIRECTORY
  )
  return format('%sfrequencies/', codon_tables_directory)
end
|
690
|
+
|
691
|
+
# ========================================================================= #
|
692
|
+
# === FILE_NUCLEOTIDES
|
693
|
+
# ========================================================================= #
|
694
|
+
# Path of nucleotides.yml below the project's yaml directory.
FILE_NUCLEOTIDES = "#{project_yaml_directory?}nucleotides/nucleotides.yml"

# ========================================================================= #
# === NUCLEOTIDES
#
# Parsed content of FILE_NUCLEOTIDES, or nil when that file does not exist.
# ========================================================================= #
NUCLEOTIDES = File.exist?(FILE_NUCLEOTIDES) ? YAML.load_file(FILE_NUCLEOTIDES) : nil
|
705
|
+
|
706
|
+
# ========================================================================= #
|
707
|
+
# === Bioroebe.file_statistics?
|
708
|
+
#
|
709
|
+
# This file can normally be found here:
|
710
|
+
#
|
711
|
+
# <Bioroebe.log_dir?>statistics.yml
|
712
|
+
#
|
713
|
+
# ========================================================================= #
|
714
|
+
# Returns the path of statistics.yml inside the directory reported by
# Bioroebe.log_dir? (that value is used as a prefix, no separator added).
def self.file_statistics?
  log_directory = Bioroebe.log_dir?
  "#{log_directory}statistics.yml"
end
|
717
|
+
|
718
|
+
# ========================================================================= #
|
719
|
+
# === Bioroebe.fasta_dir?
|
720
|
+
# ========================================================================= #
|
721
|
+
# Returns the "fasta/" subdirectory of the directory reported by
# Bioroebe.log_dir?.
def self.fasta_dir?
  log_directory = Bioroebe.log_dir?
  "#{log_directory}fasta/"
end
|
724
|
+
|
725
|
+
# ========================================================================= #
|
726
|
+
# === Bioroebe.file_fastq_quality_schemes
|
727
|
+
#
|
728
|
+
# This method will return a path such as this one here:
|
729
|
+
#
|
730
|
+
# /Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/yaml/fasta_and_fastq/fastq_quality_schemes.yml
|
731
|
+
#
|
732
|
+
# ========================================================================= #
|
733
|
+
# Returns the path of fastq_quality_schemes.yml, which lives in the
# "fasta_and_fastq/" subdirectory of project_yaml_directory?.
def self.file_fastq_quality_schemes
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}fasta_and_fastq/fastq_quality_schemes.yml"
end
|
736
|
+
|
737
|
+
# ========================================================================= #
|
738
|
+
# === Bioroebe.file_talens
|
739
|
+
# ========================================================================= #
|
740
|
+
# Returns the path of talens.yml directly inside project_yaml_directory?.
def self.file_talens
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}talens.yml"
end
|
743
|
+
|
744
|
+
# ========================================================================= #
|
745
|
+
# === FILE_GFP_SEQUENCE
|
746
|
+
# ========================================================================= #
|
747
|
+
# Path of the bundled GFP FASTA file in the "sequences/" subdirectory of
# project_yaml_directory?.
FILE_GFP_SEQUENCE = "#{project_yaml_directory?}sequences/JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta"

# ========================================================================= #
# === FILE_RESTRICTION_ENZYMES
#
# Path of restriction_enzymes.yml in the "restriction_enzymes/"
# subdirectory of project_yaml_directory?.
# ========================================================================= #
FILE_RESTRICTION_ENZYMES = "#{project_yaml_directory?}restriction_enzymes/restriction_enzymes.yml"

# ========================================================================= #
# === FILE_COLOURIZE_FASTA_SEQUENCES
#
# This constant points to the .yml file that holds information on how
# to colourize the FASTA sequences.
# ========================================================================= #
FILE_COLOURIZE_FASTA_SEQUENCES = "#{project_yaml_directory?}configuration/colourize_fasta_sequences.yml"
|
767
|
+
|
768
|
+
# ========================================================================= #
|
769
|
+
# === FILE_BLOSUM45
|
770
|
+
# ========================================================================= #
|
771
|
+
# Paths of the individual BLOSUM .yml files inside BLOSUM_DIRECTORY.
#
# File.join is used for every entry, so the result is well-formed whether
# or not BLOSUM_DIRECTORY carries a trailing "/". Before, five of these
# paths inserted an extra "/" while FILE_BLOSUM_MATRIX inserted none, so
# one of the two forms was malformed for any given directory value.
FILE_BLOSUM45 = File.join(BLOSUM_DIRECTORY, 'blosum45.yml')

# ========================================================================= #
# === FILE_BLOSUM50
# ========================================================================= #
FILE_BLOSUM50 = File.join(BLOSUM_DIRECTORY, 'blosum50.yml')

# ========================================================================= #
# === FILE_BLOSUM62
# ========================================================================= #
FILE_BLOSUM62 = File.join(BLOSUM_DIRECTORY, 'blosum62.yml')

# ========================================================================= #
# === FILE_BLOSUM80
# ========================================================================= #
FILE_BLOSUM80 = File.join(BLOSUM_DIRECTORY, 'blosum80.yml')

# ========================================================================= #
# === FILE_BLOSUM90
# ========================================================================= #
FILE_BLOSUM90 = File.join(BLOSUM_DIRECTORY, 'blosum90.yml')

# ========================================================================= #
# === FILE_BLOSUM_MATRIX
# ========================================================================= #
FILE_BLOSUM_MATRIX = File.join(BLOSUM_DIRECTORY, 'blosum_matrix.yml')
|
797
|
+
|
798
|
+
# ========================================================================= #
|
799
|
+
# === Bioroebe.blosum_matrix
|
800
|
+
# ========================================================================= #
|
801
|
+
# Parses a YAML file and hands back the resulting object.
#
# i - path of the .yml file to read; defaults to FILE_BLOSUM_MATRIX.
#
# Returns whatever YAML.load_file produces for that file. Errors raised by
# YAML.load_file (missing file, invalid YAML) are not rescued here.
def self.blosum_matrix(i = FILE_BLOSUM_MATRIX)
  parsed_matrix = YAML.load_file(i)
  return parsed_matrix
end
|
804
|
+
|
805
|
+
# ========================================================================= #
|
806
|
+
# === HYDROPATHY_TABLE
|
807
|
+
# ========================================================================= #
|
808
|
+
# Parsed content of FILE_HYDROPATHY_TABLE. The constant is only defined
# when that file exists; there is no fallback value.
HYDROPATHY_TABLE = YAML.load_file(FILE_HYDROPATHY_TABLE) if File.exist?(FILE_HYDROPATHY_TABLE)
|
814
|
+
|
815
|
+
# ========================================================================= #
|
816
|
+
# === FILE_CHROMOSOME_NUMBERS
|
817
|
+
# ========================================================================= #
|
818
|
+
# Path of chromosome_numbers.yml in the "chromosomes/" subdirectory of
# project_yaml_directory?.
FILE_CHROMOSOME_NUMBERS = "#{project_yaml_directory?}chromosomes/chromosome_numbers.yml"

# ========================================================================= #
# === FILE_AMINO_ACIDS_FREQUENCY
#
# Path of amino_acids_frequency.yml inside
# BIOROEBE_YAML_AMINOACIDS_DIRECTORY.
# ========================================================================= #
FILE_AMINO_ACIDS_FREQUENCY = "#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_frequency.yml"
|
826
|
+
|
827
|
+
# ========================================================================= #
|
828
|
+
# === Bioroebe.file_amino_acids_frequency
|
829
|
+
# ========================================================================= #
|
830
|
+
# Returns the path of amino_acids_frequency.yml inside
# BIOROEBE_YAML_AMINOACIDS_DIRECTORY. A fresh String is built on every
# call, so callers do not share the FILE_AMINO_ACIDS_FREQUENCY object.
def self.file_amino_acids_frequency
  aminoacids_directory = BIOROEBE_YAML_AMINOACIDS_DIRECTORY
  "#{aminoacids_directory}amino_acids_frequency.yml"
end
|
833
|
+
|
834
|
+
# ========================================================================= #
|
835
|
+
# === FILE_AMINO_ACIDS_RESTE_YAML
|
836
|
+
# ========================================================================= #
|
837
|
+
# Path of amino_acids_reste.yml inside BIOROEBE_YAML_AMINOACIDS_DIRECTORY.
FILE_AMINO_ACIDS_RESTE_YAML = "#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_reste.yml"

# ========================================================================= #
# === FILE_AMINO_ACIDS_THREE_TO_ONE
#
# Path of amino_acids_three_to_one.yml. The keys of the dataset loaded
# from this file are kept downcased.
#
#   bl $RUBY_SRC/bioroebe/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml
#
# ========================================================================= #
FILE_AMINO_ACIDS_THREE_TO_ONE = "#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_three_to_one.yml"

# ========================================================================= #
# === FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE
#
# Path of the file amino_acids_average_mass_table.yml.
# ========================================================================= #
FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE = "#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_average_mass_table.yml"

# ========================================================================= #
# === FILE_NUCLEOTIDES_WEIGHT
#
# Path of the file that holds the weight of the nucleotides.
# ========================================================================= #
FILE_NUCLEOTIDES_WEIGHT = "#{project_yaml_directory?}nucleotides/nucleotides_weight.yml"

# ========================================================================= #
# === UNICODE_HORIZONTAL_BAR
#
# A single horizontal-bar character.
# ========================================================================= #
UNICODE_HORIZONTAL_BAR = '―'
|
871
|
+
|
872
|
+
# ========================================================================= #
|
873
|
+
# === Bioroebe.load_and_return_the_restriction_enzymes
|
874
|
+
#
|
875
|
+
# This method will load, and then return the restriction enzymes that
|
876
|
+
# are bundled within the bioroebe-project.
|
877
|
+
# ========================================================================= #
|
878
|
+
# Parses FILE_RESTRICTION_ENZYMES on every call and returns the result of
# YAML.load_file. Nothing is cached and no error is rescued here.
def self.load_and_return_the_restriction_enzymes
  restriction_enzymes = YAML.load_file(FILE_RESTRICTION_ENZYMES)
  return restriction_enzymes
end
|
881
|
+
|
882
|
+
# ========================================================================= #
|
883
|
+
# === Bioroebe.file_restriction_enzymes
|
884
|
+
# ========================================================================= #
|
885
|
+
# Module-level reader for the FILE_RESTRICTION_ENZYMES constant.
def self.file_restriction_enzymes
  return FILE_RESTRICTION_ENZYMES
end
|
888
|
+
|
889
|
+
# ========================================================================= #
|
890
|
+
# === file_restriction_enzymes
|
891
|
+
# ========================================================================= #
|
892
|
+
# Instance-level variant: simply delegates to the module-level
# ::Bioroebe.file_restriction_enzymes and returns its result.
def file_restriction_enzymes
  return ::Bioroebe.file_restriction_enzymes
end
|
895
|
+
|
896
|
+
# ========================================================================= #
|
897
|
+
# === Bioroebe.blosum_directory?
|
898
|
+
# ========================================================================= #
|
899
|
+
# Returns the "blosum/" subdirectory of project_yaml_directory?, including
# the trailing slash.
def self.blosum_directory?
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}blosum/"
end
|
902
|
+
|
903
|
+
# ========================================================================= #
|
904
|
+
# === AMINO_ACIDS_MOLECULAR_FORMULA
|
905
|
+
# ========================================================================= #
|
906
|
+
# Parsed content of FILE_AMINO_ACIDS_MOLECULAR_FORMULA. The constant is
# only defined when that file exists; there is no fallback value.
AMINO_ACIDS_MOLECULAR_FORMULA = YAML.load_file(FILE_AMINO_ACIDS_MOLECULAR_FORMULA) if File.exist?(FILE_AMINO_ACIDS_MOLECULAR_FORMULA)
|
911
|
+
|
912
|
+
# ========================================================================= #
|
913
|
+
# === AMINO_ACIDS_RESTE
|
914
|
+
# ========================================================================= #
|
915
|
+
# Dataset loaded from FILE_AMINO_ACIDS_RESTE_YAML, re-keyed so that every
# key is a downcased String (keys are kept downcased since August 2015).
#
# Falls back to an empty Hash when the file is missing, when it contains
# invalid YAML, or when it is empty.
AMINO_ACIDS_RESTE =
  if File.exist?(FILE_AMINO_ACIDS_RESTE_YAML)
    begin
      # YAML.load_file yields false for an empty document; treat that as
      # "no entries" rather than failing on false.each_pair.
      loaded_dataset = YAML.load_file(FILE_AMINO_ACIDS_RESTE_YAML) || {}
      loaded_dataset.each_with_object({}) { |(key, value), downcased|
        # to_s keeps non-String keys (e. g. Symbols) from raising, the same
        # way the AMINO_ACIDS_THREE_TO_ONE loader treats its keys.
        downcased[key.to_s.downcase] = value
      }
    rescue Psych::SyntaxError
      {}
    end
  else
    {}
  end
|
934
|
+
|
935
|
+
# ========================================================================= #
|
936
|
+
# === AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER
|
937
|
+
#
|
938
|
+
# Load up the dataset stored in the file
|
939
|
+
# amino_acids_long_name_to_one_letter.yml.
|
940
|
+
# ========================================================================= #
|
941
|
+
# Parsed content of FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER. When the file
# is missing, a notice is printed and the constant stays undefined.
if File.exist?(FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER)
  AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER = YAML.load_file(FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER)
else
  puts "No file exists at #{FILE_AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER}."
end
|
947
|
+
|
948
|
+
# ========================================================================= #
|
949
|
+
# === AMINO_ACIDS_MASS_TABLE
|
950
|
+
# ========================================================================= #
|
951
|
+
# Mass table keyed by one-letter amino acid code.
#
# Taken from FILE_AMINO_ACIDS_MASS_TABLE when that file exists
# (bl $BIOROEBE/yaml/amino_acids_mass_table.yml); otherwise the hardcoded
# table below is used. The hardcoded part may no longer be necessary.
AMINO_ACIDS_MASS_TABLE =
  if File.exist?(FILE_AMINO_ACIDS_MASS_TABLE)
    YAML.load_file(FILE_AMINO_ACIDS_MASS_TABLE)
  else
    {
      'A' => 71.03711,
      'C' => 103.00919,
      'D' => 115.02694,
      'E' => 129.04259,
      'F' => 147.06841,
      'G' => 57.02146,
      'H' => 137.05891,
      'I' => 113.08406,
      'K' => 128.09496,
      'L' => 113.08406,
      'M' => 131.04049,
      'N' => 114.04293,
      'P' => 97.05276,
      'Q' => 128.05858,
      'R' => 156.10111,
      'S' => 87.03203,
      'T' => 101.04768,
      'V' => 99.06841,
      'W' => 186.07931,
      'Y' => 163.06333
    }
  end

# An alias: refers to the very same object as AMINO_ACIDS_MASS_TABLE.
AMINO_ACIDS_AVERAGE_MONOISOTOPIC_TABLE = AMINO_ACIDS_MASS_TABLE
|
964
|
+
|
965
|
+
# ========================================================================= #
|
966
|
+
# Next determine some important constants.
|
967
|
+
# ========================================================================= #
|
968
|
+
if File.exist? FILE_AMINO_ACIDS
  # ======================================================================= #
  # === Bioroebe::AMINO_ACIDS
  #
  # Parsed content of amino_acids.yml:
  #
  #   bl $BIOROEBE/yaml/aminoacids/amino_acids.yml
  #
  # ======================================================================= #
  AMINO_ACIDS = YAML.load_file(FILE_AMINO_ACIDS)
  # ======================================================================= #
  # === ::Bioroebe::AMINO_ACIDS_ENGLISH
  #
  # The english dataset lives in a separate file, so its presence is checked
  # on its own. Before, a missing amino_acids_english.yml raised
  # Errno::ENOENT while this file was being required.
  #
  # Note that FILE_AMINO_ACIDS_ENGLISH holds the parsed dataset, not a path,
  # despite its name; the name is kept for backward compatibility.
  # ======================================================================= #
  path_to_the_english_dataset =
    "#{BIOROEBE_YAML_AMINOACIDS_DIRECTORY}amino_acids_english.yml"
  if File.exist? path_to_the_english_dataset
    FILE_AMINO_ACIDS_ENGLISH = YAML.load_file(path_to_the_english_dataset)
    AMINO_ACIDS_ENGLISH = FILE_AMINO_ACIDS_ENGLISH # === AMINO_ACIDS_ENGLISH
  end
end
|
986
|
+
|
987
|
+
# ========================================================================= #
|
988
|
+
# === Bioroebe.aminoacids?
|
989
|
+
#
|
990
|
+
# Note that this will return a Hash that looks like this:
|
991
|
+
#
|
992
|
+
# {"A"=>{"ala"=>"alanine", "d
|
993
|
+
#
|
994
|
+
# ========================================================================= #
|
995
|
+
# Module-level reader for the AMINO_ACIDS constant; returns the same
# object the constant holds.
def self.aminoacids?
  return AMINO_ACIDS
end
|
998
|
+
|
999
|
+
# ========================================================================= #
|
1000
|
+
# === AMINO_ACIDS_AVERAGE_MASS_TABLE
|
1001
|
+
#
|
1002
|
+
# And here is the average mass table.
|
1003
|
+
# ========================================================================= #
|
1004
|
+
# Average mass table keyed by one-letter amino acid code.
#
# Taken from FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE when that file exists
# (bl $BIOROEBE/yaml/aminoacids/amino_acids_average_mass_table.yml);
# otherwise the hardcoded table below is used.
AMINO_ACIDS_AVERAGE_MASS_TABLE =
  if File.exist?(FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE)
    YAML.load_file(FILE_AMINO_ACIDS_AVERAGE_MASS_TABLE)
  else
    {
      'A' => 71.0788,  'C' => 103.1388, 'D' => 115.0886,
      'E' => 129.1155, 'F' => 147.1766, 'G' => 57.0519,
      'H' => 137.1411, 'I' => 113.1594, 'K' => 128.1741,
      'L' => 113.1594, 'M' => 131.1926, 'N' => 114.1038,
      'P' => 97.1167,  'Q' => 128.1307, 'R' => 156.1875,
      'S' => 87.0782,  'T' => 101.1051, 'V' => 99.1326,
      'W' => 186.2132, 'Y' => 163.1760
    }
  end
|
1030
|
+
|
1031
|
+
# ========================================================================= #
|
1032
|
+
# === AMINO_ACIDS_THREE_TO_ONE
|
1033
|
+
#
|
1034
|
+
# An "alias" to this constant exists, named AA_THREE_TO_ONE.
|
1035
|
+
#
|
1036
|
+
# bl $BIOROEBE/yaml/aminoacids/amino_acids_three_to_one.yml
|
1037
|
+
#
|
1038
|
+
# ========================================================================= #
|
1039
|
+
# Dataset loaded from FILE_AMINO_ACIDS_THREE_TO_ONE, with every key
# converted to a downcased String. Only defined when the file exists.
if File.exist? FILE_AMINO_ACIDS_THREE_TO_ONE
  AMINO_ACIDS_THREE_TO_ONE =
    YAML.load_file(FILE_AMINO_ACIDS_THREE_TO_ONE).each_with_object({}) { |(key, value), downcased|
      downcased[key.to_s.downcase] = value
    }
  # AA_THREE_TO_ONE = AMINO_ACIDS_THREE_TO_ONE # Legacy pointer. Is mildly deprecated.
  # ^^^ Disabled as of 25.03.2020. Use the longer variant instead.
end
|
1050
|
+
|
1051
|
+
# ========================================================================= #
|
1052
|
+
# === NUCLEAR_LOCALIZATION_SEQUENCES
|
1053
|
+
#
|
1054
|
+
# We must be careful and check whether it exists or not.
|
1055
|
+
# ========================================================================= #
|
1056
|
+
if File.exist? FILE_NUCLEAR_LOCALIZATION_SEQUENCES
  begin
    NUCLEAR_LOCALIZATION_SEQUENCES =
      YAML.load_file(FILE_NUCLEAR_LOCALIZATION_SEQUENCES)
    # ======================================================================= #
    # === Bioroebe::ARRAY_NLS_SEQUENCES
    #
    # All NLS sequences should be registered here.
    #
    # Each value is cut at its first space (if any) and stripped of the
    # characters "[" and "]". The non-mutating String#delete is used on
    # purpose: delete! used to modify the strings stored inside
    # NUCLEAR_LOCALIZATION_SEQUENCES whenever a value contained no space.
    # ======================================================================= #
    ARRAY_NLS_SEQUENCES = NUCLEAR_LOCALIZATION_SEQUENCES.values.map { |entry|
      sequence = entry.include?(' ') ? entry[0, entry.index(' ')] : entry
      sequence.delete('[]')
    }
  rescue Psych::SyntaxError => error
    pp error
    # The assignment above never happened when parsing failed, so define
    # the constant here as well; otherwise later references raise NameError.
    NUCLEAR_LOCALIZATION_SEQUENCES = {}
    ARRAY_NLS_SEQUENCES = []
  end
else
  # NOTE(review): an empty String (not a Hash) is the historic fallback for
  # a missing file; it is kept unchanged for backward compatibility.
  NUCLEAR_LOCALIZATION_SEQUENCES = ''
  ARRAY_NLS_SEQUENCES = []
end
|
1079
|
+
|
1080
|
+
# ========================================================================= #
|
1081
|
+
# === Bioroebe.random_aminoacid?
|
1082
|
+
#
|
1083
|
+
# This method will return a random aminoacid.
|
1084
|
+
#
|
1085
|
+
# A number can be passed to this method, which specifies how many
|
1086
|
+
# random aminoacids are to be returned, e. g. 20 as argument refers
|
1087
|
+
# to 20 aminoacids that will be generated here.
|
1088
|
+
#
|
1089
|
+
# Usage example:
|
1090
|
+
#
|
1091
|
+
# Bioroebe.random_aminoacid? 20 # => "UAVHYQQESWUYAOVESEIY"
|
1092
|
+
#
|
1093
|
+
# ========================================================================= #
|
1094
|
+
# Picks random keys from AMINO_ACIDS.
#
# optional_return_n_aminoacids - how many keys to pick (default: 1).
#
# For exactly 1 the sampled key itself is returned. For any other value
# the argument is converted via to_i, that many keys are sampled
# independently, and the joined, stripped String is returned.
def self.random_aminoacid?(optional_return_n_aminoacids = 1)
  if 1 == optional_return_n_aminoacids
    return AMINO_ACIDS.keys.sample # This should be a bit faster.
  end
  sampled_keys = []
  optional_return_n_aminoacids.to_i.times {
    sampled_keys << AMINO_ACIDS.keys.sample
  }
  sampled_keys.join.strip
end

class << self
  alias random_aminoacid random_aminoacid? # === Bioroebe.random_aminoacid
  alias return_random_aminoacid random_aminoacid? # === Bioroebe.return_random_aminoacid
end
|
1105
|
+
|
1106
|
+
# ========================================================================= #
|
1107
|
+
# === Bioroebe.return_array_of_common_aminoacids
|
1108
|
+
# ========================================================================= #
|
1109
|
+
# Returns the keys of AMINO_ACIDS as a new Array, with the entries
# 'O' and 'U' removed.
def self.return_array_of_common_aminoacids
  excluded_letters = %w[O U]
  AMINO_ACIDS.keys - excluded_letters
end
|
1112
|
+
|
1113
|
+
# ========================================================================= #
|
1114
|
+
# === FILE_ALU_ELEMENTS
|
1115
|
+
# ========================================================================= #
|
1116
|
+
# FILE_ALU_ELEMENTS =
|
1117
|
+
# "#{project_yaml_directory?}alu_elements.yml"
|
1118
|
+
|
1119
|
+
# ========================================================================= #
|
1120
|
+
# === Bioroebe.file_molecular_weight
|
1121
|
+
# ========================================================================= #
|
1122
|
+
# Returns the path of molecular_weight.yml in the "aminoacids/"
# subdirectory of project_yaml_directory?.
def self.file_molecular_weight
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}aminoacids/molecular_weight.yml"
end
|
1125
|
+
|
1126
|
+
# ========================================================================= #
|
1127
|
+
# === Bioroebe.file_amino_acids_long_name_to_one_letter
|
1128
|
+
#
|
1129
|
+
# This method will return a String such as:
|
1130
|
+
#
|
1131
|
+
# "/home/Programs/Ruby/3.1.2/lib/ruby/site_ruby/3.1.0/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml"
|
1132
|
+
#
|
1133
|
+
# ========================================================================= #
|
1134
|
+
# Returns the path of amino_acids_long_name_to_one_letter.yml in the
# "aminoacids/" subdirectory of project_yaml_directory?.
def self.file_amino_acids_long_name_to_one_letter
  yaml_directory = project_yaml_directory?
  "#{yaml_directory}aminoacids/amino_acids_long_name_to_one_letter.yml"
end
|
1137
|
+
|
1138
|
+
module CodonTablesFrequencies # === Bioroebe::CodonTablesFrequencies

  # Every constant below is a file name appended to this directory prefix.
  frequencies_directory = ::Bioroebe::DIRECTORY_CODON_TABLES_FREQUENCIES

  # ========================================================================= #
  # === Bioroebe::CodonTablesFrequencies::FILE_HOMO_SAPIENS
  #
  # Before the rewrite in September 2019 a second constant called
  # CODON_TABLE_FOR_HOMO_SAPIENS referred to this entry. It was removed in
  # 2019, to simplify the code and make it overall more consistent.
  #
  #   bl $RUBY_BIOROEBE/codon_tables/frequencies/9606_Homo_sapiens.yml
  #
  # ========================================================================= #
  FILE_HOMO_SAPIENS = "#{frequencies_directory}9606_Homo_sapiens.yml"

  # ========================================================================= #
  # === FILE_E_COLI
  #
  # For the bacterium E. coli.
  # ========================================================================= #
  FILE_E_COLI = "#{frequencies_directory}83333_Escherichia_coli_K12.yml"

  # ========================================================================= #
  # === FILE_SACCHAROMYCES_CEREVISIAE
  #
  # For the yeast Saccharomyces cerevisiae.
  # ========================================================================= #
  FILE_SACCHAROMYCES_CEREVISIAE = "#{frequencies_directory}4932_Saccharomyces_cerevisiae.yml"

  # ========================================================================= #
  # === FILE_FELIS_CATUS
  #
  # For the domestic cat.
  # ========================================================================= #
  FILE_FELIS_CATUS = "#{frequencies_directory}9685_Felis_catus.yml"

  # ========================================================================= #
  # === FILE_ARABIDOPSIS_THALIANA
  #
  # For Arabidopsis thaliana.
  # ========================================================================= #
  FILE_ARABIDOPSIS_THALIANA = "#{frequencies_directory}3702_Arabidopsis_thaliana.yml"

  # ========================================================================= #
  # === FILE_DROSOPHILA_MELANOGASTER
  #
  # For the fruit fly.
  # ========================================================================= #
  FILE_DROSOPHILA_MELANOGASTER = "#{frequencies_directory}7227_Drosophila_melanogaster.yml"

end # === end Bioroebe::CodonTablesFrequencies
|
1200
|
+
|
1201
|
+
# =========================================================================== #
|
1202
|
+
# === Bioroebe::Postgresql
|
1203
|
+
#
|
1204
|
+
# This file stores any required information to login into a postgresql
|
1205
|
+
# database. This can be on your home system or as part of a cluster.
|
1206
|
+
#
|
1207
|
+
# The below command is equivalent to the following command-line:
|
1208
|
+
#
|
1209
|
+
# psql --host=bunuel.imp.univie.ac.at --port=5471 --username=robert --dbname=robert_db --quiet
|
1210
|
+
#
|
1211
|
+
# =========================================================================== #
|
1212
|
+
# POSTGRE_LOGIN_COMMAND = '
|
1213
|
+
# psql --host=bunuel.imp.univie.ac.at --port=5471 --username=robert --dbname=robert_db --quiet
|
1214
|
+
# '.delete("\n").strip
|
1215
|
+
# =========================================================================== #
|
1216
|
+
module Postgresql # === Bioroebe::Postgresql

  # ========================================================================= #
  # === Bioroebe::Postgresql
  #
  # The following constants can be modified to account for different
  # password, username, dbname and so forth.
  # ========================================================================= #

  # ========================================================================= #
  # === POSTGRE_HOST
  # ========================================================================= #
  POSTGRE_HOST = 'bunuel.imp.univie.ac.at'

  # ========================================================================= #
  # === POSTGRE_PORT
  # ========================================================================= #
  POSTGRE_PORT = '5471'

  # ========================================================================= #
  # === POSTGRE_USERNAME
  # ========================================================================= #
  POSTGRE_USERNAME = 'robert'

  # ========================================================================= #
  # === POSTGRE_DBNAME
  # ========================================================================= #
  POSTGRE_DBNAME = 'robert_db'

  # ========================================================================= #
  # === POSTGRE_EXTRA_FLAGS
  # ========================================================================= #
  POSTGRE_EXTRA_FLAGS = '--quiet'

  # ========================================================================= #
  # === POSTGRE_PASSWORD
  #
  # NOTE(review): this is a credential hard-coded in source. The value is
  # left untouched because other code may read the constant, but it should
  # be moved out of the code base (environment variable, ~/.pgpass, ...).
  # It is not part of any command built in this module.
  # ========================================================================= #
  POSTGRE_PASSWORD = 'robert123'

  # ========================================================================= #
  # === POSTGRE_LOGIN_COMMAND
  #
  # The full psql login command, as a single line:
  #
  #   psql --host=HOST --port=PORT --username=USER --dbname=DB FLAGS
  #
  # The parts are joined with exactly one space each, instead of relying
  # on strip/delete calls applied to a multi-line string literal.
  # ========================================================================= #
  POSTGRE_LOGIN_COMMAND = [
    'psql',
    "--host=#{POSTGRE_HOST}",
    "--port=#{POSTGRE_PORT}",
    "--username=#{POSTGRE_USERNAME}",
    "--dbname=#{POSTGRE_DBNAME}",
    POSTGRE_EXTRA_FLAGS
  ].join(' ')
  POSTGRE_LOGIN_COMMAND_HOME = POSTGRE_LOGIN_COMMAND

  # ========================================================================= #
  # === POSTGRE_LOGIN_COMMAND_NO_HEADERS
  #
  # Same as POSTGRE_LOGIN_COMMAND, with --tuples-only appended. The
  # separating space is written out explicitly; it used to come from the
  # indentation inside a multi-line literal, so the flag could end up glued
  # to the previous one ("--quiet--tuples-only").
  # ========================================================================= #
  POSTGRE_LOGIN_COMMAND_NO_HEADERS = "#{POSTGRE_LOGIN_COMMAND} --tuples-only"

  # ========================================================================= #
  # === POSTGRE_DROP_NODES_COMMAND
  #
  # SQL statements to drop tables. Each pair of names refers to the same
  # String object.
  # ========================================================================= #
  POSTGRE_DROP_NODES_COMMAND = DROP_TABLE_NODES = 'DROP TABLE nodes;'
  POSTGRE_DROP_NAMES_COMMAND = DROP_TABLE_NAMES = 'DROP TABLE names;'
  DROP_TABLE_FASTA = 'DROP TABLE fasta;'

  # ========================================================================= #
  # === Bioroebe::Postgresql::POSTGRE_DROP_FASTA_COMMAND
  # ========================================================================= #
  POSTGRE_DROP_FASTA_COMMAND = DROP_FASTA_TABLE = DROP_TABLE_FASTA

end # === end Bioroebe::Postgresql
|
1290
|
+
|
1291
|
+
end
|
1292
|
+
|
1293
|
+
# Manual smoke test: only runs when this file is executed directly. It
# prints a selection of the paths and datasets defined above.
if __FILE__ == $PROGRAM_NAME
  alias e puts
  e
  e 'The allowed DNA nucleotides are:'
  e
  e ' '+Bioroebe.allowed_dna_nucleotides?.join(', ')
  e
  e 'The base directory for the Bioroebe project can be found at: '+
    Bioroebe.base_directory?
  e Bioroebe.user_home_directory?
  _ = "wget #{Bioroebe::FTP_NCBI_TAXONOMY_DATABASE}"
  e _; e '(We could invoke the above, but this is currently disabled.)' # system _
  e
  # ========================================================================= #
  # Past this point we will test file-constants:
  # ========================================================================= #
  include Bioroebe
  # The fastq quality schemes path is exposed as a module-level method in
  # this file, so the method is called rather than a constant of that name.
  e Bioroebe.file_fastq_quality_schemes
  pp NUCLEOTIDES
  e 'Restriction enzymes can be found here: '+
    FILE_RESTRICTION_ENZYMES
  e FILE_HYDROPATHY_TABLE
  pp HYDROPATHY_TABLE
  e '='*80
  e FILE_AMINO_ACIDS_FREQUENCY
  e FILE_NUCLEOTIDES_WEIGHT
  # Use the FILE_AMINO_ACIDS_* spelling that the rest of this file uses;
  # the FILE_AMINOACIDS_* spelling does not appear anywhere else in the
  # visible part of the file.
  e FILE_AMINO_ACIDS_MOLECULAR_FORMULA
  e FILE_AMINO_ACIDS_THREE_TO_ONE
  e '='*80
  pp AMINO_ACIDS
  e '='*80
  pp AMINO_ACIDS_AVERAGE_MASS_TABLE
  e '='*80
  pp AMINO_ACIDS_THREE_TO_ONE
  e '='*80
  pp Bioroebe::DEFAULT_DNA_SEQUENCE
  # ========================================================================= #
  # Show where the taxonomy-component is stored at:
  # ========================================================================= #
  e "The log file for the taxonomy-component is at: "\
    "#{Bioroebe.taxonomy_ncbi_database_last_update_log_file}"
  # ========================================================================= #
  # Past this point we will test Postgresql specific parts:
  # ========================================================================= #
  include Bioroebe::Postgresql
  e POSTGRE_LOGIN_COMMAND
  e POSTGRE_LOGIN_COMMAND_HOME
  e POSTGRE_LOGIN_COMMAND_NO_HEADERS
  e POSTGRE_DROP_NODES_COMMAND
  e POSTGRE_DROP_NAMES_COMMAND
  e POSTGRE_DROP_FASTA_COMMAND
  e
  e Bioroebe::CodonTablesFrequencies::FILE_DROSOPHILA_MELANOGASTER
  e
  pp Bioroebe.aa_families?
  e
end
|