bioroebe 0.10.80
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.md +428 -0
- data/README.md +9280 -0
- data/bin/advanced_dotplot +7 -0
- data/bin/align_open_reading_frames +12 -0
- data/bin/all_positions_of_this_nucleotide +7 -0
- data/bin/aminoacid_composition +7 -0
- data/bin/aminoacid_frequencies +12 -0
- data/bin/aminoacid_substitution +7 -0
- data/bin/automatically_rename_this_fasta_file +7 -0
- data/bin/base_composition +7 -0
- data/bin/batch_create_windows_executables +7 -0
- data/bin/biomart_console +11 -0
- data/bin/bioroebe +27 -0
- data/bin/bioroebe_controller +10 -0
- data/bin/bioshell +26 -0
- data/bin/blosum_2D_table +12 -0
- data/bin/calculate_n50_value +12 -0
- data/bin/chunked_display +12 -0
- data/bin/codon_frequency +9 -0
- data/bin/codon_to_aminoacid +30 -0
- data/bin/colourize_this_fasta_sequence +14 -0
- data/bin/compact_fasta_file +7 -0
- data/bin/complement +7 -0
- data/bin/complementary_dna_strand +12 -0
- data/bin/complementary_rna_strand +12 -0
- data/bin/compseq +7 -0
- data/bin/consensus_sequence +17 -0
- data/bin/count_AT +12 -0
- data/bin/count_GC +12 -0
- data/bin/create_random_aminoacids +7 -0
- data/bin/decode_this_aminoacid_sequence +20 -0
- data/bin/deduce_aminoacid_sequence +13 -0
- data/bin/deduce_most_likely_aminoacid_sequence +7 -0
- data/bin/display_aminoacid_table +12 -0
- data/bin/display_open_reading_frames +7 -0
- data/bin/dna_to_aminoacid_sequence +7 -0
- data/bin/dna_to_rna +7 -0
- data/bin/downcase_chunked_display +12 -0
- data/bin/download_this_pdb +7 -0
- data/bin/fasta_index +7 -0
- data/bin/fetch_data_from_uniprot +12 -0
- data/bin/filter_away_invalid_nucleotides +12 -0
- data/bin/find_substring +19 -0
- data/bin/genbank_to_fasta +7 -0
- data/bin/hamming_distance +12 -0
- data/bin/input_as_dna +12 -0
- data/bin/is_palindrome +13 -0
- data/bin/leading_five_prime +7 -0
- data/bin/levensthein +7 -0
- data/bin/longest_ORF +14 -0
- data/bin/longest_substring +12 -0
- data/bin/n_stop_codons_in_this_sequence +15 -0
- data/bin/open_reading_frames +14 -0
- data/bin/overwrite_fasta_header +7 -0
- data/bin/palindrome_2D_structure +7 -0
- data/bin/palindrome_generator +7 -0
- data/bin/parse_fasta +7 -0
- data/bin/partner_nucleotide +9 -0
- data/bin/possible_codons_for_this_aminoacid +12 -0
- data/bin/random_dna_sequence +12 -0
- data/bin/random_sequence +12 -0
- data/bin/raw_hamming_distance +12 -0
- data/bin/return_longest_substring_via_LCS_algorithm +7 -0
- data/bin/reverse_complement +7 -0
- data/bin/reverse_sequence +7 -0
- data/bin/ruler +12 -0
- data/bin/scan_this_input_for_startcodons +12 -0
- data/bin/short_aminoacid_letter_from_long_aminoacid_name +7 -0
- data/bin/show_atomic_composition +7 -0
- data/bin/show_codon_usage +12 -0
- data/bin/show_fasta_header +7 -0
- data/bin/show_nucleotide_sequence +7 -0
- data/bin/show_this_codon_table +7 -0
- data/bin/show_this_dna_sequence +7 -0
- data/bin/showorf +14 -0
- data/bin/simplify_fasta +7 -0
- data/bin/sort_aminoacid_based_on_its_hydrophobicity +7 -0
- data/bin/split_this_fasta_file_into_chromosomes +7 -0
- data/bin/strict_filter_away_invalid_aminoacids +7 -0
- data/bin/taxonomy +63 -0
- data/bin/three_to_one +7 -0
- data/bin/to_rna +7 -0
- data/bin/trailing_three_prime +7 -0
- data/bin/trypsin_digest +7 -0
- data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +7 -0
- data/bioroebe.gemspec +97 -0
- data/doc/IUPAC_aminoacids_code.md +36 -0
- data/doc/IUPAC_nucleotide_code.md +19 -0
- data/doc/README.gen +9237 -0
- data/doc/blosum.md +5 -0
- data/doc/compatibility/BIO_PHP.md +37 -0
- data/doc/compatibility/README.md +3 -0
- data/doc/compatibility/emboss.md +56 -0
- data/doc/extensive_usage_example.md +35 -0
- data/doc/german_names_for_the_aminoacids.md +27 -0
- data/doc/instructions_for_the_taxonomy_subproject.md +504 -0
- data/doc/legacy_paths.md +9 -0
- data/doc/pdb_ATOM_entry.md +33 -0
- data/doc/quality_control/README.md +2 -0
- data/doc/quality_control/commandline_applications.md +13 -0
- data/doc/resources.md +23 -0
- data/doc/setup.rb +1655 -0
- data/doc/statistics/statistics.md +41 -0
- data/doc/todo/README.md +5 -0
- data/doc/todo/bioroebe_GUI_todo.md +15 -0
- data/doc/todo/bioroebe_todo.md +2823 -0
- data/doc/using_biomart.md +258 -0
- data/html/test.html +144 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +118 -0
- data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +118 -0
- data/lib/bioroebe/aminoacids/codon_percentage.rb +189 -0
- data/lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb +110 -0
- data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +221 -0
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +460 -0
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +231 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +184 -0
- data/lib/bioroebe/annotations/README.md +2 -0
- data/lib/bioroebe/annotations/create_annotation_format.rb +208 -0
- data/lib/bioroebe/autoinclude.rb +7 -0
- data/lib/bioroebe/base/base.rb +35 -0
- data/lib/bioroebe/base/colours.rb +14 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +275 -0
- data/lib/bioroebe/base/commandline_application/README.md +7 -0
- data/lib/bioroebe/base/commandline_application/aminoacids.rb +33 -0
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +37 -0
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +144 -0
- data/lib/bioroebe/base/commandline_application/directory.rb +33 -0
- data/lib/bioroebe/base/commandline_application/extract.rb +22 -0
- data/lib/bioroebe/base/commandline_application/misc.rb +485 -0
- data/lib/bioroebe/base/commandline_application/opn.rb +47 -0
- data/lib/bioroebe/base/commandline_application/reset.rb +40 -0
- data/lib/bioroebe/base/commandline_application/warnings.rb +36 -0
- data/lib/bioroebe/base/commandline_application/write_what_into.rb +29 -0
- data/lib/bioroebe/base/initialize.rb +18 -0
- data/lib/bioroebe/base/misc.rb +94 -0
- data/lib/bioroebe/base/namespace.rb +16 -0
- data/lib/bioroebe/base/prototype/README.md +12 -0
- data/lib/bioroebe/base/prototype/e_and_ee.rb +24 -0
- data/lib/bioroebe/base/prototype/misc.rb +108 -0
- data/lib/bioroebe/base/prototype/mkdir.rb +20 -0
- data/lib/bioroebe/base/prototype/prototype.rb +21 -0
- data/lib/bioroebe/base/prototype/reset.rb +26 -0
- data/lib/bioroebe/base/reset.rb +11 -0
- data/lib/bioroebe/biomart/LICENSE.md +27 -0
- data/lib/bioroebe/biomart/attribute.rb +77 -0
- data/lib/bioroebe/biomart/biomart.rb +227 -0
- data/lib/bioroebe/biomart/database.rb +128 -0
- data/lib/bioroebe/biomart/dataset.rb +572 -0
- data/lib/bioroebe/biomart/filter.rb +97 -0
- data/lib/bioroebe/biomart/server.rb +152 -0
- data/lib/bioroebe/blosum/blosum.rb +88 -0
- data/lib/bioroebe/calculate/calculate_blosum_score.rb +145 -0
- data/lib/bioroebe/calculate/calculate_gc_content.rb +301 -0
- data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +100 -0
- data/lib/bioroebe/calculate/calculate_melting_temperature.rb +398 -0
- data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +304 -0
- data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +166 -0
- data/lib/bioroebe/cell/README.md +1 -0
- data/lib/bioroebe/cell/cell.rb +63 -0
- data/lib/bioroebe/cleave_and_digest/README.md +2 -0
- data/lib/bioroebe/cleave_and_digest/cleave.rb +80 -0
- data/lib/bioroebe/cleave_and_digest/digestion.rb +75 -0
- data/lib/bioroebe/cleave_and_digest/trypsin.rb +192 -0
- data/lib/bioroebe/codon_tables/README.md +9 -0
- data/lib/bioroebe/codon_tables/frequencies/287_Pseudomonas_aeruginosa.yml +101 -0
- data/lib/bioroebe/codon_tables/frequencies/3702_Arabidopsis_thaliana.yml +77 -0
- data/lib/bioroebe/codon_tables/frequencies/4932_Saccharomyces_cerevisiae.yml +103 -0
- data/lib/bioroebe/codon_tables/frequencies/7227_Drosophila_melanogaster.yml +71 -0
- data/lib/bioroebe/codon_tables/frequencies/83333_Escherichia_coli_K12.yml +103 -0
- data/lib/bioroebe/codon_tables/frequencies/9606_Homo_sapiens.yml +123 -0
- data/lib/bioroebe/codon_tables/frequencies/9685_Felis_catus.yml +78 -0
- data/lib/bioroebe/codon_tables/frequencies/README.md +10 -0
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +337 -0
- data/lib/bioroebe/codons/README.md +28 -0
- data/lib/bioroebe/codons/codon_table.rb +416 -0
- data/lib/bioroebe/codons/codon_tables.rb +123 -0
- data/lib/bioroebe/codons/codons.rb +517 -0
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +102 -0
- data/lib/bioroebe/codons/detect_minimal_codon.rb +180 -0
- data/lib/bioroebe/codons/determine_optimal_codons.rb +74 -0
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +380 -0
- data/lib/bioroebe/codons/sanitize_codon_frequency.rb +144 -0
- data/lib/bioroebe/codons/show_codon_tables.rb +130 -0
- data/lib/bioroebe/codons/show_codon_usage.rb +197 -0
- data/lib/bioroebe/codons/show_this_codon_table.rb +573 -0
- data/lib/bioroebe/codons/start_codons.rb +105 -0
- data/lib/bioroebe/colours/colour_schemes/README.md +10 -0
- data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +38 -0
- data/lib/bioroebe/colours/colour_schemes/buried.rb +70 -0
- data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +101 -0
- data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +262 -0
- data/lib/bioroebe/colours/colour_schemes/helix.rb +65 -0
- data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +70 -0
- data/lib/bioroebe/colours/colour_schemes/nucleotide.rb +47 -0
- data/lib/bioroebe/colours/colour_schemes/score.rb +112 -0
- data/lib/bioroebe/colours/colour_schemes/simple.rb +42 -0
- data/lib/bioroebe/colours/colour_schemes/strand.rb +65 -0
- data/lib/bioroebe/colours/colour_schemes/taylor.rb +58 -0
- data/lib/bioroebe/colours/colour_schemes/turn.rb +65 -0
- data/lib/bioroebe/colours/colour_schemes/zappo.rb +59 -0
- data/lib/bioroebe/colours/colourize_sequence.rb +262 -0
- data/lib/bioroebe/colours/colours.rb +119 -0
- data/lib/bioroebe/colours/misc_colours.rb +80 -0
- data/lib/bioroebe/colours/rev.rb +41 -0
- data/lib/bioroebe/colours/sdir.rb +21 -0
- data/lib/bioroebe/colours/sfancy.rb +21 -0
- data/lib/bioroebe/colours/sfile.rb +21 -0
- data/lib/bioroebe/colours/simp.rb +21 -0
- data/lib/bioroebe/colours/swarn.rb +29 -0
- data/lib/bioroebe/colours/use_colours.rb +27 -0
- data/lib/bioroebe/configuration/configuration.rb +114 -0
- data/lib/bioroebe/configuration/constants.rb +35 -0
- data/lib/bioroebe/constants/GUIs.rb +79 -0
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +146 -0
- data/lib/bioroebe/constants/base_directory.rb +120 -0
- data/lib/bioroebe/constants/carriage_return.rb +14 -0
- data/lib/bioroebe/constants/codon_tables.rb +77 -0
- data/lib/bioroebe/constants/database_constants.rb +107 -0
- data/lib/bioroebe/constants/files_and_directories.rb +579 -0
- data/lib/bioroebe/constants/misc.rb +189 -0
- data/lib/bioroebe/constants/newline.rb +14 -0
- data/lib/bioroebe/constants/nucleotides.rb +114 -0
- data/lib/bioroebe/constants/regex.rb +28 -0
- data/lib/bioroebe/constants/roebe.rb +38 -0
- data/lib/bioroebe/constants/row_terminator.rb +16 -0
- data/lib/bioroebe/constants/tabulator.rb +14 -0
- data/lib/bioroebe/constants/unicode.rb +12 -0
- data/lib/bioroebe/constants/urls.rb +50 -0
- data/lib/bioroebe/conversions/README.md +3 -0
- data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +298 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +569 -0
- data/lib/bioroebe/count/README.md +1 -0
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +352 -0
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +491 -0
- data/lib/bioroebe/count/count_at.rb +39 -0
- data/lib/bioroebe/count/count_gc.rb +43 -0
- data/lib/bioroebe/css/README.md +5 -0
- data/lib/bioroebe/css/project.css +121 -0
- data/lib/bioroebe/data/README.md +10 -0
- data/lib/bioroebe/data/bam/README.md +1 -0
- data/lib/bioroebe/data/data.txt +192 -0
- data/lib/bioroebe/data/fasta/GFP_mutant_3_coding_sequence.fasta +12 -0
- data/lib/bioroebe/data/fasta/alu_elements.fasta +42 -0
- data/lib/bioroebe/data/fasta/lady_slippers_orchid.fasta +1197 -0
- data/lib/bioroebe/data/fasta/loxP.fasta +2 -0
- data/lib/bioroebe/data/fasta/ls_orchid.fasta +1197 -0
- data/lib/bioroebe/data/fasta/pax6_in_mouse.fasta +1 -0
- data/lib/bioroebe/data/fasta/test.fasta +7 -0
- data/lib/bioroebe/data/fasta/test_DNA.fasta +1 -0
- data/lib/bioroebe/data/fastq/fastq_example_file.fastq +32 -0
- data/lib/bioroebe/data/fastq/fastq_example_file_SP1.fastq +1000 -0
- data/lib/bioroebe/data/fastq/one_random_fastq_entry.fastq +4 -0
- data/lib/bioroebe/data/genbank/sample_file.genbank +15 -0
- data/lib/bioroebe/data/genbank/standard.fasta +3 -0
- data/lib/bioroebe/data/gff/Escherichia_coli_K12_plasmid_F_DNA_NC_002483.1.gff3 +345 -0
- data/lib/bioroebe/data/gff/sample.gff +2 -0
- data/lib/bioroebe/data/gff/test_gene.gff +4 -0
- data/lib/bioroebe/data/gff/transcripts.gff +16 -0
- data/lib/bioroebe/data/gtf/README.md +1 -0
- data/lib/bioroebe/data/json/example_config.json +48 -0
- data/lib/bioroebe/data/pdb/1VII.pdb +754 -0
- data/lib/bioroebe/data/pdb/ala_phe_ala.pdb +228 -0
- data/lib/bioroebe/data/pdb/rcsb_pdb_1VII.fasta +2 -0
- data/lib/bioroebe/data/phylip/README.md +11 -0
- data/lib/bioroebe/data/phylip/example.phylip +7 -0
- data/lib/bioroebe/data/svg/example.svg +301 -0
- data/lib/bioroebe/databases/README.md +1 -0
- data/lib/bioroebe/databases/download_taxonomy_database.rb +102 -0
- data/lib/bioroebe/dotplots/README.md +5 -0
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +256 -0
- data/lib/bioroebe/dotplots/dotplot.rb +184 -0
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +191 -0
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +102 -0
- data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +122 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +197 -0
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +282 -0
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +131 -0
- data/lib/bioroebe/encoding/README.md +2 -0
- data/lib/bioroebe/encoding/encoding.rb +45 -0
- data/lib/bioroebe/enzymes/README.md +2 -0
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +46 -0
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +200 -0
- data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +72 -0
- data/lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb +80 -0
- data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +65 -0
- data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +119 -0
- data/lib/bioroebe/exceptions/README.md +2 -0
- data/lib/bioroebe/exceptions/exceptions.rb +17 -0
- data/lib/bioroebe/ext/LICENCE.md +5 -0
- data/lib/bioroebe/ext/README.md +7 -0
- data/lib/bioroebe/ext/main.cpp +45 -0
- data/lib/bioroebe/ext/nucleotide.cpp +24 -0
- data/lib/bioroebe/ext/nussinov_algorithm.cpp +348 -0
- data/lib/bioroebe/ext/sequence +0 -0
- data/lib/bioroebe/ext/sequence.cpp +162 -0
- data/lib/bioroebe/fasta_and_fastq/README.md +6 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +88 -0
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +151 -0
- data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +111 -0
- data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +248 -0
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/README.md +2 -0
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +113 -0
- data/lib/bioroebe/fasta_and_fastq/fasta_parser.rb +167 -0
- data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +131 -0
- data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +87 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +50 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +86 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +117 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +981 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +27 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +156 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +128 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/run.rb +20 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +83 -0
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +112 -0
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +135 -0
- data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +188 -0
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +111 -0
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +26 -0
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb +41 -0
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb +23 -0
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +68 -0
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +290 -0
- data/lib/bioroebe/genbank/README.md +1 -0
- data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +275 -0
- data/lib/bioroebe/genbank/genbank_parser.rb +291 -0
- data/lib/bioroebe/gene/gene.rb +64 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +165 -0
- data/lib/bioroebe/genomes/genome_retriever.rb +79 -0
- data/lib/bioroebe/gui/experimental/README.md +1 -0
- data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +147 -0
- data/lib/bioroebe/gui/gtk3/README.md +2 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +337 -0
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +510 -0
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +55 -0
- data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.config +6 -0
- data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +29 -0
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +196 -0
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +6 -0
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +106 -0
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +406 -0
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +609 -0
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +6 -0
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +189 -0
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +245 -0
- data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +346 -0
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +182 -0
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.config +6 -0
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +370 -0
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.config +6 -0
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +175 -0
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +428 -0
- data/lib/bioroebe/gui/gtk3/parse_pdb_file/parse_pdb_file.rb +342 -0
- data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +580 -0
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.config +6 -0
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +182 -0
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +566 -0
- data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.config +6 -0
- data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +329 -0
- data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +556 -0
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +171 -0
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +146 -0
- data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +207 -0
- data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +279 -0
- data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +23 -0
- data/lib/bioroebe/gui/gtk3/www_finder/www_finder.config +6 -0
- data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +368 -0
- data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +82 -0
- data/lib/bioroebe/gui/libui/README.md +4 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +114 -0
- data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +112 -0
- data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +60 -0
- data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +161 -0
- data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +76 -0
- data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +135 -0
- data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +118 -0
- data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +115 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +174 -0
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +132 -0
- data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +89 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +111 -0
- data/lib/bioroebe/gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb +42 -0
- data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +216 -0
- data/lib/bioroebe/gui/shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb +284 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +402 -0
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +192 -0
- data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +72 -0
- data/lib/bioroebe/gui/shared_code/show_codon_usage/show_codon_usage_module.rb +213 -0
- data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +206 -0
- data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +140 -0
- data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +262 -0
- data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +243 -0
- data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +199 -0
- data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +519 -0
- data/lib/bioroebe/images/BIOROEBE.png +0 -0
- data/lib/bioroebe/images/BIOROEBE_NEW_LOGO.png +0 -0
- data/lib/bioroebe/images/BlosumMatrixViewer.png +0 -0
- data/lib/bioroebe/images/DnaToAminoacidWidget.png +0 -0
- data/lib/bioroebe/images/PRINTED_AMINOACID_TABLE.png +0 -0
- data/lib/bioroebe/images/class_ConvertAminoacidToDNA.png +0 -0
- data/lib/bioroebe/images/class_SimpleStringComparer.png +0 -0
- data/lib/bioroebe/images/example_of_FASTA_coloured_output.png +0 -0
- data/lib/bioroebe/images/libui_hamming_distance_widget.png +0 -0
- data/lib/bioroebe/images/pretty_DNA_picture.png +0 -0
- data/lib/bioroebe/images/primer_design_widget.png +0 -0
- data/lib/bioroebe/images/restriction_enzyme_commandline_result.png +0 -0
- data/lib/bioroebe/images/ruby-gtk_three_to_one_widget.png +0 -0
- data/lib/bioroebe/images/small_DNA_logo.png +0 -0
- data/lib/bioroebe/images/small_drosophila_image.png +0 -0
- data/lib/bioroebe/java/README.md +6 -0
- data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
- data/lib/bioroebe/java/bioroebe/AllInOne.java +214 -0
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Base.java +102 -0
- data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.class +0 -0
- data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.java +23 -0
- data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +22 -0
- data/lib/bioroebe/java/bioroebe/Esystem.class +0 -0
- data/lib/bioroebe/java/bioroebe/Esystem.java +47 -0
- data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.class +0 -0
- data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.java +65 -0
- data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.class +0 -0
- data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.java +32 -0
- data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
- data/lib/bioroebe/java/bioroebe/IsPalindrome.java +18 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +19 -0
- data/lib/bioroebe/java/bioroebe/README.md +4 -0
- data/lib/bioroebe/java/bioroebe/RemoveFile.class +0 -0
- data/lib/bioroebe/java/bioroebe/RemoveFile.java +24 -0
- data/lib/bioroebe/java/bioroebe/RemoveNumbers.class +0 -0
- data/lib/bioroebe/java/bioroebe/RemoveNumbers.java +14 -0
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +20 -0
- data/lib/bioroebe/java/bioroebe/SaveFile.java +44 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +28 -0
- data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToCamelcase.java +32 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +15 -0
- data/lib/bioroebe/java/bioroebe/enums/DNA.java +6 -0
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/matplotlib/matplotlib_generator.rb +104 -0
- data/lib/bioroebe/misc/quiz/README.md +6 -0
- data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +163 -0
- data/lib/bioroebe/misc/ruler.rb +244 -0
- data/lib/bioroebe/misc/useful_formulas.rb +129 -0
- data/lib/bioroebe/ncbi/efetch.rb +253 -0
- data/lib/bioroebe/ncbi/ncbi.rb +93 -0
- data/lib/bioroebe/ngs/README.md +2 -0
- data/lib/bioroebe/ngs/phred_quality_score_table.rb +123 -0
- data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +166 -0
- data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +135 -0
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +198 -0
- data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +133 -0
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +556 -0
- data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +139 -0
- data/lib/bioroebe/palindromes/palindrome_finder.rb +208 -0
- data/lib/bioroebe/palindromes/palindrome_generator.rb +272 -0
- data/lib/bioroebe/parsers/biolang_parser.rb +156 -0
- data/lib/bioroebe/parsers/blosum_parser.rb +222 -0
- data/lib/bioroebe/parsers/genbank_parser.rb +78 -0
- data/lib/bioroebe/parsers/gff.rb +346 -0
- data/lib/bioroebe/parsers/parse_embl.rb +76 -0
- data/lib/bioroebe/parsers/stride_parser.rb +117 -0
- data/lib/bioroebe/patterns/README.md +5 -0
- data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +149 -0
- data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +66 -0
- data/lib/bioroebe/patterns/profile_pattern.rb +182 -0
- data/lib/bioroebe/patterns/rgg_scanner.rb +160 -0
- data/lib/bioroebe/patterns/scan_for_repeat.rb +157 -0
- data/lib/bioroebe/pdb/download_this_pdb.rb +67 -0
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +164 -0
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +63 -0
- data/lib/bioroebe/pdb/parse_pdb_file.rb +1086 -0
- data/lib/bioroebe/pdb/report_secondary_structures_from_this_pdb_file.rb +225 -0
- data/lib/bioroebe/perl/README.md +7 -0
- data/lib/bioroebe/perl/local_to_global.pl +694 -0
- data/lib/bioroebe/project/project.rb +264 -0
- data/lib/bioroebe/protein_structure/alpha_helix.rb +96 -0
- data/lib/bioroebe/protein_structure/helical_wheel.rb +205 -0
- data/lib/bioroebe/raw_sequence/README.md +17 -0
- data/lib/bioroebe/raw_sequence/raw_sequence.rb +557 -0
- data/lib/bioroebe/readline/README.md +2 -0
- data/lib/bioroebe/readline/readline.rb +31 -0
- data/lib/bioroebe/regexes/README.md +2 -0
- data/lib/bioroebe/regexes/regexes.rb +34 -0
- data/lib/bioroebe/requires/commandline_application.rb +5 -0
- data/lib/bioroebe/requires/require_all_aminoacids_files.rb +28 -0
- data/lib/bioroebe/requires/require_all_calculate_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_codon_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_colour_scheme_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_count_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_dotplot_files.rb +28 -0
- data/lib/bioroebe/requires/require_all_electron_microscopy_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_enzymes_files.rb +28 -0
- data/lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb +32 -0
- data/lib/bioroebe/requires/require_all_nucleotides_files.rb +28 -0
- data/lib/bioroebe/requires/require_all_palindromes_files.rb +29 -0
- data/lib/bioroebe/requires/require_all_parser_files.rb +28 -0
- data/lib/bioroebe/requires/require_all_pattern_files.rb +29 -0
- data/lib/bioroebe/requires/require_all_pdb_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_sequence_files.rb +26 -0
- data/lib/bioroebe/requires/require_all_string_matching_files.rb +28 -0
- data/lib/bioroebe/requires/require_all_svg_files.rb +12 -0
- data/lib/bioroebe/requires/require_all_taxonomy_files.rb +35 -0
- data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +32 -0
- data/lib/bioroebe/requires/require_cleave_and_digest.rb +24 -0
- data/lib/bioroebe/requires/require_colours.rb +20 -0
- data/lib/bioroebe/requires/require_encoding.rb +7 -0
- data/lib/bioroebe/requires/require_sequence.rb +7 -0
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +162 -0
- data/lib/bioroebe/requires/require_the_bioroebe_shell.rb +7 -0
- data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +7 -0
- data/lib/bioroebe/requires/require_the_constants.rb +23 -0
- data/lib/bioroebe/requires/require_the_toplevel_methods.rb +29 -0
- data/lib/bioroebe/requires/require_yaml.rb +94 -0
- data/lib/bioroebe/sequence/alignment.rb +214 -0
- data/lib/bioroebe/sequence/dna.rb +211 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +404 -0
- data/lib/bioroebe/sequence/protein.rb +281 -0
- data/lib/bioroebe/sequence/reverse_complement.rb +148 -0
- data/lib/bioroebe/sequence/sequence.rb +706 -0
- data/lib/bioroebe/shell/add.rb +108 -0
- data/lib/bioroebe/shell/assign.rb +360 -0
- data/lib/bioroebe/shell/chop_and_cut.rb +281 -0
- data/lib/bioroebe/shell/colours/colours.rb +235 -0
- data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +1 -0
- data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +1 -0
- data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +1 -0
- data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -0
- data/lib/bioroebe/shell/constants.rb +166 -0
- data/lib/bioroebe/shell/download.rb +335 -0
- data/lib/bioroebe/shell/enable_and_disable.rb +158 -0
- data/lib/bioroebe/shell/enzymes.rb +310 -0
- data/lib/bioroebe/shell/fasta.rb +345 -0
- data/lib/bioroebe/shell/gtk.rb +76 -0
- data/lib/bioroebe/shell/help/class.rb +443 -0
- data/lib/bioroebe/shell/help/help.rb +25 -0
- data/lib/bioroebe/shell/history.rb +132 -0
- data/lib/bioroebe/shell/initialize.rb +217 -0
- data/lib/bioroebe/shell/loop.rb +74 -0
- data/lib/bioroebe/shell/menu.rb +5320 -0
- data/lib/bioroebe/shell/misc.rb +4341 -0
- data/lib/bioroebe/shell/prompt.rb +107 -0
- data/lib/bioroebe/shell/random.rb +289 -0
- data/lib/bioroebe/shell/readline/readline.rb +91 -0
- data/lib/bioroebe/shell/reset.rb +335 -0
- data/lib/bioroebe/shell/scan_and_parse.rb +135 -0
- data/lib/bioroebe/shell/search.rb +337 -0
- data/lib/bioroebe/shell/sequences.rb +200 -0
- data/lib/bioroebe/shell/shell.rb +41 -0
- data/lib/bioroebe/shell/show_report_and_display.rb +2901 -0
- data/lib/bioroebe/shell/startup.rb +127 -0
- data/lib/bioroebe/shell/taxonomy.rb +14 -0
- data/lib/bioroebe/shell/tk.rb +23 -0
- data/lib/bioroebe/shell/user_input.rb +88 -0
- data/lib/bioroebe/shell/xorg.rb +45 -0
- data/lib/bioroebe/siRNA/README.md +2 -0
- data/lib/bioroebe/siRNA/siRNA.rb +93 -0
- data/lib/bioroebe/string_matching/README.md +13 -0
- data/lib/bioroebe/string_matching/find_longest_substring.rb +162 -0
- data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +175 -0
- data/lib/bioroebe/string_matching/hamming_distance.rb +313 -0
- data/lib/bioroebe/string_matching/levensthein.rb +698 -0
- data/lib/bioroebe/string_matching/simple_string_comparer.rb +294 -0
- data/lib/bioroebe/string_matching/smith_waterman.rb +276 -0
- data/lib/bioroebe/svg/README.md +1 -0
- data/lib/bioroebe/svg/glyph.rb +719 -0
- data/lib/bioroebe/svg/mini_feature.rb +111 -0
- data/lib/bioroebe/svg/page.rb +570 -0
- data/lib/bioroebe/svg/primitive.rb +70 -0
- data/lib/bioroebe/svg/svgee.rb +326 -0
- data/lib/bioroebe/svg/track.rb +263 -0
- data/lib/bioroebe/taxonomy/README.md +1 -0
- data/lib/bioroebe/taxonomy/chart.rb +95 -0
- data/lib/bioroebe/taxonomy/class_methods.rb +181 -0
- data/lib/bioroebe/taxonomy/colours.rb +26 -0
- data/lib/bioroebe/taxonomy/constants.rb +218 -0
- data/lib/bioroebe/taxonomy/edit.rb +97 -0
- data/lib/bioroebe/taxonomy/help/help.rb +65 -0
- data/lib/bioroebe/taxonomy/help/helpline.rb +53 -0
- data/lib/bioroebe/taxonomy/info/check_available.rb +143 -0
- data/lib/bioroebe/taxonomy/info/info.rb +337 -0
- data/lib/bioroebe/taxonomy/info/is_dna.rb +150 -0
- data/lib/bioroebe/taxonomy/interactive.rb +1933 -0
- data/lib/bioroebe/taxonomy/menu.rb +905 -0
- data/lib/bioroebe/taxonomy/node.rb +118 -0
- data/lib/bioroebe/taxonomy/parse_fasta.rb +383 -0
- data/lib/bioroebe/taxonomy/shared.rb +287 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +521 -0
- data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +56 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +715 -0
- data/lib/bioroebe/toplevel_methods/atomic_composition.rb +198 -0
- data/lib/bioroebe/toplevel_methods/base_composition.rb +121 -0
- data/lib/bioroebe/toplevel_methods/blast.rb +153 -0
- data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +57 -0
- data/lib/bioroebe/toplevel_methods/cat.rb +71 -0
- data/lib/bioroebe/toplevel_methods/chunked_display.rb +92 -0
- data/lib/bioroebe/toplevel_methods/cliner.rb +81 -0
- data/lib/bioroebe/toplevel_methods/complement.rb +58 -0
- data/lib/bioroebe/toplevel_methods/convert_global_env.rb +39 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +73 -0
- data/lib/bioroebe/toplevel_methods/delimiter.rb +19 -0
- data/lib/bioroebe/toplevel_methods/digest.rb +71 -0
- data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +146 -0
- data/lib/bioroebe/toplevel_methods/e.rb +20 -0
- data/lib/bioroebe/toplevel_methods/editor.rb +21 -0
- data/lib/bioroebe/toplevel_methods/esystem.rb +22 -0
- data/lib/bioroebe/toplevel_methods/exponential_growth.rb +74 -0
- data/lib/bioroebe/toplevel_methods/extract.rb +56 -0
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +353 -0
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +257 -0
- data/lib/bioroebe/toplevel_methods/frequencies.rb +92 -0
- data/lib/bioroebe/toplevel_methods/hamming_distance.rb +60 -0
- data/lib/bioroebe/toplevel_methods/infer.rb +66 -0
- data/lib/bioroebe/toplevel_methods/is_on_roebe.rb +39 -0
- data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +101 -0
- data/lib/bioroebe/toplevel_methods/levensthein.rb +63 -0
- data/lib/bioroebe/toplevel_methods/log_directory.rb +109 -0
- data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +55 -0
- data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +88 -0
- data/lib/bioroebe/toplevel_methods/matches.rb +259 -0
- data/lib/bioroebe/toplevel_methods/misc.rb +432 -0
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +715 -0
- data/lib/bioroebe/toplevel_methods/number_of_clones.rb +63 -0
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +77 -0
- data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +236 -0
- data/lib/bioroebe/toplevel_methods/opn.rb +34 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +127 -0
- data/lib/bioroebe/toplevel_methods/parse.rb +59 -0
- data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +68 -0
- data/lib/bioroebe/toplevel_methods/rds.rb +24 -0
- data/lib/bioroebe/toplevel_methods/remove.rb +86 -0
- data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +35 -0
- data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +68 -0
- data/lib/bioroebe/toplevel_methods/rna_splicing.rb +73 -0
- data/lib/bioroebe/toplevel_methods/rnalfold.rb +69 -0
- data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +116 -0
- data/lib/bioroebe/toplevel_methods/shuffleseq.rb +37 -0
- data/lib/bioroebe/toplevel_methods/statistics.rb +53 -0
- data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +62 -0
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +187 -0
- data/lib/bioroebe/toplevel_methods/three_delimiter.rb +34 -0
- data/lib/bioroebe/toplevel_methods/time_and_date.rb +53 -0
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +26 -0
- data/lib/bioroebe/toplevel_methods/truncate.rb +48 -0
- data/lib/bioroebe/toplevel_methods/url.rb +36 -0
- data/lib/bioroebe/toplevel_methods/verbose.rb +59 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +191 -0
- data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +119 -0
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +230 -0
- data/lib/bioroebe/utility_scripts/compacter.rb +131 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +529 -0
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +374 -0
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +130 -0
- data/lib/bioroebe/utility_scripts/determine_antigenic_areas.rb +115 -0
- data/lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage.rb +137 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb +73 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +31 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb +37 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb +49 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +471 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +113 -0
- data/lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb +56 -0
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +177 -0
- data/lib/bioroebe/utility_scripts/download_files_from_rebase.rb +72 -0
- data/lib/bioroebe/utility_scripts/find_gene.rb +202 -0
- data/lib/bioroebe/utility_scripts/mirror_repeat.rb +235 -0
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +151 -0
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +168 -0
- data/lib/bioroebe/utility_scripts/pathways.rb +152 -0
- data/lib/bioroebe/utility_scripts/permutations.rb +145 -0
- data/lib/bioroebe/utility_scripts/punnet/punnet.rb +126 -0
- data/lib/bioroebe/utility_scripts/show_this_dna_sequence.rb +148 -0
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +36 -0
- data/lib/bioroebe/utility_scripts/showorf/help.rb +33 -0
- data/lib/bioroebe/utility_scripts/showorf/initialize.rb +52 -0
- data/lib/bioroebe/utility_scripts/showorf/menu.rb +68 -0
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +39 -0
- data/lib/bioroebe/utility_scripts/showorf/run.rb +152 -0
- data/lib/bioroebe/utility_scripts/showorf/show.rb +97 -0
- data/lib/bioroebe/utility_scripts/showorf/showorf.rb +488 -0
- data/lib/bioroebe/version/version.rb +44 -0
- data/lib/bioroebe/viennarna/README.md +3 -0
- data/lib/bioroebe/viennarna/rnafold_wrapper.rb +196 -0
- data/lib/bioroebe/with_gui.rb +18 -0
- data/lib/bioroebe/www/bioroebe.cgi +44 -0
- data/lib/bioroebe/www/embeddable_interface.rb +686 -0
- data/lib/bioroebe/www/sinatra/sinatra.rb +1013 -0
- data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +21 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids.yml +92 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_abbreviations.yml +31 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_average_mass_table.yml +33 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_classification.yml +18 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_english.yml +26 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_frequency.yml +44 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +61 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_molecular_formula.yml +32 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_monoisotopic_mass_table.yml +38 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_reste.yml +35 -0
- data/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml +34 -0
- data/lib/bioroebe/yaml/aminoacids/hydropathy_table.yml +44 -0
- data/lib/bioroebe/yaml/aminoacids/molecular_weight.yml +29 -0
- data/lib/bioroebe/yaml/aminoacids/simple_aminoacids.yml +66 -0
- data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +33 -0
- data/lib/bioroebe/yaml/antisense/antisense.yml +9 -0
- data/lib/bioroebe/yaml/base_composition_of_dna.yml +37 -0
- data/lib/bioroebe/yaml/blosum/blosum45.yml +36 -0
- data/lib/bioroebe/yaml/blosum/blosum50.yml +34 -0
- data/lib/bioroebe/yaml/blosum/blosum62.yml +35 -0
- data/lib/bioroebe/yaml/blosum/blosum80.yml +37 -0
- data/lib/bioroebe/yaml/blosum/blosum90.yml +36 -0
- data/lib/bioroebe/yaml/blosum/blosum_matrix.yml +200 -0
- data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +30 -0
- data/lib/bioroebe/yaml/codon_tables/1.yml +113 -0
- data/lib/bioroebe/yaml/codon_tables/10.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/11.yml +91 -0
- data/lib/bioroebe/yaml/codon_tables/12.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/13.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/14.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/15.yml +94 -0
- data/lib/bioroebe/yaml/codon_tables/16.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/2.yml +86 -0
- data/lib/bioroebe/yaml/codon_tables/21.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/22.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/23.yml +91 -0
- data/lib/bioroebe/yaml/codon_tables/24.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/25.yml +89 -0
- data/lib/bioroebe/yaml/codon_tables/26.yml +96 -0
- data/lib/bioroebe/yaml/codon_tables/27.yml +104 -0
- data/lib/bioroebe/yaml/codon_tables/28.yml +97 -0
- data/lib/bioroebe/yaml/codon_tables/29.yml +93 -0
- data/lib/bioroebe/yaml/codon_tables/3.yml +98 -0
- data/lib/bioroebe/yaml/codon_tables/30.yml +91 -0
- data/lib/bioroebe/yaml/codon_tables/31.yml +94 -0
- data/lib/bioroebe/yaml/codon_tables/33.yml +101 -0
- data/lib/bioroebe/yaml/codon_tables/4.yml +96 -0
- data/lib/bioroebe/yaml/codon_tables/5.yml +100 -0
- data/lib/bioroebe/yaml/codon_tables/6.yml +96 -0
- data/lib/bioroebe/yaml/codon_tables/9.yml +97 -0
- data/lib/bioroebe/yaml/codon_tables/overview.yml +42 -0
- data/lib/bioroebe/yaml/configuration/README.md +12 -0
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -0
- data/lib/bioroebe/yaml/configuration/colourize_fasta_sequences.yml +14 -0
- data/lib/bioroebe/yaml/configuration/default_colours_for_the_aminoacids.yml +28 -0
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -0
- data/lib/bioroebe/yaml/configuration/try_to_use_matplotlib.yml +1 -0
- data/lib/bioroebe/yaml/configuration/use_opn.yml +1 -0
- data/lib/bioroebe/yaml/configuration/use_this_database.yml +1 -0
- data/lib/bioroebe/yaml/create_these_directories_on_startup/create_these_directories_on_startup.yml +9 -0
- data/lib/bioroebe/yaml/default_dna_input.yml +3 -0
- data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +15 -0
- data/lib/bioroebe/yaml/enzymes/pH-Optima.yml +11 -0
- data/lib/bioroebe/yaml/fasta_and_fastq/fastq_quality_schemes.yml +44 -0
- data/lib/bioroebe/yaml/genomes/README.md +16 -0
- data/lib/bioroebe/yaml/humans/README.md +2 -0
- data/lib/bioroebe/yaml/humans/human_chromosomes.yml +53 -0
- data/lib/bioroebe/yaml/laboratory/README.md +1 -0
- data/lib/bioroebe/yaml/laboratory/pipettes.yml +8 -0
- data/lib/bioroebe/yaml/mRNA/mRNA.yml +16 -0
- data/lib/bioroebe/yaml/nuclear_localization_sequences.yml +15 -0
- data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +29 -0
- data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +10 -0
- data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +34 -0
- data/lib/bioroebe/yaml/nucleotides/nucleotides_weight.yml +12 -0
- data/lib/bioroebe/yaml/pathways/README.md +2 -0
- data/lib/bioroebe/yaml/pathways/citric_acid_cycle.yml +16 -0
- data/lib/bioroebe/yaml/pathways/glycolysis.yml +20 -0
- data/lib/bioroebe/yaml/pathways/shikimate_pathway.yml +23 -0
- data/lib/bioroebe/yaml/pathways/urea_cycle.yml +11 -0
- data/lib/bioroebe/yaml/primers/README.md +4 -0
- data/lib/bioroebe/yaml/primers/primers.yml +3 -0
- data/lib/bioroebe/yaml/promoters/35S.yml +15 -0
- data/lib/bioroebe/yaml/promoters/strong_promoters.yml +24 -0
- data/lib/bioroebe/yaml/proteases/proteases.yml +3 -0
- data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -0
- data/lib/bioroebe/yaml/remote_urls/README.md +4 -0
- data/lib/bioroebe/yaml/remote_urls/remote_urls.yml +3 -0
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +630 -0
- data/lib/bioroebe/yaml/sequences/JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta +14 -0
- data/lib/bioroebe/yaml/sequences/README.md +2 -0
- data/lib/bioroebe/yaml/talens.yml +22 -0
- data/lib/bioroebe/yaml/viruses/ecoli_phages.yml +63 -0
- data/lib/bioroebe/yaml/viruses/viruses.yml +6 -0
- data/lib/bioroebe.rb +5 -0
- data/spec/testing_toplevel_method_editor.rb +20 -0
- data/spec/testing_toplevel_method_url.rb +15 -0
- data/spec/testing_toplevel_method_verbose.rb +13 -0
- data/test/advanced_svg_example.rb +307 -0
- data/test/testing_bioroebe.rb +25 -0
- data/test/testing_codons.rb +45 -0
- data/test/testing_dna_to_rna_conversion.rb +15 -0
- data/test/testing_parse_pdb_file.rb +23 -0
- data/test/testing_reverse_complement.rb +32 -0
- data/test/testing_svg_component_of_bioroebe.rb +311 -0
- data/test/testing_svg_component_of_bioroebe_from_json_dataset.rb +34 -0
- data/test/testing_taxonomy.rb +22 -0
- metadata +1059 -0
@@ -0,0 +1,1933 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === Bioroebe::Taxonomy::Interactive
|
6
|
+
#
|
7
|
+
# This bundles together various taxonomy-related code portions.
|
8
|
+
#
|
9
|
+
# This file can be used interactively. It provides the central
|
10
|
+
# point of entry for the taxonomy module.
|
11
|
+
# =========================================================================== #
|
12
|
+
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
13
|
+
|
14
|
+
module Bioroebe
|
15
|
+
|
16
|
+
module Taxonomy
|
17
|
+
|
18
|
+
class Interactive < ::Bioroebe::CommandlineApplication # === Bioroebe::Taxonomy::Interactive
|
19
|
+
|
20
|
+
require 'bioroebe/taxonomy/shared.rb'
|
21
|
+
require 'bioroebe/taxonomy/class_methods.rb'
|
22
|
+
require 'bioroebe/taxonomy/info/check_available.rb'
|
23
|
+
require 'bioroebe/taxonomy/info/is_dna.rb'
|
24
|
+
require 'bioroebe/taxonomy/info/info.rb'
|
25
|
+
require 'bioroebe/taxonomy/help/help.rb'
|
26
|
+
require 'bioroebe/taxonomy/menu.rb'
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'readline' # Enable readline here, if available.
|
30
|
+
rescue LoadError; end
|
31
|
+
|
32
|
+
begin
|
33
|
+
require 'cliner' # Needed in this file here.
|
34
|
+
rescue LoadError; end
|
35
|
+
|
36
|
+
begin
|
37
|
+
require 'stat_file'
|
38
|
+
rescue LoadError; end
|
39
|
+
|
40
|
+
include Taxonomy # Add the main namespace here.
|
41
|
+
include Taxonomy::Shared
|
42
|
+
if Object.const_defined?(:Roebe) and
|
43
|
+
Roebe.const_defined?(:SqlParadise)
|
44
|
+
include ::Roebe::SqlParadise
|
45
|
+
end
|
46
|
+
|
47
|
+
# ========================================================================= #
|
48
|
+
# The SQL-relevant parts come next.
|
49
|
+
# ========================================================================= #
|
50
|
+
begin
|
51
|
+
require 'roebe/sql_paradise'
|
52
|
+
Roebe::SqlParadise::Commands.set_temp_dir ::Bioroebe.log_dir?
|
53
|
+
rescue LoadError; end
|
54
|
+
|
55
|
+
# ========================================================================= #
|
56
|
+
# === NAMESPACE
|
57
|
+
# ========================================================================= #
|
58
|
+
NAMESPACE = inspect # inspect is evaluated on the enclosing class body here.

# ========================================================================= #
# === FASTA_SQL
#
# AUTOGENERATED_SQL_FILES_DIR with 'fasta.sql' appended.
# ========================================================================= #
FASTA_SQL = AUTOGENERATED_SQL_FILES_DIR+'fasta.sql'

# ========================================================================= #
# === BE_VERBOSE
#
# Default verbosity; reset() copies this value into @be_verbose.
# ========================================================================= #
BE_VERBOSE = true # This can be disabled via 'silent' from the commandline.

# ========================================================================= #
# === USE_COLOURS
#
# Whether to have colourized output or not; reset() copies this value
# into @use_colours.
# ========================================================================= #
USE_COLOURS = true # This can be disabled via 'nocolours'.

# ========================================================================= #
# === NO_HELP
# ========================================================================= #
NO_HELP = false # This can be disabled via 'nohelp'.
|
83
|
+
|
84
|
+
# ========================================================================= #
|
85
|
+
# === initialize
|
86
|
+
#
|
87
|
+
# The first argument holds the commandline arguments.
|
88
|
+
# ========================================================================= #
|
89
|
+
# Sets up a new instance: calls reset(), registers the commandline
# arguments and then, unless told otherwise, calls run().
#
# optional_commandline_arguments - handed to set_commandline_arguments
#                                  unchanged (nil when omitted).
# run_already                    - run() is only invoked when this is truthy.
def initialize(
    optional_commandline_arguments = nil,
    run_already                    = true
  )
  reset
  set_commandline_arguments(
    optional_commandline_arguments
  )
  run if run_already
end
|
99
|
+
|
100
|
+
# ========================================================================= #
|
101
|
+
# === reset
|
102
|
+
# ========================================================================= #
|
103
|
+
# Restores the default state of the instance. super() is called first,
# then every flag is taken from the class-level constants.
def reset
  super()
  # ======================================================================= #
  # === @namespace
  # ======================================================================= #
  @namespace = NAMESPACE
  # ======================================================================= #
  # === @be_verbose
  # ======================================================================= #
  @be_verbose = BE_VERBOSE
  # ======================================================================= #
  # === @use_colours
  # ======================================================================= #
  @use_colours = USE_COLOURS
  # ======================================================================= #
  # === @no_help
  #
  # Taken from NO_HELP (rather than a literal false) so that it follows
  # the constant in the same way as the other flags do.
  # ======================================================================= #
  @no_help = NO_HELP
  # ======================================================================= #
  # === @run_standalone
  # ======================================================================= #
  @run_standalone = true
end
|
126
|
+
|
127
|
+
# ========================================================================= #
|
128
|
+
# === get (get tag)
|
129
|
+
#
|
130
|
+
# Get something from the localome table with this method.
|
131
|
+
# ========================================================================= #
|
132
|
+
# Builds a SELECT statement against the fasta table and hands it to
# run_sql (with :silent and :tuples), returning whatever run_sql returns.
#
# i           - the column expression to select; converted via to_s and
#               interpolated verbatim.
# use_this_id - optional taxid; when given, a WHERE clause restricts the
#               query to that taxid.
#
# The statement is terminated exactly once, at the very end. Previously
# the WHERE clause carried its own ';', which produced either ';;' or a
# stray "; ORDER BY taxid;" fragment.
def get(
    i,
    use_this_id = nil
  )
  i = i.to_s
  query = "SELECT #{i} from fasta".dup
  if use_this_id
    # Double embedded single quotes (standard SQL escaping) so that the
    # value cannot terminate the string literal early.
    escaped_id = use_this_id.to_s.gsub("'", "''")
    query << " where taxid='#{escaped_id}'"
  end
  query << ' ORDER BY taxid' if i.include?('taxid') # Sort by taxid in this case.
  query << ';'
  run_sql(query, :silent, :tuples)
end
|
146
|
+
|
147
|
+
# ========================================================================= #
|
148
|
+
# === taxtree
|
149
|
+
#
|
150
|
+
# Obtain the taxtree, then display it. Input to this should be the
|
151
|
+
# Taxonomy ID. We will first check the localome table, then query
|
152
|
+
# from the two ncbi tables.
|
153
|
+
#
|
154
|
+
# To test this method, try:
|
155
|
+
#
|
156
|
+
# taxtree 106583
|
157
|
+
# taxtree 77166
|
158
|
+
#
|
159
|
+
# ========================================================================= #
|
160
|
+
# Prints the lineage for the given Taxonomy ID.
#
# When has?(i) is true the 'lineage_ids' value is fetched through get()
# and split on '->'. Otherwise return_full_lineage_of(i) is used and the
# first element of each of its entries is kept. The resulting ids are
# passed on to report_tree_lineage, whose result is returned.
def taxtree(i)
  i = i.to_s
  e 'The lineage for Taxonomy ID '+simp(i)+' is as follows:'
  lineage_ids =
    if has?(i) # Ok, the localome table has this entry.
      get('lineage_ids', i).strip.split('->').map(&:strip)
    else
      # map (not map!) so the array handed out by return_full_lineage_of
      # is left untouched.
      return_full_lineage_of(i).map { |entry| entry[0] }
    end
  report_tree_lineage(lineage_ids)
end
|
171
|
+
|
172
|
+
# ========================================================================= #
|
173
|
+
# === report_tree_lineage
|
174
|
+
#
|
175
|
+
# This method expects an Array as input, which contains all the IDs
|
176
|
+
# that we will report in a tree (hence the name tree here).
|
177
|
+
# ========================================================================= #
|
178
|
+
# Prints every element of i on its own line via e(). The n-th element
# (counting from 1) is preceded by n repetitions of the padding string,
# so the output forms a staircase. Returns i (the result of
# each_with_index).
def report_tree_lineage(i)
  i.each_with_index { |entry, index|
    e "#{' ' * (index + 1)}#{entry}"
  }
end
|
184
|
+
|
185
|
+
# ========================================================================= #
|
186
|
+
# === remove_taxid_from_localome_table
|
187
|
+
#
|
188
|
+
# This method will remove a TaxID from the localome table.
|
189
|
+
# ========================================================================= #
|
190
|
+
# Deletes the row with the given taxid from the fasta table, provided
# has_id?(i) reports that it exists. Messages are only printed when
# be_verbose? is true.
#
# i - the Taxonomy ID; converted via to_s.
def remove_taxid_from_localome_table(i)
  i = i.to_s
  if has_id?(i)
    # Ok we found an entry, thus we can remove it now. Embedded single
    # quotes are doubled (standard SQL escaping) before the value is put
    # into the statement.
    escaped_id = i.gsub("'", "''")
    run_sql "\nDELETE FROM fasta\nWHERE taxid='#{escaped_id}';\n"
    if be_verbose?
      e 'Removed entry '+simp(i)+
        ' (Taxonomy ID) from the localome (fasta) table.'
    end
  else
    e "Could not find TaxID #{simp(i)}." if be_verbose?
  end
end
|
206
|
+
|
207
|
+
# ========================================================================= #
|
208
|
+
# === update_local_localomes
|
209
|
+
#
|
210
|
+
# Update the local collection. This will overwrite the old dataset
|
211
|
+
# completely, so be careful when using this.
|
212
|
+
# ========================================================================= #
|
213
|
+
# Rebuilds the local fasta table: create_fasta_table is called first,
# then create_sql_file_from_local_fasta_entry is run for every entry
# returned by get_all_info_entries_with_tax_id. Afterwards a summary
# and the elapsed time are reported.
#
# The elapsed time is the difference between the value of @time before
# and after show_time_now.
# NOTE(review): presumably show_current_time / show_time_now refresh
# @time with a parseable timestamp string - confirm against the helpers.
# NOTE(review): Time.parse is provided by the 'time' standard library;
# it is assumed to be required elsewhere - confirm.
def update_local_localomes
  show_current_time
  create_fasta_table # Get in a new, fresh table.
  info_entries = get_all_info_entries_with_tax_id
  show_current_time
  info_entries.each { |entry|
    e 'Next working on '+sfile(entry)
    create_sql_file_from_local_fasta_entry(entry)
  }
  report_how_many_info_files_exist_and_how_many_lack_taxonomy_id
  old_time = @time
  show_time_now # Show the end time.
  new_time = @time
  difference = Time.parse(new_time) - Time.parse(old_time)
  report_how_long_it_took_us(difference, 'localome table')
end
|
229
|
+
|
230
|
+
# ========================================================================= #
|
231
|
+
# === try_to_display_the_status
|
232
|
+
# ========================================================================= #
|
233
|
+
# With an argument: instantiates StatFile for it, but only if the
# StatFile constant is defined (the require near the top of the file is
# optional). Without an argument: calls Taxonomy.status? followed by
# Info.status.
def try_to_display_the_status(i = nil)
  if i # if an argument was provided
    StatFile.new(i) if Object.const_defined?(:StatFile)
  else
    Taxonomy.status?
    Info.status
  end
end
|
241
|
+
|
242
|
+
# ========================================================================= #
|
243
|
+
# === generate (generate tag)
|
244
|
+
#
|
245
|
+
# This method can be used to generate nodes or names .sql files.
|
246
|
+
# These .sql files will be generated in the TEMP_DIR, which
|
247
|
+
# at the time of writing this documentation is at
|
248
|
+
# /tmp/robert.
|
249
|
+
# ========================================================================= #
|
250
|
+
# Delegates to menu() for the inputs 'nodes', 'names', 'nodes.sql' and
# 'names.sql'; a trailing '.sql' is stripped first. Any other input is
# ignored and nil is returned.
#
# The suffix is removed on a copy: the previous gsub! modified the
# caller's string in place and raised FrozenError for frozen literals.
def generate(i)
  i = i.to_s
  case i # case tag
  when 'nodes', 'names', 'nodes.sql', 'names.sql'
    menu(i.sub(/\.sql\z/, '')) # Delegate towards the main menu here.
  end
end; alias create generate # === create
|
258
|
+
|
259
|
+
# ========================================================================= #
|
260
|
+
# === show_all_eukarya
|
261
|
+
# ========================================================================= #
|
262
|
+
# Lists every entry of the localome dataset whose lineage field contains
# the Eukaryota Taxonomy ID, then prints a summary and hands all tax ids
# to report_total_amount_of_proteomes (whose result is returned).
#
# optional_show_path - when this is a String, the dataset including the
#                      path column is queried and the path is appended
#                      to every reported line.
#
# Each dataset line is split on '|' into name (0), taxid (1),
# lineage ids (2) and, optionally, path (3).
#
# The matched names are collected in their own array; previously that
# array was overwritten by the output line, so the summary printed a
# string length instead of the number of matches.
def show_all_eukarya(
    optional_show_path = false
  )
  e 'We found these Eukaryota in the localomes Database:'+N+N
  show_path = optional_show_path.is_a?(String)
  dataset =
    if show_path
      select_name_and_tax_id_and_lineage_ids_and_path # 0,1,2,3
    else
      select_name_and_tax_id_and_lineage_ids
    end
  rows = dataset.split(N)
  # The ID must be surrounded by spaces in the lineage field.
  eukaryota_pattern = / #{Eukaryota_Taxonomy_ID} /
  eukaryota_names = []
  rows.each { |row|
    fields = row.split('|')
    next unless eukaryota_pattern.match?(fields[2].to_s)
    scientific_name = fields.first.strip
    line = simp((eukaryota_names.size + 1).to_s)+') '+scientific_name+
           ' ('+sfancy('TaxID')+': '+fields[1].to_s.strip+')'
    if show_path
      line << (' Path: '+fields[3].to_s).ljust(40) # Append the path.
    end
    e line
    eukaryota_names << scientific_name
  }
  e N+N+'Out of '+sfancy(rows.size.to_s)+' registered '\
    'entries in total in the localomes database,'
  e sfancy(eukaryota_names.size.to_s)+' belong to Eukaryota (the Taxonomy '\
    'ID of Eukaryota is '+sfancy(Eukaryota_Taxonomy_ID.to_s)+').'
  tax_ids = rows.map { |row| row.split('|')[1].to_s.strip }
  report_total_amount_of_proteomes(tax_ids)
end
|
299
|
+
|
300
|
+
# ========================================================================= #
|
301
|
+
# === query_from_localome_table
|
302
|
+
#
|
303
|
+
# This will display the result as well.
|
304
|
+
# ========================================================================= #
|
305
|
+
# Runs get(i) and prints its result via e().
def query_from_localome_table(i)
  e(get(i))
end
|
308
|
+
|
309
|
+
# ========================================================================= #
|
310
|
+
# === disable_colours
|
311
|
+
# ========================================================================= #
|
312
|
+
# Switches colourized output off by setting @use_colours to false.
def disable_colours
  @use_colours = false
end
|
315
|
+
|
316
|
+
# ========================================================================= #
|
317
|
+
# === try_to_show_when_the_last_update_has_happened
|
318
|
+
# ========================================================================= #
|
319
|
+
# Prints the result of cat(SAVE_FILE), but only when SAVE_FILE exists on
# disk; otherwise nothing happens.
def try_to_show_when_the_last_update_has_happened
  e cat(SAVE_FILE) if File.exist?(SAVE_FILE)
end
|
322
|
+
|
323
|
+
# ========================================================================= #
|
324
|
+
# === query_whether_we_have_this_id(i)
|
325
|
+
#
|
326
|
+
# We query whether we have a specific ID or whether we don't.
|
327
|
+
# ========================================================================= #
|
328
|
+
# Prints whether has_id?(i) reports the given id as known.
def query_whether_we_have_this_id(i)
  lead = has_id?(i) ? 'We do have the id ' : 'We do not have the id '
  e lead+simp(i.to_s)+'.'
end
|
335
|
+
|
336
|
+
# ========================================================================= #
|
337
|
+
# === report_lineage_ids_and_lineage_scientific_name
|
338
|
+
# ========================================================================= #
|
339
|
+
# Prints @lineage_ids and @lineage_scientific_name for the Tax ID i.
#
# i and both instance variables are converted via to_s, so an Integer id
# or a not-yet-assigned instance variable no longer raises TypeError.
def report_lineage_ids_and_lineage_scientific_name(i)
  e 'The lineage of all parent ids for Tax ID '+sfancy(i.to_s)+' is: '+
    @lineage_ids.to_s+', and in long form '+@lineage_scientific_name.to_s
end
|
343
|
+
|
344
|
+
# ========================================================================= #
|
345
|
+
# === drop_nodes_table
|
346
|
+
# ========================================================================= #
|
347
|
+
# Passes POSTGRE_DROP_NODES_COMMAND to run_sql.
#
# be_verbose - forwarded to run_sql as its second argument.
def drop_nodes_table(be_verbose = true)
  run_sql(POSTGRE_DROP_NODES_COMMAND, be_verbose)
end
|
350
|
+
|
351
|
+
# ========================================================================= #
|
352
|
+
# === drop_names_table
|
353
|
+
# ========================================================================= #
|
354
|
+
# Passes POSTGRE_DROP_NAMES_COMMAND to run_sql.
#
# be_verbose - forwarded to run_sql as its second argument.
def drop_names_table(be_verbose = true)
  run_sql(POSTGRE_DROP_NAMES_COMMAND, be_verbose)
end
|
357
|
+
|
358
|
+
# ========================================================================= #
|
359
|
+
# === show_scientific_name_of
|
360
|
+
# ========================================================================= #
|
361
|
+
# Looks up the scientific name for the Tax ID i through
# get_scientific_name_of and prints it. When the lookup yields nothing
# (nil or an empty string), three hint lines are printed instead.
def show_scientific_name_of(i)
  result = get_scientific_name_of(i).to_s
  if result.empty?
    e 'We tried to find a Tax ID but we got no result.'
    e 'Are you able to connect to the postgresql-database?'
    e 'Perhaps this Tax ID does not exist.'
  else
    e 'The scientific name of Tax ID '+lightblue(i.to_s)+
      ' is: '+simp(result)
  end
end
|
372
|
+
|
373
|
+
# ========================================================================= #
|
374
|
+
# === lupdate (update tag)
|
375
|
+
#
|
376
|
+
# This is the general way to update something. "update everything" is
|
377
|
+
# the most important component - it will update everything, including
|
378
|
+
# the localomes entry.
|
379
|
+
# ========================================================================= #
|
380
|
+
# Dispatches an update request. The input is converted via to_s and
# matched against the keywords below; unknown input only produces a
# message.
#
# i - what to update; defaults to :ncbi.
#
# The regex is anchored with \A and \z so that it has to match the whole
# input rather than a single line of it.
def lupdate(i = :ncbi)
  i = i.to_s
  case i
  when 'all', 'everything', 'both'
    update_ncbi_database
    update_local_localomes
    update_lineage
  # ======================================================================= #
  # === ncbi_database
  # ======================================================================= #
  when /\Ancbi(-|_| )?database\z/, 'ncbi', '1',
       'update_ncbi'
    update_ncbi_database
  # ======================================================================= #
  # === localomes
  # ======================================================================= #
  when 'localomes', 'local', 'loc', 'localome', '2',
       'fasta', 'database', 'databases'
    update_local_localomes # This will overwrite the old dataset completely, be careful.
  # ======================================================================= #
  # === lineage
  # ======================================================================= #
  when 'lineage', 'lin', 'li'
    update_lineage
  else
    e 'Not sure what to update - the input was '+sfancy(i)
  end
end
|
408
|
+
|
409
|
+
# ========================================================================= #
|
410
|
+
# === disable (disable tag)
|
411
|
+
# ========================================================================= #
|
412
|
+
def disable(i)
  # Only :colours (given as Symbol or String) is understood; any other
  # input is silently ignored.
  disable_colours if i.to_sym == :colours
end
|
418
|
+
|
419
|
+
# ========================================================================= #
|
420
|
+
# === query_localome_including_path
|
421
|
+
#
|
422
|
+
# We also show the path here.
|
423
|
+
# ========================================================================= #
|
424
|
+
def query_localome_including_path
  # Lists every organism of the fasta table (name, TaxID and local path),
  # one numbered line per row, followed by the total count.
  e 'These organisms are available locally:'
  rows = run_query(
    'select name,taxid,path from fasta ORDER BY taxid;',
    false, :tuples
  ).split(N)
  rows.each.with_index(1) {|row, position|
    fields = row.split('|') # Rows arrive as "name|taxid|path".
    padded_name = fields[0].strip.ljust(40)
    path_info = 'the local path is at '+N+' '+sfancy(fields[2])
    e ' ('+position.to_s+') '+padded_name+' -> TaxID: '+
      simp(fields[1].strip)+', '+path_info
  }
  report_n_registered_organisms_in_localome(rows.size)
end; alias localome? query_localome_including_path # === localome?
|
440
|
+
|
441
|
+
# ========================================================================= #
|
442
|
+
# === report_n_registered_organisms_in_localome
|
443
|
+
# ========================================================================= #
|
444
|
+
def report_n_registered_organisms_in_localome(i)
  # i is the number of organisms that were found in the fasta table.
  count = sfancy(i.to_s)
  e 'We have a total of '+count+
    ' organisms registered in the localome (== fasta) table.'
end
|
448
|
+
|
449
|
+
# ========================================================================= #
|
450
|
+
# === drop_nodes_and_names_database_tables
|
451
|
+
# ========================================================================= #
|
452
|
+
def drop_nodes_and_names_database_tables(be_verbose = true)
  # Drops the nodes table first and the names table second. The
  # verbosity flag is handed to both helpers unchanged.
  verbosity = be_verbose
  drop_nodes_table(verbosity)
  drop_names_table(verbosity)
end
|
456
|
+
|
457
|
+
# ========================================================================= #
|
458
|
+
# === query_localome
|
459
|
+
#
|
460
|
+
# Query the localome table.
|
461
|
+
# ========================================================================= #
|
462
|
+
def query_localome
  # Lists every organism of the fasta table (name and TaxID only),
  # one numbered line per row, followed by the total count.
  e 'These organisms are available locally:'
  rows = run_query(
    'select name,taxid from fasta ORDER BY taxid;',
    false, :tuples
  ).split(N)
  rows.each.with_index(1) {|row, position|
    fields = row.split('|') # Rows arrive as "name|taxid".
    padded_name = fields[0].strip.ljust(30)
    e ' ('+position.to_s+') '+padded_name+' -> TaxID: '+
      simp(fields[1].strip)
  }
  report_n_registered_organisms_in_localome(rows.size)
end
|
477
|
+
|
478
|
+
# ========================================================================= #
|
479
|
+
# === update_ncbi_database
|
480
|
+
#
|
481
|
+
# We will update the NCBI Taxonomy database with this method. This
|
482
|
+
# includes a download, extracting it, generating the .sql files,
|
483
|
+
# and then populating the postgresql database.
|
484
|
+
# ========================================================================= #
|
485
|
+
def update_ncbi_database
  # Full refresh of the NCBI taxonomy tables: download, regenerate the
  # .sql dumps, drop the old tables, load the dumps and report the time.
  download :ncbi # Defined in this file here.
  if be_verbose?
    e 'We will update the local postgre NCBI Table now (this may take about one hour in total):'
  end
  # This will generate names.sql and nodes.sql.
  %w(names nodes).each {|table| menu table }
  # Next, connect to the postgre database and read in the auto-generated dumps:
  if be_verbose?
    e 'We will next load this dataset into the Database.'
    e 'First, dropping the old table entries in the Postgresql database '
    e 'via the DROP TABLE command:'
  end
  drop_nodes_and_names_database_tables
  nodes_size = File.size(NODES_SQL).to_s
  names_size = File.size(NAMES_SQL).to_s
  e 'Next we will populate the two tables with '+sfancy('names.sql')+
    ' (Filesize: '+names_size+') and '+sfancy('nodes.sql')+
    ' (Filesize: '+nodes_size+') file.'
  e 'This may take a while, possibly about an hour, so please remain patient.'
  read_in_names_and_nodes_sql_files
  e 'Nota bene: the above commands can only work if psql is in your $PATH.'
  e 'If it did not work, please check and see first whether your $PATH variable is proper.'
  e N+'We will display the content of the $PATH variable now: '+sfancy(ENV['PATH'])
  # @time is read before and after show_time_now; both values are parsed
  # as time strings to compute the elapsed seconds.
  started_at = @time
  show_time_now # Show the end time.
  finished_at = @time
  elapsed_seconds = Time.parse(finished_at) - Time.parse(started_at)
  report_how_long_it_took_us(elapsed_seconds)
end; alias update_database update_ncbi_database
|
522
|
+
|
523
|
+
# ========================================================================= #
|
524
|
+
# === report_lineage_of
|
525
|
+
# ========================================================================= #
|
526
|
+
def report_lineage_of(f)
  # Looks up the lineage for f, reports it, and then generates HTML
  # links for every TaxID of that lineage.
  obtain_full_lineage_for(f)
  report_lineage_ids_and_lineage_scientific_name(f)
  e
  # @lineage_ids holds the TaxIDs joined by ' -> '.
  lineage_chain = @lineage_ids.split(' -> ')
  generate_html_links_for(lineage_chain)
end
|
532
|
+
|
533
|
+
# ========================================================================= #
|
534
|
+
# === report_how_long_it_took_us
|
535
|
+
#
|
536
|
+
# The input to this method should be the number of seconds, i.e.
|
537
|
+
# 60 seconds.
|
538
|
+
# ========================================================================= #
|
539
|
+
def report_how_long_it_took_us(
    i = 0,
    title_of_table = 'NCBI-based taxonomy table'
  )
  # i is the elapsed time in seconds; title_of_table names what was updated.
  #
  # The minutes are rounded numerically to two decimals. Cutting the
  # string form of the float to four characters, as was done before,
  # produced "125." for 125.0 and wrong values for numbers that Ruby
  # prints in scientific notation.
  n_minutes = (i.to_f / 60.0).round(2)
  e 'Updating the '+title_of_table+' took us '+
    sfancy(i.to_s)+' seconds (= '+n_minutes.to_s+' minutes).'
end
|
550
|
+
|
551
|
+
# ========================================================================= #
|
552
|
+
# === read_sql
|
553
|
+
#
|
554
|
+
# Read a .sql file into the PostgreSQL database.
|
555
|
+
# ========================================================================= #
|
556
|
+
def read_sql(i = :fasta)
  # Reads a .sql dump into the PostgreSQL database. Only :fasta is
  # handled; any other input just sets the password and does nothing else.
  set_pgpassword
  case i
  # ======================================================================= #
  # === :fasta
  # ======================================================================= #
  when :fasta
    # A different login command is used on the home system.
    login = at_home? ? POSTGRE_LOGIN_COMMAND_HOME : POSTGRE_LOGIN_COMMAND
    # Build a NEW string here. Appending with << would modify the login
    # constant in place, so every call would add yet another " -f ...".
    cmd = login+' -f '+FASTA_SQL
    e 'Next, we will read in from '+FASTA_SQL if be_verbose?
    esystem cmd
    e 'Done reading in the dataset!' if be_verbose?
  end
end
|
574
|
+
|
575
|
+
# ========================================================================= #
|
576
|
+
# === search_in_localomes
|
577
|
+
#
|
578
|
+
# This will search in localomes.
|
579
|
+
# ========================================================================= #
|
580
|
+
def search_in_localomes(i = 'Blastocystis hominis')
  # Searches the fasta table for names that contain i and returns
  # whatever run_sql_query returns.
  if be_verbose?
    e 'We will now try to search the fasta table for `'+sfancy(i)+'`.'
  end
  # Double any single quote so that the search term can not terminate
  # the SQL string literal (names such as O'Brien broke the query).
  escaped = i.to_s.gsub("'", "''")
  statement = "select name,modification_time FROM fasta WHERE name LIKE '%"+escaped+"%' LIMIT 3;"
  return run_sql_query(statement, true)
end
|
588
|
+
|
589
|
+
# ========================================================================= #
|
590
|
+
# === open_project_files (open tag)
|
591
|
+
#
|
592
|
+
# This method will open the various project files in the editor.
|
593
|
+
# ========================================================================= #
|
594
|
+
def open_project_files
  # Opens each file listed in ARRAY_PROJECT_FILES in the editor.
  ARRAY_PROJECT_FILES.each {|project_file|
    command = editor?+' '+PROJECT_BASE_DIR+'lib/taxonomy/'+project_file.to_s
    # squeeze('/') collapses doubled slashes in the assembled path.
    esystem command.squeeze('/')
  }
end
|
600
|
+
|
601
|
+
# ========================================================================= #
|
602
|
+
# === do_run_connected
|
603
|
+
# ========================================================================= #
|
604
|
+
def do_run_connected
  # Clears the standalone flag.
  standalone = false
  @run_standalone = standalone
end; alias run_connected do_run_connected # === run_connected
|
607
|
+
|
608
|
+
# ========================================================================= #
|
609
|
+
# === use_colours?
|
610
|
+
# ========================================================================= #
|
611
|
+
def use_colours?
  # Plain reader for the @use_colours flag.
  return @use_colours
end
|
614
|
+
|
615
|
+
# ========================================================================= #
|
616
|
+
# === create_default_directories
|
617
|
+
# ========================================================================= #
|
618
|
+
def create_default_directories
  # Creates the default directory layout by handing the list of
  # directories to mkdir.
  e 'Creating some directories now.'
  array_create_these_directores = %w(
    /data/ncbi/taxonomy/
    /tmp/robert/autogenerated_sql_files/
    /data/curated/sequences/localome/
    /data/curated/sequences/aa/
    /data/curated/sequences/nt/
    /data/curated/sequences/INFO/
    /data/curated/sequences/localome/incoming/
  )
  pp array_create_these_directores
  # The original call passed an undefined local named "array" and thus
  # raised a NameError; the list built above is what has to be passed.
  mkdir(array_create_these_directores)
end
|
631
|
+
|
632
|
+
# ========================================================================= #
|
633
|
+
# === user_input?
|
634
|
+
# ========================================================================= #
|
635
|
+
def user_input?
  # Plain reader for @user_input.
  return @user_input
end
|
638
|
+
|
639
|
+
# ========================================================================= #
|
640
|
+
# === create_dirs
|
641
|
+
# ========================================================================= #
|
642
|
+
def create_dirs(be_verbose = false)
  # Ensures that the temp directory and the download directory exist.
  # The Symbol :be_verbose is accepted as a synonym for true.
  be_verbose = true if be_verbose == :be_verbose
  if be_verbose
    opnn
    e 'We will create the temp directory and the download directory next.'
  end
  ensure_that_temp_dir_exists
  ensure_that_download_dir_exists
end
|
653
|
+
|
654
|
+
# ========================================================================= #
|
655
|
+
# === try_to_show_dependencies
|
656
|
+
#
|
657
|
+
# Show the dependencies of the Taxonomy Module. This will tell us which
|
658
|
+
# dependencies we have to satisfy.
|
659
|
+
# ========================================================================= #
|
660
|
+
def try_to_show_dependencies
  # Displays the file returned by return_dependencies, or reports that
  # no file exists at that location.
  dependency_file = return_dependencies
  unless File.exist? dependency_file
    return e('We could not find a file at position `'+sfile(dependency_file)+'`.')
  end
  e 'The Taxonomy Module depends on these Ruby Gems:'+N+N
  cat dependency_file
end
|
669
|
+
|
670
|
+
# ========================================================================= #
|
671
|
+
# === show_login_information
|
672
|
+
# ========================================================================= #
|
673
|
+
def show_login_information
  # Tells the user which file holds the login information and then
  # shows the port.
  login_file = PROJECT_BASE_DIR2+'databases/postgresql_login_command.rb'
  e 'The login information is stored in the file '
  e
  e ' '+sfile(login_file)
  e
  show_port # Show the port as well here.
end
|
680
|
+
|
681
|
+
# ========================================================================= #
|
682
|
+
# === return_shared_code
|
683
|
+
# ========================================================================= #
|
684
|
+
def return_shared_code
  # Path of the shared code, relative to PROJECT_BASE_DIR2.
  PROJECT_BASE_DIR2+'shared/shared'
end
|
687
|
+
|
688
|
+
# ========================================================================= #
|
689
|
+
# === return_gemspec_file
|
690
|
+
#
|
691
|
+
# We return the main taxonomy.gemspec file here.
|
692
|
+
# ========================================================================= #
|
693
|
+
def return_gemspec_file
  # Path of taxonomy.gemspec inside PROJECT_BASE_DIR.
  PROJECT_BASE_DIR+'taxonomy.gemspec'
end
|
696
|
+
|
697
|
+
# ========================================================================= #
|
698
|
+
# === return_login_file
|
699
|
+
# ========================================================================= #
|
700
|
+
def return_login_file
  # Path of postgresql_login_command.rb inside PROJECT_BASE_DIR2.
  PROJECT_BASE_DIR2+'postgresql_login_command.rb'
end
|
703
|
+
|
704
|
+
# ========================================================================= #
|
705
|
+
# === show_nodes_table
|
706
|
+
# ========================================================================= #
|
707
|
+
def show_nodes_table
  # Prints the column names of the nodes table and then queries ten
  # random rows from it.
  e 'We use these values for the nodes table:'
  e
  %w(taxid parent_taxid rank).each {|column| efancy ' '+column }
  e
  e 'We will also try to show a random selection of 10 entries from there now:'
  statement = 'SELECT taxid,parent_taxid,rank FROM nodes'+"\n"+
              'ORDER BY RANDOM(), taxid LIMIT 10'
  run_sql statement
end
|
719
|
+
|
720
|
+
|
721
|
+
# ========================================================================= #
|
722
|
+
# === show_port
|
723
|
+
#
|
724
|
+
# Use this method to show the port.
|
725
|
+
# ========================================================================= #
|
726
|
+
def show_port
  # The port is not known here, so the literal 'UNKNOWN' is reported.
  port = simp('UNKNOWN')
  e "The port we will use is: #{port}"
end
|
729
|
+
|
730
|
+
# ========================================================================= #
|
731
|
+
# === update_lineage
|
732
|
+
#
|
733
|
+
# This will update only the lineage part of the localome database, which
|
734
|
+
# means the lineageIDs and the lineage scientific names.
|
735
|
+
# In order for this to work, we need to obtain the TaxID of the
|
736
|
+
# specific organism.
|
737
|
+
# ========================================================================= #
|
738
|
+
def update_lineage
  # Refreshes lineage_ids and lineage_scientific_name for every TaxID
  # that is stored in the fasta table.
  e 'We will now obtain all TaxIDs in the localome table.' if be_verbose?
  raw_output = run_sql('select taxid from fasta;', false, :tuples)
  # Entries that evaluate to 0 are ignored.
  taxids = raw_output.split(N).map {|line| line.strip.to_i }.sort.reject(&:zero?)
  taxids.each {|taxid|
    e 'Now updating entry with the TaxID: '+sfancy(taxid.to_s)
    get_lineage_ids_and_lineage_scientific_name(taxid)
    report_lineage_ids_and_lineage_scientific_name(taxid)
    unless has_id?(taxid)
      e 'We did not find the TaxID: '+simp(taxid.to_s)
      next
    end
    # We pad the two entries with proper quotes.
    quoted_ids   = pad_with_single_quotes(@lineage_ids)
    quoted_names = pad_with_single_quotes(@lineage_scientific_name)
    statement = "UPDATE fasta SET lineage_ids="+quoted_ids+
                ", lineage_scientific_name="+quoted_names+
                " WHERE taxid='"+taxid.to_s+"'"
    run_sql_query(statement)
    e 'Updated entry.' if be_verbose?
  }
  e 'Finished updating lineage.' if be_verbose?
end
|
765
|
+
|
766
|
+
# ========================================================================= #
|
767
|
+
# === make_taxonomy_gem
|
768
|
+
#
|
769
|
+
# This is the code that creates the taxonomy .gem.
|
770
|
+
# ========================================================================= #
|
771
|
+
def make_taxonomy_gem
  # Builds the taxonomy .gem via "gem build" and copies the resulting
  # file into TEMP_DIR.
  unless Dir[TAXONOMY_HOME_DIR+'*.gemspec'].empty? # Do we have a .gemspec file there?
    cd TAXONOMY_HOME_DIR
  end
  e
  e 'We will now attempt to build the taxonomy .gem (from '+
    'directory '+(Dir.pwd+'/').squeeze('/')+')'
  e
  unless File.exist? 'taxonomy.gemspec'
    e 'Could not find a file called taxonomy.gemspec, thus aborting now.'
    return
  end
  esystem 'gem build taxonomy.gemspec', :use_colours
  e
  built_gems = Dir['*.gem']
  # Dir[] always returns an Array, which is truthy even when empty.
  # Check for emptiness explicitly, so that a failed build does not
  # lead to nil being passed to sfile() and cp().
  if built_gems.empty?
    e 'No .gem file could be found, so the build seems to have failed.'
    return
  end
  e 'Done. There should now be a .gem file here.'
  e 'We assume the full file path to be at:'
  e ' '+sfile(built_gems.first)
  cp(built_gems.first, TEMP_DIR)
  e 'All done - the .gem should be ready now.'
end
|
794
|
+
|
795
|
+
# ========================================================================= #
|
796
|
+
# === silently_update_ncbi_database
|
797
|
+
#
|
798
|
+
# This method is the one that can be used to silently update the NCBI table
|
799
|
+
# via a cron job.
|
800
|
+
# ========================================================================= #
|
801
|
+
def silently_update_ncbi_database
  # Same steps as the verbose NCBI update, but every helper that takes
  # a verbosity argument is given :be_silent.
  quiet = :be_silent
  download :ncbi, quiet
  %w(names nodes).each {|table| menu table }
  drop_nodes_and_names_database_tables(quiet)
  read_in_names_and_nodes_sql_files(quiet)
end
|
808
|
+
|
809
|
+
# ========================================================================= #
|
810
|
+
# === select_name_and_lineage_ids
|
811
|
+
# ========================================================================= #
|
812
|
+
def select_name_and_lineage_ids
  # Returns name and lineage_ids of all fasta rows, as given by run_query.
  statement = 'select name,lineage_ids from fasta;'
  run_query(statement, false, :tuples)
end
|
815
|
+
|
816
|
+
# ========================================================================= #
|
817
|
+
# === select_name_and_tax_id_and_lineage_ids
|
818
|
+
# ========================================================================= #
|
819
|
+
def select_name_and_tax_id_and_lineage_ids
  # Returns name, taxid and lineage_ids of all fasta rows, as given by
  # run_query.
  statement = 'select name,taxid,lineage_ids from fasta;'
  run_query(statement, false, :tuples)
end
|
822
|
+
|
823
|
+
# ========================================================================= #
|
824
|
+
# === select_name_and_tax_id_and_lineage_ids_and_path
|
825
|
+
# ========================================================================= #
|
826
|
+
def select_name_and_tax_id_and_lineage_ids_and_path
  # Returns name, taxid, lineage_ids and path of all fasta rows, as
  # given by run_query.
  statement = 'select name,taxid,lineage_ids,path from fasta;'
  run_query(statement, false, :tuples)
end
|
829
|
+
|
830
|
+
# ========================================================================= #
|
831
|
+
# === report_total_amount_of_proteomes
|
832
|
+
#
|
833
|
+
# The input should be an array of Taxonomy IDs.
|
834
|
+
# ========================================================================= #
|
835
|
+
def report_total_amount_of_proteomes(i)
  # i should be an Array of Taxonomy IDs; any other input is ignored.
  # The n_accession_numbers value of every TaxID is summed and reported.
  return unless i.is_a? Array
  n_entries = i.sum {|taxid|
    # .to_s allows Integer TaxIDs as well; String + Integer would
    # otherwise raise a TypeError.
    run_query(
      "select n_accession_numbers from fasta WHERE taxid='"+taxid.to_s+"';",
      false,
      :tuples
    ).strip.to_i
  }
  e 'We have found a total of '+sfancy(n_entries.to_s)+' accession '+
    'numbers from the above dataset.'
end
|
847
|
+
|
848
|
+
# ========================================================================= #
|
849
|
+
# === search_in_database_for_name
|
850
|
+
#
|
851
|
+
# Use this method to search in a database for a name.
|
852
|
+
#
|
853
|
+
# This is something such as the following SQL query:
|
854
|
+
# select taxid FROM names WHERE name_txt LIKE '%Zygosaccharomyces rouxii%' limit 30;
|
855
|
+
#
|
856
|
+
# It will return the TaxID of the organism.
|
857
|
+
# ========================================================================= #
|
858
|
+
def search_in_database_for_name(i = 'Zygosaccharomyces rouxii')
  # Searches the names table for entries whose name_txt contains i and
  # returns whatever run_sql_query returns.
  e 'We will now try to search the names table for '+sfancy(i)+'.'
  # Double any single quote so that the search term can not terminate
  # the SQL string literal.
  escaped = i.to_s.gsub("'", "''")
  statement = "select taxid,name_txt FROM names WHERE name_txt LIKE '%"+escaped+"%' LIMIT 3;"
  return run_sql_query(statement, false, :tuples)
end
|
864
|
+
|
865
|
+
# ========================================================================= #
|
866
|
+
# === set_path
|
867
|
+
#
|
868
|
+
# Use this method to update the path to a local fasta entry, inside the
|
869
|
+
# localome entry. In order for this to work, the ID must exist.
|
870
|
+
#
|
871
|
+
# Complete usage example:
|
872
|
+
#
|
873
|
+
# spath 1257118 /resources/seqdata/curated/sequences/localome/proteomes/Acanthamoeba_castellanii_Neff_pep.fa
|
874
|
+
#
|
875
|
+
# ========================================================================= #
|
876
|
+
def set_path(i)
  # Updates the path column of a fasta entry.
  #
  # i is either "TAXID PATH" (separated by whitespace) or the location
  # of a file that has one "TAXID|PATH" or "TAXID<tab>PATH" per line.
  if i.include? ' '
    splitted = i.split(' ')
    taxid_to_update = splitted[0].to_s
    new_path        = splitted[1].to_s
    if has_id? taxid_to_update # If localomes has this ID, we continue here:
      e 'Now updating path for TaxID '+sfancy(taxid_to_update)+'.'
      # Single quotes are doubled so that they can not end the SQL literal.
      run_sql "UPDATE fasta SET path='"+new_path.gsub("'", "''")+
              "' WHERE taxid='"+taxid_to_update.gsub("'", "''")+"'"
    else
      e 'We could not find a TaxID '+sfancy(taxid_to_update)+
        ' in the localome table. Thus can not update the path.'
    end
  elsif File.exist? i
    # Strip first and reject afterwards, so that whitespace-only lines
    # are dropped as well.
    lines = File.readlines(i).map(&:strip).reject(&:empty?)
    lines.each {|line|
      separator = line.include?('|') ? '|' : "\t" # else assume \t
      taxid, path = line.split(separator).map(&:strip)
      # Lines without both fields used to crash with a TypeError or a
      # NoMethodError; they are now reported and skipped.
      if taxid.to_s.empty? || path.to_s.empty?
        e 'Skipping this malformed line: '+line
        next
      end
      set_path(taxid+' '+path)
    }
    e 'Note: if you wish to see the modified dataset, input: taxid,path'
  else
    e 'Format was not correct. Please either provide a file as argument,'
    e 'or use input such as this here:'
    e
    e ' spath 1257118 /resources/seqdata/curated/sequences/localome/proteomes/Acanthamoeba_castellanii_Neff_pep.fa'
    e
  end
end
|
910
|
+
|
911
|
+
# ========================================================================= #
|
912
|
+
# === nohelp
|
913
|
+
# ========================================================================= #
|
914
|
+
def nohelp
  # Sets the flag that show_short_help checks before printing its hint.
  suppress = true
  @no_help = suppress
end
|
917
|
+
|
918
|
+
# ========================================================================= #
|
919
|
+
# === show_short_help
|
920
|
+
# ========================================================================= #
|
921
|
+
def show_short_help
  # The hint is skipped when @no_help is set.
  return if @no_help
  e '(Type "help" or "?" for help, or "nohelp" to disable this notification here).'
end
|
925
|
+
|
926
|
+
# ========================================================================= #
|
927
|
+
# === show_postgres_size
|
928
|
+
#
|
929
|
+
# This method will show the size of the postgres database.
|
930
|
+
# ========================================================================= #
|
931
|
+
def show_postgres_size
  # Reports the database size, followed by the total relation size of
  # the names, nodes and fasta tables.
  e 'Next querying the size of the postgresql table:'
  sql_query POSTGRESQL_QUERY_SIZE, true
  # relpages are not too useful to look at, so we commented it out again.
  # sql_query 'SELECT relname, relpages FROM pg_class ORDER BY relpages DESC LIMIT 5;'
  e 'The total disk size for the robert_db is:'
  sql_query "SELECT pg_size_pretty(pg_database_size('robert_db'));",true
  %i(names nodes fasta).each {|table|
    e 'Size of '+sfancy(table)+' is: '+
      sql_query(
        "SELECT pg_size_pretty(pg_total_relation_size('#{table}'));",false, :tuples)
  }
end
|
948
|
+
|
949
|
+
# ========================================================================= #
|
950
|
+
# === obtain_full_lineage_for
|
951
|
+
#
|
952
|
+
# The input to this method should be an existing TaxID.
|
953
|
+
# ========================================================================= #
|
954
|
+
def obtain_full_lineage_for(f)
  # Fetches the lineage for f and, when one was found, stores two
  # summary strings:
  #
  #   @lineage_ids             -> TaxIDs joined by ' -> '
  #   @lineage_scientific_name -> "name (Tax id: N)" entries joined by ', '
  #
  # The raw result of return_full_lineage_of is returned in any case.
  result = return_full_lineage_of(f) # in taxonomy.rb
  if result
    lineage = Hash[result] # taxid => scientific name
    @lineage_ids = lineage.keys.map(&:to_s).join(' -> ').strip
    @lineage_scientific_name = lineage.map {|taxid, scientific_name|
      scientific_name.chomp+' (Tax id: '+taxid.to_s+')'
    }.join(', ').strip
  end
  return result
end; alias get_lineage_ids_and_lineage_scientific_name obtain_full_lineage_for
|
976
|
+
|
977
|
+
# ========================================================================= #
|
978
|
+
# === drop_table (drop tag)
|
979
|
+
#
|
980
|
+
# Use this method when you wish to drop a table.
|
981
|
+
# ========================================================================= #
|
982
|
+
def drop_table(i = DROP_FASTA_TABLE)
  # The aliases 'fasta', 'localomes' and 'localome' are translated into
  # the DROP_FASTA_TABLE statement; any other input is handed to the
  # database as-is.
  #
  # NOTE(review): the input 'nodes' is NOT translated into a DROP
  # statement here, it is passed on verbatim - confirm whether that is
  # intended.
  i = DROP_FASTA_TABLE if %w(fasta localomes localome).include?(i.to_s)
  ::Bioroebe.run_sql_query(i)
end
|
992
|
+
|
993
|
+
# ========================================================================= #
|
994
|
+
# === query_localomes_by_modtime
|
995
|
+
# ========================================================================= #
|
996
|
+
def query_localomes_by_modtime
  # Lists name and modification_time of all fasta rows, ordered by
  # modification_time.
  run_sql_query(
    'SELECT name,modification_time FROM fasta ORDER BY modification_time;'
  )
end
|
1000
|
+
|
1001
|
+
# ========================================================================= #
|
1002
|
+
# === query
|
1003
|
+
#
|
1004
|
+
# Formulate a query against the database.
|
1005
|
+
# ========================================================================= #
|
1006
|
+
def query(i)
  # Dispatches on the kind of query that was requested. Unknown input
  # is ignored.
  kind = i.to_s
  if %w(localome extended main).include?(kind)
    query_localome_including_path
  elsif kind == 'modtime'
    query_localomes_by_modtime
  elsif kind == 'ncbi'
    e 'https://www.ncbi.nlm.nih.gov/taxonomy'
  end
end
|
1016
|
+
|
1017
|
+
# ========================================================================= #
|
1018
|
+
# === get_all_info_entries_with_tax_id
|
1019
|
+
# ========================================================================= #
|
1020
|
+
def get_all_info_entries_with_tax_id
  # Returns an Array of all .INFO files in INFO_DIR for which
  # Info#has_taxonomy_id? is truthy.
  if be_verbose?
    e 'We will now attempt to find all .INFO files that do have a'
    e 'proper Taxonomy ID entry.'
  end
  return Dir[INFO_DIR+'*.INFO'].select {|info_file|
    has_taxonomy_id = Info.new(info_file, :be_verbose => false).has_taxonomy_id?
    e sfile(info_file)+' has a Taxonomy ID.' if has_taxonomy_id && be_verbose?
    has_taxonomy_id
  }
end
|
1035
|
+
|
1036
|
+
# ========================================================================= #
|
1037
|
+
# === read_in_names_and_nodes_sql_files
|
1038
|
+
#
|
1039
|
+
# We use two commands for populating names.sql and nodes.sql tables.
|
1040
|
+
# ========================================================================= #
|
1041
|
+
def read_in_names_and_nodes_sql_files(be_verbose = true)
  # Loads names.sql first and nodes.sql second, both from SHARED_HOME.
  # esystem is used in verbose mode, plain system otherwise.
  be_verbose = false if be_verbose == :be_silent

  # We use another command at home.
  login = at_home? ? POSTGRE_LOGIN_COMMAND_HOME : POSTGRE_LOGIN_COMMAND

  outcome = nil
  %w(names.sql nodes.sql).each {|sql_file|
    command = login+' -f '+SHARED_HOME+sql_file
    outcome = be_verbose ? esystem(command) : system(command)
  }
  outcome # The result of the last command, as before.
end
|
1058
|
+
|
1059
|
+
# ========================================================================= #
|
1060
|
+
# === be_silent
|
1061
|
+
# ========================================================================= #
|
1062
|
+
def be_silent
  # Switches to silent mode, unless @be_verbose is already false.
  return e('We are already silent.') if @be_verbose == false
  @be_verbose = false
  e 'We will now be silent.'
end
|
1070
|
+
|
1071
|
+
# ========================================================================= #
|
1072
|
+
# === record_last_command
|
1073
|
+
#
|
1074
|
+
# Use this method to record the last command issued.
|
1075
|
+
#
|
1076
|
+
# The constant LAST_INTERACTIVE_COMMAND determines where we store this.
|
1077
|
+
# ========================================================================= #
|
1078
|
+
def record_last_command(
    consider_storing_this = '',
    optional = ''
  )
  # Stores the last command (plus an optional argument, separated by
  # '|') in the file LAST_INTERACTIVE_COMMAND. Empty input and anything
  # that includes "last?" is not recorded.
  what = consider_storing_this.to_s.dup # Work on a copy from this point onwards.
  what << '|'+optional.to_s unless optional.to_s.empty?
  return if what.empty?
  return if what.include? 'last?' # Exception for "last?".
  # ===================================================================== #
  # Before we can store into the file, we need to ensure that the
  # TEMP_DIR exists. We ensure this with the next check - if it
  # does not exist then we will create it.
  # ===================================================================== #
  unless Dir.exist? TEMP_DIR
    e 'We will now create the directory '+sdir(TEMP_DIR)+'.'
    ensure_that_temp_dir_exists
  end
  begin
    write_what_into(what, LAST_INTERACTIVE_COMMAND)
  rescue StandardError => error
    # StandardError rather than Exception, so that Interrupt and
    # SystemExit are not swallowed here.
    e "A small error has happened in the method: "\
      "#{__method__}()"
    p error
    # The target file is LAST_INTERACTIVE_COMMAND. The old code referred
    # to an undefined local here and thus raised a NameError.
    e "Could not write into `#{sfile(LAST_INTERACTIVE_COMMAND)}`."
  end
end
|
1104
|
+
|
1105
|
+
# ========================================================================= #
|
1106
|
+
# === be_verbose?
|
1107
|
+
# ========================================================================= #
|
1108
|
+
def be_verbose?
  # Plain reader for the @be_verbose flag.
  return @be_verbose
end
|
1111
|
+
|
1112
|
+
# ========================================================================= #
|
1113
|
+
# === repeat_last_command
|
1114
|
+
# ========================================================================= #
|
1115
|
+
def repeat_last_command
  # Shows the last command, provided the file that stores it exists.
  unless File.exist? LAST_INTERACTIVE_COMMAND
    return e('No file at '+sfile(LAST_INTERACTIVE_COMMAND)+' could be found.')
  end
  show_last_command
end
|
1122
|
+
|
1123
|
+
# ========================================================================= #
|
1124
|
+
# === be
|
1125
|
+
# ========================================================================= #
|
1126
|
+
def be(i)
  # Accepts :verbose or :silent (as Symbol or String) and delegates to
  # the matching method. Other input is ignored.
  mode = i.to_sym
  if mode == :verbose
    be_verbose
  elsif mode == :silent
    be_silent
  end
end
|
1134
|
+
|
1135
|
+
# ========================================================================= #
|
1136
|
+
# === show_table_names
|
1137
|
+
# ========================================================================= #
|
1138
|
+
def show_table_names
  # Prints, for each of the three SQL tables, the file name followed by a
  # numbered line holding the matching *_ENTRIES constant.
  show_entry = lambda { |number, file, entries|
    efancy file
    e " (#{number}) "+entries+N+N
  }
  e N+'The SQL table structure is as follows:'+N+N
  show_entry.call(1, 'names.sql', NAMES_ENTRIES)
  show_entry.call(2, 'nodes.sql', NODES_ENTRIES)
  show_entry.call(3, 'fasta.sql', FASTA_ENTRIES)
end
|
1147
|
+
|
1148
|
+
# ========================================================================= #
|
1149
|
+
# === be_verbose
|
1150
|
+
# ========================================================================= #
|
1151
|
+
def be_verbose
  # Switches verbose mode on. If @be_verbose is already exactly true we
  # only tell the user so and leave the state untouched.
  return e('We are already verbose.') if @be_verbose == true
  @be_verbose = true
  e 'We will now be verbose.'
end
|
1159
|
+
|
1160
|
+
# ========================================================================= #
|
1161
|
+
# === finished
|
1162
|
+
#
|
1163
|
+
# Here we simply report to the user that we have finished.
|
1164
|
+
# ========================================================================= #
|
1165
|
+
def finished(optional_extra_message = '')
  # Reports 'Done!' to the user, optionally followed by an extra message.
  # A trailing '.' is added to a non-empty message that lacks one.
  #
  # We work on a copy here: appending via << would modify the caller's
  # String and raise a FrozenError for frozen string literals.
  message = optional_extra_message.to_s
  message += '.' unless message.empty? || message.end_with?('.')
  e 'Done! '+message
end
|
1171
|
+
|
1172
|
+
# ========================================================================= #
|
1173
|
+
# === has_id?
|
1174
|
+
#
|
1175
|
+
# We ask the localome if an ID is registered.
|
1176
|
+
# run_sql is defined in the file shared.rb
|
1177
|
+
# ========================================================================= #
|
1178
|
+
def has_id?(i)
  # Returns true if the fasta table has a row whose taxid equals the given
  # value, false otherwise. The query is sent through run_sql() and an
  # empty (whitespace-only) answer is treated as "not found".
  #
  # The value is embedded in a quoted SQL literal, so single quotes are
  # doubled to keep user input from terminating the literal early.
  taxid = i.to_s.gsub("'", "''")
  query = "SELECT taxid from fasta where taxid='"+taxid+"'"
  !run_sql(query, false, :tuples).strip.empty?
end; alias has? has_id? # === has?
|
1187
|
+
|
1188
|
+
# ========================================================================= #
|
1189
|
+
# === show_shared_code_location
|
1190
|
+
#
|
1191
|
+
# To invoke this method, do:
|
1192
|
+
# shared_code?
|
1193
|
+
# ========================================================================= #
|
1194
|
+
def show_shared_code_location
  # Prints where shared.rb lives, relative to what base_dir?() reports.
  location = base_dir?+'/shared.rb'
  e 'You can find the file shared.rb here:'
  e ' '+sfile(location)
end
|
1199
|
+
|
1200
|
+
# ========================================================================= #
|
1201
|
+
# === show_important_directories
|
1202
|
+
#
|
1203
|
+
# Delegate towards Taxonomy.status? from here.
|
1204
|
+
# ========================================================================= #
|
1205
|
+
def show_important_directories
  # Pure delegation: the work happens in Taxonomy.status?.
  return Taxonomy.status?
end
|
1208
|
+
|
1209
|
+
# ========================================================================= #
|
1210
|
+
# === first_argument?
|
1211
|
+
# ========================================================================= #
|
1212
|
+
def first_argument?
  # Reader for @first_argument (set while sanitizing the user input).
  return @first_argument
end; alias f? first_argument? # === f?
alias f first_argument?
|
1216
|
+
|
1217
|
+
# ========================================================================= #
|
1218
|
+
# === show_lineage_from_localome_table
|
1219
|
+
#
|
1220
|
+
# The TaxID must exist before we can use this.
|
1221
|
+
#
|
1222
|
+
# Usage examples:
|
1223
|
+
#
|
1224
|
+
# show_lineage_from_localome_table 2762
|
1225
|
+
# slocalome 2762
|
1226
|
+
#
|
1227
|
+
# ========================================================================= #
|
1228
|
+
def show_lineage_from_localome_table(i)
  # Looks up the lineage_ids column of the fasta table for the given
  # TaxID and prints it. If run_sql() yields nothing we tell the user that
  # the TaxID is not registered.
  #
  # Single quotes in the input are doubled because the value is placed
  # inside a quoted SQL literal.
  taxid = i.to_s.gsub("'", "''")
  result = run_sql(
    "select lineage_ids from fasta WHERE taxid='"+taxid+"';", false, :tuples
  ).strip
  if result.empty?
    e 'This TaxID was not found in the localome table.'
    e 'To find out which entries exist in the localome table, do "localome?".'
  else
    e result
  end
end
|
1239
|
+
|
1240
|
+
# ========================================================================= #
|
1241
|
+
# === report_how_many_info_files_exist_and_how_many_lack_taxonomy_id
|
1242
|
+
#
|
1243
|
+
# This will check on at least one directory.
|
1244
|
+
# ========================================================================= #
|
1245
|
+
def report_how_many_info_files_exist_and_how_many_lack_taxonomy_id
  # Reports how many .INFO files exist in INFO_DIR, how many of them carry
  # a Taxonomy ID (as determined by get_all_info_entries_with_tax_id) and
  # how many do not. Any .INFO files found in INCOMING_DIR are listed too.
  with_tax_id = get_all_info_entries_with_tax_id
  e 'We will try to assess the various .INFO files from the '+
    sdir(info_dir?)+'.'
  # Scan the directory only once, so that both numbers reported below are
  # derived from the same listing.
  n_info_files = Dir[INFO_DIR+'*.INFO'].size
  if n_info_files > 0
    n_with_tax_id = with_tax_id.size
    # All counts go through .to_s before being handed to sfancy(), as is
    # done everywhere else in this file.
    e 'We did find '+sfancy(n_with_tax_id.to_s)+' .INFO files with Taxonomy ID. '+
      'That means that '+sfancy((n_info_files - n_with_tax_id).to_s)+
      ' .INFO files do not have '
    e 'a Taxonomy ID. In total there are '+sfancy(n_info_files.to_s)+
      ' .INFO files there.'
    info_files_in_incoming_dir = Dir[INCOMING_DIR+'*.INFO']
    if info_files_in_incoming_dir.size > 0
      e 'We also found at least one .INFO file in '+sdir(INCOMING_DIR)+'.'
      e 'We will display them now:'
      pp info_files_in_incoming_dir
    end
  else
    e 'We found no .INFO file there.'
  end
end
|
1266
|
+
|
1267
|
+
# ========================================================================= #
|
1268
|
+
# === load_from_info
|
1269
|
+
#
|
1270
|
+
# This method shall attempt to load from an .INFO file, right into the
|
1271
|
+
# localome/ directory at /data/curated/sequences/localome/incoming/.
|
1272
|
+
# ========================================================================= #
|
1273
|
+
def load_from_info(i)
  # Loads the given .INFO file through Info.new, reports on the fasta file
  # it refers to and, unless the taxid is already registered (has_id?),
  # hands the .INFO file to create_sql_file_from_local_fasta_entry().
  e 'We will now attempt to load an .INFO file.'
  info = Info.new(i)
  info.report_id
  taxid = info.taxid?
  e 'We will try to load a fasta file, if it is nearby.'
  _ = info.fasta?
  e _
  ParseFasta.new(_).report
  if has_id? taxid # If we already have an entry like that, we refuse.
    # .to_s: the taxid may not be a String, and String + Integer would
    # raise a TypeError.
    e 'We already have an entry with the Taxonomic ID '+taxid.to_s+'.'
    e 'You would have to remove it first before you can load a new one.'
    e '(Removing is as simple as: remove_id TAXONOMIC_ID_GOES_HERE)'
  else
    e 'No entry was yet found in the localome table, thus we will '+
      'attempt to add this dataset now.'
    create_sql_file_from_local_fasta_entry(i)
    e 'You can update the path in the Postgresql database via:'
    e ' set_path TAXONOMY_ID path_goes_here'
  end
end
|
1294
|
+
|
1295
|
+
# ========================================================================= #
|
1296
|
+
# === show_type_of_all_info_files
|
1297
|
+
#
|
1298
|
+
# We will get all .INFO files and display the type.
|
1299
|
+
# ========================================================================= #
|
1300
|
+
def show_type_of_all_info_files(i = nil)
  # Without an argument we walk over every entry in INFO_DIR and call
  # ourselves once per entry.
  unless i
    return Dir[INFO_DIR+'*'].each { |path| show_type_of_all_info_files(path) }
  end
  # With an argument: report the type of the fasta file that belongs to
  # this particular .INFO file, if one can be located.
  info_file = Info.new(i, :silent)
  info_file.find_fasta
  unless info_file.fasta?
    return e('We could not locate a local fasta entry for '+sfile(i))
  end
  parsed = ParseFasta.new(info_file.corresponding_datafile)
  e sfile(i)+' is DNA or Protein? '+parsed.type.to_s
end
|
1316
|
+
|
1317
|
+
# ========================================================================= #
|
1318
|
+
# === show_all_prokarya (prokarya tag)
|
1319
|
+
#
|
1320
|
+
# This method will show all entries in the database that are from
|
1321
|
+
# Prokaryotes.
|
1322
|
+
# ========================================================================= #
|
1323
|
+
def show_all_prokarya
  # Lists every registered entry whose lineage contains the Bacteria
  # Taxonomy ID, followed by a short summary.
  #
  # Each line returned by select_name_and_tax_id_and_lineage_ids is
  # expected to have the form "name | taxid | lineage ids"; the lineage
  # matches when it contains the ID surrounded by single spaces.
  entries = select_name_and_tax_id_and_lineage_ids.split(N)
  needle = " #{Bacteria_Taxonomy_ID} "
  result = []
  entries.each { |entry|
    name, taxid, lineage = entry.split('|')
    next unless lineage.to_s.include?(needle)
    scientific_name = name.strip
    result << scientific_name
    # result.size doubles as the running number of the printed entry.
    e simp(result.size.to_s)+') '+scientific_name+' ('+
      sfancy('TaxID')+': '+taxid.to_s.strip+')'
  }
  e N+N+'Out of '+sfancy(entries.size.to_s)+' registered entries in '+
    'total in the localomes database,'
  e sfancy(result.size.to_s)+' belong to Prokarya (the Taxonomy ID of '+
    'Prokarya is '+sfancy(Bacteria_Taxonomy_ID.to_s)+').'
end
|
1344
|
+
|
1345
|
+
# ========================================================================= #
|
1346
|
+
# === Interactive.run
|
1347
|
+
# ========================================================================= #
|
1348
|
+
def self.run(i = nil)
  # Convenience entry point: instantiates Taxonomy::Interactive with the
  # given argument and returns the new object.
  return Taxonomy::Interactive.new(i)
end
|
1351
|
+
|
1352
|
+
# ========================================================================= #
|
1353
|
+
# === Taxonomy.run_interactive
|
1354
|
+
# ========================================================================= #
|
1355
|
+
def self.run_interactive(i = nil)
  # Forwards to Interactive.run with the same argument.
  return Interactive.run(i)
end
|
1358
|
+
|
1359
|
+
# ========================================================================= #
|
1360
|
+
# === get_user_input
|
1361
|
+
#
|
1362
|
+
# Get user input via this method. We prefer to use Readline if it is
|
1363
|
+
# available, otherwise we will simply use a $stdin.gets() call.
|
1364
|
+
# ========================================================================= #
|
1365
|
+
def get_user_input
  # Reads one line of user input (via Readline when that constant is
  # defined, otherwise via $stdin.gets), stores it in @user_input, then
  # sanitizes and processes it. Returns @user_input.
  if Object.const_defined? :Readline
    line = Readline.readline('', true)
    # Blank lines are not worth keeping in the history.
    Readline::HISTORY.pop if line =~ /^\s*$/
  else
    line = $stdin.gets
    line = line.chomp if line
  end
  # Both Readline.readline and $stdin.gets return nil on end-of-input
  # (e.g. Ctrl-D or a closed pipe). Without this fallback the nil would
  # crash in sanitize_user_input. We map it to "quit", which the startup
  # banner advertises as a way to leave.
  # NOTE(review): assumes the menu handles "quit" - confirm.
  @user_input = line || 'quit'
  sanitize_user_input
  process_user_input
  return @user_input
end; alias obtain_user_input get_user_input
|
1377
|
+
|
1378
|
+
# ========================================================================= #
|
1379
|
+
# === sanitize_user_input
|
1380
|
+
# ========================================================================= #
|
1381
|
+
def sanitize_user_input
  # Splits @user_input on whitespace. The first token becomes @cmd, all
  # remaining tokens (joined by a single space) become @first_argument.
  #
  # For empty or whitespace-only input both are nil. Checking the token
  # list rather than the raw string matters here: "   ".split(' ') is [],
  # and [][1..-1] is nil, which would raise on .join.
  tokens = @user_input.to_s.split(' ')
  @cmd = tokens.first
  @first_argument = tokens.empty? ? nil : tokens.drop(1).join(' ')
end
|
1390
|
+
|
1391
|
+
# ========================================================================= #
|
1392
|
+
# === process_user_input
|
1393
|
+
# ========================================================================= #
|
1394
|
+
def process_user_input(i = @user_input)
  # Runs the given input against the menu. An Array is processed entry by
  # entry; a String containing ';' is split into separate commands.
  if i.is_a? Array
    i.each { |entry| process_user_input(entry) }
  elsif i.include? ';'
    process_user_input(i.split(';'))
  else
    # @cmd and @first_argument describe @user_input as a whole. For any
    # other String (e.g. one fragment of "a 1; b 2") they have to be
    # derived from that String, or every fragment would dispatch the very
    # same command.
    unless i == @user_input
      tokens = i.split(' ')
      return if tokens.empty? # Nothing to run for a blank fragment.
      @cmd = tokens.first
      @first_argument = tokens.drop(1).join(' ')
    end
    check_against_menu(@cmd, @first_argument)
  end
end
|
1405
|
+
|
1406
|
+
# ========================================================================= #
|
1407
|
+
# === try_to_show_instructions
|
1408
|
+
#
|
1409
|
+
# This method will try to show the instructions to the user, based
|
1410
|
+
# on what the method return_instructions() will give us.
|
1411
|
+
# ========================================================================= #
|
1412
|
+
def try_to_show_instructions
  # Displays the file named by return_instructions() via cat, or reports
  # that no such file exists.
  path = return_instructions
  unless File.exist?(path)
    return e('We could not find a file at position '+path)
  end
  e 'Now reading in from file `'+sfile(path)+'`.'
  cat path
end
|
1421
|
+
|
1422
|
+
# ========================================================================= #
|
1423
|
+
# We return the bin/taxonomy file here.
|
1424
|
+
# ========================================================================= #
|
1425
|
+
def return_taxonomy_file
  # Path of the bin/taxonomy file below PROJECT_BASE_DIR.
  PROJECT_BASE_DIR+'bin/taxonomy'
end
|
1428
|
+
|
1429
|
+
# ========================================================================= #
|
1430
|
+
# === show_changelog
|
1431
|
+
# ========================================================================= #
|
1432
|
+
def show_changelog
  # Displays one specific changelog file (0.0.14 -> 0.0.15) from
  # PROJECT_DOC_DIR via cat.
  changelog = PROJECT_DOC_DIR+'CHANGELOG_FROM_0.0.14_TO_0.0.15'
  cat changelog
end
|
1435
|
+
|
1436
|
+
# ========================================================================= #
|
1437
|
+
# === return_dependencies
|
1438
|
+
#
|
1439
|
+
# We return the dependencies file here.
|
1440
|
+
# ========================================================================= #
|
1441
|
+
def return_dependencies
  # Path of the DEPENDENCIES file below PROJECT_DOC_DIR.
  return PROJECT_DOC_DIR+'DEPENDENCIES'
end
|
1444
|
+
|
1445
|
+
# ========================================================================= #
|
1446
|
+
# === return_instructions
|
1447
|
+
# ========================================================================= #
|
1448
|
+
def return_instructions
  # Path of the INSTRUCTIONS file below PROJECT_DOC_DIR.
  return PROJECT_DOC_DIR+'INSTRUCTIONS'
end
|
1451
|
+
|
1452
|
+
# ========================================================================= #
|
1453
|
+
# === show_last_command
|
1454
|
+
# ========================================================================= #
|
1455
|
+
def show_last_command
  # Reads the stored last command from LAST_INTERACTIVE_COMMAND and shows
  # it, together with the file it was read from.
  e 'The last command was: '+simp(File.read(LAST_INTERACTIVE_COMMAND).to_s)
  e 'This was read in from the file '+sfile(LAST_INTERACTIVE_COMMAND)
end
|
1460
|
+
|
1461
|
+
# ========================================================================= #
|
1462
|
+
# === create_sql_file_from_local_fasta_entry
|
1463
|
+
#
|
1464
|
+
# This method will create a .sql file, based on the information
|
1465
|
+
# obtained from the local fasta entries.
|
1466
|
+
#
|
1467
|
+
# The first argument to it should be an .INFO file, not a fasta file!
|
1468
|
+
# ========================================================================= #
|
1469
|
+
def create_sql_file_from_local_fasta_entry(i)
  # Builds an INSERT statement for the fasta table from the given .INFO
  # file (NOT a fasta file), stores it as a .sql file below
  # AUTOGENERATED_SQL_FILES_DIR and feeds that file to the command stored
  # in POSTGRE_LOGIN_COMMAND.
  #
  # The process exits when no Tax ID or no scientific name can be found.
  ensure_that_temp_dir_exists
  info = Info.new(i, :be_verbose => false)
  info.try_to_find_likely_fasta_file # Need to run it once so we can try to find the fasta file.
  if info.taxid
    obtain_full_lineage_for(info.taxid) # This will set @lineage_id
  else
    if be_verbose?
      e 'We did not find a Tax ID for `'+sfile(i)+'`.'
      e 'This is considered a fatal error for now - we will thus exit.'
    end
    exit
  end
  mkdir(AUTOGENERATED_SQL_FILES_DIR) unless Dir.exist?(AUTOGENERATED_SQL_FILES_DIR)
  # File.basename(path, suffix) strips the extension only at the end of
  # the name. A regex built from the extension would treat "." as "any
  # character" and could also match elsewhere in the name.
  sql_filename = AUTOGENERATED_SQL_FILES_DIR+
                 File.basename(i, File.extname(i))+'.sql'
  remove(sql_filename) if File.exist? sql_filename # Get rid of the old one first.
  # ======================================================================= #
  # Next, we will build up our SQL string:
  # ======================================================================= #
  string = ''.dup # This is the string that will be stored.
  # ======================================================================= #
  # (1) - the scientific name comes first
  # ======================================================================= #
  scientific_name = get_scientific_name(info.taxid).strip
  if scientific_name.empty? # Exit if we can not find it.
    if be_verbose?
      ewarn 'The scientific name for '+simp(info.taxid.to_s)+' was empty.'
      ewarn 'This we consider to be a runtime error for now, thus '+
            'we will exit.'
    end
    exit
  end
  string << pad_with_single_quotes(
    scientific_name
  )+', '
  fasta = ParseFasta.new(info.corresponding_datafile)
  # ======================================================================= #
  # (2) Next we need the Taxonomic ID, which is an integer.
  # ======================================================================= #
  string << info.taxid.to_s+', '
  # ======================================================================= #
  # (3) We now need to find out whether we have DNA or Protein.
  #     DNA is stored as 0, Protein as 1; any other value is passed on
  #     unchanged.
  # ======================================================================= #
  type = fasta.type? # DNA or Protein.
  if type == 'DNA'
    type = 0
  elsif type == 'Protein'
    type = 1
  end
  string << type.to_s+', '
  # ======================================================================= #
  # (4) Now comes the lineage_ids, of varchar(100)
  # ======================================================================= #
  string << pad_with_single_quotes(@lineage_ids)+', '
  # ======================================================================= #
  # (5) And the lineage_scientific_name
  # ======================================================================= #
  string << pad_with_single_quotes(@lineage_scientific_name)+', '
  # ======================================================================= #
  # Here be the accession_number varchar(200000)
  # Disabled as of Friday, needs more testing first. But is not even needed.
  # string << pad_with_single_quotes(
  #   fasta.all_accession_entries.join("\t|")
  # )+', '
  # string << pad_with_single_quotes(0)+', '
  # Dataset should come here
  # string << pad_with_single_quotes(0)+', '
  # (6) And the comment field will be here.
  # ======================================================================= #
  string << pad_with_single_quotes(0)+', '
  # ======================================================================= #
  # (7) filesize: the size of the file in question
  # ======================================================================= #
  string << fasta.filesize.to_s+', '
  # ======================================================================= #
  # (8) modification_time
  # ======================================================================= #
  string << pad_with_single_quotes(fasta.modification_time.to_s)+', '
  # ======================================================================= #
  # (9) n_accession_numbers
  # ======================================================================= #
  string << fasta.n_entries.to_s+', '
  # ======================================================================= #
  # (10) path - the filepath in question
  # ======================================================================= #
  string << pad_with_single_quotes(info.corresponding_datafile)
  # ======================================================================= #
  # Now we can insert into the postgresql Database.
  # ======================================================================= #
  string = InsertInto[
    'fasta',
    'name, taxid, type, lineage_ids, lineage_scientific_name, comment_field, filesize, modification_time, n_accession_numbers, path', # accession_number, dataset,
    string
  ]
  e 'Now storing into '+sfile(sql_filename) if be_verbose?
  write_what_into(string, sql_filename)
  if be_verbose?
    # The '+' is required: without it the second literal is a separate,
    # discarded expression and only half of the sentence gets printed.
    e 'Next, we will attempt to read in this file into the '+
      'Postgresql localome (fasta) table.'
  end
  cmd = POSTGRE_LOGIN_COMMAND+' -f '+sql_filename
  esystem(cmd)
  if be_verbose?
    # .to_s, as everywhere else in this method: the taxid may not be a
    # String.
    e 'Done! All should have been well by now. '+
      'The Taxonomic ID was '+info.taxid.to_s
  end
end
|
1577
|
+
|
1578
|
+
# ========================================================================= #
|
1579
|
+
# === insert_into
|
1580
|
+
# ========================================================================= #
|
1581
|
+
def insert_into(i = :nodes)
  # Picks the populate/create routine that belongs to the given table
  # name ('fasta', 'nodes' or 'names'); other names are ignored.
  table = i.to_s
  if 'fasta' == table
    create_fasta_table
  elsif 'nodes' == table
    populate_nodes_table(:be_verbose)
  elsif 'names' == table
    populate_names_table(:be_verbose)
  end
end
|
1592
|
+
|
1593
|
+
# ========================================================================= #
|
1594
|
+
# === which_database_to_use?
|
1595
|
+
# ========================================================================= #
|
1596
|
+
def which_database_to_use?
  # Returns the database name stored in use_this_database.yml (below
  # PROJECT_YAML_DIR).
  #
  # Surrounding whitespace is removed: a file ending in a newline would
  # otherwise never compare equal to names such as 'sqlite'.
  File.read(PROJECT_YAML_DIR+'use_this_database.yml').strip
end
|
1599
|
+
|
1600
|
+
# ========================================================================= #
|
1601
|
+
# === show_configuration
|
1602
|
+
# ========================================================================= #
|
1603
|
+
def show_configuration
  # The only configuration shown is which database is in use.
  e(which_database_to_use?)
end
|
1606
|
+
|
1607
|
+
# ========================================================================= #
|
1608
|
+
# === names_sql
|
1609
|
+
# ========================================================================= #
|
1610
|
+
def names_sql
  # (Re)generates names.sql below AUTOGENERATED_SQL_FILES_DIR: an existing
  # file is deleted first, then populate_names_table() is asked to store
  # into that location.
  target = AUTOGENERATED_SQL_FILES_DIR+'names.sql'
  File.delete(target) if File.exist?(target)
  e('We will populate the names-table next. This will take a while.')
  populate_names_table(:be_verbose, target)
end
|
1616
|
+
|
1617
|
+
# ========================================================================= #
|
1618
|
+
# === nodes_sql
|
1619
|
+
#
|
1620
|
+
# Use this method to generate the nodes.sql file.
|
1621
|
+
# ========================================================================= #
|
1622
|
+
def nodes_sql
  # (Re)generates nodes.sql below AUTOGENERATED_SQL_FILES_DIR: an existing
  # file is deleted first, then populate_nodes_table() is asked to store
  # into that location.
  target = AUTOGENERATED_SQL_FILES_DIR+'nodes.sql'
  File.delete(target) if File.exist?(target)
  e('We will populate the nodes-table next. This will take a while.')
  populate_nodes_table(:be_verbose, target)
end
|
1628
|
+
|
1629
|
+
# ========================================================================= #
|
1630
|
+
# === set_database
|
1631
|
+
# ========================================================================= #
|
1632
|
+
def set_database(i)
  # Stores the name of the database to use in FILE_USE_THIS_DATABASE.
  # Double quotes and surrounding whitespace are removed and the name is
  # downcased; 'postgre' is accepted as a short form of 'postgresql'.
  #
  # Downcasing happens BEFORE the alias check, so that input such as
  # "Postgre" is expanded as well.
  database = i.to_s.delete('"').strip.downcase
  if database.empty?
    e 'Please provide the type of the database you wish to '\
      'use, such as "postgresql".'
  else
    database = 'postgresql' if database == 'postgre'
    e 'Storing '+database+' into '+sfile(FILE_USE_THIS_DATABASE)+'.'
    write_what_into(database, FILE_USE_THIS_DATABASE)
  end
end
|
1647
|
+
|
1648
|
+
# ========================================================================= #
|
1649
|
+
# === create_fasta_table
|
1650
|
+
#
|
1651
|
+
# This method generates the fasta table into either postgresql or
|
1652
|
+
# whatever else you stored things into.
|
1653
|
+
# ========================================================================= #
|
1654
|
+
def create_fasta_table
  # Rebuilds the fasta table. The order of the steps matters:
  #   1. remove the old FASTA_SQL file and generate a new one,
  #   2. display the generated file,
  #   3. drop the existing fasta table,
  #   4. read the new definition back in.
  remove(FASTA_SQL)
  create_and_save_table('fasta', true) # true for verbose
  cat(FASTA_SQL) # Also display it, after creation.
  e('Trying to drop the fasta table next.')
  drop_table(:fasta)
  read_sql(:fasta) # And now, read in the new one.
end
|
1665
|
+
|
1666
|
+
# ========================================================================= #
|
1667
|
+
# === set_commandline_arguments
|
1668
|
+
# ========================================================================= #
|
1669
|
+
def set_commandline_arguments(i = nil)
  # Stores the commandline arguments in @commandline_arguments, always as
  # an Array: anything that is not an Array (nil included) is wrapped in
  # a one-element Array.
  @commandline_arguments = i.is_a?(Array) ? i : [i]
end
|
1673
|
+
|
1674
|
+
# ========================================================================= #
|
1675
|
+
# === do_startup_actions
|
1676
|
+
# ========================================================================= #
|
1677
|
+
def do_startup_actions
  # In verbose mode: greet the user, show the help menu and explain how
  # to leave. The commandline arguments are checked in every case.
  if be_verbose?
    e(Colours.rev+'Welcome to interactive Taxonomy.'+N+N)
    e('We will show the help menu now (on startup).'+N+N)
    show_help
    e(
      'Please input your command(s). ("help" for help, '+
      '"quit", "exit" or "q" to exit.)'
    )
    nohelp if at_home? # At home I won't use this here.
  end
  check_commandline_arguments
end
|
1688
|
+
|
1689
|
+
# ========================================================================= #
|
1690
|
+
# === check_commandline_arguments
|
1691
|
+
#
|
1692
|
+
# We check the commandline.
|
1693
|
+
# ========================================================================= #
|
1694
|
+
def check_commandline_arguments
  # Walks over @commandline_arguments; the only argument acted upon is
  # :run_connected (the mode for when we are embedded in the
  # BioroebeShell).
  @commandline_arguments.each do |argument|
    do_run_connected if :run_connected == argument
  end
end
|
1702
|
+
|
1703
|
+
# ========================================================================= #
|
1704
|
+
# === fetch_user_input_via_loop
|
1705
|
+
# ========================================================================= #
|
1706
|
+
def fetch_user_input_via_loop
  # Keeps asking for user input. An Interrupt (Ctrl-C) leads to
  # exit_program(). When running connected, the loop is left as soon as
  # @user_input is one of VALID_WAYS_TO_EXIT.
  loop do
    begin
      obtain_user_input # This sets the @user_input variable.
    rescue Interrupt
      e('Sigint occurred by user interrupt, exiting gracefully now.') if be_verbose?
      exit_program
    end
    break if run_connected? && VALID_WAYS_TO_EXIT.include?(@user_input)
  end
end
|
1721
|
+
|
1722
|
+
# ========================================================================= #
|
1723
|
+
# === run_standalone?
|
1724
|
+
# ========================================================================= #
|
1725
|
+
def run_standalone?
  # Reader for @run_standalone.
  return @run_standalone
end
|
1728
|
+
|
1729
|
+
# ========================================================================= #
|
1730
|
+
# === run_connected?
|
1731
|
+
# ========================================================================= #
|
1732
|
+
def run_connected?
  # "Connected" is simply the opposite of "standalone".
  standalone = run_standalone?
  !standalone
end
|
1735
|
+
|
1736
|
+
# ========================================================================= #
|
1737
|
+
# === exit_program
|
1738
|
+
#
|
1739
|
+
# Use this method when exiting.
|
1740
|
+
# ========================================================================= #
|
1741
|
+
def exit_program
  # Only a standalone run terminates the process; otherwise this is a
  # no-op.
  if run_standalone?
    exit
  end
end
|
1744
|
+
|
1745
|
+
# ========================================================================= #
|
1746
|
+
# === show_sql_commands (sql tag)
|
1747
|
+
#
|
1748
|
+
# This method will feedback the SQL commands to create our postgre
|
1749
|
+
# table and also show the Insert commands used.
|
1750
|
+
# ========================================================================= #
|
1751
|
+
def show_sql_commands
  # Prints a heading per table (names, nodes, fasta), followed by
  # whatever create_table() returns for that table.
  #
  # NOTE(review): create_table() as defined in this file does more than
  # return text (it stores files, and for :fasta it delegates to
  # create_fasta_table) - confirm that this is intended for a method that
  # is meant to merely show commands.
  show_section = lambda { |heading, table|
    e heading
    efancy ' '+create_table(table).to_s
  }
  e N+'The three commands to create the '+
    simp('PostgreSQL database')+' are:'+N+N
  show_section.call(' (1) names table:'+N+N, :names)
  show_section.call(N+' (2) nodes table:'+N+N, :nodes)
  show_section.call(N+' (3) fasta table:'+N+N, :fasta)
end; alias sql? show_sql_commands # === sql?
alias table_names? show_sql_commands # === table_names?
|
1762
|
+
|
1763
|
+
# ========================================================================= #
|
1764
|
+
# === create_table
|
1765
|
+
#
|
1766
|
+
# A wrapper over class CreateTable.
|
1767
|
+
# ========================================================================= #
|
1768
|
+
def create_table(i)
  # A wrapper over ::SqlParadise::Commands.create_table.
  #
  # 'fasta' is delegated to create_fasta_table(). For every other name
  # the create-table statement for "<name>.sql" is fetched, saved as
  # AUTOGENERATED_SQL_FILES_DIR/create_table_<name>.sql and displayed.
  name = i.to_s
  case name
  when 'fasta'
    create_fasta_table
  else
    # Build a new String rather than appending via <<. For a String
    # argument .to_s returns the very same object, so << would alter the
    # caller's String and raise a FrozenError for frozen literals.
    name = name+'.sql' # Append .sql in this case.
    result = ::SqlParadise::Commands.create_table(name)
    store_here = AUTOGENERATED_SQL_FILES_DIR+'create_table_'+name
    e 'Next creating a table for '+simp(name)+'. Will '+
      'store into '+sfile(store_here)
    save_what_into(result, store_here)
    e result
  end
end
|
1783
|
+
|
1784
|
+
# ========================================================================= #
|
1785
|
+
# === enable
|
1786
|
+
# ========================================================================= #
|
1787
|
+
def enable(i)
  # Enables a feature by name. A leading "enable" is removed first, so
  # both 'colours' and 'enable colours' work. Currently 'colours' is the
  # only feature known here; anything else is ignored.
  #
  # Non-destructive String methods are used so that the caller's String
  # is left alone (and frozen Strings do not raise).
  feature = i.to_s.sub(/^enable/, '').strip
  case feature
  when 'colours'
    enable_colours
  end
end
|
1798
|
+
|
1799
|
+
# ========================================================================= #
|
1800
|
+
# === show_sql_commands_only
|
1801
|
+
# ========================================================================= #
|
1802
|
+
def show_sql_commands_only
  # Prints what Commands.create_table yields for the nodes, names and
  # fasta taxonomy tables, each followed by an empty e() call.
  e(Commands.create_table(:taxonomy_nodes))
  e()
  e(Commands.create_table(:taxonomy_names))
  e()
  e(Commands.create_table(:taxonomy_fasta))
  e()
end
|
1810
|
+
|
1811
|
+
# ========================================================================= #
|
1812
|
+
# === enable_colours
|
1813
|
+
# ========================================================================= #
|
1814
|
+
def enable_colours
  # Turns colours on for this instance (@use_colours) and also calls
  # Taxonomy.enable_colours.
  @use_colours = true
  return Taxonomy.enable_colours
end
|
1818
|
+
|
1819
|
+
# ========================================================================= #
|
1820
|
+
# === create_database
|
1821
|
+
# ========================================================================= #
|
1822
|
+
def create_database
  # Creates the names/nodes/fasta tables via the sqlite3 commandline tool
  # (one .db file per table, located in working_dir?) and then feeds
  # every line of the NAMES_SQL file into names.db.
  #
  # Only acts when which_database_to_use? is exactly 'sqlite'; nothing is
  # done for any other database.
  return unless 'sqlite' == which_database_to_use?
  schemas = {
    'names' => 'CREATE TABLE names ( taxid int, name_txt varchar(155), unique_name varchar(100), name_class varchar(25) );',
    'nodes' => 'CREATE TABLE nodes ( taxid int, parent_taxid int, rank varchar(25) );',
    'fasta' => 'CREATE TABLE fasta ( name varchar(80), taxid integer, type smallint, lineage_ids varchar(500), lineage_scientific_name varchar(2500), comment_field varchar(8000), filesize integer, modification_time varchar(25), n_accession_numbers integer, path varchar(120) );'
  }
  # First display all three statements, then run them one by one.
  schemas.each_value { |statement| e statement }
  schemas.each_pair { |table, statement|
    esystem 'sqlite3 '+working_dir?+table+'.db "'+statement+'"'
  }
  File.read(Taxonomy::Constants::NAMES_SQL).each_line { |raw_line|
    # The statement is wrapped in double quotes on the commandline, so
    # any double quote inside of it gets a backslash in front.
    statement = raw_line.chomp.gsub(/"/,'\"')
    Esystem.esystem 'sqlite3 '+working_dir?+'names.db "'+statement+'"'
  }
end
|
1848
|
+
|
1849
|
+
# ========================================================================= #
|
1850
|
+
# === download
|
1851
|
+
#
|
1852
|
+
# This method can be used to download the ncbi database.
|
1853
|
+
# ========================================================================= #
|
1854
|
+
# Dispatches a download request.
#
# i - the requested action; converted with to_s before it is compared.
#     Defaults to the value returned by f?.
#
# 'HELP', '?' and '--help' print a short usage note. 'ncbi', '--ncbi',
# 'database', '1' and the empty string call download_ncbi_database and, if
# be_verbose? is true, print a hint about the follow-up command. Any other
# input is reported as unknown.
def download(i = f?)
  request = i.to_s
  if ['HELP', '?', '--help'].include?(request)
    # === --help
    e 'Currently we can download only the NCBI database.'
    e
    e 'To do so, do:'
    e ' download ncbi'
  elsif ['ncbi', '--ncbi', 'database', '1', ''].include?(request)
    # === --ncbi ('' is also a default here)
    download_ncbi_database
    if be_verbose?
      e
      e 'If all went well then you could now try to update the database, by issuing:'
      e
      efancy ' update databases'
      e
    end
  else
    e 'In the method download(), we do not know the given input `' +
      simp(request) + '`.'
  end
end
|
1883
|
+
|
1884
|
+
# ========================================================================= #
|
1885
|
+
# === download_ncbi_database
|
1886
|
+
# ========================================================================= #
|
1887
|
+
# Instance-level convenience wrapper: hands the request over to
# Taxonomy.download_ncbi_database and returns whatever that call returns.
def download_ncbi_database
  Taxonomy.download_ncbi_database # bl $TAXONOMY/class_methods.rb
end
|
1890
|
+
|
1891
|
+
# ========================================================================= #
|
1892
|
+
# === add_comment (comment tag)
|
1893
|
+
#
|
1894
|
+
# Use this method to add a comment to the localome table.
|
1895
|
+
# ========================================================================= #
|
1896
|
+
# Interactively stores a comment in the comment_field column of the fasta
# table, for the row(s) whose taxid equals the given Taxonomy ID.
#
# taxid - the Taxonomy ID; converted with to_s before it is used.
#
# When has?(taxid) is false an error is reported via erev and nothing is
# changed. Otherwise one line is read from $stdin and used as the comment.
# If $stdin is already at end-of-file (gets returns nil) a notice is shown
# and no statement is run.
#
# Both values are embedded as SQL string literals with every single quote
# doubled, so a comment such as "don't use" neither breaks nor alters the
# UPDATE statement.
# NOTE(review): run_sql only accepts a finished SQL string here; bound
# parameters would be preferable if the database layer offers them.
#
# Returns the result of run_sql on success.
def add_comment(taxid)
  if has? taxid
    e 'Please input the comment you wish to add to the '+
      'Taxonomy ID '+simp(taxid.to_s)+' now:'
    answer = $stdin.gets
    if answer.nil? # End-of-file: there is no comment that could be stored.
      erev 'No comment could be read from the input; nothing was changed.'
      return
    end
    # Renders a value as a SQL string literal ('' is an escaped quote).
    sql_literal = ->(value) { "'" + value.to_s.gsub("'", "''") + "'" }
    run_sql(
      "UPDATE fasta SET comment_field=#{sql_literal.call(answer.chomp)} "\
      "WHERE taxid=#{sql_literal.call(taxid)}"
    )
  else
    erev "We could not find any entry with the Taxonomy "\
         "ID #{taxid.to_s}."
  end
end
|
1908
|
+
|
1909
|
+
# ========================================================================= #
|
1910
|
+
# === run (run tag)
|
1911
|
+
# ========================================================================= #
|
1912
|
+
# Entry point of an instance: calls do_startup_actions first and
# fetch_user_input_via_loop afterwards.
def run
  do_startup_actions
  fetch_user_input_via_loop
end
alias run_in_interactive_mode run # === run_in_interactive_mode
|
1916
|
+
|
1917
|
+
end
|
1918
|
+
|
1919
|
+
# ========================================================================= #
|
1920
|
+
# === Taxonomy.interactive
|
1921
|
+
#
|
1922
|
+
# Invoke this method if you wish to directly invoke the interactive
|
1923
|
+
# component of the Taxonomy module.
|
1924
|
+
# ========================================================================= #
|
1925
|
+
# Creates a new ::Bioroebe::Taxonomy::Interactive object, passing the
# optional argument i (nil by default) to its constructor, and returns it.
def self.interactive(i = nil)
  ::Bioroebe::Taxonomy::Interactive.new(i)
end
|
1928
|
+
|
1929
|
+
end; end
|
1930
|
+
|
1931
|
+
# Only start the interactive component when this file is executed directly,
# not when it is merely required by other code.
Bioroebe::Taxonomy::Interactive.run_interactive(ARGV) if $PROGRAM_NAME == __FILE__ # itax; Taxonomy.interactive
|