bioroebe 0.10.80 → 0.12.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3946 -2817
- data/bin/bioroebe +13 -2
- data/bin/bioroebe_hash +7 -0
- data/bin/codon_to_aminoacid +6 -4
- data/bin/compacter +7 -0
- data/bin/plain_palindrome +7 -0
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +3918 -2793
- data/doc/quality_control/commandline_applications.md +3 -3
- data/doc/statistics/statistics.md +7 -7
- data/doc/todo/bioroebe_GUI_todo.md +19 -14
- data/doc/todo/bioroebe_java_todo.md +22 -0
- data/doc/todo/bioroebe_todo.md +2075 -2620
- data/lib/bioroebe/C++/DNA.cpp +69 -0
- data/lib/bioroebe/C++/RNA.cpp +58 -0
- data/lib/bioroebe/C++/sequence.cpp +35 -0
- data/lib/bioroebe/abstract/README.md +1 -0
- data/lib/bioroebe/abstract/features.rb +29 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
- data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
- data/lib/bioroebe/base/base_module/base_module.rb +36 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +13 -9
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +24 -19
- data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
- data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
- data/lib/bioroebe/base/commandline_application/reset.rb +5 -3
- data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +42 -0
- data/lib/bioroebe/base/misc.rb +35 -0
- data/lib/bioroebe/base/prototype/misc.rb +15 -9
- data/lib/bioroebe/base/prototype/reset.rb +10 -0
- data/lib/bioroebe/cleave_and_digest/digestion.rb +10 -2
- data/lib/bioroebe/cleave_and_digest/trypsin.rb +104 -50
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
- data/lib/bioroebe/codons/codons.rb +1 -1
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +208 -59
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
- data/lib/bioroebe/codons/show_codon_tables.rb +8 -3
- data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
- data/lib/bioroebe/colours/rev.rb +4 -1
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/database_constants.rb +1 -1
- data/lib/bioroebe/constants/files_and_directories.rb +31 -4
- data/lib/bioroebe/constants/misc.rb +20 -0
- data/lib/bioroebe/constants/nucleotides.rb +7 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +109 -39
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/cpp +1 -0
- data/lib/bioroebe/crystal/README.md +2 -0
- data/lib/bioroebe/crystal/to_rna.cr +19 -0
- data/lib/bioroebe/data/README.md +11 -8
- data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
- data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
- data/lib/bioroebe/data/fasta/human/README.md +2 -0
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +1 -1
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
- data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
- data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1518 -7
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
- data/lib/bioroebe/genome/README.md +4 -0
- data/lib/bioroebe/genome/genome.rb +130 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
- data/lib/bioroebe/gui/gtk +1 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +106 -137
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +27 -61
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +1 -1
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +1 -2
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +1 -2
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +46 -29
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +77 -52
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +100 -23
- data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +1 -2
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +1 -2
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +43 -30
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +1 -2
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +120 -73
- data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +19 -20
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +20 -13
- data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +1 -2
- data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +97 -22
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +3 -73
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +1 -2
- data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +1 -2
- data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +1 -2
- data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +1 -2
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.class +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.java +104 -0
- data/lib/bioroebe/gui/javafx/bioroebe.jar +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe.mf +1 -0
- data/lib/bioroebe/gui/javafx/module-info.class +0 -0
- data/lib/bioroebe/gui/javafx/module-info.java +5 -0
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
- data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +166 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
- data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
- data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +94 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
- data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
- data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
- data/lib/bioroebe/images/images.html +29845 -0
- data/lib/bioroebe/java/README.md +5 -0
- data/lib/bioroebe/java/bioroebe/AllInOne.java +1 -0
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Base.java +39 -5
- data/lib/bioroebe/java/bioroebe/IsPalindrome.java +23 -5
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +0 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +28 -3
- data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToCamelcase.java +16 -4
- data/lib/bioroebe/java/bioroebe/ToRNA.java +43 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +6 -0
- data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/BisulfiteTreatment.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Codons.class → src/Codons.class} +0 -0
- data/lib/bioroebe/java/bioroebe/src/Codons.java +35 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.java +101 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.class → src/Esystem.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.java → src/Esystem.java} +6 -1
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/GenerateRandomDnaSequence.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/GenerateRandomDnaSequence.java} +8 -2
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.java +56 -0
- data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/RemoveFile.java} +10 -4
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/RemoveNumbers.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/RemoveNumbers.java} +1 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.java +75 -0
- data/lib/bioroebe/misc/ruler.rb +11 -2
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +59 -18
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
- data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
- data/lib/bioroebe/parsers/gff.rb +1 -9
- data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
- data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
- data/lib/bioroebe/project/project.rb +1 -1
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
- data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +33 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/convert_dna_to_aminoacid_sequence.py +137 -0
- data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
- data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
- data/lib/bioroebe/python/toplevel_methods/palindromes.py +52 -0
- data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
- data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
- data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
- data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
- data/lib/bioroebe/requires/require_cleave_and_digest.rb +3 -1
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
- data/lib/bioroebe/sequence/alignment.rb +14 -4
- data/lib/bioroebe/sequence/dna.rb +1 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/protein.rb +105 -3
- data/lib/bioroebe/sequence/rna.rb +220 -0
- data/lib/bioroebe/sequence/sequence.rb +128 -40
- data/lib/bioroebe/shell/menu.rb +3815 -3696
- data/lib/bioroebe/shell/misc.rb +9019 -3133
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +1137 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
- data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
- data/lib/bioroebe/taxonomy/constants.rb +4 -3
- data/lib/bioroebe/taxonomy/edit.rb +2 -1
- data/lib/bioroebe/taxonomy/help/help.rb +10 -10
- data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
- data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
- data/lib/bioroebe/taxonomy/info/info.rb +18 -11
- data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
- data/lib/bioroebe/taxonomy/interactive.rb +140 -104
- data/lib/bioroebe/taxonomy/menu.rb +27 -18
- data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
- data/lib/bioroebe/taxonomy/shared.rb +1 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
- data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
- data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
- data/lib/bioroebe/toplevel_methods/misc.rb +175 -11
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +75 -47
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +3 -3
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
- data/lib/bioroebe/utility_scripts/compacter/compacter.rb +251 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
- data/lib/bioroebe/utility_scripts/permutations.rb +36 -9
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/www/embeddable_interface.rb +121 -58
- data/lib/bioroebe/www/sinatra/sinatra.rb +186 -71
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
- data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +17 -17
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
- data/lib/bioroebe/yaml/genomes/README.md +3 -4
- data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +5 -0
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +57 -57
- data/spec/README.md +6 -0
- data/spec/project_wide_specification/classes.md +5 -0
- metadata +107 -70
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
- data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
- data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
- data/lib/bioroebe/utility_scripts/compacter.rb +0 -131
- /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/BisulfiteTreatment.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/RemoveFile.class} +0 -0
@@ -1,310 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
# Encoding: UTF-8
|
3
|
-
# frozen_string_literal: true
|
4
|
-
# =========================================================================== #
|
5
|
-
# Enzyme-related components of the BioShell will be stored here.
|
6
|
-
# =========================================================================== #
|
7
|
-
# require 'bioroebe/shell/enzymes.rb'
|
8
|
-
# =========================================================================== #
|
9
|
-
module Bioroebe
|
10
|
-
|
11
|
-
class Shell < ::Bioroebe::CommandlineApplication
|
12
|
-
|
13
|
-
require 'bioroebe/enzymes/has_this_restriction_enzyme.rb'
|
14
|
-
require 'bioroebe/enzymes/restriction_enzymes_file.rb'
|
15
|
-
|
16
|
-
# ========================================================================= #
|
17
|
-
# === return_random_restriction_enzyme
|
18
|
-
#
|
19
|
-
# This method will return a random restriction enzyme, such as:
|
20
|
-
#
|
21
|
-
# ["EgeI", "GGCGCC 3"]
|
22
|
-
#
|
23
|
-
# ========================================================================= #
|
24
|
-
def return_random_restriction_enzyme(be_verbose = false)
|
25
|
-
splitted = ::Bioroebe.restriction_enzymes.sample
|
26
|
-
_ = splitted[1].split(' ')[0]
|
27
|
-
if be_verbose
|
28
|
-
erev 'Now adding restriction site `'+red(splitted[0])+
|
29
|
-
'` (cuts at '+simp(_)+').'
|
30
|
-
end
|
31
|
-
return _
|
32
|
-
end
|
33
|
-
|
34
|
-
# ========================================================================= #
|
35
|
-
# === find_restriction_enzymes_that_cut_at
|
36
|
-
#
|
37
|
-
# A wrapper over find_restriction_sites().
|
38
|
-
# ========================================================================= #
|
39
|
-
def find_restriction_enzymes_that_cut_at(i)
|
40
|
-
erev 'Trying to find restriction enzymes that '\
|
41
|
-
'cut at `'+sfancy(i)+rev+'`.'
|
42
|
-
result = find_restriction_sites(i)
|
43
|
-
unless result
|
44
|
-
erev 'Found no result for this sequence.'
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
# ========================================================================= #
|
49
|
-
# === show_restriction_enzymes
|
50
|
-
#
|
51
|
-
# Display the available restriction enzymes here.
|
52
|
-
#
|
53
|
-
# If we pass an argument, then we assume that we wish to show only
|
54
|
-
# these restriction enzymes that cut at n bp.
|
55
|
-
#
|
56
|
-
# Invocation example:
|
57
|
-
#
|
58
|
-
# show_restriction_enzymes(:show_all)
|
59
|
-
#
|
60
|
-
# ========================================================================= #
|
61
|
-
def show_restriction_enzymes(optional_input = nil)
|
62
|
-
case optional_input
|
63
|
-
when nil, :show_all # This means to show everything.
|
64
|
-
::Bioroebe.show_restriction_enzymes # Defined in module_methods.rb
|
65
|
-
else # Ok we gave input then.
|
66
|
-
_ = ::Bioroebe.restriction_enzymes
|
67
|
-
_.select! {|entry|
|
68
|
-
last = entry.last
|
69
|
-
last = last.split(' ').last
|
70
|
-
if last == optional_input
|
71
|
-
true
|
72
|
-
else
|
73
|
-
false
|
74
|
-
end
|
75
|
-
}
|
76
|
-
if _.empty?
|
77
|
-
erev 'We found no match for '+optional_input+'.'
|
78
|
-
else # else display the cutters.
|
79
|
-
erev 'These enzymes cut at `'+sfancy(optional_input)+rev+'` nucleotides.'
|
80
|
-
_.each {|entry|
|
81
|
-
entry[0] = entry[0].rjust(15)
|
82
|
-
entry[1] = entry[1].gsub(/ (.+)/, swarn(' \\1')+rev)
|
83
|
-
e " #{entry.join(' -> ')}"
|
84
|
-
}
|
85
|
-
erev 'These are '+simp(_.size.to_s)+rev+' restriction enzymes.'
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
# ========================================================================= #
|
91
|
-
# === try_to_find_this_restriction_enzyme
|
92
|
-
#
|
93
|
-
# Use this method to find a specific restriction enzyme.
|
94
|
-
#
|
95
|
-
# The restriction enzymes are stored in this yaml file here:
|
96
|
-
#
|
97
|
-
# bl $BIOROEBE/yaml/restriction_enzymes/restriction_enzymes.yml
|
98
|
-
#
|
99
|
-
# Usage example:
|
100
|
-
#
|
101
|
-
# MvnI?
|
102
|
-
#
|
103
|
-
# ========================================================================= #
|
104
|
-
def try_to_find_this_restriction_enzyme(i)
|
105
|
-
i = i.dup if i.frozen?
|
106
|
-
i.delete!('?') # We do not need any '?' characters.
|
107
|
-
original_input = i.dup
|
108
|
-
# i = i.downcase # No longer downcase since as of June 2018.
|
109
|
-
if i.include? 'restriction'
|
110
|
-
i.sub!(/restriction/,'')
|
111
|
-
end
|
112
|
-
if i.include? '.site' # Assume a syntax such as: Restriction.EcoRI.site
|
113
|
-
e ::Bioroebe.restriction_enzyme(i)
|
114
|
-
else # else it will be more verbose
|
115
|
-
i.delete!('.') if i.include? '.'
|
116
|
-
if i.end_with?('1') and !::Bioroebe.has_this_restriction_enzyme?(i) # Is invalid.
|
117
|
-
erev 'The input `'+simp(i)+rev+'` ends with the number 1. This '\
|
118
|
-
'is not possible, so'
|
119
|
-
erev 'we replace the trailing 1 with a capital I.'
|
120
|
-
i[-1,1] = 'i' # Ok not a capital one, because we store in a downcased variant.
|
121
|
-
original_input[-1,1] = 'I'
|
122
|
-
end
|
123
|
-
if ::Bioroebe.has_this_restriction_enzyme? i
|
124
|
-
target_sequence_data = ::Bioroebe.return_restriction_enzyme_sequence_and_cut_position(i)
|
125
|
-
# =================================================================== #
|
126
|
-
# Tap into the method Bioroebe.restriction_enzyme
|
127
|
-
# =================================================================== #
|
128
|
-
_ = ::Bioroebe.restriction_enzyme(i) # bl $BIOROEBE/module_methods.rb
|
129
|
-
erev "We have found a restriction enzyme called "\
|
130
|
-
"#{sfancy(original_input)}#{rev}."
|
131
|
-
e
|
132
|
-
e "#{rev}The sequence this #{mediumorchid(_.size.to_s+'-cutter')}#{rev}"\
|
133
|
-
" relates to is: `"\
|
134
|
-
"#{sfancy(five_prime+simp(_)+rev)}"\
|
135
|
-
"#{sfancy(three_trailer)}#{rev}`"
|
136
|
-
e
|
137
|
-
# =================================================================== #
|
138
|
-
# The variable target_sequence_data will look like this:
|
139
|
-
# ["GCCNNNNNGGC", "7", "7"]
|
140
|
-
# =================================================================== #
|
141
|
-
if target_sequence_data.last == :blunt
|
142
|
-
erev "This restriction enzyme will produce a "\
|
143
|
-
"#{seagreen('blunt')}#{rev} overhang."
|
144
|
-
e
|
145
|
-
else
|
146
|
-
erev "This restriction enzyme will produce a "\
|
147
|
-
"#{seagreen('sticky-end')}#{rev} overhang."
|
148
|
-
e
|
149
|
-
end
|
150
|
-
# =================================================================== #
|
151
|
-
# Next, show the exact cut that will be made.
|
152
|
-
# =================================================================== #
|
153
|
-
sequence = ::Bioroebe.return_sequence_that_is_cut_via_restriction_enzyme(i)
|
154
|
-
erev 'It will specifically cut between: '+
|
155
|
-
sfancy(five_prime)+rev+
|
156
|
-
simp(sequence)+
|
157
|
-
sfancy(three_trailer)+rev
|
158
|
-
# =================================================================== #
|
159
|
-
# And the complementary sequence follows next. The colour used
|
160
|
-
# is swarn().
|
161
|
-
# =================================================================== #
|
162
|
-
complementary_sequence = reverse(
|
163
|
-
Colours.remove_escape_sequences(sequence)
|
164
|
-
)
|
165
|
-
# =================================================================== #
|
166
|
-
# We must insert a | at the right position.
|
167
|
-
# =================================================================== #
|
168
|
-
target_sequence_data = target_sequence_data[1].to_i
|
169
|
-
complementary_sequence[-target_sequence_data,0] = swarn('|')+rev
|
170
|
-
erev ''.ljust(38)+sfancy(leading_three_prime)+rev+
|
171
|
-
complementary_sequence+rev+
|
172
|
-
sfancy(five_prime_trailer)+rev
|
173
|
-
else
|
174
|
-
erev 'We were unable to find a restriction enzyme called '\
|
175
|
-
'`'+sfancy(i)+'`'+rev
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
# ========================================================================= #
|
181
|
-
# === try_to_find_restriction_enzymes_for
|
182
|
-
#
|
183
|
-
# This method name is a slight misnomer; we can simply find any
|
184
|
-
# target sequence.
|
185
|
-
#
|
186
|
-
# The method can also handle some Symbols as input, such as the symbol
|
187
|
-
# :shine_dalgarno, which will be replaced accordingly to the SD
|
188
|
-
# sequence.
|
189
|
-
# ========================================================================= #
|
190
|
-
def try_to_find_restriction_enzymes_for(
|
191
|
-
i
|
192
|
-
)
|
193
|
-
# ======================================================================= #
|
194
|
-
# === We always have to work with an Array as input
|
195
|
-
# ======================================================================= #
|
196
|
-
unless i.is_a? Array
|
197
|
-
i = [i]
|
198
|
-
end
|
199
|
-
i.map! {|entry|
|
200
|
-
case entry # Use special sequences.
|
201
|
-
when :shine_dalgarno
|
202
|
-
entry = 'AGGAGGT'
|
203
|
-
end
|
204
|
-
# ===================================================================== #
|
205
|
-
# Past this point, we will assume a String as input. But we will have
|
206
|
-
# to make sure, still.
|
207
|
-
# ===================================================================== #
|
208
|
-
entry = entry.to_s unless entry.is_a? String
|
209
|
-
entry.delete!('-') if entry.include? '-'
|
210
|
-
entry
|
211
|
-
}
|
212
|
-
i.each {|entry|
|
213
|
-
report_main_sequence(entry) { :with_ruler }
|
214
|
-
possible_results = dna_string?.scan(/#{entry}/)
|
215
|
-
unless possible_results.empty?
|
216
|
-
e
|
217
|
-
erev "Start nucleotide position is at: "\
|
218
|
-
"#{simp((dna_string?.index(entry)+1))}#{rev}"
|
219
|
-
e
|
220
|
-
end
|
221
|
-
}
|
222
|
-
end; alias find_this_sequence try_to_find_restriction_enzymes_for # === find_this_sequence
|
223
|
-
alias find_in_main_sequence try_to_find_restriction_enzymes_for # === find_in_main_sequence
|
224
|
-
|
225
|
-
# ========================================================================= #
|
226
|
-
# === restriction_enzyme_digest
|
227
|
-
#
|
228
|
-
# This method allows us to simulate a restriction digest, on a
|
229
|
-
# DNA polymer.
|
230
|
-
#
|
231
|
-
# You can either give the matching DNA nucleotides or you can
|
232
|
-
# use the name of a restriction enzyme instead, such as 'EcoRI'.
|
233
|
-
#
|
234
|
-
# Usage examples:
|
235
|
-
#
|
236
|
-
# random 750; digest_at TTGC
|
237
|
-
# random 750; digest_at EcoRI
|
238
|
-
# random 2000; [33,0] = GAATTC; digest_at EcoRI
|
239
|
-
#
|
240
|
-
# ========================================================================= #
|
241
|
-
def restriction_enzyme_digest(
|
242
|
-
split_at = nil # Default value is nil.
|
243
|
-
)
|
244
|
-
_ = dna_sequence? # Keep a copy of the DNA sequence.
|
245
|
-
# ======================================================================= #
|
246
|
-
# === Grab the first entry if we have an Array
|
247
|
-
# ======================================================================= #
|
248
|
-
split_at = split_at.first if split_at.is_a? Array
|
249
|
-
split_at = 'TTG' if split_at.nil?
|
250
|
-
split_at = split_at.to_s # Work on Strings past this point here.
|
251
|
-
split_at.sub!(/^first_/,'') if split_at.include? 'first_ATG'
|
252
|
-
# ======================================================================= #
|
253
|
-
# === Chop off all '?' in the sequence
|
254
|
-
# ======================================================================= #
|
255
|
-
split_at.delete!('?') if split_at.include? '?'
|
256
|
-
# ======================================================================= #
|
257
|
-
# Next, allow the user to substitute for names of restriction enzymes.
|
258
|
-
# How do we determine that a restriction enzyme was given to this
|
259
|
-
# method? Simple - we first remove all instances of 'A','T','C','G'
|
260
|
-
# in our DNA sequence string. If the string is then still not empty,
|
261
|
-
# we will assume that it is the name of a restriction enzyme.
|
262
|
-
# ======================================================================= #
|
263
|
-
unless (_.delete('ATGC').size > 0)
|
264
|
-
erev 'Assumingly a restriction enzyme was given as input.'
|
265
|
-
target_sequence = ::Bioroebe.restriction_enzyme(split_at)
|
266
|
-
# Must check for nil values still
|
267
|
-
if target_sequence
|
268
|
-
erev "Substituting with `#{simp(target_sequence)}#{rev}` next (for #{split_at})."
|
269
|
-
split_at = target_sequence
|
270
|
-
else
|
271
|
-
erev 'No substitute could be found for `'+sfancy(split_at)+rev+'`.'
|
272
|
-
end
|
273
|
-
end
|
274
|
-
if _.include? split_at
|
275
|
-
splitted = _.split(split_at)
|
276
|
-
e
|
277
|
-
erev 'We will next display all '+simp(splitted.size.to_s)+rev+
|
278
|
-
' segments that were found (in orange is the part that '\
|
279
|
-
'is cut-out):'
|
280
|
-
e
|
281
|
-
splitted.each_with_index {|sequence, index|
|
282
|
-
index += 1
|
283
|
-
erev lpad?+lead_five_prime+sfancy(sequence)+rev+
|
284
|
-
trail_three_prime+' (size: '+
|
285
|
-
violet(sequence.size.to_s)+rev+')'
|
286
|
-
unless index > (splitted.size-1)
|
287
|
-
erev lpad?+lead_five_prime+orange(split_at)+rev+
|
288
|
-
trail_three_prime+rev
|
289
|
-
end
|
290
|
-
}
|
291
|
-
e
|
292
|
-
erev 'Note that this will NOT be the actual DNA fragments, '\
|
293
|
-
'because the restriction'
|
294
|
-
erev 'enzyme may cut differentially within that orange sequence.'
|
295
|
-
else # The target sequence was not included in this case.
|
296
|
-
erev "No target match for `"\
|
297
|
-
"#{simp(target_sequence)}#{rev}` was "\
|
298
|
-
"found in the given DNA sequence."
|
299
|
-
end
|
300
|
-
end; alias digest restriction_enzyme_digest # === digest
|
301
|
-
|
302
|
-
# ========================================================================= #
|
303
|
-
# === restriction_enzymes_run
|
304
|
-
# ========================================================================= #
|
305
|
-
# Start the restriction-enzymes GUI application.
#
# The GUI file is required inside the method body, i.e. only at the
# moment this method is invoked.
def restriction_enzymes_run
  require 'bioroebe/gui/gtk2/restriction_enzymes/restriction_enzymes.rb'
  gui_class = ::Bioroebe::GUI::Gtk::RestrictionEnzymes
  gui_class.start_gui_application
end
|
309
|
-
|
310
|
-
end; end
|
data/lib/bioroebe/shell/fasta.rb
DELETED
@@ -1,345 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
# Encoding: UTF-8
|
3
|
-
# frozen_string_literal: true
|
4
|
-
# =========================================================================== #
|
5
|
-
# require 'bioroebe/shell/fasta.rb'
|
6
|
-
# =========================================================================== #
|
7
|
-
module Bioroebe
|
8
|
-
|
9
|
-
class Shell < ::Bioroebe::CommandlineApplication
|
10
|
-
|
11
|
-
# ========================================================================= #
|
12
|
-
# === handle_fasta
|
13
|
-
#
|
14
|
-
# Use this method to properly handle a fasta file.
|
15
|
-
#
|
16
|
-
# The argument should be the (local) path to a fasta file.
|
17
|
-
# ========================================================================= #
|
18
|
-
# Handle a (local) FASTA file.
#
# i - the path to a FASTA file, or nil. Any non-String value is converted
#     via to_s.
#
# When i is nil, the registered fasta file (see fasta_file?) is shown if it
# exists on disk; otherwise show_my_fasta_file is called as a reminder.
#
# When i is given, the file is passed to parse_fasta_format() provided it
# exists and ends with ".fasta" or ".fa". Both extensions are accepted
# because the other methods of this class (parse_fasta_format,
# index_this_fasta_file, use_this_fasta_file) also treat ".fa" files as
# FASTA files. If the file cannot be used, a hint is printed when the
# current directory contains at least one .fasta file.
def handle_fasta(i)
  if i.nil?
    registered_file = fasta_file?.to_s
    if File.exist?(registered_file)
      e sfile(registered_file)
    else
      show_my_fasta_file # As a reminder.
    end
  else
    path = i.to_s # Need a String past this point.
    if File.exist?(path) && path.end_with?('.fasta', '.fa')
      opnn; erev 'Trying to parse the file `'+sfile(path)+rev+'` next.'
      parse_fasta_format(path)
    elsif !Dir['*.fasta'].empty?
      erev 'There seems to be at least one .fasta file in this '\
           'directory ('+sdir(return_pwd)+').'
    end
  end
end; alias assign_fasta handle_fasta # === assign_fasta
alias handle_this_fasta_file handle_fasta # === handle_this_fasta_file
|
40
|
-
|
41
|
-
require 'bioroebe/fasta_and_fastq/parse_fasta/misc.rb'
|
42
|
-
# ========================================================================= #
|
43
|
-
# === parse_fasta_format
|
44
|
-
#
|
45
|
-
# Parse FASTA format here. We will delegate into class
|
46
|
-
# Bioroebe::ParseFasta for that.
|
47
|
-
#
|
48
|
-
# Usage example:
|
49
|
-
#
|
50
|
-
# pfasta NM_001180897.3_Saccharomyces_cerevisiae_S288c_Aga2p_AGA2.fasta
|
51
|
-
#
|
52
|
-
# ========================================================================= #
|
53
|
-
# Parse FASTA input by delegating to ::Bioroebe.parse_fasta().
#
# i - one of the following:
#     * an Array: every entry is parsed in turn (recursively)
#     * a String consisting only of digits: used to select an entry
#       from Dir['*']
#     * the String "ASSIGN" (case-insensitive): the data is read ad-hoc
#       from $stdin, terminated by "__"
#     * nil: the first *.fa file of the current directory is used, if any
#     * anything else is handed to ::Bioroebe.parse_fasta() unchanged
#
# For non-Array input the (resolved) input is stored in
# @internal_hash[:fasta_file], the object returned by the parser is
# appended to array_fasta?, and - unless its type? is :protein - its
# sequence is passed to set_dna_sequence().
#
# Usage example:
#
#   pfasta NM_001180897.3_Saccharomyces_cerevisiae_S288c_Aga2p_AGA2.fasta
#
def parse_fasta_format(
    i = nil
  )
  if i.is_a? Array
    i.each {|entry| parse_fasta_format(entry) }
  else
    # ===================================================================== #
    # === If input is only numbers.
    #
    # NOTE(review): other methods in this file map a 1-based position via
    # "- 1"; the "+ 1" used here looks unusual - confirm that it is intended.
    # ===================================================================== #
    i = Dir['*'][i.to_i + 1] if i =~ /^\d+$/ # <- Only numbers.
    case i
    # ===================================================================== #
    # === ASSIGN
    #
    # This entry point can be used by the user to input ad-hoc data
    # for a FASTA sequence.
    # ===================================================================== #
    when /^ASSIGN$/i
      opnn; erev 'Input your FASTA Data now (Use __ to terminate input):'
      # gets('__') returns the data INCLUDING the "__" terminator, and nil
      # on EOF. Remove the terminator itself (a plain chomp would only
      # remove a trailing newline) and then one trailing newline.
      i = ($stdin.gets('__') || '').chomp('__').chomp
    end
    # ===================================================================== #
    # If we did not provide an input, we scan for entries with .fa
    # in the current directory.
    # ===================================================================== #
    if i.nil?
      fa_files = Dir['*.fa']
      i = fa_files unless fa_files.empty?
    end
    i = i.first if i.is_a? Array
    erev "Now loading from `#{sfancy(i)}#{rev}`." if i
    @internal_hash[:fasta_file] = i
    parse_fasta_object = ::Bioroebe.parse_fasta(i)
    # ===================================================================== #
    # === We will store all created fasta objects in an Array
    # ===================================================================== #
    array_fasta? << parse_fasta_object
    this_sequence = parse_fasta_object.sequence?
    # ===================================================================== #
    # Handle large sequences next - a timer snapshot is taken so that the
    # user can be told how long the assignment to the main sequence took.
    # ===================================================================== #
    if this_sequence.size > 1_000_000
      add_timer_snapshot
      erev 'The sequence is fairly large - we will time how long it takes to'
      erev 'assign it to the main sequence.'
    end
    # ===================================================================== #
    # Obtain the type next:
    # ===================================================================== #
    type = parse_fasta_object.type?
    unless type == :protein
      set_dna_sequence(this_sequence)
      if this_sequence.size > 1_000_000
        add_timer_snapshot
        n_seconds_difference = calculate_time_difference.abs.to_f.round(3).to_s
        erev "Loading these #{springgreen(this_sequence.size.to_s)}"\
             "#{rev}"\
             " nucleotides "\
             "took #{sfancy(n_seconds_difference)}#{rev} seconds."
      end
    end
  end
end; alias parse_this_fasta_file parse_fasta_format # === parse_this_fasta_file
|
126
|
-
|
127
|
-
require 'bioroebe/toplevel_methods/delimiter.rb'
|
128
|
-
# ========================================================================= #
|
129
|
-
# === obtain_multiline_fasta
|
130
|
-
#
|
131
|
-
# If we want to obtain multiline FASTA input, that is input that includes
|
132
|
-
# the "\n" newline character, then we can use the following method here.
|
133
|
-
#
|
134
|
-
# We will use $stdin to obtain the input. The end-delimiter will
|
135
|
-
# be ___
|
136
|
-
# ========================================================================= #
|
137
|
-
# Obtain multiline FASTA input (input that contains newline characters)
# from $stdin and hand it to parse_fasta_format().
#
# Input is read up to the delimiter returned by ::Bioroebe.delimiter?.
# Only that trailing delimiter is removed; underscores that are part of
# the data itself (for instance in an accession such as "NM_001180897")
# are kept. All newlines (N) are removed and the result is stripped of
# surrounding whitespace before it is passed on.
#
# If $stdin is already at EOF, no data can be read; a notice is printed
# and parse_fasta_format() is not called.
def obtain_multiline_fasta
  delimiter = ::Bioroebe.delimiter?
  erev 'Input your Fasta format or nucleotide sequence next - '\
       'delimit/end via "'+lightgreen(delimiter)+rev+'" (3x the _ '\
       'character).'
  raw_input = $stdin.gets(delimiter) # nil on EOF
  if raw_input.nil?
    erev 'No input was given.'
  else
    # ===================================================================== #
    # Format the dataset a little: drop the end-delimiter, chop away all
    # newlines and surrounding whitespace.
    # ===================================================================== #
    dataset = raw_input.chomp(delimiter).delete(N).strip
    parse_fasta_format(dataset)
  end
end
|
156
|
-
|
157
|
-
# ========================================================================= #
|
158
|
-
# === array_fasta?
|
159
|
-
# ========================================================================= #
|
160
|
-
# Return the value stored under the :array_fasta key of @internal_hash.
def array_fasta?
  registry = @internal_hash
  registry[:array_fasta]
end
|
163
|
-
|
164
|
-
require 'bioroebe/toplevel_methods/fasta_and_fastq.rb'
|
165
|
-
# ========================================================================= #
|
166
|
-
# === index_this_fasta_file
|
167
|
-
#
|
168
|
-
# This will index FASTA files (.fa or .fasta) via samtools.
|
169
|
-
# ========================================================================= #
|
170
|
-
# Index the given FASTA file(s) by delegating to
# Bioroebe.index_this_fasta_file().
#
# i - a path, or an Array of paths (each entry is handled recursively).
#     Non-Array input is converted via to_s.
#
# If a block is given and it yields
# :use_all_fasta_files_if_no_argument_was_given, then a nil/empty i is
# replaced by all *.fasta and *.fa files of the current directory.
#
# For a path that does not exist, no_file_exists_at() is called instead.
def index_this_fasta_file(i)
  # ======================================================================= #
  # === Handle blocks first
  # ======================================================================= #
  if block_given? && (yield == :use_all_fasta_files_if_no_argument_was_given)
    i = Dir['*.fasta'] + Dir['*.fa'] if i.nil? || i.empty?
  end
  return i.each {|entry| index_this_fasta_file(entry) } if i.is_a? Array
  path = i.to_s # Need to work on a String past this point.
  return no_file_exists_at(path) unless File.exist?(path)
  erev "Indexing the following file next, via "\
       "#{steelblue('samtools')}#{rev}:"
  Bioroebe.index_this_fasta_file(path)
end
|
197
|
-
|
198
|
-
# ========================================================================= #
|
199
|
-
# === try_to_display_this_fasta_entry
|
200
|
-
# ========================================================================= #
|
201
|
-
# Display one entry of the most recently stored FASTA object.
#
# i - the entry to show. A String is treated as a 1-based position and is
#     converted to a 0-based index; any other value is passed to [] of the
#     FASTA object as-is.
#
# If a block is given and it yields :and_assign_it_as_well, the displayed
# data is additionally passed to assign().
#
# When no FASTA object has been stored yet (array_fasta?.last is nil), a
# notice is printed instead of raising a NoMethodError on nil.
def try_to_display_this_fasta_entry(i)
  i = i.to_i - 1 if i.is_a? String # -1 because Arrays in ruby begin at 0.
  last_entry = array_fasta?.last
  if last_entry.nil?
    erev 'No FASTA dataset has been parsed yet.'
    return
  end
  sequence_data = last_entry[i]
  erev sequence_data
  if block_given?
    case yield
    # ===================================================================== #
    # === :and_assign_it_as_well
    # ===================================================================== #
    when :and_assign_it_as_well
      assign(sequence_data) # In this case it will become the new main sequence data.
    end
  end
end
|
219
|
-
|
220
|
-
# ========================================================================= #
|
221
|
-
# === parse_this_fasta_sequence
|
222
|
-
# ========================================================================= #
|
223
|
-
# Read the file at i, remove all newline characters from its content and
# pass the result to set_aminoacid().
#
# Nothing happens (nil is returned) when i is nil/false or does not point
# to a regular file.
def parse_this_fasta_sequence(i)
  return unless i && File.file?(i)
  file_content = File.read(i)
  set_aminoacid(file_content.delete("\n"))
end
|
228
|
-
|
229
|
-
# ========================================================================= #
|
230
|
-
# === fasta?
|
231
|
-
#
|
232
|
-
# We need a query method over the main fasta object, IF it was set.
|
233
|
-
#
|
234
|
-
# Since we already have an Array that keeps track of these objects,
|
235
|
-
# we can simply grab the last one from that collection.
|
236
|
-
# ========================================================================= #
|
237
|
-
# Query method for the main fasta object: returns the last element of the
# collection returned by array_fasta?.
def fasta?
  all_fasta_objects = array_fasta?
  all_fasta_objects.last
end; alias last_fasta? fasta? # === last_fasta?
alias last_fasta_entry? fasta? # === last_fasta_entry?
|
241
|
-
|
242
|
-
# ========================================================================= #
|
243
|
-
# === colourize_fasta_file
|
244
|
-
#
|
245
|
-
# Invocation example:
|
246
|
-
#
|
247
|
-
# colourize_fasta_file /Depot/Temp/bioroebe/sequence.fasta
|
248
|
-
#
|
249
|
-
# ========================================================================= #
|
250
|
-
# Show the sequence of a FASTA file via ColourSchemeDemo.
#
# i - a path, or an Array of paths (each entry is handled recursively).
#
# A path that does not exist is ignored.
#
# Invocation example:
#
#   colourize_fasta_file /Depot/Temp/bioroebe/sequence.fasta
#
def colourize_fasta_file(i)
  return i.each {|entry| colourize_fasta_file(entry) } if i.is_a? Array
  return unless File.exist? i
  # ======================================================================= #
  # Obtain the sequence of the fasta file first, then hand it to
  # ColourSchemeDemo from within a cliner block.
  # ======================================================================= #
  sequence = ::Bioroebe.parse_fasta_file(i).sequence?
  cliner { ColourSchemeDemo.new(sequence) }
end
|
268
|
-
|
269
|
-
# ========================================================================= #
|
270
|
-
# === to_fasta
|
271
|
-
#
|
272
|
-
# Create a Fasta format from the target sequence.
|
273
|
-
# ========================================================================= #
|
274
|
-
# Output the given sequence in a FASTA-like format via e().
#
# i - the sequence to use; defaults to dna_sequence?.
#
# The sequence is split into lines of at most 80 characters. The header
# line consists of a fixed identifier followed by the gene name reported
# by sequence_object?.name_of_gene?; a fixed fallback name is used when
# that name is empty.
#
# The fallback is assigned to the local variable rather than appended via
# <<, because the String returned by to_s may be the very object held by
# the sequence object (which must not be altered here) or a frozen String
# (nil.to_s, or a literal under frozen_string_literal), in which case <<
# raises a FrozenError.
def to_fasta(
    i = dna_sequence?
  )
  lines = i.scan(/.{1,80}/)
  name_of_the_gene = sequence_object?.name_of_gene?.to_s
  if name_of_the_gene.empty?
    name_of_the_gene = 'Drosophila melanogaster chromosome'
  end
  lines.unshift('>gi|671162122:c7086083-7083225 '+name_of_the_gene)
  e lines.join(N)
end
|
285
|
-
|
286
|
-
# ========================================================================= #
|
287
|
-
# === fasta_file?
|
288
|
-
# ========================================================================= #
|
289
|
-
# Return the value stored in @internal_hash under the given key.
#
# i - the key to look up; defaults to :fasta_file.
#
# The lookup is performed on @internal_hash itself. The value stored
# under :fasta_file is whatever parse_fasta_format() assigned (its input,
# e.g. a path String, or nil) - it is not a Hash, so it cannot be queried
# via has_key?/fetch.
#
# If the key does not exist, a notice is printed via erev.
def fasta_file?(i = :fasta_file)
  if @internal_hash.has_key?(i)
    @internal_hash.fetch(i)
  else
    erev 'We could not find the key called `'+simp(i.to_s)+rev+'`.'
  end
end
|
296
|
-
|
297
|
-
# ========================================================================= #
|
298
|
-
# === create_fasta_file
|
299
|
-
# ========================================================================= #
|
300
|
-
# Create a new fasta file: a fixed header line followed by the value of
# string? is handed to save_file(), using @internal_hash[:save_file] as
# the target.
def create_fasta_file
  set_save_file :default_fasta
  # NOTE(review): the notice below reads @save_file whereas the actual
  # save target is @internal_hash[:save_file] - confirm that both refer
  # to the same location.
  e 'Now creating a new fasta file. Will store into `'+sfile(@save_file)+'`.'
  header_line = '>gi|12345|pir|TVHGG| some unknown protein'
  fasta_content = header_line+N
  fasta_content << string?
  save_file(fasta_content, @internal_hash[:save_file])
end
|
307
|
-
|
308
|
-
# ========================================================================= #
|
309
|
-
# === return_fasta_files_in_the_log_directory
|
310
|
-
# ========================================================================= #
|
311
|
-
# Return all entries below ::Bioroebe.log_dir? that match the glob
# pattern "*.fa*".
def return_fasta_files_in_the_log_directory
  glob_pattern = ::Bioroebe.log_dir?+'*.fa*'
  Dir[glob_pattern]
end
|
314
|
-
|
315
|
-
# ========================================================================= #
|
316
|
-
# === use_this_fasta_file
|
317
|
-
#
|
318
|
-
# Use a fasta file based on its position.
|
319
|
-
#
|
320
|
-
# For instance, fasta file at position 2 will be the second fasta file
|
321
|
-
# kept in the main log directory.
|
322
|
-
# ========================================================================= #
|
323
|
-
# Use a fasta file based on its (1-based) position among the files
# returned by return_fasta_files_in_the_log_directory.
#
# at_position - the position of the file; converted via to_i. Position 2
#               refers to the second file, and so forth.
#
# If a file exists at that position it is passed to assign_fasta();
# otherwise a notice is printed.
#
# Positions smaller than 1 (including non-numeric input, for which to_i
# yields 0) are treated as "not found": they would otherwise result in a
# negative Array index and silently select a file counted from the end.
def use_this_fasta_file(at_position = 1)
  # ======================================================================= #
  # We need to map the given position to the existing (local) file at hand.
  # ======================================================================= #
  index = at_position.to_i - 1
  this_fasta_file = nil
  if index >= 0
    this_fasta_file = return_fasta_files_in_the_log_directory[index]
  end
  if this_fasta_file
    assign_fasta(this_fasta_file)
  else
    erev 'Could not find any file at position '+simp(at_position.to_s)+rev+'.'
    erev 'Use "'+steelblue('show_fasta_files')+rev+
         '" to see which fasta files are available.'
  end
end
|
336
|
-
|
337
|
-
# ========================================================================= #
|
338
|
-
# === show_my_fasta_file
|
339
|
-
# ========================================================================= #
|
340
|
-
# Print the path of a fixed FASTA file that is located below
# HOME_DIRECTORY_OF_USER_X.
def show_my_fasta_file
  relative_path = 'data/science/BIOINFORMATIK/DATA/FASTA/tardigrada_fasta.ffn'
  e HOME_DIRECTORY_OF_USER_X+relative_path
end
|
344
|
-
|
345
|
-
end; end
|