bioroebe 0.10.80 → 0.12.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3946 -2817
- data/bin/bioroebe +13 -2
- data/bin/bioroebe_hash +7 -0
- data/bin/codon_to_aminoacid +6 -4
- data/bin/compacter +7 -0
- data/bin/plain_palindrome +7 -0
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +3918 -2793
- data/doc/quality_control/commandline_applications.md +3 -3
- data/doc/statistics/statistics.md +7 -7
- data/doc/todo/bioroebe_GUI_todo.md +19 -14
- data/doc/todo/bioroebe_java_todo.md +22 -0
- data/doc/todo/bioroebe_todo.md +2075 -2620
- data/lib/bioroebe/C++/DNA.cpp +69 -0
- data/lib/bioroebe/C++/RNA.cpp +58 -0
- data/lib/bioroebe/C++/sequence.cpp +35 -0
- data/lib/bioroebe/abstract/README.md +1 -0
- data/lib/bioroebe/abstract/features.rb +29 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
- data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
- data/lib/bioroebe/base/base_module/base_module.rb +36 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +13 -9
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +24 -19
- data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
- data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
- data/lib/bioroebe/base/commandline_application/reset.rb +5 -3
- data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +42 -0
- data/lib/bioroebe/base/misc.rb +35 -0
- data/lib/bioroebe/base/prototype/misc.rb +15 -9
- data/lib/bioroebe/base/prototype/reset.rb +10 -0
- data/lib/bioroebe/cleave_and_digest/digestion.rb +10 -2
- data/lib/bioroebe/cleave_and_digest/trypsin.rb +104 -50
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
- data/lib/bioroebe/codons/codons.rb +1 -1
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +208 -59
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
- data/lib/bioroebe/codons/show_codon_tables.rb +8 -3
- data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
- data/lib/bioroebe/colours/rev.rb +4 -1
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/database_constants.rb +1 -1
- data/lib/bioroebe/constants/files_and_directories.rb +31 -4
- data/lib/bioroebe/constants/misc.rb +20 -0
- data/lib/bioroebe/constants/nucleotides.rb +7 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +109 -39
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/cpp +1 -0
- data/lib/bioroebe/crystal/README.md +2 -0
- data/lib/bioroebe/crystal/to_rna.cr +19 -0
- data/lib/bioroebe/data/README.md +11 -8
- data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
- data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
- data/lib/bioroebe/data/fasta/human/README.md +2 -0
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +1 -1
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
- data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
- data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1518 -7
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
- data/lib/bioroebe/genome/README.md +4 -0
- data/lib/bioroebe/genome/genome.rb +130 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
- data/lib/bioroebe/gui/gtk +1 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +106 -137
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +27 -61
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +1 -1
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +1 -2
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +1 -2
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +46 -29
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +77 -52
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +100 -23
- data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +1 -2
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +1 -2
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +43 -30
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +1 -2
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +120 -73
- data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +19 -20
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +20 -13
- data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +1 -2
- data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +97 -22
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +3 -73
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +1 -2
- data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +1 -2
- data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +1 -2
- data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +1 -2
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.class +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.java +104 -0
- data/lib/bioroebe/gui/javafx/bioroebe.jar +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe.mf +1 -0
- data/lib/bioroebe/gui/javafx/module-info.class +0 -0
- data/lib/bioroebe/gui/javafx/module-info.java +5 -0
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
- data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +166 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
- data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
- data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +94 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
- data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
- data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
- data/lib/bioroebe/images/images.html +29845 -0
- data/lib/bioroebe/java/README.md +5 -0
- data/lib/bioroebe/java/bioroebe/AllInOne.java +1 -0
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Base.java +39 -5
- data/lib/bioroebe/java/bioroebe/IsPalindrome.java +23 -5
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +0 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +28 -3
- data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToCamelcase.java +16 -4
- data/lib/bioroebe/java/bioroebe/ToRNA.java +43 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +6 -0
- data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/BisulfiteTreatment.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Codons.class → src/Codons.class} +0 -0
- data/lib/bioroebe/java/bioroebe/src/Codons.java +35 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.java +101 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.class → src/Esystem.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.java → src/Esystem.java} +6 -1
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/GenerateRandomDnaSequence.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/GenerateRandomDnaSequence.java} +8 -2
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.java +56 -0
- data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/RemoveFile.java} +10 -4
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/RemoveNumbers.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/RemoveNumbers.java} +1 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.java +75 -0
- data/lib/bioroebe/misc/ruler.rb +11 -2
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +59 -18
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
- data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
- data/lib/bioroebe/parsers/gff.rb +1 -9
- data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
- data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
- data/lib/bioroebe/project/project.rb +1 -1
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
- data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +33 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/convert_dna_to_aminoacid_sequence.py +137 -0
- data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
- data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
- data/lib/bioroebe/python/toplevel_methods/palindromes.py +52 -0
- data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
- data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
- data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
- data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
- data/lib/bioroebe/requires/require_cleave_and_digest.rb +3 -1
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
- data/lib/bioroebe/sequence/alignment.rb +14 -4
- data/lib/bioroebe/sequence/dna.rb +1 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/protein.rb +105 -3
- data/lib/bioroebe/sequence/rna.rb +220 -0
- data/lib/bioroebe/sequence/sequence.rb +128 -40
- data/lib/bioroebe/shell/menu.rb +3815 -3696
- data/lib/bioroebe/shell/misc.rb +9019 -3133
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +1137 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
- data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
- data/lib/bioroebe/taxonomy/constants.rb +4 -3
- data/lib/bioroebe/taxonomy/edit.rb +2 -1
- data/lib/bioroebe/taxonomy/help/help.rb +10 -10
- data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
- data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
- data/lib/bioroebe/taxonomy/info/info.rb +18 -11
- data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
- data/lib/bioroebe/taxonomy/interactive.rb +140 -104
- data/lib/bioroebe/taxonomy/menu.rb +27 -18
- data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
- data/lib/bioroebe/taxonomy/shared.rb +1 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
- data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
- data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
- data/lib/bioroebe/toplevel_methods/misc.rb +175 -11
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +75 -47
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +3 -3
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
- data/lib/bioroebe/utility_scripts/compacter/compacter.rb +251 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
- data/lib/bioroebe/utility_scripts/permutations.rb +36 -9
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/www/embeddable_interface.rb +121 -58
- data/lib/bioroebe/www/sinatra/sinatra.rb +186 -71
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
- data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +17 -17
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
- data/lib/bioroebe/yaml/genomes/README.md +3 -4
- data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +5 -0
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +57 -57
- data/spec/README.md +6 -0
- data/spec/project_wide_specification/classes.md +5 -0
- metadata +107 -70
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
- data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
- data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
- data/lib/bioroebe/utility_scripts/compacter.rb +0 -131
- /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/BisulfiteTreatment.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/RemoveFile.class} +0 -0
data/lib/bioroebe/misc/ruler.rb
CHANGED
@@ -24,6 +24,8 @@
|
|
24
24
|
# variant - that one should work fine.
|
25
25
|
# =========================================================================== #
|
26
26
|
# require 'bioroebe/misc/ruler.rb'
|
27
|
+
# puts Bioroebe.return_ruler('ATGCTGACAGGGGGGGEEEEEE')
|
28
|
+
# Bioroebe.ruler_return_as_string_without_colours 'ATGCTGACAGGGGGGGEEEEEE'
|
27
29
|
# Bioroebe::Ruler.new(ARGV)
|
28
30
|
# =========================================================================== #
|
29
31
|
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
@@ -54,8 +56,14 @@ class Ruler < ::Bioroebe::CommandlineApplication # === Bioroebe::Ruler
|
|
54
56
|
set_group_together_n_nucleotides(
|
55
57
|
group_together_n_nucleotides
|
56
58
|
)
|
59
|
+
# ======================================================================= #
|
60
|
+
# === Handle blocks next
|
61
|
+
# ======================================================================= #
|
57
62
|
if block_given?
|
58
63
|
yielded = yield
|
64
|
+
# ===================================================================== #
|
65
|
+
# === Handle Hashes next
|
66
|
+
# ===================================================================== #
|
59
67
|
if yielded.is_a? Hash
|
60
68
|
# =================================================================== #
|
61
69
|
# === :ruler
|
@@ -206,14 +214,15 @@ end
|
|
206
214
|
# This will return the ruler as a String; it exists mostly for
|
207
215
|
# convenience reasons.
|
208
216
|
#
|
209
|
-
# Invocation
|
217
|
+
# Invocation examples:
|
210
218
|
#
|
211
219
|
# puts Bioroebe.ruler_return_as_string 'ATGCTGACAGGGGGGGEEEEEE'
|
220
|
+
# puts Bioroebe.return_ruler('ATGCTGACAGGGGGGGEEEEEE')
|
212
221
|
#
|
213
222
|
# =========================================================================== #
|
214
223
|
def self.ruler_return_as_string(i, group_together_n_nucleotides = 70)
|
215
224
|
::Bioroebe::Ruler.new(i, group_together_n_nucleotides).result_as_string
|
216
|
-
end
|
225
|
+
end; self.instance_eval { alias return_ruler ruler_return_as_string } # === Bioroebe.return_ruler
|
217
226
|
|
218
227
|
# =========================================================================== #
|
219
228
|
# === Bioroebe.ruler_return_as_string_without_colours
|
data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb
CHANGED
@@ -27,11 +27,6 @@ class MostLikelyNucleotideSequenceForThisAminoacidSequence < ::Bioroebe::Command
|
|
27
27
|
require 'bioroebe/aminoacids/codon_percentage.rb'
|
28
28
|
require 'bioroebe/codons/possible_codons_for_this_aminoacid.rb'
|
29
29
|
|
30
|
-
# ========================================================================= #
|
31
|
-
# === NAMESPACE
|
32
|
-
# ========================================================================= #
|
33
|
-
NAMESPACE = inspect
|
34
|
-
|
35
30
|
# ========================================================================= #
|
36
31
|
# === initialize
|
37
32
|
# ========================================================================= #
|
@@ -63,10 +58,7 @@ class MostLikelyNucleotideSequenceForThisAminoacidSequence < ::Bioroebe::Command
|
|
63
58
|
# ========================================================================= #
|
64
59
|
def reset
|
65
60
|
super()
|
66
|
-
|
67
|
-
# === @namespace
|
68
|
-
# ======================================================================= #
|
69
|
-
@namespace = NAMESPACE
|
61
|
+
infer_the_namespace
|
70
62
|
# ======================================================================= #
|
71
63
|
# === @internal_hash
|
72
64
|
# ======================================================================= #
|
@@ -33,10 +33,25 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
33
33
|
# ========================================================================= #
|
34
34
|
def initialize(
|
35
35
|
i = nil,
|
36
|
-
run_already = true
|
36
|
+
run_already = true,
|
37
|
+
&block
|
37
38
|
)
|
38
39
|
reset
|
39
40
|
set_input(i)
|
41
|
+
# ======================================================================= #
|
42
|
+
# === Handle blocks next
|
43
|
+
# ======================================================================= #
|
44
|
+
if block_given?
|
45
|
+
yielded = yield
|
46
|
+
case yielded
|
47
|
+
# ===================================================================== #
|
48
|
+
# === :do_not_remove_newlines
|
49
|
+
# ===================================================================== #
|
50
|
+
when /do(_|-)?not(_|-)?remove(_|-)?newlines$/,
|
51
|
+
:do_not_remove_newlines
|
52
|
+
@shall_we_remove_newlines = false
|
53
|
+
end
|
54
|
+
end
|
40
55
|
run if run_already
|
41
56
|
end
|
42
57
|
|
@@ -45,6 +60,19 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
45
60
|
# ========================================================================= #
|
46
61
|
def reset
|
47
62
|
super()
|
63
|
+
# ======================================================================= #
|
64
|
+
# === @shall_we_remove_newlines
|
65
|
+
#
|
66
|
+
# By default newlines will be removed.
|
67
|
+
# ======================================================================= #
|
68
|
+
@shall_we_remove_newlines = true
|
69
|
+
end
|
70
|
+
|
71
|
+
# ========================================================================= #
|
72
|
+
# === shall_we_remove_newlines?
|
73
|
+
# ========================================================================= #
|
74
|
+
def shall_we_remove_newlines?
|
75
|
+
@shall_we_remove_newlines
|
48
76
|
end
|
49
77
|
|
50
78
|
# ========================================================================= #
|
@@ -57,30 +85,37 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
57
85
|
if i.first.start_with?('>') and i.first.include?(N) # Assume gi number, which we will chop off.
|
58
86
|
i[0] = i[0][i[0].index(N) .. -1] # Keep everything from the first newline onwards (this drops the header part).
|
59
87
|
end
|
60
|
-
i = i.join(' ').strip
|
88
|
+
# i = i.join(' ') # .strip
|
61
89
|
end
|
62
|
-
i = i.to_s.dup
|
90
|
+
# i = i.to_s.dup
|
91
|
+
i = [i] unless i.is_a?(Array)
|
63
92
|
@input = i
|
64
|
-
sanitize_input
|
65
93
|
end
|
66
94
|
|
67
95
|
# ========================================================================= #
|
68
|
-
# ===
|
96
|
+
# === sanitize_the_input
|
69
97
|
# ========================================================================= #
|
70
|
-
def
|
71
|
-
@input.
|
72
|
-
|
98
|
+
def sanitize_the_input
|
99
|
+
@input.reject! {|entry| entry.empty? }
|
100
|
+
if shall_we_remove_newlines?
|
101
|
+
@input.map! {|entry|
|
102
|
+
entry = entry.delete("\n")
|
103
|
+
entry = entry.delete("\\\\n")
|
104
|
+
}
|
105
|
+
end
|
73
106
|
# ======================================================================= #
|
74
107
|
# Next remove all numbers.
|
75
108
|
# ======================================================================= #
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
109
|
+
@input.map! {|entry|
|
110
|
+
chars = entry.chars
|
111
|
+
chars.reject! {|inner_entry| inner_entry =~ /\d+/ } # Reject numbers.
|
112
|
+
if entry.include? '/'
|
113
|
+
entry.delete('/')
|
114
|
+
end
|
115
|
+
entry
|
116
|
+
}
|
117
|
+
@input = @input.join
|
118
|
+
end; alias sanitize_input sanitize_the_input # === sanitize_input
|
84
119
|
|
85
120
|
# ========================================================================= #
|
86
121
|
# === input?
|
@@ -102,13 +137,19 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
102
137
|
# === run (run tag)
|
103
138
|
# ========================================================================= #
|
104
139
|
def run
|
140
|
+
sanitize_the_input
|
105
141
|
end
|
106
142
|
|
107
143
|
# ========================================================================= #
|
108
144
|
# === Bioroebe::SanitizeNucleotideSequence[]
|
145
|
+
#
|
146
|
+
# This method will return a consecutive nucleotide String by default.
|
109
147
|
# ========================================================================= #
|
110
|
-
def self.[](
|
111
|
-
|
148
|
+
def self.[](
|
149
|
+
i = "1 ATCCG\n30 TTA",
|
150
|
+
&block
|
151
|
+
)
|
152
|
+
new(i, &block).result?
|
112
153
|
end
|
113
154
|
|
114
155
|
end
|
@@ -475,13 +475,6 @@ class ShowNucleotideSequence < ::Bioroebe::Sequence # === Bioroebe::ShowNucleoti
|
|
475
475
|
erev i
|
476
476
|
end; alias display report # === display (display tag)
|
477
477
|
|
478
|
-
# ========================================================================= #
|
479
|
-
# === do_colourize_the_start_codon
|
480
|
-
# ========================================================================= #
|
481
|
-
def do_colourize_the_start_codon
|
482
|
-
add_this_substring('ATG')
|
483
|
-
end
|
484
|
-
|
485
478
|
# ========================================================================= #
|
486
479
|
# === colourize_dna_sequence
|
487
480
|
# ========================================================================= #
|
@@ -529,6 +522,13 @@ class ShowNucleotideSequence < ::Bioroebe::Sequence # === Bioroebe::ShowNucleoti
|
|
529
522
|
end; alias set_search_for search_for_this_substring # === set_search_for
|
530
523
|
alias add_this_substring search_for_this_substring # === add_this_substring
|
531
524
|
|
525
|
+
# ========================================================================= #
|
526
|
+
# === do_colourize_the_start_codon
|
527
|
+
# ========================================================================= #
|
528
|
+
def do_colourize_the_start_codon
|
529
|
+
add_this_substring('ATG')
|
530
|
+
end
|
531
|
+
|
532
532
|
# ========================================================================= #
|
533
533
|
# === run
|
534
534
|
# ========================================================================= #
|
@@ -4,10 +4,29 @@
|
|
4
4
|
# =========================================================================== #
|
5
5
|
# === Bioroebe::GenbankParser
|
6
6
|
#
|
7
|
-
# This class can be used to parse genbank-files
|
8
|
-
#
|
7
|
+
# This class can be used to parse genbank-files (typically stored as .genbank
|
8
|
+
# or .gbk, so their file extension is usually ".gbk").
|
9
|
+
#
|
10
|
+
# Since the rewrite in July 2022 the class can also handle multiple
|
11
|
+
# fasta entries now.
|
12
|
+
#
|
13
|
+
# The class is similar to class FastaParser, but instead it will only
|
14
|
+
# select the content between "ORIGIN" and "VERSION" entries.
|
15
|
+
#
|
16
|
+
# The user can pass the content of a genbank-file to this class, and it
|
17
|
+
# can then report the nucleotide sequence, e. g. the part starting after
|
18
|
+
# the ORIGIN string.
|
19
|
+
#
|
20
|
+
# The reason why this class has been created was because it is sometimes
|
21
|
+
# necessary to parse a genbank file.
|
22
|
+
#
|
23
|
+
# Usage example:
|
24
|
+
#
|
25
|
+
# Bioroebe::GenbankParser.new(ARGV)
|
26
|
+
#
|
9
27
|
# =========================================================================== #
|
10
|
-
# require 'bioroebe/
|
28
|
+
# require 'bioroebe/genbank/genbank_parser.rb'
|
29
|
+
# genbank_parser = Bioroebe::GenbankParser.new(ARGV)
|
11
30
|
# =========================================================================== #
|
12
31
|
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
13
32
|
|
@@ -16,63 +35,365 @@ module Bioroebe
|
|
16
35
|
class GenbankParser < ::Bioroebe::CommandlineApplication # === Bioroebe::GenbankParser
|
17
36
|
|
18
37
|
# ========================================================================= #
|
19
|
-
# ===
|
38
|
+
# === UPCASE_THE_SEQUENCE
|
39
|
+
#
|
40
|
+
# Setting this constant to true will cause this class to store the
|
41
|
+
# FASTA sequence in an upcased variant, e. g. "AGCAGCTA" rather
|
42
|
+
# than "acgatcag".
|
20
43
|
# ========================================================================= #
|
21
|
-
|
44
|
+
UPCASE_THE_SEQUENCE = true
|
45
|
+
|
46
|
+
# ========================================================================= #
|
47
|
+
# === TEST_STRING
|
48
|
+
#
|
49
|
+
# Our example test-string, to show what such a genbank file usually looks
|
50
|
+
# like.
|
51
|
+
#
|
52
|
+
# This will contain two different FASTA sequences.
|
53
|
+
# ========================================================================= #
|
54
|
+
TEST_STRING = ' /note="internal transcribed spacer 2"
|
55
|
+
ORIGIN
|
56
|
+
1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
|
57
|
+
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
|
58
|
+
121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
|
59
|
+
181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
|
60
|
+
241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
|
61
|
+
301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
|
62
|
+
361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
|
63
|
+
421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
|
64
|
+
481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
|
65
|
+
541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
|
66
|
+
601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
|
67
|
+
661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
|
68
|
+
721 gatgcaaccc caggtcaggt gagga
|
69
|
+
//
|
70
|
+
LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
|
71
|
+
DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
|
72
|
+
ACCESSION Z78510
|
73
|
+
VERSION Z78510.1 GI:2765635
|
74
|
+
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
|
75
|
+
ITS1; ITS2.
|
76
|
+
SOURCE Phragmipedium caricinum
|
77
|
+
ORGANISM Phragmipedium caricinum
|
78
|
+
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
79
|
+
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
|
80
|
+
Cypripedioideae; Phragmipedium.
|
81
|
+
REFERENCE 1
|
82
|
+
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
|
83
|
+
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
|
84
|
+
Orchidaceae): nuclear rDNA ITS sequences
|
85
|
+
JOURNAL Unpublished
|
86
|
+
REFERENCE 2 (bases 1 to 750)
|
87
|
+
AUTHORS Cox,A.V.
|
88
|
+
TITLE Direct Submission
|
89
|
+
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
|
90
|
+
Richmond, Surrey TW9 3AB, UK
|
91
|
+
FEATURES Location/Qualifiers
|
92
|
+
source 1..750
|
93
|
+
/organism="Phragmipedium caricinum"
|
94
|
+
/mol_type="genomic DNA"
|
95
|
+
/db_xref="taxon:53127"
|
96
|
+
misc_feature 1..380
|
97
|
+
/note="internal transcribed spacer 1"
|
98
|
+
gene 381..550
|
99
|
+
/gene="5.8S rRNA"
|
100
|
+
rRNA 381..550
|
101
|
+
/gene="5.8S rRNA"
|
102
|
+
/product="5.8S ribosomal RNA"
|
103
|
+
misc_feature 551..750
|
104
|
+
/note="internal transcribed spacer 2"
|
105
|
+
ORIGIN
|
106
|
+
1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
|
107
|
+
61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
|
108
|
+
121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
|
109
|
+
181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
|
110
|
+
241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
|
111
|
+
301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
|
112
|
+
361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
|
113
|
+
421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
|
114
|
+
481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
|
115
|
+
541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
|
116
|
+
601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
|
117
|
+
661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
|
118
|
+
721 ccaggtcagg tgagccaccc gctgagttta
|
119
|
+
//
|
120
|
+
LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
|
121
|
+
DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
|
122
|
+
ACCESSION Z78509
|
123
|
+
VERSION Z78509.1 GI:2765634
|
124
|
+
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
|
125
|
+
ITS1; ITS2.
|
126
|
+
SOURCE Phragmipedium pearcei
|
127
|
+
ORGANISM Phragmipedium pearcei
|
128
|
+
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
129
|
+
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
|
130
|
+
Cypripedioideae; Phragmipedium.
|
131
|
+
REFERENCE 1
|
132
|
+
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
|
133
|
+
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
|
134
|
+
Orchidaceae): nuclear rDNA ITS sequences
|
135
|
+
JOURNAL Unpublished
|
136
|
+
REFERENCE 2 (bases 1 to 731)
|
137
|
+
AUTHORS Cox,A.V.
|
138
|
+
TITLE Direct Submission
|
139
|
+
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
|
140
|
+
Richmond, Surrey TW9 3AB, UK
|
141
|
+
FEATURES Location/Qualifiers
|
142
|
+
source 1..731
|
143
|
+
/organism="Phragmipedium pearcei"
|
144
|
+
/mol_type="genomic DNA"
|
145
|
+
/db_xref="taxon:53135"
|
146
|
+
misc_feature 1..380
|
147
|
+
/note="internal transcribed spacer 1"
|
148
|
+
gene 381..550
|
149
|
+
/gene="5.8S rRNA"
|
150
|
+
rRNA 381..550
|
151
|
+
/gene="5.8S rRNA"
|
152
|
+
/product="5.8S ribosomal RNA"
|
153
|
+
misc_feature 551..731
|
154
|
+
/note="internal transcribed spacer 2"
|
155
|
+
ORIGIN
|
156
|
+
1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
|
157
|
+
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
|
158
|
+
121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
|
159
|
+
181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
|
160
|
+
241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
|
161
|
+
301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
|
162
|
+
361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
|
163
|
+
421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
|
164
|
+
481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
|
165
|
+
541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
|
166
|
+
601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
|
167
|
+
661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
|
168
|
+
721 taattttagg a
|
169
|
+
//
|
170
|
+
LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
|
22
171
|
|
23
172
|
# ========================================================================= #
|
24
173
|
# === initialize
|
25
174
|
# ========================================================================= #
|
26
175
|
# Set up a new parser instance.
#
# commandline_arguments is handed to set_commandline_arguments();
# run_already decides whether run() is invoked at the end.
#
# An optional block may be given: when it yields the Symbol
# :do_not_report_anything, the :report_the_dataset flag of
# @internal_hash is switched off before run() is called.
def initialize(
    commandline_arguments = nil,
    run_already           = true
  )
  reset
  set_commandline_arguments(commandline_arguments)
  # menu() handles the hyphenated arguments and may call exit.
  menu
  if block_given?
    case yield
    # ===================================================================== #
    # === :do_not_report_anything
    # ===================================================================== #
    when :do_not_report_anything
      @internal_hash[:report_the_dataset] = false
    end
  end
  return unless run_already
  run
end
|
36
196
|
|
37
197
|
# ========================================================================= #
|
38
|
-
# === reset
|
198
|
+
# === reset (reset tag)
|
39
199
|
# ========================================================================= #
|
40
200
|
# Bring the instance back to its default state: call the parent reset,
# infer the namespace and then (re)populate the entries of
# @internal_hash that this class works with.
#
# NOTE(review): @internal_hash itself is not created here (the
# assignment is commented out in the original), so it presumably is
# set up by super() - confirm against the parent class.
def reset
  super()
  infer_the_namespace
  {
    # The file that is to be parsed; unknown at this point.
    work_on_this_file: nil,
    # Whether the parsed dataset is reported to the user.
    report_the_dataset: true,
    # Keeps track of how many FASTA entries are in the genbank file
    # at hand.
    n_FASTA_entries_in_the_file: 0,
    # All FASTA sequences found in the genbank file at hand. This
    # constitutes the main dataset of this class.
    dataset_from_all_FASTA_entries_as_a_hash: {}
  }.each_pair { |key, default_value|
    @internal_hash[key] = default_value
  }
end
|
43
231
|
|
44
232
|
# ========================================================================= #
|
45
|
-
# ===
|
233
|
+
# === menu (menu tag)
|
46
234
|
# ========================================================================= #
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
235
|
+
# Handle the commandline flags.
#
# i defaults to commandline_arguments_containing_leading_hyphens?.
# An Array is processed entry by entry; any other value is matched
# against the known flags. Both known flags terminate the program
# via exit once they have been handled.
def menu(
    i = commandline_arguments_containing_leading_hyphens?
  )
  return i.each { |entry| menu(entry) } if i.is_a?(Array)
  case i # (case tag)
  # ======================================================================= #
  # === gparser --help
  # ======================================================================= #
  when /^-?-?help$/i
    show_help
    exit
  # ======================================================================= #
  # === gparser --test
  #
  # This entry point can be used to test the default TEST_STRING.
  # ======================================================================= #
  when /^-?-?test$/i,
       /^-?-?test(-|_)?string$/i
    analyse_this_dataset(TEST_STRING)
    exit
  end
end
|
63
260
|
|
261
|
+
# ========================================================================= #
|
262
|
+
# === work_on_which_file?
|
263
|
+
# ========================================================================= #
|
264
|
+
# Query method: returns whatever is stored under :work_on_this_file
# in @internal_hash (nil after reset, until set_work_on_this_file
# has been called).
def work_on_which_file?
  @internal_hash[:work_on_this_file]
end
|
267
|
+
|
268
|
+
# ========================================================================= #
|
269
|
+
# === report_the_dataset?
|
270
|
+
# ========================================================================= #
|
271
|
+
# Query method: returns the :report_the_dataset flag of @internal_hash.
# reset() sets it to true; initialize() switches it to false when its
# block yields :do_not_report_anything.
def report_the_dataset?
  @internal_hash[:report_the_dataset]
end
|
274
|
+
|
275
|
+
# ========================================================================= #
|
276
|
+
# === set_work_on_this_file
|
277
|
+
# ========================================================================= #
|
278
|
+
# Remember which file is to be parsed. The value is stored under
# :work_on_this_file in @internal_hash and is also returned.
#
# i defaults to first_argument?.
def set_work_on_this_file(
    i = first_argument?
  )
  @internal_hash.store(:work_on_this_file, i)
end
|
283
|
+
|
284
|
+
# ========================================================================= #
|
285
|
+
# === analyse_this_dataset
|
286
|
+
# ========================================================================= #
|
287
|
+
# Analyse the content of a genbank file, passed in as one String.
#
# The dataset is split into records at the "//" terminator lines. For
# every record that has both a VERSION line and an ORIGIN section, one
# String of the form "VERSION <id>\nORIGIN\n<sequence lines>" is built.
# These Strings are then handed to
# discover_the_corresponding_FASTA_entries_from_this_dataset(), and
# afterwards the findings are (optionally) reported.
#
# Why per-record splitting: in the genbank format the VERSION line
# comes before the ORIGIN section of the same record. Matching from
# ORIGIN up to the next VERSION therefore attaches each sequence to the
# id of the following record and never captures the last record.
#
# Records lacking either part (e. g. a truncated leading or trailing
# fragment) are skipped. The number of usable records is stored under
# :n_FASTA_entries_in_the_file.
def analyse_this_dataset(dataset)
  records = dataset.to_s.split(%r{^[ \t]*//[ \t\r]*$})
  scanned = records.map { |record|
    version_line   = record[/^[ \t]*VERSION[ \t]+\S+/]
    origin_section = record[/^[ \t]*ORIGIN\b.*/m] # Runs until the end of the record.
    "#{version_line}\n#{origin_section}" if version_line && origin_section
  }.compact
  # Store the count, not the Array itself.
  @internal_hash[:n_FASTA_entries_in_the_file] = scanned.size
  discover_the_corresponding_FASTA_entries_from_this_dataset(scanned)
  consider_reporting_our_findings_to_the_user
end; alias determine_dataset analyse_this_dataset # === determine_dataset
|
295
|
+
|
296
|
+
# ========================================================================= #
|
297
|
+
# === dataset?
|
298
|
+
# ========================================================================= #
|
299
|
+
# Query method: returns the Hash stored under
# :dataset_from_all_FASTA_entries_as_a_hash in @internal_hash. It is
# filled by discover_the_corresponding_FASTA_entries_from_this_dataset(),
# which uses the id as key and the sequence as value.
def dataset?
  @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash]
end; alias main_dataset? dataset? # === main_dataset?
|
302
|
+
|
303
|
+
# ========================================================================= #
|
304
|
+
# === sequences?
|
305
|
+
# ========================================================================= #
|
306
|
+
# Returns an Array holding all values of the main dataset (see
# dataset?), in the order in which they are stored in that Hash.
def sequences?
  dataset?.map { |_id, sequence| sequence }
end
|
309
|
+
|
64
310
|
# ========================================================================= #
|
65
311
|
# === sequence?
|
66
312
|
# ========================================================================= #
|
67
313
|
# Returns the first entry of sequences?, or nil when there is none.
# Also available as coding_sequence? and cds.
def sequence?
  sequences?[0]
end; alias coding_sequence? sequence? # === coding_sequence?
     alias cds              sequence? # === cds
|
71
317
|
|
318
|
+
# ========================================================================= #
|
319
|
+
# === discover_the_corresponding_FASTA_entries_from_this_dataset
|
320
|
+
# ========================================================================= #
|
321
|
+
# Extract the id and the sequence out of each given chunk of genbank
# text and store them in
# @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash], using the
# id as key and the sequence as value.
#
# i should be an Array of Strings; a single String is accepted as
# well. Any other input is ignored.
#
# The id is the token following the keyword VERSION. The sequence is
# assembled from every line that starts with (optional indentation
# and) a number; blanks are removed, and the result is upcased if
# UPCASE_THE_SEQUENCE is true.
def discover_the_corresponding_FASTA_entries_from_this_dataset(i)
  # \w also covers the underscore found in ids such as NC_000913.3.
  regex_to_use_for_the_id = /VERSION\s*([\.\w\-]+)/
  # The capture group must not include newlines: otherwise it swallows
  # the indentation of the next line, ^ no longer matches there and
  # every second sequence line would be skipped.
  regex_for_one_sequence_line = /^[ \t]*\d+([ \ta-zA-Z]+)/
  i = [i] if i.is_a? String
  return unless i.is_a? Array
  i.each { |this_dataset|
    use_this_id = this_dataset[regex_to_use_for_the_id, 1].to_s
    use_this_FASTA_sequence = this_dataset.scan(
      regex_for_one_sequence_line
    ).flatten.join.delete(" \t")
    use_this_FASTA_sequence.upcase! if UPCASE_THE_SEQUENCE
    @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash][use_this_id] = use_this_FASTA_sequence
  }
end
|
335
|
+
|
336
|
+
# ========================================================================= #
|
337
|
+
# === verbose_check_whether_the_file_exists
|
338
|
+
# ========================================================================= #
|
339
|
+
# Check whether the path stored under :work_on_this_file points to
# something that can be read as a file.
#
# Returns true if so. Otherwise a message is shown to the user and
# false is returned.
#
# Directories are rejected as well: File.exist? is true for them, but
# a subsequent File.read would fail with Errno::EISDIR.
def verbose_check_whether_the_file_exists
  path = @internal_hash[:work_on_this_file]
  return true if path and File.exist?(path) and !File.directory?(path)
  # path may be nil when no argument was given, hence .to_s.
  opnn; e 'No file exists at '+sfile(path.to_s)
  false
end
|
348
|
+
|
349
|
+
# ========================================================================= #
|
350
|
+
# === consider_reporting_our_findings_to_the_user (report tag)
|
351
|
+
# ========================================================================= #
|
352
|
+
# Show every id/sequence pair of the main dataset to the user: the id
# (followed by a colon) via steelblue(), the sequence via lightblue(),
# then an empty line.
#
# Nothing is shown when report_the_dataset? is false, or when the
# dataset is missing or empty. Also available as report and
# report_the_dataset.
def consider_reporting_our_findings_to_the_user
  return unless report_the_dataset?
  entries = dataset?
  return unless entries and !entries.empty?
  entries.each_pair { |id, sequence|
    e steelblue("#{id}:")
    e lightblue(sequence)
    e
  }
end; alias report             consider_reporting_our_findings_to_the_user # === report
     alias report_the_dataset consider_reporting_our_findings_to_the_user # === report_the_dataset
|
362
|
+
|
363
|
+
# ========================================================================= #
|
364
|
+
# === run (run tag)
|
365
|
+
# ========================================================================= #
|
366
|
+
# Main entry point: register first_argument? as the file to work on,
# make sure that it exists, read it and - provided that it contains
# the keywords ORIGIN and VERSION - analyse its content. Otherwise
# the user is told that these keywords are missing.
def run
  set_work_on_this_file(first_argument?)
  # ======================================================================= #
  # First check whether the given file exists or not:
  # ======================================================================= #
  return unless verbose_check_whether_the_file_exists
  original_dataset = File.read(@internal_hash[:work_on_this_file]) # Just store it completely.
  if ['ORIGIN', 'VERSION '].all? { |keyword| original_dataset.include?(keyword) }
    analyse_this_dataset(original_dataset)
  else
    opnn; e 'No keywords ORIGIN and VERSION were found in this file.'
  end
end
|
380
|
+
|
381
|
+
# ========================================================================= #
|
382
|
+
# === Bioroebe::GenbankParser[]
|
383
|
+
# ========================================================================= #
|
384
|
+
# Shortcut for new(): Bioroebe::GenbankParser['file.gbk'].
#
# i is passed on as the first argument to new(). A block, if given,
# is forwarded too, so that the block-based options of initialize
# (such as yielding :do_not_report_anything) can also be used here.
def self.[](i = '', &block)
  new(i, &block)
end
|
387
|
+
|
72
388
|
end; end
|
73
389
|
|
74
390
|
# Only executed when this file is run directly: parse the file given
# on the commandline and print the first sequence that was found.
if __FILE__ == $PROGRAM_NAME
  alias e puts
  # Alternative input that can be used for a quick manual test:
  #   Bioroebe::GenbankParser.new('/home/Temp/bioroebe/ls_orchid.gbk')
  e Bioroebe::GenbankParser.new(ARGV).sequence?
end # genbankparser *genbank
|
397
|
+
# genbankparser
|
398
|
+
# genbankparser sample_file.genbank
|
399
|
+
# genbankparser --test
|
data/lib/bioroebe/parsers/gff.rb
CHANGED
@@ -68,11 +68,6 @@ module Parser
|
|
68
68
|
|
69
69
|
class GFF < ::Bioroebe::CommandlineApplication # === Bioroebe::Parser::GFF
|
70
70
|
|
71
|
-
# ========================================================================= #
|
72
|
-
# === NAMESPACE
|
73
|
-
# ========================================================================= #
|
74
|
-
NAMESPACE = inspect
|
75
|
-
|
76
71
|
# ========================================================================= #
|
77
72
|
# === INPUT_FILE
|
78
73
|
#
|
@@ -112,10 +107,7 @@ class GFF < ::Bioroebe::CommandlineApplication # === Bioroebe::Parser::GFF
|
|
112
107
|
# ========================================================================= #
|
113
108
|
def reset
|
114
109
|
super()
|
115
|
-
|
116
|
-
# === @namespace
|
117
|
-
# ======================================================================= #
|
118
|
-
@namespace = NAMESPACE
|
110
|
+
infer_the_namespace
|
119
111
|
# ======================================================================= #
|
120
112
|
# === @input_file
|
121
113
|
# ======================================================================= #
|