bioroebe 0.10.80 → 0.12.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3946 -2817
- data/bin/bioroebe +13 -2
- data/bin/bioroebe_hash +7 -0
- data/bin/codon_to_aminoacid +6 -4
- data/bin/compacter +7 -0
- data/bin/plain_palindrome +7 -0
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +3918 -2793
- data/doc/quality_control/commandline_applications.md +3 -3
- data/doc/statistics/statistics.md +7 -7
- data/doc/todo/bioroebe_GUI_todo.md +19 -14
- data/doc/todo/bioroebe_java_todo.md +22 -0
- data/doc/todo/bioroebe_todo.md +2075 -2620
- data/lib/bioroebe/C++/DNA.cpp +69 -0
- data/lib/bioroebe/C++/RNA.cpp +58 -0
- data/lib/bioroebe/C++/sequence.cpp +35 -0
- data/lib/bioroebe/abstract/README.md +1 -0
- data/lib/bioroebe/abstract/features.rb +29 -0
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
- data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
- data/lib/bioroebe/base/base_module/base_module.rb +36 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_application.rb +13 -9
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +24 -19
- data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
- data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
- data/lib/bioroebe/base/commandline_application/reset.rb +5 -3
- data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +42 -0
- data/lib/bioroebe/base/misc.rb +35 -0
- data/lib/bioroebe/base/prototype/misc.rb +15 -9
- data/lib/bioroebe/base/prototype/reset.rb +10 -0
- data/lib/bioroebe/cleave_and_digest/digestion.rb +10 -2
- data/lib/bioroebe/cleave_and_digest/trypsin.rb +104 -50
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
- data/lib/bioroebe/codons/codons.rb +1 -1
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +208 -59
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
- data/lib/bioroebe/codons/show_codon_tables.rb +8 -3
- data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
- data/lib/bioroebe/colours/rev.rb +4 -1
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/database_constants.rb +1 -1
- data/lib/bioroebe/constants/files_and_directories.rb +31 -4
- data/lib/bioroebe/constants/misc.rb +20 -0
- data/lib/bioroebe/constants/nucleotides.rb +7 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +109 -39
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/cpp +1 -0
- data/lib/bioroebe/crystal/README.md +2 -0
- data/lib/bioroebe/crystal/to_rna.cr +19 -0
- data/lib/bioroebe/data/README.md +11 -8
- data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
- data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
- data/lib/bioroebe/data/fasta/human/README.md +2 -0
- data/lib/bioroebe/dotplots/advanced_dotplot.rb +1 -1
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
- data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
- data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1518 -7
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
- data/lib/bioroebe/genome/README.md +4 -0
- data/lib/bioroebe/genome/genome.rb +130 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
- data/lib/bioroebe/gui/gtk +1 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +106 -137
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +27 -61
- data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +1 -1
- data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +1 -2
- data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +1 -2
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +46 -29
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +77 -52
- data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +100 -23
- data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +1 -2
- data/lib/bioroebe/gui/gtk3/gene/gene.rb +1 -2
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +43 -30
- data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +1 -2
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +120 -73
- data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +1 -2
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +19 -20
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +20 -13
- data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +1 -2
- data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +97 -22
- data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +3 -73
- data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +1 -2
- data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +1 -2
- data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +1 -2
- data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +1 -2
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.class +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.java +104 -0
- data/lib/bioroebe/gui/javafx/bioroebe.jar +0 -0
- data/lib/bioroebe/gui/javafx/bioroebe.mf +1 -0
- data/lib/bioroebe/gui/javafx/module-info.class +0 -0
- data/lib/bioroebe/gui/javafx/module-info.java +5 -0
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
- data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +166 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
- data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
- data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +94 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
- data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
- data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
- data/lib/bioroebe/images/images.html +29845 -0
- data/lib/bioroebe/java/README.md +5 -0
- data/lib/bioroebe/java/bioroebe/AllInOne.java +1 -0
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Base.java +39 -5
- data/lib/bioroebe/java/bioroebe/IsPalindrome.java +23 -5
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +0 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +28 -3
- data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToCamelcase.java +16 -4
- data/lib/bioroebe/java/bioroebe/ToRNA.java +43 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +6 -0
- data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/BisulfiteTreatment.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Codons.class → src/Codons.class} +0 -0
- data/lib/bioroebe/java/bioroebe/src/Codons.java +35 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/Commandline.java +101 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.class → src/Esystem.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Esystem.java → src/Esystem.java} +6 -1
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/GenerateRandomDnaSequence.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/GenerateRandomDnaSequence.java} +8 -2
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.java +56 -0
- data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/RemoveFile.java} +10 -4
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/RemoveNumbers.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/RemoveNumbers.java} +1 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.java +75 -0
- data/lib/bioroebe/misc/ruler.rb +11 -2
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +59 -18
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
- data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
- data/lib/bioroebe/parsers/gff.rb +1 -9
- data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
- data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
- data/lib/bioroebe/project/project.rb +1 -1
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
- data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +33 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/convert_dna_to_aminoacid_sequence.py +137 -0
- data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
- data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
- data/lib/bioroebe/python/toplevel_methods/palindromes.py +52 -0
- data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
- data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
- data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
- data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
- data/lib/bioroebe/requires/require_cleave_and_digest.rb +3 -1
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
- data/lib/bioroebe/sequence/alignment.rb +14 -4
- data/lib/bioroebe/sequence/dna.rb +1 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/protein.rb +105 -3
- data/lib/bioroebe/sequence/rna.rb +220 -0
- data/lib/bioroebe/sequence/sequence.rb +128 -40
- data/lib/bioroebe/shell/menu.rb +3815 -3696
- data/lib/bioroebe/shell/misc.rb +9019 -3133
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +1137 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
- data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
- data/lib/bioroebe/taxonomy/constants.rb +4 -3
- data/lib/bioroebe/taxonomy/edit.rb +2 -1
- data/lib/bioroebe/taxonomy/help/help.rb +10 -10
- data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
- data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
- data/lib/bioroebe/taxonomy/info/info.rb +18 -11
- data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
- data/lib/bioroebe/taxonomy/interactive.rb +140 -104
- data/lib/bioroebe/taxonomy/menu.rb +27 -18
- data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
- data/lib/bioroebe/taxonomy/shared.rb +1 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
- data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
- data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
- data/lib/bioroebe/toplevel_methods/misc.rb +175 -11
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +75 -47
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +3 -3
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
- data/lib/bioroebe/utility_scripts/compacter/compacter.rb +251 -0
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
- data/lib/bioroebe/utility_scripts/permutations.rb +36 -9
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/www/embeddable_interface.rb +121 -58
- data/lib/bioroebe/www/sinatra/sinatra.rb +186 -71
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
- data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +17 -17
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
- data/lib/bioroebe/yaml/genomes/README.md +3 -4
- data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +5 -0
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +57 -57
- data/spec/README.md +6 -0
- data/spec/project_wide_specification/classes.md +5 -0
- metadata +107 -70
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
- data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
- data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
- data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
- data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
- data/lib/bioroebe/utility_scripts/compacter.rb +0 -131
- /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/BisulfiteTreatment.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/RemoveFile.class} +0 -0
data/lib/bioroebe/misc/ruler.rb
CHANGED
@@ -24,6 +24,8 @@
|
|
24
24
|
# variant - that one should work fine.
|
25
25
|
# =========================================================================== #
|
26
26
|
# require 'bioroebe/misc/ruler.rb'
|
27
|
+
# puts Bioroebe.return_ruler('ATGCTGACAGGGGGGGEEEEEE')
|
28
|
+
# Bioroebe.ruler_return_as_string_without_colours 'ATGCTGACAGGGGGGGEEEEEE'
|
27
29
|
# Bioroebe::Ruler.new(ARGV)
|
28
30
|
# =========================================================================== #
|
29
31
|
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
@@ -54,8 +56,14 @@ class Ruler < ::Bioroebe::CommandlineApplication # === Bioroebe::Ruler
|
|
54
56
|
set_group_together_n_nucleotides(
|
55
57
|
group_together_n_nucleotides
|
56
58
|
)
|
59
|
+
# ======================================================================= #
|
60
|
+
# === Handle blocks next
|
61
|
+
# ======================================================================= #
|
57
62
|
if block_given?
|
58
63
|
yielded = yield
|
64
|
+
# ===================================================================== #
|
65
|
+
# === Handle Hashes next
|
66
|
+
# ===================================================================== #
|
59
67
|
if yielded.is_a? Hash
|
60
68
|
# =================================================================== #
|
61
69
|
# === :ruler
|
@@ -206,14 +214,15 @@ end
|
|
206
214
|
# This will return the ruler as a String; it exists mostly for
|
207
215
|
# convenience reasons.
|
208
216
|
#
|
209
|
-
# Invocation
|
217
|
+
# Invocation examples:
|
210
218
|
#
|
211
219
|
# puts Bioroebe.ruler_return_as_string 'ATGCTGACAGGGGGGGEEEEEE'
|
220
|
+
# puts Bioroebe.return_ruler('ATGCTGACAGGGGGGGEEEEEE')
|
212
221
|
#
|
213
222
|
# =========================================================================== #
|
214
223
|
def self.ruler_return_as_string(i, group_together_n_nucleotides = 70)
|
215
224
|
::Bioroebe::Ruler.new(i, group_together_n_nucleotides).result_as_string
|
216
|
-
end
|
225
|
+
end; self.instance_eval { alias return_ruler ruler_return_as_string } # === Bioroebe.return_ruler
|
217
226
|
|
218
227
|
# =========================================================================== #
|
219
228
|
# === Bioroebe.ruler_return_as_string_without_colours
|
data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb
CHANGED
@@ -27,11 +27,6 @@ class MostLikelyNucleotideSequenceForThisAminoacidSequence < ::Bioroebe::Command
|
|
27
27
|
require 'bioroebe/aminoacids/codon_percentage.rb'
|
28
28
|
require 'bioroebe/codons/possible_codons_for_this_aminoacid.rb'
|
29
29
|
|
30
|
-
# ========================================================================= #
|
31
|
-
# === NAMESPACE
|
32
|
-
# ========================================================================= #
|
33
|
-
NAMESPACE = inspect
|
34
|
-
|
35
30
|
# ========================================================================= #
|
36
31
|
# === initialize
|
37
32
|
# ========================================================================= #
|
@@ -63,10 +58,7 @@ class MostLikelyNucleotideSequenceForThisAminoacidSequence < ::Bioroebe::Command
|
|
63
58
|
# ========================================================================= #
|
64
59
|
def reset
|
65
60
|
super()
|
66
|
-
|
67
|
-
# === @namespace
|
68
|
-
# ======================================================================= #
|
69
|
-
@namespace = NAMESPACE
|
61
|
+
infer_the_namespace
|
70
62
|
# ======================================================================= #
|
71
63
|
# === @internal_hash
|
72
64
|
# ======================================================================= #
|
@@ -33,10 +33,25 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
33
33
|
# ========================================================================= #
|
34
34
|
def initialize(
|
35
35
|
i = nil,
|
36
|
-
run_already = true
|
36
|
+
run_already = true,
|
37
|
+
&block
|
37
38
|
)
|
38
39
|
reset
|
39
40
|
set_input(i)
|
41
|
+
# ======================================================================= #
|
42
|
+
# === Handle blocks next
|
43
|
+
# ======================================================================= #
|
44
|
+
if block_given?
|
45
|
+
yielded = yield
|
46
|
+
case yielded
|
47
|
+
# ===================================================================== #
|
48
|
+
# === :do_not_remove_newlines
|
49
|
+
# ===================================================================== #
|
50
|
+
when /do(_|-)?not(_|-)?remove(_|-)?newlines$/,
|
51
|
+
:do_not_remove_newlines
|
52
|
+
@shall_we_remove_newlines = false
|
53
|
+
end
|
54
|
+
end
|
40
55
|
run if run_already
|
41
56
|
end
|
42
57
|
|
@@ -45,6 +60,19 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
45
60
|
# ========================================================================= #
|
46
61
|
def reset
|
47
62
|
super()
|
63
|
+
# ======================================================================= #
|
64
|
+
# === @shall_we_remove_newlines
|
65
|
+
#
|
66
|
+
# By default newlines will be removed.
|
67
|
+
# ======================================================================= #
|
68
|
+
@shall_we_remove_newlines = true
|
69
|
+
end
|
70
|
+
|
71
|
+
# ========================================================================= #
|
72
|
+
# === shall_we_remove_newlines?
|
73
|
+
# ========================================================================= #
|
74
|
+
def shall_we_remove_newlines?
|
75
|
+
@shall_we_remove_newlines
|
48
76
|
end
|
49
77
|
|
50
78
|
# ========================================================================= #
|
@@ -57,30 +85,37 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
57
85
|
if i.first.start_with?('>') and i.first.include?(N) # Assume gi number, which we will chop off.
|
58
86
|
i[0] = i[0][i[0].index(N) .. -1] # Get all from the start, to the first newline.
|
59
87
|
end
|
60
|
-
i = i.join(' ').strip
|
88
|
+
# i = i.join(' ') # .strip
|
61
89
|
end
|
62
|
-
i = i.to_s.dup
|
90
|
+
# i = i.to_s.dup
|
91
|
+
i = [i] unless i.is_a?(Array)
|
63
92
|
@input = i
|
64
|
-
sanitize_input
|
65
93
|
end
|
66
94
|
|
67
95
|
# ========================================================================= #
|
68
|
-
# ===
|
96
|
+
# === sanitize_the_input
|
69
97
|
# ========================================================================= #
|
70
|
-
def
|
71
|
-
@input.
|
72
|
-
|
98
|
+
def sanitize_the_input
|
99
|
+
@input.reject! {|entry| entry.empty? }
|
100
|
+
if shall_we_remove_newlines?
|
101
|
+
@input.map! {|entry|
|
102
|
+
entry = entry.delete("\n")
|
103
|
+
entry = entry.delete("\\\\n")
|
104
|
+
}
|
105
|
+
end
|
73
106
|
# ======================================================================= #
|
74
107
|
# Next remove all numbers.
|
75
108
|
# ======================================================================= #
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
109
|
+
@input.map! {|entry|
|
110
|
+
chars = entry.chars
|
111
|
+
chars.reject! {|inner_entry| inner_entry =~ /\d+/ } # Reject numbers.
|
112
|
+
if entry.include? '/'
|
113
|
+
entry.delete('/')
|
114
|
+
end
|
115
|
+
entry
|
116
|
+
}
|
117
|
+
@input = @input.join
|
118
|
+
end; alias sanitize_input sanitize_the_input # === sanitize_input
|
84
119
|
|
85
120
|
# ========================================================================= #
|
86
121
|
# === input?
|
@@ -102,13 +137,19 @@ class SanitizeNucleotideSequence < Base # === Bioroebe::SanitizeNucleotideSequen
|
|
102
137
|
# === run (run tag)
|
103
138
|
# ========================================================================= #
|
104
139
|
def run
|
140
|
+
sanitize_the_input
|
105
141
|
end
|
106
142
|
|
107
143
|
# ========================================================================= #
|
108
144
|
# === Bioroebe::SanitizeNucleotideSequence[]
|
145
|
+
#
|
146
|
+
# This method will return a consecutive nucleotide String by default.
|
109
147
|
# ========================================================================= #
|
110
|
-
def self.[](
|
111
|
-
|
148
|
+
def self.[](
|
149
|
+
i = "1 ATCCG\n30 TTA",
|
150
|
+
&block
|
151
|
+
)
|
152
|
+
new(i, &block).result?
|
112
153
|
end
|
113
154
|
|
114
155
|
end
|
@@ -475,13 +475,6 @@ class ShowNucleotideSequence < ::Bioroebe::Sequence # === Bioroebe::ShowNucleoti
|
|
475
475
|
erev i
|
476
476
|
end; alias display report # === display (display tag)
|
477
477
|
|
478
|
-
# ========================================================================= #
|
479
|
-
# === do_colourize_the_start_codon
|
480
|
-
# ========================================================================= #
|
481
|
-
def do_colourize_the_start_codon
|
482
|
-
add_this_substring('ATG')
|
483
|
-
end
|
484
|
-
|
485
478
|
# ========================================================================= #
|
486
479
|
# === colourize_dna_sequence
|
487
480
|
# ========================================================================= #
|
@@ -529,6 +522,13 @@ class ShowNucleotideSequence < ::Bioroebe::Sequence # === Bioroebe::ShowNucleoti
|
|
529
522
|
end; alias set_search_for search_for_this_substring # === set_search_for
|
530
523
|
alias add_this_substring search_for_this_substring # === add_this_substring
|
531
524
|
|
525
|
+
# ========================================================================= #
|
526
|
+
# === do_colourize_the_start_codon
|
527
|
+
# ========================================================================= #
|
528
|
+
def do_colourize_the_start_codon
|
529
|
+
add_this_substring('ATG')
|
530
|
+
end
|
531
|
+
|
532
532
|
# ========================================================================= #
|
533
533
|
# === run
|
534
534
|
# ========================================================================= #
|
@@ -4,10 +4,29 @@
|
|
4
4
|
# =========================================================================== #
|
5
5
|
# === Bioroebe::GenbankParser
|
6
6
|
#
|
7
|
-
# This class can be used to parse genbank-files
|
8
|
-
#
|
7
|
+
# This class can be used to parse genbank-files (typically stored as .genbank
|
8
|
+
# or .gbk, so their file extension is usually ".gbk").
|
9
|
+
#
|
10
|
+
# As of the rewrite in July 2022 the class can also handle multiple
|
11
|
+
# fasta entries now.
|
12
|
+
#
|
13
|
+
# The class is similar to class FastaParser, but instead it will only
|
14
|
+
# select the content between "ORIGIN" and "VERSION" entries.
|
15
|
+
#
|
16
|
+
# The user can pass the content of a genbank-file to this class, and it
|
17
|
+
# can then report the nucleotide sequence, i. e. the part starting after
|
18
|
+
# the ORIGIN string.
|
19
|
+
#
|
20
|
+
# The reason why this class has been created was because it is sometimes
|
21
|
+
# necessary to parse a genbank file.
|
22
|
+
#
|
23
|
+
# Usage example:
|
24
|
+
#
|
25
|
+
# Bioroebe::GenbankParser.new(ARGV)
|
26
|
+
#
|
9
27
|
# =========================================================================== #
|
10
|
-
# require 'bioroebe/
|
28
|
+
# require 'bioroebe/genbank/genbank_parser.rb'
|
29
|
+
# genbank_parser = Bioroebe::GenbankParser.new(ARGV)
|
11
30
|
# =========================================================================== #
|
12
31
|
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
13
32
|
|
@@ -16,63 +35,365 @@ module Bioroebe
|
|
16
35
|
class GenbankParser < ::Bioroebe::CommandlineApplication # === Bioroebe::GenbankParser
|
17
36
|
|
18
37
|
# ========================================================================= #
|
19
|
-
# ===
|
38
|
+
# === UPCASE_THE_SEQUENCE
|
39
|
+
#
|
40
|
+
# Setting this constant to true will cause this class to store the
|
41
|
+
# FASTA sequence in an upcased variant, e. g. "AGCAGCTA" rather
|
42
|
+
# than "acgatcag".
|
20
43
|
# ========================================================================= #
|
21
|
-
|
44
|
+
UPCASE_THE_SEQUENCE = true
|
45
|
+
|
46
|
+
# ========================================================================= #
|
47
|
+
# === TEST_STRING
|
48
|
+
#
|
49
|
+
# Our example test-string, showing what such a genbank file usually looks
|
50
|
+
# like.
|
51
|
+
#
|
52
|
+
# This will contain two different FASTA sequences.
|
53
|
+
# ========================================================================= #
|
54
|
+
TEST_STRING = ' /note="internal transcribed spacer 2"
|
55
|
+
ORIGIN
|
56
|
+
1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
|
57
|
+
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
|
58
|
+
121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
|
59
|
+
181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
|
60
|
+
241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
|
61
|
+
301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
|
62
|
+
361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
|
63
|
+
421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
|
64
|
+
481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
|
65
|
+
541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
|
66
|
+
601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
|
67
|
+
661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
|
68
|
+
721 gatgcaaccc caggtcaggt gagga
|
69
|
+
//
|
70
|
+
LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
|
71
|
+
DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
|
72
|
+
ACCESSION Z78510
|
73
|
+
VERSION Z78510.1 GI:2765635
|
74
|
+
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
|
75
|
+
ITS1; ITS2.
|
76
|
+
SOURCE Phragmipedium caricinum
|
77
|
+
ORGANISM Phragmipedium caricinum
|
78
|
+
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
79
|
+
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
|
80
|
+
Cypripedioideae; Phragmipedium.
|
81
|
+
REFERENCE 1
|
82
|
+
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
|
83
|
+
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
|
84
|
+
Orchidaceae): nuclear rDNA ITS sequences
|
85
|
+
JOURNAL Unpublished
|
86
|
+
REFERENCE 2 (bases 1 to 750)
|
87
|
+
AUTHORS Cox,A.V.
|
88
|
+
TITLE Direct Submission
|
89
|
+
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
|
90
|
+
Richmond, Surrey TW9 3AB, UK
|
91
|
+
FEATURES Location/Qualifiers
|
92
|
+
source 1..750
|
93
|
+
/organism="Phragmipedium caricinum"
|
94
|
+
/mol_type="genomic DNA"
|
95
|
+
/db_xref="taxon:53127"
|
96
|
+
misc_feature 1..380
|
97
|
+
/note="internal transcribed spacer 1"
|
98
|
+
gene 381..550
|
99
|
+
/gene="5.8S rRNA"
|
100
|
+
rRNA 381..550
|
101
|
+
/gene="5.8S rRNA"
|
102
|
+
/product="5.8S ribosomal RNA"
|
103
|
+
misc_feature 551..750
|
104
|
+
/note="internal transcribed spacer 2"
|
105
|
+
ORIGIN
|
106
|
+
1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
|
107
|
+
61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
|
108
|
+
121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
|
109
|
+
181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
|
110
|
+
241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
|
111
|
+
301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
|
112
|
+
361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
|
113
|
+
421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
|
114
|
+
481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
|
115
|
+
541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
|
116
|
+
601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
|
117
|
+
661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
|
118
|
+
721 ccaggtcagg tgagccaccc gctgagttta
|
119
|
+
//
|
120
|
+
LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
|
121
|
+
DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
|
122
|
+
ACCESSION Z78509
|
123
|
+
VERSION Z78509.1 GI:2765634
|
124
|
+
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
|
125
|
+
ITS1; ITS2.
|
126
|
+
SOURCE Phragmipedium pearcei
|
127
|
+
ORGANISM Phragmipedium pearcei
|
128
|
+
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
129
|
+
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
|
130
|
+
Cypripedioideae; Phragmipedium.
|
131
|
+
REFERENCE 1
|
132
|
+
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
|
133
|
+
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
|
134
|
+
Orchidaceae): nuclear rDNA ITS sequences
|
135
|
+
JOURNAL Unpublished
|
136
|
+
REFERENCE 2 (bases 1 to 731)
|
137
|
+
AUTHORS Cox,A.V.
|
138
|
+
TITLE Direct Submission
|
139
|
+
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
|
140
|
+
Richmond, Surrey TW9 3AB, UK
|
141
|
+
FEATURES Location/Qualifiers
|
142
|
+
source 1..731
|
143
|
+
/organism="Phragmipedium pearcei"
|
144
|
+
/mol_type="genomic DNA"
|
145
|
+
/db_xref="taxon:53135"
|
146
|
+
misc_feature 1..380
|
147
|
+
/note="internal transcribed spacer 1"
|
148
|
+
gene 381..550
|
149
|
+
/gene="5.8S rRNA"
|
150
|
+
rRNA 381..550
|
151
|
+
/gene="5.8S rRNA"
|
152
|
+
/product="5.8S ribosomal RNA"
|
153
|
+
misc_feature 551..731
|
154
|
+
/note="internal transcribed spacer 2"
|
155
|
+
ORIGIN
|
156
|
+
1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
|
157
|
+
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
|
158
|
+
121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
|
159
|
+
181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
|
160
|
+
241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
|
161
|
+
301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
|
162
|
+
361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
|
163
|
+
421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
|
164
|
+
481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
|
165
|
+
541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
|
166
|
+
601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
|
167
|
+
661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
|
168
|
+
721 taattttagg a
|
169
|
+
//
|
170
|
+
LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
|
22
171
|
|
23
172
|
# ========================================================================= #
|
24
173
|
# === initialize
|
25
174
|
# ========================================================================= #
|
26
175
|
def initialize(
|
27
|
-
commandline_arguments =
|
176
|
+
commandline_arguments = nil,
|
28
177
|
run_already = true
|
29
178
|
)
|
30
179
|
reset
|
31
180
|
set_commandline_arguments(
|
32
181
|
commandline_arguments
|
33
182
|
)
|
183
|
+
menu
|
184
|
+
if block_given?
|
185
|
+
yielded = yield
|
186
|
+
case yielded
|
187
|
+
# ===================================================================== #
|
188
|
+
# === :do_not_report_anything
|
189
|
+
# ===================================================================== #
|
190
|
+
when :do_not_report_anything
|
191
|
+
@internal_hash[:report_the_dataset] = false
|
192
|
+
end
|
193
|
+
end
|
34
194
|
run if run_already
|
35
195
|
end
|
36
196
|
|
37
197
|
# ========================================================================= #
|
38
|
-
# === reset
|
198
|
+
# === reset (reset tag)
|
39
199
|
# ========================================================================= #
|
40
200
|
def reset
|
41
201
|
super()
|
202
|
+
infer_the_namespace
|
203
|
+
# ======================================================================= #
|
204
|
+
# === @internal_hash
|
205
|
+
# ======================================================================= #
|
206
|
+
# @internal_hash = {}
|
207
|
+
# ======================================================================= #
|
208
|
+
# === :work_on_this_file
|
209
|
+
# ======================================================================= #
|
210
|
+
@internal_hash[:work_on_this_file] = nil
|
211
|
+
# ======================================================================= #
|
212
|
+
# === :report_the_dataset
|
213
|
+
# ======================================================================= #
|
214
|
+
@internal_hash[:report_the_dataset] = true
|
215
|
+
# ======================================================================= #
|
216
|
+
# === :n_FASTA_entries_in_the_file
|
217
|
+
#
|
218
|
+
# This variable will keep track of how many FASTA entries are in
|
219
|
+
# the genbank file at hand.
|
220
|
+
# ======================================================================= #
|
221
|
+
@internal_hash[:n_FASTA_entries_in_the_file] = 0
|
222
|
+
# ======================================================================= #
|
223
|
+
# === :dataset_from_all_FASTA_entries_as_a_hash
|
224
|
+
#
|
225
|
+
# This hash will contain all the FASTA sequences in the given
|
226
|
+
# genbank file at hand. This constitutes the main dataset of
|
227
|
+
# this class.
|
228
|
+
# ======================================================================= #
|
229
|
+
@internal_hash[:dataset_from_all_FASTA_entries_as_a_hash] = {}
|
42
230
|
end
|
43
231
|
|
44
232
|
# ========================================================================= #
|
45
|
-
# ===
|
233
|
+
# === menu (menu tag)
|
46
234
|
# ========================================================================= #
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
235
|
+
def menu(
|
236
|
+
i = commandline_arguments_containing_leading_hyphens?
|
237
|
+
)
|
238
|
+
if i.is_a? Array
|
239
|
+
i.each {|entry| menu(entry) }
|
240
|
+
else
|
241
|
+
case i # (case tag)
|
242
|
+
# ===================================================================== #
|
243
|
+
# === gparser --help
|
244
|
+
# ===================================================================== #
|
245
|
+
when /^-?-?help$/i
|
246
|
+
show_help
|
247
|
+
exit
|
248
|
+
# ===================================================================== #
|
249
|
+
# === gparser --test
|
250
|
+
#
|
251
|
+
# This entry point can be used to test the default TEST_STRING.
|
252
|
+
# ===================================================================== #
|
253
|
+
when /^-?-?test$/i,
|
254
|
+
/^-?-?test(-|_)?string$/i
|
255
|
+
analyse_this_dataset(TEST_STRING)
|
256
|
+
exit
|
59
257
|
end
|
60
|
-
@sequence = dataset
|
61
258
|
end
|
62
259
|
end
|
63
260
|
|
261
|
+
# ========================================================================= #
|
262
|
+
# === work_on_which_file?
|
263
|
+
# ========================================================================= #
|
264
|
+
def work_on_which_file?
|
265
|
+
@internal_hash[:work_on_this_file]
|
266
|
+
end
|
267
|
+
|
268
|
+
# ========================================================================= #
|
269
|
+
# === report_the_dataset?
|
270
|
+
# ========================================================================= #
|
271
|
+
def report_the_dataset?
|
272
|
+
@internal_hash[:report_the_dataset]
|
273
|
+
end
|
274
|
+
|
275
|
+
# ========================================================================= #
|
276
|
+
# === set_work_on_this_file
|
277
|
+
# ========================================================================= #
|
278
|
+
def set_work_on_this_file(
|
279
|
+
i = first_argument?
|
280
|
+
)
|
281
|
+
@internal_hash[:work_on_this_file] = i
|
282
|
+
end
|
283
|
+
|
284
|
+
# ========================================================================= #
|
285
|
+
# === analyse_this_dataset
|
286
|
+
# ========================================================================= #
|
287
|
+
# Splits the raw text of a genbank file into per-entry chunks, records how
# many chunks were found, hands the chunks over for sequence extraction and
# finally triggers the (optional) report.
#
# dataset - String holding the content of a genbank file.
#
# Every chunk runs from an ORIGIN keyword up to and including the next
# "VERSION <id>" token.
#
# NOTE(review): in the layout shown by TEST_STRING the VERSION line comes
# before ORIGIN within one record, so the id captured in a chunk appears to
# belong to the record that follows the sequence, and a trailing record
# without a following VERSION line is not matched at all - confirm that this
# pairing is intended.
def analyse_this_dataset(dataset)
  use_this_regex =
    /ORIGIN[\/\-\.\s0-9a-zA-Z]+VERSION\s*[\.0-9A-Z]+/ # See: https://rubular.com/r/0q7rFIUflX7yzw
  scanned = dataset.scan(use_this_regex)
  # Store the number of chunks here, not the Array of chunks itself: the
  # slot is initialised with 0 and is meant to be a counter.
  @internal_hash[:n_FASTA_entries_in_the_file] = scanned.size
  discover_the_corresponding_FASTA_entries_from_this_dataset(scanned)
  consider_reporting_our_findings_to_the_user
end; alias determine_dataset analyse_this_dataset # === determine_dataset
|
295
|
+
|
296
|
+
# ========================================================================= #
|
297
|
+
# === dataset?
|
298
|
+
# ========================================================================= #
|
299
|
+
def dataset?
|
300
|
+
@internal_hash[:dataset_from_all_FASTA_entries_as_a_hash]
|
301
|
+
end; alias main_dataset? dataset? # === main_dataset?
|
302
|
+
|
303
|
+
# ========================================================================= #
|
304
|
+
# === sequences?
|
305
|
+
# ========================================================================= #
|
306
|
+
def sequences?
|
307
|
+
dataset?.values
|
308
|
+
end
|
309
|
+
|
64
310
|
# ========================================================================= #
|
65
311
|
# === sequence?
|
66
312
|
# ========================================================================= #
|
67
313
|
def sequence?
|
68
|
-
|
314
|
+
sequences?.first
|
69
315
|
end; alias coding_sequence? sequence? # === coding_sequence?
|
70
316
|
alias cds sequence? # === cds
|
71
317
|
|
318
|
+
# ========================================================================= #
|
319
|
+
# === discover_the_corresponding_FASTA_entries_from_this_dataset
|
320
|
+
# ========================================================================= #
|
321
|
+
# Extracts the id and the nucleotide sequence from every chunk and stores
# them in @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash], using
# the id as key and the sequence as value.
#
# i - Array of Strings; each String is expected to contain numbered
#     sequence lines ("<number> <letters> <letters> ...") and a
#     "VERSION <id>" token. Any non-Array input is ignored.
#
# The sequence is upcased when UPCASE_THE_SEQUENCE is true. A chunk without
# a VERSION token is stored under the empty String.
def discover_the_corresponding_FASTA_entries_from_this_dataset(i)
  return unless i.is_a?(Array)
  regex_to_use_for_the_id = /VERSION\s*([\.A-Za-z0-9]+)/
  # Match exactly one numbered line per hit. The capture must not run across
  # the newline: otherwise it swallows the indentation of the following line
  # and the "^" anchor can no longer match there, which silently drops every
  # second sequence line of a right-aligned genbank listing.
  regex_for_one_sequence_line = /^[ \t]*\d+[ \t]+([a-zA-Z \t]+)/
  i.each {|this_dataset|
    use_this_id = this_dataset[regex_to_use_for_the_id, 1].to_s
    # Strip every kind of whitespace, so that tabs do not end up in the
    # stored sequence.
    use_this_FASTA_sequence = this_dataset.scan(
      regex_for_one_sequence_line
    ).flatten.join.gsub(/\s+/, '')
    use_this_FASTA_sequence.upcase! if UPCASE_THE_SEQUENCE
    @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash][use_this_id] = use_this_FASTA_sequence
  }
end
|
335
|
+
|
336
|
+
# ========================================================================= #
|
337
|
+
# === verbose_check_whether_the_file_exists
|
338
|
+
# ========================================================================= #
|
339
|
+
def verbose_check_whether_the_file_exists
|
340
|
+
_ = @internal_hash[:work_on_this_file]
|
341
|
+
if _ and File.exist?(_)
|
342
|
+
true
|
343
|
+
else
|
344
|
+
opnn; e 'No file exists at '+sfile(_)
|
345
|
+
false
|
346
|
+
end
|
347
|
+
end
|
348
|
+
|
349
|
+
# ========================================================================= #
|
350
|
+
# === consider_reporting_our_findings_to_the_user (report tag)
|
351
|
+
# ========================================================================= #
|
352
|
+
def consider_reporting_our_findings_to_the_user
|
353
|
+
if report_the_dataset? and dataset? and !dataset?.empty?
|
354
|
+
main_dataset?.each_pair {|key, value|
|
355
|
+
e steelblue("#{key}:")
|
356
|
+
e lightblue(value)
|
357
|
+
e
|
358
|
+
}
|
359
|
+
end
|
360
|
+
end; alias report consider_reporting_our_findings_to_the_user # === report
|
361
|
+
alias report_the_dataset consider_reporting_our_findings_to_the_user # === report_the_dataset
|
362
|
+
|
363
|
+
# ========================================================================= #
|
364
|
+
# === run (run tag)
|
365
|
+
# ========================================================================= #
|
366
|
+
def run
|
367
|
+
set_work_on_this_file(first_argument?)
|
368
|
+
# ======================================================================= #
|
369
|
+
# First check whether the given file exists or not:
|
370
|
+
# ======================================================================= #
|
371
|
+
if verbose_check_whether_the_file_exists
|
372
|
+
original_dataset = File.read(@internal_hash[:work_on_this_file]) # Just store it completely.
|
373
|
+
if original_dataset.include?('ORIGIN') and original_dataset.include?('VERSION ')
|
374
|
+
analyse_this_dataset(original_dataset)
|
375
|
+
else
|
376
|
+
opnn; e 'No keywords ORIGIN and VERSION were found in this file.'
|
377
|
+
end
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
381
|
+
# ========================================================================= #
|
382
|
+
# === Bioroebe::GenbankParser[]
|
383
|
+
# ========================================================================= #
|
384
|
+
def self.[](i = '')
|
385
|
+
new(i)
|
386
|
+
end
|
387
|
+
|
72
388
|
end; end
|
73
389
|
|
74
390
|
if __FILE__ == $PROGRAM_NAME
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
391
|
+
alias e puts
|
392
|
+
genbank_parser = Bioroebe::GenbankParser.new(ARGV)
|
393
|
+
# genbank_parser = Bioroebe::GenbankParser.new('/home/Temp/bioroebe/ls_orchid.gbk')
|
394
|
+
e genbank_parser.sequence?
|
395
|
+
# e _.id
|
396
|
+
end # genbankparser *genbank
|
397
|
+
# genbankparser
|
398
|
+
# genbankparser sample_file.genbank
|
399
|
+
# genbankparser --test
|
data/lib/bioroebe/parsers/gff.rb
CHANGED
@@ -68,11 +68,6 @@ module Parser
|
|
68
68
|
|
69
69
|
class GFF < ::Bioroebe::CommandlineApplication # === Bioroebe::Parser::GFF
|
70
70
|
|
71
|
-
# ========================================================================= #
|
72
|
-
# === NAMESPACE
|
73
|
-
# ========================================================================= #
|
74
|
-
NAMESPACE = inspect
|
75
|
-
|
76
71
|
# ========================================================================= #
|
77
72
|
# === INPUT_FILE
|
78
73
|
#
|
@@ -112,10 +107,7 @@ class GFF < ::Bioroebe::CommandlineApplication # === Bioroebe::Parser::GFF
|
|
112
107
|
# ========================================================================= #
|
113
108
|
def reset
|
114
109
|
super()
|
115
|
-
|
116
|
-
# === @namespace
|
117
|
-
# ======================================================================= #
|
118
|
-
@namespace = NAMESPACE
|
110
|
+
infer_the_namespace
|
119
111
|
# ======================================================================= #
|
120
112
|
# === @input_file
|
121
113
|
# ======================================================================= #
|