bioroebe 0.10.80 → 0.12.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioroebe might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +3612 -2781
- data/bin/bioroebe +7 -1
- data/bin/bioroebe_hash +7 -0
- data/bin/codon_to_aminoacid +1 -0
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +3612 -2742
- data/doc/quality_control/commandline_applications.md +3 -3
- data/doc/todo/bioroebe_java_todo.md +22 -0
- data/doc/todo/bioroebe_todo.md +2059 -2615
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
- data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +15 -11
- data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
- data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
- data/lib/bioroebe/base/commandline_application/reset.rb +3 -2
- data/lib/bioroebe/base/misc.rb +35 -0
- data/lib/bioroebe/base/prototype/misc.rb +11 -1
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
- data/lib/bioroebe/codons/codons.rb +1 -1
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +192 -58
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
- data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
- data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
- data/lib/bioroebe/colours/rev.rb +4 -1
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/database_constants.rb +1 -1
- data/lib/bioroebe/constants/files_and_directories.rb +31 -4
- data/lib/bioroebe/constants/misc.rb +20 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +58 -24
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/crystal/README.md +2 -0
- data/lib/bioroebe/crystal/to_rna.cr +19 -0
- data/lib/bioroebe/data/README.md +11 -8
- data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
- data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
- data/lib/bioroebe/data/fasta/human/README.md +2 -0
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
- data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
- data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1465 -7
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
- data/lib/bioroebe/genome/README.md +4 -0
- data/lib/bioroebe/genome/genome.rb +67 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
- data/lib/bioroebe/gui/gtk +1 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +73 -128
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +45 -27
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +76 -50
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +99 -21
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +42 -28
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +119 -71
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +19 -11
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
- data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
- data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
- data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
- data/lib/bioroebe/java/README.md +4 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +25 -1
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/{Base.java → src/main/java/bioroebe/Base.java} +15 -2
- data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/main/java/bioroebe/BisulfiteTreatment.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Codons.class → src/main/java/bioroebe/Codons.class} +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Codons.java +34 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.java +101 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/main/java/bioroebe/GenerateRandomDnaSequence.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/main/java/bioroebe/GenerateRandomDnaSequence.java} +8 -2
- data/lib/bioroebe/java/bioroebe/{IsPalindrome.class → src/main/java/bioroebe/IsPalindrome.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{IsPalindrome.java → src/main/java/bioroebe/IsPalindrome.java} +5 -1
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.java +56 -0
- data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/main/java/bioroebe/RemoveFile.java} +10 -4
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/main/java/bioroebe/RemoveNumbers.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/main/java/bioroebe/RemoveNumbers.java} +1 -0
- data/lib/bioroebe/java/bioroebe/{ToCamelcase.class → src/main/java/bioroebe/ToCamelcase.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{ToCamelcase.java → src/main/java/bioroebe/ToCamelcase.java} +3 -3
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.java +42 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/toplevel_methods/BaseComposition.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/toplevel_methods/BaseComposition.java +75 -0
- data/lib/bioroebe/misc/ruler.rb +11 -2
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
- data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
- data/lib/bioroebe/parsers/gff.rb +1 -9
- data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
- data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
- data/lib/bioroebe/project/project.rb +1 -1
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
- data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +33 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
- data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
- data/lib/bioroebe/python/toplevel_methods/palindromes.py +42 -0
- data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
- data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
- data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
- data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
- data/lib/bioroebe/sequence/alignment.rb +14 -4
- data/lib/bioroebe/sequence/dna.rb +1 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/protein.rb +105 -3
- data/lib/bioroebe/sequence/sequence.rb +87 -21
- data/lib/bioroebe/shell/menu.rb +3829 -3714
- data/lib/bioroebe/shell/misc.rb +59 -4307
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +11255 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
- data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
- data/lib/bioroebe/taxonomy/constants.rb +4 -3
- data/lib/bioroebe/taxonomy/edit.rb +2 -1
- data/lib/bioroebe/taxonomy/help/help.rb +10 -10
- data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
- data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
- data/lib/bioroebe/taxonomy/info/info.rb +18 -11
- data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
- data/lib/bioroebe/taxonomy/interactive.rb +140 -104
- data/lib/bioroebe/taxonomy/menu.rb +27 -18
- data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
- data/lib/bioroebe/taxonomy/shared.rb +1 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
- data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
- data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
- data/lib/bioroebe/toplevel_methods/misc.rb +142 -12
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +1 -2
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +2 -2
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
- data/lib/bioroebe/utility_scripts/compacter.rb +1 -9
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/www/embeddable_interface.rb +103 -54
- data/lib/bioroebe/www/sinatra/sinatra.rb +186 -70
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/genomes/README.md +3 -4
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +27 -27
- metadata +81 -64
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
- /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/main/java/bioroebe/BisulfiteTreatment.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{Esystem.class → src/main/java/bioroebe/Esystem.class} +0 -0
- /data/lib/bioroebe/java/bioroebe/{Esystem.java → src/main/java/bioroebe/Esystem.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/main/java/bioroebe/RemoveFile.class} +0 -0
@@ -1,2901 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
# Encoding: UTF-8
|
3
|
-
# frozen_string_literal: true
|
4
|
-
# =========================================================================== #
|
5
|
-
# require 'bioroebe/shell/show_report_and_display.rb'
|
6
|
-
# =========================================================================== #
|
7
|
-
module Bioroebe
|
8
|
-
|
9
|
-
class Shell < ::Bioroebe::CommandlineApplication
|
10
|
-
|
11
|
-
require 'bioroebe/shell/search.rb'
|
12
|
-
require 'bioroebe/codons/show_codon_usage.rb'
|
13
|
-
require 'bioroebe/codons/show_this_codon_table.rb'
|
14
|
-
require 'bioroebe/count/count_amount_of_aminoacids.rb'
|
15
|
-
|
16
|
-
# ========================================================================= #
|
17
|
-
# === report_main_sequence
|
18
|
-
#
|
19
|
-
# We will call dna_with_ends() here in this method. The argument colourize will
|
20
|
-
# determine whether we will colourize the DNA strand or not.
|
21
|
-
#
|
22
|
-
# Invocation examples:
|
23
|
-
#
|
24
|
-
# report_main_sequence(::Bioroebe.start_codon?)
|
25
|
-
# report_main_sequence(:start_codon) # ← is the same as the ^^^ above
|
26
|
-
# report_main_sequence(:stop_codon) # ← Colourize the stop-codons.
|
27
|
-
#
|
28
|
-
# ========================================================================= #
|
29
|
-
def report_main_sequence(
|
30
|
-
colourize = nil,
|
31
|
-
input = dna_sequence_as_string?
|
32
|
-
)
|
33
|
-
case colourize
|
34
|
-
# ======================================================================= #
|
35
|
-
# === :stop_codon
|
36
|
-
#
|
37
|
-
# We attempt to colourize the stop-codons via this method.
|
38
|
-
# ======================================================================= #
|
39
|
-
when :stop_codon
|
40
|
-
colourize = stop_codons?
|
41
|
-
# ======================================================================= #
|
42
|
-
# === :stop_codon_in_frame1
|
43
|
-
# ======================================================================= #
|
44
|
-
when :stop_codon_in_frame1
|
45
|
-
new_string = remove_trailing_escape_code(
|
46
|
-
colour_for_nucleotides(
|
47
|
-
''.dup
|
48
|
-
).dup
|
49
|
-
).dup
|
50
|
-
scanned = input.scan(/.../)
|
51
|
-
scanned.each {|codon|
|
52
|
-
if is_a_stop_codon? codon
|
53
|
-
new_string << colour_for_stop_codon(codon.dup).dup+
|
54
|
-
remove_trailing_escape_code(
|
55
|
-
colour_for_nucleotides
|
56
|
-
)
|
57
|
-
else
|
58
|
-
new_string << codon.dup
|
59
|
-
end
|
60
|
-
}
|
61
|
-
e padding?+
|
62
|
-
rev+
|
63
|
-
leading_five_prime+
|
64
|
-
new_string+
|
65
|
-
rev+
|
66
|
-
trailing_three_prime
|
67
|
-
return
|
68
|
-
# ======================================================================= #
|
69
|
-
# === :start_codon
|
70
|
-
# ======================================================================= #
|
71
|
-
when :start_codon # Instruction to use a start codon here.
|
72
|
-
colourize = start_codon?
|
73
|
-
# ======================================================================= #
|
74
|
-
# === :start_and_stop_codon
|
75
|
-
# ======================================================================= #
|
76
|
-
when :start_and_stop_codon
|
77
|
-
colourize = [start_codon?, stop_codons?]
|
78
|
-
end
|
79
|
-
# ======================================================================= #
|
80
|
-
# The old code was:
|
81
|
-
# erev padding?+
|
82
|
-
# dna_with_ends(input, colourize) { :honour_coding_area_if_it_exists } # The dna_with_ends() method can deal with Arrays.
|
83
|
-
# This is now mostly ported (April 2020), but the :honour_coding_area_if_it_exists
|
84
|
-
# is not yet ported, so the above code will remain as-is, for the time
|
85
|
-
# being.
|
86
|
-
# ======================================================================= #
|
87
|
-
show_nucleotide_sequence?.report_this_sequence(input) {{
|
88
|
-
padding_to_use: padding?,
|
89
|
-
colourize_this_subsequence: colourize
|
90
|
-
}}
|
91
|
-
end; alias show_main_string report_main_sequence # === show_main_string
|
92
|
-
alias show_main_sequence report_main_sequence # === show_main_sequence
|
93
|
-
alias show_colourized_sequence report_main_sequence # === show_colourized_sequence
|
94
|
-
alias show_dna_sequence report_main_sequence # === show_dna_sequence
|
95
|
-
|
96
|
-
# ========================================================================= #
|
97
|
-
# === show_composition
|
98
|
-
#
|
99
|
-
# This method will analyse the DNA string composition.
|
100
|
-
#
|
101
|
-
# Invocation example:
|
102
|
-
#
|
103
|
-
# scompo
|
104
|
-
#
|
105
|
-
# ========================================================================= #
|
106
|
-
def show_composition(
|
107
|
-
i = dna_string?
|
108
|
-
)
|
109
|
-
length = i.size
|
110
|
-
report_size_of_main_string
|
111
|
-
hash = ::Bioroebe::CountAmountOfNucleotides.show_composition(i) # bl count_nucleotides
|
112
|
-
erev 'Showing how many of the '+steelblue('four nucleotides')+rev+
|
113
|
-
' are in that sequence (absolute numbers):'
|
114
|
-
print ' '
|
115
|
-
string = ''.dup
|
116
|
-
hash.each_pair {|nucleotide, n_times|
|
117
|
-
string << "#{nucleotide}: #{lightslategray(n_times.to_s)}#{rev}, "
|
118
|
-
}
|
119
|
-
e string.rstrip.chop # .chop() to get rid of the last ',' token.
|
120
|
-
erev "The respective frequencies derived from these absolute "\
|
121
|
-
"numbers, #{steelblue('in percent')}#{rev}"\
|
122
|
-
", are:"
|
123
|
-
print ' '
|
124
|
-
hash.each_pair {|nucleotide, n_times|
|
125
|
-
percentage = (n_times.to_f * 100 / length).round(2).to_s
|
126
|
-
print "#{rev}#{nucleotide}: #{orange(percentage)}#{rev}% "
|
127
|
-
}; erev
|
128
|
-
end
|
129
|
-
|
130
|
-
# ========================================================================= #
|
131
|
-
# === show_codon_usage
|
132
|
-
#
|
133
|
-
# This shows the codon usage of the string.
|
134
|
-
# ========================================================================= #
|
135
|
-
def show_codon_usage(
|
136
|
-
i = dna_sequence_as_string?
|
137
|
-
)
|
138
|
-
if i.is_a? Array
|
139
|
-
if i.empty?
|
140
|
-
i = dna_sequence_as_string?
|
141
|
-
else
|
142
|
-
i = i.flatten.compact.join
|
143
|
-
end
|
144
|
-
end
|
145
|
-
::Bioroebe::ShowCodonUsage.new(i)
|
146
|
-
end
|
147
|
-
|
148
|
-
# ========================================================================= #
|
149
|
-
# === show_all_codon_tables
|
150
|
-
#
|
151
|
-
# We used to tap into the Bio::CodonTable here for this part.
|
152
|
-
#
|
153
|
-
# But for some time now, we no longer depend on this part - we
|
154
|
-
# have made available all of this in yaml files.
|
155
|
-
#
|
156
|
-
# The argument to this method can either be:
|
157
|
-
#
|
158
|
-
# :everything
|
159
|
-
# :only_names
|
160
|
-
#
|
161
|
-
# The first one is the default. This means that we will show everything.
|
162
|
-
#
|
163
|
-
# The second version is useful if you only want to report the names
|
164
|
-
# of the codon table in question. Several aliases exist for the
|
165
|
-
# second invocation.
|
166
|
-
# ========================================================================= #
|
167
|
-
def show_all_codon_tables(
|
168
|
-
show_what = :everything
|
169
|
-
)
|
170
|
-
unless Bioroebe.const_defined? :ShowCodonTables
|
171
|
-
require 'bioroebe/codons/show_codon_tables.rb'
|
172
|
-
end
|
173
|
-
e
|
174
|
-
::Bioroebe::ShowCodonTables.new(show_what)
|
175
|
-
e
|
176
|
-
end
|
177
|
-
|
178
|
-
# ========================================================================= #
|
179
|
-
# === report_n_start_codons
|
180
|
-
#
|
181
|
-
# Use this method to count how many ATG codons we have. We will honour
|
182
|
-
# the default start_codon in use.
|
183
|
-
#
|
184
|
-
# The third argument determines which reading frame is to be used. By
|
185
|
-
# default, the method will use the first reading frame.
|
186
|
-
# ========================================================================= #
|
187
|
-
def report_n_start_codons(
|
188
|
-
this_string = string?,
|
189
|
-
use_this_as_start_codon = ::Bioroebe.start_codon?, # Use the proper start codon.
|
190
|
-
in_which_frame = :frame1
|
191
|
-
)
|
192
|
-
# ======================================================================= #
|
193
|
-
# === Handle blocks next
|
194
|
-
# ======================================================================= #
|
195
|
-
if block_given?
|
196
|
-
yielded = yield
|
197
|
-
case yielded
|
198
|
-
when /^frame/
|
199
|
-
in_which_frame = yielded.to_sym
|
200
|
-
end
|
201
|
-
end
|
202
|
-
# ======================================================================= #
|
203
|
-
# The following can be invoked via:
|
204
|
-
# n_ORF? frame1
|
205
|
-
# ======================================================================= #
|
206
|
-
case in_which_frame
|
207
|
-
when :frame1
|
208
|
-
in_which_frame = 'frame 1'
|
209
|
-
when :frame2
|
210
|
-
in_which_frame = 'frame 2'
|
211
|
-
when :frame3
|
212
|
-
in_which_frame = 'frame 3'
|
213
|
-
end
|
214
|
-
n_start_codons = this_string.upcase.scan(/#{use_this_as_start_codon}/).size.to_s
|
215
|
-
# ======================================================================= #
|
216
|
-
# The above is not yet in the proper frame, though.
|
217
|
-
# ======================================================================= #
|
218
|
-
trailing_message = " Initiation Codons "\
|
219
|
-
"(in #{orangered(in_which_frame)}#{rev})."
|
220
|
-
erev "Our main string has #{sfancy(n_start_codons)}#{rev}"\
|
221
|
-
" #{simp(use_this_as_start_codon)}#{rev} ("\
|
222
|
-
"#{use_this_as_start_codon.tr('T','U')})"+
|
223
|
-
trailing_message
|
224
|
-
if coding_area? # This has been user-supplied in that case.
|
225
|
-
erev 'However had, only the nucleotides from position'
|
226
|
-
erev "#{sfancy(coding_area?.to_s.split('..').first.to_s)}#{rev}"\
|
227
|
-
" to position #{sfancy(coding_area?.to_s.split('..').last.to_s)}"\
|
228
|
-
"#{rev} will be colourized."
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
# ========================================================================= #
|
233
|
-
# === show_human_genome_version
|
234
|
-
#
|
235
|
-
# Use this method to show the most current human genome version.
|
236
|
-
# ========================================================================= #
|
237
|
-
def show_human_genome_version
|
238
|
-
human_genome_version = '' # Default.
|
239
|
-
remote_URL = 'https://www.ensembl.org/Homo_sapiens/Info/Index'
|
240
|
-
dataset = URI.open(remote_url).read
|
241
|
-
use_this_regex = /Genome assembly: (.{1,11}\.p\d+) <small>/ # See: https://rubular.com/r/DD5FhaPs3b
|
242
|
-
scanned = dataset.scan(use_this_regex).flatten
|
243
|
-
human_genome_version = scanned.first.to_s
|
244
|
-
erev "The most current human genome version is: "\
|
245
|
-
"#{sfancy(human_genome_version)}"
|
246
|
-
erev "The URL that was used to query this has been: "\
|
247
|
-
"#{steelblue(remote_URL)}"
|
248
|
-
end
|
249
|
-
|
250
|
-
# ========================================================================= #
|
251
|
-
# === show_oligo_length_three
|
252
|
-
#
|
253
|
-
# We align in chunks of three and tell the user how often we can find
|
254
|
-
# these individual codons.
|
255
|
-
#
|
256
|
-
# Invocation example:
|
257
|
-
#
|
258
|
-
# random 99; oligo_3
|
259
|
-
#
|
260
|
-
# ========================================================================= #
|
261
|
-
def show_oligo_length_three(
|
262
|
-
sequence = dna_sequence_object?
|
263
|
-
)
|
264
|
-
sequence = sequence.upcase # This is the sequence that will be scanned.
|
265
|
-
dna = ::Bioroebe.dna? # This is equal to A, T, C and G.
|
266
|
-
erev 'We will align the nucleotides in chunks of 3 and show their '\
|
267
|
-
'frequency.'
|
268
|
-
dna.each {|first_entry| # First nucleotide.
|
269
|
-
dna.each {|second_entry| # Second nucleotide.
|
270
|
-
dna.each {|third_entry| # Third nucleotide.
|
271
|
-
_ = first_entry+second_entry+third_entry
|
272
|
-
erev _+' '+sequence.scan(_).size.to_s
|
273
|
-
}
|
274
|
-
}
|
275
|
-
}
|
276
|
-
end
|
277
|
-
|
278
|
-
# ========================================================================= #
|
279
|
-
# === show_oligo_length_two
|
280
|
-
#
|
281
|
-
# Show all oligos of length two, together with how often each one occurs.
|
282
|
-
# ========================================================================= #
|
283
|
-
def show_oligo_length_two(
|
284
|
-
string = string?
|
285
|
-
)
|
286
|
-
sequence = string.upcase # Shorter copy and always upcased.
|
287
|
-
dna = ::Bioroebe.dna?
|
288
|
-
dna.each {|first_entry|
|
289
|
-
dna.each {|second_entry|
|
290
|
-
_ = "#{first_entry}#{second_entry}"
|
291
|
-
erev _+' '+sequence.scan(_).size.to_s
|
292
|
-
}
|
293
|
-
}
|
294
|
-
end
|
295
|
-
|
296
|
-
# ========================================================================= #
|
297
|
-
# === show_position_for_the_main_sequence
|
298
|
-
# ========================================================================= #
|
299
|
-
def show_position_for_the_main_sequence
|
300
|
-
array = sequence?.scan(/.{,25}/)
|
301
|
-
index_position = 1
|
302
|
-
array.each {|entry|
|
303
|
-
unless entry.empty?
|
304
|
-
erev entry.split(//).join(' ')
|
305
|
-
second_line = ''
|
306
|
-
start = index_position
|
307
|
-
index_position += entry.size
|
308
|
-
start.upto(index_position-1) {|position|
|
309
|
-
second_line << position.to_s.ljust(4)
|
310
|
-
}
|
311
|
-
erev cadetblue(second_line)+rev
|
312
|
-
e
|
313
|
-
end
|
314
|
-
}
|
315
|
-
end
|
316
|
-
|
317
|
-
# ========================================================================= #
|
318
|
-
# === report_this_input_was_not_found
|
319
|
-
#
|
320
|
-
# This method is used to notify the user that a certain input was
|
321
|
-
# not found.
|
322
|
-
# ========================================================================= #
|
323
|
-
def report_this_input_was_not_found(
|
324
|
-
i = ''
|
325
|
-
)
|
326
|
-
unless i.empty?
|
327
|
-
erev "Input `#{sfancy(i.to_s)}#{rev}` was not "\
|
328
|
-
"found to be a valid input for the BioShell."
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
|
-
# ========================================================================= #
|
333
|
-
# === show_local_sequences
|
334
|
-
#
|
335
|
-
# This method will show the available local sequences.
|
336
|
-
# ========================================================================= #
|
337
|
-
def show_local_sequences
|
338
|
-
possible_matches = return_fasta_files_in_the_log_directory
|
339
|
-
if possible_matches.empty?
|
340
|
-
erev 'No local fasta sequences could be found.'
|
341
|
-
else
|
342
|
-
e
|
343
|
-
erev 'The following local sequences were found in '\
|
344
|
-
'the main log'
|
345
|
-
erev 'directory ('+sdir(log_dir?)+rev+').'
|
346
|
-
e
|
347
|
-
possible_matches.each_with_index {|entry, index|
|
348
|
-
index += 1
|
349
|
-
_ = possible_matches.size.to_s.size
|
350
|
-
erev padding?+'('+index.to_s.rjust(_)+') '+rev+
|
351
|
-
sfile(File.basename(entry))+rev
|
352
|
-
}; e
|
353
|
-
end
|
354
|
-
end
|
355
|
-
|
356
|
-
# ========================================================================= #
|
357
|
-
# === show_nucleotide_sequence?
|
358
|
-
# ========================================================================= #
|
359
|
-
def show_nucleotide_sequence?
|
360
|
-
@internal_hash[:show_nucleotide_sequence]
|
361
|
-
end; alias display_nucleotide_object? show_nucleotide_sequence? # === display_nucleotide_object?
|
362
|
-
|
363
|
-
# ========================================================================= #
|
364
|
-
# === show_sequence_with_a_ruler
|
365
|
-
#
|
366
|
-
# This will show the main sequence together with a "ruler" on top.
|
367
|
-
#
|
368
|
-
# The first argument specifies how many nucleotides are to be displayed
|
369
|
-
# per given line.
|
370
|
-
#
|
371
|
-
# This method can also be called in this way:
|
372
|
-
#
|
373
|
-
# show_sequence_with_a_ruler { :without_colours }
|
374
|
-
#
|
375
|
-
# This will skip showing the ruler.
|
376
|
-
# ========================================================================= #
|
377
|
-
def show_sequence_with_a_ruler(
    group_together_n_nucleotides = :default,
    use_this_sequence            = main_sequence?
  )
  # Prints the sequence in chunks of a fixed width, one chunk per call to
  # display_with_prior_formatting on the object returned by
  # show_nucleotide_sequence?.
  #
  # group_together_n_nucleotides:: chunk width. Accepts an Integer, a
  #                                numeric String, :default / nil (70), or
  #                                an Array whose first element is one of
  #                                these.
  # use_this_sequence::            the sequence to display; to_s is applied.
  #
  # A block may return :without_colours; that symbol is then handed to the
  # display object instead of the default :show_ruler.
  #
  # Returns the Array of chunks that were displayed.
  default_chunk_width = 70
  chunk_width = group_together_n_nucleotides
  if chunk_width.is_a?(Array)
    chunk_width = chunk_width.first
    # to_s first, so that an Integer element does not raise on .empty?
    chunk_width = :default if chunk_width.to_s.empty?
  end
  chunk_width = default_chunk_width if chunk_width.nil? || chunk_width == :default
  # =========================================================================== #
  # We need a positive Integer here. Anything else (for instance a
  # non-numeric String, which to_i turns into 0) falls back onto the default,
  # because a width below 1 cannot form chunks.
  # =========================================================================== #
  chunk_width = chunk_width.to_i if chunk_width.respond_to?(:to_i)
  unless chunk_width.is_a?(Integer) && chunk_width.positive?
    chunk_width = default_chunk_width
  end
  e
  e "Displaying the main sequence (length: #{use_this_sequence.to_s.size}) "\
    "in a chunk of #{slateblue(chunk_width.to_s)}#{rev}"\
    " nucleotides/\naminoacids next."
  e
  use_this_sequence = use_this_sequence.to_s
  # The capture group keeps every full-width chunk in the split result; the
  # shorter remainder is kept as well. Empty separators are dropped.
  chunks = use_this_sequence.split(/(.{#{chunk_width}})/).reject(&:empty?)
  use_this_ruler_type = :show_ruler # Note that :show_ruler is the default.
  # =========================================================================== #
  # === Handle blocks given next
  # =========================================================================== #
  if block_given?
    case yield
    when :without_colours
      use_this_ruler_type = :without_colours
    end
  end
  chunks.each {|sequence|
    show_nucleotide_sequence?.display_with_prior_formatting(sequence) {
      use_this_ruler_type
    }
    e
  }
end
|
430
|
-
|
431
|
-
# ========================================================================= #
|
432
|
-
# === dna_with_ends
|
433
|
-
#
|
434
|
-
# Display DNA with proper ends.
|
435
|
-
#
|
436
|
-
# The first argument should be the string that we will colourize.
|
437
|
-
#
|
438
|
-
# If the second argument is given (`optional_colourize`), then this
|
439
|
-
# method will colourize the sequence at certain positions. This
|
440
|
-
# can be useful to display, for instance, restriction-sites.
|
441
|
-
# ========================================================================= #
|
442
|
-
def dna_with_ends(
    i                    = dna_sequence_as_string?,
    optional_colourize   = nil,
    colourize_everything = true
  )
  # Returns the sequence wrapped in leading_five_prime and
  # trailing_three_prime, optionally with sub-sequences colourized.
  #
  # i::                    the sequence to decorate.
  # optional_colourize::   a String or an Array of Strings to highlight.
  # colourize_everything:: true highlights every occurrence, 1 only the
  #                        first one (this is only consulted when
  #                        optional_colourize is neither a String nor an
  #                        Array).
  #
  # Highlighting only happens when a block returning
  # :honour_coding_area_if_it_exists is given. Without any block the whole
  # sequence is passed through sfancy instead.
  #
  # The input is copied first, so neither the caller's String nor the
  # default sequence is modified by the upcasing or by the inserted
  # colour codes.
  i = i.to_s.dup
  i.upcase! if config?.respond_to?(:upcase_nucleotides) && config?.upcase_nucleotides
  optional_colourize = [optional_colourize] if optional_colourize.is_a? String
  if block_given?
    case yield
    # ========================================================================= #
    # === :honour_coding_area_if_it_exists
    # ========================================================================= #
    when :honour_coding_area_if_it_exists
      coding_area = @internal_hash[:coding_area]
      if optional_colourize && coding_area
        # ===================================================================== #
        # Colourize only within the designated coding area ("start..end").
        # We deduct 1 because ruby Strings are indexed from 0.
        # ===================================================================== #
        boundaries     = coding_area.to_s.split('..')
        start_position = [boundaries.first.to_i - 1, 0].max
        end_position   = boundaries.last.to_i - 1
        # i[0, n] is empty for n == 0; the former i[0..(n-1)] turned into
        # i[0..-1] there and duplicated the whole sequence.
        upstream   = i[0, start_position].to_s
        inside     = i[start_position..end_position].to_s.dup
        # to_s guards against a nil slice past the end of the sequence.
        downstream = i[(end_position + 1)..-1].to_s
        Array(optional_colourize).each {|inner_entry|
          inner_entry = inner_entry.to_s
          inside.gsub!(inner_entry, yellow+inner_entry+rev)
        }
        i = upstream + inside + downstream
      elsif optional_colourize
        if optional_colourize.is_a? Array
          # =================================================================== #
          # Apply all entries given in the Array.
          # =================================================================== #
          optional_colourize.flatten.each {|inner_entry|
            i.gsub!(inner_entry, colour_for_stop_codon(inner_entry)+rev)
          }
        else
          # =================================================================== #
          # Make sure that we have a String past this point.
          # =================================================================== #
          optional_colourize = optional_colourize.to_s
          if colourize_everything == true
            i.gsub!(optional_colourize, colour_for_stop_codon(optional_colourize)+rev)
          elsif colourize_everything == 1
            i.sub!(optional_colourize, colour_for_stop_codon(optional_colourize)+rev)
          end
        end
      end
    end
  else
    i = "#{sfancy(i)}#{rev}"
  end
  # =========================================================================== #
  # Report the DNA sequence with leading 5' prime and trailing 3' prime.
  # =========================================================================== #
  "#{leading_five_prime}#{i}#{trailing_three_prime}"
end
|
511
|
-
|
512
|
-
require 'bioroebe/toplevel_methods/matches.rb'
|
513
|
-
# ========================================================================= #
|
514
|
-
# === report_the_first_atg
|
515
|
-
#
|
516
|
-
# This method will simply report the first ATG codon.
|
517
|
-
# ========================================================================= #
|
518
|
-
def report_the_first_atg
  # Reports the position of the first start codon in the main DNA sequence
  # and then shows up to 100 nucleotides beginning at that position.
  #
  # Reports that nothing was found, rather than raising, when the sequence
  # contains no start codon.
  dna_sequence  = dna_sequence_object_as_string?
  array_matches = ::Bioroebe.return_all_substring_matches(
    dna_sequence, start_codon?
  )
  # Each match is used as an Array whose first element is the position.
  first_match = Array(array_matches).first
  if first_match.nil?
    erev 'No ATG could be found in this sequence.'
    return
  end
  start_position = Array(first_match).first
  erev 'The first ATG can be found at position '+
       simp(start_position.to_s)+rev+'.'
  erev 'We will next show the first 100 nucleotides, starting from this:'
  # The reported position is used 1-based here, hence the -1 for slicing.
  report_five_prime_three_prime(
    dna_sequence_object?[start_position-1,100]
  )
end
|
531
|
-
|
532
|
-
# ========================================================================= #
|
533
|
-
# === show_aminoacid_sequence
|
534
|
-
#
|
535
|
-
# To show the aminoacid sequence, do:
|
536
|
-
# show_aa
|
537
|
-
# ========================================================================= #
|
538
|
-
def show_aminoacid_sequence
  # Prints the aminoacid sequence, prefixed by the current padding.
  padded_sequence = padding?+aminoacid_sequence?
  erev padded_sequence
end
|
542
|
-
|
543
|
-
# ========================================================================= #
|
544
|
-
# === show_dna_string (show string tag, show tag)
|
545
|
-
#
|
546
|
-
# Use this method to show the @sequence, or another string of your
|
547
|
-
# choosing, if you pass it to the method.
|
548
|
-
#
|
549
|
-
# You can also invoke this method with something like this:
|
550
|
-
#
|
551
|
-
# show_string { :with_colourized_separator }
|
552
|
-
#
|
553
|
-
# This means that we will use '|' separators that are colourized.
|
554
|
-
# ========================================================================= #
|
555
|
-
def show_dna_string(
    this_string              = dna_string?,
    truncate_too_long_result = do_truncate?
  )
  # Displays a sequence. In :dna mode the String is handed to
  # display_nucleotide_sequence; in any other mode show_aminoacid_sequence
  # is called instead.
  #
  # this_string::              the String to show; nil falls back onto
  #                            dna_string?.
  # truncate_too_long_result:: truthy values cut Strings longer than
  #                            TRUNCATE_AT_N_ELEMENTS; :do_not_truncate
  #                            disables this.
  #
  # A block returning :with_colourized_separator inserts a colourized '|'
  # after every third character.
  without_primes = :do_not_truncate_and_do_not_show_leader_and_trailer
  # NOTE(review): result is assembled below but never displayed or
  # returned - confirm whether it can be dropped.
  result = rev.dup
  truncate_too_long_result = false if :do_not_truncate == truncate_too_long_result
  limit = TRUNCATE_AT_N_ELEMENTS
  this_string = dna_string? if this_string.nil? && dna_string?
  return report_that_a_string_must_be_assigned_first if this_string.to_s.empty?
  # this_string is deliberately not upcased here.
  return show_aminoacid_sequence unless mode? == :dna

  # NOTE(review): the without_primes symbol is truthy, so it also
  # truncates here despite its name - confirm that this is intended.
  if this_string.size > limit && truncate_too_long_result
    this_string = this_string[0, limit]+
                  swarn(' [TRUNCATED as the sequence is longer than '+
                        limit.to_s+' nucleotides]')
  end
  # ========================================================================= #
  # Optionally place a colourized separator after every complete triplet.
  # ========================================================================= #
  if block_given? && :with_colourized_separator == yield
    piped = ''.dup
    this_string.split(//).each_slice(3) {|triplet|
      piped << triplet.join
      piped << paleturquoise('|')+sfancy if triplet.size == 3
    }
    this_string = piped
  end
  result << padding?+leading_5_prime unless truncate_too_long_result == without_primes
  result << colourize_dna_sequence(this_string)+rev
  result << trailing_3_prime unless truncate_too_long_result == without_primes
  # ========================================================================= #
  # The actual display happens in display_nucleotide_sequence.
  # ========================================================================= #
  display_nucleotide_sequence(this_string)
end
alias show_main_string       show_dna_string # === show_main_string
alias report_sequence        show_dna_string # === report_sequence
alias show_sequence          show_dna_string # === show_sequence
alias show_main_dna_sequence show_dna_string # === show_main_dna_sequence
alias show_string            show_dna_string # === show_string
|
623
|
-
|
624
|
-
# ========================================================================= #
|
625
|
-
# === report_size_of_main_string
|
626
|
-
# ========================================================================= #
|
627
|
-
def report_size_of_main_string(
    i              = dna_sequence_object?,
    type_of_string = 'main ' # This is usually the main DNA string.
  )
  # Prints how many elements the given sequence has. nil and an empty
  # Array both fall back onto dna_sequence_object?.
  i = dna_sequence_object? if i.nil?
  i = dna_sequence_object? if Array === i && i.empty?
  headline = 'The '+type_of_string+'string has '
  erev headline+sfancy(i.size.to_s)+rev+' '+nucleotides_or_aminoacids?+'.'
end
alias report_length_of_the_dna_string report_size_of_main_string # === report_length_of_the_dna_string
alias report_size_of_this_sequence    report_size_of_main_string # === report_size_of_this_sequence
|
637
|
-
|
638
|
-
# ========================================================================= #
|
639
|
-
# === show_editor_in_use
|
640
|
-
# ========================================================================= #
|
641
|
-
def show_editor_in_use
  # Prints the value of the MAIN_EDITOR constant.
  e(MAIN_EDITOR)
end
|
644
|
-
|
645
|
-
# ========================================================================= #
|
646
|
-
# === show_welcome_message
|
647
|
-
#
|
648
|
-
# Show a little welcome message on startup. This can be disabled of
|
649
|
-
# course.
|
650
|
-
# ========================================================================= #
|
651
|
-
def show_welcome_message
  # Prints the two-line greeting, unless silent_startup? is truthy.
  return if silent_startup?
  version_part = sfancy(version?.to_s)+rev
  updated_part = simp(::Bioroebe.last_updated?)+rev
  erev 'Welcome to the Bioroebe::Shell Version '+version_part+
       ', last updated: '+updated_part+'.'
  erev 'Type "'+sfancy('help')+rev+'" to get some help.'
end
|
662
|
-
|
663
|
-
# ========================================================================= #
|
664
|
-
# === show_the_weight_of_the_four_individual_nucleotides
|
665
|
-
# ========================================================================= #
|
666
|
-
def show_the_weight_of_the_four_individual_nucleotides
  # Prints one line per nucleotide (A, T, C, G): the label, the value of
  # the name reader right-aligned to 10 characters, and the colourized
  # value of the weight reader.
  rows = [
    [' A: ', :adenin?,  :weight_of_adenin?],
    [' T: ', :thymin?,  :weight_of_thymin?],
    [' C: ', :cytosin?, :weight_of_cytosin?],
    [' G: ', :guanin?,  :weight_of_guanin?]
  ]
  e
  rows.each {|label, name_reader, weight_reader|
    # send is used so that the readers are invoked row by row.
    erev label+send(name_reader).rjust(10)+' '+
         palevioletred(send(weight_reader))
  }
  e
end
|
678
|
-
|
679
|
-
# ========================================================================= #
|
680
|
-
# === show_this_subsequence
|
681
|
-
#
|
682
|
-
# Sometimes we want to show a subsequence. This method helps us to do
|
683
|
-
# so, too.
|
684
|
-
#
|
685
|
-
# The input may be "tainted", e. g. be a String like "12,345" or
|
686
|
-
# "12.345", so this method will have to eliminate the ',' and '.'
|
687
|
-
# characters as well, before converting this String into an
|
688
|
-
# Integer. (It must be an Integer because nucleotide counting
|
689
|
-
# can logically not be a Float.)
|
690
|
-
#
|
691
|
-
# Usage example:
|
692
|
-
#
|
693
|
-
# random 99; [22..33]
|
694
|
-
#
|
695
|
-
# ========================================================================= #
|
696
|
-
def show_this_subsequence(
    start_position        = 1,
    end_position          = 10,
    work_on_this_sequence = dna_sequence_object?
  )
  # Shows the part of the sequence between start_position and end_position
  # (both are passed on to start_end of the sequence object).
  #
  # Both positions may be given as Strings containing ',' or '.' as
  # thousands separators; these characters are removed before to_i.
  # A start below 1 becomes 1, an end beyond the sequence is clamped to
  # the sequence size. Both corrections are reported to the user.
  strip_separators = ->(raw) { raw.to_s.delete(',.').to_i }
  start_position = strip_separators.call(start_position)
  end_position   = strip_separators.call(end_position)
  if start_position < 1
    erev 'The minimum for the start-position must be 1, so this'
    erev 'is now treated as one rather than '+start_position.to_s+'.'
    start_position = 1
  end
  if end_position > work_on_this_sequence.size
    total_length = work_on_this_sequence.size
    too_long_notice = [
      'The sequence is ', slateblue('too long'), rev, ' (',
      crimson('end_position'), rev, ' is at ',
      sfancy(end_position.to_s), rev, ', ',
      nucleotides_or_aminoacids?.to_s, ' sequence length was: ',
      sfancy(total_length.to_s), rev, ').'
    ]
    erev(too_long_notice.join)
    erev 'It will be limited next to '+
         sfancy(total_length.to_s)+rev+' in length.'
    end_position = total_length
  end
  sequence = work_on_this_sequence.start_end(start_position, end_position)
  unless sequence
    return erev('This subsequence appears to be invalid '\
                '(start: '+start_position.to_s+', end: '+end_position.to_s+')')
  end
  size = sequence.size.to_s
  # Stays empty when the sequence object does not know its own type.
  unit =
    if work_on_this_sequence.respond_to? :nucleotides_or_aminoacids?
      work_on_this_sequence.nucleotides_or_aminoacids?.to_s
    else
      ''
    end
  erev 'Next showing a subsequence, '+unit+' '+
       olive(start_position.to_s)+rev+' to '+
       olive(end_position.to_s)+rev+
       ' (including '+olive(start_position.to_s)+
       rev+' and '+olive(end_position.to_s)+rev+').'
  erev 'The length of the fragment will be '+simp(size)+rev+' '+unit+'.'
  report_this_dna_sequence_with_proper_trailer_and_leader(sequence) {
    :try_to_colourize_start_codon
  }
end
|
746
|
-
|
747
|
-
# ========================================================================= #
|
748
|
-
# === report_where_the_home_directory_can_be_found
|
749
|
-
# ========================================================================= #
|
750
|
-
def report_where_the_home_directory_can_be_found(i = log_dir?)
  # Prints the given directory (by default log_dir?), framed by blank
  # lines.
  erev 'The "home" directory (actually called the log directory) can be found here:'
  e
  location = sdir(i)
  e " #{location}"
  e
end
|
759
|
-
|
760
|
-
# ========================================================================= #
|
761
|
-
# === show_both_dna_strands
|
762
|
-
# ========================================================================= #
|
763
|
-
def show_both_dna_strands
  # Shows the main sequence first and then the complement of string?,
  # the latter with its prime ends included.
  show_main_sequence
  template = string?
  show_complement(template, :include_prime_ends)
end
alias show_double_strand show_both_dna_strands # === show_double_strand
|
767
|
-
|
768
|
-
# ========================================================================= #
|
769
|
-
# === show_codon_piped_sequence
|
770
|
-
# ========================================================================= #
|
771
|
-
def show_codon_piped_sequence
  # Delegates to display_nucleotide_sequence for the default sequence and
  # requests :piped output through the block. (An older inline variant
  # that inserted '|' after every third character via gsub is no longer
  # used.)
  display_nucleotide_sequence(:default) do
    :piped
  end
end
|
776
|
-
|
777
|
-
# ========================================================================= #
|
778
|
-
# === show (show tag)
|
779
|
-
#
|
780
|
-
# Bundle together some show-related methods.
|
781
|
-
# ========================================================================= #
|
782
|
-
def show(i)
  # Dispatches onto one of the show_* methods, depending on the given
  # keyword. An Array is joined with ' ' and stripped first. Unknown
  # keywords do nothing and yield nil.
  i = i.join(' ').strip if i.is_a? Array
  codon_table_keywords = ['codon_table', 'codon', 'codon table']
  blosum_keywords      = ['blosum', 'blosum matrix', 'blosum_matrix']
  if codon_table_keywords.include?(i)
    show_codon_table
  elsif blosum_keywords.include?(i)
    show_blosum_matrix
  elsif ['', nil].include?(i) # Empty or nil.
    show_dna_string
  end
end
|
793
|
-
|
794
|
-
# ========================================================================= #
|
795
|
-
# === display_nucleotide_sequence
|
796
|
-
#
|
797
|
-
# Consistently use this method whenever you wish to display a
|
798
|
-
# nucleotide sequence.
|
799
|
-
# ========================================================================= #
|
800
|
-
def display_nucleotide_sequence(
    this_sequence = dna_sequence_object?,
    &block
  )
  # Hands the sequence to report_this_sequence of the object returned by
  # show_nucleotide_sequence?, together with an options Hash holding the
  # padding and whether piped output was requested.
  #
  # this_sequence:: the sequence to display; :default means
  #                 dna_sequence_object?.
  #
  # A block returning :piped or :show_piped switches piped output on.
  this_sequence = dna_sequence_object? if :default == this_sequence
  piped_output = block_given? && [:piped, :show_piped].include?(yield)
  options = {
    padding_to_use:    padding?,
    show_piped_output: piped_output
  }
  show_nucleotide_sequence?.report_this_sequence(this_sequence) { options }
end
alias display_this_nucleotide_sequence display_nucleotide_sequence # === display_this_nucleotide_sequence
alias display_this_sequence            display_nucleotide_sequence # === display_this_sequence
alias show_this_sequence               display_nucleotide_sequence # === show_this_sequence
|
825
|
-
|
826
|
-
# ========================================================================= #
|
827
|
-
# === report_how_many_aminoacids_we_have
|
828
|
-
#
|
829
|
-
# This method will report how many aminoacids we have assigned.
|
830
|
-
# ========================================================================= #
|
831
|
-
def report_how_many_aminoacids_we_have
  # Reports the number of aminoacids: the size of aminoacids? when that
  # is set, otherwise a third of the DNA sequence size, rounded down
  # through to_i.
  raw_count = aminoacids? ? aminoacids?.size : dna_sequence_object?.size / 3.0
  n_aminoacids = raw_count.to_i
  erev "This sequence has #{simp(n_aminoacids.to_s)}#{rev} aminoacids."
end
|
840
|
-
|
841
|
-
# ========================================================================= #
|
842
|
-
# === show_chromosome_table
|
843
|
-
# ========================================================================= #
|
844
|
-
def show_chromosome_table
  # Prints every key/value pair of the YAML file FILE_CHROMOSOME_NUMBERS,
  # or calls no_file_exists_at when that file is missing.
  name_column_width = 16
  erev 'Chromosome Table from file '+sfile(FILE_CHROMOSOME_NUMBERS)+rev
  unless File.exist? FILE_CHROMOSOME_NUMBERS
    return no_file_exists_at(FILE_CHROMOSOME_NUMBERS)
  end
  table = YAML.load_file(FILE_CHROMOSOME_NUMBERS)
  e
  table.each_pair {|name, n_chromosomes|
    count_column = steelblue(n_chromosomes.to_s.rjust(3))
    erev " "+name.ljust(name_column_width)+' '+count_column
  }
  e
end
|
860
|
-
|
861
|
-
# ========================================================================= #
|
862
|
-
# === report_everything_about_this_amino_acid
|
863
|
-
#
|
864
|
-
# Use this method to report everything about any particular amino acid.
|
865
|
-
# ========================================================================= #
|
866
|
-
def report_everything_about_this_amino_acid(i)
  # Reports the residue, the possible codons and the molecular mass of
  # the given amino acid.
  #
  # i:: the name of an amino acid, or an Array of such names; every entry
  #     of an Array is reported in turn. '?' characters are ignored.
  #
  # The given String is never modified, so frozen Strings are accepted.
  if i.is_a? Array
    return i.each {|entry| report_everything_about_this_amino_acid(entry) }
  end
  # delete returns a copy; the former delete! changed the caller's String.
  i = i.to_s.delete('?')
  erev 'It seems as is we did find an Amino Acid ('+simp(i)+rev+
       '). Its characteristic residue (R) is:'+N+N
  unless AMINO_ACIDS_RESTE.key?(i)
    # ======================================================================= #
    # This here is to map german names, such as "glycin", onto "glycine",
    # the corresponding english name. The name is only replaced when both
    # lookups succeed.
    # ======================================================================= #
    one_letter   = AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER[i]
    english_name = one_letter && AMINO_ACIDS_ENGLISH[one_letter]
    i = english_name.downcase if english_name
  end
  residue = AMINO_ACIDS_RESTE[i.downcase].to_s
  efancy " #{residue}#{N}"
  erev 'The codons coding for the aminoacid '+simp(i)+rev+' are:'
  e
  e ' '+mediumturquoise(
    ::Bioroebe::PossibleCodonsForThisAminoacid.new(i).pretty_result
  )
  e
  molecular_mass_of(i, 2) # The 2 says to round to 2 digits.
end
|
894
|
-
|
895
|
-
# ========================================================================= #
|
896
|
-
# === report_five_prime_three_prime
|
897
|
-
# ========================================================================= #
|
898
|
-
def report_five_prime_three_prime(i)
  # Prints the result of dna_with_ends for the given sequence.
  decorated = dna_with_ends(i)
  erev decorated
end
|
901
|
-
|
902
|
-
# ========================================================================= #
|
903
|
-
# === show_startup_information
|
904
|
-
#
|
905
|
-
# This method here will usually be shown only once, on an initial startup
|
906
|
-
# of the Bioroebe::Shell. Afterwards, it will no longer be shown at all.
|
907
|
-
#
|
908
|
-
# Note that showing this can be disabled.
|
909
|
-
# ========================================================================= #
|
910
|
-
def show_startup_information
  # Prints the introduction for first-time users: recommended commands,
  # how to show this intro again, the documentation URL and the contact
  # email address.
  recommended_components = [' help', ' random', ' assign', ' complement']
  e
  shell_name = olivedrab('Bioroebe::Shell')
  erev "This seems to be the first time that you are using the "\
       "#{shell_name}#{rev}, at the least on"
  erev 'this computer.'
  e
  erev 'It is recommended to have a look at the following components first:'
  e
  recommended_components.each {|component| efancy component }
  e
  erev 'If you want to show this intro-menu again, do:'
  e
  efancy ' show-intro'
  e
  erev 'You can also see more documentation at:'
  e
  e " #{slateblue(URL_TO_THE_DOCUMENTATION)}"
  e
  erev 'If you feel that something is missing or incorrect, feel free to send an email to:'
  e
  efancy " #{EMAIL}"
  e
end
|
937
|
-
|
938
|
-
require 'bioroebe/colours/colourize_sequence.rb'
|
939
|
-
# ========================================================================= #
|
940
|
-
# === report_colourized_sequence
|
941
|
-
#
|
942
|
-
# This method will use the new class ColourizeSequence, rather than
|
943
|
-
# the old internal way.
|
944
|
-
#
|
945
|
-
# In the long run, it may be best to transition all of the Bioroebe::Shell
|
946
|
-
# into the new class - but for now, we will use a hybrid system.
|
947
|
-
#
|
948
|
-
# To invoke this method, try:
|
949
|
-
#
|
950
|
-
# start_and_stop?
|
951
|
-
#
|
952
|
-
# ========================================================================= #
|
953
|
-
def report_colourized_sequence(colourize_what = :start_and_stop_codon)
  # Asks ColourizeSequence.return_sequence for the main DNA sequence,
  # passing colourize_what through the block, and then displays the
  # result via the object returned by show_nucleotide_sequence?.
  colourized = ColourizeSequence.return_sequence(dna_sequence_object?) do
    colourize_what
  end
  show_nucleotide_sequence?.display(colourized)
  e
end
|
960
|
-
|
961
|
-
# ========================================================================= #
|
962
|
-
# === show_complement
|
963
|
-
#
|
964
|
-
# If the second argument is true, we pad via 5' and 3'.
|
965
|
-
#
|
966
|
-
# As of Feb 2015, we will try with leading padding as well.
|
967
|
-
# ========================================================================= #
|
968
|
-
def show_complement(
    i                       = dna_string?,
    also_include_prime_ends = false
  )
  # Prints the complement of the given sequence.
  #
  # i::                       the sequence; nil falls back onto
  #                           dna_string?, an Array is joined.
  # also_include_prime_ends:: a truthy value (or one of the symbols
  #                           :show_leading_primes / :include_prime_ends)
  #                           adds the padding plus the 3' leader and the
  #                           5' trailer around the complement.
  if [:show_leading_primes, :include_prime_ends].include?(also_include_prime_ends)
    also_include_prime_ends = true
  end
  i = dna_string? if i.nil?
  i = i.join('') if i.is_a? Array
  return erev(complement(i)) unless also_include_prime_ends
  erev padding?+rev+
       leading_3_prime+
       sfancy(complement(i))+
       rev+trailing_5_prime
end
|
991
|
-
|
992
|
-
# ========================================================================= #
|
993
|
-
# === show_position_of_sequence
|
994
|
-
#
|
995
|
-
# This currently works only for Amino Acids - at the least I have tested
|
996
|
-
# it only on aminoacids so far, and not on DNA/RNA.
|
997
|
-
# ========================================================================= #
|
998
|
-
def show_position_of_sequence(
    i          = aa_sequence?,
    chunk_size = 10 # How many entries to display per row.
  )
  # Prints the sequence one character per column, with the 1-based
  # position of every character on the line below it. A new row is
  # started after chunk_size characters.
  #
  # i::          the sequence; to_s is applied, so nil prints nothing.
  # chunk_size:: entries per row; values that are not a positive Integer
  #              after to_i fall back onto 10.
  #
  # Columns are at least 2 characters wide and grow with the number of
  # digits of the last position, so that sequences with 100 or more
  # entries stay aligned.
  chunk_size = chunk_size.to_i
  chunk_size = 10 unless chunk_size.positive?
  residues     = i.to_s.chars
  column_width = [2, residues.size.to_s.size].max
  # String.new yields mutable buffers even under frozen String literals.
  rows      = String.new # The Display-String.
  index_row = String.new
  # Iterating over the residues themselves avoids the former extra step
  # past the last entry, which appended an empty column.
  residues.each_with_index {|residue, index|
    rows      << residue.rjust(column_width) << ' '
    index_row << palevioletred((index+1).to_s.rjust(column_width)+' ')
    next unless index % chunk_size == (chunk_size - 1)
    # The row is full: append its positions and start the next row.
    rows << N << index_row << rev << N << N
    index_row = String.new
  }
  erev rows # Report it finally.
  erev index_row
end
|
1019
|
-
|
1020
|
-
# ========================================================================= #
|
1021
|
-
# === show_alu_sequence
|
1022
|
-
#
|
1023
|
-
# Invoke this method by doing something like:
|
1024
|
-
#
|
1025
|
-
# alu_sequence?
|
1026
|
-
#
|
1027
|
-
# ========================================================================= #
|
1028
|
-
def show_alu_sequence
  # Parses FILE_ALU_ELEMENTS as FASTA and prints the length of its
  # sequence, followed by the sequence itself.
  alu_sequence = ::Bioroebe.parse_fasta(FILE_ALU_ELEMENTS).fasta_sequence
  length = sfancy(alu_sequence.size.to_s)
  erev 'The ALU sequence in humans may be this (length: '+length+rev+'):'
  erev ' '+simp(alu_sequence)
end
|
1035
|
-
|
1036
|
-
# ========================================================================= #
|
1037
|
-
# === show_possible_codons_for_this_aminoacid
|
1038
|
-
# ========================================================================= #
|
1039
|
-
def show_possible_codons_for_this_aminoacid(i)
  # Looks up the codons for the given aminoacid via
  # PossibleCodonsForThisAminoacid[], appends the result to
  # @array_aminoacid_sequence and returns it. Nothing is printed here.
  letters = :use_only_the_four_standard_nucleotide_letters
  codons  = PossibleCodonsForThisAminoacid[i, letters]
  @array_aminoacid_sequence << codons
  codons
end
|
1045
|
-
|
1046
|
-
# ========================================================================= #
|
1047
|
-
# === show_date
|
1048
|
-
# ========================================================================= #
|
1049
|
-
def show_date
  # Prints the current date as day.month.year.
  today = Time.now
  erev today.strftime('%d.%m.%Y')
end
|
1052
|
-
|
1053
|
-
# ========================================================================= #
|
1054
|
-
# === show_taxid
|
1055
|
-
#
|
1056
|
-
# This method will show the particular TaxID, using the NCBI taxonomy
|
1057
|
-
# database.
|
1058
|
-
#
|
1059
|
-
# The tax-id 9606 is "Homo sapiens".
|
1060
|
-
# ========================================================================= #
|
1061
|
-
def show_taxid(id = 9606)
  # Fetches the NCBI taxonomy page for the given TaxID and reports the
  # name found in the page's first matching <h2> heading.
  #
  # id:: the taxonomy id; nil means 9606.
  #
  # The reported name is empty when the heading is not found on the page.
  # Errors raised by the HTTP request are not rescued here.
  require 'open-uri' # Only needed here; URI.open replaces Kernel#open for URLs.
  id = 9606 if id.nil?
  id = id.to_s
  # The id may come from user input, hence it is escaped.
  url = 'http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id='+
        URI.encode_www_form_component(id)+'&lvl=0'
  erev 'The remote URL is: '+sfancy(url)
  # The block form closes the connection once the body has been read.
  webpage = URI.open(url, &:read)
  # Capture any organism name rather than only the literal "Homo sapiens".
  regex = /^<table width="100%"><tr><td valign="top"><h2>([^<]+)<\/h2>/
  match = regex.match(webpage.to_s)
  name_of_the_organism = match ? match[1].to_s.dup : ''.dup
  erev 'The TaxID of '+simp(id)+rev+' corresponds to `'+
       sfancy(name_of_the_organism)+rev+'`.'
end
|
1073
|
-
|
1074
|
-
# ========================================================================= #
|
1075
|
-
# === show_nucleotides_table
|
1076
|
-
#
|
1077
|
-
# Use this method to show the nucleotides table - their formula and
|
1078
|
-
# the molecular mass.
|
1079
|
-
# ========================================================================= #
|
1080
|
-
def show_nucleotides_table
  # Prints name, chemical formula and molecular mass (rounded to two
  # digits) for the four nucleotides listed below, as found in the YAML
  # file FILE_NUCLEOTIDES. All other entries of that file are skipped.
  wanted = %w[Adenin Cytosin Guanin Thymin]
  YAML.load_file(FILE_NUCLEOTIDES).each_pair {|name, chemical_formula|
    next unless wanted.include? name
    calculator = ChemistryParadise::CalculateAtomicMass.new(
      chemical_formula, :do_not_report
    )
    mass = calculator.masse?.to_f.round(2)
    e name.to_s.ljust(8)+' -> '+chemical_formula.to_s.rjust(8)+
      rev+' (Molecular mass: '+simp(mass.to_s)+')'+rev
  }
end
|
1097
|
-
|
1098
|
-
# ========================================================================= #
|
1099
|
-
# === show_ori_sequences
|
1100
|
-
#
|
1101
|
-
# The DnaA box is: TTATC[CA]A[CA]A
|
1102
|
-
# ========================================================================= #
|
1103
|
-
# Reports the DnaA box consensus sequence and then hands two concrete
# variants of it (TTATCCACA and TTATCAAAA) to
# try_to_find_restriction_enzymes_for, announcing each search first.
#
# Returns whatever the last try_to_find_restriction_enzymes_for call
# returned.
def show_ori_sequences
  consensus = sfancy("5'-TTATC[CA]A[CA]A-3'")
  erev 'The DnaA box has this consensus sequence: ' + consensus
  %w[TTATCCACA TTATCAAAA].map { |dnaa_box|
    erev 'Searching for ' + dnaa_box
    try_to_find_restriction_enzymes_for(dnaa_box)
  }.last
end
|
1113
|
-
|
1114
|
-
# ========================================================================= #
|
1115
|
-
# === show_segments
|
1116
|
-
#
|
1117
|
-
# This method will show the DNA segments in an R-compatible way.
|
1118
|
-
#
|
1119
|
-
# Usage example:
|
1120
|
-
#
|
1121
|
-
# set AAAATGCAGTAACCCATGCCC; show_segments
|
1122
|
-
#
|
1123
|
-
# ========================================================================= #
|
1124
|
-
# Prints a small table with one row per entry returned by
# ::Bioroebe.scan_this_input_for_startcodons for the current DNA
# sequence: a running number (starting at 1), the start position, the
# end position (start + 2), the fixed width 3 and the codon in lowercase.
#
# Each entry is expected to respond to .first (the start position) and
# .last.first (the codon) - this is what the code reads from it.
def show_segments
  hits = ::Bioroebe.scan_this_input_for_startcodons(dna_sequence_object?)
  erev ' start end width'
  hits.each.with_index(1) do |hit, number|
    first_nucleotide = hit.first
    triplet = hit.last.first
    erev ' [' + number.to_s + '] ' + first_nucleotide.to_s.rjust(5) + ' ' +
         (first_nucleotide + 2).to_s.rjust(5) + ' ' + '3'.rjust(4) +
         ' [' + triplet.downcase + ']'
  end
end
|
1135
|
-
|
1136
|
-
require 'bioroebe/toplevel_methods/aminoacids_and_proteins.rb'
|
1137
|
-
# ========================================================================= #
|
1138
|
-
# === show_possible_phosphorylation_sites
|
1139
|
-
#
|
1140
|
-
# This method will find all possible phosphorylation sites in any
|
1141
|
-
# given target sequence. It will also identify the aminoacids that
|
1142
|
-
# can be phosphorylated.
|
1143
|
-
#
|
1144
|
-
# To test this, try:
|
1145
|
-
#
|
1146
|
-
# random 250; P?
|
1147
|
-
#
|
1148
|
-
# ========================================================================= #
|
1149
|
-
# Reports possible phosphorylation sites (codons for serine, tyrosine and
# threonine) in the current DNA sequence.
#
# i - the aminoacid sequence that is colourized at the very end; defaults
#     to aminoacid_sequence?.
#
# The method
#   1. collects all codons for the three aminoacids and expands a trailing
#      'Y' (pyrimidine) into the two concrete codons ending in T and C,
#   2. counts every occurrence of these codons in the DNA sequence,
#      irrespective of the reading frame,
#   3. lists the 1-based start positions of the matching codons in the
#      first reading frame, both on the DNA level and on the protein level,
#   4. prints the DNA sequence (first frame, full triplets only) with the
#      matching codons highlighted, followed by the colourized aminoacid
#      sequence.
#
# Fixes compared to the previous implementation:
#   - "site" is only used for exactly one hit; 0 or several hits yield
#     "sites" (the old check was inverted),
#   - the protein position of codon number n (0-based) is n + 1, rather
#     than the DNA position multiplied by 3.
def show_possible_phosphorylation_sites(i = aminoacid_sequence?)
  dna = dna_sequence_object?
  candidate_codons = %i[serine tyrosine threonine].map { |aminoacid|
    ::Bioroebe.codons_for?(aminoacid)
  }.flatten
  # ======================================================================= #
  # === Convert a trailing Y into the two pyrimidines T and C next
  # ======================================================================= #
  candidate_codons = candidate_codons.flat_map { |codon|
    codon.end_with?('Y') ? [codon.sub(/Y$/, 'T'), codon.sub(/Y$/, 'C')] : codon
  }
  # ======================================================================= #
  # Scan the sequence only once per codon and remember the hit count.
  # ======================================================================= #
  hits_per_codon = candidate_codons.map { |codon|
    [codon, dna.scan(/#{codon}/).size]
  }
  n_phosphorylation_sites = hits_per_codon.sum { |_codon, hits| hits }
  codons_found = hits_per_codon.select { |_codon, hits| hits > 0 }.map(&:first).uniq
  singular_or_plural = (n_phosphorylation_sites == 1) ? 'site' : 'sites'
  erev 'In this sequence, we have found '+simp(n_phosphorylation_sites.to_s)+rev+
       ' possible phosphorylation '+singular_or_plural+', using all '\
       '3 possible frames.'
  e
  erev 'In particular, these '+codons_found.size.to_s+
       ' different codons were found: '
  e
  erev ' '+simp(codons_found.join('/'))+rev
  e
  erev 'For the first frame, the start positions are these:'
  e
  # ======================================================================= #
  # === Find the start positions for frame 1 next
  # ======================================================================= #
  triplets = dna.scan(/.../)
  frame_1_indices = triplets.each_index.select { |index|
    codons_found.include?(triplets[index])
  }
  dna_positions     = frame_1_indices.map { |index| (index * 3) + 1 }
  protein_positions = frame_1_indices.map { |index| index + 1 }
  erev ' DNA: '+simp(dna_positions.join('/'))+rev
  erev ' Protein: '+simp(protein_positions.join('/'))+rev
  # ======================================================================= #
  # Now colourize the DNA sequence, but only in the first frame.
  # ======================================================================= #
  new_colourized_dna_sequence = triplets.map { |codon|
    (codons_found.include?(codon) ? swarn(codon) : codon)+rev
  }.join
  e
  erev 'The DNA sequence with possible phosphorylation sites is:'
  e
  erev left_padding?+leading_five_prime+new_colourized_dna_sequence+trailing_three_prime
  e
  erev 'The Aminoacid sequence with possible phosphorylation sites is:'
  e
  erev ' '+
       ::Bioroebe.colourize_aa(i, ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED).to_s
  e
end
|
1231
|
-
|
1232
|
-
# ========================================================================= #
|
1233
|
-
# === show_molweight
|
1234
|
-
# ========================================================================= #
|
1235
|
-
# Prints the first four entries of MolecularWeightOfNucleotides.weights,
# labelled (in this order) Adenine, Thymine, Guanine and Cytosine. Any
# further entries are ignored.
#
# use_cliner - when true (the default), cliner is invoked before and
#              after the listing.
def show_molweight(use_cliner = true)
  labels = ['Adenine: ', 'Thymine: ', 'Guanine: ', 'Cytosine: ']
  cliner if use_cliner
  MolecularWeightOfNucleotides.weights.each_with_index do |weight, position|
    label = labels[position]
    next if label.nil?

    erev label + sfancy(weight.to_s) + rev
  end
  cliner if use_cliner
end
|
1250
|
-
|
1251
|
-
# ========================================================================= #
|
1252
|
-
# === show_weight_of_this_nucleotide
|
1253
|
-
#
|
1254
|
-
# Use this method to show the total weight of a specific nucleotide.
|
1255
|
-
#
|
1256
|
-
# Usage examples:
|
1257
|
-
#
|
1258
|
-
# weight? U
|
1259
|
-
# weight? T
|
1260
|
-
# weight? Adenine
|
1261
|
-
#
|
1262
|
-
# ========================================================================= #
|
1263
|
-
# Reports the weight of a single nucleotide.
#
# i - the nucleotide, e. g. "A" or "Adenine"; only the first character
#     is used for the lookup.
#
# The yaml file FILE_NUCLEOTIDES_WEIGHT is loaded and re-keyed by the first
# character of each of its keys; the value found for the nucleotide is
# passed to ChemistryParadise.atomic_mass_of. A missing file and an unknown
# key are both reported to the user rather than raised.
def show_weight_of_this_nucleotide(i)
  i = i.to_s
  if i.empty?
    erev 'Please supply a nucleotide, such as "Adenine" or "A".'
    erev 'Note that the short variant is preferred.'
    return
  end
  i = i[0, 1] if i.size > 1
  yaml_file = FILE_NUCLEOTIDES_WEIGHT
  unless File.exist?(yaml_file)
    return ewarn('We did not find a required file at ' + sfile(yaml_file) + rev + '.')
  end
  # Use only the first character of every key, e. g. "Adenine" => "A".
  dataset = YAML.load_file(yaml_file).each_with_object({}) { |(key, value), hash|
    hash[key[0, 1]] = value
  }
  if dataset.has_key?(i)
    erev 'The weight of ' + sfancy(i) + rev + ' is: ' +
         sfancy(
           ChemistryParadise.atomic_mass_of(dataset[i])
         )
  else
    erev 'The key `' + sfancy(i) + rev + '` was not found.'
  end
end
|
1290
|
-
|
1291
|
-
# ========================================================================= #
|
1292
|
-
# === show_todo_file
|
1293
|
-
# ========================================================================= #
|
1294
|
-
# Passes the (unexpanded) path of the project's todo file to cat.
def show_todo_file
  todo_file = '$RUBY_SRC/bioroebe/doc/TODO_FOR_THE_BIOROEBE_PROJECT.md'
  cat(todo_file)
end
|
1297
|
-
|
1298
|
-
# ========================================================================= #
|
1299
|
-
# === report_where_the_pdf_tutorial_can_be_found
|
1300
|
-
#
|
1301
|
-
# Do notify the user where to find the .pdf tutorial.
|
1302
|
-
# ========================================================================= #
|
1303
|
-
# Prints the remote URL of the tutorial, which is built from the fixed
# host http://shevegen.square7.ch/ and the basename of
# FILE_BIOROEBE_TUTORIAL.
def report_where_the_pdf_tutorial_can_be_found
  remote_url = 'http://shevegen.square7.ch/' + File.basename(FILE_BIOROEBE_TUTORIAL)
  erev 'You can find the tutorial here:'
  e
  erev ' ' + simp(remote_url) + rev
  e
end
|
1310
|
-
|
1311
|
-
# ========================================================================= #
|
1312
|
-
# === show_directory_content
|
1313
|
-
# ========================================================================= #
|
1314
|
-
# Lists the entries matching a glob pattern, sorted and numbered from 1,
# inside a cliner block. Directories get a trailing '/'.
#
# of_this_dir - the glob pattern; if it contains no '*' a leading '*' is
#               added, so '.dmp' lists all '*.dmp' entries.
#
# The argument is no longer modified in place: the previous implementation
# used String#prepend on it, which altered the caller's string and raised
# a FrozenError for frozen strings (e. g. string literals in files that use
# the frozen_string_literal magic comment).
def show_directory_content(of_this_dir = '*')
  pattern = of_this_dir.to_s
  pattern = '*' + pattern unless pattern.include?('*')
  cliner {
    Dir[pattern].sort.each.with_index(1) { |entry, index|
      entry = entry + '/' if File.directory?(entry)
      erev index.to_s.rjust(2)+') '+entry
    }
  }
end
|
1324
|
-
|
1325
|
-
require 'bioroebe/protein_structure/alpha_helix.rb'
|
1326
|
-
# ========================================================================= #
|
1327
|
-
# === show_length_of_alpha_helix
|
1328
|
-
# ========================================================================= #
|
1329
|
-
# Prints the result of ::Bioroebe::AlphaHelix.length? for the given input.
def show_length_of_alpha_helix(i)
  helix_length = ::Bioroebe::AlphaHelix.length?(i)
  erev helix_length
end
|
1332
|
-
|
1333
|
-
# ========================================================================= #
|
1334
|
-
# === show_and_calculate_weight_of_dna_string
|
1335
|
-
# ========================================================================= #
|
1336
|
-
# Sums up the weight of all nucleotides of a DNA string and reports the
# result, rounded to two decimals, in Dalton.
#
# i - the DNA sequence; defaults to dna_sequence_object?. nil and an empty
#     Array also fall back to dna_sequence_object?; a non-empty Array is
#     joined into one string.
#
# Only A, T, C and G contribute to the sum; any other character counts
# as 0. The check is case-insensitive.
#
# The previous implementation called is_a?(Array) on self rather than on
# the argument, so the empty-Array fallback never triggered and the method
# then failed on Array#upcase.
def show_and_calculate_weight_of_dna_string(
    i = dna_sequence_object?
  )
  i = dna_sequence_object? if i.nil? || (i.is_a?(Array) && i.empty?)
  i = i.join if i.is_a?(Array)
  sum = i.upcase.chars.inject(0) { |total, nucleotide|
    weight =
      case nucleotide
      when 'A' then weight_of_adenin?
      when 'T' then weight_of_thymin?
      when 'C' then weight_of_cytosin?
      when 'G' then weight_of_guanin?
      end
    total + weight.to_f # nil.to_f is 0.0, so unknown characters add nothing.
  }
  # ======================================================================= #
  # Round the sum properly here.
  # ======================================================================= #
  sum = sum.round(2)
  erev 'The weight of this nucleotide sequence is: '+
       simp(sum.to_s)+rev+' Dalton.'
end
|
1362
|
-
|
1363
|
-
# ========================================================================= #
|
1364
|
-
# === show_name_of_the_gene
|
1365
|
-
# ========================================================================= #
|
1366
|
-
# Prints the value of sequence_object?.name_of_gene.
def show_name_of_the_gene
  gene_name = sfancy(sequence_object?.name_of_gene)
  erev 'The name of the gene at hand is: ' + gene_name
end
|
1370
|
-
|
1371
|
-
# ========================================================================= #
|
1372
|
-
# === show_agarose_table
|
1373
|
-
#
|
1374
|
-
# This method will simply show common agarose concentrations.
|
1375
|
-
# ========================================================================= #
|
1376
|
-
# Prints every entry of the dataset returned by
# load_bioroebe_yaml_file(:agarose): the gel concentration (key, shown
# with a '%' suffix) and the range of DNA fragment sizes in kb (value)
# it separates.
def show_agarose_table
  concentrations = load_bioroebe_yaml_file(:agarose)
  e
  e 'Agarose concentrations:'
  e
  concentrations.each_pair do |percentage, fragment_range|
    erev ' A concentration of ' + simp(percentage.to_s + '%') +
         rev + ' will separate DNA fragments between ' + sfancy(fragment_range) +
         rev + ' kb.'
  end
  e
end
|
1387
|
-
|
1388
|
-
# ========================================================================= #
|
1389
|
-
# === start_codon?
|
1390
|
-
# ========================================================================= #
|
1391
|
-
# Delegates to the toplevel ::Bioroebe.start_codon? and returns its result.
def start_codon?
  ::Bioroebe.start_codon?()
end
|
1394
|
-
|
1395
|
-
# ========================================================================= #
|
1396
|
-
# === stop_codons?
|
1397
|
-
# ========================================================================= #
|
1398
|
-
# Delegates to the toplevel ::Bioroebe.stop_codons? and returns its result.
def stop_codons?
  ::Bioroebe.stop_codons?()
end
|
1401
|
-
|
1402
|
-
# ========================================================================= #
|
1403
|
-
# === show_all_dmp_files
|
1404
|
-
#
|
1405
|
-
# Show all .dmp files here.
|
1406
|
-
# ========================================================================= #
|
1407
|
-
# Lists all .dmp entries by calling show_directory_content with the
# pattern '.dmp'.
def show_all_dmp_files
  dmp_suffix = '.dmp'
  show_directory_content(dmp_suffix)
end
|
1410
|
-
|
1411
|
-
# ========================================================================= #
|
1412
|
-
# === show_and_calculate_weight_of_dna_string_or_aminoacid_sequence
|
1413
|
-
# ========================================================================= #
|
1414
|
-
# Reports the weight of the input, which may be either the name of an
# aminoacid or a DNA string.
#
# i - the input; defaults to dna_sequence_object?. When nil, it falls back
#     to dna_sequence_object? if that is available.
#
# If ::Bioroebe.is_aminoacid?(i) is true, the input is mapped through the
# inverted AMINO_ACIDS_ENGLISH table and the result is looked up in
# AMINO_ACIDS_AVERAGE_MASS_TABLE. Otherwise the input is handed to
# show_and_calculate_weight_of_dna_string.
#
# NOTE(review): the original code called AMINO_ACIDS_ENGLISH.reverse. Core
# Ruby has no Hash#reverse, so for a plain Hash this raised NoMethodError;
# Hash#invert (value => key) is presumably what was intended. The old
# .reverse call is kept as a fallback for objects that do not respond to
# invert, so behaviour is unchanged for those - confirm the type of
# AMINO_ACIDS_ENGLISH.
def show_and_calculate_weight_of_dna_string_or_aminoacid_sequence(
    i = dna_sequence_object?
  )
  if i.nil?
    fallback = dna_sequence_object?
    i = fallback if fallback
  end
  # ======================================================================= #
  # First, we check if the input is an aminoacid.
  # ======================================================================= #
  if ::Bioroebe.is_aminoacid?(i)
    lookup_table =
      if AMINO_ACIDS_ENGLISH.respond_to?(:invert)
        AMINO_ACIDS_ENGLISH.invert
      else
        AMINO_ACIDS_ENGLISH.reverse
      end
    i = lookup_table[i] # Replace it with the key of the table next.
    # ===================================================================== #
    # Obtain the mass of this aminoacid.
    # ===================================================================== #
    i = AMINO_ACIDS_AVERAGE_MASS_TABLE[i]
    erev 'The weight of this aminoacid is: '+
         simp(i.to_s)+rev+' Dalton.'
  else
    show_and_calculate_weight_of_dna_string(i)
  end
end
|
1438
|
-
|
1439
|
-
# ========================================================================= #
|
1440
|
-
# === show_t_phages
|
1441
|
-
# ========================================================================= #
|
1442
|
-
# Prints the content of viruses/ecoli_phages.yml (below
# ::Bioroebe.yaml_dir?) as a table: one row per phage, with the columns
# plaque_size, head, tail, latent_period and burst_size, each centered
# within a fixed width and followed by a separator line.
def show_t_phages
  phages = YAML.load_file(
    ::Bioroebe.yaml_dir? + 'viruses/ecoli_phages.yml'
  )
  # Yaml key of each column together with the width it is centered in.
  columns = [
    ['plaque_size',   13], # The plaque size, aka small, medium or large.
    ['head',          15],
    ['tail',          13],
    ['latent_period', 15],
    ['burst_size',    12]
  ]
  # ======================================================================= #
  # Next, display that as a table.
  # ======================================================================= #
  erev 'Name of Phage | Plaque Size | Head diameter | tail length | latent period | burst size'
  cliner length: 88
  phages.each_pair do |phage_name, properties|
    print '|', phage_name.to_s.center(13), '|'
    columns.each do |yaml_key, width|
      print properties[yaml_key].to_s.center(width), '|'
    end
    e
    cliner length: 88
  end
end
|
1473
|
-
|
1474
|
-
# ========================================================================= #
|
1475
|
-
# === show_html_colours
|
1476
|
-
# ========================================================================= #
|
1477
|
-
# Prints a short header and then delegates to ::Colours.show_html_colours,
# framed by empty lines.
def show_html_colours
  e 'The available HTML colours are:'
  e
  ::Colours.show_html_colours
  e
end
|
1481
|
-
|
1482
|
-
# ========================================================================= #
|
1483
|
-
# === show_restriction_table
|
1484
|
-
#
|
1485
|
-
# This method will show a restriction table, that is, a table with
|
1486
|
-
# some different restriction enzymes.
|
1487
|
-
#
|
1488
|
-
# To invoke this method, do:
|
1489
|
-
#
|
1490
|
-
# show_restriction_table
|
1491
|
-
#
|
1492
|
-
# ========================================================================= #
|
1493
|
-
# Prints a fixed table with one example restriction enzyme and its
# recognition sequence for each of the 4-, 5-, 6-, 7- and 8-cutters.
def show_restriction_table
  label_width = 20
  separator = '---------------------------------------------------------'
  # [label, name of the enzyme, recognition sequence]
  rows = [
    [' 4-cutter', 'ChaI',  'GATC'],
    [' 5-cutter', 'FmuI',  'GGNCC'],
    [' 6-cutter', 'EcoRI', 'GAATTC'],
    [' 7-cutter', 'PfoI',  'TCCNGGA'],
    [' 8-cutter', 'PacI',  'TTAATTAA']
  ]
  erev 'Showing a few different cutters (4,5,6,7,8) in table format next:'
  erev separator
  rows.each do |label, enzyme, recognition_sequence|
    e peru(label.ljust(label_width)) + ' | ' + orange(enzyme.ljust(10)) + ' | ' +
      olivedrab(recognition_sequence.ljust(10))
  end
  erev separator
end
|
1509
|
-
|
1510
|
-
# ========================================================================= #
|
1511
|
-
# === show_numbered_nucleotide_positions
|
1512
|
-
#
|
1513
|
-
# This method will show "numbered" nucleotide positions such as:
|
1514
|
-
#
|
1515
|
-
# 1234567891234567891234567
|
1516
|
-
# ATGCAGGTCATCAGTCAGTCAGTCA
|
1517
|
-
#
|
1518
|
-
# ========================================================================= #
|
1519
|
-
# Prints the sequence in lines of at most 40 nucleotides. Above every line
# a ruler is shown that carries the last digit of each 1-based position
# within that line (1 2 3 ... 9 0 1 2 ...); numbering restarts on every
# line.
def show_numbered_nucleotide_positions
  lines = sequence?.string?.chars.each_slice(40).map(&:join)
  lines.each do |line|
    ruler = (1..line.chars.size).map { |position| (position % 10).to_s }.join
    e lightsteelblue(ruler)
    erev line
  end
end
|
1538
|
-
|
1539
|
-
# ========================================================================= #
|
1540
|
-
# === show_fastq_quality_score_table
|
1541
|
-
# ========================================================================= #
|
1542
|
-
# Prints, for every top-level key of the FILE_FASTQ_QUALITY_SCHEMES yaml
# file, the key itself followed by its ascii_character_range, offset,
# quality_score_type and quality_score_range entries.
#
# Does nothing (and returns nil) when the file does not exist.
def show_fastq_quality_score_table
  yaml_file = FILE_FASTQ_QUALITY_SCHEMES
  return unless File.exist?(yaml_file)

  # Label that is shown to the user together with the yaml key it reads.
  fields = [
    [' Ascii character range: ', 'ascii_character_range'],
    [' Offset: ',                'offset'],
    [' Quality score type: ',    'quality_score_type'],
    [' Quality score range: ',   'quality_score_range']
  ]
  schemes = YAML.load_file(yaml_file)
  schemes.keys.each do |scheme_name|
    e sfancy(scheme_name + ':')
    e
    scheme = schemes[scheme_name]
    fields.each do |label, yaml_key|
      erev label + seagreen(scheme[yaml_key].to_s)
    end
    e
  end
  e
end
|
1563
|
-
|
1564
|
-
# ========================================================================= #
|
1565
|
-
# === report_the_protein_weight
|
1566
|
-
# ========================================================================= #
|
1567
|
-
# Reports the total weight (in Dalton, rounded to two decimals) of the
# current aminoacid sequence, as calculated by
# ::Bioroebe.amino_acid_average_mass.
#
# If the sequence contains a '*' (stop codon), the user is notified and
# only the part before the first '*' is weighed.
#
# The truncation uses a (start, length) slice. The previous range-based
# slice 0 .. (index - 1) turned into 0 .. -1 when the '*' was the very
# first character and thus kept the whole sequence, '*' included.
def report_the_protein_weight
  sequence = aminoacid_sequence?
  position_of_first_stop = sequence.index('*')
  if position_of_first_stop
    erev 'Note that this aminoacid sequence has a stop codon, denoted by the *:'
    e
    erev ' '+sfancy(sequence)+rev
    e
    erev 'Since a stop codon is not translated into an aminoacid'
    erev 'it makes little sense to include it into the weight-calculation.'
    erev 'Thus, we will use only the part up to the first * token.'
    sequence = sequence[0, position_of_first_stop]
  end
  sum = ::Bioroebe.amino_acid_average_mass(sequence)
  e 'The total weight of these '+simp(sequence.size.to_s)+rev+
    ' aminoacids is: '+sfancy(sum.to_f.round(2).to_s)+rev+
    ' Dalton'
end
|
1584
|
-
|
1585
|
-
# ========================================================================= #
|
1586
|
-
# === report_all_stop_codons
|
1587
|
-
#
|
1588
|
-
# This method will report all stop codons in the given sequence.
|
1589
|
-
#
|
1590
|
-
# We will not modify the input given to this method.
|
1591
|
-
#
|
1592
|
-
# The three stop codons, in RNA, are:
|
1593
|
-
#
|
1594
|
-
# UGA
|
1595
|
-
# UAG
|
1596
|
-
# UAA
|
1597
|
-
#
|
1598
|
-
# ========================================================================= #
|
1599
|
-
# Reports how often each of the three stop codons occurs in the given
# sequence, irrespective of the reading frame.
#
# i - the sequence to scan; defaults to dna_sequence_object?.
#
# The codons are UGA, UAG and UAA; if is_dna? is true the DNA variants
# (TGA, TAG, TAA) are used instead.
#
# The input is not modified: an upcased copy is scanned. The previous
# implementation used upcase! on the argument itself, which changed the
# caller's sequence and raised a FrozenError for frozen strings.
# NOTE(review): this assumes that the sequence object responds to the
# non-destructive upcase as well - true for String; confirm for other
# sequence classes.
def report_all_stop_codons(
    i = dna_sequence_object?
  )
  sequence = i.upcase
  erev 'Our input sequence has '+simp(sequence.size.to_s)+rev+' nucleotides.'
  %w[UGA UAG UAA].map { |rna_codon|
    codon = is_dna? ? rna_codon.tr('U', 'T') : rna_codon
    erev 'We did find '+
         simp(sequence.scan(/#{codon}/).size.to_s.rjust(2))+rev+
         ' '+codon+' stop codons.'
  }.last
end
|
1619
|
-
|
1620
|
-
# ========================================================================= #
|
1621
|
-
# === determine_and_report_all_stop_codons
|
1622
|
-
# ========================================================================= #
|
1623
|
-
# For every stop codon returned by stop_codons? this asks
# ::Bioroebe.return_all_substring_matches for all matches in the current
# DNA sequence and reports the position of the last match, read from
# .last.first of the result, or that no match has been found.
def determine_and_report_all_stop_codons
  sequence = dna_sequence_object?
  erev 'Because 3 different stop codons exist, we have '\
       'to do ' + slateblue('3 runs') + rev + '.'
  stop_codons?.each do |stop_codon|
    matches = ::Bioroebe.return_all_substring_matches(
      sequence, stop_codon
    )
    next erev('No match has been found.') if matches.empty?

    position_of_last_match = matches.last.first
    erev 'For the stop codon ' + sfancy(stop_codon) + rev + ' the last codon'
    erev 'occurrs at position ' + simp(position_of_last_match.to_s) + rev + '.'
  end
end
|
1640
|
-
|
1641
|
-
# ========================================================================= #
|
1642
|
-
# === show_seq_1
|
1643
|
-
# ========================================================================= #
|
1644
|
-
# Prints the given sequence (by default seq1?) between the 5' leader and
# the 3' trailer, preceded by the padding.
def show_seq_1(i = seq1?)
  highlighted = sfancy(i)
  erev padding? + leading_five_prime + highlighted + rev + trailing_three_prime
end
|
1648
|
-
|
1649
|
-
# ========================================================================= #
|
1650
|
-
# === show_seq_2
|
1651
|
-
# ========================================================================= #
|
1652
|
-
# Prints the given sequence (by default seq2?) between the 5' leader and
# the 3' trailer, preceded by the padding.
def show_seq_2(i = seq2?)
  highlighted = sfancy(i)
  erev padding? + leading_five_prime + highlighted + rev + trailing_three_prime
end
|
1656
|
-
|
1657
|
-
# ========================================================================= #
|
1658
|
-
# === show_seq_3
|
1659
|
-
# ========================================================================= #
|
1660
|
-
# Prints the given sequence (by default seq3?) between the 5' leader and
# the 3' trailer, preceded by the padding.
def show_seq_3(i = seq3?)
  highlighted = sfancy(i)
  erev padding? + leading_five_prime + highlighted + rev + trailing_three_prime
end
|
1664
|
-
|
1665
|
-
# ========================================================================= #
|
1666
|
-
# === show_seq_4
|
1667
|
-
# ========================================================================= #
|
1668
|
-
# Prints seq4? between the 5' leader and the 3' trailer, preceded by the
# padding.
def show_seq_4
  erev padding? + leading_five_prime +
       sfancy(seq4?) + rev + trailing_three_prime
end
|
1671
|
-
|
1672
|
-
# ========================================================================= #
|
1673
|
-
# === show_seq_5
|
1674
|
-
# ========================================================================= #
|
1675
|
-
# Prints seq5? between the 5' leader and the 3' trailer, preceded by the
# padding.
def show_seq_5
  erev padding? + leading_five_prime +
       sfancy(seq5?) + rev + trailing_three_prime
end
|
1678
|
-
|
1679
|
-
# ========================================================================= #
|
1680
|
-
# === show_seq_6
|
1681
|
-
# ========================================================================= #
|
1682
|
-
# Prints seq6? between the 5' leader and the 3' trailer, preceded by the
# padding.
def show_seq_6
  erev padding? + leading_five_prime +
       sfancy(seq6?) + rev + trailing_three_prime
end
|
1685
|
-
|
1686
|
-
# ========================================================================= #
|
1687
|
-
# === show_start_and_stop_codons
|
1688
|
-
#
|
1689
|
-
# This will show BOTH start and stop codons, in different colours.
|
1690
|
-
#
|
1691
|
-
# Since start codons may be more important, we will first locate
|
1692
|
-
# and colourize them, and afterwards, will also colourize the
|
1693
|
-
# stop codons.
|
1694
|
-
# ========================================================================= #
|
1695
|
-
# Shows the sequence with the start codon and all stop codons highlighted
# in different colours, preceded by a legend.
#
# The start codon is colourized first, the stop codons afterwards. The
# codons are taken from ::Bioroebe.start_codon? and
# ::Bioroebe.stop_codons?.
#
# The colour codes are inserted into a copy of the sequence. The previous
# implementation ran gsub! directly on the object returned by string?, so
# if that is the stored sequence it was left with the colour codes in it.
def show_start_and_stop_codons
  colourized_sequence = string?.dup
  start_codon = ::Bioroebe.start_codon?
  stop_codons = ::Bioroebe.stop_codons?
  colourized_sequence.gsub!(/(#{start_codon})/, yellow+'\\1'+colour_for_nucleotide)
  stop_codons.each {|stop_codon|
    colourized_sequence.gsub!(/(#{stop_codon})/, salmon('\\1')+colour_for_nucleotide)
  }
  erev 'Start codon: '+yellow+start_codon+rev
  # ======================================================================= #
  # Show the stop codons that we will use:
  # ======================================================================= #
  legend_of_stop_codons = stop_codons.join(', ').strip.chomp(',')
  erev 'Stop codons: '+salmon(legend_of_stop_codons)+rev
  erev dna_padding(colourized_sequence)
end
|
1712
|
-
|
1713
|
-
# ========================================================================= #
|
1714
|
-
# === report_when_the_bioroebe_project_was_last_updated
|
1715
|
-
# ========================================================================= #
|
1716
|
-
# Prints the LAST_UPDATE constant together with the number of days
# (rounded to two decimals) between that date, as parsed by Time.parse,
# and now.
def report_when_the_bioroebe_project_was_last_updated
  message = ('The Bioroebe-Project was last updated on: ' +
             slateblue(LAST_UPDATE) + rev).dup
  seconds_since_update = Time.now - Time.parse(LAST_UPDATE)
  days_since_update = (seconds_since_update / 60 / 60 / 24).round(2)
  message << ' (~' + days_since_update.to_s + ' days ago)'
  erev message
end
|
1724
|
-
|
1725
|
-
# ========================================================================= #
|
1726
|
-
# === show_information_about_the_gff_format
|
1727
|
-
# ========================================================================= #
|
1728
|
-
# Prints a description of the .gff format: two introductory paragraphs
# followed by one section per column (seqname, source, feature, start,
# end, score, strand, frame, attribute). Each section consists of a
# heading (egold), its description lines (erev) and an empty line.
def show_information_about_the_gff_format
  erev 'Fields must be tab-separated in the .gff format.'
  e
  erev 'All but the final field in each feature line must'
  erev 'contain a value; "empty" columns should be denoted with a "."'
  e
  # Heading of the column plus the lines that describe it.
  columns = [
    ['seqname:', [
      'This is the name of the chromosome or scaffold; chromosome names',
      'can be given with or without the "chr" prefix.',
      'Important note: the seqname must be one used within Ensembl, ',
      'i.e. a standard chromosome name or an Ensembl identifier such as a',
      'scaffold ID, without any additional content such as species or',
      'assembly. See the example GFF output below.'
    ]],
    ['source:', [
      'Name of the program that generated this feature, or ',
      'the data source (database or project name)'
    ]],
    ['feature:', [
      'feature type name, e.g. Gene, Variation, Similarity'
    ]],
    ['start:', [
      'Start position of the feature, with sequence numbering starting at 1.'
    ]],
    ['end:', [
      'End position of the feature, with sequence numbering starting at 1.'
    ]],
    ['score:', [
      'A floating point value.'
    ]],
    ['strand:', [
      'defined as + (forward) or - (reverse).'
    ]],
    ['frame:', [
      " - One of '0', '1' or '2'. '0' indicates that the first base ",
      "of the feature is the first base of a codon, '1' that the second ",
      'base is the first base of a codon, and so on.'
    ]],
    ['attribute:', [
      'A semicolon-separated list of tag-value pairs, providing ',
      'additional information about each feature.'
    ]]
  ]
  columns.map { |heading, description_lines|
    egold heading
    description_lines.each { |line| erev line }
    e
  }.last
end
|
1772
|
-
|
1773
|
-
# ========================================================================= #
|
1774
|
-
# === show_header_of_this_pdb_file
|
1775
|
-
# ========================================================================= #
|
1776
|
-
# Prints the first two lines of a .pdb file, each without its first
# whitespace-separated word (the record name) and with the remaining words
# joined by single spaces. The second line is shown with a leading space.
#
# i - path to the .pdb file.
#
# Only the first two lines are read rather than the whole file. Files
# with fewer than two lines, or with blank header lines, no longer raise
# a NoMethodError: only the lines that exist are shown, and a blank line
# is shown as an empty string.
def show_header_of_this_pdb_file(i)
  first, second = File.foreach(i).first(2).map { |line|
    # Array#[] returns nil when the line has no words at all.
    (line.split(' ')[1..-1] || []).join(' ').strip
  }
  erev first if first
  erev ' '+second if second
end
|
1783
|
-
|
1784
|
-
# ========================================================================= #
|
1785
|
-
# === show_useful_URLs
|
1786
|
-
#
|
1787
|
-
# This method will simply show some important, bioinformatics related
|
1788
|
-
# URLs. In particular URLs that may be important for bioinformatics
|
1789
|
-
# related tasks, e. g. NCBI, GenBank and so forth.
|
1790
|
-
# ========================================================================= #
|
1791
|
-
# Prints the URLs that obtain_url_for returns for :ncbi, :genbank, :pdb
# and :prosite, one per line, framed by empty lines.
def show_useful_URLs
  resources = [
    ['NCBI: ',    :ncbi],
    ['GenBank: ', :genbank],
    ['PDB: ',     :pdb],
    ['Prosite: ', :prosite]
  ]
  e
  resources.each do |label, resource|
    erev label + sfancy(obtain_url_for(resource))
  end
  e
end
|
1799
|
-
|
1800
|
-
# ========================================================================= #
|
1801
|
-
# === show_header_of
|
1802
|
-
# ========================================================================= #
|
1803
|
-
# Shows the header of the given file.
#
# i - a path, or an Array of paths; every entry of an Array is handled
#     by a recursive call.
#
# A path that does not exist is reported to the user. Right now only
# paths ending in .pdb are supported (via show_header_of_this_pdb_file);
# any other existing file is silently ignored.
def show_header_of(i)
  return i.each { |path| show_header_of(path) } if i.is_a?(Array)

  unless File.exist?(i)
    erev "No file exists at `#{sfile(i)}#{rev}`."
    return
  end
  # ======================================================================= #
  # === .pdb
  # ======================================================================= #
  show_header_of_this_pdb_file(i) if /\.pdb$/ === i
end
|
1820
|
-
|
1821
|
-
# ========================================================================= #
|
1822
|
-
# === show_GFP_sequence (gfp tag)
|
1823
|
-
#
|
1824
|
-
# This method will show the GFP sequence, on the DNA level.
|
1825
|
-
# ========================================================================= #
|
1826
|
-
def show_GFP_sequence
  # Print the 5' header followed by the default GFP sequence.
  header = return_five_prime_header
  erev header+return_default_GFP_sequence
end
|
1830
|
-
|
1831
|
-
# ========================================================================= #
|
1832
|
-
# === return_default_GFP_sequence
|
1833
|
-
# ========================================================================= #
|
1834
|
-
def return_default_GFP_sequence(
    path_to_the_file = FILE_GFP_SEQUENCE
  )
  # The block hands :be_quiet to Fasta.new; the sequence the Fasta
  # instance returns is the return value of this method.
  fasta = Fasta.new(path_to_the_file) { :be_quiet }
  fasta.return_sequence
end
|
1839
|
-
|
1840
|
-
# ========================================================================= #
|
1841
|
-
# === try_to_show_the_configuration
|
1842
|
-
# ========================================================================= #
|
1843
|
-
def try_to_show_the_configuration
  # Only delegate to @config when it actually offers show_config.
  if @config.respond_to? :show_config
    @config.show_config
  end
  # Afterwards report, colourized, whether cd-aliases will be expanded.
  answer = verbose_truth(use_expand_cd_aliases?)
  erev 'Will we use class Rcfiles::DirectoryAliases: '+simp(answer.to_s)
end
|
1850
|
-
|
1851
|
-
require 'bioroebe/aminoacids/aminoacids_mass_table.rb'
|
1852
|
-
# ========================================================================= #
|
1853
|
-
# === show_aminoacids_mass_table
|
1854
|
-
#
|
1855
|
-
# This shows the weight of the aminoacids, in a table-layout.
|
1856
|
-
# ========================================================================= #
|
1857
|
-
def show_aminoacids_mass_table
  # First report which file backs the table, then show the table itself
  # using the current padding.
  table = AminoacidsMassTable
  table.report_which_file_is_used
  table.show(padding?) # bl aminoacids_mass_table.rb
end; alias aminoacid_table_overview show_aminoacids_mass_table # === aminoacid_table_overview
|
1861
|
-
|
1862
|
-
require 'bioroebe/utility_scripts/pathways.rb'
|
1863
|
-
# ========================================================================= #
|
1864
|
-
# === show_all_pathways
|
1865
|
-
#
|
1866
|
-
# Simply show all Pathways.
|
1867
|
-
# ========================================================================= #
|
1868
|
-
def show_all_pathways
  # Pure delegation to the class-level method of Bioroebe::Pathways.
  pathways = ::Bioroebe::Pathways
  pathways.show_all_pathways
end
|
1871
|
-
|
1872
|
-
# ========================================================================= #
|
1873
|
-
# === show_sequence_in_splitted_form
|
1874
|
-
#
|
1875
|
-
# We will show the main DNA sequence in a three-letter splitted form.
|
1876
|
-
#
|
1877
|
-
# You can optionally use an argument, the first argument, a number. By
|
1878
|
-
# default this is 3, so we will split into chunks of 3.
|
1879
|
-
#
|
1880
|
-
# The second argument says which token we will use for rejoining. It
|
1881
|
-
# defaults to ' ' so the nucleotides will be rejoined via ' ', but
|
1882
|
-
# you can also use another token such as '-', which may lead to a
|
1883
|
-
# String such as 'ATG-CGA-ACC' and so forth.
|
1884
|
-
# ========================================================================= #
|
1885
|
-
def show_sequence_in_splitted_form(
    how_many                     = 3,
    use_this_token_for_rejoining = ' ' # <- Which token to use for the re-joining action.
  )
  # ======================================================================= #
  # Determine the chunk size. nil and :default mean 3; a non-positive
  # number also falls back to 3, because an empty pattern would match
  # at every position and produce only empty chunks.
  # ======================================================================= #
  chunk_size =
    case how_many
    when nil, :default then 3
    else how_many.to_i
    end
  chunk_size = 3 unless chunk_size.positive?
  return erev('Please first "assign" a sequence.') if string?.empty?
  # ======================================================================= #
  # A block may return a Hash with the key :use_this_token, which then
  # overrides the join token. The Hash is only read, never modified.
  # ======================================================================= #
  if block_given?
    yielded = yield
    if yielded.is_a?(Hash) && yielded.has_key?(:use_this_token)
      use_this_token_for_rejoining = yielded[:use_this_token]
    end
  end
  # ======================================================================= #
  # {1,n} rather than exactly n characters: the trailing, incomplete
  # chunk is kept instead of being dropped from the output.
  # ======================================================================= #
  chunks = string?.to_s.scan(/.{1,#{chunk_size}}/).map { |chunk|
    if is_this_a_start_codon? chunk # Colourize start codons.
      mediumseagreen(chunk)+return_colour_for_nucleotides
    elsif is_this_a_stop_codon? chunk # Colourize stop codons.
      mediumorchid(chunk)+return_colour_for_nucleotides
    else
      chunk
    end
  }
  # ======================================================================= #
  # Finally show the sequence.
  # ======================================================================= #
  erev left_padding?+
       five_prime+
       return_colour_for_nucleotides+
       chunks.join(use_this_token_for_rejoining)+
       rev+
       three_prime
end
|
1936
|
-
|
1937
|
-
# ========================================================================= #
|
1938
|
-
# === show_disulfides
|
1939
|
-
#
|
1940
|
-
# Show the (possible) disulfide positions in a protein.
|
1941
|
-
# ========================================================================= #
|
1942
|
-
def show_disulfides
  sequence = aminoacid_sequence?
  # Without any cysteine ('C') there is nothing further to report.
  unless sequence.include? 'C'
    e 'This aminoacid sequence has no cysteine. Thus, '\
      'there can not be any disulfide bonds.'
    return
  end
  n_cysteines = sequence.count('C')
  erev "This aminoacid sequence has #{steelblue(n_cysteines.to_s)}#{rev} cysteines."
  # The remaining output is only shown for at least two cysteines.
  return unless n_cysteines > 1
  erev 'Thus, there could be disulfide bonds. '+
       gold(cheerful_person)+rev
  show_sequence_with_a_ruler(:default, sequence)
  erev 'The positions of cysteines are at:'
  sequence.chars.each_with_index { |aminoacid, index|
    next unless aminoacid == 'C'
    # Positions are reported 1-based, right-aligned to three characters.
    erev 'Position: '+steelblue((index+1).to_s.rjust(3))
  }
end
|
1963
|
-
|
1964
|
-
# ========================================================================= #
|
1965
|
-
# === show_aminoacids_residues
|
1966
|
-
# ========================================================================= #
|
1967
|
-
def show_aminoacids_residues
  erev 'The aminoacid residues are:'
  e
  ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS.each { |long_name|
    # The residue table is looked up with the downcased name.
    residue = AMINO_ACIDS_RESTE[long_name.downcase]
    erev long_name.ljust(14)+': '+simp(residue)
  }
  e
end
|
1974
|
-
|
1975
|
-
# ========================================================================= #
|
1976
|
-
# === show_hint_how_to_use_the_local_sequences
|
1977
|
-
#
|
1978
|
-
# Show a hint for the user.
|
1979
|
-
# ========================================================================= #
|
1980
|
-
def show_hint_how_to_use_the_local_sequences
  # Stay silent when no FASTA file is reported for the log directory.
  return if return_fasta_files_in_the_log_directory.empty?
  erev 'You can load up any of these sequences by issuing:'
  e
  erev ' use_this_fasta 1 # for file number 1'
  e
end
|
1988
|
-
|
1989
|
-
# ========================================================================= #
|
1990
|
-
# === colour_for_stop_codon
|
1991
|
-
# ========================================================================= #
|
1992
|
-
def colour_for_stop_codon(i)
  # Stop codons are passed through orange().
  colourized = orange(i)
  colourized
end
|
1995
|
-
|
1996
|
-
# ========================================================================= #
|
1997
|
-
# === colour_for_nucleotide
|
1998
|
-
# ========================================================================= #
|
1999
|
-
def colour_for_nucleotide(i = '')
  # Nucleotides are passed through royalblue(); the default is ''.
  colourized = royalblue(i)
  colourized
end; alias colour_for_nucleotides colour_for_nucleotide # === colour_for_nucleotides
|
2002
|
-
|
2003
|
-
# ========================================================================= #
|
2004
|
-
# === report_this_dna_sequence_with_proper_trailer_and_leader
|
2005
|
-
# ========================================================================= #
|
2006
|
-
def report_this_dna_sequence_with_proper_trailer_and_leader(i)
  # ======================================================================= #
  # Work on a private copy. String#to_s returns the receiver itself, so
  # without .dup the in-place replacement below would modify the
  # caller's String (and raise on a frozen one).
  # ======================================================================= #
  i = i.to_s.dup
  # ======================================================================= #
  # A block returning :try_to_colourize_start_codon requests that a
  # leading start codon be colourized.
  # ======================================================================= #
  if block_given? && (yield == :try_to_colourize_start_codon) &&
     i.start_with?(start_codon?)
    i[0,3] = cyan(i[0,3])+return_colour_for_nucleotides
  end
  colourized_dna_sequence = remove_trailing_escape_code(
    colourize_this_dna_sequence(i)
  )
  erev left_pad?+
       leading_5_prime+
       colourized_dna_sequence+
       rev+
       trailing_3_prime
end
|
2030
|
-
|
2031
|
-
# ========================================================================= #
|
2032
|
-
# === show_hydropathy_table
|
2033
|
-
#
|
2034
|
-
# Show the hydropathy table.
|
2035
|
-
# ========================================================================= #
|
2036
|
-
def show_hydropathy_table
  e
  HYDROPATHY_TABLE.each_pair { |one_letter_code, hydropathy|
    # The value is right-aligned to four characters before colourizing.
    right_aligned = hydropathy.to_s.rjust(4)
    e ' '+sfancy(one_letter_code)+' | '+simp(right_aligned)
  }
  e
end
|
2043
|
-
|
2044
|
-
# ========================================================================= #
|
2045
|
-
# === show_known_nls_sequences
|
2046
|
-
#
|
2047
|
-
# This Wikipedia page may be useful:
|
2048
|
-
# http://en.wikipedia.org/wiki/Nuclear_localization_sequence
|
2049
|
-
# ========================================================================= #
|
2050
|
-
def show_known_nls_sequences
  column_width = 36 # The names are left-aligned to this width.
  erev 'These NLS sequences are known:'+N+N
  NUCLEAR_LOCALIZATION_SEQUENCES.each_pair { |name, sequence|
    padded_name = name.ljust(column_width)
    e sfancy(padded_name)+' '+sequence
  }
end
|
2057
|
-
|
2058
|
-
# ========================================================================= #
|
2059
|
-
# === report_mode
|
2060
|
-
# ========================================================================= #
|
2061
|
-
def report_mode
  # Print whatever mode? currently returns.
  current_mode = mode?
  erev current_mode
end
|
2064
|
-
|
2065
|
-
# ========================================================================= #
|
2066
|
-
# === show_reste
|
2067
|
-
#
|
2068
|
-
# This will show the residues of the various amino acids.
|
2069
|
-
# ========================================================================= #
|
2070
|
-
def show_reste
  # One line per entry of AMINO_ACIDS_RESTE, framed by empty lines.
  e
  AMINO_ACIDS_RESTE.each_pair { |name, residue|
    padded_name = name.ljust(14)
    erev ' '+padded_name+' -> '+sfancy(residue)
  }
  e
end
|
2075
|
-
|
2076
|
-
require 'bioroebe/string_matching/simple_string_comparer.rb'
|
2077
|
-
# ========================================================================= #
|
2078
|
-
# === show_sixpack_alignment
|
2079
|
-
#
|
2080
|
-
# We will feed some input to class Bioroebe::SimpleStringComparer.
|
2081
|
-
# ========================================================================= #
|
2082
|
-
def show_sixpack_alignment(
    i = dna_sequence_object?
  )
  # NOTE(review): the argument i is not used by this method; it is kept
  # so that existing callers continue to work.
  #
  # $stdin.gets returns nil on end-of-input; .to_s turns that into an
  # empty String instead of raising NoMethodError on .chomp.
  erev 'Input sequence 1:'
  string1 = $stdin.gets.to_s.chomp
  erev 'Input sequence 2:'
  string2 = $stdin.gets.to_s.chomp
  # ======================================================================= #
  # Delegate into class SimpleStringComparer next.
  # ======================================================================= #
  comparer = ::Bioroebe::SimpleStringComparer.new(:dont_run_yet) # bl $BIOROEBE/string_matching/simple_string_comparer.rb
  comparer.set_main_alignment_token_to '|'
  comparer.string1 = string1
  comparer.string2 = string2
  comparer.compare
end
|
2098
|
-
|
2099
|
-
# ========================================================================= #
|
2100
|
-
# === show_average_weight_of_a_nucleotide
|
2101
|
-
#
|
2102
|
-
# The formula was obtained from the following website:
|
2103
|
-
#
|
2104
|
-
# http://www.biophp.org/minitools/useful_formulas/demo.php
|
2105
|
-
#
|
2106
|
-
# ========================================================================= #
|
2107
|
-
def show_average_weight_of_a_nucleotide
  # Kind of DNA => the weight value shown to the user, in Da.
  { 'dsDNA' => '660', 'ssDNA' => '330' }.each_pair { |kind_of_dna, weight|
    erev 'The average molecular weight (MW) of '+kind_of_dna+' is '+
         sfancy(weight)+' Da.'
  }
end
|
2111
|
-
|
2112
|
-
# ========================================================================= #
|
2113
|
-
# === show_config_dir
|
2114
|
-
#
|
2115
|
-
# This method will show the configuration directory.
|
2116
|
-
# ========================================================================= #
|
2117
|
-
def show_config_dir
  # The directory is derived from the location of this very file.
  config_dir = "#{File.dirname(__FILE__)}/configuration/"
  erev 'The configuration directory for the Bioroebe::Shell is at:'
  erev " `#{sfile(config_dir)}#{rev}`"
end
|
2122
|
-
|
2123
|
-
# ========================================================================= #
|
2124
|
-
# === show_last_downloaded_file
|
2125
|
-
# ========================================================================= #
|
2126
|
-
def show_last_downloaded_file
  downloads = @array_all_downloads
  # Nothing has been recorded yet.
  return erev('We have not yet downloaded any file.') if downloads.empty?
  # Otherwise the most recently recorded entry is shown.
  erev 'The last downloaded data was: '+sfancy(downloads.last)
end
|
2134
|
-
|
2135
|
-
# ========================================================================= #
|
2136
|
-
# === show_jumper_directories
|
2137
|
-
# ========================================================================= #
|
2138
|
-
def show_jumper_directories
  jumper_directories = @internal_hash[:array_jumper_directories]
  unless jumper_directories.empty?
    erev 'The available jumper directories are:'
    pp jumper_directories # Pretty-print the stored entries.
  else
    erev 'No jumper directory has been assigned yet.'
  end
end
|
2146
|
-
|
2147
|
-
# ========================================================================= #
|
2148
|
-
# === show_save_file
|
2149
|
-
# ========================================================================= #
|
2150
|
-
def show_save_file
  # Name the current save file, then mention the "save_here" alternative.
  target = sfile(save_file?)
  erev 'We will store into the file '+target+rev+'.'
  [
    'If you wish to instead store into the current directory,',
    'input "save_here".'
  ].each { |line| erev line }
end
|
2155
|
-
|
2156
|
-
# ========================================================================= #
|
2157
|
-
# === show_sigma_tutorial
|
2158
|
-
#
|
2159
|
-
# This method tells the user a bit about the sigma factors.
|
2160
|
-
# ========================================================================= #
|
2161
|
-
def show_sigma_tutorial
  # ======================================================================= #
  # Every inner Array is one paragraph: its lines are printed via erev
  # and the paragraph is then closed with one empty line.
  # ======================================================================= #
  paragraphs = [
    ['This subsection contains some information about Sigmafactors.'],
    ['A sigma factor is a protein needed for initiation of RNA synthesis.'],
    ['It is a bacterial transcription initiation factor.'],
    ['It will enable the specific binding of RNA polymerase to gene promoters.'],
    ['Sigma factors vary, which allows the bacterial cell to respond to',
     'different environmental signals.'],
    ['Every molecule of RNA polymerase holoenzyme will contain only one '\
     'sigma factor.'],
    ['The number of sigma factors varies between bacterial species.'],
    ['E. coli has seven sigma factors.'],
    ['Sigma factors are distinguished by their characteristic molecular '\
     'weights.'],
    ['For instance, sigma-70 refers to the sigma factor with a molecular '\
     'weight of 70 kDa.'],
    ['Once initiation of RNA transcription is complete, the sigma',
     'factor can leave the complex.']
  ]
  paragraphs.each { |lines|
    lines.each { |line| erev line }
    e
  }
  # The closing reference is not followed by an empty line.
  erev 'Sigmafactor rpoD 70 can be found here:'
  e ' '+simp('http://www.ncbi.nlm.nih.gov/gene/947567')
end
|
2192
|
-
|
2193
|
-
# ========================================================================= #
|
2194
|
-
# === show_last_input
|
2195
|
-
#
|
2196
|
-
# sli can be used as command to access this method.
|
2197
|
-
# ========================================================================= #
|
2198
|
-
def show_last_input
  # ======================================================================= #
  # Readline::HISTORY[-1] raises IndexError on an empty history, so the
  # history is only consulted when it holds at least one entry. The
  # shown entry is removed from the history afterwards, as before.
  # ======================================================================= #
  if readline_is_available? && !Readline::HISTORY.empty?
    e sfancy(Readline::HISTORY[-1])
    Readline::HISTORY.pop
  end
  e "The last user input was: #{sfancy(@user_input)}"
end
|
2205
|
-
|
2206
|
-
# ========================================================================= #
|
2207
|
-
# === show_mnemo
|
2208
|
-
#
|
2209
|
-
# A little helper-method to memorize things.
|
2210
|
-
# ========================================================================= #
|
2211
|
-
def show_mnemo
  e
  erev 'Amino Acids with negatively charged side groups: -'
  e sfancy(' D E')
  erev 'Amino Acids with positive charged side groups: +'
  e sfancy(' K R H')
  e
  # Enzyme class (colourized) => its short description, in display order.
  {
    'Oxidoreduktasen:' => ' Oxidations-Reduktions-Reaktionen',
    'Transferasen:'    => ' Übertragung funktioneller Gruppen',
    'Hydrolasen:'      => ' Hydrolasereaktionen',
    'Lyasen:'          => ' Eliminierung von Gruppen unter Ausbildung von Doppelbindungen',
    'Isomerasen:'      => ' Isomerisierungen',
    'Ligasen:'         => ' ATP-hydrolytic formation of bonds'
  }.each_pair { |enzyme_class, description|
    e sfancy(enzyme_class)+rev+description
  }
  e
end
|
2227
|
-
|
2228
|
-
# ========================================================================= #
|
2229
|
-
# === show_histone_table
|
2230
|
-
# ========================================================================= #
|
2231
|
-
def show_histone_table
  # The header row comes first; every further entry is one histone.
  table_rows = [
    'Histone | number of residues | mass in kDa | n% Arginine | n% Lysine',
    ' H1 215 23.0 1 29',
    ' H2A 129 14.0 9 11',
    ' H2B 125 13.8 6 16',
    ' H3 135 15.3 13 10',
    ' H4 102 11.3 14 11'
  ]
  erev 'The following table will show Calf Thymus Histones:'
  e
  table_rows.each { |row| erev row }
  e
end
|
2242
|
-
|
2243
|
-
# ========================================================================= #
|
2244
|
-
# === show_average_weight_of_an_aminoacid
|
2245
|
-
#
|
2246
|
-
# Show the average weight for an aminoacid that is part of a protein.
|
2247
|
-
# ========================================================================= #
|
2248
|
-
def show_average_weight_of_an_aminoacid
  # Only the number itself is colourized.
  weight = sfancy('110')
  erev 'The average molecular weight (MW) of an amino acid is '+weight+' Da.'
end
|
2252
|
-
|
2253
|
-
# ========================================================================= #
|
2254
|
-
# === show_first_orf
|
2255
|
-
#
|
2256
|
-
# This will show the first ORF.
|
2257
|
-
#
|
2258
|
-
# Invocation example:
|
2259
|
-
#
|
2260
|
-
# show_first_orf
|
2261
|
-
#
|
2262
|
-
# ========================================================================= #
|
2263
|
-
def show_first_orf(
    of_this_sequence = dna_sequence_object?
  )
  # Every possible start codon is tried in turn against the sequence.
  return_all_possible_start_codons.each { |start_codon|
    unless of_this_sequence.include? start_codon
      erev 'Not found the codon '+simp(start_codon)+rev+'.'
      next
    end
    # Show everything from the first occurrence of the codon onwards,
    # together with its 1-based start position.
    position   = of_this_sequence.index(start_codon)
    downstream = of_this_sequence[position..-1]
    e rev+padding?+leading_5_prime+sfancy(downstream)+
      rev+trailing_3_prime+' (Start position at nucleotide: '+
      orange((position+1).to_s)+rev+')'
  }
end
|
2279
|
-
|
2280
|
-
# ========================================================================= #
|
2281
|
-
# === show_available_vectors
|
2282
|
-
# ========================================================================= #
|
2283
|
-
def show_available_vectors
  erev 'We will next try to show the available vectors.'
  erev 'For now, these are all file names that start with '\
       'the prefix '+orange('vector_')+rev+'.'
  vectors = return_available_vectors # Defined in bioroebe/shell.rb
  return erev('No vector-sequence was found.') if vectors.empty?
  erev 'We found at the least one entry.'
  print ' '
  pp vectors
  # The first entry is loaded from its file and becomes sequence 2.
  erev 'Assigning the first one to the second sequence.'
  set_sequence_2(Bioroebe::Sequence.sequence_from_file(vectors.first))
  erev 'You can feedback this sequence via:'
  e
  erev ' seq2?'
  e
end
|
2302
|
-
|
2303
|
-
# ========================================================================= #
|
2304
|
-
# === report_current_genbank_version
|
2305
|
-
#
|
2306
|
-
# You can use this method to report the current genbank version.
|
2307
|
-
# ========================================================================= #
|
2308
|
-
def report_current_genbank_version(
    optional_arguments = nil
  )
  remote_url = 'https://www.ncbi.nlm.nih.gov/genbank/statistics/'
  case optional_arguments
  # ======================================================================= #
  # === :also_report_the_URL
  # ======================================================================= #
  when :also_report_the_URL
    erev 'We will obtain the latest Genbank version from the URL:'
    e
    erev " #{simp(remote_url)}"
    e
  end
  # ======================================================================= #
  # The block form closes the remote handle once it has been read.
  # ======================================================================= #
  remote_content = URI.open(remote_url) { |io| io.read }
  # ======================================================================= #
  # For the following Regex, see this link:
  #
  #   https://rubular.com/r/XC97c7i6sR
  #
  # ======================================================================= #
  regex_to_use =
    /<td>(\d{1,3})<\/td><td>(.{1,3}\s{1,3}\d{4})<\/td><td>\d+<\/td><td>\d+<\/td><td>\d+<\/td><td>\d+<\/td><\/tr><\/tbody><\/table>$/
  # ======================================================================= #
  # Collect the statistics table: from the line that opens it up to and
  # including the line that contains the closing </table>. The lines
  # are appended without separators, so the regex sees one long String.
  # ======================================================================= #
  table_html   = ''.dup
  inside_table = false
  remote_content.split(N).each { |line|
    if line.include? '<table id="stats_table" summary="GENBANK AND WGS'
      table_html << line
      inside_table = true
    else
      table_html << line if inside_table
      inside_table = false if line.include? '</table>'
    end
  }
  match = regex_to_use.match(table_html)
  # ======================================================================= #
  # Say so when the page could not be parsed, rather than printing an
  # empty version and date.
  # ======================================================================= #
  unless match
    erev 'The current Genbank version could not be determined from:'
    erev " #{simp(remote_url)}"
    return
  end
  version        = match[1]
  month_and_year = match[2]
  erev 'The current Genbank version is: '+simp(version)+
       rev+' (released on '+simp(month_and_year)+rev+')'
end
|
2349
|
-
|
2350
|
-
# ========================================================================= #
|
2351
|
-
# === show_copyright_clause
|
2352
|
-
#
|
2353
|
-
# This method will simply show the licence used for the project.
|
2354
|
-
#
|
2355
|
-
# This has to be updated manually, though; and since the licence
|
2356
|
-
# may change one day, I will keep track when this method has been
|
2357
|
-
# last modified, which is on the 28.04.2020 (28th April, 2020).
|
2358
|
-
# ========================================================================= #
|
2359
|
-
def show_copyright_clause
  # Every inner Array is one paragraph; each paragraph is preceded by
  # one empty line and nothing follows the last paragraph.
  paragraphs = [
    ['This project is free software, licensed under the LGPL-2.0 license.',
     'No "any later clause"; LGPL-2.0 applies to it.'],
    [' Copyright: Robert A. Heiler (2010-2020 and later)'],
    ['The biomart component is licensed under the MIT license and is',
     'written by Darren Oakley. The MIT license is retained for the',
     'Biomart component.'],
    ['(Note that the bioroebe project used to be under the GPL licence',
     'before some time; see the homepage of this gem for the explanation',
     'as to why a switch occurred towards LGPL.)']
  ]
  paragraphs.each { |lines|
    e
    lines.each { |line| erev line }
  }
end
|
2374
|
-
|
2375
|
-
# ========================================================================= #
|
2376
|
-
# === report_n_proteins_registered_in_swiss_prot
|
2377
|
-
#
|
2378
|
-
# This method will report how many proteins are registered in swiss-prot.
|
2379
|
-
#
|
2380
|
-
# Invoke this method like so:
|
2381
|
-
#
|
2382
|
-
# swiss-prot?
|
2383
|
-
#
|
2384
|
-
# ========================================================================= #
|
2385
|
-
def report_n_proteins_registered_in_swiss_prot
  regex_to_use = /contains (\d+) sequence entries/ # See: http://rubular.com/r/Bl9tHfheEx
  url = 'https://web.expasy.org/docs/relnotes/relstat.html'
  # ======================================================================= #
  # URI.open is required here: since Ruby 3.0 a plain open() no longer
  # handles URLs. The block form also closes the handle after reading.
  # ======================================================================= #
  dataset = URI.open(url) { |io| io.read }
  # The first capture group holds the number; '' when nothing matched.
  n_registered_proteins = dataset[regex_to_use, 1].to_s
  erev 'There are '+simp(n_registered_proteins)+rev+' registered '\
       'proteins in the Swiss-Prot database.'
  erev "The URL used to determine this was: "\
       "#{simp(url)}"
end
|
2396
|
-
|
2397
|
-
|
2398
|
-
# ========================================================================= #
|
2399
|
-
# === report_whether_readline_is_available
|
2400
|
-
# ========================================================================= #
|
2401
|
-
def report_whether_readline_is_available
  # "Available" here means: the constant Readline is defined.
  readline_defined = (Object.const_defined? :Readline)
  answer = slateblue(verbose_truth(readline_defined))
  erev 'Is readline available? '+answer
end
|
2409
|
-
|
2410
|
-
require 'bioroebe/dotplots/advanced_dotplot.rb'
|
2411
|
-
# ========================================================================= #
|
2412
|
-
# === show_2D_dotplot
|
2413
|
-
# ========================================================================= #
|
2414
|
-
def show_2D_dotplot(
    string1 = nil, string2 = nil
  )
  # ======================================================================= #
  # Ask the user for every string that was not passed in. $stdin.gets
  # returns nil on end-of-input, hence .to_s before .chomp.
  # ======================================================================= #
  if string1.nil? || string2.nil?
    erev 'You want to use a dotplot.'
    if string1.nil?
      erev 'Please provide the first string, which will be on the left side:'
      string1 = $stdin.gets.to_s.chomp
    end
    if string2.nil?
      erev 'Please provide the second string, which will be on the top side:'
      string2 = $stdin.gets.to_s.chomp
    end
  end
  ::Bioroebe::AdvancedDotplot.new(string1, string2)
end
|
2426
|
-
|
2427
|
-
# ========================================================================= #
|
2428
|
-
# === show_reverse_dna_string
|
2429
|
-
#
|
2430
|
-
# This method will simply show the DNA sequence reversed.
|
2431
|
-
# ========================================================================= #
|
2432
|
-
def show_reverse_dna_string
  # Layout: padding, 5' leader, colourized reversed DNA, 3' trailer.
  reversed = sfancy(return_reverse_dna_string)
  erev padding?+leading_five_prime+reversed+rev+trailing_three_prime
end
|
2439
|
-
|
2440
|
-
# ========================================================================= #
|
2441
|
-
# === show_download_dir
|
2442
|
-
# ========================================================================= #
|
2443
|
-
def show_download_dir
  # Print what ::Bioroebe.download_directory? reports.
  download_directory = ::Bioroebe.download_directory?
  erev download_directory
end
|
2446
|
-
|
2447
|
-
# ========================================================================= #
|
2448
|
-
# === show_this_sequence_padded
|
2449
|
-
#
|
2450
|
-
# Usage example:
|
2451
|
-
#
|
2452
|
-
# show_this_sequence_padded ATGACTTAGCCACAACTGCATGCATATGCATGACTGACT
|
2453
|
-
#
|
2454
|
-
# ========================================================================= #
|
2455
|
-
def show_this_sequence_padded(
    i = dna_sequence_object?
  )
  # ======================================================================= #
  # An Array is joined into one String. An empty Array stands for the
  # main DNA sequence. The caller's Array is never modified.
  # ======================================================================= #
  if i.is_a? Array
    i = (i.empty? ? [dna_sequence_object?] : i).join
  end
  # ======================================================================= #
  # Split into chunks of at most 80 characters and show one per line.
  # ======================================================================= #
  i.scan(/.{1,80}/).each { |line|
    erev line
  }
end
|
2472
|
-
|
2473
|
-
require 'bioroebe/enzymes/restriction_enzymes_file.rb'
|
2474
|
-
# ========================================================================= #
|
2475
|
-
# === show_all_yaml_files
|
2476
|
-
#
|
2477
|
-
# We show which yaml files we will use here.
|
2478
|
-
# ========================================================================= #
|
2479
|
-
def show_all_yaml_files
  # Only the restriction enzymes file is reported here.
  enzymes_file = sfile(::Bioroebe.restriction_enzymes_file)
  erev 'The file that holds our restriction enzymes can be found here:'
  e
  erev ' '+enzymes_file
  e
end
|
2485
|
-
|
2486
|
-
# ========================================================================= #
|
2487
|
-
# === show_resources_about_the_horseradish_peroxidase
|
2488
|
-
# ========================================================================= #
|
2489
|
-
def show_resources_about_the_horseradish_peroxidase
  # The lines are printed verbatim, one after the other.
  [
    'https://www.ncbi.nlm.nih.gov/gene/?term=%22Horseradish+Peroxidase%22',
    'https://www.ncbi.nlm.nih.gov/gene/836533',
    'Fasta: https://www.ncbi.nlm.nih.gov/nuccore/NC_003076.8?report=fasta&from=25659257&to=25661007&strand=true'
  ].each { |resource| e resource }
end
|
2494
|
-
|
2495
|
-
# ========================================================================= #
|
2496
|
-
# === report_whether_we_will_make_use_of_expand_cd_aliases
|
2497
|
-
# ========================================================================= #
|
2498
|
-
def report_whether_we_will_make_use_of_expand_cd_aliases
  # Pass the setting through Bioroebe::VerboseTruth and print the result.
  answer = Bioroebe::VerboseTruth[use_expand_cd_aliases?]
  erev answer
end
|
2501
|
-
|
2502
|
-
# ========================================================================= #
|
2503
|
-
# === report_useful_packages_installed
|
2504
|
-
#
|
2505
|
-
# This aggregate method can be used to report versions that may be
|
2506
|
-
# installed on the given system, e. g. science-based projects and
|
2507
|
-
# similar variants.
|
2508
|
-
# ========================================================================= #
|
2509
|
-
def report_useful_packages_installed
  # Run the individual reporters in their original order.
  %i[
    try_to_report_the_version_of_viennarna
    try_to_report_the_version_of_bedtools
  ].each { |reporter| send(reporter) }
end
|
2513
|
-
|
2514
|
-
# ========================================================================= #
|
2515
|
-
# === try_to_report_the_version_of_viennarna
|
2516
|
-
#
|
2517
|
-
# This method can be used to see the version of ViennaRNA, if it is
|
2518
|
-
# installed at all.
|
2519
|
-
# ========================================================================= #
|
2520
|
-
def try_to_report_the_version_of_viennarna
  result = `RNAplfold --version 2>&1`
  # ======================================================================= #
  # Shells word the failure differently ("command not found" versus
  # just "not found"), so match the common part. Exit status 127 is
  # the shell's status for a command that could not be found.
  # ======================================================================= #
  not_installed = result.include?('not found') || ($?&.exitstatus == 127)
  if not_installed
    e
    erev 'ViennaRNA does not appear to be installed / available.'
    e
    if is_on_roebe?
      erev 'You may be able to install it via:'
      e
      erev ' rbt viennarna'
      e
    end
  else
    # What remains after removing the program name is the version.
    version = result.sub(/RNAplfold/,'').strip.to_s
    erev 'The version of ViennaRNA is: '+
         orange(version)+rev
  end
end
|
2538
|
-
|
2539
|
-
# ========================================================================= #
# === report_current_working_directory
#
# Prints a two-line notice: a fixed header, followed by the result of
# return_working_directory() passed through sdir().
# ========================================================================= #
def report_current_working_directory
  erev('We are in the directory:')
  coloured_directory = sdir(return_working_directory)
  erev(" #{coloured_directory}")
end
|
2546
|
-
|
2547
|
-
# ========================================================================= #
# === report_which_yaml_engine_is_in_use
#
# Prints the value of ::Bioroebe.use_which_yaml_engine?, passed through
# sfancy() and followed by rev().
# ========================================================================= #
def report_which_yaml_engine_is_in_use
  yaml_engine = ::Bioroebe.use_which_yaml_engine?
  message = 'The yaml engine in use is: '
  message += sfancy(yaml_engine)
  message += rev
  erev(message)
end
|
2555
|
-
|
2556
|
-
begin
|
2557
|
-
require 'directory_paradise'
|
2558
|
-
rescue LoadError; end
|
2559
|
-
# ========================================================================= #
# === show_file_listing
#
# Delegate to DirectoryParadise::Report in order to show a listing for
# the given directory (defaults to the current working directory).
# The report is set up to not report the total filesize and to not use
# colours when use_colours? is false.
#
# The directory_paradise gem is treated as optional by this file (its
# require is wrapped in a rescue LoadError clause), so if the constant
# is not available we notify the user rather than fail with a NameError.
#
# To invoke this method from within the Bioroebe::Shell, do:
#
# ll
#
# ========================================================================= #
def show_file_listing(
    from_this_directory = Dir.pwd
  )
  unless defined?(DirectoryParadise::Report)
    erev 'The directory_paradise gem is not available, thus no '\
         'file listing can be shown.'
    return
  end
  report = DirectoryParadise::Report.new(from_this_directory, :dont_run_yet)
  report.dont_report_total_filesize
  report.disable_colours unless use_colours?
  report.run
end
|
2577
|
-
|
2578
|
-
# ========================================================================= #
# === try_to_report_the_version_of_bedtools
#
# Runs "bedtools --version" through the shell and reports the version
# of bedtools, or a notice that it does not appear to be installed.
#
# A missing binary is recognised in two ways: the shell's exit status
# 127 ("command not found" per POSIX), or the substring "not found" in
# the captured output. bash prints "command not found" whereas dash
# (the /bin/sh of Debian/Ubuntu) only prints "not found", so checking
# for the longer phrase alone misses the latter.
# ========================================================================= #
def try_to_report_the_version_of_bedtools
  result = `bedtools --version 2>&1`
  not_installed = ($?.exitstatus == 127) || result.include?('not found')
  if not_installed
    e
    erev 'The bedtools do not appear to be installed / available.'
    e
    if is_on_roebe?
      erev 'You may be able to install it via:'
      e
      erev ' rbt bedtools'
      e
    end
  else
    # Drop the program name as well as the "v" prefix of the version.
    version = result.sub(/bedtools/, '').strip.delete('v')
    erev "The version of bedtools is: "\
         "#{orange(version)}#{rev}"
  end
end
|
2599
|
-
|
2600
|
-
# ========================================================================= #
# === three_to_one
#
# Pass the input to ::Bioroebe.three_to_one() and output the stripped
# result, i.e. translate three-letter aminoacids into the corresponding
# single-letter code. An Array as input is joined via '-' first.
#
# Invocation example:
#
# three_to_one Thr Thr Glu Ala Val Glu Ser Thr Val Ala Thr Leu Glu Asp Ser # => T T E A V E S T V A T L E D S
# 3to1 ARG-ALA-SER-LEU-PHE-TRP-LYS-HIS-ASN-SER-VAL-LEU-ILE-VAL-PRO
#
# ========================================================================= #
def three_to_one(i)
  three_letter_input = i.is_a?(Array) ? i.join('-').strip : i
  one_letter_result = ::Bioroebe.three_to_one(three_letter_input)
  e(one_letter_result.strip)
end
|
2618
|
-
|
2619
|
-
require 'bioroebe/codons/codons.rb'
|
2620
|
-
# ========================================================================= #
# === show_codons_of_this_aminoacid_or_show_kazusa_codon
#
# Despite its name, this method takes a codon (such as ATG or GGC) and
# outputs whatever ::Bioroebe.codon_to_aminoacid() returns for it.
# If an Array is given, only its first element is used.
#
# If no input is provided, the URL of the kazusa codon webpage is
# shown instead.
#
# Invocation examples:
#
# codon? ATG # => M
# codon? AUG # => M
#
# ========================================================================= #
def show_codons_of_this_aminoacid_or_show_kazusa_codon(i = nil)
  codon = i.is_a?(Array) ? i.first : i
  unless codon # No input from the user: point to kazusa instead.
    return erev("The URL is at: "\
                "#{simp('http://www.kazusa.or.jp/codon/')}")
  end
  # ======================================================================= #
  # Translate the given codon and output the result.
  # ======================================================================= #
  e(::Bioroebe.codon_to_aminoacid(codon))
end
|
2652
|
-
|
2653
|
-
# ========================================================================= #
# === return_reverse_dna_string
#
# Returns the result of calling .reverse on complement_sequence?.
# ========================================================================= #
def return_reverse_dna_string
  complement = complement_sequence?
  complement.reverse
end
|
2659
|
-
|
2660
|
-
# ========================================================================= #
# === showorf (showorf tag)
#
# Use this method to show the open reading frame of a given sequence.
#
# The second argument is handed, via a block, to
# display_open_reading_frames(), which does the actual work; it
# defaults to :show_three_frames.
#
# If the input is nil or an empty Array then dna_sequence_object? is
# used instead.
#
# Note that the former use of class Bioroebe::ShowOrf was replaced in
# May 2020 (10.05.2020).
# ========================================================================= #
def showorf(
    i = dna_sequence_object?,
    show_how_many_frames = :show_three_frames
  )
  no_input_given = i.nil? || (i.is_a?(Array) && i.empty?)
  i = dna_sequence_object? if no_input_given
  display_open_reading_frames(i) { show_how_many_frames }
end
|
2679
|
-
|
2680
|
-
# ========================================================================= #
# === display_open_reading_frames
#
# Instantiates ::Bioroebe::DisplayOpenReadingFrames with the given
# sequence; the block, if any, is passed along. Of an Array only the
# first element is used. A nil or empty input is replaced with
# dna_sequence_object?.
#
# The class is loaded lazily, right before it is needed.
#
# Invocation example:
#
# display_open_reading_frames ATGAGCAAGGCCGACTACGAGAAG
#
# ========================================================================= #
def display_open_reading_frames(
    i = dna_sequence_object?, &block
  )
  sequence = i.is_a?(Array) ? i.first : i
  sequence = dna_sequence_object? if sequence.nil?
  sequence = dna_sequence_object? if sequence.empty?
  require 'bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb'
  ::Bioroebe::DisplayOpenReadingFrames.new(sequence, &block)
end
|
2697
|
-
|
2698
|
-
require 'bioroebe/fasta_and_fastq/show_fasta_headers.rb'
|
2699
|
-
# ========================================================================= #
# === show_fasta_headers
#
# Just show the fasta headers, by delegating into class
# Bioroebe::ShowFastaHeaders. The new instance is returned.
# ========================================================================= #
def show_fasta_headers(i)
  header_reporter = ::Bioroebe::ShowFastaHeaders
  header_reporter.new(i)
end
|
2707
|
-
|
2708
|
-
# ========================================================================= #
# === show_commandline_options
#
# Show the available commandline options, one per line via ecomment(),
# framed by an empty line before and after. Afterwards we exit.
#
# To invoke this method from the commandline, do:
#
# bioroebe --help
#
# ========================================================================= #
def show_commandline_options
  option_lines = [
    ' --silent # perform a silent startup',
    ' --sequence # use this nucleotide sequence on '\
    'startup; can be a number too such as 150',
    ' --n_fasta_entries # report how many fasta '\
    'entries are in this directory',
    ' --disable-opn # permanently disable opn',
    ' --random-aminoacids=33 # "generate" 33 random amino acids and display them',
    ' --n-aminoacids=33 # an alias to the ^^^ above',
    ' --protein-to-dna # convert protein-aminoacid '\
    'sequence back to DNA'
  ]
  e
  option_lines.each { |line| ecomment(line) }
  e
  exit
end
|
2733
|
-
|
2734
|
-
# ========================================================================= #
# === show_codon_table
#
# Delegate to ShowThisCodonTable in order to show a codon table. The
# new instance is returned.
#
# An empty Array as input is replaced with [1], which defaults to the
# vertebrate codon table. A new Array is used for this, so the Array
# of the caller is never modified (and may be frozen).
# ========================================================================= #
def show_codon_table(i = nil)
  i = [1] if i.is_a?(Array) && i.empty?
  ShowThisCodonTable.new(i)
end
|
2743
|
-
|
2744
|
-
# ========================================================================= #
# === show_rna_sequence
#
# Use this method to display a given sequence as RNA, that is with
# every 'T' replaced by 'U'. The display itself is done by
# display_nucleotide_object?, using lpad? as padding.
#
# Without input (or with nil) sequence_object?.to_rna is used.
#
# The replacement happens on a copy, so the object handed in by the
# caller is left untouched; this also allows frozen Strings as input.
# ========================================================================= #
def show_rna_sequence(
    i = sequence_object?.to_rna
  )
  i = sequence_object?.to_rna if i.nil?
  i = i.to_str if i.respond_to? :to_str
  if i.include? 'T'
    i = i.dup # Never modify the object of the caller.
    i.tr!('T', 'U')
  end
  display_nucleotide_object?.display(i) {{ use_this_as_padding: lpad? }}
end
|
2759
|
-
|
2760
|
-
# ========================================================================= #
# === report_size_of
#
# Report how many nucleotides the given input contains, based on its
# .size. Without input dna_sequence_object? is used; if that is not
# available either, we delegate to report_size_of_main_string().
# ========================================================================= #
def report_size_of(
    i = nil
  )
  sequence = i.nil? ? dna_sequence_object? : i
  return report_size_of_main_string unless sequence
  erev "This sequence contains #{sfancy(sequence.size.to_s)}#{rev} nucleotides."
end
|
2775
|
-
|
2776
|
-
# ========================================================================= #
# === display_glycolysis_pathway
#
# This method will show the glycolysis pathway, as obtained from
# Pathways.glycolysis_pathway. If a toplevel constant called Display
# exists then it is used for the output; otherwise each entry is
# shown on a line of its own, prefixed with ' - '.
# ========================================================================= #
def display_glycolysis_pathway
  pathway_steps = Pathways.glycolysis_pathway
  return Display.display(pathway_steps, ')') if Object.const_defined?(:Display)
  pathway_steps.each { |step| e(' - '+step) }
end
|
2789
|
-
|
2790
|
-
# ========================================================================= #
# === show_the_weight_of_some_common_proteins
#
# Read the given file (FILE_WEIGHT_OF_COMMON_PROTEINS by default) and
# show every line that contains ' # '. Such a line is split at ':';
# the part before the first ':' is shown left-aligned as the name,
# whereas the remainder is shown right-aligned, with ' kDa' appended.
# ========================================================================= #
def show_the_weight_of_some_common_proteins(
    use_this_file = FILE_WEIGHT_OF_COMMON_PROTEINS
  )
  erev 'Showing the weight of some common proteins next (in kDa):'
  e
  File.readlines(use_this_file).each { |line|
    next unless line.include? ' # '
    name, *remainder = line.split(':')
    weight = remainder.join(' ').strip
    erev " #{(name+':').ljust(25)} "\
         "#{lightblue((weight.to_s+' kDa').rjust(12))}"
  }
  e
end
|
2810
|
-
|
2811
|
-
# ========================================================================= #
# === show_protein_composition
#
# Delegate towards class Bioroebe::CountAmountOfAminoacids, which is
# instantiated with the given input. The new instance is returned.
# ========================================================================= #
def show_protein_composition(i)
  aminoacid_counter = ::Bioroebe::CountAmountOfAminoacids # bl $BIOROEBE/count_amount_of_aminoacids.rb
  aminoacid_counter.new(i)
end
|
2819
|
-
|
2820
|
-
# ========================================================================= #
# === show_all_deducible_aminoacid_sequences
#
# Show the aminoacid sequence of frame 1 and, if the input is longer
# than two characters, of frame 2 and frame 3 as well.
#
# The first argument is the sequence; nil or an empty Array will be
# replaced with dna_sequence_as_string?. An Array is joined via ' ',
# and all '-' characters are removed from the sequence.
#
# If the second argument, `also_show_numbers`, is true then each
# translation is additionally handed to
# verbose_report_numbered_amino_acid_sequence().
#
# If the third argument, `show_translations_aligned`, is set to
# true then we will additionally call showorf(), in order to display
# all 3 frames aligned one to another.
#
# The result of every translation is converted via .to_s, so that a
# translation that yielded nil is shown as an empty sequence rather
# than leading to a TypeError upon String concatenation.
#
# Usage example:
#
# toproteins AUG
# toproteins AUGAUGUUGAAU
# toproteins AUG-AUG-UUG-AAA-GGU-CGC-AAU-STOP
#
# ========================================================================= #
def show_all_deducible_aminoacid_sequences(
    i = dna_sequence_as_string?,
    also_show_numbers = true,
    show_translations_aligned = true
  )
  i = dna_sequence_as_string? if i.nil? || (i.is_a?(Array) && i.empty?)
  i = i.join(' ').strip if i.is_a? Array
  i = i.to_s.delete('-') # .to_s to avoid nil-operations.
  if i.empty? # This means that the user has not yet assigned a DNA sequence.
    erev 'Please assign some DNA sequence. You can also randomly generate'
    erev 'a new sequence via "random".'
    return
  end
  # ======================================================================= #
  # === Frame 1
  # ======================================================================= #
  cliner
  erev N+'The amino acid sequence for '+sfancy('Frame 1')+rev+' is: '
  e
  converted_sequence_for_frame_1 = translate_dna_into_aminoacid(i).to_s
  erev ' '+converted_sequence_for_frame_1+N+N
  if also_show_numbers
    verbose_report_numbered_amino_acid_sequence(converted_sequence_for_frame_1)
  end
  cliner
  return unless i.size > 2 # Too short for the other frames.
  # ======================================================================= #
  # === Frame 2
  # ======================================================================= #
  erev N+N+'The amino acid sequence for '+sfancy('Frame 2')+rev+' is: '
  e
  converted_sequence_for_frame_2 = translate_dna_into_aminoacid_frame2(i).to_s
  erev ' '+converted_sequence_for_frame_2+N+N
  if also_show_numbers
    verbose_report_numbered_amino_acid_sequence(converted_sequence_for_frame_2, '2')
  end
  cliner
  e
  # ======================================================================= #
  # === Frame 3
  # ======================================================================= #
  erev N+N+'The amino acid sequence for '+sfancy('Frame 3')+rev+' is: '
  e
  converted_sequence_for_frame_3 = translate_dna_into_aminoacid_frame3(i).to_s
  erev ' '+converted_sequence_for_frame_3+N+N
  if also_show_numbers
    verbose_report_numbered_amino_acid_sequence(converted_sequence_for_frame_3, '3')
  end
  e
  cliner
  showorf(i) if show_translations_aligned # Show the frames aligned.
end
|
2889
|
-
|
2890
|
-
# ========================================================================= #
# === show_blosum_matrix
#
# Announce the matrix, lazily load bioroebe/blosum/blosum.rb and then
# delegate towards Bioroebe::Blosum.show_matrix.
# ========================================================================= #
def show_blosum_matrix
  erev('Showing the blosum matrix next:')
  require 'bioroebe/blosum/blosum.rb'
  blosum = Bioroebe::Blosum
  blosum.show_matrix
end
|
2900
|
-
|
2901
|
-
end; end
|