bioroebe 0.10.80 → 0.12.24

Sign up to get free protection for your applications and to get access to all the features.
Files changed (301) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3946 -2817
  3. data/bin/bioroebe +13 -2
  4. data/bin/bioroebe_hash +7 -0
  5. data/bin/codon_to_aminoacid +6 -4
  6. data/bin/compacter +7 -0
  7. data/bin/plain_palindrome +7 -0
  8. data/bioroebe.gemspec +3 -3
  9. data/doc/README.gen +3918 -2793
  10. data/doc/quality_control/commandline_applications.md +3 -3
  11. data/doc/statistics/statistics.md +7 -7
  12. data/doc/todo/bioroebe_GUI_todo.md +19 -14
  13. data/doc/todo/bioroebe_java_todo.md +22 -0
  14. data/doc/todo/bioroebe_todo.md +2075 -2620
  15. data/lib/bioroebe/C++/DNA.cpp +69 -0
  16. data/lib/bioroebe/C++/RNA.cpp +58 -0
  17. data/lib/bioroebe/C++/sequence.cpp +35 -0
  18. data/lib/bioroebe/abstract/README.md +1 -0
  19. data/lib/bioroebe/abstract/features.rb +29 -0
  20. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
  21. data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
  22. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
  23. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
  24. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
  25. data/lib/bioroebe/base/base_module/base_module.rb +36 -0
  26. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
  27. data/lib/bioroebe/base/commandline_application/commandline_application.rb +13 -9
  28. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +24 -19
  29. data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
  30. data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
  31. data/lib/bioroebe/base/commandline_application/reset.rb +5 -3
  32. data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +42 -0
  33. data/lib/bioroebe/base/misc.rb +35 -0
  34. data/lib/bioroebe/base/prototype/misc.rb +15 -9
  35. data/lib/bioroebe/base/prototype/reset.rb +10 -0
  36. data/lib/bioroebe/cleave_and_digest/digestion.rb +10 -2
  37. data/lib/bioroebe/cleave_and_digest/trypsin.rb +104 -50
  38. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
  39. data/lib/bioroebe/codons/codons.rb +1 -1
  40. data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +208 -59
  41. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
  42. data/lib/bioroebe/codons/show_codon_tables.rb +8 -3
  43. data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
  44. data/lib/bioroebe/colours/rev.rb +4 -1
  45. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
  46. data/lib/bioroebe/constants/database_constants.rb +1 -1
  47. data/lib/bioroebe/constants/files_and_directories.rb +31 -4
  48. data/lib/bioroebe/constants/misc.rb +20 -0
  49. data/lib/bioroebe/constants/nucleotides.rb +7 -0
  50. data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +109 -39
  51. data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
  52. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
  53. data/lib/bioroebe/cpp +1 -0
  54. data/lib/bioroebe/crystal/README.md +2 -0
  55. data/lib/bioroebe/crystal/to_rna.cr +19 -0
  56. data/lib/bioroebe/data/README.md +11 -8
  57. data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
  58. data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
  59. data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
  60. data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
  61. data/lib/bioroebe/data/fasta/human/README.md +2 -0
  62. data/lib/bioroebe/dotplots/advanced_dotplot.rb +1 -1
  63. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
  64. data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
  65. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
  66. data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
  67. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
  68. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
  69. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
  70. data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
  71. data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
  72. data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
  73. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
  74. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
  75. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
  76. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
  77. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1518 -7
  78. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
  79. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
  80. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
  81. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
  82. data/lib/bioroebe/genome/README.md +4 -0
  83. data/lib/bioroebe/genome/genome.rb +130 -0
  84. data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
  85. data/lib/bioroebe/gui/gtk +1 -0
  86. data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +106 -137
  87. data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +27 -61
  88. data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +1 -1
  89. data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +1 -2
  90. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +1 -2
  91. data/lib/bioroebe/gui/gtk3/controller/controller.rb +46 -29
  92. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +77 -52
  93. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +1 -2
  94. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +100 -23
  95. data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +1 -2
  96. data/lib/bioroebe/gui/gtk3/gene/gene.rb +1 -2
  97. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +43 -30
  98. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +1 -2
  99. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +120 -73
  100. data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +1 -2
  101. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +19 -20
  102. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +20 -13
  103. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +1 -2
  104. data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +97 -22
  105. data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +3 -73
  106. data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +1 -2
  107. data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +1 -2
  108. data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +1 -2
  109. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +1 -2
  110. data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.class +0 -0
  111. data/lib/bioroebe/gui/javafx/bioroebe/Bioroebe.java +104 -0
  112. data/lib/bioroebe/gui/javafx/bioroebe.jar +0 -0
  113. data/lib/bioroebe/gui/javafx/bioroebe.mf +1 -0
  114. data/lib/bioroebe/gui/javafx/module-info.class +0 -0
  115. data/lib/bioroebe/gui/javafx/module-info.java +5 -0
  116. data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
  117. data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +166 -0
  118. data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
  119. data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
  120. data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
  121. data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
  122. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
  123. data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
  124. data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +94 -0
  125. data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
  126. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
  127. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
  128. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
  129. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
  130. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
  131. data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
  132. data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
  133. data/lib/bioroebe/images/images.html +29845 -0
  134. data/lib/bioroebe/java/README.md +5 -0
  135. data/lib/bioroebe/java/bioroebe/AllInOne.java +1 -0
  136. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  137. data/lib/bioroebe/java/bioroebe/Base.java +39 -5
  138. data/lib/bioroebe/java/bioroebe/IsPalindrome.java +23 -5
  139. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +0 -0
  140. data/lib/bioroebe/java/bioroebe/Sequence.java +28 -3
  141. data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
  142. data/lib/bioroebe/java/bioroebe/ToCamelcase.java +16 -4
  143. data/lib/bioroebe/java/bioroebe/ToRNA.java +43 -0
  144. data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +6 -0
  145. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/BisulfiteTreatment.class} +0 -0
  146. data/lib/bioroebe/java/bioroebe/{Codons.class → src/Codons.class} +0 -0
  147. data/lib/bioroebe/java/bioroebe/src/Codons.java +35 -0
  148. data/lib/bioroebe/java/bioroebe/src/Commandline.class +0 -0
  149. data/lib/bioroebe/java/bioroebe/src/Commandline.java +101 -0
  150. data/lib/bioroebe/java/bioroebe/{Esystem.class → src/Esystem.class} +0 -0
  151. data/lib/bioroebe/java/bioroebe/{Esystem.java → src/Esystem.java} +6 -1
  152. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/GenerateRandomDnaSequence.class} +0 -0
  153. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/GenerateRandomDnaSequence.java} +8 -2
  154. data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.class +0 -0
  155. data/lib/bioroebe/java/bioroebe/src/PartnerNucleotide.java +56 -0
  156. data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/RemoveFile.java} +10 -4
  157. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/RemoveNumbers.class} +0 -0
  158. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/RemoveNumbers.java} +1 -0
  159. data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.class +0 -0
  160. data/lib/bioroebe/java/bioroebe/src/toplevel_methods/BaseComposition.java +75 -0
  161. data/lib/bioroebe/misc/ruler.rb +11 -2
  162. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
  163. data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +59 -18
  164. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
  165. data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
  166. data/lib/bioroebe/parsers/gff.rb +1 -9
  167. data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
  168. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
  169. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
  170. data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
  171. data/lib/bioroebe/project/project.rb +1 -1
  172. data/lib/bioroebe/python/README.md +1 -0
  173. data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
  174. data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
  175. data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
  176. data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
  177. data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
  178. data/lib/bioroebe/python/mymodule.py +8 -0
  179. data/lib/bioroebe/python/protein_to_dna.py +33 -0
  180. data/lib/bioroebe/python/shell/shell.py +19 -0
  181. data/lib/bioroebe/python/to_rna.py +14 -0
  182. data/lib/bioroebe/python/toplevel_methods/convert_dna_to_aminoacid_sequence.py +137 -0
  183. data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
  184. data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
  185. data/lib/bioroebe/python/toplevel_methods/palindromes.py +52 -0
  186. data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
  187. data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
  188. data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
  189. data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
  190. data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
  191. data/lib/bioroebe/requires/require_cleave_and_digest.rb +3 -1
  192. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
  193. data/lib/bioroebe/sequence/alignment.rb +14 -4
  194. data/lib/bioroebe/sequence/dna.rb +1 -0
  195. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
  196. data/lib/bioroebe/sequence/protein.rb +105 -3
  197. data/lib/bioroebe/sequence/rna.rb +220 -0
  198. data/lib/bioroebe/sequence/sequence.rb +128 -40
  199. data/lib/bioroebe/shell/menu.rb +3815 -3696
  200. data/lib/bioroebe/shell/misc.rb +9019 -3133
  201. data/lib/bioroebe/shell/readline/readline.rb +1 -1
  202. data/lib/bioroebe/shell/shell.rb +1137 -28
  203. data/lib/bioroebe/siRNA/siRNA.rb +81 -1
  204. data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
  205. data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
  206. data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
  207. data/lib/bioroebe/taxonomy/constants.rb +4 -3
  208. data/lib/bioroebe/taxonomy/edit.rb +2 -1
  209. data/lib/bioroebe/taxonomy/help/help.rb +10 -10
  210. data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
  211. data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
  212. data/lib/bioroebe/taxonomy/info/info.rb +18 -11
  213. data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
  214. data/lib/bioroebe/taxonomy/interactive.rb +140 -104
  215. data/lib/bioroebe/taxonomy/menu.rb +27 -18
  216. data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
  217. data/lib/bioroebe/taxonomy/shared.rb +1 -0
  218. data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
  219. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
  220. data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
  221. data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
  222. data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
  223. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
  224. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
  225. data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
  226. data/lib/bioroebe/toplevel_methods/misc.rb +175 -11
  227. data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
  228. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
  229. data/lib/bioroebe/toplevel_methods/palindromes.rb +75 -47
  230. data/lib/bioroebe/toplevel_methods/taxonomy.rb +3 -3
  231. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
  232. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
  233. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
  234. data/lib/bioroebe/utility_scripts/compacter/compacter.rb +251 -0
  235. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
  236. data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
  237. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
  238. data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
  239. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
  240. data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
  241. data/lib/bioroebe/utility_scripts/permutations.rb +36 -9
  242. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
  243. data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
  244. data/lib/bioroebe/version/version.rb +2 -2
  245. data/lib/bioroebe/www/embeddable_interface.rb +121 -58
  246. data/lib/bioroebe/www/sinatra/sinatra.rb +186 -71
  247. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
  248. data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +17 -17
  249. data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
  250. data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
  251. data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
  252. data/lib/bioroebe/yaml/genomes/README.md +3 -4
  253. data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +5 -0
  254. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +57 -57
  255. data/spec/README.md +6 -0
  256. data/spec/project_wide_specification/classes.md +5 -0
  257. metadata +107 -70
  258. data/doc/setup.rb +0 -1655
  259. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
  260. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
  261. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
  262. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
  263. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
  264. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
  265. data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
  266. data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
  267. data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
  268. data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
  269. data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
  270. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  271. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
  272. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
  273. data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
  274. data/lib/bioroebe/java/bioroebe.jar +0 -0
  275. data/lib/bioroebe/shell/add.rb +0 -108
  276. data/lib/bioroebe/shell/assign.rb +0 -360
  277. data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
  278. data/lib/bioroebe/shell/constants.rb +0 -166
  279. data/lib/bioroebe/shell/download.rb +0 -335
  280. data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
  281. data/lib/bioroebe/shell/enzymes.rb +0 -310
  282. data/lib/bioroebe/shell/fasta.rb +0 -345
  283. data/lib/bioroebe/shell/gtk.rb +0 -76
  284. data/lib/bioroebe/shell/history.rb +0 -132
  285. data/lib/bioroebe/shell/initialize.rb +0 -217
  286. data/lib/bioroebe/shell/loop.rb +0 -74
  287. data/lib/bioroebe/shell/prompt.rb +0 -107
  288. data/lib/bioroebe/shell/random.rb +0 -289
  289. data/lib/bioroebe/shell/reset.rb +0 -335
  290. data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
  291. data/lib/bioroebe/shell/search.rb +0 -337
  292. data/lib/bioroebe/shell/sequences.rb +0 -200
  293. data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
  294. data/lib/bioroebe/shell/startup.rb +0 -127
  295. data/lib/bioroebe/shell/taxonomy.rb +0 -14
  296. data/lib/bioroebe/shell/tk.rb +0 -23
  297. data/lib/bioroebe/shell/user_input.rb +0 -88
  298. data/lib/bioroebe/shell/xorg.rb +0 -45
  299. data/lib/bioroebe/utility_scripts/compacter.rb +0 -131
  300. /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/BisulfiteTreatment.java} +0 -0
  301. /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/RemoveFile.class} +0 -0
@@ -1,2901 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # require 'bioroebe/shell/show_report_and_display.rb'
6
- # =========================================================================== #
7
- module Bioroebe
8
-
9
- class Shell < ::Bioroebe::CommandlineApplication
10
-
11
- require 'bioroebe/shell/search.rb'
12
- require 'bioroebe/codons/show_codon_usage.rb'
13
- require 'bioroebe/codons/show_this_codon_table.rb'
14
- require 'bioroebe/count/count_amount_of_aminoacids.rb'
15
-
16
# ========================================================================= #
# === report_main_sequence
#
# Reports a nucleotide sequence, optionally colourizing parts of it.
#
# The first argument selects what is to be highlighted; the second
# argument is the sequence to report and defaults to the return value
# of dna_sequence_as_string?.
#
# Symbols that are treated specially for the first argument:
#
#   :stop_codon            # colourize whatever stop_codons? returns
#   :start_codon           # colourize whatever start_codon? returns
#   :start_and_stop_codon  # colourize both of the above
#   :stop_codon_in_frame1  # mark stop codons among consecutive triplets
#                          # only; prints directly and returns nil
#
# Any other value is passed on unchanged as :colourize_this_subsequence.
#
# Invocation examples:
#
#   report_main_sequence(::Bioroebe.start_codon?)
#   report_main_sequence(:start_codon) # ← is the same as the ^^^ above
#   report_main_sequence(:stop_codon)  # ← Colourize the stop-codons.
#
# ========================================================================= #
def report_main_sequence(
    colourize = nil,
    input     = dna_sequence_as_string?
  )
  case colourize
  # ======================================================================= #
  # === :stop_codon
  # ======================================================================= #
  when :stop_codon
    colourize = stop_codons?
  # ======================================================================= #
  # === :start_codon
  # ======================================================================= #
  when :start_codon
    colourize = start_codon?
  # ======================================================================= #
  # === :start_and_stop_codon
  # ======================================================================= #
  when :start_and_stop_codon
    colourize = [start_codon?, stop_codons?]
  # ======================================================================= #
  # === :stop_codon_in_frame1
  #
  # The sequence is walked in complete triplets, starting at the first
  # nucleotide. Note that scan(/.../) only yields complete triplets, so
  # up to two trailing nucleotides are not part of the printed string.
  # ======================================================================= #
  when :stop_codon_in_frame1
    coloured_sequence = remove_trailing_escape_code(
      colour_for_nucleotides(''.dup).dup
    ).dup
    input.scan(/.../).each do |triplet|
      if is_a_stop_codon?(triplet)
        highlighted_stop = colour_for_stop_codon(triplet.dup).dup
        # Switch back to the regular nucleotide colour after the stop codon.
        coloured_sequence << highlighted_stop +
                             remove_trailing_escape_code(colour_for_nucleotides)
      else
        coloured_sequence << triplet.dup
      end
    end
    e(
      padding? + rev + leading_five_prime +
      coloured_sequence + rev + trailing_three_prime
    )
    return
  end
  # ======================================================================= #
  # All remaining cases delegate to the nucleotide-display object. The
  # options Hash is built inside the block, so it is only evaluated when
  # report_this_sequence() yields.
  # ======================================================================= #
  show_nucleotide_sequence?.report_this_sequence(input) {
    {
      padding_to_use:             padding?,
      colourize_this_subsequence: colourize
    }
  }
end
alias show_main_string         report_main_sequence # === show_main_string
alias show_main_sequence       report_main_sequence # === show_main_sequence
alias show_colourized_sequence report_main_sequence # === show_colourized_sequence
alias show_dna_sequence        report_main_sequence # === show_dna_sequence
95
-
96
# ========================================================================= #
# === show_composition
#
# Reports the nucleotide composition of the given sequence: first the
# absolute counts as returned by
# ::Bioroebe::CountAmountOfNucleotides.show_composition(), then the
# same counts expressed as a percentage of the sequence length.
#
# Invocation example:
#
#   scompo
#
# ========================================================================= #
def show_composition(
    i = dna_string?
  )
  total_length = i.size
  report_size_of_main_string
  counts = ::Bioroebe::CountAmountOfNucleotides.show_composition(i)
  erev 'Showing how many of the '+steelblue('four nucleotides')+rev+
       ' are in that sequence (absolute numbers):'
  print ' '
  # ======================================================================= #
  # One "X: n" fragment per nucleotide, separated by ', '.
  # ======================================================================= #
  absolute_fragments = counts.each_pair.map { |nucleotide, n_times|
    "#{nucleotide}: #{lightslategray(n_times.to_s)}#{rev}"
  }
  e absolute_fragments.join(', ')
  erev "The respective frequencies derived from these absolute numbers, "\
       "#{steelblue('in percent')}#{rev}, are:"
  print ' '
  counts.each_pair do |nucleotide, n_times|
    share = (n_times.to_f * 100 / total_length).round(2).to_s
    print "#{rev}#{nucleotide}: #{orange(share)}#{rev}% "
  end
  erev
end
129
-
130
# ========================================================================= #
# === show_codon_usage
#
# Hands the given sequence to ::Bioroebe::ShowCodonUsage.
#
# If an Array is given, an empty Array falls back to the return value of
# dna_sequence_as_string?, whereas a non-empty Array is flattened, has
# its nil entries removed and is then joined into a single String.
# ========================================================================= #
def show_codon_usage(
    i = dna_sequence_as_string?
  )
  if i.is_a?(Array)
    i = i.empty? ? dna_sequence_as_string? : i.flatten.compact.join
  end
  ::Bioroebe::ShowCodonUsage.new(i)
end
147
-
148
# ========================================================================= #
# === show_all_codon_tables
#
# Displays the codon tables via ::Bioroebe::ShowCodonTables, loading
# that class on demand if it has not been required yet. The output is
# surrounded by one e() call before and one after.
#
# According to the original notes, the argument can either be:
#
#   :everything
#   :only_names
#
# :everything is the default. :only_names is useful if you only want to
# report the names of the codon tables. The argument is passed through
# to ShowCodonTables.new() unchanged.
# ========================================================================= #
def show_all_codon_tables(
    show_what = :everything
  )
  require 'bioroebe/codons/show_codon_tables.rb' unless Bioroebe.const_defined?(:ShowCodonTables)
  e
  ::Bioroebe::ShowCodonTables.new(show_what)
  e
end
177
-
178
# ========================================================================= #
# === report_n_start_codons
#
# Reports how many start codons can be found in the given String. The
# String is upcased and then scanned for the start codon in use.
#
# The third argument names the reading frame. It is presently only used
# for the message shown to the user; the count itself is taken over the
# whole String and is not restricted to that frame.
#
# If a block is given and yields something that matches /^frame/, then
# that value (converted into a Symbol) replaces the third argument.
#
# The following can be invoked via:
#
#   n_ORF? frame1
#
# ========================================================================= #
def report_n_start_codons(
    this_string             = string?,
    use_this_as_start_codon = ::Bioroebe.start_codon?, # Use the proper start codon.
    in_which_frame          = :frame1
  )
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    yielded = yield
    in_which_frame = yielded.to_sym if /^frame/ === yielded
  end
  # ======================================================================= #
  # Translate the frame symbol into its human-readable label. Values
  # that are not known are kept as they are.
  # ======================================================================= #
  frame_labels = {
    frame1: 'frame 1',
    frame2: 'frame 2',
    frame3: 'frame 3'
  }
  in_which_frame = frame_labels.fetch(in_which_frame, in_which_frame)
  matches        = this_string.upcase.scan(/#{use_this_as_start_codon}/)
  n_start_codons = matches.size.to_s
  trailing_message = " Initiation Codons (in "\
                     "#{orangered(in_which_frame)}#{rev})."
  erev "Our main string has #{sfancy(n_start_codons)}#{rev} "\
       "#{simp(use_this_as_start_codon)}#{rev} "\
       "(#{use_this_as_start_codon.tr('T','U')})"+
       trailing_message
  if coding_area? # This has been user-supplied in that case.
    erev 'However had, only the nucleotides from position'
    erev "#{sfancy(coding_area?.to_s.split('..').first.to_s)}#{rev} to "\
         "position #{sfancy(coding_area?.to_s.split('..').last.to_s)}#{rev} "\
         "will be colourized."
  end
end
231
-
232
# ========================================================================= #
# === show_human_genome_version
#
# Queries the Ensembl landing page for Homo sapiens, extracts the
# "Genome assembly" entry from the HTML and reports it, together with
# the URL that was used for the query.
#
# If the regex does not match, an empty version String is reported.
# Network errors raised by URI.open are not rescued here.
# ========================================================================= #
def show_human_genome_version
  require 'open-uri' # Needed for URI.open on remote http(s) resources.
  # ======================================================================= #
  # The variable was previously assigned as remote_URL but read as
  # remote_url, which could not be resolved. One name is used now.
  # ======================================================================= #
  remote_url = 'https://www.ensembl.org/Homo_sapiens/Info/Index'
  # ======================================================================= #
  # The block form makes sure that the remote stream is closed again.
  # ======================================================================= #
  dataset = URI.open(remote_url) { |remote_io| remote_io.read }
  use_this_regex = /Genome assembly: (.{1,11}\.p\d+) <small>/ # See: https://rubular.com/r/DD5FhaPs3b
  human_genome_version = dataset.scan(use_this_regex).flatten.first.to_s
  erev "The most current human genome version is: "\
       "#{sfancy(human_genome_version)}"
  erev "The URL that was used to query this has been: "\
       "#{steelblue(remote_url)}"
end
249
-
250
# ========================================================================= #
# === show_oligo_length_three
#
# Builds every trinucleotide out of the entries returned by
# ::Bioroebe.dna? and reports, one per line, how many matches
# String#scan finds for it in the upcased sequence.
#
# Invocation example:
#
#   random 99; oligo_3
#
# ========================================================================= #
def show_oligo_length_three(
    sequence = dna_sequence_object?
  )
  sequence    = sequence.upcase # This is the sequence that will be scanned.
  nucleotides = ::Bioroebe.dna?
  erev 'We will align the nucleotides in chunks of 3 and show their '\
       'frequency.'
  nucleotides.each do |first_nucleotide|
    nucleotides.each do |second_nucleotide|
      nucleotides.each do |third_nucleotide|
        trinucleotide = first_nucleotide+second_nucleotide+third_nucleotide
        n_matches     = sequence.scan(trinucleotide).size
        erev trinucleotide+' '+n_matches.to_s
      end
    end
  end
end
277
-
278
# ========================================================================= #
# === show_oligo_length_two
#
# Builds every dinucleotide out of the entries returned by
# ::Bioroebe.dna? and reports, one per line, how many matches
# String#scan finds for it in the upcased input String.
# ========================================================================= #
def show_oligo_length_two(
    string = string?
  )
  upcased_sequence = string.upcase
  nucleotides      = ::Bioroebe.dna?
  nucleotides.each do |first_nucleotide|
    nucleotides.each do |second_nucleotide|
      dinucleotide = "#{first_nucleotide}#{second_nucleotide}"
      n_matches    = upcased_sequence.scan(dinucleotide).size
      erev dinucleotide+' '+n_matches.to_s
    end
  end
end
295
-
296
# ========================================================================= #
# === show_position_for_the_main_sequence
#
# Shows the main sequence in chunks of up to 25 characters. Each chunk
# is printed with its characters separated by ' ', followed by a second
# line that holds the 1-based position of every character (each
# position left-justified to a width of 4), followed by an e() call.
# ========================================================================= #
def show_position_for_the_main_sequence
  index_position = 1
  # ======================================================================= #
  # /.{,25}/ can also match the empty String, hence the guard below.
  # ======================================================================= #
  sequence?.scan(/.{,25}/).each { |entry|
    next if entry.empty?
    erev entry.split(//).join(' ')
    start = index_position
    index_position += entry.size
    # ===================================================================== #
    # The position line is assembled via map/join rather than by
    # appending to a '' literal: this file enables frozen_string_literal,
    # so appending to such a literal raises a FrozenError.
    # ===================================================================== #
    second_line = (start...index_position).map { |position|
      position.to_s.ljust(4)
    }.join
    erev cadetblue(second_line)+rev
    e
  }
end
316
-
317
- # ========================================================================= #
318
- # === report_this_input_was_not_found
319
- #
320
- # This method is used to notify the user that a certain input was
321
- # not found.
322
- # ========================================================================= #
323
# Tells the user that the given input is not valid for the BioShell.
#
# Nothing is reported for nil or for empty input. Input that does not
# respond to #empty? (e. g. an Integer) is reported via its #to_s form
# rather than raising NoMethodError.
def report_this_input_was_not_found(
    i = ''
  )
  return if i.nil?
  return if i.respond_to?(:empty?) and i.empty?
  erev "Input `#{sfancy(i.to_s)}#{rev}` was not "\
       "found to be a valid input for the BioShell."
end
331
-
332
- # ========================================================================= #
333
- # === show_local_sequences
334
- #
335
- # This method will show the available local sequences.
336
- # ========================================================================= #
337
# Lists the basenames of the fasta files returned by
# return_fasta_files_in_the_log_directory, as a numbered list.
def show_local_sequences
  fasta_files = return_fasta_files_in_the_log_directory
  return erev('No local fasta sequences could be found.') if fasta_files.empty?
  e
  erev 'The following local sequences were found in the main log'
  erev 'directory ('+sdir(log_dir?)+rev+').'
  e
  # Width of the largest counter, so that all counters are right-aligned.
  counter_width = fasta_files.size.to_s.size
  fasta_files.each.with_index(1) { |path, number|
    erev padding?+'('+number.to_s.rjust(counter_width)+') '+rev+
         sfile(File.basename(path))+rev
  }
  e
end
355
-
356
- # ========================================================================= #
357
- # === show_nucleotide_sequence?
358
- # ========================================================================= #
359
# Returns whatever is stored under :show_nucleotide_sequence in
# @internal_hash.
def show_nucleotide_sequence?
  return @internal_hash[:show_nucleotide_sequence]
end
alias display_nucleotide_object? show_nucleotide_sequence? # === display_nucleotide_object?
362
-
363
- # ========================================================================= #
364
- # === show_sequence_with_a_ruler
365
- #
366
- # This will show the main sequence together with a "ruler" on top.
367
- #
368
- # The first argument specifies how many nucleotides are to be displayed
369
- # per given line.
370
- #
371
- # This method can also be called in this way:
372
- #
373
- # show_sequence_with_a_ruler { :without_colours }
374
- #
375
- # This will skip showing the ruler.
376
- # ========================================================================= #
377
# Displays a sequence in chunks of n characters, delegating each chunk
# to show_nucleotide_sequence?.display_with_prior_formatting.
#
# group_together_n_nucleotides may be an Integer, a String holding a
# number, :default / nil, or an Array whose first element is one of
# these. Anything that does not resolve to a positive Integer falls
# back to 70, rather than raising further down.
#
# A block may yield :without_colours; this value is then handed to the
# display method instead of the default :show_ruler.
def show_sequence_with_a_ruler(
    group_together_n_nucleotides = :default,
    use_this_sequence            = main_sequence?
  )
  chunk_size = group_together_n_nucleotides
  chunk_size = chunk_size.first if chunk_size.is_a?(Array)
  chunk_size = chunk_size.to_i  if chunk_size.is_a?(String) # We need an Integer here.
  unless chunk_size.is_a?(Integer) and chunk_size > 0
    chunk_size = 70 # Covers :default, nil, '' and other unusable input.
  end
  e
  e "Displaying the main sequence (length: #{use_this_sequence.to_s.size}) "\
    "in a chunk of #{slateblue(chunk_size.to_s)}#{rev}"\
    " nucleotides/\naminoacids next."
  e
  use_this_sequence = use_this_sequence.to_s
  # Splitting on a capturing group keeps the chunks; the empty strings
  # between two adjacent chunks are discarded.
  chunks = use_this_sequence.split(/(.{#{chunk_size}})/).reject(&:empty?)
  use_this_ruler_type = :show_ruler # Note that :show_ruler is the default.
  # ======================================================================= #
  # === Handle blocks given next
  # ======================================================================= #
  if block_given?
    use_this_ruler_type = :without_colours if yield == :without_colours
  end
  chunks.each {|sequence|
    show_nucleotide_sequence?.display_with_prior_formatting(sequence) {
      use_this_ruler_type
    }
    e
  }
end
430
-
431
- # ========================================================================= #
432
- # === dna_with_ends
433
- #
434
- # Display DNA with proper ends.
435
- #
436
- # The first argument should be the string that we will colourize.
437
- #
438
- # If the second argument is given (`optional_colourize`), then this
439
- # method will colourize the sequence at certain positions. This
440
- # can be useful to display, for instance, restriction-sites.
441
- # ========================================================================= #
442
# Returns the given sequence wrapped in the leading_five_prime and
# trailing_three_prime markers.
#
# Without a block the sequence is passed through sfancy(). With a
# block yielding :honour_coding_area_if_it_exists, the entries in
# optional_colourize (a String or an Array of Strings) are highlighted:
# only inside @internal_hash[:coding_area] (a "start..end" String,
# 1-based) if that is set, otherwise in the whole sequence.
#
# colourize_everything is only consulted when optional_colourize is
# neither a String nor an Array: true replaces all hits, 1 only the
# first hit.
#
# The method works on a private copy of the input, so the caller's
# String is never upcased or filled with colour codes.
def dna_with_ends(
    i                    = dna_sequence_as_string?,
    optional_colourize   = nil,
    colourize_everything = true
  )
  i = i.to_s.dup # Private, unfrozen copy; upcase!/gsub! below are destructive.
  i.upcase! if config?.respond_to?(:upcase_nucleotides) and config?.upcase_nucleotides
  optional_colourize = [optional_colourize] if optional_colourize.is_a? String
  if block_given?
    yielded = yield
    case yielded
    # ===================================================================== #
    # === :honour_coding_area_if_it_exists
    # ===================================================================== #
    when :honour_coding_area_if_it_exists
      coding_area = @internal_hash[:coding_area]
      if optional_colourize and coding_area
        # ================================================================= #
        # We deduct 1 because ruby Strings are indexed starting at 0.
        # ================================================================= #
        positions      = coding_area.split('..')
        start_position = positions.first.to_i - 1
        end_position   = positions.last.to_i - 1
        start_position = 0 if start_position < 0
        # i[0, n] yields '' for n == 0, whereas i[0..(n-1)] would yield
        # the whole sequence in that case.
        leading_part     = i[0, start_position].to_s
        internal_segment = i[start_position .. end_position].to_s.dup
        trailing_part    = i[(end_position+1) .. -1].to_s
        optional_colourize.each {|inner_entry|
          internal_segment.gsub!(inner_entry, yellow+inner_entry+rev)
        }
        i = leading_part+internal_segment+trailing_part
      elsif optional_colourize
        if optional_colourize.is_a? Array
          # =============================================================== #
          # Apply all entries given in the Array.
          # =============================================================== #
          optional_colourize.flatten.each {|inner_entry|
            i.gsub!(inner_entry, colour_for_stop_codon(inner_entry)+rev)
          }
        else
          # =============================================================== #
          # Make sure that we have a String past this point.
          # =============================================================== #
          optional_colourize = optional_colourize.to_s
          if colourize_everything == true
            i.gsub!(optional_colourize, colour_for_stop_codon(optional_colourize)+rev)
          elsif colourize_everything == 1
            i.sub!(optional_colourize, colour_for_stop_codon(optional_colourize)+rev)
          end
        end
      end
    end
  else
    i = "#{sfancy(i)}#{rev}"
  end
  # ======================================================================= #
  # We will report the DNA sequence with leading 5' prime and
  # trailing 3' prime.
  # ======================================================================= #
  return "#{leading_five_prime}#{i}#{trailing_three_prime}"
end
511
-
512
- require 'bioroebe/toplevel_methods/matches.rb'
513
- # ========================================================================= #
514
- # === report_the_first_atg
515
- #
516
- # This method will simply report the first ATG codon.
517
- # ========================================================================= #
518
# Reports the position of the first start codon in the main DNA
# sequence and shows up to 100 nucleotides starting there.
#
# If no match is available, a short notice is shown instead of
# raising NoMethodError on a nil match.
def report_the_first_atg
  dna_sequence  = dna_sequence_object_as_string?
  array_matches = ::Bioroebe.return_all_substring_matches(
    dna_sequence, start_codon?
  )
  first_match = array_matches.respond_to?(:first) ? array_matches.first : nil
  unless first_match.respond_to?(:first)
    erev 'No ATG could be found in the current sequence.'
    return
  end
  start_position = first_match.first
  erev 'The first ATG can be found at position '+
       simp(start_position.to_s)+rev+'.'
  erev 'We will next show the first 100 nucleotides, starting from this:'
  # NOTE(review): the -1 presumes that the reported position is 1-based - confirm.
  report_five_prime_three_prime(
    dna_sequence_object?[start_position-1,100]
  )
end
531
-
532
- # ========================================================================= #
533
- # === show_aminoacid_sequence
534
- #
535
- # To show the aminoacid sequence, do:
536
- # show_aa
537
- # ========================================================================= #
538
# Prints the aminoacid sequence, preceded by the padding.
def show_aminoacid_sequence
  padded_sequence = padding?+aminoacid_sequence?
  erev padded_sequence
end
542
-
543
- # ========================================================================= #
544
- # === show_dna_string (show string tag, show tag)
545
- #
546
- # Use this method to show the @sequence, or another string of your
547
- # choosing, if you pass it to the method.
548
- #
549
- # You can also invoke this method with something like this:
550
- #
551
- # show_string { :with_colourized_separator }
552
- #
553
- # This means that we will use '|' separators that are colourized.
554
- # ========================================================================= #
555
# Shows the given string (by default dna_string?).
#
# In :dna mode the string is handed to display_nucleotide_sequence;
# in any other mode show_aminoacid_sequence is called instead. An
# empty string leads to report_that_a_string_must_be_assigned_first.
#
# truncate_too_long_result: any truthy value truncates strings that
# are longer than TRUNCATE_AT_N_ELEMENTS; :do_not_truncate disables
# this.
#
# NOTE(review): :do_not_truncate_and_do_not_show_leader_and_trailer is
# truthy and thus also truncates, as it always did. Its "no leader and
# trailer" part only ever affected a local String that was never
# displayed or returned; that dead String has been removed here.
#
# A block may yield :with_colourized_separator, in which case a
# colourized '|' is inserted after every third character.
def show_dna_string(
    this_string              = dna_string?,
    truncate_too_long_result = do_truncate?
  )
  truncate_too_long_result = false if truncate_too_long_result == :do_not_truncate
  this_string = dna_string? if this_string.nil? and dna_string?
  if this_string.to_s.empty?
    report_that_a_string_must_be_assigned_first
  elsif mode? == :dna
    # The string is deliberately not upcased here.
    if truncate_too_long_result and (this_string.size > TRUNCATE_AT_N_ELEMENTS)
      this_string =
        this_string[0, TRUNCATE_AT_N_ELEMENTS]+
        swarn(' [TRUNCATED as the sequence '\
              'is longer than '+TRUNCATE_AT_N_ELEMENTS.to_s+' nucleotides]')
    end
    if block_given?
      case yield
      when :with_colourized_separator
        piped = ''.dup
        this_string.split(//).each_with_index {|char, index|
          piped << char
          piped << paleturquoise('|')+sfancy if (index+1) % 3 == 0
        }
        this_string = piped
      end
    end
    # ===================================================================== #
    # Delegate to class ShowNucleotideSequence next:
    # ===================================================================== #
    display_nucleotide_sequence(this_string)
  else # Else use the aminoacid mode.
    show_aminoacid_sequence
  end
end
alias show_main_string       show_dna_string # === show_main_string
alias report_sequence        show_dna_string # === report_sequence
alias show_sequence          show_dna_string # === show_sequence
alias show_main_dna_sequence show_dna_string # === show_main_dna_sequence
alias show_string            show_dna_string # === show_string
623
-
624
- # ========================================================================= #
625
- # === report_size_of_main_string
626
- # ========================================================================= #
627
# Reports the size of the given sequence; nil or an empty Array means
# that dna_sequence_object? is used instead.
def report_size_of_main_string(
    i              = dna_sequence_object?,
    type_of_string = 'main ' # This is usually the main DNA string.
  )
  if i.nil? or (i.is_a?(Array) and i.empty?)
    i = dna_sequence_object?
  end
  size_as_string = i.size.to_s
  erev 'The '+type_of_string+'string has '+sfancy(size_as_string)+
       rev+' '+nucleotides_or_aminoacids?+'.'
end
alias report_length_of_the_dna_string report_size_of_main_string # === report_length_of_the_dna_string
alias report_size_of_this_sequence    report_size_of_main_string # === report_size_of_this_sequence
637
-
638
- # ========================================================================= #
639
- # === show_editor_in_use
640
- # ========================================================================= #
641
# Prints the value of the MAIN_EDITOR constant.
def show_editor_in_use
  e(MAIN_EDITOR)
end
644
-
645
- # ========================================================================= #
646
- # === show_welcome_message
647
- #
648
- # Show a little welcome message on startup. This can be disabled of
649
- # course.
650
- # ========================================================================= #
651
# Prints the version, the last-updated date and a hint about "help",
# unless silent_startup? is true.
def show_welcome_message
  return if silent_startup?
  version      = sfancy(version?.to_s)
  last_updated = simp(::Bioroebe.last_updated?)
  erev 'Welcome to the Bioroebe::Shell Version '+version+rev+
       ', last updated: '+last_updated+rev+'.'
  erev 'Type "'+sfancy('help')+rev+'" to get some help.'
end
662
-
663
- # ========================================================================= #
664
- # === show_the_weight_of_the_four_individual_nucleotides
665
- # ========================================================================= #
666
# Prints one line per nucleotide (A, T, C, G): the value of the name
# helper (right-aligned to 10 characters), followed by the colourized
# value of the matching weight helper.
def show_the_weight_of_the_four_individual_nucleotides
  rows = [
    [' A: ', adenin?,  weight_of_adenin?],
    [' T: ', thymin?,  weight_of_thymin?],
    [' C: ', cytosin?, weight_of_cytosin?],
    [' G: ', guanin?,  weight_of_guanin?]
  ]
  e
  rows.each {|label, name, weight|
    erev label+name.rjust(10)+' '+palevioletred(weight)
  }
  e
end
678
-
679
- # ========================================================================= #
680
- # === show_this_subsequence
681
- #
682
- # Sometimes we want to show a subsequence. This method helps us to do
683
- # so, too.
684
- #
685
- # The input may be "tainted", e. g. be a String like "12,345" or
686
- # "12.345", so this method will have to eliminate the ',' and '.'
687
- # characters as well, before converting this String into an
688
- # Integer. (It must be an Integer because nucleotide counting
689
- # can logically not be a Float.)
690
- #
691
- # Usage example:
692
- #
693
- # random 99; [22..33]
694
- #
695
- # ========================================================================= #
696
# Shows the subsequence from start_position to end_position of the
# given sequence object, obtained via its #start_end method.
#
# Both positions are stripped of ',' and '.' and converted to Integer.
# A start below 1 becomes 1; an end beyond the sequence size is
# limited to that size. Both adjustments are reported to the user.
def show_this_subsequence(
    start_position        = 1,
    end_position          = 10,
    work_on_this_sequence = dna_sequence_object?
  )
  start_position = start_position.to_s.delete(',.').to_i
  end_position   = end_position.to_s.delete(',.').to_i
  if start_position < 1
    erev 'The minimum for the start-position must be 1, so this'
    erev 'is now treated as one rather than '+start_position.to_s+'.'
    start_position = 1
  end
  if end_position > work_on_this_sequence.size
    erev 'The sequence is '+slateblue('too long')+rev+' ('+
         crimson('end_position')+rev+' is at '+
         sfancy(end_position.to_s)+rev+', '+
         nucleotides_or_aminoacids?.to_s+' sequence length was: '+
         sfancy(work_on_this_sequence.size.to_s)+rev+').'
    erev 'It will be limited next to '+
         sfancy(work_on_this_sequence.size.to_s)+rev+' in length.'
    end_position = work_on_this_sequence.size
  end
  sequence = work_on_this_sequence.start_end(start_position, end_position)
  unless sequence
    return erev(
      'This subsequence appears to be invalid (start: '+
      start_position.to_s+', end: '+end_position.to_s+')'
    )
  end
  size = sequence.size.to_s
  unit = ''
  if work_on_this_sequence.respond_to? :nucleotides_or_aminoacids?
    unit = work_on_this_sequence.nucleotides_or_aminoacids?.to_s
  end
  erev 'Next showing a subsequence, '+unit+' '+
       olive(start_position.to_s)+rev+' to '+
       olive(end_position.to_s)+rev+
       ' (including '+olive(start_position.to_s)+
       rev+' and '+olive(end_position.to_s)+rev+').'
  erev 'The length of the fragment will be '+simp(size)+rev+' '+unit+'.'
  report_this_dna_sequence_with_proper_trailer_and_leader(sequence) {
    :try_to_colourize_start_codon
  }
end
746
-
747
- # ========================================================================= #
748
- # === report_where_the_home_directory_can_be_found
749
- # ========================================================================= #
750
# Prints the path of the given directory (by default log_dir?),
# surrounded by empty lines.
def report_where_the_home_directory_can_be_found(
    i = log_dir?
  )
  erev 'The "home" directory (actually called the log directory) can be found here:'
  e
  e " #{sdir(i)}"
  e
end
759
-
760
- # ========================================================================= #
761
- # === show_double_strand
762
- # ========================================================================= #
763
# Shows the main sequence, followed by the complement of string?
# (including the prime ends).
def show_both_dna_strands
  show_main_sequence
  show_complement(string?, :include_prime_ends)
end
alias show_double_strand show_both_dna_strands # === show_double_strand
767
-
768
- # ========================================================================= #
769
- # === show_codon_piped_sequence
770
- # ========================================================================= #
771
# Delegates to display_nucleotide_sequence, requesting :piped output
# for the default sequence.
def show_codon_piped_sequence
  display_nucleotide_sequence(:default) { :piped }
end
776
-
777
- # ========================================================================= #
778
- # === show (show tag)
779
- #
780
- # Bundle together some show-related methods.
781
- # ========================================================================= #
782
# Dispatcher for a few show-related methods. An Array is joined via
# ' ' and stripped first. Unrecognized input is ignored silently.
def show(i)
  i = i.join(' ').strip if i.is_a? Array
  if ['codon_table','codon','codon table'].include?(i)
    show_codon_table
  elsif ['blosum','blosum matrix','blosum_matrix'].include?(i)
    show_blosum_matrix
  elsif i.nil? or '' == i # Empty or nil.
    show_dna_string
  end
end
793
-
794
- # ========================================================================= #
795
- # === display_nucleotide_sequence
796
- #
797
- # Consistently use this method whenever you wish to display a
798
- # nucleotide sequence.
799
- # ========================================================================= #
800
# Hands the given sequence (:default means dna_sequence_object?) to
# show_nucleotide_sequence?.report_this_sequence, together with a Hash
# holding the padding and the piped-output flag.
#
# A block yielding :piped or :show_piped enables the piped output.
def display_nucleotide_sequence(
    this_sequence = dna_sequence_object?,
    &block
  )
  this_sequence = dna_sequence_object? if :default == this_sequence
  piped = false
  if block_given?
    piped = true if [:piped, :show_piped].include?(yield)
  end
  options = {
    padding_to_use:    padding?,
    show_piped_output: piped
  }
  show_nucleotide_sequence?.report_this_sequence(this_sequence) { options }
end
alias display_this_nucleotide_sequence display_nucleotide_sequence # === display_this_nucleotide_sequence
alias display_this_sequence            display_nucleotide_sequence # === display_this_sequence
alias show_this_sequence               display_nucleotide_sequence # === show_this_sequence
825
-
826
- # ========================================================================= #
827
- # === report_how_many_aminoacids_we_have
828
- #
829
- # This method will report how many aminoacids we have assigned.
830
- # ========================================================================= #
831
# Reports the number of aminoacids: the size of aminoacids? if that is
# set, else a third of the size of dna_sequence_object? (as Integer).
def report_how_many_aminoacids_we_have
  n_aminoacids = (
    aminoacids? ? aminoacids?.size : dna_sequence_object?.size / 3.0
  ).to_i
  erev "This sequence has #{simp(n_aminoacids.to_s)}#{rev} aminoacids."
end
840
-
841
- # ========================================================================= #
842
- # === show_chromosome_table
843
- # ========================================================================= #
844
# Prints all key/value pairs of the YAML file FILE_CHROMOSOME_NUMBERS,
# or calls no_file_exists_at if that file does not exist.
def show_chromosome_table
  lpadding_to_use = 16
  erev 'Chromosome Table from file '+sfile(FILE_CHROMOSOME_NUMBERS)+rev
  unless File.exist? FILE_CHROMOSOME_NUMBERS
    return no_file_exists_at(FILE_CHROMOSOME_NUMBERS)
  end
  chromosome_numbers = YAML.load_file(FILE_CHROMOSOME_NUMBERS)
  e
  chromosome_numbers.each_pair {|name, n_chromosomes|
    erev " "+name.ljust(lpadding_to_use)+' '+
         steelblue(n_chromosomes.to_s.rjust(3))
  }
  e
end
860
-
861
- # ========================================================================= #
862
- # === report_everything_about_this_amino_acid
863
- #
864
- # Use this method to report everything about any particular amino acid.
865
- # ========================================================================= #
866
# Reports the residue, the codons and the molecular mass of the given
# amino acid. An Array is handled entry by entry.
#
# Any '?' is removed from the input; this is done on a copy, so the
# caller's String (which may be frozen) is left untouched.
def report_everything_about_this_amino_acid(i)
  if i.is_a? Array
    return i.each {|entry| report_everything_about_this_amino_acid(entry) }
  end
  i = i.to_s.delete('?')
  erev 'It seems as is we did find an Amino Acid ('+simp(i)+rev+
       '). Its characteristic residue (R) is:'+N+N
  unless AMINO_ACIDS_RESTE.has_key?(i)
    # ===================================================================== #
    # This here is to map german names, such as "glycin",
    # onto "glycine", the corresponding english name.
    # ===================================================================== #
    if AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER.has_key?(i)
      one_letter   = AMINO_ACIDS_LONG_NAME_TO_ONE_LETTER[i]
      english_name = AMINO_ACIDS_ENGLISH[one_letter]
      # Keep the original name if no english name is registered.
      i = english_name.downcase if english_name
    end
  end
  residue = AMINO_ACIDS_RESTE[i.downcase].to_s
  efancy " #{residue}#{N}"
  erev 'The codons coding for the aminoacid '+simp(i)+rev+' are:'
  e
  e ' '+mediumturquoise(
    ::Bioroebe::PossibleCodonsForThisAminoacid.new(i).pretty_result
  )
  e
  molecular_mass_of(i, 2) # The 2 says to round to 2 digit.
end
894
-
895
- # ========================================================================= #
896
- # === report_five_prime_three_prime
897
- # ========================================================================= #
898
# Prints the result of dna_with_ends for the given input.
def report_five_prime_three_prime(i)
  erev(dna_with_ends(i))
end
901
-
902
- # ========================================================================= #
903
- # === show_startup_information
904
- #
905
- # This method here will usually be shown only once, on an initial startup
906
- # of the Bioroebe::Shell. Afterwards, it will no longer be shown at all.
907
- #
908
- # Note that showing this can be disabled.
909
- # ========================================================================= #
910
# Prints the introduction for first-time users: suggested commands,
# the documentation URL and the contact email address.
def show_startup_information
  suggested_commands = [' help', ' random', ' assign', ' complement']
  e
  erev "This seems to be the first time that you are using the "\
       "#{olivedrab('Bioroebe::Shell')}#{rev}, at the least on"
  erev 'this computer.'
  e
  erev 'It is recommended to have a look at the following components first:'
  e
  suggested_commands.each {|command| efancy command }
  e
  erev 'If you want to show this intro-menu again, do:'
  e
  efancy ' show-intro'
  e
  erev 'You can also see more documentation at:'
  e
  e " #{slateblue(URL_TO_THE_DOCUMENTATION)}"
  e
  erev 'If you feel that something is missing or incorrect, feel free to send an email to:'
  e
  efancy " #{EMAIL}"
  e
end
937
-
938
- require 'bioroebe/colours/colourize_sequence.rb'
939
- # ========================================================================= #
940
- # === report_colourized_sequence
941
- #
942
- # This method will use the new class ColourizeSequence, rather than
943
- # the old internal way.
944
- #
945
- # In the long run, it may be best to transition all of the Bioroebe::Shell
946
- # into the new class - but for now, we will use a hybrid system.
947
- #
948
- # To invoke this method, try:
949
- #
950
- # start_and_stop?
951
- #
952
- # ========================================================================= #
953
# Lets ColourizeSequence.return_sequence process dna_sequence_object?
# (the block yields colourize_what) and displays the result.
def report_colourized_sequence(
    colourize_what = :start_and_stop_codon
  )
  colourized_sequence = ColourizeSequence.return_sequence(dna_sequence_object?) {
    colourize_what
  }
  show_nucleotide_sequence?.display(colourized_sequence)
  e
end
960
-
961
- # ========================================================================= #
962
- # === show_complement
963
- #
964
- # If the second argument is true, we pad via 5' and 3'.
965
- #
966
- # As of Feb 2015, we will try with leading padding as well.
967
- # ========================================================================= #
968
# Prints the complement of the given sequence (nil means dna_string?;
# an Array is joined first).
#
# If also_include_prime_ends is truthy - :show_leading_primes and
# :include_prime_ends count as true - then padding, leading_3_prime
# and trailing_5_prime are added to the output.
def show_complement(
    i                       = dna_string?,
    also_include_prime_ends = false
  )
  if [:show_leading_primes, :include_prime_ends].include?(also_include_prime_ends)
    also_include_prime_ends = true
  end
  i = dna_string? if i.nil?
  i = i.join('')  if i.is_a? Array
  return erev(complement(i)) unless also_include_prime_ends
  erev padding?+rev+
       leading_3_prime+
       sfancy(complement(i))+
       rev+trailing_5_prime
end
991
-
992
- # ========================================================================= #
993
- # === show_position_of_sequence
994
- #
995
- # This currently works only for Amino Acids - at the least I have tested
996
- # it only on aminoacids so far, and not on DNA/RNA.
997
- # ========================================================================= #
998
# Prints the characters of the given sequence in rows of chunk_size
# entries; below every row the 1-based positions are shown.
#
# The loop deliberately runs one step past the last character; that
# step only appends padding (and may close the current row).
def show_position_of_sequence(
    i          = aa_sequence?,
    chunk_size = 10 # How many chunks to display per row.
  )
  characters   = i.chars
  display      = '' # The Display-String.
  position_row = ''
  (0..characters.size).each do |index|
    display << characters[index].to_s.rjust(2)+' '
    if index != characters.size
      position_row << palevioletred((index+1).to_s.rjust(2)+' ')
    end
    next unless index % chunk_size == (chunk_size - 1)
    # The row is full: append the positions and start a new row.
    display << N
    display << position_row << rev << N << N
    position_row = ''
  end
  erev display # Report it finally.
  erev position_row
end
1019
-
1020
- # ========================================================================= #
1021
- # === show_alu_sequence
1022
- #
1023
- # Invoke this method by doing something like:
1024
- #
1025
- # alu_sequence?
1026
- #
1027
- # ========================================================================= #
1028
# Parses FILE_ALU_ELEMENTS as fasta and prints the sequence as well
# as its length.
def show_alu_sequence
  alu_sequence = ::Bioroebe.parse_fasta(FILE_ALU_ELEMENTS).fasta_sequence
  length = sfancy(alu_sequence.size.to_s)
  erev 'The ALU sequence in humans may be this (length: '+length+rev+'):'
  erev ' '+simp(alu_sequence)
end
1035
-
1036
- # ========================================================================= #
1037
- # === show_possible_codons_for_this_aminoacid
1038
- # ========================================================================= #
1039
# Looks up the possible codons for the given aminoacid, appends them
# to @array_aminoacid_sequence and returns them.
def show_possible_codons_for_this_aminoacid(i)
  codons = PossibleCodonsForThisAminoacid[
    i, :use_only_the_four_standard_nucleotide_letters
  ]
  @array_aminoacid_sequence << codons
  codons
end
1045
-
1046
- # ========================================================================= #
1047
- # === show_date
1048
- # ========================================================================= #
1049
# Prints the current date in the format DD.MM.YYYY.
def show_date
  today = Time.now.strftime('%d.%m.%Y')
  erev today
end
1052
-
1053
- # ========================================================================= #
1054
- # === show_taxid
1055
- #
1056
- # This method will show the particular TaxID, using the NCBI taxonomy
1057
- # database.
1058
- #
1059
- # The tax-id 9606 is "Homo sapiens".
1060
- # ========================================================================= #
1061
# Fetches the NCBI taxonomy page for the given TaxID (nil means 9606)
# and reports the organism name found in the first matching <h2> tag.
#
# The name is captured generically, so TaxIDs other than 9606 are
# reported as well. URI.open is used because Kernel#open no longer
# opens URLs on ruby 3.x; the block form closes the remote handle.
#
# This method requires network access.
def show_taxid(id = 9606)
  require 'open-uri'
  id  = 9606 if id.nil?
  id  = id.to_s
  url = 'http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id='+id+'&lvl=0'
  erev 'The remote URL is: '+sfancy(url)
  webpage = URI.open(url) {|remote| remote.read }
  regex = /^<table width="100%"><tr><td valign="top"><h2>(.+?)<\/h2>/
  # String#[] with a capture index yields nil if the markup did not match.
  name_of_the_organism = webpage[regex, 1].to_s
  erev 'The TaxID of '+simp(id)+rev+' corresponds to `'+
       sfancy(name_of_the_organism)+rev+'`.'
end
1073
-
1074
- # ========================================================================= #
1075
- # === show_nucleotides_table
1076
- #
1077
- # Use this method to show the nucleotides table - their formula and
1078
- # the molecular mass.
1079
- # ========================================================================= #
1080
# Prints formula and molecular mass (rounded to two digits) for the
# four entries Adenin, Cytosin, Guanin and Thymin of the YAML file
# FILE_NUCLEOTIDES.
def show_nucleotides_table
  display_these = %w( Adenin Cytosin Guanin Thymin )
  YAML.load_file(FILE_NUCLEOTIDES).each_pair {|name, chemical_formula|
    next unless display_these.include? name # Skip all other entries.
    calculator = ChemistryParadise::CalculateAtomicMass.new(chemical_formula, :do_not_report)
    mass = calculator.masse?.to_f.round(2)
    e name.to_s.ljust(8)+' -> '+chemical_formula.to_s.rjust(8)+
      rev+' (Molecular mass: '+simp(mass.to_s)+')'+rev
  }
end
1097
-
1098
- # ========================================================================= #
1099
- # === show_ori_sequences
1100
- #
1101
- # The DnaA box is: TTATC[CA]A[CA]A
1102
- # ========================================================================= #
1103
# Shows the DnaA box consensus sequence and hands two concrete
# variants of it to try_to_find_restriction_enzymes_for.
def show_ori_sequences
  erev 'The DnaA box has this consensus sequence: '+
       sfancy("5'-TTATC[CA]A[CA]A-3'")
  last_result = nil
  ['TTATCCACA', 'TTATCAAAA'].each {|dnaa_box|
    erev 'Searching for '+dnaa_box
    last_result = try_to_find_restriction_enzymes_for(dnaa_box)
  }
  last_result # Same return value as the last search.
end
1113
-
1114
- # ========================================================================= #
1115
- # === show_segments
1116
- #
1117
- # This method will show the DNA segments via a R-compatible way.
1118
- #
1119
- # Usage example:
1120
- #
1121
- # set AAAATGCAGTAACCCATGCCC; show_segments
1122
- #
1123
- # ========================================================================= #
1124
- def show_segments
1125
- array = ::Bioroebe.scan_this_input_for_startcodons(dna_sequence_object?)
1126
- erev ' start end width'
1127
- array.each_with_index {|inner_array, index|
1128
- index += 1
1129
- start_position = inner_array.first
1130
- codon = inner_array.last.first
1131
- erev ' ['+index.to_s+'] '+start_position.to_s.rjust(5)+' '+
1132
- (start_position+2).to_s.rjust(5)+' '+'3'.rjust(4)+' ['+codon.downcase+']'
1133
- }
1134
- end
1135
-
1136
- require 'bioroebe/toplevel_methods/aminoacids_and_proteins.rb'
1137
- # ========================================================================= #
1138
- # === show_possible_phosphorylation_sites
1139
- #
1140
- # This method will find all possible phosphorylation sites in any
1141
- # given target sequence. It will also identify the aminoacids that
1142
- # can be phosphorylated.
1143
- #
1144
- # To test this, try:
1145
- #
1146
- # random 250; P?
1147
- #
1148
- # ========================================================================= #
1149
- def show_possible_phosphorylation_sites(i = aminoacid_sequence?)
1150
- _ = dna_sequence_object?
1151
- array_all_codons = []
1152
- array_all_codons << ::Bioroebe.codons_for?(:serine)
1153
- array_all_codons << ::Bioroebe.codons_for?(:tyrosine)
1154
- array_all_codons << ::Bioroebe.codons_for?(:threonine)
1155
- array_all_codons.flatten!
1156
- # ======================================================================= #
1157
- # === Convert Y into Purine/Pyrimidine next
1158
- # ======================================================================= #
1159
- if array_all_codons.any? {|entry| entry.end_with? 'Y' }
1160
- array_all_codons.map! {|inner_entry|
1161
- if inner_entry.end_with? 'Y'
1162
- inner_entry = [
1163
- inner_entry.sub(/Y$/,'T'),
1164
- inner_entry.sub(/Y$/,'C')
1165
- ]
1166
- end
1167
- inner_entry
1168
- }
1169
- array_all_codons.flatten!
1170
- end
1171
- all_codons_found_in_the_sequence = []
1172
- n_phosphorylation_sites = 0
1173
- n_phosphorylation_sites =
1174
- array_all_codons.map {|entry|
1175
- if _.scan(/#{entry}/).size > 0
1176
- all_codons_found_in_the_sequence << entry
1177
- end
1178
- _.scan(/#{entry}/).size }.inject(0){|sum, inner_element| sum + inner_element
1179
- }
1180
- all_codons_found_in_the_sequence.uniq!
1181
- singular_or_plural = 'site'
1182
- if n_phosphorylation_sites < 1
1183
- singular_or_plural << 's'
1184
- end
1185
- erev 'In this sequence, we have found '+simp(n_phosphorylation_sites.to_s)+rev+
1186
- ' possible phosphorylation '+singular_or_plural+', using all '\
1187
- '3 possible frames.'
1188
- e
1189
- erev 'In particular, these '+all_codons_found_in_the_sequence.size.to_s+
1190
- ' different codons were found: '
1191
- e
1192
- erev ' '+simp(all_codons_found_in_the_sequence.join('/'))+rev
1193
- e
1194
- erev 'For the first frame, the start positions are these:'
1195
- e
1196
- # ======================================================================= #
1197
- # === Find the start positions for frame 1 next
1198
- # ======================================================================= #
1199
- array_start_positions_for_frame_1 = []
1200
- scanned_result = _.scan(/.../)
1201
- scanned_result.each_with_index {|codon, index|
1202
- if all_codons_found_in_the_sequence.include? codon
1203
- array_start_positions_for_frame_1 << (index * 3)+1
1204
- end
1205
- }
1206
- erev ' DNA: '+simp(array_start_positions_for_frame_1.join('/'))+rev
1207
- erev ' Protein: '+simp(array_start_positions_for_frame_1.map {|entry|
1208
- entry = entry.to_i * 3
1209
- entry.to_s
1210
- }.join('/'))+rev
1211
- # ======================================================================= #
1212
- # Now modify the DNA sequence there but only in the first frame.
1213
- # ======================================================================= #
1214
- new_colourized_dna_sequence = ''
1215
- all_triplets = _.scan(/.../)
1216
- all_triplets.each {|codon|
1217
- codon = swarn(codon) if all_codons_found_in_the_sequence.include? codon
1218
- new_colourized_dna_sequence << codon+rev
1219
- }
1220
- e
1221
- erev 'The DNA sequence with possible phosphorylation sites is:'
1222
- e
1223
- erev left_padding?+leading_five_prime+new_colourized_dna_sequence+trailing_three_prime
1224
- e
1225
- erev 'The Aminoacid sequence with possible phosphorylation sites is:'
1226
- e
1227
- erev ' '+
1228
- ::Bioroebe.colourize_aa(i, ARRAY_AMINOACIDS_THAT_CAN_BE_PHOSPHORYLATED).to_s
1229
- e
1230
- end
1231
-
1232
- # ========================================================================= #
1233
- # === show_molweight
1234
- # ========================================================================= #
1235
- def show_molweight(use_cliner = true)
1236
- cliner if use_cliner
1237
- MolecularWeightOfNucleotides.weights.each_with_index {|entry, index|
1238
- case index
1239
- when 0
1240
- erev 'Adenine: '+sfancy(entry.to_s)+rev
1241
- when 1
1242
- erev 'Thymine: '+sfancy(entry.to_s)+rev
1243
- when 2
1244
- erev 'Guanine: '+sfancy(entry.to_s)+rev
1245
- when 3
1246
- erev 'Cytosine: '+sfancy(entry.to_s)+rev
1247
- end
1248
- }; cliner if use_cliner
1249
- end
1250
-
1251
- # ========================================================================= #
1252
- # === show_weight_of_this_nucleotide
1253
- #
1254
- # Use this method to show the total weight of a specific nucleotide.
1255
- #
1256
- # Usage examples:
1257
- #
1258
- # weight? U
1259
- # weight? T
1260
- # weight? Adenine
1261
- #
1262
- # ========================================================================= #
1263
- def show_weight_of_this_nucleotide(i)
1264
- i = i.to_s
1265
- if i.empty?
1266
- erev 'Please supply a nucleotide, such as "Adenine" or "A".'
1267
- erev 'Note that the short variant is preferred.'
1268
- return
1269
- end
1270
- i = i[0,1] if i.size > 1
1271
- _ = FILE_NUCLEOTIDES_WEIGHT # bl /Users/x/DATA/SCIENCE/YAML/nucleotides_weight.yml
1272
- if File.exist?(_)
1273
- _ = YAML.load_file(_)
1274
- dataset = {}
1275
- _.each_pair {|key, value|
1276
- dataset[key[0,1]] = value
1277
- }
1278
- if dataset.has_key?(i)
1279
- erev 'The weight of '+sfancy(i)+rev+' is: '+
1280
- sfancy(
1281
- ChemistryParadise.atomic_mass_of(dataset[i])
1282
- )
1283
- else
1284
- erev 'The key `'+sfancy(i)+rev+'` was not found.'
1285
- end
1286
- else
1287
- ewarn 'We did not find a required file at '+sfile(_)+rev+'.'
1288
- end
1289
- end
1290
-
1291
- # ========================================================================= #
1292
- # === show_todo_file
1293
- # ========================================================================= #
1294
- def show_todo_file
1295
- cat '$RUBY_SRC/bioroebe/doc/TODO_FOR_THE_BIOROEBE_PROJECT.md'
1296
- end
1297
-
1298
- # ========================================================================= #
1299
- # === report_where_the_pdf_tutorial_can_be_found
1300
- #
1301
- # Do notify the user where to find the .pdf tutorial.
1302
- # ========================================================================= #
1303
- def report_where_the_pdf_tutorial_can_be_found
1304
- _ = File.basename(FILE_BIOROEBE_TUTORIAL)
1305
- erev 'You can find the tutorial here:'
1306
- e
1307
- erev ' '+simp('http://shevegen.square7.ch/'+_)+rev
1308
- e
1309
- end
1310
-
1311
- # ========================================================================= #
1312
- # === show_directory_content
1313
- # ========================================================================= #
1314
- def show_directory_content(of_this_dir = '*')
1315
- of_this_dir.prepend '*' unless of_this_dir.include? '*'
1316
- cliner {
1317
- Dir[of_this_dir].sort.each_with_index {|entry, index|
1318
- index += 1
1319
- entry << '/' if File.directory?(entry)
1320
- erev index.to_s.rjust(2)+') '+entry
1321
- }
1322
- }
1323
- end
1324
-
1325
- require 'bioroebe/protein_structure/alpha_helix.rb'
1326
- # ========================================================================= #
1327
- # === show_length_of_alpha_helix
1328
- # ========================================================================= #
1329
- def show_length_of_alpha_helix(i)
1330
- erev ::Bioroebe::AlphaHelix.length?(i)
1331
- end
1332
-
1333
- # ========================================================================= #
1334
- # === show_and_calculate_weight_of_dna_string
1335
- # ========================================================================= #
1336
- def show_and_calculate_weight_of_dna_string(
1337
- i = dna_sequence_object?
1338
- )
1339
- i = dna_sequence_object? if i.nil?
1340
- i = dna_sequence_object? if is_a? Array and i.empty?
1341
- sum = 0
1342
- i.upcase.chars.each {|nucleotide|
1343
- _ = case nucleotide
1344
- when 'A'
1345
- weight_of_adenin?
1346
- when 'T'
1347
- weight_of_thymin?
1348
- when 'C'
1349
- weight_of_cytosin?
1350
- when 'G'
1351
- weight_of_guanin?
1352
- end
1353
- sum += _.to_f
1354
- }
1355
- # ======================================================================= #
1356
- # Round the sum properly here.
1357
- # ======================================================================= #
1358
- sum = sum.round(2)
1359
- erev 'The weight of this nucleotide sequence is: '+
1360
- simp(sum.to_s)+rev+' Dalton.'
1361
- end
1362
-
1363
- # ========================================================================= #
1364
- # === show_name_of_the_gene
1365
- # ========================================================================= #
1366
- def show_name_of_the_gene
1367
- erev 'The name of the gene at hand is: '+
1368
- sfancy(sequence_object?.name_of_gene)
1369
- end
1370
-
1371
- # ========================================================================= #
1372
- # === show_agarose_table
1373
- #
1374
- # This method will simply show common agarose concentrations.
1375
- # ========================================================================= #
1376
- def show_agarose_table
1377
- hash = load_bioroebe_yaml_file(:agarose)
1378
- e
1379
- e 'Agarose concentrations:'
1380
- e
1381
- hash.each_pair {|concentration_of_the_gel, kb_fragment|
1382
- erev ' A concentration of '+simp(concentration_of_the_gel.to_s+'%')+
1383
- rev+' will separate DNA fragments between '+sfancy(kb_fragment)+
1384
- rev+' kb.'
1385
- }; e
1386
- end
1387
-
1388
- # ========================================================================= #
1389
- # === start_codon?
1390
- # ========================================================================= #
1391
- def start_codon?
1392
- ::Bioroebe.start_codon?
1393
- end
1394
-
1395
- # ========================================================================= #
1396
- # === stop_codons?
1397
- # ========================================================================= #
1398
- def stop_codons?
1399
- ::Bioroebe.stop_codons?
1400
- end
1401
-
1402
- # ========================================================================= #
1403
- # === show_all_dmp_files
1404
- #
1405
- # Show all .dmp files here.
1406
- # ========================================================================= #
1407
- def show_all_dmp_files
1408
- show_directory_content('.dmp')
1409
- end
1410
-
1411
- # ========================================================================= #
1412
- # === show_and_calculate_weight_of_dna_string_or_aminoacid_sequence
1413
- # ========================================================================= #
1414
- def show_and_calculate_weight_of_dna_string_or_aminoacid_sequence(
1415
- i = dna_sequence_object?
1416
- )
1417
- if i.nil?
1418
- if dna_sequence_object?
1419
- i = dna_sequence_object?
1420
- end
1421
- end
1422
- # ======================================================================= #
1423
- # First, we check if the input is an aminoacid-sequence.
1424
- # ======================================================================= #
1425
- if ::Bioroebe.is_aminoacid?(i)
1426
- reverse = AMINO_ACIDS_ENGLISH.reverse
1427
- i = reverse[i] # Replace it with the one-letter code next.
1428
- # ===================================================================== #
1429
- # Obtain the mass of this aminoacid.
1430
- # ===================================================================== #
1431
- i = AMINO_ACIDS_AVERAGE_MASS_TABLE[i]
1432
- erev 'The weight of this aminoacid is: '+
1433
- simp(i.to_s)+rev+' Dalton.'
1434
- else
1435
- show_and_calculate_weight_of_dna_string(i)
1436
- end
1437
- end
1438
-
1439
- # ========================================================================= #
1440
- # === show_t_phages
1441
- # ========================================================================= #
1442
- def show_t_phages
1443
- dataset = YAML.load_file(
1444
- ::Bioroebe.yaml_dir?+'viruses/ecoli_phages.yml'
1445
- )
1446
- # ======================================================================= #
1447
- # Next, display that as a table.
1448
- # ======================================================================= #
1449
- erev 'Name of Phage | Plaque Size | Head diameter | tail length | latent period | burst size'
1450
- cliner length: 88
1451
- dataset.each_pair {|name_of_phage, value|
1452
- print '|',name_of_phage.to_s.center(13),'|'
1453
- # ===================================================================== #
1454
- # Display the plague size next, aka small, medium or large.
1455
- # ===================================================================== #
1456
- plaque_size = value['plaque_size']
1457
- print plaque_size.to_s.center(13),'|'
1458
- head = value['head']
1459
- print head.to_s.center(15),'|'
1460
- tail = value['tail']
1461
- print tail.to_s.center(13),'|'
1462
- # ===================================================================== #
1463
- # Display the latent period.
1464
- # ===================================================================== #
1465
- latent_period = value['latent_period']
1466
- print latent_period.to_s.center(15),'|'
1467
- burst_size = value['burst_size']
1468
- print burst_size.to_s.center(12),'|'
1469
- e
1470
- cliner length: 88
1471
- }
1472
- end
1473
-
1474
- # ========================================================================= #
1475
- # === show_html_colours
1476
- # ========================================================================= #
1477
- def show_html_colours
1478
- e 'The available HTML colours are:'; e
1479
- ::Colours.show_html_colours; e
1480
- end
1481
-
1482
- # ========================================================================= #
1483
- # === show_restriction_table
1484
- #
1485
- # This method will show a restriction table, that is, a table with
1486
- # some different restriction enzymes.
1487
- #
1488
- # To invoke this method, do:
1489
- #
1490
- # show_restriction_table
1491
- #
1492
- # ========================================================================= #
1493
- def show_restriction_table
1494
- most_ljust = 20
1495
- erev 'Showing a few different cutters (4,5,6,7,8) in table format next:'
1496
- erev '---------------------------------------------------------'
1497
- e peru(' 4-cutter'.ljust(most_ljust))+' | '+orange('ChaI'.ljust(10))+' | '+
1498
- olivedrab('GATC'.ljust(10))
1499
- e peru(' 5-cutter'.ljust(most_ljust))+' | '+orange('FmuI'.ljust(10))+' | '+
1500
- olivedrab('GGNCC'.ljust(10))
1501
- e peru(' 6-cutter'.ljust(most_ljust))+' | '+orange('EcoRI'.ljust(10))+' | '+
1502
- olivedrab('GAATTC'.ljust(10))
1503
- e peru(' 7-cutter'.ljust(most_ljust))+' | '+orange('PfoI'.ljust(10))+' | '+
1504
- olivedrab('TCCNGGA'.ljust(10))
1505
- e peru(' 8-cutter'.ljust(most_ljust))+' | '+orange('PacI'.ljust(10))+' | '+
1506
- olivedrab('TTAATTAA'.ljust(10))
1507
- erev '---------------------------------------------------------'
1508
- end
1509
-
1510
- # ========================================================================= #
1511
- # === show_numbered_nucleotide_positions
1512
- #
1513
- # This method will show "numbered" nucleotide positions such as:
1514
- #
1515
- # 1234567891234567891234567
1516
- # ATGCAGGTCATCAGTCAGTCAGTCA
1517
- #
1518
- # ========================================================================= #
1519
- def show_numbered_nucleotide_positions
1520
- _ = sequence?.string?
1521
- chars = _.chars
1522
- chunk = chars.each_slice(40)
1523
- chunked = chunk.map {|line| line.join }
1524
- chunked.each {|line|
1525
- chars = line.chars
1526
- upper_strand = ''.dup
1527
- counter = 0
1528
- chars.each {|char| counter += 1
1529
- if counter > 9
1530
- counter = 0
1531
- end
1532
- upper_strand << counter.to_s
1533
- }
1534
- e lightsteelblue(upper_strand)
1535
- erev line
1536
- }
1537
- end
1538
-
1539
- # ========================================================================= #
1540
- # === show_fastq_quality_score_table
1541
- # ========================================================================= #
1542
- def show_fastq_quality_score_table
1543
- _ = FILE_FASTQ_QUALITY_SCHEMES
1544
- if File.exist? _
1545
- dataset = YAML.load_file(_)
1546
- keys = dataset.keys
1547
- keys.each {|this_key|
1548
- e sfancy(this_key+':')
1549
- e
1550
- inner_dataset = dataset[this_key]
1551
- erev ' Ascii character range: '+
1552
- seagreen(inner_dataset['ascii_character_range'].to_s)
1553
- erev ' Offset: '+
1554
- seagreen(inner_dataset['offset'].to_s)
1555
- erev ' Quality score type: '+
1556
- seagreen(inner_dataset['quality_score_type'].to_s)
1557
- erev ' Quality score range: '+
1558
- seagreen(inner_dataset['quality_score_range'].to_s)
1559
- e
1560
- }; e
1561
- end
1562
- end
1563
-
1564
- # ========================================================================= #
1565
- # === report_the_protein_weight
1566
- # ========================================================================= #
1567
- def report_the_protein_weight
1568
- _ = aminoacid_sequence?
1569
- if _.include? '*'
1570
- erev 'Note that this aminoacid sequence has a stop codon, denoted by the *:'
1571
- e
1572
- erev ' '+sfancy(_)+rev
1573
- e
1574
- erev 'Since a stop codon is not translated into an aminoacid'
1575
- erev 'it makes little sense to include it into the weight-calculation.'
1576
- erev 'Thus, we will use only the part up to the first * token.'
1577
- _ = _[0 .. (_.index('*') - 1)]
1578
- end
1579
- sum = ::Bioroebe.amino_acid_average_mass(_)
1580
- e 'The total weight of these '+simp(_.size.to_s)+rev+
1581
- ' aminoacids is: '+sfancy(sum.to_f.round(2).to_s)+rev+
1582
- ' Dalton'
1583
- end
1584
-
1585
- # ========================================================================= #
1586
- # === report_all_stop_codons
1587
- #
1588
- # This method will report all stop codons in the given sequence.
1589
- #
1590
- # We will not modify the input given to this method.
1591
- #
1592
- # The three stop codons, in RNA, are:
1593
- #
1594
- # UGA
1595
- # UAG
1596
- # UAA
1597
- #
1598
- # ========================================================================= #
1599
- def report_all_stop_codons(
1600
- i = dna_sequence_object?
1601
- )
1602
- i.upcase!
1603
- erev 'Our input sequence has '+simp(i.size.to_s)+rev+' nucleotides.'
1604
- n_UGA = 'UGA'
1605
- n_UGA = 'TGA' if is_dna?
1606
- erev 'We did find '+
1607
- simp(
1608
- i.scan(/#{n_UGA}/
1609
- ).size.to_s.rjust(2))+rev+' '+n_UGA+' stop codons.'
1610
- n_UAG = 'UAG'
1611
- n_UAG = 'TAG' if is_dna?
1612
- erev 'We did find '+
1613
- simp(i.scan(/#{n_UAG}/).size.to_s.rjust(2))+rev+' '+n_UAG+' stop codons.'
1614
- n_UAA = 'UAA'
1615
- n_UAA = 'TAA' if is_dna?
1616
- erev 'We did find '+
1617
- simp(i.scan(/#{n_UAA}/).size.to_s.rjust(2))+rev+' '+n_UAA+' stop codons.'
1618
- end
1619
-
1620
- # ========================================================================= #
1621
- # === determine_and_report_all_stop_codons
1622
- # ========================================================================= #
1623
- def determine_and_report_all_stop_codons
1624
- dna_sequence = dna_sequence_object?
1625
- erev 'Because 3 different stop codons exist, we have '\
1626
- 'to do '+slateblue('3 runs')+rev+'.'
1627
- stop_codons?.each {|this_stop_codon|
1628
- array_matches = ::Bioroebe.return_all_substring_matches(
1629
- dna_sequence, this_stop_codon
1630
- )
1631
- if array_matches.empty?
1632
- erev 'No match has been found.'
1633
- else
1634
- start_position = array_matches.last.first
1635
- erev 'For the stop codon '+sfancy(this_stop_codon)+rev+' the last codon'
1636
- erev 'occurrs at position '+simp(start_position.to_s)+rev+'.'
1637
- end
1638
- }
1639
- end
1640
-
1641
- # ========================================================================= #
1642
- # === show_seq_1
1643
- # ========================================================================= #
1644
- def show_seq_1(i = seq1?)
1645
- erev padding?+leading_five_prime+
1646
- sfancy(i)+rev+trailing_three_prime
1647
- end
1648
-
1649
- # ========================================================================= #
1650
- # === show_seq_2
1651
- # ========================================================================= #
1652
- def show_seq_2(i = seq2?)
1653
- erev padding?+leading_five_prime+
1654
- sfancy(i)+rev+trailing_three_prime
1655
- end
1656
-
1657
- # ========================================================================= #
1658
- # === show_seq_3
1659
- # ========================================================================= #
1660
- def show_seq_3(i = seq3?)
1661
- erev padding?+leading_five_prime+
1662
- sfancy(i)+rev+trailing_three_prime
1663
- end
1664
-
1665
- # ========================================================================= #
1666
- # === show_seq_4
1667
- # ========================================================================= #
1668
- def show_seq_4
1669
- erev padding?+leading_five_prime+sfancy(seq4?)+rev+trailing_three_prime
1670
- end
1671
-
1672
- # ========================================================================= #
1673
- # === show_seq_5
1674
- # ========================================================================= #
1675
- def show_seq_5
1676
- erev padding?+leading_five_prime+sfancy(seq5?)+rev+trailing_three_prime
1677
- end
1678
-
1679
- # ========================================================================= #
1680
- # === show_seq_6
1681
- # ========================================================================= #
1682
- def show_seq_6
1683
- erev padding?+leading_five_prime+sfancy(seq6?)+rev+trailing_three_prime
1684
- end
1685
-
1686
- # ========================================================================= #
1687
- # === show_start_and_stop_codons
1688
- #
1689
- # This will show BOTH start and stop codons, in different colours.
1690
- #
1691
- # Since start codons may be more important, we will first locate
1692
- # and colourize them, and afterwards, will also colourize the
1693
- # stop codons.
1694
- # ========================================================================= #
1695
- def show_start_and_stop_codons
1696
- _ = string?
1697
- start_codon = ::Bioroebe.start_codon?
1698
- stop_codons = ::Bioroebe.stop_codons?
1699
- _.gsub!(/(#{start_codon})/, yellow+'\\1'+colour_for_nucleotide)
1700
- stop_codons.each {|stop_codon|
1701
- _.gsub!(/(#{stop_codon})/, salmon('\\1')+colour_for_nucleotide)
1702
- }
1703
- erev 'Start codon: '+yellow+start_codon+rev
1704
- stop_codons = stop_codons.join(', ').strip
1705
- stop_codons.chop! if stop_codons.end_with? ','
1706
- # ======================================================================= #
1707
- # Show the stop codons that we will use:
1708
- # ======================================================================= #
1709
- erev 'Stop codons: '+salmon(stop_codons)+rev
1710
- erev dna_padding(_)
1711
- end
1712
-
1713
- # ========================================================================= #
1714
- # === report_when_the_bioroebe_project_was_last_updated
1715
- # ========================================================================= #
1716
- def report_when_the_bioroebe_project_was_last_updated
1717
- result = 'The Bioroebe-Project was last updated on: '+
1718
- slateblue(LAST_UPDATE)+rev
1719
- result = result.dup
1720
- n_days_difference = ((Time.now - Time.parse(LAST_UPDATE))/60/60/24).round(2).to_s
1721
- result << ' (~'+n_days_difference.to_s+' days ago)'
1722
- erev result
1723
- end
1724
-
1725
- # ========================================================================= #
1726
- # === show_information_about_the_gff_format
1727
- # ========================================================================= #
1728
- def show_information_about_the_gff_format
1729
- erev 'Fields must be tab-separated in the .gff format.'
1730
- e
1731
- erev 'All but the final field in each feature line must'
1732
- erev 'contain a value; "empty" columns should be denoted with a "."'
1733
- e
1734
- egold 'seqname:'
1735
- erev 'This is the name of the chromosome or scaffold; chromosome names'
1736
- erev 'can be given with or without the "chr" prefix.'
1737
- erev 'Important note: the seqname must be one used within Ensembl, '
1738
- erev 'i.e. a standard chromosome name or an Ensembl identifier such as a'
1739
- erev 'scaffold ID, without any additional content such as species or'
1740
- erev 'assembly. See the example GFF output below.'
1741
- e
1742
- egold 'source:'
1743
- erev 'Name of the program that generated this feature, or '
1744
- erev 'the data source (database or project name)'
1745
- e
1746
- egold 'feature:'
1747
- erev 'feature type name, e.g. Gene, Variation, Similarity'
1748
- e
1749
- egold 'start:'
1750
- erev 'Start position of the feature, with sequence numbering starting at 1.'
1751
- e
1752
- egold 'end:'
1753
- erev 'End position of the feature, with sequence numbering '\
1754
- 'starting at 1.'
1755
- e
1756
- egold 'score:'
1757
- erev 'A floating point value.'
1758
- e
1759
- egold 'strand:'
1760
- erev 'defined as + (forward) or - (reverse).'
1761
- e
1762
- egold "frame:"
1763
- erev " - One of '0', '1' or '2'. '0' indicates that the first base "
1764
- erev "of the feature is the first base of a codon, '1' that the second "
1765
- erev "base is the first base of a codon, and so on."
1766
- e
1767
- egold 'attribute:'
1768
- erev 'A semicolon-separated list of tag-value pairs, providing '
1769
- erev 'additional information about each feature.'
1770
- e
1771
- end
1772
-
1773
- # ========================================================================= #
1774
- # === show_header_of_this_pdb_file
1775
- # ========================================================================= #
1776
- def show_header_of_this_pdb_file(i)
1777
- lines = File.readlines(i)
1778
- first = lines.first.split(' ')[1..-1].join(' ').strip
1779
- second = lines[1].split(' ')[1..-1].join(' ').strip
1780
- erev first
1781
- erev ' '+second
1782
- end
1783
-
1784
- # ========================================================================= #
1785
- # === show_useful_URLs
1786
- #
1787
- # This method will simply show some important, bioinformatics related
1788
- # URLs. In particular URLs that may be important for bioinformatics
1789
- # related tasks, e. g. NCBI, GeneBank and so forth.
1790
- # ========================================================================= #
1791
- def show_useful_URLs
1792
- e
1793
- erev 'NCBI: '+sfancy(obtain_url_for(:ncbi))
1794
- erev 'GenBank: '+sfancy(obtain_url_for(:genbank))
1795
- erev 'PDB: '+sfancy(obtain_url_for(:pdb))
1796
- erev 'Prosite: '+sfancy(obtain_url_for(:prosite))
1797
- e
1798
- end
1799
-
1800
- # ========================================================================= #
1801
- # === show_header_of
1802
- # ========================================================================= #
1803
- def show_header_of(i)
1804
- if i.is_a? Array
1805
- i.each {|entry| show_header_of(entry) }
1806
- else
1807
- unless File.exist? i
1808
- erev "No file exists at `#{sfile(i)}#{rev}`."
1809
- return
1810
- end
1811
- case i
1812
- # ===================================================================== #
1813
- # === .pdb
1814
- # ===================================================================== #
1815
- when /\.pdb$/
1816
- show_header_of_this_pdb_file(i)
1817
- end
1818
- end
1819
- end
1820
-
1821
- # ========================================================================= #
1822
- # === show_GFP_sequence (gfp tag)
1823
- #
1824
- # This method will show the GFP sequence, on the DNA level.
1825
- # ========================================================================= #
1826
- def show_GFP_sequence
1827
- erev return_five_prime_header+
1828
- return_default_GFP_sequence
1829
- end
1830
-
1831
- # ========================================================================= #
1832
- # === return_default_GFP_sequence
1833
- # ========================================================================= #
1834
- def return_default_GFP_sequence(
1835
- path_to_the_file = FILE_GFP_SEQUENCE
1836
- )
1837
- Fasta.new(path_to_the_file) { :be_quiet }.return_sequence
1838
- end
1839
-
1840
- # ========================================================================= #
1841
- # === try_to_show_the_configuration
1842
- # ========================================================================= #
1843
- def try_to_show_the_configuration
1844
- @config.show_config if @config.respond_to? :show_config
1845
- _ = verbose_truth(use_expand_cd_aliases?)
1846
- colourized_yes_or_no = simp(_.to_s)
1847
- erev 'Will we use class Rcfiles::DirectoryAliases: '+
1848
- colourized_yes_or_no
1849
- end
1850
-
1851
- require 'bioroebe/aminoacids/aminoacids_mass_table.rb'
1852
- # ========================================================================= #
1853
- # === show_aminoacids_mass_table
1854
- #
1855
- # This shows the weight of the aminoacids, in a table-layout.
1856
- # ========================================================================= #
1857
- def show_aminoacids_mass_table
1858
- AminoacidsMassTable.report_which_file_is_used
1859
- AminoacidsMassTable.show(padding?) # bl aminoacids_mass_table.rb
1860
- end; alias aminoacid_table_overview show_aminoacids_mass_table # === show_aminoacids_mass_table
1861
-
1862
- require 'bioroebe/utility_scripts/pathways.rb'
1863
- # ========================================================================= #
1864
- # === show_all_pathways
1865
- #
1866
- # Simply show all Pathways.
1867
- # ========================================================================= #
1868
- def show_all_pathways
1869
- ::Bioroebe::Pathways.show_all_pathways
1870
- end
1871
-
1872
- # ========================================================================= #
1873
- # === show_sequence_in_splitted_form
1874
- #
1875
- # We will show the main DNA sequence in a three-letter splitted form.
1876
- #
1877
- # You can optionally use an argument, the first argument, a number. By
1878
- # default this is 3, so we will split into chunks of 3.
1879
- #
1880
- # The second argument says which token we will use for rejoining. It
1881
- # defaults to ' ' so the nucleotides will be rejoined via ' ', but
1882
- # you can also use another token such as '-', which may lead to a
1883
- # String such as 'ATG-CGA-ACC' and so forth.
1884
- # ========================================================================= #
1885
def show_sequence_in_splitted_form(
    how_many = 3,
    use_this_token_for_rejoining = ' ' # <- Which token to use for the re-joining action.
  )
  # nil and :default both select the default chunk size of 3.
  how_many = 3 if [nil, :default].include?(how_many)
  # One regex dot per wanted character, e. g. /.../ for chunks of 3.
  chunk_regex = Regexp.new('.' * how_many.to_i)
  return erev('Please first "assign" a sequence.') if string?.empty?
  # ======================================================================= #
  # A block may yield a Hash; its :use_this_token entry overrides the
  # token used for rejoining.
  # ======================================================================= #
  if block_given?
    options = yield
    if options.is_a?(Hash) && options.has_key?(:use_this_token)
      use_this_token_for_rejoining = options.delete(:use_this_token)
    end
  end
  chunks = string?.to_s.scan(chunk_regex).map {|chunk|
    # ===================================================================== #
    # Start codons and stop codons get their own colour; afterwards we
    # switch back to the regular nucleotide colour.
    # ===================================================================== #
    if is_this_a_start_codon?(chunk)
      mediumseagreen(chunk)+return_colour_for_nucleotides
    elsif is_this_a_stop_codon?(chunk)
      mediumorchid(chunk)+return_colour_for_nucleotides
    else
      chunk
    end
  }
  rejoined = chunks.join(use_this_token_for_rejoining)
  # ======================================================================= #
  # Finally show the sequence, framed by the 5' and 3' markers.
  # ======================================================================= #
  erev left_padding?+
       five_prime+
       return_colour_for_nucleotides+
       rejoined+
       rev+
       three_prime
end
1936
-
1937
- # ========================================================================= #
1938
- # === show_disulfides
1939
- #
1940
- # Show the (possible) disulfide positions in a protein.
1941
- # ========================================================================= #
1942
def show_disulfides
  sequence = aminoacid_sequence?
  if sequence.include? 'C'
    # 'C' is the one-letter code for cysteine (not cytosine, which is a
    # nucleotide), hence the variable name.
    n_cysteines = sequence.count('C')
    # Use the singular form when there is exactly one cysteine.
    noun = (n_cysteines == 1) ? 'cysteine' : 'cysteines'
    erev "This aminoacid sequence has #{steelblue(n_cysteines.to_s)}#{rev} #{noun}."
    # A disulfide bond needs two cysteines, so positions are only
    # reported when more than one is present.
    if n_cysteines > 1
      erev 'Thus, there could be disulfide bonds. '+
           gold(cheerful_person)+rev
      show_sequence_with_a_ruler(:default, sequence)
      erev 'The positions of cysteines are at:'
      sequence.chars.each_with_index {|aminoacid, index|
        next unless aminoacid == 'C'
        # Positions are reported 1-based.
        erev 'Position: '+steelblue((index+1).to_s.rjust(3))
      }
    end
  else
    e 'This aminoacid sequence has no cysteine. Thus, '\
      'there can not be any disulfide bonds.'
  end
end
1963
-
1964
- # ========================================================================= #
1965
- # === show_aminoacids_residues
1966
- # ========================================================================= #
1967
def show_aminoacids_residues
  erev 'The aminoacid residues are:'
  e
  ENGLISH_LONG_NAMES_FOR_THE_AMINO_ACIDS.each do |long_name|
    # The residue table is keyed by the downcased long name.
    residue = AMINO_ACIDS_RESTE[long_name.downcase]
    erev long_name.ljust(14)+': '+simp(residue)
  end
  e
end
1974
-
1975
- # ========================================================================= #
1976
- # === show_hint_how_to_use_the_local_sequences
1977
- #
1978
- # Show a hint for the user.
1979
- # ========================================================================= #
1980
def show_hint_how_to_use_the_local_sequences
  # Without any fasta file in the log directory there is nothing to hint at.
  return if return_fasta_files_in_the_log_directory.empty?
  erev('You can load up any of these sequences by issuing:')
  e
  erev(' use_this_fasta 1 # for file number 1')
  e
end
1988
-
1989
- # ========================================================================= #
1990
- # === colour_for_stop_codon
1991
- # ========================================================================= #
1992
def colour_for_stop_codon(i)
  # Stop codons are rendered via orange().
  coloured = orange(i)
  coloured
end
1995
-
1996
- # ========================================================================= #
1997
- # === colour_for_nucleotide
1998
- # ========================================================================= #
1999
def colour_for_nucleotide(i = '')
  # Nucleotides are rendered via royalblue().
  coloured = royalblue(i)
  coloured
end
alias colour_for_nucleotides colour_for_nucleotide # === colour_for_nucleotides
2002
-
2003
- # ========================================================================= #
2004
- # === report_this_dna_sequence_with_proper_trailer_and_leader
2005
- # ========================================================================= #
2006
def report_this_dna_sequence_with_proper_trailer_and_leader(i)
  # Work on a private copy: String#to_s returns the receiver itself, so
  # the in-place slice assignment below would otherwise modify the
  # caller's String (and raise FrozenError for a frozen one).
  i = i.to_s.dup
  if block_given?
    yielded = yield
    case yielded
    when :try_to_colourize_start_codon
      # =================================================================== #
      # Colourize the leading three nucleotides if the sequence begins
      # with the start codon, then switch back to the nucleotide colour.
      # =================================================================== #
      if i.start_with? start_codon?
        i[0,3] = cyan(i[0,3])+return_colour_for_nucleotides
      end
    end
  end
  colourized_dna_sequence = remove_trailing_escape_code(
    colourize_this_dna_sequence(i)
  )
  # Frame the sequence with the 5' leader and the 3' trailer.
  erev left_pad?+
       leading_5_prime+
       colourized_dna_sequence+
       rev+
       trailing_3_prime
end
2030
-
2031
- # ========================================================================= #
2032
- # === show_hydropathy_table
2033
- #
2034
- # Show the hydropathy table.
2035
- # ========================================================================= #
2036
def show_hydropathy_table
  e
  HYDROPATHY_TABLE.each_pair do |one_letter_code, value|
    # The value is right-aligned within four characters.
    right_column = simp(value.to_s.rjust(4))
    e ' '+sfancy(one_letter_code)+' | '+right_column
  end
  e
end
2043
-
2044
- # ========================================================================= #
2045
- # === show_known_nls_sequences
2046
- #
2047
- # This Wikipedia page may be useful:
2048
- # http://en.wikipedia.org/wiki/Nuclear_localization_sequence
2049
- # ========================================================================= #
2050
def show_known_nls_sequences
  erev 'These NLS sequences are known:'+N+N
  column_width = 36 # The key column is left-justified to this width.
  NUCLEAR_LOCALIZATION_SEQUENCES.each_pair do |name, sequence|
    e sfancy(name.ljust(column_width))+' '+sequence
  end
end
2057
-
2058
- # ========================================================================= #
2059
- # === report_mode
2060
- # ========================================================================= #
2061
def report_mode
  # Output whatever mode? currently returns.
  current_mode = mode?
  erev current_mode
end
2064
-
2065
- # ========================================================================= #
2066
- # === show_reste
2067
- #
2068
- # This will show the residues of the various amino acids.
2069
- # ========================================================================= #
2070
def show_reste
  e
  AMINO_ACIDS_RESTE.each_pair do |name, residue|
    # The name column is left-justified to 14 characters.
    erev ' '+name.ljust(14)+' -> '+sfancy(residue)
  end
  e
end
2075
-
2076
- require 'bioroebe/string_matching/simple_string_comparer.rb'
2077
- # ========================================================================= #
2078
- # === show_sixpack_alignment
2079
- #
2080
- # We will feed some input to class Bioroebe::SimpleStringComparer.
2081
- # ========================================================================= #
2082
def show_sixpack_alignment(
    i = dna_sequence_object?
  )
  # Both sequences are read interactively from STDIN; the argument i is
  # not consulted by this method.
  erev 'Input sequence 1:'
  first_sequence = $stdin.gets.chomp
  erev 'Input sequence 2:'
  second_sequence = $stdin.gets.chomp
  # ======================================================================= #
  # Configure the comparer first, then trigger the comparison manually.
  # ======================================================================= #
  comparer = ::Bioroebe::SimpleStringComparer.new(:dont_run_yet) # bl $BIOROEBE/string_matching/simple_string_comparer.rb
  comparer.set_main_alignment_token_to '|'
  comparer.string1 = first_sequence
  comparer.string2 = second_sequence
  comparer.compare
end
2098
-
2099
- # ========================================================================= #
2100
- # === show_average_weight_of_a_nucleotide
2101
- #
2102
- # The formula was obtained from the following website:
2103
- #
2104
- # http://www.biophp.org/minitools/useful_formulas/demo.php
2105
- #
2106
- # ========================================================================= #
2107
def show_average_weight_of_a_nucleotide
  # Two fixed values are reported: one for dsDNA, one for ssDNA.
  erev('The average molecular weight (MW) of dsDNA is '+sfancy('660')+' Da.')
  erev('The average molecular weight (MW) of ssDNA is '+sfancy('330')+' Da.')
end
2111
-
2112
- # ========================================================================= #
2113
- # === show_config_dir
2114
- #
2115
- # This method will show the configuration directory.
2116
- # ========================================================================= #
2117
def show_config_dir
  # The configuration directory sits next to this very file.
  config_dir = "#{File.dirname(__FILE__)}/configuration/"
  erev('The configuration directory for the Bioroebe::Shell is at:')
  erev(' `'+sfile(config_dir)+rev+'`')
end
2122
-
2123
- # ========================================================================= #
2124
- # === show_last_downloaded_file
2125
- # ========================================================================= #
2126
def show_last_downloaded_file
  downloads = @array_all_downloads
  # Guard clause: nothing has been downloaded so far.
  return erev('We have not yet downloaded any file.') if downloads.empty?
  erev 'The last downloaded data was: '+sfancy(downloads.last)
end
2134
-
2135
- # ========================================================================= #
2136
- # === show_jumper_directories
2137
- # ========================================================================= #
2138
def show_jumper_directories
  jumper_directories = @internal_hash[:array_jumper_directories]
  # Guard clause: no jumper directory is registered.
  if jumper_directories.empty?
    return erev('No jumper directory has been assigned yet.')
  end
  erev 'The available jumper directories are:'
  pp @internal_hash[:array_jumper_directories]
end
2146
-
2147
- # ========================================================================= #
2148
- # === show_save_file
2149
- # ========================================================================= #
2150
def show_save_file
  # Name the current save file, then mention the "save_here" alternative.
  target = sfile(save_file?)
  erev('We will store into the file '+target+rev+'.')
  erev('If you wish to instead store into the current directory,')
  erev('input "save_here".')
end
2155
-
2156
- # ========================================================================= #
2157
- # === show_sigma_tutorial
2158
- #
2159
- # This method tells the user a bit about the sigma factors.
2160
- # ========================================================================= #
2161
def show_sigma_tutorial
  # Each inner Array is one paragraph; every paragraph is followed by an
  # empty line. The second paragraph used to lack its verb ("A sigma
  # factor a protein ..."), which is corrected here.
  paragraphs = [
    ['This subsection contains some information about Sigmafactors.'],
    ['A sigma factor is a protein needed for initiation of RNA synthesis.'],
    ['It is a bacterial transcription initiation factor.'],
    ['It will enable the specific binding of RNA polymerase to gene promoters.'],
    ['Sigma factors vary, which allows the bacterial cell to respond to',
     'different environmental signals.'],
    ['Every molecule of RNA polymerase holoenzyme will contain only one '\
     'sigma factor.'],
    ['The number of sigma factors varies between bacterial species.'],
    ['E. coli has seven sigma factors.'],
    ['Sigma factors are distinguished by their characteristic molecular '\
     'weights.'],
    ['For instance, sigma-70 refers to the sigma factor with a molecular '\
     'weight of 70 kDa.'],
    ['Once initiation of RNA transcription is complete, the sigma',
     'factor can leave the complex.']
  ]
  paragraphs.each {|lines|
    lines.each {|line| erev line }
    e
  }
  # The closing pointer to the NCBI entry is not followed by an empty line.
  erev 'Sigmafactor rpoD 70 can be found here:'
  e ' '+simp('http://www.ncbi.nlm.nih.gov/gene/947567')
end
2192
-
2193
- # ========================================================================= #
2194
- # === show_last_input
2195
- #
2196
- # sli can be used as command to access this method.
2197
- # ========================================================================= #
2198
def show_last_input
  if readline_is_available?
    history = Readline::HISTORY
    # Show the newest history entry and then drop it from the history.
    e sfancy(history[-1])
    history.pop
  end
  last_input = sfancy(@user_input)
  e "The last user input was: #{last_input}"
end
2205
-
2206
- # ========================================================================= #
2207
- # === show_mnemo
2208
- #
2209
- # A little helper-method to memorize things.
2210
- # ========================================================================= #
2211
def show_mnemo
  e
  # Charged side groups first.
  erev 'Amino Acids with negatively charged side groups: -'
  e sfancy(' D E')
  erev 'Amino Acids with positive charged side groups: +'
  e sfancy(' K R H')
  e
  # Then the enzyme classes, each as a [label, description] pair.
  [
    ['Oxidoreduktasen:', ' Oxidations-Reduktions-Reaktionen'],
    ['Transferasen:',    ' Übertragung funktioneller Gruppen'],
    ['Hydrolasen:',      ' Hydrolasereaktionen'],
    ['Lyasen:',          ' Eliminierung von Gruppen unter Ausbildung von Doppelbindungen'],
    ['Isomerasen:',      ' Isomerisierungen'],
    ['Ligasen:',         ' ATP-hydrolytic formation of bonds']
  ].each {|label, description|
    e sfancy(label)+rev+description
  }
  e
end
2227
-
2228
- # ========================================================================= #
2229
- # === show_histone_table
2230
- # ========================================================================= #
2231
def show_histone_table
  erev 'The following table will show Calf Thymus Histones:'
  e
  # Header row first, then one row per histone.
  [
    'Histone | number of residues | mass in kDa | n% Arginine | n% Lysine',
    ' H1 215 23.0 1 29',
    ' H2A 129 14.0 9 11',
    ' H2B 125 13.8 6 16',
    ' H3 135 15.3 13 10',
    ' H4 102 11.3 14 11'
  ].each {|row| erev row }
  e
end
2242
-
2243
- # ========================================================================= #
2244
- # === show_average_weight_of_an_aminoacid
2245
- #
2246
- # Show the average weight for an aminoacid that is part of a protein.
2247
- # ========================================================================= #
2248
def show_average_weight_of_an_aminoacid
  # A single fixed value is reported here.
  erev('The average molecular weight (MW) of an amino acid is '+sfancy('110')+' Da.')
end
2252
-
2253
- # ========================================================================= #
2254
- # === show_first_orf
2255
- #
2256
- # This will show the first ORF.
2257
- #
2258
- # Invocation example:
2259
- #
2260
- # show_first_orf
2261
- #
2262
- # ========================================================================= #
2263
def show_first_orf(
    of_this_sequence = dna_sequence_object?
  )
  sequence = of_this_sequence
  return_all_possible_start_codons.each do |codon|
    unless sequence.include? codon
      erev 'Not found the codon '+simp(codon)+rev+'.'
      next
    end
    # Show everything from the first occurrence of this codon up to the
    # end of the sequence; the reported start position is 1-based.
    start_index = sequence.index(codon)
    remainder = sequence[start_index..-1]
    e rev+padding?+leading_5_prime+sfancy(remainder)+
      rev+trailing_3_prime+' (Start position at nucleotide: '+
      orange((start_index+1).to_s)+rev+')'
  end
end
2279
-
2280
- # ========================================================================= #
2281
- # === show_available_vectors
2282
- # ========================================================================= #
2283
def show_available_vectors
  erev 'We will next try to show the available vectors.'
  # The original message contained a duplicated word ("the the prefix").
  erev 'For now, these are all file names that start with '\
       'the prefix '+orange('vector_')+rev+'.'
  vectors = return_available_vectors # Defined in bioroebe/shell.rb
  if vectors.empty?
    erev 'No vector-sequence was found.'
  else
    erev 'We found at least one entry.'
    print ' '
    pp vectors
    # The first vector found becomes the second sequence of the shell.
    erev 'Assigning the first one to the second sequence.'
    set_sequence_2(Bioroebe::Sequence.sequence_from_file(vectors.first))
    erev 'You can feedback this sequence via:'
    e
    erev ' seq2?'
    e
  end
end
2302
-
2303
- # ========================================================================= #
2304
- # === report_current_genbank_version
2305
- #
2306
- # You can use this method to report the current genbank version.
2307
- # ========================================================================= #
2308
def report_current_genbank_version(
    optional_arguments = nil
  )
  require 'open-uri' # Provides URI.open for remote URLs.
  remote_url = 'https://www.ncbi.nlm.nih.gov/genbank/statistics/'
  if optional_arguments == :also_report_the_URL
    erev 'We will obtain the latest Genbank version from the URL:'
    e
    erev " #{simp(remote_url)}"
    e
  end
  remote_dataset = URI.open(remote_url).read.split(N)
  # ======================================================================= #
  # For the following Regex, see this link:
  #
  #   https://rubular.com/r/XC97c7i6sR
  #
  # It captures the release number and the release date of the last row
  # of the statistics table.
  # ======================================================================= #
  regex_to_use =
    /<td>(\d{1,3})<\/td><td>(.{1,3}\s{1,3}\d{4})<\/td><td>\d+<\/td><td>\d+<\/td><td>\d+<\/td><td>\d+<\/td><\/tr><\/tbody><\/table>$/
  # ======================================================================= #
  # Collect only the lines that belong to the statistics table: start at
  # the opening <table id="stats_table" ...> tag and stop after the line
  # that carries the closing </table> tag.
  # ======================================================================= #
  table_html = ''.dup
  inside_table = false
  remote_dataset.each {|line|
    if line.include? '<table id="stats_table" summary="GENBANK AND WGS'
      table_html << line
      inside_table = true
    else
      table_html << line if inside_table
      inside_table = false if line.include? '</table>'
    end
  }
  # ======================================================================= #
  # Use explicit match data rather than the $1/$2 globals, and tell the
  # user when the page no longer has the expected layout instead of
  # printing an empty version and date.
  # ======================================================================= #
  match = regex_to_use.match(table_html)
  if match
    version = match[1].to_s
    month_and_year = match[2].to_s
    erev 'The current Genbank version is: '+simp(version)+
         rev+' (released on '+simp(month_and_year)+rev+')'
  else
    erev 'The current Genbank version could not be determined from the URL:'
    erev " #{simp(remote_url)}"
  end
end
2349
-
2350
- # ========================================================================= #
2351
- # === show_copyright_clause
2352
- #
2353
- # This method will simply show the licence used for the project.
2354
- #
2355
- # This has to be updated manually, though; and since the licence
2356
- # may change one day, I will keep track when this method has been
2357
- # last modified, which is on the 28.04.2020 (28th April, 2020).
2358
- # ========================================================================= #
2359
def show_copyright_clause
  # Licence statement.
  e
  erev('This project is free software, licensed under the LGPL-2.0 license.')
  erev('No "any later clause"; LGPL-2.0 applies to it.')
  # Copyright holder.
  e
  erev(' Copyright: Robert A. Heiler (2010-2020 and later)')
  # Separate licence of the biomart component.
  e
  erev('The biomart component is licensed under the MIT license and is')
  erev('written by Darren Oakley. The MIT license is retained for the')
  erev('Biomart component.')
  # Historic note about the former licence.
  e
  erev('(Note that the bioroebe project used to be under the GPL licence')
  erev('before some time; see the homepage of this gem for the explanation')
  erev('as to why a switch occurred towards LGPL.)')
end
2374
-
2375
- # ========================================================================= #
2376
- # === report_n_proteins_registered_in_swiss_prot
2377
- #
2378
- # This method will report how many proteins are registered in swiss-prot.
2379
- #
2380
- # Invoke this method like so:
2381
- #
2382
- # swiss-prot?
2383
- #
2384
- # ========================================================================= #
2385
def report_n_proteins_registered_in_swiss_prot
  require 'open-uri' # Provides URI.open for remote URLs.
  regex_to_use = /contains (\d+) sequence entries/ # See: http://rubular.com/r/Bl9tHfheEx
  url = 'https://web.expasy.org/docs/relnotes/relstat.html'
  # ======================================================================= #
  # Kernel#open does not fetch URLs anymore on Ruby 3.0 and later, so
  # URI.open is used here - the same call that
  # report_current_genbank_version relies on.
  # ======================================================================= #
  dataset = URI.open(url).read
  # The first capture group holds the number; it is empty when the
  # regex does not match.
  n_registered_proteins = dataset[regex_to_use, 1].to_s
  erev 'There are '+simp(n_registered_proteins)+rev+' registered '\
       'proteins in the Swiss-Prot database.'
  erev "The URL used to determine this was: "\
       "#{simp(url)}"
end
2396
-
2397
-
2398
- # ========================================================================= #
2399
- # === report_whether_readline_is_available
2400
- # ========================================================================= #
2401
def report_whether_readline_is_available
  # Readline counts as available when the toplevel constant is defined.
  is_available = Object.const_defined?(:Readline)
  answer = slateblue(verbose_truth(is_available))
  erev 'Is readline available? '+answer
end
2409
-
2410
- require 'bioroebe/dotplots/advanced_dotplot.rb'
2411
- # ========================================================================= #
2412
- # === show_2D_dotplot
2413
- # ========================================================================= #
2414
def show_2D_dotplot(
    string1 = nil, string2 = nil
  )
  # Only when neither string was passed do we ask the user interactively.
  if string1.nil? && string2.nil?
    erev 'You want to use a dotplot.'
    erev 'Please provide the first string, which will be on the left side:'
    string1 = $stdin.gets.chomp
    erev 'Please provide the second string, which will be on the top side:'
    string2 = $stdin.gets.chomp
  end
  dotplot_class = ::Bioroebe::AdvancedDotplot
  dotplot_class.new(string1, string2)
end
2426
-
2427
- # ========================================================================= #
2428
- # === show_reverse_dna_string
2429
- #
2430
- # This method will simply show the DNA sequence reversed.
2431
- # ========================================================================= #
2432
def show_reverse_dna_string
  # Padding, 5' leader, the highlighted reversed sequence, 3' trailer.
  erev(
    padding?+leading_five_prime+sfancy(return_reverse_dna_string)+rev+trailing_three_prime
  )
end
2439
-
2440
- # ========================================================================= #
2441
- # === show_download_dir
2442
- # ========================================================================= #
2443
def show_download_dir
  # Output what Bioroebe.download_directory? returns.
  download_directory = ::Bioroebe.download_directory?
  erev download_directory
end
2446
-
2447
- # ========================================================================= #
2448
- # === show_this_sequence_padded
2449
- #
2450
- # Usage example:
2451
- #
2452
- # show_this_sequence_padded ATGACTTAGCCACAACTGCATGCATATGCATGACTGACT
2453
- #
2454
- # ========================================================================= #
2455
def show_this_sequence_padded(
    i = dna_sequence_object?
  )
  if i.is_a? Array
    # An empty Array means "use the main DNA sequence".
    i << dna_sequence_object? if i.empty?
    i = i.join
  end
  # ======================================================================= #
  # Cut the input into pieces of at most 80 characters, discard the
  # empty leftovers of the scan and show one piece per line.
  # ======================================================================= #
  i.scan(/.{,80}/).reject(&:empty?).each {|line| erev line }
end
2472
-
2473
- require 'bioroebe/enzymes/restriction_enzymes_file.rb'
2474
- # ========================================================================= #
2475
- # === show_all_yaml_files
2476
- #
2477
- # We show which yaml files we will use here.
2478
- # ========================================================================= #
2479
def show_all_yaml_files
  # Currently only the restriction-enzymes file is reported.
  restriction_enzymes_file = sfile(::Bioroebe.restriction_enzymes_file)
  erev('The file that holds our restriction enzymes can be found here:')
  e
  erev(" #{restriction_enzymes_file}")
  e
end
2485
-
2486
- # ========================================================================= #
2487
- # === show_resources_about_the_horseradish_peroxidase
2488
- # ========================================================================= #
2489
def show_resources_about_the_horseradish_peroxidase
  # Three NCBI resources: a gene search, a gene entry and a fasta link.
  e('https://www.ncbi.nlm.nih.gov/gene/?term=%22Horseradish+Peroxidase%22')
  e('https://www.ncbi.nlm.nih.gov/gene/836533')
  e('Fasta: https://www.ncbi.nlm.nih.gov/nuccore/NC_003076.8?report=fasta&from=25659257&to=25661007&strand=true')
end
2494
-
2495
- # ========================================================================= #
2496
- # === report_whether_we_will_make_use_of_expand_cd_aliases
2497
- # ========================================================================= #
2498
def report_whether_we_will_make_use_of_expand_cd_aliases
  # Run the current setting through VerboseTruth and output the result.
  verbose_answer = Bioroebe::VerboseTruth[use_expand_cd_aliases?]
  erev verbose_answer
end
2501
-
2502
- # ========================================================================= #
2503
- # === report_useful_packages_installed
2504
- #
2505
- # This aggregate method can be used to report versions that may be
2506
- # installed on the given system, e. g. science-based projects and
2507
- # similar variants.
2508
- # ========================================================================= #
2509
def report_useful_packages_installed
  # One reporter per external package: ViennaRNA first, then bedtools.
  try_to_report_the_version_of_viennarna()
  try_to_report_the_version_of_bedtools()
end
2513
-
2514
- # ========================================================================= #
2515
- # === try_to_report_the_version_of_viennarna
2516
- #
2517
- # This method can be used to see the version of ViennaRNA, if it is
2518
- # installed at all.
2519
- # ========================================================================= #
2520
def try_to_report_the_version_of_viennarna
  # STDERR is merged into STDOUT so that the complaint of the shell about
  # a missing binary ends up in the captured output as well.
  output = `RNAplfold --version 2>&1`
  # ======================================================================= #
  # bash says "command not found" whereas dash (the /bin/sh of Debian and
  # Ubuntu) only says "not found". Checking for the shorter phrase covers
  # both; the former check let the dash message through as a "version".
  # ======================================================================= #
  if output.include? 'not found'
    e
    erev 'ViennaRNA does not appear to be installed / available.'
    e
    if is_on_roebe?
      erev 'You may be able to install it via:'
      e
      erev ' rbt viennarna'
      e
    end
  else
    # What remains after removing the program name is the version.
    version = output.sub(/RNAplfold/,'').strip.to_s
    erev 'The version of ViennaRNA is: '+
         orange(version)+rev
  end
end
2538
-
2539
- # ========================================================================= #
2540
- # === report_current_working_directory
2541
- # ========================================================================= #
2542
def report_current_working_directory
  # Announce, then show the colourized working directory.
  working_directory = sdir(return_working_directory)
  erev('We are in the directory:')
  erev(" #{working_directory}")
end
2546
-
2547
- # ========================================================================= #
2548
- # === report_which_yaml_engine_is_in_use
2549
- # ========================================================================= #
2550
def report_which_yaml_engine_is_in_use
  # Ask the toplevel module which yaml engine is used and highlight it.
  engine = sfancy(::Bioroebe.use_which_yaml_engine?)
  erev('The yaml engine in use is: '+engine+rev)
end
2555
-
2556
- begin
2557
- require 'directory_paradise'
2558
- rescue LoadError; end
2559
- # ========================================================================= #
2560
- # === show_file_listing
2561
- #
2562
- # Make use of DirectoryParadise::Report to show the content of a directory.
2563
- #
2564
- # To invoke this method from within the Bioroebe::Shell, do:
2565
- #
2566
- # ll
2567
- #
2568
- # ========================================================================= #
2569
def show_file_listing(
    from_this_directory = Dir.pwd
  )
  # ======================================================================= #
  # The directory_paradise gem is required optionally (a LoadError is
  # rescued at load time), so the constant may be missing. Report that
  # properly rather than failing with a NameError.
  # ======================================================================= #
  unless Object.const_defined?(:DirectoryParadise)
    erev 'The directory_paradise gem is not available; thus the content '\
         'of the directory can not be shown.'
    return
  end
  # Configure the report first, then trigger it manually.
  report = DirectoryParadise::Report.new(from_this_directory, :dont_run_yet)
  report.dont_report_total_filesize
  report.disable_colours unless use_colours?
  report.run
end
2577
-
2578
- # ========================================================================= #
2579
- # === try_to_report_the_version_of_bedtools
2580
- # ========================================================================= #
2581
def try_to_report_the_version_of_bedtools
  # STDERR is merged into STDOUT so that the complaint of the shell about
  # a missing binary ends up in the captured output as well.
  output = `bedtools --version 2>&1`
  # ======================================================================= #
  # bash says "command not found" whereas dash (the /bin/sh of Debian and
  # Ubuntu) only says "not found". Checking for the shorter phrase covers
  # both.
  # ======================================================================= #
  if output.include? 'not found'
    e
    erev 'The bedtools do not appear to be installed / available.'
    e
    if is_on_roebe?
      erev 'You may be able to install it via:'
      e
      erev ' rbt bedtools'
      e
    end
  else
    # Remove the program name and then only the leading "v" of the
    # version; delete('v') would strip every "v" of the string.
    version = output.sub(/bedtools/,'').strip.to_s.sub(/\Av/, '')
    erev "The version of bedtools is: "\
         "#{orange(version)}#{rev}"
  end
end
2599
-
2600
- # ========================================================================= #
2601
- # === three_to_one
2602
- #
2603
- # This method will translate, and output, a three-letter aminoacid
2604
- # into the corresponding single-letter code.
2605
- #
2606
- # Invocation example:
2607
- #
2608
- # three_to_one Thr Thr Glu Ala Val Glu Ser Thr Val Ala Thr Leu Glu Asp Ser # => T T E A V E S T V A T L E D S
2609
- # 3to1 ARG-ALA-SER-LEU-PHE-TRP-LYS-HIS-ASN-SER-VAL-LEU-ILE-VAL-PRO
2610
- #
2611
- # ========================================================================= #
2612
def three_to_one(i)
  # Several arguments are combined into one '-' separated String.
  i = i.join('-').strip if i.is_a?(Array)
  converted = ::Bioroebe.three_to_one(i)
  e converted.strip
end
2618
-
2619
- require 'bioroebe/codons/codons.rb'
2620
- # ========================================================================= #
2621
- # === show_codons_of_this_aminoacid_or_show_kazusa_codon
2622
- #
2623
- # This method can be used to output which aminoacid a specific codon
2623
- # codes for.
2625
- #
2626
- # The input to this method should be a specific codon, such as ATG or
2627
- # GGC and so forth.
2628
- #
2629
- # If no input is provided, we will instead show the webpage of
2630
- # kazusa.
2631
- #
2632
- # Invocation examples:
2633
- #
2634
- # codon? ATG # => M
2635
- # codon? AUG # => M
2636
- #
2637
- # ========================================================================= #
2638
def show_codons_of_this_aminoacid_or_show_kazusa_codon(i = nil)
  i = i.first if i.is_a?(Array)
  # ======================================================================= #
  # Without input we merely point the user at the kazusa webpage.
  # ======================================================================= #
  unless i
    return erev("The URL is at: #{simp('http://www.kazusa.or.jp/codon/')}")
  end
  # ======================================================================= #
  # Otherwise hand the input to Bioroebe.codon_to_aminoacid and show
  # what it returns.
  # ======================================================================= #
  e ::Bioroebe.codon_to_aminoacid(i)
end
2652
-
2653
- # ========================================================================= #
2654
- # === return_reverse_dna_string
2655
- # ========================================================================= #
2656
def return_reverse_dna_string
  # Reverse whatever complement_sequence? returns.
  complement = complement_sequence?
  complement.reverse
end
2659
-
2660
- # ========================================================================= #
2661
- # === showorf (showorf tag)
2662
- #
2663
- # Use this method to show the open reading frame of a given sequence.
2664
- #
2665
- # We can also use it to selectively show a certain frame, such as
2666
- # frame2. See class Bioroebe::ShowOrf for this.
2667
- #
2668
- # Note that in May 2020 (10.05.2020) class Bioroebe::ShowOrf here
2669
- # was replaced with class Bioroebe::DisplayOpenReadingFrames.
2670
- # ========================================================================= #
2671
def showorf(
    i = dna_sequence_object?,
    show_how_many_frames = :show_three_frames
  )
  # nil as well as an empty Array fall back to the main DNA sequence.
  i = dna_sequence_object? if i.nil?
  i = dna_sequence_object? if i.is_a?(Array) && i.empty?
  # The number of frames to show is passed on via the block.
  frames = show_how_many_frames
  display_open_reading_frames(i) { frames }
end
2679
-
2680
- # ========================================================================= #
2681
- # === display_open_reading_frames
2682
- #
2683
- # Invocation example:
2684
- #
2685
- # display_open_reading_frames ATGAGCAAGGCCGACTACGAGAAG
2686
- #
2687
- # ========================================================================= #
2688
def display_open_reading_frames(
    i = dna_sequence_object?, &block
  )
  # Only the first element of an Array is considered.
  i = i.first if i.is_a?(Array)
  # nil and empty input both fall back to the main DNA sequence.
  i = dna_sequence_object? if i.nil?
  i = dna_sequence_object? if i.empty?
  # The class is loaded lazily, on the first invocation of this method.
  require 'bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb'
  orf_class = ::Bioroebe::DisplayOpenReadingFrames
  orf_class.new(i, &block)
end
2697
-
2698
- require 'bioroebe/fasta_and_fastq/show_fasta_headers.rb'
2699
- # ========================================================================= #
2700
- # === show_fasta_headers
2701
- #
2702
- # Just show the fasta headers.
2703
- # ========================================================================= #
2704
- def show_fasta_headers(i)
2705
- ::Bioroebe::ShowFastaHeaders.new(i) # Delegate into class Bioroebe::ShowFastaHeaders.
2706
- end
2707
-
2708
- # ========================================================================= #
2709
- # === show_commandline_options
2710
- #
2711
- # Show the available commandline options.
2712
- #
2713
- # To invoke this method from the commandline, do:
2714
- #
2715
- # bioroebe --help
2716
- #
2717
- # ========================================================================= #
2718
- def show_commandline_options
2719
- e
2720
- ecomment(' --silent # perform a silent startup')
2721
- ecomment(' --sequence # use this nucleotide sequence on '\
2722
- 'startup; can be a number too such as 150')
2723
- ecomment(' --n_fasta_entries # report how many fasta '\
2724
- 'entries are in this directory')
2725
- ecomment(' --disable-opn # permanently disable opn')
2726
- ecomment(' --random-aminoacids=33 # "generate" 33 random amino acids and display them')
2727
- ecomment(' --n-aminoacids=33 # an alias to the ^^^ above')
2728
- ecomment(' --protein-to-dna # convert protein-aminoacid '\
2729
- 'sequence back to DNA')
2730
- e
2731
- exit
2732
- end
2733
-
2734
- # ========================================================================= #
2735
- # === show_codon_table
2736
- # ========================================================================= #
2737
- def show_codon_table(i = nil)
2738
- if i and i.is_a?(Array) and i.empty?
2739
- i << 1 # Default to the vertebrate codon table in this case.
2740
- end
2741
- ShowThisCodonTable.new(i)
2742
- end
2743
-
2744
- # ========================================================================= #
2745
- # === show_rna_sequence
2746
- #
2747
- # Use this method to convert a given sequence to RNA.
2748
- # ========================================================================= #
2749
- def show_rna_sequence(
2750
- i = sequence_object?.to_rna
2751
- )
2752
- i = sequence_object?.to_rna if i.nil?
2753
- i = i.to_str if i.respond_to? :to_str
2754
- if i.include? 'T'
2755
- i.tr!('T','U')
2756
- end
2757
- display_nucleotide_object?.display(i) {{ use_this_as_padding: lpad? }}
2758
- end
2759
-
2760
- # ========================================================================= #
2761
- # === report_size_of
2762
- # ========================================================================= #
2763
- def report_size_of(
2764
- i = nil
2765
- )
2766
- if i.nil?
2767
- i = dna_sequence_object?
2768
- end
2769
- if i
2770
- erev "This sequence contains #{sfancy(i.size.to_s)}#{rev} nucleotides."
2771
- else
2772
- report_size_of_main_string
2773
- end
2774
- end
2775
-
2776
- # ========================================================================= #
2777
- # === display_glycolysis_pathway
2778
- #
2779
- # This method will show the glycolysis Pathway.
2780
- # ========================================================================= #
2781
- def display_glycolysis_pathway
2782
- array = Pathways.glycolysis_pathway # Obtain the glyclosis pathway, as Array.
2783
- if Object.const_defined? :Display
2784
- Display.display(array, ')')
2785
- else
2786
- array.each {|entry| e ' - '+entry }
2787
- end
2788
- end
2789
-
2790
- # ========================================================================= #
2791
- # === show_the_weight_of_some_common_proteins
2792
- # ========================================================================= #
2793
- def show_the_weight_of_some_common_proteins(
2794
- use_this_file = FILE_WEIGHT_OF_COMMON_PROTEINS
2795
- )
2796
- erev 'Showing the weight of some common proteins next (in kDa):'
2797
- e
2798
- dataset = File.readlines(use_this_file).select {|line|
2799
- line.include? ' # '
2800
- }
2801
- dataset.each {|line|
2802
- splitted = line.split(':')
2803
- key = splitted[0]
2804
- value = splitted[1 .. -1].join(' ').strip
2805
- erev " #{(key+':').ljust(25)} "\
2806
- "#{lightblue((value.to_s+' kDa').rjust(12))}"
2807
- }
2808
- e
2809
- end
2810
-
2811
- # ========================================================================= #
2812
- # === show_protein_composition
2813
- #
2814
- # Delegate towards class CountAmountOfAminoacids
2815
- # ========================================================================= #
2816
- def show_protein_composition(i)
2817
- ::Bioroebe::CountAmountOfAminoacids.new(i) # bl $BIOROEBE/count_amount_of_aminoacids.rb
2818
- end
2819
-
2820
- # ========================================================================= #
2821
- # === show_all_deducible_aminoacid_sequences
2822
- #
2823
- # Note that if the string is too short, we won't display the other frames.
2824
- #
2825
- # If the third argument, `show_translations_aligned`, is set to
2826
- # true then we will additionally display all 3 frames aligned
2827
- # one to another.
2828
- #
2829
- # Usage example:
2830
- #
2831
- # toproteins AUG
2832
- # toproteins AUGAUGUUGAAU
2833
- # toproteins AUG-AUG-UUG-AAA-GGU-CGC-AAU-STOP
2834
- #
2835
- # ========================================================================= #
2836
- def show_all_deducible_aminoacid_sequences(
2837
- i = dna_sequence_as_string?,
2838
- also_show_numbers = true,
2839
- show_translations_aligned = true
2840
- )
2841
- if i and i.is_a?(Array) and i.empty?
2842
- i = dna_sequence_as_string?
2843
- end
2844
- i = dna_sequence_as_string? if i.nil?
2845
- i = i.join(' ').strip if i.is_a? Array
2846
- i = i.to_s.dup # To avoid nil-operations.
2847
- i.delete!('-') if i.include? '-'
2848
- if i.empty? # This means that the user has not yet assigned a DNA sequence.
2849
- erev 'Please assign some DNA sequence. You can also randomly generate'
2850
- erev 'a new sequence via "random".'
2851
- return
2852
- end
2853
- cliner
2854
- erev N+'The amino acid sequence for '+sfancy('Frame 1')+rev+' is: '
2855
- e
2856
- converted_sequence_for_frame_1 = translate_dna_into_aminoacid(i).to_s
2857
- erev ' '+converted_sequence_for_frame_1+N+N
2858
- # ======================================================================= #
2859
- # === Also show numbers
2860
- # ======================================================================= #
2861
- if also_show_numbers
2862
- verbose_report_numbered_amino_acid_sequence(converted_sequence_for_frame_1)
2863
- end
2864
- cliner
2865
- if i && i.size > 2
2866
- erev N+N+'The amino acid sequence for '+sfancy('Frame 2')+rev+' is: '
2867
- e
2868
- converted_sequence_for_frame_2 = translate_dna_into_aminoacid_frame2(i)
2869
- erev ' '+converted_sequence_for_frame_2+N+N
2870
- if also_show_numbers
2871
- verbose_report_numbered_amino_acid_sequence(converted_sequence_for_frame_2, '2')
2872
- end
2873
- cliner
2874
- e
2875
- erev N+N+'The amino acid sequence for '+sfancy('Frame 3')+rev+' is: '
2876
- e
2877
- converted_sequence_for_frame_3 = translate_dna_into_aminoacid_frame3(i)
2878
- erev ' '+converted_sequence_for_frame_3+N+N
2879
- if also_show_numbers
2880
- verbose_report_numbered_amino_acid_sequence(converted_sequence_for_frame_3, '3')
2881
- end
2882
- e
2883
- cliner
2884
- if show_translations_aligned
2885
- showorf(i) # Delegate into class Showorf here.
2886
- end
2887
- end
2888
- end
2889
-
2890
- # ========================================================================= #
2891
- # === show_blosum_matrix
2892
- #
2893
- # Delegate towards bioroebe here, and invoke the .blosum() method.
2894
- # ========================================================================= #
2895
- def show_blosum_matrix
2896
- erev 'Showing the blosum matrix next:'
2897
- require 'bioroebe/blosum/blosum.rb'
2898
- Bioroebe::Blosum.show_matrix
2899
- end
2900
-
2901
- end; end