bioroebe 0.12.24 → 0.13.31

Sign up to get free protection for your applications and to get access to all the features.
Files changed (503) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +7 -8
  3. data/README.md +566 -354
  4. data/bin/all_positions_of_this_nucleotide +1 -1
  5. data/bin/aminoacid_frequencies +1 -1
  6. data/bin/automatically_rename_this_fasta_file +1 -1
  7. data/bin/base_composition +1 -1
  8. data/bin/batch_create_windows_executables +1 -1
  9. data/bin/bioroebe +12 -1
  10. data/bin/bioroebe_cat +7 -0
  11. data/bin/calculate_exponential_growth +7 -0
  12. data/bin/calculate_n50_value +1 -1
  13. data/bin/calculate_the_frequencies_of_this_species +7 -0
  14. data/bin/chunked_display +1 -1
  15. data/bin/codon_frequency +1 -1
  16. data/bin/codon_to_aminoacid +1 -1
  17. data/bin/colourize_this_fasta_sequence +1 -1
  18. data/bin/complementary_dna_strand +1 -1
  19. data/bin/complementary_rna_strand +1 -1
  20. data/bin/consensus_sequence +1 -1
  21. data/bin/dna_to_rna +1 -1
  22. data/bin/downcase_chunked_display +1 -1
  23. data/bin/download_this_pdb +1 -1
  24. data/bin/fasta_index +1 -1
  25. data/bin/fetch_data_from_uniprot +1 -1
  26. data/bin/filter_away_invalid_nucleotides +1 -1
  27. data/bin/find_substring +1 -1
  28. data/bin/input_as_dna +1 -1
  29. data/bin/is_palindrome +1 -1
  30. data/bin/leading_five_prime +1 -1
  31. data/bin/longest_ORF +1 -1
  32. data/bin/longest_substring +1 -1
  33. data/bin/open_reading_frames +1 -1
  34. data/bin/partner_nucleotide +1 -1
  35. data/bin/plain_palindrome +1 -1
  36. data/bin/random_dna_sequence +1 -1
  37. data/bin/random_sequence +1 -1
  38. data/bin/raw_hamming_distance +1 -1
  39. data/bin/return_longest_substring_via_LCS_algorithm +1 -1
  40. data/bin/reverse_sequence +1 -1
  41. data/bin/short_aminoacid_letter_from_long_aminoacid_name +1 -1
  42. data/bin/show_atomic_composition +1 -1
  43. data/bin/show_fasta_header +1 -1
  44. data/bin/show_nucleotide_sequence +1 -1
  45. data/bin/show_this_dna_sequence +1 -1
  46. data/bin/show_time_now +7 -0
  47. data/bin/sort_aminoacid_based_on_its_hydrophobicity +1 -1
  48. data/bin/strict_filter_away_invalid_aminoacids +1 -1
  49. data/{lib/bioroebe/base/reset.rb → bin/three_delimiter} +9 -6
  50. data/bin/three_to_one +1 -1
  51. data/bin/to_rna +1 -1
  52. data/bin/trailing_three_prime +1 -1
  53. data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +1 -1
  54. data/bioroebe.gemspec +6 -7
  55. data/doc/README.gen +534 -322
  56. data/doc/blosum/blosum.md +4 -0
  57. data/doc/compatibility/BIO_PHP.md +20 -18
  58. data/doc/compatibility/README.md +2 -3
  59. data/doc/compatibility/emboss.md +5 -3
  60. data/doc/{extensive_usage_example.md → extensive_usage_example/extensive_usage_example.md} +4 -2
  61. data/doc/{instructions_for_the_taxonomy_subproject.md → instructions_for_the_taxonomy_subproject/instructions_for_the_taxonomy_subproject.md} +36 -33
  62. data/doc/{legacy_paths.md → legacy_paths/legacy_paths.md} +3 -3
  63. data/doc/statistics/statistics.md +12 -10
  64. data/doc/todo/bioroebe_GUI_todo.md +6 -1
  65. data/doc/todo/bioroebe_java_todo.md +3 -2
  66. data/doc/todo/bioroebe_todo.md +328 -310
  67. data/doc/{using_biomart.md → using_biomart/using_biomart.md} +7 -3
  68. data/lib/bioroebe/abstract/features.rb +0 -0
  69. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -1
  70. data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +3 -1
  71. data/lib/bioroebe/aminoacids/codon_percentage.rb +18 -10
  72. data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +5 -2
  73. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +90 -64
  74. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -3
  75. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +2 -2
  76. data/lib/bioroebe/annotations/create_annotation_format.rb +2 -2
  77. data/lib/bioroebe/base/base.rb +101 -6
  78. data/lib/bioroebe/base/base_module/base_module.rb +9 -1
  79. data/lib/bioroebe/base/colours.rb +3 -0
  80. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +80 -44
  81. data/lib/bioroebe/base/commandline_application/README.md +1 -1
  82. data/lib/bioroebe/base/commandline_application/commandline_application.rb +661 -22
  83. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +2 -1
  84. data/lib/bioroebe/base/infer_the_namespace_module/infer_the_namespace_module.rb +37 -0
  85. data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +1 -6
  86. data/lib/bioroebe/base/prototype/prototype.rb +155 -14
  87. data/lib/bioroebe/biomart/attribute.rb +1 -1
  88. data/lib/bioroebe/biomart/biomart.rb +8 -9
  89. data/lib/bioroebe/biomart/server.rb +1 -1
  90. data/lib/bioroebe/blosum/blosum.rb +2 -2
  91. data/lib/bioroebe/calculate/calculate_blosum_score.rb +5 -3
  92. data/lib/bioroebe/calculate/calculate_gc_content.rb +1 -1
  93. data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +5 -3
  94. data/lib/bioroebe/calculate/calculate_melting_temperature.rb +2 -10
  95. data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +6 -15
  96. data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +4 -2
  97. data/lib/bioroebe/cell/cell.rb +3 -2
  98. data/lib/bioroebe/cell/specialized_cells/B_cell.rb +60 -0
  99. data/lib/bioroebe/cell/specialized_cells/Macrophage.rb +60 -0
  100. data/lib/bioroebe/cell/specialized_cells/README.md +5 -0
  101. data/lib/bioroebe/cell/specialized_cells/T_cell.rb +60 -0
  102. data/lib/bioroebe/cleave_and_digest/cleave.rb +3 -1
  103. data/lib/bioroebe/cleave_and_digest/digestion.rb +1 -1
  104. data/lib/bioroebe/codon_tables/frequencies/10090_Mus_musculus.yml +93 -0
  105. data/lib/bioroebe/codon_tables/frequencies/107243_Thlaspi_caerulescens.yml +72 -0
  106. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -2
  107. data/lib/bioroebe/codons/codon_table.rb +10 -2
  108. data/lib/bioroebe/codons/codons.rb +3 -3
  109. data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +18 -15
  110. data/lib/bioroebe/codons/determine_optimal_codons.rb +1 -1
  111. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +4 -2
  112. data/lib/bioroebe/codons/show_codon_tables.rb +1 -1
  113. data/lib/bioroebe/codons/show_codon_usage.rb +1 -2
  114. data/lib/bioroebe/codons/show_this_codon_table.rb +2 -2
  115. data/lib/bioroebe/codons/start_codons.rb +7 -3
  116. data/lib/bioroebe/colours/colour_schemes/README.md +1 -1
  117. data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +3 -3
  118. data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +3 -3
  119. data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +4 -3
  120. data/lib/bioroebe/colours/colour_schemes/helix.rb +3 -1
  121. data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +3 -1
  122. data/lib/bioroebe/colours/colour_schemes/score.rb +13 -2
  123. data/lib/bioroebe/colours/colour_schemes/strand.rb +3 -1
  124. data/lib/bioroebe/colours/colour_schemes/turn.rb +3 -1
  125. data/lib/bioroebe/colours/colour_schemes/zappo.rb +1 -1
  126. data/lib/bioroebe/{toplevel_methods/colourize_related_methods.rb → colours/colourize_related_code.rb} +1 -3
  127. data/lib/bioroebe/colours/colourize_sequence.rb +3 -1
  128. data/lib/bioroebe/colours/colours.rb +172 -15
  129. data/lib/bioroebe/configuration/configuration.rb +1 -1
  130. data/lib/bioroebe/constants/GUIs.rb +2 -2
  131. data/lib/bioroebe/constants/constants.rb +1349 -0
  132. data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +8 -13
  133. data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +9 -3
  134. data/lib/bioroebe/count/count_amount_of_aminoacids.rb +11 -10
  135. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +1 -1
  136. data/lib/bioroebe/count/count_at.rb +2 -1
  137. data/lib/bioroebe/databases/download_taxonomy_database.rb +1 -1
  138. data/lib/bioroebe/dotplots/advanced_dotplot.rb +2 -2
  139. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +2 -2
  140. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +2 -2
  141. data/lib/bioroebe/electron_microscopy/flipy.rb +2 -2
  142. data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +3 -11
  143. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +6 -6
  144. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +6 -6
  145. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +2 -2
  146. data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +1 -1
  147. data/lib/bioroebe/enzymes/restriction_enzyme.rb +1 -1
  148. data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +4 -3
  149. data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +1 -1
  150. data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +4 -3
  151. data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +3 -3
  152. data/lib/bioroebe/ext/main.cpp +0 -1
  153. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +3 -3
  154. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +1 -1
  155. data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +1 -1
  156. data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +8 -14
  157. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -1
  158. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +1 -1
  159. data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +1 -1
  160. data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +1 -1
  161. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +37 -11
  162. data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +2 -2
  163. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +1 -1
  164. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +5 -13
  165. data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +1 -1
  166. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -1
  167. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +3 -6
  168. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +3 -3
  169. data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +20 -11
  170. data/lib/bioroebe/genome/genome.rb +1 -1
  171. data/lib/bioroebe/genomes/genome_pattern.rb +17 -16
  172. data/lib/bioroebe/genomes/genome_retriever.rb +4 -2
  173. data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +10 -13
  174. data/lib/bioroebe/gui/universal_widgets/alignment/alignment.rb +557 -0
  175. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/aminoacid_composition.rb +498 -198
  176. data/lib/bioroebe/gui/universal_widgets/anti_sense_strand/anti_sense_strand.rb +665 -0
  177. data/lib/bioroebe/gui/universal_widgets/blosum_matrix_viewer/blosum_matrix_viewer.rb +329 -0
  178. data/lib/bioroebe/gui/universal_widgets/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +423 -0
  179. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/controller/controller.rb +170 -118
  180. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +277 -215
  181. data/lib/bioroebe/gui/{shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb → universal_widgets/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb} +297 -107
  182. data/lib/bioroebe/gui/universal_widgets/fasta_table_widget/fasta_table_widget.rb +643 -0
  183. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/format_converter/format_converter.rb +236 -164
  184. data/lib/bioroebe/gui/universal_widgets/gene/gene.rb +278 -0
  185. data/lib/bioroebe/gui/universal_widgets/hamming_distance/hamming_distance.rb +646 -0
  186. data/lib/bioroebe/gui/{shared_code/levensthein_distance/levensthein_distance_module.rb → universal_widgets/levensthein_distance/levensthein_distance.rb} +313 -88
  187. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/nucleotide_analyser/nucleotide_analyser.rb +281 -189
  188. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/parse_pdb_file/parse_pdb_file.rb +265 -149
  189. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/primer_design_widget/primer_design_widget.rb +337 -263
  190. data/lib/bioroebe/gui/universal_widgets/protein_to_DNA/protein_to_DNA.rb +408 -0
  191. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/random_sequence/random_sequence.rb +245 -187
  192. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.rb +207 -137
  193. data/lib/bioroebe/gui/universal_widgets/shell/shell.rb +288 -0
  194. data/lib/bioroebe/gui/{gtk3/show_codon_table/misc.rb → universal_widgets/show_codon_table/show_codon_table.rb} +290 -110
  195. data/lib/bioroebe/gui/{shared_code/show_codon_usage/show_codon_usage_module.rb → universal_widgets/show_codon_usage/show_codon_usage.rb} +228 -47
  196. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/sizeseq/sizeseq.rb +151 -69
  197. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/three_to_one/three_to_one.rb +190 -127
  198. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.rb +211 -152
  199. data/lib/bioroebe/images/images.html +953 -1170
  200. data/lib/bioroebe/images/misc/README.md +6 -0
  201. data/lib/bioroebe/images/misc/activation.avif +0 -0
  202. data/lib/bioroebe/images/misc/inhibition.avif +0 -0
  203. data/lib/bioroebe/images/misc/small_virus_logo.avif +0 -0
  204. data/lib/bioroebe/{constants/base_directory.rb → log_directory/log_directory.rb} +79 -59
  205. data/lib/bioroebe/matplotlib/matplotlib_generator.rb +1 -1
  206. data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +1 -1
  207. data/lib/bioroebe/misc/ruler.rb +5 -5
  208. data/lib/bioroebe/misc/useful_formulas.rb +3 -3
  209. data/lib/bioroebe/ncbi/efetch.rb +1 -2
  210. data/lib/bioroebe/ngs/phred_quality_score_table.rb +3 -3
  211. data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +3 -6
  212. data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +3 -3
  213. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +6 -10
  214. data/lib/bioroebe/nucleotides/{show_nucleotide_sequence.rb → show_nucleotide_sequence/show_nucleotide_sequence.rb} +377 -255
  215. data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +1 -1
  216. data/lib/bioroebe/palindromes/palindrome_finder.rb +1 -1
  217. data/lib/bioroebe/palindromes/palindrome_generator.rb +2 -10
  218. data/lib/bioroebe/parsers/biolang_parser.rb +1 -1
  219. data/lib/bioroebe/parsers/blosum_parser.rb +14 -19
  220. data/lib/bioroebe/parsers/genbank_parser.rb +2 -6
  221. data/lib/bioroebe/parsers/gff.rb +9 -9
  222. data/lib/bioroebe/parsers/parse_embl.rb +2 -6
  223. data/lib/bioroebe/parsers/stride_parser.rb +4 -12
  224. data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +2 -2
  225. data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +6 -3
  226. data/lib/bioroebe/patterns/profile_pattern.rb +2 -2
  227. data/lib/bioroebe/patterns/rgg_scanner.rb +4 -2
  228. data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/alpha_helix.rb +2 -2
  229. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/download_this_pdb.rb +2 -3
  230. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/fetch_fasta_sequence_from_pdb.rb +4 -4
  231. data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/helical_wheel.rb +2 -2
  232. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_mmCIF_file.rb +1 -1
  233. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_pdb_file.rb +3 -3
  234. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/report_secondary_structures_from_this_pdb_file.rb +3 -3
  235. data/lib/bioroebe/project/project.rb +3 -1
  236. data/lib/bioroebe/raw_sequence/README.md +8 -8
  237. data/lib/bioroebe/raw_sequence/raw_sequence.rb +11 -2
  238. data/lib/bioroebe/regexes/regexes.rb +1 -2
  239. data/lib/bioroebe/requires/commandline_application.rb +3 -1
  240. data/lib/bioroebe/requires/require_all_pdb_files.rb +1 -1
  241. data/lib/bioroebe/requires/require_all_taxonomy_files.rb +1 -1
  242. data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +10 -0
  243. data/lib/bioroebe/requires/require_colours.rb +1 -1
  244. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +5 -7
  245. data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +1 -1
  246. data/lib/bioroebe/requires/require_the_constants.rb +2 -14
  247. data/lib/bioroebe/requires/require_yaml.rb +7 -5
  248. data/lib/bioroebe/sequence/alignment.rb +1 -1
  249. data/lib/bioroebe/sequence/dna.rb +4 -2
  250. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +22 -8
  251. data/lib/bioroebe/sequence/protein.rb +2 -2
  252. data/lib/bioroebe/sequence/reverse_complement.rb +3 -3
  253. data/lib/bioroebe/sequence/rna.rb +9 -8
  254. data/lib/bioroebe/sequence/sequence.rb +3 -3
  255. data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +0 -0
  256. data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +0 -0
  257. data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +0 -0
  258. data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -1
  259. data/lib/bioroebe/shell/help/class.rb +68 -19
  260. data/lib/bioroebe/shell/menu.rb +5244 -5322
  261. data/lib/bioroebe/shell/{readline/readline.rb → readline.rb} +1 -3
  262. data/lib/bioroebe/shell/shell.rb +11240 -453
  263. data/lib/bioroebe/siRNA/siRNA.rb +3 -3
  264. data/lib/bioroebe/{gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb → sinatra/sinatra_interface.rb} +28 -19
  265. data/lib/bioroebe/{www/sinatra/sinatra.rb → sinatra/sinatra_wrapper.rb} +731 -754
  266. data/lib/bioroebe/string_matching/find_longest_substring.rb +2 -10
  267. data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +4 -14
  268. data/lib/bioroebe/string_matching/hamming_distance.rb +11 -10
  269. data/lib/bioroebe/string_matching/levensthein.rb +5 -17
  270. data/lib/bioroebe/string_matching/simple_string_comparer.rb +48 -4
  271. data/lib/bioroebe/string_matching/smith_waterman.rb +11 -6
  272. data/lib/bioroebe/svg/glyph.rb +4 -1
  273. data/lib/bioroebe/svg/mini_feature.rb +1 -1
  274. data/lib/bioroebe/svg/page.rb +18 -7
  275. data/lib/bioroebe/svg/svgee.rb +22 -13
  276. data/lib/bioroebe/svg/track.rb +20 -4
  277. data/lib/bioroebe/taxonomy/chart.rb +2 -2
  278. data/lib/bioroebe/taxonomy/class_methods.rb +5 -6
  279. data/lib/bioroebe/taxonomy/constants.rb +1 -1
  280. data/lib/bioroebe/taxonomy/info/info.rb +1 -1
  281. data/lib/bioroebe/taxonomy/info/is_dna.rb +1 -1
  282. data/lib/bioroebe/taxonomy/interactive.rb +1 -2
  283. data/lib/bioroebe/taxonomy/menu.rb +1 -1
  284. data/lib/bioroebe/taxonomy/node.rb +1 -1
  285. data/lib/bioroebe/taxonomy/parse_fasta.rb +4 -2
  286. data/lib/bioroebe/taxonomy/shared.rb +5 -4
  287. data/lib/bioroebe/taxonomy/taxonomy.rb +2 -4
  288. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +3 -45
  289. data/lib/bioroebe/toplevel_methods/{is_on_roebe.rb → roebe.rb} +1 -11
  290. data/lib/bioroebe/toplevel_methods/taxonomy.rb +6 -12
  291. data/lib/bioroebe/toplevel_methods/toplevel_methods.rb +5568 -0
  292. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +4 -3
  293. data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +2 -2
  294. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +16 -9
  295. data/lib/bioroebe/utility_scripts/compacter/compacter.rb +4 -2
  296. data/lib/bioroebe/utility_scripts/compare_these_two_sequences_via_blosum.rb +119 -0
  297. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +11 -9
  298. data/lib/bioroebe/utility_scripts/{consensus_sequence.rb → consensus_sequence/consensus_sequence.rb} +13 -4
  299. data/lib/bioroebe/utility_scripts/{create_batch_entrez_file.rb → create_batch_entrez_file/create_batch_entrez_file.rb} +5 -5
  300. data/lib/bioroebe/utility_scripts/{determine_antigenic_areas.rb → determine_antigenic_areas/determine_antigenic_areas.rb} +5 -5
  301. data/lib/bioroebe/utility_scripts/{determine_missing_nucleotides_percentage.rb → determine_missing_nucleotides_percentage/determine_missing_nucleotides_percentage.rb} +16 -15
  302. data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +7 -7
  303. data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +1 -1
  304. data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +2 -0
  305. data/lib/bioroebe/utility_scripts/{dot_alignment.rb → dot_alignment/dot_alignment.rb} +3 -3
  306. data/lib/bioroebe/utility_scripts/{download_files_from_rebase.rb → download_files_from_rebase/download_files_from_rebase.rb} +5 -5
  307. data/lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb +269 -0
  308. data/lib/bioroebe/utility_scripts/find_gene.rb +4 -2
  309. data/lib/bioroebe/utility_scripts/{mirror_repeat.rb → mirror_repeat/mirror_repeat.rb} +5 -5
  310. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +3 -3
  311. data/lib/bioroebe/utility_scripts/{parse_taxonomy.rb → parse_taxonomy/parse_taxonomy.rb} +15 -6
  312. data/lib/bioroebe/utility_scripts/{pathways.rb → pathways/pathways.rb} +4 -3
  313. data/lib/bioroebe/utility_scripts/{permutations.rb → permutations/permutations.rb} +3 -3
  314. data/lib/bioroebe/utility_scripts/punnet/punnet.rb +4 -2
  315. data/lib/bioroebe/utility_scripts/{show_this_dna_sequence.rb → show_this_dna_sequence/show_this_dna_sequence.rb} +1 -1
  316. data/lib/bioroebe/utility_scripts/showorf/showorf.rb +406 -10
  317. data/lib/bioroebe/version/version.rb +2 -2
  318. data/lib/bioroebe/viennarna/rnafold_wrapper.rb +5 -13
  319. data/lib/bioroebe/virus/individual_viruses/README.md +15 -0
  320. data/lib/bioroebe/virus/individual_viruses/tobacco_mosaic_virus.rb +40 -0
  321. data/lib/bioroebe/virus/virus.rb +76 -0
  322. data/lib/bioroebe/www/bioroebe.cgi +4 -3
  323. data/lib/bioroebe/www/embeddable_interface.rb +85 -49
  324. data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +6 -6
  325. data/lib/bioroebe/yaml/antisense/antisense.yml +2 -0
  326. data/lib/bioroebe/yaml/blosum/blosum50.yml +6 -0
  327. data/lib/bioroebe/yaml/blosum/blosum90.yml +2 -1
  328. data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +2 -2
  329. data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
  330. data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
  331. data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +7 -6
  332. data/lib/bioroebe/yaml/humans/human_chromosomes.yml +3 -3
  333. data/lib/bioroebe/yaml/mRNA/mRNA.yml +1 -5
  334. data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +1 -0
  335. data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +2 -1
  336. data/lib/bioroebe/yaml/promoters/35S.yml +3 -1
  337. data/lib/bioroebe/yaml/proteases/proteases.yml +3 -1
  338. data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -1
  339. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +7 -7
  340. data/spec/testing_toplevel_method_editor.rb +1 -1
  341. data/spec/testing_toplevel_method_verbose.rb +1 -1
  342. data/test/testing_dna_to_rna_conversion.rb +1 -1
  343. metadata +127 -235
  344. data/doc/blosum.md +0 -5
  345. data/lib/bioroebe/base/commandline_application/aminoacids.rb +0 -33
  346. data/lib/bioroebe/base/commandline_application/directory.rb +0 -33
  347. data/lib/bioroebe/base/commandline_application/extract.rb +0 -22
  348. data/lib/bioroebe/base/commandline_application/misc.rb +0 -502
  349. data/lib/bioroebe/base/commandline_application/opn.rb +0 -47
  350. data/lib/bioroebe/base/commandline_application/reset.rb +0 -42
  351. data/lib/bioroebe/base/commandline_application/warnings.rb +0 -36
  352. data/lib/bioroebe/base/commandline_application/write_what_into.rb +0 -29
  353. data/lib/bioroebe/base/initialize.rb +0 -18
  354. data/lib/bioroebe/base/misc.rb +0 -129
  355. data/lib/bioroebe/base/namespace.rb +0 -16
  356. data/lib/bioroebe/base/prototype/e_and_ee.rb +0 -24
  357. data/lib/bioroebe/base/prototype/misc.rb +0 -114
  358. data/lib/bioroebe/base/prototype/mkdir.rb +0 -20
  359. data/lib/bioroebe/base/prototype/reset.rb +0 -36
  360. data/lib/bioroebe/colours/misc_colours.rb +0 -80
  361. data/lib/bioroebe/colours/rev.rb +0 -44
  362. data/lib/bioroebe/colours/sdir.rb +0 -21
  363. data/lib/bioroebe/colours/sfancy.rb +0 -21
  364. data/lib/bioroebe/colours/sfile.rb +0 -21
  365. data/lib/bioroebe/colours/simp.rb +0 -21
  366. data/lib/bioroebe/colours/swarn.rb +0 -29
  367. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +0 -147
  368. data/lib/bioroebe/constants/carriage_return.rb +0 -14
  369. data/lib/bioroebe/constants/codon_tables.rb +0 -77
  370. data/lib/bioroebe/constants/database_constants.rb +0 -107
  371. data/lib/bioroebe/constants/files_and_directories.rb +0 -606
  372. data/lib/bioroebe/constants/misc.rb +0 -209
  373. data/lib/bioroebe/constants/newline.rb +0 -14
  374. data/lib/bioroebe/constants/nucleotides.rb +0 -121
  375. data/lib/bioroebe/constants/regex.rb +0 -28
  376. data/lib/bioroebe/constants/roebe.rb +0 -38
  377. data/lib/bioroebe/constants/row_terminator.rb +0 -16
  378. data/lib/bioroebe/constants/tabulator.rb +0 -14
  379. data/lib/bioroebe/constants/unicode.rb +0 -12
  380. data/lib/bioroebe/constants/urls.rb +0 -50
  381. data/lib/bioroebe/gui/gtk +0 -1
  382. data/lib/bioroebe/gui/gtk3/README.md +0 -2
  383. data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +0 -306
  384. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +0 -29
  385. data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -195
  386. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -105
  387. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -188
  388. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +0 -322
  389. data/lib/bioroebe/gui/gtk3/gene/gene.rb +0 -181
  390. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +0 -383
  391. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +0 -174
  392. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +0 -181
  393. data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +0 -101
  394. data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +0 -145
  395. data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +0 -23
  396. data/lib/bioroebe/gui/jruby/alignment/alignment.rb +0 -165
  397. data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +0 -166
  398. data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -82
  399. data/lib/bioroebe/gui/libui/README.md +0 -4
  400. data/lib/bioroebe/gui/libui/alignment/alignment.rb +0 -116
  401. data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -112
  402. data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -60
  403. data/lib/bioroebe/gui/libui/controller/controller.rb +0 -116
  404. data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +0 -161
  405. data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -76
  406. data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +0 -135
  407. data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +0 -118
  408. data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +0 -115
  409. data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +0 -190
  410. data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +0 -134
  411. data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +0 -89
  412. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +0 -113
  413. data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +0 -102
  414. data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +0 -94
  415. data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +0 -216
  416. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +0 -192
  417. data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +0 -72
  418. data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +0 -206
  419. data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -140
  420. data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +0 -262
  421. data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +0 -243
  422. data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +0 -199
  423. data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +0 -519
  424. data/lib/bioroebe/shell/colours/colours.rb +0 -235
  425. data/lib/bioroebe/shell/help/help.rb +0 -25
  426. data/lib/bioroebe/shell/misc.rb +0 -10227
  427. data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +0 -56
  428. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +0 -722
  429. data/lib/bioroebe/toplevel_methods/atomic_composition.rb +0 -198
  430. data/lib/bioroebe/toplevel_methods/base_composition.rb +0 -121
  431. data/lib/bioroebe/toplevel_methods/blast.rb +0 -153
  432. data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +0 -57
  433. data/lib/bioroebe/toplevel_methods/cat.rb +0 -71
  434. data/lib/bioroebe/toplevel_methods/chunked_display.rb +0 -92
  435. data/lib/bioroebe/toplevel_methods/cliner.rb +0 -81
  436. data/lib/bioroebe/toplevel_methods/complement.rb +0 -58
  437. data/lib/bioroebe/toplevel_methods/convert_global_env.rb +0 -39
  438. data/lib/bioroebe/toplevel_methods/databases.rb +0 -73
  439. data/lib/bioroebe/toplevel_methods/delimiter.rb +0 -19
  440. data/lib/bioroebe/toplevel_methods/digest.rb +0 -81
  441. data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +0 -146
  442. data/lib/bioroebe/toplevel_methods/e.rb +0 -20
  443. data/lib/bioroebe/toplevel_methods/editor.rb +0 -21
  444. data/lib/bioroebe/toplevel_methods/esystem.rb +0 -22
  445. data/lib/bioroebe/toplevel_methods/exponential_growth.rb +0 -74
  446. data/lib/bioroebe/toplevel_methods/extract.rb +0 -56
  447. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +0 -269
  448. data/lib/bioroebe/toplevel_methods/frequencies.rb +0 -99
  449. data/lib/bioroebe/toplevel_methods/hamming_distance.rb +0 -60
  450. data/lib/bioroebe/toplevel_methods/infer.rb +0 -66
  451. data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +0 -101
  452. data/lib/bioroebe/toplevel_methods/levensthein.rb +0 -63
  453. data/lib/bioroebe/toplevel_methods/log_directory.rb +0 -109
  454. data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +0 -55
  455. data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +0 -88
  456. data/lib/bioroebe/toplevel_methods/matches.rb +0 -259
  457. data/lib/bioroebe/toplevel_methods/misc.rb +0 -596
  458. data/lib/bioroebe/toplevel_methods/nucleotides.rb +0 -787
  459. data/lib/bioroebe/toplevel_methods/number_of_clones.rb +0 -63
  460. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +0 -79
  461. data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +0 -236
  462. data/lib/bioroebe/toplevel_methods/opn.rb +0 -34
  463. data/lib/bioroebe/toplevel_methods/palindromes.rb +0 -155
  464. data/lib/bioroebe/toplevel_methods/parse.rb +0 -59
  465. data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +0 -68
  466. data/lib/bioroebe/toplevel_methods/rds.rb +0 -24
  467. data/lib/bioroebe/toplevel_methods/remove.rb +0 -86
  468. data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +0 -35
  469. data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +0 -68
  470. data/lib/bioroebe/toplevel_methods/rna_splicing.rb +0 -73
  471. data/lib/bioroebe/toplevel_methods/rnalfold.rb +0 -69
  472. data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +0 -116
  473. data/lib/bioroebe/toplevel_methods/shuffleseq.rb +0 -37
  474. data/lib/bioroebe/toplevel_methods/statistics.rb +0 -53
  475. data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +0 -62
  476. data/lib/bioroebe/toplevel_methods/three_delimiter.rb +0 -34
  477. data/lib/bioroebe/toplevel_methods/time_and_date.rb +0 -53
  478. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +0 -31
  479. data/lib/bioroebe/toplevel_methods/truncate.rb +0 -48
  480. data/lib/bioroebe/toplevel_methods/url.rb +0 -36
  481. data/lib/bioroebe/toplevel_methods/verbose.rb +0 -59
  482. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -31
  483. data/lib/bioroebe/utility_scripts/showorf/help.rb +0 -33
  484. data/lib/bioroebe/utility_scripts/showorf/initialize.rb +0 -52
  485. data/lib/bioroebe/utility_scripts/showorf/menu.rb +0 -68
  486. data/lib/bioroebe/utility_scripts/showorf/reset.rb +0 -36
  487. data/lib/bioroebe/utility_scripts/showorf/run.rb +0 -152
  488. data/lib/bioroebe/utility_scripts/showorf/show.rb +0 -97
  489. /data/doc/{german_names_for_the_aminoacids.md → german_names_for_the_aminoacids/german_names_for_the_aminoacids.md} +0 -0
  490. /data/doc/{pdb_ATOM_entry.md → pdb_ATOM_entry/pdb_ATOM_entry.md} +0 -0
  491. /data/doc/{resources.md → resources/resources.md} +0 -0
  492. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/customized_dialog.rb +0 -0
  493. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/anti_sense_strand/anti_sense_strand.config +0 -0
  494. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +0 -0
  495. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +0 -0
  496. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/hamming_distance/hamming_distance.config +0 -0
  497. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/levensthein_distance/levensthein_distance.config +0 -0
  498. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/protein_to_DNA/protein_to_DNA.config +0 -0
  499. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.config +0 -0
  500. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.config +0 -0
  501. /data/lib/bioroebe/yaml/{base_composition_of_dna.yml → base_composition_of_dna/base_composition_of_dna.yml} +0 -0
  502. /data/lib/bioroebe/yaml/{nuclear_localization_sequences.yml → nuclear_localization_sequences/nuclear_localization_sequences.yml} +0 -0
  503. /data/lib/bioroebe/yaml/{talens.yml → talens/talens.yml} +0 -0
data/doc/README.gen CHANGED
@@ -10,239 +10,193 @@ DEFAULT_HEADER
10
10
  <img src="https://i.imgur.com/AfduheY.png" style="margin: 4px; margin-left: 12px;"/>
11
11
 
12
12
  The **<span style="color: darkblue">above pictures</span>**,
13
- more or less, represent DNA - in particular a **dsDNA helix**
14
- (a **double-stranded DNA helix**).
13
+ more or less, represent DNA or, more generally, information in biological
14
+ systems. In particular the very left picture used to represent a
15
+ <b>dsDNA helix</b> (a **double-stranded DNA helix**), but it no longer
16
+ really looks like a dsDNA helix, due to my failed attempts at 'improving' it over
17
+ the years. One day I really should re-do that image ...
15
18
 
16
- The very left picture no longer looks like a dsDNA helix, though, due to
17
- my attempts to 'improve' it over the years and failing hard at that.
18
- I really should re-do that image one day.
19
-
20
- The other two images are more recent. For instance, the second picture (the one
19
+ The other images are more recent. For instance, the second picture (the one
21
20
  that is almost in the middle) shows a schematic for a dsDNA helix. Of course
22
- DNA does not look like this at all whatsoever (hydrogen bonds can not possibly
23
- ever look as depicted like that, aside from the spacing being incorrect), but
24
- it is a pretty picture nonetheless - so let's go with pretty for the fancy
25
- visual show effects! \o/
21
+ DNA does not look like this at all whatsoever - hydrogen bonds can not possibly
22
+ ever look as depicted like that, aside from the spacing being incorrect
23
+ anyway - but it is a pretty picture nonetheless, so let's go with pretty,
24
+ for the fancy visual show effects! \o/
26
25
 
27
26
  Minor nitpick: keep in mind that DNA in regular cells is right-handed, so if
28
27
  you see a DNA double helix displayed that is going in the left direction then
29
- this is technically incorrect. So the image on the very right hand side is
30
- showing two dsDNA where one is evidently "incorrect" - but we could reason
31
- about it still being correct if we assume that one of the two dsDNA molecules
32
- shown is a synthetic one for a mirror cell, similar to racemases and epimers
33
- in chemistry.
28
+ this is <b>technically incorrect</b>. So the image on the very right hand side
29
+ is showing two dsDNA where one is evidently "<b>incorrect</b>", since it is
30
+ a mirror image - but we could reason about it still being correct <b>if</b> we
31
+ were to assume that one of the two dsDNA molecules shown is a synthetic one
32
+ for a mirror cell, similar to how racemases and epimers (in chemistry) may
33
+ be defined.
34
34
 
35
35
  The last image, that is the fourth image from left, shows a dsDNA helix. It
36
36
  is a bit better than the other pictures because it is **somewhat** more
37
37
  accurate. The distance between the two helices is <b>2nm</b>, so this
38
38
  picture kind of shows this somewhat more accurately. The distance between
39
39
  two adjacent nucleotide pairs is <b>0.34 nm</b> - so that kind of fits
40
- for the fourth image; not quite perfect, but somewhat close to that.
40
+ the distance shown in the fourth image; not quite perfect, but somewhat
41
+ close to that.
41
42
 
42
43
  ## About the BioRoebe project: History and Goals
43
44
 
44
45
  The **BioRoebe project** was initially created in the year **2007** -
45
- or at the least close to 2007, give or take, under **another name**.
46
+ or at least close to that year, give or take, under <b>another
47
+ name</b>.
46
48
 
47
49
  I was using the project for quite some time for my own, personal
48
50
  use cases in regards to **bioinformatics** and **molecular biology**;
49
- just a small hobby project, for very small, minor tasks. In many
50
- ways the project is still **just a hobby project** really - it's
51
- not a professional suite of software, and won't be for the
52
- foreseeable time either due to time constraints alone.
51
+ just a small hobby project, for very small, minor tasks.
52
+
53
+ In many ways the project is still **just a hobby project** really -
54
+ it's not a professional suite of software, and won't be for the
55
+ foreseeable time either, due to time constraints alone.
53
56
 
54
- In **early 2013**, the project was finally published on
57
+ In **early 2013**, the project was finally <b>published</b> on
55
58
  **rubygems.org**, which has been its new home ever since -
56
59
  and probably will remain its home, for a very long time to come.
57
- It is hard to predict the future accurately, though.
58
-
59
- Nonetheless, since as of **2013** the project has grown considerably
60
- in size. That makes describing the project a bit difficult, too, since
61
- the **use cases** for the project have increased, changed and been adapted
62
- over the years. There is not merely 'one' use case only - the **bioroebe**
63
- gem is **a toolset project**. Different people make use of different
64
- tools. Even programming languages may vary - while most of the
65
- bioroebe project is written in ruby, there are some (smaller)
66
- parts written in Java as well and I do not rule out using Python or
67
- other programming languages to do specific tasks either. Some parts
68
- of the bioroebe project are more widely (and often) used; other parts
69
- refer to **niche use cases** and thus are less frequently used.
60
+ It is hard to predict the future accurately, though. Perhaps I
61
+ may use the project for additional professional work in the future -
62
+ I don't know yet.
63
+
64
+ Nonetheless, since the year **2013**, the project has grown
65
+ considerably in size. That makes describing the project a bit difficult,
66
+ too, since the **use cases** for the project have increased, changed
67
+ and been adapted over the years. More code, and more use cases,
68
+ begetting more (and better) documentation. That makes sense, right?
69
+
70
+ So there is not merely 'one' use case only for this project - the
71
+ **bioroebe** gem is ultimately <b>a toolset project</b>. Different
72
+ people make use of different tools. Even programming languages may
73
+ vary when it comes to this project - while most of the bioroebe project is
74
+ written in <b>ruby</b>, there are some (smaller) parts written in Java as
75
+ well and I do not rule out using Python or other programming languages to
76
+ do specific tasks either in the long run, including C++, C or any other
77
+ programming language. Some parts of the bioroebe project are more widely
78
+ (and often) used; other parts refer to **niche use cases** and thus
79
+ are less frequently used.
70
80
 
71
81
  Despite the plethora of options supported by this project, the
72
82
  **BioRoebe project** has several very important
73
83
  <span style="color: darkblue; font-weight: bold">goals</span>
74
- that are still **valid as of today** and stand out compared to
75
- other goals of lesser importance.
84
+ that are still **valid as of today** and <b>stand out compared to
85
+ other goals of lesser importance</b>.
76
86
 
77
- The **primary purpose** for this project - that is the **main use
78
- case**, if but one has to be named - is to be able to quickly help
79
- in regards to **bioinformatics-related tasks**, and associated
87
+ The <b>primary purpose</b> of this project - that is the <b>main use
88
+ case</b>, if but one has to be named - is <b>to be able to quickly help
89
+ in regards to bioinformatics-related tasks</b>, and associated
80
90
  <b>wet-lab</b>-related use cases from within molecular biology.
81
91
 
82
92
  For example, the project should allow its users to run it on
83
- a **local computer**, on a **remote computer**, be used on
93
+ a <b>local computer</b>, on a **remote computer**, be used on
84
94
  the commandline, via **different GUIs** or via the **www**
85
- or on a smartphone/mobile device in the long run.
95
+ or on a smartphone/mobile device in the long run. Flexible use
96
+ cases all the way down the rabbit hole.
86
97
 
87
- **There should be no limitation in where and how the project should
88
- or could be run**, including the possibility to run it on as many
98
+ <b>There should be no limitation in where and how the project should
99
+ or could be run</b>, including the possibility to run it on as many
89
100
  different operating systems as possible (at the least if ruby is
90
101
  available on that operating system; or possibly Java as an
91
- additional option one day). This is why the project has to
92
- try to **stay as flexible as possible** - we must support
93
- different operating systems, with all their quirks and
94
- unique oddities, if this is doable.
102
+ additional option one day, including jruby-SWING bindings). This
103
+ is why the project has to try to **stay as flexible as possible** -
104
+ we <b>must</b> support different operating systems, with all their
105
+ quirks and unique oddities, if this is doable.
95
106
 
96
107
  Note that this goal also applies to **programming languages**,
97
- as pointed out already. While the primary focus for the **bioroebe**
108
+ as pointed out above. While the primary focus for the **bioroebe**
98
109
  project is (and will remain) on ruby, I specifically **do not** exclude
99
110
  the possibility that languages such as Java, C/C++ or even Python may
100
111
  be included and used in this project. In fact: since as of **2021**,
101
112
  Java-specific parts will be extended in the bioroebe project as well.
113
+
102
114
  In the long run I would like to support both ruby and Java from the
103
- get go. The primary question that is relevant here in regards
104
- to different programming languages is that of <b>use case</b>;
115
+ get go, on an equal level. The primary question that is relevant here in
116
+ regards to different programming languages is that of <b>use case</b>;
105
117
  <b>usability</b> and <b>usefulness</b>, and then <b>maintainability
106
- of the code base</b> as well, as a secondary consideration.
118
+ of the code base</b> as well, as a secondary consideration. Documentation
119
+ is also equally important, so I will try to improve the documentation
120
+ systematically - both from a user's point of view, but also from
121
+ the point of view of other developers who may try to make use of
122
+ this project.
107
123
 
108
124
  The bioroebe project additionally has to **solve real problems**,
109
125
  in particular from a molecular biology point of view. Most
110
126
  bioinformatics-related toolkits were written by experts in the
111
127
  field who tend to have a strong background in **mathematics** and
112
128
  **informatics**. While that is a perfectly fine background to have,
113
- and most definitely an **asset**, I simply came from the "<i>other</i>"
114
- side - molecular biology and molecular genetics. This makes for
115
- a difference in thinking too, because I tend to be closer towards
116
- the side of, say, synthetic biology, than on the side of
129
+ and most definitely an **asset**, I myself came from the "<i>other</i>"
130
+ side of the coin - molecular biology and molecular genetics. This makes
131
+ for a difference in thinking too, because I tend to be closer towards
132
+ the side of, say, <b>synthetic biology</b>, than on the side of
117
133
  (bio)mathematics or statistics, due to my own interests in the
118
- way how I think or approach a given problem set. It's different
119
- to a primary in-silico driven approach. Nonetheless, the **bioroebe
120
- project** attempts to remain as flexible as possible, including
121
- exploring other ways in how a given problem set can be solved.
122
-
123
- When the bioroebe project was initially created, I wanted it to be
124
- **more natural** to people who may not necessarily excel at designing
125
- (or even understanding) algorithms. Not that algorithms should
126
- be neglected, mind you, for efficiency reasons alone; but the
127
- primary view for the whole BioRoebe suite of programs will be
128
- to focus on "real" biology first and foremost, not just
129
- <i>in-silico</i> dry runs or simulations, per se. Although most
130
- of the fields of bioinformatics is dominated by mathematicians and
131
- computer scientists, I know that there are plenty of people who
132
- come from a simpler **molecular biology**-specific background. So the
133
- project tries to cater primarily to the latter group, without trying
134
- to exclude anyone else. This also includes the focus on
135
- **documentation** - while the documentation is far from perfect,
136
- I try to polish it every now and then. The aim here is to make
137
- the documentation useful for "Average Joe" - the common user,
138
- at the least from a wet-lab focus on molecular biology.
134
+ way how I think or approach a given problem set. I find life fascinating,
135
+ more so than computer systems - thus, biological information is
136
+ more appealing to me than computer-stored information.
137
+
138
+ It is most definitely a different view to primarily in-silico driven
139
+ approaches. Nonetheless, the **bioroebe project** attempts to remain
140
+ as flexible as possible, including exploring other ways in how a
141
+ given problem set can be solved.
142
+
143
+ When the bioroebe project was initially created, many years ago, I wanted it
144
+ to be <b>more natural</b> to people who may not necessarily excel at designing
145
+ (or even understanding) algorithms. Not that algorithms should be neglected,
146
+ mind you, for <b>efficiency reasons</b> alone; but the primary view for the
147
+ whole BioRoebe suite of programs will be to focus on "real" biology first
148
+ and foremost, not just <i>in-silico</i> dry runs or simulations, per se.
149
+
150
+ Although most of the field of bioinformatics is dominated by mathematicians
151
+ and computer scientists, I know that there are plenty of people who come from
152
+ a simpler **molecular biology**-specific background. So the project tries to
153
+ cater primarily to the latter group, without trying to exclude anyone else.
154
+ This also includes the focus on **documentation** - while the documentation
155
+ right now is far from perfect, I try to polish it every now and then. The
156
+ main aim here is to make the documentation useful for "Average Joe" - the
157
+ common user, at the least from a wet-lab focus on molecular biology.
139
158
 
140
159
  So, <b>how</b> can the BioRoebe project be helpful to its users?
141
160
 
142
161
  The **BioRoebe** project can be used to solve (some) problems
143
162
  related to **biology**, **molecular biology**, **genetics**
144
- and, last but not least, **bioinformatics**.
163
+ and, last but not least, **bioinformatics**, perhaps even
164
+ <b>synthetic biology</b>.
145
165
 
146
166
  For example, say that you quickly wish to **reverse-translate a
147
- sequence of amino acids**, and select all possible codons,
148
- or <b>the most likely codon candidate</b>; or just random codon
149
- candidates, and display that result on the commandline or via
150
- a www interface.
167
+ sequence of amino acids**, and select all possible codons from
168
+ that sequence, or <b>the most likely codon candidate(s)</b>; or
169
+ just random codon candidates, and display that result on the
170
+ commandline or via a www interface.
151
171
 
152
- This is easily possible through **BioRoebe**. How fancy and
153
- useful! \o/
172
+ This is easily possible through **BioRoebe**. <b>How fancy and
173
+ useful!</b> \o/
154
174
 
155
175
  For instance, when I do this on the commandline via bash and
156
- KDE konsole:
176
+ KDE konsole, invoking the aliased binary called
177
+ <b>revseq</b>, for <b>reverse sequence</b>:
157
178
 
158
- revseq AAT # Alanine-Alanine-Threonine
179
+ revseq AAT # Alanine-Alanine-Threonine, so three amino acids
159
180
 
160
- Then it will show the following DNA sequence:
181
+ Then on the commandline the following result will be shown,
182
+ representing a <b>DNA sequence</b>:
161
183
 
162
184
  GCCGCCACC # These are 9 nucleotides, corresponding to the three amino acids.
163
185
 
164
- (This will only work if your alias of revseq points
165
- to the correct bin/ entry. In my case I have
166
- aliased **revseq** onto **bin/deduce_most_likely_aminoacid_sequence**.
167
- Of course you can use any other alias, or just flat out call
168
- **bin/deduce_most_likely_aminoacid_sequence** directly.)
169
-
170
- ## Requiring BioRoebe and starting the bioshell interface
171
-
172
- In order to require **BioRoebe**, do use a line in ruby code such
173
- as the following one:
174
-
175
- require 'bioroebe'
176
-
177
- To **automatically** include the **main namespace** upon require-time,
178
- the following line of code can be used:
179
-
180
- require 'bioroebe/autoinclude'
181
-
182
- Note that this will include into the Object namespace, so if you want
183
- to have more control over the include-action, you need to first require
184
- the bioroebe project, and then include it onto the target namespace
185
- that you wish to use specifically, such as a subclass or another
186
- module. If you do not need to autoinclude then I recommend to
187
- simply use the first variant how to require the bioroebe gem - that
188
- should suffice. The reason why the file called **autoinclude.rb**
189
- exists is mostly due to laziness, so we can type less. We can omit
190
- <b>include Bioroebe</b> after all.
191
-
192
- The **BioRoebe** project comes with a file called **bin/bioshell**,
193
- which allows you to start this project from a typical shell, such
194
- as **bash**, by issuing this command on the command prompt:
195
-
196
- bioshell
197
-
198
- You can also load up the project and run it from within a .rb file
199
- or during an **IRB session**, by doing the following:
200
-
201
- require 'bioroebe'
202
-
203
- Bioroebe::BioShell[]
204
-
205
- Or alternatively, which may be more convenient to type:
206
-
207
- require 'bioroebe'
208
-
209
- Bioroebe.start_shell # No need to use the [], unlike the example shown above
210
-
211
- ## Usage of the BioRoebe project
212
-
213
- Not all subcomponents within the **BioRoebe** project have received
214
- equal attention and thus, the **quality** of these subcomponents may
215
- often differ, to word this nicely.
216
-
217
- Patches and contributions to extend functionality, improve the
218
- documentation, fix existing bugs or improve the usability and
219
- general quality of the project, are welcome. Take note that I
220
- in general tend to add **new** entries at the bottom of this file
221
- here (README.gen or README.md, respectively); use the navigation
222
- menu on the **top right** of this page to quickly jump to these
223
- entries. Sometimes headers change a bit, but by and large content
224
- is rarely removed; so if you ever found something in the past,
225
- you should be able to find it again in the future - except for
226
- when APIs are removed. These may be omitted in the documentation.
227
- That's quite rare, though.
228
-
229
- I will move entries that have received updates with more recent
230
- releases to the ~bottom of this very page here - that way it should
231
- be a bit easier to keep up to date with what has changed within
232
- this project. It is admittedly becoming a fairly large project,
233
- which is why I try to keep things somewhat organized.
234
-
235
- In the long run I will also, most likely, publish the documentation
236
- in a "booklet" format such as https://yaml.readthedocs.io/en/latest/.
237
- That way one may be more easily able to read individual
238
- subchapters.
186
+ (This will only work if your alias of <b>revseq</b> points
187
+ to the correct bin/ entry as well; I keep the rcfiles gem for
188
+ that, which includes all the aliases I use. In my case I have
189
+ aliased **revseq** onto **bin/deduce_most_likely_aminoacid_sequence**
190
+ actually, as can be seen in the rcfiles gem. Of course you can use
191
+ any other alias, or just flat out call **bin/deduce_most_likely_aminoacid_sequence**
192
+ directly. I just like to be succinct and to-the-point whenever
193
+ possible, so the revseq alias suits the way my brain works.)
239
194
 
240
195
  ## Differences and Compatibility towards BioRuby
241
196
 
242
- This subsection will explain some of the philosophical - and, more
243
- importantly, **practical** - differences between **BioRoebe** and
244
- **BioRuby**, as both projects have somewhat similar, hence **shared
245
- goals**.
197
+ This subsection will explain some of the philosophical - and, more importantly,
198
+ **practical** - differences between **BioRoebe** and **BioRuby**, as both projects
199
+ have somewhat similar, hence <b>shared goals</b>.
246
200
 
247
201
  One philosophical difference, for example, is that BioRoebe is less
248
202
  focused on bioinformatics as such, and more focused on **molecular
@@ -340,6 +294,106 @@ instead, they can do so, too - see the <b>API</b> for codon tables
340
294
  later on. Simply define your own constants and pass them to the
341
295
  appropriate methods.
342
296
 
297
+ ## The rewrite in November 2023
298
+
299
+ The bioroebe gem was partially rewritten in November 2023.
300
+
301
+ The primary goal for this rewrite was to add a jruby-SWING
302
+ GUI that allows the user to make use of the interactive
303
+ bioshell from a SWING GUI. This GUI works right now, but it
304
+ is not very pretty or elegant, as the following image shows
305
+ (you may have to scroll down a little bit, in order to
306
+ see it):
307
+
308
+ <img src="https://i.imgur.com/bVr8eIx.png" style="margin: 1em">
309
+
310
+ I also made sure that this does indeed work on Windows, which
311
+ was the primary reason this GUI was added in the first place -
312
+ and it does indeed work on Windows. \o/
313
+
314
+ In the coming months this will be improved.
315
+
316
+ ## Usage of the BioRoebe project
317
+
318
+ Not all subcomponents within the **BioRoebe** project have received equal
319
+ attention and thus, the **quality** of these subcomponents may often
320
+ differ, to word this nicely. In other words: the quality of the code will
321
+ be different, with some parts being tested more than others.
322
+
323
+ Patches and contributions to extend functionality, improve the documentation,
324
+ fix existing bugs or improve the usability and general quality of the project,
325
+ are welcome. Take note that I generally tend to add **new**
326
+ entries at the bottom of this file here (README.gen or README.md, respectively);
327
+ use the navigation menu on the **top right** of this page to quickly jump to
328
+ these entries.
329
+
330
+ Sometimes headers change a bit, but by and large content is rarely removed; so
331
+ if you ever found something in the past, you should be able to find it again
332
+ in the future - except for when APIs or submodules are removed. These may
333
+ be omitted in the documentation. That rarely happens, though.
334
+
335
+ I will typically move entries that have received updates with more recent
336
+ releases to the ~bottom of this very page here - that way it should be a bit
337
+ easier to keep up to date with what has changed within this project. It is
338
+ admittedly becoming a fairly large project, which is why I try to keep
339
+ things somewhat organized here.
340
+
341
+ In the long run I will also, most likely, publish the documentation
342
+ in a "booklet" format such as https://yaml.readthedocs.io/en/latest/.
343
+ That way one may be more easily able to read individual subchapters.
344
+
345
+ The rest of this document shall now attempt to explain the different
346
+ parts of the bioroebe-gem.
347
+
348
+ ## Requiring the BioRoebe project and starting the bioshell interface
349
+
350
+ In order to require **BioRoebe** you can use the following line of
351
+ ruby code:
352
+
353
+ require 'bioroebe'
354
+
355
+ To <b>automatically</b> include the **main namespace** upon require-time,
356
+ the following line of code can be used:
357
+
358
+ require 'bioroebe/autoinclude'
359
+
360
+ Note that this will include into the Object namespace, so if you want
361
+ to have more control over the include-action (and avoid inclusion into
362
+ ruby's Object namespace), you need to first require the bioroebe
363
+ project, as shown above, and then include it onto the target namespace
364
+ that you wish to use specifically, such as a subclass or another
365
+ module.
366
+
367
+ If you do not need to autoinclude then I recommend to simply use the
368
+ first variant (require 'bioroebe') how to require the bioroebe gem -
369
+ that should suffice for most use cases. The reason why the file called
370
+ <b>autoinclude.rb</b> exists is mostly due to my inherent laziness,
371
+ as well as a desire to be flexible, so we can ultimately type less.
372
+ We can omit <b>include Bioroebe</b> after all in the second case.
373
+
374
+ The **BioRoebe** project comes with a file called **bin/bioshell**,
375
+ which allows you to start this project from a typical shell, similar
376
+ to <b>bash</b>, by issuing this command on the command prompt:
377
+
378
+ bioshell
379
+
380
+ (If this does not work, make sure that bin/bioshell is in your
381
+ $PATH; you can also extract the .gem and move bin/bioshell to
382
+ any location you desire it to be, of course.)
383
+
384
+ You can also load up the project and run it from within a .rb
385
+ file or during an **IRB session**, by doing the following:
386
+
387
+ require 'bioroebe'
388
+
389
+ Bioroebe::BioShell[]
390
+
391
+ Or alternatively, which may be more convenient to type:
392
+
393
+ require 'bioroebe'
394
+
395
+ Bioroebe.start_shell # No need to use the [] here, unlike in the example shown above
396
+
343
397
  ## Readline support in the BioRoebe project
344
398
 
345
399
  The **BioRoebe** project will attempt to make use of **Readline**, if
@@ -384,6 +438,61 @@ Personally I recommend that people should switch to **psych** and
384
438
  give it a try. It should work fine really. But ultimately this is
385
439
  up to them.
386
440
 
441
+ ## Colours support in the Bioroebe project
442
+
443
+ <b>Colours</b> can be immensely useful, at the least to most people.
444
+
445
+ The bioroebe project has to support colours. The primary file
446
+ that bundles together most of that colours-related functionality
447
+ can be found at:
448
+
449
+ require 'bioroebe/colours/colours.rb'
450
+
451
+ This, in turn, depends on the gem called <b>colours</b>.
452
+
453
+ Code exists in the bioroebe project that can be used on the
454
+ commandline to output colours, such as the following image
455
+ shows:
456
+
457
+ <img src="https://i.imgur.com/VbfOME3.png" style="margin: 1em">
458
+
459
+ If you do not want or need colours, you can disable them via
460
+ this method call:
461
+
462
+ Bioroebe.disable_colours
463
+
464
+ Conversely, to enable colours again, use:
465
+
466
+ Bioroebe.enable_colours
467
+
468
+ Classes that subclass from Bioroebe::CommandlineApplication will
469
+ have a method called <b>.use_colours?</b>, which can be used
470
+ to query whether the class at hand makes use of colours.
471
+
472
+ This functionality depends on the gem called <b>colours</b>.
473
+
474
+ In the past the code allowed for konsole-colours support (KDE
475
+ konsole) and simpler terminals without RGB colour support.
476
+
477
+ When the bioroebe project was rewritten in April 2020, this was
478
+ changed. The project now depends on the Colours module, and it
479
+ will try to use that project to its full possibilities, including
480
+ KDE konsole colours (RGB colours) by default. This will also include
481
+ so-called "HTML colours", such as :slateblue or :steelblue.
482
+
483
+ ## class Bioroebe::DetermineMissingNucleotidesPercentage
484
+
485
+ The small class Bioroebe::DetermineMissingNucleotidesPercentage can be
486
+ used to determine missing nucleotide content, in percentage.
487
+
488
+ For instance, say that you know the GC content of a given
489
+ DNA sequence is at 48%. You want to know the GC and AT
490
+ content of that quickly, so you invoke this class.
491
+
492
+ Its output may then look like this:
493
+
494
+ <img src="https://i.imgur.com/txV3ZGY.png" style="margin: 1em">
495
+
387
496
  ## Phred quality score
388
497
 
389
498
  If you need support for PHRED, you could use this method:
@@ -480,28 +589,6 @@ now and then.
480
589
  TLR3 | double-stranded RNA (dsRNA) | https://en.wikipedia.org/wiki/TLR3
481
590
  TLR4 | binds cell-wall components of gram-negative bacteria (via their LPS) | https://en.wikipedia.org/wiki/TLR4
482
591
 
483
- ## Enzymes
484
-
485
- This subsection will be expanded at a later time - it will be
486
- about enzymes in general.
487
-
488
- For now, if you need a table, as memory, for the enzyme classes,
489
- here is one:
490
-
491
- | Enzyme class (EC) | Name |
492
- |-------------------- |------------------|
493
- | 1 | Oxidoreductases |
494
- | 2 | Transferases |
495
- | 3 | Hydrolases |
496
- | 4 | Lyases |
497
- | 5 | Isomerases |
498
- | 6 | Ligases |
499
- | 7 | Translocases |
500
-
501
- See wikipedia for more information:
502
-
503
- https://en.wikipedia.org/wiki/Enzyme_Commission_number#Top_level_codes
504
-
505
592
  ## Using Bioroebe in a project
506
593
 
507
594
  Of considerable usefulness to the end-user may be the **BioShell**.
@@ -2451,24 +2538,6 @@ And, my favourite one:
2451
2538
 
2452
2539
  https://labcalculator.net/wiki/oligo-tm
2453
2540
 
2454
- ## SimpleStringComparer
2455
-
2456
- class **SimpleStringComparer** can be used to compare two strings visually,
2457
- similar as to how **NCBI BLAST** compares two sequences to one another.
2458
-
2459
- By default the output of that class will be, in colours, on the commandline,
2460
- but you can disable this like so:
2461
-
2462
- Bioroebe::SimpleStringComparer.new(ARGV) { :disable_colours }
2463
-
2464
- Let's look at another example:
2465
-
2466
- Bioroebe::SimpleStringComparer.new('AAAAAAAAAAAAAATTTTTTTTTTTAAAAAAAAAAAATATA|GAAAAAAAAAAAAAAAATATTTTTTTTTTTTTTTTTTTTTT')
2467
-
2468
- This may look like so:
2469
-
2470
- <img src="https://i.imgur.com/kkqkpmQ.png" style="margin-left: 2em">
2471
-
2472
2541
  ## Bioroebe::SanitizeNucleotideSequence
2473
2542
 
2474
2543
  class **Bioroebe::SanitizeNucleotideSequence** can be used to **sanitize
@@ -2529,7 +2598,7 @@ class will replace the older code. But it will happen.
2529
2598
  If you wish to make use of this class in your own projects,
2530
2599
  first require it:
2531
2600
 
2532
- require 'bioroebe/nucleotides/show_nucleotide_sequence.rb'
2601
+ require 'bioroebe/nucleotides/show_nucleotide_sequence/show_nucleotide_sequence.rb'
2533
2602
 
2534
2603
  And then you can use it to display a nucleotide sequence,
2535
2604
  such as via:
@@ -3381,7 +3450,7 @@ This would colourize Lysine. K is the one amino acid letter
3381
3450
  for Lysine.
3382
3451
 
3383
3452
  Next, you can test whether this works. A simple way is to
3384
- ask for the Ubiquitin sequence. Pay attention to lysine
3453
+ ask for the Ubiquitin sequence. Pay attention to <b>lysine</b>
3385
3454
  at position 48.
3386
3455
 
3387
3456
  From within the bioshell, you can query for the ubiquitin
@@ -4135,7 +4204,7 @@ DNA-strand**, whereas the **palindrome** occurs on the **sister
4135
4204
  strand**.
4136
4205
 
4137
4206
  Bioroebe supports the simple "creation" of mirror repeats, through the
4138
- file <b>bioroebe/utility_scripts/mirror_repeat.rb</b> and the method
4207
+ file <b>bioroebe/utility_scripts/mirror_repeat/mirror_repeat.rb</b> and the method
4139
4208
  there called <b>Bioroebe.mirror_repeat_of()</b>.
4140
4209
 
4141
4210
  Simply pass in the sequence that you wish to mirror, such as:
@@ -4250,50 +4319,6 @@ Why is this important to understand? Well, you may wish to design
4250
4319
  or check that both primers in PCR, forward and reverse, would be
4251
4320
  correct.
4252
4321
 
4253
- ## Logging and Log output
4254
-
4255
- The **BioRoebe project** may autogenerate some files, including
4256
- **log files**.
4257
-
4258
- In order to be able to do so, the bioroebe project needs the user
4259
- to be able to access a **base directory**, the so-called
4260
- **working base directory**. This is where bioroebe assumes
4261
- most working files to exist.
4262
-
4263
- On my linux system this used to default to the directory
4264
- called **/Depot/Bioroebe/**. On other systems, the log directory
4265
- may default into the **user's home directory**, via a call to
4266
- <b>"#{File.expand_path('~')}/"</b>. (I use this presently,
4267
- since **2020**.)
4268
-
4269
- This should work on most systems, but it may not be what you
4270
- want to have or use in your own workflow. Thus, code exists
4271
- that allows you to designate another log directory to use.
4272
-
4273
- The API for this is simply called:
4274
-
4275
- Bioroebe.set_log_dir()
4276
-
4277
- The method can be found in
4278
- **lib/bioroebe/toplevel_methods/log_directory.rb**.
4279
-
4280
- If you want to do this from within the **bioshell** itself,
4281
- try:
4282
-
4283
- set_log_dir /tmp/test
4284
- setlogdir /tmp/test
4285
-
4286
- If you use the interactive bioroebe-shell then you can use
4287
- **home?** to determine where the log directory is on your
4288
- system. For example, I tend to just use **/root/Bioroebe/**
4289
- these days, when I am the superuser.
4290
-
4291
- Note that you can also define the environment variable
4292
- called **BIOROEBE_DEFAULT_LOG_DIRECTORY**. If this is
4293
- set on startup of the bioroebe-shell, then it will
4294
- overrule the initial :default value that is used
4295
- otherwise.
4296
-
4297
4322
  ## Working with Blosum
4298
4323
 
4299
4324
  **BLOSUM** is used to sequence-align proteins; thus, it can be
@@ -4722,6 +4747,16 @@ than by phosphatases. Thus, there is a bias in regards to the
4722
4747
  publications that are published. This bias is not necessarily
4723
4748
  "existing" on the level of the cell(s) itself.
4724
4749
 
4750
+ Next, a table is shown to compare some search databases:
4751
+
4752
+ Name | remote URL
4753
+ ----------------|------------------------------------------------------------------------|
4754
+ PubMed | https://pubmed.ncbi.nlm.nih.gov/ |
4755
+ Google Scholar | https://scholar.google.com/ |
4756
+ Web of Science | https://clarivate.com/webofsciencegroup/solutions/web-of-science/ |
4757
+ Scopus | https://www.scopus.com/ |
4758
+ ----------------|------------------------------------------------------------------------|
4759
+
4725
4760
  ## Browser setting in the configuration file
4726
4761
 
4727
4762
  The browser (for opening external websites) is defined here:
@@ -4862,7 +4897,7 @@ Let's take the phage lambda of E. coli. The **refseq entry** is at:
4862
4897
 
4863
4898
  https://www.ncbi.nlm.nih.gov/nuccore/9626243
4864
4899
 
4865
- The genome is **48502 bp** long (**dsDNA**).
4900
+ The genome is <b>48502 bp</b> long (<b>dsDNA</b>).
4866
4901
 
4867
4902
  The NCBI ID is: **NC_001416.1**
4868
4903
 
@@ -5296,44 +5331,6 @@ Try the following instruction in order to **disable** it again:
5296
5331
 
5297
5332
  no_expand_cd_aliases
5298
5333
 
5299
- ## UniProt
5300
-
5301
- UniProt is a freely accessible database of protein sequence and
5302
- functional information. What makes it also useful for
5303
- bioinformatics is that you can easily query the FASTA sequence
5304
- of a protein.
5305
-
5306
- Consider the protein called **A2Z669**. The **entry** to this protein
5307
- can be found here:
5308
-
5309
- https://www.uniprot.org/uniprot/A2Z669
5310
-
5311
- And the corresponding FASTA sequence of that protein can be
5312
- found here, if you append **.fasta** to the URL:
5313
-
5314
- https://www.uniprot.org/uniprot/A2Z669.fasta
5315
-
5316
- If you wish to save this file, from within **Bioroebe** itself,
5317
- then you can use the following API:
5318
-
5319
- Bioroebe.fetch_data_from_uniprot()
5320
- Bioroebe.fetch_data_from_uniprot('A2Z669')
5321
-
5322
- NCBI also has entries related to UniProt.
5323
-
5324
- Example:
5325
-
5326
- https://www.ncbi.nlm.nih.gov/protein/P02768
5327
- https://www.ncbi.nlm.nih.gov/protein/P02768.2?report=fasta
5328
-
5329
- This has the header **RecName: Full=Serum albumin; Flags:
5330
- PrecursorUniProtKB/Swiss-Prot: P02768.2**.
5331
-
5332
- From the bioroebe-shell, you can can fetch data from
5333
- <b>Uniprot</b>, such as by issuing:
5334
-
5335
- unitprot_fetch
5336
-
5337
5334
  ## AminoAcid composition
5338
5335
 
5339
5336
  A small widget exists to show the amino-acid composition.
@@ -5442,11 +5439,9 @@ A bin file exists as well:
5442
5439
 
5443
5440
  bin/genbank_to_fasta
5444
5441
 
5445
- This is also handled by the more generic method
5446
- called **Bioroebe.parse()**, which attempts to parse
5447
- any file that may be relevant in regards to bioinformatics
5448
- eventually. For now (**May 2021**) only a few files are
5449
- supported.
5442
+ This is also handled by the more generic method called <b>Bioroebe.parse()</b>,
5443
+ which attempts to parse any file that may be relevant in regards to bioinformatics
5444
+ eventually. For now (**May 2021**) only a few files are supported.
5450
5445
 
5451
5446
  A small ruby-gtk3 widget exists for this as well:
5452
5447
 
@@ -6053,6 +6048,7 @@ found in **bioroebe/toplevel_methods/palindromes.rb**.
6053
6048
  You can also use a toplevel method for this. Example:
6054
6049
 
6055
6050
  Bioroebe.palindrome_generator 4 # => "CTAG\nGATC"
6051
+ Bioroebe.palindrome_generator(10)
6056
6052
 
6057
6053
  In **June 2020** code was added to "display" a 2D structure
6058
6054
  of RNA or DNA palindromes. The code for this resides in
@@ -6460,20 +6456,6 @@ You can use it as follows:
6460
6456
  Several commandline scripts make use of that. I found it to be
6461
6457
  useful to have a short visual separator ready.
6462
6458
 
6463
- ## Using Bioroebe.three_delimiter()
6464
-
6465
- The method **Bioroebe.three_delimiter()** can be used to
6466
- split a String into a String where every third position has
6467
- a trailing '|' token.
6468
-
6469
- So for instance:
6470
-
6471
- Bioroebe.three_delimiter 'ATGGGGATGTAGGTA' # => "ATG|GGG|ATG|TAG|GTA"
6472
-
6473
- The primary reason why that was added as a toplevel method has been
6474
- because it may be visually simpler to identify the individual codons
6475
- via your eyes that way.
6476
-
6477
6459
  ## Generating a random DNA sequence
6478
6460
 
6479
6461
  You can "generate" a random DNA sequence from the commandline
@@ -7182,6 +7164,11 @@ It will deduce the possible codons for the aminoacid sequence
7182
7164
  MTTAGP, and it will display the findings in RNA - thus, all
7183
7165
  T are U on the display on the commandline.
7184
7166
 
7167
+ The commandline output of the above, captured as an image,
7168
+ will be shown next:
7169
+
7170
+ <img src="https://i.imgur.com/EUCU3sH.png" style="margin: 1em">
7171
+
7185
7172
  ## Determining the possible codons for a given aminoacid
7186
7173
 
7187
7174
  If you need to quickly determine all possible codons for a specific
@@ -8952,7 +8939,7 @@ that the base is actually correct, we can use the following formula:
8952
8939
  P stands for <b>Probability</b>. An example to this follows next:
8953
8940
 
8954
8941
  Say that you have a quality score of 48; then, in ruby code,
8955
- you would get an **error probability** of:
8942
+ you would get an <b>error probability</b> of:
8956
8943
 
8957
8944
  10 ** (-48 / 10.0) # => 1.584893192461114e-05
8958
8945
 
@@ -9520,11 +9507,14 @@ information about <b>SUMO</b>: https://en.wikipedia.org/wiki/SUMO_protein
9520
9507
 
9521
9508
  This is just a simple table, for summary purposes.
9522
9509
 
9523
- Name of the organism | Latin name | Number of chromosomes
9524
- ------------------------------|--------------------------------|-----------------------
9510
+ Name of the organism | Latin name | Number of chromosomes (in somatic, diploid cells)
9511
+ ------------------------------|--------------------------------|---------------------------------------------------
9525
9512
  Zebrafish | Danio rerio | 16
9513
+ Cabbage plants | Brassica oleracea | 18
9526
9514
  House mouse | Mus musculus | 20
9515
+ Chimpanzees | Pan troglodytes | 48
9527
9516
  Pigeon (the domestic pigeon) | Columba livia domestica | 80
9517
+ Hedgehogs | Erinaceidae | 90
9528
9518
 
9529
9519
  ## Finding the consensus sequence and constructing a frequency profile
9530
9520
 
@@ -9797,6 +9787,9 @@ in time.
9797
9787
 
9798
9788
  ## GUIs of the bioroebe project - Graphical User Interface of the bioroebe project
9799
9789
 
9790
+ The bioroebe project comes with various GUIs (Graphical User Interfaces), to
9791
+ help work with various aspects even for "average users".
9792
+
9800
9793
  For example, <b>levensthein.rb</b> has code that allows you to
9801
9794
  start its <b>ruby-gtk GUI</b> component, via:
9802
9795
 
@@ -9806,7 +9799,8 @@ Or, in a more generic manner:
9806
9799
 
9807
9800
  bioroebe --levensthein-gui
9808
9801
 
9809
- Here is a **screenshot** of the gtk2-class for <b>HammingDistance</b>.
9802
+ Here is a **screenshot** of the (old) ruby-gtk2-class for the
9803
+ <b>HammingDistance</b>.
9810
9804
 
9811
9805
  <img src="https://i.imgur.com/OT4dJiq.png" style="margin-left: 2em">
9812
9806
 
@@ -9912,7 +9906,7 @@ bindings in Bioroebe to ruby-tk bindings:
9912
9906
  19 | sizeseq | [NOT YET IMPLEMENTED] | |
9913
9907
  20 | three_to_one | [NOT YET IMPLEMENTED] | |
9914
9908
  21 | www_finder | [NOT YET IMPLEMENTED] | |
9915
- 22 | blosum_matrix_viewer | [TINY BIT IMPLEMENTED; ~5%] | |
9909
+ 22 | blosum_matrix_viewer | [TINY BIT IMPLEMENTED; ~5%] | | [PARTIALLY IMPLEMENTED]
9916
9910
  23 | random_sequence | [NOT YET IMPLEMENTED] | |
9917
9911
 
9918
9912
  </div>
@@ -9952,6 +9946,12 @@ second one is jruby+swing:
9952
9946
 
9953
9947
  <img src="https://i.imgur.com/5IUbDSt.png" style="margin: 1em">
9954
9948
 
9949
+ In December 2023 I decided to replace all old GUIs via the
9950
+ universal-widget project. This project allows us to, eventually,
9951
+ make use of different GUI toolkits, as well as the world wide
9952
+ web, for GUIs. This is ongoing - right now only one GUI has been
9953
+ ported (three_to_one.rb), but expect more changes in 2024 here.
9954
+
9955
9955
  ### libUI support
9956
9956
 
9957
9957
  Presently, as of **August 2021**, support for libUI in the bioroebe
@@ -10279,12 +10279,13 @@ For instance, the last CDS ranges from 2931 to 3917.
10279
10279
  class <b>Bioroebe::Compacter</b> can be used to sanitize a text file that
10280
10280
  is supposedly a FASTA sequence, such as for a DNA sequence.
10281
10281
 
10282
- In September 2023 this class was partially rewritten - the old code
10283
- was not flexible enough and confusing to me. I also added more
10284
- commandline options to this class, to allow the user more fine-tuned
10285
- control.
10282
+ In <b>September 2023</b> this class was partially rewritten - the old
10283
+ code was not flexible enough and confusing to me, which is a bad sign
10284
+ considering I wrote it in the first place. I also added more commandline
10285
+ options to this class during the rewrite, to allow the user more
10286
+ fine-tuned control over its behaviour.
10286
10287
 
10287
- Why has this class been created in the first place?
10288
+ <b>Why</b> has this class been created in the first place?
10288
10289
 
10289
10290
  If you download data from the internet, that data may not be what
10290
10291
  you want it to be. It may contain numbers, rather than just
@@ -10301,6 +10302,217 @@ such as the following example shows:
10301
10302
 
10302
10303
  compacter SPRR4_protein.fasta --retain-newlines
10303
10304
 
10305
+ ## Calculating the BLOSUM substitution score via class Bioroebe::CompareTheseTwoSequencesViaBlosum.new
10306
+
10307
+ class Bioroebe::CompareTheseTwoSequencesViaBlosum.new is mostly an
10308
+ ad-hoc class; I wrote it quickly in 2023 to calculate the
10309
+ BLOSUM50 score.
10310
+
10311
+ I then invoke it like this from the commandline:
10312
+
10313
+ comparethesetwosequencesviablosum GSAQVKGHGKKVADALTNAVAHVDDMPNALSALSD----LHAHK GSGYLVGDSLTFVDLL--VAQHTADLLAANAALLDEFPQFKAHQ
10314
+
10315
+ This compares the two sequences:
10316
+
10317
+ GSAQVKGHGKKVADALTNAVAHVDDMPNALSALSD----LHAHK
10318
+ GSGYLVGDSLTFVDLL--VAQHTADLLAANAALLDEFPQFKAHQ
10319
+
10320
+ The score I obtained was 39.
10321
+
10322
+ ## Bioroebe::SimpleStringComparer
10323
+
10324
+ class **SimpleStringComparer** (Bioroebe::SimpleStringComparer) can be
10325
+ used to compare two strings visually, similar to how **NCBI BLAST**
10326
+ compares two sequences to one another.
10327
+
10328
+ By default the output of that class will be, in colours, on the commandline,
10329
+ but you can disable this like so:
10330
+
10331
+ Bioroebe::SimpleStringComparer.new(ARGV) { :disable_colours }
10332
+
10333
+ Let's look at another example:
10334
+
10335
+ Bioroebe::SimpleStringComparer.new('AAAAAAAAAAAAAATTTTTTTTTTTAAAAAAAAAAAATATA|GAAAAAAAAAAAAAAAATATTTTTTTTTTTTTTTTTTTTTT')
10336
+
10337
+ This may look like so:
10338
+
10339
+ <img src="https://i.imgur.com/kkqkpmQ.png" style="margin-left: 2em">
10340
+
10341
+ Because different people may wish to use different colours,
10342
+ the class allows the user to change these colours via
10343
+ the commandline. Let's assume you did alias
10344
+ simple_string_comparer to this class - then you can do the following:
10345
+
10346
+ simple_string_comparer 'AAAAAAAAAAAAAATTTTTTTTTTTAAAAAAAAAAAATATA|GAAAAAAAAAAAAAAAATATTTTTTTTTTTTTTTTTTTTTT' --colour-for-a-match=lightblue
10347
+
10348
+ So you can modify the vertical bar and display it in a specific
10349
+ colour. See the following image for how this may then look:
10350
+
10351
+ <img src="https://i.imgur.com/ipaXUQT.png" style="margin: 1em">
10352
+
10353
+ I may add more options here, to allow arbitrary styling, but
10354
+ I'll leave it at this for now - the future will show how useful
10355
+ this class may be.
10356
+
10357
+ ## Using Bioroebe.three_delimiter()
10358
+
10359
+ The method <b>Bioroebe.three_delimiter()</b> can be used to
10360
+ split a String into a String where every third position has
10361
+ a trailing '|' token.
10362
+
10363
+ So, for instance:
10364
+
10365
+ Bioroebe.three_delimiter 'ATGGGGATGTAGGTAAAA' # => "ATG|GGG|ATG|TAG|GTA|AAA"
10366
+
10367
+ The primary reason why that was added as a toplevel method has been
10368
+ because it may be visually simpler to identify the individual codons
10369
+ via your eyes that way.
10370
+
10371
+ The following image shows this output:
10372
+
10373
+ <img src="https://i.imgur.com/aakm0Z9.png" style="margin: 1em">
10374
+
10375
+ ## Bioroebe.cat() - displaying the content of files
10376
+
10377
+ If you need to display the content of files you can use the
10378
+ helper-method called <b>Bioroebe.cat()</b>. A /bin executable
10379
+ for this functionality exists as well, aptly
10380
+ called <b>bioroebe_cat</b>.
10381
+
10382
+ ## Bioroebe.extractseq
10383
+
10384
+ Bioroebe.extractseq can be used to assemble a new sequence
10385
+ from an existing sequence. This functionality has been
10386
+ inspired by EMBOSS extractseq.
10387
+
10388
+ Usage example:
10389
+
10390
+ Bioroebe.extractseq('AAAGGGTTT', '7-9','3-4') # => TTTAG
10391
+
10392
+ So a new String is generated; 7-9 and 3-4 refer to the range,
10393
+ so first we take position 7, 8, and 9, then we add 3 and
10394
+ 4, and finally return the new sequence.
10395
+
10396
+ Note that one difference between EMBOSS extractseq and
10397
+ bioroebe extractseq is that no local file is generated
10398
+ in bioroebe; you may have to combine this by yourself
10399
+ if you desire this functionality.
10400
+
10401
+ ## Bioroebe.log_dir? - Logging and Log output
10402
+
10403
+ As of <b>November 2023</b>, the bioroebe project uses a simplified approach
10404
+ when it comes to the log-directory. Before that there was also the
10405
+ Bioroebe.base_dir? in use, and I kept on forgetting what the difference
10406
+ was between these two - so the latter simply became an alias to
10407
+ Bioroebe.log_dir? now.
10408
+
10409
+ <b>Bioroebe.log_dir?</b> will determine where the directory resides into which
10410
+ you can put files and directories, and have the bioroebe-project
10411
+ recognize these files and directories too, in particular FASTA files.
10412
+
10413
+ The **BioRoebe project** may autogenerate some files, including
10414
+ **log files**.
10415
+
10416
+ In order to be able to do so, the bioroebe project needs the user
10417
+ to be able to access a **base directory**, the so-called
10418
+ **working base directory**. This is where bioroebe assumes
10419
+ most working files to exist.
10420
+
10421
+ On my linux system this used to default to the directory
10422
+ called **/Depot/Bioroebe/**. On other systems, the log directory
10423
+ may default into the **user's home directory**, via a call to
10424
+ <b>"#{File.expand_path('~')}/"</b>. (I use this presently,
10425
+ since **2020**.)
10426
+
10427
+ This should work on most systems, but it may not be what you
10428
+ want to have or use in your own workflow. Thus, code exists
10429
+ that allows you to designate another log directory to use.
10430
+
10431
+ The API for this is simply called:
10432
+
10433
+ Bioroebe.set_log_dir()
10434
+
10435
+ The method can be found in
10436
+ **lib/bioroebe/log_directory/log_directory.rb**.
10437
+
10438
+ If you want to do this from within the **bioshell** itself,
10439
+ try:
10440
+
10441
+ set_log_dir /tmp/test
10442
+ setlogdir /tmp/test
10443
+
10444
+ If you use the interactive bioroebe-shell then you can use
10445
+ **home?** to determine where the log directory is on your
10446
+ system. For example, I tend to just use **/root/Bioroebe/**
10447
+ these days, when I am the superuser.
10448
+
10449
+ Note that you can also define the environment variable called
10450
+ <b>BIOROEBE_DEFAULT_LOG_DIRECTORY</b>. If this is set on startup
10451
+ of the bioroebe-shell, then it will overrule the initial :default
10452
+ value that is used otherwise.
10453
+
10454
+ ## Enzymes
10455
+
10456
+ This subsection will be expanded at a later time - it will be
10457
+ about enzymes in general.
10458
+
10459
+ For now, if you need a table, as a memory aid, for the enzyme classes,
10460
+ here is one:
10461
+
10462
+ | Enzyme class (EC) | Name |
10463
+ |-------------------- |------------------|
10464
+ | 1 | Oxidoreductases |
10465
+ | 2 | Transferases |
10466
+ | 3 | Hydrolases |
10467
+ | 4 | Lyases |
10468
+ | 5 | Isomerases |
10469
+ | 6 | Ligases |
10470
+ | 7 | Translocases |
10471
+
10472
+ See wikipedia for more information:
10473
+
10474
+ https://en.wikipedia.org/wiki/Enzyme_Commission_number#Top_level_codes
10475
+
10476
+ ## UniProt and the data provided by UniProt
10477
+
10478
+ UniProt is a freely accessible database of protein sequence and
10479
+ functional information. What makes it also useful for
10480
+ bioinformatics is that you can easily query the FASTA sequence
10481
+ of a protein.
10482
+
10483
+ Consider the protein called **A2Z669**. The **entry** to this protein
10484
+ can be found here:
10485
+
10486
+ https://www.uniprot.org/uniprot/A2Z669
10487
+
10488
+ And the corresponding FASTA sequence of that protein can be
10489
+ found here, if you append **.fasta** to the URL:
10490
+
10491
+ https://www.uniprot.org/uniprot/A2Z669.fasta
10492
+
10493
+ If you wish to save this file, from within **Bioroebe** itself,
10494
+ then you can use the following API:
10495
+
10496
+ Bioroebe.fetch_data_from_uniprot()
10497
+ Bioroebe.fetch_data_from_uniprot('A2Z669')
10498
+
10499
+ NCBI also has entries related to UniProt.
10500
+
10501
+ Example:
10502
+
10503
+ https://www.ncbi.nlm.nih.gov/protein/P02768
10504
+ https://www.ncbi.nlm.nih.gov/protein/P02768.2?report=fasta
10505
+
10506
+ This has the header **RecName: Full=Serum albumin; Flags:
10507
+ PrecursorUniProtKB/Swiss-Prot: P02768.2**.
10508
+
10509
+ From the bioroebe-shell, you can fetch data from
10510
+ <b>Uniprot</b>, such as by issuing:
10511
+
10512
+ uniprot_fetch
10513
+ uniprot # This alias works as well.
10514
+ fetch_data_from_uniprot # As does this variant.
10515
+
10304
10516
  ## Possibly useful links in regards to molecular biology and science in general
10305
10517
 
10306
10518
  On the www there are a myriad of links to various other external sites.