bioroebe 0.12.24 → 0.13.31

Sign up to get free protection for your applications and to get access to all the features.
Files changed (503) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +7 -8
  3. data/README.md +566 -354
  4. data/bin/all_positions_of_this_nucleotide +1 -1
  5. data/bin/aminoacid_frequencies +1 -1
  6. data/bin/automatically_rename_this_fasta_file +1 -1
  7. data/bin/base_composition +1 -1
  8. data/bin/batch_create_windows_executables +1 -1
  9. data/bin/bioroebe +12 -1
  10. data/bin/bioroebe_cat +7 -0
  11. data/bin/calculate_exponential_growth +7 -0
  12. data/bin/calculate_n50_value +1 -1
  13. data/bin/calculate_the_frequencies_of_this_species +7 -0
  14. data/bin/chunked_display +1 -1
  15. data/bin/codon_frequency +1 -1
  16. data/bin/codon_to_aminoacid +1 -1
  17. data/bin/colourize_this_fasta_sequence +1 -1
  18. data/bin/complementary_dna_strand +1 -1
  19. data/bin/complementary_rna_strand +1 -1
  20. data/bin/consensus_sequence +1 -1
  21. data/bin/dna_to_rna +1 -1
  22. data/bin/downcase_chunked_display +1 -1
  23. data/bin/download_this_pdb +1 -1
  24. data/bin/fasta_index +1 -1
  25. data/bin/fetch_data_from_uniprot +1 -1
  26. data/bin/filter_away_invalid_nucleotides +1 -1
  27. data/bin/find_substring +1 -1
  28. data/bin/input_as_dna +1 -1
  29. data/bin/is_palindrome +1 -1
  30. data/bin/leading_five_prime +1 -1
  31. data/bin/longest_ORF +1 -1
  32. data/bin/longest_substring +1 -1
  33. data/bin/open_reading_frames +1 -1
  34. data/bin/partner_nucleotide +1 -1
  35. data/bin/plain_palindrome +1 -1
  36. data/bin/random_dna_sequence +1 -1
  37. data/bin/random_sequence +1 -1
  38. data/bin/raw_hamming_distance +1 -1
  39. data/bin/return_longest_substring_via_LCS_algorithm +1 -1
  40. data/bin/reverse_sequence +1 -1
  41. data/bin/short_aminoacid_letter_from_long_aminoacid_name +1 -1
  42. data/bin/show_atomic_composition +1 -1
  43. data/bin/show_fasta_header +1 -1
  44. data/bin/show_nucleotide_sequence +1 -1
  45. data/bin/show_this_dna_sequence +1 -1
  46. data/bin/show_time_now +7 -0
  47. data/bin/sort_aminoacid_based_on_its_hydrophobicity +1 -1
  48. data/bin/strict_filter_away_invalid_aminoacids +1 -1
  49. data/{lib/bioroebe/base/reset.rb → bin/three_delimiter} +9 -6
  50. data/bin/three_to_one +1 -1
  51. data/bin/to_rna +1 -1
  52. data/bin/trailing_three_prime +1 -1
  53. data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +1 -1
  54. data/bioroebe.gemspec +6 -7
  55. data/doc/README.gen +534 -322
  56. data/doc/blosum/blosum.md +4 -0
  57. data/doc/compatibility/BIO_PHP.md +20 -18
  58. data/doc/compatibility/README.md +2 -3
  59. data/doc/compatibility/emboss.md +5 -3
  60. data/doc/{extensive_usage_example.md → extensive_usage_example/extensive_usage_example.md} +4 -2
  61. data/doc/{instructions_for_the_taxonomy_subproject.md → instructions_for_the_taxonomy_subproject/instructions_for_the_taxonomy_subproject.md} +36 -33
  62. data/doc/{legacy_paths.md → legacy_paths/legacy_paths.md} +3 -3
  63. data/doc/statistics/statistics.md +12 -10
  64. data/doc/todo/bioroebe_GUI_todo.md +6 -1
  65. data/doc/todo/bioroebe_java_todo.md +3 -2
  66. data/doc/todo/bioroebe_todo.md +328 -310
  67. data/doc/{using_biomart.md → using_biomart/using_biomart.md} +7 -3
  68. data/lib/bioroebe/abstract/features.rb +0 -0
  69. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -1
  70. data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +3 -1
  71. data/lib/bioroebe/aminoacids/codon_percentage.rb +18 -10
  72. data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +5 -2
  73. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +90 -64
  74. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -3
  75. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +2 -2
  76. data/lib/bioroebe/annotations/create_annotation_format.rb +2 -2
  77. data/lib/bioroebe/base/base.rb +101 -6
  78. data/lib/bioroebe/base/base_module/base_module.rb +9 -1
  79. data/lib/bioroebe/base/colours.rb +3 -0
  80. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +80 -44
  81. data/lib/bioroebe/base/commandline_application/README.md +1 -1
  82. data/lib/bioroebe/base/commandline_application/commandline_application.rb +661 -22
  83. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +2 -1
  84. data/lib/bioroebe/base/infer_the_namespace_module/infer_the_namespace_module.rb +37 -0
  85. data/lib/bioroebe/base/internal_hash_module/internal_hash_module.rb +1 -6
  86. data/lib/bioroebe/base/prototype/prototype.rb +155 -14
  87. data/lib/bioroebe/biomart/attribute.rb +1 -1
  88. data/lib/bioroebe/biomart/biomart.rb +8 -9
  89. data/lib/bioroebe/biomart/server.rb +1 -1
  90. data/lib/bioroebe/blosum/blosum.rb +2 -2
  91. data/lib/bioroebe/calculate/calculate_blosum_score.rb +5 -3
  92. data/lib/bioroebe/calculate/calculate_gc_content.rb +1 -1
  93. data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +5 -3
  94. data/lib/bioroebe/calculate/calculate_melting_temperature.rb +2 -10
  95. data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +6 -15
  96. data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +4 -2
  97. data/lib/bioroebe/cell/cell.rb +3 -2
  98. data/lib/bioroebe/cell/specialized_cells/B_cell.rb +60 -0
  99. data/lib/bioroebe/cell/specialized_cells/Macrophage.rb +60 -0
  100. data/lib/bioroebe/cell/specialized_cells/README.md +5 -0
  101. data/lib/bioroebe/cell/specialized_cells/T_cell.rb +60 -0
  102. data/lib/bioroebe/cleave_and_digest/cleave.rb +3 -1
  103. data/lib/bioroebe/cleave_and_digest/digestion.rb +1 -1
  104. data/lib/bioroebe/codon_tables/frequencies/10090_Mus_musculus.yml +93 -0
  105. data/lib/bioroebe/codon_tables/frequencies/107243_Thlaspi_caerulescens.yml +72 -0
  106. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -2
  107. data/lib/bioroebe/codons/codon_table.rb +10 -2
  108. data/lib/bioroebe/codons/codons.rb +3 -3
  109. data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +18 -15
  110. data/lib/bioroebe/codons/determine_optimal_codons.rb +1 -1
  111. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +4 -2
  112. data/lib/bioroebe/codons/show_codon_tables.rb +1 -1
  113. data/lib/bioroebe/codons/show_codon_usage.rb +1 -2
  114. data/lib/bioroebe/codons/show_this_codon_table.rb +2 -2
  115. data/lib/bioroebe/codons/start_codons.rb +7 -3
  116. data/lib/bioroebe/colours/colour_schemes/README.md +1 -1
  117. data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +3 -3
  118. data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +3 -3
  119. data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +4 -3
  120. data/lib/bioroebe/colours/colour_schemes/helix.rb +3 -1
  121. data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +3 -1
  122. data/lib/bioroebe/colours/colour_schemes/score.rb +13 -2
  123. data/lib/bioroebe/colours/colour_schemes/strand.rb +3 -1
  124. data/lib/bioroebe/colours/colour_schemes/turn.rb +3 -1
  125. data/lib/bioroebe/colours/colour_schemes/zappo.rb +1 -1
  126. data/lib/bioroebe/{toplevel_methods/colourize_related_methods.rb → colours/colourize_related_code.rb} +1 -3
  127. data/lib/bioroebe/colours/colourize_sequence.rb +3 -1
  128. data/lib/bioroebe/colours/colours.rb +172 -15
  129. data/lib/bioroebe/configuration/configuration.rb +1 -1
  130. data/lib/bioroebe/constants/GUIs.rb +2 -2
  131. data/lib/bioroebe/constants/constants.rb +1349 -0
  132. data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +8 -13
  133. data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +9 -3
  134. data/lib/bioroebe/count/count_amount_of_aminoacids.rb +11 -10
  135. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +1 -1
  136. data/lib/bioroebe/count/count_at.rb +2 -1
  137. data/lib/bioroebe/databases/download_taxonomy_database.rb +1 -1
  138. data/lib/bioroebe/dotplots/advanced_dotplot.rb +2 -2
  139. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +2 -2
  140. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +2 -2
  141. data/lib/bioroebe/electron_microscopy/flipy.rb +2 -2
  142. data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +3 -11
  143. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +6 -6
  144. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +6 -6
  145. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +2 -2
  146. data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +1 -1
  147. data/lib/bioroebe/enzymes/restriction_enzyme.rb +1 -1
  148. data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +4 -3
  149. data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +1 -1
  150. data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +4 -3
  151. data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +3 -3
  152. data/lib/bioroebe/ext/main.cpp +0 -1
  153. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +3 -3
  154. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +1 -1
  155. data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +1 -1
  156. data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +8 -14
  157. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -1
  158. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +1 -1
  159. data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +1 -1
  160. data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +1 -1
  161. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +37 -11
  162. data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +2 -2
  163. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +1 -1
  164. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +5 -13
  165. data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +1 -1
  166. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -1
  167. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +3 -6
  168. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +3 -3
  169. data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +20 -11
  170. data/lib/bioroebe/genome/genome.rb +1 -1
  171. data/lib/bioroebe/genomes/genome_pattern.rb +17 -16
  172. data/lib/bioroebe/genomes/genome_retriever.rb +4 -2
  173. data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +10 -13
  174. data/lib/bioroebe/gui/universal_widgets/alignment/alignment.rb +557 -0
  175. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/aminoacid_composition.rb +498 -198
  176. data/lib/bioroebe/gui/universal_widgets/anti_sense_strand/anti_sense_strand.rb +665 -0
  177. data/lib/bioroebe/gui/universal_widgets/blosum_matrix_viewer/blosum_matrix_viewer.rb +329 -0
  178. data/lib/bioroebe/gui/universal_widgets/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +423 -0
  179. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/controller/controller.rb +170 -118
  180. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +277 -215
  181. data/lib/bioroebe/gui/{shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb → universal_widgets/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb} +297 -107
  182. data/lib/bioroebe/gui/universal_widgets/fasta_table_widget/fasta_table_widget.rb +643 -0
  183. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/format_converter/format_converter.rb +236 -164
  184. data/lib/bioroebe/gui/universal_widgets/gene/gene.rb +278 -0
  185. data/lib/bioroebe/gui/universal_widgets/hamming_distance/hamming_distance.rb +646 -0
  186. data/lib/bioroebe/gui/{shared_code/levensthein_distance/levensthein_distance_module.rb → universal_widgets/levensthein_distance/levensthein_distance.rb} +313 -88
  187. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/nucleotide_analyser/nucleotide_analyser.rb +281 -189
  188. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/parse_pdb_file/parse_pdb_file.rb +265 -149
  189. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/primer_design_widget/primer_design_widget.rb +337 -263
  190. data/lib/bioroebe/gui/universal_widgets/protein_to_DNA/protein_to_DNA.rb +408 -0
  191. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/random_sequence/random_sequence.rb +245 -187
  192. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.rb +207 -137
  193. data/lib/bioroebe/gui/universal_widgets/shell/shell.rb +288 -0
  194. data/lib/bioroebe/gui/{gtk3/show_codon_table/misc.rb → universal_widgets/show_codon_table/show_codon_table.rb} +290 -110
  195. data/lib/bioroebe/gui/{shared_code/show_codon_usage/show_codon_usage_module.rb → universal_widgets/show_codon_usage/show_codon_usage.rb} +228 -47
  196. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/sizeseq/sizeseq.rb +151 -69
  197. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/three_to_one/three_to_one.rb +190 -127
  198. data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.rb +211 -152
  199. data/lib/bioroebe/images/images.html +953 -1170
  200. data/lib/bioroebe/images/misc/README.md +6 -0
  201. data/lib/bioroebe/images/misc/activation.avif +0 -0
  202. data/lib/bioroebe/images/misc/inhibition.avif +0 -0
  203. data/lib/bioroebe/images/misc/small_virus_logo.avif +0 -0
  204. data/lib/bioroebe/{constants/base_directory.rb → log_directory/log_directory.rb} +79 -59
  205. data/lib/bioroebe/matplotlib/matplotlib_generator.rb +1 -1
  206. data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +1 -1
  207. data/lib/bioroebe/misc/ruler.rb +5 -5
  208. data/lib/bioroebe/misc/useful_formulas.rb +3 -3
  209. data/lib/bioroebe/ncbi/efetch.rb +1 -2
  210. data/lib/bioroebe/ngs/phred_quality_score_table.rb +3 -3
  211. data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +3 -6
  212. data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +3 -3
  213. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +6 -10
  214. data/lib/bioroebe/nucleotides/{show_nucleotide_sequence.rb → show_nucleotide_sequence/show_nucleotide_sequence.rb} +377 -255
  215. data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +1 -1
  216. data/lib/bioroebe/palindromes/palindrome_finder.rb +1 -1
  217. data/lib/bioroebe/palindromes/palindrome_generator.rb +2 -10
  218. data/lib/bioroebe/parsers/biolang_parser.rb +1 -1
  219. data/lib/bioroebe/parsers/blosum_parser.rb +14 -19
  220. data/lib/bioroebe/parsers/genbank_parser.rb +2 -6
  221. data/lib/bioroebe/parsers/gff.rb +9 -9
  222. data/lib/bioroebe/parsers/parse_embl.rb +2 -6
  223. data/lib/bioroebe/parsers/stride_parser.rb +4 -12
  224. data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +2 -2
  225. data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +6 -3
  226. data/lib/bioroebe/patterns/profile_pattern.rb +2 -2
  227. data/lib/bioroebe/patterns/rgg_scanner.rb +4 -2
  228. data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/alpha_helix.rb +2 -2
  229. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/download_this_pdb.rb +2 -3
  230. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/fetch_fasta_sequence_from_pdb.rb +4 -4
  231. data/lib/bioroebe/{protein_structure → pdb_and_protein_structure}/helical_wheel.rb +2 -2
  232. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_mmCIF_file.rb +1 -1
  233. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/parse_pdb_file.rb +3 -3
  234. data/lib/bioroebe/{pdb → pdb_and_protein_structure}/report_secondary_structures_from_this_pdb_file.rb +3 -3
  235. data/lib/bioroebe/project/project.rb +3 -1
  236. data/lib/bioroebe/raw_sequence/README.md +8 -8
  237. data/lib/bioroebe/raw_sequence/raw_sequence.rb +11 -2
  238. data/lib/bioroebe/regexes/regexes.rb +1 -2
  239. data/lib/bioroebe/requires/commandline_application.rb +3 -1
  240. data/lib/bioroebe/requires/require_all_pdb_files.rb +1 -1
  241. data/lib/bioroebe/requires/require_all_taxonomy_files.rb +1 -1
  242. data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +10 -0
  243. data/lib/bioroebe/requires/require_colours.rb +1 -1
  244. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +5 -7
  245. data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +1 -1
  246. data/lib/bioroebe/requires/require_the_constants.rb +2 -14
  247. data/lib/bioroebe/requires/require_yaml.rb +7 -5
  248. data/lib/bioroebe/sequence/alignment.rb +1 -1
  249. data/lib/bioroebe/sequence/dna.rb +4 -2
  250. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +22 -8
  251. data/lib/bioroebe/sequence/protein.rb +2 -2
  252. data/lib/bioroebe/sequence/reverse_complement.rb +3 -3
  253. data/lib/bioroebe/sequence/rna.rb +9 -8
  254. data/lib/bioroebe/sequence/sequence.rb +3 -3
  255. data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +0 -0
  256. data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +0 -0
  257. data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +0 -0
  258. data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -1
  259. data/lib/bioroebe/shell/help/class.rb +68 -19
  260. data/lib/bioroebe/shell/menu.rb +5244 -5322
  261. data/lib/bioroebe/shell/{readline/readline.rb → readline.rb} +1 -3
  262. data/lib/bioroebe/shell/shell.rb +11240 -453
  263. data/lib/bioroebe/siRNA/siRNA.rb +3 -3
  264. data/lib/bioroebe/{gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb → sinatra/sinatra_interface.rb} +28 -19
  265. data/lib/bioroebe/{www/sinatra/sinatra.rb → sinatra/sinatra_wrapper.rb} +731 -754
  266. data/lib/bioroebe/string_matching/find_longest_substring.rb +2 -10
  267. data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +4 -14
  268. data/lib/bioroebe/string_matching/hamming_distance.rb +11 -10
  269. data/lib/bioroebe/string_matching/levensthein.rb +5 -17
  270. data/lib/bioroebe/string_matching/simple_string_comparer.rb +48 -4
  271. data/lib/bioroebe/string_matching/smith_waterman.rb +11 -6
  272. data/lib/bioroebe/svg/glyph.rb +4 -1
  273. data/lib/bioroebe/svg/mini_feature.rb +1 -1
  274. data/lib/bioroebe/svg/page.rb +18 -7
  275. data/lib/bioroebe/svg/svgee.rb +22 -13
  276. data/lib/bioroebe/svg/track.rb +20 -4
  277. data/lib/bioroebe/taxonomy/chart.rb +2 -2
  278. data/lib/bioroebe/taxonomy/class_methods.rb +5 -6
  279. data/lib/bioroebe/taxonomy/constants.rb +1 -1
  280. data/lib/bioroebe/taxonomy/info/info.rb +1 -1
  281. data/lib/bioroebe/taxonomy/info/is_dna.rb +1 -1
  282. data/lib/bioroebe/taxonomy/interactive.rb +1 -2
  283. data/lib/bioroebe/taxonomy/menu.rb +1 -1
  284. data/lib/bioroebe/taxonomy/node.rb +1 -1
  285. data/lib/bioroebe/taxonomy/parse_fasta.rb +4 -2
  286. data/lib/bioroebe/taxonomy/shared.rb +5 -4
  287. data/lib/bioroebe/taxonomy/taxonomy.rb +2 -4
  288. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +3 -45
  289. data/lib/bioroebe/toplevel_methods/{is_on_roebe.rb → roebe.rb} +1 -11
  290. data/lib/bioroebe/toplevel_methods/taxonomy.rb +6 -12
  291. data/lib/bioroebe/toplevel_methods/toplevel_methods.rb +5568 -0
  292. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +4 -3
  293. data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +2 -2
  294. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +16 -9
  295. data/lib/bioroebe/utility_scripts/compacter/compacter.rb +4 -2
  296. data/lib/bioroebe/utility_scripts/compare_these_two_sequences_via_blosum.rb +119 -0
  297. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +11 -9
  298. data/lib/bioroebe/utility_scripts/{consensus_sequence.rb → consensus_sequence/consensus_sequence.rb} +13 -4
  299. data/lib/bioroebe/utility_scripts/{create_batch_entrez_file.rb → create_batch_entrez_file/create_batch_entrez_file.rb} +5 -5
  300. data/lib/bioroebe/utility_scripts/{determine_antigenic_areas.rb → determine_antigenic_areas/determine_antigenic_areas.rb} +5 -5
  301. data/lib/bioroebe/utility_scripts/{determine_missing_nucleotides_percentage.rb → determine_missing_nucleotides_percentage/determine_missing_nucleotides_percentage.rb} +16 -15
  302. data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +7 -7
  303. data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +1 -1
  304. data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +2 -0
  305. data/lib/bioroebe/utility_scripts/{dot_alignment.rb → dot_alignment/dot_alignment.rb} +3 -3
  306. data/lib/bioroebe/utility_scripts/{download_files_from_rebase.rb → download_files_from_rebase/download_files_from_rebase.rb} +5 -5
  307. data/lib/bioroebe/utility_scripts/fetch_data_from_uniprot/fetch_data_from_uniprot.rb +269 -0
  308. data/lib/bioroebe/utility_scripts/find_gene.rb +4 -2
  309. data/lib/bioroebe/utility_scripts/{mirror_repeat.rb → mirror_repeat/mirror_repeat.rb} +5 -5
  310. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +3 -3
  311. data/lib/bioroebe/utility_scripts/{parse_taxonomy.rb → parse_taxonomy/parse_taxonomy.rb} +15 -6
  312. data/lib/bioroebe/utility_scripts/{pathways.rb → pathways/pathways.rb} +4 -3
  313. data/lib/bioroebe/utility_scripts/{permutations.rb → permutations/permutations.rb} +3 -3
  314. data/lib/bioroebe/utility_scripts/punnet/punnet.rb +4 -2
  315. data/lib/bioroebe/utility_scripts/{show_this_dna_sequence.rb → show_this_dna_sequence/show_this_dna_sequence.rb} +1 -1
  316. data/lib/bioroebe/utility_scripts/showorf/showorf.rb +406 -10
  317. data/lib/bioroebe/version/version.rb +2 -2
  318. data/lib/bioroebe/viennarna/rnafold_wrapper.rb +5 -13
  319. data/lib/bioroebe/virus/individual_viruses/README.md +15 -0
  320. data/lib/bioroebe/virus/individual_viruses/tobacco_mosaic_virus.rb +40 -0
  321. data/lib/bioroebe/virus/virus.rb +76 -0
  322. data/lib/bioroebe/www/bioroebe.cgi +4 -3
  323. data/lib/bioroebe/www/embeddable_interface.rb +85 -49
  324. data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +6 -6
  325. data/lib/bioroebe/yaml/antisense/antisense.yml +2 -0
  326. data/lib/bioroebe/yaml/blosum/blosum50.yml +6 -0
  327. data/lib/bioroebe/yaml/blosum/blosum90.yml +2 -1
  328. data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +2 -2
  329. data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
  330. data/lib/bioroebe/yaml/consensus_sequences/consensus_sequences.yml +1 -0
  331. data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +7 -6
  332. data/lib/bioroebe/yaml/humans/human_chromosomes.yml +3 -3
  333. data/lib/bioroebe/yaml/mRNA/mRNA.yml +1 -5
  334. data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +1 -0
  335. data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +2 -1
  336. data/lib/bioroebe/yaml/promoters/35S.yml +3 -1
  337. data/lib/bioroebe/yaml/proteases/proteases.yml +3 -1
  338. data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -1
  339. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +7 -7
  340. data/spec/testing_toplevel_method_editor.rb +1 -1
  341. data/spec/testing_toplevel_method_verbose.rb +1 -1
  342. data/test/testing_dna_to_rna_conversion.rb +1 -1
  343. metadata +127 -235
  344. data/doc/blosum.md +0 -5
  345. data/lib/bioroebe/base/commandline_application/aminoacids.rb +0 -33
  346. data/lib/bioroebe/base/commandline_application/directory.rb +0 -33
  347. data/lib/bioroebe/base/commandline_application/extract.rb +0 -22
  348. data/lib/bioroebe/base/commandline_application/misc.rb +0 -502
  349. data/lib/bioroebe/base/commandline_application/opn.rb +0 -47
  350. data/lib/bioroebe/base/commandline_application/reset.rb +0 -42
  351. data/lib/bioroebe/base/commandline_application/warnings.rb +0 -36
  352. data/lib/bioroebe/base/commandline_application/write_what_into.rb +0 -29
  353. data/lib/bioroebe/base/initialize.rb +0 -18
  354. data/lib/bioroebe/base/misc.rb +0 -129
  355. data/lib/bioroebe/base/namespace.rb +0 -16
  356. data/lib/bioroebe/base/prototype/e_and_ee.rb +0 -24
  357. data/lib/bioroebe/base/prototype/misc.rb +0 -114
  358. data/lib/bioroebe/base/prototype/mkdir.rb +0 -20
  359. data/lib/bioroebe/base/prototype/reset.rb +0 -36
  360. data/lib/bioroebe/colours/misc_colours.rb +0 -80
  361. data/lib/bioroebe/colours/rev.rb +0 -44
  362. data/lib/bioroebe/colours/sdir.rb +0 -21
  363. data/lib/bioroebe/colours/sfancy.rb +0 -21
  364. data/lib/bioroebe/colours/sfile.rb +0 -21
  365. data/lib/bioroebe/colours/simp.rb +0 -21
  366. data/lib/bioroebe/colours/swarn.rb +0 -29
  367. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +0 -147
  368. data/lib/bioroebe/constants/carriage_return.rb +0 -14
  369. data/lib/bioroebe/constants/codon_tables.rb +0 -77
  370. data/lib/bioroebe/constants/database_constants.rb +0 -107
  371. data/lib/bioroebe/constants/files_and_directories.rb +0 -606
  372. data/lib/bioroebe/constants/misc.rb +0 -209
  373. data/lib/bioroebe/constants/newline.rb +0 -14
  374. data/lib/bioroebe/constants/nucleotides.rb +0 -121
  375. data/lib/bioroebe/constants/regex.rb +0 -28
  376. data/lib/bioroebe/constants/roebe.rb +0 -38
  377. data/lib/bioroebe/constants/row_terminator.rb +0 -16
  378. data/lib/bioroebe/constants/tabulator.rb +0 -14
  379. data/lib/bioroebe/constants/unicode.rb +0 -12
  380. data/lib/bioroebe/constants/urls.rb +0 -50
  381. data/lib/bioroebe/gui/gtk +0 -1
  382. data/lib/bioroebe/gui/gtk3/README.md +0 -2
  383. data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +0 -306
  384. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +0 -29
  385. data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -195
  386. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -105
  387. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -188
  388. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +0 -322
  389. data/lib/bioroebe/gui/gtk3/gene/gene.rb +0 -181
  390. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +0 -383
  391. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +0 -174
  392. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +0 -181
  393. data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +0 -101
  394. data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +0 -145
  395. data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +0 -23
  396. data/lib/bioroebe/gui/jruby/alignment/alignment.rb +0 -165
  397. data/lib/bioroebe/gui/jruby/aminoacid_composition/aminoacid_composition.rb +0 -166
  398. data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -82
  399. data/lib/bioroebe/gui/libui/README.md +0 -4
  400. data/lib/bioroebe/gui/libui/alignment/alignment.rb +0 -116
  401. data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -112
  402. data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +0 -60
  403. data/lib/bioroebe/gui/libui/controller/controller.rb +0 -116
  404. data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +0 -161
  405. data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +0 -76
  406. data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +0 -135
  407. data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +0 -118
  408. data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +0 -115
  409. data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +0 -190
  410. data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +0 -134
  411. data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +0 -89
  412. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +0 -113
  413. data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +0 -102
  414. data/lib/bioroebe/gui/shared_code/aminoacid_composition/aminoacid_composition_module.rb +0 -94
  415. data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +0 -216
  416. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +0 -192
  417. data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +0 -72
  418. data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +0 -206
  419. data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +0 -140
  420. data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +0 -262
  421. data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +0 -243
  422. data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +0 -199
  423. data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +0 -519
  424. data/lib/bioroebe/shell/colours/colours.rb +0 -235
  425. data/lib/bioroebe/shell/help/help.rb +0 -25
  426. data/lib/bioroebe/shell/misc.rb +0 -10227
  427. data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +0 -56
  428. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +0 -722
  429. data/lib/bioroebe/toplevel_methods/atomic_composition.rb +0 -198
  430. data/lib/bioroebe/toplevel_methods/base_composition.rb +0 -121
  431. data/lib/bioroebe/toplevel_methods/blast.rb +0 -153
  432. data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +0 -57
  433. data/lib/bioroebe/toplevel_methods/cat.rb +0 -71
  434. data/lib/bioroebe/toplevel_methods/chunked_display.rb +0 -92
  435. data/lib/bioroebe/toplevel_methods/cliner.rb +0 -81
  436. data/lib/bioroebe/toplevel_methods/complement.rb +0 -58
  437. data/lib/bioroebe/toplevel_methods/convert_global_env.rb +0 -39
  438. data/lib/bioroebe/toplevel_methods/databases.rb +0 -73
  439. data/lib/bioroebe/toplevel_methods/delimiter.rb +0 -19
  440. data/lib/bioroebe/toplevel_methods/digest.rb +0 -81
  441. data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +0 -146
  442. data/lib/bioroebe/toplevel_methods/e.rb +0 -20
  443. data/lib/bioroebe/toplevel_methods/editor.rb +0 -21
  444. data/lib/bioroebe/toplevel_methods/esystem.rb +0 -22
  445. data/lib/bioroebe/toplevel_methods/exponential_growth.rb +0 -74
  446. data/lib/bioroebe/toplevel_methods/extract.rb +0 -56
  447. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +0 -269
  448. data/lib/bioroebe/toplevel_methods/frequencies.rb +0 -99
  449. data/lib/bioroebe/toplevel_methods/hamming_distance.rb +0 -60
  450. data/lib/bioroebe/toplevel_methods/infer.rb +0 -66
  451. data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +0 -101
  452. data/lib/bioroebe/toplevel_methods/levensthein.rb +0 -63
  453. data/lib/bioroebe/toplevel_methods/log_directory.rb +0 -109
  454. data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +0 -55
  455. data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +0 -88
  456. data/lib/bioroebe/toplevel_methods/matches.rb +0 -259
  457. data/lib/bioroebe/toplevel_methods/misc.rb +0 -596
  458. data/lib/bioroebe/toplevel_methods/nucleotides.rb +0 -787
  459. data/lib/bioroebe/toplevel_methods/number_of_clones.rb +0 -63
  460. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +0 -79
  461. data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +0 -236
  462. data/lib/bioroebe/toplevel_methods/opn.rb +0 -34
  463. data/lib/bioroebe/toplevel_methods/palindromes.rb +0 -155
  464. data/lib/bioroebe/toplevel_methods/parse.rb +0 -59
  465. data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +0 -68
  466. data/lib/bioroebe/toplevel_methods/rds.rb +0 -24
  467. data/lib/bioroebe/toplevel_methods/remove.rb +0 -86
  468. data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +0 -35
  469. data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +0 -68
  470. data/lib/bioroebe/toplevel_methods/rna_splicing.rb +0 -73
  471. data/lib/bioroebe/toplevel_methods/rnalfold.rb +0 -69
  472. data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +0 -116
  473. data/lib/bioroebe/toplevel_methods/shuffleseq.rb +0 -37
  474. data/lib/bioroebe/toplevel_methods/statistics.rb +0 -53
  475. data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +0 -62
  476. data/lib/bioroebe/toplevel_methods/three_delimiter.rb +0 -34
  477. data/lib/bioroebe/toplevel_methods/time_and_date.rb +0 -53
  478. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +0 -31
  479. data/lib/bioroebe/toplevel_methods/truncate.rb +0 -48
  480. data/lib/bioroebe/toplevel_methods/url.rb +0 -36
  481. data/lib/bioroebe/toplevel_methods/verbose.rb +0 -59
  482. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -31
  483. data/lib/bioroebe/utility_scripts/showorf/help.rb +0 -33
  484. data/lib/bioroebe/utility_scripts/showorf/initialize.rb +0 -52
  485. data/lib/bioroebe/utility_scripts/showorf/menu.rb +0 -68
  486. data/lib/bioroebe/utility_scripts/showorf/reset.rb +0 -36
  487. data/lib/bioroebe/utility_scripts/showorf/run.rb +0 -152
  488. data/lib/bioroebe/utility_scripts/showorf/show.rb +0 -97
  489. /data/doc/{german_names_for_the_aminoacids.md → german_names_for_the_aminoacids/german_names_for_the_aminoacids.md} +0 -0
  490. /data/doc/{pdb_ATOM_entry.md → pdb_ATOM_entry/pdb_ATOM_entry.md} +0 -0
  491. /data/doc/{resources.md → resources/resources.md} +0 -0
  492. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/aminoacid_composition/customized_dialog.rb +0 -0
  493. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/anti_sense_strand/anti_sense_strand.config +0 -0
  494. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +0 -0
  495. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +0 -0
  496. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/hamming_distance/hamming_distance.config +0 -0
  497. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/levensthein_distance/levensthein_distance.config +0 -0
  498. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/protein_to_DNA/protein_to_DNA.config +0 -0
  499. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/restriction_enzymes/restriction_enzymes.config +0 -0
  500. /data/lib/bioroebe/gui/{gtk3 → universal_widgets}/www_finder/www_finder.config +0 -0
  501. /data/lib/bioroebe/yaml/{base_composition_of_dna.yml → base_composition_of_dna/base_composition_of_dna.yml} +0 -0
  502. /data/lib/bioroebe/yaml/{nuclear_localization_sequences.yml → nuclear_localization_sequences/nuclear_localization_sequences.yml} +0 -0
  503. /data/lib/bioroebe/yaml/{talens.yml → talens/talens.yml} +0 -0
@@ -1,63 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- module Bioroebe
6
-
7
- # ========================================================================= #
8
- # === Bioroebe.min3
9
- # ========================================================================= #
10
- def self.min3(a, b, c)
11
- if a < b && a < c
12
- a
13
- elsif b < c
14
- b
15
- else
16
- c
17
- end
18
- end
19
-
20
- # ========================================================================= #
21
- # === Bioroebe.levenshtein_distance
22
- # ========================================================================= #
23
- def self.levenshtein_distance(str1, str2)
24
- n = str1.length
25
- m = str2.length
26
-
27
- return m if n.zero?
28
- return n if m.zero?
29
-
30
- x = nil
31
- d = (0 .. m).to_a # Get the distance.
32
-
33
- # ======================================================================= #
34
- # To avoid duplicating an enumerable object, create it outside of the
35
- # loop.
36
- # ======================================================================= #
37
- str2_codepoints = str2.codepoints
38
-
39
- str1.each_codepoint.with_index(1) { |char1, i|
40
- j = 0
41
- while j < m
42
- cost = (char1 == str2_codepoints[j]) ? 0 : 1
43
- x = min3(
44
- d[j + 1] + 1, # insertion
45
- i + 1, # deletion
46
- d[j] + cost # substitution
47
- )
48
- d[j] = i
49
- i = x
50
- j += 1
51
- end
52
- d[m] = x
53
- }
54
- x # Return the cost here.
55
- end
56
-
57
- end
58
-
59
- if __FILE__ == $PROGRAM_NAME
60
- alias e puts
61
- e 'Difference between "shevy" and "chevy" is: '+
62
- Bioroebe.levenshtein_distance('shevy', 'chevy').to_s # => 1
63
- end # rb levensthein.rb
@@ -1,109 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # This file keeps track of where we will store output generated by
6
- # the Bioroebe project.
7
- # =========================================================================== #
8
- # require 'bioroebe/toplevel_methods/log_directory.rb'
9
- # Bioroebe.download_dir?
10
- # =========================================================================== #
11
- module Bioroebe
12
-
13
- # ========================================================================= #
14
- # === @log_directory
15
- #
16
- # Keep track where to log output into, through this variable.
17
- # ========================================================================= #
18
- @log_directory = nil
19
-
20
- # ========================================================================= #
21
- # === Bioroebe.log_directory?
22
- #
23
- # This method will keep track of where we will store output,
24
- # related to the Bioroebe project.
25
- # ========================================================================= #
26
- def self.log_directory?
27
- @log_directory
28
- end; self.instance_eval { alias log_dir log_directory? } # === Bioroebe.log_dir
29
- self.instance_eval { alias store_here? log_directory? } # === Bioroebe.store_here?
30
- self.instance_eval { alias save_dir? log_directory? } # === Bioroebe.save_dir?
31
- self.instance_eval { alias log_dir? log_directory? } # === Bioroebe.log_dir?
32
- self.instance_eval { alias project_log_directory? log_directory? } # === Bioroebe.project_log_directory?
33
-
34
- require 'bioroebe/constants/base_directory.rb'
35
- # ========================================================================= #
36
- # === Bioroebe.set_log_directory
37
- #
38
- # This method will set the log-directory for the Bioroebe project.
39
- #
40
- # Usually this will be in the user's home directory, and then a
41
- # 'Bioroebe/' suffix appended. The default input-value is the
42
- # symbol :default.
43
- #
44
- # The user can set to use another log-directory through this method.
45
- # ========================================================================= #
46
- def self.set_log_directory(
47
- i = :default
48
- )
49
- case i # case tag
50
- # ======================================================================= #
51
- # === :default
52
- #
53
- # This is the default value.
54
- # ======================================================================= #
55
- when :default
56
- i = ::Bioroebe.base_directory?
57
- end
58
- unless i.end_with? '/'
59
- i = i.dup if i.frozen?
60
- i << '/'
61
- end
62
- @log_directory = i
63
- end; self.instance_eval { alias set_log_dir set_log_directory } # === Bioroebe.set_log_dir
64
-
65
- # ========================================================================= #
66
- # === Initialize it at once, with the default value.
67
- # ========================================================================= #
68
- _ = :default # Uses the default path.
69
- if ENV.has_key? 'BIOROEBE_DEFAULT_LOG_DIRECTORY'
70
- _ = ENV['BIOROEBE_DEFAULT_LOG_DIRECTORY']
71
- end
72
- set_log_directory(_)
73
-
74
- # ========================================================================= #
75
- # === Bioroebe.download_directory?
76
- # ========================================================================= #
77
- def self.download_directory?
78
- "#{@log_directory}Downloads/"
79
- end; self.instance_eval { alias download_dir? download_directory? } # === Bioroebe.download_dir?
80
-
81
- # ========================================================================= #
82
- # === LOCAL_DIRECTORY_FOR_UNIPROT
83
- #
84
- # This denotes the directory for uniprot-files.
85
- # ========================================================================= #
86
- LOCAL_DIRECTORY_FOR_UNIPROT = "#{@log_directory}uniprot/"
87
-
88
- # ========================================================================= #
89
- # === Bioroebe::AUTOGENERATED_SQL_FILES_DIR
90
- # ========================================================================= #
91
- AUTOGENERATED_SQL_FILES_DIR =
92
- "#{@log_directory}autogenerated_sql_files/" # Where to store .sql files.
93
-
94
- # ========================================================================= #
95
- # === Bioroebe.fasta_directory?
96
- #
97
- # This method will return a path such as "/root/Bioroebe/fasta/".
98
- # ========================================================================= #
99
- def self.fasta_directory?
100
- "#{::Bioroebe.log_directory?}fasta/"
101
- end
102
-
103
- end
104
-
105
- if __FILE__ == $PROGRAM_NAME
106
- alias e puts
107
- e Bioroebe.log_directory?
108
- e Bioroebe.download_directory?
109
- end
@@ -1,55 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # require 'bioroebe/toplevel_methods/longest_common_substring.rb'
6
- # =========================================================================== #
7
- module Bioroebe
8
-
9
- # ========================================================================= #
10
- # === Bioroebe.longest_common_substring
11
- #
12
- # This method will return the longest common substring. There may be
13
- # more than one solution, though.
14
- #
15
- # Note that this method was specifically written for Rosalind, so it
16
- # may not be overly useful for most general tasks. For example,
17
- # it is not optimised for speed, so you should probably not use
18
- # it for very long sequences.
19
- # ========================================================================= #
20
- def self.longest_common_substring(sequences)
21
- if sequences and !sequences.empty? and File.file?(sequences.first)
22
- require 'bioroebe/requires/require_all_fasta_and_fastq_files.rb'
23
- hash = parse_fasta_quietly(sequences.first).hash?
24
- sequences = hash.values.map {|entry| entry.delete("\n") }
25
- end
26
- # ======================================================================= #
27
- # First obtain a handle towards the shortest sequence.
28
- # ======================================================================= #
29
- shortest_sequence = sequences.min_by(&:length)
30
- maxlen = shortest_sequence.length
31
- maxlen.downto(1) {|len|
32
- 0.upto(maxlen - len) { |start|
33
- substring = shortest_sequence.to_s[start,len]
34
- if sequences.all? {|seq| seq.to_s.include? substring }
35
- return shortest_sequence.class.new(substring)
36
- end
37
- }
38
- }
39
- return nil
40
- end
41
-
42
- end
43
-
44
- if __FILE__ == $PROGRAM_NAME
45
- alias e puts
46
- if ARGV.empty?
47
- e Bioroebe.longest_common_substring(
48
- %w( GATTACA TAGACCA ATACA )
49
- )
50
- else
51
- e Bioroebe.longest_common_substring(ARGV)
52
- end
53
- end # longestcommonsubstring
54
- # longestcommonsubstring ATTTTT ATTT
55
- # longestcommonsubstring /rosalind_lcsm.txt
@@ -1,88 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # require 'bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb'
6
- # =========================================================================== #
7
- module Bioroebe
8
-
9
- # ========================================================================= #
10
- # === Bioroebe.map_ncbi_entry_to_eutils_id
11
- #
12
- # This is a general "URL mapper" from certain NCBI IDs to specific
13
- # nucleotide sequences. Only some URLs will be mapped, in particular
14
- # those that I use more frequently.
15
- #
16
- # The method was created specifically to allow simpler input via
17
- # the commandline and the bioshell - it is easier to remember
18
- # a name such as "rhinovirus" as opposed to the NC entry, which
19
- # is "NC_038311".
20
- #
21
- # Usage examples:
22
- #
23
- # Bioroebe.map_ncbi_entry_to_eutils_id 'rhinovirus'
24
- # Bioroebe.map_ncbi_entry_to_eutils_id 'T6'
25
- #
26
- # ========================================================================= #
27
- def self.map_ncbi_entry_to_eutils_id(
28
- i = 'NC_001416.1.fasta'
29
- )
30
- case i
31
- # ======================================================================= #
32
- # === Lambda phage genome
33
- # ======================================================================= #
34
- when /^-?-?NC_001416.1.fasta$/i,
35
- 'lambda'
36
- i = '9626243'
37
- # ======================================================================= #
38
- # === P1
39
- # ======================================================================= #
40
- when 'NC_005856.1.fasta',
41
- 'P1'
42
- i = '46401626'
43
- # ======================================================================= #
44
- # === P2
45
- # ======================================================================= #
46
- when 'NC_041848.1.fasta',
47
- 'P2'
48
- i = '1631913463'
49
- # ======================================================================= #
50
- # === T12
51
- # ======================================================================= #
52
- when 'NC_028700.1.fasta',
53
- 'T12'
54
- i = '966201481'
55
- # ======================================================================= #
56
- # === T2
57
- # ======================================================================= #
58
- when 'AP018813.1.fasta',
59
- 'T2'
60
- i = 'AP018813.1'
61
- # ======================================================================= #
62
- # === T4
63
- # ======================================================================= #
64
- when 'NC_000866.4.fasta',
65
- 'T4'
66
- i = 'NC_000866.4'
67
- # ======================================================================= #
68
- # === T6
69
- # ======================================================================= #
70
- when 'T6'
71
- i = 'MH550421.1'
72
- # ======================================================================= #
73
- # === rhinovirus
74
- # ======================================================================= #
75
- when /NC_038311/,
76
- 'rhinovirus'
77
- i = '1464306962'
78
- end
79
- return i
80
- end
81
-
82
- end
83
-
84
- if __FILE__ == $PROGRAM_NAME
85
- alias e puts
86
- e Bioroebe.map_ncbi_entry_to_eutils_id 'rhinovirus'
87
- e Bioroebe.map_ncbi_entry_to_eutils_id 'T6'
88
- end
@@ -1,259 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # Match-related actions on the toplevel "namespace" are collected in this
6
- # file here.
7
- # =========================================================================== #
8
- # require 'bioroebe/toplevel_methods/matches.rb'
9
- # =========================================================================== #
10
- module Bioroebe
11
-
12
- require 'bioroebe/constants/regex.rb'
13
- require 'bioroebe/toplevel_methods/e.rb'
14
- require 'bioroebe/toplevel_methods/download_and_fetch_data.rb'
15
- require 'bioroebe/toplevel_methods/file_and_directory_related_actions.rb'
16
- require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
17
- require 'bioroebe/codons/start_codons.rb'
18
-
19
- # ========================================================================= #
20
- # === Bioroebe.determine_n_glycosylation_matches
21
- #
22
- # This method can be used to determine N-Glycosylation patterns in a
23
- # protein.
24
- #
25
- # The input to this method should be an aminoacid chain - aka a protein
26
- # sequence.
27
- #
28
- # This method will return an Array. This Array holds the indices where
29
- # a N-glycosylation pattern begins.
30
- #
31
- # Usage example:
32
- #
33
- # Bioroebe.determine_n_glycosylation_matches # => [85, 118, 142, 306, 395]
34
- #
35
- # ========================================================================= #
36
- def self.determine_n_glycosylation_matches(
37
- of_this_protein_sequence =
38
- 'MKNKFKTQEELVNHLKTVGFVFANSEIYNGLANAWDYGPLGVLLKNNLKNLWWKEFVTKQKDV'\
39
- 'VGLDSAIILNPLVWKASGHLDNFSDPLIDCKNCKARYRADKLIESFDENIHIAENSSNEEFAK'\
40
- 'VLNDYEISCPTCKQFNWTEIRHFNLMFKTYQGVIEDAKNVVYLRPETAQGIFVNFKNVQRSMR'\
41
- 'LHLPFGIAQIGKSFRNEITPGNFIFRTREFEQMEIEFFLKEESAYDIFDKYLNQIENWLVSAC'\
42
- 'GLSLNNLRKHEHPKEELSHYSKKTIDFEYNFLHGFSELYGIAYRTNYDLSVHMNLSKKDLTYF'\
43
- 'DEQTKEKYVPHVIEPSVGVERLLYAILTEATFIEKLENDDERILMDLKYDLAPYKIAVMPLVN'\
44
- 'KLKDKAEEIYGKILDLNISATFDNSGSIGKRYRRQDAIGTIYCLTIDFDSLDDQQDPSFTIRE'\
45
- 'RNSMAQKRIKLSELPLYLNQKAHEDFQRQCQK'
46
- )
47
- if of_this_protein_sequence.is_a? Array
48
- of_this_protein_sequence.each {|this_sequence|
49
- determine_n_glycosylation_matches(this_sequence)
50
- }
51
- else
52
- scanned = of_this_protein_sequence.scan(
53
- REGEX_FOR_N_GLYCOSYLATION_PATTERN
54
- )
55
- scanned.flatten.uniq.map {|substring|
56
- of_this_protein_sequence.index(substring)+1 # +1 because ruby starts at 0.
57
- }
58
- end
59
- end
60
-
61
- # ========================================================================= #
62
- # === Bioroebe.return_longest_substring_via_LCS_algorithm
63
- #
64
- # This method will return the longest common subsequence of two different
65
- # sequences. It will ignore gaps and will thus not be as sophisticated
66
- # as other algorithms.
67
- #
68
- # This method will return a String.
69
- #
70
- # Usage example in plain ruby:
71
- #
72
- # Bioroebe.return_longest_substring_via_LCS_algorithm 'ATGAAA','ATGCAT' # => 'ATGA'
73
- #
74
- # ========================================================================= #
75
- def self.return_longest_substring_via_LCS_algorithm(
76
- sequence1 = ARGV,
77
- sequence2 = nil
78
- )
79
- begin
80
- require 'diff/lcs'
81
- rescue LoadError; end
82
- if sequence1.is_a? Array
83
- if sequence1.size > 1
84
- sequence2 = sequence1[1]
85
- sequence1 = sequence1[0]
86
- end
87
- end
88
- lcs_object = Diff::LCS.LCS(sequence1, sequence2)
89
- return lcs_object.join # Must return a String.
90
- end
91
-
92
- # ========================================================================= #
93
- # === Bioroebe.return_array_of_sequence_matches
94
- #
95
- # This method can be used to return an Array of subsequence matches.
96
- #
97
- # This method will always return an Array - an empty Array if no
98
- # subsequence match has been found; and an Array with integer numbers
99
- # as indices, to denote which indices contain the subsequence. These
100
- # numbers store the start position of the subsequence.
101
- #
102
- # Keep in mind that for nucleotides we will start at +1, not 0, so
103
- # if you see an Array such as [3, 7] as result then this refers to
104
- # the nucleotide at position 3 and the nucleotide at position 7.
105
- #
106
- # Arguments to this method:
107
- #
108
- # The first argument to this method should be the main sequence.
109
- #
110
- # The second argument to this method should be the subsequence
111
- # that we assume to be part of the main sequence (that is, to
112
- # occur within that main sequence at least once).
113
- #
114
- # Specific invocation examples, with the last example showing
115
- # how an empty Array will be returned:
116
- #
117
- # Bioroebe.return_array_of_sequence_matches('ACGTACGTAACG','GTA') # => [3, 7]
118
- # Bioroebe.return_array_of_sequence_matches('ACGTACGTAACG','GTAAA') # => []
119
- # Bioroebe.return_array_of_sequence_matches('ATGGTGGTGGTGATGATGTGCACTCGGTTCCAGGGGGGTCGTAGGAATTGAGCTAGGACCTCCACTAGGCATCTGGGTTCGAAATTATAAAGGTCAAACGCTACTACCCTGTGTGCTAGCTTTGAAGGGCTGACGCTAGAATTGACGTCGCCGGTGACATTTGCGTCGAGGTGGTAGTAATGTTCCAAGGATGCCGGACGAACGGTACTTACCCTCGTTAGTCAGATCGCCAACCCGAGTCGTCCATGAGGAAAGCGTAATGGAAGAGACCGGGACAGCCCTCCTCAAATGTGCCTGGATACGAGTGATTTACCA','ATG')
120
- #
121
- # ========================================================================= #
122
- def self.return_array_of_sequence_matches(
123
- main_sequence,
124
- subsequence # ← The subsequence that you wish to match onto the main sequence.
125
- )
126
- if main_sequence
127
- array = []
128
- chars = main_sequence.chars
129
- if main_sequence.is_a? ::Bioroebe::Sequence
130
- main_sequence = main_sequence.sequence?
131
- end
132
- chars.each_with_index.select {|char, index|
133
- if main_sequence[index, subsequence.length] == subsequence
134
- array << index+1
135
- end
136
- }
137
- return array
138
- else
139
- []
140
- end
141
- end; self.instance_eval { alias return_indices_of_matches return_array_of_sequence_matches } # === Bioroebe.return_indices_of_matches
142
- self.instance_eval { alias indices_of_matches return_array_of_sequence_matches } # === Bioroebe.indices_of_matches
143
-
144
- # ========================================================================= #
145
- # === Bioroebe.return_all_substring_matches
146
- #
147
- # This method will give us back an Array that contains all matching
148
- # substrings.
149
- #
150
- # By default, the method will search for start codons such as ATG
151
- # or GTG.
152
- # ========================================================================= #
153
- def self.return_all_substring_matches(
154
- this_string,
155
- use_this_as_substring = ::Bioroebe.start_codon?
156
- )
157
- # ======================================================================= #
158
- # We will search for both the DNA and the RNA variant of the given
159
- # substring (for example ATG and AUG). If the following regex appears to
160
- # be complicated to you, here is the old variant for the regex:
161
- #
162
- # use_this_regex = /(ATG|AUG)/i
163
- #
164
- # =================================================================== #
165
- if use_this_as_substring.include? 'T'
166
- use_this_regex = /(#{use_this_as_substring}|#{use_this_as_substring.tr('T','U')})/i
167
- else
168
- use_this_regex = /#{use_this_as_substring}/i
169
- end
170
- result = this_string.to_enum(:scan, use_this_regex).map { |match|
171
- [$`.size + 1, match] # +1 because we refer to the nucleotide positions.
172
- }
173
- return result
174
- end
175
-
176
- # ========================================================================= #
177
- # === Bioroebe.show_n_glycosylation_motifs
178
- #
179
- # The argument to this method should be a local file, which
180
- # stores the IDs of the proteins. Only use one ID per given
181
- # line, though.
182
- #
183
- # This method has been inspired by this Rosalind task:
184
- #
185
- # http://rosalind.info/problems/mprt/
186
- #
187
- # ========================================================================= #
188
- def self.show_n_glycosylation_motifs(
189
- from_this_file = 'foobar.fasta'
190
- )
191
- if from_this_file.is_a? Array
192
- from_this_file = from_this_file.first
193
- end
194
- from_this_file = from_this_file.to_s
195
- if File.exist? from_this_file
196
- result = ''.dup
197
- dataset = File.readlines(from_this_file).reject {|entry| entry.strip.empty? }
198
- index = 0
199
- dataset.each {|this_id_for_the_protein| index += 1
200
- this_id_for_the_protein.strip!
201
- # =================================================================== #
202
- # Fetch the remote dataset from uniprot.
203
- # =================================================================== #
204
- e
205
- e royalblue(
206
- index.to_s.rjust(2)+') '
207
- )
208
- file_location = ::Bioroebe.fetch_data_from_uniprot(this_id_for_the_protein)
209
- parsed_fasta_sequence = ::Bioroebe::ParseFasta.new(file_location) { :be_quiet }.first_value
210
- array_of_indices = ::Bioroebe.determine_n_glycosylation_matches(parsed_fasta_sequence)
211
- # =================================================================== #
212
- # Next, this is the output that Rosalind wants to have.
213
- # =================================================================== #
214
- unless array_of_indices.empty?
215
- result << this_id_for_the_protein+"\n"
216
- result << array_of_indices.join(' ')+"\n"
217
- end
218
- }
219
- e result # And display that result finally.
220
- else
221
- no_file_exists_at(from_this_file)
222
- end
223
- end; self.instance_eval { alias glycosylation show_n_glycosylation_motifs } # === Bioroebe.glycosylation
224
-
225
- end
226
-
227
- if __FILE__ == $PROGRAM_NAME
228
- pp Bioroebe.determine_n_glycosylation_matches
229
- Bioroebe.show_n_glycosylation_motifs(ARGV)
230
- # ========================================================================= #
231
- # Some test-code.
232
- # ========================================================================= #
233
- require 'colours/autoinclude'
234
- input_sequence = 'TGCAGAAAAAAAAAAAGCCCCCCCCGTTTTTTTTATGCCCCCCGCGGGGGGGGATGTGGGGGC'
235
- e rev+'The input sequence is:'
236
- e
237
- e " #{sfancy(input_sequence)}"
238
- e
239
- array_matches = Bioroebe.return_all_substring_matches(input_sequence)
240
- e 'The first match (for the sequence '+::Bioroebe.start_codon?+') would yield:'
241
- e
242
- e " #{sfancy(input_sequence[(array_matches.first.first-1) .. -1])}"
243
- e
244
- e 'All matches found with a start codon are contained in the following Array:'
245
- e
246
- print ' '; pp array_matches
247
- e
248
- e 'Next, testing Bioroebe.return_array_of_sequence_matches(ARGV[0], ARGV[1])'
249
- e
250
- pp Bioroebe.return_array_of_sequence_matches(ARGV[0], ARGV[1])
251
- end # determinenglycosylationmatches
252
- # determinenglycosylationmatches /UNIPROT.md
253
- # determinenglycosylationmatches /HERE.md
254
- # determinenglycosylationmatches /ABC.md
255
- # determinenglycosylationmatches /rosalind_mprt.txt
256
- #
257
- # return_array_of_sequence_matches AUGACGTACGTAACG AUG # => [1]
258
- # return_array_of_sequence_matches ACGTACGTAACG GTA # => [3, 7]
259
- # rb matches.rb ACGTACGTAACG GTA