bioroebe 0.10.80

Sign up to get free protection for your applications and to get access to all the features.
Files changed (802) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +428 -0
  3. data/README.md +9280 -0
  4. data/bin/advanced_dotplot +7 -0
  5. data/bin/align_open_reading_frames +12 -0
  6. data/bin/all_positions_of_this_nucleotide +7 -0
  7. data/bin/aminoacid_composition +7 -0
  8. data/bin/aminoacid_frequencies +12 -0
  9. data/bin/aminoacid_substitution +7 -0
  10. data/bin/automatically_rename_this_fasta_file +7 -0
  11. data/bin/base_composition +7 -0
  12. data/bin/batch_create_windows_executables +7 -0
  13. data/bin/biomart_console +11 -0
  14. data/bin/bioroebe +27 -0
  15. data/bin/bioroebe_controller +10 -0
  16. data/bin/bioshell +26 -0
  17. data/bin/blosum_2D_table +12 -0
  18. data/bin/calculate_n50_value +12 -0
  19. data/bin/chunked_display +12 -0
  20. data/bin/codon_frequency +9 -0
  21. data/bin/codon_to_aminoacid +30 -0
  22. data/bin/colourize_this_fasta_sequence +14 -0
  23. data/bin/compact_fasta_file +7 -0
  24. data/bin/complement +7 -0
  25. data/bin/complementary_dna_strand +12 -0
  26. data/bin/complementary_rna_strand +12 -0
  27. data/bin/compseq +7 -0
  28. data/bin/consensus_sequence +17 -0
  29. data/bin/count_AT +12 -0
  30. data/bin/count_GC +12 -0
  31. data/bin/create_random_aminoacids +7 -0
  32. data/bin/decode_this_aminoacid_sequence +20 -0
  33. data/bin/deduce_aminoacid_sequence +13 -0
  34. data/bin/deduce_most_likely_aminoacid_sequence +7 -0
  35. data/bin/display_aminoacid_table +12 -0
  36. data/bin/display_open_reading_frames +7 -0
  37. data/bin/dna_to_aminoacid_sequence +7 -0
  38. data/bin/dna_to_rna +7 -0
  39. data/bin/downcase_chunked_display +12 -0
  40. data/bin/download_this_pdb +7 -0
  41. data/bin/fasta_index +7 -0
  42. data/bin/fetch_data_from_uniprot +12 -0
  43. data/bin/filter_away_invalid_nucleotides +12 -0
  44. data/bin/find_substring +19 -0
  45. data/bin/genbank_to_fasta +7 -0
  46. data/bin/hamming_distance +12 -0
  47. data/bin/input_as_dna +12 -0
  48. data/bin/is_palindrome +13 -0
  49. data/bin/leading_five_prime +7 -0
  50. data/bin/levensthein +7 -0
  51. data/bin/longest_ORF +14 -0
  52. data/bin/longest_substring +12 -0
  53. data/bin/n_stop_codons_in_this_sequence +15 -0
  54. data/bin/open_reading_frames +14 -0
  55. data/bin/overwrite_fasta_header +7 -0
  56. data/bin/palindrome_2D_structure +7 -0
  57. data/bin/palindrome_generator +7 -0
  58. data/bin/parse_fasta +7 -0
  59. data/bin/partner_nucleotide +9 -0
  60. data/bin/possible_codons_for_this_aminoacid +12 -0
  61. data/bin/random_dna_sequence +12 -0
  62. data/bin/random_sequence +12 -0
  63. data/bin/raw_hamming_distance +12 -0
  64. data/bin/return_longest_substring_via_LCS_algorithm +7 -0
  65. data/bin/reverse_complement +7 -0
  66. data/bin/reverse_sequence +7 -0
  67. data/bin/ruler +12 -0
  68. data/bin/scan_this_input_for_startcodons +12 -0
  69. data/bin/short_aminoacid_letter_from_long_aminoacid_name +7 -0
  70. data/bin/show_atomic_composition +7 -0
  71. data/bin/show_codon_usage +12 -0
  72. data/bin/show_fasta_header +7 -0
  73. data/bin/show_nucleotide_sequence +7 -0
  74. data/bin/show_this_codon_table +7 -0
  75. data/bin/show_this_dna_sequence +7 -0
  76. data/bin/showorf +14 -0
  77. data/bin/simplify_fasta +7 -0
  78. data/bin/sort_aminoacid_based_on_its_hydrophobicity +7 -0
  79. data/bin/split_this_fasta_file_into_chromosomes +7 -0
  80. data/bin/strict_filter_away_invalid_aminoacids +7 -0
  81. data/bin/taxonomy +63 -0
  82. data/bin/three_to_one +7 -0
  83. data/bin/to_rna +7 -0
  84. data/bin/trailing_three_prime +7 -0
  85. data/bin/trypsin_digest +7 -0
  86. data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +7 -0
  87. data/bioroebe.gemspec +97 -0
  88. data/doc/IUPAC_aminoacids_code.md +36 -0
  89. data/doc/IUPAC_nucleotide_code.md +19 -0
  90. data/doc/README.gen +9237 -0
  91. data/doc/blosum.md +5 -0
  92. data/doc/compatibility/BIO_PHP.md +37 -0
  93. data/doc/compatibility/README.md +3 -0
  94. data/doc/compatibility/emboss.md +56 -0
  95. data/doc/extensive_usage_example.md +35 -0
  96. data/doc/german_names_for_the_aminoacids.md +27 -0
  97. data/doc/instructions_for_the_taxonomy_subproject.md +504 -0
  98. data/doc/legacy_paths.md +9 -0
  99. data/doc/pdb_ATOM_entry.md +33 -0
  100. data/doc/quality_control/README.md +2 -0
  101. data/doc/quality_control/commandline_applications.md +13 -0
  102. data/doc/resources.md +23 -0
  103. data/doc/setup.rb +1655 -0
  104. data/doc/statistics/statistics.md +41 -0
  105. data/doc/todo/README.md +5 -0
  106. data/doc/todo/bioroebe_GUI_todo.md +15 -0
  107. data/doc/todo/bioroebe_todo.md +2823 -0
  108. data/doc/using_biomart.md +258 -0
  109. data/html/test.html +144 -0
  110. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +118 -0
  111. data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +118 -0
  112. data/lib/bioroebe/aminoacids/codon_percentage.rb +189 -0
  113. data/lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb +110 -0
  114. data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +221 -0
  115. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +460 -0
  116. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +231 -0
  117. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +184 -0
  118. data/lib/bioroebe/annotations/README.md +2 -0
  119. data/lib/bioroebe/annotations/create_annotation_format.rb +208 -0
  120. data/lib/bioroebe/autoinclude.rb +7 -0
  121. data/lib/bioroebe/base/base.rb +35 -0
  122. data/lib/bioroebe/base/colours.rb +14 -0
  123. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +275 -0
  124. data/lib/bioroebe/base/commandline_application/README.md +7 -0
  125. data/lib/bioroebe/base/commandline_application/aminoacids.rb +33 -0
  126. data/lib/bioroebe/base/commandline_application/commandline_application.rb +37 -0
  127. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +144 -0
  128. data/lib/bioroebe/base/commandline_application/directory.rb +33 -0
  129. data/lib/bioroebe/base/commandline_application/extract.rb +22 -0
  130. data/lib/bioroebe/base/commandline_application/misc.rb +485 -0
  131. data/lib/bioroebe/base/commandline_application/opn.rb +47 -0
  132. data/lib/bioroebe/base/commandline_application/reset.rb +40 -0
  133. data/lib/bioroebe/base/commandline_application/warnings.rb +36 -0
  134. data/lib/bioroebe/base/commandline_application/write_what_into.rb +29 -0
  135. data/lib/bioroebe/base/initialize.rb +18 -0
  136. data/lib/bioroebe/base/misc.rb +94 -0
  137. data/lib/bioroebe/base/namespace.rb +16 -0
  138. data/lib/bioroebe/base/prototype/README.md +12 -0
  139. data/lib/bioroebe/base/prototype/e_and_ee.rb +24 -0
  140. data/lib/bioroebe/base/prototype/misc.rb +108 -0
  141. data/lib/bioroebe/base/prototype/mkdir.rb +20 -0
  142. data/lib/bioroebe/base/prototype/prototype.rb +21 -0
  143. data/lib/bioroebe/base/prototype/reset.rb +26 -0
  144. data/lib/bioroebe/base/reset.rb +11 -0
  145. data/lib/bioroebe/biomart/LICENSE.md +27 -0
  146. data/lib/bioroebe/biomart/attribute.rb +77 -0
  147. data/lib/bioroebe/biomart/biomart.rb +227 -0
  148. data/lib/bioroebe/biomart/database.rb +128 -0
  149. data/lib/bioroebe/biomart/dataset.rb +572 -0
  150. data/lib/bioroebe/biomart/filter.rb +97 -0
  151. data/lib/bioroebe/biomart/server.rb +152 -0
  152. data/lib/bioroebe/blosum/blosum.rb +88 -0
  153. data/lib/bioroebe/calculate/calculate_blosum_score.rb +145 -0
  154. data/lib/bioroebe/calculate/calculate_gc_content.rb +301 -0
  155. data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +100 -0
  156. data/lib/bioroebe/calculate/calculate_melting_temperature.rb +398 -0
  157. data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +304 -0
  158. data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +166 -0
  159. data/lib/bioroebe/cell/README.md +1 -0
  160. data/lib/bioroebe/cell/cell.rb +63 -0
  161. data/lib/bioroebe/cleave_and_digest/README.md +2 -0
  162. data/lib/bioroebe/cleave_and_digest/cleave.rb +80 -0
  163. data/lib/bioroebe/cleave_and_digest/digestion.rb +75 -0
  164. data/lib/bioroebe/cleave_and_digest/trypsin.rb +192 -0
  165. data/lib/bioroebe/codon_tables/README.md +9 -0
  166. data/lib/bioroebe/codon_tables/frequencies/287_Pseudomonas_aeruginosa.yml +101 -0
  167. data/lib/bioroebe/codon_tables/frequencies/3702_Arabidopsis_thaliana.yml +77 -0
  168. data/lib/bioroebe/codon_tables/frequencies/4932_Saccharomyces_cerevisiae.yml +103 -0
  169. data/lib/bioroebe/codon_tables/frequencies/7227_Drosophila_melanogaster.yml +71 -0
  170. data/lib/bioroebe/codon_tables/frequencies/83333_Escherichia_coli_K12.yml +103 -0
  171. data/lib/bioroebe/codon_tables/frequencies/9606_Homo_sapiens.yml +123 -0
  172. data/lib/bioroebe/codon_tables/frequencies/9685_Felis_catus.yml +78 -0
  173. data/lib/bioroebe/codon_tables/frequencies/README.md +10 -0
  174. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +337 -0
  175. data/lib/bioroebe/codons/README.md +28 -0
  176. data/lib/bioroebe/codons/codon_table.rb +416 -0
  177. data/lib/bioroebe/codons/codon_tables.rb +123 -0
  178. data/lib/bioroebe/codons/codons.rb +517 -0
  179. data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +102 -0
  180. data/lib/bioroebe/codons/detect_minimal_codon.rb +180 -0
  181. data/lib/bioroebe/codons/determine_optimal_codons.rb +74 -0
  182. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +380 -0
  183. data/lib/bioroebe/codons/sanitize_codon_frequency.rb +144 -0
  184. data/lib/bioroebe/codons/show_codon_tables.rb +130 -0
  185. data/lib/bioroebe/codons/show_codon_usage.rb +197 -0
  186. data/lib/bioroebe/codons/show_this_codon_table.rb +573 -0
  187. data/lib/bioroebe/codons/start_codons.rb +105 -0
  188. data/lib/bioroebe/colours/colour_schemes/README.md +10 -0
  189. data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +38 -0
  190. data/lib/bioroebe/colours/colour_schemes/buried.rb +70 -0
  191. data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +101 -0
  192. data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +262 -0
  193. data/lib/bioroebe/colours/colour_schemes/helix.rb +65 -0
  194. data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +70 -0
  195. data/lib/bioroebe/colours/colour_schemes/nucleotide.rb +47 -0
  196. data/lib/bioroebe/colours/colour_schemes/score.rb +112 -0
  197. data/lib/bioroebe/colours/colour_schemes/simple.rb +42 -0
  198. data/lib/bioroebe/colours/colour_schemes/strand.rb +65 -0
  199. data/lib/bioroebe/colours/colour_schemes/taylor.rb +58 -0
  200. data/lib/bioroebe/colours/colour_schemes/turn.rb +65 -0
  201. data/lib/bioroebe/colours/colour_schemes/zappo.rb +59 -0
  202. data/lib/bioroebe/colours/colourize_sequence.rb +262 -0
  203. data/lib/bioroebe/colours/colours.rb +119 -0
  204. data/lib/bioroebe/colours/misc_colours.rb +80 -0
  205. data/lib/bioroebe/colours/rev.rb +41 -0
  206. data/lib/bioroebe/colours/sdir.rb +21 -0
  207. data/lib/bioroebe/colours/sfancy.rb +21 -0
  208. data/lib/bioroebe/colours/sfile.rb +21 -0
  209. data/lib/bioroebe/colours/simp.rb +21 -0
  210. data/lib/bioroebe/colours/swarn.rb +29 -0
  211. data/lib/bioroebe/colours/use_colours.rb +27 -0
  212. data/lib/bioroebe/configuration/configuration.rb +114 -0
  213. data/lib/bioroebe/configuration/constants.rb +35 -0
  214. data/lib/bioroebe/constants/GUIs.rb +79 -0
  215. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +146 -0
  216. data/lib/bioroebe/constants/base_directory.rb +120 -0
  217. data/lib/bioroebe/constants/carriage_return.rb +14 -0
  218. data/lib/bioroebe/constants/codon_tables.rb +77 -0
  219. data/lib/bioroebe/constants/database_constants.rb +107 -0
  220. data/lib/bioroebe/constants/files_and_directories.rb +579 -0
  221. data/lib/bioroebe/constants/misc.rb +189 -0
  222. data/lib/bioroebe/constants/newline.rb +14 -0
  223. data/lib/bioroebe/constants/nucleotides.rb +114 -0
  224. data/lib/bioroebe/constants/regex.rb +28 -0
  225. data/lib/bioroebe/constants/roebe.rb +38 -0
  226. data/lib/bioroebe/constants/row_terminator.rb +16 -0
  227. data/lib/bioroebe/constants/tabulator.rb +14 -0
  228. data/lib/bioroebe/constants/unicode.rb +12 -0
  229. data/lib/bioroebe/constants/urls.rb +50 -0
  230. data/lib/bioroebe/conversions/README.md +3 -0
  231. data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +298 -0
  232. data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +569 -0
  233. data/lib/bioroebe/count/README.md +1 -0
  234. data/lib/bioroebe/count/count_amount_of_aminoacids.rb +352 -0
  235. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +491 -0
  236. data/lib/bioroebe/count/count_at.rb +39 -0
  237. data/lib/bioroebe/count/count_gc.rb +43 -0
  238. data/lib/bioroebe/css/README.md +5 -0
  239. data/lib/bioroebe/css/project.css +121 -0
  240. data/lib/bioroebe/data/README.md +10 -0
  241. data/lib/bioroebe/data/bam/README.md +1 -0
  242. data/lib/bioroebe/data/data.txt +192 -0
  243. data/lib/bioroebe/data/fasta/GFP_mutant_3_coding_sequence.fasta +12 -0
  244. data/lib/bioroebe/data/fasta/alu_elements.fasta +42 -0
  245. data/lib/bioroebe/data/fasta/lady_slippers_orchid.fasta +1197 -0
  246. data/lib/bioroebe/data/fasta/loxP.fasta +2 -0
  247. data/lib/bioroebe/data/fasta/ls_orchid.fasta +1197 -0
  248. data/lib/bioroebe/data/fasta/pax6_in_mouse.fasta +1 -0
  249. data/lib/bioroebe/data/fasta/test.fasta +7 -0
  250. data/lib/bioroebe/data/fasta/test_DNA.fasta +1 -0
  251. data/lib/bioroebe/data/fastq/fastq_example_file.fastq +32 -0
  252. data/lib/bioroebe/data/fastq/fastq_example_file_SP1.fastq +1000 -0
  253. data/lib/bioroebe/data/fastq/one_random_fastq_entry.fastq +4 -0
  254. data/lib/bioroebe/data/genbank/sample_file.genbank +15 -0
  255. data/lib/bioroebe/data/genbank/standard.fasta +3 -0
  256. data/lib/bioroebe/data/gff/Escherichia_coli_K12_plasmid_F_DNA_NC_002483.1.gff3 +345 -0
  257. data/lib/bioroebe/data/gff/sample.gff +2 -0
  258. data/lib/bioroebe/data/gff/test_gene.gff +4 -0
  259. data/lib/bioroebe/data/gff/transcripts.gff +16 -0
  260. data/lib/bioroebe/data/gtf/README.md +1 -0
  261. data/lib/bioroebe/data/json/example_config.json +48 -0
  262. data/lib/bioroebe/data/pdb/1VII.pdb +754 -0
  263. data/lib/bioroebe/data/pdb/ala_phe_ala.pdb +228 -0
  264. data/lib/bioroebe/data/pdb/rcsb_pdb_1VII.fasta +2 -0
  265. data/lib/bioroebe/data/phylip/README.md +11 -0
  266. data/lib/bioroebe/data/phylip/example.phylip +7 -0
  267. data/lib/bioroebe/data/svg/example.svg +301 -0
  268. data/lib/bioroebe/databases/README.md +1 -0
  269. data/lib/bioroebe/databases/download_taxonomy_database.rb +102 -0
  270. data/lib/bioroebe/dotplots/README.md +5 -0
  271. data/lib/bioroebe/dotplots/advanced_dotplot.rb +256 -0
  272. data/lib/bioroebe/dotplots/dotplot.rb +184 -0
  273. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +191 -0
  274. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +102 -0
  275. data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +122 -0
  276. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +197 -0
  277. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +282 -0
  278. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +131 -0
  279. data/lib/bioroebe/encoding/README.md +2 -0
  280. data/lib/bioroebe/encoding/encoding.rb +45 -0
  281. data/lib/bioroebe/enzymes/README.md +2 -0
  282. data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +46 -0
  283. data/lib/bioroebe/enzymes/restriction_enzyme.rb +200 -0
  284. data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +72 -0
  285. data/lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb +80 -0
  286. data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +65 -0
  287. data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +119 -0
  288. data/lib/bioroebe/exceptions/README.md +2 -0
  289. data/lib/bioroebe/exceptions/exceptions.rb +17 -0
  290. data/lib/bioroebe/ext/LICENCE.md +5 -0
  291. data/lib/bioroebe/ext/README.md +7 -0
  292. data/lib/bioroebe/ext/main.cpp +45 -0
  293. data/lib/bioroebe/ext/nucleotide.cpp +24 -0
  294. data/lib/bioroebe/ext/nussinov_algorithm.cpp +348 -0
  295. data/lib/bioroebe/ext/sequence +0 -0
  296. data/lib/bioroebe/ext/sequence.cpp +162 -0
  297. data/lib/bioroebe/fasta_and_fastq/README.md +6 -0
  298. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +88 -0
  299. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +151 -0
  300. data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +111 -0
  301. data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +248 -0
  302. data/lib/bioroebe/fasta_and_fastq/fasta_defline/README.md +2 -0
  303. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +113 -0
  304. data/lib/bioroebe/fasta_and_fastq/fasta_parser.rb +167 -0
  305. data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +131 -0
  306. data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +87 -0
  307. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +50 -0
  308. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +86 -0
  309. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +117 -0
  310. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +981 -0
  311. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +27 -0
  312. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +156 -0
  313. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +128 -0
  314. data/lib/bioroebe/fasta_and_fastq/parse_fasta/run.rb +20 -0
  315. data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +83 -0
  316. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +112 -0
  317. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +135 -0
  318. data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +188 -0
  319. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +111 -0
  320. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +26 -0
  321. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb +41 -0
  322. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb +23 -0
  323. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +68 -0
  324. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +290 -0
  325. data/lib/bioroebe/genbank/README.md +1 -0
  326. data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +275 -0
  327. data/lib/bioroebe/genbank/genbank_parser.rb +291 -0
  328. data/lib/bioroebe/gene/gene.rb +64 -0
  329. data/lib/bioroebe/genomes/genome_pattern.rb +165 -0
  330. data/lib/bioroebe/genomes/genome_retriever.rb +79 -0
  331. data/lib/bioroebe/gui/experimental/README.md +1 -0
  332. data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +147 -0
  333. data/lib/bioroebe/gui/gtk3/README.md +2 -0
  334. data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +337 -0
  335. data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +510 -0
  336. data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +55 -0
  337. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.config +6 -0
  338. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +29 -0
  339. data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +196 -0
  340. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +6 -0
  341. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +106 -0
  342. data/lib/bioroebe/gui/gtk3/controller/controller.rb +406 -0
  343. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +609 -0
  344. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +6 -0
  345. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +189 -0
  346. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +245 -0
  347. data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +346 -0
  348. data/lib/bioroebe/gui/gtk3/gene/gene.rb +182 -0
  349. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.config +6 -0
  350. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +370 -0
  351. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.config +6 -0
  352. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +175 -0
  353. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +428 -0
  354. data/lib/bioroebe/gui/gtk3/parse_pdb_file/parse_pdb_file.rb +342 -0
  355. data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +580 -0
  356. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.config +6 -0
  357. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +182 -0
  358. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +566 -0
  359. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.config +6 -0
  360. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +329 -0
  361. data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +556 -0
  362. data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +171 -0
  363. data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +146 -0
  364. data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +207 -0
  365. data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +279 -0
  366. data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +23 -0
  367. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.config +6 -0
  368. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +368 -0
  369. data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +82 -0
  370. data/lib/bioroebe/gui/libui/README.md +4 -0
  371. data/lib/bioroebe/gui/libui/alignment/alignment.rb +114 -0
  372. data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +112 -0
  373. data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +60 -0
  374. data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +161 -0
  375. data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +76 -0
  376. data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +135 -0
  377. data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +118 -0
  378. data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +115 -0
  379. data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +174 -0
  380. data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +132 -0
  381. data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +89 -0
  382. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +111 -0
  383. data/lib/bioroebe/gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb +42 -0
  384. data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +216 -0
  385. data/lib/bioroebe/gui/shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb +284 -0
  386. data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +402 -0
  387. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +192 -0
  388. data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +72 -0
  389. data/lib/bioroebe/gui/shared_code/show_codon_usage/show_codon_usage_module.rb +213 -0
  390. data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +206 -0
  391. data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +140 -0
  392. data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +262 -0
  393. data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +243 -0
  394. data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +199 -0
  395. data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +519 -0
  396. data/lib/bioroebe/images/BIOROEBE.png +0 -0
  397. data/lib/bioroebe/images/BIOROEBE_NEW_LOGO.png +0 -0
  398. data/lib/bioroebe/images/BlosumMatrixViewer.png +0 -0
  399. data/lib/bioroebe/images/DnaToAminoacidWidget.png +0 -0
  400. data/lib/bioroebe/images/PRINTED_AMINOACID_TABLE.png +0 -0
  401. data/lib/bioroebe/images/class_ConvertAminoacidToDNA.png +0 -0
  402. data/lib/bioroebe/images/class_SimpleStringComparer.png +0 -0
  403. data/lib/bioroebe/images/example_of_FASTA_coloured_output.png +0 -0
  404. data/lib/bioroebe/images/libui_hamming_distance_widget.png +0 -0
  405. data/lib/bioroebe/images/pretty_DNA_picture.png +0 -0
  406. data/lib/bioroebe/images/primer_design_widget.png +0 -0
  407. data/lib/bioroebe/images/restriction_enzyme_commandline_result.png +0 -0
  408. data/lib/bioroebe/images/ruby-gtk_three_to_one_widget.png +0 -0
  409. data/lib/bioroebe/images/small_DNA_logo.png +0 -0
  410. data/lib/bioroebe/images/small_drosophila_image.png +0 -0
  411. data/lib/bioroebe/java/README.md +6 -0
  412. data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
  413. data/lib/bioroebe/java/bioroebe/AllInOne.java +214 -0
  414. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  415. data/lib/bioroebe/java/bioroebe/Base.java +102 -0
  416. data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.class +0 -0
  417. data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.java +23 -0
  418. data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
  419. data/lib/bioroebe/java/bioroebe/Codons.class +0 -0
  420. data/lib/bioroebe/java/bioroebe/Codons.java +22 -0
  421. data/lib/bioroebe/java/bioroebe/Esystem.class +0 -0
  422. data/lib/bioroebe/java/bioroebe/Esystem.java +47 -0
  423. data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.class +0 -0
  424. data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.java +65 -0
  425. data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.class +0 -0
  426. data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.java +32 -0
  427. data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
  428. data/lib/bioroebe/java/bioroebe/IsPalindrome.java +18 -0
  429. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  430. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +19 -0
  431. data/lib/bioroebe/java/bioroebe/README.md +4 -0
  432. data/lib/bioroebe/java/bioroebe/RemoveFile.class +0 -0
  433. data/lib/bioroebe/java/bioroebe/RemoveFile.java +24 -0
  434. data/lib/bioroebe/java/bioroebe/RemoveNumbers.class +0 -0
  435. data/lib/bioroebe/java/bioroebe/RemoveNumbers.java +14 -0
  436. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
  437. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +20 -0
  438. data/lib/bioroebe/java/bioroebe/SaveFile.java +44 -0
  439. data/lib/bioroebe/java/bioroebe/Sequence.java +28 -0
  440. data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
  441. data/lib/bioroebe/java/bioroebe/ToCamelcase.java +32 -0
  442. data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
  443. data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +15 -0
  444. data/lib/bioroebe/java/bioroebe/enums/DNA.java +6 -0
  445. data/lib/bioroebe/java/bioroebe.jar +0 -0
  446. data/lib/bioroebe/matplotlib/matplotlib_generator.rb +104 -0
  447. data/lib/bioroebe/misc/quiz/README.md +6 -0
  448. data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +163 -0
  449. data/lib/bioroebe/misc/ruler.rb +244 -0
  450. data/lib/bioroebe/misc/useful_formulas.rb +129 -0
  451. data/lib/bioroebe/ncbi/efetch.rb +253 -0
  452. data/lib/bioroebe/ncbi/ncbi.rb +93 -0
  453. data/lib/bioroebe/ngs/README.md +2 -0
  454. data/lib/bioroebe/ngs/phred_quality_score_table.rb +123 -0
  455. data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +166 -0
  456. data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +135 -0
  457. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +198 -0
  458. data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +133 -0
  459. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +556 -0
  460. data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +139 -0
  461. data/lib/bioroebe/palindromes/palindrome_finder.rb +208 -0
  462. data/lib/bioroebe/palindromes/palindrome_generator.rb +272 -0
  463. data/lib/bioroebe/parsers/biolang_parser.rb +156 -0
  464. data/lib/bioroebe/parsers/blosum_parser.rb +222 -0
  465. data/lib/bioroebe/parsers/genbank_parser.rb +78 -0
  466. data/lib/bioroebe/parsers/gff.rb +346 -0
  467. data/lib/bioroebe/parsers/parse_embl.rb +76 -0
  468. data/lib/bioroebe/parsers/stride_parser.rb +117 -0
  469. data/lib/bioroebe/patterns/README.md +5 -0
  470. data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +149 -0
  471. data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +66 -0
  472. data/lib/bioroebe/patterns/profile_pattern.rb +182 -0
  473. data/lib/bioroebe/patterns/rgg_scanner.rb +160 -0
  474. data/lib/bioroebe/patterns/scan_for_repeat.rb +157 -0
  475. data/lib/bioroebe/pdb/download_this_pdb.rb +67 -0
  476. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +164 -0
  477. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +63 -0
  478. data/lib/bioroebe/pdb/parse_pdb_file.rb +1086 -0
  479. data/lib/bioroebe/pdb/report_secondary_structures_from_this_pdb_file.rb +225 -0
  480. data/lib/bioroebe/perl/README.md +7 -0
  481. data/lib/bioroebe/perl/local_to_global.pl +694 -0
  482. data/lib/bioroebe/project/project.rb +264 -0
  483. data/lib/bioroebe/protein_structure/alpha_helix.rb +96 -0
  484. data/lib/bioroebe/protein_structure/helical_wheel.rb +205 -0
  485. data/lib/bioroebe/raw_sequence/README.md +17 -0
  486. data/lib/bioroebe/raw_sequence/raw_sequence.rb +557 -0
  487. data/lib/bioroebe/readline/README.md +2 -0
  488. data/lib/bioroebe/readline/readline.rb +31 -0
  489. data/lib/bioroebe/regexes/README.md +2 -0
  490. data/lib/bioroebe/regexes/regexes.rb +34 -0
  491. data/lib/bioroebe/requires/commandline_application.rb +5 -0
  492. data/lib/bioroebe/requires/require_all_aminoacids_files.rb +28 -0
  493. data/lib/bioroebe/requires/require_all_calculate_files.rb +26 -0
  494. data/lib/bioroebe/requires/require_all_codon_files.rb +26 -0
  495. data/lib/bioroebe/requires/require_all_colour_scheme_files.rb +26 -0
  496. data/lib/bioroebe/requires/require_all_count_files.rb +26 -0
  497. data/lib/bioroebe/requires/require_all_dotplot_files.rb +28 -0
  498. data/lib/bioroebe/requires/require_all_electron_microscopy_files.rb +26 -0
  499. data/lib/bioroebe/requires/require_all_enzymes_files.rb +28 -0
  500. data/lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb +32 -0
  501. data/lib/bioroebe/requires/require_all_nucleotides_files.rb +28 -0
  502. data/lib/bioroebe/requires/require_all_palindromes_files.rb +29 -0
  503. data/lib/bioroebe/requires/require_all_parser_files.rb +28 -0
  504. data/lib/bioroebe/requires/require_all_pattern_files.rb +29 -0
  505. data/lib/bioroebe/requires/require_all_pdb_files.rb +26 -0
  506. data/lib/bioroebe/requires/require_all_sequence_files.rb +26 -0
  507. data/lib/bioroebe/requires/require_all_string_matching_files.rb +28 -0
  508. data/lib/bioroebe/requires/require_all_svg_files.rb +12 -0
  509. data/lib/bioroebe/requires/require_all_taxonomy_files.rb +35 -0
  510. data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +32 -0
  511. data/lib/bioroebe/requires/require_cleave_and_digest.rb +24 -0
  512. data/lib/bioroebe/requires/require_colours.rb +20 -0
  513. data/lib/bioroebe/requires/require_encoding.rb +7 -0
  514. data/lib/bioroebe/requires/require_sequence.rb +7 -0
  515. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +162 -0
  516. data/lib/bioroebe/requires/require_the_bioroebe_shell.rb +7 -0
  517. data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +7 -0
  518. data/lib/bioroebe/requires/require_the_constants.rb +23 -0
  519. data/lib/bioroebe/requires/require_the_toplevel_methods.rb +29 -0
  520. data/lib/bioroebe/requires/require_yaml.rb +94 -0
  521. data/lib/bioroebe/sequence/alignment.rb +214 -0
  522. data/lib/bioroebe/sequence/dna.rb +211 -0
  523. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +404 -0
  524. data/lib/bioroebe/sequence/protein.rb +281 -0
  525. data/lib/bioroebe/sequence/reverse_complement.rb +148 -0
  526. data/lib/bioroebe/sequence/sequence.rb +706 -0
  527. data/lib/bioroebe/shell/add.rb +108 -0
  528. data/lib/bioroebe/shell/assign.rb +360 -0
  529. data/lib/bioroebe/shell/chop_and_cut.rb +281 -0
  530. data/lib/bioroebe/shell/colours/colours.rb +235 -0
  531. data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +1 -0
  532. data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +1 -0
  533. data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +1 -0
  534. data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -0
  535. data/lib/bioroebe/shell/constants.rb +166 -0
  536. data/lib/bioroebe/shell/download.rb +335 -0
  537. data/lib/bioroebe/shell/enable_and_disable.rb +158 -0
  538. data/lib/bioroebe/shell/enzymes.rb +310 -0
  539. data/lib/bioroebe/shell/fasta.rb +345 -0
  540. data/lib/bioroebe/shell/gtk.rb +76 -0
  541. data/lib/bioroebe/shell/help/class.rb +443 -0
  542. data/lib/bioroebe/shell/help/help.rb +25 -0
  543. data/lib/bioroebe/shell/history.rb +132 -0
  544. data/lib/bioroebe/shell/initialize.rb +217 -0
  545. data/lib/bioroebe/shell/loop.rb +74 -0
  546. data/lib/bioroebe/shell/menu.rb +5320 -0
  547. data/lib/bioroebe/shell/misc.rb +4341 -0
  548. data/lib/bioroebe/shell/prompt.rb +107 -0
  549. data/lib/bioroebe/shell/random.rb +289 -0
  550. data/lib/bioroebe/shell/readline/readline.rb +91 -0
  551. data/lib/bioroebe/shell/reset.rb +335 -0
  552. data/lib/bioroebe/shell/scan_and_parse.rb +135 -0
  553. data/lib/bioroebe/shell/search.rb +337 -0
  554. data/lib/bioroebe/shell/sequences.rb +200 -0
  555. data/lib/bioroebe/shell/shell.rb +41 -0
  556. data/lib/bioroebe/shell/show_report_and_display.rb +2901 -0
  557. data/lib/bioroebe/shell/startup.rb +127 -0
  558. data/lib/bioroebe/shell/taxonomy.rb +14 -0
  559. data/lib/bioroebe/shell/tk.rb +23 -0
  560. data/lib/bioroebe/shell/user_input.rb +88 -0
  561. data/lib/bioroebe/shell/xorg.rb +45 -0
  562. data/lib/bioroebe/siRNA/README.md +2 -0
  563. data/lib/bioroebe/siRNA/siRNA.rb +93 -0
  564. data/lib/bioroebe/string_matching/README.md +13 -0
  565. data/lib/bioroebe/string_matching/find_longest_substring.rb +162 -0
  566. data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +175 -0
  567. data/lib/bioroebe/string_matching/hamming_distance.rb +313 -0
  568. data/lib/bioroebe/string_matching/levensthein.rb +698 -0
  569. data/lib/bioroebe/string_matching/simple_string_comparer.rb +294 -0
  570. data/lib/bioroebe/string_matching/smith_waterman.rb +276 -0
  571. data/lib/bioroebe/svg/README.md +1 -0
  572. data/lib/bioroebe/svg/glyph.rb +719 -0
  573. data/lib/bioroebe/svg/mini_feature.rb +111 -0
  574. data/lib/bioroebe/svg/page.rb +570 -0
  575. data/lib/bioroebe/svg/primitive.rb +70 -0
  576. data/lib/bioroebe/svg/svgee.rb +326 -0
  577. data/lib/bioroebe/svg/track.rb +263 -0
  578. data/lib/bioroebe/taxonomy/README.md +1 -0
  579. data/lib/bioroebe/taxonomy/chart.rb +95 -0
  580. data/lib/bioroebe/taxonomy/class_methods.rb +181 -0
  581. data/lib/bioroebe/taxonomy/colours.rb +26 -0
  582. data/lib/bioroebe/taxonomy/constants.rb +218 -0
  583. data/lib/bioroebe/taxonomy/edit.rb +97 -0
  584. data/lib/bioroebe/taxonomy/help/help.rb +65 -0
  585. data/lib/bioroebe/taxonomy/help/helpline.rb +53 -0
  586. data/lib/bioroebe/taxonomy/info/check_available.rb +143 -0
  587. data/lib/bioroebe/taxonomy/info/info.rb +337 -0
  588. data/lib/bioroebe/taxonomy/info/is_dna.rb +150 -0
  589. data/lib/bioroebe/taxonomy/interactive.rb +1933 -0
  590. data/lib/bioroebe/taxonomy/menu.rb +905 -0
  591. data/lib/bioroebe/taxonomy/node.rb +118 -0
  592. data/lib/bioroebe/taxonomy/parse_fasta.rb +383 -0
  593. data/lib/bioroebe/taxonomy/shared.rb +287 -0
  594. data/lib/bioroebe/taxonomy/taxonomy.rb +521 -0
  595. data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +56 -0
  596. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +715 -0
  597. data/lib/bioroebe/toplevel_methods/atomic_composition.rb +198 -0
  598. data/lib/bioroebe/toplevel_methods/base_composition.rb +121 -0
  599. data/lib/bioroebe/toplevel_methods/blast.rb +153 -0
  600. data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +57 -0
  601. data/lib/bioroebe/toplevel_methods/cat.rb +71 -0
  602. data/lib/bioroebe/toplevel_methods/chunked_display.rb +92 -0
  603. data/lib/bioroebe/toplevel_methods/cliner.rb +81 -0
  604. data/lib/bioroebe/toplevel_methods/complement.rb +58 -0
  605. data/lib/bioroebe/toplevel_methods/convert_global_env.rb +39 -0
  606. data/lib/bioroebe/toplevel_methods/databases.rb +73 -0
  607. data/lib/bioroebe/toplevel_methods/delimiter.rb +19 -0
  608. data/lib/bioroebe/toplevel_methods/digest.rb +71 -0
  609. data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +146 -0
  610. data/lib/bioroebe/toplevel_methods/e.rb +20 -0
  611. data/lib/bioroebe/toplevel_methods/editor.rb +21 -0
  612. data/lib/bioroebe/toplevel_methods/esystem.rb +22 -0
  613. data/lib/bioroebe/toplevel_methods/exponential_growth.rb +74 -0
  614. data/lib/bioroebe/toplevel_methods/extract.rb +56 -0
  615. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +353 -0
  616. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +257 -0
  617. data/lib/bioroebe/toplevel_methods/frequencies.rb +92 -0
  618. data/lib/bioroebe/toplevel_methods/hamming_distance.rb +60 -0
  619. data/lib/bioroebe/toplevel_methods/infer.rb +66 -0
  620. data/lib/bioroebe/toplevel_methods/is_on_roebe.rb +39 -0
  621. data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +101 -0
  622. data/lib/bioroebe/toplevel_methods/levensthein.rb +63 -0
  623. data/lib/bioroebe/toplevel_methods/log_directory.rb +109 -0
  624. data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +55 -0
  625. data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +88 -0
  626. data/lib/bioroebe/toplevel_methods/matches.rb +259 -0
  627. data/lib/bioroebe/toplevel_methods/misc.rb +432 -0
  628. data/lib/bioroebe/toplevel_methods/nucleotides.rb +715 -0
  629. data/lib/bioroebe/toplevel_methods/number_of_clones.rb +63 -0
  630. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +77 -0
  631. data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +236 -0
  632. data/lib/bioroebe/toplevel_methods/opn.rb +34 -0
  633. data/lib/bioroebe/toplevel_methods/palindromes.rb +127 -0
  634. data/lib/bioroebe/toplevel_methods/parse.rb +59 -0
  635. data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +68 -0
  636. data/lib/bioroebe/toplevel_methods/rds.rb +24 -0
  637. data/lib/bioroebe/toplevel_methods/remove.rb +86 -0
  638. data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +35 -0
  639. data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +68 -0
  640. data/lib/bioroebe/toplevel_methods/rna_splicing.rb +73 -0
  641. data/lib/bioroebe/toplevel_methods/rnalfold.rb +69 -0
  642. data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +116 -0
  643. data/lib/bioroebe/toplevel_methods/shuffleseq.rb +37 -0
  644. data/lib/bioroebe/toplevel_methods/statistics.rb +53 -0
  645. data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +62 -0
  646. data/lib/bioroebe/toplevel_methods/taxonomy.rb +187 -0
  647. data/lib/bioroebe/toplevel_methods/three_delimiter.rb +34 -0
  648. data/lib/bioroebe/toplevel_methods/time_and_date.rb +53 -0
  649. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +26 -0
  650. data/lib/bioroebe/toplevel_methods/truncate.rb +48 -0
  651. data/lib/bioroebe/toplevel_methods/url.rb +36 -0
  652. data/lib/bioroebe/toplevel_methods/verbose.rb +59 -0
  653. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +191 -0
  654. data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +119 -0
  655. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +230 -0
  656. data/lib/bioroebe/utility_scripts/compacter.rb +131 -0
  657. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +529 -0
  658. data/lib/bioroebe/utility_scripts/consensus_sequence.rb +374 -0
  659. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +130 -0
  660. data/lib/bioroebe/utility_scripts/determine_antigenic_areas.rb +115 -0
  661. data/lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage.rb +137 -0
  662. data/lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb +73 -0
  663. data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +31 -0
  664. data/lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb +37 -0
  665. data/lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb +49 -0
  666. data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +471 -0
  667. data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +113 -0
  668. data/lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb +56 -0
  669. data/lib/bioroebe/utility_scripts/dot_alignment.rb +177 -0
  670. data/lib/bioroebe/utility_scripts/download_files_from_rebase.rb +72 -0
  671. data/lib/bioroebe/utility_scripts/find_gene.rb +202 -0
  672. data/lib/bioroebe/utility_scripts/mirror_repeat.rb +235 -0
  673. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +151 -0
  674. data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +168 -0
  675. data/lib/bioroebe/utility_scripts/pathways.rb +152 -0
  676. data/lib/bioroebe/utility_scripts/permutations.rb +145 -0
  677. data/lib/bioroebe/utility_scripts/punnet/punnet.rb +126 -0
  678. data/lib/bioroebe/utility_scripts/show_this_dna_sequence.rb +148 -0
  679. data/lib/bioroebe/utility_scripts/showorf/constants.rb +36 -0
  680. data/lib/bioroebe/utility_scripts/showorf/help.rb +33 -0
  681. data/lib/bioroebe/utility_scripts/showorf/initialize.rb +52 -0
  682. data/lib/bioroebe/utility_scripts/showorf/menu.rb +68 -0
  683. data/lib/bioroebe/utility_scripts/showorf/reset.rb +39 -0
  684. data/lib/bioroebe/utility_scripts/showorf/run.rb +152 -0
  685. data/lib/bioroebe/utility_scripts/showorf/show.rb +97 -0
  686. data/lib/bioroebe/utility_scripts/showorf/showorf.rb +488 -0
  687. data/lib/bioroebe/version/version.rb +44 -0
  688. data/lib/bioroebe/viennarna/README.md +3 -0
  689. data/lib/bioroebe/viennarna/rnafold_wrapper.rb +196 -0
  690. data/lib/bioroebe/with_gui.rb +18 -0
  691. data/lib/bioroebe/www/bioroebe.cgi +44 -0
  692. data/lib/bioroebe/www/embeddable_interface.rb +686 -0
  693. data/lib/bioroebe/www/sinatra/sinatra.rb +1013 -0
  694. data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +21 -0
  695. data/lib/bioroebe/yaml/aminoacids/amino_acids.yml +92 -0
  696. data/lib/bioroebe/yaml/aminoacids/amino_acids_abbreviations.yml +31 -0
  697. data/lib/bioroebe/yaml/aminoacids/amino_acids_average_mass_table.yml +33 -0
  698. data/lib/bioroebe/yaml/aminoacids/amino_acids_classification.yml +18 -0
  699. data/lib/bioroebe/yaml/aminoacids/amino_acids_english.yml +26 -0
  700. data/lib/bioroebe/yaml/aminoacids/amino_acids_frequency.yml +44 -0
  701. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +61 -0
  702. data/lib/bioroebe/yaml/aminoacids/amino_acids_molecular_formula.yml +32 -0
  703. data/lib/bioroebe/yaml/aminoacids/amino_acids_monoisotopic_mass_table.yml +38 -0
  704. data/lib/bioroebe/yaml/aminoacids/amino_acids_reste.yml +35 -0
  705. data/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml +34 -0
  706. data/lib/bioroebe/yaml/aminoacids/hydropathy_table.yml +44 -0
  707. data/lib/bioroebe/yaml/aminoacids/molecular_weight.yml +29 -0
  708. data/lib/bioroebe/yaml/aminoacids/simple_aminoacids.yml +66 -0
  709. data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +33 -0
  710. data/lib/bioroebe/yaml/antisense/antisense.yml +9 -0
  711. data/lib/bioroebe/yaml/base_composition_of_dna.yml +37 -0
  712. data/lib/bioroebe/yaml/blosum/blosum45.yml +36 -0
  713. data/lib/bioroebe/yaml/blosum/blosum50.yml +34 -0
  714. data/lib/bioroebe/yaml/blosum/blosum62.yml +35 -0
  715. data/lib/bioroebe/yaml/blosum/blosum80.yml +37 -0
  716. data/lib/bioroebe/yaml/blosum/blosum90.yml +36 -0
  717. data/lib/bioroebe/yaml/blosum/blosum_matrix.yml +200 -0
  718. data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +30 -0
  719. data/lib/bioroebe/yaml/codon_tables/1.yml +113 -0
  720. data/lib/bioroebe/yaml/codon_tables/10.yml +89 -0
  721. data/lib/bioroebe/yaml/codon_tables/11.yml +91 -0
  722. data/lib/bioroebe/yaml/codon_tables/12.yml +89 -0
  723. data/lib/bioroebe/yaml/codon_tables/13.yml +89 -0
  724. data/lib/bioroebe/yaml/codon_tables/14.yml +89 -0
  725. data/lib/bioroebe/yaml/codon_tables/15.yml +94 -0
  726. data/lib/bioroebe/yaml/codon_tables/16.yml +89 -0
  727. data/lib/bioroebe/yaml/codon_tables/2.yml +86 -0
  728. data/lib/bioroebe/yaml/codon_tables/21.yml +89 -0
  729. data/lib/bioroebe/yaml/codon_tables/22.yml +89 -0
  730. data/lib/bioroebe/yaml/codon_tables/23.yml +91 -0
  731. data/lib/bioroebe/yaml/codon_tables/24.yml +89 -0
  732. data/lib/bioroebe/yaml/codon_tables/25.yml +89 -0
  733. data/lib/bioroebe/yaml/codon_tables/26.yml +96 -0
  734. data/lib/bioroebe/yaml/codon_tables/27.yml +104 -0
  735. data/lib/bioroebe/yaml/codon_tables/28.yml +97 -0
  736. data/lib/bioroebe/yaml/codon_tables/29.yml +93 -0
  737. data/lib/bioroebe/yaml/codon_tables/3.yml +98 -0
  738. data/lib/bioroebe/yaml/codon_tables/30.yml +91 -0
  739. data/lib/bioroebe/yaml/codon_tables/31.yml +94 -0
  740. data/lib/bioroebe/yaml/codon_tables/33.yml +101 -0
  741. data/lib/bioroebe/yaml/codon_tables/4.yml +96 -0
  742. data/lib/bioroebe/yaml/codon_tables/5.yml +100 -0
  743. data/lib/bioroebe/yaml/codon_tables/6.yml +96 -0
  744. data/lib/bioroebe/yaml/codon_tables/9.yml +97 -0
  745. data/lib/bioroebe/yaml/codon_tables/overview.yml +42 -0
  746. data/lib/bioroebe/yaml/configuration/README.md +12 -0
  747. data/lib/bioroebe/yaml/configuration/browser.yml +1 -0
  748. data/lib/bioroebe/yaml/configuration/colourize_fasta_sequences.yml +14 -0
  749. data/lib/bioroebe/yaml/configuration/default_colours_for_the_aminoacids.yml +28 -0
  750. data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -0
  751. data/lib/bioroebe/yaml/configuration/try_to_use_matplotlib.yml +1 -0
  752. data/lib/bioroebe/yaml/configuration/use_opn.yml +1 -0
  753. data/lib/bioroebe/yaml/configuration/use_this_database.yml +1 -0
  754. data/lib/bioroebe/yaml/create_these_directories_on_startup/create_these_directories_on_startup.yml +9 -0
  755. data/lib/bioroebe/yaml/default_dna_input.yml +3 -0
  756. data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +15 -0
  757. data/lib/bioroebe/yaml/enzymes/pH-Optima.yml +11 -0
  758. data/lib/bioroebe/yaml/fasta_and_fastq/fastq_quality_schemes.yml +44 -0
  759. data/lib/bioroebe/yaml/genomes/README.md +16 -0
  760. data/lib/bioroebe/yaml/humans/README.md +2 -0
  761. data/lib/bioroebe/yaml/humans/human_chromosomes.yml +53 -0
  762. data/lib/bioroebe/yaml/laboratory/README.md +1 -0
  763. data/lib/bioroebe/yaml/laboratory/pipettes.yml +8 -0
  764. data/lib/bioroebe/yaml/mRNA/mRNA.yml +16 -0
  765. data/lib/bioroebe/yaml/nuclear_localization_sequences.yml +15 -0
  766. data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +29 -0
  767. data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +10 -0
  768. data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +34 -0
  769. data/lib/bioroebe/yaml/nucleotides/nucleotides_weight.yml +12 -0
  770. data/lib/bioroebe/yaml/pathways/README.md +2 -0
  771. data/lib/bioroebe/yaml/pathways/citric_acid_cycle.yml +16 -0
  772. data/lib/bioroebe/yaml/pathways/glycolysis.yml +20 -0
  773. data/lib/bioroebe/yaml/pathways/shikimate_pathway.yml +23 -0
  774. data/lib/bioroebe/yaml/pathways/urea_cycle.yml +11 -0
  775. data/lib/bioroebe/yaml/primers/README.md +4 -0
  776. data/lib/bioroebe/yaml/primers/primers.yml +3 -0
  777. data/lib/bioroebe/yaml/promoters/35S.yml +15 -0
  778. data/lib/bioroebe/yaml/promoters/strong_promoters.yml +24 -0
  779. data/lib/bioroebe/yaml/proteases/proteases.yml +3 -0
  780. data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -0
  781. data/lib/bioroebe/yaml/remote_urls/README.md +4 -0
  782. data/lib/bioroebe/yaml/remote_urls/remote_urls.yml +3 -0
  783. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +630 -0
  784. data/lib/bioroebe/yaml/sequences/JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta +14 -0
  785. data/lib/bioroebe/yaml/sequences/README.md +2 -0
  786. data/lib/bioroebe/yaml/talens.yml +22 -0
  787. data/lib/bioroebe/yaml/viruses/ecoli_phages.yml +63 -0
  788. data/lib/bioroebe/yaml/viruses/viruses.yml +6 -0
  789. data/lib/bioroebe.rb +5 -0
  790. data/spec/testing_toplevel_method_editor.rb +20 -0
  791. data/spec/testing_toplevel_method_url.rb +15 -0
  792. data/spec/testing_toplevel_method_verbose.rb +13 -0
  793. data/test/advanced_svg_example.rb +307 -0
  794. data/test/testing_bioroebe.rb +25 -0
  795. data/test/testing_codons.rb +45 -0
  796. data/test/testing_dna_to_rna_conversion.rb +15 -0
  797. data/test/testing_parse_pdb_file.rb +23 -0
  798. data/test/testing_reverse_complement.rb +32 -0
  799. data/test/testing_svg_component_of_bioroebe.rb +311 -0
  800. data/test/testing_svg_component_of_bioroebe_from_json_dataset.rb +34 -0
  801. data/test/testing_taxonomy.rb +22 -0
  802. metadata +1059 -0
@@ -0,0 +1,981 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: true
4
+ # =========================================================================== #
5
+ # === Bioroebe::ParseFasta
6
+ #
7
+ # This class will parse through a local FASTA file and find the
8
+ # proper entries.
9
+ #
10
+ # A FASTA file may have nucleotides or an aminoacid-sequence, so
11
+ # we have to keep this in mind when parsing it.
12
+ #
13
+ # Usage examples:
14
+ #
15
+ # Bioroebe::ParseFasta.new(ARGV)
16
+ # Bioroebe.parse_fasta(ARGV)
17
+ #
18
+ # =========================================================================== #
19
+ # require 'bioroebe/fasta_and_fastq/parse_fasta/misc.rb'
20
+ # Bioroebe.parse_fasta
21
+ # Bioroebe.sizeseq
22
+ # =========================================================================== #
23
+ require 'bioroebe/base/commandline_application/commandline_application.rb'
24
+ require 'bioroebe/fasta_and_fastq/parse_fasta/menu.rb'
25
+ require 'bioroebe/fasta_and_fastq/parse_fasta/report.rb'
26
+
27
+ module Bioroebe
28
+
29
+ class ParseFasta < ::Bioroebe::CommandlineApplication # === Bioroebe::ParseFasta
30
+
31
+ require 'bioroebe/calculate/calculate_gc_content.rb'
32
+
33
+ # ========================================================================= #
34
+ # === show_help (help tag)
35
+ #
36
+ # This method will inform the user how this class may be used from the
37
+ # commandline.
38
+ #
39
+ # Invocation example:
40
+ #
41
+ # pfasta --help
42
+ #
43
+ # ========================================================================= #
44
+ def show_help
45
+ e
46
+ eparse ' --size'
47
+ eparse ' --also-show-the-sequence'
48
+ eparse ' --header # show the header as well (normally the '\
49
+ 'header is not shown)'
50
+ eparse ' --limit=1000 # limit to show only the first 1000 '\
51
+ 'nucleotides; use'
52
+ eparse ' # any number that you need here'
53
+ eparse ' --one-line # show the sequence on one line only, '\
54
+ 'e. g. all newlines'
55
+ eparse ' # were removed'
56
+ eparse ' --toprotein # show the protein sequence as well '\
57
+ '(assumes DNA or RNA'
58
+ eparse ' # .fasta file)'
59
+ e
60
+ end
61
+
62
+ # ========================================================================= #
63
+ # === show_the_translated_protein_sequence?
64
+ # ========================================================================= #
65
+ def show_the_translated_protein_sequence?
66
+ @internal_hash[:show_the_translated_protein_sequence]
67
+ end
68
+
69
+ # ========================================================================= #
70
+ # === set_round_to
71
+ #
72
+ # This will set to how many decimal numbers we will round to. This is
73
+ # mostly done for display-purposes, hence why the default is a fairly
74
+ # low value.
75
+ # ========================================================================= #
76
+ def set_round_to(
77
+ i = :default
78
+ )
79
+ case i
80
+ # ======================================================================= #
81
+ # === :default
82
+ #
83
+ # As of April 2021, the default value for rounding is 2.
84
+ # ======================================================================= #
85
+ when :default
86
+ i = DEFAULT_ROUND_TO
87
+ end
88
+ @round_to = i.to_i
89
+ end
90
+
91
+ # ========================================================================= #
92
+ # === do_process_the_commandline_arguments_that_are_files
93
+ # ========================================================================= #
94
+ def do_process_the_commandline_arguments_that_are_files(
95
+ these_files = commandline_arguments_that_are_files?
96
+ )
97
+ unless these_files.is_a? Array
98
+ these_files = [these_files].flatten.compact
99
+ end
100
+ these_files.each {|this_file|
101
+ set_input_file(this_file)
102
+ set_data # This will default to @input_file, as set on the line above.
103
+ split_into_proper_sections
104
+ report_the_FASTA_header if @show_the_header
105
+ if @sort_by_size
106
+ run_sizeseq_comparison
107
+ else
108
+ # =================================================================== #
109
+ # === Handle cases where the input is a protein
110
+ # =================================================================== #
111
+ if is_the_sequence_a_polypeptide?
112
+ if be_verbose?
113
+ erev "This sequence is assumed to be a #{royalblue('protein')}#{rev}."
114
+ report_how_many_elements_we_have_found
115
+ end
116
+ else # Must be DNA or RNA, since it is not a protein.
117
+ # =================================================================== #
118
+ # === Else it must be RNA or DNA
119
+ # =================================================================== #
120
+ if be_verbose?
121
+ erev "This sequence is assumed to "\
122
+ "be #{royalblue('DNA')}#{rev} or #{royalblue('RNA')}#{rev}."
123
+ end
124
+ calculate_gc_content # GC content makes only sense for nucleotides.
125
+ report_how_many_elements_we_have_found if be_verbose?
126
+ end
127
+ if be_verbose?
128
+ report_the_nucleotide_composition
129
+ report_on_how_many_entries_we_did_work
130
+ if report_the_sequence?
131
+ do_report_the_sequence
132
+ end
133
+ end
134
+ end
135
+ }
136
+ end
137
+
138
+ # ========================================================================= #
139
+ # === sanitize_the_description
140
+ #
141
+ # This method will iterate over the description entry and sanitize
142
+ # it. In this context sanitizing means to add the "length" entry,
143
+ # and the "type" entry, such as in:
144
+ #
145
+ # " # length=231; type=dna"
146
+ #
147
+ # ========================================================================= #
148
+ def sanitize_the_description
149
+ @data.map! {|line|
150
+ if line.start_with?('>') and !line.include?('length=')
151
+ length = 0
152
+ if @hash.has_key? line.delete('>')
153
+ length = @hash[line.delete('>')].size
154
+ end
155
+ line << " # length=#{length}; type=dna" # Currently hardcoded to DNA.
156
+ end
157
+ line
158
+ }
159
+ end
160
+
161
+ # ========================================================================= #
162
+ # === entries?
163
+ # ========================================================================= #
164
+ def entries?
165
+ @data
166
+ end
167
+
168
+ # ========================================================================= #
169
+ # === we_may_exit
170
+ # ========================================================================= #
171
+ def we_may_exit
172
+ @may_we_exit = true
173
+ end
174
+
175
+ # ========================================================================= #
176
+ # === output_results
177
+ # ========================================================================= #
178
+ def output_results
179
+ pp @hash
180
+ end
181
+
182
+ # ========================================================================= #
183
+ # === sanitize_data
184
+ # ========================================================================= #
185
+ def sanitize_data(i)
186
+ if i.is_a? Array
187
+ i.flatten!
188
+ i.reject! {|entry| entry.start_with? '#' }
189
+ i.reject! {|entry| entry.strip.empty? }
190
+ if i.first and i.first.include? "\r"
191
+ # =================================================================== #
192
+ # Some FASTA files include "\r" line endings. We will check first
193
+ # for the first entry to contain a \r, and if so, we assume the
194
+ # whole FASTA file may have \r, which then will be removed.
195
+ # =================================================================== #
196
+ i.map! {|entry| entry.delete("\r") }
197
+ end
198
+ end
199
+ # ========================================================================= #
200
+ # === Run through SanitizeNucleotideSequence
201
+ # ========================================================================= #
202
+ if @internal_hash[:remove_numbers_from_input]
203
+ i = Bioroebe::SanitizeNucleotideSequence[i]
204
+ end
205
+ i
206
+ end
207
+
208
+ # ========================================================================= #
209
+ # === current_key?
210
+ # ========================================================================= #
211
+ def current_key?
212
+ @current_key
213
+ end; alias id? current_key? # === id?
214
+ alias sequence_id? current_key? # === sequence_id?
215
+ alias title current_key? # === title
216
+ alias title? current_key? # === title?
217
+
218
+ # ========================================================================= #
219
+ # === opnn
220
+ # ========================================================================= #
221
+ def opnn
222
+ super(NAMESPACE) if use_opn?
223
+ end
224
+
225
+ # ========================================================================= #
226
+ # === use_opn?
227
+ # ========================================================================= #
228
+ def use_opn?
229
+ @use_opn
230
+ end
231
+
232
+ # ========================================================================= #
233
+ # === calculate_gc_content
234
+ #
235
+ # Calculate the gc content through this method, which is called from
236
+ # within the method run().
237
+ # ========================================================================= #
238
+ def calculate_gc_content
239
+ _ = @hash.values.join.delete(N)
240
+ if is_polynucleotide? _
241
+ @hash.each_pair {|key, content|
242
+ # =================================================================== #
243
+ # Delegate towards the method Bioroebe.gc_content next, including
244
+ # to round towards @round_to positions:
245
+ # =================================================================== #
246
+ gc_content = ::Bioroebe.gc_content(content.upcase, @round_to)
247
+ gc_content = gc_content.first if gc_content.is_a? Array
248
+ gc_content = gc_content.to_s
249
+ minimal_key = key.to_s
250
+ if minimal_key.include? '|'
251
+ minimal_key = minimal_key.split('|').last.strip
252
+ end
253
+ if be_verbose?
254
+ _ = minimal_key.strip
255
+ if _.size > 40 # Shorten the content a bit if it is too long.
256
+ _ = _[0 .. 40]+' [...]'
257
+ end
258
+ erev 'GC content of "'+simp(_)+rev+'" is: '+
259
+ "#{sfancy(gc_content)}#{rev} %"
260
+ end
261
+ }
262
+ else
263
+ erev '`'+simp(_)+rev+'` is not a polynucleotide.' if be_verbose?
264
+ end
265
+ end
266
+
267
+ # ========================================================================= #
268
+ # === first_value
269
+ #
270
+ # This will return the first sequence stored in @hash (see sequences?).
271
+ # ========================================================================= #
272
+ def first_value
273
+ sequences?.first
274
+ end
275
+
276
+ # ========================================================================= #
277
+ # === nucleotides_or_aminoacids?
278
+ # ========================================================================= #
279
+ def nucleotides_or_aminoacids?
280
+ if is_polynucleotide?
281
+ 'nucleotides'
282
+ else
283
+ 'aminoacids'
284
+ end
285
+ end
286
+
287
+ # ========================================================================= #
288
+ # === is_polynucleotide?
289
+ # ========================================================================= #
290
+ def is_polynucleotide?(i = main_sequence?)
291
+ !is_protein?(i)
292
+ end; alias is_a_nucleotide? is_polynucleotide? # === is_a_nucleotide?
293
+
294
+ # ========================================================================= #
295
+ # === is_this_sequence_a_polynucleotide_sequence?
296
+ # ========================================================================= #
297
+ def is_this_sequence_a_polynucleotide_sequence?
298
+ !is_protein?
299
+ end
300
+
301
+ # ========================================================================= #
302
+ # === data?
303
+ #
304
+ # This will contain the full content of the (whole) .fasta file, including
305
+ # the header.
306
+ # ========================================================================= #
307
+ def data?
308
+ @data
309
+ end; alias input? data? # === input?
310
+ alias dataset? data? # === dataset?
311
+
312
+ # ========================================================================= #
313
+ # === hash?
314
+ # ========================================================================= #
315
+ def hash?
316
+ @hash
317
+ end
318
+
319
+ # ========================================================================= #
320
+ # === sequences?
321
+ #
322
+ # This method will obtain all found sequences.
323
+ # ========================================================================= #
324
+ def sequences?
325
+ @hash.values
326
+ end; alias sequences sequences? # === sequences
327
+ alias values sequences? # === values
328
+
329
+ # ========================================================================= #
330
+ # === short_headers?
331
+ #
332
+ # The short-headers are like the headers, but if a ' ' token is found
333
+ # then the line will be truncated towards that first ' '.
334
+ #
335
+ # An example is:
336
+ #
337
+ # sp|Q91FT8|234R_IIV6 Uncharacterized protein 234R OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-234R PE=4 SV=1
338
+ #
339
+ # This will be truncated towards
340
+ #
341
+ # sp|Q91FT8|234R_IIV6
342
+ #
343
+ # This could then be used to automatically rename FASTA files, for
344
+ # instance.
345
+ # ========================================================================= #
346
+ def short_headers?
347
+ headers?.map {|entry|
348
+ if entry.include? ' '
349
+ entry = entry.split(' ').first
350
+ end
351
+ entry
352
+ }
353
+ end
354
+
355
+ # ========================================================================= #
356
+ # === set_data
357
+ #
358
+ # This is the setter-method for @data. Since 12.06.2020 this method no
358
+ # longer invokes set_input_file() on its own. This means that you have
359
+ # to invoke set_input_file() yourself prior to calling this method.
361
+ # ========================================================================= #
362
def set_data(i = @input_file)
  # ======================================================================= #
  # Accept a String, nil or a (possibly nested) Array as input; only the
  # first non-nil element is used. The result is always a mutable String.
  # ======================================================================= #
  i = [i].flatten.compact.first.to_s.dup
  if File.exist?(i) # First try to read in from a file.
    if be_verbose?
      opnn; erev "Will read from the file `#{sfile(i)}#{rev}`."
    end
    i = File.readlines(i)
    if @is_a_genbank_file
      # =================================================================== #
      # Keep only the numbered sequence lines, such as:
      #
      #   "       61 atggggcctg caatggggcc tgcaatgggg cctgca\n"
      #
      # The stripped line has to consist of a leading position number
      # followed solely by groups of letters. Merely testing for "some
      # digit" would also pick up indented annotation lines such as
      # "     CDS             687..3158" or "/codon_start=1".
      # =================================================================== #
      selected = i.select { |line|
        line.start_with?(' ') &&
        line.strip.match?(/\A\d+(?:\s+[A-Za-z]+)+\z/)
      }.map { |sequence_line|
        sequence_line.strip.delete(' 0123456789').upcase
      }
      i = ['> genbank file'] + selected
    end
  end
  if i.empty? # i is either a String or an Array at this point, never nil.
    i = DEFAULT_FASTA
    opnn; erev 'No input was provided. Thus a default FASTA '\
               'sequence will be used instead.'
  end
  i = sanitize_data(i)
  i = i.split(N) if i.is_a? String
  @data = i
end
alias set_sequence set_data # === set_sequence
392
+
393
+ # ========================================================================= #
394
+ # === set_be_verbose_and_report_the_sequence
395
+ # ========================================================================= #
396
def set_be_verbose_and_report_the_sequence
  set_be_verbose # Defined outside of this section of the file.
  # Flag, in @internal_hash, that the sequence shall be reported.
  @internal_hash[:report_the_sequence] = true
end
400
+
401
+ # ========================================================================= #
402
+ # === condense_the_sequence_onto_a_single_line?
403
+ # ========================================================================= #
404
def condense_the_sequence_onto_a_single_line?
  # Query for the :condense_the_sequence_onto_a_single_line entry of
  # @internal_hash.
  return @internal_hash[:condense_the_sequence_onto_a_single_line]
end
407
+
408
+ # ========================================================================= #
409
+ # === return_size_sorted_hash
410
+ # ========================================================================= #
411
def return_size_sorted_hash(i = @hash)
  # Returns a new Hash whose entries are ordered by ascending size of the
  # value; the Hash passed in is not modified.
  i.sort_by { |_header, sequence| sequence.size }.to_h
end
416
+
417
+ # ========================================================================= #
418
+ # === do_sort_by_size
419
+ #
420
+ # This method will sort the hash by size of the sequence. It has been
421
+ # inspired by the EMBOSS sizeq functionality.
422
+ #
423
+ # The output that should be generated might look like this:
424
+ #
425
+ # https://www.bioinformatics.nl/cgi-bin/emboss/help/sizeseq#input.1
426
+ #
427
+ # Invocation example:
428
+ #
429
+ # x = Bioroebe::ParseFasta.new('/Depot/j/globins.fasta'); x.do_sort_by_size
430
+ #
431
+ # ========================================================================= #
432
def do_sort_by_size
  # ======================================================================= #
  # Re-order @hash by the size of the sequence body first.
  # ======================================================================= #
  @hash = return_size_sorted_hash(@hash)
  # ======================================================================= #
  # One header line plus the sequence, followed by an empty line, for
  # each entry. The molecular weight and CRC64 are not reported.
  # ======================================================================= #
  report = @hash.map { |header, sequence|
    length = sequence.size.to_s
    "> ID #{length} AA.; DE: #{header} SQ #{length} AA#{N}"\
    "#{sequence}#{N}#{N}"
  }.join
  e report
end
alias run_sizeseq_comparison do_sort_by_size # === run_sizeseq_comparison
446
+
447
+ # ========================================================================= #
448
+ # === n_nucleotides?
449
+ # ========================================================================= #
450
def n_nucleotides?
  # Size of the first sequence stored in @hash, not counting line breaks.
  # Returns 0 rather than raising when @hash holds no entry at all.
  first_sequence = @hash.values.first
  return 0 if first_sequence.nil?
  # "\r" is removed as well, so that files with CRLF line endings do not
  # report one extra residue per line.
  first_sequence.delete("\r\n").size
end
alias return_n_aminoacids n_nucleotides? # === return_n_aminoacids
alias size?               n_nucleotides? # === size?
alias sequence_size?      n_nucleotides? # === sequence_size?
455
+
456
+ # ========================================================================= #
457
+ # === headers?
458
+ # ========================================================================= #
459
def headers?
  # All keys of @hash, as an Array.
  @hash.each_key.to_a
end
462
+
463
+ # ========================================================================= #
464
+ # === first_key?
465
+ #
466
+ # Obtain the very first entry.
467
+ # ========================================================================= #
468
def first_key?
  # The first header, or nil if there is none.
  headers?[0]
end
471
+
472
+ # ========================================================================= #
473
+ # === header?
474
+ #
475
+ # This variant will always return the first entry.
476
+ # ========================================================================= #
477
def header?
  # Unlike first_key? this variant always yields a String: a missing
  # header becomes '' due to .to_s.
  first_header = headers?.first
  first_header.to_s
end
480
+
481
+ # ========================================================================= #
482
+ # === raw_body?
483
+ # ========================================================================= #
484
def raw_body?
  # The first value of @hash exactly as stored; nil if @hash is empty.
  @hash.values[0]
end
487
+
488
+ # ========================================================================= #
489
+ # === do_show_the_header
490
+ # ========================================================================= #
491
def do_show_the_header
  # Switches the @show_the_header flag on.
  @show_the_header = true
  return true
end
494
+
495
+ # ========================================================================= #
496
+ # === set_input_file
497
+ #
498
+ # This method will be used to keep track of the input-file, from
499
+ # which we will read the dataset.
500
+ # ========================================================================= #
501
def set_input_file(i = nil)
  if i.nil?
    # ===================================================================== #
    # No file was given: fall back to the first .fa or .fasta file found
    # in the current working directory, if there is any.
    # ===================================================================== #
    file = Dir['*.{fa,fasta}'].first
    if file
      if be_verbose?
        result = 'A '.dup # .dup because this String is appended to.
        result << 'FASTA ' if file.end_with?('.fasta')
        result << 'file was found in this directory ('+sfile(file)+').'
        opnn; erev result
        opnn; erev 'We will use it.'
      end
      i = file
    end
  end
  if i and File.file?(i)
    # ===================================================================== #
    # A file whose content begins with 'LOCUS' is flagged as a genbank
    # file. Only these leading bytes are read, not the whole file.
    # ===================================================================== #
    leading_bytes = File.open(i) { |handle| handle.read('LOCUS'.size) }
    @is_a_genbank_file = true if leading_bytes == 'LOCUS'
  end
  @input_file = i
end
alias set_input_files set_input_file # === set_input_files
538
+
539
+ # ========================================================================= #
540
+ # === save_the_file?
541
+ # ========================================================================= #
542
def save_the_file?
  # Query for the :save_the_file entry of @internal_hash.
  return @internal_hash[:save_the_file]
end
545
+
546
+ # ========================================================================= #
547
+ # === overwrite_the_original_file?
548
+ # ========================================================================= #
549
def overwrite_the_original_file?
  # Query for the :overwrite_the_original_file entry of @internal_hash.
  return @internal_hash[:overwrite_the_original_file]
end
552
+
553
+ # ========================================================================= #
554
+ # === split_into_proper_sections
555
+ #
556
+ # Split up into the fasta identifier, and the content.
557
+ # ========================================================================= #
558
def split_into_proper_sections
  # ======================================================================= #
  # In verbose mode, notify the user if the dataset has no > at all. The
  # dataset is processed in either case.
  # ======================================================================= #
  if be_verbose? && !@data.to_s.include?('>')
    erev 'No ">" character was found in this dataset.'
    erev 'It is recommended to always have a > identifier '\
         'for the'
    erev 'FASTA format (such as in a .fasta or a .fa file).'
  end
  @data.each { |line|
    # ===================================================================== #
    # === Handle the leading > FASTA identifier first
    #
    # Everything past the > becomes the current key, which starts out
    # with an empty sequence body.
    # ===================================================================== #
    if line.start_with?('>')
      @current_key = line.slice(1..-1).chomp
      @hash[@current_key] = ''.dup
      next
    end
    line.delete!('_') # Note that this modifies the line of @data in place.
    # ===================================================================== #
    # Sequence data without any preceding header is stored under the
    # key 'standard'.
    # ===================================================================== #
    if !@current_key
      @current_key = 'standard'
      @hash[@current_key] = ''.dup
    end
    # ===================================================================== #
    # === Retain the newlines
    #
    # The line is appended as it is, including a trailing newline if it
    # has one. That way the dataset can simply be saved again.
    # ===================================================================== #
    @hash[@current_key] << line
  }
end
590
+
591
+ # ========================================================================= #
592
+ # === save_into_a_fasta_file
593
+ # ========================================================================= #
594
def save_into_a_fasta_file(
    be_verbose = be_verbose?
  )
  # The Symbol :be_verbose is treated like true.
  be_verbose = true if :be_verbose == be_verbose
  unless @data
    opnn
    return erev('No @data variable exists.')
  end
  # The lines of @data are joined via newlines and always stored into
  # standard.fasta, relative to the current working directory.
  into = 'standard.fasta'
  what = @data.join("\n")
  erev 'Saving into '+sfile(into)+rev+'.' if be_verbose
  write_what_into(what, into)
  File.absolute_path(into) # Return the file we saved into.
end
alias do_save_the_file save_into_a_fasta_file # === do_save_the_file
611
+
612
+ # ========================================================================= #
613
+ # === add_length_information_to_the_header
614
+ # ========================================================================= #
615
def add_length_information_to_the_header
  # The stripped first header, extended by ' length=N;'.
  new_header = "#{header?.strip} length=#{sequence_size?};"
  # ======================================================================= #
  # Next, designate where to store this file: either into the original
  # input file, or into new_fasta_file.fasta.
  # ======================================================================= #
  into = overwrite_the_original_file? ? @input_file : 'new_fasta_file.fasta'
  what = "> #{new_header}\n".dup
  what << raw_body?
  return unless into # No target means that nothing is written.
  erev 'Storing into `'+sfile(into)+rev+'`.'
  write_what_into(what, into)
end
633
+
634
+ # ========================================================================= #
635
+ # === simplify_header
636
+ #
637
+ # This method can be called to simplify the header. It will save into
638
+ # a .fasta file at once.
639
+ # ========================================================================= #
640
def simplify_header
  header = header?
  # ======================================================================= #
  # Next, simplify the header. We must start with checking for [] first,
  # because if there are any [] in the FASTA header then we can simplify
  # stuff at once. For example: [Pan troglodytes]
  # ======================================================================= #
  bracketed = header.strip[/\[.+\]/]
  simplified =
    if bracketed
      bracketed.delete('[]')
    else
      # =================================================================== #
      # Otherwise only the part before the first ',' is considered.
      # =================================================================== #
      candidate = header
      if header.include?(',')
        candidate = header[0...header.index(',')].strip
      end
      if candidate.include?('Human')
        'Human'
      elsif header.include?(',') && !candidate.empty?
        candidate # The comma-truncated header is used as the new header.
      end
    end
  if simplified.nil?
    erev "Unsure what to do: #{steelblue(header)}"
    return
  end
  # ======================================================================= #
  # Next, designate where to store this file.
  # ======================================================================= #
  into = overwrite_the_original_file? ? @input_file : 'new_fasta_file.fasta'
  return unless into
  what = "> #{simplified}\n".dup
  what << raw_body?.to_s # .to_s guards against a missing sequence body.
  erev 'Storing into `'+sfile(into)+rev+'`.'
  write_what_into(what, into)
end
682
+
683
+ # ========================================================================= #
684
+ # === sequence
685
+ #
686
+ # This method will return the sequence, without any newlines. It is also
687
+ # called the "body" of a FASTA file.
688
+ # ========================================================================= #
689
def sequence
  body = @hash.values.first
  # Work on a copy (chomp rather than chomp!): the String stored in @hash
  # must stay untouched, so that raw_body? keeps returning the original
  # data after this method was called.
  body = body.chomp if body and body.end_with?(N)
  return no_newlines(body)
end
alias fasta_sequence      sequence # === fasta_sequence
alias sequence?           sequence # === sequence?
alias body?               sequence # === body?
alias body                sequence # === body
alias naseq               sequence # === naseq
alias nucleotide_sequence sequence # === nucleotide_sequence
alias return_sequence     sequence # === return_sequence
alias content?            sequence # === content?
701
+
702
+ # ========================================================================= #
703
+ # === save
704
+ #
705
+ # This method will save our FASTA file.
706
+ # ========================================================================= #
707
def save
  # Without a known input file the generic name foobar.fasta is
  # registered via set_input_file() first.
  if @input_file.nil?
    erev "The generic file #{sfile('foobar.fasta')}#{rev} will be used."
    set_input_file('foobar.fasta')
  end
  target = @input_file
  content = @data.join("\n")
  erev "Storing into #{sfile(target)}#{rev}."
  write_what_into(content, target)
  target # Return the location that was written to.
end
719
+
720
+ # ========================================================================= #
721
+ # === []
722
+ #
723
+ # This is a simpler query-interface for obtaining the DNA/RNA sequence
724
+ # of the FASTA file (or aminoacid sequence, if we have a protein at
725
+ # hand here).
726
+ #
727
+ # Using the method sequences? here, which in turn works on @hash, is
728
+ # ok because a Hash in ruby preserves the order in which its entries
729
+ # were inserted.
730
+ # ========================================================================= #
731
def [](i)
  # i is handed to Array#[] of the sequences, so it is usually an index.
  all_sequences = sequences?
  all_sequences[i]
end
734
+
735
+ # ========================================================================= #
736
+ # === Bioroebe::ParseFasta[]
737
+ # ========================================================================= #
738
def self.[](i)
  # Instantiate with the given input and return all of its sequences.
  new(i).sequences?
end
742
+
743
+ # ========================================================================= #
744
+ # === type?
745
+ # ========================================================================= #
746
def type?
  # The protein check has precedence over the polynucleotide check.
  return :protein    if is_the_sequence_a_polypeptide?
  return :dna_or_rna if is_this_sequence_a_polynucleotide_sequence?
  :unknown
end
755
+
756
+ # ========================================================================= #
757
+ # === is_the_sequence_a_polypeptide?
758
+ #
759
+ # This method can be used to determine whether a given input sequence
760
+ # is a polypeptide (aka a protein) or whether it is not.
761
+ #
762
+ # If this sequence is a polypeptide then this method will return true.
763
+ # Otherwise false will be returned.
764
+ # ========================================================================= #
765
def is_the_sequence_a_polypeptide?(
    i = main_sequence?
  )
  # ======================================================================= #
  # Without any sequence (nil or empty) the answer is false, rather than
  # a NoMethodError.
  # ======================================================================= #
  return false if i.nil? or i.empty?
  # ======================================================================= #
  # Look at the first 120 positions only. The sequence is considered to
  # be a polypeptide as soon as at least one of the following uppercase
  # letters is found there.
  # ======================================================================= #
  letters_to_check_for = 'BDEFHIJKLMOPQRSVWXYZ'
  return i[0 .. 119].count(letters_to_check_for) > 0
end
alias is_protein?   is_the_sequence_a_polypeptide? # === is_protein?
alias is_a_protein? is_the_sequence_a_polypeptide? # === is_a_protein?
799
+
800
+ # ========================================================================= #
801
+ # === main_sequence?
802
+ #
803
+ # This will always return the first entry.
804
+ # ========================================================================= #
805
def main_sequence?
  # The first value of @hash as stored; nil if @hash is empty.
  @hash.values[0]
end
808
+
809
+ # ========================================================================= #
810
+ # === gc_content?
811
+ # ========================================================================= #
812
def gc_content?
  # Delegates to Bioroebe.gc_content() for the main sequence.
  result = ::Bioroebe.gc_content(main_sequence?)
  result.to_f # Must be a float.
end
alias gc_content gc_content? # === gc_content
815
+
816
+ # ========================================================================= #
817
+ # === sequence_object
818
+ #
819
+ # This method will return a Sequence object.
820
+ #
821
+ # Usage example:
822
+ #
823
+ # x = Bioroebe.parse_fasta 'ls_orchid.fasta'
824
+ # y = x.sequence_object # y is now an instance of Bioroebe::Sequence
825
+ #
826
+ # ========================================================================= #
827
def sequence_object
  # Wrap the main sequence in a new Bioroebe::Sequence instance.
  first_sequence = main_sequence?
  ::Bioroebe::Sequence.new(first_sequence)
end
830
+
831
+ end
832
+
833
+ Fasta = ParseFasta # A second constant that refers to the very same class object as ParseFasta.
834
+
835
+ # =========================================================================== #
836
+ # === Bioroebe.parse_fasta_quietly
837
+ #
838
+ # As the variant above, but will work quietly.
839
+ # =========================================================================== #
840
def self.parse_fasta_quietly(
    i, use_colours = true
  )
  # Same as Bioroebe.parse_fasta(), with a block that yields :be_quiet.
  be_quiet = proc { :be_quiet }
  ::Bioroebe.parse_fasta(i, use_colours, &be_quiet)
end
845
+
846
+ # =========================================================================== #
847
+ # === Bioroebe.return_fasta_entry_with_the_highest_gc_content
848
+ #
849
+ # The first argument should be a locally existing FASTA file that
850
+ # contains different sequences.
851
+ #
852
+ # Usage example:
853
+ #
854
+ # x = Bioroebe.return_fasta_entry_with_the_highest_gc_content('/rosalind_gc.txt')
855
+ #
856
+ # =========================================================================== #
857
def self.return_fasta_entry_with_the_highest_gc_content(this_fasta_file)
  unless File.exist? this_fasta_file
    return erev("No file exists at #{sfile(this_fasta_file)}#{rev}.")
  end
  # The content of the file, not its path, is handed to parse_fasta().
  parsed = parse_fasta(File.read(this_fasta_file)) { :be_quiet }
  gc_content_per_entry = parsed.hash?
  # Replace every sequence by its GC content, as a Float.
  gc_content_per_entry.transform_values! { |this_sequence|
    ::Bioroebe.gc_content(this_sequence).to_f
  }
  # The result is a [header, gc_content] pair.
  gc_content_per_entry.max_by { |_header, gc_content| gc_content }
end
870
+
871
+ # =========================================================================== #
872
+ # === Bioroebe.sizeseq
873
+ #
874
+ # This method will "size-sequence compare", typically on a .fasta file.
875
+ # =========================================================================== #
876
def self.sizeseq(i)
  # Only the first element of an Array is used.
  i = i.first if i.is_a? Array
  parsed = Bioroebe.parse_fasta(i) { :be_quiet }
  parsed.do_sort_by_size
end
883
+
884
+ # =========================================================================== #
885
+ # === Bioroebe.return_sizeseq
886
+ #
887
+ # This is as Bioroebe.sizeseq(), but it will just return the result,
888
+ # rather than output it.
889
+ # =========================================================================== #
890
def self.return_sizeseq(i)
  # Only the first element of an Array is used.
  i = i.first if i.is_a? Array
  parsed = Bioroebe.parse_fasta(i) { :be_quiet }
  # One header line plus the sequence, followed by an empty line, for
  # each entry of the size-sorted Hash.
  parsed.return_size_sorted_hash.map { |header, sequence|
    length = sequence.size.to_s
    "> ID #{length} AA.; DE: #{header} SQ #{length} AA#{N}"\
    "#{sequence}#{N}#{N}"
  }.join
end
904
+
905
+ # =========================================================================== #
906
+ # === Bioroebe.genbank_to_fasta
907
+ #
908
+ # This method will convert from a genbank file, to a .fasta file.
909
+ #
910
+ # Invocation example:
911
+ #
912
+ # Bioroebe.genbank_to_fasta('/home/x/DATA/PROGRAMMING_LANGUAGES/RUBY/src/bioroebe/lib/bioroebe/data/genbank/sample_file.genbank')
913
+ #
914
+ # =========================================================================== #
915
def self.genbank_to_fasta(
    this_file,
    be_verbose = :be_verbose
  )
  be_verbose = false if :be_quiet == be_verbose
  # Only the first element of an Array is used.
  this_file = this_file.first if this_file.is_a? Array
  parser =
    if File.exist? this_file
      Bioroebe::ParseFasta.new(this_file) { :be_quiet }
    else
      # The file does not exist: set up the parser by hand instead.
      fallback = Bioroebe::ParseFasta.new(:do_not_run_yet) { :be_quiet }
      fallback.set_data # Invoked without any argument.
      fallback.split_into_proper_sections
      fallback
    end
  # The value returned here is the path of the file that was saved.
  parser.save_into_a_fasta_file(be_verbose)
end
936
+
937
+ # =========================================================================== #
938
+ # === Bioroebe.parse_fasta_file
939
+ # =========================================================================== #
940
def self.parse_fasta_file(
    i, use_colours = true
  )
  # This variant always passes be_verbose: false to ParseFasta.
  options = { use_colours: use_colours, be_verbose: false }
  ParseFasta.new(i) { options }
end
class << self
  alias fasta_file parse_fasta_file # === Bioroebe.fasta_file
end
949
+
950
+ # =========================================================================== #
951
+ # === Bioroebe.parse_fasta
952
+ #
953
+ # Easier reader-method for .fasta files.
954
+ #
955
+ # The second argument determines whether we will use colours or whether
956
+ # we will not. The default is to use colours when we use this
957
+ # particular class method.
958
+ #
959
+ # Invocation examples:
960
+ #
961
+ # x = Bioroebe.parse_fasta('/rosalind_gc.txt')
962
+ # hash = Bioroebe.parse_fasta('/rosalind_gc.txt').hash?
963
+ #
964
+ # =========================================================================== #
965
def self.parse_fasta(
    i,
    use_colours = true
  )
  use_this_hash = { use_colours: use_colours }
  # The value yielded by an optional block ends up as :be_verbose.
  use_this_hash[:be_verbose] = yield if block_given?
  ::Bioroebe::ParseFasta.new(i) { use_this_hash }
end
class << self
  alias fasta parse_fasta # === Bioroebe.fasta
end
980
+
981
+ end