bioroebe 0.10.80

Sign up to get free protection for your applications and to get access to all the features.
Files changed (802) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +428 -0
  3. data/README.md +9280 -0
  4. data/bin/advanced_dotplot +7 -0
  5. data/bin/align_open_reading_frames +12 -0
  6. data/bin/all_positions_of_this_nucleotide +7 -0
  7. data/bin/aminoacid_composition +7 -0
  8. data/bin/aminoacid_frequencies +12 -0
  9. data/bin/aminoacid_substitution +7 -0
  10. data/bin/automatically_rename_this_fasta_file +7 -0
  11. data/bin/base_composition +7 -0
  12. data/bin/batch_create_windows_executables +7 -0
  13. data/bin/biomart_console +11 -0
  14. data/bin/bioroebe +27 -0
  15. data/bin/bioroebe_controller +10 -0
  16. data/bin/bioshell +26 -0
  17. data/bin/blosum_2D_table +12 -0
  18. data/bin/calculate_n50_value +12 -0
  19. data/bin/chunked_display +12 -0
  20. data/bin/codon_frequency +9 -0
  21. data/bin/codon_to_aminoacid +30 -0
  22. data/bin/colourize_this_fasta_sequence +14 -0
  23. data/bin/compact_fasta_file +7 -0
  24. data/bin/complement +7 -0
  25. data/bin/complementary_dna_strand +12 -0
  26. data/bin/complementary_rna_strand +12 -0
  27. data/bin/compseq +7 -0
  28. data/bin/consensus_sequence +17 -0
  29. data/bin/count_AT +12 -0
  30. data/bin/count_GC +12 -0
  31. data/bin/create_random_aminoacids +7 -0
  32. data/bin/decode_this_aminoacid_sequence +20 -0
  33. data/bin/deduce_aminoacid_sequence +13 -0
  34. data/bin/deduce_most_likely_aminoacid_sequence +7 -0
  35. data/bin/display_aminoacid_table +12 -0
  36. data/bin/display_open_reading_frames +7 -0
  37. data/bin/dna_to_aminoacid_sequence +7 -0
  38. data/bin/dna_to_rna +7 -0
  39. data/bin/downcase_chunked_display +12 -0
  40. data/bin/download_this_pdb +7 -0
  41. data/bin/fasta_index +7 -0
  42. data/bin/fetch_data_from_uniprot +12 -0
  43. data/bin/filter_away_invalid_nucleotides +12 -0
  44. data/bin/find_substring +19 -0
  45. data/bin/genbank_to_fasta +7 -0
  46. data/bin/hamming_distance +12 -0
  47. data/bin/input_as_dna +12 -0
  48. data/bin/is_palindrome +13 -0
  49. data/bin/leading_five_prime +7 -0
  50. data/bin/levensthein +7 -0
  51. data/bin/longest_ORF +14 -0
  52. data/bin/longest_substring +12 -0
  53. data/bin/n_stop_codons_in_this_sequence +15 -0
  54. data/bin/open_reading_frames +14 -0
  55. data/bin/overwrite_fasta_header +7 -0
  56. data/bin/palindrome_2D_structure +7 -0
  57. data/bin/palindrome_generator +7 -0
  58. data/bin/parse_fasta +7 -0
  59. data/bin/partner_nucleotide +9 -0
  60. data/bin/possible_codons_for_this_aminoacid +12 -0
  61. data/bin/random_dna_sequence +12 -0
  62. data/bin/random_sequence +12 -0
  63. data/bin/raw_hamming_distance +12 -0
  64. data/bin/return_longest_substring_via_LCS_algorithm +7 -0
  65. data/bin/reverse_complement +7 -0
  66. data/bin/reverse_sequence +7 -0
  67. data/bin/ruler +12 -0
  68. data/bin/scan_this_input_for_startcodons +12 -0
  69. data/bin/short_aminoacid_letter_from_long_aminoacid_name +7 -0
  70. data/bin/show_atomic_composition +7 -0
  71. data/bin/show_codon_usage +12 -0
  72. data/bin/show_fasta_header +7 -0
  73. data/bin/show_nucleotide_sequence +7 -0
  74. data/bin/show_this_codon_table +7 -0
  75. data/bin/show_this_dna_sequence +7 -0
  76. data/bin/showorf +14 -0
  77. data/bin/simplify_fasta +7 -0
  78. data/bin/sort_aminoacid_based_on_its_hydrophobicity +7 -0
  79. data/bin/split_this_fasta_file_into_chromosomes +7 -0
  80. data/bin/strict_filter_away_invalid_aminoacids +7 -0
  81. data/bin/taxonomy +63 -0
  82. data/bin/three_to_one +7 -0
  83. data/bin/to_rna +7 -0
  84. data/bin/trailing_three_prime +7 -0
  85. data/bin/trypsin_digest +7 -0
  86. data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +7 -0
  87. data/bioroebe.gemspec +97 -0
  88. data/doc/IUPAC_aminoacids_code.md +36 -0
  89. data/doc/IUPAC_nucleotide_code.md +19 -0
  90. data/doc/README.gen +9237 -0
  91. data/doc/blosum.md +5 -0
  92. data/doc/compatibility/BIO_PHP.md +37 -0
  93. data/doc/compatibility/README.md +3 -0
  94. data/doc/compatibility/emboss.md +56 -0
  95. data/doc/extensive_usage_example.md +35 -0
  96. data/doc/german_names_for_the_aminoacids.md +27 -0
  97. data/doc/instructions_for_the_taxonomy_subproject.md +504 -0
  98. data/doc/legacy_paths.md +9 -0
  99. data/doc/pdb_ATOM_entry.md +33 -0
  100. data/doc/quality_control/README.md +2 -0
  101. data/doc/quality_control/commandline_applications.md +13 -0
  102. data/doc/resources.md +23 -0
  103. data/doc/setup.rb +1655 -0
  104. data/doc/statistics/statistics.md +41 -0
  105. data/doc/todo/README.md +5 -0
  106. data/doc/todo/bioroebe_GUI_todo.md +15 -0
  107. data/doc/todo/bioroebe_todo.md +2823 -0
  108. data/doc/using_biomart.md +258 -0
  109. data/html/test.html +144 -0
  110. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +118 -0
  111. data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +118 -0
  112. data/lib/bioroebe/aminoacids/codon_percentage.rb +189 -0
  113. data/lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb +110 -0
  114. data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +221 -0
  115. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +460 -0
  116. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +231 -0
  117. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +184 -0
  118. data/lib/bioroebe/annotations/README.md +2 -0
  119. data/lib/bioroebe/annotations/create_annotation_format.rb +208 -0
  120. data/lib/bioroebe/autoinclude.rb +7 -0
  121. data/lib/bioroebe/base/base.rb +35 -0
  122. data/lib/bioroebe/base/colours.rb +14 -0
  123. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +275 -0
  124. data/lib/bioroebe/base/commandline_application/README.md +7 -0
  125. data/lib/bioroebe/base/commandline_application/aminoacids.rb +33 -0
  126. data/lib/bioroebe/base/commandline_application/commandline_application.rb +37 -0
  127. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +144 -0
  128. data/lib/bioroebe/base/commandline_application/directory.rb +33 -0
  129. data/lib/bioroebe/base/commandline_application/extract.rb +22 -0
  130. data/lib/bioroebe/base/commandline_application/misc.rb +485 -0
  131. data/lib/bioroebe/base/commandline_application/opn.rb +47 -0
  132. data/lib/bioroebe/base/commandline_application/reset.rb +40 -0
  133. data/lib/bioroebe/base/commandline_application/warnings.rb +36 -0
  134. data/lib/bioroebe/base/commandline_application/write_what_into.rb +29 -0
  135. data/lib/bioroebe/base/initialize.rb +18 -0
  136. data/lib/bioroebe/base/misc.rb +94 -0
  137. data/lib/bioroebe/base/namespace.rb +16 -0
  138. data/lib/bioroebe/base/prototype/README.md +12 -0
  139. data/lib/bioroebe/base/prototype/e_and_ee.rb +24 -0
  140. data/lib/bioroebe/base/prototype/misc.rb +108 -0
  141. data/lib/bioroebe/base/prototype/mkdir.rb +20 -0
  142. data/lib/bioroebe/base/prototype/prototype.rb +21 -0
  143. data/lib/bioroebe/base/prototype/reset.rb +26 -0
  144. data/lib/bioroebe/base/reset.rb +11 -0
  145. data/lib/bioroebe/biomart/LICENSE.md +27 -0
  146. data/lib/bioroebe/biomart/attribute.rb +77 -0
  147. data/lib/bioroebe/biomart/biomart.rb +227 -0
  148. data/lib/bioroebe/biomart/database.rb +128 -0
  149. data/lib/bioroebe/biomart/dataset.rb +572 -0
  150. data/lib/bioroebe/biomart/filter.rb +97 -0
  151. data/lib/bioroebe/biomart/server.rb +152 -0
  152. data/lib/bioroebe/blosum/blosum.rb +88 -0
  153. data/lib/bioroebe/calculate/calculate_blosum_score.rb +145 -0
  154. data/lib/bioroebe/calculate/calculate_gc_content.rb +301 -0
  155. data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +100 -0
  156. data/lib/bioroebe/calculate/calculate_melting_temperature.rb +398 -0
  157. data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +304 -0
  158. data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +166 -0
  159. data/lib/bioroebe/cell/README.md +1 -0
  160. data/lib/bioroebe/cell/cell.rb +63 -0
  161. data/lib/bioroebe/cleave_and_digest/README.md +2 -0
  162. data/lib/bioroebe/cleave_and_digest/cleave.rb +80 -0
  163. data/lib/bioroebe/cleave_and_digest/digestion.rb +75 -0
  164. data/lib/bioroebe/cleave_and_digest/trypsin.rb +192 -0
  165. data/lib/bioroebe/codon_tables/README.md +9 -0
  166. data/lib/bioroebe/codon_tables/frequencies/287_Pseudomonas_aeruginosa.yml +101 -0
  167. data/lib/bioroebe/codon_tables/frequencies/3702_Arabidopsis_thaliana.yml +77 -0
  168. data/lib/bioroebe/codon_tables/frequencies/4932_Saccharomyces_cerevisiae.yml +103 -0
  169. data/lib/bioroebe/codon_tables/frequencies/7227_Drosophila_melanogaster.yml +71 -0
  170. data/lib/bioroebe/codon_tables/frequencies/83333_Escherichia_coli_K12.yml +103 -0
  171. data/lib/bioroebe/codon_tables/frequencies/9606_Homo_sapiens.yml +123 -0
  172. data/lib/bioroebe/codon_tables/frequencies/9685_Felis_catus.yml +78 -0
  173. data/lib/bioroebe/codon_tables/frequencies/README.md +10 -0
  174. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +337 -0
  175. data/lib/bioroebe/codons/README.md +28 -0
  176. data/lib/bioroebe/codons/codon_table.rb +416 -0
  177. data/lib/bioroebe/codons/codon_tables.rb +123 -0
  178. data/lib/bioroebe/codons/codons.rb +517 -0
  179. data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +102 -0
  180. data/lib/bioroebe/codons/detect_minimal_codon.rb +180 -0
  181. data/lib/bioroebe/codons/determine_optimal_codons.rb +74 -0
  182. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +380 -0
  183. data/lib/bioroebe/codons/sanitize_codon_frequency.rb +144 -0
  184. data/lib/bioroebe/codons/show_codon_tables.rb +130 -0
  185. data/lib/bioroebe/codons/show_codon_usage.rb +197 -0
  186. data/lib/bioroebe/codons/show_this_codon_table.rb +573 -0
  187. data/lib/bioroebe/codons/start_codons.rb +105 -0
  188. data/lib/bioroebe/colours/colour_schemes/README.md +10 -0
  189. data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +38 -0
  190. data/lib/bioroebe/colours/colour_schemes/buried.rb +70 -0
  191. data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +101 -0
  192. data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +262 -0
  193. data/lib/bioroebe/colours/colour_schemes/helix.rb +65 -0
  194. data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +70 -0
  195. data/lib/bioroebe/colours/colour_schemes/nucleotide.rb +47 -0
  196. data/lib/bioroebe/colours/colour_schemes/score.rb +112 -0
  197. data/lib/bioroebe/colours/colour_schemes/simple.rb +42 -0
  198. data/lib/bioroebe/colours/colour_schemes/strand.rb +65 -0
  199. data/lib/bioroebe/colours/colour_schemes/taylor.rb +58 -0
  200. data/lib/bioroebe/colours/colour_schemes/turn.rb +65 -0
  201. data/lib/bioroebe/colours/colour_schemes/zappo.rb +59 -0
  202. data/lib/bioroebe/colours/colourize_sequence.rb +262 -0
  203. data/lib/bioroebe/colours/colours.rb +119 -0
  204. data/lib/bioroebe/colours/misc_colours.rb +80 -0
  205. data/lib/bioroebe/colours/rev.rb +41 -0
  206. data/lib/bioroebe/colours/sdir.rb +21 -0
  207. data/lib/bioroebe/colours/sfancy.rb +21 -0
  208. data/lib/bioroebe/colours/sfile.rb +21 -0
  209. data/lib/bioroebe/colours/simp.rb +21 -0
  210. data/lib/bioroebe/colours/swarn.rb +29 -0
  211. data/lib/bioroebe/colours/use_colours.rb +27 -0
  212. data/lib/bioroebe/configuration/configuration.rb +114 -0
  213. data/lib/bioroebe/configuration/constants.rb +35 -0
  214. data/lib/bioroebe/constants/GUIs.rb +79 -0
  215. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +146 -0
  216. data/lib/bioroebe/constants/base_directory.rb +120 -0
  217. data/lib/bioroebe/constants/carriage_return.rb +14 -0
  218. data/lib/bioroebe/constants/codon_tables.rb +77 -0
  219. data/lib/bioroebe/constants/database_constants.rb +107 -0
  220. data/lib/bioroebe/constants/files_and_directories.rb +579 -0
  221. data/lib/bioroebe/constants/misc.rb +189 -0
  222. data/lib/bioroebe/constants/newline.rb +14 -0
  223. data/lib/bioroebe/constants/nucleotides.rb +114 -0
  224. data/lib/bioroebe/constants/regex.rb +28 -0
  225. data/lib/bioroebe/constants/roebe.rb +38 -0
  226. data/lib/bioroebe/constants/row_terminator.rb +16 -0
  227. data/lib/bioroebe/constants/tabulator.rb +14 -0
  228. data/lib/bioroebe/constants/unicode.rb +12 -0
  229. data/lib/bioroebe/constants/urls.rb +50 -0
  230. data/lib/bioroebe/conversions/README.md +3 -0
  231. data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +298 -0
  232. data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +569 -0
  233. data/lib/bioroebe/count/README.md +1 -0
  234. data/lib/bioroebe/count/count_amount_of_aminoacids.rb +352 -0
  235. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +491 -0
  236. data/lib/bioroebe/count/count_at.rb +39 -0
  237. data/lib/bioroebe/count/count_gc.rb +43 -0
  238. data/lib/bioroebe/css/README.md +5 -0
  239. data/lib/bioroebe/css/project.css +121 -0
  240. data/lib/bioroebe/data/README.md +10 -0
  241. data/lib/bioroebe/data/bam/README.md +1 -0
  242. data/lib/bioroebe/data/data.txt +192 -0
  243. data/lib/bioroebe/data/fasta/GFP_mutant_3_coding_sequence.fasta +12 -0
  244. data/lib/bioroebe/data/fasta/alu_elements.fasta +42 -0
  245. data/lib/bioroebe/data/fasta/lady_slippers_orchid.fasta +1197 -0
  246. data/lib/bioroebe/data/fasta/loxP.fasta +2 -0
  247. data/lib/bioroebe/data/fasta/ls_orchid.fasta +1197 -0
  248. data/lib/bioroebe/data/fasta/pax6_in_mouse.fasta +1 -0
  249. data/lib/bioroebe/data/fasta/test.fasta +7 -0
  250. data/lib/bioroebe/data/fasta/test_DNA.fasta +1 -0
  251. data/lib/bioroebe/data/fastq/fastq_example_file.fastq +32 -0
  252. data/lib/bioroebe/data/fastq/fastq_example_file_SP1.fastq +1000 -0
  253. data/lib/bioroebe/data/fastq/one_random_fastq_entry.fastq +4 -0
  254. data/lib/bioroebe/data/genbank/sample_file.genbank +15 -0
  255. data/lib/bioroebe/data/genbank/standard.fasta +3 -0
  256. data/lib/bioroebe/data/gff/Escherichia_coli_K12_plasmid_F_DNA_NC_002483.1.gff3 +345 -0
  257. data/lib/bioroebe/data/gff/sample.gff +2 -0
  258. data/lib/bioroebe/data/gff/test_gene.gff +4 -0
  259. data/lib/bioroebe/data/gff/transcripts.gff +16 -0
  260. data/lib/bioroebe/data/gtf/README.md +1 -0
  261. data/lib/bioroebe/data/json/example_config.json +48 -0
  262. data/lib/bioroebe/data/pdb/1VII.pdb +754 -0
  263. data/lib/bioroebe/data/pdb/ala_phe_ala.pdb +228 -0
  264. data/lib/bioroebe/data/pdb/rcsb_pdb_1VII.fasta +2 -0
  265. data/lib/bioroebe/data/phylip/README.md +11 -0
  266. data/lib/bioroebe/data/phylip/example.phylip +7 -0
  267. data/lib/bioroebe/data/svg/example.svg +301 -0
  268. data/lib/bioroebe/databases/README.md +1 -0
  269. data/lib/bioroebe/databases/download_taxonomy_database.rb +102 -0
  270. data/lib/bioroebe/dotplots/README.md +5 -0
  271. data/lib/bioroebe/dotplots/advanced_dotplot.rb +256 -0
  272. data/lib/bioroebe/dotplots/dotplot.rb +184 -0
  273. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +191 -0
  274. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +102 -0
  275. data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +122 -0
  276. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +197 -0
  277. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +282 -0
  278. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +131 -0
  279. data/lib/bioroebe/encoding/README.md +2 -0
  280. data/lib/bioroebe/encoding/encoding.rb +45 -0
  281. data/lib/bioroebe/enzymes/README.md +2 -0
  282. data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +46 -0
  283. data/lib/bioroebe/enzymes/restriction_enzyme.rb +200 -0
  284. data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +72 -0
  285. data/lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb +80 -0
  286. data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +65 -0
  287. data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +119 -0
  288. data/lib/bioroebe/exceptions/README.md +2 -0
  289. data/lib/bioroebe/exceptions/exceptions.rb +17 -0
  290. data/lib/bioroebe/ext/LICENCE.md +5 -0
  291. data/lib/bioroebe/ext/README.md +7 -0
  292. data/lib/bioroebe/ext/main.cpp +45 -0
  293. data/lib/bioroebe/ext/nucleotide.cpp +24 -0
  294. data/lib/bioroebe/ext/nussinov_algorithm.cpp +348 -0
  295. data/lib/bioroebe/ext/sequence +0 -0
  296. data/lib/bioroebe/ext/sequence.cpp +162 -0
  297. data/lib/bioroebe/fasta_and_fastq/README.md +6 -0
  298. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +88 -0
  299. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +151 -0
  300. data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +111 -0
  301. data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +248 -0
  302. data/lib/bioroebe/fasta_and_fastq/fasta_defline/README.md +2 -0
  303. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +113 -0
  304. data/lib/bioroebe/fasta_and_fastq/fasta_parser.rb +167 -0
  305. data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +131 -0
  306. data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +87 -0
  307. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +50 -0
  308. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +86 -0
  309. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +117 -0
  310. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +981 -0
  311. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +27 -0
  312. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +156 -0
  313. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +128 -0
  314. data/lib/bioroebe/fasta_and_fastq/parse_fasta/run.rb +20 -0
  315. data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +83 -0
  316. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +112 -0
  317. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +135 -0
  318. data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +188 -0
  319. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +111 -0
  320. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +26 -0
  321. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb +41 -0
  322. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb +23 -0
  323. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +68 -0
  324. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +290 -0
  325. data/lib/bioroebe/genbank/README.md +1 -0
  326. data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +275 -0
  327. data/lib/bioroebe/genbank/genbank_parser.rb +291 -0
  328. data/lib/bioroebe/gene/gene.rb +64 -0
  329. data/lib/bioroebe/genomes/genome_pattern.rb +165 -0
  330. data/lib/bioroebe/genomes/genome_retriever.rb +79 -0
  331. data/lib/bioroebe/gui/experimental/README.md +1 -0
  332. data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +147 -0
  333. data/lib/bioroebe/gui/gtk3/README.md +2 -0
  334. data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +337 -0
  335. data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +510 -0
  336. data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +55 -0
  337. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.config +6 -0
  338. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +29 -0
  339. data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +196 -0
  340. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +6 -0
  341. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +106 -0
  342. data/lib/bioroebe/gui/gtk3/controller/controller.rb +406 -0
  343. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +609 -0
  344. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +6 -0
  345. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +189 -0
  346. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +245 -0
  347. data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +346 -0
  348. data/lib/bioroebe/gui/gtk3/gene/gene.rb +182 -0
  349. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.config +6 -0
  350. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +370 -0
  351. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.config +6 -0
  352. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +175 -0
  353. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +428 -0
  354. data/lib/bioroebe/gui/gtk3/parse_pdb_file/parse_pdb_file.rb +342 -0
  355. data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +580 -0
  356. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.config +6 -0
  357. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +182 -0
  358. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +566 -0
  359. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.config +6 -0
  360. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +329 -0
  361. data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +556 -0
  362. data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +171 -0
  363. data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +146 -0
  364. data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +207 -0
  365. data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +279 -0
  366. data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +23 -0
  367. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.config +6 -0
  368. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +368 -0
  369. data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +82 -0
  370. data/lib/bioroebe/gui/libui/README.md +4 -0
  371. data/lib/bioroebe/gui/libui/alignment/alignment.rb +114 -0
  372. data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +112 -0
  373. data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +60 -0
  374. data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +161 -0
  375. data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +76 -0
  376. data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +135 -0
  377. data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +118 -0
  378. data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +115 -0
  379. data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +174 -0
  380. data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +132 -0
  381. data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +89 -0
  382. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +111 -0
  383. data/lib/bioroebe/gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb +42 -0
  384. data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +216 -0
  385. data/lib/bioroebe/gui/shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb +284 -0
  386. data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +402 -0
  387. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +192 -0
  388. data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +72 -0
  389. data/lib/bioroebe/gui/shared_code/show_codon_usage/show_codon_usage_module.rb +213 -0
  390. data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +206 -0
  391. data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +140 -0
  392. data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +262 -0
  393. data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +243 -0
  394. data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +199 -0
  395. data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +519 -0
  396. data/lib/bioroebe/images/BIOROEBE.png +0 -0
  397. data/lib/bioroebe/images/BIOROEBE_NEW_LOGO.png +0 -0
  398. data/lib/bioroebe/images/BlosumMatrixViewer.png +0 -0
  399. data/lib/bioroebe/images/DnaToAminoacidWidget.png +0 -0
  400. data/lib/bioroebe/images/PRINTED_AMINOACID_TABLE.png +0 -0
  401. data/lib/bioroebe/images/class_ConvertAminoacidToDNA.png +0 -0
  402. data/lib/bioroebe/images/class_SimpleStringComparer.png +0 -0
  403. data/lib/bioroebe/images/example_of_FASTA_coloured_output.png +0 -0
  404. data/lib/bioroebe/images/libui_hamming_distance_widget.png +0 -0
  405. data/lib/bioroebe/images/pretty_DNA_picture.png +0 -0
  406. data/lib/bioroebe/images/primer_design_widget.png +0 -0
  407. data/lib/bioroebe/images/restriction_enzyme_commandline_result.png +0 -0
  408. data/lib/bioroebe/images/ruby-gtk_three_to_one_widget.png +0 -0
  409. data/lib/bioroebe/images/small_DNA_logo.png +0 -0
  410. data/lib/bioroebe/images/small_drosophila_image.png +0 -0
  411. data/lib/bioroebe/java/README.md +6 -0
  412. data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
  413. data/lib/bioroebe/java/bioroebe/AllInOne.java +214 -0
  414. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  415. data/lib/bioroebe/java/bioroebe/Base.java +102 -0
  416. data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.class +0 -0
  417. data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.java +23 -0
  418. data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
  419. data/lib/bioroebe/java/bioroebe/Codons.class +0 -0
  420. data/lib/bioroebe/java/bioroebe/Codons.java +22 -0
  421. data/lib/bioroebe/java/bioroebe/Esystem.class +0 -0
  422. data/lib/bioroebe/java/bioroebe/Esystem.java +47 -0
  423. data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.class +0 -0
  424. data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.java +65 -0
  425. data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.class +0 -0
  426. data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.java +32 -0
  427. data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
  428. data/lib/bioroebe/java/bioroebe/IsPalindrome.java +18 -0
  429. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  430. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +19 -0
  431. data/lib/bioroebe/java/bioroebe/README.md +4 -0
  432. data/lib/bioroebe/java/bioroebe/RemoveFile.class +0 -0
  433. data/lib/bioroebe/java/bioroebe/RemoveFile.java +24 -0
  434. data/lib/bioroebe/java/bioroebe/RemoveNumbers.class +0 -0
  435. data/lib/bioroebe/java/bioroebe/RemoveNumbers.java +14 -0
  436. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
  437. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +20 -0
  438. data/lib/bioroebe/java/bioroebe/SaveFile.java +44 -0
  439. data/lib/bioroebe/java/bioroebe/Sequence.java +28 -0
  440. data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
  441. data/lib/bioroebe/java/bioroebe/ToCamelcase.java +32 -0
  442. data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
  443. data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +15 -0
  444. data/lib/bioroebe/java/bioroebe/enums/DNA.java +6 -0
  445. data/lib/bioroebe/java/bioroebe.jar +0 -0
  446. data/lib/bioroebe/matplotlib/matplotlib_generator.rb +104 -0
  447. data/lib/bioroebe/misc/quiz/README.md +6 -0
  448. data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +163 -0
  449. data/lib/bioroebe/misc/ruler.rb +244 -0
  450. data/lib/bioroebe/misc/useful_formulas.rb +129 -0
  451. data/lib/bioroebe/ncbi/efetch.rb +253 -0
  452. data/lib/bioroebe/ncbi/ncbi.rb +93 -0
  453. data/lib/bioroebe/ngs/README.md +2 -0
  454. data/lib/bioroebe/ngs/phred_quality_score_table.rb +123 -0
  455. data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +166 -0
  456. data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +135 -0
  457. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +198 -0
  458. data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +133 -0
  459. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +556 -0
  460. data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +139 -0
  461. data/lib/bioroebe/palindromes/palindrome_finder.rb +208 -0
  462. data/lib/bioroebe/palindromes/palindrome_generator.rb +272 -0
  463. data/lib/bioroebe/parsers/biolang_parser.rb +156 -0
  464. data/lib/bioroebe/parsers/blosum_parser.rb +222 -0
  465. data/lib/bioroebe/parsers/genbank_parser.rb +78 -0
  466. data/lib/bioroebe/parsers/gff.rb +346 -0
  467. data/lib/bioroebe/parsers/parse_embl.rb +76 -0
  468. data/lib/bioroebe/parsers/stride_parser.rb +117 -0
  469. data/lib/bioroebe/patterns/README.md +5 -0
  470. data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +149 -0
  471. data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +66 -0
  472. data/lib/bioroebe/patterns/profile_pattern.rb +182 -0
  473. data/lib/bioroebe/patterns/rgg_scanner.rb +160 -0
  474. data/lib/bioroebe/patterns/scan_for_repeat.rb +157 -0
  475. data/lib/bioroebe/pdb/download_this_pdb.rb +67 -0
  476. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +164 -0
  477. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +63 -0
  478. data/lib/bioroebe/pdb/parse_pdb_file.rb +1086 -0
  479. data/lib/bioroebe/pdb/report_secondary_structures_from_this_pdb_file.rb +225 -0
  480. data/lib/bioroebe/perl/README.md +7 -0
  481. data/lib/bioroebe/perl/local_to_global.pl +694 -0
  482. data/lib/bioroebe/project/project.rb +264 -0
  483. data/lib/bioroebe/protein_structure/alpha_helix.rb +96 -0
  484. data/lib/bioroebe/protein_structure/helical_wheel.rb +205 -0
  485. data/lib/bioroebe/raw_sequence/README.md +17 -0
  486. data/lib/bioroebe/raw_sequence/raw_sequence.rb +557 -0
  487. data/lib/bioroebe/readline/README.md +2 -0
  488. data/lib/bioroebe/readline/readline.rb +31 -0
  489. data/lib/bioroebe/regexes/README.md +2 -0
  490. data/lib/bioroebe/regexes/regexes.rb +34 -0
  491. data/lib/bioroebe/requires/commandline_application.rb +5 -0
  492. data/lib/bioroebe/requires/require_all_aminoacids_files.rb +28 -0
  493. data/lib/bioroebe/requires/require_all_calculate_files.rb +26 -0
  494. data/lib/bioroebe/requires/require_all_codon_files.rb +26 -0
  495. data/lib/bioroebe/requires/require_all_colour_scheme_files.rb +26 -0
  496. data/lib/bioroebe/requires/require_all_count_files.rb +26 -0
  497. data/lib/bioroebe/requires/require_all_dotplot_files.rb +28 -0
  498. data/lib/bioroebe/requires/require_all_electron_microscopy_files.rb +26 -0
  499. data/lib/bioroebe/requires/require_all_enzymes_files.rb +28 -0
  500. data/lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb +32 -0
  501. data/lib/bioroebe/requires/require_all_nucleotides_files.rb +28 -0
  502. data/lib/bioroebe/requires/require_all_palindromes_files.rb +29 -0
  503. data/lib/bioroebe/requires/require_all_parser_files.rb +28 -0
  504. data/lib/bioroebe/requires/require_all_pattern_files.rb +29 -0
  505. data/lib/bioroebe/requires/require_all_pdb_files.rb +26 -0
  506. data/lib/bioroebe/requires/require_all_sequence_files.rb +26 -0
  507. data/lib/bioroebe/requires/require_all_string_matching_files.rb +28 -0
  508. data/lib/bioroebe/requires/require_all_svg_files.rb +12 -0
  509. data/lib/bioroebe/requires/require_all_taxonomy_files.rb +35 -0
  510. data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +32 -0
  511. data/lib/bioroebe/requires/require_cleave_and_digest.rb +24 -0
  512. data/lib/bioroebe/requires/require_colours.rb +20 -0
  513. data/lib/bioroebe/requires/require_encoding.rb +7 -0
  514. data/lib/bioroebe/requires/require_sequence.rb +7 -0
  515. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +162 -0
  516. data/lib/bioroebe/requires/require_the_bioroebe_shell.rb +7 -0
  517. data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +7 -0
  518. data/lib/bioroebe/requires/require_the_constants.rb +23 -0
  519. data/lib/bioroebe/requires/require_the_toplevel_methods.rb +29 -0
  520. data/lib/bioroebe/requires/require_yaml.rb +94 -0
  521. data/lib/bioroebe/sequence/alignment.rb +214 -0
  522. data/lib/bioroebe/sequence/dna.rb +211 -0
  523. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +404 -0
  524. data/lib/bioroebe/sequence/protein.rb +281 -0
  525. data/lib/bioroebe/sequence/reverse_complement.rb +148 -0
  526. data/lib/bioroebe/sequence/sequence.rb +706 -0
  527. data/lib/bioroebe/shell/add.rb +108 -0
  528. data/lib/bioroebe/shell/assign.rb +360 -0
  529. data/lib/bioroebe/shell/chop_and_cut.rb +281 -0
  530. data/lib/bioroebe/shell/colours/colours.rb +235 -0
  531. data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +1 -0
  532. data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +1 -0
  533. data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +1 -0
  534. data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -0
  535. data/lib/bioroebe/shell/constants.rb +166 -0
  536. data/lib/bioroebe/shell/download.rb +335 -0
  537. data/lib/bioroebe/shell/enable_and_disable.rb +158 -0
  538. data/lib/bioroebe/shell/enzymes.rb +310 -0
  539. data/lib/bioroebe/shell/fasta.rb +345 -0
  540. data/lib/bioroebe/shell/gtk.rb +76 -0
  541. data/lib/bioroebe/shell/help/class.rb +443 -0
  542. data/lib/bioroebe/shell/help/help.rb +25 -0
  543. data/lib/bioroebe/shell/history.rb +132 -0
  544. data/lib/bioroebe/shell/initialize.rb +217 -0
  545. data/lib/bioroebe/shell/loop.rb +74 -0
  546. data/lib/bioroebe/shell/menu.rb +5320 -0
  547. data/lib/bioroebe/shell/misc.rb +4341 -0
  548. data/lib/bioroebe/shell/prompt.rb +107 -0
  549. data/lib/bioroebe/shell/random.rb +289 -0
  550. data/lib/bioroebe/shell/readline/readline.rb +91 -0
  551. data/lib/bioroebe/shell/reset.rb +335 -0
  552. data/lib/bioroebe/shell/scan_and_parse.rb +135 -0
  553. data/lib/bioroebe/shell/search.rb +337 -0
  554. data/lib/bioroebe/shell/sequences.rb +200 -0
  555. data/lib/bioroebe/shell/shell.rb +41 -0
  556. data/lib/bioroebe/shell/show_report_and_display.rb +2901 -0
  557. data/lib/bioroebe/shell/startup.rb +127 -0
  558. data/lib/bioroebe/shell/taxonomy.rb +14 -0
  559. data/lib/bioroebe/shell/tk.rb +23 -0
  560. data/lib/bioroebe/shell/user_input.rb +88 -0
  561. data/lib/bioroebe/shell/xorg.rb +45 -0
  562. data/lib/bioroebe/siRNA/README.md +2 -0
  563. data/lib/bioroebe/siRNA/siRNA.rb +93 -0
  564. data/lib/bioroebe/string_matching/README.md +13 -0
  565. data/lib/bioroebe/string_matching/find_longest_substring.rb +162 -0
  566. data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +175 -0
  567. data/lib/bioroebe/string_matching/hamming_distance.rb +313 -0
  568. data/lib/bioroebe/string_matching/levensthein.rb +698 -0
  569. data/lib/bioroebe/string_matching/simple_string_comparer.rb +294 -0
  570. data/lib/bioroebe/string_matching/smith_waterman.rb +276 -0
  571. data/lib/bioroebe/svg/README.md +1 -0
  572. data/lib/bioroebe/svg/glyph.rb +719 -0
  573. data/lib/bioroebe/svg/mini_feature.rb +111 -0
  574. data/lib/bioroebe/svg/page.rb +570 -0
  575. data/lib/bioroebe/svg/primitive.rb +70 -0
  576. data/lib/bioroebe/svg/svgee.rb +326 -0
  577. data/lib/bioroebe/svg/track.rb +263 -0
  578. data/lib/bioroebe/taxonomy/README.md +1 -0
  579. data/lib/bioroebe/taxonomy/chart.rb +95 -0
  580. data/lib/bioroebe/taxonomy/class_methods.rb +181 -0
  581. data/lib/bioroebe/taxonomy/colours.rb +26 -0
  582. data/lib/bioroebe/taxonomy/constants.rb +218 -0
  583. data/lib/bioroebe/taxonomy/edit.rb +97 -0
  584. data/lib/bioroebe/taxonomy/help/help.rb +65 -0
  585. data/lib/bioroebe/taxonomy/help/helpline.rb +53 -0
  586. data/lib/bioroebe/taxonomy/info/check_available.rb +143 -0
  587. data/lib/bioroebe/taxonomy/info/info.rb +337 -0
  588. data/lib/bioroebe/taxonomy/info/is_dna.rb +150 -0
  589. data/lib/bioroebe/taxonomy/interactive.rb +1933 -0
  590. data/lib/bioroebe/taxonomy/menu.rb +905 -0
  591. data/lib/bioroebe/taxonomy/node.rb +118 -0
  592. data/lib/bioroebe/taxonomy/parse_fasta.rb +383 -0
  593. data/lib/bioroebe/taxonomy/shared.rb +287 -0
  594. data/lib/bioroebe/taxonomy/taxonomy.rb +521 -0
  595. data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +56 -0
  596. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +715 -0
  597. data/lib/bioroebe/toplevel_methods/atomic_composition.rb +198 -0
  598. data/lib/bioroebe/toplevel_methods/base_composition.rb +121 -0
  599. data/lib/bioroebe/toplevel_methods/blast.rb +153 -0
  600. data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +57 -0
  601. data/lib/bioroebe/toplevel_methods/cat.rb +71 -0
  602. data/lib/bioroebe/toplevel_methods/chunked_display.rb +92 -0
  603. data/lib/bioroebe/toplevel_methods/cliner.rb +81 -0
  604. data/lib/bioroebe/toplevel_methods/complement.rb +58 -0
  605. data/lib/bioroebe/toplevel_methods/convert_global_env.rb +39 -0
  606. data/lib/bioroebe/toplevel_methods/databases.rb +73 -0
  607. data/lib/bioroebe/toplevel_methods/delimiter.rb +19 -0
  608. data/lib/bioroebe/toplevel_methods/digest.rb +71 -0
  609. data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +146 -0
  610. data/lib/bioroebe/toplevel_methods/e.rb +20 -0
  611. data/lib/bioroebe/toplevel_methods/editor.rb +21 -0
  612. data/lib/bioroebe/toplevel_methods/esystem.rb +22 -0
  613. data/lib/bioroebe/toplevel_methods/exponential_growth.rb +74 -0
  614. data/lib/bioroebe/toplevel_methods/extract.rb +56 -0
  615. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +353 -0
  616. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +257 -0
  617. data/lib/bioroebe/toplevel_methods/frequencies.rb +92 -0
  618. data/lib/bioroebe/toplevel_methods/hamming_distance.rb +60 -0
  619. data/lib/bioroebe/toplevel_methods/infer.rb +66 -0
  620. data/lib/bioroebe/toplevel_methods/is_on_roebe.rb +39 -0
  621. data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +101 -0
  622. data/lib/bioroebe/toplevel_methods/levensthein.rb +63 -0
  623. data/lib/bioroebe/toplevel_methods/log_directory.rb +109 -0
  624. data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +55 -0
  625. data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +88 -0
  626. data/lib/bioroebe/toplevel_methods/matches.rb +259 -0
  627. data/lib/bioroebe/toplevel_methods/misc.rb +432 -0
  628. data/lib/bioroebe/toplevel_methods/nucleotides.rb +715 -0
  629. data/lib/bioroebe/toplevel_methods/number_of_clones.rb +63 -0
  630. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +77 -0
  631. data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +236 -0
  632. data/lib/bioroebe/toplevel_methods/opn.rb +34 -0
  633. data/lib/bioroebe/toplevel_methods/palindromes.rb +127 -0
  634. data/lib/bioroebe/toplevel_methods/parse.rb +59 -0
  635. data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +68 -0
  636. data/lib/bioroebe/toplevel_methods/rds.rb +24 -0
  637. data/lib/bioroebe/toplevel_methods/remove.rb +86 -0
  638. data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +35 -0
  639. data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +68 -0
  640. data/lib/bioroebe/toplevel_methods/rna_splicing.rb +73 -0
  641. data/lib/bioroebe/toplevel_methods/rnalfold.rb +69 -0
  642. data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +116 -0
  643. data/lib/bioroebe/toplevel_methods/shuffleseq.rb +37 -0
  644. data/lib/bioroebe/toplevel_methods/statistics.rb +53 -0
  645. data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +62 -0
  646. data/lib/bioroebe/toplevel_methods/taxonomy.rb +187 -0
  647. data/lib/bioroebe/toplevel_methods/three_delimiter.rb +34 -0
  648. data/lib/bioroebe/toplevel_methods/time_and_date.rb +53 -0
  649. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +26 -0
  650. data/lib/bioroebe/toplevel_methods/truncate.rb +48 -0
  651. data/lib/bioroebe/toplevel_methods/url.rb +36 -0
  652. data/lib/bioroebe/toplevel_methods/verbose.rb +59 -0
  653. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +191 -0
  654. data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +119 -0
  655. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +230 -0
  656. data/lib/bioroebe/utility_scripts/compacter.rb +131 -0
  657. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +529 -0
  658. data/lib/bioroebe/utility_scripts/consensus_sequence.rb +374 -0
  659. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +130 -0
  660. data/lib/bioroebe/utility_scripts/determine_antigenic_areas.rb +115 -0
  661. data/lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage.rb +137 -0
  662. data/lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb +73 -0
  663. data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +31 -0
  664. data/lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb +37 -0
  665. data/lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb +49 -0
  666. data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +471 -0
  667. data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +113 -0
  668. data/lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb +56 -0
  669. data/lib/bioroebe/utility_scripts/dot_alignment.rb +177 -0
  670. data/lib/bioroebe/utility_scripts/download_files_from_rebase.rb +72 -0
  671. data/lib/bioroebe/utility_scripts/find_gene.rb +202 -0
  672. data/lib/bioroebe/utility_scripts/mirror_repeat.rb +235 -0
  673. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +151 -0
  674. data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +168 -0
  675. data/lib/bioroebe/utility_scripts/pathways.rb +152 -0
  676. data/lib/bioroebe/utility_scripts/permutations.rb +145 -0
  677. data/lib/bioroebe/utility_scripts/punnet/punnet.rb +126 -0
  678. data/lib/bioroebe/utility_scripts/show_this_dna_sequence.rb +148 -0
  679. data/lib/bioroebe/utility_scripts/showorf/constants.rb +36 -0
  680. data/lib/bioroebe/utility_scripts/showorf/help.rb +33 -0
  681. data/lib/bioroebe/utility_scripts/showorf/initialize.rb +52 -0
  682. data/lib/bioroebe/utility_scripts/showorf/menu.rb +68 -0
  683. data/lib/bioroebe/utility_scripts/showorf/reset.rb +39 -0
  684. data/lib/bioroebe/utility_scripts/showorf/run.rb +152 -0
  685. data/lib/bioroebe/utility_scripts/showorf/show.rb +97 -0
  686. data/lib/bioroebe/utility_scripts/showorf/showorf.rb +488 -0
  687. data/lib/bioroebe/version/version.rb +44 -0
  688. data/lib/bioroebe/viennarna/README.md +3 -0
  689. data/lib/bioroebe/viennarna/rnafold_wrapper.rb +196 -0
  690. data/lib/bioroebe/with_gui.rb +18 -0
  691. data/lib/bioroebe/www/bioroebe.cgi +44 -0
  692. data/lib/bioroebe/www/embeddable_interface.rb +686 -0
  693. data/lib/bioroebe/www/sinatra/sinatra.rb +1013 -0
  694. data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +21 -0
  695. data/lib/bioroebe/yaml/aminoacids/amino_acids.yml +92 -0
  696. data/lib/bioroebe/yaml/aminoacids/amino_acids_abbreviations.yml +31 -0
  697. data/lib/bioroebe/yaml/aminoacids/amino_acids_average_mass_table.yml +33 -0
  698. data/lib/bioroebe/yaml/aminoacids/amino_acids_classification.yml +18 -0
  699. data/lib/bioroebe/yaml/aminoacids/amino_acids_english.yml +26 -0
  700. data/lib/bioroebe/yaml/aminoacids/amino_acids_frequency.yml +44 -0
  701. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +61 -0
  702. data/lib/bioroebe/yaml/aminoacids/amino_acids_molecular_formula.yml +32 -0
  703. data/lib/bioroebe/yaml/aminoacids/amino_acids_monoisotopic_mass_table.yml +38 -0
  704. data/lib/bioroebe/yaml/aminoacids/amino_acids_reste.yml +35 -0
  705. data/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml +34 -0
  706. data/lib/bioroebe/yaml/aminoacids/hydropathy_table.yml +44 -0
  707. data/lib/bioroebe/yaml/aminoacids/molecular_weight.yml +29 -0
  708. data/lib/bioroebe/yaml/aminoacids/simple_aminoacids.yml +66 -0
  709. data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +33 -0
  710. data/lib/bioroebe/yaml/antisense/antisense.yml +9 -0
  711. data/lib/bioroebe/yaml/base_composition_of_dna.yml +37 -0
  712. data/lib/bioroebe/yaml/blosum/blosum45.yml +36 -0
  713. data/lib/bioroebe/yaml/blosum/blosum50.yml +34 -0
  714. data/lib/bioroebe/yaml/blosum/blosum62.yml +35 -0
  715. data/lib/bioroebe/yaml/blosum/blosum80.yml +37 -0
  716. data/lib/bioroebe/yaml/blosum/blosum90.yml +36 -0
  717. data/lib/bioroebe/yaml/blosum/blosum_matrix.yml +200 -0
  718. data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +30 -0
  719. data/lib/bioroebe/yaml/codon_tables/1.yml +113 -0
  720. data/lib/bioroebe/yaml/codon_tables/10.yml +89 -0
  721. data/lib/bioroebe/yaml/codon_tables/11.yml +91 -0
  722. data/lib/bioroebe/yaml/codon_tables/12.yml +89 -0
  723. data/lib/bioroebe/yaml/codon_tables/13.yml +89 -0
  724. data/lib/bioroebe/yaml/codon_tables/14.yml +89 -0
  725. data/lib/bioroebe/yaml/codon_tables/15.yml +94 -0
  726. data/lib/bioroebe/yaml/codon_tables/16.yml +89 -0
  727. data/lib/bioroebe/yaml/codon_tables/2.yml +86 -0
  728. data/lib/bioroebe/yaml/codon_tables/21.yml +89 -0
  729. data/lib/bioroebe/yaml/codon_tables/22.yml +89 -0
  730. data/lib/bioroebe/yaml/codon_tables/23.yml +91 -0
  731. data/lib/bioroebe/yaml/codon_tables/24.yml +89 -0
  732. data/lib/bioroebe/yaml/codon_tables/25.yml +89 -0
  733. data/lib/bioroebe/yaml/codon_tables/26.yml +96 -0
  734. data/lib/bioroebe/yaml/codon_tables/27.yml +104 -0
  735. data/lib/bioroebe/yaml/codon_tables/28.yml +97 -0
  736. data/lib/bioroebe/yaml/codon_tables/29.yml +93 -0
  737. data/lib/bioroebe/yaml/codon_tables/3.yml +98 -0
  738. data/lib/bioroebe/yaml/codon_tables/30.yml +91 -0
  739. data/lib/bioroebe/yaml/codon_tables/31.yml +94 -0
  740. data/lib/bioroebe/yaml/codon_tables/33.yml +101 -0
  741. data/lib/bioroebe/yaml/codon_tables/4.yml +96 -0
  742. data/lib/bioroebe/yaml/codon_tables/5.yml +100 -0
  743. data/lib/bioroebe/yaml/codon_tables/6.yml +96 -0
  744. data/lib/bioroebe/yaml/codon_tables/9.yml +97 -0
  745. data/lib/bioroebe/yaml/codon_tables/overview.yml +42 -0
  746. data/lib/bioroebe/yaml/configuration/README.md +12 -0
  747. data/lib/bioroebe/yaml/configuration/browser.yml +1 -0
  748. data/lib/bioroebe/yaml/configuration/colourize_fasta_sequences.yml +14 -0
  749. data/lib/bioroebe/yaml/configuration/default_colours_for_the_aminoacids.yml +28 -0
  750. data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -0
  751. data/lib/bioroebe/yaml/configuration/try_to_use_matplotlib.yml +1 -0
  752. data/lib/bioroebe/yaml/configuration/use_opn.yml +1 -0
  753. data/lib/bioroebe/yaml/configuration/use_this_database.yml +1 -0
  754. data/lib/bioroebe/yaml/create_these_directories_on_startup/create_these_directories_on_startup.yml +9 -0
  755. data/lib/bioroebe/yaml/default_dna_input.yml +3 -0
  756. data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +15 -0
  757. data/lib/bioroebe/yaml/enzymes/pH-Optima.yml +11 -0
  758. data/lib/bioroebe/yaml/fasta_and_fastq/fastq_quality_schemes.yml +44 -0
  759. data/lib/bioroebe/yaml/genomes/README.md +16 -0
  760. data/lib/bioroebe/yaml/humans/README.md +2 -0
  761. data/lib/bioroebe/yaml/humans/human_chromosomes.yml +53 -0
  762. data/lib/bioroebe/yaml/laboratory/README.md +1 -0
  763. data/lib/bioroebe/yaml/laboratory/pipettes.yml +8 -0
  764. data/lib/bioroebe/yaml/mRNA/mRNA.yml +16 -0
  765. data/lib/bioroebe/yaml/nuclear_localization_sequences.yml +15 -0
  766. data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +29 -0
  767. data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +10 -0
  768. data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +34 -0
  769. data/lib/bioroebe/yaml/nucleotides/nucleotides_weight.yml +12 -0
  770. data/lib/bioroebe/yaml/pathways/README.md +2 -0
  771. data/lib/bioroebe/yaml/pathways/citric_acid_cycle.yml +16 -0
  772. data/lib/bioroebe/yaml/pathways/glycolysis.yml +20 -0
  773. data/lib/bioroebe/yaml/pathways/shikimate_pathway.yml +23 -0
  774. data/lib/bioroebe/yaml/pathways/urea_cycle.yml +11 -0
  775. data/lib/bioroebe/yaml/primers/README.md +4 -0
  776. data/lib/bioroebe/yaml/primers/primers.yml +3 -0
  777. data/lib/bioroebe/yaml/promoters/35S.yml +15 -0
  778. data/lib/bioroebe/yaml/promoters/strong_promoters.yml +24 -0
  779. data/lib/bioroebe/yaml/proteases/proteases.yml +3 -0
  780. data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -0
  781. data/lib/bioroebe/yaml/remote_urls/README.md +4 -0
  782. data/lib/bioroebe/yaml/remote_urls/remote_urls.yml +3 -0
  783. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +630 -0
  784. data/lib/bioroebe/yaml/sequences/JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta +14 -0
  785. data/lib/bioroebe/yaml/sequences/README.md +2 -0
  786. data/lib/bioroebe/yaml/talens.yml +22 -0
  787. data/lib/bioroebe/yaml/viruses/ecoli_phages.yml +63 -0
  788. data/lib/bioroebe/yaml/viruses/viruses.yml +6 -0
  789. data/lib/bioroebe.rb +5 -0
  790. data/spec/testing_toplevel_method_editor.rb +20 -0
  791. data/spec/testing_toplevel_method_url.rb +15 -0
  792. data/spec/testing_toplevel_method_verbose.rb +13 -0
  793. data/test/advanced_svg_example.rb +307 -0
  794. data/test/testing_bioroebe.rb +25 -0
  795. data/test/testing_codons.rb +45 -0
  796. data/test/testing_dna_to_rna_conversion.rb +15 -0
  797. data/test/testing_parse_pdb_file.rb +23 -0
  798. data/test/testing_reverse_complement.rb +32 -0
  799. data/test/testing_svg_component_of_bioroebe.rb +311 -0
  800. data/test/testing_svg_component_of_bioroebe_from_json_dataset.rb +34 -0
  801. data/test/testing_taxonomy.rb +22 -0
  802. metadata +1059 -0
@@ -0,0 +1,1933 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: true
4
+ # =========================================================================== #
5
+ # === Bioroebe::Taxonomy::Interactive
6
+ #
7
+ # This bundles together various taxonomy-related code portions.
8
+ #
9
+ # This file can be used interactively. It provides the central
10
+ # point of entry for the taxonomy module.
11
+ # =========================================================================== #
12
+ require 'bioroebe/base/commandline_application/commandline_application.rb'
13
+
14
+ module Bioroebe
15
+
16
+ module Taxonomy
17
+
18
+ class Interactive < ::Bioroebe::CommandlineApplication # === Bioroebe::Taxonomy::Interactive
19
+
20
+ require 'bioroebe/taxonomy/shared.rb'
21
+ require 'bioroebe/taxonomy/class_methods.rb'
22
+ require 'bioroebe/taxonomy/info/check_available.rb'
23
+ require 'bioroebe/taxonomy/info/is_dna.rb'
24
+ require 'bioroebe/taxonomy/info/info.rb'
25
+ require 'bioroebe/taxonomy/help/help.rb'
26
+ require 'bioroebe/taxonomy/menu.rb'
27
+
28
+ begin
29
+ require 'readline' # Enable readline here, if available.
30
+ rescue LoadError; end
31
+
32
+ begin
33
+ require 'cliner' # Needed in this file here.
34
+ rescue LoadError; end
35
+
36
+ begin
37
+ require 'stat_file'
38
+ rescue LoadError; end
39
+
40
+ include Taxonomy # Add the main namespace here.
41
+ include Taxonomy::Shared
42
+ if Object.const_defined?(:Roebe) and
43
+ Roebe.const_defined?(:SqlParadise)
44
+ include ::Roebe::SqlParadise
45
+ end
46
+
47
+ # ========================================================================= #
48
+ # The SQL-relevant parts come next.
49
+ # ========================================================================= #
50
+ begin
51
+ require 'roebe/sql_paradise'
52
+ Roebe::SqlParadise::Commands.set_temp_dir ::Bioroebe.log_dir?
53
+ rescue LoadError; end
54
+
55
+ # ========================================================================= #
56
+ # === NAMESPACE
57
+ # ========================================================================= #
58
+ NAMESPACE = inspect
59
+
60
+ # ========================================================================= #
61
+ # === FASTA_SQL
62
+ # ========================================================================= #
63
+ FASTA_SQL = AUTOGENERATED_SQL_FILES_DIR+'fasta.sql'
64
+
65
+ # ========================================================================= #
66
+ # === BE_VERBOSE
67
+ #
68
+ # Constants for this class here.
69
+ # ========================================================================= #
70
+ BE_VERBOSE = true # This can be disabled via 'silent' from the commandline.
71
+
72
+ # ========================================================================= #
73
+ # === USE_COLOURS
74
+ #
75
+ # Whether to have colourized output or not.
76
+ # ========================================================================= #
77
+ USE_COLOURS = true # This can be disabled via 'nocolours'.
78
+
79
+ # ========================================================================= #
80
+ # === NO_HELP
81
+ # ========================================================================= #
82
+ NO_HELP = false # This can be disabled via 'nohelp'.
83
+
84
+ # ========================================================================= #
85
+ # === initialize
86
+ #
87
+ # The first argument holds the commandline arguments.
88
+ # ========================================================================= #
89
# Bring the instance into its default state, store the commandline
# arguments and, unless the caller opts out, start right away.
#
# optional_commandline_arguments - handed to set_commandline_arguments as-is
# run_already                    - when truthy, run() is called at the end
def initialize(
    optional_commandline_arguments = nil,
    run_already                    = true
  )
  reset
  set_commandline_arguments(optional_commandline_arguments)
  return unless run_already
  run
end
99
+
100
+ # ========================================================================= #
101
+ # === reset
102
+ # ========================================================================= #
103
# Restore every instance variable of this class to its default, after
# letting the parent class reset its own state via super().
#
# All flags are seeded from the class-level constants so that the
# defaults live in exactly one place.
def reset
  super()
  # ======================================================================= #
  # === @namespace
  # ======================================================================= #
  @namespace = NAMESPACE
  # ======================================================================= #
  # === @be_verbose
  # ======================================================================= #
  @be_verbose = BE_VERBOSE
  # ======================================================================= #
  # === @use_colours
  # ======================================================================= #
  @use_colours = USE_COLOURS
  # ======================================================================= #
  # === @no_help
  #
  # Taken from NO_HELP, in line with the two flags above, rather than
  # from a hard-coded literal.
  # ======================================================================= #
  @no_help = NO_HELP
  # ======================================================================= #
  # === @run_standalone
  # ======================================================================= #
  @run_standalone = true
end
126
+
127
+ # ========================================================================= #
128
+ # === get (get tag)
129
+ #
130
+ # Get something from the localome table with this method.
131
+ # ========================================================================= #
132
# Build a SELECT statement against the fasta (localome) table and run it.
#
# i           - the column expression to select; converted via to_s
# use_this_id - optional Taxonomy ID; when given, only rows with that
#               taxid are selected
#
# Returns whatever run_sql(sql, :silent, :tuples) returns.
#
# NOTE(review): i is interpolated verbatim as the column list, so it must
# never come from untrusted input.
def get(
    i,
    use_this_id = nil
  )
  i = i.to_s
  sql = "SELECT #{i} from fasta".dup
  if use_this_id
    # Double embedded single quotes so the value cannot end the SQL literal.
    escaped_id = use_this_id.to_s.gsub("'", "''")
    # No semicolon here: an ORDER BY clause may still follow, and the
    # statement is terminated exactly once further below.
    sql << " where taxid='#{escaped_id}'"
  end
  sql << ' ORDER BY taxid' if i.include?('taxid') # Sort by taxid in this case.
  sql << ';'
  run_sql(sql, :silent, :tuples)
end
146
+
147
+ # ========================================================================= #
148
+ # === taxtree
149
+ #
150
+ # Obtain the taxtree, then display it. Input to this should be the
151
+ # Taxonomy ID. We will first check the localome table, then query
152
+ # from the two ncbi tables.
153
+ #
154
+ # To test this method, try:
155
+ #
156
+ # taxtree 106583
157
+ # taxtree 77166
158
+ #
159
+ # ========================================================================= #
160
# Print the lineage of the given Taxonomy ID as an indented tree.
#
# If has?() reports the id as known, the 'lineage_ids' column is fetched
# via get() and split on '->'. Otherwise return_full_lineage_of() is used
# and only the first element of each of its entries is kept.
def taxtree(i)
  taxonomy_id = i.to_s
  e 'The lineage for Taxonomy ID '+simp(taxonomy_id)+' is as follows:'
  lineage_ids =
    if has? taxonomy_id # Ok, the localome table has this entry.
      stored_lineage = get('lineage_ids', taxonomy_id)
      stored_lineage.strip.split('->').map(&:strip)
    else
      return_full_lineage_of(taxonomy_id).map! {|entry| entry[0] }
    end
  report_tree_lineage(lineage_ids)
end
171
+
172
+ # ========================================================================= #
173
+ # === report_tree_lineage
174
+ #
175
+ # This method expects an Array as input, which contains all the IDs
176
+ # that we will report in a tree (hence the name tree here).
177
+ # ========================================================================= #
178
# Print every element of i on its own line through e(), indenting each
# line one space further than the previous one (the first gets one space).
def report_tree_lineage(i)
  i.each.with_index(1) {|entry, depth| e "#{' ' * depth}#{entry}" }
end
184
+
185
+ # ========================================================================= #
186
+ # === remove_taxid_from_localome_table
187
+ #
188
+ # This method will remove a TaxID from the localome table.
189
+ # ========================================================================= #
190
# Delete the row with the given Taxonomy ID from the fasta (localome)
# table, provided has_id?() reports that such an entry exists.
#
# i - the Taxonomy ID; converted via to_s
#
# A short notice is printed through e() in either case when be_verbose?
# is true.
def remove_taxid_from_localome_table(i)
  i = i.to_s
  if has_id? i
    # Ok we found an entry, thus we can remove it now. Embedded single
    # quotes are doubled so the id cannot break out of the SQL literal
    # and widen the DELETE.
    escaped_id = i.gsub("'", "''")
    run_sql "DELETE FROM fasta WHERE taxid='#{escaped_id}';"
    if be_verbose?
      e 'Removed entry '+simp(i)+
        ' (Taxonomy ID) from the localome (fasta) table.'
    end
  else
    e "Could not find TaxID #{simp(i)}." if be_verbose?
  end
end
206
+
207
+ # ========================================================================= #
208
+ # === update_local_localomes
209
+ #
210
+ # Update the local collection. This will overwrite the old dataset
211
+ # completely, so be careful when using this.
212
+ # ========================================================================= #
213
# Rebuild the local localome collection from scratch: create a fresh
# fasta table, then create a .sql file for every info entry that carries
# a Taxonomy ID, and finally report statistics and the elapsed time.
#
# The old dataset is overwritten completely, so be careful.
#
# NOTE(review): the elapsed time is computed from two reads of @time;
# presumably show_current_time / show_time_now refresh that variable -
# confirm against their definitions.
def update_local_localomes
  show_current_time
  create_fasta_table # Get in a new, fresh table.
  info_entries = get_all_info_entries_with_tax_id
  show_current_time
  info_entries.each do |info_entry|
    e 'Next working on '+sfile(info_entry)
    create_sql_file_from_local_fasta_entry(info_entry)
  end
  report_how_many_info_files_exist_and_how_many_lack_taxonomy_id
  started_at = @time
  show_time_now # Show the end time.
  finished_at = @time
  report_how_long_it_took_us(
    Time.parse(finished_at) - Time.parse(started_at),
    'localome table'
  )
end
229
+
230
+ # ========================================================================= #
231
+ # === try_to_display_the_status
232
+ # ========================================================================= #
233
# Without an argument: query Taxonomy.status? and then Info.status.
# With an argument: hand it to StatFile.new, but only if the StatFile
# constant is defined (it comes from an optional require).
def try_to_display_the_status(i = nil)
  unless i # No argument was provided.
    Taxonomy.status?
    return Info.status
  end
  StatFile.new(i) if Object.const_defined?(:StatFile)
end
241
+
242
+ # ========================================================================= #
243
+ # === generate (generate tag)
244
+ #
245
+ # This method can be used to generate nodes or names .sql files.
246
+ # These .sql files will be generated in the TEMP_DIR, which
247
+ # at the time of writing this documentation is at
248
+ # /tmp/robert.
249
+ # ========================================================================= #
250
# Trigger the generation of the nodes or names .sql file by delegating
# to menu() with either 'nodes' or 'names'.
#
# i - 'nodes', 'names', 'nodes.sql' or 'names.sql' (converted via to_s);
#     any other value is ignored and nil is returned
#
# Returns the result of menu() for a recognized value.
def generate(i)
  i = i.to_s
  case i # case tag
  when 'nodes', 'names', 'nodes.sql', 'names.sql'
    # String#to_s hands back the receiver itself, so a destructive
    # gsub! would modify the caller's object and raise FrozenError
    # for frozen literals. Strip the suffix non-destructively instead.
    menu(i.sub(/\.sql\z/, '')) # Delegate towards the main menu here.
  end
end; alias create generate # === create
258
+
259
+ # ========================================================================= #
260
+ # === show_all_eukarya
261
+ # ========================================================================= #
262
# List every entry of the localomes database whose lineage contains the
# Eukaryota Taxonomy ID, followed by a short summary.
#
# optional_show_path - when a String is passed, the dataset that also
#                      carries the path column is queried and the path
#                      is appended to every reported entry
#
# The dataset is expected to hold one entry per line, with the fields
# name | taxid | lineage ids (| path) separated by '|'.
#
# Returns the result of report_total_amount_of_proteomes, which receives
# the Taxonomy IDs of all entries (not only the Eukaryota).
def show_all_eukarya(
    optional_show_path = false
  )
  show_path = optional_show_path.is_a?(String)
  e 'We found these Eukaryota in the localomes Database:'+N+N
  dataset =
    if show_path
      select_name_and_tax_id_and_lineage_ids_and_path # 0,1,2,3
    else
      select_name_and_tax_id_and_lineage_ids
    end
  entries = dataset.split(N) # Split only once; reused three times below.
  # Scientific names of all matching entries. This is kept separate from
  # the per-entry output line so that its size is the real match count.
  eukaryota = []
  entries.each {|entry|
    splitted = entry.split('|')
    lineage = splitted[2]
    next unless lineage =~ / #{Eukaryota_Taxonomy_ID} /
    scientific_name = splitted.first.strip
    eukaryota << scientific_name
    line = simp(eukaryota.size.to_s)+') '+scientific_name+
           ' ('+sfancy('TaxID')+': '+splitted[1].to_s.strip+')'
    if show_path
      # to_s guards against entries that lack the path column.
      line << (' Path: '+splitted[3].to_s).ljust(40) # Append the path.
    end
    e line
  }
  e N+N+'Out of '+sfancy(entries.size.to_s)+' registered '\
    'entries in total in the localomes database,'
  e sfancy(eukaryota.size.to_s)+' belong to Eukaryota (the Taxonomy '\
    'ID of Eukaryota is '+sfancy(Eukaryota_Taxonomy_ID.to_s)+').'
  array = entries.map {|entry| entry.split('|')[1].to_s.strip }
  report_total_amount_of_proteomes(array)
end
299
+
300
+ # ========================================================================= #
301
+ # === query_from_localome_table
302
+ #
303
+ # This will display the result as well.
304
+ # ========================================================================= #
305
# Fetch i from the localome table through get() and print the result
# via e().
def query_from_localome_table(i)
  query_result = get(i)
  e query_result
end
308
+
309
+ # ========================================================================= #
310
+ # === disable_colours
311
+ # ========================================================================= #
312
# Switch the @use_colours flag of this instance off. Returns false.
def disable_colours
  @use_colours = false
end
315
+
316
+ # ========================================================================= #
317
+ # === try_to_show_when_the_last_update_has_happened
318
+ # ========================================================================= #
319
# Print the content of SAVE_FILE (read through cat) if that file exists;
# otherwise do nothing and return nil.
def try_to_show_when_the_last_update_has_happened
  return unless File.exist?(SAVE_FILE)
  e cat(SAVE_FILE)
end
322
+
323
+ # ========================================================================= #
324
+ # === query_whether_we_have_this_id(i)
325
+ #
326
+ # We query whether we have a specific ID or whether we don't.
327
+ # ========================================================================= #
328
# Tell the user, via e(), whether has_id?() knows the given id.
def query_whether_we_have_this_id(i)
  verb = has_id?(i) ? 'do' : 'do not'
  e 'We '+verb+' have the id '+simp(i.to_s)+'.'
end
335
+
336
+ # ========================================================================= #
337
+ # === report_lineage_ids_and_lineage_scientific_name
338
+ # ========================================================================= #
339
# Print one line that combines the given Tax ID with the contents of
# @lineage_ids and @lineage_scientific_name.
#
# NOTE(review): both instance variables are concatenated with +, so they
# have to be Strings by the time this method runs - confirm at the
# call sites.
def report_lineage_ids_and_lineage_scientific_name(i)
  message  = 'The lineage of all parent ids for Tax ID '+sfancy(i)+' is: '
  message += @lineage_ids
  message += ', and in long form '
  message += @lineage_scientific_name
  e message
end
343
+
344
+ # ========================================================================= #
345
+ # === drop_nodes_table
346
+ # ========================================================================= #
347
# Run the SQL held in POSTGRE_DROP_NODES_COMMAND through run_sql.
#
# be_verbose - passed on unchanged as the second argument of run_sql
def drop_nodes_table(be_verbose = true)
  run_sql(POSTGRE_DROP_NODES_COMMAND, be_verbose)
end
350
+
351
+ # ========================================================================= #
352
+ # === drop_names_table
353
+ # ========================================================================= #
354
# Run POSTGRE_DROP_NAMES_COMMAND through run_sql(); be_verbose is
# forwarded unchanged as the second argument.
def drop_names_table(be_verbose = true)
  run_sql(POSTGRE_DROP_NAMES_COMMAND, be_verbose)
end
357
+
358
+ # ========================================================================= #
359
+ # === show_scientific_name_of
360
+ # ========================================================================= #
361
# Look up the scientific name for the Tax ID i via
# get_scientific_name_of() and print it. An empty lookup result leads
# to a short troubleshooting hint instead.
def show_scientific_name_of(i)
  name = get_scientific_name_of(i).to_s
  unless name.empty?
    return e('The scientific name of Tax ID '+lightblue(i.to_s)+
             ' is: '+simp(name))
  end
  e 'We tried to find a Tax ID but we got no result.'
  e 'Are you able to connect to the postgresql-database?'
  e 'Perhaps this Tax ID does not exist.'
end
372
+
373
+ # ========================================================================= #
374
+ # === lupdate (update tag)
375
+ #
376
+ # This is the general way to update something. "update everything" is
377
+ # the most important component - it will update everything, including
378
+ # the localomes entry.
379
+ # ========================================================================= #
380
# Dispatch an update request. The input is turned into a String and
# matched against the known keywords; unknown input is only reported.
def lupdate(i = :ncbi)
  target = i.to_s
  case target
  # === everything
  when 'all', 'everything', 'both'
    update_ncbi_database
    update_local_localomes
    update_lineage
  # === ncbi_database
  when /^ncbi(-|_| )?database$/, 'ncbi', '1', 'update_ncbi'
    update_ncbi_database
  # === localomes
  when 'localomes', 'local', 'loc', 'localome', '2',
       'fasta', 'database', 'databases'
    update_local_localomes # This will overwrite the old dataset completely, be careful.
  # === lineage
  when 'lineage', 'lin', 'li'
    update_lineage
  else
    e 'Not sure what to update - the input was '+sfancy(target)
  end
end
408
+
409
+ # ========================================================================= #
410
+ # === disable (disable tag)
411
+ # ========================================================================= #
412
# Disable a feature by name. Only :colours is handled at present; any
# other input does nothing and yields nil.
def disable(i)
  disable_colours if i.to_sym == :colours
end
418
+
419
+ # ========================================================================= #
420
+ # === query_localome_including_path
421
+ #
422
+ # We also show the path here.
423
+ # ========================================================================= #
424
# List every row of the fasta table (name, taxid, path), numbered from 1
# and ordered by taxid, then report how many rows were found.
#
# Each output line of the query is expected to hold three '|'-separated
# fields.
def query_localome_including_path
  e 'These organisms are available locally:'
  rows = run_query(
    'select name,taxid,path from fasta ORDER BY taxid;',
    false, :tuples
  ).split(N)
  rows.each.with_index(1) do |row, position|
    fields = row.split('|')
    name   = fields[0].strip.ljust(40)
    path   = 'the local path is at '+N+' '+sfancy(fields[2])
    e ' ('+position.to_s+') '+name+' -> TaxID: '+
      simp(fields[1].strip)+', '+path
  end
  report_n_registered_organisms_in_localome(rows.size)
end; alias localome? query_localome_including_path # === localome?
440
+
441
+ # ========================================================================= #
442
+ # === report_n_registered_organisms_in_localome
443
+ # ========================================================================= #
444
# Print how many organisms are registered; i is the count to display.
def report_n_registered_organisms_in_localome(i)
  count = sfancy(i.to_s)
  e 'We have a total of '+count+' organisms registered in the localome (== fasta) table.'
end
448
+
449
+ # ========================================================================= #
450
+ # === drop_nodes_and_names_database_tables
451
+ # ========================================================================= #
452
# Drop the nodes table first and the names table second, passing
# be_verbose on to both helpers.
def drop_nodes_and_names_database_tables(be_verbose = true)
  drop_nodes_table(be_verbose)
  drop_names_table(be_verbose)
end
456
+
457
+ # ========================================================================= #
458
+ # === query_localome
459
+ #
460
+ # Query the localome table.
461
+ # ========================================================================= #
462
# List name and taxid of every row in the fasta table, numbered from 1
# and ordered by taxid, then report how many rows were found.
def query_localome
  e 'These organisms are available locally:'
  rows = run_query(
    'select name,taxid from fasta ORDER BY taxid;',
    false, :tuples
  ).split(N)
  rows.each.with_index(1) do |row, position|
    fields = row.split('|')
    name   = fields[0].strip.ljust(30)
    e ' ('+position.to_s+') '+name+' -> TaxID: '+
      simp(fields[1].strip)
  end
  report_n_registered_organisms_in_localome(rows.size)
end
477
+
478
+ # ========================================================================= #
479
+ # === update_ncbi_database
480
+ #
481
+ # We will update the NCBI Taxonomy database with this method. This
482
+ # includes a download, extracting it, generating the .sql files,
483
+ # and then populating the postgresql database.
484
+ # ========================================================================= #
485
# Refresh the NCBI taxonomy tables: download the dataset, generate the
# SQL dumps through menu(), drop the old tables, load the dumps and
# finally report the elapsed time.
#
# NOTE(review): the timing relies on @time; presumably it is set during
# the download step and refreshed by show_time_now - confirm.
def update_ncbi_database
  download :ncbi # Defined in this file here.
  if be_verbose?
    e 'We will update the local postgre NCBI Table now (this may take about one hour in total):'
  end
  # This will generate names.sql and nodes.sql.
  %w(names nodes).each { |table| menu table }
  # Next, the auto-generated dumps are loaded into the postgre database.
  if be_verbose?
    e 'We will next load this dataset into the Database.'
    e 'First, dropping the old table entries in the Postgresql database '
    e 'via the DROP TABLE command:'
  end
  drop_nodes_and_names_database_tables
  nodes_size = File.size(NODES_SQL).to_s
  names_size = File.size(NAMES_SQL).to_s
  e 'Next we will populate the two tables with '+
    sfancy('names.sql')+' (Filesize: '+names_size+') and '+
    sfancy('nodes.sql')+' (Filesize: '+nodes_size+') file.'
  e 'This may take a while, possibly about an hour, so please remain patient.'
  read_in_names_and_nodes_sql_files
  e 'Nota bene: the above commands can only work if psql is in your $PATH.'
  e 'If it did not work, please check and see first whether your $PATH variable is proper.'
  e N+'We will display the content of the $PATH variable now: '+
    sfancy(ENV['PATH'])
  started_at = @time
  show_time_now # Show the end time.
  elapsed = Time.parse(@time) - Time.parse(started_at)
  report_how_long_it_took_us(elapsed)
end; alias update_database update_ncbi_database
522
+
523
+ # ========================================================================= #
524
+ # === report_lineage_of
525
+ # ========================================================================= #
526
# Obtain the lineage for f, print it, and then hand the individual
# lineage ids (split on ' -> ') to generate_html_links_for().
def report_lineage_of(f)
  obtain_full_lineage_for(f)
  report_lineage_ids_and_lineage_scientific_name(f)
  e
  lineage_ids = @lineage_ids.split(' -> ')
  generate_html_links_for(lineage_ids)
end
532
+
533
+ # ========================================================================= #
534
+ # === report_how_long_it_took_us
535
+ #
536
+ # The input to this method should be the number of seconds, i.e.
537
+ # 60 seconds.
538
+ # ========================================================================= #
539
# Report how long an update took.
#
# i is the duration in seconds; title_of_table names what was updated.
# The minutes value is rounded to two decimals. Cutting the string
# representation down to four characters, as was done before, produced
# wrong magnitudes for long durations (10000.0 became "1000").
def report_how_long_it_took_us(
    i              = 0,
    title_of_table = 'NCBI-based taxonomy table'
  )
  n_minutes = (i.to_f / 60.0).round(2).to_s
  e 'Updating the '+title_of_table+' took us '+
    sfancy(i.to_s)+' seconds (= '+n_minutes+' minutes).'
end
550
+
551
+ # ========================================================================= #
552
+ # === read_sql
553
+ #
554
+ # Read in sql into the Postgre Database.
555
+ # ========================================================================= #
556
# Read an SQL file into the Postgre database. Only :fasta is handled;
# other values of i merely trigger set_pgpassword.
#
# The command is assembled in a fresh String. Appending with << to the
# login constant itself changed that constant, so every further call
# carried one more " -f ..." suffix (or raised on a frozen constant).
def read_sql(i = :fasta)
  set_pgpassword
  case i
  # === :fasta
  when :fasta
    # at_home? selects the login command used on the home system.
    login = at_home? ? POSTGRE_LOGIN_COMMAND_HOME : POSTGRE_LOGIN_COMMAND
    cmd = login+' -f '+FASTA_SQL
    e 'Next, we will read in from '+FASTA_SQL if be_verbose?
    esystem cmd
    e 'Done reading in the dataset!' if be_verbose?
  end
end
574
+
575
+ # ========================================================================= #
576
+ # === search_in_localomes
577
+ #
578
+ # This will search in localomes.
579
+ # ========================================================================= #
580
# Search the fasta table for names containing i (at most 3 rows) and
# return whatever run_sql_query() yields.
#
# Single quotes in the search term are doubled so that the term stays
# inside the SQL string literal.
def search_in_localomes(i = 'Blastocystis hominis')
  if be_verbose?
    e 'We will now try to search the fasta table for `'+sfancy(i)+'`.'
  end
  escaped = i.to_s.gsub("'", "''")
  run_sql_query(
    "select name,modification_time FROM fasta WHERE name LIKE '%"+escaped+"%' LIMIT 3;",
    true
  )
end
588
+
589
+ # ========================================================================= #
590
+ # === open_project_files (open tag)
591
+ #
592
+ # This method will open the various project files in the editor.
593
+ # ========================================================================= #
594
# Open every entry of ARRAY_PROJECT_FILES in the editor, one esystem()
# call per file. Doubled slashes in the command are squeezed.
def open_project_files
  ARRAY_PROJECT_FILES.each do |entry|
    command = editor?+' '+PROJECT_BASE_DIR+'lib/taxonomy/'+entry.to_s
    esystem command.squeeze('/')
  end
end
600
+
601
+ # ========================================================================= #
602
+ # === do_run_connected
603
+ # ========================================================================= #
604
# Mark this instance as not running standalone.
def do_run_connected
  @run_standalone = false
end; alias run_connected do_run_connected # === run_connected
607
+
608
+ # ========================================================================= #
609
+ # === use_colours?
610
+ # ========================================================================= #
611
# Reader for the @use_colours flag.
def use_colours?
  @use_colours
end
614
+
615
+ # ========================================================================= #
616
+ # === create_default_directories
617
+ # ========================================================================= #
618
# Create the hard-coded default directories.
#
# The list is shown via pp and then handed to mkdir(). The call used to
# pass an undefined local named "array", which raised a NameError
# before any directory was created.
def create_default_directories
  e 'Creating some directories now.'
  directories = %w(
    /data/ncbi/taxonomy/
    /tmp/robert/autogenerated_sql_files/
    /data/curated/sequences/localome/
    /data/curated/sequences/aa/
    /data/curated/sequences/nt/
    /data/curated/sequences/INFO/
    /data/curated/sequences/localome/incoming/
  )
  pp directories
  mkdir(directories)
end
631
+
632
+ # ========================================================================= #
633
+ # === user_input?
634
+ # ========================================================================= #
635
# Reader for @user_input.
def user_input?
  @user_input
end
638
+
639
+ # ========================================================================= #
640
+ # === create_dirs
641
+ # ========================================================================= #
642
# Ensure that the temp directory and the download directory exist.
# be_verbose may be a boolean or the Symbol :be_verbose; when set, a
# short notice is printed first.
def create_dirs(be_verbose = false)
  be_verbose = true if be_verbose == :be_verbose
  if be_verbose
    opnn
    e 'We will create the temp directory and the download directory next.'
  end
  ensure_that_temp_dir_exists
  ensure_that_download_dir_exists
end
653
+
654
+ # ========================================================================= #
655
+ # === try_to_show_dependencies
656
+ #
657
+ # Show the dependencies of the Taxonomy Module. This will tell us which
658
+ # dependencies we have to satisfy.
659
+ # ========================================================================= #
660
# Show the file named by return_dependencies(), or report that no such
# file exists at that location.
def try_to_show_dependencies
  path = return_dependencies
  unless File.exist?(path)
    return e('We could not find a file at position `'+sfile(path)+'`.')
  end
  e 'The Taxonomy Module depends on these Ruby Gems:'+N+N
  cat path
end
669
+
670
+ # ========================================================================= #
671
+ # === show_login_information
672
+ # ========================================================================= #
673
# Tell the user which file holds the login information, then show the
# port through show_port().
def show_login_information
  login_file = PROJECT_BASE_DIR2+'databases/postgresql_login_command.rb'
  e 'The login information is stored in the file '
  e
  e ' '+sfile(login_file)
  e
  show_port # Show the port as well here.
end
680
+
681
+ # ========================================================================= #
682
+ # === return_shared_code
683
+ # ========================================================================= #
684
# Path of the shared code, located below PROJECT_BASE_DIR2.
def return_shared_code
  PROJECT_BASE_DIR2+'shared/shared'
end
687
+
688
+ # ========================================================================= #
689
+ # === return_gemspec_file
690
+ #
691
+ # We return the main taxonomy.gemspec file here.
692
+ # ========================================================================= #
693
# Path of the taxonomy.gemspec file, located below PROJECT_BASE_DIR.
def return_gemspec_file
  PROJECT_BASE_DIR+'taxonomy.gemspec'
end
696
+
697
+ # ========================================================================= #
698
+ # === return_login_file
699
+ # ========================================================================= #
700
# Path of the postgresql login-command file below PROJECT_BASE_DIR2.
#
# NOTE(review): show_login_information() points at
# 'databases/postgresql_login_command.rb' instead - confirm which of
# the two locations is the right one.
def return_login_file
  PROJECT_BASE_DIR2+'postgresql_login_command.rb'
end
703
+
704
+ # ========================================================================= #
705
+ # === show_nodes_table
706
+ # ========================================================================= #
707
# Describe the columns of the nodes table and then query ten randomly
# chosen rows from it.
def show_nodes_table
  e 'We use these values for the nodes table:'
  e
  [' taxid', ' parent_taxid', ' rank'].each { |column| efancy column }
  e
  e 'We will also try to show a random selection of 10 entries from there now:'
  run_sql 'SELECT taxid,parent_taxid,rank FROM nodes
ORDER BY RANDOM(), taxid LIMIT 10'
end
719
+
720
+
721
+ # ========================================================================= #
722
+ # === show_port
723
+ #
724
+ # Use this method to show the port.
725
+ # ========================================================================= #
726
# Show the port. At present the fixed placeholder 'UNKNOWN' is printed.
def show_port
  e 'The port we will use is: '+simp('UNKNOWN').to_s
end
729
+
730
+ # ========================================================================= #
731
+ # === update_lineage
732
+ #
733
+ # This will update only the lineage part of the localome database, which
734
+ # means the lineageIDs and the lineage scientific names.
735
+ # In order for this to work, we need to obtain the TaxID of the
736
+ # specific organism.
737
+ # ========================================================================= #
738
# Update lineage_ids and lineage_scientific_name for every TaxID that is
# stored in the fasta table.
#
# All taxid values are read, converted to integers and sorted; values
# that convert to 0 are skipped. For each remaining TaxID the lineage is
# obtained and reported, and the row is updated if has_id?() confirms it.
def update_lineage
  e 'We will now obtain all TaxIDs in the localome table.' if be_verbose?
  raw_output = run_sql('select taxid from fasta;', false, :tuples)
  taxids = raw_output.split(N).map { |line| line.strip.to_i }.sort
  taxids.reject!(&:zero?) # Ignore all entries that are 0.
  taxids.each do |taxid|
    e 'Now updating entry with the TaxID: '+sfancy(taxid.to_s)
    get_lineage_ids_and_lineage_scientific_name(taxid)
    report_lineage_ids_and_lineage_scientific_name(taxid)
    unless has_id?(taxid)
      e 'We did not find the TaxID: '+simp(taxid.to_s)
      next
    end
    # Both values are wrapped in single quotes before they go into SQL.
    quoted_ids   = pad_with_single_quotes(@lineage_ids)
    quoted_names = pad_with_single_quotes(@lineage_scientific_name)
    statement = 'UPDATE fasta SET lineage_ids='+quoted_ids+
                ', lineage_scientific_name='+quoted_names+
                " WHERE taxid='"+taxid.to_s+"'"
    run_sql_query(statement)
    e 'Updated entry.' if be_verbose?
  end
  e 'Finished updating lineage.' if be_verbose?
end
765
+
766
+ # ========================================================================= #
767
+ # === make_taxonomy_gem
768
+ #
769
+ # This is the code that creates the taxonomy .gem.
770
+ # ========================================================================= #
771
# Build the taxonomy .gem from taxonomy.gemspec and copy the result to
# TEMP_DIR.
#
# If TAXONOMY_HOME_DIR contains a .gemspec file we change into that
# directory first. Dir['*.gem'] always returns an Array, so the check
# must be for emptiness: a plain truthiness test let a failed build run
# into nil handling.
def make_taxonomy_gem
  unless Dir[TAXONOMY_HOME_DIR+'*.gemspec'].empty? # Do we have a .gemspec file there?
    cd TAXONOMY_HOME_DIR
  end
  e
  e 'We will now attempt to build the taxonomy .gem (from '+
    'directory '+(Dir.pwd+'/').squeeze('/')+')'
  e
  unless File.exist? 'taxonomy.gemspec'
    e 'Could not find a file called taxonomy.gemspec, thus aborting now.'
    return
  end
  esystem 'gem build taxonomy.gemspec', :use_colours
  e
  e 'Done. There should now be a .gem file here.'
  built_gems = Dir['*.gem']
  if built_gems.empty?
    e 'No .gem file could be found in '+Dir.pwd+', thus nothing was copied.'
  else
    e 'We assume the full file path to be at:'
    e ' '+sfile(built_gems.first)
    cp(built_gems.first, TEMP_DIR)
    e 'All done - the .gem should be ready now.'
  end
end
794
+
795
+ # ========================================================================= #
796
+ # === silently_update_ncbi_database
797
+ #
798
+ # This method is the one that can be used to silently update the NCBI table
799
+ # via a cron job.
800
+ # ========================================================================= #
801
# Quiet variant of the NCBI update, meant for use from a cron job: the
# same steps are run with :be_silent passed along where supported.
def silently_update_ncbi_database
  download(:ncbi, :be_silent)
  menu('names')
  menu('nodes')
  drop_nodes_and_names_database_tables(:be_silent)
  read_in_names_and_nodes_sql_files(:be_silent)
end
808
+
809
+ # ========================================================================= #
810
+ # === select_name_and_lineage_ids
811
+ # ========================================================================= #
812
# Query name and lineage_ids of all rows in the fasta table.
def select_name_and_lineage_ids
  sql = 'select name,lineage_ids from fasta;'
  run_query(sql, false, :tuples)
end
815
+
816
+ # ========================================================================= #
817
+ # === select_name_and_tax_id_and_lineage_ids
818
+ # ========================================================================= #
819
# Query name, taxid and lineage_ids of all rows in the fasta table.
def select_name_and_tax_id_and_lineage_ids
  sql = 'select name,taxid,lineage_ids from fasta;'
  run_query(sql, false, :tuples)
end
822
+
823
+ # ========================================================================= #
824
+ # === select_name_and_tax_id_and_lineage_ids_and_path
825
+ # ========================================================================= #
826
# Query name, taxid, lineage_ids and path of all rows in the fasta table.
def select_name_and_tax_id_and_lineage_ids_and_path
  sql = 'select name,taxid,lineage_ids,path from fasta;'
  run_query(sql, false, :tuples)
end
829
+
830
+ # ========================================================================= #
831
+ # === report_total_amount_of_proteomes
832
+ #
833
+ # The input should be an array of Taxonomy IDs.
834
+ # ========================================================================= #
835
# Sum up n_accession_numbers over the given Taxonomy IDs and report the
# total.
#
# i should be an Array of Taxonomy IDs; any other input is ignored and
# nil is returned. Entries may be Integers or Strings: each one is
# converted with to_s (String + Integer raised a TypeError before) and
# single quotes are doubled before the value is placed into the query.
def report_total_amount_of_proteomes(i)
  return unless i.is_a? Array
  n_entries = i.sum { |entry|
    taxid = entry.to_s.gsub("'", "''")
    run_query(
      "select n_accession_numbers from fasta WHERE taxid='"+taxid+"';",
      false,
      :tuples
    ).strip.to_i
  }
  e 'We have found a total of '+sfancy(n_entries.to_s)+' accession '+
    'numbers from the above dataset.'
end
847
+
848
+ # ========================================================================= #
849
+ # === search_in_database_for_name
850
+ #
851
+ # Use this method to search in a database for a name.
852
+ #
853
+ # This is something such as the following SQL query:
854
+ # select taxid FROM names WHERE name_txt LIKE '%Zygosaccharomyces rouxii%' limit 30;
855
+ #
856
+ # It will return the TaxID of the organism.
857
+ # ========================================================================= #
858
# Search the names table for entries whose name_txt contains i (at most
# 3 rows) and return the result of run_sql_query().
#
# Single quotes in the search term are doubled so that the term stays
# inside the SQL string literal.
def search_in_database_for_name(i = 'Zygosaccharomyces rouxii')
  e 'We will now try to search the names table for '+sfancy(i)+'.'
  escaped = i.to_s.gsub("'", "''")
  run_sql_query(
    "select taxid,name_txt FROM names WHERE name_txt LIKE '%"+escaped+"%' LIMIT 3;",
    false, :tuples
  )
end
864
+
865
+ # ========================================================================= #
866
+ # === set_path
867
+ #
868
+ # Use this method to update the path to a local fasta entry, inside the
869
+ # localome entry. In order for this to work, the ID must exist.
870
+ #
871
+ # Complete usage example:
872
+ #
873
+ # spath 1257118 /resources/seqdata/curated/sequences/localome/proteomes/Acanthamoeba_castellanii_Neff_pep.fa
874
+ #
875
+ # ========================================================================= #
876
# Update the path of a fasta-table entry.
#
# i is either "<taxid> <path>" or the name of an existing file in which
# every non-empty line holds a taxid and a path, separated by '|' or by
# a tab.
#
# Changes compared to the earlier behaviour:
# - only the first run of whitespace separates taxid and path, so a
#   path that contains spaces is kept whole;
# - file lines lacking a second field are reported and skipped instead
#   of raising a TypeError;
# - single quotes are doubled before the values go into the SQL.
def set_path(i)
  i = i.to_s
  if i.include? ' '
    taxid_to_update, new_path = i.split(' ', 2)
    taxid_to_update = taxid_to_update.to_s
    new_path = new_path.to_s.strip
    if has_id? taxid_to_update # If localomes has this ID, we continue here:
      e 'Now updating path for TaxID '+sfancy(taxid_to_update)+'.'
      run_sql "UPDATE fasta SET path='"+new_path.gsub("'", "''")+
              "' WHERE taxid='"+taxid_to_update.gsub("'", "''")+"'"
    else
      e 'We could not find a TaxID '+sfancy(taxid_to_update)+
        ' in the localome table. Thus can not update the path.'
    end
  elsif File.exist? i
    File.readlines(i).map(&:strip).reject(&:empty?).each { |entry|
      # Fields are separated by '|'; otherwise a tab is assumed.
      fields = entry.include?('|') ? entry.split('|') : entry.split("\t")
      if fields.size < 2
        e 'Skipping this malformed line: '+entry
        next
      end
      set_path(fields[0].strip+' '+fields[1].strip)
    }
    e 'Note: if you wish to see the modified dataset, input: taxid,path'
  else
    e 'Format was not correct. Please either provide a file as argument,'
    e 'or use input such as this here:'
    e
    e ' spath 1257118 /resources/seqdata/curated/sequences/localome/proteomes/Acanthamoeba_castellanii_Neff_pep.fa'
    e
  end
end
910
+
911
+ # ========================================================================= #
912
+ # === nohelp
913
+ # ========================================================================= #
914
# Set @no_help, which makes show_short_help() stay quiet.
def nohelp
  @no_help = true
end
917
+
918
+ # ========================================================================= #
919
+ # === show_short_help
920
+ # ========================================================================= #
921
# Print the short help reminder, unless it was switched off via @no_help.
def show_short_help
  return if @no_help
  e '(Type "help" or "?" for help, or "nohelp" to disable this notification here).'
end
925
+
926
+ # ========================================================================= #
927
+ # === show_postgres_size
928
+ #
929
+ # This method will show the size of the postgres database.
930
+ # ========================================================================= #
931
# Show size information about the postgresql database: the result of
# POSTGRESQL_QUERY_SIZE, the total size of robert_db, and the total
# relation size of the names, nodes and fasta tables.
def show_postgres_size
  relation_size = ->(table) {
    sql_query(
      "SELECT pg_size_pretty(pg_total_relation_size('#{table}'));", false, :tuples
    )
  }
  e 'Next querying the size of the postgresql table:'
  sql_query POSTGRESQL_QUERY_SIZE, true
  e 'The total disk size for the robert_db is:'
  sql_query "SELECT pg_size_pretty(pg_database_size('robert_db'));", true
  e 'Size of '+sfancy(:names)+' is: '+relation_size.call(:names)
  e 'Size of '+sfancy(:nodes)+' is: '+relation_size.call(:nodes)
  e 'Size of '+sfancy(:fasta)+' is: '+relation_size.call(:fasta)
end
948
+
949
+ # ========================================================================= #
950
+ # === obtain_full_lineage_for
951
+ #
952
+ # The input to this method should be an existing TaxID.
953
+ # ========================================================================= #
954
# Fetch the full lineage of the TaxID f via return_full_lineage_of() and
# store two string renderings of it:
#
#   @lineage_ids             - the ids joined by ' -> '
#   @lineage_scientific_name - "name (Tax id: id)" entries joined by ', '
#
# When the lookup yields nothing, both variables are left as they were.
# The lookup result itself is returned in either case.
def obtain_full_lineage_for(f)
  result = return_full_lineage_of(f) # in taxonomy.rb
  return result unless result
  ids   = (@lineage_ids = ''.dup)
  names = (@lineage_scientific_name = ''.dup)
  Hash[result].each_pair do |taxid, scientific_name|
    ids   << "#{taxid} -> "
    names << "#{scientific_name.chomp} (Tax id: #{taxid}), "
  end
  # Trim the names string and remove its trailing ',' separator.
  names = (@lineage_scientific_name = names.strip)
  names.chop! if names.end_with?(',')
  # Trim the ids string and remove its trailing '->' separator.
  ids.strip!
  if ids.end_with?('->')
    ids.slice!(-2, 2)
    ids.strip!
  end
  names.chomp!
  result
end; alias get_lineage_ids_and_lineage_scientific_name obtain_full_lineage_for
976
+
977
+ # ========================================================================= #
978
+ # === drop_table (drop tag)
979
+ #
980
+ # Use this method when you wish to drop a table.
981
+ # ========================================================================= #
982
# Drop a table.
#
# i is either a table keyword ('nodes', 'names', 'fasta', 'localomes',
# 'localome') or a complete SQL statement, which is passed on as it is.
#
# The 'nodes' keyword had no action attached, so the bare word "nodes"
# was sent to the database as SQL. It now maps to
# POSTGRE_DROP_NODES_COMMAND, and 'names' maps to
# POSTGRE_DROP_NAMES_COMMAND in the same way.
def drop_table(i = DROP_FASTA_TABLE)
  case i.to_s
  when 'nodes'
    i = POSTGRE_DROP_NODES_COMMAND
  when 'names'
    i = POSTGRE_DROP_NAMES_COMMAND
  when 'fasta',
       'localomes',
       'localome'
    i = DROP_FASTA_TABLE
  end
  ::Bioroebe.run_sql_query(i)
end
992
+
993
+ # ========================================================================= #
994
+ # === query_localomes_by_modtime
995
+ # ========================================================================= #
996
# Query name and modification_time of all fasta rows, ordered by
# modification_time.
def query_localomes_by_modtime
  run_sql_query(
    'SELECT name,modification_time FROM fasta ORDER BY modification_time;'
  )
end
1000
+
1001
+ # ========================================================================= #
1002
+ # === query
1003
+ #
1004
+ # Formulate a query against the database.
1005
+ # ========================================================================= #
1006
# Dispatch a named query. Unknown names do nothing and yield nil.
def query(i)
  case i.to_s
  when 'localome', 'extended', 'main' then query_localome_including_path
  when 'modtime'                      then query_localomes_by_modtime
  when 'ncbi'                         then e 'https://www.ncbi.nlm.nih.gov/taxonomy'
  end
end
1016
+
1017
+ # ========================================================================= #
1018
+ # === get_all_info_entries_with_tax_id
1019
+ # ========================================================================= #
1020
# Return the paths of all .INFO files below INFO_DIR for which
# Info#has_taxonomy_id? is true. In verbose mode each match is reported.
def get_all_info_entries_with_tax_id
  if be_verbose?
    e 'We will now attempt to find all .INFO files that do have a'
    e 'proper Taxonomy ID entry.'
  end
  Dir[INFO_DIR+'*.INFO'].select do |file|
    info = Info.new(file, :be_verbose => false)
    next false unless info.has_taxonomy_id?
    e sfile(file)+' has a Taxonomy ID.' if be_verbose?
    true
  end
end
1035
+
1036
+ # ========================================================================= #
1037
+ # === read_in_names_and_nodes_sql_files
1038
+ #
1039
+ # We use two commands for populating names.sql and nodes.sql tables.
1040
+ # ========================================================================= #
1041
# Load names.sql and nodes.sql (both below SHARED_HOME) into the
# database. esystem() is used in verbose mode, plain system() when
# be_verbose is false or :be_silent.
def read_in_names_and_nodes_sql_files(be_verbose = true)
  be_verbose = false if be_verbose == :be_silent
  # We use another login command at home.
  login  = at_home? ? POSTGRE_LOGIN_COMMAND_HOME : POSTGRE_LOGIN_COMMAND
  runner = be_verbose ? method(:esystem) : method(:system)
  runner.call(login+' -f '+SHARED_HOME+'names.sql')
  runner.call(login+' -f '+SHARED_HOME+'nodes.sql')
end
1058
+
1059
+ # ========================================================================= #
1060
+ # === be_silent
1061
+ # ========================================================================= #
1062
# Switch verbose mode off and say so; if @be_verbose is already false
# only a notice is printed.
def be_silent
  return e('We are already silent.') if @be_verbose == false
  @be_verbose = false
  e 'We will now be silent.'
end
1070
+
1071
+ # ========================================================================= #
1072
+ # === record_last_command
1073
+ #
1074
+ # Use this method to record the last command issued.
1075
+ #
1076
+ # The constant LAST_INTERACTIVE_COMMAND determines where we store this.
1077
+ # ========================================================================= #
1078
# Store the last command issued in the file LAST_INTERACTIVE_COMMAND.
#
# consider_storing_this is the command; optional, when not empty, is
# appended after a '|'. Nothing is stored for empty input or for input
# containing "last?".
#
# A failure while writing is reported, not raised. The report used to
# reference an undefined local ("into"), which raised a NameError from
# inside the rescue branch; the target file constant is shown now. Only
# StandardError is rescued so that interrupts and exits still propagate.
def record_last_command(
    consider_storing_this = '',
    optional              = ''
  )
  what = consider_storing_this.to_s.dup # Work on a copy from this point onwards.
  what << '|'+optional.to_s unless optional.to_s.empty?
  return if what.empty? || what.include?('last?') # Exception for "last?".
  # ===================================================================== #
  # Before we can store into the file, we need to ensure that the
  # TEMP_DIR exists - if it does not exist then we will create it.
  # ===================================================================== #
  unless Dir.exist? TEMP_DIR
    e 'We will now create the directory '+sdir(TEMP_DIR)+'.'
    ensure_that_temp_dir_exists
  end
  begin
    write_what_into(what, LAST_INTERACTIVE_COMMAND)
  rescue StandardError => error
    e "A small error has happened in the method: "\
      "#{__method__}()"
    p error
    e "Could not write into `#{sfile(LAST_INTERACTIVE_COMMAND)}`."
  end
end
1104
+
1105
+ # ========================================================================= #
1106
+ # === be_verbose?
1107
+ # ========================================================================= #
1108
# Reader for the @be_verbose flag.
def be_verbose?
  @be_verbose
end
1111
+
1112
+ # ========================================================================= #
1113
+ # === repeat_last_command
1114
+ # ========================================================================= #
1115
# Delegate to show_last_command() if the file LAST_INTERACTIVE_COMMAND
# exists; otherwise report that the file is missing.
#
# NOTE(review): despite the name, nothing is re-run here as far as this
# method shows - confirm against show_last_command().
def repeat_last_command
  return show_last_command if File.exist?(LAST_INTERACTIVE_COMMAND)
  e 'No file at '+sfile(LAST_INTERACTIVE_COMMAND)+' could be found.'
end
1122
+
1123
+ # ========================================================================= #
1124
+ # === be
1125
+ # ========================================================================= #
1126
# Switch the verbosity: :verbose calls be_verbose(), :silent calls
# be_silent(). Any other input does nothing and yields nil.
def be(i)
  case i.to_sym
  when :verbose then be_verbose
  when :silent  then be_silent
  end
end
1134
+
1135
+ # ========================================================================= #
1136
+ # === show_table_names
1137
+ # ========================================================================= #
1138
# Show the column layout of the three SQL tables, as described by the
# constants NAMES_ENTRIES, NODES_ENTRIES and FASTA_ENTRIES.
def show_table_names
  blank = N+N
  e N+'The SQL table structure is as follows:'+blank
  efancy 'names.sql'
  e ' (1) '+NAMES_ENTRIES+blank
  efancy 'nodes.sql'
  e ' (2) '+NODES_ENTRIES+blank
  efancy 'fasta.sql'
  e ' (3) '+FASTA_ENTRIES+blank
end
1147
+
1148
+ # ========================================================================= #
1149
+ # === be_verbose
1150
+ # ========================================================================= #
1151
# Switch verbose mode on and say so; if @be_verbose is already true
# only a notice is printed.
def be_verbose
  return e('We are already verbose.') if @be_verbose == true
  @be_verbose = true
  e 'We will now be verbose.'
end
1159
+
1160
+ # ========================================================================= #
1161
+ # === finished
1162
+ #
1163
+ # Here we simply report to the user that we have finished.
1164
+ # ========================================================================= #
1165
# Reports to the user that we have finished.
#
# optional_extra_message - text appended after "Done! ". A trailing '.'
#                          is added when the text is non-empty and does
#                          not already end with one. nil is treated like
#                          an empty message.
#
# The argument is never modified: the original implementation appended
# to it in place via <<, which altered the caller's String and raised a
# FrozenError for frozen input.
def finished(optional_extra_message = '')
  message = optional_extra_message.to_s
  message += '.' unless message.empty? || message.end_with?('.')
  e 'Done! '+message
end
1171
+
1172
+ # ========================================================================= #
1173
+ # === has_id?
1174
+ #
1175
+ # We ask the localome if an ID is registered.
1176
+ # run_sql is defined in the file shared.rb
1177
+ # ========================================================================= #
1178
# Asks the fasta table (through run_sql()) whether the given TaxID is
# registered.
#
# i - the TaxID; converted via #to_s and stripped of surrounding
#     whitespace.
#
# Returns true when run_sql() yields a non-blank result, false otherwise.
#
# Only purely numeric input is ever embedded into the SQL statement;
# anything else cannot be a TaxID and is answered with false right away.
# This keeps quotes or SQL fragments in user input out of the query.
def has_id?(i)
  taxid = i.to_s.strip
  return false unless taxid.match?(/\A\d+\z/)

  query = "SELECT taxid from fasta where taxid='#{taxid}'"
  !run_sql(query, false, :tuples).to_s.strip.empty?
end; alias has? has_id? # === has?
1187
+
1188
+ # ========================================================================= #
1189
+ # === show_shared_code_location
1190
+ #
1191
+ # To invoke this method, do:
1192
+ # shared_code?
1193
+ # ========================================================================= #
1194
# Tells the user where shared.rb resides: the value of base_dir? with
# '/shared.rb' appended, passed through sfile().
def show_shared_code_location
  e 'You can find the file shared.rb here:'
  location = base_dir?+'/shared.rb'
  e ' '+sfile(location)
end
1199
+
1200
+ # ========================================================================= #
1201
+ # === show_important_directories
1202
+ #
1203
+ # Delegate towards Taxonomy.status? from here.
1204
+ # ========================================================================= #
1205
# Thin wrapper: the work is done entirely by Taxonomy.status?, whose
# return value is passed through.
def show_important_directories
  Taxonomy.status?
end
1208
+
1209
+ # ========================================================================= #
1210
+ # === first_argument?
1211
+ # ========================================================================= #
1212
# Reader for @first_argument, which sanitize_user_input() fills with
# everything the user typed after the command. Also reachable as f? and f.
def first_argument?
  @first_argument
end; alias f? first_argument? # === f?
alias f first_argument?       # === f
1216
+
1217
+ # ========================================================================= #
1218
+ # === show_lineage_from_localome_table
1219
+ #
1220
+ # The TaxID must exist before we can use this.
1221
+ #
1222
+ # Usage examples:
1223
+ #
1224
+ # show_lineage_from_localome_table 2762
1225
+ # slocalome 2762
1226
+ #
1227
+ # ========================================================================= #
1228
# Looks up the lineage_ids column of the fasta table for one TaxID and
# passes it to e(). When nothing is found, a hint is shown instead.
#
# i - the TaxID; converted via #to_s and stripped of whitespace.
#
# Only purely numeric input is embedded into the SQL statement. Anything
# else cannot be a TaxID, is never sent to the database and is reported
# as "not found". The former chomp.strip chain is reduced to strip, which
# already removes trailing newlines.
def show_lineage_from_localome_table(i)
  taxid = i.to_s.strip
  result =
    if taxid.match?(/\A\d+\z/)
      run_sql(
        "select lineage_ids from fasta WHERE taxid='#{taxid}';", false, :tuples
      ).to_s.strip
    else
      ''
    end
  if result.empty?
    e 'This TaxID was not found in the localome table.'
    e 'To find out which entries exist in the localome table, do "localome?".'
  else
    e result
  end
end
1239
+
1240
+ # ========================================================================= #
1241
+ # === report_how_many_info_files_exist_and_how_many_lack_taxonomy_id
1242
+ #
1243
+ # This will check on at least one directory.
1244
+ # ========================================================================= #
1245
# Reports how many *.INFO files exist below INFO_DIR, how many of them
# carry a Taxonomy ID (as determined by get_all_info_entries_with_tax_id())
# and how many do not. Any *.INFO files found in INCOMING_DIR are listed
# as well.
#
# Compared to the previous version, the directory is globbed only once
# (both numbers now stem from the same listing) and every count is
# converted to a String before it is handed to sfancy(), as the third
# call already did.
def report_how_many_info_files_exist_and_how_many_lack_taxonomy_id
  entries_with_tax_id = get_all_info_entries_with_tax_id
  e 'We will try to assess the various .INFO files from the '+
    sdir(info_dir?)+'.'
  n_info_files = Dir[INFO_DIR+'*.INFO'].size
  if n_info_files > 0
    n_with_tax_id = entries_with_tax_id.size
    e 'We did find '+sfancy(n_with_tax_id.to_s)+' .INFO files with Taxonomy ID. '+
      'That means that '+sfancy((n_info_files - n_with_tax_id).to_s)+
      ' .INFO files do not have '
    e 'a Taxonomy ID. In total there are '+sfancy(n_info_files.to_s)+
      ' .INFO files there.'
    info_files_in_incoming_dir = Dir[INCOMING_DIR+'*.INFO']
    unless info_files_in_incoming_dir.empty?
      e 'We also found at least one .INFO file in '+sdir(INCOMING_DIR)+'.'
      e 'We will display them now:'
      pp info_files_in_incoming_dir
    end
  else
    e 'We found no .INFO file there.'
  end
end
1266
+
1267
+ # ========================================================================= #
1268
+ # === load_from_info
1269
+ #
1270
+ # This method shall attempt to load from an .INFO file, right into the
1271
+ # localome/ directory at /data/curated/sequences/localome/incoming/.
1272
+ # ========================================================================= #
1273
# Attempts to register the dataset described by an .INFO file.
#
# i - path of the .INFO file; handed to Info.new and, if the dataset is
#     new, to create_sql_file_from_local_fasta_entry().
#
# The TaxID reported by the Info object is checked via has_id?(). An
# already registered TaxID is refused; otherwise the SQL file is created.
#
# The TaxID is converted with #to_s before it is concatenated into the
# message, so a non-String TaxID (e. g. an Integer) no longer raises a
# TypeError.
def load_from_info(i)
  e 'We will now attempt to load an .INFO file.'
  info = Info.new(i)
  info.report_id
  taxid = info.taxid?
  e 'We will try to load a fasta file, if it is nearby.'
  fasta_file = info.fasta?
  e fasta_file
  ParseFasta.new(fasta_file).report
  if has_id? taxid # If we already have an entry like that, we refuse.
    e 'We already have an entry with the Taxonomic ID taxid '+taxid.to_s+'.'
    e 'You would have to remove it first before you can load a new one.'
    e '(Removing is as simple as: remove_id TAXONOMIC_ID_GOES_HERE)'
  else
    e 'No entry was yet found in the localome table, thus we will '+
      'attempt to add this dataset now.'
    create_sql_file_from_local_fasta_entry(i)
    e 'You can update the path in the Postgresql database via:'
    e ' set_path TAXONOMY_ID path_goes_here'
  end
end
1294
+
1295
+ # ========================================================================= #
1296
+ # === show_type_of_all_info_files
1297
+ #
1298
+ # We will get all .INFO files and display the type.
1299
+ # ========================================================================= #
1300
# Shows, for one .INFO file, the type reported by ParseFasta for the
# corresponding data file.
#
# i - path of an .INFO file. Without an argument (nil/false) the method
#     calls itself once for every entry found via Dir[INFO_DIR+'*'].
def show_type_of_all_info_files(i = nil)
  unless i # No input: walk over all entries of INFO_DIR instead.
    return Dir[INFO_DIR+'*'].each { |entry| show_type_of_all_info_files(entry) }
  end
  info = Info.new(i, :silent)
  info.find_fasta
  unless info.fasta?
    return e('We could not locate a local fasta entry for '+sfile(i))
  end
  fasta = ParseFasta.new(info.corresponding_datafile)
  e sfile(i)+' is DNA or Protein? '+fasta.type.to_s
end
1316
+
1317
+ # ========================================================================= #
1318
+ # === show_all_prokarya (prokarya tag)
1319
+ #
1320
+ # This method will show all entries in the database that are from
1321
+ # Prokaryotes.
1322
+ # ========================================================================= #
1323
# Lists every registered entry whose lineage contains
# Bacteria_Taxonomy_ID, followed by a short summary.
#
# The dataset comes from select_name_and_tax_id_and_lineage_ids(); each
# line is expected to hold '|'-separated fields in the order
# name | taxid | lineage ids (this is how the fields are read below).
#
# Changes compared to the previous version: the dataset is split only
# once, the lineage regex is built once instead of per line, the
# pointless each_with_index is gone and the running number is taken from
# the result Array rather than from a separate counter.
def show_all_prokarya
  entries = select_name_and_tax_id_and_lineage_ids.split(N)
  # The ID has to be delimited by spaces to avoid partial matches.
  lineage_regex = / #{Bacteria_Taxonomy_ID} /
  result = []
  entries.each { |entry|
    name, taxid, lineage = entry.split('|')
    next unless lineage_regex.match?(lineage.to_s)

    scientific_name = name.strip
    result << scientific_name
    e simp(result.size.to_s)+') '+scientific_name+' ('+
      sfancy('TaxID')+': '+taxid.to_s.strip+')'
  }
  e N+N+'Out of '+sfancy(entries.size.to_s)+' registered entries in '+
    'total in the localomes database,'
  e sfancy(result.size.to_s)+' belong to Prokarya (the Taxonomy ID of '+
    'Prokarya is '+sfancy(Bacteria_Taxonomy_ID.to_s)+').'
end
1344
+
1345
+ # ========================================================================= #
1346
+ # === Interactive.run
1347
+ # ========================================================================= #
1348
# Class-level convenience entry point: instantiates
# Taxonomy::Interactive with the given argument and returns the new
# object.
def self.run(i = nil)
  Taxonomy::Interactive.new(i)
end
1351
+
1352
+ # ========================================================================= #
1353
+ # === Interactive.run_interactive
1354
+ # ========================================================================= #
1355
# Alias-like entry point: forwards its argument unchanged to
# Interactive.run and returns whatever that call returns.
def self.run_interactive(i = nil)
  Interactive.run(i)
end
1358
+
1359
+ # ========================================================================= #
1360
+ # === get_user_input
1361
+ #
1362
+ # Get user input via this method. We prefer to use Readline if it is
1363
+ # available, otherwise we will simply use a $stdin.gets() call.
1364
+ # ========================================================================= #
1365
# Reads one line of user input, stores it in @user_input, then runs
# sanitize_user_input() and process_user_input() on it.
#
# Readline is used when the Readline constant is defined, otherwise
# $stdin.gets. Blank Readline input is removed from the history again.
#
# Returns the content of @user_input.
#
# End-of-input (Ctrl-D, closed or exhausted stdin) makes both
# Readline.readline and $stdin.gets return nil. Previously this ended in
# a NoMethodError; it is now treated as if the user had typed "exit",
# which the startup help advertises as a way to leave.
def get_user_input
  if Object.const_defined? :Readline
    line = Readline.readline('', true)
    Readline::HISTORY.pop if line =~ /^\s*$/
  else
    line = $stdin.gets
    line = line.chomp if line
  end
  @user_input = line.nil? ? 'exit' : line
  sanitize_user_input
  process_user_input
  return @user_input
end; alias obtain_user_input get_user_input
1377
+
1378
+ # ========================================================================= #
1379
+ # === sanitize_user_input
1380
+ # ========================================================================= #
1381
# Splits @user_input on whitespace into the command (@cmd) and the
# remainder (@first_argument, all remaining words joined by one space).
#
#   'show lineage 2762'  => @cmd 'show', @first_argument 'lineage 2762'
#   'help'               => @cmd 'help', @first_argument ''
#   '' / '   ' / nil     => @cmd nil,    @first_argument nil
#
# Whitespace-only input used to crash: [][1..-1] is nil, so the
# subsequent join raised a NoMethodError. It is now handled like empty
# input.
def sanitize_user_input
  cmd, *arguments = @user_input.to_s.split(' ')
  @cmd = cmd
  @first_argument = cmd.nil? ? nil : arguments.join(' ')
end
1390
+
1391
+ # ========================================================================= #
1392
+ # === process_user_input
1393
+ # ========================================================================= #
1394
# Hands the user input to check_against_menu().
#
# i - a String, or an Array of Strings that are processed in order.
#     A String containing ';' is treated as several commands.
#
# Each command is split into its first word (the command) and the
# remaining words (joined by one space); both are stored in @cmd and
# @first_argument before check_against_menu() is called, so that code
# reading these variables sees the command currently being executed.
#
# Previously every ';'-separated segment dispatched the same @cmd and
# @first_argument that had been derived from the complete input line,
# i. e. "a; b" ran the first command twice and never reached the second.
# Blank segments (as in "a;;b") are skipped.
def process_user_input(i = @user_input)
  if i.is_a? Array
    i.each { |entry| process_user_input(entry) }
  elsif i.include? ';'
    process_user_input(i.split(';').reject { |entry| entry.strip.empty? })
  else
    cmd, *arguments = i.split(' ')
    @cmd = cmd
    @first_argument = cmd.nil? ? nil : arguments.join(' ')
    check_against_menu(@cmd, @first_argument)
  end
end
1405
+
1406
+ # ========================================================================= #
1407
+ # === try_to_show_instructions
1408
+ #
1409
+ # This method will try to show the instructions to the user, based
1410
+ # on what the method return_instructions() will give us.
1411
+ # ========================================================================= #
1412
# Displays the file named by return_instructions() through cat(),
# provided that it exists; otherwise a notice is passed to e().
def try_to_show_instructions
  instructions_file = return_instructions
  unless File.exist?(instructions_file)
    return e('We could not find a file at position '+instructions_file)
  end
  e 'Now reading in from file `'+sfile(instructions_file)+'`.'
  cat instructions_file
end
1421
+
1422
+ # ========================================================================= #
1423
+ # We return the bin/taxonomy file here.
1424
+ # ========================================================================= #
1425
# === return_taxonomy_file
#
# Builds the path of the bin/taxonomy file below PROJECT_BASE_DIR.
def return_taxonomy_file
  "#{PROJECT_BASE_DIR}bin/taxonomy"
end
1428
+
1429
+ # ========================================================================= #
1430
+ # === show_changelog
1431
+ # ========================================================================= #
1432
# Passes the changelog file located in PROJECT_DOC_DIR to cat().
def show_changelog
  changelog = PROJECT_DOC_DIR+'CHANGELOG_FROM_0.0.14_TO_0.0.15'
  cat changelog
end
1435
+
1436
+ # ========================================================================= #
1437
+ # === return_dependencies
1438
+ #
1439
+ # We return the dependencies file here.
1440
+ # ========================================================================= #
1441
# Builds the path of the DEPENDENCIES file below PROJECT_DOC_DIR.
def return_dependencies
  "#{PROJECT_DOC_DIR}DEPENDENCIES"
end
1444
+
1445
+ # ========================================================================= #
1446
+ # === return_instructions
1447
+ # ========================================================================= #
1448
# Builds the path of the INSTRUCTIONS file below PROJECT_DOC_DIR.
def return_instructions
  "#{PROJECT_DOC_DIR}INSTRUCTIONS"
end
1451
+
1452
+ # ========================================================================= #
1453
+ # === show_last_command
1454
+ # ========================================================================= #
1455
# Reads the file LAST_INTERACTIVE_COMMAND and shows its content together
# with the location of that file.
def show_last_command
  stored_command = File.read(LAST_INTERACTIVE_COMMAND).to_s
  e 'The last command was: '+simp(stored_command)
  e 'This was read in from the file '+sfile(LAST_INTERACTIVE_COMMAND)
end
1460
+
1461
+ # ========================================================================= #
1462
+ # === create_sql_file_from_local_fasta_entry
1463
+ #
1464
+ # This method will create a .sql file, based on the information
1465
+ # obtained from the local fasta entries.
1466
+ #
1467
+ # The first argument to it should be an .INFO file, not a fasta file!
1468
+ # ========================================================================= #
1469
# Creates a .sql file with one INSERT statement for the fasta table,
# based on an .INFO file and the fasta file that belongs to it, and feeds
# that file to the database through POSTGRE_LOGIN_COMMAND.
#
# i - path of an .INFO file (not of a fasta file!).
#
# The process exits when the .INFO file yields no TaxID or when no
# scientific name can be determined for the TaxID.
#
# Fixes compared to the previous version:
#
# - The "Next, we will attempt ..." message lost its second half because
#   the '+' between the two String literals was missing.
# - The final message concatenated info.taxid without #to_s, which raises
#   a TypeError for a non-String TaxID.
# - The file extension was removed through an unanchored regex built from
#   the extension ('.' matching any character); File.basename with a
#   suffix argument removes exactly the trailing extension.
def create_sql_file_from_local_fasta_entry(i)
  ensure_that_temp_dir_exists
  info = Info.new(i, :be_verbose => false)
  info.try_to_find_likely_fasta_file # Must run once so that the fasta file can be found.
  if info.taxid
    # Expected to provide @lineage_ids and @lineage_scientific_name, both
    # of which are read further below.
    obtain_full_lineage_for(info.taxid)
  else
    if be_verbose?
      e 'We did not find a Tax ID for `'+sfile(i)+'`.'
      e 'This is considered a fatal error for now - we will thus exit.'
    end
    exit
  end
  mkdir(AUTOGENERATED_SQL_FILES_DIR) unless Dir.exist?(AUTOGENERATED_SQL_FILES_DIR)
  sql_filename = AUTOGENERATED_SQL_FILES_DIR+
                 File.basename(i, File.extname(i))+'.sql'
  remove(sql_filename) if File.exist? sql_filename # Get rid of the old one first.
  scientific_name = get_scientific_name(info.taxid).strip
  if scientific_name.empty? # Exit if we can not find it.
    if be_verbose?
      ewarn 'The scientific name for '+simp(info.taxid.to_s)+' was empty.'
      ewarn 'This we consider to be a runtime error for now, thus '+
            'we will exit.'
    end
    exit
  end
  fasta = ParseFasta.new(info.corresponding_datafile)
  # The fasta type is stored as a number: 0 for DNA, 1 for Protein. Any
  # other value is passed on unchanged.
  type = fasta.type?
  type = { 'DNA' => 0, 'Protein' => 1 }.fetch(type, type)
  # ======================================================================= #
  # The values, in the same order as the column list passed to InsertInto
  # below. The accession_number and dataset columns are deliberately not
  # written (they were disabled pending more testing).
  # ======================================================================= #
  values = [
    pad_with_single_quotes(scientific_name),              # (1) name
    info.taxid.to_s,                                      # (2) taxid
    type.to_s,                                            # (3) type
    pad_with_single_quotes(@lineage_ids),                 # (4) lineage_ids
    pad_with_single_quotes(@lineage_scientific_name),     # (5) lineage_scientific_name
    pad_with_single_quotes(0),                            # (6) comment_field
    fasta.filesize.to_s,                                  # (7) filesize
    pad_with_single_quotes(fasta.modification_time.to_s), # (8) modification_time
    fasta.n_entries.to_s,                                 # (9) n_accession_numbers
    pad_with_single_quotes(info.corresponding_datafile)   # (10) path
  ].join(', ')
  string = InsertInto[
    'fasta',
    'name, taxid, type, lineage_ids, lineage_scientific_name, comment_field, filesize, modification_time, n_accession_numbers, path', # accession_number, dataset,
    values
  ]
  e 'Now storing into '+sfile(sql_filename) if be_verbose?
  write_what_into(string, sql_filename)
  if be_verbose?
    e 'Next, we will attempt to read in this file into the '+
      'Postgresql localome (fasta) table.'
  end
  cmd = POSTGRE_LOGIN_COMMAND+' -f '+sql_filename
  esystem(cmd)
  if be_verbose?
    e 'Done! All should have been well by now. '+
      'The Taxonomic ID was '+info.taxid.to_s
  end
end
1577
+
1578
+ # ========================================================================= #
1579
+ # === insert_into
1580
+ # ========================================================================= #
1581
# Fills one of the tables, selected by name (Symbol or String):
#
#   fasta => create_fasta_table
#   nodes => populate_nodes_table(:be_verbose)   (the default)
#   names => populate_names_table(:be_verbose)
#
# Unknown names are ignored (nil is returned).
def insert_into(i = :nodes)
  table = i.to_s
  if table == 'fasta'
    create_fasta_table
  elsif table == 'nodes'
    populate_nodes_table(:be_verbose)
  elsif table == 'names'
    populate_names_table(:be_verbose)
  end
end
1592
+
1593
+ # ========================================================================= #
1594
+ # === which_database_to_use?
1595
+ # ========================================================================= #
1596
# Returns the name of the database backend as stored in the file
# use_this_database.yml inside PROJECT_YAML_DIR.
#
# Surrounding whitespace is stripped: a file ending in a newline (as
# most text files do) would otherwise yield "sqlite\n", which does not
# match the plain String comparisons done in create_database().
def which_database_to_use?
  File.read(PROJECT_YAML_DIR+'use_this_database.yml').strip
end
1599
+
1600
+ # ========================================================================= #
1601
+ # === show_configuration
1602
+ # ========================================================================= #
1603
# Shows the configured database backend, as reported by
# which_database_to_use?().
def show_configuration
  configured_database = which_database_to_use?
  e configured_database
end
1606
+
1607
+ # ========================================================================= #
1608
+ # === names_sql
1609
+ # ========================================================================= #
1610
# Regenerates names.sql inside AUTOGENERATED_SQL_FILES_DIR: an existing
# file is deleted first, then populate_names_table() is asked to store
# into that location.
def names_sql
  target = "#{AUTOGENERATED_SQL_FILES_DIR}names.sql"
  File.delete(target) if File.exist?(target)
  e 'We will populate the names-table next. This will take a while.'
  populate_names_table(:be_verbose, target)
end
1616
+
1617
+ # ========================================================================= #
1618
+ # === nodes_sql
1619
+ #
1620
+ # Use this method to generate the nodes.sql file.
1621
+ # ========================================================================= #
1622
# Regenerates nodes.sql inside AUTOGENERATED_SQL_FILES_DIR: an existing
# file is deleted first, then populate_nodes_table() is asked to store
# into that location.
def nodes_sql
  target = "#{AUTOGENERATED_SQL_FILES_DIR}nodes.sql"
  File.delete(target) if File.exist?(target)
  e 'We will populate the nodes-table next. This will take a while.'
  populate_nodes_table(:be_verbose, target)
end
1628
+
1629
+ # ========================================================================= #
1630
+ # === set_database
1631
+ # ========================================================================= #
1632
# Stores the name of the database backend into FILE_USE_THIS_DATABASE.
#
# i - the name of the backend, such as "postgresql". Surrounding
#     whitespace and double quotes are removed and the name is
#     downcased; "postgre" is accepted as shorthand for "postgresql".
#
# Empty input only leads to a hint for the user; nothing is written.
#
# The name is now normalized (downcased) BEFORE the shorthand is
# resolved. Previously "Postgre" missed the case-sensitive check and
# was stored as "postgre".
def set_database(i)
  database = i.to_s.strip.delete('"').strip.downcase
  if database.empty?
    e 'Please provide the type of the database you wish to '\
      'use, such as "postgresql".'
  else
    database = 'postgresql' if database == 'postgre'
    e 'Storing '+database+' into '+sfile(FILE_USE_THIS_DATABASE)+'.'
    write_what_into(database, FILE_USE_THIS_DATABASE)
  end
end
1647
+
1648
+ # ========================================================================= #
1649
+ # === create_fasta_table
1650
+ #
1651
+ # This method generates the fasta table into either postgresql or
1652
+ # whatever else you stored things into.
1653
+ # ========================================================================= #
1654
# Rebuilds the fasta table. The steps, in order:
#
#   1. remove the old FASTA_SQL file
#   2. create and save the table definition (verbosely)
#   3. display FASTA_SQL
#   4. drop the existing fasta table
#   5. read the new fasta SQL in
def create_fasta_table
  remove(FASTA_SQL)
  create_and_save_table('fasta', true) # true means: be verbose.
  cat(FASTA_SQL)                       # Display it after creation.
  e 'Trying to drop the fasta table next.'
  drop_table(:fasta)
  read_sql(:fasta)                     # Finally read in the new one.
end
1665
+
1666
+ # ========================================================================= #
1667
+ # === set_commandline_arguments
1668
+ # ========================================================================= #
1669
# Stores the commandline arguments in @commandline_arguments. The stored
# value is always an Array: anything that is not an Array (including
# nil) is wrapped into a one-element Array.
def set_commandline_arguments(i = nil)
  @commandline_arguments = i.is_a?(Array) ? i : [i]
end
1673
+
1674
+ # ========================================================================= #
1675
+ # === do_startup_actions
1676
+ # ========================================================================= #
1677
# Startup hook: in verbose mode a welcome text and the help menu are
# shown (nohelp() is invoked afterwards when at_home? holds). In every
# case the commandline arguments are checked at the end.
def do_startup_actions
  if be_verbose?
    blank_lines = N+N
    e Colours.rev+'Welcome to interactive Taxonomy.'+blank_lines
    e 'We will show the help menu now (on startup).'+blank_lines
    show_help
    e 'Please input your command(s). ("help" for help, '+
      '"quit", "exit" or "q" to exit.)'
    nohelp if at_home?
  end
  check_commandline_arguments
end
1688
+
1689
+ # ========================================================================= #
1690
+ # === check_commandline_arguments
1691
+ #
1692
+ # We check the commandline.
1693
+ # ========================================================================= #
1694
# Walks over @commandline_arguments and reacts to the entries we know:
# :run_connected triggers do_run_connected() (the mode used when we are
# embedded in the BioroebeShell). Everything else is ignored.
def check_commandline_arguments
  @commandline_arguments.each { |argument|
    do_run_connected if :run_connected == argument
  }
end
1702
+
1703
+ # ========================================================================= #
1704
+ # === fetch_user_input_via_loop
1705
+ # ========================================================================= #
1706
# The main input loop: keeps calling obtain_user_input() (which sets
# @user_input).
#
# An Interrupt is answered with exit_program(). In connected mode the
# loop is left as soon as @user_input is one of VALID_WAYS_TO_EXIT.
def fetch_user_input_via_loop
  loop do
    begin
      obtain_user_input
    rescue Interrupt
      e 'Sigint occurred by user interrupt, exiting gracefully now.' if be_verbose?
      exit_program
    end
    break if run_connected? && VALID_WAYS_TO_EXIT.include?(@user_input)
  end
end
1721
+
1722
+ # ========================================================================= #
1723
+ # === run_standalone?
1724
+ # ========================================================================= #
1725
# Reader for the @run_standalone flag (nil when it was never assigned).
def run_standalone?
  @run_standalone
end
1728
+
1729
+ # ========================================================================= #
1730
+ # === run_connected?
1731
+ # ========================================================================= #
1732
# The logical opposite of run_standalone?: true whenever we do not run
# standalone.
def run_connected?
  !run_standalone?
end
1735
+
1736
+ # ========================================================================= #
1737
+ # === exit_program
1738
+ #
1739
+ # Use this method when exiting.
1740
+ # ========================================================================= #
1741
# Leaves the program via exit - but only in standalone mode. When we do
# not run standalone, nothing happens and nil is returned.
def exit_program
  return unless run_standalone?

  exit
end
1744
+
1745
+ # ========================================================================= #
1746
+ # === show_sql_commands (sql tag)
1747
+ #
1748
+ # This method will feedback the SQL commands to create our postgre
1749
+ # table and also show the Insert commands used.
1750
+ # ========================================================================= #
1751
# Shows, for the names, nodes and fasta tables, what create_table()
# returns for each of them. Also available as sql? and table_names?.
#
# NOTE(review): create_table() delegates 'fasta' to create_fasta_table(),
# which removes FASTA_SQL and drops the fasta table. That looks like a
# heavy side effect for a "show" command - confirm that this is intended.
def show_sql_commands
  e N+'The three commands to create the '+
    simp('PostgreSQL database')+' are:'+N+N
  %i[names nodes fasta].each_with_index.map { |table, index|
    heading = " (#{index + 1}) #{table} table:"+N+N
    heading = N+heading unless index.zero? # Only the first heading has no leading newline.
    e heading
    efancy ' '+create_table(table).to_s
  }.last
end; alias sql? show_sql_commands # === sql?
alias table_names? show_sql_commands # === table_names?
1762
+
1763
+ # ========================================================================= #
1764
+ # === create_table
1765
+ #
1766
+ # A wrapper over class CreateTable.
1767
+ # ========================================================================= #
1768
# A wrapper over ::SqlParadise::Commands.create_table.
#
# i - name of the table (Symbol or String). 'fasta' is delegated to
#     create_fasta_table(). For any other name, '.sql' is appended, the
#     result of SqlParadise is saved below AUTOGENERATED_SQL_FILES_DIR
#     (as create_table_<name>.sql) and then shown.
#
# The '.sql' suffix is now added to a new String. The former in-place
# append (<<) modified the caller's String when a String was passed in
# and raised a FrozenError for frozen input.
def create_table(i)
  table = i.to_s
  return create_fasta_table if table == 'fasta'

  sql_file = table+'.sql'
  result = ::SqlParadise::Commands.create_table(sql_file)
  store_here = AUTOGENERATED_SQL_FILES_DIR+'create_table_'+sql_file
  e 'Next creating a table for '+simp(sql_file)+'. Will '+
    'store into '+sfile(store_here)
  save_what_into(result, store_here)
  e result
end
1783
+
1784
+ # ========================================================================= #
1785
+ # === enable
1786
+ # ========================================================================= #
1787
# Enables a feature by name.
#
# i - the feature, optionally still prefixed with the word "enable"
#     (e. g. "enable colours" or just "colours"). Currently only
#     'colours' is known, which leads to enable_colours(). Unknown
#     features are ignored (nil is returned).
#
# The input is no longer modified in place: the former gsub!/strip!
# calls changed the caller's String and raised a FrozenError for frozen
# input.
def enable(i)
  feature = i.to_s.sub(/\Aenable/, '').strip
  case feature
  when 'colours'
    enable_colours
  end
end
1798
+
1799
+ # ========================================================================= #
1800
+ # === show_sql_commands_only
1801
+ # ========================================================================= #
1802
# Shows the result of Commands.create_table for the taxonomy nodes,
# names and fasta tables, each one followed by a bare e() call.
def show_sql_commands_only
  %i[taxonomy_nodes taxonomy_names taxonomy_fasta].map { |table|
    e Commands.create_table(table)
    e
  }.last
end
1810
+
1811
+ # ========================================================================= #
1812
+ # === enable_colours
1813
+ # ========================================================================= #
1814
# Switches colour support on: sets the @use_colours flag of this object
# and additionally calls Taxonomy.enable_colours.
def enable_colours
  @use_colours = true
  Taxonomy.enable_colours
end
1818
+
1819
+ # ========================================================================= #
1820
+ # === create_database
1821
+ # ========================================================================= #
1822
# Creates the local databases. Only the 'sqlite' backend (as reported by
# which_database_to_use?) is acted upon; for any other backend nothing
# happens.
#
# For sqlite, the three CREATE TABLE statements are shown and then
# handed to the sqlite3 binary (one .db file per table inside
# working_dir?). Afterwards each line of the file
# Taxonomy::Constants::NAMES_SQL is passed to sqlite3 for names.db, with
# double quotes escaped for the surrounding "..." of the command.
def create_database
  return unless which_database_to_use? == 'sqlite'

  create_statements = {
    'names.db' => 'CREATE TABLE names ( taxid int, name_txt varchar(155), unique_name varchar(100), name_class varchar(25) );',
    'nodes.db' => 'CREATE TABLE nodes ( taxid int, parent_taxid int, rank varchar(25) );',
    'fasta.db' => 'CREATE TABLE fasta ( name varchar(80), taxid integer, type smallint, lineage_ids varchar(500), lineage_scientific_name varchar(2500), comment_field varchar(8000), filesize integer, modification_time varchar(25), n_accession_numbers integer, path varchar(120) );'
  }
  create_statements.each_value { |statement| e statement }
  create_statements.each_pair { |database_file, statement|
    esystem 'sqlite3 '+working_dir?+database_file+' "'+statement+'"'
  }
  File.read(Taxonomy::Constants::NAMES_SQL).each_line { |line|
    escaped_line = line.chomp.gsub(/"/, '\"')
    Esystem.esystem 'sqlite3 '+working_dir?+'names.db "'+escaped_line+'"'
  }
end
1848
+
1849
+ # ========================================================================= #
1850
+ # === download
1851
+ #
1852
+ # This method can be used to download the ncbi database.
1853
+ # ========================================================================= #
1854
# Downloads a remote database. Right now only the NCBI database is
# supported.
#
# i - what to download; defaults to the first argument given by the
#     user (f?). Recognized values:
#
#       HELP, ?, --help                       => short usage text
#       ncbi, --ncbi, database, 1, '' (empty) => download_ncbi_database
#
#     Anything else is reported as unknown input.
def download(i = f?)
  request = i.to_s
  if %w[HELP ? --help].include?(request)
    e 'Currently we can download only the NCBI database.'
    e
    e 'To do so, do:'
    e ' download ncbi'
  elsif ['ncbi', '--ncbi', 'database', '1', ''].include?(request) # '' is the default.
    download_ncbi_database
    if be_verbose?
      e
      e 'If all went well then you could now try to update the '+
        'database, by issuing:'
      e
      efancy ' update databases'
      e
    end
  else
    e 'In the method download(), we do not know the given '+
      'input `'+simp(request)+'`.'
  end
end
1883
+
1884
+ # ========================================================================= #
1885
+ # === download_ncbi_database
1886
+ # ========================================================================= #
1887
# Instance-level shortcut for the class-level method of the same name:
# the work is done by Taxonomy.download_ncbi_database.
def download_ncbi_database
  Taxonomy.download_ncbi_database
end
1890
+
1891
+ # ========================================================================= #
1892
+ # === add_comment (comment tag)
1893
+ #
1894
+ # Use this method to add a comment to the localome table.
1895
+ # ========================================================================= #
1896
# Adds a comment to an entry of the fasta (localome) table.
#
# taxid - the Taxonomy ID of the entry. It must be registered already
#         (checked via has?); otherwise only a notice is shown.
#
# The comment itself is read from $stdin and stored in the
# comment_field column via run_sql().
#
# Single quotes inside the comment and the TaxID are doubled before they
# are embedded into the SQL statement (the standard SQL way to escape
# them). Before, a comment such as "it's fine" produced a broken
# statement and allowed arbitrary SQL to be injected. A closed stdin
# (gets returning nil) is treated as an empty comment instead of
# raising a NoMethodError.
def add_comment(taxid)
  unless has? taxid
    return erev("We could not find any entry with the Taxonomy "\
                "ID #{taxid.to_s}.")
  end
  e 'Please input the comment you wish to add to the '+
    'Taxonomy ID '+simp(taxid.to_s)+' now:'
  comment = $stdin.gets.to_s.chomp
  escape_quotes = ->(text) { text.gsub("'", "''") }
  query = "UPDATE fasta SET comment_field='"+escape_quotes.call(comment)+
          "' WHERE taxid='"+escape_quotes.call(taxid.to_s)+"'"
  run_sql(query)
end
1908
+
1909
+ # ========================================================================= #
1910
+ # === run (run tag)
1911
+ # ========================================================================= #
1912
# Entry point of the interactive mode: performs the startup actions
# first and then enters the user-input loop. Also available under the
# name run_in_interactive_mode.
def run
  do_startup_actions
  fetch_user_input_via_loop
end; alias run_in_interactive_mode run # === run_in_interactive_mode
1916
+
1917
+ end
1918
+
1919
+ # ========================================================================= #
1920
+ # === Taxonomy.interactive
1921
+ #
1922
+ # Invoke this method if you wish to directly invoke the interactive
1923
+ # component of the Taxonomy module.
1924
+ # ========================================================================= #
1925
# Module-level shortcut into the interactive component: creates a new
# ::Bioroebe::Taxonomy::Interactive object with the given argument and
# returns it.
def self.interactive(i = nil)
  ::Bioroebe::Taxonomy::Interactive.new(i)
end
1928
+
1929
+ end; end
1930
+
1931
# Start the interactive Taxonomy component with the commandline arguments,
# but only when this file is run directly (and not merely required).
Bioroebe::Taxonomy::Interactive.run_interactive(ARGV) if __FILE__ == $PROGRAM_NAME # itax; Taxonomy.interactive