bioroebe 0.10.80

Sign up to get free protection for your applications and to get access to all the features.
Files changed (802) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +428 -0
  3. data/README.md +9280 -0
  4. data/bin/advanced_dotplot +7 -0
  5. data/bin/align_open_reading_frames +12 -0
  6. data/bin/all_positions_of_this_nucleotide +7 -0
  7. data/bin/aminoacid_composition +7 -0
  8. data/bin/aminoacid_frequencies +12 -0
  9. data/bin/aminoacid_substitution +7 -0
  10. data/bin/automatically_rename_this_fasta_file +7 -0
  11. data/bin/base_composition +7 -0
  12. data/bin/batch_create_windows_executables +7 -0
  13. data/bin/biomart_console +11 -0
  14. data/bin/bioroebe +27 -0
  15. data/bin/bioroebe_controller +10 -0
  16. data/bin/bioshell +26 -0
  17. data/bin/blosum_2D_table +12 -0
  18. data/bin/calculate_n50_value +12 -0
  19. data/bin/chunked_display +12 -0
  20. data/bin/codon_frequency +9 -0
  21. data/bin/codon_to_aminoacid +30 -0
  22. data/bin/colourize_this_fasta_sequence +14 -0
  23. data/bin/compact_fasta_file +7 -0
  24. data/bin/complement +7 -0
  25. data/bin/complementary_dna_strand +12 -0
  26. data/bin/complementary_rna_strand +12 -0
  27. data/bin/compseq +7 -0
  28. data/bin/consensus_sequence +17 -0
  29. data/bin/count_AT +12 -0
  30. data/bin/count_GC +12 -0
  31. data/bin/create_random_aminoacids +7 -0
  32. data/bin/decode_this_aminoacid_sequence +20 -0
  33. data/bin/deduce_aminoacid_sequence +13 -0
  34. data/bin/deduce_most_likely_aminoacid_sequence +7 -0
  35. data/bin/display_aminoacid_table +12 -0
  36. data/bin/display_open_reading_frames +7 -0
  37. data/bin/dna_to_aminoacid_sequence +7 -0
  38. data/bin/dna_to_rna +7 -0
  39. data/bin/downcase_chunked_display +12 -0
  40. data/bin/download_this_pdb +7 -0
  41. data/bin/fasta_index +7 -0
  42. data/bin/fetch_data_from_uniprot +12 -0
  43. data/bin/filter_away_invalid_nucleotides +12 -0
  44. data/bin/find_substring +19 -0
  45. data/bin/genbank_to_fasta +7 -0
  46. data/bin/hamming_distance +12 -0
  47. data/bin/input_as_dna +12 -0
  48. data/bin/is_palindrome +13 -0
  49. data/bin/leading_five_prime +7 -0
  50. data/bin/levensthein +7 -0
  51. data/bin/longest_ORF +14 -0
  52. data/bin/longest_substring +12 -0
  53. data/bin/n_stop_codons_in_this_sequence +15 -0
  54. data/bin/open_reading_frames +14 -0
  55. data/bin/overwrite_fasta_header +7 -0
  56. data/bin/palindrome_2D_structure +7 -0
  57. data/bin/palindrome_generator +7 -0
  58. data/bin/parse_fasta +7 -0
  59. data/bin/partner_nucleotide +9 -0
  60. data/bin/possible_codons_for_this_aminoacid +12 -0
  61. data/bin/random_dna_sequence +12 -0
  62. data/bin/random_sequence +12 -0
  63. data/bin/raw_hamming_distance +12 -0
  64. data/bin/return_longest_substring_via_LCS_algorithm +7 -0
  65. data/bin/reverse_complement +7 -0
  66. data/bin/reverse_sequence +7 -0
  67. data/bin/ruler +12 -0
  68. data/bin/scan_this_input_for_startcodons +12 -0
  69. data/bin/short_aminoacid_letter_from_long_aminoacid_name +7 -0
  70. data/bin/show_atomic_composition +7 -0
  71. data/bin/show_codon_usage +12 -0
  72. data/bin/show_fasta_header +7 -0
  73. data/bin/show_nucleotide_sequence +7 -0
  74. data/bin/show_this_codon_table +7 -0
  75. data/bin/show_this_dna_sequence +7 -0
  76. data/bin/showorf +14 -0
  77. data/bin/simplify_fasta +7 -0
  78. data/bin/sort_aminoacid_based_on_its_hydrophobicity +7 -0
  79. data/bin/split_this_fasta_file_into_chromosomes +7 -0
  80. data/bin/strict_filter_away_invalid_aminoacids +7 -0
  81. data/bin/taxonomy +63 -0
  82. data/bin/three_to_one +7 -0
  83. data/bin/to_rna +7 -0
  84. data/bin/trailing_three_prime +7 -0
  85. data/bin/trypsin_digest +7 -0
  86. data/bin/upcase_this_aminoacid_sequence_and_remove_numbers +7 -0
  87. data/bioroebe.gemspec +97 -0
  88. data/doc/IUPAC_aminoacids_code.md +36 -0
  89. data/doc/IUPAC_nucleotide_code.md +19 -0
  90. data/doc/README.gen +9237 -0
  91. data/doc/blosum.md +5 -0
  92. data/doc/compatibility/BIO_PHP.md +37 -0
  93. data/doc/compatibility/README.md +3 -0
  94. data/doc/compatibility/emboss.md +56 -0
  95. data/doc/extensive_usage_example.md +35 -0
  96. data/doc/german_names_for_the_aminoacids.md +27 -0
  97. data/doc/instructions_for_the_taxonomy_subproject.md +504 -0
  98. data/doc/legacy_paths.md +9 -0
  99. data/doc/pdb_ATOM_entry.md +33 -0
  100. data/doc/quality_control/README.md +2 -0
  101. data/doc/quality_control/commandline_applications.md +13 -0
  102. data/doc/resources.md +23 -0
  103. data/doc/setup.rb +1655 -0
  104. data/doc/statistics/statistics.md +41 -0
  105. data/doc/todo/README.md +5 -0
  106. data/doc/todo/bioroebe_GUI_todo.md +15 -0
  107. data/doc/todo/bioroebe_todo.md +2823 -0
  108. data/doc/using_biomart.md +258 -0
  109. data/html/test.html +144 -0
  110. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +118 -0
  111. data/lib/bioroebe/aminoacids/aminoacids_mass_table.rb +118 -0
  112. data/lib/bioroebe/aminoacids/codon_percentage.rb +189 -0
  113. data/lib/bioroebe/aminoacids/colourize_hydrophilic_and_hydrophobic_aminoacids.rb +110 -0
  114. data/lib/bioroebe/aminoacids/create_random_aminoacids.rb +221 -0
  115. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +460 -0
  116. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +231 -0
  117. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +184 -0
  118. data/lib/bioroebe/annotations/README.md +2 -0
  119. data/lib/bioroebe/annotations/create_annotation_format.rb +208 -0
  120. data/lib/bioroebe/autoinclude.rb +7 -0
  121. data/lib/bioroebe/base/base.rb +35 -0
  122. data/lib/bioroebe/base/colours.rb +14 -0
  123. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +275 -0
  124. data/lib/bioroebe/base/commandline_application/README.md +7 -0
  125. data/lib/bioroebe/base/commandline_application/aminoacids.rb +33 -0
  126. data/lib/bioroebe/base/commandline_application/commandline_application.rb +37 -0
  127. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +144 -0
  128. data/lib/bioroebe/base/commandline_application/directory.rb +33 -0
  129. data/lib/bioroebe/base/commandline_application/extract.rb +22 -0
  130. data/lib/bioroebe/base/commandline_application/misc.rb +485 -0
  131. data/lib/bioroebe/base/commandline_application/opn.rb +47 -0
  132. data/lib/bioroebe/base/commandline_application/reset.rb +40 -0
  133. data/lib/bioroebe/base/commandline_application/warnings.rb +36 -0
  134. data/lib/bioroebe/base/commandline_application/write_what_into.rb +29 -0
  135. data/lib/bioroebe/base/initialize.rb +18 -0
  136. data/lib/bioroebe/base/misc.rb +94 -0
  137. data/lib/bioroebe/base/namespace.rb +16 -0
  138. data/lib/bioroebe/base/prototype/README.md +12 -0
  139. data/lib/bioroebe/base/prototype/e_and_ee.rb +24 -0
  140. data/lib/bioroebe/base/prototype/misc.rb +108 -0
  141. data/lib/bioroebe/base/prototype/mkdir.rb +20 -0
  142. data/lib/bioroebe/base/prototype/prototype.rb +21 -0
  143. data/lib/bioroebe/base/prototype/reset.rb +26 -0
  144. data/lib/bioroebe/base/reset.rb +11 -0
  145. data/lib/bioroebe/biomart/LICENSE.md +27 -0
  146. data/lib/bioroebe/biomart/attribute.rb +77 -0
  147. data/lib/bioroebe/biomart/biomart.rb +227 -0
  148. data/lib/bioroebe/biomart/database.rb +128 -0
  149. data/lib/bioroebe/biomart/dataset.rb +572 -0
  150. data/lib/bioroebe/biomart/filter.rb +97 -0
  151. data/lib/bioroebe/biomart/server.rb +152 -0
  152. data/lib/bioroebe/blosum/blosum.rb +88 -0
  153. data/lib/bioroebe/calculate/calculate_blosum_score.rb +145 -0
  154. data/lib/bioroebe/calculate/calculate_gc_content.rb +301 -0
  155. data/lib/bioroebe/calculate/calculate_levensthein_distance.rb +100 -0
  156. data/lib/bioroebe/calculate/calculate_melting_temperature.rb +398 -0
  157. data/lib/bioroebe/calculate/calculate_melting_temperature_for_more_than_thirteen_nucleotides.rb +304 -0
  158. data/lib/bioroebe/calculate/calculate_the_position_specific_scoring_matrix.rb +166 -0
  159. data/lib/bioroebe/cell/README.md +1 -0
  160. data/lib/bioroebe/cell/cell.rb +63 -0
  161. data/lib/bioroebe/cleave_and_digest/README.md +2 -0
  162. data/lib/bioroebe/cleave_and_digest/cleave.rb +80 -0
  163. data/lib/bioroebe/cleave_and_digest/digestion.rb +75 -0
  164. data/lib/bioroebe/cleave_and_digest/trypsin.rb +192 -0
  165. data/lib/bioroebe/codon_tables/README.md +9 -0
  166. data/lib/bioroebe/codon_tables/frequencies/287_Pseudomonas_aeruginosa.yml +101 -0
  167. data/lib/bioroebe/codon_tables/frequencies/3702_Arabidopsis_thaliana.yml +77 -0
  168. data/lib/bioroebe/codon_tables/frequencies/4932_Saccharomyces_cerevisiae.yml +103 -0
  169. data/lib/bioroebe/codon_tables/frequencies/7227_Drosophila_melanogaster.yml +71 -0
  170. data/lib/bioroebe/codon_tables/frequencies/83333_Escherichia_coli_K12.yml +103 -0
  171. data/lib/bioroebe/codon_tables/frequencies/9606_Homo_sapiens.yml +123 -0
  172. data/lib/bioroebe/codon_tables/frequencies/9685_Felis_catus.yml +78 -0
  173. data/lib/bioroebe/codon_tables/frequencies/README.md +10 -0
  174. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +337 -0
  175. data/lib/bioroebe/codons/README.md +28 -0
  176. data/lib/bioroebe/codons/codon_table.rb +416 -0
  177. data/lib/bioroebe/codons/codon_tables.rb +123 -0
  178. data/lib/bioroebe/codons/codons.rb +517 -0
  179. data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +102 -0
  180. data/lib/bioroebe/codons/detect_minimal_codon.rb +180 -0
  181. data/lib/bioroebe/codons/determine_optimal_codons.rb +74 -0
  182. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +380 -0
  183. data/lib/bioroebe/codons/sanitize_codon_frequency.rb +144 -0
  184. data/lib/bioroebe/codons/show_codon_tables.rb +130 -0
  185. data/lib/bioroebe/codons/show_codon_usage.rb +197 -0
  186. data/lib/bioroebe/codons/show_this_codon_table.rb +573 -0
  187. data/lib/bioroebe/codons/start_codons.rb +105 -0
  188. data/lib/bioroebe/colours/colour_schemes/README.md +10 -0
  189. data/lib/bioroebe/colours/colour_schemes/array_available_colour_schemes.rb +38 -0
  190. data/lib/bioroebe/colours/colour_schemes/buried.rb +70 -0
  191. data/lib/bioroebe/colours/colour_schemes/colour_scheme.rb +101 -0
  192. data/lib/bioroebe/colours/colour_schemes/colour_scheme_demo.rb +262 -0
  193. data/lib/bioroebe/colours/colour_schemes/helix.rb +65 -0
  194. data/lib/bioroebe/colours/colour_schemes/hydropathy.rb +70 -0
  195. data/lib/bioroebe/colours/colour_schemes/nucleotide.rb +47 -0
  196. data/lib/bioroebe/colours/colour_schemes/score.rb +112 -0
  197. data/lib/bioroebe/colours/colour_schemes/simple.rb +42 -0
  198. data/lib/bioroebe/colours/colour_schemes/strand.rb +65 -0
  199. data/lib/bioroebe/colours/colour_schemes/taylor.rb +58 -0
  200. data/lib/bioroebe/colours/colour_schemes/turn.rb +65 -0
  201. data/lib/bioroebe/colours/colour_schemes/zappo.rb +59 -0
  202. data/lib/bioroebe/colours/colourize_sequence.rb +262 -0
  203. data/lib/bioroebe/colours/colours.rb +119 -0
  204. data/lib/bioroebe/colours/misc_colours.rb +80 -0
  205. data/lib/bioroebe/colours/rev.rb +41 -0
  206. data/lib/bioroebe/colours/sdir.rb +21 -0
  207. data/lib/bioroebe/colours/sfancy.rb +21 -0
  208. data/lib/bioroebe/colours/sfile.rb +21 -0
  209. data/lib/bioroebe/colours/simp.rb +21 -0
  210. data/lib/bioroebe/colours/swarn.rb +29 -0
  211. data/lib/bioroebe/colours/use_colours.rb +27 -0
  212. data/lib/bioroebe/configuration/configuration.rb +114 -0
  213. data/lib/bioroebe/configuration/constants.rb +35 -0
  214. data/lib/bioroebe/constants/GUIs.rb +79 -0
  215. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +146 -0
  216. data/lib/bioroebe/constants/base_directory.rb +120 -0
  217. data/lib/bioroebe/constants/carriage_return.rb +14 -0
  218. data/lib/bioroebe/constants/codon_tables.rb +77 -0
  219. data/lib/bioroebe/constants/database_constants.rb +107 -0
  220. data/lib/bioroebe/constants/files_and_directories.rb +579 -0
  221. data/lib/bioroebe/constants/misc.rb +189 -0
  222. data/lib/bioroebe/constants/newline.rb +14 -0
  223. data/lib/bioroebe/constants/nucleotides.rb +114 -0
  224. data/lib/bioroebe/constants/regex.rb +28 -0
  225. data/lib/bioroebe/constants/roebe.rb +38 -0
  226. data/lib/bioroebe/constants/row_terminator.rb +16 -0
  227. data/lib/bioroebe/constants/tabulator.rb +14 -0
  228. data/lib/bioroebe/constants/unicode.rb +12 -0
  229. data/lib/bioroebe/constants/urls.rb +50 -0
  230. data/lib/bioroebe/conversions/README.md +3 -0
  231. data/lib/bioroebe/conversions/convert_aminoacid_to_dna.rb +298 -0
  232. data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +569 -0
  233. data/lib/bioroebe/count/README.md +1 -0
  234. data/lib/bioroebe/count/count_amount_of_aminoacids.rb +352 -0
  235. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +491 -0
  236. data/lib/bioroebe/count/count_at.rb +39 -0
  237. data/lib/bioroebe/count/count_gc.rb +43 -0
  238. data/lib/bioroebe/css/README.md +5 -0
  239. data/lib/bioroebe/css/project.css +121 -0
  240. data/lib/bioroebe/data/README.md +10 -0
  241. data/lib/bioroebe/data/bam/README.md +1 -0
  242. data/lib/bioroebe/data/data.txt +192 -0
  243. data/lib/bioroebe/data/fasta/GFP_mutant_3_coding_sequence.fasta +12 -0
  244. data/lib/bioroebe/data/fasta/alu_elements.fasta +42 -0
  245. data/lib/bioroebe/data/fasta/lady_slippers_orchid.fasta +1197 -0
  246. data/lib/bioroebe/data/fasta/loxP.fasta +2 -0
  247. data/lib/bioroebe/data/fasta/ls_orchid.fasta +1197 -0
  248. data/lib/bioroebe/data/fasta/pax6_in_mouse.fasta +1 -0
  249. data/lib/bioroebe/data/fasta/test.fasta +7 -0
  250. data/lib/bioroebe/data/fasta/test_DNA.fasta +1 -0
  251. data/lib/bioroebe/data/fastq/fastq_example_file.fastq +32 -0
  252. data/lib/bioroebe/data/fastq/fastq_example_file_SP1.fastq +1000 -0
  253. data/lib/bioroebe/data/fastq/one_random_fastq_entry.fastq +4 -0
  254. data/lib/bioroebe/data/genbank/sample_file.genbank +15 -0
  255. data/lib/bioroebe/data/genbank/standard.fasta +3 -0
  256. data/lib/bioroebe/data/gff/Escherichia_coli_K12_plasmid_F_DNA_NC_002483.1.gff3 +345 -0
  257. data/lib/bioroebe/data/gff/sample.gff +2 -0
  258. data/lib/bioroebe/data/gff/test_gene.gff +4 -0
  259. data/lib/bioroebe/data/gff/transcripts.gff +16 -0
  260. data/lib/bioroebe/data/gtf/README.md +1 -0
  261. data/lib/bioroebe/data/json/example_config.json +48 -0
  262. data/lib/bioroebe/data/pdb/1VII.pdb +754 -0
  263. data/lib/bioroebe/data/pdb/ala_phe_ala.pdb +228 -0
  264. data/lib/bioroebe/data/pdb/rcsb_pdb_1VII.fasta +2 -0
  265. data/lib/bioroebe/data/phylip/README.md +11 -0
  266. data/lib/bioroebe/data/phylip/example.phylip +7 -0
  267. data/lib/bioroebe/data/svg/example.svg +301 -0
  268. data/lib/bioroebe/databases/README.md +1 -0
  269. data/lib/bioroebe/databases/download_taxonomy_database.rb +102 -0
  270. data/lib/bioroebe/dotplots/README.md +5 -0
  271. data/lib/bioroebe/dotplots/advanced_dotplot.rb +256 -0
  272. data/lib/bioroebe/dotplots/dotplot.rb +184 -0
  273. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +191 -0
  274. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +102 -0
  275. data/lib/bioroebe/electron_microscopy/generate_em2em_file.rb +122 -0
  276. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +197 -0
  277. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +282 -0
  278. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +131 -0
  279. data/lib/bioroebe/encoding/README.md +2 -0
  280. data/lib/bioroebe/encoding/encoding.rb +45 -0
  281. data/lib/bioroebe/enzymes/README.md +2 -0
  282. data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +46 -0
  283. data/lib/bioroebe/enzymes/restriction_enzyme.rb +200 -0
  284. data/lib/bioroebe/enzymes/restriction_enzymes_file.rb +72 -0
  285. data/lib/bioroebe/enzymes/return_restriction_enzyme_sequence_and_cut_position.rb +80 -0
  286. data/lib/bioroebe/enzymes/return_sequence_that_is_cut_via_restriction_enzyme.rb +65 -0
  287. data/lib/bioroebe/enzymes/show_restriction_enzymes.rb +119 -0
  288. data/lib/bioroebe/exceptions/README.md +2 -0
  289. data/lib/bioroebe/exceptions/exceptions.rb +17 -0
  290. data/lib/bioroebe/ext/LICENCE.md +5 -0
  291. data/lib/bioroebe/ext/README.md +7 -0
  292. data/lib/bioroebe/ext/main.cpp +45 -0
  293. data/lib/bioroebe/ext/nucleotide.cpp +24 -0
  294. data/lib/bioroebe/ext/nussinov_algorithm.cpp +348 -0
  295. data/lib/bioroebe/ext/sequence +0 -0
  296. data/lib/bioroebe/ext/sequence.cpp +162 -0
  297. data/lib/bioroebe/fasta_and_fastq/README.md +6 -0
  298. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +88 -0
  299. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +151 -0
  300. data/lib/bioroebe/fasta_and_fastq/display_how_many_fasta_entries_are_in_this_directory.rb +111 -0
  301. data/lib/bioroebe/fasta_and_fastq/download_fasta.rb +248 -0
  302. data/lib/bioroebe/fasta_and_fastq/fasta_defline/README.md +2 -0
  303. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +113 -0
  304. data/lib/bioroebe/fasta_and_fastq/fasta_parser.rb +167 -0
  305. data/lib/bioroebe/fasta_and_fastq/fastq_format_explainer.rb +131 -0
  306. data/lib/bioroebe/fasta_and_fastq/length_modifier/length_modifier.rb +87 -0
  307. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +50 -0
  308. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +86 -0
  309. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +117 -0
  310. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +981 -0
  311. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +27 -0
  312. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +156 -0
  313. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +128 -0
  314. data/lib/bioroebe/fasta_and_fastq/parse_fasta/run.rb +20 -0
  315. data/lib/bioroebe/fasta_and_fastq/parse_fastq/parse_fastq.rb +83 -0
  316. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +112 -0
  317. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +135 -0
  318. data/lib/bioroebe/fasta_and_fastq/show_fasta_statistics.rb +188 -0
  319. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +111 -0
  320. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +26 -0
  321. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/menu.rb +41 -0
  322. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/misc.rb +23 -0
  323. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/reset.rb +68 -0
  324. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/split_this_fasta_file_into_chromosomes.rb +290 -0
  325. data/lib/bioroebe/genbank/README.md +1 -0
  326. data/lib/bioroebe/genbank/genbank_flat_file_format_generator.rb +275 -0
  327. data/lib/bioroebe/genbank/genbank_parser.rb +291 -0
  328. data/lib/bioroebe/gene/gene.rb +64 -0
  329. data/lib/bioroebe/genomes/genome_pattern.rb +165 -0
  330. data/lib/bioroebe/genomes/genome_retriever.rb +79 -0
  331. data/lib/bioroebe/gui/experimental/README.md +1 -0
  332. data/lib/bioroebe/gui/experimental/snapgene/snapgene.rb +147 -0
  333. data/lib/bioroebe/gui/gtk3/README.md +2 -0
  334. data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +337 -0
  335. data/lib/bioroebe/gui/gtk3/aminoacid_composition/aminoacid_composition.rb +510 -0
  336. data/lib/bioroebe/gui/gtk3/aminoacid_composition/customized_dialog.rb +55 -0
  337. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.config +6 -0
  338. data/lib/bioroebe/gui/gtk3/anti_sense_strand/anti_sense_strand.rb +29 -0
  339. data/lib/bioroebe/gui/gtk3/blosum_matrix_viewer/blosum_matrix_viewer.rb +196 -0
  340. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.config +6 -0
  341. data/lib/bioroebe/gui/gtk3/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +106 -0
  342. data/lib/bioroebe/gui/gtk3/controller/controller.rb +406 -0
  343. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +609 -0
  344. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.config +6 -0
  345. data/lib/bioroebe/gui/gtk3/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +189 -0
  346. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +245 -0
  347. data/lib/bioroebe/gui/gtk3/format_converter/format_converter.rb +346 -0
  348. data/lib/bioroebe/gui/gtk3/gene/gene.rb +182 -0
  349. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.config +6 -0
  350. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +370 -0
  351. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.config +6 -0
  352. data/lib/bioroebe/gui/gtk3/levensthein_distance/levensthein_distance.rb +175 -0
  353. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +428 -0
  354. data/lib/bioroebe/gui/gtk3/parse_pdb_file/parse_pdb_file.rb +342 -0
  355. data/lib/bioroebe/gui/gtk3/primer_design_widget/primer_design_widget.rb +580 -0
  356. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.config +6 -0
  357. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +182 -0
  358. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +566 -0
  359. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.config +6 -0
  360. data/lib/bioroebe/gui/gtk3/restriction_enzymes/restriction_enzymes.rb +329 -0
  361. data/lib/bioroebe/gui/gtk3/show_codon_table/misc.rb +556 -0
  362. data/lib/bioroebe/gui/gtk3/show_codon_table/show_codon_table.rb +171 -0
  363. data/lib/bioroebe/gui/gtk3/show_codon_usage/show_codon_usage.rb +146 -0
  364. data/lib/bioroebe/gui/gtk3/sizeseq/sizeseq.rb +207 -0
  365. data/lib/bioroebe/gui/gtk3/three_to_one/three_to_one.rb +279 -0
  366. data/lib/bioroebe/gui/gtk3/three_to_one/title.rb +23 -0
  367. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.config +6 -0
  368. data/lib/bioroebe/gui/gtk3/www_finder/www_finder.rb +368 -0
  369. data/lib/bioroebe/gui/jruby/blosum_matrix_viewer/blosum_matrix_viewer.rb +82 -0
  370. data/lib/bioroebe/gui/libui/README.md +4 -0
  371. data/lib/bioroebe/gui/libui/alignment/alignment.rb +114 -0
  372. data/lib/bioroebe/gui/libui/blosum_matrix_viewer/blosum_matrix_viewer.rb +112 -0
  373. data/lib/bioroebe/gui/libui/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria.rb +60 -0
  374. data/lib/bioroebe/gui/libui/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +161 -0
  375. data/lib/bioroebe/gui/libui/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget.rb +76 -0
  376. data/lib/bioroebe/gui/libui/hamming_distance/hamming_distance.rb +135 -0
  377. data/lib/bioroebe/gui/libui/levensthein_distance/levensthein_distance.rb +118 -0
  378. data/lib/bioroebe/gui/libui/protein_to_DNA/protein_to_DNA.rb +115 -0
  379. data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +174 -0
  380. data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +132 -0
  381. data/lib/bioroebe/gui/libui/show_codon_usage/show_codon_usage.rb +89 -0
  382. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +111 -0
  383. data/lib/bioroebe/gui/shared_code/blosum_matrix_viewer/blosum_matrix_viewer_module.rb +42 -0
  384. data/lib/bioroebe/gui/shared_code/calculate_cell_numbers_of_bacteria/calculate_cell_numbers_of_bacteria_module.rb +216 -0
  385. data/lib/bioroebe/gui/shared_code/dna_to_reverse_complement_widget/dna_to_reverse_complement_widget_module.rb +284 -0
  386. data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +402 -0
  387. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +192 -0
  388. data/lib/bioroebe/gui/shared_code/show_codon_table/show_codon_table_module.rb +72 -0
  389. data/lib/bioroebe/gui/shared_code/show_codon_usage/show_codon_usage_module.rb +213 -0
  390. data/lib/bioroebe/gui/tk/aminoacid_composition/aminoacid_composition.rb +206 -0
  391. data/lib/bioroebe/gui/tk/blosum_matrix_viewer/blosum_matrix_viewer.rb +140 -0
  392. data/lib/bioroebe/gui/tk/hamming_distance/hamming_distance.rb +262 -0
  393. data/lib/bioroebe/gui/tk/levensthein_distance/levensthein_distance.rb +243 -0
  394. data/lib/bioroebe/gui/tk/three_to_one/three_to_one.rb +199 -0
  395. data/lib/bioroebe/gui/unified_widgets/anti_sense_strand/anti_sense_strand.rb +519 -0
  396. data/lib/bioroebe/images/BIOROEBE.png +0 -0
  397. data/lib/bioroebe/images/BIOROEBE_NEW_LOGO.png +0 -0
  398. data/lib/bioroebe/images/BlosumMatrixViewer.png +0 -0
  399. data/lib/bioroebe/images/DnaToAminoacidWidget.png +0 -0
  400. data/lib/bioroebe/images/PRINTED_AMINOACID_TABLE.png +0 -0
  401. data/lib/bioroebe/images/class_ConvertAminoacidToDNA.png +0 -0
  402. data/lib/bioroebe/images/class_SimpleStringComparer.png +0 -0
  403. data/lib/bioroebe/images/example_of_FASTA_coloured_output.png +0 -0
  404. data/lib/bioroebe/images/libui_hamming_distance_widget.png +0 -0
  405. data/lib/bioroebe/images/pretty_DNA_picture.png +0 -0
  406. data/lib/bioroebe/images/primer_design_widget.png +0 -0
  407. data/lib/bioroebe/images/restriction_enzyme_commandline_result.png +0 -0
  408. data/lib/bioroebe/images/ruby-gtk_three_to_one_widget.png +0 -0
  409. data/lib/bioroebe/images/small_DNA_logo.png +0 -0
  410. data/lib/bioroebe/images/small_drosophila_image.png +0 -0
  411. data/lib/bioroebe/java/README.md +6 -0
  412. data/lib/bioroebe/java/bioroebe/AllInOne.class +0 -0
  413. data/lib/bioroebe/java/bioroebe/AllInOne.java +214 -0
  414. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  415. data/lib/bioroebe/java/bioroebe/Base.java +102 -0
  416. data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.class +0 -0
  417. data/lib/bioroebe/java/bioroebe/BisulfiteTreatment.java +23 -0
  418. data/lib/bioroebe/java/bioroebe/Cat.class +0 -0
  419. data/lib/bioroebe/java/bioroebe/Codons.class +0 -0
  420. data/lib/bioroebe/java/bioroebe/Codons.java +22 -0
  421. data/lib/bioroebe/java/bioroebe/Esystem.class +0 -0
  422. data/lib/bioroebe/java/bioroebe/Esystem.java +47 -0
  423. data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.class +0 -0
  424. data/lib/bioroebe/java/bioroebe/GUI/BaseFrame.java +65 -0
  425. data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.class +0 -0
  426. data/lib/bioroebe/java/bioroebe/GenerateRandomDnaSequence.java +32 -0
  427. data/lib/bioroebe/java/bioroebe/IsPalindrome.class +0 -0
  428. data/lib/bioroebe/java/bioroebe/IsPalindrome.java +18 -0
  429. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  430. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +19 -0
  431. data/lib/bioroebe/java/bioroebe/README.md +4 -0
  432. data/lib/bioroebe/java/bioroebe/RemoveFile.class +0 -0
  433. data/lib/bioroebe/java/bioroebe/RemoveFile.java +24 -0
  434. data/lib/bioroebe/java/bioroebe/RemoveNumbers.class +0 -0
  435. data/lib/bioroebe/java/bioroebe/RemoveNumbers.java +14 -0
  436. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.class +0 -0
  437. data/lib/bioroebe/java/bioroebe/SanitizeNucleotideSequence.java +20 -0
  438. data/lib/bioroebe/java/bioroebe/SaveFile.java +44 -0
  439. data/lib/bioroebe/java/bioroebe/Sequence.java +28 -0
  440. data/lib/bioroebe/java/bioroebe/ToCamelcase.class +0 -0
  441. data/lib/bioroebe/java/bioroebe/ToCamelcase.java +32 -0
  442. data/lib/bioroebe/java/bioroebe/ToplevelMethods.class +0 -0
  443. data/lib/bioroebe/java/bioroebe/ToplevelMethods.java +15 -0
  444. data/lib/bioroebe/java/bioroebe/enums/DNA.java +6 -0
  445. data/lib/bioroebe/java/bioroebe.jar +0 -0
  446. data/lib/bioroebe/matplotlib/matplotlib_generator.rb +104 -0
  447. data/lib/bioroebe/misc/quiz/README.md +6 -0
  448. data/lib/bioroebe/misc/quiz/three_letter_to_aminoacid.rb +163 -0
  449. data/lib/bioroebe/misc/ruler.rb +244 -0
  450. data/lib/bioroebe/misc/useful_formulas.rb +129 -0
  451. data/lib/bioroebe/ncbi/efetch.rb +253 -0
  452. data/lib/bioroebe/ncbi/ncbi.rb +93 -0
  453. data/lib/bioroebe/ngs/README.md +2 -0
  454. data/lib/bioroebe/ngs/phred_quality_score_table.rb +123 -0
  455. data/lib/bioroebe/nucleotides/complementary_dna_strand.rb +166 -0
  456. data/lib/bioroebe/nucleotides/molecular_weight_of_nucleotides.rb +135 -0
  457. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +198 -0
  458. data/lib/bioroebe/nucleotides/sanitize_nucleotide_sequence.rb +133 -0
  459. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +556 -0
  460. data/lib/bioroebe/palindromes/palindrome_2D_structure.rb +139 -0
  461. data/lib/bioroebe/palindromes/palindrome_finder.rb +208 -0
  462. data/lib/bioroebe/palindromes/palindrome_generator.rb +272 -0
  463. data/lib/bioroebe/parsers/biolang_parser.rb +156 -0
  464. data/lib/bioroebe/parsers/blosum_parser.rb +222 -0
  465. data/lib/bioroebe/parsers/genbank_parser.rb +78 -0
  466. data/lib/bioroebe/parsers/gff.rb +346 -0
  467. data/lib/bioroebe/parsers/parse_embl.rb +76 -0
  468. data/lib/bioroebe/parsers/stride_parser.rb +117 -0
  469. data/lib/bioroebe/patterns/README.md +5 -0
  470. data/lib/bioroebe/patterns/analyse_glycosylation_pattern.rb +149 -0
  471. data/lib/bioroebe/patterns/is_this_sequence_a_EGF2_pattern.rb +66 -0
  472. data/lib/bioroebe/patterns/profile_pattern.rb +182 -0
  473. data/lib/bioroebe/patterns/rgg_scanner.rb +160 -0
  474. data/lib/bioroebe/patterns/scan_for_repeat.rb +157 -0
  475. data/lib/bioroebe/pdb/download_this_pdb.rb +67 -0
  476. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +164 -0
  477. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +63 -0
  478. data/lib/bioroebe/pdb/parse_pdb_file.rb +1086 -0
  479. data/lib/bioroebe/pdb/report_secondary_structures_from_this_pdb_file.rb +225 -0
  480. data/lib/bioroebe/perl/README.md +7 -0
  481. data/lib/bioroebe/perl/local_to_global.pl +694 -0
  482. data/lib/bioroebe/project/project.rb +264 -0
  483. data/lib/bioroebe/protein_structure/alpha_helix.rb +96 -0
  484. data/lib/bioroebe/protein_structure/helical_wheel.rb +205 -0
  485. data/lib/bioroebe/raw_sequence/README.md +17 -0
  486. data/lib/bioroebe/raw_sequence/raw_sequence.rb +557 -0
  487. data/lib/bioroebe/readline/README.md +2 -0
  488. data/lib/bioroebe/readline/readline.rb +31 -0
  489. data/lib/bioroebe/regexes/README.md +2 -0
  490. data/lib/bioroebe/regexes/regexes.rb +34 -0
  491. data/lib/bioroebe/requires/commandline_application.rb +5 -0
  492. data/lib/bioroebe/requires/require_all_aminoacids_files.rb +28 -0
  493. data/lib/bioroebe/requires/require_all_calculate_files.rb +26 -0
  494. data/lib/bioroebe/requires/require_all_codon_files.rb +26 -0
  495. data/lib/bioroebe/requires/require_all_colour_scheme_files.rb +26 -0
  496. data/lib/bioroebe/requires/require_all_count_files.rb +26 -0
  497. data/lib/bioroebe/requires/require_all_dotplot_files.rb +28 -0
  498. data/lib/bioroebe/requires/require_all_electron_microscopy_files.rb +26 -0
  499. data/lib/bioroebe/requires/require_all_enzymes_files.rb +28 -0
  500. data/lib/bioroebe/requires/require_all_fasta_and_fastq_files.rb +32 -0
  501. data/lib/bioroebe/requires/require_all_nucleotides_files.rb +28 -0
  502. data/lib/bioroebe/requires/require_all_palindromes_files.rb +29 -0
  503. data/lib/bioroebe/requires/require_all_parser_files.rb +28 -0
  504. data/lib/bioroebe/requires/require_all_pattern_files.rb +29 -0
  505. data/lib/bioroebe/requires/require_all_pdb_files.rb +26 -0
  506. data/lib/bioroebe/requires/require_all_sequence_files.rb +26 -0
  507. data/lib/bioroebe/requires/require_all_string_matching_files.rb +28 -0
  508. data/lib/bioroebe/requires/require_all_svg_files.rb +12 -0
  509. data/lib/bioroebe/requires/require_all_taxonomy_files.rb +35 -0
  510. data/lib/bioroebe/requires/require_all_utility_scripts_files.rb +32 -0
  511. data/lib/bioroebe/requires/require_cleave_and_digest.rb +24 -0
  512. data/lib/bioroebe/requires/require_colours.rb +20 -0
  513. data/lib/bioroebe/requires/require_encoding.rb +7 -0
  514. data/lib/bioroebe/requires/require_sequence.rb +7 -0
  515. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +162 -0
  516. data/lib/bioroebe/requires/require_the_bioroebe_shell.rb +7 -0
  517. data/lib/bioroebe/requires/require_the_bioroebe_sinatra_components.rb +7 -0
  518. data/lib/bioroebe/requires/require_the_constants.rb +23 -0
  519. data/lib/bioroebe/requires/require_the_toplevel_methods.rb +29 -0
  520. data/lib/bioroebe/requires/require_yaml.rb +94 -0
  521. data/lib/bioroebe/sequence/alignment.rb +214 -0
  522. data/lib/bioroebe/sequence/dna.rb +211 -0
  523. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +404 -0
  524. data/lib/bioroebe/sequence/protein.rb +281 -0
  525. data/lib/bioroebe/sequence/reverse_complement.rb +148 -0
  526. data/lib/bioroebe/sequence/sequence.rb +706 -0
  527. data/lib/bioroebe/shell/add.rb +108 -0
  528. data/lib/bioroebe/shell/assign.rb +360 -0
  529. data/lib/bioroebe/shell/chop_and_cut.rb +281 -0
  530. data/lib/bioroebe/shell/colours/colours.rb +235 -0
  531. data/lib/bioroebe/shell/configuration/additionally_set_xorg_buffer.yml +1 -0
  532. data/lib/bioroebe/shell/configuration/may_we_show_the_startup_information.yml +1 -0
  533. data/lib/bioroebe/shell/configuration/upcase_nucleotides.yml +1 -0
  534. data/lib/bioroebe/shell/configuration/use_silent_startup.yml +1 -0
  535. data/lib/bioroebe/shell/constants.rb +166 -0
  536. data/lib/bioroebe/shell/download.rb +335 -0
  537. data/lib/bioroebe/shell/enable_and_disable.rb +158 -0
  538. data/lib/bioroebe/shell/enzymes.rb +310 -0
  539. data/lib/bioroebe/shell/fasta.rb +345 -0
  540. data/lib/bioroebe/shell/gtk.rb +76 -0
  541. data/lib/bioroebe/shell/help/class.rb +443 -0
  542. data/lib/bioroebe/shell/help/help.rb +25 -0
  543. data/lib/bioroebe/shell/history.rb +132 -0
  544. data/lib/bioroebe/shell/initialize.rb +217 -0
  545. data/lib/bioroebe/shell/loop.rb +74 -0
  546. data/lib/bioroebe/shell/menu.rb +5320 -0
  547. data/lib/bioroebe/shell/misc.rb +4341 -0
  548. data/lib/bioroebe/shell/prompt.rb +107 -0
  549. data/lib/bioroebe/shell/random.rb +289 -0
  550. data/lib/bioroebe/shell/readline/readline.rb +91 -0
  551. data/lib/bioroebe/shell/reset.rb +335 -0
  552. data/lib/bioroebe/shell/scan_and_parse.rb +135 -0
  553. data/lib/bioroebe/shell/search.rb +337 -0
  554. data/lib/bioroebe/shell/sequences.rb +200 -0
  555. data/lib/bioroebe/shell/shell.rb +41 -0
  556. data/lib/bioroebe/shell/show_report_and_display.rb +2901 -0
  557. data/lib/bioroebe/shell/startup.rb +127 -0
  558. data/lib/bioroebe/shell/taxonomy.rb +14 -0
  559. data/lib/bioroebe/shell/tk.rb +23 -0
  560. data/lib/bioroebe/shell/user_input.rb +88 -0
  561. data/lib/bioroebe/shell/xorg.rb +45 -0
  562. data/lib/bioroebe/siRNA/README.md +2 -0
  563. data/lib/bioroebe/siRNA/siRNA.rb +93 -0
  564. data/lib/bioroebe/string_matching/README.md +13 -0
  565. data/lib/bioroebe/string_matching/find_longest_substring.rb +162 -0
  566. data/lib/bioroebe/string_matching/find_longest_substring_via_LCS_algorithm.rb +175 -0
  567. data/lib/bioroebe/string_matching/hamming_distance.rb +313 -0
  568. data/lib/bioroebe/string_matching/levensthein.rb +698 -0
  569. data/lib/bioroebe/string_matching/simple_string_comparer.rb +294 -0
  570. data/lib/bioroebe/string_matching/smith_waterman.rb +276 -0
  571. data/lib/bioroebe/svg/README.md +1 -0
  572. data/lib/bioroebe/svg/glyph.rb +719 -0
  573. data/lib/bioroebe/svg/mini_feature.rb +111 -0
  574. data/lib/bioroebe/svg/page.rb +570 -0
  575. data/lib/bioroebe/svg/primitive.rb +70 -0
  576. data/lib/bioroebe/svg/svgee.rb +326 -0
  577. data/lib/bioroebe/svg/track.rb +263 -0
  578. data/lib/bioroebe/taxonomy/README.md +1 -0
  579. data/lib/bioroebe/taxonomy/chart.rb +95 -0
  580. data/lib/bioroebe/taxonomy/class_methods.rb +181 -0
  581. data/lib/bioroebe/taxonomy/colours.rb +26 -0
  582. data/lib/bioroebe/taxonomy/constants.rb +218 -0
  583. data/lib/bioroebe/taxonomy/edit.rb +97 -0
  584. data/lib/bioroebe/taxonomy/help/help.rb +65 -0
  585. data/lib/bioroebe/taxonomy/help/helpline.rb +53 -0
  586. data/lib/bioroebe/taxonomy/info/check_available.rb +143 -0
  587. data/lib/bioroebe/taxonomy/info/info.rb +337 -0
  588. data/lib/bioroebe/taxonomy/info/is_dna.rb +150 -0
  589. data/lib/bioroebe/taxonomy/interactive.rb +1933 -0
  590. data/lib/bioroebe/taxonomy/menu.rb +905 -0
  591. data/lib/bioroebe/taxonomy/node.rb +118 -0
  592. data/lib/bioroebe/taxonomy/parse_fasta.rb +383 -0
  593. data/lib/bioroebe/taxonomy/shared.rb +287 -0
  594. data/lib/bioroebe/taxonomy/taxonomy.rb +521 -0
  595. data/lib/bioroebe/toplevel_methods/ad_hoc_task.rb +56 -0
  596. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +715 -0
  597. data/lib/bioroebe/toplevel_methods/atomic_composition.rb +198 -0
  598. data/lib/bioroebe/toplevel_methods/base_composition.rb +121 -0
  599. data/lib/bioroebe/toplevel_methods/blast.rb +153 -0
  600. data/lib/bioroebe/toplevel_methods/calculate_n50_value.rb +57 -0
  601. data/lib/bioroebe/toplevel_methods/cat.rb +71 -0
  602. data/lib/bioroebe/toplevel_methods/chunked_display.rb +92 -0
  603. data/lib/bioroebe/toplevel_methods/cliner.rb +81 -0
  604. data/lib/bioroebe/toplevel_methods/complement.rb +58 -0
  605. data/lib/bioroebe/toplevel_methods/convert_global_env.rb +39 -0
  606. data/lib/bioroebe/toplevel_methods/databases.rb +73 -0
  607. data/lib/bioroebe/toplevel_methods/delimiter.rb +19 -0
  608. data/lib/bioroebe/toplevel_methods/digest.rb +71 -0
  609. data/lib/bioroebe/toplevel_methods/download_and_fetch_data.rb +146 -0
  610. data/lib/bioroebe/toplevel_methods/e.rb +20 -0
  611. data/lib/bioroebe/toplevel_methods/editor.rb +21 -0
  612. data/lib/bioroebe/toplevel_methods/esystem.rb +22 -0
  613. data/lib/bioroebe/toplevel_methods/exponential_growth.rb +74 -0
  614. data/lib/bioroebe/toplevel_methods/extract.rb +56 -0
  615. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +353 -0
  616. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +257 -0
  617. data/lib/bioroebe/toplevel_methods/frequencies.rb +92 -0
  618. data/lib/bioroebe/toplevel_methods/hamming_distance.rb +60 -0
  619. data/lib/bioroebe/toplevel_methods/infer.rb +66 -0
  620. data/lib/bioroebe/toplevel_methods/is_on_roebe.rb +39 -0
  621. data/lib/bioroebe/toplevel_methods/leading_five_prime_and_trailing_three_prime.rb +101 -0
  622. data/lib/bioroebe/toplevel_methods/levensthein.rb +63 -0
  623. data/lib/bioroebe/toplevel_methods/log_directory.rb +109 -0
  624. data/lib/bioroebe/toplevel_methods/longest_common_substring.rb +55 -0
  625. data/lib/bioroebe/toplevel_methods/map_ncbi_entry_to_eutils_id.rb +88 -0
  626. data/lib/bioroebe/toplevel_methods/matches.rb +259 -0
  627. data/lib/bioroebe/toplevel_methods/misc.rb +432 -0
  628. data/lib/bioroebe/toplevel_methods/nucleotides.rb +715 -0
  629. data/lib/bioroebe/toplevel_methods/number_of_clones.rb +63 -0
  630. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +77 -0
  631. data/lib/bioroebe/toplevel_methods/open_reading_frames.rb +236 -0
  632. data/lib/bioroebe/toplevel_methods/opn.rb +34 -0
  633. data/lib/bioroebe/toplevel_methods/palindromes.rb +127 -0
  634. data/lib/bioroebe/toplevel_methods/parse.rb +59 -0
  635. data/lib/bioroebe/toplevel_methods/phred_error_probability.rb +68 -0
  636. data/lib/bioroebe/toplevel_methods/rds.rb +24 -0
  637. data/lib/bioroebe/toplevel_methods/remove.rb +86 -0
  638. data/lib/bioroebe/toplevel_methods/return_source_code_of_this_method.rb +35 -0
  639. data/lib/bioroebe/toplevel_methods/return_subsequence_based_on_indices.rb +68 -0
  640. data/lib/bioroebe/toplevel_methods/rna_splicing.rb +73 -0
  641. data/lib/bioroebe/toplevel_methods/rnalfold.rb +69 -0
  642. data/lib/bioroebe/toplevel_methods/searching_and_finding.rb +116 -0
  643. data/lib/bioroebe/toplevel_methods/shuffleseq.rb +37 -0
  644. data/lib/bioroebe/toplevel_methods/statistics.rb +53 -0
  645. data/lib/bioroebe/toplevel_methods/sum_of_odd_integers.rb +62 -0
  646. data/lib/bioroebe/toplevel_methods/taxonomy.rb +187 -0
  647. data/lib/bioroebe/toplevel_methods/three_delimiter.rb +34 -0
  648. data/lib/bioroebe/toplevel_methods/time_and_date.rb +53 -0
  649. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +26 -0
  650. data/lib/bioroebe/toplevel_methods/truncate.rb +48 -0
  651. data/lib/bioroebe/toplevel_methods/url.rb +36 -0
  652. data/lib/bioroebe/toplevel_methods/verbose.rb +59 -0
  653. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +191 -0
  654. data/lib/bioroebe/utility_scripts/analyse_local_dataset.rb +119 -0
  655. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +230 -0
  656. data/lib/bioroebe/utility_scripts/compacter.rb +131 -0
  657. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +529 -0
  658. data/lib/bioroebe/utility_scripts/consensus_sequence.rb +374 -0
  659. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +130 -0
  660. data/lib/bioroebe/utility_scripts/determine_antigenic_areas.rb +115 -0
  661. data/lib/bioroebe/utility_scripts/determine_missing_nucleotides_percentage.rb +137 -0
  662. data/lib/bioroebe/utility_scripts/display_open_reading_frames/determine.rb +73 -0
  663. data/lib/bioroebe/utility_scripts/display_open_reading_frames/display_open_reading_frames.rb +31 -0
  664. data/lib/bioroebe/utility_scripts/display_open_reading_frames/initialize.rb +37 -0
  665. data/lib/bioroebe/utility_scripts/display_open_reading_frames/menu.rb +49 -0
  666. data/lib/bioroebe/utility_scripts/display_open_reading_frames/misc.rb +471 -0
  667. data/lib/bioroebe/utility_scripts/display_open_reading_frames/report.rb +113 -0
  668. data/lib/bioroebe/utility_scripts/display_open_reading_frames/reset.rb +56 -0
  669. data/lib/bioroebe/utility_scripts/dot_alignment.rb +177 -0
  670. data/lib/bioroebe/utility_scripts/download_files_from_rebase.rb +72 -0
  671. data/lib/bioroebe/utility_scripts/find_gene.rb +202 -0
  672. data/lib/bioroebe/utility_scripts/mirror_repeat.rb +235 -0
  673. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +151 -0
  674. data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +168 -0
  675. data/lib/bioroebe/utility_scripts/pathways.rb +152 -0
  676. data/lib/bioroebe/utility_scripts/permutations.rb +145 -0
  677. data/lib/bioroebe/utility_scripts/punnet/punnet.rb +126 -0
  678. data/lib/bioroebe/utility_scripts/show_this_dna_sequence.rb +148 -0
  679. data/lib/bioroebe/utility_scripts/showorf/constants.rb +36 -0
  680. data/lib/bioroebe/utility_scripts/showorf/help.rb +33 -0
  681. data/lib/bioroebe/utility_scripts/showorf/initialize.rb +52 -0
  682. data/lib/bioroebe/utility_scripts/showorf/menu.rb +68 -0
  683. data/lib/bioroebe/utility_scripts/showorf/reset.rb +39 -0
  684. data/lib/bioroebe/utility_scripts/showorf/run.rb +152 -0
  685. data/lib/bioroebe/utility_scripts/showorf/show.rb +97 -0
  686. data/lib/bioroebe/utility_scripts/showorf/showorf.rb +488 -0
  687. data/lib/bioroebe/version/version.rb +44 -0
  688. data/lib/bioroebe/viennarna/README.md +3 -0
  689. data/lib/bioroebe/viennarna/rnafold_wrapper.rb +196 -0
  690. data/lib/bioroebe/with_gui.rb +18 -0
  691. data/lib/bioroebe/www/bioroebe.cgi +44 -0
  692. data/lib/bioroebe/www/embeddable_interface.rb +686 -0
  693. data/lib/bioroebe/www/sinatra/sinatra.rb +1013 -0
  694. data/lib/bioroebe/yaml/agarose/agarose_concentrations.yml +21 -0
  695. data/lib/bioroebe/yaml/aminoacids/amino_acids.yml +92 -0
  696. data/lib/bioroebe/yaml/aminoacids/amino_acids_abbreviations.yml +31 -0
  697. data/lib/bioroebe/yaml/aminoacids/amino_acids_average_mass_table.yml +33 -0
  698. data/lib/bioroebe/yaml/aminoacids/amino_acids_classification.yml +18 -0
  699. data/lib/bioroebe/yaml/aminoacids/amino_acids_english.yml +26 -0
  700. data/lib/bioroebe/yaml/aminoacids/amino_acids_frequency.yml +44 -0
  701. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +61 -0
  702. data/lib/bioroebe/yaml/aminoacids/amino_acids_molecular_formula.yml +32 -0
  703. data/lib/bioroebe/yaml/aminoacids/amino_acids_monoisotopic_mass_table.yml +38 -0
  704. data/lib/bioroebe/yaml/aminoacids/amino_acids_reste.yml +35 -0
  705. data/lib/bioroebe/yaml/aminoacids/amino_acids_three_to_one.yml +34 -0
  706. data/lib/bioroebe/yaml/aminoacids/hydropathy_table.yml +44 -0
  707. data/lib/bioroebe/yaml/aminoacids/molecular_weight.yml +29 -0
  708. data/lib/bioroebe/yaml/aminoacids/simple_aminoacids.yml +66 -0
  709. data/lib/bioroebe/yaml/aminoacids/weight_of_common_proteins.yml +33 -0
  710. data/lib/bioroebe/yaml/antisense/antisense.yml +9 -0
  711. data/lib/bioroebe/yaml/base_composition_of_dna.yml +37 -0
  712. data/lib/bioroebe/yaml/blosum/blosum45.yml +36 -0
  713. data/lib/bioroebe/yaml/blosum/blosum50.yml +34 -0
  714. data/lib/bioroebe/yaml/blosum/blosum62.yml +35 -0
  715. data/lib/bioroebe/yaml/blosum/blosum80.yml +37 -0
  716. data/lib/bioroebe/yaml/blosum/blosum90.yml +36 -0
  717. data/lib/bioroebe/yaml/blosum/blosum_matrix.yml +200 -0
  718. data/lib/bioroebe/yaml/chromosomes/chromosome_numbers.yml +30 -0
  719. data/lib/bioroebe/yaml/codon_tables/1.yml +113 -0
  720. data/lib/bioroebe/yaml/codon_tables/10.yml +89 -0
  721. data/lib/bioroebe/yaml/codon_tables/11.yml +91 -0
  722. data/lib/bioroebe/yaml/codon_tables/12.yml +89 -0
  723. data/lib/bioroebe/yaml/codon_tables/13.yml +89 -0
  724. data/lib/bioroebe/yaml/codon_tables/14.yml +89 -0
  725. data/lib/bioroebe/yaml/codon_tables/15.yml +94 -0
  726. data/lib/bioroebe/yaml/codon_tables/16.yml +89 -0
  727. data/lib/bioroebe/yaml/codon_tables/2.yml +86 -0
  728. data/lib/bioroebe/yaml/codon_tables/21.yml +89 -0
  729. data/lib/bioroebe/yaml/codon_tables/22.yml +89 -0
  730. data/lib/bioroebe/yaml/codon_tables/23.yml +91 -0
  731. data/lib/bioroebe/yaml/codon_tables/24.yml +89 -0
  732. data/lib/bioroebe/yaml/codon_tables/25.yml +89 -0
  733. data/lib/bioroebe/yaml/codon_tables/26.yml +96 -0
  734. data/lib/bioroebe/yaml/codon_tables/27.yml +104 -0
  735. data/lib/bioroebe/yaml/codon_tables/28.yml +97 -0
  736. data/lib/bioroebe/yaml/codon_tables/29.yml +93 -0
  737. data/lib/bioroebe/yaml/codon_tables/3.yml +98 -0
  738. data/lib/bioroebe/yaml/codon_tables/30.yml +91 -0
  739. data/lib/bioroebe/yaml/codon_tables/31.yml +94 -0
  740. data/lib/bioroebe/yaml/codon_tables/33.yml +101 -0
  741. data/lib/bioroebe/yaml/codon_tables/4.yml +96 -0
  742. data/lib/bioroebe/yaml/codon_tables/5.yml +100 -0
  743. data/lib/bioroebe/yaml/codon_tables/6.yml +96 -0
  744. data/lib/bioroebe/yaml/codon_tables/9.yml +97 -0
  745. data/lib/bioroebe/yaml/codon_tables/overview.yml +42 -0
  746. data/lib/bioroebe/yaml/configuration/README.md +12 -0
  747. data/lib/bioroebe/yaml/configuration/browser.yml +1 -0
  748. data/lib/bioroebe/yaml/configuration/colourize_fasta_sequences.yml +14 -0
  749. data/lib/bioroebe/yaml/configuration/default_colours_for_the_aminoacids.yml +28 -0
  750. data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -0
  751. data/lib/bioroebe/yaml/configuration/try_to_use_matplotlib.yml +1 -0
  752. data/lib/bioroebe/yaml/configuration/use_opn.yml +1 -0
  753. data/lib/bioroebe/yaml/configuration/use_this_database.yml +1 -0
  754. data/lib/bioroebe/yaml/create_these_directories_on_startup/create_these_directories_on_startup.yml +9 -0
  755. data/lib/bioroebe/yaml/default_dna_input.yml +3 -0
  756. data/lib/bioroebe/yaml/enzymes/enzyme_classes.yml +15 -0
  757. data/lib/bioroebe/yaml/enzymes/pH-Optima.yml +11 -0
  758. data/lib/bioroebe/yaml/fasta_and_fastq/fastq_quality_schemes.yml +44 -0
  759. data/lib/bioroebe/yaml/genomes/README.md +16 -0
  760. data/lib/bioroebe/yaml/humans/README.md +2 -0
  761. data/lib/bioroebe/yaml/humans/human_chromosomes.yml +53 -0
  762. data/lib/bioroebe/yaml/laboratory/README.md +1 -0
  763. data/lib/bioroebe/yaml/laboratory/pipettes.yml +8 -0
  764. data/lib/bioroebe/yaml/mRNA/mRNA.yml +16 -0
  765. data/lib/bioroebe/yaml/nuclear_localization_sequences.yml +15 -0
  766. data/lib/bioroebe/yaml/nucleotides/abbreviations_for_nucleotides.yml +29 -0
  767. data/lib/bioroebe/yaml/nucleotides/nucleotide_density.yml +10 -0
  768. data/lib/bioroebe/yaml/nucleotides/nucleotides.yml +34 -0
  769. data/lib/bioroebe/yaml/nucleotides/nucleotides_weight.yml +12 -0
  770. data/lib/bioroebe/yaml/pathways/README.md +2 -0
  771. data/lib/bioroebe/yaml/pathways/citric_acid_cycle.yml +16 -0
  772. data/lib/bioroebe/yaml/pathways/glycolysis.yml +20 -0
  773. data/lib/bioroebe/yaml/pathways/shikimate_pathway.yml +23 -0
  774. data/lib/bioroebe/yaml/pathways/urea_cycle.yml +11 -0
  775. data/lib/bioroebe/yaml/primers/README.md +4 -0
  776. data/lib/bioroebe/yaml/primers/primers.yml +3 -0
  777. data/lib/bioroebe/yaml/promoters/35S.yml +15 -0
  778. data/lib/bioroebe/yaml/promoters/strong_promoters.yml +24 -0
  779. data/lib/bioroebe/yaml/proteases/proteases.yml +3 -0
  780. data/lib/bioroebe/yaml/proteins/ubiquitin.yml +4 -0
  781. data/lib/bioroebe/yaml/remote_urls/README.md +4 -0
  782. data/lib/bioroebe/yaml/remote_urls/remote_urls.yml +3 -0
  783. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +630 -0
  784. data/lib/bioroebe/yaml/sequences/JX472995_Green_fluorescent_protein_from_Aequorea_victoria.fasta +14 -0
  785. data/lib/bioroebe/yaml/sequences/README.md +2 -0
  786. data/lib/bioroebe/yaml/talens.yml +22 -0
  787. data/lib/bioroebe/yaml/viruses/ecoli_phages.yml +63 -0
  788. data/lib/bioroebe/yaml/viruses/viruses.yml +6 -0
  789. data/lib/bioroebe.rb +5 -0
  790. data/spec/testing_toplevel_method_editor.rb +20 -0
  791. data/spec/testing_toplevel_method_url.rb +15 -0
  792. data/spec/testing_toplevel_method_verbose.rb +13 -0
  793. data/test/advanced_svg_example.rb +307 -0
  794. data/test/testing_bioroebe.rb +25 -0
  795. data/test/testing_codons.rb +45 -0
  796. data/test/testing_dna_to_rna_conversion.rb +15 -0
  797. data/test/testing_parse_pdb_file.rb +23 -0
  798. data/test/testing_reverse_complement.rb +32 -0
  799. data/test/testing_svg_component_of_bioroebe.rb +311 -0
  800. data/test/testing_svg_component_of_bioroebe_from_json_dataset.rb +34 -0
  801. data/test/testing_taxonomy.rb +22 -0
  802. metadata +1059 -0
@@ -0,0 +1,1086 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: true
4
+ # =========================================================================== #
5
+ # === Bioroebe::ParsePdbFile
6
+ #
7
+ # This class will parse a .pdb file.
8
+ #
9
+ # The format for a .pdb file is described here:
10
+ #
11
+ # https://www.wwpdb.org/documentation/file-format
12
+ # https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
13
+ #
14
+ # The following short reference table can be used to get an overview of
15
+ # the main attributes of this format:
16
+ #
17
+ # Record Type Data Provided by Record
18
+ #
19
+ #----------------|-------------------------------------------------------------|
20
+ # ATOM | atomic coordinate record containing the X,Y,Z orthogonal Å |
21
+ # | coordinates for atoms in standard residues (amino acids and |
22
+ # | nucleic acids). Each atom in the coordinate section is |
23
+ # | identified by a sequential number in the entry file. |
24
+ # | ATOM records for proteins are listed from amino to |
25
+ # | carboxyl terminus. |
26
+ #----------------|-------------------------------------------------------------|
27
+ # HETATM | atomic coordinate record containing the X,Y,Z orthogonal Å |
28
+ # | coordinates for atoms in nonstandard residues. Nonstandard |
29
+ # | residues include inhibitors, cofactors, ions, and solvent. |
30
+ # | The only functional difference from ATOM records is that |
31
+ # | HETATM residues are by default not connected to other |
32
+ # | residues. Note that water residues should be in HETATM |
33
+ # | records. |
34
+ #----------------|-------------------------------------------------------------|
35
+ # TER | indicates the end of a chain of residues. For example, a |
36
+ # | hemoglobin molecule consists of four subunit chains that |
37
+ # | are not connected. TER indicates the end of a chain and |
38
+ # | prevents the display of a connection to the next chain. |
39
+ #----------------|-------------------------------------------------------------|
40
+ # HELIX | indicates the location and type (right-handed alpha, etc.) |
41
+ # | of helices. One record per helix. |
42
+ #----------------|-------------------------------------------------------------|
43
+ # SHEET | indicates the location, sense (anti-parallel, etc.) and |
44
+ # | registration with respect to the previous strand in the |
45
+ # | sheet (if any) of each strand in the model. One record |
46
+ # | per strand. |
47
+ # | |
48
+ # | See also: |
49
+ # | |
50
+ # | https://www.wwpdb.org/documentation/file-format-content/format23/sect5.html#SHEET
51
+ # | |
52
+ #----------------|-------------------------------------------------------------|
53
+ # SSBOND | defines disulfide bond linkages between cysteine residues. |
54
+ #----------------|-------------------------------------------------------------|
55
+ # KEYWDS | contains some extra information about the given structure |
56
+ #----------------|-------------------------------------------------------------|
57
+ #
58
+ # Format explained in more detail:
59
+ #
60
+ # COLUMNS DATA TYPE CONTENTS
61
+ # ------------------------------------------------------------------------------
62
+ # 1 - 6 Record name "ATOM "
63
+ # 7 - 11 Integer Atom serial number.
64
+ # 13 - 16 Atom Atom name.
65
+ # 17 Character Alternate location indicator.
66
+ # 18 - 20 Residue name Residue name.
67
+ # 22 Character Chain identifier.
68
+ # 23 - 26 Integer Residue sequence number.
69
+ # 27 AChar Code for insertion of residues.
70
+ # 31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms.
71
+ # 39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms.
72
+ # 47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms.
73
+ # 55 - 60 Real(6.2) Occupancy.
74
+ # 61 - 66 Real(6.2) Temperature factor (Default = 0.0).
75
+ # 73 - 76 LString(4) Segment identifier, left-justified.
76
+ # 77 - 78 LString(2) Element symbol, right-justified.
77
+ # 79 - 80 LString(2) Charge on the atom.
78
+ #
79
+ # Example:
80
+ #
81
+ #          1         2         3         4         5         6         7         8
82
+ # 12345678901234567890123456789012345678901234567890123456789012345678901234567890
83
+ # ATOM    145  N   VAL A  25      32.433  16.336  57.540  1.00 11.92      A1   N
84
+ # ATOM    146  CA  VAL A  25      31.132  16.439  58.160  1.00 11.85      A1   C
85
+ # ATOM    147  C   VAL A  25      30.447  15.105  58.363  1.00 12.34      A1   C
86
+ # ATOM    148  O   VAL A  25      29.520  15.059  59.174  1.00 15.65      A1   O
87
+ # ATOM    149  CB AVAL A  25      30.385  17.437  57.230  0.28 13.88      A1   C
88
+ # ATOM    150  CB BVAL A  25      30.166  17.399  57.373  0.72 15.41      A1   C
89
+ # ATOM    151  CG1AVAL A  25      28.870  17.401  57.336  0.28 12.64      A1   C
90
+ # ATOM    152  CG1BVAL A  25      30.805  18.788  57.449  0.72 15.11      A1   C
91
+ # ATOM    153  CG2AVAL A  25      30.835  18.826  57.661  0.28 13.58      A1   C
92
+ # ATOM    154  CG2BVAL A  25      29.909  16.996  55.922  0.72 13.25      A1   C
93
+ #
94
+ # Usage example:
95
+ #
96
+ # Bioroebe::ParsePdbFile.new(ARGV)
97
+ #
98
+ # =========================================================================== #
99
+ # If you need a sample .pdb file, have a look here:
100
+ #
101
+ # https://gist.github.com/cstein/6699200
102
+ #
103
+ # For more information about the ATOM entry, have a look here:
104
+ #
105
+ # https://zhanglab.ccmb.med.umich.edu/SSIPe/pdb_atom_format.html#ATOM
106
+ #
107
+ # =========================================================================== #
108
+ # require 'bioroebe/pdb/parse_pdb_file.rb'
109
+ # Bioroebe.return_aminoacid_sequence_from_this_pdb_file
110
+ # =========================================================================== #
111
+ require 'bioroebe/base/commandline_application/commandline_application.rb'
112
+
113
+ module Bioroebe
114
+
115
+ class ParsePdbFile < ::Bioroebe::CommandlineApplication # === Bioroebe::ParsePdbFile
116
+
117
+ # ========================================================================= #
118
+ # === NAMESPACE
119
+ # ========================================================================= #
120
# Result of calling inspect on this class; reset() copies it into @namespace.
NAMESPACE = inspect

# ========================================================================= #
# === DEFAULT_PDB_FILE
#
# Path used when no input is passed to new() or set_pdb_files(): a file
# called "test.pdb" below whatever Bioroebe.log_directory? returns.
#
# The String is frozen explicitly. Since Ruby 3.0 the magic comment
# "frozen_string_literal: true" no longer freezes interpolated Strings,
# so without .freeze this constant could be modified in place.
# ========================================================================= #
DEFAULT_PDB_FILE = "#{::Bioroebe.log_directory?}test.pdb".freeze
126
+
127
+ # ========================================================================= #
128
+ # === initialize
129
+ # ========================================================================= #
130
# Sets up a new parser instance.
#
# i:           a path or an Array of entries. Non-Array input is wrapped,
#              flattened and stripped of nil values first.
# run_already: run() is invoked at the end unless this is false/nil or
#              the Symbol :do_not_run_yet.
#
# An optional block may return :be_silent, in which case set_be_silent()
# is called before running:
#
#   Bioroebe::ParsePdbFile.new(ARGV) { :be_silent }
#
def initialize(
    i           = DEFAULT_PDB_FILE,
    run_already = true
  )
  reset
  i = [i].flatten.compact unless i.is_a?(Array)
  # The input is split in two by helper methods defined elsewhere:
  # presumably "--option" style entries versus file names (going by the
  # names of these helpers; confirm against their implementation).
  set_commandline_arguments(return_entries_with_two_leading_hyphens_from(i))
  set_pdb_files(return_entries_without_two_leading_hyphens(i))
  # ======================================================================= #
  # === Handle blocks
  # ======================================================================= #
  if block_given?
    yielded = yield
    set_be_silent if :be_silent == yielded # === :be_silent
  end
  postpone_the_run = (:do_not_run_yet == run_already)
  run if run_already && !postpone_the_run
end
168
+
169
+ # ========================================================================= #
170
+ # === reset (reset tag)
171
+ # ========================================================================= #
172
# Restores the default state: calls the parent's reset(), assigns the
# instance variables listed below and finally delegates to
# reset_internal_variables(), whose return value is returned.
def reset
  super()
  @namespace = NAMESPACE # === @namespace
  # ======================================================================= #
  # === @do_create_a_fasta_file
  #
  # Deliberately assigned here and not in reset_internal_variables(),
  # because that method would otherwise discard the commandline
  # argument passed in by the user.
  # ======================================================================= #
  @do_create_a_fasta_file = false
  # ======================================================================= #
  # === @x_coordinates, @y_coordinates, @z_coordinates
  #
  # Three separate, empty Arrays.
  # ======================================================================= #
  @x_coordinates, @y_coordinates, @z_coordinates = [], [], []
  # ======================================================================= #
  # The remaining internal variables are reset through this method.
  # ======================================================================= #
  reset_internal_variables
end
204
+
205
+ # ========================================================================= #
206
+ # === reset_internal_variables
207
+ # ========================================================================= #
208
# Puts every per-file variable back to its initial value. Returns nil.
def reset_internal_variables
  # ======================================================================= #
  # Header and title go through their setter methods.
  # ======================================================================= #
  self.header = ''
  self.title  = ''
  # ======================================================================= #
  # === @alpha_helices, @beta_sheets
  # ======================================================================= #
  @alpha_helices = []
  @beta_sheets   = []
  # ======================================================================= #
  # === @n_chains_are_in_this_atom
  # ======================================================================= #
  @n_chains_are_in_this_atom = 0
  # ======================================================================= #
  # === @does_the_file_exist, @report_the_aminoacid_sequence
  # ======================================================================= #
  @does_the_file_exist           = false
  @report_the_aminoacid_sequence = true
  # ======================================================================= #
  # === @body, @taxid, @name_of_the_species, @taxid_of_the_species
  #
  # All of these start out as nil.
  # ======================================================================= #
  @body = @taxid = @name_of_the_species = @taxid_of_the_species = nil
  # ======================================================================= #
  # === @keywords
  # ======================================================================= #
  @keywords = nil
end
252
+
253
+ # ========================================================================= #
254
+ # === keywords?
255
+ # ========================================================================= #
256
# Reader for @keywords. The value is nil after reset_internal_variables().
def keywords?
  @keywords
end
alias keywords keywords? # === keywords
259
+
260
+ # ========================================================================= #
261
+ # === set_pdb_files
262
+ #
263
+ # We will keep this as an Array.
264
+ # ========================================================================= #
265
# Stores the given .pdb path(s) in @pdb_files, as an Array of absolute
# paths, and returns that Array.
#
# i: a single entry or an Array of entries. Every entry is converted
#    via to_s. Two conveniences are applied per entry:
#
#    - ':1fat' expands to "data/1fat.pdb" below the path returned by
#      Bioroebe.project_base_directory?
#    - "1NR6" becomes "1NR6.pdb" if "1NR6" itself does not exist but
#      "1NR6.pdb" does.
#
# The result is always a new Array. The Array passed in by the caller
# is never modified, so frozen Arrays are accepted as well.
def set_pdb_files(
    i = DEFAULT_PDB_FILE
  )
  entries = i.is_a?(Array) ? i : [i]
  @pdb_files = entries.map { |entry|
    path = entry.to_s
    # ===================================================================== #
    # === :1fat
    # ===================================================================== #
    if path == ':1fat'
      path = "#{::Bioroebe.project_base_directory?}data/1fat.pdb"
    end
    # ===================================================================== #
    # The user may input a String such as "1NR6", but may not want to
    # input the longer "1NR6.pdb". In that case, if such a .pdb file
    # exists, we will use that as path instead.
    # ===================================================================== #
    path = "#{path}.pdb" if !File.exist?(path) && File.exist?("#{path}.pdb")
    File.absolute_path(path) # We require the full local path to the file at hand.
  }
end
290
+
291
+ # ========================================================================= #
292
+ # === try_to_determine_the_taxid_from_this_input
293
+ #
294
+ # This method will attempt to determine the taxid entry.
295
+ # ========================================================================= #
296
# Looks through an Array of .pdb lines for the first line containing
# 'ORGANISM_TAXID:', e.g.
#
#   SOURCE 3 ORGANISM_TAXID: 300852;
#
# and stores the last whitespace-separated token of that line (with any
# ';' removed) in @taxid. Input that is not an Array, or that has no such
# line, leaves @taxid as it was and yields nil.
def try_to_determine_the_taxid_from_this_input(i)
  return unless i.is_a?(Array)
  matching_line = i.find { |line| line.include?('ORGANISM_TAXID:') }
  return unless matching_line
  @taxid = matching_line.strip.delete(';').split(' ').last
end
310
+
311
+ # ========================================================================= #
312
+ # === taxid?
313
+ # ========================================================================= #
314
# Reader for @taxid, the value stored by
# try_to_determine_the_taxid_from_this_input (nil if none was stored).
def taxid?
  @taxid
end
317
+
318
+ # ========================================================================= #
319
+ # === body?
320
+ # ========================================================================= #
321
# Reader for @body, i.e. whatever was last handed to set_body.
def body?
  @body
end
324
+
325
+ # ========================================================================= #
326
+ # === input_sequence?
327
+ #
328
+ # This will return a String.
329
+ # ========================================================================= #
330
# Returns the body as one String: an Array body is joined with the
# constant N (defined elsewhere; presumably a newline - confirm), any
# other body is returned unchanged.
def input_sequence?
  return @body.join(N) if @body.is_a?(Array)
  @body
end
337
+
338
+ # ========================================================================= #
339
+ # === set_body
340
+ #
341
+ # This method keeps track of the main "body" of the .pdb file at hand.
342
+ # ========================================================================= #
343
# Remembers the main "body" of the .pdb file at hand in @body. The value
# is stored as given, without copying or conversion.
def set_body(i)
  @body = i
end
alias body= set_body # === body=
346
+
347
+ # ========================================================================= #
348
+ # === set_header
349
+ #
350
+ # The header may have an entry such as:
351
+ #
352
+ # HEADER RIBOSOMAL PROTEIN/RNA 16-APR-10 3IYQ
353
+ #
354
+ # ========================================================================= #
355
# Extracts the header text and stores it in @header.
#
# i - a HEADER line (String), an Array of such lines (only the first one
#     is used), or nil (which resets @header to nil). An empty Array
#     leaves @header untouched.
#
# The word 'HEADER' is removed, surrounding whitespace is stripped and,
# if whitespace remains, only the first whitespace-separated token is
# kept.
# NOTE(review): for "HEADER RIBOSOMAL PROTEIN/RNA 16-APR-10 3IYQ" this
# keeps just "RIBOSOMAL" - confirm that this is intended.
#
# The sanitizing always works on a copy: the incoming String is usually
# the very same object that is stored in the body, and must not lose its
# 'HEADER' record name or trailing newline as a side effect.
def set_header(i)
  if i
    if i.is_a?(Array)
      i = i.first
      return if i.nil? # Can't work with nil-entries.
    end
    i = i.dup # Never modify the caller's String.
    # ===================================================================== #
    # We do a bit sanitizing here.
    # ===================================================================== #
    i.sub!(/HEADER/, '')
    i.strip!
    i = i.split(' ').first if i.include?(' ')
  end
  @header = i
end
alias header= set_header # === header=
375
+
376
+ # ========================================================================= #
377
+ # === string?
378
+ # ========================================================================= #
379
# Returns @body; despite the name no conversion to a String happens here.
def string?
  @body
end
alias data? string? # === data?
382
+
383
+ # ========================================================================= #
384
+ # === n_atoms?
385
+ #
386
+ # Returns how many ATOM entries we have in this .pdb file.
387
+ # ========================================================================= #
388
# Counts the entries of i (default: data?) that start with 'ATOM'.
# Returns nil when i is nil or false.
def n_atoms?(i = data?)
  return unless i
  i.count { |entry| entry.start_with?('ATOM') }
end
alias n_atom_entries? n_atoms? # === n_atom_entries?
391
+
392
+ # ========================================================================= #
393
+ # === title?
394
+ # ========================================================================= #
395
# Reader for @title, the value stored via title= / set_title.
def title?
  @title
end
alias title title? # === title
398
+
399
+ # ========================================================================= #
400
+ # === header?
401
+ # ========================================================================= #
402
# Reader for @header, the value stored via set_header.
def header?
  @header
end
alias header header? # === header
405
+
406
+ # ========================================================================= #
407
+ # === report_n_atoms
408
+ # ========================================================================= #
409
# In verbose mode, prints how many ATOM entries (n_atoms?) were found and
# the path of the file they belong to (main_file?). Does nothing when
# be_verbose? is false.
def report_n_atoms
  return unless be_verbose?
  e "#{sfancy(n_atoms?.to_s)}#{rev} ATOM entries were found "\
    "being part of the file at"
  e "`#{sfile(main_file?)}#{rev}`."
end
416
+
417
+ # ========================================================================= #
418
+ # === readlines_from_this_file
419
+ # ========================================================================= #
420
# Reads the given file and returns its lines as an Array of Strings, as
# produced by File.readlines (line terminators are kept).
def readlines_from_this_file(file)
  File.readlines(file)
end
423
+
424
+ # ========================================================================= #
425
+ # === set_header_title_and_body
426
+ #
427
+ # The input to this method should be an Array.
428
+ # ========================================================================= #
429
# Distributes a dataset (an Array of .pdb lines) over the three slots of
# this object: lines containing 'HEADER' go to set_header, lines
# containing 'TITLE' go to title=, and the complete dataset becomes the
# body.
def set_header_title_and_body(dataset)
  header_lines = dataset.select { |entry| entry.include?('HEADER') }
  set_header(header_lines)
  title_lines = dataset.select { |entry| entry.include?('TITLE') }
  self.title = title_lines
  set_body(dataset)
end
436
+
437
+ # ========================================================================= #
438
+ # === try_to_determine_the_alpha_helices_in_this_protein
439
+ # ========================================================================= #
440
# Collects all lines starting with 'HELIX ' from the given Array and
# stores them in @alpha_helices. Input that is not an Array is ignored.
def try_to_determine_the_alpha_helices_in_this_protein(i)
  return unless i.is_a?(Array)
  @alpha_helices = i.select { |line| line.start_with?('HELIX ') }
end
446
+
447
+ # ========================================================================= #
448
+ # === n_alpha_helices?
449
+ # ========================================================================= #
450
# Number of entries in @alpha_helices. Expects @alpha_helices to have
# been assigned already.
def n_alpha_helices?
  @alpha_helices.size
end
453
+
454
+ # ========================================================================= #
455
+ # === consider_reporting_alpha_helices_that_were_found
456
+ # ========================================================================= #
457
# Prints the number of alpha-helices in i (default: @alpha_helices), but
# only when there is at least one and be_verbose? is true.
def consider_reporting_alpha_helices_that_were_found(
    i = @alpha_helices
  )
  return if i.empty? || !be_verbose?
  erev "This protein contains "\
       "#{slateblue(i.size.to_s)}#{rev} alpha-helices."
end
467
+
468
+ # ========================================================================= #
469
+ # === consider_reporting_beta_sheet_that_were_found
470
+ # ========================================================================= #
471
# Prints the number of beta-sheets in i (default: @beta_sheets).
#
# Nothing is printed when i is empty or when be_verbose? is false; the
# latter keeps this method in line with the alpha-helix report, so that
# silent runs stay silent.
def consider_reporting_beta_sheet_that_were_found(i = @beta_sheets)
  return if i.empty?
  return unless be_verbose?
  e "This protein contains #{slateblue(i.size.to_s)}#{rev} beta-sheets."
end
476
+
477
+ # ========================================================================= #
478
+ # === n_aminoacids?
479
+ # ========================================================================= #
480
# Size of @aminoacid_sequence, or nil when no sequence is available.
def n_aminoacids?
  return unless @aminoacid_sequence
  @aminoacid_sequence.size
end
483
+
484
# ========================================================================= #
# === silently_determine_the_aminoacid_sequence
#
# Derives the one-letter aminoacid sequence from the 'ATOM ' lines of i,
# stores it in @aminoacid_sequence and returns it. As a side effect the
# x/y/z coordinates of every ATOM line are appended to @x_coordinates,
# @y_coordinates and @z_coordinates.
#
# Each residue is identified by its chain id plus residue number and is
# appended once, on its first appearance. Residues of further chains are
# therefore kept even if their numbering starts over, while repeated
# listings of the same residue are not appended again.
#
# The fields are obtained by splitting on whitespace, so lines whose
# columns run into each other are not handled.
# ========================================================================= #
def silently_determine_the_aminoacid_sequence(i)
  this_aminoacid_sequence = ''.dup
  seen_residues = {} # [chain, residue number] => true
  i.each { |line|
    next unless line.start_with?('ATOM ')
    # ===================================================================== #
    # The line may look like this:
    #
    #   ATOM 69 CG2 THR A 8 23.165 11.137 48.942 1.00 30.40 C
    #
    # Fields after splitting on whitespace:
    #
    #   0    1  2   3   4 5 6      7      8      9    10    11
    #
    # 3 is the three-letter residue name, 4 the chain, 5 the residue
    # number and 6..8 are the coordinates. split(' ') already ignores
    # surrounding whitespace, so the line itself is left unmodified.
    # ===================================================================== #
    splitted = line.split(' ')
    @x_coordinates << splitted[6].to_f
    @y_coordinates << splitted[7].to_f
    @z_coordinates << splitted[8].to_f
    residue = [splitted[4], splitted[5]]
    next if seen_residues.key?(residue)
    seen_residues[residue] = true
    # three_to_one() is defined elsewhere; .to_s guards against a nil
    # result for residue names it may not know (assumption - confirm).
    this_aminoacid_sequence << three_to_one(splitted[3]).to_s
  }
  @aminoacid_sequence = this_aminoacid_sequence
end
527
+
528
+ # ========================================================================= #
529
+ # === aminoacid_sequence?
530
+ # ========================================================================= #
531
# Reader for @aminoacid_sequence, the String built by
# silently_determine_the_aminoacid_sequence.
def aminoacid_sequence?
  @aminoacid_sequence
end
534
+
535
+ # ========================================================================= #
536
+ # === calculate_the_distance_between_two_points
537
+ #
538
+ # Pass in two arrays to this method.
539
+ # ========================================================================= #
540
# Euclidean distance between two points in 3D space.
#
# p1, p2 - indexable objects (e.g. Arrays) holding the x, y and z value
#          at the indices 0, 1 and 2.
#
# Returns a Float.
def calculate_the_distance_between_two_points(p1, p2)
  squared_deltas = [0, 1, 2].map { |axis| (p2[axis] - p1[axis]) ** 2 }
  Math.sqrt(squared_deltas[0] + squared_deltas[1] + squared_deltas[2])
end
546
+
547
+ # ========================================================================= #
548
+ # === check_whether_this_pdb_sequence_contains_dna
549
+ # ========================================================================= #
550
# Reports whether the input contains a 'MOLECULE: DNA' entry and, if so,
# prints the DNA sequence found behind it together with a note on
# whether that sequence is a palindrome.
#
# This is a pure reporting method, so it does nothing unless be_verbose?
# is true. A missing body (input_sequence? returning nil) is treated
# like an empty one.
def check_whether_this_pdb_sequence_contains_dna
  return unless be_verbose?
  _ = input_sequence?.to_s
  return unless _.include?('MOLECULE: DNA')
  erev 'This protein sequence contains at the least one DNA strand.'
  # ===================================================================== #
  # The first capture may look like this:
  #
  #   "(5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*CP*GP*CP*G)-3');"
  #
  # All the decoration is removed, leaving only the nucleotide letters.
  # ===================================================================== #
  sequence = _[/ MOLECULE: DNA(.+)$/, 1].to_s.strip
  sequence = sequence.tr('-','').delete("'D(P*);53")
  result = ('This DNA sequence is '+colourize_dna(sequence)+rev+'.').dup
  # ===================================================================== #
  # Check whether it is a palindrome.
  # ===================================================================== #
  if is_this_sequence_a_palindrome? sequence
    result << rev+' It is a palindrome.'
  else
    result << rev+' It is NOT a palindrome.'
  end
  erev result
end
577
+
578
+ # ========================================================================= #
579
+ # === try_to_determine_the_beta_sheets_in_this_protein
580
+ #
581
+ # Beta-sheets begin with the word 'SHEET '.
582
+ # ========================================================================= #
583
# Collects all lines starting with 'SHEET ' from the given Array and
# stores them in @beta_sheets. Input that is not an Array is ignored.
def try_to_determine_the_beta_sheets_in_this_protein(i)
  return unless i.is_a?(Array)
  @beta_sheets = i.select { |line| line.start_with?('SHEET ') }
end
589
+
590
+ # ========================================================================= #
591
+ # === return_all_ATOM_entries
592
+ # ========================================================================= #
593
# Returns every entry of @body that starts with 'ATOM ' (note the
# trailing space).
def return_all_ATOM_entries
  @body.select { |line| line.start_with?('ATOM ') }
end
596
+
597
+ # ========================================================================= #
598
+ # === consider_reporting_how_many_chains_are_in_this_structure
599
+ #
600
+ # A better way to report how many chains are in a structure
601
+ # is via:
602
+ #
603
+ # COMPND 3 CHAIN: A, B;
604
+ #
605
+ # ========================================================================= #
606
# Reports, in verbose mode, how many chains the structure has.
#
# The chains are taken from the body lines that contain both 'COMPND '
# and 'CHAIN: ', for example:
#
#   COMPND 3 CHAIN: A, B, C, D, E, F, G, H, I, J, K, L, M, N;
#   COMPND 8 CHAIN: O, P, Q, R, S, T, U;
#
# Every distinct identifier listed after 'CHAIN:' counts once, so the
# number is right even when letters are skipped (A, D => 2 chains) or
# when an identifier is not an uppercase letter.
#
# Nothing is reported when no chain identifier is found.
def consider_reporting_how_many_chains_are_in_this_structure
  chain_ids = @body.flat_map { |entry|
    next [] unless entry.include?('COMPND ') && entry.include?('CHAIN: ')
    entry.split('CHAIN:', 2).last.delete(';').split(',').map(&:strip).reject(&:empty?)
  }.uniq
  return if chain_ids.empty?
  if be_verbose?
    erev "There are #{steelblue(chain_ids.size.to_s)}#{rev} chains in this molecule."
  end
end
639
+
640
+ # ========================================================================= #
641
+ # === convert_this_alphabet_character_to_number
642
+ #
643
+ # The input of "A" would mean "1".
644
+ # ========================================================================= #
645
# Maps a character to its code point minus 64, so that "A" becomes 1,
# "B" becomes 2 and so on.
def convert_this_alphabet_character_to_number(i)
  offset = 64 # "A".ord is 65.
  i.ord - offset
end
648
+
649
+ # ========================================================================= #
650
+ # === try_to_report_the_organism_at_hand
651
+ #
652
+ # This method will try to extract the organism's name.
653
+ #
654
+ # This entry may look like this:
655
+ #
656
+ # SOURCE 2 ORGANISM_SCIENTIFIC: SQUALUS ACANTHIAS;
657
+ #
658
+ # ========================================================================= #
659
# Extracts the organism's scientific name and its taxid from an Array of
# .pdb lines (default: @body) and, in verbose mode, reports them.
#
# The first line containing 'ORGANISM_SCIENTIFIC:' provides
# @name_of_the_species, the first line containing ' ORGANISM_TAXID: '
# provides @taxid_of_the_species. In both cases the text after the last
# ':' is used, without ';' and surrounding whitespace. Example:
#
#   SOURCE 2 ORGANISM_SCIENTIFIC: SQUALUS ACANTHIAS;
#
# The lines are only read, never modified, so frozen lines are fine and
# the body keeps its original content.
#
# Input that is not a non-empty Array is ignored.
def try_to_report_the_organism_at_hand(i = @body)
  return unless i.is_a?(Array) && !i.empty?
  # ===================================================================== #
  # === ORGANISM_SCIENTIFIC
  # ===================================================================== #
  scientific_line = i.find { |line| line.include?('ORGANISM_SCIENTIFIC:') }
  if scientific_line
    @name_of_the_species = scientific_line.strip.split(':').last.delete(';').strip
  end
  # ===================================================================== #
  # === ORGANISM_TAXID
  #
  # Next try to find out the taxid number of the organism at hand.
  # ===================================================================== #
  taxid_line = i.find { |line| line.include?(' ORGANISM_TAXID: ') }
  if taxid_line
    @taxid_of_the_species = taxid_line.strip.split(':').last.delete(';').strip
  end
  if be_verbose? && @name_of_the_species
    report_extra_information_about_the_species_at_hand
  end
end
690
+
691
+ # ========================================================================= #
692
+ # === name_of_the_species?
693
+ # ========================================================================= #
694
# Reader for @name_of_the_species, as determined by
# try_to_report_the_organism_at_hand.
def name_of_the_species?
  @name_of_the_species
end
697
+
698
+ # ========================================================================= #
699
+ # === report_extra_information_about_the_species_at_hand
700
+ # ========================================================================= #
701
# Prints the scientific name of the organism. When a taxid is known and
# organism_common? yields a non-empty name, the taxid and that common
# name are appended in parentheses.
#
# The colour helpers (steelblue, orange, seagreen, rev), the constant N
# and erev are defined elsewhere.
def report_extra_information_about_the_species_at_hand
  headline = 'The name of the organism (Entry: '+
             steelblue('ORGANISM_SCIENTIFIC')+
             rev+
             ') is'+N
  species = '`'+orange(name_of_the_species?)+rev+'`.'
  result = ''.dup
  result << headline
  result << species
  if @taxid_of_the_species
    organism_common = organism_common?.to_s
    unless organism_common.empty?
      result << rev+' (Taxid: '+
                steelblue(@taxid_of_the_species.to_s)+
                rev+'; '+
                seagreen(
                  organism_common
                )+rev+')'
    end
  end
  erev result
end
721
+
722
+ # ========================================================================= #
723
+ # === organism_common?
724
+ # ========================================================================= #
725
# Returns the text between 'ORGANISM_COMMON: ' and the last ';' of the
# first matching line of the body, or an empty String when the body has
# no such entry. Returns nil when there is no body at all.
def organism_common?
  return unless @body
  @body.join(N)[/ORGANISM_COMMON: (.+);/, 1].to_s
end
734
+
735
+ # ========================================================================= #
736
+ # === process_each_pdb_file
737
+ #
738
+ # This method is the main workhorse method of this class.
739
+ # ========================================================================= #
740
# Walks over @pdb_files. For every entry that is an existing regular file
# the internal state is reset, the file is read, keywords (if a KEYWDS
# line is present), header, title and body are stored, the dataset is
# analyzed and a FASTA file is optionally created. For every other entry
# a notice is printed and @does_the_file_exist is set to false.
def process_each_pdb_file
  @pdb_files.each { |file|
    unless File.exist?(file) && File.file?(file)
      opnn; erev "No file at `#{sfile(file)}#{rev}` could be found."
      @does_the_file_exist = false
      next
    end
    reset_internal_variables # Reset the internal variables here.
    @does_the_file_exist = true
    set_this_file(file)
    dataset = readlines_from_this_file(file)
    keywords_present = dataset.any? { |line| line.include?('KEYWDS') }
    set_keywords(dataset) if keywords_present
    set_header_title_and_body(dataset)
    analyze_the_dataset
    consider_creating_a_fasta_file
  }
end
759
+
760
+ # ========================================================================= #
761
+ # === taxid_of_the_species?
762
+ # ========================================================================= #
763
# Reader for @taxid_of_the_species, as determined by
# try_to_report_the_organism_at_hand.
def taxid_of_the_species?
  @taxid_of_the_species
end
766
+
767
+ # ========================================================================= #
768
+ # === menu (menu tag)
769
+ # ========================================================================= #
770
# Evaluates commandline-style options (default: @commandline_arguments).
# An Array is handled element by element; a single entry is matched
# against the known options below and ignored if it matches none.
def menu(
    i = @commandline_arguments
  )
  return i.each { |entry| menu(entry) } if i.is_a?(Array)
  case i
  # ======================================================================= #
  # === parsedb 2HI4.pdb --no-colours
  # ======================================================================= #
  when /^-?-?no(-|_)?colou?rs$/i
    disable_colours
  # ======================================================================= #
  # === parsedb 2HI4.pdb --create-fasta-file
  # ======================================================================= #
  when /^-?-?create(-|_)?fasta(-|_)?file$/i
    @do_create_a_fasta_file = true
  end
end
790
+
791
+ # ========================================================================= #
792
+ # === return_short_filename
793
+ # ========================================================================= #
794
# Returns the file name of @this_file without its directory part.
def return_short_filename
  File.basename(@this_file)
end
797
+
798
+ # ========================================================================= #
799
+ # === consider_creating_a_fasta_file
800
+ # ========================================================================= #
801
# When @do_create_a_fasta_file is set, writes the aminoacid sequence into
# a .fasta file named after the current .pdb file ("1VII.pdb" becomes
# "1VII.fasta"), resolved to an absolute path via File.absolute_path,
# and announces where it is stored.
def consider_creating_a_fasta_file
  return unless @do_create_a_fasta_file
  what = aminoacid_sequence?
  into = File.absolute_path(
    return_short_filename.sub(/\.pdb$/,'')+'.fasta'
  )
  erev 'Storing into the file `'+sfile(into)+rev+'`.'
  write_what_into(what, into)
end
810
+
811
+ # ========================================================================= #
812
+ # === set_this_file
813
+ # ========================================================================= #
814
# Remembers the file that is currently being worked on in @this_file.
def set_this_file(i)
  @this_file = i
end
817
+
818
+ # ========================================================================= #
819
+ # === main_file?
820
+ # ========================================================================= #
821
# Reader for @this_file, the file that is currently being worked on.
def main_file?
  @this_file
end
alias return_filename main_file? # === return_filename
824
+
825
+ # ========================================================================= #
826
+ # === try_to_determine_the_max_distance_between_the_atoms_in_this_protein?
827
+ # ========================================================================= #
828
# Asks max_distance? for the maximum distance within the given lines
# (default: @body) and prints it when be_verbose? is true.
def try_to_determine_the_max_distance_between_the_atoms_in_this_protein?(
    array = @body
  )
  max_value = max_distance?(array)
  return unless be_verbose?
  erev 'The maximum difference between the atoms is '+
       sfancy(max_value.to_s)+rev
end
alias try_to_determine_the_max_distance_between_the_atoms_in_this_protein try_to_determine_the_max_distance_between_the_atoms_in_this_protein? # === try_to_determine_the_max_distance_between_the_atoms_in_this_protein
837
+
838
+ # ========================================================================= #
839
+ # === max_distance?
840
+ # ========================================================================= #
841
# Returns the largest distance between two successive ATOM records of
# the given lines (default: @body); the last record is additionally
# compared with the first one.
#
# Only lines starting with 'ATOM ' are used, e.g.
#
#   ["ATOM 1 N MET A 41 1.177 -10.035 -3.493 1.00 2.04 N",
#    "ATOM 2 CA MET A 41 0.292 -8.839 -3.377 1.00 1.55 C"]
#
# so that other records (HEADER, REMARK, ...) can no longer contribute
# bogus coordinates. The coordinates are the whitespace-separated fields
# 6, 7 and 8 of such a line.
#
# NOTE(review): this is not the maximum over all pairs of atoms, only
# over neighbouring records - confirm that this is what is wanted.
#
# Returns nil for nil or empty input, and 0 when the input contains no
# ATOM line.
def max_distance?(
    array = @body
  )
  return if array.nil? || array.empty?
  coordinates = array.select { |line| line.start_with?('ATOM ') }.map { |line|
    splitted = line.split(' ')
    [splitted[6].to_f, splitted[7].to_f, splitted[8].to_f]
  }
  max_value = 0
  coordinates.each_with_index { |point, index|
    # The successor of the last point is the first point again.
    neighbour = coordinates[(index + 1) % coordinates.size]
    distance = calculate_the_distance_between_two_points(point, neighbour)
    max_value = distance if distance > max_value
  }
  max_value
end
874
+
875
+ # ========================================================================= #
876
+ # === title=
877
+ # ========================================================================= #
878
# Stores the title in @title. When an Array is given, each of its entries
# is replaced (in that same Array) by a copy without the trailing line
# break and without surrounding whitespace.
def title=(i)
  i.map! { |line| line.chomp.strip } if i.is_a?(Array)
  @title = i
end
alias set_title title= # === set_title
885
+
886
+ # ========================================================================= #
887
+ # === calculate_the_centroid_position
888
+ #
889
+ # This method will calculate the centroid aka the "average position
890
+ # of the atoms" in that .pdb file. Currently this will only
891
+ # assume that each atom is the same, but in reality we should also
892
+ # include the weight of the atom at hand - this is currently not
893
+ # implemented via this method, though.
894
+ #
895
+ # If this is ever improved, we need to include the weight of the
896
+ # corresponding atom as well.
897
+ # ========================================================================= #
898
# Calculates the centroid, i.e. the unweighted average position, of the
# coordinates collected in @x_coordinates, @y_coordinates and
# @z_coordinates. Atom weights are not taken into account.
#
# The averages are divided by the number of collected coordinates, so
# the divisor always matches the summed values. Coordinate arrays that
# were never assigned are treated as empty.
#
# Returns an Array [x, y, z]; it is [0, 0, 0] when no coordinates exist.
def calculate_the_centroid_position
  axes = [@x_coordinates, @y_coordinates, @z_coordinates].map { |values|
    Array(values)
  }
  n_points = axes.first.size
  return [0, 0, 0] if n_points.zero?
  axes.map { |values| values.sum / n_points.to_f }
end
alias calculate_centroid calculate_the_centroid_position # === calculate_centroid
910
+
911
+ # ========================================================================= #
912
+ # === report_header
913
+ #
914
+ # This will also report the filename.
915
+ # ========================================================================= #
916
# In verbose mode, prints the header together with the name and the size
# (in kb, rounded down) of the given file.
#
# of_this_file - path of the file to describe (default: @this_file).
#                Both the displayed name and the size are taken from
#                this path, so they always describe the same file.
def report_header(
    of_this_file = @this_file
  )
  return unless be_verbose?
  e orange(header?)+rev+
    ' (File: '+
    steelblue(File.basename(of_this_file))+
    rev+'; Filesize: '+
    (File.size(of_this_file) / 1024).to_s+
    'kb'+')'+rev
end
929
+
930
+ # ========================================================================= #
931
+ # === analyze_the_dataset
932
+ #
933
+ # This method is the "workhorse" of this class.
934
+ # ========================================================================= #
935
# Runs all analysis and reporting steps for the given body (default:
# @body), in a fixed order. Nothing happens unless @does_the_file_exist
# is set.
def analyze_the_dataset(
    body = @body
  )
  return unless @does_the_file_exist
  report_header
  try_to_report_the_organism_at_hand(body)
  report_n_atoms
  check_whether_this_pdb_sequence_contains_dna
  # The aminoacid sequence has to be known before it can be reported.
  silently_determine_the_aminoacid_sequence(body)
  consider_reporting_the_aminoacid_sequence
  consider_reporting_the_number_of_individual_aminoacids
  consider_reporting_the_number_of_residues
  # ======================================================================= #
  # Try to obtain the taxid.
  # ======================================================================= #
  try_to_determine_the_taxid_from_this_input(body)
  try_to_determine_the_alpha_helices_in_this_protein(body)
  consider_reporting_alpha_helices_that_were_found
  try_to_determine_the_beta_sheets_in_this_protein(body)
  try_to_determine_the_max_distance_between_the_atoms_in_this_protein(body)
  consider_reporting_beta_sheet_that_were_found
  consider_reporting_how_many_chains_are_in_this_structure
  consider_reporting_the_keywords
end
960
+
961
+ # ========================================================================= #
962
+ # === consider_reporting_the_keywords
963
+ #
964
+ # This method will report the discovered keyword entries in the given
965
+ # .pdb file at hand (if this .pdb file contains these keywords entries
966
+ # that is).
967
+ # ========================================================================= #
968
# Reports the keywords entry of the .pdb file at hand (default:
# keywords?), provided that there is one and that be_verbose? is true.
# The verbosity check keeps silent runs free of output.
def consider_reporting_the_keywords(
    keywords = keywords?
  )
  return unless keywords && be_verbose?
  erev "The keywords are: #{steelblue(keywords)}"
end
975
+
976
+ # ========================================================================= #
977
+ # === consider_reporting_the_number_of_residues
978
+ # ========================================================================= #
979
# Prints the length of @aminoacid_sequence as the total number of
# residues, if reporting of the sequence is enabled and be_verbose? is
# true.
def consider_reporting_the_number_of_residues
  return unless @report_the_aminoacid_sequence && be_verbose?
  erev 'Total no:of residues - '+steelblue(@aminoacid_sequence.size.to_s)
end
984
+
985
+ # ========================================================================= #
986
+ # === consider_reporting_the_number_of_individual_aminoacids
987
+ # ========================================================================= #
988
# Prints, for every distinct one-letter code in @aminoacid_sequence, how
# often it occurs; the code is shown in its upcased three-letter form as
# returned by ::Bioroebe.one_to_three. Only active if reporting of the
# sequence is enabled and be_verbose? is true.
def consider_reporting_the_number_of_individual_aminoacids
  return unless @report_the_aminoacid_sequence && be_verbose?
  occurrences = @aminoacid_sequence.each_char.tally
  occurrences.each_pair { |one_letter_code, count|
    three_letter_code = ::Bioroebe.one_to_three(one_letter_code).upcase
    erev 'Total no:of '+
         rev+
         lightgreen(three_letter_code)+
         rev+
         ' - '+
         steelblue(count.to_s)
  }
end
1001
+
1002
+ # ========================================================================= #
1003
+ # === set_keywords
1004
+ # ========================================================================= #
1005
# Stores the keywords entry in @keywords as a stripped String.
#
# i - either the entry itself or an Array of lines; in the latter case
#     the Array is flattened in place and the first line containing
#     'KEYWDS' is used (the complete line, including 'KEYWDS').
def set_keywords(i)
  if i.is_a?(Array)
    i.flatten!
    i = i.find { |entry| entry.include?('KEYWDS') }
  end
  @keywords = i.to_s.strip
end
alias keywords= set_keywords # === keywords=
1013
+
1014
+ # ========================================================================= #
1015
+ # === consider_reporting_the_aminoacid_sequence
1016
+ #
1017
+ # This method will typically display the aminoacid sequence at hand.
1018
+ # ========================================================================= #
1019
# Prints @aminoacid_sequence together with its length, if reporting of
# the sequence is enabled and be_verbose? is true.
def consider_reporting_the_aminoacid_sequence
  return unless @report_the_aminoacid_sequence && be_verbose?
  sequence = @aminoacid_sequence
  erev 'The aminoacid sequence ('+
       steelblue(sequence.size.to_s)+rev+
       ' aminoacids) is:'
  # erev ' '+colourize_this_aminoacid_sequence(sequence) # <- We could colourize it.
  erev " #{steelblue(sequence)}"
end
1029
+
1030
+ # ========================================================================= #
1031
+ # === run (run tag)
1032
+ # ========================================================================= #
1033
# Entry point: evaluates the commandline options, processes every .pdb
# file and, in verbose mode, prints the centroid position.
def run
  menu
  process_each_pdb_file
  return unless be_verbose?
  erev 'The centered position is at: '+
       steelblue(
         calculate_the_centroid_position.join(', ')
       )
end
1043
+
1044
+ end
1045
+
1046
+ # =========================================================================== #
1047
+ # === Bioroebe.parse_pdb_file
1048
+ # =========================================================================== #
1049
# Convenience method: creates a new Bioroebe::ParsePdbFile for the given
# input (default: ARGV) and returns that instance.
def self.parse_pdb_file(i = ARGV)
  Bioroebe::ParsePdbFile.new(i)
end
1052
+
1053
+ # =========================================================================== #
1054
+ # === Bioroebe.return_aminoacid_sequence_from_this_pdb_file
1055
+ #
1056
+ # This variant will (silently) return the aminoacid sequence. The
1057
+ # input must be a file that exists locally; if you already have
1058
+ # a String that you wish to just parse, use the method defined
1059
+ # below instead.
1060
+ #
1061
+ # Invocation example:
1062
+ #
1063
+ # Bioroebe.return_aminoacid_sequence_from_this_pdb_file "1VII.pdb" # => "MLSDEDFKAVFGMTRSAFANLPLWKQQNLKKEKGLF"
1064
+ #
1065
+ # =========================================================================== #
1066
# Creates a Bioroebe::ParsePdbFile for the given input (default: ARGV),
# handing :be_silent to it via the block, and returns the aminoacid
# sequence that instance determined.
def self.return_aminoacid_sequence_from_this_pdb_file(i = ARGV)
  parser = Bioroebe::ParsePdbFile.new(i) { :be_silent }
  parser.aminoacid_sequence?
end
1069
+
1070
+ # =========================================================================== #
1071
+ # === Bioroebe.return_aminoacid_sequence_from_this_string
1072
+ # =========================================================================== #
1073
# Returns the aminoacid sequence contained in .pdb data that is already
# available as a String, rather than as a local file.
#
# i - the content of a .pdb file as one String; an Array of lines (such
#     as the ARGV default) is accepted as well.
#
# The instance is created with :do_not_run_yet, so no step that
# determines the sequence runs on its own; it is therefore triggered
# here explicitly after header, title and body were set.
# NOTE(review): assumes that ParsePdbFile.new initializes the coordinate
# arrays even when given :do_not_run_yet - confirm.
def self.return_aminoacid_sequence_from_this_string(i = ARGV)
  parser = Bioroebe::ParsePdbFile.new(i, :do_not_run_yet) { :be_silent }
  text = i.is_a?(Array) ? i.join("\n") : i.to_s
  dataset = text.split("\n")
  parser.set_header_title_and_body(dataset)
  parser.silently_determine_the_aminoacid_sequence(dataset)
  parser.aminoacid_sequence?
end
1079
+
1080
+ end
1081
+
1082
# Only when this file is executed directly: load the colour support and
# parse the files given on the commandline.
if __FILE__ == $PROGRAM_NAME
  require 'colours/autoinclude'
  Bioroebe::ParsePdbFile.new(ARGV)
end # pdbfile test.pdb
1086
+ # pdbfile :1fat