bioroebe 0.10.80 → 0.11.32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bioroebe might be problematic. Click here for more details.

Files changed (210) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3209 -2618
  3. data/bin/bioroebe +7 -1
  4. data/bioroebe.gemspec +3 -3
  5. data/doc/README.gen +3208 -2617
  6. data/doc/quality_control/commandline_applications.md +3 -3
  7. data/doc/todo/bioroebe_todo.md +2040 -2615
  8. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
  9. data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
  10. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
  11. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
  12. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
  13. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
  14. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +13 -11
  15. data/lib/bioroebe/base/commandline_application/misc.rb +26 -9
  16. data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
  17. data/lib/bioroebe/base/commandline_application/reset.rb +3 -2
  18. data/lib/bioroebe/base/misc.rb +35 -0
  19. data/lib/bioroebe/base/prototype/misc.rb +11 -1
  20. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
  21. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
  22. data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
  23. data/lib/bioroebe/codons/show_codon_usage.rb +2 -1
  24. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
  25. data/lib/bioroebe/constants/database_constants.rb +1 -1
  26. data/lib/bioroebe/constants/files_and_directories.rb +31 -4
  27. data/lib/bioroebe/constants/misc.rb +20 -0
  28. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
  29. data/lib/bioroebe/crystal/README.md +2 -0
  30. data/lib/bioroebe/crystal/to_rna.cr +19 -0
  31. data/lib/bioroebe/data/README.md +11 -8
  32. data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
  33. data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
  34. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
  35. data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
  36. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
  37. data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
  38. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
  39. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
  40. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
  41. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
  42. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
  43. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
  44. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
  45. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1460 -7
  46. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
  47. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
  48. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
  49. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
  50. data/lib/bioroebe/genome/README.md +4 -0
  51. data/lib/bioroebe/genome/genome.rb +67 -0
  52. data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
  53. data/lib/bioroebe/gui/gtk +1 -0
  54. data/lib/bioroebe/gui/gtk3/controller/controller.rb +45 -27
  55. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +76 -50
  56. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +99 -21
  57. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +42 -28
  58. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +119 -71
  59. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
  60. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +19 -11
  61. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
  62. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
  63. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
  64. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
  65. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
  66. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
  67. data/lib/bioroebe/java/README.md +4 -0
  68. data/lib/bioroebe/java/bioroebe/Sequence.java +25 -1
  69. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Base.class +0 -0
  70. data/lib/bioroebe/java/bioroebe/{Base.java → src/main/java/bioroebe/Base.java} +15 -2
  71. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/main/java/bioroebe/BisulfiteTreatment.class} +0 -0
  72. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/main/java/bioroebe/BisulfiteTreatment.java} +0 -0
  73. data/lib/bioroebe/java/bioroebe/{Codons.class → src/main/java/bioroebe/Codons.class} +0 -0
  74. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Codons.java +34 -0
  75. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.class +0 -0
  76. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.java +95 -0
  77. data/lib/bioroebe/java/bioroebe/{Esystem.class → src/main/java/bioroebe/Esystem.class} +0 -0
  78. data/lib/bioroebe/java/bioroebe/{Esystem.java → src/main/java/bioroebe/Esystem.java} +0 -0
  79. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/main/java/bioroebe/GenerateRandomDnaSequence.class} +0 -0
  80. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/main/java/bioroebe/GenerateRandomDnaSequence.java} +8 -2
  81. data/lib/bioroebe/java/bioroebe/{IsPalindrome.class → src/main/java/bioroebe/IsPalindrome.class} +0 -0
  82. data/lib/bioroebe/java/bioroebe/{IsPalindrome.java → src/main/java/bioroebe/IsPalindrome.java} +5 -1
  83. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.class +0 -0
  84. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.java +56 -0
  85. data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/main/java/bioroebe/RemoveFile.class} +0 -0
  86. data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/main/java/bioroebe/RemoveFile.java} +10 -4
  87. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/main/java/bioroebe/RemoveNumbers.class} +0 -0
  88. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/main/java/bioroebe/RemoveNumbers.java} +1 -0
  89. data/lib/bioroebe/java/bioroebe/{ToCamelcase.class → src/main/java/bioroebe/ToCamelcase.class} +0 -0
  90. data/lib/bioroebe/java/bioroebe/{ToCamelcase.java → src/main/java/bioroebe/ToCamelcase.java} +3 -3
  91. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.class +0 -0
  92. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.java +42 -0
  93. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.class +0 -0
  94. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.java +73 -0
  95. data/lib/bioroebe/misc/ruler.rb +1 -0
  96. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
  97. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
  98. data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
  99. data/lib/bioroebe/parsers/gff.rb +1 -9
  100. data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
  101. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
  102. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
  103. data/lib/bioroebe/pdb/parse_pdb_file.rb +1 -9
  104. data/lib/bioroebe/project/project.rb +1 -1
  105. data/lib/bioroebe/python/README.md +1 -0
  106. data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
  107. data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
  108. data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
  109. data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
  110. data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
  111. data/lib/bioroebe/python/mymodule.py +8 -0
  112. data/lib/bioroebe/python/protein_to_dna.py +33 -0
  113. data/lib/bioroebe/python/shell/shell.py +19 -0
  114. data/lib/bioroebe/python/to_rna.py +14 -0
  115. data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
  116. data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
  117. data/lib/bioroebe/python/toplevel_methods/palindromes.py +42 -0
  118. data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
  119. data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
  120. data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
  121. data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
  122. data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
  123. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
  124. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
  125. data/lib/bioroebe/sequence/protein.rb +105 -3
  126. data/lib/bioroebe/sequence/sequence.rb +61 -2
  127. data/lib/bioroebe/shell/menu.rb +3819 -3713
  128. data/lib/bioroebe/shell/misc.rb +51 -4311
  129. data/lib/bioroebe/shell/readline/readline.rb +1 -1
  130. data/lib/bioroebe/shell/shell.rb +11250 -28
  131. data/lib/bioroebe/siRNA/siRNA.rb +81 -1
  132. data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
  133. data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
  134. data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
  135. data/lib/bioroebe/taxonomy/constants.rb +4 -3
  136. data/lib/bioroebe/taxonomy/edit.rb +2 -1
  137. data/lib/bioroebe/taxonomy/help/help.rb +10 -10
  138. data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
  139. data/lib/bioroebe/taxonomy/info/info.rb +18 -11
  140. data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
  141. data/lib/bioroebe/taxonomy/interactive.rb +140 -104
  142. data/lib/bioroebe/taxonomy/menu.rb +27 -18
  143. data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
  144. data/lib/bioroebe/taxonomy/shared.rb +1 -0
  145. data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
  146. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
  147. data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
  148. data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
  149. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
  150. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
  151. data/lib/bioroebe/toplevel_methods/misc.rb +118 -11
  152. data/lib/bioroebe/toplevel_methods/nucleotides.rb +22 -5
  153. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
  154. data/lib/bioroebe/toplevel_methods/palindromes.rb +1 -2
  155. data/lib/bioroebe/toplevel_methods/taxonomy.rb +2 -2
  156. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
  157. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
  158. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
  159. data/lib/bioroebe/utility_scripts/compacter.rb +1 -9
  160. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
  161. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
  162. data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
  163. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
  164. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
  165. data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
  166. data/lib/bioroebe/version/version.rb +2 -2
  167. data/lib/bioroebe/www/embeddable_interface.rb +101 -52
  168. data/lib/bioroebe/www/sinatra/sinatra.rb +186 -70
  169. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
  170. data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
  171. data/lib/bioroebe/yaml/genomes/README.md +3 -4
  172. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +3 -3
  173. metadata +69 -64
  174. data/doc/setup.rb +0 -1655
  175. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
  176. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
  177. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
  178. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
  179. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
  180. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
  181. data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
  182. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  183. data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
  184. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  185. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
  186. data/lib/bioroebe/java/bioroebe.jar +0 -0
  187. data/lib/bioroebe/shell/add.rb +0 -108
  188. data/lib/bioroebe/shell/assign.rb +0 -360
  189. data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
  190. data/lib/bioroebe/shell/constants.rb +0 -166
  191. data/lib/bioroebe/shell/download.rb +0 -335
  192. data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
  193. data/lib/bioroebe/shell/enzymes.rb +0 -310
  194. data/lib/bioroebe/shell/fasta.rb +0 -345
  195. data/lib/bioroebe/shell/gtk.rb +0 -76
  196. data/lib/bioroebe/shell/history.rb +0 -132
  197. data/lib/bioroebe/shell/initialize.rb +0 -217
  198. data/lib/bioroebe/shell/loop.rb +0 -74
  199. data/lib/bioroebe/shell/prompt.rb +0 -107
  200. data/lib/bioroebe/shell/random.rb +0 -289
  201. data/lib/bioroebe/shell/reset.rb +0 -335
  202. data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
  203. data/lib/bioroebe/shell/search.rb +0 -337
  204. data/lib/bioroebe/shell/sequences.rb +0 -200
  205. data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
  206. data/lib/bioroebe/shell/startup.rb +0 -127
  207. data/lib/bioroebe/shell/taxonomy.rb +0 -14
  208. data/lib/bioroebe/shell/tk.rb +0 -23
  209. data/lib/bioroebe/shell/user_input.rb +0 -88
  210. data/lib/bioroebe/shell/xorg.rb +0 -45
@@ -2,15 +2,1468 @@
2
2
  # Encoding: UTF-8
3
3
  # frozen_string_literal: true
4
4
  # =========================================================================== #
5
+ # === Bioroebe::ParseFasta
6
+ #
7
+ # This class will parse through a local FASTA file and find the
8
+ # proper entries.
9
+ #
10
+ # A FASTA file may have nucleotides or an aminoacid-sequence, so
11
+ # we have to keep this in mind when parsing it.
12
+ #
13
+ # Usage examples:
14
+ #
15
+ # Bioroebe::ParseFasta.new(ARGV)
16
+ # Bioroebe.parse_fasta(ARGV)
17
+ #
18
+ # =========================================================================== #
5
19
  # require 'bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb'
6
- # Bioroebe::ParseFasta.new(ARGV)
20
+ # Bioroebe.parse_fasta
21
+ # Bioroebe.sizeseq
7
22
  # =========================================================================== #
8
23
  require 'bioroebe/base/commandline_application/commandline_application.rb'
9
- require 'bioroebe/fasta_and_fastq/parse_fasta/constants.rb'
10
- require 'bioroebe/fasta_and_fastq/parse_fasta/initialize.rb'
11
- require 'bioroebe/fasta_and_fastq/parse_fasta/misc.rb'
12
- require 'bioroebe/fasta_and_fastq/parse_fasta/reset.rb'
13
- require 'bioroebe/fasta_and_fastq/parse_fasta/run.rb'
24
+
25
+ module Bioroebe
26
+
27
+ class ParseFasta < ::Bioroebe::CommandlineApplication # === Bioroebe::ParseFasta
28
+
29
+ require 'bioroebe/sequence/dna.rb'
30
+
31
+ # ========================================================================= #
32
+ # === REGEX_NON_NUCLEOTIDES
33
+ #
34
+ # All non-nucleotides will be handled here via this regex.
35
+ #
36
+ # N is excluded because it may stand for "any" nucleotide too, at
37
+ # the least for a purine.
38
+ # ========================================================================= #
39
+ REGEX_NON_NUCLEOTIDES =
40
+ /BDEFHIJKLMOPQRSVWXYZ/
41
+
42
+ # ========================================================================= #
43
+ # === DEFAULT_FASTA
44
+ #
45
+ # This String can be used to quickly test code depending on FASTA
46
+ # entries.
47
+ # ========================================================================= #
48
+ DEFAULT_FASTA = '>Rosalind_6404
49
+ CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
50
+ TCCCACTAATAATTCTGAGG
51
+ >Rosalind_5959
52
+ CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
53
+ ATATCCATTTGTCAGCAGACACGC
54
+ >Rosalind_0808
55
+ CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
56
+ TGGGAACCTGCGGGCAGTAGGTGGAAT'
57
+
58
+ # ========================================================================= #
59
+ # === DEFAULT_ROUND_TO
60
+ # ========================================================================= #
61
+ DEFAULT_ROUND_TO = 2
62
+
63
+ # ========================================================================= #
64
+ # === initialize
65
+ # ========================================================================= #
66
+ def initialize(
67
+ i = DEFAULT_FASTA,
68
+ run_already = true,
69
+ &block
70
+ )
71
+ reset
72
+ # ======================================================================= #
73
+ # === Handle blocks next
74
+ # ======================================================================= #
75
+ if block_given?
76
+ yielded = yield
77
+ # ===================================================================== #
78
+ # First handle Symbols.
79
+ # ===================================================================== #
80
+ case yielded
81
+ # ===================================================================== #
82
+ # === :be_verbose
83
+ # ===================================================================== #
84
+ when :be_verbose,
85
+ :verbose
86
+ set_be_verbose_and_report_the_sequence
87
+ # ===================================================================== #
88
+ # === :be_quiet
89
+ # ===================================================================== #
90
+ when :be_quiet,
91
+ :be_silent
92
+ be_quiet
93
+ # ===================================================================== #
94
+ # === :sizeseq
95
+ # ===================================================================== #
96
+ when :sizeseq
97
+ @sort_by_size = true
98
+ end
99
+ # ===================================================================== #
100
+ # === Handle Hashes next
101
+ # ===================================================================== #
102
+ if yielded.is_a? Hash
103
+ # =================================================================== #
104
+ # === :be_verbose
105
+ # =================================================================== #
106
+ if yielded.has_key? :be_verbose
107
+ set_be_verbose(yielded.delete(:be_verbose))
108
+ @internal_hash[:report_the_sequence] = true
109
+ end
110
+ # =================================================================== #
111
+ # === :use_colours
112
+ # =================================================================== #
113
+ if yielded.has_key? :use_colours
114
+ set_use_colours(
115
+ yielded.delete(:use_colours)
116
+ )
117
+ end
118
+ # =================================================================== #
119
+ # === :sizeseq
120
+ # =================================================================== #
121
+ if yielded.has_key? :sizeseq
122
+ @sort_by_size = true
123
+ end
124
+ end
125
+ end
126
+ set_commandline_arguments(i)
127
+ case run_already
128
+ # ======================================================================= #
129
+ # === :dont_run_yet
130
+ # ======================================================================= #
131
+ when :dont_run_yet,
132
+ :do_not_run_yet
133
+ run_already = false
134
+ end
135
+ run if run_already
136
+ end
137
+
138
+ # ========================================================================= #
139
+ # === reset (reset tag)
140
+ # ========================================================================= #
141
+ def reset
142
+ super()
143
+ infer_the_namespace
144
+ # ======================================================================= #
145
+ # === @is_a_genbank_file
146
+ # ======================================================================= #
147
+ @is_a_genbank_file = false
148
+ # ======================================================================= #
149
+ # === @input_file
150
+ #
151
+ # This variable denotes which input file is used to read data from.
152
+ #
153
+ # It is nil initially because we may skip reading from an existing
154
+ # file and e. g. only read from a String or some other non-file
155
+ # entity.
156
+ # ======================================================================= #
157
+ @input_file = nil
158
+ # ======================================================================= #
159
+ # === @hash
160
+ #
161
+ # This is the main variable for the class. It will keep entries such
162
+ # as this one here:
163
+ #
164
+ # {
165
+ # "ENSMUSG00000020122|ENSMUST08" => "CCCTCC"
166
+ # }
167
+ #
168
+ # ======================================================================= #
169
+ @hash = {}
170
+ # ======================================================================= #
171
+ # === @internal_hash
172
+ #
173
+ # This Hash exists for internal configuration of the class.
174
+ # ======================================================================= #
175
+ @internal_hash = {}
176
+ # ======================================================================= #
177
+ # === :report_the_sequence
178
+ # ======================================================================= #
179
+ @internal_hash[:report_the_sequence] = false
180
+ # ======================================================================= #
181
+ # === :overwrite_the_original_file
182
+ # ======================================================================= #
183
+ @internal_hash[:overwrite_the_original_file] = false
184
+ # ======================================================================= #
185
+ # === :save_the_file
186
+ # ======================================================================= #
187
+ @internal_hash[:save_the_file] = false
188
+ # ======================================================================= #
189
+ # === :remove_numbers_from_input
190
+ # ======================================================================= #
191
+ @internal_hash[:remove_numbers_from_input] = false
192
+ # ======================================================================= #
193
+ # === :show_the_translated_protein_sequence
194
+ #
195
+ # This setting is false initially. If set to true via the commandline
196
+ # then report() will show the translated protein sequence as well.
197
+ # ======================================================================= #
198
+ @internal_hash[:show_the_translated_protein_sequence] = false
199
+ # ======================================================================= #
200
+ # === :condense_the_sequence_onto_a_single_line
201
+ #
202
+ # By default the output of this class will include newlines for the
203
+ # sequence. If this is not wanted by the user then the following
204
+ # variable keeps track of that behaviour. You can use the flag
205
+ # called --one-line to enable a condensed output, with newlines
206
+ # being removed.
207
+ # ======================================================================= #
208
+ @internal_hash[:condense_the_sequence_onto_a_single_line] = false
209
+ # ======================================================================= #
210
+ # === :limit_the_display_to_n_nucleotides
211
+ #
212
+ # If this variable is a number rather than nil, then it will be used
213
+ # to display only a limited number of nucleotides, e. g. "1000" if
214
+ # the user passes in 1000.
215
+ # ======================================================================= #
216
+ @internal_hash[:limit_the_display_to_n_nucleotides] = nil
217
+ # ======================================================================= #
218
+ # === @may_we_exit
219
+ # ======================================================================= #
220
+ @may_we_exit = false
221
+ # ======================================================================= #
222
+ # === @current_key
223
+ # ======================================================================= #
224
+ @current_key = nil
225
+ # ======================================================================= #
226
+ # === @use_opn
227
+ # ======================================================================= #
228
+ @use_opn = ::Bioroebe.use_opn?
229
+ # ======================================================================= #
230
+ # === @colourize_sequence
231
+ # ======================================================================= #
232
+ @colourize_sequence = false
233
+ # ======================================================================= #
234
+ # === @sort_by_size
235
+ #
236
+ # If the following variable is set to true, then this class will
237
+ # run a sizeseq-comparison, that is, it will compare all sequences
238
+ # and output them in a size-sorted manner, similar to the EMBOSS
239
+ # sizeseq action.
240
+ # ======================================================================= #
241
+ @sort_by_size = false
242
+ # ======================================================================= #
243
+ # === @show_the_header
244
+ #
245
+ # If this variable is true then the header will be shown.
246
+ # ======================================================================= #
247
+ @show_the_header = false
248
+ set_round_to :default
249
+ set_be_verbose
250
+ end
251
+
252
+ # ========================================================================= #
253
+ # === menu (menu tag)
254
+ # ========================================================================= #
255
+ def menu(
256
+ i = return_commandline_arguments_that_are_not_files
257
+ )
258
+ if i.is_a? Array
259
+ i.each {|entry| menu(entry) }
260
+ else
261
+ case i # case tag
262
+ # ===================================================================== #
263
+ # === --to-protein
264
+ #
265
+ # Invocation example:
266
+ #
267
+ # pfasta *.fasta --toprotein
268
+ #
269
+ # ===================================================================== #
270
+ when /^-?-?to(-|_)?protein/i
271
+ @internal_hash[:show_the_translated_protein_sequence] = true
272
+ # ===================================================================== #
273
+ # === --one-line
274
+ #
275
+ # Invocation example:
276
+ #
277
+ # pfasta rpoS_NC_000913.3.fasta --one-line
278
+ #
279
+ # ===================================================================== #
280
+ when /^-?-?one(-|_)?liner?/i
281
+ @internal_hash[:condense_the_sequence_onto_a_single_line] = true
282
+ # ===================================================================== #
283
+ # === --limit=1000
284
+ #
285
+ # Invocation example:
286
+ #
287
+ # pfasta --limit=1000
288
+ #
289
+ # ===================================================================== #
290
+ when /^-?-?limit=(\d+)$/i
291
+ @internal_hash[:limit_the_display_to_n_nucleotides] = $1.to_s.dup.to_i
292
+ # ===================================================================== #
293
+ # === --overwrite
294
+ # ===================================================================== #
295
+ when /^-?-?overwrite/i
296
+ @internal_hash[:overwrite_the_original_file] = true
297
+ # ===================================================================== #
298
+ # === --help
299
+ #
300
+ # Usage example:
301
+ #
302
+ # parse_fasta --help
303
+ #
304
+ # ===================================================================== #
305
+ when /^-?-?help/i
306
+ show_help
307
+ exit
308
+ # ===================================================================== #
309
+ # === --save-file
310
+ # ===================================================================== #
311
+ when /^-?-?save(-|_)?file/i
312
+ @internal_hash[:save_the_file] = true
313
+ # ===================================================================== #
314
+ # === --also-show-the-sequence
315
+ #
316
+ # To use this commandline flag try:
317
+ #
318
+ # parsefasta /Depot/Bioroebe/NP_013521.3_289_aa.fasta --show
319
+ #
320
+ # ===================================================================== #
321
+ when /^-?-?also(-|_)?show(-|_)?the(-|_)?sequence$/i,
322
+ /^-?-?report$/i,
323
+ /^-?-?show$/i
324
+ @internal_hash[:report_the_sequence] = true
325
+ # ===================================================================== #
326
+ # === --header
327
+ # ===================================================================== #
328
+ when /^-?-?header/i
329
+ do_show_the_header
330
+ # ===================================================================== #
331
+ # === --short
332
+ #
333
+ # This entry point can be used to show 300 nucleotides and not
334
+ # more, by simply using the --short commandline flag.
335
+ # ===================================================================== #
336
+ when /^-?-?short/i
337
+ @internal_hash[:limit_the_display_to_n_nucleotides] = 300
338
+ # ===================================================================== #
339
+ # === --size
340
+ #
341
+ # This will simply tell us how many nucleotides the given sequence
342
+ # has, then exit.
343
+ #
344
+ # To use this commandline flag try:
345
+ #
346
+ # parsefasta /Depot/Bioroebe/NP_013521.3_289_aa.fasta --size
347
+ #
348
+ # ===================================================================== #
349
+ when /^-?-?size$/i
350
+ set_be_quiet
351
+ do_process_the_commandline_arguments_that_are_files
352
+ erev size? # Report the size here.
353
+ exit
354
+ end
355
+ end
356
+ end
357
+
358
+ require 'bioroebe/calculate/calculate_gc_content.rb'
359
+
360
+ # ========================================================================= #
361
+ # === show_help (help tag)
362
+ #
363
+ # This method will inform the user how this class may be used from the
364
+ # commandline.
365
+ #
366
+ # Invocation example:
367
+ #
368
+ # pfasta --help
369
+ #
370
+ # ========================================================================= #
371
+ def show_help
372
+ e
373
+ eparse ' --size'
374
+ eparse ' --also-show-the-sequence'
375
+ eparse ' --header # show the header as well (normally the '\
376
+ 'header is not shown)'
377
+ eparse ' --limit=1000 # limit to show only the first 1000 '\
378
+ 'nucleotides; use'
379
+ eparse ' # any number that you need here'
380
+ eparse ' --one-line # show the sequence on one line only, '\
381
+ 'e. g. all newlines'
382
+ eparse ' # were removed'
383
+ eparse ' --toprotein # show the protein sequence as well '\
384
+ '(assumes DNA or RNA'
385
+ eparse ' # .fasta file)'
386
+ e
387
+ end
388
+
389
+ # ========================================================================= #
390
+ # === show_the_translated_protein_sequence?
391
+ # ========================================================================= #
392
+ def show_the_translated_protein_sequence?
393
+ @internal_hash[:show_the_translated_protein_sequence]
394
+ end
395
+
396
+ # ========================================================================= #
397
+ # === set_round_to
398
+ #
399
+ # This sets the number of decimal places that we will round to. This is
400
+ # mostly done for display-purposes, hence why the default is a fairly
401
+ # low value.
402
+ # ========================================================================= #
403
+ def set_round_to(
404
+ i = :default
405
+ )
406
+ case i
407
+ # ======================================================================= #
408
+ # === :default
409
+ #
410
+ # As of April 2021, the default rounding precision is 2 decimal places.
411
+ # ======================================================================= #
412
+ when :default
413
+ i = DEFAULT_ROUND_TO
414
+ end
415
+ @internal_hash[:round_to] = i.to_i
416
+ end
417
+
418
+ # ========================================================================= #
419
+ # === do_process_the_commandline_arguments_that_are_files
420
+ # ========================================================================= #
421
def do_process_the_commandline_arguments_that_are_files(
    these_files = commandline_arguments_that_are_files?
  )
  # Anything that is not an Array is wrapped into one; nil becomes [].
  these_files = [these_files].flatten.compact unless these_files.is_a?(Array)
  these_files.each do |this_file|
    # Load and split the current file.
    set_input_file(this_file)
    set_data # Reads from the input file that was just registered.
    split_into_proper_sections
    report_the_FASTA_header if @show_the_header
    # The sizeseq-like report replaces all other reporting.
    if @sort_by_size
      run_sizeseq_comparison
      next
    end
    if is_the_sequence_a_polypeptide?
      # === The input is a protein
      if be_verbose?
        erev "This sequence is assumed to be a #{royalblue('protein')}#{rev}."
        report_how_many_elements_we_have_found
      end
    else
      # === Otherwise the input is treated as DNA or RNA
      if be_verbose?
        erev "This sequence is assumed to "\
             "be #{royalblue('DNA')}#{rev} or #{royalblue('RNA')}#{rev}."
      end
      calculate_gc_content # GC content makes only sense for nucleotides.
      report_how_many_elements_we_have_found if be_verbose?
    end
    next unless be_verbose?
    report_the_nucleotide_composition
    report_on_how_many_entries_we_did_work
    do_report_the_sequence if report_the_sequence?
  end
end
464
+
465
+ # ========================================================================= #
466
+ # === sanitize_the_description
467
+ #
468
+ # This method will iterate over the description entry and sanitize
469
+ # it. In this context sanitizing means to add the "length" entry,
470
+ # and the "type" entry, such as in:
471
+ #
472
+ # " # length=231; type=dna"
473
+ #
474
+ # ========================================================================= #
475
def sanitize_the_description
  # Annotates every FASTA header line in @data (leading ">") that has
  # no "length=" entry yet, such as in:
  #
  #   ">seq1 # length=231; type=dna"
  #
  # All other lines are kept unchanged. Returns @data.
  @data.map! { |line|
    next line unless line.start_with?('>') && !line.include?('length=')
    header = line.chomp
    # Keep the original line ending so that the annotation is placed
    # before it, not after it.
    line_ending = line[header.size..-1]
    # The keys of @hash are the header without the leading ">" and
    # without the line ending (see split_into_proper_sections).
    key = header[1..-1]
    # Newlines are retained in the stored sequences, so they must not
    # be counted. Unknown headers are reported with a length of 0.
    length = @hash.key?(key) ? @hash[key].delete("\n").size : 0
    "#{header} # length=#{length}; type=dna#{line_ending}" # Currently hardcoded to DNA.
  }
end
487
+
488
+ # ========================================================================= #
489
+ # === entries?
490
+ # ========================================================================= #
491
def entries?
  # Hands back whatever is currently stored in @data.
  return @data
end
494
+
495
+ # ========================================================================= #
496
+ # === we_may_exit
497
+ # ========================================================================= #
498
def we_may_exit
  # Raise the @may_we_exit flag; the assignment value (true) is returned.
  (@may_we_exit = true)
end
501
+
502
+ # ========================================================================= #
503
+ # === output_results
504
+ # ========================================================================= #
505
def output_results
  # Pretty-print the header => sequence hash.
  pp(@hash)
end
508
+
509
+ # ========================================================================= #
510
+ # === do_report_the_sequence (report tag)
511
+ #
512
+ # This method is used to display the main sequence at hand.
513
+ # ========================================================================= #
514
def do_report_the_sequence
  # Displays the main sequence and, if requested, its translation.
  #
  # The plain sequence is kept separate from the colourized variant:
  # the object returned by main_sequence? is never modified, and the
  # translation is calculated from the plain sequence rather than from
  # a string that already contains colour codes.
  plain_sequence = main_sequence?.dup
  # ======================================================================= #
  # Honour the --limit commandline flag next.
  # ======================================================================= #
  limit = @internal_hash[:limit_the_display_to_n_nucleotides]
  plain_sequence = plain_sequence[0 .. (limit - 1)] if limit
  on_one_line = condense_the_sequence_onto_a_single_line?
  plain_sequence = plain_sequence.delete("\n") if on_one_line
  displayed_sequence = plain_sequence
  if @colourize_sequence && is_polynucleotide?
    # A single pass, so that already inserted colour codes are never
    # scanned again.
    displayed_sequence = plain_sequence.gsub(/[ACGTU]/) { |nucleotide|
      coloured =
        case nucleotide
        when 'A' then teal(nucleotide)
        when 'C' then slateblue(nucleotide)
        when 'G' then royalblue(nucleotide)
        else          steelblue(nucleotide) # T and U
        end
      coloured + rev
    }
  end
  erev colourize_this_nucleotide_sequence(displayed_sequence)
  e if on_one_line
  return unless show_the_translated_protein_sequence?
  # ======================================================================= #
  # Do show the translated protein sequence next:
  # ======================================================================= #
  translated_into_aa = Bioroebe.to_aa(plain_sequence)
  # Stop codons ("*") are highlighted.
  translated_into_aa_and_colourized = translated_into_aa.gsub(/\*/, tomato('*'))
  erev 'The translated aminoacid sequence of '+
       sfancy(translated_into_aa.size.to_s)+rev+
       ' aminoacids is:'
  e
  erev steelblue(" #{translated_into_aa_and_colourized}")
  e
end
alias display do_report_the_sequence # === display
alias report do_report_the_sequence # === report
558
+
559
+ # ========================================================================= #
560
+ # === report_the_nucleotide_composition
561
+ # ========================================================================= #
562
def report_the_nucleotide_composition
  # Reports the composition of the first sequence stored in @hash: the
  # counts and percentages of A, T, C and G for nucleotide input, or
  # the sequence itself for a protein.
  if is_this_sequence_a_polynucleotide_sequence?
    # Newlines are retained in the stored sequence; they must not be
    # part of the total or the percentages will be too low.
    first = @hash.values.first.to_s.delete("\n").upcase
    total_size = first.size
    # An empty sequence is reported as 0.0% rather than NaN.
    percentage = lambda { |count|
      total_size.zero? ? 0.0 : (count * 100.0 / total_size).round(2)
    }
    composition = %w[A T C G].map { |nucleotide|
      count = first.count(nucleotide)
      "#{steelblue(count)}#{rev}x #{nucleotide} (#{percentage.call(count)}%)"
    }
    erev "The nucleotide composition is as follows:"
    e " "+composition.join(', ')
  elsif is_a_protein?
    # ===================================================================== #
    # Report the composition of the protein:
    # ===================================================================== #
    sequence = @hash.values.first.delete("\n")
    erev "The protein composition (aminoacids) is as follows:"
    e orchid(" #{sequence}")
  end
end
alias report_the_protein_composition report_the_nucleotide_composition # === report_the_protein_composition
586
+
587
+ # ========================================================================= #
588
+ # === report_how_many_elements_we_have_found
589
+ # ========================================================================= #
590
def report_how_many_elements_we_have_found
  # Reports, in verbose mode only, the size of the first sequence in
  # @hash and, for nucleotide input, how many ATG codons are found on
  # both strands.
  return unless @hash && be_verbose?
  first = @hash.values.first.to_s.delete("\n")
  size = first.size
  result = "This sequence contains #{simp(size.to_s)}#{rev}"\
           " #{nucleotides_or_aminoacids?}".dup
  if is_a_nucleotide?
    # =================================================================== #
    # We upcase the sequence since as of October 2021, as some FASTA
    # files may include the sequence in lowercased characters.
    #
    # String#count would count the single characters A, T and G, so
    # String#scan is used to find the actual "ATG" triplets. An ATG on
    # the complementary strand shows up as "CAT" on the given strand.
    # =================================================================== #
    upcased = first.upcase
    n_start_codons = upcased.scan('ATG').size + upcased.scan('CAT').size
    result << " and #{n_start_codons} "\
              "ATG codons (on both strands) in total"
  end
  result << '.'
  if size > 1_000_000
    # =================================================================== #
    # Format the number with '_' characters.
    # =================================================================== #
    formatted = size.to_s.reverse.scan(/\d{1,3}/).join('_').reverse
    result << ' ('+simp(formatted+' bp')+rev+')'
  end
  erev result
end
620
+
621
+ # ========================================================================= #
622
+ # === report_on_how_many_entries_we_did_work
623
+ # ========================================================================= #
624
def report_on_how_many_entries_we_did_work
  # Only reports in verbose mode.
  return unless be_verbose?
  n_entries = @hash.keys.size
  # Singular for up to one entry, plural otherwise.
  entry_or_entries = n_entries > 1 ? 'entries' : 'entry'
  erev "We have identified a total of #{orange(n_entries)}"\
       "#{rev} #{entry_or_entries} in this fasta dataset."
  e
end
635
+
636
+ # ========================================================================= #
637
+ # === report_the_FASTA_header
638
+ # ========================================================================= #
639
def report_the_FASTA_header
  # Show the first header, colourized via steelblue().
  prefix = rev
  coloured_header = steelblue(header?)
  e "#{prefix}The header is: #{coloured_header}"
end
642
+
643
+ # ========================================================================= #
644
+ # === report_the_sequence?
645
+ # ========================================================================= #
646
def report_the_sequence?
  # Query the flag stored under :report_the_sequence.
  flag = @internal_hash[:report_the_sequence]
  flag
end
649
+
650
+ # ========================================================================= #
651
+ # === run (run tag)
652
+ # ========================================================================= #
653
def run
  # Evaluate the commandline menu first, then work on the given files.
  menu
  do_process_the_commandline_arguments_that_are_files
  # Saving is optional.
  return unless save_the_file?
  do_save_the_file
end
658
+
659
+ # ========================================================================= #
660
+ # === sanitize_data
661
+ # ========================================================================= #
662
def sanitize_data(i)
  # Arrays are cleaned up in place; any other input is passed through.
  if i.is_a?(Array)
    i.flatten!
    # Drop comment lines (leading "#") and lines that are blank.
    i.reject! { |entry| entry.start_with?('#') || entry.strip.empty? }
    # ===================================================================== #
    # Some FASTA files include "\r" line endings. Only the first entry
    # is checked; if it contains a \r then \r is removed from all
    # entries.
    # ===================================================================== #
    first_entry = i.first
    if first_entry && first_entry.include?("\r")
      i.map! { |entry| entry.delete("\r") }
    end
  end
  # ======================================================================= #
  # === Run through SanitizeNucleotideSequence
  # ======================================================================= #
  i = Bioroebe::SanitizeNucleotideSequence[i] if @internal_hash[:remove_numbers_from_input]
  i
end
684
+
685
+ # ========================================================================= #
686
+ # === current_key?
687
+ # ========================================================================= #
688
def current_key?
  # The header (key) that was assigned to @current_key most recently.
  return @current_key
end
alias id? current_key? # === id?
alias sequence_id? current_key? # === sequence_id?
alias title current_key? # === title
alias title? current_key? # === title?
694
+
695
+ # ========================================================================= #
696
+ # === round_to?
697
+ # ========================================================================= #
698
def round_to?
  # Query the value stored under :round_to (see set_round_to).
  stored_value = @internal_hash[:round_to]
  stored_value
end
701
+
702
+ # ========================================================================= #
703
+ # === opnn
704
+ # ========================================================================= #
705
def opnn
  # Nothing is delegated upwards unless opn-usage is enabled.
  return unless use_opn?
  super(namespace?)
end
708
+
709
+ # ========================================================================= #
710
+ # === use_opn?
711
+ # ========================================================================= #
712
def use_opn?
  # Hands back the @use_opn flag.
  return @use_opn
end
715
+
716
+ # ========================================================================= #
717
+ # === calculate_gc_content
718
+ #
719
+ # Calculate the gc content through this method, which is called from
720
+ # within the method do_process_the_commandline_arguments_that_are_files().
721
+ # ========================================================================= #
722
def calculate_gc_content
  # All sequences joined together, without newlines, decide whether the
  # dataset is treated as a polynucleotide.
  joined_sequences = @hash.values.join.delete(N)
  if is_polynucleotide?(joined_sequences)
    @hash.each_pair do |key, content|
      # =================================================================== #
      # Delegate towards the method Bioroebe.gc_content next, rounding
      # to as many positions as round_to? specifies:
      # =================================================================== #
      gc_content = ::Bioroebe.gc_content(content.upcase, round_to?)
      gc_content = gc_content.first if gc_content.is_a?(Array)
      gc_content = gc_content.to_s
      # Only the part after the last "|" of the header is displayed.
      minimal_key = key.to_s
      minimal_key = minimal_key.split('|').last.strip if minimal_key.include?('|')
      next unless be_verbose?
      label = minimal_key.strip
      # Shorten the content a bit if it is too long.
      label = label[0 .. 40]+' [...]' if label.size > 40
      erev 'GC content of "'+simp(label)+rev+'" is: '+
           "#{sfancy(gc_content)}#{rev} %"
    end
  else
    erev '`'+simp(joined_sequences)+rev+'` is not a polynucleotide.' if be_verbose?
  end
end
750
+
751
+ # ========================================================================= #
752
+ # === first_value
753
+ #
754
+ # This will return the first entry of the Fasta files.
755
+ # ========================================================================= #
756
def first_value
  # The first of all sequences.
  all_sequences = sequences?
  all_sequences.first
end
759
+
760
+ # ========================================================================= #
761
+ # === nucleotides_or_aminoacids?
762
+ # ========================================================================= #
763
def nucleotides_or_aminoacids?
  # Returns the word that names the elements of the main sequence.
  is_polynucleotide? ? 'nucleotides' : 'aminoacids'
end
770
+
771
+ # ========================================================================= #
772
+ # === is_polynucleotide?
773
+ # ========================================================================= #
774
def is_polynucleotide?(i = main_sequence?)
  # Everything that is not classified as a protein counts as a
  # polynucleotide.
  is_protein?(i) ? false : true
end
alias is_a_nucleotide? is_polynucleotide? # === is_a_nucleotide?
777
+
778
+ # ========================================================================= #
779
+ # === is_this_sequence_a_polynucleotide_sequence?
780
+ # ========================================================================= #
781
def is_this_sequence_a_polynucleotide_sequence?
  # The negation of is_protein? (called without any argument).
  is_protein? ? false : true
end
784
+
785
+ # ========================================================================= #
786
+ # === data?
787
+ #
788
+ # This will contain the full content of the (whole) .fasta file, including
789
+ # the header.
790
+ # ========================================================================= #
791
def data?
  # Hands back the content of @data.
  return @data
end
alias input? data? # === input?
alias dataset? data? # === dataset?
795
+
796
+ # ========================================================================= #
797
+ # === hash?
798
+ # ========================================================================= #
799
def hash?
  # Hands back @hash, which maps each header to its sequence.
  return @hash
end
802
+
803
+ # ========================================================================= #
804
+ # === sequences?
805
+ #
806
+ # This method will obtain all found sequences.
807
+ # ========================================================================= #
808
def sequences?
  # The values of @hash are the sequences.
  found_sequences = @hash.values
  found_sequences
end
alias sequences sequences? # === sequences
alias values sequences? # === values
812
+
813
+ # ========================================================================= #
814
+ # === short_headers?
815
+ #
816
+ # The short-headers are like the headers, but if a ' ' token is found
817
+ # then the line will be truncated towards that first ' '.
818
+ #
819
+ # An example is:
820
+ #
821
+ # sp|Q91FT8|234R_IIV6 Uncharacterized protein 234R OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-234R PE=4 SV=1
822
+ #
823
+ # This will be truncated towards
824
+ #
825
+ # sp|Q91FT8|234R_IIV6
826
+ #
827
+ # This could then be used to automatically rename FASTA files, for
828
+ # instance.
829
+ # ========================================================================= #
830
def short_headers?
  # Every header is cut down to its first whitespace-separated token;
  # headers without a ' ' are returned as they are.
  headers?.map { |entry|
    entry.include?(' ') ? entry.split(' ').first : entry
  }
end
838
+
839
+ # ========================================================================= #
840
+ # === set_data
841
+ #
842
+ # This is the setter-method towards @data. It is no longer allowed to
843
+ # invoke set_input_file() since as of 12.06.2020. This means that
844
+ # you have to invoke that method prior to calling this method.
845
+ # ========================================================================= #
846
def set_data(i = @input_file)
  # ======================================================================= #
  # An Array is allowed as input as well; only its first non-nil
  # element is used, converted into a String.
  # ======================================================================= #
  i = [i].flatten.compact.first.to_s.dup
  if File.exist?(i) # First try to read in from a file.
    if be_verbose?
      opnn; erev "Will read from the file `#{sfile(i)}#{rev}`."
    end
    i = File.readlines(i)
    if @is_a_genbank_file
      # Sequence lines of a genbank file look like this:
      #   " 61 atggggcctg caatggggcc tgcaatgggg cctgca\n"
      sequence_lines = i.select { |line|
        line.start_with?(' ') && (line.strip =~ /\d+/)
      }
      # Strip the position numbers and the blanks, then upcase.
      selected = sequence_lines.map { |inner_line|
        inner_line.strip.delete(' 0123456789').strip.upcase
      }
      i = ["> genbank file"]+selected
    end
  end
  # Empty input is replaced by the default FASTA sequence.
  if i.empty?
    i = DEFAULT_FASTA
    opnn; erev 'No input was provided. Thus a default FASTA '\
               'sequence will be used instead.'
  end
  i = sanitize_data(i)
  i = i.split(N) if i.is_a?(String)
  @data = i
end
alias set_sequence set_data # === set_sequence
876
+
877
+ # ========================================================================= #
878
+ # === set_be_verbose_and_report_the_sequence
879
+ # ========================================================================= #
880
def set_be_verbose_and_report_the_sequence
  # Enable verbose mode first ...
  set_be_verbose
  # ... and then request that the sequence is reported as well.
  (@internal_hash[:report_the_sequence] = true)
end
884
+
885
+ # ========================================================================= #
886
+ # === condense_the_sequence_onto_a_single_line?
887
+ # ========================================================================= #
888
def condense_the_sequence_onto_a_single_line?
  # Query the flag stored under :condense_the_sequence_onto_a_single_line.
  flag = @internal_hash[:condense_the_sequence_onto_a_single_line]
  flag
end
891
+
892
+ # ========================================================================= #
893
+ # === return_size_sorted_hash
894
+ # ========================================================================= #
895
def return_size_sorted_hash(i = @hash)
  # Build a new Hash whose pairs are ordered by the size of the value,
  # smallest first. The input is not modified.
  i.sort_by { |_key, value| value.size }.to_h
end
900
+
901
+ # ========================================================================= #
902
+ # === do_sort_by_size
903
+ #
904
+ # This method will sort the hash by size of the sequence. It has been
905
+ # inspired by the EMBOSS sizeseq functionality.
906
+ #
907
+ # The output that should be generated might look like this:
908
+ #
909
+ # https://www.bioinformatics.nl/cgi-bin/emboss/help/sizeseq#input.1
910
+ #
911
+ # Invocation example:
912
+ #
913
+ # x = Bioroebe::ParseFasta.new('/Depot/j/globins.fasta'); x.do_sort_by_size
914
+ #
915
+ # ========================================================================= #
916
def do_sort_by_size
  # ======================================================================= #
  # Sort it here first, by the size of the "value", aka the sequence body.
  # ======================================================================= #
  @hash = return_size_sorted_hash(@hash)
  # One record per entry: a description line, the sequence and an
  # empty line.
  report = @hash.map { |key, sequence|
    sequence_size = sequence.size.to_s
    '> ID '+sequence_size+' AA.; DE: '+key.to_s+
    ' SQ '+sequence_size+' AA'+N+
    sequence+N+N
  }.join
  e report
end
alias run_sizeseq_comparison do_sort_by_size # === run_sizeseq_comparison
930
+
931
+ # ========================================================================= #
932
+ # === n_nucleotides?
933
+ # ========================================================================= #
934
def n_nucleotides?
  # The size of the first sequence; newlines are not counted.
  first_sequence = @hash.values.first
  first_sequence.delete("\n").size
end
alias return_n_aminoacids n_nucleotides? # === return_n_aminoacids
alias size? n_nucleotides? # === size?
alias sequence_size? n_nucleotides? # === sequence_size?
939
+
940
+ # ========================================================================= #
941
+ # === headers?
942
+ # ========================================================================= #
943
def headers?
  # The keys of @hash are the headers.
  found_headers = @hash.keys
  found_headers
end
946
+
947
+ # ========================================================================= #
948
+ # === first_key?
949
+ #
950
+ # Obtain the very first entry.
951
+ # ========================================================================= #
952
def first_key?
  # The first of all headers; nil if there is none.
  all_headers = headers?
  all_headers.first
end
955
+
956
+ # ========================================================================= #
957
+ # === header?
958
+ #
959
+ # This variant will always return the first entry.
960
+ # ========================================================================= #
961
def header?
  # The first header as a String; an empty String if there is none.
  first_header = headers?.first
  first_header.to_s
end
964
+
965
+ # ========================================================================= #
966
+ # === raw_body?
967
+ # ========================================================================= #
968
def raw_body?
  # The first sequence exactly as stored in @hash.
  stored_sequences = @hash.values
  stored_sequences.first
end
971
+
972
+ # ========================================================================= #
973
+ # === do_show_the_header
974
+ # ========================================================================= #
975
def do_show_the_header
  # Raise the @show_the_header flag; the assignment value is returned.
  (@show_the_header = true)
end
978
+
979
+ # ========================================================================= #
980
+ # === set_input_file
981
+ #
982
+ # This method will be used to keep track of the input-file, from
983
+ # which we will read the dataset.
984
+ # ========================================================================= #
985
def set_input_file(i = nil)
  # Registers the file from which the dataset will be read and stores
  # it in @input_file. The registered file (or nil) is returned.
  #
  # When an existing file is registered, @is_a_genbank_file is set to
  # whether that file starts with "LOCUS", so that the flag of a
  # previously registered file is not carried over.
  if i.nil?
    # ===================================================================== #
    # Without an argument we try to find a .fasta or .fa file in the
    # current directory. If we can find one, we will use that instead.
    # ===================================================================== #
    file = Dir['*.{fa,fasta}'].first
    if file
      if be_verbose?
        result = 'A '.dup
        result << 'FASTA ' if file.end_with?('.fasta')
        result << 'file was found in this directory ('+sfile(file)+rev+').'
        opnn; erev result
        opnn; erev 'We will use it.'
      end
      i = file
    end
  end
  if i && File.file?(i)
    # Only the first bytes are needed to recognize a genbank file.
    @is_a_genbank_file = (File.read(i, 'LOCUS'.size) == 'LOCUS')
  end
  @input_file = i
end
alias set_input_files set_input_file # === set_input_files
1022
+
1023
+ # ========================================================================= #
1024
+ # === save_the_file?
1025
+ # ========================================================================= #
1026
def save_the_file?
  # Query the flag stored under :save_the_file.
  flag = @internal_hash[:save_the_file]
  flag
end
1029
+
1030
+ # ========================================================================= #
1031
+ # === overwrite_the_original_file?
1032
+ # ========================================================================= #
1033
def overwrite_the_original_file?
  # Query the flag stored under :overwrite_the_original_file.
  flag = @internal_hash[:overwrite_the_original_file]
  flag
end
1036
+
1037
+ # ========================================================================= #
1038
+ # === split_into_proper_sections
1039
+ #
1040
+ # Split up into the fasta identifier, and the content.
1041
+ # ========================================================================= #
1042
def split_into_proper_sections
  # In verbose mode, warn if the dataset has no ">" identifier at all.
  if be_verbose? && !@data.to_s.include?('>')
    erev 'No ">" character was found in this dataset.'
    erev 'It is recommended to always have a > identifier '\
         'for the'
    erev 'FASTA format (such as in a .fasta or a .fa file).'
  end
  @data.each do |line|
    # ===================================================================== #
    # === Handle the leading > FASTA identifier first
    # ===================================================================== #
    if line.start_with?('>')
      # All but the first character, without the line ending, is the key.
      @current_key = line[1..-1].chomp
      @hash[@current_key] = ''.dup
      next
    end
    line.delete!('_')
    # Sequence data without any preceding header is stored under the
    # key 'standard'.
    unless @current_key
      @current_key = 'standard'
      @hash[@current_key] = ''.dup
    end
    # ===================================================================== #
    # === Retain the newlines
    #
    # The newlines are NOT removed here - that way the dataset can
    # simply be saved again.
    # ===================================================================== #
    @hash[@current_key] << line
  end
end
1074
+
1075
+ # ========================================================================= #
1076
+ # === save_into_a_fasta_file
1077
+ # ========================================================================= #
1078
def save_into_a_fasta_file(
    be_verbose = be_verbose?
  )
  # Saves @data into the file "standard.fasta" in the current
  # directory and returns the absolute path of that file.
  #
  # The Symbol :be_verbose may be passed instead of true.
  be_verbose = true if be_verbose == :be_verbose
  if @data
    # The lines may or may not carry a line ending already (depending
    # on whether they were read from a file), so the line endings are
    # normalized first; otherwise empty lines would end up between the
    # records.
    what = @data.map { |line| line.to_s.chomp }.join("\n")
    what << "\n" unless what.empty?
    into = 'standard.fasta'
    erev 'Saving into '+sfile(into)+rev+'.' if be_verbose
    write_what_into(what, into)
    return File.absolute_path(into) # And return the file we saved into.
  else
    opnn; erev 'No @data variable exists.'
  end
end
alias do_save_the_file save_into_a_fasta_file # === do_save_the_file
1095
+
1096
+ # ========================================================================= #
1097
+ # === add_length_information_to_the_header
1098
+ # ========================================================================= #
1099
def add_length_information_to_the_header
  # The new header is the old one plus a " length=N;" entry.
  annotated_header = header?.strip
  annotated_header << ' length='+sequence_size?.to_s+';'
  # ======================================================================= #
  # Next, designate where to store this file.
  # ======================================================================= #
  into = overwrite_the_original_file? ? @input_file : 'new_fasta_file.fasta'
  what = ''.dup
  what << "> "+annotated_header+"\n"
  what << raw_body?
  # Nothing is stored when no target file could be determined.
  if into
    erev 'Storing into `'+sfile(into)+rev+'`.'
    write_what_into(what, into)
  end
end
1117
+
1118
+ # ========================================================================= #
1119
+ # === simplify_header
1120
+ #
1121
+ # This method can be called to simplify the header. It will save into
1122
+ # a .fasta file at once.
1123
+ # ========================================================================= #
1124
def simplify_header
  # Simplifies the first header and stores the result, followed by the
  # raw body, into a .fasta file at once.
  #
  # The simplified header is determined as follows:
  #
  #   - "... [Pan troglodytes]"  => the text between [ and ]
  #   - "Homo sapiens, clone 1"  => the text before the first ','
  #   - "... Human ..."          => "Human"
  #
  # If none of these rules applies then nothing is stored.
  header = header?
  bracketed = header[/\[(.+)\]/, 1]
  simplified =
    if bracketed
      # [] entries are checked first because they allow us to simplify
      # the header at once.
      bracketed.delete('[]')
    elsif header.include?(',')
      header.split(',', 2).first.to_s.strip
    elsif header.include?('Human')
      'Human'
    end
  if simplified.nil? || simplified.empty?
    erev "Unsure what to do: #{steelblue(header)}"
    return
  end
  # ======================================================================= #
  # Next, designate where to store this file.
  # ======================================================================= #
  into = overwrite_the_original_file? ? @input_file : 'new_fasta_file.fasta'
  return unless into
  what = "> #{simplified}\n".dup
  what << raw_body?.to_s
  erev 'Storing into `'+sfile(into)+rev+'`.'
  write_what_into(what, into)
end
1166
+
1167
+ # ========================================================================= #
1168
+ # === sequence
1169
+ #
1170
+ # This method will return the sequence, without any newlines. It is also
1171
+ # called the "body" of a FASTA file.
1172
+ # ========================================================================= #
1173
def sequence
  # Returns the first sequence without any newlines (the "body" of the
  # FASTA file).
  #
  # The non-destructive chomp is used so that this getter does not
  # modify the sequence that is stored in @hash.
  raw = @hash.values.first
  raw = raw.chomp if raw && raw.end_with?(N)
  return no_newlines(raw)
end
alias fasta_sequence sequence # === fasta_sequence
alias sequence? sequence # === sequence?
alias body? sequence # === body?
alias body sequence # === body
alias naseq sequence # === naseq
alias nucleotide_sequence sequence # === nucleotide_sequence
alias return_sequence sequence # === return_sequence
alias content? sequence # === content?
1185
+
1186
+ # ========================================================================= #
1187
+ # === save
1188
+ #
1189
+ # This method will save our FASTA file.
1190
+ # ========================================================================= #
1191
def save
  # Saves @data into @input_file and returns the name of that file.
  # If no input file was registered yet then 'foobar.fasta' is
  # registered first.
  if @input_file.nil?
    erev "The generic file #{sfile('foobar.fasta')}#{rev} "\
         "will be used."
    set_input_file('foobar.fasta')
  end
  into = @input_file
  # The lines may or may not carry a line ending already (depending on
  # whether they were read from a file), so the line endings are
  # normalized first; otherwise empty lines would end up between the
  # records.
  what = @data.map { |line| line.to_s.chomp }.join("\n")
  what << "\n" unless what.empty?
  erev 'Storing into '+sfile(into)+rev+'.'
  write_what_into(what, into)
  return into
end
1203
+
1204
# ========================================================================= #
# === []
#
# A simpler query-interface: the argument is forwarded to the #[] method
# of whatever sequences? returns (DNA/RNA sequences, or aminoacid
# sequences if the FASTA file describes a protein).
#
# The header of the original implementation notes that sequences? works
# on @hash. This is fine because a ruby Hash preserves the insertion
# order of its entries (it is not "sorted", merely insertion-ordered).
# ========================================================================= #
def [](i)
  all_sequences = sequences?
  all_sequences[i]
end
1218
+
1219
# ========================================================================= #
# === Bioroebe::ParseFasta[]
#
# Class-level shortcut: instantiates the class with the given input and
# returns what sequences? yields for that new instance.
# ========================================================================= #
def self.[](i)
  new(i).sequences?
end
1226
+
1227
# ========================================================================= #
# === type?
#
# Classifies the sequence and returns one of three Symbols:
#
#   :protein    - is_the_sequence_a_polypeptide? returned a truthy value
#   :dna_or_rna - is_this_sequence_a_polynucleotide_sequence? did so
#   :unknown    - neither check succeeded
#
# The protein check is always performed first.
# ========================================================================= #
def type?
  return :protein    if is_the_sequence_a_polypeptide?
  return :dna_or_rna if is_this_sequence_a_polynucleotide_sequence?
  :unknown
end
1239
+
1240
# ========================================================================= #
# === is_the_sequence_a_polypeptide?
#
# This method can be used to determine whether a given input sequence
# is a polypeptide (aka a protein) or whether it is not.
#
# Only the first 120 characters of the input are inspected. If any of
# them is one of the (uppercase) letters listed below, true is returned;
# otherwise false is returned. The comparison is case-sensitive.
#
# The argument defaults to main_sequence?. A nil or empty input does not
# raise; it is treated as "no sequence" and yields false.
#
# NOTE(review): several of the letters checked here (B D H K M R S V W Y)
# also appear to be IUPAC ambiguity codes for nucleotides, so a
# nucleotide sequence using such codes would be reported as a protein -
# confirm whether this is intended.
# ========================================================================= #
def is_the_sequence_a_polypeptide?(
    i = main_sequence?
  )
  # ======================================================================= #
  # Look at the first 120 positions to determine whether this is a protein
  # or a nucleotide sequence. to_s guards against a nil sequence, which
  # would otherwise raise a NoMethodError on the slice.
  # ======================================================================= #
  subsequence = i.to_s[0, 120]
  keys_to_check_for = %w(
    B D E F H I J K L M O P Q R S V W X Y Z
  )
  # ======================================================================= #
  # A single matching character is enough, so no frequency table has to
  # be built.
  # ======================================================================= #
  subsequence.each_char.any? {|character|
    keys_to_check_for.include?(character)
  }
end; alias is_protein?   is_the_sequence_a_polypeptide? # === is_protein?
     alias is_a_protein? is_the_sequence_a_polypeptide? # === is_a_protein?
1283
+
1284
# ========================================================================= #
# === main_sequence?
#
# Returns the value of the first entry stored in @hash, or nil if @hash
# holds no entries.
# ========================================================================= #
def main_sequence?
  @hash.each_value.first
end
1292
+
1293
# ========================================================================= #
# === gc_content?
#
# Hands the first sequence (main_sequence?) to ::Bioroebe.gc_content()
# and returns the result converted via .to_f, so callers always obtain
# a Float.
# ========================================================================= #
def gc_content?
  raw_gc_content = ::Bioroebe.gc_content(main_sequence?)
  raw_gc_content.to_f # Must be a float.
end; alias gc_content gc_content? # === gc_content
1299
+
1300
# ========================================================================= #
# === sequence_object
#
# Wraps the first sequence (main_sequence?) into a new instance of
# ::Bioroebe::Sequence and returns that instance.
#
# Usage example:
#
#   x = Bioroebe.parse_fasta 'ls_orchid.fasta'
#   y = x.sequence_object # y is now an instance of Bioroebe::Sequence
#
# ========================================================================= #
def sequence_object
  first_entry = main_sequence?
  ::Bioroebe::Sequence.new(first_entry)
end
1314
+
1315
+ end
1316
+
1317
+ Fasta = ParseFasta # Add an "alias" constant to class ParseFasta.
1318
+
1319
# =========================================================================== #
# === Bioroebe.parse_fasta_quietly
#
# Forwards both arguments to Bioroebe.parse_fasta(), together with a
# block that yields :be_quiet, and returns whatever that call returns.
# =========================================================================== #
def self.parse_fasta_quietly(
    i, use_colours = true
  )
  ::Bioroebe.parse_fasta(i, use_colours) do
    :be_quiet
  end
end
1329
+
1330
# =========================================================================== #
# === Bioroebe.return_fasta_entry_with_the_highest_gc_content
#
# The first argument should be a locally existing FASTA file that
# contains different sequences.
#
# The content of that file is parsed quietly; every value of the
# resulting hash (hash?) is replaced, in place, by the Float obtained
# from ::Bioroebe.gc_content(). The entry with the largest such value is
# returned, in the [key, value] form that max_by yields for a Hash.
#
# If no file exists at the given location, a notification is emitted via
# erev() instead, and the return value is whatever erev() returns.
#
# Usage example:
#
#   x = Bioroebe.return_fasta_entry_with_the_highest_gc_content('/rosalind_gc.txt')
#
# =========================================================================== #
def self.return_fasta_entry_with_the_highest_gc_content(this_fasta_file)
  unless File.exist?(this_fasta_file)
    return erev("No file exists at #{sfile(this_fasta_file)}#{rev}.")
  end
  file_content = File.read(this_fasta_file)
  parser       = parse_fasta(file_content) { :be_quiet }
  gc_per_entry = parser.hash?
  gc_per_entry.transform_values! {|nucleotides|
    ::Bioroebe.gc_content(nucleotides).to_f
  }
  gc_per_entry.max_by {|_header, gc_value| gc_value }
end
1354
+
1355
# =========================================================================== #
# === Bioroebe.sizeseq
#
# This method will "size-sequence compare", typically on a .fasta file.
#
# If an Array is given, only its first element is used. The input is
# parsed quietly via Bioroebe.parse_fasta(), and the result of calling
# do_sort_by_size on the parser is returned.
# =========================================================================== #
def self.sizeseq(i)
  fasta_input = i.is_a?(Array) ? i.first : i
  parser = Bioroebe.parse_fasta(fasta_input) { :be_quiet }
  parser.do_sort_by_size
end
1367
+
1368
# =========================================================================== #
# === Bioroebe.return_sizeseq
#
# This is as Bioroebe.sizeseq(), but it will just return the result,
# rather than output it.
#
# If an Array is given, only its first element is used. For every
# key/sequence pair obtained from return_size_sorted_hash, a header line
# (stating the size of the sequence and the key) is appended to the
# result String, followed by the sequence itself and two N separators.
# =========================================================================== #
def self.return_sizeseq(i)
  fasta_input = i.is_a?(Array) ? i.first : i
  parser = Bioroebe.parse_fasta(fasta_input) { :be_quiet }
  report = ''.dup
  parser.return_size_sorted_hash.each_pair do |key, sequence|
    length = sequence.size.to_s
    header = '> ID '+length+' AA.; DE: '+key.to_s+
             ' SQ '+length+' AA'+N
    report << header
    report << sequence+N+N
  end
  report
end
1388
+
1389
# =========================================================================== #
# === Bioroebe.genbank_to_fasta
#
# This method will convert from a genbank file, to a .fasta file.
#
# The first argument is the path to the file (if an Array is given, only
# its first element is used). The second argument is handed on to
# save_into_a_fasta_file(); the Symbol :be_quiet is translated into
# false beforehand.
#
# If the file does not exist, a parser is created with :do_not_run_yet
# and then populated via set_data() (which uses the default file) and
# split_into_proper_sections().
#
# Returns what save_into_a_fasta_file() returns, which the original
# implementation treats as the path of the stored file.
#
# Invocation example:
#
#   Bioroebe.genbank_to_fasta('/home/x/DATA/PROGRAMMING_LANGUAGES/RUBY/src/bioroebe/lib/bioroebe/data/genbank/sample_file.genbank')
#
# =========================================================================== #
def self.genbank_to_fasta(
    this_file,
    be_verbose = :be_verbose
  )
  be_verbose = false if :be_quiet === be_verbose
  this_file  = this_file.first if this_file.is_a?(Array)
  parser =
    if File.exist?(this_file)
      Bioroebe::ParseFasta.new(this_file) { :be_quiet }
    else
      fallback = Bioroebe::ParseFasta.new(:do_not_run_yet) { :be_quiet }
      fallback.set_data # This will use the default file.
      fallback.split_into_proper_sections
      fallback
    end
  parser.save_into_a_fasta_file(be_verbose)
end
1420
+
1421
# =========================================================================== #
# === Bioroebe.parse_fasta_file
#
# Instantiates ParseFasta for the given input (ARGV by default). The
# block handed to the constructor returns an options Hash that carries
# the use_colours argument and always sets be_verbose to false.
#
# Returns the new ParseFasta instance.
# =========================================================================== #
def self.parse_fasta_file(
    i           = ARGV,
    use_colours = true
  )
  options = {
    use_colours: use_colours,
    be_verbose:  false
  }
  ParseFasta.new(i) { options }
end; self.instance_eval { alias fasta_file parse_fasta_file } # === Bioroebe.fasta_file
1434
+
1435
# =========================================================================== #
# === Bioroebe.parse_fasta
#
# Easier reader-method for .fasta files.
#
# The second argument determines whether colours will be used; it
# defaults to true. If a block is given, its return value is passed on
# under the key :be_verbose (for instance, { :be_quiet }).
#
# Returns the new ::Bioroebe::ParseFasta instance.
#
# Invocation examples:
#
#   x = Bioroebe.parse_fasta('/rosalind_gc.txt')
#   hash = Bioroebe.parse_fasta('/rosalind_gc.txt').hash?
#
# =========================================================================== #
def self.parse_fasta(
    i,
    use_colours = true
  )
  options = { use_colours: use_colours }
  # Only add the :be_verbose key when the caller supplied a block.
  options[:be_verbose] = yield if block_given?
  ::Bioroebe::ParseFasta.new(i) { options }
end; self.instance_eval { alias fasta parse_fasta } # === Bioroebe.fasta
1465
+
1466
+ end
14
1467
 
15
1468
  if __FILE__ == $PROGRAM_NAME
16
1469
  Bioroebe::ParseFasta.new(ARGV) { :sizeseq }
@@ -24,4 +1477,4 @@ end # corefasta globins.fasta
24
1477
  # pfasta /GC.txt
25
1478
  # pfasta 013521.3_289_aa.fasta --also-show-the-sequence
26
1479
  # pfasta $RSRC/bioroebe/lib/bioroebe/data/GFP_mutant_3_coding_sequence.fasta --also-show-the-sequence
27
- # corefasta $J/globins.fasta
1480
+ # corefasta $J/globins.fasta