bioroebe 0.10.80 → 0.11.32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bioroebe might be problematic. Click here for more details.

Files changed (210) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3209 -2618
  3. data/bin/bioroebe +7 -1
  4. data/bioroebe.gemspec +3 -3
  5. data/doc/README.gen +3208 -2617
  6. data/doc/quality_control/commandline_applications.md +3 -3
  7. data/doc/todo/bioroebe_todo.md +2040 -2615
  8. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
  9. data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
  10. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
  11. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
  12. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
  13. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
  14. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +13 -11
  15. data/lib/bioroebe/base/commandline_application/misc.rb +26 -9
  16. data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
  17. data/lib/bioroebe/base/commandline_application/reset.rb +3 -2
  18. data/lib/bioroebe/base/misc.rb +35 -0
  19. data/lib/bioroebe/base/prototype/misc.rb +11 -1
  20. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
  21. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
  22. data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
  23. data/lib/bioroebe/codons/show_codon_usage.rb +2 -1
  24. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
  25. data/lib/bioroebe/constants/database_constants.rb +1 -1
  26. data/lib/bioroebe/constants/files_and_directories.rb +31 -4
  27. data/lib/bioroebe/constants/misc.rb +20 -0
  28. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
  29. data/lib/bioroebe/crystal/README.md +2 -0
  30. data/lib/bioroebe/crystal/to_rna.cr +19 -0
  31. data/lib/bioroebe/data/README.md +11 -8
  32. data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
  33. data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
  34. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
  35. data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
  36. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
  37. data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
  38. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
  39. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
  40. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
  41. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
  42. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
  43. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
  44. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
  45. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1460 -7
  46. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
  47. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
  48. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
  49. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
  50. data/lib/bioroebe/genome/README.md +4 -0
  51. data/lib/bioroebe/genome/genome.rb +67 -0
  52. data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
  53. data/lib/bioroebe/gui/gtk +1 -0
  54. data/lib/bioroebe/gui/gtk3/controller/controller.rb +45 -27
  55. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +76 -50
  56. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +99 -21
  57. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +42 -28
  58. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +119 -71
  59. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
  60. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +19 -11
  61. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
  62. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
  63. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
  64. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
  65. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
  66. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
  67. data/lib/bioroebe/java/README.md +4 -0
  68. data/lib/bioroebe/java/bioroebe/Sequence.java +25 -1
  69. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Base.class +0 -0
  70. data/lib/bioroebe/java/bioroebe/{Base.java → src/main/java/bioroebe/Base.java} +15 -2
  71. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/main/java/bioroebe/BisulfiteTreatment.class} +0 -0
  72. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/main/java/bioroebe/BisulfiteTreatment.java} +0 -0
  73. data/lib/bioroebe/java/bioroebe/{Codons.class → src/main/java/bioroebe/Codons.class} +0 -0
  74. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Codons.java +34 -0
  75. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.class +0 -0
  76. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.java +95 -0
  77. data/lib/bioroebe/java/bioroebe/{Esystem.class → src/main/java/bioroebe/Esystem.class} +0 -0
  78. data/lib/bioroebe/java/bioroebe/{Esystem.java → src/main/java/bioroebe/Esystem.java} +0 -0
  79. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/main/java/bioroebe/GenerateRandomDnaSequence.class} +0 -0
  80. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/main/java/bioroebe/GenerateRandomDnaSequence.java} +8 -2
  81. data/lib/bioroebe/java/bioroebe/{IsPalindrome.class → src/main/java/bioroebe/IsPalindrome.class} +0 -0
  82. data/lib/bioroebe/java/bioroebe/{IsPalindrome.java → src/main/java/bioroebe/IsPalindrome.java} +5 -1
  83. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.class +0 -0
  84. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.java +56 -0
  85. data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/main/java/bioroebe/RemoveFile.class} +0 -0
  86. data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/main/java/bioroebe/RemoveFile.java} +10 -4
  87. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/main/java/bioroebe/RemoveNumbers.class} +0 -0
  88. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/main/java/bioroebe/RemoveNumbers.java} +1 -0
  89. data/lib/bioroebe/java/bioroebe/{ToCamelcase.class → src/main/java/bioroebe/ToCamelcase.class} +0 -0
  90. data/lib/bioroebe/java/bioroebe/{ToCamelcase.java → src/main/java/bioroebe/ToCamelcase.java} +3 -3
  91. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.class +0 -0
  92. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.java +42 -0
  93. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.class +0 -0
  94. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.java +73 -0
  95. data/lib/bioroebe/misc/ruler.rb +1 -0
  96. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
  97. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
  98. data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
  99. data/lib/bioroebe/parsers/gff.rb +1 -9
  100. data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
  101. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
  102. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
  103. data/lib/bioroebe/pdb/parse_pdb_file.rb +1 -9
  104. data/lib/bioroebe/project/project.rb +1 -1
  105. data/lib/bioroebe/python/README.md +1 -0
  106. data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
  107. data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
  108. data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
  109. data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
  110. data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
  111. data/lib/bioroebe/python/mymodule.py +8 -0
  112. data/lib/bioroebe/python/protein_to_dna.py +33 -0
  113. data/lib/bioroebe/python/shell/shell.py +19 -0
  114. data/lib/bioroebe/python/to_rna.py +14 -0
  115. data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
  116. data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
  117. data/lib/bioroebe/python/toplevel_methods/palindromes.py +42 -0
  118. data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
  119. data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
  120. data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
  121. data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
  122. data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
  123. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
  124. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
  125. data/lib/bioroebe/sequence/protein.rb +105 -3
  126. data/lib/bioroebe/sequence/sequence.rb +61 -2
  127. data/lib/bioroebe/shell/menu.rb +3819 -3713
  128. data/lib/bioroebe/shell/misc.rb +51 -4311
  129. data/lib/bioroebe/shell/readline/readline.rb +1 -1
  130. data/lib/bioroebe/shell/shell.rb +11250 -28
  131. data/lib/bioroebe/siRNA/siRNA.rb +81 -1
  132. data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
  133. data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
  134. data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
  135. data/lib/bioroebe/taxonomy/constants.rb +4 -3
  136. data/lib/bioroebe/taxonomy/edit.rb +2 -1
  137. data/lib/bioroebe/taxonomy/help/help.rb +10 -10
  138. data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
  139. data/lib/bioroebe/taxonomy/info/info.rb +18 -11
  140. data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
  141. data/lib/bioroebe/taxonomy/interactive.rb +140 -104
  142. data/lib/bioroebe/taxonomy/menu.rb +27 -18
  143. data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
  144. data/lib/bioroebe/taxonomy/shared.rb +1 -0
  145. data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
  146. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
  147. data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
  148. data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
  149. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
  150. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
  151. data/lib/bioroebe/toplevel_methods/misc.rb +118 -11
  152. data/lib/bioroebe/toplevel_methods/nucleotides.rb +22 -5
  153. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
  154. data/lib/bioroebe/toplevel_methods/palindromes.rb +1 -2
  155. data/lib/bioroebe/toplevel_methods/taxonomy.rb +2 -2
  156. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
  157. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
  158. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
  159. data/lib/bioroebe/utility_scripts/compacter.rb +1 -9
  160. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
  161. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
  162. data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
  163. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
  164. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
  165. data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
  166. data/lib/bioroebe/version/version.rb +2 -2
  167. data/lib/bioroebe/www/embeddable_interface.rb +101 -52
  168. data/lib/bioroebe/www/sinatra/sinatra.rb +186 -70
  169. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
  170. data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
  171. data/lib/bioroebe/yaml/genomes/README.md +3 -4
  172. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +3 -3
  173. metadata +69 -64
  174. data/doc/setup.rb +0 -1655
  175. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
  176. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
  177. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
  178. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
  179. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
  180. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
  181. data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
  182. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  183. data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
  184. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  185. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
  186. data/lib/bioroebe/java/bioroebe.jar +0 -0
  187. data/lib/bioroebe/shell/add.rb +0 -108
  188. data/lib/bioroebe/shell/assign.rb +0 -360
  189. data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
  190. data/lib/bioroebe/shell/constants.rb +0 -166
  191. data/lib/bioroebe/shell/download.rb +0 -335
  192. data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
  193. data/lib/bioroebe/shell/enzymes.rb +0 -310
  194. data/lib/bioroebe/shell/fasta.rb +0 -345
  195. data/lib/bioroebe/shell/gtk.rb +0 -76
  196. data/lib/bioroebe/shell/history.rb +0 -132
  197. data/lib/bioroebe/shell/initialize.rb +0 -217
  198. data/lib/bioroebe/shell/loop.rb +0 -74
  199. data/lib/bioroebe/shell/prompt.rb +0 -107
  200. data/lib/bioroebe/shell/random.rb +0 -289
  201. data/lib/bioroebe/shell/reset.rb +0 -335
  202. data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
  203. data/lib/bioroebe/shell/search.rb +0 -337
  204. data/lib/bioroebe/shell/sequences.rb +0 -200
  205. data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
  206. data/lib/bioroebe/shell/startup.rb +0 -127
  207. data/lib/bioroebe/shell/taxonomy.rb +0 -14
  208. data/lib/bioroebe/shell/tk.rb +0 -23
  209. data/lib/bioroebe/shell/user_input.rb +0 -88
  210. data/lib/bioroebe/shell/xorg.rb +0 -45
@@ -1,981 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # === Bioroebe::ParseFasta
6
- #
7
- # This class will parse through a local FASTA file and find the
8
- # proper entries.
9
- #
10
- # A FASTA file may have nucleotides or an aminoacid-sequence, so
11
- # we have to keep this in mind when parsing it.
12
- #
13
- # Usage examples:
14
- #
15
- # Bioroebe::ParseFasta.new(ARGV)
16
- # Bioroebe.parse_fasta(ARGV)
17
- #
18
- # =========================================================================== #
19
- # require 'bioroebe/fasta_and_fastq/parse_fasta/misc.rb'
20
- # Bioroebe.parse_fasta
21
- # Bioroebe.sizeseq
22
- # =========================================================================== #
23
- require 'bioroebe/base/commandline_application/commandline_application.rb'
24
- require 'bioroebe/fasta_and_fastq/parse_fasta/menu.rb'
25
- require 'bioroebe/fasta_and_fastq/parse_fasta/report.rb'
26
-
27
- module Bioroebe
28
-
29
- class ParseFasta < ::Bioroebe::CommandlineApplication # === Bioroebe::ParseFasta
30
-
31
- require 'bioroebe/calculate/calculate_gc_content.rb'
32
-
33
- # ========================================================================= #
34
- # === show_help (help tag)
35
- #
36
- # This method will inform the user how this class may be used from the
37
- # commandline.
38
- #
39
- # Invocation example:
40
- #
41
- # pfasta --help
42
- #
43
- # ========================================================================= #
44
- def show_help
45
- e
46
- eparse ' --size'
47
- eparse ' --also-show-the-sequence'
48
- eparse ' --header # show the header as well (normally the '\
49
- 'header is not shown)'
50
- eparse ' --limit=1000 # limit to show only the first 1000 '\
51
- 'nucleotides; use'
52
- eparse ' # any number that you need here'
53
- eparse ' --one-line # show the sequence on one line only, '\
54
- 'e. g. all newlines'
55
- eparse ' # were removed'
56
- eparse ' --toprotein # show the protein sequence as well '\
57
- '(assumes DNA or RNA'
58
- eparse ' # .fasta file)'
59
- e
60
- end
61
-
62
- # ========================================================================= #
63
- # === show_the_translated_protein_sequence?
64
- # ========================================================================= #
65
- def show_the_translated_protein_sequence?
66
- @internal_hash[:show_the_translated_protein_sequence]
67
- end
68
-
69
- # ========================================================================= #
70
- # === set_round_to
71
- #
72
- # This will set to how many decimal numbers we will round to. This is
73
- # mostly done for display-purposes, hence why the default is a fairly
74
- # low value.
75
- # ========================================================================= #
76
- def set_round_to(
77
- i = :default
78
- )
79
- case i
80
- # ======================================================================= #
81
- # === :default
82
- #
83
- # Since as of April 2021, the new default is 2, for rounding.
84
- # ======================================================================= #
85
- when :default
86
- i = DEFAULT_ROUND_TO
87
- end
88
- @round_to = i.to_i
89
- end
90
-
91
- # ========================================================================= #
92
- # === do_process_the_commandline_arguments_that_are_files
93
- # ========================================================================= #
94
- def do_process_the_commandline_arguments_that_are_files(
95
- these_files = commandline_arguments_that_are_files?
96
- )
97
- unless these_files.is_a? Array
98
- these_files = [these_files].flatten.compact
99
- end
100
- these_files.each {|this_file|
101
- set_input_file(this_file)
102
- set_data # This will use the default file.
103
- split_into_proper_sections
104
- report_the_FASTA_header if @show_the_header
105
- if @sort_by_size
106
- run_sizeseq_comparison
107
- else
108
- # =================================================================== #
109
- # === Handle cases where the input is a protein
110
- # =================================================================== #
111
- if is_the_sequence_a_polypeptide?
112
- if be_verbose?
113
- erev "This sequence is assumed to be a #{royalblue('protein')}#{rev}."
114
- report_how_many_elements_we_have_found
115
- end
116
- else # Must be a protein.
117
- # =================================================================== #
118
- # === Else it must be RNA or DNA
119
- # =================================================================== #
120
- if be_verbose?
121
- erev "This sequence is assumed to "\
122
- "be #{royalblue('DNA')}#{rev} or #{royalblue('RNA')}#{rev}."
123
- end
124
- calculate_gc_content # GC content makes only sense for nucleotides.
125
- report_how_many_elements_we_have_found if be_verbose?
126
- end
127
- if be_verbose?
128
- report_the_nucleotide_composition
129
- report_on_how_many_entries_we_did_work
130
- if report_the_sequence?
131
- do_report_the_sequence
132
- end
133
- end
134
- end
135
- }
136
- end
137
-
138
- # ========================================================================= #
139
- # === sanitize_the_description
140
- #
141
- # This method will iterate over the description entry and sanitize
142
- # it. In this context sanitizing means to add the "length" entry,
143
- # and the "type" entry, such as in:
144
- #
145
- # " # length=231; type=dna"
146
- #
147
- # ========================================================================= #
148
- def sanitize_the_description
149
- @data.map! {|line|
150
- if line.start_with?('>') and !line.include?('length=')
151
- length = 0
152
- if @hash.has_key? line.delete('>')
153
- length = @hash[line.delete('>')].size
154
- end
155
- line << " # length=#{length}; type=dna" # Currently hardcoded to DNA.
156
- end
157
- line
158
- }
159
- end
160
-
161
- # ========================================================================= #
162
- # === entries?
163
- # ========================================================================= #
164
- def entries?
165
- @data
166
- end
167
-
168
- # ========================================================================= #
169
- # === we_may_exit
170
- # ========================================================================= #
171
- def we_may_exit
172
- @may_we_exit = true
173
- end
174
-
175
- # ========================================================================= #
176
- # === output_results
177
- # ========================================================================= #
178
- def output_results
179
- pp @hash
180
- end
181
-
182
- # ========================================================================= #
183
- # === sanitize_data
184
- # ========================================================================= #
185
- def sanitize_data(i)
186
- if i.is_a? Array
187
- i.flatten!
188
- i.reject! {|entry| entry.start_with? '#' }
189
- i.reject! {|entry| entry.strip.empty? }
190
- if i.first and i.first.include? "\r"
191
- # =================================================================== #
192
- # Some FASTA files include "\r" line endings. We will check first
193
- # for the first entry to contain a \r, and if so, we assume the
194
- # whole FASTA file may have \r, which then will be removed.
195
- # =================================================================== #
196
- i.map! {|entry| entry.delete("\r") }
197
- end
198
- end
199
- # ========================================================================= #
200
- # === Run through SanitizeNucleotideSequence
201
- # ========================================================================= #
202
- if @internal_hash[:remove_numbers_from_input]
203
- i = Bioroebe::SanitizeNucleotideSequence[i]
204
- end
205
- i
206
- end
207
-
208
- # ========================================================================= #
209
- # === current_key?
210
- # ========================================================================= #
211
- def current_key?
212
- @current_key
213
- end; alias id? current_key? # === id?
214
- alias sequence_id? current_key? # === sequence_id?
215
- alias title current_key? # === title
216
- alias title? current_key? # === title?
217
-
218
- # ========================================================================= #
219
- # === opnn
220
- # ========================================================================= #
221
- def opnn
222
- super(NAMESPACE) if use_opn?
223
- end
224
-
225
- # ========================================================================= #
226
- # === use_opn?
227
- # ========================================================================= #
228
- def use_opn?
229
- @use_opn
230
- end
231
-
232
- # ========================================================================= #
233
- # === calculate_gc_content
234
- #
235
- # Calculate the gc content through this method, which is called from
236
- # within the method run().
237
- # ========================================================================= #
238
- def calculate_gc_content
239
- _ = @hash.values.join.delete(N)
240
- if is_polynucleotide? _
241
- @hash.each_pair {|key, content|
242
- # =================================================================== #
243
- # Delegate towards the method Bioroebe.gc_content next, including
244
- # to round towards 5 positions:
245
- # =================================================================== #
246
- gc_content = ::Bioroebe.gc_content(content.upcase, @round_to)
247
- gc_content = gc_content.first if gc_content.is_a? Array
248
- gc_content = gc_content.to_s
249
- minimal_key = key.to_s
250
- if minimal_key.include? '|'
251
- minimal_key = minimal_key.split('|').last.strip
252
- end
253
- if be_verbose?
254
- _ = minimal_key.strip
255
- if _.size > 40 # Shorten the content a bit if it is too long.
256
- _ = _[0 .. 40]+' [...]'
257
- end
258
- erev 'GC content of "'+simp(_)+rev+'" is: '+
259
- "#{sfancy(gc_content)}#{rev} %"
260
- end
261
- }
262
- else
263
- erev '`'+simp(_)+rev+'` is not a polynucleotide.' if be_verbose?
264
- end
265
- end
266
-
267
- # ========================================================================= #
268
- # === first_value
269
- #
270
- # This will return the first entry of the Fasta files.
271
- # ========================================================================= #
272
- def first_value
273
- sequences?.first
274
- end
275
-
276
- # ========================================================================= #
277
- # === nucleotides_or_aminoacids?
278
- # ========================================================================= #
279
- def nucleotides_or_aminoacids?
280
- if is_polynucleotide?
281
- 'nucleotides'
282
- else
283
- 'aminoacids'
284
- end
285
- end
286
-
287
- # ========================================================================= #
288
- # === is_polynucleotide?
289
- # ========================================================================= #
290
- def is_polynucleotide?(i = main_sequence?)
291
- !is_protein?(i)
292
- end; alias is_a_nucleotide? is_polynucleotide? # === is_a_nucleotide?
293
-
294
- # ========================================================================= #
295
- # === is_this_sequence_a_polynucleotide_sequence?
296
- # ========================================================================= #
297
- def is_this_sequence_a_polynucleotide_sequence?
298
- !is_protein?
299
- end
300
-
301
- # ========================================================================= #
302
- # === data?
303
- #
304
- # This will contain the full content of the (whole) .fasta file, including
305
- # the header.
306
- # ========================================================================= #
307
- def data?
308
- @data
309
- end; alias input? data? # === input?
310
- alias dataset? data? # === dataset?
311
-
312
- # ========================================================================= #
313
- # === hash?
314
- # ========================================================================= #
315
- def hash?
316
- @hash
317
- end
318
-
319
- # ========================================================================= #
320
- # === sequences?
321
- #
322
- # This method will obtain all found sequences.
323
- # ========================================================================= #
324
- def sequences?
325
- @hash.values
326
- end; alias sequences sequences? # === sequences
327
- alias values sequences? # === values
328
-
329
- # ========================================================================= #
330
- # === short_headers?
331
- #
332
- # The short-headers are like the headers, but if a ' ' token is found
333
- # then the line will be truncated towards that first ' '.
334
- #
335
- # An example is:
336
- #
337
- # sp|Q91FT8|234R_IIV6 Uncharacterized protein 234R OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-234R PE=4 SV=1
338
- #
339
- # This will be truncated towards
340
- #
341
- # sp|Q91FT8|234R_IIV6
342
- #
343
- # This could then be used to automatically rename FASTA files, for
344
- # instance.
345
- # ========================================================================= #
346
- def short_headers?
347
- headers?.map {|entry|
348
- if entry.include? ' '
349
- entry = entry.split(' ').first
350
- end
351
- entry
352
- }
353
- end
354
-
355
- # ========================================================================= #
356
- # === set_data
357
- #
358
- # This is the setter-method towards @data. It is no longer allowed to
359
- # invoke set_input_file() since as of 12.06.2020. This means that
360
- # you have to invoke that method prior to calling this method.
361
- # ========================================================================= #
362
- def set_data(i = @input_file)
363
- # ======================================================================= #
364
- # The next line attempts to ensure that even an Array can be used
365
- # as input to that method.
366
- # ======================================================================= #
367
- i = [i].flatten.compact.first.to_s.dup
368
- if File.exist? i.to_s # First try to read in from a file.
369
- if be_verbose?
370
- opnn; erev "Will read from the file `#{sfile(i)}#{rev}`."
371
- end
372
- i = File.readlines(i)
373
- if @is_a_genbank_file
374
- selected = i.select {|line|
375
- line.start_with?(' ') and # such as: " 61 atggggcctg caatggggcc tgcaatgggg cctgca\n"
376
- (line.strip =~ /\d+/)
377
- }.map {|inner_line|
378
- inner_line.strip.delete(' 0123456789').strip.upcase
379
- }
380
- i = ["> genbank file"]+selected
381
- end
382
- end
383
- if i.nil? or i.empty?
384
- i = DEFAULT_FASTA
385
- opnn; erev 'No input was provided. Thus a default FASTA '\
386
- 'sequence will be used instead.'
387
- end
388
- i = sanitize_data(i)
389
- i = i.split(N) if i.is_a? String
390
- @data = i
391
- end; alias set_sequence set_data # === set_Sequence
392
-
393
- # ========================================================================= #
394
- # === set_be_verbose_and_report_the_sequence
395
- # ========================================================================= #
396
- def set_be_verbose_and_report_the_sequence
397
- set_be_verbose
398
- @internal_hash[:report_the_sequence] = true
399
- end
400
-
401
- # ========================================================================= #
402
- # === condense_the_sequence_onto_a_single_line?
403
- # ========================================================================= #
404
- def condense_the_sequence_onto_a_single_line?
405
- @internal_hash[:condense_the_sequence_onto_a_single_line]
406
- end
407
-
408
- # ========================================================================= #
409
- # === return_size_sorted_hash
410
- # ========================================================================= #
411
- def return_size_sorted_hash(i = @hash)
412
- _ = i.sort_by {|key, value| value.size }
413
- i = Hash[_]
414
- return i
415
- end
416
-
417
- # ========================================================================= #
418
- # === do_sort_by_size
419
- #
420
- # This method will sort the hash by size of the sequence. It has been
421
- # inspired by the EMBOSS sizeq functionality.
422
- #
423
- # The output that should be generated might look like this:
424
- #
425
- # https://www.bioinformatics.nl/cgi-bin/emboss/help/sizeseq#input.1
426
- #
427
- # Invocation example:
428
- #
429
- # x = Bioroebe::ParseFasta.new('/Depot/j/globins.fasta'); x.do_sort_by_size
430
- #
431
- # ========================================================================= #
432
- def do_sort_by_size
433
- # ======================================================================= #
434
- # Sort it here first, by the size of the "value", aka the sequence body.
435
- # ======================================================================= #
436
- @hash = return_size_sorted_hash(@hash)
437
- _ = ''.dup
438
- @hash.each_pair {|key, sequence|
439
- _ << '> ID '+sequence.size.to_s+' AA.; DE: '+key.to_s+
440
- ' SQ '+sequence.size.to_s+' AA'+N # ; unknown MW as of yet; '\
441
- #'unknown CRC64 as of yet'+N
442
- _ << sequence+N+N
443
- }
444
- e _
445
- end; alias run_sizeseq_comparison do_sort_by_size # === run_sizeseq_comparison
446
-
447
- # ========================================================================= #
448
- # === n_nucleotides?
449
- # ========================================================================= #
450
- def n_nucleotides?
451
- @hash.values.first.delete("\n").size
452
- end; alias return_n_aminoacids n_nucleotides? # === return_n_aminoacids
453
- alias size? n_nucleotides? # === size?
454
- alias sequence_size? n_nucleotides? # === sequence_size?
455
-
456
- # ========================================================================= #
457
- # === headers?
458
- # ========================================================================= #
459
- def headers?
460
- @hash.keys
461
- end
462
-
463
- # ========================================================================= #
464
- # === first_key?
465
- #
466
- # Obtain the very first entry.
467
- # ========================================================================= #
468
- def first_key?
469
- headers?.first
470
- end
471
-
472
- # ========================================================================= #
473
- # === header?
474
- #
475
- # This variant will always return the first entry.
476
- # ========================================================================= #
477
- def header?
478
- headers?.first.to_s
479
- end
480
-
481
- # ========================================================================= #
482
- # === raw_body?
483
- # ========================================================================= #
484
- def raw_body?
485
- @hash.values.first
486
- end
487
-
488
- # ========================================================================= #
489
- # === do_show_the_header
490
- # ========================================================================= #
491
- def do_show_the_header
492
- @show_the_header = true
493
- end
494
-
495
- # ========================================================================= #
496
- # === set_input_file
497
- #
498
- # This method will be used to keep track of the input-file, from
499
- # which we will read the dataset.
500
- # ========================================================================= #
501
- def set_input_file(i = nil)
502
- if i.nil?
503
- # ===================================================================== #
504
- # First, we try to find a .fasta or .fa file in the current
505
- # directory. If we can find it, we will use that instead.
506
- # ===================================================================== #
507
- unless Dir['*.{fa,fasta}'].empty?
508
- file = Dir['*.{fa,fasta}'].first
509
- if be_verbose?
510
- result = 'A '
511
- if file.end_with? '.fasta'
512
- result < 'FASTA '
513
- end
514
- result << 'file was found in this directory ('+sfile(file)+').'
515
- opnn; erev result
516
- opnn; erev 'We will use it.'
517
- end
518
- i = file
519
- end
520
- unless Dir['*.{fa,fasta}'].empty?
521
- file = Dir['*.{fa,fasta}'].first
522
- if be_verbose?
523
- opnn; erev "We have found a file in this "\
524
- "directory (#{sfile(file)}#{rev})."
525
- opnn; erev 'We will use it.'
526
- end
527
- i = file
528
- end
529
- end
530
- if i and File.exist?(i)
531
- dataset = File.read(i)
532
- if dataset[0 .. ('LOCUS'.size - 1)] == 'LOCUS'
533
- @is_a_genbank_file = true
534
- end
535
- end
536
- @input_file = i
537
- end; alias set_input_files set_input_file # === set_input_files
538
-
539
- # ========================================================================= #
540
- # === save_the_file?
541
- # ========================================================================= #
542
- def save_the_file?
543
- @internal_hash[:save_the_file]
544
- end
545
-
546
- # ========================================================================= #
547
- # === overwrite_the_original_file?
548
- # ========================================================================= #
549
- def overwrite_the_original_file?
550
- @internal_hash[:overwrite_the_original_file]
551
- end
552
-
553
- # ========================================================================= #
554
- # === split_into_proper_sections
555
- #
556
- # Split up into the fasta identifier, and the content.
557
- # ========================================================================= #
558
- def split_into_proper_sections
559
- unless @data.to_s.include? '>'
560
- erev 'No ">" character was found in this dataset.'
561
- erev 'It is recommended to always have a > identifier '\
562
- 'for the'
563
- erev 'FASTA format (such as in a .fasta or a .fa file).'
564
- end if be_verbose? # Ok, the input data includes >. We can proceed.
565
- @data.each { |line|
566
- # ===================================================================== #
567
- # === Handle the leading > FASTA identifier first
568
- # ===================================================================== #
569
- if line.start_with? '>' # leading identifier.
570
- @current_key = line[1..-1].chomp # Select all but the first character.
571
- @hash[@current_key] = ''.dup
572
- else
573
- line.delete!('_')
574
- unless @current_key
575
- @current_key = 'standard'
576
- @hash[@current_key] = ''.dup
577
- end
578
- # =================================================================== #
579
- # === Retain the newlines
580
- #
581
- # Here we may decide to get rid of newlines, but it is better to
582
- # NOT remove the newlines - that way we can simply save the
583
- # dataset again.
584
- # @hash[@current_key] << no_newlines(line)
585
- # =================================================================== #
586
- @hash[@current_key] << line
587
- end
588
- }
589
- end
590
-
591
- # ========================================================================= #
592
- # === save_into_a_fasta_file
593
- # ========================================================================= #
594
- def save_into_a_fasta_file(
595
- be_verbose = be_verbose?
596
- )
597
- case be_verbose
598
- when :be_verbose
599
- be_verbose = true
600
- end
601
- if @data
602
- what = @data.join("\n")
603
- into = 'standard.fasta'
604
- erev 'Saving into '+sfile(into)+rev+'.' if be_verbose
605
- write_what_into(what, into)
606
- return File.absolute_path(into) # And return the file we saved into.
607
- else
608
- opnn; erev 'No @data variable exists.'
609
- end
610
- end; alias do_save_the_file save_into_a_fasta_file # === do_save_the_file
611
-
612
- # ========================================================================= #
613
- # === add_length_information_to_the_header
614
- # ========================================================================= #
615
- def add_length_information_to_the_header
616
- _ = header?.strip
617
- _ << ' length='+sequence_size?.to_s+';'
618
- # ======================================================================= #
619
- # Next, designate where to store this file.
620
- # ======================================================================= #
621
- into = 'new_fasta_file.fasta'
622
- if overwrite_the_original_file?
623
- into = @input_file
624
- end
625
- what = ''.dup
626
- what << "> "+_+"\n"
627
- what << raw_body?
628
- if what and into
629
- erev 'Storing into `'+sfile(into)+rev+'`.'
630
- write_what_into(what, into)
631
- end
632
- end
633
-
634
- # ========================================================================= #
635
- # === simplify_header
636
- #
637
- # This method can be called to simplify the header. It will save into
638
- # a .fasta file at once.
639
- # ========================================================================= #
640
- def simplify_header
641
- _ = header?
642
- # ======================================================================= #
643
- # Next, simplify the header. We must start with checking for [] first,
644
- # because if there are any [] in the FASTA header then we can simplify
645
- # stuff at once.
646
- # ======================================================================= #
647
- if _.include?('[') and _.include?(']')
648
- _ = '> '+_.strip.scan(/\[.+\]/).flatten.first.delete('[]')+"\n"
649
- elsif _.include? ','
650
- _ = _[0 .. (_.index(',') - 1) ].strip
651
- end
652
- what = nil
653
- # ======================================================================= #
654
- # Next, designate where to store this file.
655
- # ======================================================================= #
656
- into = 'new_fasta_file.fasta'
657
- if overwrite_the_original_file?
658
- into = @input_file
659
- end
660
- if _.start_with? '>'
661
- what = _
662
- elsif _.include?('[') and _.include?(']') # For example: [Pan troglodytes]
663
- # ===================================================================== #
664
- # See rubular at:
665
- #
666
- # https://rubular.com/r/aDjI0JwMOUlZzP
667
- #
668
- # ===================================================================== #
669
- what = "> "+_.scan(/\[(.+)\]/).flatten.first.to_s+"\n".dup
670
- elsif _.include? 'Human'
671
- _scanned_result = _.scan(/(Human)/)
672
- what = "> "+$1.to_s.dup+"\n".dup
673
- else
674
- erev "Unsure what to do: #{steelblue(_)}"
675
- end
676
- if what and into
677
- what << raw_body?
678
- erev 'Storing into `'+sfile(into)+rev+'`.'
679
- write_what_into(what, into)
680
- end
681
- end
682
-
683
- # ========================================================================= #
684
- # === sequence
685
- #
686
- # This method will return the sequence, without any newlines. It is also
687
- # called the "body" of a FASTA file.
688
- # ========================================================================= #
689
- def sequence
690
- _ = @hash.values.first
691
- _.chomp! if _ and _.end_with?(N)
692
- return no_newlines(_)
693
- end; alias fasta_sequence sequence # === fasta_sequence
694
- alias sequence? sequence # === sequence?
695
- alias body? sequence # === body?
696
- alias body sequence # === body?
697
- alias naseq sequence # === naseq
698
- alias nucleotide_sequence sequence # === nucleotide_sequence
699
- alias return_sequence sequence # === return_sequence
700
- alias content? sequence # === content?
701
-
702
- # ========================================================================= #
703
- # === save
704
- #
705
- # This method will save our FASTA file.
706
- # ========================================================================= #
707
- def save
708
- if @input_file.nil?
709
- erev "The generic file #{sfile('foobar.fasta')}#{rev} "\
710
- "will be used."
711
- set_input_file('foobar.fasta')
712
- end
713
- into = @input_file
714
- what = @data.join("\n")
715
- erev 'Storing into '+sfile(into)+rev+'.'
716
- write_what_into(what, into)
717
- return into
718
- end
719
-
720
- # ========================================================================= #
721
- # === []
722
- #
723
- # This is a simpler query-interface for obtaining the DNA/RNA sequence
724
- # of the FASTA file (or aminoacid sequence, if we have a protein at
725
- # hand here).
726
- #
727
- # Using the method sequences? here, which in turn works on @hash, is
728
- # ok because Hashes are kept in a sorted manner in ruby since some
729
- # time.
730
- # ========================================================================= #
731
- def [](i)
732
- sequences?[i]
733
- end
734
-
735
- # ========================================================================= #
736
- # === Bioroebe::ParseFasta[]
737
- # ========================================================================= #
738
- def self.[](i)
739
- _ = new(i)
740
- _.sequences?
741
- end
742
-
743
- # ========================================================================= #
744
- # === type?
745
- # ========================================================================= #
746
- def type?
747
- if is_the_sequence_a_polypeptide?
748
- :protein
749
- elsif is_this_sequence_a_polynucleotide_sequence?
750
- :dna_or_rna
751
- else
752
- :unknown
753
- end
754
- end
755
-
756
- # ========================================================================= #
757
- # === is_the_sequence_a_polypeptide?
758
- #
759
- # This method can be used to determine whether a given input sequence
760
- # is a polypeptide (aka a protein) or whether it is not.
761
- #
762
- # If this sequence is a polypeptide then this method will return true.
763
- # Otherwise false will be returned.
764
- # ========================================================================= #
765
- def is_the_sequence_a_polypeptide?(
766
- i = main_sequence?
767
- )
768
- return_value = false # Set the default return value here.
769
- # ======================================================================= #
770
- # Look at the first 120 positions to determine whether this is a protein
771
- # or a nucleotide sequence.
772
- # ======================================================================= #
773
- subsequence = i[0 .. 119] # Must deduct 1 at the end since Arrays in ruby start at 0.
774
- # ======================================================================= #
775
- # Build a frequency of the characters there.
776
- # ======================================================================= #
777
- hash = {}
778
- hash.default = 0
779
- subsequence.chars.each {|character|
780
- hash[character] += 1
781
- }
782
- keys_to_check_for = %w(
783
- B D E F H I J K L M O P Q R S V W X Y Z
784
- )
785
-
786
- values = hash.select {|key, value|
787
- if keys_to_check_for.include? key
788
- true
789
- else
790
- false
791
- end
792
- }.values.sum
793
- if values > 0
794
- return_value = true
795
- end
796
- return return_value
797
- end; alias is_protein? is_the_sequence_a_polypeptide? # === is_protein?
798
- alias is_a_protein? is_the_sequence_a_polypeptide? # === is_a_protein?
799
-
800
- # ========================================================================= #
801
- # === main_sequence?
802
- #
803
- # This will always return the first entry.
804
- # ========================================================================= #
805
- def main_sequence?
806
- @hash.values.first
807
- end
808
-
809
- # ========================================================================= #
810
- # === gc_content?
811
- # ========================================================================= #
812
- def gc_content?
813
- return ::Bioroebe.gc_content(main_sequence?).to_f # Must be a float.
814
- end; alias gc_content gc_content? # === gc_content
815
-
816
- # ========================================================================= #
817
- # === sequence_object
818
- #
819
- # This method will return a Sequence object.
820
- #
821
- # Usage example:
822
- #
823
- # x = Bioroebe.parse_fasta 'ls_orchid.fasta'
824
- # y = x.sequence_object # y is now an instance of Bioroebe::Sequence
825
- #
826
- # ========================================================================= #
827
- def sequence_object
828
- ::Bioroebe::Sequence.new(main_sequence?)
829
- end
830
-
831
- end
832
-
833
- Fasta = ParseFasta # Add an "alias" constant to class ParseFasta.
834
-
835
- # =========================================================================== #
836
- # === Bioroebe.parse_fasta_quietly
837
- #
838
- # As the variant above, but will work quietly.
839
- # =========================================================================== #
840
- def self.parse_fasta_quietly(
841
- i, use_colours = true
842
- )
843
- ::Bioroebe.parse_fasta(i, use_colours) { :be_quiet }
844
- end
845
-
846
- # =========================================================================== #
847
- # === Bioroebe.return_fasta_entry_with_the_highest_gc_content
848
- #
849
- # The first argument should be a locally existing FASTA file that
850
- # contains different sequences.
851
- #
852
- # Usage example:
853
- #
854
- # x = Bioroebe.return_fasta_entry_with_the_highest_gc_content('/rosalind_gc.txt')
855
- #
856
- # =========================================================================== #
857
- def self.return_fasta_entry_with_the_highest_gc_content(this_fasta_file)
858
- if File.exist? this_fasta_file
859
- dataset = File.read(this_fasta_file)
860
- dataset = parse_fasta(dataset) { :be_quiet }
861
- hash = dataset.hash?
862
- hash.transform_values! {|this_value|
863
- ::Bioroebe.gc_content(this_value).to_f
864
- }
865
- return hash.max_by {|key, value| value }
866
- else
867
- erev "No file exists at #{sfile(this_fasta_file)}#{rev}."
868
- end
869
- end
870
-
871
- # =========================================================================== #
872
- # === Bioroebe.sizeseq
873
- #
874
- # This method will "size-sequence compare", typically on a .fasta file.
875
- # =========================================================================== #
876
- def self.sizeseq(i)
877
- if i.is_a? Array
878
- i = i.first
879
- end
880
- _ = Bioroebe.parse_fasta(i) { :be_quiet }
881
- _.do_sort_by_size
882
- end
883
-
884
- # =========================================================================== #
885
- # === Bioroebe.return_sizeseq
886
- #
887
- # This is as Bioroebe.sizeseq(), but it will just return the result,
888
- # rather than output it.
889
- # =========================================================================== #
890
- def self.return_sizeseq(i)
891
- if i.is_a? Array
892
- i = i.first
893
- end
894
- _ = Bioroebe.parse_fasta(i) { :be_quiet }
895
- hash = _.return_size_sorted_hash
896
- result = ''.dup
897
- hash.each_pair {|key, sequence|
898
- result << '> ID '+sequence.size.to_s+' AA.; DE: '+key.to_s+
899
- ' SQ '+sequence.size.to_s+' AA'+N
900
- result << sequence+N+N
901
- }
902
- return result
903
- end
904
-
905
- # =========================================================================== #
906
- # === Bioroebe.genbank_to_fasta
907
- #
908
- # This method will convert from a genbank file, to a .fasta file.
909
- #
910
- # Invocation example:
911
- #
912
- # Bioroebe.genbank_to_fasta('/home/x/DATA/PROGRAMMING_LANGUAGES/RUBY/src/bioroebe/lib/bioroebe/data/genbank/sample_file.genbank')
913
- #
914
- # =========================================================================== #
915
- def self.genbank_to_fasta(
916
- this_file,
917
- be_verbose = :be_verbose
918
- )
919
- case be_verbose
920
- when :be_quiet
921
- be_verbose = false
922
- end
923
- if this_file.is_a? Array
924
- this_file = this_file.first
925
- end
926
- if File.exist? this_file
927
- _ = Bioroebe::ParseFasta.new(this_file) { :be_quiet }
928
- else
929
- _ = Bioroebe::ParseFasta.new(:do_not_run_yet) { :be_quiet }
930
- _.set_data # This will use the default file.
931
- _.split_into_proper_sections
932
- end
933
- file_path = _.save_into_a_fasta_file(be_verbose)
934
- return file_path
935
- end
936
-
937
- # =========================================================================== #
938
- # === Bioroebe.parse_fasta_file
939
- # =========================================================================== #
940
- def self.parse_fasta_file(
941
- i, use_colours = true
942
- )
943
- use_this_hash = {
944
- use_colours: use_colours,
945
- be_verbose: false
946
- }
947
- ParseFasta.new(i) { use_this_hash }
948
- end; self.instance_eval { alias fasta_file parse_fasta_file } # === Bioroebe.fasta_file
949
-
950
- # =========================================================================== #
951
- # === Bioroebe.parse_fasta
952
- #
953
- # Easier reader-method for .fasta files.
954
- #
955
- # The second argument determines whether we will use colours or whether
956
- # we will not. For now, the default is to not use colours when we use
957
- # this particular class method.
958
- #
959
- # Invocation examples:
960
- #
961
- # x = Bioroebe.parse_fasta('/rosalind_gc.txt')
962
- # hash = Bioroebe.parse_fasta('/rosalind_gc.txt').hash?
963
- #
964
- # =========================================================================== #
965
- def self.parse_fasta(
966
- i,
967
- use_colours = true
968
- )
969
- use_this_hash = {
970
- use_colours: use_colours
971
- }
972
- if block_given?
973
- use_this_hash = {
974
- use_colours: use_colours,
975
- be_verbose: yield
976
- }
977
- end
978
- ::Bioroebe::ParseFasta.new(i) { use_this_hash }
979
- end; self.instance_eval { alias fasta parse_fasta } # === Bioroebe.fasta
980
-
981
- end