BioDSL 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/BioDSL.gemspec +1 -1
  4. data/Gemfile +6 -0
  5. data/README.md +289 -155
  6. data/Rakefile +18 -16
  7. data/lib/BioDSL.rb +1 -1
  8. data/lib/BioDSL/cary.rb +78 -53
  9. data/lib/BioDSL/command.rb +2 -2
  10. data/lib/BioDSL/commands.rb +1 -1
  11. data/lib/BioDSL/commands/add_key.rb +1 -1
  12. data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
  13. data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
  14. data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
  15. data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
  16. data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
  17. data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
  18. data/lib/BioDSL/commands/classify_seq.rb +8 -8
  19. data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
  20. data/lib/BioDSL/commands/clip_primer.rb +7 -7
  21. data/lib/BioDSL/commands/cluster_otus.rb +5 -5
  22. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  23. data/lib/BioDSL/commands/collect_otus.rb +2 -2
  24. data/lib/BioDSL/commands/complement_seq.rb +4 -4
  25. data/lib/BioDSL/commands/count.rb +1 -1
  26. data/lib/BioDSL/commands/count_values.rb +2 -2
  27. data/lib/BioDSL/commands/degap_seq.rb +6 -7
  28. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  29. data/lib/BioDSL/commands/dump.rb +2 -2
  30. data/lib/BioDSL/commands/filter_rrna.rb +4 -4
  31. data/lib/BioDSL/commands/genecall.rb +7 -7
  32. data/lib/BioDSL/commands/grab.rb +1 -1
  33. data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
  34. data/lib/BioDSL/commands/mask_seq.rb +4 -4
  35. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  36. data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
  37. data/lib/BioDSL/commands/merge_table.rb +1 -1
  38. data/lib/BioDSL/commands/merge_values.rb +1 -1
  39. data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
  40. data/lib/BioDSL/commands/plot_histogram.rb +4 -4
  41. data/lib/BioDSL/commands/plot_matches.rb +5 -5
  42. data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
  43. data/lib/BioDSL/commands/plot_scores.rb +7 -7
  44. data/lib/BioDSL/commands/random.rb +1 -1
  45. data/lib/BioDSL/commands/read_fasta.rb +9 -9
  46. data/lib/BioDSL/commands/read_fastq.rb +16 -16
  47. data/lib/BioDSL/commands/read_table.rb +2 -3
  48. data/lib/BioDSL/commands/reverse_seq.rb +4 -4
  49. data/lib/BioDSL/commands/slice_align.rb +4 -4
  50. data/lib/BioDSL/commands/slice_seq.rb +3 -3
  51. data/lib/BioDSL/commands/sort.rb +1 -1
  52. data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
  53. data/lib/BioDSL/commands/split_values.rb +2 -2
  54. data/lib/BioDSL/commands/trim_primer.rb +13 -8
  55. data/lib/BioDSL/commands/trim_seq.rb +5 -5
  56. data/lib/BioDSL/commands/uchime_ref.rb +6 -6
  57. data/lib/BioDSL/commands/uclust.rb +5 -5
  58. data/lib/BioDSL/commands/unique_values.rb +1 -1
  59. data/lib/BioDSL/commands/usearch_global.rb +2 -2
  60. data/lib/BioDSL/commands/usearch_local.rb +2 -2
  61. data/lib/BioDSL/commands/write_fasta.rb +7 -9
  62. data/lib/BioDSL/commands/write_fastq.rb +4 -4
  63. data/lib/BioDSL/commands/write_table.rb +3 -3
  64. data/lib/BioDSL/commands/write_tree.rb +2 -3
  65. data/lib/BioDSL/config.rb +2 -2
  66. data/lib/BioDSL/csv.rb +8 -10
  67. data/lib/BioDSL/debug.rb +1 -1
  68. data/lib/BioDSL/fasta.rb +54 -40
  69. data/lib/BioDSL/fastq.rb +35 -32
  70. data/lib/BioDSL/filesys.rb +56 -47
  71. data/lib/BioDSL/fork.rb +1 -1
  72. data/lib/BioDSL/hamming.rb +1 -1
  73. data/lib/BioDSL/helpers.rb +1 -1
  74. data/lib/BioDSL/helpers/aux_helper.rb +1 -1
  75. data/lib/BioDSL/helpers/email_helper.rb +1 -1
  76. data/lib/BioDSL/helpers/history_helper.rb +1 -1
  77. data/lib/BioDSL/helpers/log_helper.rb +1 -1
  78. data/lib/BioDSL/helpers/options_helper.rb +1 -1
  79. data/lib/BioDSL/helpers/status_helper.rb +1 -1
  80. data/lib/BioDSL/html_report.rb +1 -1
  81. data/lib/BioDSL/math.rb +1 -1
  82. data/lib/BioDSL/mummer.rb +1 -1
  83. data/lib/BioDSL/pipeline.rb +1 -1
  84. data/lib/BioDSL/seq.rb +240 -231
  85. data/lib/BioDSL/seq/ambiguity.rb +1 -1
  86. data/lib/BioDSL/seq/assemble.rb +1 -1
  87. data/lib/BioDSL/seq/backtrack.rb +93 -76
  88. data/lib/BioDSL/seq/digest.rb +1 -1
  89. data/lib/BioDSL/seq/dynamic.rb +43 -55
  90. data/lib/BioDSL/seq/homopolymer.rb +34 -36
  91. data/lib/BioDSL/seq/kmer.rb +67 -50
  92. data/lib/BioDSL/seq/levenshtein.rb +35 -40
  93. data/lib/BioDSL/seq/translate.rb +64 -55
  94. data/lib/BioDSL/seq/trim.rb +60 -50
  95. data/lib/BioDSL/serializer.rb +1 -1
  96. data/lib/BioDSL/stream.rb +1 -1
  97. data/lib/BioDSL/taxonomy.rb +1 -1
  98. data/lib/BioDSL/test.rb +1 -1
  99. data/lib/BioDSL/tmp_dir.rb +1 -1
  100. data/lib/BioDSL/usearch.rb +1 -1
  101. data/lib/BioDSL/verbose.rb +1 -1
  102. data/lib/BioDSL/version.rb +2 -2
  103. data/test/BioDSL/commands/test_add_key.rb +1 -1
  104. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  105. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  106. data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
  107. data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
  108. data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
  109. data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
  110. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  111. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  112. data/test/BioDSL/commands/test_clip_primer.rb +1 -1
  113. data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
  114. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  115. data/test/BioDSL/commands/test_collect_otus.rb +1 -1
  116. data/test/BioDSL/commands/test_complement_seq.rb +1 -1
  117. data/test/BioDSL/commands/test_count.rb +1 -1
  118. data/test/BioDSL/commands/test_count_values.rb +1 -1
  119. data/test/BioDSL/commands/test_degap_seq.rb +1 -1
  120. data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
  121. data/test/BioDSL/commands/test_dump.rb +1 -1
  122. data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
  123. data/test/BioDSL/commands/test_genecall.rb +1 -1
  124. data/test/BioDSL/commands/test_grab.rb +1 -1
  125. data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
  126. data/test/BioDSL/commands/test_mask_seq.rb +1 -1
  127. data/test/BioDSL/commands/test_mean_scores.rb +1 -1
  128. data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
  129. data/test/BioDSL/commands/test_merge_table.rb +1 -1
  130. data/test/BioDSL/commands/test_merge_values.rb +1 -1
  131. data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
  132. data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
  133. data/test/BioDSL/commands/test_plot_matches.rb +1 -1
  134. data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
  135. data/test/BioDSL/commands/test_plot_scores.rb +1 -1
  136. data/test/BioDSL/commands/test_random.rb +1 -1
  137. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  138. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  139. data/test/BioDSL/commands/test_read_table.rb +1 -1
  140. data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
  141. data/test/BioDSL/commands/test_slice_align.rb +1 -1
  142. data/test/BioDSL/commands/test_slice_seq.rb +1 -1
  143. data/test/BioDSL/commands/test_sort.rb +1 -1
  144. data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
  145. data/test/BioDSL/commands/test_split_values.rb +1 -1
  146. data/test/BioDSL/commands/test_trim_primer.rb +1 -1
  147. data/test/BioDSL/commands/test_trim_seq.rb +1 -1
  148. data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
  149. data/test/BioDSL/commands/test_uclust.rb +1 -1
  150. data/test/BioDSL/commands/test_unique_values.rb +1 -1
  151. data/test/BioDSL/commands/test_usearch_global.rb +1 -1
  152. data/test/BioDSL/commands/test_usearch_local.rb +1 -1
  153. data/test/BioDSL/commands/test_write_fasta.rb +1 -1
  154. data/test/BioDSL/commands/test_write_fastq.rb +1 -1
  155. data/test/BioDSL/commands/test_write_table.rb +1 -1
  156. data/test/BioDSL/commands/test_write_tree.rb +1 -1
  157. data/test/BioDSL/helpers/test_options_helper.rb +3 -3
  158. data/test/BioDSL/seq/test_assemble.rb +58 -56
  159. data/test/BioDSL/seq/test_backtrack.rb +83 -81
  160. data/test/BioDSL/seq/test_digest.rb +47 -45
  161. data/test/BioDSL/seq/test_dynamic.rb +66 -64
  162. data/test/BioDSL/seq/test_homopolymer.rb +35 -33
  163. data/test/BioDSL/seq/test_kmer.rb +29 -28
  164. data/test/BioDSL/seq/test_translate.rb +44 -42
  165. data/test/BioDSL/seq/test_trim.rb +59 -57
  166. data/test/BioDSL/test_cary.rb +1 -1
  167. data/test/BioDSL/test_command.rb +2 -2
  168. data/test/BioDSL/test_csv.rb +34 -31
  169. data/test/BioDSL/test_debug.rb +31 -31
  170. data/test/BioDSL/test_fasta.rb +30 -29
  171. data/test/BioDSL/test_fastq.rb +27 -26
  172. data/test/BioDSL/test_filesys.rb +28 -27
  173. data/test/BioDSL/test_fork.rb +29 -28
  174. data/test/BioDSL/test_math.rb +31 -30
  175. data/test/BioDSL/test_mummer.rb +1 -1
  176. data/test/BioDSL/test_pipeline.rb +1 -1
  177. data/test/BioDSL/test_seq.rb +42 -41
  178. data/test/BioDSL/test_serializer.rb +35 -33
  179. data/test/BioDSL/test_stream.rb +28 -27
  180. data/test/BioDSL/test_taxonomy.rb +38 -37
  181. data/test/BioDSL/test_test.rb +32 -31
  182. data/test/BioDSL/test_tmp_dir.rb +1 -1
  183. data/test/BioDSL/test_usearch.rb +28 -27
  184. data/test/BioDSL/test_verbose.rb +32 -31
  185. data/test/helper.rb +34 -31
  186. metadata +3 -2
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -162,7 +162,7 @@ module BioDSL
162
162
  ios.each(:uc) do |record|
163
163
  record[:RECORD_TYPE] = 'usearch'
164
164
  output << record
165
- @status[:hits_out] += 1
165
+ @status[:hits_out] += 1
166
166
  @status[:records_out] += 1
167
167
  end
168
168
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -162,7 +162,7 @@ module BioDSL
162
162
  ios.each(:uc) do |record|
163
163
  record[:RECORD_TYPE] = 'usearch'
164
164
  output << record
165
- @status[:hits_out] += 1
165
+ @status[:hits_out] += 1
166
166
  @status[:records_out] += 1
167
167
  end
168
168
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -137,18 +137,16 @@ module BioDSL
137
137
 
138
138
  if (entry = record2entry(record))
139
139
  $stdout.puts entry.to_fasta(wrap)
140
- @status[:sequences_in] += 1
140
+ @status[:sequences_in] += 1
141
141
  @status[:sequences_out] += 1
142
- @status[:residues_in] += entry.length
143
- @status[:residues_out] += entry.length
142
+ @status[:residues_in] += entry.length
143
+ @status[:residues_out] += entry.length
144
144
  end
145
145
 
146
146
  write_output(output, record)
147
147
  end
148
148
  end
149
149
 
150
- # rubocop: disable Metrics/AbcSize
151
-
152
150
  # Write all sequence entries to a specified file.
153
151
  #
154
152
  # @param input [Enumerator] The input stream.
@@ -160,10 +158,10 @@ module BioDSL
160
158
 
161
159
  if (entry = record2entry(record))
162
160
  ios.puts entry.to_fasta(@options[:wrap])
163
- @status[:sequences_in] += 1
161
+ @status[:sequences_in] += 1
164
162
  @status[:sequences_out] += 1
165
- @status[:residues_in] += entry.length
166
- @status[:residues_out] += entry.length
163
+ @status[:residues_in] += entry.length
164
+ @status[:residues_out] += entry.length
167
165
  end
168
166
 
169
167
  write_output(output, record)
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -91,7 +91,7 @@ module BioDSL
91
91
  #
92
92
  # @return [WriteFastq] Class instance.
93
93
  def initialize(options)
94
- @options = options
94
+ @options = options
95
95
  check_options
96
96
  @options[:output] ||= $stdout
97
97
  @compress = choose_compression
@@ -139,7 +139,7 @@ module BioDSL
139
139
 
140
140
  if record[:SEQ]
141
141
  @status[:sequences_in] += 1
142
- @status[:residues_in] += record[:SEQ].length
142
+ @status[:residues_in] += record[:SEQ].length
143
143
 
144
144
  write_fastq(record, ios) if record[:SEQ_NAME] && record[:SCORES]
145
145
  end
@@ -162,7 +162,7 @@ module BioDSL
162
162
 
163
163
  ios.puts entry.to_fastq
164
164
  @status[:sequences_out] += 1
165
- @status[:residues_out] += entry.length
165
+ @status[:residues_out] += entry.length
166
166
  end
167
167
 
168
168
  # Choose compression to use which can either be gzip or bzip2 or no
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -185,7 +185,7 @@ module BioDSL
185
185
  #
186
186
  # @return [WriteTable] Class instance.
187
187
  def initialize(options)
188
- @options = options
188
+ @options = options
189
189
  check_options
190
190
  @options[:delimiter] ||= "\t"
191
191
  @compress = choose_compression
@@ -402,7 +402,7 @@ module BioDSL
402
402
  def align_columns(table)
403
403
  @rows.first.each_with_index do |cell, i|
404
404
  next unless cell.is_a?(Fixnum) ||
405
- cell.is_a?(Float) ||
405
+ cell.is_a?(Float) ||
406
406
  cell.delete(',') =~ /^[0-9]+$/
407
407
 
408
408
  table.align_column(i, :right)
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -79,7 +79,6 @@ module BioDSL
79
79
  @cmd = compile_command
80
80
  end
81
81
 
82
- # rubocop: disable Metrics/AbcSize
83
82
  # rubocop: disable MethodLength
84
83
 
85
84
  # Return command lambda for write_tree.
@@ -146,7 +145,7 @@ module BioDSL
146
145
  entry.seq_name ||= i
147
146
 
148
147
  @status[:sequences_in] += 1
149
- @status[:residues_in] += entry.length
148
+ @status[:residues_in] += entry.length
150
149
 
151
150
  stdin.puts entry.to_fasta
152
151
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -36,7 +36,7 @@ module BioDSL
36
36
  HISTORY_FILE = File.join(ENV['HOME'], '.BioDSL_history')
37
37
  LOG_FILE = File.join(ENV['HOME'], '.BioDSL_log')
38
38
  RC_FILE = File.join(ENV['HOME'], '.BioDSLrc')
39
- STATUS_PROGRESS_INTERVAL = 0.1 # update progress every n second.
39
+ STATUS_PROGRESS_INTERVAL = 0.1 # update progress every n second.
40
40
 
41
41
  options = options_load_rc({}, :pipeline)
42
42
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
24
+ # This software is part of BioDSL (http://maasha.github.io/BioDSL). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -283,15 +283,13 @@ module BioDSL
283
283
  fields.each do |field|
284
284
  field = field.to_num
285
285
 
286
- if field.is_a? Fixnum
287
- types << :to_i
288
- elsif field.is_a? Float
289
- types << :to_f
290
- elsif field.is_a? String
291
- types << :to_s
292
- else
293
- types << nil
294
- end
286
+ types << if field.is_a? Fixnum
287
+ :to_i
288
+ elsif field.is_a? Float
289
+ :to_f
290
+ elsif field.is_a? String
291
+ :to_s
292
+ end
295
293
  end
296
294
 
297
295
  @types = types
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -1,45 +1,48 @@
1
- # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
- # #
3
- # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
4
- # #
5
- # This program is free software; you can redistribute it and/or #
6
- # modify it under the terms of the GNU General Public License #
7
- # as published by the Free Software Foundation; either version 2 #
8
- # of the License, or (at your option) any later version. #
9
- # #
10
- # This program is distributed in the hope that it will be useful, #
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
- # GNU General Public License for more details. #
14
- # #
15
- # You should have received a copy of the GNU General Public License #
16
- # along with this program; if not, write to the Free Software #
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
18
- # #
19
- # http://www.gnu.org/copyleft/gpl.html #
20
- # #
21
- # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
22
- # #
23
- # This software is part of BioDSL (www.BioDSL.org). #
24
- # #
25
- # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
-
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+ # #
24
+ # This software is part of BioDSL (http://maasha.github.io/BioDSL). #
25
+ # #
26
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
+
28
+ # Namespace for BioDSL.
27
29
  module BioDSL
28
30
  # Error class for all exceptions to do with FASTA.
29
31
  class FastaError < StandardError; end
30
32
 
33
+ # Class for reading and writing FASTA files.
31
34
  class Fasta
32
35
  def self.open(*args)
33
36
  ios = IO.open(*args)
34
37
 
35
38
  if block_given?
36
39
  begin
37
- yield self.new(ios)
40
+ yield new(ios)
38
41
  ensure
39
42
  ios.close
40
43
  end
41
44
  else
42
- return self.new(ios)
45
+ return new(ios)
43
46
  end
44
47
  end
45
48
 
@@ -60,13 +63,13 @@ module BioDSL
60
63
  def initialize(io)
61
64
  @io = io
62
65
  @seq_name = nil
63
- @seq = ""
66
+ @seq = ''
64
67
  @got_first = nil
65
68
  @got_last = nil
66
69
  end
67
70
 
68
71
  def each
69
- while entry = next_entry
72
+ while (entry = next_entry)
70
73
  yield entry
71
74
  end
72
75
  end
@@ -84,24 +87,33 @@ module BioDSL
84
87
  next if line.empty?
85
88
 
86
89
  if line[0] == '>'
87
- if not @got_first and not @seq.empty?
88
- raise FastaError, "Bad FASTA format -> content before Fasta header: #{@seq}" unless @seq.empty?
90
+ if !@got_first && !@seq.empty?
91
+ unless @seq.empty?
92
+ fail FastaError, 'Bad FASTA format -> content before Fasta ' \
93
+ "header: #{@seq}"
94
+ end
89
95
  end
90
96
 
91
97
  @got_first = true
92
98
 
93
99
  if @seq_name
94
100
  entry = Seq.new(seq_name: @seq_name, seq: @seq)
95
- @seq_name = line[1 .. -1]
96
- @seq = ""
101
+ @seq_name = line[1..-1]
102
+ @seq = ''
97
103
 
98
- raise FastaError, "Bad FASTA format -> truncated Fasta header: no content after '>'" if @seq_name.empty?
104
+ if @seq_name.empty?
105
+ fail FastaError, 'Bad FASTA format -> truncated Fasta header: ' \
106
+ 'no content after \'>\''
107
+ end
99
108
 
100
109
  return entry
101
110
  else
102
- @seq_name = line[1 .. -1]
111
+ @seq_name = line[1..-1]
103
112
 
104
- raise FastaError, "Bad FASTA format -> truncated Fasta header: no content after '>'" if @seq_name.empty?
113
+ if @seq_name.empty?
114
+ fail FastaError, 'Bad FASTA format -> truncated Fasta header: ' \
115
+ ' no content after \'>\''
116
+ end
105
117
  end
106
118
  else
107
119
  @seq << line
@@ -115,16 +127,18 @@ module BioDSL
115
127
  return entry
116
128
  end
117
129
 
118
- if not @got_last and not @seq.empty?
119
- raise FastaError, "Bad FASTA format -> content witout Fasta header: #{@seq}"
130
+ if !@got_last && !@seq.empty?
131
+ fail FastaError, 'Bad FASTA format -> content witout Fasta header: ' +
132
+ @seq
120
133
  end
121
134
 
122
135
  nil
123
136
  end
124
137
 
138
+ # Class for FASTA IO
125
139
  class IO < Filesys
126
140
  def each
127
- while not @io.eof?
141
+ until @io.eof?
128
142
  yield @io.gets
129
143
  end
130
144
  end
@@ -1,29 +1,31 @@
1
- # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
- # #
3
- # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
4
- # #
5
- # This program is free software; you can redistribute it and/or #
6
- # modify it under the terms of the GNU General Public License #
7
- # as published by the Free Software Foundation; either version 2 #
8
- # of the License, or (at your option) any later version. #
9
- # #
10
- # This program is distributed in the hope that it will be useful, #
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
- # GNU General Public License for more details. #
14
- # #
15
- # You should have received a copy of the GNU General Public License #
16
- # along with this program; if not, write to the Free Software #
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
18
- # #
19
- # http://www.gnu.org/copyleft/gpl.html #
20
- # #
21
- # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
22
- # #
23
- # This software is part of BioDSL (www.BioDSL.org). #
24
- # #
25
- # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+ # #
24
+ # This software is part of BioDSL (http://maasha.github.io/BioDSL). #
25
+ # #
26
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
27
 
28
+ # Namespace for BioDSL.
27
29
  module BioDSL
28
30
  # Error class for all exceptions to do with FASTQ.
29
31
  class FastqError < StandardError; end
@@ -35,21 +37,21 @@ module BioDSL
35
37
 
36
38
  if block_given?
37
39
  begin
38
- yield self.new(ios)
40
+ yield new(ios)
39
41
  ensure
40
42
  ios.close
41
43
  end
42
44
  else
43
- return self.new(ios)
45
+ return new(ios)
44
46
  end
45
47
  end
46
48
 
47
49
  def initialize(io)
48
- @io = io
50
+ @io = io
49
51
  end
50
52
 
51
53
  def each
52
- while entry = next_entry
54
+ while (entry = next_entry)
53
55
  yield entry
54
56
  end
55
57
  end
@@ -58,17 +60,18 @@ module BioDSL
58
60
  # as a Seq object. If no entry is found or eof then nil is returned.
59
61
  def next_entry
60
62
  return nil if @io.eof?
61
- seq_name = @io.gets[1 .. -2]
63
+ seq_name = @io.gets[1..-2]
62
64
  seq = @io.gets.chomp
63
65
  @io.gets
64
- qual = @io.gets.chomp
66
+ qual = @io.gets.chomp
65
67
 
66
68
  Seq.new(seq_name: seq_name, seq: seq, qual: qual)
67
69
  end
68
70
 
71
+ # Class for FASTQ IO.
69
72
  class IO < Filesys
70
73
  def each
71
- while not @io.eof?
74
+ until @io.eof?
72
75
  yield @io.gets
73
76
  end
74
77
  end