BioDSL 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +66 -66
  3. data/examples/fastq_to_fasta.rb +1 -1
  4. data/lib/BioDSL/commands/align_seq_mothur.rb +1 -1
  5. data/lib/BioDSL/commands/analyze_residue_distribution.rb +2 -2
  6. data/lib/BioDSL/commands/assemble_pairs.rb +1 -1
  7. data/lib/BioDSL/commands/assemble_seq_idba.rb +1 -1
  8. data/lib/BioDSL/commands/assemble_seq_ray.rb +1 -1
  9. data/lib/BioDSL/commands/assemble_seq_spades.rb +2 -2
  10. data/lib/BioDSL/commands/classify_seq.rb +1 -1
  11. data/lib/BioDSL/commands/classify_seq_mothur.rb +1 -1
  12. data/lib/BioDSL/commands/clip_primer.rb +2 -2
  13. data/lib/BioDSL/commands/cluster_otus.rb +1 -1
  14. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  15. data/lib/BioDSL/commands/complement_seq.rb +1 -1
  16. data/lib/BioDSL/commands/count.rb +1 -1
  17. data/lib/BioDSL/commands/count_values.rb +1 -1
  18. data/lib/BioDSL/commands/degap_seq.rb +2 -2
  19. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  20. data/lib/BioDSL/commands/filter_rrna.rb +1 -1
  21. data/lib/BioDSL/commands/genecall.rb +2 -2
  22. data/lib/BioDSL/commands/index_taxonomy.rb +1 -1
  23. data/lib/BioDSL/commands/mask_seq.rb +3 -3
  24. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  25. data/lib/BioDSL/commands/merge_pair_seq.rb +1 -1
  26. data/lib/BioDSL/commands/merge_table.rb +1 -1
  27. data/lib/BioDSL/commands/plot_heatmap.rb +1 -1
  28. data/lib/BioDSL/commands/plot_matches.rb +1 -1
  29. data/lib/BioDSL/commands/plot_residue_distribution.rb +1 -1
  30. data/lib/BioDSL/commands/random.rb +1 -1
  31. data/lib/BioDSL/commands/read_fastq.rb +9 -9
  32. data/lib/BioDSL/commands/read_table.rb +7 -7
  33. data/lib/BioDSL/commands/reverse_seq.rb +1 -1
  34. data/lib/BioDSL/commands/slice_align.rb +4 -4
  35. data/lib/BioDSL/commands/slice_seq.rb +4 -4
  36. data/lib/BioDSL/commands/sort.rb +4 -4
  37. data/lib/BioDSL/commands/split_pair_seq.rb +1 -1
  38. data/lib/BioDSL/commands/trim_primer.rb +2 -2
  39. data/lib/BioDSL/commands/trim_seq.rb +4 -4
  40. data/lib/BioDSL/commands/unique_values.rb +2 -2
  41. data/lib/BioDSL/commands/write_tree.rb +1 -1
  42. data/lib/BioDSL/pipeline.rb +2 -2
  43. data/lib/BioDSL/version.rb +1 -1
  44. data/lib/BioDSL.rb +1 -1
  45. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  46. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  47. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  48. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  49. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  50. data/test/BioDSL/commands/test_grab.rb +1 -1
  51. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  52. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  53. data/test/BioDSL/commands/test_read_table.rb +1 -1
  54. data/test/BioDSL/test_pipeline.rb +7 -7
  55. data/test/helper.rb +1 -1
  56. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 62ee1a33fd4240d69a9947883c1a2616c080c55b
4
- data.tar.gz: bfceb11bc1375355e5b61eaf3ae31dd3459f6572
3
+ metadata.gz: b828e339f7d9337acdaf88a4206cb4cf15a6778c
4
+ data.tar.gz: 6c130d98ba2e9ca1c1bdf6a044bbe7d6e2c6f309
5
5
  SHA512:
6
- metadata.gz: f9c8bd7cae663da3110438b209b55ac92301c0de4fe5800dbf2c0d322c6089a17ebe4018a27a2ffaa00f615cb9ac6b89e15c4eaeba73bffd659c423b28ba5813
7
- data.tar.gz: 5d1ffa6e7e16bc836774a42e79ebd0fcf9091264349a781106501178d1572c9d0176a892f8c07e27d1885d69a12831152f307147b9b098e45ed80179e0316588
6
+ metadata.gz: 8f1fcfd7080a7487fd1a75152c4da3ce7328e86e19843181a93c30d1bb94a2f8f78a065cd208002324df2e0bbbbfbde5576a6157ece8c4c6c4878a6311e0074e
7
+ data.tar.gz: 31b549d1294e2be25897d824ab019154dfa570305dd1f79b041ae641b8208d2fa00e97ac36e4bdfe8a2dacd46d7cc8da9db4ddb5d75f4c9f3a3e6997aa4e0ae0
data/README.md CHANGED
@@ -15,7 +15,7 @@ A test script:
15
15
 
16
16
  require 'BioDSL'
17
17
 
18
- p = BP.new.
18
+ p = BD.new.
19
19
  read_fasta(input: "input.fna").
20
20
  grab(select: "ATC$", keys: :SEQ).
21
21
  write_fasta(output: "output.fna").
@@ -29,24 +29,24 @@ adding the following to your `~/.bashrc` file:
29
29
  And then start the interactive shell:
30
30
 
31
31
  $ ibp
32
- irb(main):001:0> p = BP.new
33
- => BP.new
32
+ irb(main):001:0> p = BD.new
33
+ => BD.new
34
34
  irb(main):002:0> p.read_fasta(input: "input.fna")
35
- => BP.new.read_fasta(input: "input.fna")
35
+ => BD.new.read_fasta(input: "input.fna")
36
36
  irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
37
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
37
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
38
38
  irb(main):004:0> p.write_fasta(output: "output.fna")
39
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
39
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
40
40
  irb(main):005:0> p.run(progress: true)
41
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
41
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
42
42
  irb(main):006:0>
43
43
 
44
44
 
45
45
  Or chaining commands directly:
46
46
 
47
47
  $ ibp
48
- irb(main):001:0> BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
49
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
48
+ irb(main):001:0> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
49
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
50
50
  irb(main):002:0>
51
51
 
52
52
  Or run on the command line with the alias bp which you can create by adding the
@@ -56,61 +56,61 @@ following to your ~/.bashrc file:
56
56
 
57
57
  Then you can run the below from the command line:
58
58
 
59
- $ bp -e 'BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
59
+ $ bp -e 'BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
60
60
 
61
61
  Available BioDSL
62
62
  -------------------
63
63
 
64
- * [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AddKey)
65
- * [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AlignSeqMothur)
66
- * [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AnalyzeResidueDistribution)
67
- * [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssemblePairs)
68
- * [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqIdba)
69
- * [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqRay)
70
- * [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqSpades)
71
- * [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClassifySeq)
72
- * [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClassifySeqMothur)
73
- * [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClipPrimer)
74
- * [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClusterOtus)
75
- * [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/CollapseOtus)
76
- * [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/CollectOtus)
77
- * [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ComplementSeq)
78
- * [count] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Count)
79
- * [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/DegapSeq)
80
- * [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/DereplicateSeq)
81
- * [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Dump)
82
- * [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/FilterRrna)
83
- * [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Genecall)
84
- * [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Grab)
85
- * [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/IndexTaxonomy)
86
- * [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MeanScores)
87
- * [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergePairSeq)
88
- * [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergeTable)
89
- * [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergeValues)
90
- * [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotHeatmap)
91
- * [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotHistogram)
92
- * [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotMatches)
93
- * [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotResidueDistribution)
94
- * [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotScores)
95
- * [random] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Random)
96
- * [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadFasta)
97
- * [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadFastq)
98
- * [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadTable)
99
- * [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReverseSeq)
100
- * [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SliceAlign)
101
- * [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SliceSeq)
102
- * [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Sort)
103
- * [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SplitPairSeq)
104
- * [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SplitValues)
105
- * [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/TrimPrimer)
106
- * [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/TrimSeq)
107
- * [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UchimeRef)
108
- * [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UniqueValues)
109
- * [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UsearchGlobal)
110
- * [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteFasta)
111
- * [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteFastq)
112
- * [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteTable)
113
- * [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteTree)
64
+ * [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AddKey)
65
+ * [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AlignSeqMothur)
66
+ * [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AnalyzeResidueDistribution)
67
+ * [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssemblePairs)
68
+ * [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqIdba)
69
+ * [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqRay)
70
+ * [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqSpades)
71
+ * [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeq)
72
+ * [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeqMothur)
73
+ * [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClipPrimer)
74
+ * [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClusterOtus)
75
+ * [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollapseOtus)
76
+ * [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollectOtus)
77
+ * [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ComplementSeq)
78
+ * [count] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Count)
79
+ * [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DegapSeq)
80
+ * [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DereplicateSeq)
81
+ * [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Dump)
82
+ * [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/FilterRrna)
83
+ * [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Genecall)
84
+ * [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Grab)
85
+ * [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/IndexTaxonomy)
86
+ * [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MeanScores)
87
+ * [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergePairSeq)
88
+ * [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeTable)
89
+ * [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeValues)
90
+ * [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHeatmap)
91
+ * [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHistogram)
92
+ * [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotMatches)
93
+ * [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotResidueDistribution)
94
+ * [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotScores)
95
+ * [random] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Random)
96
+ * [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFasta)
97
+ * [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFastq)
98
+ * [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadTable)
99
+ * [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReverseSeq)
100
+ * [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceAlign)
101
+ * [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceSeq)
102
+ * [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Sort)
103
+ * [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitPairSeq)
104
+ * [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitValues)
105
+ * [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimPrimer)
106
+ * [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimSeq)
107
+ * [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UchimeRef)
108
+ * [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UniqueValues)
109
+ * [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UsearchGlobal)
110
+ * [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFasta)
111
+ * [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFastq)
112
+ * [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTable)
113
+ * [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTree)
114
114
 
115
115
  Log and History
116
116
  ---------------
@@ -127,37 +127,37 @@ Progress:
127
127
 
128
128
  Show nifty progress table with commands, records read and emitted, and time.
129
129
 
130
- `BP.new.read_fasta(input: "input.fna").dump.run(progress: true)`
130
+ `BD.new.read_fasta(input: "input.fna").dump.run(progress: true)`
131
131
 
132
132
  Verbose:
133
133
 
134
134
  Output verbose messages from commands and the run status.
135
135
 
136
- `BP.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
136
+ `BD.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
137
137
 
138
138
  Debug:
139
139
 
140
140
  Output debug messages from commands using these.
141
141
 
142
- `BP.new.read_fasta(input: "input.fna").dump.run(debug: true)`
142
+ `BD.new.read_fasta(input: "input.fna").dump.run(debug: true)`
143
143
 
144
144
  E-mail notification:
145
145
 
146
146
  Send an email when run is complete.
147
147
 
148
- `BP.new.read_fasta(input: "input.fna").dump.run(email: mail@maasha.dk, subject: "Script done!")`
148
+ `BD.new.read_fasta(input: "input.fna").dump.run(email: mail@maasha.dk, subject: "Script done!")`
149
149
 
150
150
  Report:
151
151
 
152
152
  Create an HTML report of the run stats:
153
153
 
154
- `BP.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
154
+ `BD.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
155
155
 
156
156
  Output dir:
157
157
 
158
158
  All output files from commands are put in a specified dir:
159
159
 
160
- `BP.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
160
+ `BD.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
161
161
 
162
162
 
163
163
  Configuration File
@@ -5,4 +5,4 @@ require 'BioDSL'
5
5
  # Read in sequences in FASTQ format from the file `test.fq` and save them in
6
6
  # FASTA format in the file `test.fna`.
7
7
 
8
- BP.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run
8
+ BD.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run
@@ -53,7 +53,7 @@ module BioDSL
53
53
  # To align the entries in the FASTA file `test.fna` to the template alignment
54
54
  # in the file `template.fna` do:
55
55
  #
56
- # BP.new.
56
+ # BD.new.
57
57
  # read_fasta(input: "test.fna").
58
58
  # align_seq_mothur(template_file: "template.fna").
59
59
  # run
@@ -68,7 +68,7 @@ module BioDSL
68
68
  # Now we run the data through the following pipeline and get the resulting
69
69
  # table:
70
70
  #
71
- # BP.new.
71
+ # BD.new.
72
72
  # read_fasta(input: "test.fna").
73
73
  # analyze_residue_distribution.
74
74
  # grab(select: "residue").
@@ -91,7 +91,7 @@ module BioDSL
91
91
  # Here we do the same as above, but output percentages instead of absolute
92
92
  # counts:
93
93
  #
94
- # BP.new.
94
+ # BD.new.
95
95
  # read_fasta(input: "test.fna").
96
96
  # analyze_residue_distribution(percent: true).
97
97
  # grab(select: "residue").
@@ -77,7 +77,7 @@ module BioDSL
77
77
  # If you have two pair-end sequence files with the Illumina data then you
78
78
  # can assemble these using assemble_pairs like this:
79
79
  #
80
- # BP.new.
80
+ # BD.new.
81
81
  # read_fastq(input: "file1.fq", input2: "file2.fq").
82
82
  # assemble_pairs(reverse_complement: true).
83
83
  # run
@@ -58,7 +58,7 @@ module BioDSL
58
58
  # If you have two pair-end sequence files with the Illumina data then you
59
59
  # can assemble these using +assemble_seq_idba+ like this:
60
60
  #
61
- # BP.new.
61
+ # BD.new.
62
62
  # read_fastq(input: "file1.fq", input2: "file2.fq").
63
63
  # assemble_seq_idba.
64
64
  # write_fasta(output: "contigs.fna").
@@ -61,7 +61,7 @@ module BioDSL
61
61
  # If you have two pair-end sequence files with the Illumina data then you
62
62
  # can assemble these using +assemble_seq_ray+ like this:
63
63
  #
64
- # BP.new.
64
+ # BD.new.
65
65
  # read_fastq(input: "file1.fq", input2: "file2.fq").
66
66
  # assemble_seq_ray.
67
67
  # write_fasta(output: "contigs.fna").
@@ -56,7 +56,7 @@ module BioDSL
56
56
  # If you have two pair-end sequence files with the Illumina data then you
57
57
  # can assemble these using assemble_seq_spades like this:
58
58
  #
59
- # BP.new.
59
+ # BD.new.
60
60
  # read_fastq(input: "file1.fq", input2: "file2.fq").
61
61
  # assemble_seq_spades(kmers: [55,77,99,127]).
62
62
  # write_fasta(output: "contigs.fna").
@@ -69,7 +69,7 @@ module BioDSL
69
69
  include AuxHelper
70
70
 
71
71
  STATS = %i(records_in records_out sequences_in sequences_out residues_in
72
- records_out assembled)
72
+ residues_out records_out assembled)
73
73
 
74
74
  # Constructor for the AssembleSeqSpades class.
75
75
  #
@@ -100,7 +100,7 @@ module BioDSL
100
100
  #
101
101
  # To classify a bunch of OTU sequences in the file +otus.fna+ we do:
102
102
  #
103
- # BP.new.
103
+ # BD.new.
104
104
  # read_fasta(input: "otus.fna").
105
105
  # classify_seq(dir: "RDP11_3").
106
106
  # write_table(keys: [:SEQ_NAME, :TAXONOMY_HITS, :TAXONOMY]).
@@ -61,7 +61,7 @@ module BioDSL
61
61
  # database = "trainset9_032012.pds.fasta"
62
62
  # taxonomy = "trainset9_032012.pds.tax"
63
63
  #
64
- # BP.new.
64
+ # BD.new.
65
65
  # read_fasta(input: "otus.fna").
66
66
  # classify_seq_mothur(database: database, taxonomy: taxonomy).
67
67
  # grab(exact: true, keys: :RECORD_TYPE, select: "taxonomy").
@@ -75,7 +75,7 @@ module BioDSL
75
75
  # To clip this sequence in the forward direction with the primer
76
76
  # 'TGACTACGACTACGACTACT' do:
77
77
  #
78
- # BP.new.
78
+ # BD.new.
79
79
  # read_fasta(input: "test.fna").
80
80
  # clip_primer(primer: "TGACTACGACTACGACTACT", direction: :forward).
81
81
  # dump.
@@ -91,7 +91,7 @@ module BioDSL
91
91
  #
92
92
  # Or in the reverse direction:
93
93
  #
94
- # BP.new.
94
+ # BD.new.
95
95
  # read_fasta(input: "test.fna").
96
96
  # clip_primer(primer: "TGACTACGACTACGACTACT", direction: :reverse).
97
97
  # dump.
@@ -53,7 +53,7 @@ module BioDSL
53
53
  #
54
54
  # To create OTU clusters do:
55
55
  #
56
- # BP.new.
56
+ # BD.new.
57
57
  # read_fasta(input: "in.fna").
58
58
  # dereplicate_seq.
59
59
  # sort(key: :SEQ_COUNT, reverse: true).
@@ -43,7 +43,7 @@ module BioDSL
43
43
  # Here is an OTU table with four rows, one of which has a redundant Taxonomy
44
44
  # string:
45
45
  #
46
- # BP.new.read_table(input: "otu_table.txt").dump.run
46
+ # BD.new.read_table(input: "otu_table.txt").dump.run
47
47
  #
48
48
  # {:OTU=>"OTU_1",
49
49
  # :CM1_COUNT=>881,
@@ -73,7 +73,7 @@ module BioDSL
73
73
  # In order to collapse the redundant OTU simply run the stream through
74
74
  # +collapse_otus+:
75
75
  #
76
- # BP.new.read_table(input: "otu_table.txt").collapse_otus.dump.run
76
+ # BD.new.read_table(input: "otu_table.txt").collapse_otus.dump.run
77
77
  #
78
78
  # {:OTU=>"OTU_1",
79
79
  # :CM1_COUNT=>881,
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To complement the sequence do:
53
53
  #
54
- # BP.new.read_fastq(input:"test.fq").complement_seq.dump.run
54
+ # BD.new.read_fastq(input:"test.fq").complement_seq.dump.run
55
55
  #
56
56
  # {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
57
57
  # :SEQ=>"AACATTTTGCTGCCGGTCAC",
@@ -46,7 +46,7 @@ module BioDSL
46
46
  #
47
47
  # To count the number of records in the file `test.fq`:
48
48
  #
49
- # BP.new.read_fastq(input: "test.fq").count(output: "count.txt").dump.run
49
+ # BD.new.read_fastq(input: "test.fq").count(output: "count.txt").dump.run
50
50
  #
51
51
  # {:SEQ_NAME=>"ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1",
52
52
  # :SEQ=>"TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT",
@@ -52,7 +52,7 @@ module BioDSL
52
52
  # To count the values of both columns we first read the table with
53
53
  # +read_table+ and then pass the result to +count_values+:
54
54
  #
55
- # BP.new.
55
+ # BD.new.
56
56
  # read_table(input: "test.tab").
57
57
  # count_values(keys: [:V0, :V1]).
58
58
  # dump.
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To remove all gaps from all sequences do:
53
53
  #
54
- # BP.new.read_fasta(input: "test.fna").degap_seq.dump.run
54
+ # BD.new.read_fasta(input: "test.fna").degap_seq.dump.run
55
55
  #
56
56
  # {:SEQ_NAME=>"test1", :SEQ=>"AGTC", :SEQ_LEN=>4}
57
57
  # {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
@@ -59,7 +59,7 @@ module BioDSL
59
59
  #
60
60
  # To remove all gap-only columns use the +columns_only+ option:
61
61
  #
62
- # BP.new.
62
+ # BD.new.
63
63
  # read_fasta(input: "test.fna").
64
64
  # degap_seq(columns_only: true).
65
65
  # dump.
@@ -53,7 +53,7 @@ module BioDSL
53
53
  #
54
54
  # To dereplicate all sequences we use +read_fasta+ and +dereplicate_seq+:
55
55
  #
56
- # BP.new.read_fasta(input: "test.fna").dereplicate_seq.dump.run
56
+ # BD.new.read_fasta(input: "test.fna").dereplicate_seq.dump.run
57
57
  #
58
58
  # {:SEQ_NAME=>"test1", :SEQ=>"ATGC", :SEQ_LEN=>4, :SEQ_COUNT=>2}
59
59
  # {:SEQ_NAME=>"test3", :SEQ=>"GCAT", :SEQ_LEN=>4, :SEQ_COUNT=>1}
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To filter all reads matching the SILVA archaea 23S rRNA do:
53
53
  #
54
- # BP.new.
54
+ # BD.new.
55
55
  # read_fastq(input: "reads.fq").
56
56
  # filter_rrna(ref_fasta: ["silva-arc-23s-id98.fasta"],
57
57
  # ref_index: ["silva-arc-23s-id98.fasta.idx*"]).
@@ -59,7 +59,7 @@ module BioDSL
59
59
  #
60
60
  # To genecall a genome do:
61
61
  #
62
- # BP.new.
62
+ # BD.new.
63
63
  # read_fasta(input: "contigs.fna").
64
64
  # genecall.
65
65
  # grab(select: "genecall", key: :type, exact: true).
@@ -68,7 +68,7 @@ module BioDSL
68
68
  #
69
69
  # To add genecall data to the sequence name use +merge_values+:
70
70
  #
71
- # BP.new.
71
+ # BD.new.
72
72
  # read_fasta(input: "contigs.fna").
73
73
  # genecall(type: "protein").
74
74
  # grab(select: "genecall", key: :type, exact: true).
@@ -113,7 +113,7 @@ module BioDSL
113
113
  #
114
114
  # == Examples
115
115
  #
116
- # BP.new.
116
+ # BD.new.
117
117
  # read_fasta(input: "RDP_11_Bacteria.fna").
118
118
  # index_taxonomy(output_dir: "RDP_11").
119
119
  # run
@@ -57,7 +57,7 @@ module BioDSL
57
57
  # We can read in these sequence using +read_fastq+ and then soft mask the
58
58
  # sequence with mask_seq like this:
59
59
  #
60
- # BP.new.read_fastq(input: "test.fq").mask_seq.dump.run
60
+ # BD.new.read_fastq(input: "test.fq").mask_seq.dump.run
61
61
  #
62
62
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
63
63
  # :SEQ=>"ttggtcgctcgctccgcgacCTCAGATCAGACGTGGGCGAT",
@@ -66,7 +66,7 @@ module BioDSL
66
66
  #
67
67
  # Using the +quality_min+ option we can change the cutoff:
68
68
  #
69
- # BP.new.read_fastq(input: "test.fq").mask_seq(quality_min: 25).dump.run
69
+ # BD.new.read_fastq(input: "test.fq").mask_seq(quality_min: 25).dump.run
70
70
  #
71
71
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
72
72
  # :SEQ=>"ttggtcgctcgctccgcgacctcagATCAGACGTGGGCGAT",
@@ -75,7 +75,7 @@ module BioDSL
75
75
  #
76
76
  # Using the +mask+ option for hard masking:
77
77
  #
78
- # BP.new.read_fastq(input: "test.fq").mask_seq(mask: :hard).dump.run
78
+ # BD.new.read_fastq(input: "test.fq").mask_seq(mask: :hard).dump.run
79
79
  #
80
80
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
81
81
  # :SEQ=>"NNNNNNNNNNNNNNNNNNNNCTCAGATCAGACGTGGGCGAT",
@@ -66,7 +66,7 @@ module BioDSL
66
66
  #
67
67
  # To calculate the mean score do:
68
68
  #
69
- # BP.new.read_fastq(input: "test.fq").mean_scores.dump.run
69
+ # BD.new.read_fastq(input: "test.fq").mean_scores.dump.run
70
70
  #
71
71
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
72
72
  # :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
@@ -76,7 +76,7 @@ module BioDSL
76
76
  #
77
77
  # To calculate local means for a sliding window, do:
78
78
  #
79
- # BP.new.read_fastq(input: "test.fq").mean_scores(local: true).dump.run
79
+ # BD.new.read_fastq(input: "test.fq").mean_scores(local: true).dump.run
80
80
  #
81
81
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
82
82
  # :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
@@ -70,7 +70,7 @@ module BioDSL
70
70
  #
71
71
  # To merge these interleaved pair-end sequences use merge_pair_seq:
72
72
  #
73
- # BP.new.
73
+ # BD.new.
74
74
  # read_fastq(input: "test.fq", encoding: :base_33).
75
75
  # merge_pair_seq.
76
76
  # dump.
@@ -76,7 +76,7 @@ module BioDSL
76
76
  #
77
77
  # We can merge the data with +merge_table+ like this:
78
78
  #
79
- # BP.new.
79
+ # BD.new.
80
80
  # read_table(input: "test1.tab").
81
81
  # merge_table(input: "test2.tab", key: :ID).
82
82
  # dump.
@@ -64,7 +64,7 @@ module BioDSL
64
64
  #
65
65
  # Here we plot a heatmap of data from a table:
66
66
  #
67
- # BP.new.read_table(input: "test.tab").plot_heatmap.run
67
+ # BD.new.read_table(input: "test.tab").plot_heatmap.run
68
68
  #
69
69
  # rubocop:disable ClassLength
70
70
  class PlotHeatmap
@@ -68,7 +68,7 @@ module BioDSL
68
68
  # Here we plot two matches from a table. The vector records are shown in the
69
69
  # +dump+ output:
70
70
  #
71
- # BP.new.read_table(input: "test.tab").dump.plot_matches.run
71
+ # BD.new.read_table(input: "test.tab").dump.plot_matches.run
72
72
  #
73
73
  # {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"+"}
74
74
  # {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"-"}
@@ -65,7 +65,7 @@ module BioDSL
65
65
  #
66
66
  # Here we plot a residue distribution of a FASTA file:
67
67
  #
68
- # BP.new.read_fasta(input: "test.fna").plot_residue_distribution.run
68
+ # BD.new.read_fasta(input: "test.fna").plot_residue_distribution.run
69
69
  #
70
70
  # rubocop: disable ClassLength
71
71
  class PlotResidueDistribution
@@ -47,7 +47,7 @@ module BioDSL
47
47
  #
48
48
  # To pick some random records from the stream do:
49
49
  #
50
- # BP.new.
50
+ # BD.new.
51
51
  # read_fasta(input: "in.fna").
52
52
  # random(number: 10_000).
53
53
  # write_fasta(output: "out.fna").
@@ -64,39 +64,39 @@ module BioDSL
64
64
  #
65
65
  # To read all FASTQ entries from a file:
66
66
  #
67
- # BP.new.read_fastq(input: "test.fq").dump.run
67
+ # BD.new.read_fastq(input: "test.fq").dump.run
68
68
  #
69
69
  # To read all FASTQ entries from a gzipped file:
70
70
  #
71
- # BP.new.read_fastq(input: "test.fq.gz").dump.run
71
+ # BD.new.read_fastq(input: "test.fq.gz").dump.run
72
72
  #
73
73
  # To read in only 10 records from a FASTQ file:
74
74
  #
75
- # BP.new.read_fastq(input: "test.fq", first: 10).dump.run
75
+ # BD.new.read_fastq(input: "test.fq", first: 10).dump.run
76
76
  #
77
77
  # To read in the last 10 records from a FASTQ file:
78
78
  #
79
- # BP.new.read_fastq(input: "test.fq", last: 10).dump.run
79
+ # BD.new.read_fastq(input: "test.fq", last: 10).dump.run
80
80
  #
81
81
  # To read all FASTQ entries from multiple files:
82
82
  #
83
- # BP.new.read_fastq(input: "test1.fq,test2.fq").dump.run
83
+ # BD.new.read_fastq(input: "test1.fq,test2.fq").dump.run
84
84
  #
85
85
  # To read FASTQ entries from multiple files using a glob expression:
86
86
  #
87
- # BP.new.read_fastq(input: "*.fq").dump.run
87
+ # BD.new.read_fastq(input: "*.fq").dump.run
88
88
  #
89
89
  # To read FASTQ entries from pair-end data:
90
90
  #
91
- # BP.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
91
+ # BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
92
92
  #
93
93
  # To read FASTQ entries from pair-end data:
94
94
  #
95
- # BP.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
95
+ # BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
96
96
  #
97
97
  # To read FASTQ entries from pair-end data and reverse-complement read2:
98
98
  #
99
- # BP.new.
99
+ # BD.new.
100
100
  # read_fastq(input: "file1.fq", input2: "file2.fq",
101
101
  # reverse_complement: true)
102
102
  # .dump.run
@@ -93,7 +93,7 @@ module BioDSL
93
93
  # where the keys Organism, Sequence and Count are taken from the comment
94
94
  # line prefixed with #:
95
95
  #
96
- # BP.new.read_tab(input: "test.tab").dump.run
96
+ # BD.new.read_tab(input: "test.tab").dump.run
97
97
  #
98
98
  # {:Organism=>"Human", :Sequence=>"ATACGTCAG", :Count=>23524}
99
99
  # {:Organism=>"Dog", :Sequence=>"AGCATGAC", :Count=>2442}
@@ -103,7 +103,7 @@ module BioDSL
103
103
  # However, if the first line is skipped using the +skip+ option the keys
104
104
  # will default to V0, V1, V2 ... Vn:
105
105
  #
106
- # BP.new.read_table(input: "test.tab", skip: 1).dump.run
106
+ # BD.new.read_table(input: "test.tab", skip: 1).dump.run
107
107
  #
108
108
  # {:V0=>"Human", :V1=>"ATACGTCAG", :V2=>23524}
109
109
  # {:V0=>"Dog", :V1=>"AGCATGAC", :V2=>2442}
@@ -112,7 +112,7 @@ module BioDSL
112
112
  #
113
113
  # To explicitly name the columns (or the keys) use the +keys+ option:
114
114
  #
115
- # BP.new.
115
+ # BD.new.
116
116
  # read_table(input: "test.tab", skip: 1, keys: [:ORGANISM, :SEQ, :COUNT]).
117
117
  # dump.
118
118
  # run
@@ -128,7 +128,7 @@ module BioDSL
128
128
  # argument. So to read in only the sequence and the count so that the
129
129
  # count comes before the sequence do:
130
130
  #
131
- # BP.new.read_table(input: "test.tab", skip: 1, select: [2, 1]).dump.run
131
+ # BD.new.read_table(input: "test.tab", skip: 1, select: [2, 1]).dump.run
132
132
  #
133
133
  # {:V0=>23524, :V1=>"ATACGTCAG"}
134
134
  # {:V0=>2442, :V1=>"AGCATGAC"}
@@ -141,7 +141,7 @@ module BioDSL
141
141
  #
142
142
  # Then the header keys can be used:
143
143
  #
144
- # BP.new.
144
+ # BD.new.
145
145
  # read_table(input: "test.tab", skip: 1, select: [:Count, :Sequence]).
146
146
  # dump.
147
147
  # run
@@ -154,7 +154,7 @@ module BioDSL
154
154
  # Likewise, it is possible to reject specified columns from being read
155
155
  # using the +reject+ option:
156
156
  #
157
- # BP.new.read_table(input: "test.tab", skip: 1, reject: [2, 1]).dump.run
157
+ # BD.new.read_table(input: "test.tab", skip: 1, reject: [2, 1]).dump.run
158
158
  #
159
159
  # {:V0=>"Human"}
160
160
  # {:V0=>"Dog"}
@@ -163,7 +163,7 @@ module BioDSL
163
163
  #
164
164
  # And again, the header keys can be used if a header is present:
165
165
  #
166
- # BP.new.
166
+ # BD.new.
167
167
  # read_table(input: "test.tab", skip: 1, reject: [:Count, :Sequence]).
168
168
  # dump.
169
169
  # run
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To reverse the sequence simply do:
53
53
  #
54
- # BP.new.read_fastq(input:"test.fq").reverse_seq.dump.run
54
+ # BD.new.read_fastq(input:"test.fq").reverse_seq.dump.run
55
55
  #
56
56
  # {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
57
57
  # :SEQ=>"GTGACCGGCAGCAAAATGTT",
@@ -92,7 +92,7 @@ module BioDSL
92
92
  #
93
93
  # We can slice the alignment with +slice_align+ using a range:
94
94
  #
95
- # BP.new.
95
+ # BD.new.
96
96
  # read_fasta(input: "test.fna").
97
97
  # slice_align(slice: 14 .. 27).
98
98
  # dump.
@@ -107,7 +107,7 @@ module BioDSL
107
107
  #
108
108
  # Or we could slice the alignment using a set of primers:
109
109
  #
110
- # BP.new.
110
+ # BD.new.
111
111
  # read_fasta(input: "test.fna").
112
112
  # slice_align(forward: "CGCATACG", reverse: "GAGGGG", max_mismatches: 0,
113
113
  # max_insertions: 0, max_deletions: 0).
@@ -128,7 +128,7 @@ module BioDSL
128
128
  # and specifying primers these will be matched to the template and the hit
129
129
  # positions used for slicing:
130
130
  #
131
- # BP.new.
131
+ # BD.new.
132
132
  # read_fasta(input: "test.fna").
133
133
  # slice_align(template_file: "template.fna", forward: "GAATACG",
134
134
  # reverse: "ATTCGAT", max_mismatches: 0, max_insertions: 0,
@@ -147,7 +147,7 @@ module BioDSL
147
147
  # is useful if you are slicing 16S rRNA alignments and want the _E.coli_
148
148
  # corresponding positions - simply use the _E.coli_ sequence as template.
149
149
  #
150
- # BP.new.
150
+ # BD.new.
151
151
  # read_fasta(input: "test.fna").
152
152
  # slice_align(template_file: "template.fna", slice: 4 .. 14).
153
153
  # dump.run
@@ -55,7 +55,7 @@ module BioDSL
55
55
  #
56
56
  # To slice the second residue from the beginning do:
57
57
  #
58
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: 2).dump.run
58
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: 2).dump.run
59
59
  #
60
60
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
61
61
  # :SEQ=>"G",
@@ -64,7 +64,7 @@ module BioDSL
64
64
  #
65
65
  # To slice the last residue do:
66
66
  #
67
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: -1).dump.run
67
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: -1).dump.run
68
68
  #
69
69
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
70
70
  # :SEQ=>"T",
@@ -73,7 +73,7 @@ module BioDSL
73
73
  #
74
74
  # To slice the first 5 residues do:
75
75
  #
76
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: 0 ... 5).dump.run
76
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: 0 ... 5).dump.run
77
77
  #
78
78
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
79
79
  # :SEQ=>"TTGGT",
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # To slice the last 5 residues do:
84
84
  #
85
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: -5 .. -1).dump.run
85
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: -5 .. -1).dump.run
86
86
  #
87
87
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
88
88
  # :SEQ=>"GCGAT",
@@ -53,7 +53,7 @@ module BioDSL
53
53
  #
54
54
  # To sort this according to COUNT in ascending order do:
55
55
  #
56
- # BP.new.read_table(input: "test.tab").sort(key: :COUNT).dump.run
56
+ # BD.new.read_table(input: "test.tab").sort(key: :COUNT).dump.run
57
57
  #
58
58
  # {:COUNT=>1, :ORGANISM=>"Eel"}
59
59
  # {:COUNT=>3, :ORGANISM=>"Cat"}
@@ -61,7 +61,7 @@ module BioDSL
61
61
  #
62
62
  # And in descending order:
63
63
  #
64
- # BP.new.
64
+ # BD.new.
65
65
  # read_table(input: "test.tab").
66
66
  # sort(key: :COUNT, reverse: true).
67
67
  # dump.
@@ -73,7 +73,7 @@ module BioDSL
73
73
  #
74
74
  # The type of value determines the sorting, alphabetical order:
75
75
  #
76
- # BP.new.read_table(input: "test.tab").sort(key: :ORGANISM).dump.run
76
+ # BD.new.read_table(input: "test.tab").sort(key: :ORGANISM).dump.run
77
77
  #
78
78
  # {:COUNT=>3, :ORGANISM=>"Cat"}
79
79
  # {:COUNT=>4, :ORGANISM=>"Dog"}
@@ -81,7 +81,7 @@ module BioDSL
81
81
  #
82
82
  # And reverse alphabetic order:
83
83
  #
84
- # BP.new.
84
+ # BD.new.
85
85
  # read_table(input: "test.tab").
86
86
  # sort(key: :ORGANISM, reverse: true).
87
87
  # dump.
@@ -65,7 +65,7 @@ module BioDSL
65
65
  #
66
66
  # These can be split using split_pair_seq:
67
67
  #
68
- # BP.new.
68
+ # BD.new.
69
69
  # read_fastq(input: "test.fq", encoding: :base_33).
70
70
  # merge_pair_seq.
71
71
  # split_pair_seq.
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # The forward end can be trimmed like this:
84
84
  #
85
- # BP.new.
85
+ # BD.new.
86
86
  # read_fasta(input: "test.fna").
87
87
  # trim_primer(primer: "ATAGAACTGAC", direction: :forward).
88
88
  # dump.
@@ -98,7 +98,7 @@ module BioDSL
98
98
  #
99
99
  # And trimming a reverse primer:
100
100
  #
101
- # BP.new.
101
+ # BD.new.
102
102
  # read_fasta(input: "test.fna").
103
103
  # trim_primer(primer: "ACTACGTGCGGAT", direction: :reverse).
104
104
  # dump.
@@ -58,7 +58,7 @@ module BioDSL
58
58
  #
59
59
  # To trim both ends simply do:
60
60
  #
61
- # BP.new.read_fastq(input: "test.fq").trim_seq.trim_seq.run
61
+ # BD.new.read_fastq(input: "test.fq").trim_seq.trim_seq.run
62
62
  #
63
63
  # SEQ_NAME: test
64
64
  # SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcat
@@ -68,7 +68,7 @@ module BioDSL
68
68
  #
69
69
  # Use the +quality_min+ option to change the minimum value to discard:
70
70
  #
71
- # BP.new.
71
+ # BD.new.
72
72
  # read_fastq(input: "test.fq").
73
73
  # trim_seq(quality_min: 25).
74
74
  # trim_seq.
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # To trim the left end only (use :right for right end only), do:
84
84
  #
85
- # BP.new.read_fastq(input: "test.fq").trim_seq(mode: :left).trim_seq.run
85
+ # BD.new.read_fastq(input: "test.fq").trim_seq(mode: :left).trim_seq.run
86
86
  #
87
87
  # SEQ_NAME: test
88
88
  # SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcatgctagctag
@@ -93,7 +93,7 @@ module BioDSL
93
93
  # To increase the length of stretch of good quality residues to match, use
94
94
  # the +length_min+ option:
95
95
  #
96
- # BP.new.read_fastq(input: "test.fq").trim_seq(length_min: 4).trim_seq.run
96
+ # BD.new.read_fastq(input: "test.fq").trim_seq(length_min: 4).trim_seq.run
97
97
  #
98
98
  # SEQ_NAME: test
99
99
  # SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtct
@@ -56,7 +56,7 @@ module BioDSL
56
56
  # To output only unique values for the first column we first read the table
57
57
  # with +read_table+ and then pass the result to +unique_values+:
58
58
  #
59
- # BP.new.read_table(input: "test.tab").unique_values(key: :V0).dump.run
59
+ # BD.new.read_table(input: "test.tab").unique_values(key: :V0).dump.run
60
60
  #
61
61
  # {:V0=>"Human", :V1=>"H1"}
62
62
  # {:V0=>"Dog", :V1=>"D1"}
@@ -64,7 +64,7 @@ module BioDSL
64
64
  #
65
65
  # To output duplicate records instead use the +invert+ option:
66
66
  #
67
- # BP.new.
67
+ # BD.new.
68
68
  # read_table(input: "test.tab").
69
69
  # unique_values(key: :V0, invert: true).
70
70
  # dump.
@@ -50,7 +50,7 @@ module BioDSL
50
50
  #
51
51
  # To create a tree from aligned FASTA sequences in the file `align.fna` do:
52
52
  #
53
- # BP.new.
53
+ # BD.new.
54
54
  # read_fasta(input: "align.fna").
55
55
  # write_tree(output: "align.tree").
56
56
  # run
@@ -139,7 +139,7 @@ module BioDSL
139
139
 
140
140
  # Format a Pipeline to a pretty string which is returned.
141
141
  def to_s
142
- command_strings = %w(BP new)
142
+ command_strings = %w(BD new)
143
143
 
144
144
  @commands.each { |command| command_strings << command.to_s }
145
145
 
@@ -317,7 +317,7 @@ module BioDSL
317
317
  # @option options [Boolean] :debug Debug flag.
318
318
  # @option options [Boolean] :verbose Verbose flag.
319
319
  def prime_variables(options)
320
- BioDSL.test = ENV['BP_TEST']
320
+ BioDSL.test = ENV['BD_TEST']
321
321
  BioDSL.debug = options[:debug]
322
322
  BioDSL.verbose = options[:verbose]
323
323
  end
@@ -27,5 +27,5 @@
27
27
 
28
28
  # Namespace for BioDSL.
29
29
  module BioDSL
30
- VERSION = '1.0.0'
30
+ VERSION = '1.0.1'
31
31
  end
data/lib/BioDSL.rb CHANGED
@@ -78,4 +78,4 @@ module BioDSL
78
78
  require 'BioDSL/verbose'
79
79
  end
80
80
 
81
- BP = BioDSL::Pipeline # Module alias for irb short hand
81
+ BD = BioDSL::Pipeline # Module alias for irb short hand
@@ -47,7 +47,7 @@ class TestAlignSeqMothur < Test::Unit::TestCase
47
47
  @output.write(SEQ_NAME: 'test', SEQ: 'gattccgatcgatcgatcga')
48
48
  @output.close
49
49
 
50
- @p = BP.new
50
+ @p = BD.new
51
51
  end
52
52
 
53
53
  def write_template
@@ -49,7 +49,7 @@ class TestAnalyzeResidueDistribution < Test::Unit::TestCase
49
49
 
50
50
  @output.close
51
51
 
52
- @p = BP.new
52
+ @p = BD.new
53
53
  end
54
54
 
55
55
  def teardown
@@ -33,7 +33,7 @@ require 'test/helper'
33
33
  # Test class for ClassifySeq.
34
34
  class TestClassifySeq < Test::Unit::TestCase
35
35
  def setup
36
- @p = BP.new
36
+ @p = BD.new
37
37
  end
38
38
 
39
39
  test 'BioDSL::Pipeline#classify_seq with disallowed option raises' do
@@ -35,7 +35,7 @@ class TestClassifySeqMothur < Test::Unit::TestCase
35
35
  def setup
36
36
  omit('mothur not found') unless BioDSL::Filesys.which('mothur')
37
37
 
38
- @p = BP.new
38
+ @p = BD.new
39
39
  @database = __FILE__
40
40
  @taxonomy = __FILE__
41
41
  end
@@ -47,7 +47,7 @@ class TestCollapseOtus < Test::Unit::TestCase
47
47
 
48
48
  @output.close
49
49
 
50
- @p = BP.new
50
+ @p = BD.new
51
51
  end
52
52
 
53
53
  test 'BioDSL::Pipeline::Count with invalid options raises' do
@@ -110,7 +110,7 @@ class TestGrab < Test::Unit::TestCase
110
110
 
111
111
  test 'BioDSL::Pipeline::Grab#to_s with select and symbol key return OK' do
112
112
  @p.grab(select: :SEQ_NAME)
113
- expected = 'BP.new.grab(select: :SEQ_NAME)'
113
+ expected = 'BD.new.grab(select: :SEQ_NAME)'
114
114
  assert_equal(expected, @p.to_s)
115
115
  end
116
116
 
@@ -197,7 +197,7 @@ class TestReadFasta < Test::Unit::TestCase
197
197
  test 'BioDSL::Pipeline::ReadFasta#to_s with :first returns correctly' do
198
198
  @p.read_fasta(input: @file, first: 3)
199
199
 
200
- expected = %{BP.new.read_fasta(input: "#{@file}", first: 3)}
200
+ expected = %{BD.new.read_fasta(input: "#{@file}", first: 3)}
201
201
 
202
202
  assert_equal(expected, @p.to_s)
203
203
  end
@@ -377,7 +377,7 @@ class TestReadFastq < Test::Unit::TestCase
377
377
  test 'BioDSL::Pipeline::ReadFastq#to_s with :first returns correctly' do
378
378
  @p.read_fastq(input: @file, first: 3)
379
379
 
380
- expected = %{BP.new.read_fastq(input: "#{@file}", first: 3)}
380
+ expected = %{BD.new.read_fastq(input: "#{@file}", first: 3)}
381
381
 
382
382
  assert_equal(expected, @p.to_s)
383
383
  end
@@ -295,7 +295,7 @@ class TestReadTable < Test::Unit::TestCase
295
295
  test 'BioDSL::Pipeline::ReadTable#to_s with :first returns correctly' do
296
296
  @p.read_table(input: @file, first: 3)
297
297
 
298
- expected = %{BP.new.read_table(input: "#{@file}", first: 3)}
298
+ expected = %{BD.new.read_table(input: "#{@file}", first: 3)}
299
299
 
300
300
  assert_equal(expected, @p.to_s)
301
301
  end
@@ -44,7 +44,7 @@ class PipelineTest < Test::Unit::TestCase
44
44
  delivery_method :test
45
45
  end
46
46
 
47
- @p = BP.new
47
+ @p = BD.new
48
48
  end
49
49
 
50
50
  def setup_fasta_files
@@ -69,27 +69,27 @@ class PipelineTest < Test::Unit::TestCase
69
69
 
70
70
  test 'BioDSL::Pipeline#to_s w/o options and w/o .run() returns OK' do
71
71
  @p.commands << BioDSL::Command.new('dump', nil, {})
72
- expected = %(BP.new.dump)
72
+ expected = %(BD.new.dump)
73
73
  assert_equal(expected, @p.to_s)
74
74
  end
75
75
 
76
76
  test 'BioDSL::Pipeline#to_s with options and w/o .run() returns OK' do
77
77
  @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
78
- expected = %(BP.new.read_fasta(input: "test.fna"))
78
+ expected = %(BD.new.read_fasta(input: "test.fna"))
79
79
  assert_equal(expected, @p.to_s)
80
80
  end
81
81
 
82
82
  test 'BioDSL::Pipeline#to_s w/o options and .run() returns OK' do
83
83
  @p.commands << BioDSL::Command.new('dump', nil, {})
84
84
  @p.complete = true
85
- expected = %(BP.new.dump.run)
85
+ expected = %(BD.new.dump.run)
86
86
  assert_equal(expected, @p.run.to_s)
87
87
  end
88
88
 
89
89
  test 'BioDSL::Pipeline#to_s with options and .run() returns OK' do
90
90
  @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
91
91
  @p.complete = true
92
- expected = %{BP.new.read_fasta(input: "test.fna").run}
92
+ expected = %{BD.new.read_fasta(input: "test.fna").run}
93
93
  assert_equal(expected, @p.run.to_s)
94
94
  end
95
95
 
@@ -113,13 +113,13 @@ class PipelineTest < Test::Unit::TestCase
113
113
 
114
114
  test 'BioDSL::Pipeline#+ of two Pipelines return correctly' do
115
115
  p = BioDSL::Pipeline.new.dump(first: 2)
116
- assert_equal('BP.new.dump(first: 2)', (@p + p).to_s)
116
+ assert_equal('BD.new.dump(first: 2)', (@p + p).to_s)
117
117
  end
118
118
 
119
119
  test 'BioDSL::Pipeline#+ of three Pipelines return correctly' do
120
120
  p1 = BioDSL::Pipeline.new.dump(first: 2)
121
121
  p2 = BioDSL::Pipeline.new.dump(last: 3)
122
- assert_equal('BP.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
122
+ assert_equal('BD.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
123
123
  end
124
124
 
125
125
  test 'BioDSL::Pipeline#pop decreases size' do
data/test/helper.rb CHANGED
@@ -41,7 +41,7 @@ require 'BioDSL'
41
41
  require 'test/unit'
42
42
  require 'mocha/test_unit'
43
43
 
44
- ENV['BP_TEST'] = "true"
44
+ ENV['BD_TEST'] = "true"
45
45
 
46
46
  module Kernel
47
47
  def capture_stdout
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: BioDSL
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin A. Hansen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-30 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: haml
@@ -413,7 +413,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
413
413
  version: '0'
414
414
  requirements: []
415
415
  rubyforge_project: BioDSL
416
- rubygems_version: 2.4.5.1
416
+ rubygems_version: 2.4.8
417
417
  signing_key:
418
418
  specification_version: 4
419
419
  summary: BioDSL