BioDSL 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +66 -66
  3. data/examples/fastq_to_fasta.rb +1 -1
  4. data/lib/BioDSL/commands/align_seq_mothur.rb +1 -1
  5. data/lib/BioDSL/commands/analyze_residue_distribution.rb +2 -2
  6. data/lib/BioDSL/commands/assemble_pairs.rb +1 -1
  7. data/lib/BioDSL/commands/assemble_seq_idba.rb +1 -1
  8. data/lib/BioDSL/commands/assemble_seq_ray.rb +1 -1
  9. data/lib/BioDSL/commands/assemble_seq_spades.rb +2 -2
  10. data/lib/BioDSL/commands/classify_seq.rb +1 -1
  11. data/lib/BioDSL/commands/classify_seq_mothur.rb +1 -1
  12. data/lib/BioDSL/commands/clip_primer.rb +2 -2
  13. data/lib/BioDSL/commands/cluster_otus.rb +1 -1
  14. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  15. data/lib/BioDSL/commands/complement_seq.rb +1 -1
  16. data/lib/BioDSL/commands/count.rb +1 -1
  17. data/lib/BioDSL/commands/count_values.rb +1 -1
  18. data/lib/BioDSL/commands/degap_seq.rb +2 -2
  19. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  20. data/lib/BioDSL/commands/filter_rrna.rb +1 -1
  21. data/lib/BioDSL/commands/genecall.rb +2 -2
  22. data/lib/BioDSL/commands/index_taxonomy.rb +1 -1
  23. data/lib/BioDSL/commands/mask_seq.rb +3 -3
  24. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  25. data/lib/BioDSL/commands/merge_pair_seq.rb +1 -1
  26. data/lib/BioDSL/commands/merge_table.rb +1 -1
  27. data/lib/BioDSL/commands/plot_heatmap.rb +1 -1
  28. data/lib/BioDSL/commands/plot_matches.rb +1 -1
  29. data/lib/BioDSL/commands/plot_residue_distribution.rb +1 -1
  30. data/lib/BioDSL/commands/random.rb +1 -1
  31. data/lib/BioDSL/commands/read_fastq.rb +9 -9
  32. data/lib/BioDSL/commands/read_table.rb +7 -7
  33. data/lib/BioDSL/commands/reverse_seq.rb +1 -1
  34. data/lib/BioDSL/commands/slice_align.rb +4 -4
  35. data/lib/BioDSL/commands/slice_seq.rb +4 -4
  36. data/lib/BioDSL/commands/sort.rb +4 -4
  37. data/lib/BioDSL/commands/split_pair_seq.rb +1 -1
  38. data/lib/BioDSL/commands/trim_primer.rb +2 -2
  39. data/lib/BioDSL/commands/trim_seq.rb +4 -4
  40. data/lib/BioDSL/commands/unique_values.rb +2 -2
  41. data/lib/BioDSL/commands/write_tree.rb +1 -1
  42. data/lib/BioDSL/pipeline.rb +2 -2
  43. data/lib/BioDSL/version.rb +1 -1
  44. data/lib/BioDSL.rb +1 -1
  45. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  46. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  47. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  48. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  49. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  50. data/test/BioDSL/commands/test_grab.rb +1 -1
  51. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  52. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  53. data/test/BioDSL/commands/test_read_table.rb +1 -1
  54. data/test/BioDSL/test_pipeline.rb +7 -7
  55. data/test/helper.rb +1 -1
  56. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 62ee1a33fd4240d69a9947883c1a2616c080c55b
4
- data.tar.gz: bfceb11bc1375355e5b61eaf3ae31dd3459f6572
3
+ metadata.gz: b828e339f7d9337acdaf88a4206cb4cf15a6778c
4
+ data.tar.gz: 6c130d98ba2e9ca1c1bdf6a044bbe7d6e2c6f309
5
5
  SHA512:
6
- metadata.gz: f9c8bd7cae663da3110438b209b55ac92301c0de4fe5800dbf2c0d322c6089a17ebe4018a27a2ffaa00f615cb9ac6b89e15c4eaeba73bffd659c423b28ba5813
7
- data.tar.gz: 5d1ffa6e7e16bc836774a42e79ebd0fcf9091264349a781106501178d1572c9d0176a892f8c07e27d1885d69a12831152f307147b9b098e45ed80179e0316588
6
+ metadata.gz: 8f1fcfd7080a7487fd1a75152c4da3ce7328e86e19843181a93c30d1bb94a2f8f78a065cd208002324df2e0bbbbfbde5576a6157ece8c4c6c4878a6311e0074e
7
+ data.tar.gz: 31b549d1294e2be25897d824ab019154dfa570305dd1f79b041ae641b8208d2fa00e97ac36e4bdfe8a2dacd46d7cc8da9db4ddb5d75f4c9f3a3e6997aa4e0ae0
data/README.md CHANGED
@@ -15,7 +15,7 @@ A test script:
15
15
 
16
16
  require 'BioDSL'
17
17
 
18
- p = BP.new.
18
+ p = BD.new.
19
19
  read_fasta(input: "input.fna").
20
20
  grab(select: "ATC$", keys: :SEQ).
21
21
  write_fasta(output: "output.fna").
@@ -29,24 +29,24 @@ adding the following to your `~/.bashrc` file:
29
29
  And then start the interactive shell:
30
30
 
31
31
  $ ibp
32
- irb(main):001:0> p = BP.new
33
- => BP.new
32
+ irb(main):001:0> p = BD.new
33
+ => BD.new
34
34
  irb(main):002:0> p.read_fasta(input: "input.fna")
35
- => BP.new.read_fasta(input: "input.fna")
35
+ => BD.new.read_fasta(input: "input.fna")
36
36
  irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
37
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
37
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
38
38
  irb(main):004:0> p.write_fasta(output: "output.fna")
39
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
39
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
40
40
  irb(main):005:0> p.run(progress: true)
41
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
41
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
42
42
  irb(main):006:0>
43
43
 
44
44
 
45
45
  Or chaining commands directly:
46
46
 
47
47
  $ ibp
48
- irb(main):001:0> BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
49
- => BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
48
+ irb(main):001:0> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
49
+ => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
50
50
  irb(main):002:0>
51
51
 
52
52
  Or run on the command line with the alias bp which you can create by adding the
@@ -56,61 +56,61 @@ following to your ~/.bashrc file:
56
56
 
57
57
  Then you can run the below from the command line:
58
58
 
59
- $ bp -e 'BP.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
59
+ $ bp -e 'BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
60
60
 
61
61
  Available BioDSL
62
62
  -------------------
63
63
 
64
- * [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AddKey)
65
- * [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AlignSeqMothur)
66
- * [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AnalyzeResidueDistribution)
67
- * [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssemblePairs)
68
- * [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqIdba)
69
- * [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqRay)
70
- * [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/AssembleSeqSpades)
71
- * [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClassifySeq)
72
- * [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClassifySeqMothur)
73
- * [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClipPrimer)
74
- * [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ClusterOtus)
75
- * [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/CollapseOtus)
76
- * [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/CollectOtus)
77
- * [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ComplementSeq)
78
- * [count] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Count)
79
- * [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/DegapSeq)
80
- * [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/DereplicateSeq)
81
- * [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Dump)
82
- * [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/FilterRrna)
83
- * [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Genecall)
84
- * [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Grab)
85
- * [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/IndexTaxonomy)
86
- * [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MeanScores)
87
- * [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergePairSeq)
88
- * [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergeTable)
89
- * [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/MergeValues)
90
- * [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotHeatmap)
91
- * [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotHistogram)
92
- * [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotMatches)
93
- * [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotResidueDistribution)
94
- * [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/PlotScores)
95
- * [random] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Random)
96
- * [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadFasta)
97
- * [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadFastq)
98
- * [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReadTable)
99
- * [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/ReverseSeq)
100
- * [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SliceAlign)
101
- * [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SliceSeq)
102
- * [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/Sort)
103
- * [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SplitPairSeq)
104
- * [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/SplitValues)
105
- * [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/TrimPrimer)
106
- * [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/TrimSeq)
107
- * [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UchimeRef)
108
- * [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UniqueValues)
109
- * [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/UsearchGlobal)
110
- * [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteFasta)
111
- * [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteFastq)
112
- * [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteTable)
113
- * [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.0/BioDSL/WriteTree)
64
+ * [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AddKey)
65
+ * [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AlignSeqMothur)
66
+ * [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AnalyzeResidueDistribution)
67
+ * [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssemblePairs)
68
+ * [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqIdba)
69
+ * [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqRay)
70
+ * [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqSpades)
71
+ * [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeq)
72
+ * [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeqMothur)
73
+ * [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClipPrimer)
74
+ * [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClusterOtus)
75
+ * [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollapseOtus)
76
+ * [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollectOtus)
77
+ * [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ComplementSeq)
78
+ * [count] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Count)
79
+ * [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DegapSeq)
80
+ * [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DereplicateSeq)
81
+ * [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Dump)
82
+ * [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/FilterRrna)
83
+ * [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Genecall)
84
+ * [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Grab)
85
+ * [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/IndexTaxonomy)
86
+ * [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MeanScores)
87
+ * [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergePairSeq)
88
+ * [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeTable)
89
+ * [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeValues)
90
+ * [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHeatmap)
91
+ * [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHistogram)
92
+ * [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotMatches)
93
+ * [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotResidueDistribution)
94
+ * [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotScores)
95
+ * [random] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Random)
96
+ * [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFasta)
97
+ * [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFastq)
98
+ * [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadTable)
99
+ * [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReverseSeq)
100
+ * [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceAlign)
101
+ * [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceSeq)
102
+ * [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Sort)
103
+ * [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitPairSeq)
104
+ * [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitValues)
105
+ * [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimPrimer)
106
+ * [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimSeq)
107
+ * [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UchimeRef)
108
+ * [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UniqueValues)
109
+ * [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UsearchGlobal)
110
+ * [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFasta)
111
+ * [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFastq)
112
+ * [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTable)
113
+ * [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTree)
114
114
 
115
115
  Log and History
116
116
  ---------------
@@ -127,37 +127,37 @@ Progress:
127
127
 
128
128
  Show nifty progress table with commands, records read and emitted and time.
129
129
 
130
- `BP.new.read_fasta(input: "input.fna").dump.run(progress: true)`
130
+ `BD.new.read_fasta(input: "input.fna").dump.run(progress: true)`
131
131
 
132
132
  Verbose:
133
133
 
134
134
  Output verbose messages from commands and the run status.
135
135
 
136
- `BP.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
136
+ `BD.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
137
137
 
138
138
  Debug:
139
139
 
140
140
  Output debug messages from commands using these.
141
141
 
142
- `BP.new.read_fasta(input: "input.fna").dump.run(debug: true)`
142
+ `BD.new.read_fasta(input: "input.fna").dump.run(debug: true)`
143
143
 
144
144
  E-mail notification:
145
145
 
146
146
  Send an email when run is complete.
147
147
 
148
- `BP.new.read_fasta(input: "input.fna").dump.run(email: mail@maasha.dk, subject: "Script done!")`
148
+ `BD.new.read_fasta(input: "input.fna").dump.run(email: mail@maasha.dk, subject: "Script done!")`
149
149
 
150
150
  Report:
151
151
 
152
152
  Create an HTML report of the run stats:
153
153
 
154
- `BP.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
154
+ `BD.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
155
155
 
156
156
  Output dir:
157
157
 
158
158
  All output files from commands are put in a specified dir:
159
159
 
160
- `BP.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
160
+ `BD.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
161
161
 
162
162
 
163
163
  Configuration File
@@ -5,4 +5,4 @@ require 'BioDSL'
5
5
  # Read in sequences in FASTQ format from the file `test.fq` and save them in
6
6
  # FASTA format in the file `test.fna`.
7
7
 
8
- BP.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run
8
+ BD.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run
@@ -53,7 +53,7 @@ module BioDSL
53
53
  # To align the entries in the FASTA file `test.fna` to the template alignment
54
54
  # in the file `template.fna` do:
55
55
  #
56
- # BP.new.
56
+ # BD.new.
57
57
  # read_fasta(input: "test.fna").
58
58
  # align_seq_mothur(template_file: "template.fna").
59
59
  # run
@@ -68,7 +68,7 @@ module BioDSL
68
68
  # Now we run the data through the following pipeline and get the resulting
69
69
  # table:
70
70
  #
71
- # BP.new.
71
+ # BD.new.
72
72
  # read_fasta(input: "test.fna").
73
73
  # analyze_residue_distribution.
74
74
  # grab(select: "residue").
@@ -91,7 +91,7 @@ module BioDSL
91
91
  # Here we do the same as above, but output percentages instead of absolute
92
92
  # counts:
93
93
  #
94
- # BP.new.
94
+ # BD.new.
95
95
  # read_fasta(input: "test.fna").
96
96
  # analyze_residue_distribution(percent: true).
97
97
  # grab(select: "residue").
@@ -77,7 +77,7 @@ module BioDSL
77
77
  # If you have two pair-end sequence files with the Illumina data then you
78
78
  # can assemble these using assemble_pairs like this:
79
79
  #
80
- # BP.new.
80
+ # BD.new.
81
81
  # read_fastq(input: "file1.fq", input2: "file2.fq").
82
82
  # assemble_pairs(reverse_complement: true).
83
83
  # run
@@ -58,7 +58,7 @@ module BioDSL
58
58
  # If you have two pair-end sequence files with the Illumina data then you
59
59
  # can assemble these using +assemble_seq_idba+ like this:
60
60
  #
61
- # BP.new.
61
+ # BD.new.
62
62
  # read_fastq(input: "file1.fq", input2: "file2.fq").
63
63
  # assemble_seq_idba.
64
64
  # write_fasta(output: "contigs.fna").
@@ -61,7 +61,7 @@ module BioDSL
61
61
  # If you have two pair-end sequence files with the Illumina data then you
62
62
  # can assemble these using +assemble_seq_ray+ like this:
63
63
  #
64
- # BP.new.
64
+ # BD.new.
65
65
  # read_fastq(input: "file1.fq", input2: "file2.fq").
66
66
  # assemble_seq_ray.
67
67
  # write_fasta(output: "contigs.fna").
@@ -56,7 +56,7 @@ module BioDSL
56
56
  # If you have two pair-end sequence files with the Illumina data then you
57
57
  # can assemble these using assemble_seq_spades like this:
58
58
  #
59
- # BP.new.
59
+ # BD.new.
60
60
  # read_fastq(input: "file1.fq", input2: "file2.fq").
61
61
  # assemble_seq_spades(kmers: [55,77,99,127]).
62
62
  # write_fasta(output: "contigs.fna").
@@ -69,7 +69,7 @@ module BioDSL
69
69
  include AuxHelper
70
70
 
71
71
  STATS = %i(records_in records_out sequences_in sequences_out residues_in
72
- records_out assembled)
72
+ residues_out records_out assembled)
73
73
 
74
74
  # Constructor for the AssembleSeqSpades class.
75
75
  #
@@ -100,7 +100,7 @@ module BioDSL
100
100
  #
101
101
  # To classify a bunch of OTU sequences in the file +otus.fna+ we do:
102
102
  #
103
- # BP.new.
103
+ # BD.new.
104
104
  # read_fasta(input: "otus.fna").
105
105
  # classify_seq(dir: "RDP11_3").
106
106
  # write_table(keys: [:SEQ_NAME, :TAXONOMY_HITS, :TAXONOMY]).
@@ -61,7 +61,7 @@ module BioDSL
61
61
  # database = "trainset9_032012.pds.fasta"
62
62
  # taxonomy = "trainset9_032012.pds.tax"
63
63
  #
64
- # BP.new.
64
+ # BD.new.
65
65
  # read_fasta(input: "otus.fna").
66
66
  # classify_seq_mothur(database: database, taxonomy: taxonomy).
67
67
  # grab(exact: true, keys: :RECORD_TYPE, select: "taxonomy").
@@ -75,7 +75,7 @@ module BioDSL
75
75
  # To clip this sequence in the forward direction with the primer
76
76
  # 'TGACTACGACTACGACTACT' do:
77
77
  #
78
- # BP.new.
78
+ # BD.new.
79
79
  # read_fasta(input: "test.fna").
80
80
  # clip_primer(primer: "TGACTACGACTACGACTACT", direction: :forward).
81
81
  # dump.
@@ -91,7 +91,7 @@ module BioDSL
91
91
  #
92
92
  # Or in the reverse direction:
93
93
  #
94
- # BP.new.
94
+ # BD.new.
95
95
  # read_fasta(input: "test.fna").
96
96
  # clip_primer(primer: "TGACTACGACTACGACTACT", direction: :reverse).
97
97
  # dump.
@@ -53,7 +53,7 @@ module BioDSL
53
53
  #
54
54
  # To create OTU clusters do:
55
55
  #
56
- # BP.new.
56
+ # BD.new.
57
57
  # read_fasta(input: "in.fna").
58
58
  # dereplicate_seq.
59
59
  # sort(key: :SEQ_COUNT, reverse: true).
@@ -43,7 +43,7 @@ module BioDSL
43
43
  # Here is an OTU table with four rows, one of which has a redundant Taxonomy
44
44
  # string:
45
45
  #
46
- # BP.new.read_table(input: "otu_table.txt").dump.run
46
+ # BD.new.read_table(input: "otu_table.txt").dump.run
47
47
  #
48
48
  # {:OTU=>"OTU_1",
49
49
  # :CM1_COUNT=>881,
@@ -73,7 +73,7 @@ module BioDSL
73
73
  # In order to collapse the redundant OTU simply run the stream through
74
74
  # +collapse_otus+:
75
75
  #
76
- # BP.new.read_table(input: "otu_table.txt").collapse_otus.dump.run
76
+ # BD.new.read_table(input: "otu_table.txt").collapse_otus.dump.run
77
77
  #
78
78
  # {:OTU=>"OTU_1",
79
79
  # :CM1_COUNT=>881,
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To complement the sequence do:
53
53
  #
54
- # BP.new.read_fastq(input:"test.fq").complement_seq.dump.run
54
+ # BD.new.read_fastq(input:"test.fq").complement_seq.dump.run
55
55
  #
56
56
  # {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
57
57
  # :SEQ=>"AACATTTTGCTGCCGGTCAC",
@@ -46,7 +46,7 @@ module BioDSL
46
46
  #
47
47
  # To count the number of records in the file `test.fq`:
48
48
  #
49
- # BP.new.read_fastq(input: "test.fq").count(output: "count.txt").dump.run
49
+ # BD.new.read_fastq(input: "test.fq").count(output: "count.txt").dump.run
50
50
  #
51
51
  # {:SEQ_NAME=>"ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1",
52
52
  # :SEQ=>"TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT",
@@ -52,7 +52,7 @@ module BioDSL
52
52
  # To count the values of both columns we first read the table with
53
53
  # +read_table+ and then pass the result to +count_values+:
54
54
  #
55
- # BP.new.
55
+ # BD.new.
56
56
  # read_table(input: "test.tab").
57
57
  # count_values(keys: [:V0, :V1]).
58
58
  # dump.
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To remove all gaps from all sequences do:
53
53
  #
54
- # BP.new.read_fasta(input: "test.fna").degap_seq.dump.run
54
+ # BD.new.read_fasta(input: "test.fna").degap_seq.dump.run
55
55
  #
56
56
  # {:SEQ_NAME=>"test1", :SEQ=>"AGTC", :SEQ_LEN=>4}
57
57
  # {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
@@ -59,7 +59,7 @@ module BioDSL
59
59
  #
60
60
  # To remove all gap-only columns use the +columns_only+ option:
61
61
  #
62
- # BP.new.
62
+ # BD.new.
63
63
  # read_fasta(input: "test.fna").
64
64
  # degap_seq(columns_only: true).
65
65
  # dump.
@@ -53,7 +53,7 @@ module BioDSL
53
53
  #
54
54
  # To dereplicate all sequences we use +read_fasta+ and +dereplicate_seq+:
55
55
  #
56
- # BP.new.read_fasta(input: "test.fna").dereplicate_seq.dump.run
56
+ # BD.new.read_fasta(input: "test.fna").dereplicate_seq.dump.run
57
57
  #
58
58
  # {:SEQ_NAME=>"test1", :SEQ=>"ATGC", :SEQ_LEN=>4, :SEQ_COUNT=>2}
59
59
  # {:SEQ_NAME=>"test3", :SEQ=>"GCAT", :SEQ_LEN=>4, :SEQ_COUNT=>1}
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To filter all reads matching the SILVA archaea 23S rRNA do:
53
53
  #
54
- # BP.new.
54
+ # BD.new.
55
55
  # read_fastq(input: "reads.fq").
56
56
  # filter_rrna(ref_fasta: ["silva-arc-23s-id98.fasta"],
57
57
  # ref_index: ["silva-arc-23s-id98.fasta.idx*"]).
@@ -59,7 +59,7 @@ module BioDSL
59
59
  #
60
60
  # To genecall a genome do:
61
61
  #
62
- # BP.new.
62
+ # BD.new.
63
63
  # read_fasta(input: "contigs.fna").
64
64
  # genecall.
65
65
  # grab(select: "genecall", key: :type, exact: true).
@@ -68,7 +68,7 @@ module BioDSL
68
68
  #
69
69
  # To add genecall data to the sequence name use +merge_values+:
70
70
  #
71
- # BP.new.
71
+ # BD.new.
72
72
  # read_fasta(input: "contigs.fna").
73
73
  # genecall(type: "protein").
74
74
  # grab(select: "genecall", key: :type, exact: true).
@@ -113,7 +113,7 @@ module BioDSL
113
113
  #
114
114
  # == Examples
115
115
  #
116
- # BP.new.
116
+ # BD.new.
117
117
  # read_fasta(input: "RDP_11_Bacteria.fna").
118
118
  # index_taxonomy(output_dir: "RDP_11").
119
119
  # run
@@ -57,7 +57,7 @@ module BioDSL
57
57
  # We can read in these sequence using +read_fastq+ and then soft mask the
58
58
  # sequence with mask_seq like this:
59
59
  #
60
- # BP.new.read_fastq(input: "test.fq").mask_seq.dump.run
60
+ # BD.new.read_fastq(input: "test.fq").mask_seq.dump.run
61
61
  #
62
62
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
63
63
  # :SEQ=>"ttggtcgctcgctccgcgacCTCAGATCAGACGTGGGCGAT",
@@ -66,7 +66,7 @@ module BioDSL
66
66
  #
67
67
  # Using the +quality_min+ option we can change the cutoff:
68
68
  #
69
- # BP.new.read_fastq(input: "test.fq").mask_seq(quality_min: 25).dump.run
69
+ # BD.new.read_fastq(input: "test.fq").mask_seq(quality_min: 25).dump.run
70
70
  #
71
71
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
72
72
  # :SEQ=>"ttggtcgctcgctccgcgacctcagATCAGACGTGGGCGAT",
@@ -75,7 +75,7 @@ module BioDSL
75
75
  #
76
76
  # Using the +mask+ option for hard masking:
77
77
  #
78
- # BP.new.read_fastq(input: "test.fq").mask_seq(mask: :hard).dump.run
78
+ # BD.new.read_fastq(input: "test.fq").mask_seq(mask: :hard).dump.run
79
79
  #
80
80
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
81
81
  # :SEQ=>"NNNNNNNNNNNNNNNNNNNNCTCAGATCAGACGTGGGCGAT",
@@ -66,7 +66,7 @@ module BioDSL
66
66
  #
67
67
  # To calculate the mean score do:
68
68
  #
69
- # BP.new.read_fastq(input: "test.fq").mean_scores.dump.run
69
+ # BD.new.read_fastq(input: "test.fq").mean_scores.dump.run
70
70
  #
71
71
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
72
72
  # :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
@@ -76,7 +76,7 @@ module BioDSL
76
76
  #
77
77
  # To calculate local means for a sliding window, do:
78
78
  #
79
- # BP.new.read_fastq(input: "test.fq").mean_scores(local: true).dump.run
79
+ # BD.new.read_fastq(input: "test.fq").mean_scores(local: true).dump.run
80
80
  #
81
81
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
82
82
  # :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
@@ -70,7 +70,7 @@ module BioDSL
70
70
  #
71
71
  # To merge these interleaved pair-end sequences use merge_pair_seq:
72
72
  #
73
- # BP.new.
73
+ # BD.new.
74
74
  # read_fastq(input: "test.fq", encoding: :base_33).
75
75
  # merge_pair_seq.
76
76
  # dump.
@@ -76,7 +76,7 @@ module BioDSL
76
76
  #
77
77
  # We can merge the data with +merge_table+ like this:
78
78
  #
79
- # BP.new.
79
+ # BD.new.
80
80
  # read_table(input: "test1.tab").
81
81
  # merge_table(input: "test2.tab", key: :ID).
82
82
  # dump.
@@ -64,7 +64,7 @@ module BioDSL
64
64
  #
65
65
  # Here we plot a heatmap of data from a table:
66
66
  #
67
- # BP.new.read_table(input: "test.tab").plot_heatmap.run
67
+ # BD.new.read_table(input: "test.tab").plot_heatmap.run
68
68
  #
69
69
  # rubocop:disable ClassLength
70
70
  class PlotHeatmap
@@ -68,7 +68,7 @@ module BioDSL
68
68
  # Here we plot two matches from a table. The vector records are shown in the
69
69
  # +dump+ output:
70
70
  #
71
- # BP.new.read_table(input: "test.tab").dump.plot_matches.run
71
+ # BD.new.read_table(input: "test.tab").dump.plot_matches.run
72
72
  #
73
73
  # {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"+"}
74
74
  # {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"-"}
@@ -65,7 +65,7 @@ module BioDSL
65
65
  #
66
66
  # Here we plot a residue distribution of a FASTA file:
67
67
  #
68
- # BP.new.read_fasta(input: "test.fna").plot_residue_distribution.run
68
+ # BD.new.read_fasta(input: "test.fna").plot_residue_distribution.run
69
69
  #
70
70
  # rubocop: disable ClassLength
71
71
  class PlotResidueDistribution
@@ -47,7 +47,7 @@ module BioDSL
47
47
  #
48
48
  # To pick some random records from the stream do:
49
49
  #
50
- # BP.new.
50
+ # BD.new.
51
51
  # read_fasta(input: "in.fna").
52
52
  # random(number: 10_000).
53
53
  # write_fasta(output: "out.fna").
@@ -64,39 +64,39 @@ module BioDSL
64
64
  #
65
65
  # To read all FASTQ entries from a file:
66
66
  #
67
- # BP.new.read_fastq(input: "test.fq").dump.run
67
+ # BD.new.read_fastq(input: "test.fq").dump.run
68
68
  #
69
69
  # To read all FASTQ entries from a gzipped file:
70
70
  #
71
- # BP.new.read_fastq(input: "test.fq.gz").dump.run
71
+ # BD.new.read_fastq(input: "test.fq.gz").dump.run
72
72
  #
73
73
  # To read in only 10 records from a FASTQ file:
74
74
  #
75
- # BP.new.read_fastq(input: "test.fq", first: 10).dump.run
75
+ # BD.new.read_fastq(input: "test.fq", first: 10).dump.run
76
76
  #
77
77
  # To read in the last 10 records from a FASTQ file:
78
78
  #
79
- # BP.new.read_fastq(input: "test.fq", last: 10).dump.run
79
+ # BD.new.read_fastq(input: "test.fq", last: 10).dump.run
80
80
  #
81
81
  # To read all FASTQ entries from multiple files:
82
82
  #
83
- # BP.new.read_fastq(input: "test1.fq,test2.fq").dump.run
83
+ # BD.new.read_fastq(input: "test1.fq,test2.fq").dump.run
84
84
  #
85
85
  # To read FASTQ entries from multiple files using a glob expression:
86
86
  #
87
- # BP.new.read_fastq(input: "*.fq").dump.run
87
+ # BD.new.read_fastq(input: "*.fq").dump.run
88
88
  #
89
89
  # To read FASTQ entries from pair-end data:
90
90
  #
91
- # BP.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
91
+ # BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
92
92
  #
93
93
  # To read FASTQ entries from pair-end data:
94
94
  #
95
- # BP.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
95
+ # BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
96
96
  #
97
97
  # To read FASTQ entries from pair-end data and reverse-complement read2:
98
98
  #
99
- # BP.new.
99
+ # BD.new.
100
100
  # read_fastq(input: "file1.fq", input2: "file2.fq",
101
101
  # reverse_complement: true)
102
102
  # .dump.run
@@ -93,7 +93,7 @@ module BioDSL
93
93
  # where the keys Organism, Sequence and Count are taken from the comment
94
94
  # line prefixed with #:
95
95
  #
96
- # BP.new.read_tab(input: "test.tab").dump.run
96
+ # BD.new.read_tab(input: "test.tab").dump.run
97
97
  #
98
98
  # {:Organism=>"Human", :Sequence=>"ATACGTCAG", :Count=>23524}
99
99
  # {:Organism=>"Dog", :Sequence=>"AGCATGAC", :Count=>2442}
@@ -103,7 +103,7 @@ module BioDSL
103
103
  # However, if the first line is skipped using the +skip+ option the keys
104
104
  # will default to V0, V1, V2 ... Vn:
105
105
  #
106
- # BP.new.read_table(input: "test.tab", skip: 1).dump.run
106
+ # BD.new.read_table(input: "test.tab", skip: 1).dump.run
107
107
  #
108
108
  # {:V0=>"Human", :V1=>"ATACGTCAG", :V2=>23524}
109
109
  # {:V0=>"Dog", :V1=>"AGCATGAC", :V2=>2442}
@@ -112,7 +112,7 @@ module BioDSL
112
112
  #
113
113
  # To explicitly name the columns (or the keys) use the +keys+ option:
114
114
  #
115
- # BP.new.
115
+ # BD.new.
116
116
  # read_table(input: "test.tab", skip: 1, keys: [:ORGANISM, :SEQ, :COUNT]).
117
117
  # dump.
118
118
  # run
@@ -128,7 +128,7 @@ module BioDSL
128
128
  # argument. So to read in only the sequence and the count so that the
129
129
  # count comes before the sequence do:
130
130
  #
131
- # BP.new.read_table(input: "test.tab", skip: 1, select: [2, 1]).dump.run
131
+ # BD.new.read_table(input: "test.tab", skip: 1, select: [2, 1]).dump.run
132
132
  #
133
133
  # {:V0=>23524, :V1=>"ATACGTCAG"}
134
134
  # {:V0=>2442, :V1=>"AGCATGAC"}
@@ -141,7 +141,7 @@ module BioDSL
141
141
  #
142
142
  # Then the header keys can be used:
143
143
  #
144
- # BP.new.
144
+ # BD.new.
145
145
  # read_table(input: "test.tab", skip: 1, select: [:Count, :Sequence]).
146
146
  # dump.
147
147
  # run
@@ -154,7 +154,7 @@ module BioDSL
154
154
  # Likewise, it is possible to reject specified columns from being read
155
155
  # using the +reject+ option:
156
156
  #
157
- # BP.new.read_table(input: "test.tab", skip: 1, reject: [2, 1]).dump.run
157
+ # BD.new.read_table(input: "test.tab", skip: 1, reject: [2, 1]).dump.run
158
158
  #
159
159
  # {:V0=>"Human"}
160
160
  # {:V0=>"Dog"}
@@ -163,7 +163,7 @@ module BioDSL
163
163
  #
164
164
  # And again, the header keys can be used if a header is present:
165
165
  #
166
- # BP.new.
166
+ # BD.new.
167
167
  # read_table(input: "test.tab", skip: 1, reject: [:Count, :Sequence]).
168
168
  # dump.
169
169
  # run
@@ -51,7 +51,7 @@ module BioDSL
51
51
  #
52
52
  # To reverse the sequence simply do:
53
53
  #
54
- # BP.new.read_fastq(input:"test.fq").reverse_seq.dump.run
54
+ # BD.new.read_fastq(input:"test.fq").reverse_seq.dump.run
55
55
  #
56
56
  # {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
57
57
  # :SEQ=>"GTGACCGGCAGCAAAATGTT",
@@ -92,7 +92,7 @@ module BioDSL
92
92
  #
93
93
  # We can slice the alignment with +slice_align+ using a range:
94
94
  #
95
- # BP.new.
95
+ # BD.new.
96
96
  # read_fasta(input: "test.fna").
97
97
  # slice_align(slice: 14 .. 27).
98
98
  # dump.
@@ -107,7 +107,7 @@ module BioDSL
107
107
  #
108
108
  # Or we could slice the alignment using a set of primers:
109
109
  #
110
- # BP.new.
110
+ # BD.new.
111
111
  # read_fasta(input: "test.fna").
112
112
  # slice_align(forward: "CGCATACG", reverse: "GAGGGG", max_mismatches: 0,
113
113
  # max_insertions: 0, max_deletions: 0).
@@ -128,7 +128,7 @@ module BioDSL
128
128
  # and specifying primers these will be matched to the template and the hit
129
129
  # positions used for slicing:
130
130
  #
131
- # BP.new.
131
+ # BD.new.
132
132
  # read_fasta(input: "test.fna").
133
133
  # slice_align(template_file: "template.fna", forward: "GAATACG",
134
134
  # reverse: "ATTCGAT", max_mismatches: 0, max_insertions: 0,
@@ -147,7 +147,7 @@ module BioDSL
147
147
  # is useful if you are slicing 16S rRNA alignments and want the _E.coli_
148
148
  # corresponding positions - simply use the _E.coli_ sequence as template.
149
149
  #
150
- # BP.new.
150
+ # BD.new.
151
151
  # read_fasta(input: "test.fna").
152
152
  # slice_align(template_file: "template.fna", slice: 4 .. 14).
153
153
  # dump.run
@@ -55,7 +55,7 @@ module BioDSL
55
55
  #
56
56
  # To slice the second residue from the beginning do:
57
57
  #
58
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: 2).dump.run
58
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: 2).dump.run
59
59
  #
60
60
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
61
61
  # :SEQ=>"G",
@@ -64,7 +64,7 @@ module BioDSL
64
64
  #
65
65
  # To slice the last residue do:
66
66
  #
67
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: -1).dump.run
67
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: -1).dump.run
68
68
  #
69
69
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
70
70
  # :SEQ=>"T",
@@ -73,7 +73,7 @@ module BioDSL
73
73
  #
74
74
  # To slice the first 5 residues do:
75
75
  #
76
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: 0 ... 5).dump.run
76
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: 0 ... 5).dump.run
77
77
  #
78
78
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
79
79
  # :SEQ=>"TTGGT",
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # To slice the last 5 residues do:
84
84
  #
85
- # BP.new.read_fastq(input: "test.fq").slice_seq(slice: -5 .. -1).dump.run
85
+ # BD.new.read_fastq(input: "test.fq").slice_seq(slice: -5 .. -1).dump.run
86
86
  #
87
87
  # {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
88
88
  # :SEQ=>"GCGAT",
@@ -53,7 +53,7 @@ module BioDSL
53
53
  #
54
54
  # To sort this according to COUNT in descending order do:
55
55
  #
56
- # BP.new.read_table(input: "test.tab").sort(key: :COUNT).dump.run
56
+ # BD.new.read_table(input: "test.tab").sort(key: :COUNT).dump.run
57
57
  #
58
58
  # {:COUNT=>1, :ORGANISM=>"Eel"}
59
59
  # {:COUNT=>3, :ORGANISM=>"Cat"}
@@ -61,7 +61,7 @@ module BioDSL
61
61
  #
62
62
  # And in ascending order:
63
63
  #
64
- # BP.new.
64
+ # BD.new.
65
65
  # read_table(input: "test.tab").
66
66
  # sort(key: :COUNT, reverse: true).
67
67
  # dump.
@@ -73,7 +73,7 @@ module BioDSL
73
73
  #
74
74
  # The type of value determines the sorting, alphabetical order:
75
75
  #
76
- # BP.new.read_table(input: "test.tab").sort(key: :ORGANISM).dump.run
76
+ # BD.new.read_table(input: "test.tab").sort(key: :ORGANISM).dump.run
77
77
  #
78
78
  # {:COUNT=>3, :ORGANISM=>"Cat"}
79
79
  # {:COUNT=>4, :ORGANISM=>"Dog"}
@@ -81,7 +81,7 @@ module BioDSL
81
81
  #
82
82
  # And reverse alphabetic order:
83
83
  #
84
- # BP.new.
84
+ # BD.new.
85
85
  # read_table(input: "test.tab").
86
86
  # sort(key: :ORGANISM, reverse: true).
87
87
  # dump.
@@ -65,7 +65,7 @@ module BioDSL
65
65
  #
66
66
  # These can be split using split_pair_seq:
67
67
  #
68
- # BP.new.
68
+ # BD.new.
69
69
  # read_fastq(input: "test.fq", encoding: :base_33).
70
70
  # merge_pair_seq.
71
71
  # split_pair_seq.
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # The forward end can be trimmed like this:
84
84
  #
85
- # BP.new.
85
+ # BD.new.
86
86
  # read_fasta(input: "test.fna").
87
87
  # trim_primer(primer: "ATAGAACTGAC", direction: :forward).
88
88
  # dump.
@@ -98,7 +98,7 @@ module BioDSL
98
98
  #
99
99
  # And trimming a reverse primer:
100
100
  #
101
- # BP.new.
101
+ # BD.new.
102
102
  # read_fasta(input: "test.fna").
103
103
  # trim_primer(primer: "ACTACGTGCGGAT", direction: :reverse).
104
104
  # dump.
@@ -58,7 +58,7 @@ module BioDSL
58
58
  #
59
59
  # To trim both ends simply do:
60
60
  #
61
- # BP.new.read_fastq(input: "test.fq").trim_seq.trim_seq.run
61
+ # BD.new.read_fastq(input: "test.fq").trim_seq.trim_seq.run
62
62
  #
63
63
  # SEQ_NAME: test
64
64
  # SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcat
@@ -68,7 +68,7 @@ module BioDSL
68
68
  #
69
69
  # Use the +quality_min+ option to change the minimum value to discard:
70
70
  #
71
- # BP.new.
71
+ # BD.new.
72
72
  # read_fastq(input: "test.fq").
73
73
  # trim_seq(quality_min: 25).
74
74
  # trim_seq.
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # To trim the left end only (use :right for right end only), do:
84
84
  #
85
- # BP.new.read_fastq(input: "test.fq").trim_seq(mode: :left).trim_seq.run
85
+ # BD.new.read_fastq(input: "test.fq").trim_seq(mode: :left).trim_seq.run
86
86
  #
87
87
  # SEQ_NAME: test
88
88
  # SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcatgctagctag
@@ -93,7 +93,7 @@ module BioDSL
93
93
  # To increase the length of stretch of good quality residues to match, use
94
94
  # the +length_min+ option:
95
95
  #
96
- # BP.new.read_fastq(input: "test.fq").trim_seq(length_min: 4).trim_seq.run
96
+ # BD.new.read_fastq(input: "test.fq").trim_seq(length_min: 4).trim_seq.run
97
97
  #
98
98
  # SEQ_NAME: test
99
99
  # SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtct
@@ -56,7 +56,7 @@ module BioDSL
56
56
  # To output only unique values for the first column we first read the table
57
57
  # with +read_table+ and then pass the result to +unique_values+:
58
58
  #
59
- # BP.new.read_table(input: "test.tab").unique_values(key: :V0).dump.run
59
+ # BD.new.read_table(input: "test.tab").unique_values(key: :V0).dump.run
60
60
  #
61
61
  # {:V0=>"Human", :V1=>"H1"}
62
62
  # {:V0=>"Dog", :V1=>"D1"}
@@ -64,7 +64,7 @@ module BioDSL
64
64
  #
65
65
  # To output duplicate records instead use the +invert+ options:
66
66
  #
67
- # BP.new.
67
+ # BD.new.
68
68
  # read_table(input: "test.tab").
69
69
  # unique_values(key: :V0, invert: true).
70
70
  # dump.
@@ -50,7 +50,7 @@ module BioDSL
50
50
  #
51
51
  # To create a tree from aligned FASTA sequences in the file `align.fna` do:
52
52
  #
53
- # BP.new.
53
+ # BD.new.
54
54
  # read_fasta(input: "align.fna").
55
55
  # write_tree(output: "align.tree").
56
56
  # run
@@ -139,7 +139,7 @@ module BioDSL
139
139
 
140
140
  # Format a Pipeline to a pretty string which is returned.
141
141
  def to_s
142
- command_strings = %w(BP new)
142
+ command_strings = %w(BD new)
143
143
 
144
144
  @commands.each { |command| command_strings << command.to_s }
145
145
 
@@ -317,7 +317,7 @@ module BioDSL
317
317
  # @option options [Boolean] :debug Debug flag.
318
318
  # @option options [Boolean] :verbose Verbose flag.
319
319
  def prime_variables(options)
320
- BioDSL.test = ENV['BP_TEST']
320
+ BioDSL.test = ENV['BD_TEST']
321
321
  BioDSL.debug = options[:debug]
322
322
  BioDSL.verbose = options[:verbose]
323
323
  end
@@ -27,5 +27,5 @@
27
27
 
28
28
  # Namespace for BioDSL.
29
29
  module BioDSL
30
- VERSION = '1.0.0'
30
+ VERSION = '1.0.1'
31
31
  end
data/lib/BioDSL.rb CHANGED
@@ -78,4 +78,4 @@ module BioDSL
78
78
  require 'BioDSL/verbose'
79
79
  end
80
80
 
81
- BP = BioDSL::Pipeline # Module alias for irb short hand
81
+ BD = BioDSL::Pipeline # Module alias for irb short hand
@@ -47,7 +47,7 @@ class TestAlignSeqMothur < Test::Unit::TestCase
47
47
  @output.write(SEQ_NAME: 'test', SEQ: 'gattccgatcgatcgatcga')
48
48
  @output.close
49
49
 
50
- @p = BP.new
50
+ @p = BD.new
51
51
  end
52
52
 
53
53
  def write_template
@@ -49,7 +49,7 @@ class TestAnalyzeResidueDistribution < Test::Unit::TestCase
49
49
 
50
50
  @output.close
51
51
 
52
- @p = BP.new
52
+ @p = BD.new
53
53
  end
54
54
 
55
55
  def teardown
@@ -33,7 +33,7 @@ require 'test/helper'
33
33
  # Test class for ClassifySeq.
34
34
  class TestClassifySeq < Test::Unit::TestCase
35
35
  def setup
36
- @p = BP.new
36
+ @p = BD.new
37
37
  end
38
38
 
39
39
  test 'BioDSL::Pipeline#classify_seq with disallowed option raises' do
@@ -35,7 +35,7 @@ class TestClassifySeqMothur < Test::Unit::TestCase
35
35
  def setup
36
36
  omit('mothur not found') unless BioDSL::Filesys.which('mothur')
37
37
 
38
- @p = BP.new
38
+ @p = BD.new
39
39
  @database = __FILE__
40
40
  @taxonomy = __FILE__
41
41
  end
@@ -47,7 +47,7 @@ class TestCollapseOtus < Test::Unit::TestCase
47
47
 
48
48
  @output.close
49
49
 
50
- @p = BP.new
50
+ @p = BD.new
51
51
  end
52
52
 
53
53
  test 'BioDSL::Pipeline::Count with invalid options raises' do
@@ -110,7 +110,7 @@ class TestGrab < Test::Unit::TestCase
110
110
 
111
111
  test 'BioDSL::Pipeline::Grab#to_s with select and symbol key return OK' do
112
112
  @p.grab(select: :SEQ_NAME)
113
- expected = 'BP.new.grab(select: :SEQ_NAME)'
113
+ expected = 'BD.new.grab(select: :SEQ_NAME)'
114
114
  assert_equal(expected, @p.to_s)
115
115
  end
116
116
 
@@ -197,7 +197,7 @@ class TestReadFasta < Test::Unit::TestCase
197
197
  test 'BioDSL::Pipeline::ReadFasta#to_s with :first returns correctly' do
198
198
  @p.read_fasta(input: @file, first: 3)
199
199
 
200
- expected = %{BP.new.read_fasta(input: "#{@file}", first: 3)}
200
+ expected = %{BD.new.read_fasta(input: "#{@file}", first: 3)}
201
201
 
202
202
  assert_equal(expected, @p.to_s)
203
203
  end
@@ -377,7 +377,7 @@ class TestReadFastq < Test::Unit::TestCase
377
377
  test 'BioDSL::Pipeline::ReadFastq#to_s with :first returns correctly' do
378
378
  @p.read_fastq(input: @file, first: 3)
379
379
 
380
- expected = %{BP.new.read_fastq(input: "#{@file}", first: 3)}
380
+ expected = %{BD.new.read_fastq(input: "#{@file}", first: 3)}
381
381
 
382
382
  assert_equal(expected, @p.to_s)
383
383
  end
@@ -295,7 +295,7 @@ class TestReadTable < Test::Unit::TestCase
295
295
  test 'BioDSL::Pipeline::ReadTable#to_s with :first returns correctly' do
296
296
  @p.read_table(input: @file, first: 3)
297
297
 
298
- expected = %{BP.new.read_table(input: "#{@file}", first: 3)}
298
+ expected = %{BD.new.read_table(input: "#{@file}", first: 3)}
299
299
 
300
300
  assert_equal(expected, @p.to_s)
301
301
  end
@@ -44,7 +44,7 @@ class PipelineTest < Test::Unit::TestCase
44
44
  delivery_method :test
45
45
  end
46
46
 
47
- @p = BP.new
47
+ @p = BD.new
48
48
  end
49
49
 
50
50
  def setup_fasta_files
@@ -69,27 +69,27 @@ class PipelineTest < Test::Unit::TestCase
69
69
 
70
70
  test 'BioDSL::Pipeline#to_s w/o options and w/o .run() returns OK' do
71
71
  @p.commands << BioDSL::Command.new('dump', nil, {})
72
- expected = %(BP.new.dump)
72
+ expected = %(BD.new.dump)
73
73
  assert_equal(expected, @p.to_s)
74
74
  end
75
75
 
76
76
  test 'BioDSL::Pipeline#to_s with options and w/o .run() returns OK' do
77
77
  @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
78
- expected = %(BP.new.read_fasta(input: "test.fna"))
78
+ expected = %(BD.new.read_fasta(input: "test.fna"))
79
79
  assert_equal(expected, @p.to_s)
80
80
  end
81
81
 
82
82
  test 'BioDSL::Pipeline#to_s w/o options and .run() returns OK' do
83
83
  @p.commands << BioDSL::Command.new('dump', nil, {})
84
84
  @p.complete = true
85
- expected = %(BP.new.dump.run)
85
+ expected = %(BD.new.dump.run)
86
86
  assert_equal(expected, @p.run.to_s)
87
87
  end
88
88
 
89
89
  test 'BioDSL::Pipeline#to_s with options and .run() returns OK' do
90
90
  @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
91
91
  @p.complete = true
92
- expected = %{BP.new.read_fasta(input: "test.fna").run}
92
+ expected = %{BD.new.read_fasta(input: "test.fna").run}
93
93
  assert_equal(expected, @p.run.to_s)
94
94
  end
95
95
 
@@ -113,13 +113,13 @@ class PipelineTest < Test::Unit::TestCase
113
113
 
114
114
  test 'BioDSL::Pipeline#+ of two Pipelines return correctly' do
115
115
  p = BioDSL::Pipeline.new.dump(first: 2)
116
- assert_equal('BP.new.dump(first: 2)', (@p + p).to_s)
116
+ assert_equal('BD.new.dump(first: 2)', (@p + p).to_s)
117
117
  end
118
118
 
119
119
  test 'BioDSL::Pipeline#+ of three Pipelines return correctly' do
120
120
  p1 = BioDSL::Pipeline.new.dump(first: 2)
121
121
  p2 = BioDSL::Pipeline.new.dump(last: 3)
122
- assert_equal('BP.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
122
+ assert_equal('BD.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
123
123
  end
124
124
 
125
125
  test 'BioDSL::Pipeline#pop decreases size' do
data/test/helper.rb CHANGED
@@ -41,7 +41,7 @@ require 'BioDSL'
41
41
  require 'test/unit'
42
42
  require 'mocha/test_unit'
43
43
 
44
- ENV['BP_TEST'] = "true"
44
+ ENV['BD_TEST'] = "true"
45
45
 
46
46
  module Kernel
47
47
  def capture_stdout
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: BioDSL
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin A. Hansen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-30 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: haml
@@ -413,7 +413,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
413
413
  version: '0'
414
414
  requirements: []
415
415
  rubyforge_project: BioDSL
416
- rubygems_version: 2.4.5.1
416
+ rubygems_version: 2.4.8
417
417
  signing_key:
418
418
  specification_version: 4
419
419
  summary: BioDSL