BioDSL 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +66 -66
- data/examples/fastq_to_fasta.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +1 -1
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +2 -2
- data/lib/BioDSL/commands/assemble_pairs.rb +1 -1
- data/lib/BioDSL/commands/assemble_seq_idba.rb +1 -1
- data/lib/BioDSL/commands/assemble_seq_ray.rb +1 -1
- data/lib/BioDSL/commands/assemble_seq_spades.rb +2 -2
- data/lib/BioDSL/commands/classify_seq.rb +1 -1
- data/lib/BioDSL/commands/classify_seq_mothur.rb +1 -1
- data/lib/BioDSL/commands/clip_primer.rb +2 -2
- data/lib/BioDSL/commands/cluster_otus.rb +1 -1
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +1 -1
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +1 -1
- data/lib/BioDSL/commands/degap_seq.rb +2 -2
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/filter_rrna.rb +1 -1
- data/lib/BioDSL/commands/genecall.rb +2 -2
- data/lib/BioDSL/commands/index_taxonomy.rb +1 -1
- data/lib/BioDSL/commands/mask_seq.rb +3 -3
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +1 -1
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +1 -1
- data/lib/BioDSL/commands/plot_matches.rb +1 -1
- data/lib/BioDSL/commands/plot_residue_distribution.rb +1 -1
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fastq.rb +9 -9
- data/lib/BioDSL/commands/read_table.rb +7 -7
- data/lib/BioDSL/commands/reverse_seq.rb +1 -1
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +4 -4
- data/lib/BioDSL/commands/sort.rb +4 -4
- data/lib/BioDSL/commands/split_pair_seq.rb +1 -1
- data/lib/BioDSL/commands/trim_primer.rb +2 -2
- data/lib/BioDSL/commands/trim_seq.rb +4 -4
- data/lib/BioDSL/commands/unique_values.rb +2 -2
- data/lib/BioDSL/commands/write_tree.rb +1 -1
- data/lib/BioDSL/pipeline.rb +2 -2
- data/lib/BioDSL/version.rb +1 -1
- data/lib/BioDSL.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +7 -7
- data/test/helper.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b828e339f7d9337acdaf88a4206cb4cf15a6778c
|
4
|
+
data.tar.gz: 6c130d98ba2e9ca1c1bdf6a044bbe7d6e2c6f309
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f1fcfd7080a7487fd1a75152c4da3ce7328e86e19843181a93c30d1bb94a2f8f78a065cd208002324df2e0bbbbfbde5576a6157ece8c4c6c4878a6311e0074e
|
7
|
+
data.tar.gz: 31b549d1294e2be25897d824ab019154dfa570305dd1f79b041ae641b8208d2fa00e97ac36e4bdfe8a2dacd46d7cc8da9db4ddb5d75f4c9f3a3e6997aa4e0ae0
|
data/README.md
CHANGED
@@ -15,7 +15,7 @@ A test script:
|
|
15
15
|
|
16
16
|
require 'BioDSL'
|
17
17
|
|
18
|
-
p =
|
18
|
+
p = BD.new.
|
19
19
|
read_fasta(input: "input.fna").
|
20
20
|
grab(select: "ATC$", keys: :SEQ).
|
21
21
|
write_fasta(output: "output.fna").
|
@@ -29,24 +29,24 @@ adding the following to your `~/.bashrc` file:
|
|
29
29
|
And then start the interactive shell:
|
30
30
|
|
31
31
|
$ ibp
|
32
|
-
irb(main):001:0> p =
|
33
|
-
=>
|
32
|
+
irb(main):001:0> p = BD.new
|
33
|
+
=> BD.new
|
34
34
|
irb(main):002:0> p.read_fasta(input: "input.fna")
|
35
|
-
=>
|
35
|
+
=> BD.new.read_fasta(input: "input.fna")
|
36
36
|
irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
|
37
|
-
=>
|
37
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
|
38
38
|
irb(main):004:0> p.write_fasta(output: "output.fna")
|
39
|
-
=>
|
39
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
|
40
40
|
irb(main):005:0> p.run(progress: true)
|
41
|
-
=>
|
41
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
|
42
42
|
irb(main):006:0>
|
43
43
|
|
44
44
|
|
45
45
|
Or chaining commands directly:
|
46
46
|
|
47
47
|
$ ibp
|
48
|
-
irb(main):001:0>
|
49
|
-
=>
|
48
|
+
irb(main):001:0> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
|
49
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
|
50
50
|
irb(main):002:0>
|
51
51
|
|
52
52
|
Or run on the command line with the alias bp which you can create by adding the
|
@@ -56,61 +56,61 @@ following to your ~/.bashrc file:
|
|
56
56
|
|
57
57
|
Then you can run the below from the command line:
|
58
58
|
|
59
|
-
$ bp -e '
|
59
|
+
$ bp -e 'BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
|
60
60
|
|
61
61
|
Available BioDSL
|
62
62
|
-------------------
|
63
63
|
|
64
|
-
* [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
65
|
-
* [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
66
|
-
* [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
67
|
-
* [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
68
|
-
* [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
69
|
-
* [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
70
|
-
* [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
71
|
-
* [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
72
|
-
* [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
73
|
-
* [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
74
|
-
* [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
75
|
-
* [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
76
|
-
* [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
77
|
-
* [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
78
|
-
* [count] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
79
|
-
* [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
80
|
-
* [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
81
|
-
* [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
82
|
-
* [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
83
|
-
* [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
84
|
-
* [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
85
|
-
* [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
86
|
-
* [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
87
|
-
* [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
88
|
-
* [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
89
|
-
* [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
90
|
-
* [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
91
|
-
* [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
92
|
-
* [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
93
|
-
* [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
94
|
-
* [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
95
|
-
* [random] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
96
|
-
* [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
97
|
-
* [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
98
|
-
* [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
99
|
-
* [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
100
|
-
* [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
101
|
-
* [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
102
|
-
* [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
103
|
-
* [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
104
|
-
* [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
105
|
-
* [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
106
|
-
* [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
107
|
-
* [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
108
|
-
* [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
109
|
-
* [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
110
|
-
* [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
111
|
-
* [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
112
|
-
* [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
113
|
-
* [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
64
|
+
* [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AddKey)
|
65
|
+
* [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AlignSeqMothur)
|
66
|
+
* [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AnalyzeResidueDistribution)
|
67
|
+
* [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssemblePairs)
|
68
|
+
* [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqIdba)
|
69
|
+
* [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqRay)
|
70
|
+
* [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqSpades)
|
71
|
+
* [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeq)
|
72
|
+
* [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeqMothur)
|
73
|
+
* [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClipPrimer)
|
74
|
+
* [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClusterOtus)
|
75
|
+
* [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollapseOtus)
|
76
|
+
* [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollectOtus)
|
77
|
+
* [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ComplementSeq)
|
78
|
+
* [count] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Count)
|
79
|
+
* [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DegapSeq)
|
80
|
+
* [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DereplicateSeq)
|
81
|
+
* [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Dump)
|
82
|
+
* [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/FilterRrna)
|
83
|
+
* [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Genecall)
|
84
|
+
* [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Grab)
|
85
|
+
* [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/IndexTaxonomy)
|
86
|
+
* [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MeanScores)
|
87
|
+
* [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergePairSeq)
|
88
|
+
* [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeTable)
|
89
|
+
* [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeValues)
|
90
|
+
* [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHeatmap)
|
91
|
+
* [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHistogram)
|
92
|
+
* [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotMatches)
|
93
|
+
* [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotResidueDistribution)
|
94
|
+
* [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotScores)
|
95
|
+
* [random] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Random)
|
96
|
+
* [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFasta)
|
97
|
+
* [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFastq)
|
98
|
+
* [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadTable)
|
99
|
+
* [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReverseSeq)
|
100
|
+
* [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceAlign)
|
101
|
+
* [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceSeq)
|
102
|
+
* [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Sort)
|
103
|
+
* [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitPairSeq)
|
104
|
+
* [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitValues)
|
105
|
+
* [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimPrimer)
|
106
|
+
* [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimSeq)
|
107
|
+
* [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UchimeRef)
|
108
|
+
* [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UniqueValues)
|
109
|
+
* [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UsearchGlobal)
|
110
|
+
* [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFasta)
|
111
|
+
* [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFastq)
|
112
|
+
* [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTable)
|
113
|
+
* [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTree)
|
114
114
|
|
115
115
|
Log and History
|
116
116
|
---------------
|
@@ -127,37 +127,37 @@ Progress:
|
|
127
127
|
|
128
128
|
Show nifty progress table with commands, records read and emitted, and time.
|
129
129
|
|
130
|
-
`
|
130
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(progress: true)`
|
131
131
|
|
132
132
|
Verbose:
|
133
133
|
|
134
134
|
Output verbose messages from commands and the run status.
|
135
135
|
|
136
|
-
`
|
136
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
|
137
137
|
|
138
138
|
Debug:
|
139
139
|
|
140
140
|
Output debug messages from commands using these.
|
141
141
|
|
142
|
-
`
|
142
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(debug: true)`
|
143
143
|
|
144
144
|
E-mail notification:
|
145
145
|
|
146
146
|
Send an email when run is complete.
|
147
147
|
|
148
|
-
`
|
148
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(email: "mail@maasha.dk", subject: "Script done!")`
|
149
149
|
|
150
150
|
Report:
|
151
151
|
|
152
152
|
Create an HTML report of the run stats:
|
153
153
|
|
154
|
-
`
|
154
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
|
155
155
|
|
156
156
|
Output dir:
|
157
157
|
|
158
158
|
All output files from commands are put in a specified dir:
|
159
159
|
|
160
|
-
`
|
160
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
|
161
161
|
|
162
162
|
|
163
163
|
Configuration File
|
data/examples/fastq_to_fasta.rb
CHANGED
@@ -5,4 +5,4 @@ require 'BioDSL'
|
|
5
5
|
# Read in sequences in FASTQ format from the file `test.fq` and save them in
|
6
6
|
# FASTA format in the file `test.fna`.
|
7
7
|
|
8
|
-
|
8
|
+
BD.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run
|
@@ -53,7 +53,7 @@ module BioDSL
|
|
53
53
|
# To align the entries in the FASTA file `test.fna` to the template alignment
|
54
54
|
# in the file `template.fna` do:
|
55
55
|
#
|
56
|
-
#
|
56
|
+
# BD.new.
|
57
57
|
# read_fasta(input: "test.fna").
|
58
58
|
# align_seq_mothur(template_file: "template.fna").
|
59
59
|
# run
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
# Now we run the data through the following pipeline and get the resulting
|
69
69
|
# table:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.
|
72
72
|
# read_fasta(input: "test.fna").
|
73
73
|
# analyze_residue_distribution.
|
74
74
|
# grab(select: "residue").
|
@@ -91,7 +91,7 @@ module BioDSL
|
|
91
91
|
# Here we do the same as above, but output percentages instead of absolute
|
92
92
|
# counts:
|
93
93
|
#
|
94
|
-
#
|
94
|
+
# BD.new.
|
95
95
|
# read_fasta(input: "test.fna").
|
96
96
|
# analyze_residue_distribution(percent: true).
|
97
97
|
# grab(select: "residue").
|
@@ -77,7 +77,7 @@ module BioDSL
|
|
77
77
|
# If you have two pair-end sequence files with the Illumina data then you
|
78
78
|
# can assemble these using assemble_pairs like this:
|
79
79
|
#
|
80
|
-
#
|
80
|
+
# BD.new.
|
81
81
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
82
82
|
# assemble_pairs(reverse_complement: true).
|
83
83
|
# run
|
@@ -58,7 +58,7 @@ module BioDSL
|
|
58
58
|
# If you have two pair-end sequence files with the Illumina data then you
|
59
59
|
# can assemble these using +assemble_seq_idba+ like this:
|
60
60
|
#
|
61
|
-
#
|
61
|
+
# BD.new.
|
62
62
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
63
63
|
# assemble_seq_idba.
|
64
64
|
# write_fasta(output: "contigs.fna").
|
@@ -61,7 +61,7 @@ module BioDSL
|
|
61
61
|
# If you have two pair-end sequence files with the Illumina data then you
|
62
62
|
# can assemble these using +assemble_seq_ray+ like this:
|
63
63
|
#
|
64
|
-
#
|
64
|
+
# BD.new.
|
65
65
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
66
66
|
# assemble_seq_ray.
|
67
67
|
# write_fasta(output: "contigs.fna").
|
@@ -56,7 +56,7 @@ module BioDSL
|
|
56
56
|
# If you have two pair-end sequence files with the Illumina data then you
|
57
57
|
# can assemble these using assemble_seq_spades like this:
|
58
58
|
#
|
59
|
-
#
|
59
|
+
# BD.new.
|
60
60
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
61
61
|
# assemble_seq_spades(kmers: [55,77,99,127]).
|
62
62
|
# write_fasta(output: "contigs.fna").
|
@@ -69,7 +69,7 @@ module BioDSL
|
|
69
69
|
include AuxHelper
|
70
70
|
|
71
71
|
STATS = %i(records_in records_out sequences_in sequences_out residues_in
|
72
|
-
records_out assembled)
|
72
|
+
residues_out records_out assembled)
|
73
73
|
|
74
74
|
# Constructor for the AssembleSeqSpades class.
|
75
75
|
#
|
@@ -100,7 +100,7 @@ module BioDSL
|
|
100
100
|
#
|
101
101
|
# To classify a bunch of OTU sequences in the file +otus.fna+ we do:
|
102
102
|
#
|
103
|
-
#
|
103
|
+
# BD.new.
|
104
104
|
# read_fasta(input: "otus.fna").
|
105
105
|
# classify_seq(dir: "RDP11_3").
|
106
106
|
# write_table(keys: [:SEQ_NAME, :TAXONOMY_HITS, :TAXONOMY]).
|
@@ -61,7 +61,7 @@ module BioDSL
|
|
61
61
|
# database = "trainset9_032012.pds.fasta"
|
62
62
|
# taxonomy = "trainset9_032012.pds.tax"
|
63
63
|
#
|
64
|
-
#
|
64
|
+
# BD.new.
|
65
65
|
# read_fasta(input: "otus.fna").
|
66
66
|
# classify_seq_mothur(database: database, taxonomy: taxonomy).
|
67
67
|
# grab(exact: true, keys: :RECORD_TYPE, select: "taxonomy").
|
@@ -75,7 +75,7 @@ module BioDSL
|
|
75
75
|
# To clip this sequence in the forward direction with the primer
|
76
76
|
# 'TGACTACGACTACGACTACT' do:
|
77
77
|
#
|
78
|
-
#
|
78
|
+
# BD.new.
|
79
79
|
# read_fasta(input: "test.fna").
|
80
80
|
# clip_primer(primer: "TGACTACGACTACGACTACT", direction: :forward).
|
81
81
|
# dump.
|
@@ -91,7 +91,7 @@ module BioDSL
|
|
91
91
|
#
|
92
92
|
# Or in the reverse direction:
|
93
93
|
#
|
94
|
-
#
|
94
|
+
# BD.new.
|
95
95
|
# read_fasta(input: "test.fna").
|
96
96
|
# clip_primer(primer: "TGACTACGACTACGACTACT", direction: :reverse).
|
97
97
|
# dump.
|
@@ -43,7 +43,7 @@ module BioDSL
|
|
43
43
|
# Here is an OTU table with four rows, one of which has a redundant Taxonomy
|
44
44
|
# string:
|
45
45
|
#
|
46
|
-
#
|
46
|
+
# BD.new.read_table(input: "otu_table.txt").dump.run
|
47
47
|
#
|
48
48
|
# {:OTU=>"OTU_1",
|
49
49
|
# :CM1_COUNT=>881,
|
@@ -73,7 +73,7 @@ module BioDSL
|
|
73
73
|
# In order to collapse the redundant OTU simply run the stream through
|
74
74
|
# +collapse_otus+:
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# BD.new.read_table(input: "otu_table.txt").collapse_otus.dump.run
|
77
77
|
#
|
78
78
|
# {:OTU=>"OTU_1",
|
79
79
|
# :CM1_COUNT=>881,
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To complement the sequence do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.read_fastq(input:"test.fq").complement_seq.dump.run
|
55
55
|
#
|
56
56
|
# {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
|
57
57
|
# :SEQ=>"AACATTTTGCTGCCGGTCAC",
|
@@ -46,7 +46,7 @@ module BioDSL
|
|
46
46
|
#
|
47
47
|
# To count the number of records in the file `test.fq`:
|
48
48
|
#
|
49
|
-
#
|
49
|
+
# BD.new.read_fastq(input: "test.fq").count(output: "count.txt").dump.run
|
50
50
|
#
|
51
51
|
# {:SEQ_NAME=>"ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1",
|
52
52
|
# :SEQ=>"TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT",
|
@@ -52,7 +52,7 @@ module BioDSL
|
|
52
52
|
# To count the values of both columns we first read the table with
|
53
53
|
# +read_table+ and then pass the result to +count_values+:
|
54
54
|
#
|
55
|
-
#
|
55
|
+
# BD.new.
|
56
56
|
# read_table(input: "test.tab").
|
57
57
|
# count_values(keys: [:V0, :V1]).
|
58
58
|
# dump.
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To remove all gaps from all sequences do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.read_fasta(input: "test.fna").degap_seq.dump.run
|
55
55
|
#
|
56
56
|
# {:SEQ_NAME=>"test1", :SEQ=>"AGTC", :SEQ_LEN=>4}
|
57
57
|
# {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
|
@@ -59,7 +59,7 @@ module BioDSL
|
|
59
59
|
#
|
60
60
|
# To remove all gap-only columns use the +columns_only+ option:
|
61
61
|
#
|
62
|
-
#
|
62
|
+
# BD.new.
|
63
63
|
# read_fasta(input: "test.fna").
|
64
64
|
# degap_seq(columns_only: true).
|
65
65
|
# dump.
|
@@ -53,7 +53,7 @@ module BioDSL
|
|
53
53
|
#
|
54
54
|
# To dereplicate all sequences we use +read_fasta+ and +dereplicate_seq+:
|
55
55
|
#
|
56
|
-
#
|
56
|
+
# BD.new.read_fasta(input: "test.fna").dereplicate_seq.dump.run
|
57
57
|
#
|
58
58
|
# {:SEQ_NAME=>"test1", :SEQ=>"ATGC", :SEQ_LEN=>4, :SEQ_COUNT=>2}
|
59
59
|
# {:SEQ_NAME=>"test3", :SEQ=>"GCAT", :SEQ_LEN=>4, :SEQ_COUNT=>1}
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To filter all reads matching the SILVA archaea 23S rRNA do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.
|
55
55
|
# read_fastq(input: "reads.fq").
|
56
56
|
# filter_rrna(ref_fasta: ["silva-arc-23s-id98.fasta"],
|
57
57
|
# ref_index: ["silva-arc-23s-id98.fasta.idx*"]).
|
@@ -59,7 +59,7 @@ module BioDSL
|
|
59
59
|
#
|
60
60
|
# To genecall a genome do:
|
61
61
|
#
|
62
|
-
#
|
62
|
+
# BD.new.
|
63
63
|
# read_fasta(input: "contigs.fna").
|
64
64
|
# genecall.
|
65
65
|
# grab(select: "genecall", key: :type, exact: true).
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
#
|
69
69
|
# To add genecall data to the sequence name use +merge_values+:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.
|
72
72
|
# read_fasta(input: "contigs.fna").
|
73
73
|
# genecall(type: "protein").
|
74
74
|
# grab(select: "genecall", key: :type, exact: true).
|
@@ -57,7 +57,7 @@ module BioDSL
|
|
57
57
|
# We can read in these sequence using +read_fastq+ and then soft mask the
|
58
58
|
# sequence with mask_seq like this:
|
59
59
|
#
|
60
|
-
#
|
60
|
+
# BD.new.read_fastq(input: "test.fq").mask_seq.dump.run
|
61
61
|
#
|
62
62
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
63
63
|
# :SEQ=>"ttggtcgctcgctccgcgacCTCAGATCAGACGTGGGCGAT",
|
@@ -66,7 +66,7 @@ module BioDSL
|
|
66
66
|
#
|
67
67
|
# Using the +quality_min+ option we can change the cutoff:
|
68
68
|
#
|
69
|
-
#
|
69
|
+
# BD.new.read_fastq(input: "test.fq").mask_seq(quality_min: 25).dump.run
|
70
70
|
#
|
71
71
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
72
72
|
# :SEQ=>"ttggtcgctcgctccgcgacctcagATCAGACGTGGGCGAT",
|
@@ -75,7 +75,7 @@ module BioDSL
|
|
75
75
|
#
|
76
76
|
# Using the +mask+ option for hard masking:
|
77
77
|
#
|
78
|
-
#
|
78
|
+
# BD.new.read_fastq(input: "test.fq").mask_seq(mask: :hard).dump.run
|
79
79
|
#
|
80
80
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
81
81
|
# :SEQ=>"NNNNNNNNNNNNNNNNNNNNCTCAGATCAGACGTGGGCGAT",
|
@@ -66,7 +66,7 @@ module BioDSL
|
|
66
66
|
#
|
67
67
|
# To calculate the mean score do:
|
68
68
|
#
|
69
|
-
#
|
69
|
+
# BD.new.read_fastq(input: "test.fq").mean_scores.dump.run
|
70
70
|
#
|
71
71
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
72
72
|
# :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
|
@@ -76,7 +76,7 @@ module BioDSL
|
|
76
76
|
#
|
77
77
|
# To calculate local means for a sliding window, do:
|
78
78
|
#
|
79
|
-
#
|
79
|
+
# BD.new.read_fastq(input: "test.fq").mean_scores(local: true).dump.run
|
80
80
|
#
|
81
81
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
82
82
|
# :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
# Here we plot two matches from a table. The vector records are shown in the
|
69
69
|
# +dump+ output:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.read_table(input: "test.tab").dump.plot_matches.run
|
72
72
|
#
|
73
73
|
# {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"+"}
|
74
74
|
# {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"-"}
|
@@ -65,7 +65,7 @@ module BioDSL
|
|
65
65
|
#
|
66
66
|
# Here we plot a residue distribution of a FASTA file:
|
67
67
|
#
|
68
|
-
#
|
68
|
+
# BD.new.read_fasta(input: "test.fna").plot_residue_distribution.run
|
69
69
|
#
|
70
70
|
# rubocop: disable ClassLength
|
71
71
|
class PlotResidueDistribution
|
@@ -64,39 +64,39 @@ module BioDSL
|
|
64
64
|
#
|
65
65
|
# To read all FASTQ entries from a file:
|
66
66
|
#
|
67
|
-
#
|
67
|
+
# BD.new.read_fastq(input: "test.fq").dump.run
|
68
68
|
#
|
69
69
|
# To read all FASTQ entries from a gzipped file:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.read_fastq(input: "test.fq.gz").dump.run
|
72
72
|
#
|
73
73
|
# To read in only 10 records from a FASTQ file:
|
74
74
|
#
|
75
|
-
#
|
75
|
+
# BD.new.read_fastq(input: "test.fq", first: 10).dump.run
|
76
76
|
#
|
77
77
|
# To read in the last 10 records from a FASTQ file:
|
78
78
|
#
|
79
|
-
#
|
79
|
+
# BD.new.read_fastq(input: "test.fq", last: 10).dump.run
|
80
80
|
#
|
81
81
|
# To read all FASTQ entries from multiple files:
|
82
82
|
#
|
83
|
-
#
|
83
|
+
# BD.new.read_fastq(input: "test1.fq,test2.fq").dump.run
|
84
84
|
#
|
85
85
|
# To read FASTQ entries from multiple files using a glob expression:
|
86
86
|
#
|
87
|
-
#
|
87
|
+
# BD.new.read_fastq(input: "*.fq").dump.run
|
88
88
|
#
|
89
89
|
# To read FASTQ entries from pair-end data:
|
90
90
|
#
|
91
|
-
#
|
91
|
+
# BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
|
92
92
|
#
|
93
93
|
# To read FASTQ entries from pair-end data:
|
94
94
|
#
|
95
|
-
#
|
95
|
+
# BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
|
96
96
|
#
|
97
97
|
# To read FASTQ entries from pair-end data and reverse-complement read2:
|
98
98
|
#
|
99
|
-
#
|
99
|
+
# BD.new.
|
100
100
|
# read_fastq(input: "file1.fq", input2: "file2.fq",
|
101
101
|
# reverse_complement: true)
|
102
102
|
# .dump.run
|
@@ -93,7 +93,7 @@ module BioDSL
|
|
93
93
|
# where the keys Organism, Sequence and Count are taken from the comment
|
94
94
|
# line prefixed with #:
|
95
95
|
#
|
96
|
-
#
|
96
|
+
# BD.new.read_table(input: "test.tab").dump.run
|
97
97
|
#
|
98
98
|
# {:Organism=>"Human", :Sequence=>"ATACGTCAG", :Count=>23524}
|
99
99
|
# {:Organism=>"Dog", :Sequence=>"AGCATGAC", :Count=>2442}
|
@@ -103,7 +103,7 @@ module BioDSL
|
|
103
103
|
# However, if the first line is skipped using the +skip+ option the keys
|
104
104
|
# will default to V0, V1, V2 ... Vn:
|
105
105
|
#
|
106
|
-
#
|
106
|
+
# BD.new.read_table(input: "test.tab", skip: 1).dump.run
|
107
107
|
#
|
108
108
|
# {:V0=>"Human", :V1=>"ATACGTCAG", :V2=>23524}
|
109
109
|
# {:V0=>"Dog", :V1=>"AGCATGAC", :V2=>2442}
|
@@ -112,7 +112,7 @@ module BioDSL
|
|
112
112
|
#
|
113
113
|
# To explicitly name the columns (or the keys) use the +keys+ option:
|
114
114
|
#
|
115
|
-
#
|
115
|
+
# BD.new.
|
116
116
|
# read_table(input: "test.tab", skip: 1, keys: [:ORGANISM, :SEQ, :COUNT]).
|
117
117
|
# dump.
|
118
118
|
# run
|
@@ -128,7 +128,7 @@ module BioDSL
|
|
128
128
|
# argument. So to read in only the sequence and the count so that the
|
129
129
|
# count comes before the sequence do:
|
130
130
|
#
|
131
|
-
#
|
131
|
+
# BD.new.read_table(input: "test.tab", skip: 1, select: [2, 1]).dump.run
|
132
132
|
#
|
133
133
|
# {:V0=>23524, :V1=>"ATACGTCAG"}
|
134
134
|
# {:V0=>2442, :V1=>"AGCATGAC"}
|
@@ -141,7 +141,7 @@ module BioDSL
|
|
141
141
|
#
|
142
142
|
# Then the header keys can be used:
|
143
143
|
#
|
144
|
-
#
|
144
|
+
# BD.new.
|
145
145
|
# read_table(input: "test.tab", skip: 1, select: [:Count, :Sequence]).
|
146
146
|
# dump.
|
147
147
|
# run
|
@@ -154,7 +154,7 @@ module BioDSL
|
|
154
154
|
# Likewise, it is possible to reject specified columns from being read
|
155
155
|
# using the +reject+ option:
|
156
156
|
#
|
157
|
-
#
|
157
|
+
# BD.new.read_table(input: "test.tab", skip: 1, reject: [2, 1]).dump.run
|
158
158
|
#
|
159
159
|
# {:V0=>"Human"}
|
160
160
|
# {:V0=>"Dog"}
|
@@ -163,7 +163,7 @@ module BioDSL
|
|
163
163
|
#
|
164
164
|
# And again, the header keys can be used if a header is present:
|
165
165
|
#
|
166
|
-
#
|
166
|
+
# BD.new.
|
167
167
|
# read_table(input: "test.tab", skip: 1, reject: [:Count, :Sequence]).
|
168
168
|
# dump.
|
169
169
|
# run
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To reverse the sequence simply do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.read_fastq(input:"test.fq").reverse_seq.dump.run
|
55
55
|
#
|
56
56
|
# {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
|
57
57
|
# :SEQ=>"GTGACCGGCAGCAAAATGTT",
|
@@ -92,7 +92,7 @@ module BioDSL
|
|
92
92
|
#
|
93
93
|
# We can slice the alignment with +slice_align+ using a range:
|
94
94
|
#
|
95
|
-
#
|
95
|
+
# BD.new.
|
96
96
|
# read_fasta(input: "test.fna").
|
97
97
|
# slice_align(slice: 14 .. 27).
|
98
98
|
# dump.
|
@@ -107,7 +107,7 @@ module BioDSL
|
|
107
107
|
#
|
108
108
|
# Or we could slice the alignment using a set of primers:
|
109
109
|
#
|
110
|
-
#
|
110
|
+
# BD.new.
|
111
111
|
# read_fasta(input: "test.fna").
|
112
112
|
# slice_align(forward: "CGCATACG", reverse: "GAGGGG", max_mismatches: 0,
|
113
113
|
# max_insertions: 0, max_deletions: 0).
|
@@ -128,7 +128,7 @@ module BioDSL
|
|
128
128
|
# and specifying primers these will be matched to the template and the hit
|
129
129
|
# positions used for slicing:
|
130
130
|
#
|
131
|
-
#
|
131
|
+
# BD.new.
|
132
132
|
# read_fasta(input: "test.fna").
|
133
133
|
# slice_align(template_file: "template.fna", forward: "GAATACG",
|
134
134
|
# reverse: "ATTCGAT", max_mismatches: 0, max_insertions: 0,
|
@@ -147,7 +147,7 @@ module BioDSL
|
|
147
147
|
# is useful if you are slicing 16S rRNA alignments and want the _E.coli_
|
148
148
|
# corresponding positions - simply use the _E.coli_ sequence as template.
|
149
149
|
#
|
150
|
-
#
|
150
|
+
# BD.new.
|
151
151
|
# read_fasta(input: "test.fna").
|
152
152
|
# slice_align(template_file: "template.fna", slice: 4 .. 14).
|
153
153
|
# dump.run
|
@@ -55,7 +55,7 @@ module BioDSL
|
|
55
55
|
#
|
56
56
|
# To slice the second residue from the beginning do:
|
57
57
|
#
|
58
|
-
#
|
58
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: 2).dump.run
|
59
59
|
#
|
60
60
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
61
61
|
# :SEQ=>"G",
|
@@ -64,7 +64,7 @@ module BioDSL
|
|
64
64
|
#
|
65
65
|
# To slice the last residue do:
|
66
66
|
#
|
67
|
-
#
|
67
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: -1).dump.run
|
68
68
|
#
|
69
69
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
70
70
|
# :SEQ=>"T",
|
@@ -73,7 +73,7 @@ module BioDSL
|
|
73
73
|
#
|
74
74
|
# To slice the first 5 residues do:
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: 0 ... 5).dump.run
|
77
77
|
#
|
78
78
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
79
79
|
# :SEQ=>"TTGGT",
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# To slice the last 5 residues do:
|
84
84
|
#
|
85
|
-
#
|
85
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: -5 .. -1).dump.run
|
86
86
|
#
|
87
87
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
88
88
|
# :SEQ=>"GCGAT",
|
data/lib/BioDSL/commands/sort.rb
CHANGED
@@ -53,7 +53,7 @@ module BioDSL
|
|
53
53
|
#
|
54
54
|
# To sort this according to COUNT in ascending order do:
|
55
55
|
#
|
56
|
-
#
|
56
|
+
# BD.new.read_table(input: "test.tab").sort(key: :COUNT).dump.run
|
57
57
|
#
|
58
58
|
# {:COUNT=>1, :ORGANISM=>"Eel"}
|
59
59
|
# {:COUNT=>3, :ORGANISM=>"Cat"}
|
@@ -61,7 +61,7 @@ module BioDSL
|
|
61
61
|
#
|
62
62
|
# And in descending order:
|
63
63
|
#
|
64
|
-
#
|
64
|
+
# BD.new.
|
65
65
|
# read_table(input: "test.tab").
|
66
66
|
# sort(key: :COUNT, reverse: true).
|
67
67
|
# dump.
|
@@ -73,7 +73,7 @@ module BioDSL
|
|
73
73
|
#
|
74
74
|
# The type of value determines the sorting, alphabetical order:
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# BD.new.read_table(input: "test.tab").sort(key: :ORGANISM).dump.run
|
77
77
|
#
|
78
78
|
# {:COUNT=>3, :ORGANISM=>"Cat"}
|
79
79
|
# {:COUNT=>4, :ORGANISM=>"Dog"}
|
@@ -81,7 +81,7 @@ module BioDSL
|
|
81
81
|
#
|
82
82
|
# And reverse alphabetic order:
|
83
83
|
#
|
84
|
-
#
|
84
|
+
# BD.new.
|
85
85
|
# read_table(input: "test.tab").
|
86
86
|
# sort(key: :ORGANISM, reverse: true).
|
87
87
|
# dump.
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# The forward end can be trimmed like this:
|
84
84
|
#
|
85
|
-
#
|
85
|
+
# BD.new.
|
86
86
|
# read_fasta(input: "test.fna").
|
87
87
|
# trim_primer(primer: "ATAGAACTGAC", direction: :forward).
|
88
88
|
# dump.
|
@@ -98,7 +98,7 @@ module BioDSL
|
|
98
98
|
#
|
99
99
|
# And trimming a reverse primer:
|
100
100
|
#
|
101
|
-
#
|
101
|
+
# BD.new.
|
102
102
|
# read_fasta(input: "test.fna").
|
103
103
|
# trim_primer(primer: "ACTACGTGCGGAT", direction: :reverse).
|
104
104
|
# dump.
|
@@ -58,7 +58,7 @@ module BioDSL
|
|
58
58
|
#
|
59
59
|
# To trim both ends simply do:
|
60
60
|
#
|
61
|
-
#
|
61
|
+
# BD.new.read_fastq(input: "test.fq").trim_seq.trim_seq.run
|
62
62
|
#
|
63
63
|
# SEQ_NAME: test
|
64
64
|
# SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcat
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
#
|
69
69
|
# Use the +quality_min+ option to change the minimum value to discard:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.
|
72
72
|
# read_fastq(input: "test.fq").
|
73
73
|
# trim_seq(quality_min: 25).
|
74
74
|
# trim_seq.
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# To trim the left end only (use :right for right end only), do:
|
84
84
|
#
|
85
|
-
#
|
85
|
+
# BD.new.read_fastq(input: "test.fq").trim_seq(mode: :left).trim_seq.run
|
86
86
|
#
|
87
87
|
# SEQ_NAME: test
|
88
88
|
# SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcatgctagctag
|
@@ -93,7 +93,7 @@ module BioDSL
|
|
93
93
|
# To increase the length of stretch of good quality residues to match, use
|
94
94
|
# the +length_min+ option:
|
95
95
|
#
|
96
|
-
#
|
96
|
+
# BD.new.read_fastq(input: "test.fq").trim_seq(length_min: 4).trim_seq.run
|
97
97
|
#
|
98
98
|
# SEQ_NAME: test
|
99
99
|
# SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtct
|
@@ -56,7 +56,7 @@ module BioDSL
|
|
56
56
|
# To output only unique values for the first column we first read the table
|
57
57
|
# with +read_table+ and then pass the result to +unique_values+:
|
58
58
|
#
|
59
|
-
#
|
59
|
+
# BD.new.read_table(input: "test.tab").unique_values(key: :V0).dump.run
|
60
60
|
#
|
61
61
|
# {:V0=>"Human", :V1=>"H1"}
|
62
62
|
# {:V0=>"Dog", :V1=>"D1"}
|
@@ -64,7 +64,7 @@ module BioDSL
|
|
64
64
|
#
|
65
65
|
# To output duplicate records instead use the +invert+ options:
|
66
66
|
#
|
67
|
-
#
|
67
|
+
# BD.new.
|
68
68
|
# read_table(input: "test.tab").
|
69
69
|
# unique_values(key: :V0, invert: true).
|
70
70
|
# dump.
|
data/lib/BioDSL/pipeline.rb
CHANGED
@@ -139,7 +139,7 @@ module BioDSL
|
|
139
139
|
|
140
140
|
# Format a Pipeline to a pretty string which is returned.
|
141
141
|
def to_s
|
142
|
-
command_strings = %w(
|
142
|
+
command_strings = %w(BD new)
|
143
143
|
|
144
144
|
@commands.each { |command| command_strings << command.to_s }
|
145
145
|
|
@@ -317,7 +317,7 @@ module BioDSL
|
|
317
317
|
# @option options [Boolean] :debug Debug flag.
|
318
318
|
# @option options [Boolean] :verbose Verbose flag.
|
319
319
|
def prime_variables(options)
|
320
|
-
BioDSL.test = ENV['
|
320
|
+
BioDSL.test = ENV['BD_TEST']
|
321
321
|
BioDSL.debug = options[:debug]
|
322
322
|
BioDSL.verbose = options[:verbose]
|
323
323
|
end
|
data/lib/BioDSL/version.rb
CHANGED
data/lib/BioDSL.rb
CHANGED
@@ -110,7 +110,7 @@ class TestGrab < Test::Unit::TestCase
|
|
110
110
|
|
111
111
|
test 'BioDSL::Pipeline::Grab#to_s with select and symbol key return OK' do
|
112
112
|
@p.grab(select: :SEQ_NAME)
|
113
|
-
expected = '
|
113
|
+
expected = 'BD.new.grab(select: :SEQ_NAME)'
|
114
114
|
assert_equal(expected, @p.to_s)
|
115
115
|
end
|
116
116
|
|
@@ -197,7 +197,7 @@ class TestReadFasta < Test::Unit::TestCase
|
|
197
197
|
test 'BioDSL::Pipeline::ReadFasta#to_s with :first returns correctly' do
|
198
198
|
@p.read_fasta(input: @file, first: 3)
|
199
199
|
|
200
|
-
expected = %{
|
200
|
+
expected = %{BD.new.read_fasta(input: "#{@file}", first: 3)}
|
201
201
|
|
202
202
|
assert_equal(expected, @p.to_s)
|
203
203
|
end
|
@@ -377,7 +377,7 @@ class TestReadFastq < Test::Unit::TestCase
|
|
377
377
|
test 'BioDSL::Pipeline::ReadFastq#to_s with :first returns correctly' do
|
378
378
|
@p.read_fastq(input: @file, first: 3)
|
379
379
|
|
380
|
-
expected = %{
|
380
|
+
expected = %{BD.new.read_fastq(input: "#{@file}", first: 3)}
|
381
381
|
|
382
382
|
assert_equal(expected, @p.to_s)
|
383
383
|
end
|
@@ -295,7 +295,7 @@ class TestReadTable < Test::Unit::TestCase
|
|
295
295
|
test 'BioDSL::Pipeline::ReadTable#to_s with :first returns correctly' do
|
296
296
|
@p.read_table(input: @file, first: 3)
|
297
297
|
|
298
|
-
expected = %{
|
298
|
+
expected = %{BD.new.read_table(input: "#{@file}", first: 3)}
|
299
299
|
|
300
300
|
assert_equal(expected, @p.to_s)
|
301
301
|
end
|
@@ -44,7 +44,7 @@ class PipelineTest < Test::Unit::TestCase
|
|
44
44
|
delivery_method :test
|
45
45
|
end
|
46
46
|
|
47
|
-
@p =
|
47
|
+
@p = BD.new
|
48
48
|
end
|
49
49
|
|
50
50
|
def setup_fasta_files
|
@@ -69,27 +69,27 @@ class PipelineTest < Test::Unit::TestCase
|
|
69
69
|
|
70
70
|
test 'BioDSL::Pipeline#to_s w/o options and w/o .run() returns OK' do
|
71
71
|
@p.commands << BioDSL::Command.new('dump', nil, {})
|
72
|
-
expected = %(
|
72
|
+
expected = %(BD.new.dump)
|
73
73
|
assert_equal(expected, @p.to_s)
|
74
74
|
end
|
75
75
|
|
76
76
|
test 'BioDSL::Pipeline#to_s with options and w/o .run() returns OK' do
|
77
77
|
@p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
|
78
|
-
expected = %(
|
78
|
+
expected = %(BD.new.read_fasta(input: "test.fna"))
|
79
79
|
assert_equal(expected, @p.to_s)
|
80
80
|
end
|
81
81
|
|
82
82
|
test 'BioDSL::Pipeline#to_s w/o options and .run() returns OK' do
|
83
83
|
@p.commands << BioDSL::Command.new('dump', nil, {})
|
84
84
|
@p.complete = true
|
85
|
-
expected = %(
|
85
|
+
expected = %(BD.new.dump.run)
|
86
86
|
assert_equal(expected, @p.run.to_s)
|
87
87
|
end
|
88
88
|
|
89
89
|
test 'BioDSL::Pipeline#to_s with options and .run() returns OK' do
|
90
90
|
@p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
|
91
91
|
@p.complete = true
|
92
|
-
expected = %{
|
92
|
+
expected = %{BD.new.read_fasta(input: "test.fna").run}
|
93
93
|
assert_equal(expected, @p.run.to_s)
|
94
94
|
end
|
95
95
|
|
@@ -113,13 +113,13 @@ class PipelineTest < Test::Unit::TestCase
|
|
113
113
|
|
114
114
|
test 'BioDSL::Pipeline#+ of two Pipelines return correctly' do
|
115
115
|
p = BioDSL::Pipeline.new.dump(first: 2)
|
116
|
-
assert_equal('
|
116
|
+
assert_equal('BD.new.dump(first: 2)', (@p + p).to_s)
|
117
117
|
end
|
118
118
|
|
119
119
|
test 'BioDSL::Pipeline#+ of three Pipelines return correctly' do
|
120
120
|
p1 = BioDSL::Pipeline.new.dump(first: 2)
|
121
121
|
p2 = BioDSL::Pipeline.new.dump(last: 3)
|
122
|
-
assert_equal('
|
122
|
+
assert_equal('BD.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
|
123
123
|
end
|
124
124
|
|
125
125
|
test 'BioDSL::Pipeline#pop decreases size' do
|
data/test/helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: BioDSL
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin A. Hansen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: haml
|
@@ -413,7 +413,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
413
413
|
version: '0'
|
414
414
|
requirements: []
|
415
415
|
rubyforge_project: BioDSL
|
416
|
-
rubygems_version: 2.4.
|
416
|
+
rubygems_version: 2.4.8
|
417
417
|
signing_key:
|
418
418
|
specification_version: 4
|
419
419
|
summary: BioDSL
|