BioDSL 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +66 -66
- data/examples/fastq_to_fasta.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +1 -1
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +2 -2
- data/lib/BioDSL/commands/assemble_pairs.rb +1 -1
- data/lib/BioDSL/commands/assemble_seq_idba.rb +1 -1
- data/lib/BioDSL/commands/assemble_seq_ray.rb +1 -1
- data/lib/BioDSL/commands/assemble_seq_spades.rb +2 -2
- data/lib/BioDSL/commands/classify_seq.rb +1 -1
- data/lib/BioDSL/commands/classify_seq_mothur.rb +1 -1
- data/lib/BioDSL/commands/clip_primer.rb +2 -2
- data/lib/BioDSL/commands/cluster_otus.rb +1 -1
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +1 -1
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +1 -1
- data/lib/BioDSL/commands/degap_seq.rb +2 -2
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/filter_rrna.rb +1 -1
- data/lib/BioDSL/commands/genecall.rb +2 -2
- data/lib/BioDSL/commands/index_taxonomy.rb +1 -1
- data/lib/BioDSL/commands/mask_seq.rb +3 -3
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +1 -1
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +1 -1
- data/lib/BioDSL/commands/plot_matches.rb +1 -1
- data/lib/BioDSL/commands/plot_residue_distribution.rb +1 -1
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fastq.rb +9 -9
- data/lib/BioDSL/commands/read_table.rb +7 -7
- data/lib/BioDSL/commands/reverse_seq.rb +1 -1
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +4 -4
- data/lib/BioDSL/commands/sort.rb +4 -4
- data/lib/BioDSL/commands/split_pair_seq.rb +1 -1
- data/lib/BioDSL/commands/trim_primer.rb +2 -2
- data/lib/BioDSL/commands/trim_seq.rb +4 -4
- data/lib/BioDSL/commands/unique_values.rb +2 -2
- data/lib/BioDSL/commands/write_tree.rb +1 -1
- data/lib/BioDSL/pipeline.rb +2 -2
- data/lib/BioDSL/version.rb +1 -1
- data/lib/BioDSL.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +7 -7
- data/test/helper.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b828e339f7d9337acdaf88a4206cb4cf15a6778c
|
4
|
+
data.tar.gz: 6c130d98ba2e9ca1c1bdf6a044bbe7d6e2c6f309
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f1fcfd7080a7487fd1a75152c4da3ce7328e86e19843181a93c30d1bb94a2f8f78a065cd208002324df2e0bbbbfbde5576a6157ece8c4c6c4878a6311e0074e
|
7
|
+
data.tar.gz: 31b549d1294e2be25897d824ab019154dfa570305dd1f79b041ae641b8208d2fa00e97ac36e4bdfe8a2dacd46d7cc8da9db4ddb5d75f4c9f3a3e6997aa4e0ae0
|
data/README.md
CHANGED
@@ -15,7 +15,7 @@ A test script:
|
|
15
15
|
|
16
16
|
require 'BioDSL'
|
17
17
|
|
18
|
-
p =
|
18
|
+
p = BD.new.
|
19
19
|
read_fasta(input: "input.fna").
|
20
20
|
grab(select: "ATC$", keys: :SEQ).
|
21
21
|
write_fasta(output: "output.fna").
|
@@ -29,24 +29,24 @@ adding the following to your `~/.bashrc` file:
|
|
29
29
|
And then start the interactive shell:
|
30
30
|
|
31
31
|
$ ibp
|
32
|
-
irb(main):001:0> p =
|
33
|
-
=>
|
32
|
+
irb(main):001:0> p = BD.new
|
33
|
+
=> BD.new
|
34
34
|
irb(main):002:0> p.read_fasta(input: "input.fna")
|
35
|
-
=>
|
35
|
+
=> BD.new.read_fasta(input: "input.fna")
|
36
36
|
irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
|
37
|
-
=>
|
37
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
|
38
38
|
irb(main):004:0> p.write_fasta(output: "output.fna")
|
39
|
-
=>
|
39
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
|
40
40
|
irb(main):005:0> p.run(progress: true)
|
41
|
-
=>
|
41
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
|
42
42
|
irb(main):006:0>
|
43
43
|
|
44
44
|
|
45
45
|
Or chaining commands directly:
|
46
46
|
|
47
47
|
$ ibp
|
48
|
-
irb(main):001:0>
|
49
|
-
=>
|
48
|
+
irb(main):001:0> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
|
49
|
+
=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
|
50
50
|
irb(main):002:0>
|
51
51
|
|
52
52
|
Or run on the command line with the alias bp which you can create by adding the
|
@@ -56,61 +56,61 @@ following to your ~/.bashrc file:
|
|
56
56
|
|
57
57
|
Then you can run the below from the command line:
|
58
58
|
|
59
|
-
$ bp -e '
|
59
|
+
$ bp -e 'BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
|
60
60
|
|
61
61
|
Available BioDSL
|
62
62
|
-------------------
|
63
63
|
|
64
|
-
* [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
65
|
-
* [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
66
|
-
* [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
67
|
-
* [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
68
|
-
* [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
69
|
-
* [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
70
|
-
* [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
71
|
-
* [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
72
|
-
* [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
73
|
-
* [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
74
|
-
* [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
75
|
-
* [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
76
|
-
* [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
77
|
-
* [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
78
|
-
* [count] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
79
|
-
* [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
80
|
-
* [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
81
|
-
* [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
82
|
-
* [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
83
|
-
* [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
84
|
-
* [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
85
|
-
* [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
86
|
-
* [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
87
|
-
* [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
88
|
-
* [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
89
|
-
* [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
90
|
-
* [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
91
|
-
* [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
92
|
-
* [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
93
|
-
* [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
94
|
-
* [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
95
|
-
* [random] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
96
|
-
* [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
97
|
-
* [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
98
|
-
* [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
99
|
-
* [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
100
|
-
* [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
101
|
-
* [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
102
|
-
* [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
103
|
-
* [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
104
|
-
* [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
105
|
-
* [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
106
|
-
* [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
107
|
-
* [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
108
|
-
* [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
109
|
-
* [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
110
|
-
* [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
111
|
-
* [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
112
|
-
* [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
113
|
-
* [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.
|
64
|
+
* [add_key] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AddKey)
|
65
|
+
* [align_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AlignSeqMothur)
|
66
|
+
* [analyze_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AnalyzeResidueDistribution)
|
67
|
+
* [assemble_pairs] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssemblePairs)
|
68
|
+
* [assemble_seq_idba] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqIdba)
|
69
|
+
* [assemble_seq_ray] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqRay)
|
70
|
+
* [assemble_seq_spades] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqSpades)
|
71
|
+
* [classify_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeq)
|
72
|
+
* [classify_seq_mothur] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeqMothur)
|
73
|
+
* [clip_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClipPrimer)
|
74
|
+
* [cluster_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClusterOtus)
|
75
|
+
* [collapse_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollapseOtus)
|
76
|
+
* [collect_otus] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollectOtus)
|
77
|
+
* [complement_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ComplementSeq)
|
78
|
+
* [count] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Count)
|
79
|
+
* [degap_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DegapSeq)
|
80
|
+
* [dereplicate_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DereplicateSeq)
|
81
|
+
* [dump] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Dump)
|
82
|
+
* [filter_rrna] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/FilterRrna)
|
83
|
+
* [genecall] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Genecall)
|
84
|
+
* [grab] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Grab)
|
85
|
+
* [index_taxonomy] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/IndexTaxonomy)
|
86
|
+
* [mean_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MeanScores)
|
87
|
+
* [merge_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergePairSeq)
|
88
|
+
* [merge_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeTable)
|
89
|
+
* [merge_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeValues)
|
90
|
+
* [plot_heatmap] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHeatmap)
|
91
|
+
* [plot_histogram] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHistogram)
|
92
|
+
* [plot_matches] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotMatches)
|
93
|
+
* [plot_residue_distribution] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotResidueDistribution)
|
94
|
+
* [plot_scores] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotScores)
|
95
|
+
* [random] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Random)
|
96
|
+
* [read_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFasta)
|
97
|
+
* [read_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFastq)
|
98
|
+
* [read_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadTable)
|
99
|
+
* [reverse_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReverseSeq)
|
100
|
+
* [slice_align] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceAlign)
|
101
|
+
* [slice_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceSeq)
|
102
|
+
* [sort] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Sort)
|
103
|
+
* [split_pair_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitPairSeq)
|
104
|
+
* [split_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitValues)
|
105
|
+
* [trim_primer] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimPrimer)
|
106
|
+
* [trim_seq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimSeq)
|
107
|
+
* [uchime_ref] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UchimeRef)
|
108
|
+
* [unique_values] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UniqueValues)
|
109
|
+
* [usearch_global] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UsearchGlobal)
|
110
|
+
* [write_fasta] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFasta)
|
111
|
+
* [write_fastq] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFastq)
|
112
|
+
* [write_table] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTable)
|
113
|
+
* [write_tree] (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTree)
|
114
114
|
|
115
115
|
Log and History
|
116
116
|
---------------
|
@@ -127,37 +127,37 @@ Progress:
|
|
127
127
|
|
128
128
|
Show nifty progress table with commands, records read and emitted, and time.
|
129
129
|
|
130
|
-
`
|
130
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(progress: true)`
|
131
131
|
|
132
132
|
Verbose:
|
133
133
|
|
134
134
|
Output verbose messages from commands and the run status.
|
135
135
|
|
136
|
-
`
|
136
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
|
137
137
|
|
138
138
|
Debug:
|
139
139
|
|
140
140
|
Output debug messages from commands using these.
|
141
141
|
|
142
|
-
`
|
142
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(debug: true)`
|
143
143
|
|
144
144
|
E-mail notification:
|
145
145
|
|
146
146
|
Send an email when run is complete.
|
147
147
|
|
148
|
-
`
|
148
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(email: "mail@maasha.dk", subject: "Script done!")`
|
149
149
|
|
150
150
|
Report:
|
151
151
|
|
152
152
|
Create an HTML report of the run stats:
|
153
153
|
|
154
|
-
`
|
154
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
|
155
155
|
|
156
156
|
Output dir:
|
157
157
|
|
158
158
|
All output files from commands are put in a specified dir:
|
159
159
|
|
160
|
-
`
|
160
|
+
`BD.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
|
161
161
|
|
162
162
|
|
163
163
|
Configuration File
|
data/examples/fastq_to_fasta.rb
CHANGED
@@ -5,4 +5,4 @@ require 'BioDSL'
|
|
5
5
|
# Read in sequences in FASTQ format from the file `test.fq` and save them in
|
6
6
|
# FASTA format in the file `test.fna`.
|
7
7
|
|
8
|
-
|
8
|
+
BD.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run
|
@@ -53,7 +53,7 @@ module BioDSL
|
|
53
53
|
# To align the entries in the FASTA file `test.fna` to the template alignment
|
54
54
|
# in the file `template.fna` do:
|
55
55
|
#
|
56
|
-
#
|
56
|
+
# BD.new.
|
57
57
|
# read_fasta(input: "test.fna").
|
58
58
|
# align_seq_mothur(template_file: "template.fna").
|
59
59
|
# run
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
# Now we run the data through the following pipeline and get the resulting
|
69
69
|
# table:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.
|
72
72
|
# read_fasta(input: "test.fna").
|
73
73
|
# analyze_residue_distribution.
|
74
74
|
# grab(select: "residue").
|
@@ -91,7 +91,7 @@ module BioDSL
|
|
91
91
|
# Here we do the same as above, but output percentages instead of absolute
|
92
92
|
# counts:
|
93
93
|
#
|
94
|
-
#
|
94
|
+
# BD.new.
|
95
95
|
# read_fasta(input: "test.fna").
|
96
96
|
# analyze_residue_distribution(percent: true).
|
97
97
|
# grab(select: "residue").
|
@@ -77,7 +77,7 @@ module BioDSL
|
|
77
77
|
# If you have two pair-end sequence files with the Illumina data then you
|
78
78
|
# can assemble these using assemble_pairs like this:
|
79
79
|
#
|
80
|
-
#
|
80
|
+
# BD.new.
|
81
81
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
82
82
|
# assemble_pairs(reverse_complement: true).
|
83
83
|
# run
|
@@ -58,7 +58,7 @@ module BioDSL
|
|
58
58
|
# If you have two pair-end sequence files with the Illumina data then you
|
59
59
|
# can assemble these using +assemble_seq_idba+ like this:
|
60
60
|
#
|
61
|
-
#
|
61
|
+
# BD.new.
|
62
62
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
63
63
|
# assemble_seq_idba.
|
64
64
|
# write_fasta(output: "contigs.fna").
|
@@ -61,7 +61,7 @@ module BioDSL
|
|
61
61
|
# If you have two pair-end sequence files with the Illumina data then you
|
62
62
|
# can assemble these using +assemble_seq_ray+ like this:
|
63
63
|
#
|
64
|
-
#
|
64
|
+
# BD.new.
|
65
65
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
66
66
|
# assemble_seq_ray.
|
67
67
|
# write_fasta(output: "contigs.fna").
|
@@ -56,7 +56,7 @@ module BioDSL
|
|
56
56
|
# If you have two pair-end sequence files with the Illumina data then you
|
57
57
|
# can assemble these using assemble_seq_spades like this:
|
58
58
|
#
|
59
|
-
#
|
59
|
+
# BD.new.
|
60
60
|
# read_fastq(input: "file1.fq", input2: "file2.fq").
|
61
61
|
# assemble_seq_spades(kmers: [55,77,99,127]).
|
62
62
|
# write_fasta(output: "contigs.fna").
|
@@ -69,7 +69,7 @@ module BioDSL
|
|
69
69
|
include AuxHelper
|
70
70
|
|
71
71
|
STATS = %i(records_in records_out sequences_in sequences_out residues_in
|
72
|
-
records_out assembled)
|
72
|
+
residues_out records_out assembled)
|
73
73
|
|
74
74
|
# Constructor for the AssembleSeqSpades class.
|
75
75
|
#
|
@@ -100,7 +100,7 @@ module BioDSL
|
|
100
100
|
#
|
101
101
|
# To classify a bunch of OTU sequences in the file +otus.fna+ we do:
|
102
102
|
#
|
103
|
-
#
|
103
|
+
# BD.new.
|
104
104
|
# read_fasta(input: "otus.fna").
|
105
105
|
# classify_seq(dir: "RDP11_3").
|
106
106
|
# write_table(keys: [:SEQ_NAME, :TAXONOMY_HITS, :TAXONOMY]).
|
@@ -61,7 +61,7 @@ module BioDSL
|
|
61
61
|
# database = "trainset9_032012.pds.fasta"
|
62
62
|
# taxonomy = "trainset9_032012.pds.tax"
|
63
63
|
#
|
64
|
-
#
|
64
|
+
# BD.new.
|
65
65
|
# read_fasta(input: "otus.fna").
|
66
66
|
# classify_seq_mothur(database: database, taxonomy: taxonomy).
|
67
67
|
# grab(exact: true, keys: :RECORD_TYPE, select: "taxonomy").
|
@@ -75,7 +75,7 @@ module BioDSL
|
|
75
75
|
# To clip this sequence in the forward direction with the primer
|
76
76
|
# 'TGACTACGACTACGACTACT' do:
|
77
77
|
#
|
78
|
-
#
|
78
|
+
# BD.new.
|
79
79
|
# read_fasta(input: "test.fna").
|
80
80
|
# clip_primer(primer: "TGACTACGACTACGACTACT", direction: :forward).
|
81
81
|
# dump.
|
@@ -91,7 +91,7 @@ module BioDSL
|
|
91
91
|
#
|
92
92
|
# Or in the reverse direction:
|
93
93
|
#
|
94
|
-
#
|
94
|
+
# BD.new.
|
95
95
|
# read_fasta(input: "test.fna").
|
96
96
|
# clip_primer(primer: "TGACTACGACTACGACTACT", direction: :reverse).
|
97
97
|
# dump.
|
@@ -43,7 +43,7 @@ module BioDSL
|
|
43
43
|
# Here is an OTU table with four rows, one of which has a redundant Taxonomy
|
44
44
|
# string:
|
45
45
|
#
|
46
|
-
#
|
46
|
+
# BD.new.read_table(input: "otu_table.txt").dump.run
|
47
47
|
#
|
48
48
|
# {:OTU=>"OTU_1",
|
49
49
|
# :CM1_COUNT=>881,
|
@@ -73,7 +73,7 @@ module BioDSL
|
|
73
73
|
# In order to collapse the redundant OTU simply run the stream through
|
74
74
|
# +collapse_otus+:
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# BD.new.read_table(input: "otu_table.txt").collapse_otus.dump.run
|
77
77
|
#
|
78
78
|
# {:OTU=>"OTU_1",
|
79
79
|
# :CM1_COUNT=>881,
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To complement the sequence do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.read_fastq(input:"test.fq").complement_seq.dump.run
|
55
55
|
#
|
56
56
|
# {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
|
57
57
|
# :SEQ=>"AACATTTTGCTGCCGGTCAC",
|
@@ -46,7 +46,7 @@ module BioDSL
|
|
46
46
|
#
|
47
47
|
# To count the number of records in the file `test.fq`:
|
48
48
|
#
|
49
|
-
#
|
49
|
+
# BD.new.read_fastq(input: "test.fq").count(output: "count.txt").dump.run
|
50
50
|
#
|
51
51
|
# {:SEQ_NAME=>"ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1",
|
52
52
|
# :SEQ=>"TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT",
|
@@ -52,7 +52,7 @@ module BioDSL
|
|
52
52
|
# To count the values of both columns we first read the table with
|
53
53
|
# +read_table+ and then pass the result to +count_values+:
|
54
54
|
#
|
55
|
-
#
|
55
|
+
# BD.new.
|
56
56
|
# read_table(input: "test.tab").
|
57
57
|
# count_values(keys: [:V0, :V1]).
|
58
58
|
# dump.
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To remove all gaps from all sequences do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.read_fasta(input: "test.fna").degap_seq.dump.run
|
55
55
|
#
|
56
56
|
# {:SEQ_NAME=>"test1", :SEQ=>"AGTC", :SEQ_LEN=>4}
|
57
57
|
# {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
|
@@ -59,7 +59,7 @@ module BioDSL
|
|
59
59
|
#
|
60
60
|
# To remove all gap-only columns use the +columns_only+ option:
|
61
61
|
#
|
62
|
-
#
|
62
|
+
# BD.new.
|
63
63
|
# read_fasta(input: "test.fna").
|
64
64
|
# degap_seq(columns_only: true).
|
65
65
|
# dump.
|
@@ -53,7 +53,7 @@ module BioDSL
|
|
53
53
|
#
|
54
54
|
# To dereplicate all sequences we use +read_fasta+ and +dereplicate_seq+:
|
55
55
|
#
|
56
|
-
#
|
56
|
+
# BD.new.read_fasta(input: "test.fna").dereplicate_seq.dump.run
|
57
57
|
#
|
58
58
|
# {:SEQ_NAME=>"test1", :SEQ=>"ATGC", :SEQ_LEN=>4, :SEQ_COUNT=>2}
|
59
59
|
# {:SEQ_NAME=>"test3", :SEQ=>"GCAT", :SEQ_LEN=>4, :SEQ_COUNT=>1}
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To filter all reads matching the SILVA archaea 23S rRNA do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.
|
55
55
|
# read_fastq(input: "reads.fq").
|
56
56
|
# filter_rrna(ref_fasta: ["silva-arc-23s-id98.fasta"],
|
57
57
|
# ref_index: ["silva-arc-23s-id98.fasta.idx*"]).
|
@@ -59,7 +59,7 @@ module BioDSL
|
|
59
59
|
#
|
60
60
|
# To genecall a genome do:
|
61
61
|
#
|
62
|
-
#
|
62
|
+
# BD.new.
|
63
63
|
# read_fasta(input: "contigs.fna").
|
64
64
|
# genecall.
|
65
65
|
# grab(select: "genecall", key: :type, exact: true).
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
#
|
69
69
|
# To add genecall data to the sequence name use +merge_values+:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.
|
72
72
|
# read_fasta(input: "contigs.fna").
|
73
73
|
# genecall(type: "protein").
|
74
74
|
# grab(select: "genecall", key: :type, exact: true).
|
@@ -57,7 +57,7 @@ module BioDSL
|
|
57
57
|
# We can read in these sequence using +read_fastq+ and then soft mask the
|
58
58
|
# sequence with mask_seq like this:
|
59
59
|
#
|
60
|
-
#
|
60
|
+
# BD.new.read_fastq(input: "test.fq").mask_seq.dump.run
|
61
61
|
#
|
62
62
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
63
63
|
# :SEQ=>"ttggtcgctcgctccgcgacCTCAGATCAGACGTGGGCGAT",
|
@@ -66,7 +66,7 @@ module BioDSL
|
|
66
66
|
#
|
67
67
|
# Using the +quality_min+ option we can change the cutoff:
|
68
68
|
#
|
69
|
-
#
|
69
|
+
# BD.new.read_fastq(input: "test.fq").mask_seq(quality_min: 25).dump.run
|
70
70
|
#
|
71
71
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
72
72
|
# :SEQ=>"ttggtcgctcgctccgcgacctcagATCAGACGTGGGCGAT",
|
@@ -75,7 +75,7 @@ module BioDSL
|
|
75
75
|
#
|
76
76
|
# Using the +mask+ option for hard masking:
|
77
77
|
#
|
78
|
-
#
|
78
|
+
# BD.new.read_fastq(input: "test.fq").mask_seq(mask: :hard).dump.run
|
79
79
|
#
|
80
80
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
81
81
|
# :SEQ=>"NNNNNNNNNNNNNNNNNNNNCTCAGATCAGACGTGGGCGAT",
|
@@ -66,7 +66,7 @@ module BioDSL
|
|
66
66
|
#
|
67
67
|
# To calculate the mean score do:
|
68
68
|
#
|
69
|
-
#
|
69
|
+
# BD.new.read_fastq(input: "test.fq").mean_scores.dump.run
|
70
70
|
#
|
71
71
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
72
72
|
# :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
|
@@ -76,7 +76,7 @@ module BioDSL
|
|
76
76
|
#
|
77
77
|
# To calculate local means for a sliding window, do:
|
78
78
|
#
|
79
|
-
#
|
79
|
+
# BD.new.read_fastq(input: "test.fq").mean_scores(local: true).dump.run
|
80
80
|
#
|
81
81
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
82
82
|
# :SEQ=>"TTGGTCGCTCGCTCGACCTCAGATCAGACGTGG",
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
# Here we plot two matches from a table. The vector records are shown in the
|
69
69
|
# +dump+ output:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.read_table(input: "test.tab").dump.plot_matches.run
|
72
72
|
#
|
73
73
|
# {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"+"}
|
74
74
|
# {:Q_BEG=>0, :Q_END=>10, :S_BEG=>0, :S_END=>10, :STRAND=>"-"}
|
@@ -65,7 +65,7 @@ module BioDSL
|
|
65
65
|
#
|
66
66
|
# Here we plot a residue distribution of a FASTA file:
|
67
67
|
#
|
68
|
-
#
|
68
|
+
# BD.new.read_fasta(input: "test.fna").plot_residue_distribution.run
|
69
69
|
#
|
70
70
|
# rubocop: disable ClassLength
|
71
71
|
class PlotResidueDistribution
|
@@ -64,39 +64,39 @@ module BioDSL
|
|
64
64
|
#
|
65
65
|
# To read all FASTQ entries from a file:
|
66
66
|
#
|
67
|
-
#
|
67
|
+
# BD.new.read_fastq(input: "test.fq").dump.run
|
68
68
|
#
|
69
69
|
# To read all FASTQ entries from a gzipped file:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.read_fastq(input: "test.fq.gz").dump.run
|
72
72
|
#
|
73
73
|
# To read in only 10 records from a FASTQ file:
|
74
74
|
#
|
75
|
-
#
|
75
|
+
# BD.new.read_fastq(input: "test.fq", first: 10).dump.run
|
76
76
|
#
|
77
77
|
# To read in the last 10 records from a FASTQ file:
|
78
78
|
#
|
79
|
-
#
|
79
|
+
# BD.new.read_fastq(input: "test.fq", last: 10).dump.run
|
80
80
|
#
|
81
81
|
# To read all FASTQ entries from multiple files:
|
82
82
|
#
|
83
|
-
#
|
83
|
+
# BD.new.read_fastq(input: "test1.fq,test2.fq").dump.run
|
84
84
|
#
|
85
85
|
# To read FASTQ entries from multiple files using a glob expression:
|
86
86
|
#
|
87
|
-
#
|
87
|
+
# BD.new.read_fastq(input: "*.fq").dump.run
|
88
88
|
#
|
89
89
|
# To read FASTQ entries from pair-end data:
|
90
90
|
#
|
91
|
-
#
|
91
|
+
# BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
|
92
92
|
#
|
93
93
|
# To read FASTQ entries from pair-end data:
|
94
94
|
#
|
95
|
-
#
|
95
|
+
# BD.new.read_fastq(input: "file1.fq", input2: "file2.fq").dump.run
|
96
96
|
#
|
97
97
|
# To read FASTQ entries from pair-end data and reverse-complement read2:
|
98
98
|
#
|
99
|
-
#
|
99
|
+
# BD.new.
|
100
100
|
# read_fastq(input: "file1.fq", input2: "file2.fq",
|
101
101
|
# reverse_complement: true)
|
102
102
|
# .dump.run
|
@@ -93,7 +93,7 @@ module BioDSL
|
|
93
93
|
# where the keys Organism, Sequence and Count are taken from the comment
|
94
94
|
# line prefixed with #:
|
95
95
|
#
|
96
|
-
#
|
96
|
+
# BD.new.read_table(input: "test.tab").dump.run
|
97
97
|
#
|
98
98
|
# {:Organism=>"Human", :Sequence=>"ATACGTCAG", :Count=>23524}
|
99
99
|
# {:Organism=>"Dog", :Sequence=>"AGCATGAC", :Count=>2442}
|
@@ -103,7 +103,7 @@ module BioDSL
|
|
103
103
|
# However, if the first line is skipped using the +skip+ option the keys
|
104
104
|
# will default to V0, V1, V2 ... Vn:
|
105
105
|
#
|
106
|
-
#
|
106
|
+
# BD.new.read_table(input: "test.tab", skip: 1).dump.run
|
107
107
|
#
|
108
108
|
# {:V0=>"Human", :V1=>"ATACGTCAG", :V2=>23524}
|
109
109
|
# {:V0=>"Dog", :V1=>"AGCATGAC", :V2=>2442}
|
@@ -112,7 +112,7 @@ module BioDSL
|
|
112
112
|
#
|
113
113
|
# To explicitly name the columns (or the keys) use the +keys+ option:
|
114
114
|
#
|
115
|
-
#
|
115
|
+
# BD.new.
|
116
116
|
# read_table(input: "test.tab", skip: 1, keys: [:ORGANISM, :SEQ, :COUNT]).
|
117
117
|
# dump.
|
118
118
|
# run
|
@@ -128,7 +128,7 @@ module BioDSL
|
|
128
128
|
# argument. So to read in only the sequence and the count so that the
|
129
129
|
# count comes before the sequence do:
|
130
130
|
#
|
131
|
-
#
|
131
|
+
# BD.new.read_table(input: "test.tab", skip: 1, select: [2, 1]).dump.run
|
132
132
|
#
|
133
133
|
# {:V0=>23524, :V1=>"ATACGTCAG"}
|
134
134
|
# {:V0=>2442, :V1=>"AGCATGAC"}
|
@@ -141,7 +141,7 @@ module BioDSL
|
|
141
141
|
#
|
142
142
|
# Then the header keys can be used:
|
143
143
|
#
|
144
|
-
#
|
144
|
+
# BD.new.
|
145
145
|
# read_table(input: "test.tab", skip: 1, select: [:Count, :Sequence]).
|
146
146
|
# dump.
|
147
147
|
# run
|
@@ -154,7 +154,7 @@ module BioDSL
|
|
154
154
|
# Likewise, it is possible to reject specified columns from being read
|
155
155
|
# using the +reject+ option:
|
156
156
|
#
|
157
|
-
#
|
157
|
+
# BD.new.read_table(input: "test.tab", skip: 1, reject: [2, 1]).dump.run
|
158
158
|
#
|
159
159
|
# {:V0=>"Human"}
|
160
160
|
# {:V0=>"Dog"}
|
@@ -163,7 +163,7 @@ module BioDSL
|
|
163
163
|
#
|
164
164
|
# And again, the header keys can be used if a header is present:
|
165
165
|
#
|
166
|
-
#
|
166
|
+
# BD.new.
|
167
167
|
# read_table(input: "test.tab", skip: 1, reject: [:Count, :Sequence]).
|
168
168
|
# dump.
|
169
169
|
# run
|
@@ -51,7 +51,7 @@ module BioDSL
|
|
51
51
|
#
|
52
52
|
# To reverse the sequence simply do:
|
53
53
|
#
|
54
|
-
#
|
54
|
+
# BD.new.read_fastq(input:"test.fq").reverse_seq.dump.run
|
55
55
|
#
|
56
56
|
# {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
|
57
57
|
# :SEQ=>"GTGACCGGCAGCAAAATGTT",
|
@@ -92,7 +92,7 @@ module BioDSL
|
|
92
92
|
#
|
93
93
|
# We can slice the alignment with +slice_align+ using a range:
|
94
94
|
#
|
95
|
-
#
|
95
|
+
# BD.new.
|
96
96
|
# read_fasta(input: "test.fna").
|
97
97
|
# slice_align(slice: 14 .. 27).
|
98
98
|
# dump.
|
@@ -107,7 +107,7 @@ module BioDSL
|
|
107
107
|
#
|
108
108
|
# Or we could slice the alignment using a set of primers:
|
109
109
|
#
|
110
|
-
#
|
110
|
+
# BD.new.
|
111
111
|
# read_fasta(input: "test.fna").
|
112
112
|
# slice_align(forward: "CGCATACG", reverse: "GAGGGG", max_mismatches: 0,
|
113
113
|
# max_insertions: 0, max_deletions: 0).
|
@@ -128,7 +128,7 @@ module BioDSL
|
|
128
128
|
# and specifying primers these will be matched to the template and the hit
|
129
129
|
# positions used for slicing:
|
130
130
|
#
|
131
|
-
#
|
131
|
+
# BD.new.
|
132
132
|
# read_fasta(input: "test.fna").
|
133
133
|
# slice_align(template_file: "template.fna", forward: "GAATACG",
|
134
134
|
# reverse: "ATTCGAT", max_mismatches: 0, max_insertions: 0,
|
@@ -147,7 +147,7 @@ module BioDSL
|
|
147
147
|
# is useful if you are slicing 16S rRNA alignments and want the _E.coli_
|
148
148
|
# corresponding positions - simply use the _E.coli_ sequence as template.
|
149
149
|
#
|
150
|
-
#
|
150
|
+
# BD.new.
|
151
151
|
# read_fasta(input: "test.fna").
|
152
152
|
# slice_align(template_file: "template.fna", slice: 4 .. 14).
|
153
153
|
# dump.run
|
@@ -55,7 +55,7 @@ module BioDSL
|
|
55
55
|
#
|
56
56
|
# To slice the second residue from the beginning do:
|
57
57
|
#
|
58
|
-
#
|
58
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: 2).dump.run
|
59
59
|
#
|
60
60
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
61
61
|
# :SEQ=>"G",
|
@@ -64,7 +64,7 @@ module BioDSL
|
|
64
64
|
#
|
65
65
|
# To slice the last residue do:
|
66
66
|
#
|
67
|
-
#
|
67
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: -1).dump.run
|
68
68
|
#
|
69
69
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
70
70
|
# :SEQ=>"T",
|
@@ -73,7 +73,7 @@ module BioDSL
|
|
73
73
|
#
|
74
74
|
# To slice the first 5 residues do:
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: 0 ... 5).dump.run
|
77
77
|
#
|
78
78
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
79
79
|
# :SEQ=>"TTGGT",
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# To slice the last 5 residues do:
|
84
84
|
#
|
85
|
-
#
|
85
|
+
# BD.new.read_fastq(input: "test.fq").slice_seq(slice: -5 .. -1).dump.run
|
86
86
|
#
|
87
87
|
# {:SEQ_NAME=>"HWI-EAS157_20FFGAAXX:2:1:888:434",
|
88
88
|
# :SEQ=>"GCGAT",
|
data/lib/BioDSL/commands/sort.rb
CHANGED
@@ -53,7 +53,7 @@ module BioDSL
|
|
53
53
|
#
|
54
54
|
# To sort this according to COUNT in ascending order do:
|
55
55
|
#
|
56
|
-
#
|
56
|
+
# BD.new.read_table(input: "test.tab").sort(key: :COUNT).dump.run
|
57
57
|
#
|
58
58
|
# {:COUNT=>1, :ORGANISM=>"Eel"}
|
59
59
|
# {:COUNT=>3, :ORGANISM=>"Cat"}
|
@@ -61,7 +61,7 @@ module BioDSL
|
|
61
61
|
#
|
62
62
|
# And in descending order:
|
63
63
|
#
|
64
|
-
#
|
64
|
+
# BD.new.
|
65
65
|
# read_table(input: "test.tab").
|
66
66
|
# sort(key: :COUNT, reverse: true).
|
67
67
|
# dump.
|
@@ -73,7 +73,7 @@ module BioDSL
|
|
73
73
|
#
|
74
74
|
# The type of value determines the sorting, alphabetical order:
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# BD.new.read_table(input: "test.tab").sort(key: :ORGANISM).dump.run
|
77
77
|
#
|
78
78
|
# {:COUNT=>3, :ORGANISM=>"Cat"}
|
79
79
|
# {:COUNT=>4, :ORGANISM=>"Dog"}
|
@@ -81,7 +81,7 @@ module BioDSL
|
|
81
81
|
#
|
82
82
|
# And reverse alphabetic order:
|
83
83
|
#
|
84
|
-
#
|
84
|
+
# BD.new.
|
85
85
|
# read_table(input: "test.tab").
|
86
86
|
# sort(key: :ORGANISM, reverse: true).
|
87
87
|
# dump.
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# The forward end can be trimmed like this:
|
84
84
|
#
|
85
|
-
#
|
85
|
+
# BD.new.
|
86
86
|
# read_fasta(input: "test.fna").
|
87
87
|
# trim_primer(primer: "ATAGAACTGAC", direction: :forward).
|
88
88
|
# dump.
|
@@ -98,7 +98,7 @@ module BioDSL
|
|
98
98
|
#
|
99
99
|
# And trimming a reverse primer:
|
100
100
|
#
|
101
|
-
#
|
101
|
+
# BD.new.
|
102
102
|
# read_fasta(input: "test.fna").
|
103
103
|
# trim_primer(primer: "ACTACGTGCGGAT", direction: :reverse).
|
104
104
|
# dump.
|
@@ -58,7 +58,7 @@ module BioDSL
|
|
58
58
|
#
|
59
59
|
# To trim both ends simply do:
|
60
60
|
#
|
61
|
-
#
|
61
|
+
# BD.new.read_fastq(input: "test.fq").trim_seq.trim_seq.run
|
62
62
|
#
|
63
63
|
# SEQ_NAME: test
|
64
64
|
# SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcat
|
@@ -68,7 +68,7 @@ module BioDSL
|
|
68
68
|
#
|
69
69
|
# Use the +quality_min+ option to change the minimum value to discard:
|
70
70
|
#
|
71
|
-
#
|
71
|
+
# BD.new.
|
72
72
|
# read_fastq(input: "test.fq").
|
73
73
|
# trim_seq(quality_min: 25).
|
74
74
|
# trim_seq.
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# To trim the left end only (use :right for right end only), do:
|
84
84
|
#
|
85
|
-
#
|
85
|
+
# BD.new.read_fastq(input: "test.fq").trim_seq(mode: :left).trim_seq.run
|
86
86
|
#
|
87
87
|
# SEQ_NAME: test
|
88
88
|
# SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtctacgacgagcatgctagctag
|
@@ -93,7 +93,7 @@ module BioDSL
|
|
93
93
|
# To increase the length of stretch of good quality residues to match, use
|
94
94
|
# the +length_min+ option:
|
95
95
|
#
|
96
|
-
#
|
96
|
+
# BD.new.read_fastq(input: "test.fq").trim_seq(length_min: 4).trim_seq.run
|
97
97
|
#
|
98
98
|
# SEQ_NAME: test
|
99
99
|
# SEQ: tctgacgtatcgatcgttgattagttgctagctatgcagtct
|
@@ -56,7 +56,7 @@ module BioDSL
|
|
56
56
|
# To output only unique values for the first column we first read the table
|
57
57
|
# with +read_table+ and then pass the result to +unique_values+:
|
58
58
|
#
|
59
|
-
#
|
59
|
+
# BD.new.read_table(input: "test.tab").unique_values(key: :V0).dump.run
|
60
60
|
#
|
61
61
|
# {:V0=>"Human", :V1=>"H1"}
|
62
62
|
# {:V0=>"Dog", :V1=>"D1"}
|
@@ -64,7 +64,7 @@ module BioDSL
|
|
64
64
|
#
|
65
65
|
# To output duplicate records instead use the +invert+ option:
|
66
66
|
#
|
67
|
-
#
|
67
|
+
# BD.new.
|
68
68
|
# read_table(input: "test.tab").
|
69
69
|
# unique_values(key: :V0, invert: true).
|
70
70
|
# dump.
|
data/lib/BioDSL/pipeline.rb
CHANGED
@@ -139,7 +139,7 @@ module BioDSL
|
|
139
139
|
|
140
140
|
# Format a Pipeline to a pretty string which is returned.
|
141
141
|
def to_s
|
142
|
-
command_strings = %w(
|
142
|
+
command_strings = %w(BD new)
|
143
143
|
|
144
144
|
@commands.each { |command| command_strings << command.to_s }
|
145
145
|
|
@@ -317,7 +317,7 @@ module BioDSL
|
|
317
317
|
# @option options [Boolean] :debug Debug flag.
|
318
318
|
# @option options [Boolean] :verbose Verbose flag.
|
319
319
|
def prime_variables(options)
|
320
|
-
BioDSL.test = ENV['
|
320
|
+
BioDSL.test = ENV['BD_TEST']
|
321
321
|
BioDSL.debug = options[:debug]
|
322
322
|
BioDSL.verbose = options[:verbose]
|
323
323
|
end
|
data/lib/BioDSL/version.rb
CHANGED
data/lib/BioDSL.rb
CHANGED
@@ -110,7 +110,7 @@ class TestGrab < Test::Unit::TestCase
|
|
110
110
|
|
111
111
|
test 'BioDSL::Pipeline::Grab#to_s with select and symbol key return OK' do
|
112
112
|
@p.grab(select: :SEQ_NAME)
|
113
|
-
expected = '
|
113
|
+
expected = 'BD.new.grab(select: :SEQ_NAME)'
|
114
114
|
assert_equal(expected, @p.to_s)
|
115
115
|
end
|
116
116
|
|
@@ -197,7 +197,7 @@ class TestReadFasta < Test::Unit::TestCase
|
|
197
197
|
test 'BioDSL::Pipeline::ReadFasta#to_s with :first returns correctly' do
|
198
198
|
@p.read_fasta(input: @file, first: 3)
|
199
199
|
|
200
|
-
expected = %{
|
200
|
+
expected = %{BD.new.read_fasta(input: "#{@file}", first: 3)}
|
201
201
|
|
202
202
|
assert_equal(expected, @p.to_s)
|
203
203
|
end
|
@@ -377,7 +377,7 @@ class TestReadFastq < Test::Unit::TestCase
|
|
377
377
|
test 'BioDSL::Pipeline::ReadFastq#to_s with :first returns correctly' do
|
378
378
|
@p.read_fastq(input: @file, first: 3)
|
379
379
|
|
380
|
-
expected = %{
|
380
|
+
expected = %{BD.new.read_fastq(input: "#{@file}", first: 3)}
|
381
381
|
|
382
382
|
assert_equal(expected, @p.to_s)
|
383
383
|
end
|
@@ -295,7 +295,7 @@ class TestReadTable < Test::Unit::TestCase
|
|
295
295
|
test 'BioDSL::Pipeline::ReadTable#to_s with :first returns correctly' do
|
296
296
|
@p.read_table(input: @file, first: 3)
|
297
297
|
|
298
|
-
expected = %{
|
298
|
+
expected = %{BD.new.read_table(input: "#{@file}", first: 3)}
|
299
299
|
|
300
300
|
assert_equal(expected, @p.to_s)
|
301
301
|
end
|
@@ -44,7 +44,7 @@ class PipelineTest < Test::Unit::TestCase
|
|
44
44
|
delivery_method :test
|
45
45
|
end
|
46
46
|
|
47
|
-
@p =
|
47
|
+
@p = BD.new
|
48
48
|
end
|
49
49
|
|
50
50
|
def setup_fasta_files
|
@@ -69,27 +69,27 @@ class PipelineTest < Test::Unit::TestCase
|
|
69
69
|
|
70
70
|
test 'BioDSL::Pipeline#to_s w/o options and w/o .run() returns OK' do
|
71
71
|
@p.commands << BioDSL::Command.new('dump', nil, {})
|
72
|
-
expected = %(
|
72
|
+
expected = %(BD.new.dump)
|
73
73
|
assert_equal(expected, @p.to_s)
|
74
74
|
end
|
75
75
|
|
76
76
|
test 'BioDSL::Pipeline#to_s with options and w/o .run() returns OK' do
|
77
77
|
@p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
|
78
|
-
expected = %(
|
78
|
+
expected = %(BD.new.read_fasta(input: "test.fna"))
|
79
79
|
assert_equal(expected, @p.to_s)
|
80
80
|
end
|
81
81
|
|
82
82
|
test 'BioDSL::Pipeline#to_s w/o options and .run() returns OK' do
|
83
83
|
@p.commands << BioDSL::Command.new('dump', nil, {})
|
84
84
|
@p.complete = true
|
85
|
-
expected = %(
|
85
|
+
expected = %(BD.new.dump.run)
|
86
86
|
assert_equal(expected, @p.run.to_s)
|
87
87
|
end
|
88
88
|
|
89
89
|
test 'BioDSL::Pipeline#to_s with options and .run() returns OK' do
|
90
90
|
@p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
|
91
91
|
@p.complete = true
|
92
|
-
expected = %{
|
92
|
+
expected = %{BD.new.read_fasta(input: "test.fna").run}
|
93
93
|
assert_equal(expected, @p.run.to_s)
|
94
94
|
end
|
95
95
|
|
@@ -113,13 +113,13 @@ class PipelineTest < Test::Unit::TestCase
|
|
113
113
|
|
114
114
|
test 'BioDSL::Pipeline#+ of two Pipelines return correctly' do
|
115
115
|
p = BioDSL::Pipeline.new.dump(first: 2)
|
116
|
-
assert_equal('
|
116
|
+
assert_equal('BD.new.dump(first: 2)', (@p + p).to_s)
|
117
117
|
end
|
118
118
|
|
119
119
|
test 'BioDSL::Pipeline#+ of three Pipelines return correctly' do
|
120
120
|
p1 = BioDSL::Pipeline.new.dump(first: 2)
|
121
121
|
p2 = BioDSL::Pipeline.new.dump(last: 3)
|
122
|
-
assert_equal('
|
122
|
+
assert_equal('BD.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
|
123
123
|
end
|
124
124
|
|
125
125
|
test 'BioDSL::Pipeline#pop decreases size' do
|
data/test/helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: BioDSL
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin A. Hansen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: haml
|
@@ -413,7 +413,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
413
413
|
version: '0'
|
414
414
|
requirements: []
|
415
415
|
rubyforge_project: BioDSL
|
416
|
-
rubygems_version: 2.4.
|
416
|
+
rubygems_version: 2.4.8
|
417
417
|
signing_key:
|
418
418
|
specification_version: 4
|
419
419
|
summary: BioDSL
|