lederhosen 1.6.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -1
- data/lederhosen.gemspec +6 -6
- data/lib/lederhosen/tasks/trim.rb +6 -6
- data/lib/lederhosen/trimmer.rb +152 -52
- data/lib/lederhosen/version.rb +3 -3
- data/readme.md +42 -8
- data/spec/data/trimmed_sizes.txt +100 -0
- data/spec/trimmer_spec.rb +41 -3
- metadata +8 -8
- data/.rvmrc +0 -1
data/Gemfile
CHANGED
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "1.
|
8
|
+
s.version = "1.7.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-12-
|
12
|
+
s.date = "2012-12-19"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -18,7 +18,6 @@ Gem::Specification.new do |s|
|
|
18
18
|
]
|
19
19
|
s.files = [
|
20
20
|
".rspec",
|
21
|
-
".rvmrc",
|
22
21
|
"Gemfile",
|
23
22
|
"LICENSE.txt",
|
24
23
|
"Rakefile",
|
@@ -46,6 +45,7 @@ Gem::Specification.new do |s|
|
|
46
45
|
"spec/data/ILT_L_9_B_002_3.txt.gz",
|
47
46
|
"spec/data/example.fastq",
|
48
47
|
"spec/data/test.uc",
|
48
|
+
"spec/data/trimmed_sizes.txt",
|
49
49
|
"spec/no_tasks_spec.rb",
|
50
50
|
"spec/spec_helper.rb",
|
51
51
|
"spec/trimmer_spec.rb"
|
@@ -60,14 +60,14 @@ Gem::Specification.new do |s|
|
|
60
60
|
s.specification_version = 3
|
61
61
|
|
62
62
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
63
|
-
s.add_runtime_dependency(%q<dna>, ["
|
63
|
+
s.add_runtime_dependency(%q<dna>, [">= 0"])
|
64
64
|
s.add_runtime_dependency(%q<progressbar>, ["= 0.12.0"])
|
65
65
|
s.add_runtime_dependency(%q<thor>, ["= 0.16.0"])
|
66
66
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
67
67
|
s.add_development_dependency(%q<jeweler>, ["= 1.8.4"])
|
68
68
|
s.add_development_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
69
69
|
else
|
70
|
-
s.add_dependency(%q<dna>, ["
|
70
|
+
s.add_dependency(%q<dna>, [">= 0"])
|
71
71
|
s.add_dependency(%q<progressbar>, ["= 0.12.0"])
|
72
72
|
s.add_dependency(%q<thor>, ["= 0.16.0"])
|
73
73
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
@@ -75,7 +75,7 @@ Gem::Specification.new do |s|
|
|
75
75
|
s.add_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
76
76
|
end
|
77
77
|
else
|
78
|
-
s.add_dependency(%q<dna>, ["
|
78
|
+
s.add_dependency(%q<dna>, [">= 0"])
|
79
79
|
s.add_dependency(%q<progressbar>, ["= 0.12.0"])
|
80
80
|
s.add_dependency(%q<thor>, ["= 0.16.0"])
|
81
81
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
@@ -10,16 +10,16 @@ module Lederhosen
|
|
10
10
|
desc "trim",
|
11
11
|
"trim reads based on quality scores"
|
12
12
|
|
13
|
-
method_option :reads_dir,
|
14
|
-
method_option :out_dir,
|
15
|
-
method_option :
|
16
|
-
method_option :read_type,
|
13
|
+
method_option :reads_dir, :type => :string, :required => true
|
14
|
+
method_option :out_dir, :type => :string, :required => true
|
15
|
+
method_option :left_trim, :type => :numeric, :default => 0
|
16
|
+
method_option :read_type, :type => :string, :default => 'qseq'
|
17
17
|
method_option :min_length, :type => :numeric, :default => 75
|
18
18
|
|
19
19
|
def trim
|
20
20
|
raw_reads = options[:reads_dir]
|
21
21
|
out_dir = options[:out_dir]
|
22
|
-
|
22
|
+
left_trim = options[:left_trim]
|
23
23
|
read_type = options[:read_type]
|
24
24
|
min_length = options[:min_length]
|
25
25
|
|
@@ -48,7 +48,7 @@ module Lederhosen
|
|
48
48
|
out = File.join(out_dir, "#{File.basename(prefix)}.fasta")
|
49
49
|
|
50
50
|
# create the trimmed sequence generator
|
51
|
-
trim_args = { :
|
51
|
+
trim_args = { :left_trim => left_trim, :min_length => min_length }
|
52
52
|
|
53
53
|
trimmer =
|
54
54
|
if read_type == 'qseq'
|
data/lib/lederhosen/trimmer.rb
CHANGED
@@ -1,56 +1,39 @@
|
|
1
1
|
module Lederhosen
|
2
2
|
module Trimmer
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
4
|
+
##
|
5
|
+
# Code used for sequence trimming
|
6
|
+
#
|
7
|
+
# - PairedTrimmer
|
8
|
+
# - HuangTrimmer
|
9
|
+
# - ProbabilityTrimmer
|
10
|
+
# - QSEQTrimmer
|
11
|
+
#
|
12
|
+
# Some major refactoring needs to get done here
|
13
|
+
#
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
next
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
15
|
+
# HuangTrimmer
|
16
|
+
#
|
17
|
+
# class that has the trim function. Used in mixins
|
18
|
+
# this trim function is based on the function documented
|
19
|
+
# in the paper:
|
20
|
+
# Huang X, Wang J, Aluru S, Yang SP, Hillier L. (2003). PCAP:
|
21
|
+
# a whole-genome assembly program. Genome Res 13:
|
22
|
+
# 2164–2170.
|
23
|
+
#
|
24
|
+
# The implementation is a direct copy from the perl implementation
|
25
|
+
# implemented in Pangea 1.0:
|
26
|
+
# PANGEA: pipeline for analysis of next generation amplicons
|
27
|
+
# A Giongo, DB Crabb, AG Davis-Richardson - ISME , 2010
|
28
|
+
#
|
29
|
+
class HuangTrimmer
|
34
30
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
s.reverse.tr('GATCNgatcn','CTAGNctagn')
|
31
|
+
def initialize(args={})
|
32
|
+
@min = args[:min]
|
33
|
+
@offset = args[:offset]
|
39
34
|
end
|
40
35
|
|
41
|
-
|
42
|
-
# so you can use it if you don't want to initialize a PairedTrimmer
|
43
|
-
def trim_seq(dna, args={})
|
44
|
-
|
45
|
-
# trim primers off of sequence
|
46
|
-
# XXX this is experiment-specific and needs to be made
|
47
|
-
# into a parameter
|
48
|
-
if @pretrim
|
49
|
-
dna.sequence = dna.sequence[@pretrim..-1]
|
50
|
-
dna.quality = dna.quality[@pretrim..-1]
|
51
|
-
end
|
52
|
-
|
53
|
-
dna.sequence.gsub! '.', 'N'
|
36
|
+
def trim_seq(dna)
|
54
37
|
|
55
38
|
_sum, _max, first, last, start, _end = 0, 0, 0, 0, 0
|
56
39
|
|
@@ -66,14 +49,130 @@ class PairedTrimmer < Enumerator
|
|
66
49
|
end
|
67
50
|
end
|
68
51
|
|
69
|
-
|
52
|
+
begin
|
53
|
+
dna.sequence[start, _end - start].gsub('.', 'N')
|
54
|
+
rescue
|
55
|
+
nil
|
56
|
+
end
|
70
57
|
end
|
58
|
+
end
|
71
59
|
|
60
|
+
#
|
61
|
+
# return the longest string starting from the left side
|
62
|
+
# where the PROBABILITY OF ERROR as computed from the PHRED
|
63
|
+
# scores does not go above a certain cutoff
|
64
|
+
# (default is 0.005)
|
65
|
+
#
|
66
|
+
class ProbabilityTrimmer
|
67
|
+
|
68
|
+
def initialize(args = {})
|
69
|
+
@cutoff = args[:cutoff] || 0.005
|
70
|
+
@min = args[:min]
|
71
|
+
@seqtech = args[:seq_tech] || fail
|
72
|
+
# must be illumina, sanger or solexa
|
73
|
+
end
|
74
|
+
|
75
|
+
def trim_seq(dna)
|
76
|
+
trim_coord = dna.sequence.size
|
77
|
+
probabilities = dna.send(:"#{@seqtech}_probabilities")
|
78
|
+
probabilities.each_with_index do |q, i|
|
79
|
+
if q > @cutoff
|
80
|
+
trim_coord = i
|
81
|
+
break
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
begin
|
86
|
+
dna.sequence[0..trim_coord].gsub('.', 'N')
|
87
|
+
rescue
|
88
|
+
nil
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
#
|
94
|
+
# Base class for trimming paired-end reads
|
95
|
+
#
|
96
|
+
class PairedTrimmer < Enumerator
|
97
|
+
|
98
|
+
def initialize(args = {})
|
99
|
+
@pretrim = args[:pretrim]
|
100
|
+
# TODO
|
101
|
+
# need to be able to trim from left, right of pairs
|
102
|
+
# thinking about specifying a "trimming language"
|
103
|
+
#
|
104
|
+
# Something like:
|
105
|
+
#
|
106
|
+
# --trim="5L0 0L3"
|
107
|
+
# --trim="0L4 2L6"
|
108
|
+
#
|
109
|
+
# also thinking about breaking all of this trimming stuff
|
110
|
+
# out into its own package. (to be more unixy and stuff ;)
|
111
|
+
#
|
112
|
+
@min_length = args[:min_length] || 70
|
113
|
+
@min = args[:min] || 20
|
114
|
+
@offset = args[:cutoff] || 64 # XXX should both be called 'cutoff'
|
115
|
+
@left_trim = args[:left_trim] || 0 # trim adapter sequence
|
116
|
+
@skip_ambig = args[:skip_ambiguous] || false
|
117
|
+
@trimmer = args[:trimmer] || ProbabilityTrimmer.new(:min => @min,
|
118
|
+
:offset => @offset,
|
119
|
+
:seq_tech =>
|
120
|
+
:illumina)
|
121
|
+
end
|
122
|
+
|
123
|
+
def each(&block)
|
124
|
+
|
125
|
+
skipped_because_singleton = 0
|
126
|
+
skipped_because_length = 0
|
127
|
+
skipped_because_ambig = 0
|
128
|
+
|
129
|
+
@paired_iterator.each_with_index do |a, i|
|
130
|
+
seqa = @trimmer.trim_seq(a[0])[@left_trim..-1] rescue nil # trim adapter sequence
|
131
|
+
seqb = @trimmer.trim_seq a[1]
|
132
|
+
|
133
|
+
# make sure sequences are good
|
134
|
+
# (both pairs survived and both are at least min_length long)
|
135
|
+
# optionally skip reads that contain ambiguous nucleotides (N)
|
136
|
+
if [seqa, seqb].include? nil
|
137
|
+
skipped_because_singleton += 1
|
138
|
+
elsif !(seqb.length >= @min_length && seqa.length >= @min_length)
|
139
|
+
skipped_because_length += 1
|
140
|
+
elsif @skip_ambig and (seqb =~ /N/ or seqa =~ /N/)
|
141
|
+
skipped_because_ambig
|
142
|
+
else # reads are good
|
143
|
+
#
|
144
|
+
# TODO
|
145
|
+
# this is experiment specific. I save memory down the road
|
146
|
+
# by having both of the reads in the forward orientation
|
147
|
+
# but depending on the sequencing technology/pipeline
|
148
|
+
# this may change.
|
149
|
+
#
|
150
|
+
# I'm planning on removing the trimming steps from lederhosen
|
151
|
+
# for their own gem. With that, this will go too.
|
152
|
+
#
|
153
|
+
seqb = reverse_complement(seqb)
|
154
|
+
|
155
|
+
# Create and yield new fasta objects
|
156
|
+
# Perhaps this is slow?
|
157
|
+
a = Fasta.new :name => "#{i}:0", :sequence => seqa
|
158
|
+
b = Fasta.new :name => "#{i}:1", :sequence => seqb
|
159
|
+
block.yield a
|
160
|
+
block.yield b
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# reverse complement a DNA sequence
|
166
|
+
# assumes only GATCN nucleotides
|
167
|
+
def reverse_complement(s)
|
168
|
+
s.reverse.tr('GATCNgatcn','CTAGNctagn')
|
169
|
+
end
|
72
170
|
end
|
73
171
|
|
74
172
|
#
|
75
173
|
# Yields trimmed fasta records given an input
|
76
174
|
# interleaved, paired-end fastq file
|
175
|
+
#
|
77
176
|
class InterleavedTrimmer < PairedTrimmer
|
78
177
|
|
79
178
|
def initialize(interleaved_file, args = {})
|
@@ -88,15 +187,16 @@ class InterleavedTrimmer < PairedTrimmer
|
|
88
187
|
end
|
89
188
|
|
90
189
|
reads = Dna.new handle
|
91
|
-
|
92
|
-
|
93
|
-
super(iterator, args)
|
190
|
+
@paired_iterator = reads.each_slice(2)
|
94
191
|
|
192
|
+
super(args)
|
95
193
|
end
|
96
194
|
end
|
97
195
|
|
196
|
+
#
|
98
197
|
# Yield trimmed fasta records given two separate
|
99
198
|
# paired QSEQ files
|
199
|
+
#
|
100
200
|
class QSEQTrimmer < PairedTrimmer
|
101
201
|
def initialize(left_file, right_file, args = {})
|
102
202
|
# create an iterator that yields paired records
|
@@ -112,9 +212,9 @@ class QSEQTrimmer < PairedTrimmer
|
|
112
212
|
left_file_reads = Dna.new left_handle
|
113
213
|
right_reads = Dna.new right_handle
|
114
214
|
|
115
|
-
|
215
|
+
@paired_iterator = left_file_reads.zip(right_reads)
|
116
216
|
|
117
|
-
super(
|
217
|
+
super(args)
|
118
218
|
|
119
219
|
left_handle.close
|
120
220
|
right_handle.close
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -2,11 +2,18 @@
|
|
2
2
|
|
3
3
|
# Lederhosen
|
4
4
|
|
5
|
-
|
5
|
+
Lederhosen is a set of tools for OTU clustering rRNA amplicons using Robert Edgar's USEARCH.
|
6
6
|
|
7
|
-
|
7
|
+
It handles quality control of raw sequence data, running USEARCH, and creating and filtering tables.
|
8
8
|
|
9
|
-
|
9
|
+
Lederhosen is not a pipeline but rather a set of tools broken up into tasks. Tasks are invoked by running `lederhosen TASK ...`.
|
10
|
+
|
11
|
+
Lederhosen is designed with the following "pipeline" in mind:
|
12
|
+
|
13
|
+
1. Quality control of sequence data.
|
14
|
+
2. Clustering sequences to centroid or reference sequences (read: database)
|
15
|
+
3. Generating tables from USEARCH output.
|
16
|
+
4. Filtering tables to remove small or insignificant OTUs.
|
10
17
|
|
11
18
|
### About
|
12
19
|
|
@@ -20,13 +27,18 @@ using paired and non-paired end short reads such as those produced by Illumina (
|
|
20
27
|
### Features
|
21
28
|
|
22
29
|
- Sequence trimming (paired-end Illumina).
|
23
|
-
- Parallel, referenced-based clustering to TaxCollector using USEARCH
|
30
|
+
- Parallel, reference-based clustering to TaxCollector using USEARCH.
|
31
|
+
- Queue-agnostic support for running jobs on clusters.
|
32
|
+
- Support for RDP, TaxCollector or GreenGenes databases.
|
24
33
|
- Generation and filtering of OTU abundancy matrices.
|
25
34
|
|
26
35
|
### Installation
|
27
36
|
|
28
37
|
0. Obtain & Install [USEARCH](http://www.drive5.com/) (32bit is fine for non-commercial use)
|
29
|
-
2. Get a
|
38
|
+
2. Get a database:
|
39
|
+
- [TaxCollector](http://github.com/audy/taxcollector)
|
40
|
+
- [GreenGenes](http://greengenes.lbl.gov) 16S database
|
41
|
+
- File an [issue report](https://github.com/audy/lederhosen/issues) or pull request ;) to request support for a different database.
|
30
42
|
3. Install Lederhosen by typing:
|
31
43
|
|
32
44
|
`sudo gem install lederhosen`
|
@@ -48,7 +60,9 @@ You can also trim interleaved, paired-end FASTQ files:
|
|
48
60
|
|
49
61
|
lederhosen trim --reads_dir=reads/*.fastq --out_dir=trimmed/ --read-type='fastq'
|
50
62
|
|
51
|
-
|
63
|
+
Lederhosen will also trim off adapter sequences from the 5' end of the "left" read with the `--left-trim` option.
|
64
|
+
|
65
|
+
lederhosen trim --reads_dir=reads/*.fastq --out_dir=trimmed/ --read-type='fastq' --left-trim=11
|
52
66
|
|
53
67
|
### Create Database
|
54
68
|
|
@@ -94,8 +108,28 @@ This will create the files:
|
|
94
108
|
You can get the representative sequences for each cluster using the `get_reps` tasks. This will extract the representative sequence from
|
95
109
|
the __database__ you ran usearch with. Make sure you use the same database that you used when running usearch.
|
96
110
|
|
97
|
-
|
111
|
+
```bash
|
112
|
+
lederhosen get_reps \
|
113
|
+
--input=clusters.uc \
|
114
|
+
--database=taxcollector.fa \
|
115
|
+
--output=representatives.fasta
|
116
|
+
```
|
98
117
|
|
99
118
|
You can get the representatives from more than one cluster file using a glob:
|
100
119
|
|
101
|
-
|
120
|
+
```bash
|
121
|
+
lederhosen get_reps \
|
122
|
+
--input=*.uc \
|
123
|
+
--database=taxcollector.fa \
|
124
|
+
--output=representatives.fasta
|
125
|
+
```
|
126
|
+
|
127
|
+
## Acknowledgements
|
128
|
+
|
129
|
+
- Lexi, Vinnie and Kevin for beta-testing and putting up with bugs
|
130
|
+
- The QIIME project for inspiration
|
131
|
+
- Sinbad Richardson for the Lederhosen Guy artwork
|
132
|
+
|
133
|
+
## Please Cite
|
134
|
+
|
135
|
+
Please cite this GitHub repo (https://github.com/audy/lederhosen) with the version you used (type `lederhosen version`) unless I publish a paper. Then cite that.
|
@@ -0,0 +1,100 @@
|
|
1
|
+
99
|
2
|
+
91
|
3
|
+
100
|
4
|
+
85
|
5
|
+
100
|
6
|
+
91
|
7
|
+
100
|
8
|
+
81
|
9
|
+
100
|
10
|
+
81
|
11
|
+
100
|
12
|
+
91
|
13
|
+
100
|
14
|
+
91
|
15
|
+
100
|
16
|
+
91
|
17
|
+
100
|
18
|
+
91
|
19
|
+
100
|
20
|
+
91
|
21
|
+
100
|
22
|
+
91
|
23
|
+
100
|
24
|
+
91
|
25
|
+
92
|
26
|
+
91
|
27
|
+
100
|
28
|
+
91
|
29
|
+
99
|
30
|
+
91
|
31
|
+
100
|
32
|
+
91
|
33
|
+
100
|
34
|
+
81
|
35
|
+
100
|
36
|
+
81
|
37
|
+
100
|
38
|
+
91
|
39
|
+
100
|
40
|
+
91
|
41
|
+
100
|
42
|
+
91
|
43
|
+
99
|
44
|
+
91
|
45
|
+
100
|
46
|
+
91
|
47
|
+
100
|
48
|
+
91
|
49
|
+
100
|
50
|
+
81
|
51
|
+
100
|
52
|
+
91
|
53
|
+
98
|
54
|
+
91
|
55
|
+
40
|
56
|
+
91
|
57
|
+
96
|
58
|
+
91
|
59
|
+
35
|
60
|
+
81
|
61
|
+
100
|
62
|
+
91
|
63
|
+
100
|
64
|
+
91
|
65
|
+
100
|
66
|
+
91
|
67
|
+
100
|
68
|
+
91
|
69
|
+
100
|
70
|
+
91
|
71
|
+
100
|
72
|
+
91
|
73
|
+
100
|
74
|
+
91
|
75
|
+
100
|
76
|
+
91
|
77
|
+
49
|
78
|
+
91
|
79
|
+
100
|
80
|
+
91
|
81
|
+
100
|
82
|
+
91
|
83
|
+
100
|
84
|
+
91
|
85
|
+
100
|
86
|
+
91
|
87
|
+
89
|
88
|
+
81
|
89
|
+
100
|
90
|
+
91
|
91
|
+
100
|
92
|
+
91
|
93
|
+
100
|
94
|
+
91
|
95
|
+
100
|
96
|
+
91
|
97
|
+
100
|
98
|
+
91
|
99
|
+
100
|
100
|
+
91
|
data/spec/trimmer_spec.rb
CHANGED
@@ -2,13 +2,49 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Lederhosen::Trimmer do
|
4
4
|
|
5
|
-
describe Lederhosen::Trimmer::PairedTrimmer
|
5
|
+
describe Lederhosen::Trimmer::PairedTrimmer
|
6
|
+
|
7
|
+
describe Lederhosen::Trimmer::ProbabilityTrimmer do
|
8
|
+
let :sequence_trimmer do
|
9
|
+
# default cutoff should be 0.005
|
10
|
+
Lederhosen::Trimmer::ProbabilityTrimmer.new :seq_tech => :illumina
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'can be created' do
|
14
|
+
sequence_trimmer.should_not be_nil
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'trims records as expected'
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
describe Lederhosen::Trimmer::HuangTrimmer do
|
22
|
+
|
23
|
+
let :sequence_trimmer do
|
24
|
+
Lederhosen::Trimmer::HuangTrimmer.new(:offset => 64, :min => 20)
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'trims records as expected' do
|
28
|
+
|
29
|
+
trimmed_sizes = File.readlines('spec/data/trimmed_sizes.txt').map &:to_i
|
30
|
+
|
31
|
+
File.open('spec/data/example.fastq') do |handle|
|
32
|
+
records = Dna.new handle
|
33
|
+
records.each do |record|
|
34
|
+
trimmed_record = sequence_trimmer.trim_seq record
|
35
|
+
trimmed_record.size.should == trimmed_sizes.shift
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
6
39
|
|
7
40
|
end
|
8
41
|
|
9
42
|
describe Lederhosen::Trimmer::QSEQTrimmer do
|
10
43
|
|
11
|
-
let
|
44
|
+
let :qseq_trimmer do
|
45
|
+
Lederhosen::Trimmer::QSEQTrimmer.new 'spec/data/ILT_L_9_B_001_1.txt.gz',
|
46
|
+
'spec/data/ILT_L_9_B_001_3.txt.gz'
|
47
|
+
end
|
12
48
|
|
13
49
|
it 'can be initialized' do
|
14
50
|
qseq_trimmer.should_not be_nil
|
@@ -27,7 +63,9 @@ describe Lederhosen::Trimmer do
|
|
27
63
|
|
28
64
|
describe Lederhosen::Trimmer::InterleavedTrimmer do
|
29
65
|
|
30
|
-
let
|
66
|
+
let :interleaved_trimmer do
|
67
|
+
Lederhosen::Trimmer::InterleavedTrimmer.new 'spec/data/example.fastq'
|
68
|
+
end
|
31
69
|
|
32
70
|
it 'can be initialized' do
|
33
71
|
interleaved_trimmer.should_not be_nil
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,24 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: dna
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- - '
|
19
|
+
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0
|
21
|
+
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- - '
|
27
|
+
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0
|
29
|
+
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: progressbar
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,7 +116,6 @@ extra_rdoc_files:
|
|
116
116
|
- LICENSE.txt
|
117
117
|
files:
|
118
118
|
- .rspec
|
119
|
-
- .rvmrc
|
120
119
|
- Gemfile
|
121
120
|
- LICENSE.txt
|
122
121
|
- Rakefile
|
@@ -144,6 +143,7 @@ files:
|
|
144
143
|
- spec/data/ILT_L_9_B_002_3.txt.gz
|
145
144
|
- spec/data/example.fastq
|
146
145
|
- spec/data/test.uc
|
146
|
+
- spec/data/trimmed_sizes.txt
|
147
147
|
- spec/no_tasks_spec.rb
|
148
148
|
- spec/spec_helper.rb
|
149
149
|
- spec/trimmer_spec.rb
|
@@ -162,7 +162,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
162
|
version: '0'
|
163
163
|
segments:
|
164
164
|
- 0
|
165
|
-
hash: -
|
165
|
+
hash: -1116066410733680786
|
166
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
167
|
none: false
|
168
168
|
requirements:
|
data/.rvmrc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
rvm use 1.8.7@lederhosen --create
|