lederhosen 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,9 +19,16 @@ module Lederhosen
19
19
 
20
20
  # run blat/blast
21
21
  cmd = [
22
- 'blat',
23
-
22
+ 'blat',
23
+ database,
24
+ reps,
25
+ '-t=dna',
26
+ '-q=dna',
27
+ '-out=blast8',
28
+ output
24
29
  ]
30
+
31
+ exec cmd.join(' ')
25
32
 
26
33
  end
27
34
 
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.1.1'
2
+ VERSION = '0.1.2'
3
3
  end
data/pipeline.sh CHANGED
@@ -1,37 +1,71 @@
1
1
  #!/bin/bash
2
2
 
3
- set +e
4
-
5
3
  # An example OTU clustering pipeline
6
4
  # Austin G. Davis-Richardson
7
5
  # <harekrishna at gmail dot com>
6
+ # http://github.com/audy/lederhosen
7
+
8
+ set -e
8
9
 
9
- raw_reads='raw_reads/*.txt'
10
- identities='0.975'
10
+ raw_reads='spec/data/*.txt'
11
11
  out_dir='pipeline'
12
+ taxcollector='taxcollector.fa'
13
+ min_reads=50
14
+ min_samples=10
12
15
 
13
16
  # trim reads
14
- bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
17
+ bin/lederhosen trim \
18
+ --reads-dir=$raw_reads \
19
+ --out-dir=$out_dir/trimmed
15
20
 
16
21
  # join reads
17
- bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
22
+ bin/lederhosen join \
23
+ --trimmed=$out_dir/trimmed/*.fasta \
24
+ --output=$out_dir/joined.fasta
18
25
 
19
26
  # filter reads
20
- bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
27
+ bin/lederhosen k_filter \
28
+ --input=$out_dir/joined.fasta \
29
+ --output=$out_dir/filtered.fasta \
30
+ -k=10 \
31
+ --cutoff=50
21
32
 
22
33
  # sort
23
- bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
34
+ bin/lederhosen sort \
35
+ --input=$out_dir/filtered.fasta \
36
+ --output=$out_dir/sorted.fasta
24
37
 
25
- # cluster
26
- for i in $identities
38
+ for i in 0.80 0.90 0.95
27
39
  do
28
- bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
29
- done
40
+ # cluster
41
+ bin/lederhosen cluster \
42
+ --input=$out_dir/sorted.fasta \
43
+ --output=$out_dir/clusters_"$i".uc \
44
+ --identity=$i
30
45
 
31
- # generate otu tables
32
- for i in $identities
33
- do
34
- bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
46
+ # filter uc file
47
+ bin/lederhosen uc_filter \
48
+ --input=$out_dir/clusters_"$i".uc \
49
+ --output=$out_dir/clusters_"$i".uc.filtered \
50
+ --reads=$min_reads \
51
+ --samples=$min_samples \
52
+
53
+ # generate otu table
54
+ bin/lederhosen otu_table \
55
+ --clusters=$out_dir/clusters_"$i".uc.filtered \
56
+ --output=$out_dir/otus_"$i"
57
+
58
+ # get representative reads
59
+ bin/lederhosen rep_reads \
60
+ --clusters=$out_dir/clusters_"$i".uc.filtered \
61
+ --joined=$out_dir/sorted.fasta \
62
+ --output=$out_dir/representatives_"$i".fasta
63
+
64
+ # blast representative reads
65
+ bin/lederhosen name \
66
+ --reps=$out_dir/representatives_"$i".fasta \
67
+ --output=$out_dir/taxonomies_"$i".txt \
68
+ --database=$taxcollector
35
69
  done
36
70
 
37
71
  echo "complete!"
data/readme.md CHANGED
@@ -16,44 +16,15 @@ Cluster raw Illumina 16S rRNA amplicon data to generate OTUs. Use at your own ri
16
16
 
17
17
  Type `lederhosen help` for complete instructions
18
18
 
19
- ### 1. Trim raw reads
20
-
21
- `$ lederhosen trim --reads-dir=reads-dir/*.txt --out-dir=trimmed`
22
-
23
- ### 2. Join trimmed reads
24
-
25
- `$ lederhosen join --trimmed=trimmed/*.fasta --output=joined.fasta`
26
-
27
- ### 3. Sort trimmed reads
28
-
29
- `$ lederhosen sort --input=joined.fasta --output=sorted.fasta`
30
-
31
- ### 4. Cluster sorted reads
32
-
33
- `$ lederhosen cluster --identity=0.975 --input=sorted.fasta --output=clusters`
34
-
35
- ### 5. Make OTU tables
36
-
37
- `% lederhosen otu_table --clusters=clusters.uc --output=clusters_975.csv`
38
-
39
- This will output a csv (`clusters.975.csv`) and a fasta (`clusters.975.fasta`) file. The fasta file can be used to identify clusters in a 16S rRNA database using BLAST or something.
40
-
41
- ### 6. Get representative reads from each cluster
42
-
43
- `% lederhosen rep_reads --clusters=clusters.uc --joined=joined.fasta --output=representatives.fasta`
44
-
45
- ### 6. Get a fasta file containing all reads for each cluster
46
-
47
- (time consuming and probably not necessary)
48
-
49
- `% lederhosen split --clusters=clusters_97.5.txt --reads=joined.fasta --min-clst-size=100`
50
-
51
- `--min-clst-size` is the minimum number of reads a cluster must have in order for a fasta file containing its reads to be created. This is needed because it is computationally prohibitive to randomly write millions of files or store all reads in memory, sort, and output non-randomly.
52
-
53
- ### 7. Identifying Clusters
54
-
55
- (Still under development)
56
-
57
- You need BLAT (in your `$PATH`) & TaxCollector.
58
-
59
- `$ lederhosen name --reps=representatives.fasta --db=taxcollector.fa --output=output_prefix`
19
+ See pipeline.sh for example usage.
20
+
21
+ ## Features
22
+
23
+ - Sequence trimming (paired-end Illumina).
24
+ - K-mer filtering.
25
+ - Clustering w/ UCLUST.
26
+ - UCLUST output filtering.
27
+ - Separation of representative reads.
28
+ - Separation of all reads belonging to each cluster.
29
+ - Identification of clusters using TaxCollector.
30
+ - Generation of OTU abundance matrices.
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 1
10
- version: 0.1.1
9
+ - 2
10
+ version: 0.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-05-22 00:00:00 Z
18
+ date: 2012-05-23 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: dna