RubyGems - lederhosen - Versions diffs - 0.1.1 → 0.1.2 - Mend

lederhosen 0.1.1 → 0.1.2

Files changed (5) hide show

data/lib/lederhosen/tasks/name.rb CHANGED Viewed

@@ -19,9 +19,16 @@ module Lederhosen
       # run blat/blast
       cmd = [
-      'blat',
+        'blat',
+        database,
+        reps,
+        '-t=dna',
+        '-q=dna',
+        '-out=blast8',
+        output
       ]
+      exec cmd.join(' ')
     end

data/lib/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Lederhosen
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
 end

data/pipeline.sh CHANGED Viewed

@@ -1,37 +1,71 @@
 #!/bin/bash
-set +e
 # An example OTU clustering pipeline
 # Austin G. Davis-Richardson
 # <harekrishna at gmail dot com>
+# http://github.com/audy/lederhosen
+set -e
-raw_reads='raw_reads/*.txt'
-identities='0.975'
+raw_reads='spec/data/*.txt'
 out_dir='pipeline'
+taxcollector='taxcollector.fa'
+min_reads=50
+min_samples=10
 # trim reads
-bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
+bin/lederhosen trim \
+               --reads-dir=$raw_reads \
+               --out-dir=$out_dir/trimmed
 # join reads
-bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
+bin/lederhosen join \
+               --trimmed=$out_dir/trimmed/*.fasta \
+               --output=$out_dir/joined.fasta
 # filter reads
-bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
+bin/lederhosen k_filter \
+               --input=$out_dir/joined.fasta \
+               --output=$out_dir/filtered.fasta \
+               -k=10 \
+               --cutoff=50
 # sort
-bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
+bin/lederhosen sort \
+               --input=$out_dir/filtered.fasta \
+               --output=$out_dir/sorted.fasta
-# cluster
-for i in $identities
+for i in 0.80 0.90 0.95
 do
-    bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
-done
+    # cluster
+    bin/lederhosen cluster \
+                   --input=$out_dir/sorted.fasta \
+                   --output=$out_dir/clusters_"$i".uc \
+                   --identity=$i
-# generate otu tables
-for i in $identities
-do
-    bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
+    # filter uc file
+    bin/lederhosen uc_filter \
+                   --input=$out_dir/clusters_"$i".uc \
+                   --output=$out_dir/clusters_"$i".uc.filtered \
+                   --reads=$min_reads \
+                   --samples=$min_samples \
+    # generate otu table
+    bin/lederhosen otu_table \
+                   --clusters=$out_dir/clusters_"$i".uc.filtered \
+                   --output=$out_dir/otus_"$i"
+    # get representative reads
+    bin/lederhosen rep_reads \
+                   --clusters=$out_dir/clusters_"$i".uc.filtered \
+                   --joined=$out_dir/sorted.fasta \
+                   --output=$out_dir/representatives_"$i".fasta
+    # blast representative reads
+    bin/lederhosen name \
+                   --reps=$out_dir/representatives_"$i".fasta \
+                   --output=$out_dir/taxonomies_"$i".txt \
+                   --database=$taxcollector
 done
 echo "complete!"

data/readme.md CHANGED Viewed

@@ -16,44 +16,15 @@ Cluster raw Illumina 16S rRNA amplicon data to generate OTUs. Use at your own ri
 Type `lederhosen help` for complete instructions
-### 1. Trim raw reads
-`$ lederhosen trim --reads-dir=reads-dir/*.txt --out-dir=trimmed`
-### 2. Join trimmed reads
-`$ lederhosen join --trimmed=trimmed/*.fasta --output=joined.fasta`
-### 3. Sort trimmed reads
-`$ lederhosen sort --input=joined.fasta --output=sorted.fasta`
-### 4. Cluster sorted reads
-`$ lederhosen cluster --identity=0.975 --input=sorted.fasta --output=clusters`
-### 5. Make OTU tables
-`% lederhosen otu_table --clusters=clusters.uc --output=clusters_975.csv`
-This will output a csv (`clusters.975.csv`) and a fasta (`clusters.975.fasta`) file. The fasta file can be used to identify clusters in a 16S rRNA database using BLAST or something.
-### 6. Get representative reads from each cluster
-`% lederhosen rep_reads --clusters=clusters.uc --joined=joined.fasta --output=representatives.fasta`
-### 6. Get a fasta file containing all reads for each cluster
-(time consuming and probably not necessary)
-`% lederhosen split --clusters=clusters_97.5.txt --reads=joined.fasta --min-clst-size=100`
-`--min-clst-size` is the minimum number of reads a cluster must have in order for a fasta file containing its reads to be created. This is needed because it is computationally prohibitive to randomly write millions of files or store all reads in memory, sort, and output non-randomly.
-### 7. Identifying Clusters
-(Still under development)
-You need BLAT (in your `$PATH`) & TaxCollector.
-`$ lederhosen name --reps=representatives.fasta --db=taxcollector.fa --output=output_prefix`
+See pipeline.sh for example usage.
+## Features
+- Sequence trimming (paired-end Illumina).
+- K-mer filtering.
+- Clustering w/ UCLUST.
+- UCLUST output filtering.
+- Separation of representative reads.
+- Separation of all reads belonging to each cluster.
+- Identification of clusters using TaxCollector.
+- Generation of OTU abundance matrices.

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lederhosen
 version: !ruby/object:Gem::Version
-  hash: 25
+  hash: 31
   prerelease:
   segments:
   - 0
   - 1
-  - 1
-  version: 0.1.1
+  - 2
+  version: 0.1.2
 platform: ruby
 authors:
 - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-22 00:00:00 Z
+date: 2012-05-23 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dna