miga-base 0.3.0.0 → 0.3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -4
- data/actions/init.rb +258 -0
- data/actions/run_local.rb +1 -2
- data/actions/test_taxonomy.rb +4 -1
- data/bin/miga +8 -1
- data/lib/miga/dataset.rb +4 -4
- data/lib/miga/dataset_result.rb +7 -4
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_noref_nomulti.bash +3 -1
- data/scripts/clade_finding.bash +1 -1
- data/scripts/init.bash +1 -1
- data/scripts/miga.bash +1 -1
- data/scripts/mytaxa.bash +78 -72
- data/scripts/mytaxa_scan.bash +67 -62
- data/scripts/ogs.bash +1 -1
- data/scripts/trimmed_fasta.bash +4 -3
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +703 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +571 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +208 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +339 -0
- data/utils/enveomics/Manifest/Tasks/other.json +746 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +454 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +132 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +56 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +60 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +38 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +55 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +40 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +162 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +61 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +106 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +40 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +87 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +64 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +254 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +306 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +50 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +30 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +373 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +26 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm.rb +137 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +44 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +35 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +121 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +165 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +119 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +117 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +263 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +320 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +745 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +423 -0
- data/utils/enveomics/enveomics.R/R/utils.R +16 -0
- data/utils/enveomics/enveomics.R/README.md +52 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +30 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +33 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +64 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +26 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +26 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +49 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +97 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +20 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +53 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +15 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +30 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +71 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +27 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- data/utils/index_metadata.rb +0 -0
- data/utils/plot-taxdist.R +0 -0
- data/utils/requirements.txt +19 -19
- metadata +242 -2
@@ -0,0 +1,96 @@
|
|
1
|
+
# usage perl in_silico_GA.pl [options]
#
# Simulates paired reads from the sequence(s) in a FastA file. All sequence
# lines of the input are concatenated into a single chromosome string, and
# segments that run past its end are completed with bases from its start.
#
# Options:
#   --in         Input FastA file.
#   --out        Output FastA file with the simulated read pairs.
#   --coverage   Desired coverage.
#   --seq_error  Per-base probability of introducing a sequencing error.
#   --read_len   Length of each simulated read.
#   --ins_len    Insert length between the two reads of a pair.
#   --ins_var    Passed to random_normal as the deviation of segment lengths.

use Getopt::Long;
use Math::Random qw(:all);

$argu=GetOptions('in=s'=>\$infile,           # input fasta chr file
                 'out=s'=>\$outfile,         # output file name
                 'coverage=s'=>\$cov,        # desired output
                 'seq_error=s'=>\$seq_error, # sequencing error
                 'read_len=s'=>\$read_len,   # simulated read length
                 'ins_len=s'=>\$ins_len,     # insertion length
                 'ins_var=s'=>\$ins_var);

# Fail early with a readable message instead of an "Illegal division by
# zero" or a silent read from an unopened filehandle further down.
die "Missing mandatory option --in\n" unless defined $infile;
die "Missing mandatory option --out\n" unless defined $outfile;
die "Option --read_len must be a positive number\n"
   unless defined $read_len and $read_len>0;

$chr='';
open(IN,'<',$infile) or die "Cannot read $infile: $!\n";
open(OUT,'>',$outfile) or die "Cannot write $outfile: $!\n";

# Bases used to overwrite positions selected as sequencing errors. The
# replacement is drawn uniformly, so it may equal the original base.
%code=('0'=>'C', '1'=>'A', '2'=>'T', '3'=>'G');

while(<IN>){
   chomp;
   if(/^\>/){
      # Keep the whole defline as identifier unless it is an NCBI-style
      # ">gi|NUMBER|db|acc" header, in which case only NUMBER is kept.
      # (The regex must be bound with =~; "= ~/.../" assigns the bitwise
      # complement of a match against $_ and clobbers the identifier.)
      $gi=$_;
      $gi=$1 if $gi=~/^\>gi\|(\S+)\|\S+\|\S+/;
   }else{
      $chr.=$_;
   }
}
close(IN);

$chr_size=length $chr;
print "chromosome size: $chr_size\n";
$seg_size=2*$read_len+$ins_len;
$reads_number=int($cov*$chr_size/($read_len*2));
print "generated reads $reads_number x 2\n";

for $index (1..$reads_number){
   # Read names carry a zero-padded (8 digits) serial number.
   $id=sprintf('read%08d_%s', $index, $gi);

   #make start site;
   $start_site=int(rand($chr_size));
   #make short seg length;
   $seg_length=int(random_normal(1,$seg_size,$ins_var));

   #extract the segment
   $seg=substr($chr,$start_site,$seg_length);
   $gap=$seg_length-length($seg);
   # Segment ran past the end of the chromosome: complete it from the start.
   $seg.=substr($chr,0,$gap) if $gap>0;

   $id.='.start'.$start_site.'.seg_len'.$seg_length;

   #get the reads
   $seq1=substr($seg,0,$read_len);
   #$seg=~tr/ATCG/TAGC/ this line can change the orientation of the second read;
   $seq2=substr($seg,-$read_len);

   # sequencing error introducing
   @seq1=split(//,$seq1);
   @seq2=split(//,$seq2);
   @mut1=random_binomial($read_len,1,$seq_error);
   @mut2=random_binomial($read_len,1,$seq_error);

   for $i (0..$#mut1){
      $seq1[$i]=$code{int(rand(4))} if $mut1[$i]==1;
      $seq2[$i]=$code{int(rand(4))} if $mut2[$i]==1;
   }
   $seq1=join('',@seq1);
   $seq2=join('',@seq2);

   $id1=$id.'#0/1';
   $id2=$id.'#0/2';

   print OUT ">$id1\n$seq1\n>$id2\n$seq2\n";
}
close(OUT) or die "Cannot finish writing $outfile: $!\n";
|
95
|
+
|
96
|
+
|
Binary file
|
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @update: Oct-21-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
require "optparse"
|
9
|
+
ARGV << "-h" if ARGV.size==0
|
10
|
+
|
11
|
+
# Requires one or more gems, reporting unmet requirements.
#
# gems: A gem name or an Array of gem names. The passed Array is not modified.
# mandatory: If true (default), aborts with installation instructions for the
#   gem that failed to load and all the gems listed after it.
#
# Returns true if every gem was loaded, false if any failed to load and
# +mandatory+ is false.
def use(gems, mandatory=true)
  # Work on a private copy: the caller's list must survive this call intact.
  pending = gems.is_a?(Array) ? gems.dup : [gems]
  begin
    require "rubygems"
    until pending.empty?
      require pending.first
      # Only drop a gem once it has loaded, so that +pending+ starts at the
      # offending gem if a LoadError is raised.
      pending.shift
    end
    true
  rescue LoadError
    if mandatory
      abort "\nUnmet requirements, please install required gems:" +
        pending.map{ |gem| "\n gem install #{gem}" }.join + "\n\n"
    end
    false
  end
end
|
26
|
+
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @update: Jul-14-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
module JPlace
|
9
|
+
##### CLASSES:
|
10
|
+
# Placement.new(placement[, fields]): Initializes a new read placement.
|
11
|
+
# placement: A hash containing the placement.
|
12
|
+
# fields: If passed, sets the field order for all subsequent placements.
|
13
|
+
class Placement
  # +flag+ is used by JPlace.distances.rb as a placeholder.
  attr_accessor :flag
  # p: Array holding only the best (first) placement entry.
  # n: Array of names placed. m: Array of multiplicities, parallel to +n+.
  attr_reader :p, :n, :m

  # Order of the values in each placement entry, shared by all placements.
  @@fields = nil
  def self.fields=(fields)
    @@fields = fields
  end
  def self.fields
    @@fields
  end

  # placement: Hash with a 'p' entry and one of 'n' (names only, EPA-style)
  #   or 'nm' (name/multiplicity pairs, pplacer-style). If both are present,
  #   'nm' takes precedence.
  # fields: Field order; only used if no field order has been set yet.
  def initialize(placement, fields=nil)
    @@fields = fields if @@fields.nil? && !fields.nil?
    # Save only the best (first) placement:
    abort "Placements must contain a 'p' field.\n" if placement["p"].nil?
    abort "Placements must contain a 'p' field with at least one entry.\n" if placement["p"][0].nil?
    @p = [placement["p"][0]]
    # Find name-only placements (EPA-style):
    unless placement["n"].nil?
      @n = placement["n"]
      @m = @n.map{ 1 }
    end
    # Find multiplicity placements (pplacer-style):
    unless placement["nm"].nil?
      @n = placement["nm"].map{ |nm| nm[0] }
      @m = placement["nm"].map{ |nm| nm[1].to_i }
    end
    abort "Placements must contain one of 'n' or 'nm' fields.\n" if @n.nil? || @m.nil?
  end

  # Names and multiplicities as an Array of {:n=>name, :m=>multiplicity}.
  def nm
    n.zip(m).map{ |name, mult| {:n=>name, :m=>mult} }
  end

  # Value of +field+ in the saved placement. Aborts if the field order has
  # not been defined or does not include +field+.
  def get_field_value(field)
    p[0][field_index(field)]
  end

  # Sets the value of +field+ in the saved placement. Aborts under the same
  # conditions as get_field_value.
  def set_field_value(field, value)
    p[0][field_index(field)] = value
  end

  def edge_num
    get_field_value('edge_num').to_i
  end
  def likelihood
    get_field_value('likelihood').to_f
  end
  def like_weight_ratio
    get_field_value('like_weight_ratio').to_f
  end
  # Defaults to 0.0 if the field is defined but the value is nil.
  def distal_length
    (get_field_value('distal_length') || 0).to_f
  end
  # Defaults to 0.0 if the field is defined but the value is nil.
  def pendant_length
    (get_field_value('pendant_length') || 0).to_f
  end
  def to_s
    "#<Placement of #{self.n}: #{self.p}>"
  end

  private

  # Position of +field+ in the shared field order. Shared by the getter and
  # the setter so both fail the same way on undefined fields.
  def field_index(field)
    abort "Impossible to read placement with undefined fields." if @@fields.nil?
    idx = @@fields.find_index(field)
    abort "Undefined field #{field}." if idx.nil?
    idx
  end
end
|
74
|
+
|
75
|
+
# Ancillary class Tree
|
76
|
+
class Tree
  # Cached result of the iconv availability check (nil = not checked yet).
  @@HAS_ICONV = nil

  # Checks (once) whether the iconv library can be loaded.
  def self.has_iconv?
    return @@HAS_ICONV unless @@HAS_ICONV.nil?
    @@HAS_ICONV = true
    begin
      require 'rubygems'
      require 'iconv'
    rescue LoadError
      @@HAS_ICONV = false
    end
    @@HAS_ICONV
  end

  # Builds the tree described by +nwk+ (Newick format) and returns its root
  # Node. Invalid UTF-8 byte sequences are dropped first: with iconv if it is
  # available, otherwise with String#scrub where the running Ruby provides it.
  def self.from_nwk(nwk)
    if Tree.has_iconv?
      ic = Iconv.new('UTF-8//IGNORE','UTF-8')
      # The padding character works around iconv's handling of invalid bytes
      # at the very end of the input; it is removed right after.
      nwk = ic.iconv(nwk + ' ')[0..-2]
    elsif nwk.respond_to?(:scrub)
      # Without iconv the input used to pass through unsanitized.
      nwk = nwk.scrub('')
    end
    Node.new(nwk)
  end
end
|
98
|
+
|
99
|
+
# Node.new(nwk[, parent]): Initializes a new Node.
|
100
|
+
# nwk: Node's description in Newick format.
|
101
|
+
# parent: Node's parent, or nil if root node.
|
102
|
+
class Node
  # Class

  # Registry of nodes by edge index, shared by every Node created.
  @@edges = []
  def self.edges
    @@edges
  end
  # Saves +node+ under its edge index (nodes without index are ignored).
  def self.register(node)
    @@edges[node.index] = node unless node.index.nil?
  end

  # Class-level functions related to JPlace

  # Adds +placement+ to the node registered under the placement's edge_num.
  def self.link_placement(placement)
    abort "Trying to link placement in undefined edge #{placement.edge_num}: #{placement.to_s}" if @@edges[placement.edge_num].nil?
    @@edges[placement.edge_num].add_placement!(placement)
  end
  # Removes +placement+ from the node registered under its edge_num and
  # returns whatever that node's delete_placement! returns.
  def self.unlink_placement(placement)
    abort "Trying to unlink placement from undefined edge #{placement.edge_num}: #{placement.to_s}" if @@edges[placement.edge_num].nil?
    @@edges[placement.edge_num].delete_placement!(placement)
  end

  # Instance
  attr_reader :children, :length, :name, :label, :index, :nwk, :parent, :placements, :collapsed

  # nwk: Node's description in Newick format, with edge indexes in braces.
  # parent: Node's parent, or nil if root node.
  # Aborts on empty input, unindexed edges (other than an unindexed root
  # ending in ';'), unparseable metadata, or unbalanced parentheses.
  def initialize(nwk, parent=nil)
    abort "Empty newick.\n" if nwk.nil? or nwk==''
    # Non-terminal semicolons are replaced by '--'. This is done on a copy so
    # the caller's string is left alone (and frozen strings are accepted).
    nwk = nwk.gsub(/;(.)/, '--\1')
    @nwk = nwk
    @parent = parent
    @placements = []
    @collapsed = false
    # Find index
    index_m = /^(?<pre>.*)\{(?<idx>[0-9]+)\}(?<post>[^\(\),;]*);?$/.match(nwk)
    if index_m.nil? && parent.nil? && nwk[nwk.length-1]==';'
      @index = nil
    else
      abort "Unindexed edge found:\n#{@nwk}\n" if index_m.nil?
      nwk = index_m[:pre]+index_m[:post]
      @index = index_m[:idx].to_i
    end
    # Find name, label, and length
    meta_m = /^(\((?<cont>.+)\))?(?<name>[^:\(\);]*)(:(?<length>[0-9\.Ee+-]*)(?<label>\[[^\[\]\(\);]+\])?)?;?$/.match(nwk) or
      abort "Cannot parse node metadata (index #{@index}):\n#{@nwk}\n"
    nwk = meta_m[:cont]
    @name = meta_m[:name]
    @length = meta_m[:length]
    @label = meta_m[:label]
    # Find children: split the content on commas outside parentheses/quotes
    @children = []
    nwk ||= ''
    quote = nil
    while nwk != ''
      depth = 0 # Open parentheses so far; nil once a splitting comma is found
      cut = 0   # Number of characters belonging to the next child
      nwk.each_char do |chr|
        if quote.nil?
          if chr=='"' or chr=="'"
            quote = chr
          else
            depth += 1 if chr=='('
            depth -= 1 if chr==')'
            if depth==0 and chr==','
              depth = nil
              break
            end
          end
        else
          quote = nil if chr==quote
        end
        cut += 1
      end
      abort "Unbalanced node at edge {#{@index}}, with leftness #{depth}:\n#{@nwk}\n" unless depth.nil? or depth==0
      # nwk[0, cut] is empty when cut is 0 (a leading comma), which aborts as
      # an empty newick; nwk[0 .. cut-1] would return the whole string there.
      @children << Node.new(nwk[0, cut], self)
      nwk = nwk.length==cut ? '' : nwk[cut+1 .. -1]
    end
    Node.register(self)
  end

  # Accessors/Setters

  # Sets the name, replacing whitespace and Newick punctuation by '_'.
  def name=(new_name)
    @name = new_name.gsub(/[\s\(\),;:]/, '_')
  end

  # Tree algorithms

  # Yields the children (recursively) and then the node itself.
  def post_order &blk
    self.children.each { |n| n.post_order &blk }
    blk[self]
  end
  # Yields first child, node, second child. Aborts unless the node has
  # exactly zero or two children.
  def in_order &blk
    abort "Tree must be dycotomic to traverse in_order, node #{self.cannonical_name} "+
      "has #{self.children.length} children." unless [0,2].include? self.children.length
    self.children[0].in_order &blk unless self.children[0].nil?
    blk[self]
    self.children[1].in_order &blk unless self.children[1].nil?
  end
  # Yields the node itself and then the children (recursively).
  def pre_order &blk
    blk[self]
    self.children.each { |n| n.pre_order &blk }
  end
  # Array of nodes from this node up to the root (both included). Cached.
  def path_to_root
    @path_to_root ||= [self] + (self.parent.nil? ? [] : self.parent.path_to_root)
  end
  # Sum of branch lengths from this node to the root (missing lengths count
  # as zero). Cached.
  def distance_to_root
    @distance_to_root ||= path_to_root.map{ |n| n.length.to_f }.reduce(0.0, :+)
  end
  # Last common ancestor of this node and +node+.
  def lca(node)
    theirs = node.path_to_root
    self.path_to_root.find{ |n| theirs.include? n }
  end
  # Distance between this node and +node+ through their last common ancestor.
  def distance(node)
    self.distance_to_root + node.distance_to_root - (2.0 * self.lca(node).distance_to_root)
  end
  # Nodes are equal if they have the same edge index. Anything that is not a
  # Node (including nil) is never equal.
  def ==(node)
    node.is_a?(Node) && self.index == node.index
  end

  # Tree representation

  # First non-empty value among name, label, and "{index}"; or "".
  def cannonical_name
    return(self.name) unless self.name.nil? or self.name == ""
    return(self.label) unless self.label.nil? or self.label == ""
    return("{#{self.index.to_s}}") unless self.index.nil?
    ""
  end
  def to_s
    o = ""
    o += "(" + self.children.map{ |c| c.to_s }.join(",") + ")" if self.children.length > 0
    o += self.cannonical_name
    u = "#{self.length.nil? ? "" : self.length}#{self.label.nil? ? "" : self.label}"
    o += ":#{u}" unless u==""
    o
  end

  # Instance-level functions related to JPlace

  # Moves all the placements in descendant nodes to this node, and marks
  # the node as collapsed.
  def collapse!
    self.pre_order do |n|
      next if n == self
      while n.placements.length > 0
        p = Node.unlink_placement(n.placements[0])
        p.set_field_value('edge_num', self.index)
        Node.link_placement(p)
      end
    end
    @collapsed = true
  end
  def add_placement!(placement)
    @placements << placement
  end
  def delete_placement!(placement)
    @placements.delete(placement)
  end
end
|
251
|
+
|
252
|
+
end # module JPlace
|
253
|
+
|
@@ -0,0 +1,182 @@
|
|
1
|
+
|
2
|
+
##### CLASSES:
|
3
|
+
# Gene.new(genome, id): Initializes a new Gene.
|
4
|
+
# genome: A string uniquely identifying the parent genome.
|
5
|
+
# id: A string uniquely identifying the gene within the genome. It can be
|
6
|
+
# non-unique across genomes.
|
7
|
+
class Gene
  # genome_id: Index of the parent genome in Gene.genomes.
  # id: Identifier of the gene, as passed to the initializer.
  attr_reader :genome_id, :id

  # Genomes registered so far, shared by all genes.
  @@genomes = []

  # Get all genomes in the run as an array of strings.
  def self.genomes
    @@genomes
  end

  # genome: Either the genome identifier (registered on first use) or the
  #   Integer index of an already registered genome. Aborts if an Integer
  #   does not point to a registered genome.
  # id: Identifier of the gene within the genome.
  def initialize(genome, id)
    if genome.is_a? Integer
      # Negative indexes would silently count from the end of the list.
      abort "Internal error: Genome #{genome} does not exist yet." if
        genome < 0 || @@genomes[genome].nil?
      @genome_id = genome
    else
      @@genomes << genome unless @@genomes.include? genome
      @genome_id = @@genomes.index(genome)
    end
    @id = id
  end

  # Compare if two Gene objects refer to the same gene. Objects that are not
  # genes (including nil) are never equal.
  def ==(b)
    b.is_a?(Gene) && self.genome_id==b.genome_id && self.id==b.id
  end

  # Get the identifier of the genome this gene belongs to.
  def genome
    @@genomes[self.genome_id]
  end

  def to_s
    "#{self.genome}:#{self.id}"
  end
end
|
36
|
+
|
37
|
+
# OG.new(): Initializes an empty OG.
|
38
|
+
# OG.new(genomes, genes): Initializes a pre-computed OG.
|
39
|
+
# genomes: List of genomes as an array of strings (as in Gene.genomes).
|
40
|
+
# genes: List of genes as an array of strings, with '-' indicating no genes and
|
41
|
+
# multiple genes separated by ','.
|
42
|
+
class OG
  # genes: Array indexed by genome index (as in Gene.genomes); each entry is
  #   nil or an Array of gene identifiers.
  # notes: Array of notes, printed by to_s after the last genome column.
  attr_reader :genes, :notes

  # genomes: List of genomes as an array of strings (as in Gene.genomes).
  # genes: List of genes as an array of strings, with '-' indicating no genes
  #   and multiple genes separated by ','.
  # The OG is left empty unless both are passed.
  def initialize(genomes=nil, genes=nil)
    @genes = []
    @notes = []
    return if genomes.nil? || genes.nil?
    genes.each_with_index do |gene_list, genome_i|
      next if gene_list=="-"
      gene_list.split(/,/).each do |gene_id|
        self << Gene.new(genomes[genome_i], gene_id)
      end
    end
  end

  # Add genes or combine another OG into the loaded OG (self). Aborts for
  # any other kind of object.
  def <<(obj)
    if obj.is_a? Gene
      @genes[obj.genome_id] ||= []
      @genes[obj.genome_id] << obj.id unless self.include? obj
    elsif obj.is_a? OG
      obj.genes_obj.each{ |gene| self << gene }
    else
      abort "Unsupported class for #{obj}"
    end
  end

  # Get the list of genes as objects (internally saved as strings to save RAM).
  def genes_obj
    # Genes are rebuilt from the genome index, which Gene.new accepts
    # directly, instead of looking every genome name up again.
    self.genes.each_with_index.flat_map do |gene_ids, genome_id|
      next [] if gene_ids.nil?
      gene_ids.map{ |gene_id| Gene.new(genome_id, gene_id) }
    end
  end

  # Evaluates if the OG contains the passed gene.
  def include?(gene)
    return false if self.genes[gene.genome_id].nil?
    self.genes[gene.genome_id].include? gene.id
  end

  # Get the list of genomes (as indexes) containing genes in this OG.
  def genomes
    (0 .. Gene.genomes.length-1).select{ |gno| has_genes?(gno) }
  end

  # Adds a note that will be printed after the last column. If +note_idx+ is
  # passed, the note is saved in that position, appended with ' || ' to any
  # note already there.
  def add_note note, note_idx=nil
    if note_idx.nil?
      @notes << note
    else
      @notes[note_idx] = (@notes[note_idx].nil? ? '' :
        (@notes[note_idx]+' || ')) + note
    end
  end

  # Tab-delimited genes per genome ('-' for none, ',' between genes),
  # followed by "#" and the notes if there are any.
  def to_s
    (0 .. Gene.genomes.length-1).map do |genome_id|
      has_genes?(genome_id) ? self.genes[genome_id].join(",") : "-"
    end.join("\t") + ((self.notes.size==0) ? '' :
      ("\t#\t"+self.notes.join("\t")))
  end

  # Array of booleans, one per genome, indicating presence in the OG.
  def to_bool_a
    (0 .. Gene.genomes.length-1).map { |genome_id| has_genes?(genome_id) }
  end

  private

  # Single definition of "present in genome" shared by genomes, to_s, and
  # to_bool_a, so that a nil and an empty list are always treated alike.
  def has_genes?(genome_id)
    not(self.genes[genome_id].nil? or self.genes[genome_id].empty?)
  end
end
|
107
|
+
|
108
|
+
# OGCollection.new(): Initializes an empty collection of OGs.
|
109
|
+
class OGCollection
  # ogs: Array of OGs in the collection.
  # note_srcs: Array of annotation sources, printed in the header of to_s.
  attr_reader :ogs, :note_srcs

  def initialize
    @ogs = []
    @note_srcs = []
  end

  # Add an OG to the collection
  def <<(og)
    @ogs << og
  end

  # Compare OGs all-vs-all to identify groups that should be merged. Each OG
  # is merged into the first already-kept OG containing any of its genes, or
  # kept as a new OG if there is none.
  def consolidate!
    old_ogs = self.ogs
    @ogs = []
    old_ogs.each do |og|
      host = nil
      og.genes_obj.each do |gene|
        host = self.get_og gene
        break unless host.nil?
      end
      if host.nil?
        self << og
      else
        host << og
      end
    end
  end

  # Removes OGs present in less than 'fraction' of the genomes
  def filter_core!(fraction=1.0)
    min_genomes = (fraction * Gene.genomes.size).ceil
    @ogs.select! { |og| og.genomes.size >= min_genomes }
  end

  # Removes OGs present more than 'dups' number of times in any genome
  def remove_inparalogs!(dups=1)
    @ogs.select! do |og|
      # Genomes without genes in the OG are saved as nil (or are missing
      # altogether in an empty OG), and cannot exceed the limit.
      og.genes.all? { |pergenome| pergenome.nil? || pergenome.size <= dups }
    end
  end

  # Add a pair of RBM genes into the corresponding OG, or create a new OG.
  def add_rbm(a, b)
    og = self.get_og(a) || self.get_og(b)
    if og.nil?
      og = OG.new
      @ogs << og
    end
    og << a
    og << b
  end

  # Get the OG containing the gene (returns the first, if multiple).
  def get_og(gene)
    self.ogs.find { |og| og.include? gene }
  end

  # Get the genes from a given genome (returns an array of arrays), with one
  # entry per OG. Genomes that are not registered have no genes in any OG.
  def get_genome_genes(genome)
    genome_id = Gene.genomes.index(genome)
    self.ogs.map do |og|
      genome_id.nil? ? [] : (og.genes[genome_id] || [])
    end
  end

  # Add annotation sources
  def add_note_src src
    @note_srcs << src
  end

  # Tab-delimited table: a header with the genomes (and "#" plus the note
  # sources, if any) followed by one line per OG.
  def to_s
    Gene.genomes.join("\t") + ((self.note_srcs.length>0) ?
      ("\t#\t"+self.note_srcs.join("\t")) : '') +
      "\n" + self.ogs.map{ |og| og.to_s }.join("\n")
  end

  def to_bool_a ; ogs.map{ |og| og.to_bool_a } ; end
end
|
182
|
+
|