partis-bcr 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bin/FastTree +0 -0
- bin/add-chimeras.py +59 -0
- bin/add-seqs-to-outputs.py +81 -0
- bin/bcr-phylo-run.py +799 -0
- bin/build.sh +24 -0
- bin/cf-alleles.py +97 -0
- bin/cf-germlines.py +57 -0
- bin/cf-linearham.py +199 -0
- bin/chimera-plot.py +76 -0
- bin/choose-partially-paired.py +143 -0
- bin/circle-plots.py +30 -0
- bin/compare-plotdirs.py +298 -0
- bin/diff-parameters.py +133 -0
- bin/docker-hub-push.sh +6 -0
- bin/extract-pairing-info.py +55 -0
- bin/gcdyn-simu-run.py +223 -0
- bin/gctree-run.py +244 -0
- bin/get-naive-probabilities.py +126 -0
- bin/iqtree-1.6.12 +0 -0
- bin/lonr.r +1020 -0
- bin/makeHtml +52 -0
- bin/mds-run.py +46 -0
- bin/parse-output.py +277 -0
- bin/partis +1869 -0
- bin/partis-pip +116 -0
- bin/partis.py +1869 -0
- bin/plot-gl-set-trees.py +519 -0
- bin/plot-hmms.py +151 -0
- bin/plot-lb-tree.py +427 -0
- bin/raxml-ng +0 -0
- bin/read-bcr-phylo-trees.py +38 -0
- bin/read-gctree-output.py +166 -0
- bin/run-chimeras.sh +64 -0
- bin/run-dtr-scan.sh +25 -0
- bin/run-paired-loci.sh +100 -0
- bin/run-tree-metrics.sh +88 -0
- bin/smetric-run.py +62 -0
- bin/split-loci.py +317 -0
- bin/swarm-2.1.13-linux-x86_64 +0 -0
- bin/test-germline-inference.py +425 -0
- bin/tree-perf-run.py +194 -0
- bin/vsearch-2.4.3-linux-x86_64 +0 -0
- bin/vsearch-2.4.3-macos-x86_64 +0 -0
- bin/xvfb-run +194 -0
- partis_bcr-1.0.1.data/scripts/cf-alleles.py +97 -0
- partis_bcr-1.0.1.data/scripts/cf-germlines.py +57 -0
- partis_bcr-1.0.1.data/scripts/extract-pairing-info.py +55 -0
- partis_bcr-1.0.1.data/scripts/gctree-run.py +244 -0
- partis_bcr-1.0.1.data/scripts/parse-output.py +277 -0
- partis_bcr-1.0.1.data/scripts/split-loci.py +317 -0
- partis_bcr-1.0.1.data/scripts/test.py +1005 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/METADATA +1 -1
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/RECORD +101 -50
- partis_bcr-1.0.1.dist-info/top_level.txt +1 -0
- {partis → python}/glutils.py +1 -1
- python/main.py +30 -0
- {partis → python}/plotting.py +10 -1
- {partis → python}/treeutils.py +18 -16
- {partis → python}/utils.py +14 -7
- partis/main.py +0 -59
- partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/WHEEL +0 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/entry_points.txt +0 -0
- {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/licenses/COPYING +0 -0
- {partis → python}/__init__.py +0 -0
- {partis → python}/alleleclusterer.py +0 -0
- {partis → python}/allelefinder.py +0 -0
- {partis → python}/alleleremover.py +0 -0
- {partis → python}/annotationclustering.py +0 -0
- {partis → python}/baseutils.py +0 -0
- {partis → python}/cache/__init__.py +0 -0
- {partis → python}/cache/cached_uncertainties.py +0 -0
- {partis → python}/clusterpath.py +0 -0
- {partis → python}/coar.py +0 -0
- {partis → python}/corrcounter.py +0 -0
- {partis → python}/datautils.py +0 -0
- {partis → python}/event.py +0 -0
- {partis → python}/fraction_uncertainty.py +0 -0
- {partis → python}/gex.py +0 -0
- {partis → python}/glomerator.py +0 -0
- {partis → python}/hist.py +0 -0
- {partis → python}/hmmwriter.py +0 -0
- {partis → python}/hutils.py +0 -0
- {partis → python}/indelutils.py +0 -0
- {partis → python}/lbplotting.py +0 -0
- {partis → python}/mds.py +0 -0
- {partis → python}/mutefreqer.py +0 -0
- {partis → python}/paircluster.py +0 -0
- {partis → python}/parametercounter.py +0 -0
- {partis → python}/paramutils.py +0 -0
- {partis → python}/partitiondriver.py +0 -0
- {partis → python}/partitionplotter.py +0 -0
- {partis → python}/performanceplotter.py +0 -0
- {partis → python}/plotconfig.py +0 -0
- {partis → python}/processargs.py +0 -0
- {partis → python}/prutils.py +0 -0
- {partis → python}/recombinator.py +0 -0
- {partis → python}/scanplot.py +0 -0
- {partis → python}/seqfileopener.py +0 -0
- {partis → python}/treegenerator.py +0 -0
- {partis → python}/viterbicluster.py +0 -0
- {partis → python}/vrc01.py +0 -0
- {partis → python}/waterer.py +0 -0
bin/run-chimeras.sh
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# ----------------------------------------------------------------------------------------
|
4
|
+
# dual barcode data from chaim/sai(?)
|
5
|
+
label=chimera-dual-barcode
|
6
|
+
datadir=/fh/fast/matsen_e/data/$label
|
7
|
+
|
8
|
+
outdir=$fs/partis/$label/fixed-d-synthesis
|
9
|
+
for fn in good chimeras; do
|
10
|
+
mkdir -p $outdir/$fn
|
11
|
+
# ./bin/partis cache-parameters --only-sm --infname $datadir/$fn.fa --parameter-dir $outdir/$fn/parameters --plotdir $outdir/$fn/plots --sw-cachefname $outdir/$fn/sw-cache.yaml --n-procs 5 --only-overall-plots --only-csv-plots >$outdir/$fn/log
|
12
|
+
./bin/chimera-plot.py $outdir/$fn/sw-cache.yaml $outdir/$fn/chimera-plots --title $fn --chunk-len 75
|
13
|
+
|
14
|
+
# ./bin/partis simulate --outfname $outdir/$fn/simu.yaml --parameter-dir $outdir/$fn/parameters --parameter-type sw --n-sim-events 1000 --n-leaves 1 --constant-number-of-leaves
|
15
|
+
# ./bin/partis cache-parameters --is-simu --only-sm --infname $outdir/$fn/simu.yaml --parameter-dir $outdir/$fn/simu-parameters --plotdir $outdir/$fn/simu-plots --sw-cachefname $outdir/$fn/simu-parameters/sw-cache.yaml --n-procs 5 --only-overall-plots --only-csv-plots >$outdir/$fn/simu-log
|
16
|
+
# ./bin/add-chimeras.py $outdir/$fn/simu.yaml $outdir/$fn/simu-all-chimeras.fa --min-chunk-len 1
|
17
|
+
# ./bin/partis cache-parameters --only-sm --infname $outdir/$fn/simu-all-chimeras.fa --parameter-dir $outdir/$fn/simu-all-chimeras-parameters --plotdir $outdir/$fn/simu-all-chimeras-plots --sw-cachefname $outdir/$fn/simu-all-chimeras-parameters/sw-cache.yaml --n-procs 5 --only-overall-plots --only-csv-plots >$outdir/$fn/simu-all-chimeras-log
|
18
|
+
# ./bin/chimera-plot.py $outdir/$fn/simu-parameters/sw-cache.yaml $outdir/$fn/chimera-plots-simu
|
19
|
+
# ./bin/chimera-plot.py $outdir/$fn/simu-all-chimeras-parameters/sw-cache.yaml $outdir/$fn/chimera-plots-simu-all-chimeras
|
20
|
+
# ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots/simu-all-chimeras-vs-none/$fn --log y --translegend 0:-0.2 --names no@chimeras:all@chimeras --plotdirs $outdir/$fn/chimera-plots-simu:$outdir/$fn/chimera-plots-simu-all-chimeras
|
21
|
+
done
|
22
|
+
./bin/compare-plotdirs.py --outdir $outdir/comparison-plots-good-vs-chimeras --log y --translegend 0:-0.2 --names none:all@chimeras --plotdirs $outdir/good/chimera-plots:$outdir/chimeras/chimera-plots
|
23
|
+
# ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots-no-indels --log y --translegend 0:-0.2 --names good:chimeras --plotdirs $outdir/good/chimera-plots:$outdir/chimeras/chimera-plots
|
24
|
+
# ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots XXX --log y --translegend 0:-0.2 --names good:chimeras --plotdirs $outdir/good/chimera-plots:$outdir/chimeras/chimera-plots
|
25
|
+
|
26
|
+
# subd=sw/mute-freqs/overall
|
27
|
+
# ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots/data-vs-simu/$subd --names good:good-simu:chimeras:chimeras-simu --plotdirs $outdir/good/plots/$subd:$outdir/good/simu-plots/$subd:$outdir/chimeras/plots/$subd:$outdir/chimeras/simu-plots/$subd
|
28
|
+
|
29
|
+
exit 0
|
30
|
+
|
31
|
+
# ----------------------------------------------------------------------------------------
|
32
|
+
# initial simulation study
|
33
|
+
pd=/fh/fast/matsen_e/dralph/partis/chimera-testing
|
34
|
+
|
35
|
+
simfn=test/reference-results/test/simu.csv
|
36
|
+
chfn=simu-all-chimeras
|
37
|
+
|
38
|
+
label=typical-simu
|
39
|
+
# ./bin/partis cache-parameters --only-sm --infname $simfn --parameter-dir $pd/parameters/$label --sw-cachefname $pd/parameters/$label/sw-cache.csv --n-procs 8
|
40
|
+
# ./bin/chimera-plot.py $pd/parameters/$label/sw-cache.csv $pd/plots/$label
|
41
|
+
|
42
|
+
# ./bin/add-chimeras.py $simfn $chfn.fa
|
43
|
+
# ./bin/partis cache-parameters --only-sm --infname $chfn.fa --parameter-dir $pd/parameters/$chfn --sw-cachefname $pd/parameters/$chfn/sw-cache.csv --n-procs 8
|
44
|
+
# ./bin/chimera-plot.py $pd/parameters/$chfn/sw-cache.csv $pd/plots/$chfn
|
45
|
+
|
46
|
+
# ./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/simu --names typical:chimeras --plotdirs $pd/plots/$label:$pd/plots/$chfn
|
47
|
+
# ./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/simu-log --log y --translegend 0:0.15 --names typical:chimeras --plotdirs $pd/plots/$label:$pd/plots/$chfn
|
48
|
+
|
49
|
+
# data
|
50
|
+
study=jason-mg
|
51
|
+
samples="MK02-igh MK03-igh MK08-igh AR03-igh AR04-igh HD10-igh HD13-igh"
|
52
|
+
# for sample in $samples; do
|
53
|
+
# echo $sample
|
54
|
+
# ./bin/chimera-plot.py /fh/fast/matsen_e/processed-data/partis/$study/gls-gen-paper-v10/$sample/sw-cache.csv $pd/plots/$study/$sample
|
55
|
+
# done
|
56
|
+
./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/$study-log --log y --translegend 0:-0.2 --names `echo $samples|sed 's/ /:/g'` --plotdirs $pd/plots/$study
|
57
|
+
|
58
|
+
study=jason-influenza
|
59
|
+
samples="FV-igh IB-igh GMC-igh"
|
60
|
+
# for sample in $samples; do
|
61
|
+
# echo $sample
|
62
|
+
# ./bin/chimera-plot.py /fh/fast/matsen_e/processed-data/partis/$study/gls-gen-paper-v10/$sample/sw-cache.csv $pd/plots/$study/$sample
|
63
|
+
# done
|
64
|
+
./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/$study-log --log y --translegend 0:0 --names `echo $samples|sed 's/ /:/g'` --plotdirs $pd/plots/$study
|
bin/run-dtr-scan.sh
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# action=test #train # plot
|
4
|
+
# for trv in v0 v1 v2 v3; do
|
5
|
+
# trlabel=dtr-train-$trv
|
6
|
+
# common=" --training-label $trlabel --n-max-queries 150000 --n-max-procs 7" # --cgroup among-families --tvar affinity"# ; trseed=0 --training-seed $trseed
|
7
|
+
# ./test/dtr-scan.py $action --label dtr-train-v3 $common
|
8
|
+
# done
|
9
|
+
# exit 0
|
10
|
+
|
11
|
+
action=plot #test # train
|
12
|
+
for cg in within-families among-families; do
|
13
|
+
for tv in affinity delta-affinity; do
|
14
|
+
lfn=_output/dtr-scan/$cg-$tv.txt
|
15
|
+
echo "" >$lfn
|
16
|
+
for trv in v0 v1 v2 v3; do
|
17
|
+
trlabel=dtr-train-$trv
|
18
|
+
common=" --training-label $trlabel --n-max-queries 150000 --n-max-procs 7 --cgroup $cg --tvar $tv" # ; trseed=0 --training-seed $trseed
|
19
|
+
./test/dtr-scan.py $action --label dtr-train-v0 $common >>$lfn
|
20
|
+
./test/dtr-scan.py $action --label dtr-train-v1 $common >>$lfn
|
21
|
+
./test/dtr-scan.py $action --label dtr-train-v2 $common >>$lfn
|
22
|
+
./test/dtr-scan.py $action --label dtr-train-v3 $common >>$lfn
|
23
|
+
done
|
24
|
+
done
|
25
|
+
done
|
bin/run-paired-loci.sh
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
bin=./test/cf-paired-loci.py
|
4
|
+
|
5
|
+
# methods=synth-distance-0.03:synth-singletons-0.20:vjcdr3-0.8:enclone:mobille:scoper:vsearch-partition:partition # this is for vs-shm; for time-reqd: enclone:mobille:scoper:vsearch-partition:partition NOTE enclone needs fixing tho (for missing uids)
|
6
|
+
# methods=synth-distance-0.00:synth-distance-0.005:synth-distance-0.02:partition # this is for vs-shm; for time-reqd: enclone:mobille:scoper:vsearch-partition:partition NOTE enclone needs fixing tho (for missing uids)
|
7
|
+
methods=igblast:annotate:star-partition:partition:linearham # for test-antn imbal-v3
|
8
|
+
# methods=partition:single-chain-partis; xstr="--combo-extra-str single-vs-joint-partis"
|
9
|
+
# methods=scoper:single-chain-scoper; xstr="--combo-extra-str single-vs-joint-scoper" # NOTE this is only for vs-shm (comparing single vs joint); for time-reqd you only need scoper
|
10
|
+
# methods=simu:cache-parameters:partition:write-fake-paired-annotations:replay-plot:iqtree:raxml:igphyml:gctree:gctree-mut-mult:tree-perf # coar stuff cache-parameters:partition
|
11
|
+
astr="--actions $methods" #partition --merge-paired-partitions" #$methods"
|
12
|
+
# astr="--actions combine-plots --plot-metrics $methods --perf-metrics precision:n-clusters"
|
13
|
+
# astr="--actions combine-plots --plot-metrics $methods $xstr"
|
14
|
+
common="--n-sub-procs 15 --n-max-procs 5 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci $astr --dry" # /fh/local/dralph
|
15
|
+
# echo $bin --label vs-shm --version v3 --n-replicates 3 --n-leaves-list 3 --n-sim-events-list 10000 --scratch-mute-freq-list 0.01:0.05:0.10:0.20:0.30 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --mutate-stop-codons\" --final-plot-xvar scratch-mute-freq $common # with these simu args, the scratch mute freq is almost identical to the final mean mfreq, so can just use the scratch mute freq on x axis
|
16
|
+
# # echo $bin --label vs-n-leaves --version v0 --n-replicates 3 --n-leaves-list 1:5:10:50 --n-sim-events-list 1000 --simu-extra-args="--constant-number-of-leaves" $common
|
17
|
+
# echo $bin --label vs-n-leaves --version v1 --n-replicates 3 --n-leaves-list 1:2:3:5:7:10:25:50 --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist --simu-extra-args="--constant-number-of-leaves" $common
|
18
|
+
# # echo $bin --label vs-n-leaves --version v2 --n-replicates 3 --n-leaves-list 1:2:3:5:7:25:50 --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist --scratch-mute-freq-list 0.15 --simu-extra-args="--constant-number-of-leaves --flat-mute-freq --same-mute-freq-for-all-seqs" $common
|
19
|
+
# echo $bin --label vs-n-sim-events --version v1 --n-replicates 3 --n-leaves-list 1 --n-sim-events-list 100:1000:10000:50000 --scratch-mute-freq-list 0.07:0.15 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --constant-number-of-leaves\" --final-plot-xvar n-sim-events --pvks-to-plot 0.15 $common
|
20
|
+
# echo $bin --label time-reqd --version v0 --n-replicates 3 --n-leaves-list 10 --n-sim-events-list 100:1000:10000 --perf-metrics time-reqd --x-legend-var n-seqs $common --n-sub-procs 28 --n-max-procs 1 # NOTE duplicate parallelization args also NOTE there's also a 4th replicate, and 50000 samples for at least 3 replicates NOTE also will have to turn up fail % by hand for enclone
|
21
|
+
# echo $bin --label pairclean --version v0 --n-replicates 3 --n-leaves-list 3:10 --constant-number-of-leaves-list 0:1 --n-sim-events-list 3000 --scratch-mute-freq-list 0.15 --mean-cells-per-droplet-list None:1:2:5:10 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --final-plot-xvar mean-cells-per-droplet --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs $common # --fraction-of-reads-to-remove-list 0.05
|
22
|
+
# echo $bin --label pairclean --version v1 --n-replicates 3 --n-leaves-list 1:2:3:10:hist --constant-number-of-leaves-list 1:1:1:1:0 --zip-vars n-leaves:constant-number-of-leaves --n-sim-events-list 3000 --scratch-mute-freq-list 0.15 --mean-cells-per-droplet-list None:1:2:5:10 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --final-plot-xvar mean-cells-per-droplet --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs $common # --fraction-of-reads-to-remove-list 0.05
|
23
|
+
# echo $bin --label pairclean --version v2 --n-replicates 3 --n-leaves-list 1:2:3:10:hist --constant-number-of-leaves-list 1:1:1:1:0 --zip-vars n-leaves:constant-number-of-leaves --n-sim-events-list 3000 --scratch-mute-freq-list 0.07 --mean-cells-per-droplet-list 1:2:5:10 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --constant-cells-per-droplet\" --final-plot-xvar mean-cells-per-droplet --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs $common # --fraction-of-reads-to-remove-list 0.05
|
24
|
+
# echo $bin --label pairfix --version v1 --n-replicates 3 --n-leaves-list hist --n-sim-events-list 3000 --scratch-mute-freq-list 0.07 --bulk-data-fraction-list 0:0.5:0.8:0.9:0.95 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --inference-extra-args=\"--pair-unpaired-seqs-with-paired-family\" --final-plot-xvar bulk-data-fraction --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs --empty-bin-range 0:200 $common
|
25
|
+
echo $bin --label test-antn --version imbal-v3 --n-replicates 2 --tree-imbalance-list None:0.04:0.07 --scratch-mute-freq-list 0.15 --n-leaves-list 50 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist $common # NOTE also made :0.13:0.14:0.16
|
26
|
+
exit 0
|
27
|
+
# echo $bin --label bcr-phylo-antn --version v0 --n-replicates 2 --obs-times-list 50:150:300 --n-sim-seqs-per-generation-list 15:45 --context-depend-list 1 --simu-type bcr-phylo --dont-observe-common-ancestors --antn-perf --perf-metrics naive-hdist $common
|
28
|
+
simu_extra="--simu-extra-args=\"--target-distance 10 --context-depend 1 --tdist-weights random-uniform --min-target-distance 2 --n-sim-seqs-per-generation 89 --parameter-variances n-sim-seqs-per-generation,23 --aa-paratope-positions N=60 --aa-struct-positions N=100 --leaf-sampling-scheme high-affinity --n-naive-seq-copies 100\""
|
29
|
+
echo $bin --label gct-valid --version v6 --n-replicates 3 --obs-times-list 15:20:30:40:50 --n-sim-events-list 70 --carry-cap-list 1000 --simu-type bcr-phylo --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf --final-plot-xvar obs-times $simu_extra $common # NOTE also have sampling times 10, 100, 150 for most methods
|
30
|
+
# echo $bin --label gct-valid --version gcdyn-v1 --n-replicates 2 --simu-type gcdyn --n-sim-events-list 70 --obs-times-list 15:30 --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf $common
|
31
|
+
|
32
|
+
# NOTE have to set --n-sub-procs to 1 for the partition step, and re-set --n-sim-events-list for each --n-leaves value (500 leaves: 10 events, 100 leaves: 50 events, 50 leaves: 100 events):
|
33
|
+
# echo $bin --label key-trans --version v0 --n-replicates 2 --biggest-naive-seq-cluster-to-calculate 5:15:10000 --biggest-logprob-cluster-to-calculate 5:15:10000 --zip-vars biggest-naive-seq-cluster-to-calculate:biggest-logprob-cluster-to-calculate --scratch-mute-freq-list 0.15 --n-leaves-list 50:100:500 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --constant-number-of-leaves --only-genes IGHV1-2*01:IGHD2-15*01:IGHJ6*02:IGKV1-12*01:IGKJ1*01:IGKDx-x*x --force-dont-generate-germline-set --allowed-cdr3-lengths 66:33\" --n-sim-events-list 10 --inference-extra-args=\"--debug 1 --sw-debug 0\" --perf-metrics time-reqd --final-plot-xvar biggest-logprob-cluster-to-calculate --bcrham-time $common
|
34
|
+
# echo $bin --label lcdr3 --version v0 --n-replicates 2 --n-sim-events-list 1000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM9-22:ighM15-34:ighM36-43:ighM45-52:ighM54-61:ighM63-73 $common
|
35
|
+
# echo $bin --label lcdr3 --version v2 --n-replicates 2 --n-sim-events-list 5000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM24-25:ighM27-28:ighM30-31:ighM48-49 --simu-extra-args=\"--n-leaves 1 --constant-number-of-leaves\" $common
|
36
|
+
# echo $bin --label lcdr3 --version one-j-v2 --n-replicates 2 --n-sim-events-list 5000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM24-25:ighM27-28:ighM30-31:ighM48-49 --simu-extra-args=\"--only-genes=IGHJ4*01:IGKJ1*01 --n-genes-per-region ::1 --n-leaves 1 --constant-number-of-leaves\" $common # --inference-extra-args=\"--add-pairwise-clustering-metrics\"
|
37
|
+
# echo $bin --label lcdr3 --version all-together --n-replicates 2 --n-sim-events-list 5000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM24-25,ighM27-28,ighM30-31 --simu-extra-args=\"--n-leaves 1 --constant-number-of-leaves\" --final-plot-xvar allowed-cdr3-lengths $common
|
38
|
+
# echo $bin --label test-data-in --version v0 --data-in-cfg datascripts/meta/spisak-simu/samples.yaml --dataset-in-list cdr_18_set_1-simu-v0:cdr_21_set_1-simu-v0:cdr_24_set_1-simu-v0:cdr_42_set_1-simu-v0 $common # --extra-args=\"--add-pairwise-clustering-metrics\"
|
39
|
+
exit 0
|
40
|
+
|
41
|
+
# ----------------------------------------------------------------------------------------
|
42
|
+
# data
|
43
|
+
dvsn=v1 #test-ctnt
|
44
|
+
simvsn=v0
|
45
|
+
|
46
|
+
# bin=./datascripts/run.py
|
47
|
+
# common="--study 10x-examples --version $dvsn --paired-loci --n-procs 20 --no-slurm" # --only-csv-plots"
|
48
|
+
# infxtra='--extra-args=\"--make-per-gene-plots\"'
|
49
|
+
# # $bin cache-parameters $common --no-simu $infxtra
|
50
|
+
# # $bin partition $common --no-simu $infxtra
|
51
|
+
# # $bin simulate $common --logstr $simvsn # --extra-args="--paired-correlation-values v_gene.d_gene,0.5:v_gene.j_gene,0.5"
|
52
|
+
# # $bin cache-parameters $common $infxtra
|
53
|
+
# # $bin partition $common $infxtra
|
54
|
+
# exit 0
|
55
|
+
|
56
|
+
bin=./bin/compare-plotdirs.py
|
57
|
+
fsddir=/fh/fast/matsen_e/processed-data/partis
|
58
|
+
bidir=$fsddir/10x-examples/$dvsn
|
59
|
+
bodir=$bidir/comparisons
|
60
|
+
|
61
|
+
tsubj=hs-1-postvax
|
62
|
+
tpd=$bidir/comparisons/$tsubj
|
63
|
+
$bin --outdir $bodir/ins-del-lens-all/$tsubj --plotdirs $bidir/$tsubj/igh+igk/plots/igh/overall:$bidir/$tsubj/igh+igk/plots/igk/overall:$bidir/$tsubj/igh+igl/plots/igl/overall --names igh:igk:igl --normalize & # --extra-stats mean
|
64
|
+
$bin --outdir $bodir/ins-del-lens-igh/$tsubj --plotdirs $bidir/$tsubj/igh+igk/plots/igh/overall --names igh --normalize --single-plotdir & # --extra-stats mean
|
65
|
+
exit 0
|
66
|
+
|
67
|
+
for subj in hs-1-postvax hs-1-prevax hs-2-pbmc mm-balbc; do #hs-1-prevax; do #mm-balbc
|
68
|
+
for ltmp in igh igk; do # igl
|
69
|
+
# ddir=$bidir/$subj/single-chain/plots/$ltmp/parameters/hmm
|
70
|
+
ddir=$bidir/$subj/igh+igk/plots/$ltmp
|
71
|
+
sdir=$bidir/$subj-simu-$simvsn/single-chain/plots/$ltmp/parameters/true
|
72
|
+
for subd in overall mute-freqs; do
|
73
|
+
# for subd in mute-freqs/per-gene-per-position/v mute-freqs/per-gene-per-position/d mute-freqs/per-gene-per-position/j; do
|
74
|
+
$bin --outdir $bodir/$subj/$subd-$ltmp --plotdirs $ddir/$subd:$sdir/$subd --names data:simu --normalize & # --extra-stats mean
|
75
|
+
done
|
76
|
+
# break
|
77
|
+
done
|
78
|
+
# for ltmp in igh igk igl; do
|
79
|
+
# # single-chain/plots/igh/partitions/sizes
|
80
|
+
# # plots/igh/partitions/sizes
|
81
|
+
# substr=plots/$ltmp/partitions/sizes
|
82
|
+
# ddir=$bidir/$subj #/single-chain/$substr
|
83
|
+
# sdir=$bidir/$subj-simu-$simvsn #/single-chain/$substr
|
84
|
+
# subd=cluster-sizes
|
85
|
+
# echo $bin --outdir $bodir/$subj/$subd-$ltmp --plotdirs $sdir/single-chain/$substr:$ddir/single-chain/$substr:$sdir/$substr:$ddir/$substr --names simu@single:data@single:simu@joint:data@joint \
|
86
|
+
# --add-to-title $ltmp --log xy --colors '#006600:#2b65ec:#990012:black' --linewidths 5:2:5:2 &
|
87
|
+
# done
|
88
|
+
done
|
89
|
+
exit 0
|
90
|
+
|
91
|
+
for subjdir in $bidir/hs-1-postvax $fsddir/goo-dengue-10x/v12/d-14; do
|
92
|
+
pstr=paired-seqs-per-seq
|
93
|
+
cfd=$bodir/`basename $subjdir`/$pstr
|
94
|
+
# for tstr in before after; do
|
95
|
+
# mkdir -p $cfd/$tstr
|
96
|
+
# cp -v $subjdir/plots/$pstr-$tstr.csv $cfd/$tstr/$pstr.csv
|
97
|
+
# done
|
98
|
+
$bin --outdir $cfd/cf --plotdirs $cfd/after:$cfd/before --names after:before --normalize --colors='#006600:#990012' --xbounds=-0.5:9.5 --ybounds=0:0.9 --xticks 0:1:2:3:4:5:6:7:8:9 --no-errors --square-bins --ytitle="fraction of seqs" & # --add-to-title `basename $subjdir`
|
99
|
+
done
|
100
|
+
|
bin/run-tree-metrics.sh
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
bin=./test/cf-tree-metrics.py
|
4
|
+
|
5
|
+
label=v0
|
6
|
+
testlabel=test-old
|
7
|
+
testargs="--n-sim-seqs-per-gen-list 50:125 --lb-tau-list 0.002:0.003 --obs-times 100 --carry-cap 1000 --n-generations-list 4:5"
|
8
|
+
|
9
|
+
# # $bin --actions get-lb-bounds --label $label #--make-plots
|
10
|
+
# # $bin --actions get-lb-bounds --label $testlabel $testargs --make-plots
|
11
|
+
# # echo $bin --actions get-lb-bounds --seq-len 133 --label aa-lb-bounds-v0 --make-plots
|
12
|
+
# # echo $bin --actions get-lb-bounds --seq-len 133 --label $testlabel $testargs --make-plots
|
13
|
+
# for slen in 1200; do #300 400 500 600 700 900; do # redo normalization for different seq lengths
|
14
|
+
# # 1200 doesn't work; I think some of the %.xf print format specifiers need to be changed
|
15
|
+
# $bin --actions get-lb-bounds --seq-len $slen --label generalize-lb-bounds-$slen --lb-tau-list auto --make-plots >$slen.log #--make-plots
|
16
|
+
# done
|
17
|
+
# exit 0
|
18
|
+
|
19
|
+
# echo $bin --label $testlabel $testargs --only-csv-plots
|
20
|
+
# echo $bin --label $label --n-replicates 3 --only-csv-plots
|
21
|
+
|
22
|
+
dtv=3; nest=100; depth=10 # dtv=3; nest=30; depth=10 # dtv=2; nest=100; depth=5
|
23
|
+
# mm=dtr; dtr_args="--dtr-path /fh/fast/matsen_e/dralph/partis/tree-metrics/dtr-train-v$dtv/seed-0/dtr/train_n-estimators_${nest}_max-depth_${depth}-dtr-models --extra-plotstr v$dtv-$nest-$depth"
|
24
|
+
# dtr_args="--actions plot --plot-metrics dtr --plot-metric-extra-strs v3-100-10" # :dtr:dtr # :v2-100-5:v3-30-10
|
25
|
+
|
26
|
+
# for mm in lbr aa-lbr; do #aa-lbi aa-lbr shm delta-lbi cons-dist-aa cons-dist-nuc; do
|
27
|
+
common="--only-csv-plots --n-max-procs 25 $dtr_args"
|
28
|
+
# common="--actions get-tree-metrics --only-csv-plots --n-max-procs 25 --metric-method $mm $dtr_args" # # --no-tree-plots --slurm
|
29
|
+
# common="--actions plot --plot-metrics lbr:aa-lbr --distr-hist-limit N:3" # cons-dist-nuc:cons-dist-aa"
|
30
|
+
# common="--actions plot --plot-metrics shm:delta-lbi:lbi:lbr:cons-dist-aa:cons-dist-nuc:aa-lbi:aa-lbr" # --plot-metric-extra-strs ::::"
|
31
|
+
# common="--actions combine-plots --plot-metrics shm:delta-lbi:lbi:lbr:cons-dist-aa:cons-dist-nuc:aa-lbi:aa-lbr:dtr --combo-extra-str with-dtr-and-aa-lb --plot-metric-extra-strs ::::::::v3-100-10 --dont-plot-extra-str"
|
32
|
+
# common="--actions combine-plots --plot-metrics shm:delta-lbi:lbi:lbr:cons-dist-aa:cons-dist-nuc:aa-lbi:aa-lbr --combo-extra-str with-aa-lb" #-pvk-$pvk --pvks-to-plot $pvk" # :dtr --plot-metric-extra-strs :::::v3-100-10 --dont-plot-extra-str
|
33
|
+
common="$common --base-outdir /fh/local/dralph/partis/tree-metrics"
|
34
|
+
# ----------------------------------------------------------------------------------------
|
35
|
+
# $bin --label vary-metric-v2 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 500 --obs-times-list 50:100:500 --n-sim-seqs-per-gen-list 100 --metric-for-target-distance-list aa:aa-sim-blosum --lb-tau-list 0.0025 --final-plot-xvar obs-times --include-relative-affy-plots $common # --pvks-to-plot aa-sim-blosum
|
36
|
+
# echo $bin --label tau-vs-obs-frac-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:50:100:200 --legend-var obs_frac $common # rerun of "v2" (at top), but redoing things since plots look a bit weird on v2 when I remake them
|
37
|
+
echo $bin --label vary-selection-strength-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --selection-strength-list 0.1:0.4:0.7:0.8:0.9:1.0 --final-plot-xvar selection-strength $common # all the other tau values are also there, but I decided I wanted selection strength on the x axis NOTE the other tau values will have lbr-tau-factor 1 and non-normalized lbi, so you *really* need to not mix them with 0.0025 ## --lb-tau-list 0.0025
|
38
|
+
# $bin --label carry-cap-vs-n-obs-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 500:1000:3000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:75:150:500 --lb-tau-list 0.0025 --final-plot-xvar carry-cap $common --pvks-to-plot 30 # full carry cap: 250:500:1000:3000 and n/gen: 15:30:75:150:500 lists (not plotting them all)
|
39
|
+
# $bin --label vary-obs-times-v2 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 350:1000:2000 --obs-times-list 50:100:250:500:1000:3000 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 --final-plot-xvar obs-times $common --pvks-to-plot 350 # full carry-cap-list 250:350:500:1000:2000
|
40
|
+
# $bin --label vary-obs-times-v3 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 350:1000:2000 --obs-times-list 50,100,150,200,250:100,200,300,400,500:200,400,600,800,1000:600,1200,1800,2400,3000 --n-sim-seqs-per-gen-list 20 --lb-tau-list 0.0025 --final-plot-xvar obs-times --include-relative-affy-plots $common --pvks-to-plot 1000 # full carry-cap-list 250:350:500:1000:2000
|
41
|
+
# $bin --label vary-obs-times-v4 --n-replicates 30 --n-sim-events-per-proc 10 --min-target-distance-list 2 --carry-cap-list 350:1000:2000 --obs-times-list 50,100,150,200,250:100,200,300,400,500:200,400,600,800,1000:600,1200,1800,2400,3000 --n-sim-seqs-per-gen-list 20 --lb-tau-list 0.0025 --final-plot-xvar obs-times --include-relative-affy-plots $common --pvks-to-plot 1000
|
42
|
+
# $bin --label vary-obs-times-v5 --n-replicates 30 --n-sim-events-per-proc 10 --min-target-distance-list 2 --carry-cap-list 350:1000:2000 --obs-times-list 50:100:250:500:1000:3000 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 --final-plot-xvar obs-times $common --pvks-to-plot 350
|
43
|
+
# $bin --label vary-sampling-scheme-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:50:100:200 --leaf-sampling-scheme-list uniform-random:affinity-biased:high-affinity --lb-tau-list 0.0025 --final-plot-xvar n-sim-seqs-per-gen $common --pvks-to-plot high-affinity
|
44
|
+
# $bin --label vary-n-targets-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 50:100:250:500 --n-sim-seqs-per-gen-list 100 --target-count-list 1:2:4 --lb-tau-list 0.0025 --final-plot-xvar obs-times $common # --pvks-to-plot 4
|
45
|
+
# $bin --label vary-n-targets-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 50:100:250:500 --n-sim-seqs-per-gen-list 100 --target-count-list 4:8:16 --n-target-clusters-list 1:2:4 --zip-vars target-count:n-target-clusters --lb-tau-list 0.0025 --final-plot-xvar obs-times $common --pvks-to-plot="4; 16"
|
46
|
+
# $bin --label vary-context-dependence-v1 --n-replicates 5 --n-sim-events-per-proc 50 --carry-cap-list 350 --obs-times-list 100:350 --n-sim-seqs-per-gen-list 100 --context-depend-list 0:1 --lb-tau-list 0.0025 $common --final-plot-xvar obs-times --n-sub-procs 10 # --pvks-to-plot 0 (and 1), do 'em separately
|
47
|
+
# $bin --label among-family-variation-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 250 --n-sim-seqs-per-gen-list 0 --lb-tau-list 0.0025 --parameter-variances-list n-sim-seqs-per-generation,15..15:n-sim-seqs-per-generation,50..50:n-sim-seqs-per-generation,250..250:n-sim-seqs-per-generation,25..50..75:n-sim-seqs-per-generation,15..50..150..500 --final-plot-xvar parameter-variances $common
|
48
|
+
# $bin --label among-family-variation-v1 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 250 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --parameter-variances-list selection-strength,0.25..0.25:selection-strength,0.75..0.75:selection-strength,1.0..1.0:selection-strength,0.5..0.75..1.0:selection-strength,0.25..0.67..1.0:selection-strength,0.1..0.25..0.75..1.0 --final-plot-xvar parameter-variances $common
|
49
|
+
# $bin --label among-family-variation-v2 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list=-1 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --parameter-variances-list obs-times,50..50:obs-times,150..150:obs-times,500..500:obs-times,50..100..200:obs-times,50..150..500..1000 --final-plot-xvar parameter-variances $common
|
50
|
+
# $bin --label among-family-variation-v4 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list=-1 --n-sim-seqs-per-gen-list=-1 --lb-tau-list 0.0025 --parameter-variances-list n-sim-seqs-per-generation,150..150_c_selection-strength,0.75..0.75_c_obs-times,250..250:n-sim-seqs-per-generation,50..150..300_c_selection-strength,0.5..0.75..1.0_c_obs-times,100..250..500:n-sim-seqs-per-generation,25..150..500_c_selection-strength,0.25..0.75..1.0_c_obs-times,50..250..1000 --final-plot-xvar parameter-variances $common
|
51
|
+
# echo $bin --label true-vs-inferred-v0 --n-replicates 2 --n-sim-events-per-proc 30 --carry-cap-list 500 --obs-times-list 150:1500 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 $common # NOTE to get the true vs inferred plots you have to turn off --only-csv-plots for get-tree-metrics (and probably turn on --no-tree-plots) and uncomment two fcn calls in treeutils.plot_tree_metrics(): lbplotting.plot_true_vs_inferred_lb() and lbplotting.plot_cons_seq_accuracy()
|
52
|
+
# echo $bin --label true-vs-inferred-v1 --n-replicates 2 --n-sim-events-per-proc 50 --carry-cap-list 500:2000 --obs-times-list 150:1500 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 $common
|
53
|
+
# echo $bin --label true-vs-inferred-v2 --n-replicates 2 --n-sim-events-per-proc 50 --carry-cap-list 500:2000 --obs-times-list 150:1500 --n-sim-seqs-per-gen-list 100 --min-target-distance-list 0:2:5 --lb-tau-list 0.0025 $common
|
54
|
+
# $bin --label fix-lbr-tau-sample-time-v0 --n-replicates 10 --n-sim-events-per-proc 50 --carry-cap-list 1000 --obs-times-list 100:350:50,75,100,150:100,150,300,350 --n-sim-seqs-per-gen-list 100:100:25:25 --zip-vars obs-times:n-sim-seqs-per-gen $common
|
55
|
+
# $bin --label fix-lbr-tau-fraction-sampled-v0 --n-replicates 10 --n-sim-events-per-proc 50 --carry-cap-list 1000 --obs-times-list 50,100,150 --n-sim-seqs-per-gen-list 10:25:50:100:300 $common
|
56
|
+
# echo $bin --label fix-lbr-tau-obs-time-v0 --n-replicates 5 --n-sim-events-per-proc 30 --carry-cap-list 1000 --obs-times-list 100:250:500 --n-sim-seqs-per-gen-list 100:200 $common
|
57
|
+
# echo $bin --label fix-lbr-tau-fraction-sampled-v1 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 100,150,200,250,300 --n-sim-seqs-per-gen-list 10:50 $common
|
58
|
+
# --final-plot-xvar n-sim-seqs-per-gen --pvks-to-plot 0.002
|
59
|
+
|
60
|
+
# some of these may have had different n max queries for different steps (but n max queries was only set for the steps where its value isn't very important, i.e. it was never set for training)
|
61
|
+
# common="--actions bcr-phylo --bcr-phylo-actions cache-parameters:partition --only-csv-plots" # --base-outdir /fh/local/dralph/partis/tree-metrics" # --sub-slurm" # /loc/scratch/dralph/partis/tree-metrics
|
62
|
+
common="$common --n-max-queries 150000" # $common --iseed 1" for the dtr training samples we usually only want to test on 150k or so queries (and also sometimes need to restrict to certain seeds to avoid training samples)
|
63
|
+
# echo $bin --label dtr-train-v0 --n-replicates 5 --n-sim-events-per-proc 1000 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --selection-strength 0.75 --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,200:selection-strength,0.5 $common &
|
64
|
+
# echo $bin --label dtr-train-v1 --n-replicates 5 --n-sub-procs 30 --n-sim-events-per-proc 50000 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 30 --selection-strength 0.75 --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,15:selection-strength,0.5 $common &
|
65
|
+
# echo $bin --label dtr-train-v2 --n-replicates 2 --n-sub-procs 15 --n-sim-events-per-proc 300000 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 20 --selection-strength 0.75 --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,15:selection-strength,0.5 $common &
|
66
|
+
# echo $bin --label dtr-train-v3 --n-replicates 2 --n-sub-procs 25 --n-sim-events-per-proc 50000 --carry-cap-list=-1 --obs-times-list=-1 --n-sim-seqs-per-gen-list=-1 --selection-strength=-1. --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,250..500..900..1000..1100..1500..5000:obs-times,75..100..150..200..1000:n-sim-seqs-per-generation,15..30..75..150..500:selection-strength,0.5..0.9..0.95..1.0 $common # NOTE made a second replicate (iseed 1) with only 1000 events, just for testing
|
67
|
+
# done
|
68
|
+
|
69
|
+
|
70
|
+
# ----------------------------------------------------------------------------------------
|
71
|
+
# old ones:
|
72
|
+
# echo $bin --label vary-carry-cap-v0 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 500:750:1000:2000:5000 --obs-times-list 100,200 --n-sim-seqs-per-gen-list 75 --include-relative-affy-plots $common
|
73
|
+
# echo $bin --label vary-obs-times-v0 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 100:200:300:100,150:200,250:100,200,300 --n-sim-seqs-per-gen-list 100:100:100:50:50:33 --zip-vars obs-times:n-sim-seqs-per-gen --include-relative-affy-plots $common
|
74
|
+
# echo $bin --label vary-obs-times-v1 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 300:100,200,300:200,250,300 --n-sim-seqs-per-gen-list 100:33:33 --zip-vars obs-times:n-sim-seqs-per-gen --include-relative-affy-plots $common
|
75
|
+
# echo $bin --label vary-obs-frac-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:50:75:100:150:200 $common
|
76
|
+
# echo $bin --label vary-metric-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --metric-for-target-distance-list aa:aa-sim-blosum --include-relative-affy-plots $common
|
77
|
+
# echo $bin --label vary-metric-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 500 --n-sim-seqs-per-gen-list 100 --metric-for-target-distance-list aa:aa-sim-blosum --include-relative-affy-plots $common --pvks-to-plot aa-sim-blosum # seeing if different parameters will change the fact that lbi does better than cons-dist-aa (as in vary-metric-v0)
|
78
|
+
# echo $bin --label vary-selection-strength-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --selection-strength-list 0.1:0.4:0.7:0.8:0.9:1.0 $common
|
79
|
+
# echo $bin --label carry-cap-vs-n-obs-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 260:260:500:500:700:700:1500:1500:3000:3000 --obs-times-list 150 --n-sim-seqs-per-gen-list 13:26:25:50:35:70:75:150:150:300 --lb-tau-list 0.0025 --zip-vars carry-cap:n-sim-seqs-per-gen --final-plot-xvar carry-cap --legend-var obs_frac $common
|
80
|
+
# echo $bin --label carry-cap-vs-n-obs-only-leaves-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 250:500:1000:3000 --obs-times-list 150 --n-sim-seqs-per-gen-list 15:75:500 --lb-tau-list 0.0025 --dont-observe-common-ancestors --final-plot-xvar carry-cap $common
|
81
|
+
# echo $bin --label choose-among-families-v1 --n-replicates 30 --n-sim-events-per-proc 30 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --selection-strength 0.75 --lb-tau-list 0.0025 --dont-observe-common-ancestors --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,200:selection-strength,0.5 $common
|
82
|
+
# echo $bin --label choose-among-families-v2 --n-replicates 10 --n-sim-events-per-proc 30 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --selection-strength 0.75 --lb-tau-list 0.0025 --dont-observe-common-ancestors --no-scan-parameter-variances selection-strength,0.5 $common
|
83
|
+
# echo $bin --label choose-among-families-v3 --n-replicates 10 --n-sim-events-per-proc 30 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --dont-observe-common-ancestors $common
|
84
|
+
# echo $bin --label choose-among-families-v4 --n-replicates 10 --n-sim-events-per-proc 150 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --dont-observe-common-ancestors $common
|
85
|
+
# echo $bin --label choose-among-families-v5 --n-replicates 10 --n-sim-events-per-proc 150 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 $common
|
86
|
+
# echo $bin --label vary-sampling-scheme-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --leaf-sampling-scheme-list uniform-random:affinity-biased:high-affinity --lb-tau-list 0.0025 --include-relative-affy-plots $common
|
87
|
+
# echo $bin --label vary-context-dependence-v0 --n-replicates 5 --n-sim-events-per-proc 10 --carry-cap-list 350 --obs-times-list 100 --n-sim-seqs-per-gen-list 30 --context-depend-list 0:1 --lb-tau-list 0.0025 $common --n-sub-procs 10
|
88
|
+
# $bin --label among-family-variation-v3 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list=-1 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --parameter-variances-list obs-times,50..50:obs-times,150..150:obs-times,500..500:obs-times,100..150..250:obs-times,50..150..250..500 --final-plot-xvar parameter-variances $common
|
bin/smetric-run.py
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
from __future__ import absolute_import, division, unicode_literals
|
3
|
+
from __future__ import print_function
|
4
|
+
import sys
|
5
|
+
import colored_traceback.always
|
6
|
+
import os
|
7
|
+
import yaml
|
8
|
+
import argparse
|
9
|
+
import numpy
|
10
|
+
|
11
|
+
sys.path.insert(1, '.') #'./python')
|
12
|
+
import python.utils as utils
|
13
|
+
import python.treeutils as treeutils
|
14
|
+
|
15
|
+
# NOTE this only really works on simulation, although it probably wouldn't take much work to get it working on data
|
16
|
+
|
17
|
+
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
|
18
|
+
parser.add_argument('--infname', required=True)
|
19
|
+
parser.add_argument('--base-plotdir', required=True)
|
20
|
+
parser.add_argument('--lb-tau', type=float)
|
21
|
+
parser.add_argument('--dont-normalize-lbi', action='store_true')
|
22
|
+
parser.add_argument('--action', choices=['train', 'test'])
|
23
|
+
parser.add_argument('--dtr-path')
|
24
|
+
parser.add_argument('--metric-method', default='aa-lbi')
|
25
|
+
parser.add_argument('--dtr-cfg')
|
26
|
+
parser.add_argument('--only-csv-plots', action='store_true')
|
27
|
+
parser.add_argument('--n-max-queries', type=int, default=-1)
|
28
|
+
parser.add_argument('--max-family-size', type=int, help='subset each family down to this size before passing to treeutils')
|
29
|
+
parser.add_argument('--cluster-indices')
|
30
|
+
parser.add_argument('--min-selection-metric-cluster-size', type=int, default=treeutils.default_min_selection_metric_cluster_size)
|
31
|
+
parser.add_argument('--include-relative-affy-plots', action='store_true')
|
32
|
+
parser.add_argument('--make-tree-plots', action='store_true')
|
33
|
+
parser.add_argument('--only-look-upwards', action='store_true')
|
34
|
+
parser.add_argument('--label-tree-nodes', action='store_true')
|
35
|
+
parser.add_argument('--label-root-node', action='store_true')
|
36
|
+
parser.add_argument('--selection-metric-plot-cfg', default= ':'.join(treeutils.default_plot_cfg))
|
37
|
+
args = parser.parse_args()
|
38
|
+
# NOTE extra required args are set in treeutils.plot_tree_metrics()
|
39
|
+
args.cluster_indices = utils.get_arg_list(args.cluster_indices, intify_with_ranges=True)
|
40
|
+
args.selection_metric_plot_cfg = utils.get_arg_list(args.selection_metric_plot_cfg, choices=treeutils.all_plot_cfg)
|
41
|
+
workdir = None  # default: no scratch dir; only replaced below when tree plots are requested (was mistakenly the tuple (None, None))
|
42
|
+
if args.make_tree_plots or 'tree' in args.selection_metric_plot_cfg:
|
43
|
+
workdir = utils.choose_random_subdir('/tmp/%s/tree-metrics' % os.getenv('USER'))
|
44
|
+
|
45
|
+
if args.n_max_queries != -1:
|
46
|
+
print(' --n-max-queries set to %d' % args.n_max_queries)
|
47
|
+
glfo, true_lines, _ = utils.read_output(args.infname, n_max_queries=args.n_max_queries)
|
48
|
+
|
49
|
+
# numpy.random.seed(1)
|
50
|
+
if args.max_family_size is not None:
|
51
|
+
for line in [l for l in true_lines if len(l['unique_ids']) > args.max_family_size]:
|
52
|
+
iseqs_to_keep = numpy.random.choice(range(len(line['unique_ids'])), size=args.max_family_size, replace=False)  # pick --max-family-size distinct sequence indices at random (no repeats); not reproducible unless the numpy seed is set
|
53
|
+
utils.restrict_to_iseqs(line, iseqs_to_keep, glfo)
|
54
|
+
|
55
|
+
if args.metric_method == 'dtr':
|
56
|
+
raise Exception('I think the [new] first arg here (metrics_to_calc) isn\'t right, but don\'t want to test cause i don\'t care about dtr')
|
57
|
+
treeutils.add_smetrics(args, ['lbi', 'lbr', 'dtr'], None, args.lb_tau, base_plotdir=args.base_plotdir,
|
58
|
+
train_dtr=args.action=='train', dtr_cfg=args.dtr_cfg, true_lines_to_use=true_lines, workdir=workdir) # NOTE if you need this in the future you may want to add tree_inference_method 'gctree' and tree_inference_outdir
|
59
|
+
else:
|
60
|
+
treeutils.calculate_individual_tree_metrics(args.metric_method, true_lines, base_plotdir=args.base_plotdir, lb_tau=args.lb_tau, only_csv=args.only_csv_plots,
|
61
|
+
min_cluster_size=args.min_selection_metric_cluster_size, include_relative_affy_plots=args.include_relative_affy_plots,
|
62
|
+
dont_normalize_lbi=args.dont_normalize_lbi, workdir=workdir, cluster_indices=args.cluster_indices, only_look_upwards=args.only_look_upwards, args=args) #, debug=True)
|