partis-bcr 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. bin/FastTree +0 -0
  2. bin/add-chimeras.py +59 -0
  3. bin/add-seqs-to-outputs.py +81 -0
  4. bin/bcr-phylo-run.py +799 -0
  5. bin/build.sh +24 -0
  6. bin/cf-alleles.py +97 -0
  7. bin/cf-germlines.py +57 -0
  8. bin/cf-linearham.py +199 -0
  9. bin/chimera-plot.py +76 -0
  10. bin/choose-partially-paired.py +143 -0
  11. bin/circle-plots.py +30 -0
  12. bin/compare-plotdirs.py +298 -0
  13. bin/diff-parameters.py +133 -0
  14. bin/docker-hub-push.sh +6 -0
  15. bin/extract-pairing-info.py +55 -0
  16. bin/gcdyn-simu-run.py +223 -0
  17. bin/gctree-run.py +244 -0
  18. bin/get-naive-probabilities.py +126 -0
  19. bin/iqtree-1.6.12 +0 -0
  20. bin/lonr.r +1020 -0
  21. bin/makeHtml +52 -0
  22. bin/mds-run.py +46 -0
  23. bin/parse-output.py +277 -0
  24. bin/partis +1869 -0
  25. bin/partis-pip +116 -0
  26. bin/partis.py +1869 -0
  27. bin/plot-gl-set-trees.py +519 -0
  28. bin/plot-hmms.py +151 -0
  29. bin/plot-lb-tree.py +427 -0
  30. bin/raxml-ng +0 -0
  31. bin/read-bcr-phylo-trees.py +38 -0
  32. bin/read-gctree-output.py +166 -0
  33. bin/run-chimeras.sh +64 -0
  34. bin/run-dtr-scan.sh +25 -0
  35. bin/run-paired-loci.sh +100 -0
  36. bin/run-tree-metrics.sh +88 -0
  37. bin/smetric-run.py +62 -0
  38. bin/split-loci.py +317 -0
  39. bin/swarm-2.1.13-linux-x86_64 +0 -0
  40. bin/test-germline-inference.py +425 -0
  41. bin/tree-perf-run.py +194 -0
  42. bin/vsearch-2.4.3-linux-x86_64 +0 -0
  43. bin/vsearch-2.4.3-macos-x86_64 +0 -0
  44. bin/xvfb-run +194 -0
  45. partis_bcr-1.0.1.data/scripts/cf-alleles.py +97 -0
  46. partis_bcr-1.0.1.data/scripts/cf-germlines.py +57 -0
  47. partis_bcr-1.0.1.data/scripts/extract-pairing-info.py +55 -0
  48. partis_bcr-1.0.1.data/scripts/gctree-run.py +244 -0
  49. partis_bcr-1.0.1.data/scripts/parse-output.py +277 -0
  50. partis_bcr-1.0.1.data/scripts/split-loci.py +317 -0
  51. partis_bcr-1.0.1.data/scripts/test.py +1005 -0
  52. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/METADATA +1 -1
  53. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/RECORD +101 -50
  54. partis_bcr-1.0.1.dist-info/top_level.txt +1 -0
  55. {partis → python}/glutils.py +1 -1
  56. python/main.py +30 -0
  57. {partis → python}/plotting.py +10 -1
  58. {partis → python}/treeutils.py +18 -16
  59. {partis → python}/utils.py +14 -7
  60. partis/main.py +0 -59
  61. partis_bcr-1.0.0.dist-info/top_level.txt +0 -1
  62. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/WHEEL +0 -0
  63. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/entry_points.txt +0 -0
  64. {partis_bcr-1.0.0.dist-info → partis_bcr-1.0.1.dist-info}/licenses/COPYING +0 -0
  65. {partis → python}/__init__.py +0 -0
  66. {partis → python}/alleleclusterer.py +0 -0
  67. {partis → python}/allelefinder.py +0 -0
  68. {partis → python}/alleleremover.py +0 -0
  69. {partis → python}/annotationclustering.py +0 -0
  70. {partis → python}/baseutils.py +0 -0
  71. {partis → python}/cache/__init__.py +0 -0
  72. {partis → python}/cache/cached_uncertainties.py +0 -0
  73. {partis → python}/clusterpath.py +0 -0
  74. {partis → python}/coar.py +0 -0
  75. {partis → python}/corrcounter.py +0 -0
  76. {partis → python}/datautils.py +0 -0
  77. {partis → python}/event.py +0 -0
  78. {partis → python}/fraction_uncertainty.py +0 -0
  79. {partis → python}/gex.py +0 -0
  80. {partis → python}/glomerator.py +0 -0
  81. {partis → python}/hist.py +0 -0
  82. {partis → python}/hmmwriter.py +0 -0
  83. {partis → python}/hutils.py +0 -0
  84. {partis → python}/indelutils.py +0 -0
  85. {partis → python}/lbplotting.py +0 -0
  86. {partis → python}/mds.py +0 -0
  87. {partis → python}/mutefreqer.py +0 -0
  88. {partis → python}/paircluster.py +0 -0
  89. {partis → python}/parametercounter.py +0 -0
  90. {partis → python}/paramutils.py +0 -0
  91. {partis → python}/partitiondriver.py +0 -0
  92. {partis → python}/partitionplotter.py +0 -0
  93. {partis → python}/performanceplotter.py +0 -0
  94. {partis → python}/plotconfig.py +0 -0
  95. {partis → python}/processargs.py +0 -0
  96. {partis → python}/prutils.py +0 -0
  97. {partis → python}/recombinator.py +0 -0
  98. {partis → python}/scanplot.py +0 -0
  99. {partis → python}/seqfileopener.py +0 -0
  100. {partis → python}/treegenerator.py +0 -0
  101. {partis → python}/viterbicluster.py +0 -0
  102. {partis → python}/vrc01.py +0 -0
  103. {partis → python}/waterer.py +0 -0
bin/run-chimeras.sh ADDED
@@ -0,0 +1,64 @@
1
+ #!/bin/bash
2
+
3
+ # ----------------------------------------------------------------------------------------
4
+ # dual barcode data from chaim/sai(?)
5
+ label=chimera-dual-barcode
6
+ datadir=/fh/fast/matsen_e/data/$label
7
+
8
+ outdir=$fs/partis/$label/fixed-d-synthesis
9
+ for fn in good chimeras; do
10
+ mkdir -p $outdir/$fn
11
+ # ./bin/partis cache-parameters --only-sm --infname $datadir/$fn.fa --parameter-dir $outdir/$fn/parameters --plotdir $outdir/$fn/plots --sw-cachefname $outdir/$fn/sw-cache.yaml --n-procs 5 --only-overall-plots --only-csv-plots >$outdir/$fn/log
12
+ ./bin/chimera-plot.py $outdir/$fn/sw-cache.yaml $outdir/$fn/chimera-plots --title $fn --chunk-len 75
13
+
14
+ # ./bin/partis simulate --outfname $outdir/$fn/simu.yaml --parameter-dir $outdir/$fn/parameters --parameter-type sw --n-sim-events 1000 --n-leaves 1 --constant-number-of-leaves
15
+ # ./bin/partis cache-parameters --is-simu --only-sm --infname $outdir/$fn/simu.yaml --parameter-dir $outdir/$fn/simu-parameters --plotdir $outdir/$fn/simu-plots --sw-cachefname $outdir/$fn/simu-parameters/sw-cache.yaml --n-procs 5 --only-overall-plots --only-csv-plots >$outdir/$fn/simu-log
16
+ # ./bin/add-chimeras.py $outdir/$fn/simu.yaml $outdir/$fn/simu-all-chimeras.fa --min-chunk-len 1
17
+ # ./bin/partis cache-parameters --only-sm --infname $outdir/$fn/simu-all-chimeras.fa --parameter-dir $outdir/$fn/simu-all-chimeras-parameters --plotdir $outdir/$fn/simu-all-chimeras-plots --sw-cachefname $outdir/$fn/simu-all-chimeras-parameters/sw-cache.yaml --n-procs 5 --only-overall-plots --only-csv-plots >$outdir/$fn/simu-all-chimeras-log
18
+ # ./bin/chimera-plot.py $outdir/$fn/simu-parameters/sw-cache.yaml $outdir/$fn/chimera-plots-simu
19
+ # ./bin/chimera-plot.py $outdir/$fn/simu-all-chimeras-parameters/sw-cache.yaml $outdir/$fn/chimera-plots-simu-all-chimeras
20
+ # ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots/simu-all-chimeras-vs-none/$fn --log y --translegend 0:-0.2 --names no@chimeras:all@chimeras --plotdirs $outdir/$fn/chimera-plots-simu:$outdir/$fn/chimera-plots-simu-all-chimeras
21
+ done
22
+ ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots-good-vs-chimeras --log y --translegend 0:-0.2 --names none:all@chimeras --plotdirs $outdir/good/chimera-plots:$outdir/chimeras/chimera-plots
23
+ # ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots-no-indels --log y --translegend 0:-0.2 --names good:chimeras --plotdirs $outdir/good/chimera-plots:$outdir/chimeras/chimera-plots
24
+ # ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots XXX --log y --translegend 0:-0.2 --names good:chimeras --plotdirs $outdir/good/chimera-plots:$outdir/chimeras/chimera-plots
25
+
26
+ # subd=sw/mute-freqs/overall
27
+ # ./bin/compare-plotdirs.py --outdir $outdir/comparison-plots/data-vs-simu/$subd --names good:good-simu:chimeras:chimeras-simu --plotdirs $outdir/good/plots/$subd:$outdir/good/simu-plots/$subd:$outdir/chimeras/plots/$subd:$outdir/chimeras/simu-plots/$subd
28
+
29
+ exit 0
30
+
31
+ # ----------------------------------------------------------------------------------------
32
+ # initial simulation study
33
+ pd=/fh/fast/matsen_e/dralph/partis/chimera-testing
34
+
35
+ simfn=test/reference-results/test/simu.csv
36
+ chfn=simu-all-chimeras
37
+
38
+ label=typical-simu
39
+ # ./bin/partis cache-parameters --only-sm --infname $simfn --parameter-dir $pd/parameters/$label --sw-cachefname $pd/parameters/$label/sw-cache.csv --n-procs 8
40
+ # ./bin/chimera-plot.py $pd/parameters/$label/sw-cache.csv $pd/plots/$label
41
+
42
+ # ./bin/add-chimeras.py $simfn $chfn.fa
43
+ # ./bin/partis cache-parameters --only-sm --infname $chfn.fa --parameter-dir $pd/parameters/$chfn --sw-cachefname $pd/parameters/$chfn/sw-cache.csv --n-procs 8
44
+ # ./bin/chimera-plot.py $pd/parameters/$chfn/sw-cache.csv $pd/plots/$chfn
45
+
46
+ # ./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/simu --names typical:chimeras --plotdirs $pd/plots/$label:$pd/plots/$chfn
47
+ # ./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/simu-log --log y --translegend 0:0.15 --names typical:chimeras --plotdirs $pd/plots/$label:$pd/plots/$chfn
48
+
49
+ # data
50
+ study=jason-mg
51
+ samples="MK02-igh MK03-igh MK08-igh AR03-igh AR04-igh HD10-igh HD13-igh"
52
+ # for sample in $samples; do
53
+ # echo $sample
54
+ # ./bin/chimera-plot.py /fh/fast/matsen_e/processed-data/partis/$study/gls-gen-paper-v10/$sample/sw-cache.csv $pd/plots/$study/$sample
55
+ # done
56
+ ./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/$study-log --log y --translegend 0:-0.2 --names `echo $samples|sed 's/ /:/g'` --plotdirs $pd/plots/$study
57
+
58
+ study=jason-influenza
59
+ samples="FV-igh IB-igh GMC-igh"
60
+ # for sample in $samples; do
61
+ # echo $sample
62
+ # ./bin/chimera-plot.py /fh/fast/matsen_e/processed-data/partis/$study/gls-gen-paper-v10/$sample/sw-cache.csv $pd/plots/$study/$sample
63
+ # done
64
+ ./bin/compare-plotdirs.py --outdir $pd/plots/comparisons/$study-log --log y --translegend 0:0 --names `echo $samples|sed 's/ /:/g'` --plotdirs $pd/plots/$study
bin/run-dtr-scan.sh ADDED
@@ -0,0 +1,25 @@
1
+ #!/bin/bash
2
+
3
+ # action=test #train # plot
4
+ # for trv in v0 v1 v2 v3; do
5
+ # trlabel=dtr-train-$trv
6
+ # common=" --training-label $trlabel --n-max-queries 150000 --n-max-procs 7" # --cgroup among-families --tvar affinity"# ; trseed=0 --training-seed $trseed
7
+ # ./test/dtr-scan.py $action --label dtr-train-v3 $common
8
+ # done
9
+ # exit 0
10
+
11
+ action=plot #test # train
12
+ for cg in within-families among-families; do
13
+ for tv in affinity delta-affinity; do
14
+ lfn=_output/dtr-scan/$cg-$tv.txt
15
+ echo "" >$lfn
16
+ for trv in v0 v1 v2 v3; do
17
+ trlabel=dtr-train-$trv
18
+ common=" --training-label $trlabel --n-max-queries 150000 --n-max-procs 7 --cgroup $cg --tvar $tv" # ; trseed=0 --training-seed $trseed
19
+ ./test/dtr-scan.py $action --label dtr-train-v0 $common >>$lfn
20
+ ./test/dtr-scan.py $action --label dtr-train-v1 $common >>$lfn
21
+ ./test/dtr-scan.py $action --label dtr-train-v2 $common >>$lfn
22
+ ./test/dtr-scan.py $action --label dtr-train-v3 $common >>$lfn
23
+ done
24
+ done
25
+ done
bin/run-paired-loci.sh ADDED
@@ -0,0 +1,100 @@
1
+ #!/bin/bash
2
+
3
+ bin=./test/cf-paired-loci.py
4
+
5
+ # methods=synth-distance-0.03:synth-singletons-0.20:vjcdr3-0.8:enclone:mobille:scoper:vsearch-partition:partition # this is for vs-shm; for time-reqd: enclone:mobille:scoper:vsearch-partition:partition NOTE enclone needs fixing tho (for missing uids)
6
+ # methods=synth-distance-0.00:synth-distance-0.005:synth-distance-0.02:partition # this is for vs-shm; for time-reqd: enclone:mobille:scoper:vsearch-partition:partition NOTE enclone needs fixing tho (for missing uids)
7
+ methods=igblast:annotate:star-partition:partition:linearham # for test-antn imbal-v3
8
+ # methods=partition:single-chain-partis; xstr="--combo-extra-str single-vs-joint-partis"
9
+ # methods=scoper:single-chain-scoper; xstr="--combo-extra-str single-vs-joint-scoper" # NOTE this is only for vs-shm (comparing single vs joint); for time-reqd you only need scoper
10
+ # methods=simu:cache-parameters:partition:write-fake-paired-annotations:replay-plot:iqtree:raxml:igphyml:gctree:gctree-mut-mult:tree-perf # coar stuff cache-parameters:partition
11
+ astr="--actions $methods" #partition --merge-paired-partitions" #$methods"
12
+ # astr="--actions combine-plots --plot-metrics $methods --perf-metrics precision:n-clusters"
13
+ # astr="--actions combine-plots --plot-metrics $methods $xstr"
14
+ common="--n-sub-procs 15 --n-max-procs 5 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci $astr --dry" # /fh/local/dralph
15
+ # echo $bin --label vs-shm --version v3 --n-replicates 3 --n-leaves-list 3 --n-sim-events-list 10000 --scratch-mute-freq-list 0.01:0.05:0.10:0.20:0.30 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --mutate-stop-codons\" --final-plot-xvar scratch-mute-freq $common # with these simu args, the scratch mute freq is almost identical to the final mean mfreq, so can just use the scratch mute freq on x axis
16
+ # # echo $bin --label vs-n-leaves --version v0 --n-replicates 3 --n-leaves-list 1:5:10:50 --n-sim-events-list 1000 --simu-extra-args="--constant-number-of-leaves" $common
17
+ # echo $bin --label vs-n-leaves --version v1 --n-replicates 3 --n-leaves-list 1:2:3:5:7:10:25:50 --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist --simu-extra-args="--constant-number-of-leaves" $common
18
+ # # echo $bin --label vs-n-leaves --version v2 --n-replicates 3 --n-leaves-list 1:2:3:5:7:25:50 --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist --scratch-mute-freq-list 0.15 --simu-extra-args="--constant-number-of-leaves --flat-mute-freq --same-mute-freq-for-all-seqs" $common
19
+ # echo $bin --label vs-n-sim-events --version v1 --n-replicates 3 --n-leaves-list 1 --n-sim-events-list 100:1000:10000:50000 --scratch-mute-freq-list 0.07:0.15 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --constant-number-of-leaves\" --final-plot-xvar n-sim-events --pvks-to-plot 0.15 $common
20
+ # echo $bin --label time-reqd --version v0 --n-replicates 3 --n-leaves-list 10 --n-sim-events-list 100:1000:10000 --perf-metrics time-reqd --x-legend-var n-seqs $common --n-sub-procs 28 --n-max-procs 1 # NOTE duplicate parallelization args also NOTE there's also a 4th replicate, and 50000 samples for at least 3 replicates NOTE also will have to turn up fail % by hand for enclone
21
+ # echo $bin --label pairclean --version v0 --n-replicates 3 --n-leaves-list 3:10 --constant-number-of-leaves-list 0:1 --n-sim-events-list 3000 --scratch-mute-freq-list 0.15 --mean-cells-per-droplet-list None:1:2:5:10 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --final-plot-xvar mean-cells-per-droplet --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs $common # --fraction-of-reads-to-remove-list 0.05
22
+ # echo $bin --label pairclean --version v1 --n-replicates 3 --n-leaves-list 1:2:3:10:hist --constant-number-of-leaves-list 1:1:1:1:0 --zip-vars n-leaves:constant-number-of-leaves --n-sim-events-list 3000 --scratch-mute-freq-list 0.15 --mean-cells-per-droplet-list None:1:2:5:10 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --final-plot-xvar mean-cells-per-droplet --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs $common # --fraction-of-reads-to-remove-list 0.05
23
+ # echo $bin --label pairclean --version v2 --n-replicates 3 --n-leaves-list 1:2:3:10:hist --constant-number-of-leaves-list 1:1:1:1:0 --zip-vars n-leaves:constant-number-of-leaves --n-sim-events-list 3000 --scratch-mute-freq-list 0.07 --mean-cells-per-droplet-list 1:2:5:10 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --constant-cells-per-droplet\" --final-plot-xvar mean-cells-per-droplet --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs $common # --fraction-of-reads-to-remove-list 0.05
24
+ # echo $bin --label pairfix --version v1 --n-replicates 3 --n-leaves-list hist --n-sim-events-list 3000 --scratch-mute-freq-list 0.07 --bulk-data-fraction-list 0:0.5:0.8:0.9:0.95 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --inference-extra-args=\"--pair-unpaired-seqs-with-paired-family\" --final-plot-xvar bulk-data-fraction --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs --empty-bin-range 0:200 $common
25
+ echo $bin --label test-antn --version imbal-v3 --n-replicates 2 --tree-imbalance-list None:0.04:0.07 --scratch-mute-freq-list 0.15 --n-leaves-list 50 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist $common # NOTE also made :0.13:0.14:0.16
26
+ exit 0
27
+ # echo $bin --label bcr-phylo-antn --version v0 --n-replicates 2 --obs-times-list 50:150:300 --n-sim-seqs-per-generation-list 15:45 --context-depend-list 1 --simu-type bcr-phylo --dont-observe-common-ancestors --antn-perf --perf-metrics naive-hdist $common
28
+ simu_extra="--simu-extra-args=\"--target-distance 10 --context-depend 1 --tdist-weights random-uniform --min-target-distance 2 --n-sim-seqs-per-generation 89 --parameter-variances n-sim-seqs-per-generation,23 --aa-paratope-positions N=60 --aa-struct-positions N=100 --leaf-sampling-scheme high-affinity --n-naive-seq-copies 100\""
29
+ echo $bin --label gct-valid --version v6 --n-replicates 3 --obs-times-list 15:20:30:40:50 --n-sim-events-list 70 --carry-cap-list 1000 --simu-type bcr-phylo --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf --final-plot-xvar obs-times $simu_extra $common # NOTE also have sampling times 10, 100, 150 for most methods
30
+ # echo $bin --label gct-valid --version gcdyn-v1 --n-replicates 2 --simu-type gcdyn --n-sim-events-list 70 --obs-times-list 15:30 --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf $common
31
+
32
+ # NOTE have to set --n-sub-procs to 1 for partition step, and re-set --n-sim-events-list for each --n-leaves value (500 leaves: 10 events, 100:50, 50:100):
33
+ # echo $bin --label key-trans --version v0 --n-replicates 2 --biggest-naive-seq-cluster-to-calculate 5:15:10000 --biggest-logprob-cluster-to-calculate 5:15:10000 --zip-vars biggest-naive-seq-cluster-to-calculate:biggest-logprob-cluster-to-calculate --scratch-mute-freq-list 0.15 --n-leaves-list 50:100:500 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs --constant-number-of-leaves --only-genes IGHV1-2*01:IGHD2-15*01:IGHJ6*02:IGKV1-12*01:IGKJ1*01:IGKDx-x*x --force-dont-generate-germline-set --allowed-cdr3-lengths 66:33\" --n-sim-events-list 10 --inference-extra-args=\"--debug 1 --sw-debug 0\" --perf-metrics time-reqd --final-plot-xvar biggest-logprob-cluster-to-calculate --bcrham-time $common
34
+ # echo $bin --label lcdr3 --version v0 --n-replicates 2 --n-sim-events-list 1000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM9-22:ighM15-34:ighM36-43:ighM45-52:ighM54-61:ighM63-73 $common
35
+ # echo $bin --label lcdr3 --version v2 --n-replicates 2 --n-sim-events-list 5000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM24-25:ighM27-28:ighM30-31:ighM48-49 --simu-extra-args=\"--n-leaves 1 --constant-number-of-leaves\" $common
36
+ # echo $bin --label lcdr3 --version one-j-v2 --n-replicates 2 --n-sim-events-list 5000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM24-25:ighM27-28:ighM30-31:ighM48-49 --simu-extra-args=\"--only-genes=IGHJ4*01:IGKJ1*01 --n-genes-per-region ::1 --n-leaves 1 --constant-number-of-leaves\" $common # --inference-extra-args=\"--add-pairwise-clustering-metrics\"
37
+ # echo $bin --label lcdr3 --version all-together --n-replicates 2 --n-sim-events-list 5000 --single-light-locus igk --base-outdir /fh/fast/matsen_e/dralph/partis/paired-loci --allowed-cdr3-lengths ighM24-25,ighM27-28,ighM30-31 --simu-extra-args=\"--n-leaves 1 --constant-number-of-leaves\" --final-plot-xvar allowed-cdr3-lengths $common
38
+ # echo $bin --label test-data-in --version v0 --data-in-cfg datascripts/meta/spisak-simu/samples.yaml --dataset-in-list cdr_18_set_1-simu-v0:cdr_21_set_1-simu-v0:cdr_24_set_1-simu-v0:cdr_42_set_1-simu-v0 $common # --extra-args=\"--add-pairwise-clustering-metrics\"
39
+ exit 0
40
+
41
+ # ----------------------------------------------------------------------------------------
42
+ # data
43
+ dvsn=v1 #test-ctnt
44
+ simvsn=v0
45
+
46
+ # bin=./datascripts/run.py
47
+ # common="--study 10x-examples --version $dvsn --paired-loci --n-procs 20 --no-slurm" # --only-csv-plots"
48
+ # infxtra='--extra-args=\"--make-per-gene-plots\"'
49
+ # # $bin cache-parameters $common --no-simu $infxtra
50
+ # # $bin partition $common --no-simu $infxtra
51
+ # # $bin simulate $common --logstr $simvsn # --extra-args="--paired-correlation-values v_gene.d_gene,0.5:v_gene.j_gene,0.5"
52
+ # # $bin cache-parameters $common $infxtra
53
+ # # $bin partition $common $infxtra
54
+ # exit 0
55
+
56
+ bin=./bin/compare-plotdirs.py
57
+ fsddir=/fh/fast/matsen_e/processed-data/partis
58
+ bidir=$fsddir/10x-examples/$dvsn
59
+ bodir=$bidir/comparisons
60
+
61
+ tsubj=hs-1-postvax
62
+ tpd=$bidir/comparisons/$tsubj
63
+ $bin --outdir $bodir/ins-del-lens-all/$tsubj --plotdirs $bidir/$tsubj/igh+igk/plots/igh/overall:$bidir/$tsubj/igh+igk/plots/igk/overall:$bidir/$tsubj/igh+igl/plots/igl/overall --names igh:igk:igl --normalize & # --extra-stats mean
64
+ $bin --outdir $bodir/ins-del-lens-igh/$tsubj --plotdirs $bidir/$tsubj/igh+igk/plots/igh/overall --names igh --normalize --single-plotdir & # --extra-stats mean
65
+ exit 0
66
+
67
+ for subj in hs-1-postvax hs-1-prevax hs-2-pbmc mm-balbc; do #hs-1-prevax; do #mm-balbc
68
+ for ltmp in igh igk; do # igl
69
+ # ddir=$bidir/$subj/single-chain/plots/$ltmp/parameters/hmm
70
+ ddir=$bidir/$subj/igh+igk/plots/$ltmp
71
+ sdir=$bidir/$subj-simu-$simvsn/single-chain/plots/$ltmp/parameters/true
72
+ for subd in overall mute-freqs; do
73
+ # for subd in mute-freqs/per-gene-per-position/v mute-freqs/per-gene-per-position/d mute-freqs/per-gene-per-position/j; do
74
+ $bin --outdir $bodir/$subj/$subd-$ltmp --plotdirs $ddir/$subd:$sdir/$subd --names data:simu --normalize & # --extra-stats mean
75
+ done
76
+ # break
77
+ done
78
+ # for ltmp in igh igk igl; do
79
+ # # single-chain/plots/igh/partitions/sizes
80
+ # # plots/igh/partitions/sizes
81
+ # substr=plots/$ltmp/partitions/sizes
82
+ # ddir=$bidir/$subj #/single-chain/$substr
83
+ # sdir=$bidir/$subj-simu-$simvsn #/single-chain/$substr
84
+ # subd=cluster-sizes
85
+ # echo $bin --outdir $bodir/$subj/$subd-$ltmp --plotdirs $sdir/single-chain/$substr:$ddir/single-chain/$substr:$sdir/$substr:$ddir/$substr --names simu@single:data@single:simu@joint:data@joint \
86
+ # --add-to-title $ltmp --log xy --colors '#006600:#2b65ec:#990012:black' --linewidths 5:2:5:2 &
87
+ # done
88
+ done
89
+ exit 0
90
+
91
+ for subjdir in $bidir/hs-1-postvax $fsddir/goo-dengue-10x/v12/d-14; do
92
+ pstr=paired-seqs-per-seq
93
+ cfd=$bodir/`basename $subjdir`/$pstr
94
+ # for tstr in before after; do
95
+ # mkdir -p $cfd/$tstr
96
+ # cp -v $subjdir/plots/$pstr-$tstr.csv $cfd/$tstr/$pstr.csv
97
+ # done
98
+ $bin --outdir $cfd/cf --plotdirs $cfd/after:$cfd/before --names after:before --normalize --colors='#006600:#990012' --xbounds=-0.5:9.5 --ybounds=0:0.9 --xticks 0:1:2:3:4:5:6:7:8:9 --no-errors --square-bins --ytitle="fraction of seqs" & # --add-to-title `basename $subjdir`
99
+ done
100
+
bin/run-tree-metrics.sh ADDED
@@ -0,0 +1,88 @@
1
+ #!/bin/bash
2
+
3
+ bin=./test/cf-tree-metrics.py
4
+
5
+ label=v0
6
+ testlabel=test-old
7
+ testargs="--n-sim-seqs-per-gen-list 50:125 --lb-tau-list 0.002:0.003 --obs-times 100 --carry-cap 1000 --n-generations-list 4:5"
8
+
9
+ # # $bin --actions get-lb-bounds --label $label #--make-plots
10
+ # # $bin --actions get-lb-bounds --label $testlabel $testargs --make-plots
11
+ # # echo $bin --actions get-lb-bounds --seq-len 133 --label aa-lb-bounds-v0 --make-plots
12
+ # # echo $bin --actions get-lb-bounds --seq-len 133 --label $testlabel $testargs --make-plots
13
+ # for slen in 1200; do #300 400 500 600 700 900; do # redo normalization for different seq lengths
14
+ # # 1200 doesn't work, some %.xf print things i think need to be changed
15
+ # $bin --actions get-lb-bounds --seq-len $slen --label generalize-lb-bounds-$slen --lb-tau-list auto --make-plots >$slen.log #--make-plots
16
+ # done
17
+ # exit 0
18
+
19
+ # echo $bin --label $testlabel $testargs --only-csv-plots
20
+ # echo $bin --label $label --n-replicates 3 --only-csv-plots
21
+
22
+ dtv=3; nest=100; depth=10 # dtv=3; nest=30; depth=10 # dtv=2; nest=100; depth=5
23
+ # mm=dtr; dtr_args="--dtr-path /fh/fast/matsen_e/dralph/partis/tree-metrics/dtr-train-v$dtv/seed-0/dtr/train_n-estimators_${nest}_max-depth_${depth}-dtr-models --extra-plotstr v$dtv-$nest-$depth"
24
+ # dtr_args="--actions plot --plot-metrics dtr --plot-metric-extra-strs v3-100-10" # :dtr:dtr # :v2-100-5:v3-30-10
25
+
26
+ # for mm in lbr aa-lbr; do #aa-lbi aa-lbr shm delta-lbi cons-dist-aa cons-dist-nuc; do
27
+ common="--only-csv-plots --n-max-procs 25 $dtr_args"
28
+ # common="--actions get-tree-metrics --only-csv-plots --n-max-procs 25 --metric-method $mm $dtr_args" # # --no-tree-plots --slurm
29
+ # common="--actions plot --plot-metrics lbr:aa-lbr --distr-hist-limit N:3" # cons-dist-nuc:cons-dist-aa"
30
+ # common="--actions plot --plot-metrics shm:delta-lbi:lbi:lbr:cons-dist-aa:cons-dist-nuc:aa-lbi:aa-lbr" # --plot-metric-extra-strs ::::"
31
+ # common="--actions combine-plots --plot-metrics shm:delta-lbi:lbi:lbr:cons-dist-aa:cons-dist-nuc:aa-lbi:aa-lbr:dtr --combo-extra-str with-dtr-and-aa-lb --plot-metric-extra-strs ::::::::v3-100-10 --dont-plot-extra-str"
32
+ # common="--actions combine-plots --plot-metrics shm:delta-lbi:lbi:lbr:cons-dist-aa:cons-dist-nuc:aa-lbi:aa-lbr --combo-extra-str with-aa-lb" #-pvk-$pvk --pvks-to-plot $pvk" # :dtr --plot-metric-extra-strs :::::v3-100-10 --dont-plot-extra-str
33
+ common="$common --base-outdir /fh/local/dralph/partis/tree-metrics"
34
+ # ----------------------------------------------------------------------------------------
35
+ # $bin --label vary-metric-v2 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 500 --obs-times-list 50:100:500 --n-sim-seqs-per-gen-list 100 --metric-for-target-distance-list aa:aa-sim-blosum --lb-tau-list 0.0025 --final-plot-xvar obs-times --include-relative-affy-plots $common # --pvks-to-plot aa-sim-blosum
36
+ # echo $bin --label tau-vs-obs-frac-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:50:100:200 --legend-var obs_frac $common # rerun of "v2" (at top), but redoing things since plots look a bit weird on v2 when I remake them
37
+ echo $bin --label vary-selection-strength-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --selection-strength-list 0.1:0.4:0.7:0.8:0.9:1.0 --final-plot-xvar selection-strength $common # all the other tau values are also there, but I decided I wanted selection strength on the x axis NOTE the other tau values will have lbr-tau-factor 1 and non-normalized lbi, so you *really* need to not mix them with 0.0025 ## --lb-tau-list 0.0025
38
+ # $bin --label carry-cap-vs-n-obs-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 500:1000:3000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:75:150:500 --lb-tau-list 0.0025 --final-plot-xvar carry-cap $common --pvks-to-plot 30 # full carry cap: 250:500:1000:3000 and n/gen: 15:30:75:150:500 lists (not plotting them all)
39
+ # $bin --label vary-obs-times-v2 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 350:1000:2000 --obs-times-list 50:100:250:500:1000:3000 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 --final-plot-xvar obs-times $common --pvks-to-plot 350 # full carry-cap-list 250:350:500:1000:2000
40
+ # $bin --label vary-obs-times-v3 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 350:1000:2000 --obs-times-list 50,100,150,200,250:100,200,300,400,500:200,400,600,800,1000:600,1200,1800,2400,3000 --n-sim-seqs-per-gen-list 20 --lb-tau-list 0.0025 --final-plot-xvar obs-times --include-relative-affy-plots $common --pvks-to-plot 1000 # full carry-cap-list 250:350:500:1000:2000
41
+ # $bin --label vary-obs-times-v4 --n-replicates 30 --n-sim-events-per-proc 10 --min-target-distance-list 2 --carry-cap-list 350:1000:2000 --obs-times-list 50,100,150,200,250:100,200,300,400,500:200,400,600,800,1000:600,1200,1800,2400,3000 --n-sim-seqs-per-gen-list 20 --lb-tau-list 0.0025 --final-plot-xvar obs-times --include-relative-affy-plots $common --pvks-to-plot 1000
42
+ # $bin --label vary-obs-times-v5 --n-replicates 30 --n-sim-events-per-proc 10 --min-target-distance-list 2 --carry-cap-list 350:1000:2000 --obs-times-list 50:100:250:500:1000:3000 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 --final-plot-xvar obs-times $common --pvks-to-plot 350
43
+ # $bin --label vary-sampling-scheme-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:50:100:200 --leaf-sampling-scheme-list uniform-random:affinity-biased:high-affinity --lb-tau-list 0.0025 --final-plot-xvar n-sim-seqs-per-gen $common --pvks-to-plot high-affinity
44
+ # $bin --label vary-n-targets-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 50:100:250:500 --n-sim-seqs-per-gen-list 100 --target-count-list 1:2:4 --lb-tau-list 0.0025 --final-plot-xvar obs-times $common # --pvks-to-plot 4
45
+ # $bin --label vary-n-targets-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 50:100:250:500 --n-sim-seqs-per-gen-list 100 --target-count-list 4:8:16 --n-target-clusters-list 1:2:4 --zip-vars target-count:n-target-clusters --lb-tau-list 0.0025 --final-plot-xvar obs-times $common --pvks-to-plot="4; 16"
46
+ # $bin --label vary-context-dependence-v1 --n-replicates 5 --n-sim-events-per-proc 50 --carry-cap-list 350 --obs-times-list 100:350 --n-sim-seqs-per-gen-list 100 --context-depend-list 0:1 --lb-tau-list 0.0025 $common --final-plot-xvar obs-times --n-sub-procs 10 # --pvks-to-plot 0 (and 1), do 'em separately
47
+ # $bin --label among-family-variation-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 250 --n-sim-seqs-per-gen-list 0 --lb-tau-list 0.0025 --parameter-variances-list n-sim-seqs-per-generation,15..15:n-sim-seqs-per-generation,50..50:n-sim-seqs-per-generation,250..250:n-sim-seqs-per-generation,25..50..75:n-sim-seqs-per-generation,15..50..150..500 --final-plot-xvar parameter-variances $common
48
+ # $bin --label among-family-variation-v1 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 250 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --parameter-variances-list selection-strength,0.25..0.25:selection-strength,0.75..0.75:selection-strength,1.0..1.0:selection-strength,0.5..0.75..1.0:selection-strength,0.25..0.67..1.0:selection-strength,0.1..0.25..0.75..1.0 --final-plot-xvar parameter-variances $common
49
+ # $bin --label among-family-variation-v2 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list=-1 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --parameter-variances-list obs-times,50..50:obs-times,150..150:obs-times,500..500:obs-times,50..100..200:obs-times,50..150..500..1000 --final-plot-xvar parameter-variances $common
50
+ # $bin --label among-family-variation-v4 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list=-1 --n-sim-seqs-per-gen-list=-1 --lb-tau-list 0.0025 --parameter-variances-list n-sim-seqs-per-generation,150..150_c_selection-strength,0.75..0.75_c_obs-times,250..250:n-sim-seqs-per-generation,50..150..300_c_selection-strength,0.5..0.75..1.0_c_obs-times,100..250..500:n-sim-seqs-per-generation,25..150..500_c_selection-strength,0.25..0.75..1.0_c_obs-times,50..250..1000 --final-plot-xvar parameter-variances $common
51
+ # echo $bin --label true-vs-inferred-v0 --n-replicates 2 --n-sim-events-per-proc 30 --carry-cap-list 500 --obs-times-list 150:1500 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 $common # NOTE to get the true vs inferred plots you have to turn off --only-csv-plots for get-tree-metrics (and probably turn on --no-tree-plots) and uncomment two fcn calls in treeutils.plot_tree_metrics(): lbplotting.plot_true_vs_inferred_lb() and lbplotting.plot_cons_seq_accuracy()
52
+ # echo $bin --label true-vs-inferred-v1 --n-replicates 2 --n-sim-events-per-proc 50 --carry-cap-list 500:2000 --obs-times-list 150:1500 --n-sim-seqs-per-gen-list 100 --lb-tau-list 0.0025 $common
53
+ # echo $bin --label true-vs-inferred-v2 --n-replicates 2 --n-sim-events-per-proc 50 --carry-cap-list 500:2000 --obs-times-list 150:1500 --n-sim-seqs-per-gen-list 100 --min-target-distance-list 0:2:5 --lb-tau-list 0.0025 $common
54
+ # $bin --label fix-lbr-tau-sample-time-v0 --n-replicates 10 --n-sim-events-per-proc 50 --carry-cap-list 1000 --obs-times-list 100:350:50,75,100,150:100,150,300,350 --n-sim-seqs-per-gen-list 100:100:25:25 --zip-vars obs-times:n-sim-seqs-per-gen $common
55
+ # $bin --label fix-lbr-tau-fraction-sampled-v0 --n-replicates 10 --n-sim-events-per-proc 50 --carry-cap-list 1000 --obs-times-list 50,100,150 --n-sim-seqs-per-gen-list 10:25:50:100:300 $common
56
+ # echo $bin --label fix-lbr-tau-obs-time-v0 --n-replicates 5 --n-sim-events-per-proc 30 --carry-cap-list 1000 --obs-times-list 100:250:500 --n-sim-seqs-per-gen-list 100:200 $common
57
+ # echo $bin --label fix-lbr-tau-fraction-sampled-v1 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 100,150,200,250,300 --n-sim-seqs-per-gen-list 10:50 $common
58
+ # --final-plot-xvar n-sim-seqs-per-gen --pvks-to-plot 0.002
59
+
60
+ # some of these may have used different --n-max-queries values for different steps (but --n-max-queries was only set for the steps where its value isn't very important, i.e. it was never set for training)
61
+ # common="--actions bcr-phylo --bcr-phylo-actions cache-parameters:partition --only-csv-plots" # --base-outdir /fh/local/dralph/partis/tree-metrics" # --sub-slurm" # /loc/scratch/dralph/partis/tree-metrics
62
+ common="$common --n-max-queries 150000" # $common --iseed 1" for the dtr training samples we usually only want to test on 150k or so queries (and also sometimes need to restrict to certain seeds to avoid training samples)
63
+ # echo $bin --label dtr-train-v0 --n-replicates 5 --n-sim-events-per-proc 1000 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --selection-strength 0.75 --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,200:selection-strength,0.5 $common &
64
+ # echo $bin --label dtr-train-v1 --n-replicates 5 --n-sub-procs 30 --n-sim-events-per-proc 50000 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 30 --selection-strength 0.75 --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,15:selection-strength,0.5 $common &
65
+ # echo $bin --label dtr-train-v2 --n-replicates 2 --n-sub-procs 15 --n-sim-events-per-proc 300000 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 20 --selection-strength 0.75 --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,15:selection-strength,0.5 $common &
66
+ # echo $bin --label dtr-train-v3 --n-replicates 2 --n-sub-procs 25 --n-sim-events-per-proc 50000 --carry-cap-list=-1 --obs-times-list=-1 --n-sim-seqs-per-gen-list=-1 --selection-strength=-1. --lb-tau-list 0.0025 --no-scan-parameter-variances carry-cap,250..500..900..1000..1100..1500..5000:obs-times,75..100..150..200..1000:n-sim-seqs-per-generation,15..30..75..150..500:selection-strength,0.5..0.9..0.95..1.0 $common # NOTE made a second replicate (iseed 1) with only 1000 events, just for testing
67
+ # done
68
+
69
+
70
+ # ----------------------------------------------------------------------------------------
71
+ # old ones:
72
+ # echo $bin --label vary-carry-cap-v0 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 500:750:1000:2000:5000 --obs-times-list 100,200 --n-sim-seqs-per-gen-list 75 --include-relative-affy-plots $common
73
+ # echo $bin --label vary-obs-times-v0 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 100:200:300:100,150:200,250:100,200,300 --n-sim-seqs-per-gen-list 100:100:100:50:50:33 --zip-vars obs-times:n-sim-seqs-per-gen --include-relative-affy-plots $common
74
+ # echo $bin --label vary-obs-times-v1 --n-replicates 10 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 300:100,200,300:200,250,300 --n-sim-seqs-per-gen-list 100:33:33 --zip-vars obs-times:n-sim-seqs-per-gen --include-relative-affy-plots $common
75
+ # echo $bin --label vary-obs-frac-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 30:50:75:100:150:200 $common
76
+ # echo $bin --label vary-metric-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --metric-for-target-distance-list aa:aa-sim-blosum --include-relative-affy-plots $common
77
+ # echo $bin --label vary-metric-v1 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 500 --n-sim-seqs-per-gen-list 100 --metric-for-target-distance-list aa:aa-sim-blosum --include-relative-affy-plots $common --pvks-to-plot aa-sim-blosum # seeing if different parameters will change the fact that lbi does better than cons-dist-aa (as in vary-metric-v0)
78
+ # echo $bin --label vary-selection-strength-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --selection-strength-list 0.1:0.4:0.7:0.8:0.9:1.0 $common
79
+ # echo $bin --label carry-cap-vs-n-obs-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 260:260:500:500:700:700:1500:1500:3000:3000 --obs-times-list 150 --n-sim-seqs-per-gen-list 13:26:25:50:35:70:75:150:150:300 --lb-tau-list 0.0025 --zip-vars carry-cap:n-sim-seqs-per-gen --final-plot-xvar carry-cap --legend-var obs_frac $common
80
+ # echo $bin --label carry-cap-vs-n-obs-only-leaves-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 250:500:1000:3000 --obs-times-list 150 --n-sim-seqs-per-gen-list 15:75:500 --lb-tau-list 0.0025 --dont-observe-common-ancestors --final-plot-xvar carry-cap $common
81
+ # echo $bin --label choose-among-families-v1 --n-replicates 30 --n-sim-events-per-proc 30 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --selection-strength 0.75 --lb-tau-list 0.0025 --dont-observe-common-ancestors --no-scan-parameter-variances carry-cap,2000:obs-times,150:n-sim-seqs-per-generation,200:selection-strength,0.5 $common
82
+ # echo $bin --label choose-among-families-v2 --n-replicates 10 --n-sim-events-per-proc 30 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --selection-strength 0.75 --lb-tau-list 0.0025 --dont-observe-common-ancestors --no-scan-parameter-variances selection-strength,0.5 $common
83
+ # echo $bin --label choose-among-families-v3 --n-replicates 10 --n-sim-events-per-proc 30 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --dont-observe-common-ancestors $common
84
+ # echo $bin --label choose-among-families-v4 --n-replicates 10 --n-sim-events-per-proc 150 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --dont-observe-common-ancestors $common
85
+ # echo $bin --label choose-among-families-v5 --n-replicates 10 --n-sim-events-per-proc 150 --carry-cap-list 1500 --obs-times-list 150 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 $common
86
+ # echo $bin --label vary-sampling-scheme-v0 --n-replicates 30 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list 150 --n-sim-seqs-per-gen-list 100 --leaf-sampling-scheme-list uniform-random:affinity-biased:high-affinity --lb-tau-list 0.0025 --include-relative-affy-plots $common
87
+ # echo $bin --label vary-context-dependence-v0 --n-replicates 5 --n-sim-events-per-proc 10 --carry-cap-list 350 --obs-times-list 100 --n-sim-seqs-per-gen-list 30 --context-depend-list 0:1 --lb-tau-list 0.0025 $common --n-sub-procs 10
88
+ # $bin --label among-family-variation-v3 --n-replicates 15 --n-sim-events-per-proc 10 --carry-cap-list 1000 --obs-times-list=-1 --n-sim-seqs-per-gen-list 150 --lb-tau-list 0.0025 --parameter-variances-list obs-times,50..50:obs-times,150..150:obs-times,500..500:obs-times,100..150..250:obs-times,50..150..250..500 --final-plot-xvar parameter-variances $common
bin/smetric-run.py ADDED
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import absolute_import, division, unicode_literals
3
+ from __future__ import print_function
4
+ import sys
5
+ import colored_traceback.always
6
+ import os
7
+ import yaml
8
+ import argparse
9
+ import numpy
10
+
11
+ sys.path.insert(1, '.') #'./python')
12
+ import python.utils as utils
13
+ import python.treeutils as treeutils
14
+
15
+ # NOTE this only really works on simulation, although it might not take much work to get it working on data
16
+
17
+ parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)  # command line interface for this script: read partis output and run selection metric calculations on it via treeutils
18
+ parser.add_argument('--infname', required=True)  # input file, read below with utils.read_output()
19
+ parser.add_argument('--base-plotdir', required=True)  # forwarded as base_plotdir to the treeutils calls below
20
+ parser.add_argument('--lb-tau', type=float)  # forwarded as lb_tau to the treeutils calls below
21
+ parser.add_argument('--dont-normalize-lbi', action='store_true')  # forwarded as dont_normalize_lbi
22
+ parser.add_argument('--action', choices=['train', 'test'])  # only consulted in the 'dtr' branch below (train_dtr is set when this is 'train')
23
+ parser.add_argument('--dtr-path')  # not read directly in this script (the whole args namespace is handed to treeutils)
24
+ parser.add_argument('--metric-method', default='aa-lbi')  # 'dtr' selects the add_smetrics() branch below, anything else goes to calculate_individual_tree_metrics()
25
+ parser.add_argument('--dtr-cfg')  # forwarded as dtr_cfg in the 'dtr' branch below
26
+ parser.add_argument('--only-csv-plots', action='store_true')  # forwarded as only_csv
27
+ parser.add_argument('--n-max-queries', type=int, default=-1)  # forwarded to utils.read_output(); a message is printed below if it differs from the default -1
28
+ parser.add_argument('--max-family-size', type=int, help='subset each family down to this size before passing to treeutils')
29
+ parser.add_argument('--cluster-indices')  # post-processed below with utils.get_arg_list(..., intify_with_ranges=True)
30
+ parser.add_argument('--min-selection-metric-cluster-size', type=int, default=treeutils.default_min_selection_metric_cluster_size)  # forwarded as min_cluster_size
31
+ parser.add_argument('--include-relative-affy-plots', action='store_true')  # forwarded as include_relative_affy_plots
32
+ parser.add_argument('--make-tree-plots', action='store_true')  # when set, a temporary workdir is created below
33
+ parser.add_argument('--only-look-upwards', action='store_true')  # forwarded as only_look_upwards
34
+ parser.add_argument('--label-tree-nodes', action='store_true')  # not read directly in this script (the whole args namespace is handed to treeutils)
35
+ parser.add_argument('--label-root-node', action='store_true')  # not read directly in this script (the whole args namespace is handed to treeutils)
36
+ parser.add_argument('--selection-metric-plot-cfg', default= ':'.join(treeutils.default_plot_cfg))  # default is the colon-joined treeutils.default_plot_cfg; post-processed below with utils.get_arg_list()
37
+ args = parser.parse_args()
38
+ # NOTE extra required args are set in treeutils plot_tree_metrics()
39
+ args.cluster_indices = utils.get_arg_list(args.cluster_indices, intify_with_ranges=True)  # replaces the raw command line string with the processed value
40
+ args.selection_metric_plot_cfg = utils.get_arg_list(args.selection_metric_plot_cfg, choices=treeutils.all_plot_cfg)  # processed against the allowed values in treeutils.all_plot_cfg
41
+ workdir = None, None
42
+ if args.make_tree_plots or 'tree' in args.selection_metric_plot_cfg:
43
+ workdir = utils.choose_random_subdir('/tmp/%s/tree-metrics' % os.getenv('USER'))
44
+
45
+ if args.n_max_queries != -1:
46
+ print(' --n-max-queries set to %d' % args.n_max_queries)
47
+ glfo, true_lines, _ = utils.read_output(args.infname, n_max_queries=args.n_max_queries)
48
+
49
+ # numpy.random.seed(1)
50
+ if args.max_family_size is not None:
51
+ for line in [l for l in true_lines if len(l['unique_ids']) > args.max_family_size]:
52
+ iseqs_to_keep = numpy.random.choice(range(len(line['unique_ids'])), size=args.max_family_size, replace=False)
53
+ utils.restrict_to_iseqs(line, iseqs_to_keep, glfo)
54
+
55
+ if args.metric_method == 'dtr':
56
+ raise Exception('I think the [new] first arg here (metrics_to_calc) isn\'t right, but don\'t want to test cause i don\'t care about dtr')
57
+ treeutils.add_smetrics(args, ['lbi', 'lbr', 'dtr'], None, args.lb_tau, base_plotdir=args.base_plotdir,
58
+ train_dtr=args.action=='train', dtr_cfg=args.dtr_cfg, true_lines_to_use=true_lines, workdir=workdir) # NOTE if you need this in the future you may want to add tree_inference_method 'gctree' and tree_inference_outdir
59
+ else:
60
+ treeutils.calculate_individual_tree_metrics(args.metric_method, true_lines, base_plotdir=args.base_plotdir, lb_tau=args.lb_tau, only_csv=args.only_csv_plots,
61
+ min_cluster_size=args.min_selection_metric_cluster_size, include_relative_affy_plots=args.include_relative_affy_plots,
62
+ dont_normalize_lbi=args.dont_normalize_lbi, workdir=workdir, cluster_indices=args.cluster_indices, only_look_upwards=args.only_look_upwards, args=args) #, debug=True)