pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. pg_sui-1.6.16a3.dist-info/METADATA +292 -0
  2. pg_sui-1.6.16a3.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +922 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1436 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1121 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1316 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.3.dist-info/METADATA +0 -322
  83. pg_sui-0.2.3.dist-info/RECORD +0 -75
  84. pg_sui-0.2.3.dist-info/top_level.txt +0 -3
  85. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  88. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  89. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  90. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  93. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  94. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  95. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  96. pgsui/example_data/trees/test.iqtree +0 -376
  97. pgsui/example_data/trees/test.qmat +0 -5
  98. pgsui/example_data/trees/test.rate +0 -2033
  99. pgsui/example_data/trees/test.tre +0 -1
  100. pgsui/example_data/trees/test_n10.rate +0 -19
  101. pgsui/example_data/trees/test_n100.rate +0 -109
  102. pgsui/example_data/trees/test_n500.rate +0 -509
  103. pgsui/example_data/trees/test_siterates.txt +0 -2024
  104. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  105. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  106. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  107. pgsui/example_data/vcf_files/test.vcf +0 -244
  108. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  109. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  110. pgsui/impute/estimators.py +0 -1268
  111. pgsui/impute/impute.py +0 -1463
  112. pgsui/impute/simple_imputers.py +0 -1431
  113. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  114. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  115. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  116. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  117. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  118. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  119. pgsui/pg_sui.py +0 -261
  120. pgsui/utils/sequence_tools.py +0 -407
  121. simulation/sim_benchmarks.py +0 -333
  122. simulation/sim_treeparams.py +0 -475
  123. test/__init__.py +0 -0
  124. test/pg_sui_simtest.py +0 -215
  125. test/pg_sui_testing.py +0 -523
  126. test/test.py +0 -151
  127. test/test_pgsui.py +0 -374
  128. test/test_tkc.py +0 -185
@@ -1,333 +0,0 @@
1
- #!/usr/bin/env python
2
- import sys
3
- import os
4
- import subprocess
5
- import errno
6
-
7
- import toytree
8
- import toyplot.pdf
9
- import pyvolve
10
- import copy
11
- import random
12
- import re
13
- import numpy as np
14
- import pandas as pd
15
-
16
- import matplotlib.pyplot as plt
17
-
18
def main():
    """
    Using pyvolve and toytree to simulate data for PG-SUI

    Generates random data matrices w/ varying dimensions
    for the purposes of runtime benchmarking

    For every (num_samples, num_loci) combination a random unit-height
    guide tree is drawn, one gene alignment per locus is simulated along
    it, SNPs are sampled from each alignment and concatenated into a
    single FASTA, and (optionally) IQ-TREE is run on that SNP matrix.
    All settings are hard-coded below; outputs are written relative to
    the current working directory.
    """
    seed = 553423
    random.seed(seed)
    # Model parameters and SNP sampling draw from NumPy's global generator,
    # so it must be seeded as well for runs to be repeatable.
    np.random.seed(seed)

    num_samples_range = [10, 100, 1000]
    num_loci_range = [100, 1000, 10000]
    loc_length = 100
    write_gene_alignments = False
    make_gene_trees = False
    make_guidetrees = True  # set to true to run IQTREE on simulated SNP matrices
    keep_all = False  # set to true to keep ALL iqtree outputs
    keep_report = True  # set to true to keep .iqtree files
    get_siterates = True  # set to true to infer site-specific rates in IQTREE
    snps_per_locus = 1
    iqtree_bin = "iqtree2"
    iq_procs = 2

    ###################

    if get_siterates and not make_guidetrees:
        print("ERROR: can't set get_siterates=True and make_guidetrees=False")
        print("Setting make_guidetrees=True and proceeding...")
        make_guidetrees = True

    # Report the incompatible gene-tree setting once up front instead of
    # once per simulated locus.
    if make_gene_trees and not write_gene_alignments:
        print("ERROR: Can't make gene trees when write_gene_alignments = False")
        make_gene_trees = False

    for num_samples in num_samples_range:
        outgroup = "r0"
        tobj = toytree.rtree.unittree(
            ntips=num_samples,
            treeheight=1.0,
            random_names=False,
            seed=random.randint(1, (sys.maxsize * 2 + 1)),
        )
        guidetree = tobj.write(tree_format=5)

        for num_loci in num_loci_range:
            print("num_samples:", num_samples)
            print("num_loci:", num_loci)

            base = "benchmark_" + "i" + str(num_samples) + "_l" + str(num_loci)
            tobj.write((base + "_guidetree.tre"))

            # One growing list of SNP states per tip of the guide tree.
            data = {ind: [] for ind in tobj.get_tip_labels()}

            my_tree = pyvolve.read_tree(tree=guidetree)

            # for model in ["gtr","gtrgamma"]:
            for model in ["gtrgamma"]:
                model_outpath = base + "_" + model

                # The output prefix does not depend on the locus, so it is
                # resolved once per model.
                if write_gene_alignments:
                    fasta_outpath = "full_alignments/"
                    os.makedirs(fasta_outpath, exist_ok=True)
                else:
                    fasta_outpath = model_outpath

                for locus in range(num_loci):
                    print(locus)
                    f = np.random.random(4)
                    f /= f.sum()
                    parameters = {
                        "mu": {
                            "AC": np.random.uniform(low=0.0, high=1.0),
                            "AG": np.random.uniform(low=0.0, high=1.0),
                            "AT": np.random.uniform(low=0.0, high=1.0),
                            "CG": np.random.uniform(low=0.0, high=1.0),
                            "CT": np.random.uniform(low=0.0, high=1.0),
                            "GT": np.random.uniform(low=0.0, high=1.0),
                        },
                        "state_freqs": [f[0], f[1], f[2], f[3]],
                    }
                    if model == "gtr":
                        # GTR model, without rate heterogeneity
                        my_model = pyvolve.Model("nucleotide", parameters)
                    else:
                        my_model = pyvolve.Model(
                            "nucleotide",
                            parameters,
                            rate_factors=[
                                np.random.uniform(low=0.1, high=0.7, size=1),
                                np.random.uniform(low=0.5, high=1.2, size=1),
                                np.random.uniform(low=1.0, high=1.8, size=1),
                                np.random.uniform(low=1.5, high=5.0, size=1),
                            ],
                            rate_probs=[0.4, 0.3, 0.2, 0.1],
                        )

                    fastaout = (
                        fasta_outpath
                        + model_outpath
                        + "_loc"
                        + str(locus)
                        + "_gene-alignment.fasta"
                    )
                    # sample a gene alignment
                    loc = sample_locus(
                        my_tree, my_model, loc_length, snps_per_locus, fastaout
                    )

                    if loc:
                        # sample SNP(s) from gene alignment
                        sampled = sample_snp(
                            read_fasta(fastaout), loc_length, snps_per_locus
                        )
                        if sampled is not None:
                            data = add_locus(data, sampled)

                    if not write_gene_alignments:
                        # The simulator may have failed before writing the
                        # file, so a missing alignment is not an error here.
                        silentremove(fastaout)
                    elif make_gene_trees and loc:
                        run_iqtree(
                            fastaout,
                            iqtree_path=iqtree_bin,
                            keep_all=keep_all,
                            keep_report=keep_report,
                            rates=get_siterates,
                            procs=iq_procs,
                        )
                        reroot_tree(
                            tree=(fastaout + ".treefile"),
                            rooted=(fastaout + ".rooted.tre"),
                            outgroup_wildcard=outgroup,
                        )

                # write full SNP alignment & generate tree
                all_snp_out = model_outpath + "_base-snps-concat.fasta"
                write_fasta(data, all_snp_out)
                if make_guidetrees:
                    run_iqtree(
                        all_snp_out,
                        iqtree_path=iqtree_bin,
                        keep_all=keep_all,
                        keep_report=keep_report,
                        rates=get_siterates,
                        procs=iq_procs,
                    )
                    reroot_tree(
                        tree=(all_snp_out + ".treefile"),
                        rooted=(all_snp_out + ".rooted.tre"),
                        outgroup_wildcard=outgroup,
                    )
149
-
150
def reroot_tree(tree, rooted="out.rooted.tre", outgroup_wildcard="out"):
    """
    Root a tree on the tips matching a wildcard and write it to ``rooted``.

    Args:
        tree: Tree input handed straight to ``toytree.tree``.
        rooted: Path the resulting tree is written to.
        outgroup_wildcard: Wildcard passed to the tree's ``root`` method.

    Returns:
        The rooted tree on success. If rooting or writing the rooted tree
        raises, the tree as loaded is written to ``rooted`` instead and
        None is returned.
    """
    loaded = toytree.tree(tree)
    try:
        rooted_tree = loaded.root(wildcard=outgroup_wildcard)
        rooted_tree.write(rooted, tree_format=5)
    except Exception:
        # Best effort: fall back to writing the tree exactly as loaded.
        loaded.write(rooted, tree_format=5)
        return None
    return rooted_tree
159
-
160
-
161
def hybridization(dat, prob=0.1, source=None, target=None):
    """
    Overwrite random sites of target individuals with states from donors.

    For every target individual, ``int(n_sites * prob)`` distinct site
    indices are drawn, and at each of them the state is replaced by the
    state a randomly chosen source individual has at that same site.

    Args:
        dat: Mapping of individual name -> sequence of per-site states.
        prob: Fraction of each target's sites to overwrite.
        source: Names eligible as donors; defaults to every key of ``dat``.
        target: Names that receive donor states; defaults to every key of
            ``dat``.

    Returns:
        A new dict with the same keys as ``dat`` whose values are fresh
        lists. ``dat`` and the sequences it holds are left untouched.
    """
    # Copy each sequence so edits never leak back into the caller's data
    # (a plain assignment would alias the caller's lists).
    new_dat = {name: list(seq) for name, seq in dat.items()}
    donors = list(dat) if source is None else source
    recipients = list(dat) if target is None else target

    for recipient in recipients:
        n_sites = len(new_dat[recipient])
        # Clamp so prob > 1 cannot request more sites than exist.
        n_swap = min(n_sites, int(n_sites * prob))
        swap_sites = np.random.choice(n_sites, size=n_swap, replace=False)
        for site in swap_sites:
            donor = np.random.choice(donors, size=1)[0]
            new_dat[recipient][site] = new_dat[donor][site]
    return new_dat
180
-
181
def run_iqtree(aln,
        iqtree_path="iqtree",
        keep_all=False,
        keep_report=False,
        outgroup=None,
        rates=False,
        procs=4):
    """
    Run IQ-TREE on an alignment and tidy up its by-products.

    Args:
        aln: Path of the alignment; also the prefix of every output file.
        iqtree_path: Executable to invoke.
        keep_all: When False, the .bionj, .ckp.gz, .log, .mldist and
            .uniqueseq.phy outputs are deleted after the run.
        keep_report: Only consulted when ``keep_all`` is False; when it is
            also False the .iqtree report is deleted too.
        outgroup: Optional value passed with ``-o``.
        rates: When True, ``--mlrate`` is added to the command line.
        procs: Value passed with ``-T``.

    Returns:
        ``aln + ".treefile"``. The exit status and output of the process
        are not inspected, so the file is not guaranteed to exist.
    """
    command = [iqtree_path, "-s", str(aln), "-m", "GTR+I*G4", "-redo", "-T", str(procs)]
    if outgroup is not None:
        command.extend(["-o", str(outgroup)])
    if rates:
        # The original note says to use a -ws* option (the disabled code
        # used "-wsr") rather than --mlrate with iq-tree 1.6xx.
        command.append("--mlrate")

    # Output is captured only to keep the console quiet; it is discarded.
    subprocess.run(command, capture_output=True, text=True)

    if not keep_all:
        # delete everything except treefile
        for suffix in (".bionj", ".ckp.gz", ".log", ".mldist", ".uniqueseq.phy"):
            silentremove(aln + suffix)
        if not keep_report:
            silentremove(aln + ".iqtree")
    return aln + ".treefile"
219
-
220
def add_locus(d, new):
    """
    Append the states of a newly sampled locus to each sample in place.

    Args:
        d: Mapping of sample name -> list of states collected so far.
        new: Mapping of sample name -> iterable of states to add; it must
            hold an entry for every key of ``d``.

    Returns:
        The same ``d`` object, after extension.
    """
    for sample, states in d.items():
        states.extend(new[sample])
    return d
225
-
226
def write_fasta(seqs, fas):
    """
    Write sequences to a FASTA file, one unwrapped line per sequence.

    Args:
        seqs: Mapping of record name -> iterable of string states; the
            states are joined without a separator.
        fas: Path of the file to create or overwrite.
    """
    # The with-block closes the handle, so no explicit close() is needed.
    with open(fas, "w") as fh:
        for name, states in seqs.items():
            fh.write(">" + str(name) + "\n")
            fh.write("".join(states) + "\n")
235
-
236
def read_fasta(fasta):
    """
    Parse a FASTA file into a dict of record name -> sequence string.

    Blank lines are skipped, surrounding whitespace is stripped, and the
    lines of a multi-line record are concatenated. Text that appears
    before the first ``>`` header is ignored. If a name occurs twice the
    later record wins.

    Args:
        fasta: Path of the FASTA file to read.

    Returns:
        Dict mapping each header (without the leading ``>``) to its
        sequence; empty when the file holds no records.
    """
    data = {}
    sample = None
    chunks = []
    with open(fasta, "r") as fin:
        for line in fin:
            line = line.strip()
            if not line:  # If blank line.
                continue
            if line[0] == ">":
                # Compare against None so a record with an empty name is
                # still stored rather than merged into the next one.
                if sample is not None:
                    data[sample] = "".join(chunks)
                sample = line[1:]
                chunks = []
            else:
                chunks.append(line)
    # Flush the final record; with no header seen there is nothing to store.
    if sample is not None:
        data[sample] = "".join(chunks)
    return data
255
-
256
def sample_snp(aln_dict, aln_len, snps_per_locus=1):
    """
    Pick variable columns (SNPs) from an alignment.

    Args:
        aln_dict: Mapping of sample name -> aligned sequence; every
            sequence must have at least ``aln_len`` positions.
        aln_len: Number of leading columns to inspect.
        snps_per_locus: Number of variable columns to draw at random. If
            fewer variable columns exist, all of them are returned.

    Returns:
        None when no inspected column is variable (and ``aln_len`` != 1).
        Otherwise a dict keyed like ``aln_dict``:
          * ``aln_len`` == 1: the single state of each sample, returned
            without checking that the column is variable;
          * exactly one variable column: the single state at that column;
          * several variable columns: a list of the states at the randomly
            drawn columns, in the same column order for every sample.
    """
    if aln_len == 1:
        return {sample: seq[0] for sample, seq in aln_dict.items()}

    sequences = list(aln_dict.values())
    # A column is variable as soon as it holds more than one distinct state.
    snp_indices = [
        col for col in range(aln_len)
        if len({seq[col] for seq in sequences}) > 1
    ]

    if not snp_indices:
        return None
    if len(snp_indices) == 1:
        # sample them all
        only = snp_indices[0]
        return {sample: seq[only] for sample, seq in aln_dict.items()}

    # Sampling without replacement cannot return more columns than exist.
    n_pick = min(snps_per_locus, len(snp_indices))
    sampled_indices = np.random.choice(snp_indices, size=n_pick, replace=False)
    return {
        sample: [seq[col] for col in sampled_indices]
        for sample, seq in aln_dict.items()
    }
287
-
288
def sample_locus(tree, model, gene_len=1000, num_snps=1, out="out.fasta"):
    """
    Simulate one gene alignment with pyvolve and write it as FASTA.

    Args:
        tree: Tree passed to ``pyvolve.Evolver``.
        model: Model(s) passed to ``pyvolve.Partition``.
        gene_len: Partition size, i.e. the alignment length.
        num_snps: Accepted but not used by this function.
        out: Path of the FASTA file to write.

    Returns:
        True when the simulation ran without raising, False when any
        exception occurred (the exception itself is discarded).
    """
    try:
        partition = pyvolve.Partition(models=model, size=gene_len)
        evolver = pyvolve.Evolver(partitions=partition, tree=tree)
        # Rate and info side files are switched off; only the FASTA is kept.
        evolver(seqfile=out, seqfmt="fasta", ratefile=False, infofile=False)
    except Exception:
        return False
    else:
        return True
299
-
300
def silentremove(filename):
    """
    Delete ``filename``, treating an already-missing file as success.

    Any other OS-level failure still propagates to the caller.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # FileNotFoundError is the OSError raised for errno.ENOENT.
        pass
306
-
307
def get_tree_tips(tree):
    """
    Split a Newick-style string into its non-empty tokens.

    The string is split on spaces, commas, parentheses and semicolons.
    Nothing else is stripped, so anything attached to a label (for
    example ``:0.1``) remains part of the returned token.

    Args:
        tree: Tree string to tokenise.

    Returns:
        List of the non-empty tokens, in order of appearance.
    """
    # Raw string: "\(" in a normal literal is an invalid escape sequence,
    # and parentheses need no escaping inside a character class.
    return [token for token in re.split(r"[ ,();]", tree) if token]
310
-
311
def basic_tree_plot(tree, out="out.pdf"):
    """
    Draw ``tree`` with a fixed style and render the canvas to a PDF.

    Args:
        tree: Object exposing a ``draw`` method that returns a
            ``(canvas, axes, mark)`` triple.
        out: Path of the PDF to write.
    """
    edge_style = {
        "stroke": toytree.colors[0],
        "stroke-width": 1,
    }
    draw_options = {
        "edge_type": "p",
        "edge_style": edge_style,
        "tip_labels_align": True,
        "tip_labels_style": {"font-size": "5px"},
        "node_labels": False,
        "tip_labels": True,
    }

    # Only the canvas is needed for rendering; axes and mark are discarded.
    canvas, _axes, _mark = tree.draw(width=400, height=600, **draw_options)
    toyplot.pdf.render(canvas, out)
331
-
332
- if __name__ == "__main__":
333
- main()