gwaslab 3.5.7__py3-none-any.whl → 3.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (63) hide show
  1. gwaslab/__init__.py +2 -0
  2. gwaslab/bd_common_data.py +1 -0
  3. gwaslab/bd_get_hapmap3.py +0 -1
  4. gwaslab/data/formatbook.json +78 -0
  5. gwaslab/g_Sumstats.py +98 -24
  6. gwaslab/g_SumstatsMulti.py +287 -0
  7. gwaslab/g_SumstatsPair.py +101 -16
  8. gwaslab/g_Sumstats_polars.py +245 -0
  9. gwaslab/g_headers.py +12 -3
  10. gwaslab/g_meta.py +123 -47
  11. gwaslab/g_meta_update.py +48 -0
  12. gwaslab/g_vchange_status_polars.py +44 -0
  13. gwaslab/g_version.py +2 -2
  14. gwaslab/hm_casting.py +169 -110
  15. gwaslab/hm_casting_polars.py +202 -0
  16. gwaslab/hm_harmonize_sumstats.py +19 -8
  17. gwaslab/io_load_ld.py +529 -0
  18. gwaslab/io_preformat_input.py +11 -0
  19. gwaslab/io_preformat_input_polars.py +632 -0
  20. gwaslab/io_process_args.py +25 -1
  21. gwaslab/io_read_ldsc.py +34 -3
  22. gwaslab/io_read_pipcs.py +62 -6
  23. gwaslab/prscs_gigrnd.py +122 -0
  24. gwaslab/prscs_mcmc_gtb.py +136 -0
  25. gwaslab/prscs_parse_genet.py +98 -0
  26. gwaslab/qc_build.py +53 -0
  27. gwaslab/qc_check_datatype.py +10 -8
  28. gwaslab/qc_check_datatype_polars.py +128 -0
  29. gwaslab/qc_fix_sumstats.py +25 -23
  30. gwaslab/qc_fix_sumstats_polars.py +193 -0
  31. gwaslab/util_ex_calculate_ldmatrix.py +49 -19
  32. gwaslab/util_ex_gwascatalog.py +71 -28
  33. gwaslab/util_ex_ldsc.py +67 -21
  34. gwaslab/util_ex_match_ldmatrix.py +396 -0
  35. gwaslab/util_ex_run_2samplemr.py +0 -2
  36. gwaslab/util_ex_run_ccgwas.py +155 -0
  37. gwaslab/util_ex_run_coloc.py +1 -1
  38. gwaslab/util_ex_run_hyprcoloc.py +117 -0
  39. gwaslab/util_ex_run_mesusie.py +155 -0
  40. gwaslab/util_ex_run_mtag.py +92 -0
  41. gwaslab/util_ex_run_prscs.py +85 -0
  42. gwaslab/util_ex_run_susie.py +40 -9
  43. gwaslab/util_in_estimate_ess.py +18 -0
  44. gwaslab/util_in_fill_data.py +20 -1
  45. gwaslab/util_in_filter_value.py +10 -5
  46. gwaslab/util_in_get_sig.py +71 -13
  47. gwaslab/util_in_meta.py +168 -4
  48. gwaslab/util_in_meta_polars.py +174 -0
  49. gwaslab/viz_plot_compare_effect.py +87 -23
  50. gwaslab/viz_plot_credible_sets.py +55 -11
  51. gwaslab/viz_plot_effect.py +22 -12
  52. gwaslab/viz_plot_miamiplot2.py +3 -2
  53. gwaslab/viz_plot_mqqplot.py +84 -81
  54. gwaslab/viz_plot_qqplot.py +6 -6
  55. gwaslab/viz_plot_regional2.py +2 -1
  56. gwaslab/viz_plot_stackedregional.py +4 -1
  57. {gwaslab-3.5.7.dist-info → gwaslab-3.5.8.dist-info}/METADATA +8 -6
  58. gwaslab-3.5.8.dist-info/RECORD +117 -0
  59. {gwaslab-3.5.7.dist-info → gwaslab-3.5.8.dist-info}/WHEEL +1 -1
  60. gwaslab-3.5.7.dist-info/RECORD +0 -96
  61. {gwaslab-3.5.7.dist-info → gwaslab-3.5.8.dist-info/licenses}/LICENSE +0 -0
  62. {gwaslab-3.5.7.dist-info → gwaslab-3.5.8.dist-info/licenses}/LICENSE_before_v3.4.39 +0 -0
  63. {gwaslab-3.5.7.dist-info → gwaslab-3.5.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,245 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import time
4
+ import copy
5
+ from gwaslab.g_Sumstats_summary import summarize
6
+ from gwaslab.g_Sumstats_summary import lookupstatus
7
+ from gwaslab.io_preformat_input_polars import preformatp
8
+ from gwaslab.io_to_formats import _to_format
9
+ from gwaslab.g_Log import Log
10
+ from gwaslab.qc_fix_sumstats import fixID
11
+ from gwaslab.qc_fix_sumstats import flipSNPID
12
+ from gwaslab.qc_fix_sumstats import stripSNPID
13
+ from gwaslab.qc_fix_sumstats import removedup
14
+ from gwaslab.qc_fix_sumstats import fixchr
15
+ from gwaslab.qc_fix_sumstats import fixpos
16
+ from gwaslab.qc_fix_sumstats import fixallele
17
+ from gwaslab.qc_fix_sumstats import parallelnormalizeallele
18
+ from gwaslab.qc_fix_sumstats import sanitycheckstats
19
+ from gwaslab.qc_fix_sumstats import parallelizeliftovervariant
20
+ from gwaslab.qc_fix_sumstats import flipallelestats
21
+ from gwaslab.qc_fix_sumstats import sortcoordinate
22
+ from gwaslab.qc_fix_sumstats import sortcolumn
23
+ from gwaslab.qc_fix_sumstats import _set_build
24
+ from gwaslab.qc_fix_sumstats import _process_build
25
+ from gwaslab.hm_harmonize_sumstats import parallelecheckaf
26
+ from gwaslab.hm_harmonize_sumstats import paralleleinferaf
27
+ from gwaslab.hm_harmonize_sumstats import checkref
28
+ from gwaslab.hm_harmonize_sumstats import oldcheckref
29
+ from gwaslab.hm_harmonize_sumstats import rsidtochrpos
30
+ from gwaslab.hm_harmonize_sumstats import parallelizeassignrsid
31
+ from gwaslab.hm_harmonize_sumstats import parallelinferstrand
32
+ from gwaslab.hm_harmonize_sumstats import parallelrsidtochrpos
33
+ from gwaslab.hm_harmonize_sumstats import _paralleleinferafwithmaf
34
+ from gwaslab.util_in_filter_value import filtervalues
35
+ from gwaslab.util_in_filter_value import filterout
36
+ from gwaslab.util_in_filter_value import filterin
37
+ from gwaslab.util_in_filter_value import filterregionin
38
+ from gwaslab.util_in_filter_value import filterregionout
39
+ from gwaslab.util_in_filter_value import _filter_indel
40
+ from gwaslab.util_in_filter_value import _filter_palindromic
41
+ from gwaslab.util_in_filter_value import _filter_snp
42
+ from gwaslab.util_in_filter_value import _filter_region
43
+ from gwaslab.util_in_filter_value import _exclude_hla
44
+ from gwaslab.util_in_filter_value import _search_variants
45
+ from gwaslab.util_in_filter_value import inferbuild
46
+ from gwaslab.util_in_filter_value import sampling
47
+ from gwaslab.util_in_filter_value import _get_flanking
48
+ from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
49
+ from gwaslab.util_in_filter_value import _get_flanking_by_id
50
+ from gwaslab.util_in_calculate_gc import lambdaGC
51
+ from gwaslab.util_in_convert_h2 import _get_per_snp_r2
52
+ from gwaslab.util_in_get_sig import getsig
53
+ from gwaslab.util_in_get_density import getsignaldensity
54
+ from gwaslab.util_in_get_density import assigndensity
55
+ from gwaslab.util_in_get_sig import annogene
56
+ from gwaslab.util_in_get_sig import getnovel
57
+ from gwaslab.util_in_get_sig import _check_cis
58
+ from gwaslab.util_in_get_sig import _check_novel_set
59
+ from gwaslab.util_in_fill_data import filldata
60
+ from gwaslab.bd_get_hapmap3 import gethapmap3
61
+ from gwaslab.bd_common_data import get_chr_list
62
+ from gwaslab.bd_common_data import get_number_to_chr
63
+ from gwaslab.bd_common_data import get_chr_to_number
64
+ from gwaslab.bd_common_data import get_high_ld
65
+ from gwaslab.bd_common_data import get_format_dict
66
+ from gwaslab.bd_common_data import get_formats_list
67
+ from gwaslab.g_version import _show_version
68
+ from gwaslab.g_version import gwaslab_info
69
+ from gwaslab.g_meta import _init_meta
70
+ from gwaslab.g_meta import _append_meta_record
71
+ from gwaslab.g_meta_update import _update_meta
72
+ from gwaslab.util_ex_run_clumping import _clump
73
+ from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
74
+ from gwaslab.io_load_ld import tofinemapping_using_ld
75
+ from gwaslab.util_ex_calculate_prs import _calculate_prs
76
+ from gwaslab.viz_plot_mqqplot import mqqplot
77
+ from gwaslab.viz_plot_trumpetplot import plottrumpet
78
+ from gwaslab.viz_plot_compare_af import plotdaf
79
+ from gwaslab.util_ex_run_susie import _run_susie_rss
80
+ from gwaslab.util_ex_run_susie import _get_cs_lead
81
+ from gwaslab.qc_fix_sumstats import _check_data_consistency
82
+ from gwaslab.util_ex_ldsc import _estimate_h2_by_ldsc
83
+ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
84
+ from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
85
+ from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
86
+ from gwaslab.util_ex_ldproxyfinder import _extract_ld_proxy
87
+ from gwaslab.bd_get_hapmap3 import gethapmap3
88
+ from gwaslab.util_abf_finemapping import abf_finemapping
89
+ from gwaslab.util_abf_finemapping import make_cs
90
+ from gwaslab.io_read_pipcs import _read_pipcs
91
+ from gwaslab.util_in_estimate_ess import _get_ess
92
+ from gwaslab.viz_plot_credible_sets import _plot_cs
93
+ from gwaslab.hm_casting import _align_with_mold
94
+ from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
95
+ import gc
96
+ from gwaslab.viz_plot_phe_heatmap import _gwheatmap
97
+ from gwaslab.util_ex_run_prscs import _run_prscs
98
+
99
+ #20220309
100
class Sumstatsp():
    """Summary-statistics container whose table is loaded through ``preformatp``.

    NOTE(review): presumably the polars-backed counterpart of ``Sumstats``
    (the loader is imported from ``io_preformat_input_polars``) -- confirm.
    """

    def __init__(self,
                 sumstats,
                 fmt=None,
                 tab_fmt="tsv",
                 snpid=None,
                 rsid=None,
                 chrom=None,
                 pos=None,
                 ea=None,
                 nea=None,
                 ref=None,
                 alt=None,
                 eaf=None,
                 neaf=None,
                 maf=None,
                 n=None,
                 beta=None,
                 se=None,
                 chisq=None,
                 z=None,
                 f=None,
                 t=None,
                 p=None,
                 q=None,
                 mlog10p=None,
                 test=None,
                 info=None,
                 OR=None,
                 OR_95L=None,
                 OR_95U=None,
                 beta_95L=None,
                 beta_95U=None,
                 HR=None,
                 HR_95L=None,
                 HR_95U=None,
                 ncase=None,
                 ncontrol=None,
                 neff=None,
                 i2=None,
                 phet=None,
                 dof=None,
                 snpr2=None,
                 status=None,
                 other=None,
                 chrom_pat=None,
                 snpid_pat=None,
                 usekeys=None,
                 direction=None,
                 verbose=True,
                 study="Study_1",
                 trait="Trait_1",
                 build="99",
                 species="homo sapiens",
                 build_infer=False,
                 **readargs):
        """Initialise bookkeeping attributes and load the data via ``preformatp``.

        Parameters
        ----------
        sumstats :
            Input handed unchanged to ``preformatp``.
        fmt, tab_fmt :
            Format selectors forwarded to ``preformatp``.
        snpid ... status :
            Per-field column selectors; each is forwarded to ``preformatp``
            under the same keyword.
        other : list, optional
            Forwarded to ``preformatp``. ``None`` (the default) is replaced by
            a new empty list on every call, so instances never share one list.
        chrom_pat, snpid_pat, usekeys, direction :
            Forwarded to ``preformatp`` unchanged.
        verbose : bool
            Passed to the version banner and to ``preformatp``.
        study, species :
            Stored in ``self.meta["gwaslab"]``; ``study`` is also forwarded.
        trait, build :
            Forwarded to ``preformatp``; ``build`` is also kept as ``self.build``.
        build_infer :
            Accepted for signature compatibility; not read in this method.
        **readargs :
            Collected and forwarded as the single ``readargs`` argument.
        """
        # A literal ``[]`` default would be created once and shared by every
        # instance (and by whatever the loader does with it).
        other = [] if other is None else other

        # basic attributes
        self.data = pd.DataFrame()
        self.log = Log()
        self.ldsc_h2 = None
        self.ldsc_h2_results = None
        self.ldsc_rg = pd.DataFrame()
        self.ldsc_h2_cts = None
        self.ldsc_partitioned_h2_summary = None
        self.ldsc_partitioned_h2_results = None

        # meta information
        self.meta = _init_meta()
        self.build = build
        self.meta["gwaslab"]["study_name"] = study
        self.meta["gwaslab"]["species"] = species

        # fine-mapping state: path / file / plink_log
        self.finemapping = dict()

        # clumping state: clumps / clumps_raw / plink_log
        self.clumps = dict()

        # starts empty; nothing in this method fills it
        self.pipcs = pd.DataFrame()

        # print gwaslab version information
        _show_version(self.log, verbose=verbose)

        # preformat the data; this replaces the empty placeholder set above
        self.data = preformatp(
            sumstats=sumstats,
            fmt=fmt,
            tab_fmt=tab_fmt,
            snpid=snpid,
            rsid=rsid,
            chrom=chrom,
            pos=pos,
            ea=ea,
            nea=nea,
            ref=ref,
            alt=alt,
            eaf=eaf,
            neaf=neaf,
            maf=maf,
            n=n,
            beta=beta,
            se=se,
            chisq=chisq,
            z=z,
            f=f,
            t=t,
            p=p,
            q=q,
            mlog10p=mlog10p,
            test=test,
            info=info,
            OR=OR,
            OR_95L=OR_95L,
            OR_95U=OR_95U,
            beta_95L=beta_95L,
            beta_95U=beta_95U,
            HR=HR,
            HR_95L=HR_95L,
            HR_95U=HR_95U,
            i2=i2,
            phet=phet,
            dof=dof,
            snpr2=snpr2,
            ncase=ncase,
            ncontrol=ncontrol,
            neff=neff,
            direction=direction,
            study=study,
            build=build,
            trait=trait,
            status=status,
            other=other,
            usekeys=usekeys,
            chrom_pat=chrom_pat,
            snpid_pat=snpid_pat,
            verbose=verbose,
            readargs=readargs,
            log=self.log)

        # reclaim temporaries left behind by loading
        gc.collect()
gwaslab/g_headers.py CHANGED
@@ -33,9 +33,10 @@ dtype_dic={
33
33
  'SNPR2' : 'float64' ,
34
34
  'DOF' : 'Int64' ,
35
35
  'P_HET' : 'float64' ,
36
- 'I2_HET' : 'float64' ,
36
+ 'I2' : 'float64' ,
37
37
  'DENSITY' : 'Int64' ,
38
38
  'N' : 'Int64' ,
39
+ 'N_EFF' : 'float64' ,
39
40
  'N_CASE' : 'Int64' ,
40
41
  'N_CONTROL' : 'Int64' ,
41
42
  'GENENAME' : 'string' ,
@@ -92,9 +93,10 @@ description_dic={
92
93
  'SNPR2' :' per variant R2 ',
93
94
  'DOF' :' degree of freedom ',
94
95
  'P_HET' :' heterogeneity test P value ',
95
- 'I2_HET' :' heterogeneity I2 ',
96
+ 'I2' :' heterogeneity I2 ',
96
97
  'DENSITY' :' signal density ',
97
98
  'N' :' total sample size ',
99
+ 'N_EFF' :' Effective sample size ',
98
100
  'N_CASE' :' number of cases ',
99
101
  'N_CONTROL' :' number of controls ',
100
102
  'GENENAME' :' nearest gene symbol ',
@@ -117,7 +119,14 @@ def _get_headers(mode="all"):
117
119
  if mode=="info":
118
120
  return ["SNPID","rsID","CHR","POS","EA","NEA","STATUS"]
119
121
  elif mode=="stats":
120
- return ["BETA","SE","P","MLOG10P","N","N_CASE","N_CONTROL","Z","T","F","OR","OR_95L","OR_95U","HR","HR_95L","HR_95U","MAF","EAF","BETA_95L","BETA_95U"]
122
+ return ["BETA","SE","P","MLOG10P",
123
+ "N","N_CASE","N_CONTROL","N_EFF",
124
+ "Z","T","F",
125
+ "OR","OR_95L","OR_95U",
126
+ "HR","HR_95L","HR_95U",
127
+ "MAF","EAF",
128
+ "BETA_95L","BETA_95U",
129
+ "P_HET","I2"]
121
130
  else:
122
131
  return description_dic.keys()
123
132
 
gwaslab/g_meta.py CHANGED
@@ -1,54 +1,130 @@
1
1
  from gwaslab.g_version import gwaslab_info
2
2
 
3
- def _init_meta():
4
- metadata = {"gwaslab":{
5
- "gwaslab_version": gwaslab_info()["version"],
6
- "study_name":"Sumstats_1",
7
- "study_type":"Unknown",
8
- "species":"homo sapiens",
9
- "genome_build":"99",
10
- "variants":{
11
- "variant_number":"Unknown",
12
- "min_P":"Unknown",
13
- "number_of_chromosomes":"Unknown",
14
- },
3
+ def _init_meta(object="Sumstats"):
4
+ metadata_ssf ={
5
+ "genotyping_technology":"Unknown",
6
+ "gwas_id":"Unknown",
15
7
  "samples":{
16
- "sample_size":"Unknown",
17
- "sample_size_case":"Unknown",
18
- "sample_size_control":"Unknown",
19
- "sample_size_median":"Unknown",
20
- "sample_size_min":"Unknown",
21
- },
22
- "references":{
23
- "ref_rsid_tsv":"Unknown",
24
- "ref_rsid_vcf":"Unknown",
25
- "ref_seq":"Unknown",
26
- "ref_infer":"Unknown",
27
- "ref_infer_af":"Unknown",
28
- "ref_infer_daf":"Unknown",
29
- "ref_rsid_to_chrpos_tsv":"Unknown",
30
- "ref_rsid_to_chrpos_vcf":"Unknown"
31
- }
8
+ "sample_size":"Unknown",
9
+ "sample_ancestry":"European",
10
+ "ancestry_method":"self-reported|genetically determined",
11
+ } ,
12
+ "trait_description":"Unknown",
13
+ "minor_allele_freq_lower_limit":"Unknown",
14
+ "data_file_name":"Unknown",
15
+ "file_type":"Unknown",
16
+ "data_file_md5sum":"Unknown",
17
+ "is_harmonised":"Unchecked",
18
+ "is_sorted":"Unchecked",
19
+ "genome_assembly":"Unknown",
20
+ "date_last_modified":"Unknown",
21
+ "coordinate_system":"1-based",
22
+ "sex": "M|F|combined"
23
+ }
24
+ metadata_multi ={
25
+ "genome_assembly":"Unknown",
26
+ "date_last_modified":"Unknown",
27
+ "coordinate_system":"1-based"
28
+ }
29
+
30
+ # Sumstats
31
+ if object=="Sumstats":
32
+ metadata = {"gwaslab":{
33
+ "gwaslab_version": gwaslab_info()["version"],
34
+ "gwaslab_object":"gwaslab.Sumstats",
35
+ "study_name":"Sumstats1",
36
+ "study_type":"Unknown",
37
+ "species":"homo sapiens",
38
+ "genome_build":"99",
39
+ "sample_prevalence":"Unknown",
40
+ "population_prevalence":"Unknown",
41
+ "variants":{
42
+ "variant_number":"Unknown",
43
+ "min_P":"Unknown",
44
+ "number_of_chromosomes":"Unknown",
32
45
  },
33
- "genotyping_technology":"Unknown",
34
- "gwas_id":"Unknown",
35
- "samples":{
36
- "sample_size":"Unknown",
37
- "sample_ancestry":"European",
38
- "ancestry_method":"self-reported|genetically determined",
39
- } ,
40
- "trait_description":"Unknown",
41
- "minor_allele_freq_lower_limit":"Unknown",
42
- "data_file_name":"Unknown",
43
- "file_type":"Unknown",
44
- "data_file_md5sum":"Unknown",
45
- "is_harmonised":"Unchecked",
46
- "is_sorted":"Unchecked",
47
- "genome_assembly":"Unknown",
48
- "date_last_modified":"Unknown",
49
- "coordinate_system":"1-based",
50
- "sex": "M|F|combined"
51
- }
46
+ "samples":{
47
+ "sample_size":"Unknown",
48
+ "sample_size_case":"Unknown",
49
+ "sample_size_control":"Unknown",
50
+ "sample_size_median":"Unknown",
51
+ "sample_size_min":"Unknown",
52
+ },
53
+ "references":{
54
+ "ref_rsid_tsv":"Unknown",
55
+ "ref_rsid_vcf":"Unknown",
56
+ "ref_seq":"Unknown",
57
+ "ref_infer":"Unknown",
58
+ "ref_infer_af":"Unknown",
59
+ "ref_infer_daf":"Unknown",
60
+ "ref_rsid_to_chrpos_tsv":"Unknown",
61
+ "ref_rsid_to_chrpos_vcf":"Unknown"
62
+ }}}
63
+ metadata |= metadata_ssf
64
+
65
+ # SumstatsPair
66
+ elif object=="SumstatsPair":
67
+ metadata = {"gwaslab":{
68
+ "gwaslab_version": gwaslab_info()["version"],
69
+ "gwaslab_object":"gwaslab.SumstatsPair",
70
+ "group_name":"Group1",
71
+ "species":"homo sapiens",
72
+ "genome_build":"99",
73
+ "variants":{
74
+ "variant_number":"Unknown",
75
+ "min_P":"Unknown",
76
+ "number_of_chromosomes":"Unknown",
77
+ },
78
+ "samples":{
79
+ "sample_size":"Unknown",
80
+ "sample_size_case":"Unknown",
81
+ "sample_size_control":"Unknown",
82
+ "sample_size_median":"Unknown",
83
+ "sample_size_min":"Unknown",
84
+ },
85
+ "references":{
86
+ "ref_rsid_tsv":"Unknown",
87
+ "ref_rsid_vcf":"Unknown",
88
+ "ref_seq":"Unknown",
89
+ "ref_infer":"Unknown",
90
+ "ref_infer_af":"Unknown",
91
+ "ref_infer_daf":"Unknown",
92
+ "ref_rsid_to_chrpos_tsv":"Unknown",
93
+ "ref_rsid_to_chrpos_vcf":"Unknown"
94
+ }}}
95
+ metadata |= metadata_multi
96
+
97
+ # SumstatsMulti
98
+ elif object=="SumstatsMulti":
99
+ metadata = {"gwaslab":{
100
+ "gwaslab_version": gwaslab_info()["version"],
101
+ "gwaslab_object":"gwaslab.SumstatsMulti",
102
+ "group_name":"Group1",
103
+ "species":"homo sapiens",
104
+ "genome_build":"99",
105
+ "variants":{
106
+ "variant_number":"Unknown",
107
+ "min_P":"Unknown",
108
+ "number_of_chromosomes":"Unknown",
109
+ },
110
+ "samples":{
111
+ "sample_size":"Unknown",
112
+ "sample_size_case":"Unknown",
113
+ "sample_size_control":"Unknown",
114
+ "sample_size_median":"Unknown",
115
+ "sample_size_min":"Unknown",
116
+ },
117
+ "references":{
118
+ "ref_rsid_tsv":"Unknown",
119
+ "ref_rsid_vcf":"Unknown",
120
+ "ref_seq":"Unknown",
121
+ "ref_infer":"Unknown",
122
+ "ref_infer_af":"Unknown",
123
+ "ref_infer_daf":"Unknown",
124
+ "ref_rsid_to_chrpos_tsv":"Unknown",
125
+ "ref_rsid_to_chrpos_vcf":"Unknown"
126
+ }}}
127
+ metadata |= metadata_multi
52
128
  return metadata.copy()
53
129
 
54
130
  def _append_meta_record(old, new):
@@ -0,0 +1,48 @@
1
+ import numpy as np
2
+ from gwaslab.util_in_filter_value import inferbuild
3
+ from gwaslab.g_Log import Log
4
+ import time
5
+
6
+ def _update_meta(meta, sumstats, object="Sumstats",log=Log(), verbose=True):
7
+
8
+ meta["gwaslab"]["variants"]["variant_number"] = len(sumstats)
9
+
10
+ if "CHR" in sumstats.columns:
11
+ meta["gwaslab"]["variants"]["number_of_chromosomes"] = len(sumstats["CHR"].unique())
12
+
13
+ if meta["gwaslab"]["gwaslab_object"]=="gwaslab.Sumstats":
14
+ if "P" in sumstats.columns:
15
+ meta["gwaslab"]["variants"]["min_P"]=np.nanmin(sumstats["P"])
16
+ if "EAF" in sumstats.columns:
17
+ meta["gwaslab"]["variants"]["min_minor_allele_freq"]=min (np.min(sumstats["EAF"]) , 1- np.max(sumstats["EAF"]))
18
+ if "N" in sumstats.columns:
19
+ meta["gwaslab"]["samples"]["sample_size"] = int(sumstats["N"].max())
20
+ meta["gwaslab"]["samples"]["sample_size_median"] = sumstats["N"].median()
21
+ meta["gwaslab"]["samples"]["sample_size_min"] = int(sumstats["N"].min())
22
+
23
+ if meta["gwaslab"]["gwaslab_object"]=="gwaslab.SumstatsMulti" or meta["gwaslab"]["gwaslab_object"]=="gwaslab.SumstatsPair":
24
+ nstudy = meta["gwaslab"]['number_of_studies']
25
+ for i in range(nstudy):
26
+ i_form_1 = i + 1
27
+ meta["gwaslab"]["variants"][i_form_1]=dict()
28
+ meta["gwaslab"]["samples"][i_form_1] =dict()
29
+
30
+ if "P_{}".format(i_form_1) in sumstats.columns:
31
+ p = "P_{}".format(i_form_1)
32
+
33
+ meta["gwaslab"]["variants"][i_form_1]["min_P"]= np.nanmin(sumstats[p])
34
+ if "N_{}".format(i_form_1) in sumstats.columns:
35
+ n = "N_{}".format(i_form_1)
36
+ meta["gwaslab"]["samples"][i_form_1]["sample_size"] = int(sumstats[n].max())
37
+ meta["gwaslab"]["samples"][i_form_1]["sample_size_median"] = sumstats[n].median()
38
+ meta["gwaslab"]["samples"][i_form_1]["sample_size_min"] = int(sumstats[n].min())
39
+ if "EAF_{}".format(i_form_1) in sumstats.columns:
40
+ eaf="EAF_{}".format(i_form_1)
41
+ meta["gwaslab"]["variants"][i_form_1]["min_minor_allele_freq"]=min (np.min(sumstats[eaf]) , 1- np.max(sumstats[eaf]))
42
+
43
+ if meta["gwaslab"]["genome_build"] == "99":
44
+ _, meta["gwaslab"]["genome_build"] = inferbuild(sumstats, change_status=False, log=log, verbose=verbose)
45
+
46
+ meta["date_last_modified"] = str(time.strftime('%Y/%m/%d'))
47
+
48
+ return meta
@@ -0,0 +1,44 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+
4
def vchange_statusp(sumstats, matched_index, status, digit, before, after):
    """Remap one character of the status code on the selected rows.

    The column named ``status`` is first cast to string for every row. On rows
    where ``matched_index`` holds, the character at 1-based position ``digit``
    is passed through ``str.replace_many`` with the ``before[i] -> after[i]``
    mapping; the characters on either side are kept. Other rows keep their
    (string-cast) value.

    Returns the updated frame.
    """
    # pairwise mapping; indexing ``after`` keeps the original failure mode
    # when ``after`` is shorter than ``before``
    replacements = {old: after[pos] for pos, old in enumerate(before)}

    sumstats = sumstats.with_columns(pl.col(status).cast(pl.String).alias(status))

    code = pl.col(status)
    tail = code.str.slice(digit)
    if digit > 1:
        head = code.str.slice(0, digit - 1)
        target = code.str.slice(digit - 1, 1).str.replace_many(replacements)
        rewritten = head + target + tail
    else:
        # first position: there is no prefix to keep
        rewritten = code.str.slice(0, 1).str.replace_many(replacements) + tail

    return sumstats.with_columns(
        pl.when(matched_index)
        .then(rewritten)
        .otherwise(code)
        .alias(status)
    )
26
+
27
def copy_statusp(sumstats, matched_index, from_status, to_status, digit):
    """Copy one status character from ``from_status`` into ``to_status``.

    Both columns are cast to string. On rows where ``matched_index`` holds,
    the character at 1-based position ``digit`` of ``to_status`` is replaced
    by the character at the same position of ``from_status``; every other
    character of ``to_status`` is preserved. Rows that do not match keep
    their ``to_status`` value.

    Returns the updated frame.
    """
    sumstats = sumstats.with_columns(pl.col(from_status).cast(pl.String).alias(from_status))
    sumstats = sumstats.with_columns(pl.col(to_status).cast(pl.String).alias(to_status))

    source = pl.col(from_status)
    target = pl.col(to_status)

    if digit > 1:
        # Fix: this branch previously took the prefix and suffix from
        # ``from_status`` and the digit from ``to_status`` -- the reverse of
        # the ``digit <= 1`` branch below -- which overwrote the whole target
        # code except the one character that was meant to be copied.
        copied = (
            target.str.slice(0, digit - 1)
            + source.str.slice(digit - 1, 1)
            + target.str.slice(digit)
        )
    else:
        copied = source.str.slice(0, 1) + target.str.slice(digit)

    return sumstats.with_columns(
        pl.when(matched_index)
        .then(copied)
        .otherwise(target)
        .alias(to_status)
    )
gwaslab/g_version.py CHANGED
@@ -15,8 +15,8 @@ def _get_version():
15
15
def gwaslab_info():
    """Return the version string and release date of this gwaslab release."""
    return {
        "version": "3.5.8",
        "release_date": "20250424",
    }
22
22