py-geodetector 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py_geodetector/geodetector.py
CHANGED
|
@@ -2,8 +2,8 @@ import warnings
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from typing import Sequence
|
|
5
|
-
from scipy.stats import f, ncf
|
|
6
5
|
import matplotlib.pyplot as plt
|
|
6
|
+
from scipy.stats import f, levene, ncf, ttest_ind
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
from pathlib import Path
|
|
@@ -27,10 +27,11 @@ def _plot_value(ax, interaction_df, ecological_df, value_fontsize=10):
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class GeoDetector(object):
|
|
30
|
-
def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str]):
|
|
30
|
+
def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str], alpha=0.05):
|
|
31
31
|
self.df = df
|
|
32
32
|
self.y = y
|
|
33
33
|
self.factors = factors
|
|
34
|
+
self.alpha = alpha
|
|
34
35
|
self._check_data(df, y, factors)
|
|
35
36
|
self.factor_df, self.interaction_df, self.ecological_df = None, None, None
|
|
36
37
|
|
|
@@ -155,11 +156,44 @@ class GeoDetector(object):
|
|
|
155
156
|
ssw2, _, _ = self._cal_ssw(self.df, self.y, self.factors[j])
|
|
156
157
|
dfd = self.df[self.factors[j]].notna().sum()-1
|
|
157
158
|
fval = (dfn*(dfd-1)*ssw1)/(dfd*(dfn-1)*ssw2)
|
|
158
|
-
if fval<f.ppf(
|
|
159
|
+
if fval<f.ppf(self.alpha, dfn, dfn):
|
|
159
160
|
self.ecological_df.loc[self.factors[i], self.factors[j]] = 'Y'
|
|
160
161
|
else:
|
|
161
162
|
self.ecological_df.loc[self.factors[i], self.factors[j]] = 'N'
|
|
162
163
|
return self.ecological_df
|
|
164
|
+
|
|
165
|
+
def risk_detector(self):
|
|
166
|
+
"""
|
|
167
|
+
Compares the difference of average values between sub-groups
|
|
168
|
+
Reference:
|
|
169
|
+
https://github.com/gsnrguo/QGIS-Geographical-detector/blob/main/gd_core/geodetector.py
|
|
170
|
+
"""
|
|
171
|
+
risk_result = dict()
|
|
172
|
+
for factor in self.factors:
|
|
173
|
+
risk_name = self.df.groupby(factor)[self.y].mean()
|
|
174
|
+
strata = np.sort(self.df[factor].unique())
|
|
175
|
+
t_test = np.empty((len(strata), len(strata)))
|
|
176
|
+
t_test.fill(np.nan)
|
|
177
|
+
t_test_strata = pd.DataFrame(t_test, index=strata, columns=strata)
|
|
178
|
+
for i in range(len(strata) - 1):
|
|
179
|
+
for j in range(i + 1, len(strata)):
|
|
180
|
+
y_i = self.df.loc[self.df[factor] == strata[i], [self.y]]
|
|
181
|
+
y_j = self.df.loc[self.df[factor] == strata[j], [self.y]]
|
|
182
|
+
y_i = np.array(y_i).reshape(-1)
|
|
183
|
+
y_j = np.array(y_j).reshape(-1)
|
|
184
|
+
# hypothesis testing of variance homogeneity
|
|
185
|
+
levene_result = levene(y_i, y_j)
|
|
186
|
+
if levene_result.pvalue < self.alpha:
|
|
187
|
+
# variance non-homogeneous
|
|
188
|
+
ttest_result = ttest_ind(y_i, y_j, equal_var=False)
|
|
189
|
+
else:
|
|
190
|
+
ttest_result = ttest_ind(y_i, y_j)
|
|
191
|
+
|
|
192
|
+
t_test_strata.iloc[j, i] = ttest_result.pvalue <= self.alpha
|
|
193
|
+
|
|
194
|
+
risk_factor = dict(risk=risk_name, ttest_stra=t_test_strata)
|
|
195
|
+
risk_result[factor] = risk_factor
|
|
196
|
+
return risk_result
|
|
163
197
|
|
|
164
198
|
def plot(self, tick_fontsize=10, value_fontsize=10, colorbar_fontsize=10, show=True):
|
|
165
199
|
if isinstance(self.interaction_df, type(None)):
|
|
@@ -188,4 +222,4 @@ class GeoDetector(object):
|
|
|
188
222
|
plt.show()
|
|
189
223
|
return ax
|
|
190
224
|
else:
|
|
191
|
-
return ax
|
|
225
|
+
return ax
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: py_geodetector
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A simple Python package for the geodetector
|
|
5
5
|
Project-URL: Homepage, https://github.com/djw-easy/GeoDetector
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/djw-easy/GeoDetector/issues
|
|
@@ -41,6 +41,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
|
|
|
41
41
|
# ecological detect
|
|
42
42
|
ecological_df = gd.ecological_detector()
|
|
43
43
|
|
|
44
|
+
# risk detect
|
|
45
|
+
risk_result = gd.risk_detector()
|
|
46
|
+
|
|
44
47
|
# plot
|
|
45
48
|
# use a heatmap visualize the interaction detect result,
|
|
46
49
|
# red text means that the ecological detection results show a significant difference
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
py_geodetector/__init__.py,sha256=bhyJq1ipXNRhG-zPbF_5YUbJ_sana5rJ-FG-ogBNh_U,57
|
|
2
|
+
py_geodetector/geodetector.py,sha256=AmIt70OpMEaNQxKAvI-QiFwjfs-Sa-nb89OD12gxiIQ,9911
|
|
3
|
+
py_geodetector/example_data/disease.csv,sha256=sodkE21Xw-eSlGWxLE1by7hSHQk2FjNxb4ncWdyWLmE,2231
|
|
4
|
+
py_geodetector-0.1.3.dist-info/METADATA,sha256=Ngr3o6q8DpBQq5LriVXchSKvTp_zHly3YgkgZR14XOQ,1901
|
|
5
|
+
py_geodetector-0.1.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
6
|
+
py_geodetector-0.1.3.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
py_geodetector/__init__.py,sha256=bhyJq1ipXNRhG-zPbF_5YUbJ_sana5rJ-FG-ogBNh_U,57
|
|
2
|
-
py_geodetector/geodetector.py,sha256=R-3OlNrgghwZTdieIbhB5J3N3nl2-ix6bjwjyVmh1E8,8240
|
|
3
|
-
py_geodetector/example_data/disease.csv,sha256=sodkE21Xw-eSlGWxLE1by7hSHQk2FjNxb4ncWdyWLmE,2231
|
|
4
|
-
py_geodetector-0.1.1.dist-info/METADATA,sha256=ILLj1V2F_SoPoiJCdaAjmRCT_W4gSeLqCeKAmoznUL0,1853
|
|
5
|
-
py_geodetector-0.1.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
6
|
-
py_geodetector-0.1.1.dist-info/RECORD,,
|
|
File without changes
|