py-geodetector 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/PKG-INFO +4 -1
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/README.md +3 -0
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/example.ipynb +151 -11
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/pyproject.toml +1 -1
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/src/py_geodetector/geodetector.py +38 -4
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/.gitignore +0 -0
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/src/__init__.py +0 -0
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/src/py_geodetector/__init__.py +0 -0
- {py_geodetector-0.1.2 → py_geodetector-0.1.3}/src/py_geodetector/example_data/disease.csv +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: py_geodetector
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A simple Python package for the geodetector
|
|
5
5
|
Project-URL: Homepage, https://github.com/djw-easy/GeoDetector
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/djw-easy/GeoDetector/issues
|
|
@@ -41,6 +41,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
|
|
|
41
41
|
# ecological detect
|
|
42
42
|
ecological_df = gd.ecological_detector()
|
|
43
43
|
|
|
44
|
+
# risk detect
|
|
45
|
+
risk_result = gd.risk_detector()
|
|
46
|
+
|
|
44
47
|
# plot
|
|
45
48
|
# use a heatmap visualize the interaction detect result,
|
|
46
49
|
# red text means that the ecological detection results show a significant difference
|
|
@@ -28,6 +28,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
|
|
|
28
28
|
# ecological detect
|
|
29
29
|
ecological_df = gd.ecological_detector()
|
|
30
30
|
|
|
31
|
+
# risk detect
|
|
32
|
+
risk_result = gd.risk_detector()
|
|
33
|
+
|
|
31
34
|
# plot
|
|
32
35
|
# use a heatmap visualize the interaction detect result,
|
|
33
36
|
# red text means that the ecological detection results show a significant difference
|
|
@@ -13,6 +13,15 @@
|
|
|
13
13
|
"cell_type": "code",
|
|
14
14
|
"execution_count": 2,
|
|
15
15
|
"metadata": {},
|
|
16
|
+
"outputs": [],
|
|
17
|
+
"source": [
|
|
18
|
+
"from src.py_geodetector import GeoDetector, load_example_data"
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"cell_type": "code",
|
|
23
|
+
"execution_count": 3,
|
|
24
|
+
"metadata": {},
|
|
16
25
|
"outputs": [
|
|
17
26
|
{
|
|
18
27
|
"data": {
|
|
@@ -90,7 +99,7 @@
|
|
|
90
99
|
"4 6.49 3 2 4"
|
|
91
100
|
]
|
|
92
101
|
},
|
|
93
|
-
"execution_count":
|
|
102
|
+
"execution_count": 3,
|
|
94
103
|
"metadata": {},
|
|
95
104
|
"output_type": "execute_result"
|
|
96
105
|
}
|
|
@@ -102,7 +111,7 @@
|
|
|
102
111
|
},
|
|
103
112
|
{
|
|
104
113
|
"cell_type": "code",
|
|
105
|
-
"execution_count":
|
|
114
|
+
"execution_count": 4,
|
|
106
115
|
"metadata": {},
|
|
107
116
|
"outputs": [],
|
|
108
117
|
"source": [
|
|
@@ -111,7 +120,7 @@
|
|
|
111
120
|
},
|
|
112
121
|
{
|
|
113
122
|
"cell_type": "code",
|
|
114
|
-
"execution_count":
|
|
123
|
+
"execution_count": 5,
|
|
115
124
|
"metadata": {},
|
|
116
125
|
"outputs": [
|
|
117
126
|
{
|
|
@@ -163,7 +172,7 @@
|
|
|
163
172
|
"p value 0.363236 0.000117 0.040804"
|
|
164
173
|
]
|
|
165
174
|
},
|
|
166
|
-
"execution_count":
|
|
175
|
+
"execution_count": 5,
|
|
167
176
|
"metadata": {},
|
|
168
177
|
"output_type": "execute_result"
|
|
169
178
|
}
|
|
@@ -174,7 +183,7 @@
|
|
|
174
183
|
},
|
|
175
184
|
{
|
|
176
185
|
"cell_type": "code",
|
|
177
|
-
"execution_count":
|
|
186
|
+
"execution_count": 6,
|
|
178
187
|
"metadata": {},
|
|
179
188
|
"outputs": [
|
|
180
189
|
{
|
|
@@ -233,7 +242,7 @@
|
|
|
233
242
|
"level 0.663524 0.713597 0.606709"
|
|
234
243
|
]
|
|
235
244
|
},
|
|
236
|
-
"execution_count":
|
|
245
|
+
"execution_count": 6,
|
|
237
246
|
"metadata": {},
|
|
238
247
|
"output_type": "execute_result"
|
|
239
248
|
}
|
|
@@ -244,7 +253,7 @@
|
|
|
244
253
|
},
|
|
245
254
|
{
|
|
246
255
|
"cell_type": "code",
|
|
247
|
-
"execution_count":
|
|
256
|
+
"execution_count": 7,
|
|
248
257
|
"metadata": {},
|
|
249
258
|
"outputs": [
|
|
250
259
|
{
|
|
@@ -303,7 +312,7 @@
|
|
|
303
312
|
"level Enhance, bi- Enhance, bi- NaN"
|
|
304
313
|
]
|
|
305
314
|
},
|
|
306
|
-
"execution_count":
|
|
315
|
+
"execution_count": 7,
|
|
307
316
|
"metadata": {},
|
|
308
317
|
"output_type": "execute_result"
|
|
309
318
|
}
|
|
@@ -314,7 +323,7 @@
|
|
|
314
323
|
},
|
|
315
324
|
{
|
|
316
325
|
"cell_type": "code",
|
|
317
|
-
"execution_count":
|
|
326
|
+
"execution_count": 8,
|
|
318
327
|
"metadata": {},
|
|
319
328
|
"outputs": [
|
|
320
329
|
{
|
|
@@ -373,7 +382,7 @@
|
|
|
373
382
|
"level Y N NaN"
|
|
374
383
|
]
|
|
375
384
|
},
|
|
376
|
-
"execution_count":
|
|
385
|
+
"execution_count": 8,
|
|
377
386
|
"metadata": {},
|
|
378
387
|
"output_type": "execute_result"
|
|
379
388
|
}
|
|
@@ -384,7 +393,138 @@
|
|
|
384
393
|
},
|
|
385
394
|
{
|
|
386
395
|
"cell_type": "code",
|
|
387
|
-
"execution_count":
|
|
396
|
+
"execution_count": 9,
|
|
397
|
+
"metadata": {},
|
|
398
|
+
"outputs": [],
|
|
399
|
+
"source": [
|
|
400
|
+
"r = gd.risk_detector()"
|
|
401
|
+
]
|
|
402
|
+
},
|
|
403
|
+
{
|
|
404
|
+
"cell_type": "code",
|
|
405
|
+
"execution_count": 12,
|
|
406
|
+
"metadata": {},
|
|
407
|
+
"outputs": [
|
|
408
|
+
{
|
|
409
|
+
"data": {
|
|
410
|
+
"text/plain": [
|
|
411
|
+
"type\n",
|
|
412
|
+
"1 6.340000\n",
|
|
413
|
+
"2 6.687500\n",
|
|
414
|
+
"3 6.583279\n",
|
|
415
|
+
"5 5.843810\n",
|
|
416
|
+
"7 6.347073\n",
|
|
417
|
+
"Name: incidence, dtype: float64"
|
|
418
|
+
]
|
|
419
|
+
},
|
|
420
|
+
"execution_count": 12,
|
|
421
|
+
"metadata": {},
|
|
422
|
+
"output_type": "execute_result"
|
|
423
|
+
}
|
|
424
|
+
],
|
|
425
|
+
"source": [
|
|
426
|
+
"r['type']['risk']"
|
|
427
|
+
]
|
|
428
|
+
},
|
|
429
|
+
{
|
|
430
|
+
"cell_type": "code",
|
|
431
|
+
"execution_count": 13,
|
|
432
|
+
"metadata": {},
|
|
433
|
+
"outputs": [
|
|
434
|
+
{
|
|
435
|
+
"data": {
|
|
436
|
+
"text/html": [
|
|
437
|
+
"<div>\n",
|
|
438
|
+
"<style scoped>\n",
|
|
439
|
+
" .dataframe tbody tr th:only-of-type {\n",
|
|
440
|
+
" vertical-align: middle;\n",
|
|
441
|
+
" }\n",
|
|
442
|
+
"\n",
|
|
443
|
+
" .dataframe tbody tr th {\n",
|
|
444
|
+
" vertical-align: top;\n",
|
|
445
|
+
" }\n",
|
|
446
|
+
"\n",
|
|
447
|
+
" .dataframe thead th {\n",
|
|
448
|
+
" text-align: right;\n",
|
|
449
|
+
" }\n",
|
|
450
|
+
"</style>\n",
|
|
451
|
+
"<table border=\"1\" class=\"dataframe\">\n",
|
|
452
|
+
" <thead>\n",
|
|
453
|
+
" <tr style=\"text-align: right;\">\n",
|
|
454
|
+
" <th></th>\n",
|
|
455
|
+
" <th>1</th>\n",
|
|
456
|
+
" <th>2</th>\n",
|
|
457
|
+
" <th>3</th>\n",
|
|
458
|
+
" <th>5</th>\n",
|
|
459
|
+
" <th>7</th>\n",
|
|
460
|
+
" </tr>\n",
|
|
461
|
+
" </thead>\n",
|
|
462
|
+
" <tbody>\n",
|
|
463
|
+
" <tr>\n",
|
|
464
|
+
" <th>1</th>\n",
|
|
465
|
+
" <td>NaN</td>\n",
|
|
466
|
+
" <td>NaN</td>\n",
|
|
467
|
+
" <td>NaN</td>\n",
|
|
468
|
+
" <td>NaN</td>\n",
|
|
469
|
+
" <td>NaN</td>\n",
|
|
470
|
+
" </tr>\n",
|
|
471
|
+
" <tr>\n",
|
|
472
|
+
" <th>2</th>\n",
|
|
473
|
+
" <td>True</td>\n",
|
|
474
|
+
" <td>NaN</td>\n",
|
|
475
|
+
" <td>NaN</td>\n",
|
|
476
|
+
" <td>NaN</td>\n",
|
|
477
|
+
" <td>NaN</td>\n",
|
|
478
|
+
" </tr>\n",
|
|
479
|
+
" <tr>\n",
|
|
480
|
+
" <th>3</th>\n",
|
|
481
|
+
" <td>True</td>\n",
|
|
482
|
+
" <td>False</td>\n",
|
|
483
|
+
" <td>NaN</td>\n",
|
|
484
|
+
" <td>NaN</td>\n",
|
|
485
|
+
" <td>NaN</td>\n",
|
|
486
|
+
" </tr>\n",
|
|
487
|
+
" <tr>\n",
|
|
488
|
+
" <th>5</th>\n",
|
|
489
|
+
" <td>True</td>\n",
|
|
490
|
+
" <td>True</td>\n",
|
|
491
|
+
" <td>True</td>\n",
|
|
492
|
+
" <td>NaN</td>\n",
|
|
493
|
+
" <td>NaN</td>\n",
|
|
494
|
+
" </tr>\n",
|
|
495
|
+
" <tr>\n",
|
|
496
|
+
" <th>7</th>\n",
|
|
497
|
+
" <td>False</td>\n",
|
|
498
|
+
" <td>True</td>\n",
|
|
499
|
+
" <td>True</td>\n",
|
|
500
|
+
" <td>True</td>\n",
|
|
501
|
+
" <td>NaN</td>\n",
|
|
502
|
+
" </tr>\n",
|
|
503
|
+
" </tbody>\n",
|
|
504
|
+
"</table>\n",
|
|
505
|
+
"</div>"
|
|
506
|
+
],
|
|
507
|
+
"text/plain": [
|
|
508
|
+
" 1 2 3 5 7\n",
|
|
509
|
+
"1 NaN NaN NaN NaN NaN\n",
|
|
510
|
+
"2 True NaN NaN NaN NaN\n",
|
|
511
|
+
"3 True False NaN NaN NaN\n",
|
|
512
|
+
"5 True True True NaN NaN\n",
|
|
513
|
+
"7 False True True True NaN"
|
|
514
|
+
]
|
|
515
|
+
},
|
|
516
|
+
"execution_count": 13,
|
|
517
|
+
"metadata": {},
|
|
518
|
+
"output_type": "execute_result"
|
|
519
|
+
}
|
|
520
|
+
],
|
|
521
|
+
"source": [
|
|
522
|
+
"r['type']['ttest_stra']"
|
|
523
|
+
]
|
|
524
|
+
},
|
|
525
|
+
{
|
|
526
|
+
"cell_type": "code",
|
|
527
|
+
"execution_count": 16,
|
|
388
528
|
"metadata": {},
|
|
389
529
|
"outputs": [
|
|
390
530
|
{
|
|
@@ -2,8 +2,8 @@ import warnings
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from typing import Sequence
|
|
5
|
-
from scipy.stats import f, ncf
|
|
6
5
|
import matplotlib.pyplot as plt
|
|
6
|
+
from scipy.stats import f, levene, ncf, ttest_ind
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
from pathlib import Path
|
|
@@ -27,10 +27,11 @@ def _plot_value(ax, interaction_df, ecological_df, value_fontsize=10):
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class GeoDetector(object):
|
|
30
|
-
def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str]):
|
|
30
|
+
def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str], alpha=0.05):
|
|
31
31
|
self.df = df
|
|
32
32
|
self.y = y
|
|
33
33
|
self.factors = factors
|
|
34
|
+
self.alpha = alpha
|
|
34
35
|
self._check_data(df, y, factors)
|
|
35
36
|
self.factor_df, self.interaction_df, self.ecological_df = None, None, None
|
|
36
37
|
|
|
@@ -155,11 +156,44 @@ class GeoDetector(object):
|
|
|
155
156
|
ssw2, _, _ = self._cal_ssw(self.df, self.y, self.factors[j])
|
|
156
157
|
dfd = self.df[self.factors[j]].notna().sum()-1
|
|
157
158
|
fval = (dfn*(dfd-1)*ssw1)/(dfd*(dfn-1)*ssw2)
|
|
158
|
-
if fval<f.ppf(0.05, dfn, dfn):
|
|
159
|
+
if fval<f.ppf(self.alpha, dfn, dfn):
|
|
159
160
|
self.ecological_df.loc[self.factors[i], self.factors[j]] = 'Y'
|
|
160
161
|
else:
|
|
161
162
|
self.ecological_df.loc[self.factors[i], self.factors[j]] = 'N'
|
|
162
163
|
return self.ecological_df
|
|
164
|
+
|
|
165
|
+
def risk_detector(self):
|
|
166
|
+
"""
|
|
167
|
+
Compares the difference of average values between sub-groups
|
|
168
|
+
Reference:
|
|
169
|
+
https://github.com/gsnrguo/QGIS-Geographical-detector/blob/main/gd_core/geodetector.py
|
|
170
|
+
"""
|
|
171
|
+
risk_result = dict()
|
|
172
|
+
for factor in self.factors:
|
|
173
|
+
risk_name = self.df.groupby(factor)[self.y].mean()
|
|
174
|
+
strata = np.sort(self.df[factor].unique())
|
|
175
|
+
t_test = np.empty((len(strata), len(strata)))
|
|
176
|
+
t_test.fill(np.nan)
|
|
177
|
+
t_test_strata = pd.DataFrame(t_test, index=strata, columns=strata)
|
|
178
|
+
for i in range(len(strata) - 1):
|
|
179
|
+
for j in range(i + 1, len(strata)):
|
|
180
|
+
y_i = self.df.loc[self.df[factor] == strata[i], [self.y]]
|
|
181
|
+
y_j = self.df.loc[self.df[factor] == strata[j], [self.y]]
|
|
182
|
+
y_i = np.array(y_i).reshape(-1)
|
|
183
|
+
y_j = np.array(y_j).reshape(-1)
|
|
184
|
+
# hypothesis testing of variance homogeneity
|
|
185
|
+
levene_result = levene(y_i, y_j)
|
|
186
|
+
if levene_result.pvalue < self.alpha:
|
|
187
|
+
# variance non-homogeneous
|
|
188
|
+
ttest_result = ttest_ind(y_i, y_j, equal_var=False)
|
|
189
|
+
else:
|
|
190
|
+
ttest_result = ttest_ind(y_i, y_j)
|
|
191
|
+
|
|
192
|
+
t_test_strata.iloc[j, i] = ttest_result.pvalue <= self.alpha
|
|
193
|
+
|
|
194
|
+
risk_factor = dict(risk=risk_name, ttest_stra=t_test_strata)
|
|
195
|
+
risk_result[factor] = risk_factor
|
|
196
|
+
return risk_result
|
|
163
197
|
|
|
164
198
|
def plot(self, tick_fontsize=10, value_fontsize=10, colorbar_fontsize=10, show=True):
|
|
165
199
|
if isinstance(self.interaction_df, type(None)):
|
|
@@ -188,4 +222,4 @@ class GeoDetector(object):
|
|
|
188
222
|
plt.show()
|
|
189
223
|
return ax
|
|
190
224
|
else:
|
|
191
|
-
return ax
|
|
225
|
+
return ax
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|