py-geodetector 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: py_geodetector
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: A simple Python package for the geodetector
5
5
  Project-URL: Homepage, https://github.com/djw-easy/GeoDetector
6
6
  Project-URL: Bug Tracker, https://github.com/djw-easy/GeoDetector/issues
@@ -41,6 +41,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
41
41
  # ecological detect
42
42
  ecological_df = gd.ecological_detector()
43
43
 
44
+ # risk detect
45
+ risk_result = gd.risk_detector()
46
+
44
47
  # plot
45
48
  # use a heatmap to visualize the interaction detection result,
46
49
  # red text means that the ecological detection results show a significant difference
@@ -28,6 +28,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
28
28
  # ecological detect
29
29
  ecological_df = gd.ecological_detector()
30
30
 
31
+ # risk detect
32
+ risk_result = gd.risk_detector()
33
+
31
34
  # plot
32
35
  # use a heatmap to visualize the interaction detection result,
33
36
  # red text means that the ecological detection results show a significant difference
@@ -13,6 +13,15 @@
13
13
  "cell_type": "code",
14
14
  "execution_count": 2,
15
15
  "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "from src.py_geodetector import GeoDetector, load_example_data"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 3,
24
+ "metadata": {},
16
25
  "outputs": [
17
26
  {
18
27
  "data": {
@@ -90,7 +99,7 @@
90
99
  "4 6.49 3 2 4"
91
100
  ]
92
101
  },
93
- "execution_count": 2,
102
+ "execution_count": 3,
94
103
  "metadata": {},
95
104
  "output_type": "execute_result"
96
105
  }
@@ -102,7 +111,7 @@
102
111
  },
103
112
  {
104
113
  "cell_type": "code",
105
- "execution_count": 3,
114
+ "execution_count": 4,
106
115
  "metadata": {},
107
116
  "outputs": [],
108
117
  "source": [
@@ -111,7 +120,7 @@
111
120
  },
112
121
  {
113
122
  "cell_type": "code",
114
- "execution_count": 4,
123
+ "execution_count": 5,
115
124
  "metadata": {},
116
125
  "outputs": [
117
126
  {
@@ -163,7 +172,7 @@
163
172
  "p value 0.363236 0.000117 0.040804"
164
173
  ]
165
174
  },
166
- "execution_count": 4,
175
+ "execution_count": 5,
167
176
  "metadata": {},
168
177
  "output_type": "execute_result"
169
178
  }
@@ -174,7 +183,7 @@
174
183
  },
175
184
  {
176
185
  "cell_type": "code",
177
- "execution_count": 5,
186
+ "execution_count": 6,
178
187
  "metadata": {},
179
188
  "outputs": [
180
189
  {
@@ -233,7 +242,7 @@
233
242
  "level 0.663524 0.713597 0.606709"
234
243
  ]
235
244
  },
236
- "execution_count": 5,
245
+ "execution_count": 6,
237
246
  "metadata": {},
238
247
  "output_type": "execute_result"
239
248
  }
@@ -244,7 +253,7 @@
244
253
  },
245
254
  {
246
255
  "cell_type": "code",
247
- "execution_count": 6,
256
+ "execution_count": 7,
248
257
  "metadata": {},
249
258
  "outputs": [
250
259
  {
@@ -303,7 +312,7 @@
303
312
  "level Enhance, bi- Enhance, bi- NaN"
304
313
  ]
305
314
  },
306
- "execution_count": 6,
315
+ "execution_count": 7,
307
316
  "metadata": {},
308
317
  "output_type": "execute_result"
309
318
  }
@@ -314,7 +323,7 @@
314
323
  },
315
324
  {
316
325
  "cell_type": "code",
317
- "execution_count": 7,
326
+ "execution_count": 8,
318
327
  "metadata": {},
319
328
  "outputs": [
320
329
  {
@@ -373,7 +382,7 @@
373
382
  "level Y N NaN"
374
383
  ]
375
384
  },
376
- "execution_count": 7,
385
+ "execution_count": 8,
377
386
  "metadata": {},
378
387
  "output_type": "execute_result"
379
388
  }
@@ -384,7 +393,138 @@
384
393
  },
385
394
  {
386
395
  "cell_type": "code",
387
- "execution_count": 8,
396
+ "execution_count": 9,
397
+ "metadata": {},
398
+ "outputs": [],
399
+ "source": [
400
+ "r = gd.risk_detector()"
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "code",
405
+ "execution_count": 12,
406
+ "metadata": {},
407
+ "outputs": [
408
+ {
409
+ "data": {
410
+ "text/plain": [
411
+ "type\n",
412
+ "1 6.340000\n",
413
+ "2 6.687500\n",
414
+ "3 6.583279\n",
415
+ "5 5.843810\n",
416
+ "7 6.347073\n",
417
+ "Name: incidence, dtype: float64"
418
+ ]
419
+ },
420
+ "execution_count": 12,
421
+ "metadata": {},
422
+ "output_type": "execute_result"
423
+ }
424
+ ],
425
+ "source": [
426
+ "r['type']['risk']"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": 13,
432
+ "metadata": {},
433
+ "outputs": [
434
+ {
435
+ "data": {
436
+ "text/html": [
437
+ "<div>\n",
438
+ "<style scoped>\n",
439
+ " .dataframe tbody tr th:only-of-type {\n",
440
+ " vertical-align: middle;\n",
441
+ " }\n",
442
+ "\n",
443
+ " .dataframe tbody tr th {\n",
444
+ " vertical-align: top;\n",
445
+ " }\n",
446
+ "\n",
447
+ " .dataframe thead th {\n",
448
+ " text-align: right;\n",
449
+ " }\n",
450
+ "</style>\n",
451
+ "<table border=\"1\" class=\"dataframe\">\n",
452
+ " <thead>\n",
453
+ " <tr style=\"text-align: right;\">\n",
454
+ " <th></th>\n",
455
+ " <th>1</th>\n",
456
+ " <th>2</th>\n",
457
+ " <th>3</th>\n",
458
+ " <th>5</th>\n",
459
+ " <th>7</th>\n",
460
+ " </tr>\n",
461
+ " </thead>\n",
462
+ " <tbody>\n",
463
+ " <tr>\n",
464
+ " <th>1</th>\n",
465
+ " <td>NaN</td>\n",
466
+ " <td>NaN</td>\n",
467
+ " <td>NaN</td>\n",
468
+ " <td>NaN</td>\n",
469
+ " <td>NaN</td>\n",
470
+ " </tr>\n",
471
+ " <tr>\n",
472
+ " <th>2</th>\n",
473
+ " <td>True</td>\n",
474
+ " <td>NaN</td>\n",
475
+ " <td>NaN</td>\n",
476
+ " <td>NaN</td>\n",
477
+ " <td>NaN</td>\n",
478
+ " </tr>\n",
479
+ " <tr>\n",
480
+ " <th>3</th>\n",
481
+ " <td>True</td>\n",
482
+ " <td>False</td>\n",
483
+ " <td>NaN</td>\n",
484
+ " <td>NaN</td>\n",
485
+ " <td>NaN</td>\n",
486
+ " </tr>\n",
487
+ " <tr>\n",
488
+ " <th>5</th>\n",
489
+ " <td>True</td>\n",
490
+ " <td>True</td>\n",
491
+ " <td>True</td>\n",
492
+ " <td>NaN</td>\n",
493
+ " <td>NaN</td>\n",
494
+ " </tr>\n",
495
+ " <tr>\n",
496
+ " <th>7</th>\n",
497
+ " <td>False</td>\n",
498
+ " <td>True</td>\n",
499
+ " <td>True</td>\n",
500
+ " <td>True</td>\n",
501
+ " <td>NaN</td>\n",
502
+ " </tr>\n",
503
+ " </tbody>\n",
504
+ "</table>\n",
505
+ "</div>"
506
+ ],
507
+ "text/plain": [
508
+ " 1 2 3 5 7\n",
509
+ "1 NaN NaN NaN NaN NaN\n",
510
+ "2 True NaN NaN NaN NaN\n",
511
+ "3 True False NaN NaN NaN\n",
512
+ "5 True True True NaN NaN\n",
513
+ "7 False True True True NaN"
514
+ ]
515
+ },
516
+ "execution_count": 13,
517
+ "metadata": {},
518
+ "output_type": "execute_result"
519
+ }
520
+ ],
521
+ "source": [
522
+ "r['type']['ttest_stra']"
523
+ ]
524
+ },
525
+ {
526
+ "cell_type": "code",
527
+ "execution_count": 16,
388
528
  "metadata": {},
389
529
  "outputs": [
390
530
  {
@@ -11,7 +11,7 @@ build-backend = "hatchling.build"
11
11
 
12
12
  [project]
13
13
  name = "py_geodetector"
14
- version = "0.1.1"
14
+ version = "0.1.3"
15
15
  authors = [
16
16
  { name = "djw", email = "djweasy@163.com" },
17
17
  ]
@@ -2,8 +2,8 @@ import warnings
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  from typing import Sequence
5
- from scipy.stats import f, ncf
6
5
  import matplotlib.pyplot as plt
6
+ from scipy.stats import f, levene, ncf, ttest_ind
7
7
 
8
8
 
9
9
  from pathlib import Path
@@ -27,10 +27,11 @@ def _plot_value(ax, interaction_df, ecological_df, value_fontsize=10):
27
27
 
28
28
 
29
29
  class GeoDetector(object):
30
- def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str]):
30
def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str], alpha: float = 0.05):
    """
    Store the data and settings used by the detectors.

    Args:
        df: table holding the dependent variable and the factor columns.
        y: name of the dependent-variable column in ``df``.
        factors: names of the factor (stratification) columns in ``df``.
        alpha: significance level used by the statistical tests; must lie
            strictly between 0 and 1.

    Raises:
        ValueError: if ``alpha`` is not strictly between 0 and 1.
    """
    # An out-of-range (or NaN) alpha makes the critical values NaN, which
    # would silently turn every comparison into "not significant".
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")
    self.df = df
    self.y = y
    self.factors = factors
    self.alpha = alpha
    self._check_data(df, y, factors)
    # Result tables; they stay None until the corresponding detector fills them.
    self.factor_df, self.interaction_df, self.ecological_df = None, None, None
36
37
 
@@ -155,11 +156,44 @@ class GeoDetector(object):
155
156
  ssw2, _, _ = self._cal_ssw(self.df, self.y, self.factors[j])
156
157
  dfd = self.df[self.factors[j]].notna().sum()-1
157
158
  fval = (dfn*(dfd-1)*ssw1)/(dfd*(dfn-1)*ssw2)
158
- if fval<f.ppf(0.05, dfn, dfn):
159
+ if fval<f.ppf(self.alpha, dfn, dfn):
159
160
  self.ecological_df.loc[self.factors[i], self.factors[j]] = 'Y'
160
161
  else:
161
162
  self.ecological_df.loc[self.factors[i], self.factors[j]] = 'N'
162
163
  return self.ecological_df
164
+
165
def risk_detector(self):
    """
    Compare the mean of ``self.y`` between the strata of every factor.

    Each pair of strata of a factor is first checked for variance
    homogeneity with Levene's test. If the variances differ at
    ``self.alpha``, Welch's t-test (``equal_var=False``) is used;
    otherwise the standard two-sample t-test is used.

    Returns:
        dict: one entry per factor, each a dict with the keys
            ``risk``: Series with the mean of ``self.y`` per stratum.
            ``ttest_stra``: square DataFrame indexed by the sorted strata.
                Cell ``[j, i]`` in the lower triangle is True when the
                means of strata ``i`` and ``j`` differ significantly
                (p-value <= ``self.alpha``) and False otherwise; every
                other cell is NaN.

    Reference:
        https://github.com/gsnrguo/QGIS-Geographical-detector/blob/main/gd_core/geodetector.py
    """
    risk_result = dict()
    for factor in self.factors:
        stratum_means = self.df.groupby(factor)[self.y].mean()
        # groupby ignores rows whose factor value is missing, so leave
        # them out of the strata as well to keep both results aligned.
        strata = np.sort(self.df[factor].dropna().unique())
        n_strata = len(strata)
        # Object dtype lets booleans sit next to NaN; assigning a bool into
        # a float64 frame relies on an implicit upcast pandas has deprecated.
        significance = pd.DataFrame(
            np.full((n_strata, n_strata), np.nan, dtype=object),
            index=strata,
            columns=strata,
        )
        # Extract each stratum's sample once instead of once per pair.
        samples = [
            self.df.loc[self.df[factor] == stratum, self.y].to_numpy().reshape(-1)
            for stratum in strata
        ]
        for i in range(n_strata - 1):
            for j in range(i + 1, n_strata):
                y_i, y_j = samples[i], samples[j]
                # hypothesis testing of variance homogeneity
                if levene(y_i, y_j).pvalue < self.alpha:
                    # variances are not homogeneous: use Welch's t-test
                    ttest_result = ttest_ind(y_i, y_j, equal_var=False)
                else:
                    ttest_result = ttest_ind(y_i, y_j)
                significance.iloc[j, i] = bool(ttest_result.pvalue <= self.alpha)

        risk_result[factor] = dict(risk=stratum_means, ttest_stra=significance)
    return risk_result
163
197
 
164
198
  def plot(self, tick_fontsize=10, value_fontsize=10, colorbar_fontsize=10, show=True):
165
199
  if isinstance(self.interaction_df, type(None)):
@@ -188,4 +222,4 @@ class GeoDetector(object):
188
222
  plt.show()
189
223
  return ax
190
224
  else:
191
- return ax
225
+ return ax