py-geodetector 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ from .geodetector import GeoDetector, load_example_data
@@ -0,0 +1,186 @@
1
+ incidence,type,region,level
2
+ 5.94,7,5,5
3
+ 5.87,5,5,5
4
+ 5.92,5,5,5
5
+ 6.32,1,7,1
6
+ 6.49,3,2,4
7
+ 6.46,3,2,4
8
+ 6.51,3,2,4
9
+ 6.7,3,2,4
10
+ 6.68,3,2,4
11
+ 6.65,3,2,4
12
+ 6.65,3,2,4
13
+ 6.6,3,2,4
14
+ 6.66,3,2,4
15
+ 5.86,5,5,5
16
+ 6.93,3,2,4
17
+ 6.5,3,2,4
18
+ 6.37,3,2,4
19
+ 6.64,3,2,4
20
+ 6.43,7,9,1
21
+ 7.01,2,9,1
22
+ 6.77,2,9,1
23
+ 6.36,7,9,1
24
+ 6.44,7,9,3
25
+ 6.52,7,9,1
26
+ 5.83,5,6,7
27
+ 6.32,7,9,1
28
+ 6.33,7,9,1
29
+ 6.46,7,9,1
30
+ 6.63,7,9,6
31
+ 6.53,3,9,1
32
+ 6.59,7,9,4
33
+ 6.58,2,9,1
34
+ 6.63,3,9,6
35
+ 6.61,2,9,1
36
+ 7.04,3,9,4
37
+ 5.89,5,6,5
38
+ 6.57,2,9,1
39
+ 6.53,3,9,4
40
+ 6.58,3,9,4
41
+ 6.53,3,9,1
42
+ 6.55,3,9,4
43
+ 6.57,2,9,1
44
+ 6.57,1,7,1
45
+ 6.49,2,7,4
46
+ 6.56,2,9,4
47
+ 6.57,1,7,1
48
+ 5.97,5,6,5
49
+ 6.54,2,7,4
50
+ 6.71,3,9,6
51
+ 6.58,3,9,1
52
+ 6.63,7,9,6
53
+ 7.42,3,2,6
54
+ 6.71,3,4,1
55
+ 6.82,3,2,6
56
+ 6.56,3,2,6
57
+ 6.82,3,2,6
58
+ 5.86,5,6,7
59
+ 6.8,3,2,6
60
+ 7.74,3,2,6
61
+ 6.92,3,2,6
62
+ 6.75,3,2,6
63
+ 7.06,3,2,6
64
+ 7.1,3,2,6
65
+ 6.69,3,4,6
66
+ 6.81,3,2,6
67
+ 6.99,3,2,6
68
+ 6.79,3,2,6
69
+ 5.98,5,6,5
70
+ 7.01,3,2,6
71
+ 7.2,3,2,6
72
+ 6.95,3,2,4
73
+ 6.74,3,4,6
74
+ 6.77,3,4,1
75
+ 6.73,3,4,6
76
+ 6.57,7,3,3
77
+ 7.55,7,3,3
78
+ 6.04,1,6,5
79
+ 6.28,7,3,3
80
+ 6.38,7,3,3
81
+ 6.21,7,3,3
82
+ 6.33,7,3,3
83
+ 6.19,7,3,3
84
+ 6.45,7,3,2
85
+ 6.19,7,3,2
86
+ 6.14,7,3,2
87
+ 6.2,7,3,2
88
+ 6.14,3,3,2
89
+ 5.95,1,5,5
90
+ 6.27,7,3,3
91
+ 6.14,3,3,2
92
+ 6.18,7,1,2
93
+ 6.09,7,1,2
94
+ 6.04,7,1,3
95
+ 6.01,7,5,2
96
+ 6.01,7,5,3
97
+ 6.09,7,1,3
98
+ 5.85,5,6,5
99
+ 5.88,7,5,5
100
+ 5.8,5,6,7
101
+ 5.78,5,6,7
102
+ 5.7,5,6,7
103
+ 5.82,5,6,7
104
+ 5.73,5,6,7
105
+ 5.79,5,6,7
106
+ 5.8,5,6,7
107
+ 6.45,1,8,3
108
+ 6.47,1,8,3
109
+ 6.11,3,8,1
110
+ 5.98,7,5,5
111
+ 6.12,1,8,3
112
+ 6.15,1,8,1
113
+ 6.04,3,8,1
114
+ 6.06,3,8,3
115
+ 5.95,3,8,1
116
+ 6.09,3,8,1
117
+ 6.11,3,8,1
118
+ 6.27,1,8,1
119
+ 6.28,1,8,1
120
+ 6.28,1,8,1
121
+ 5.96,1,5,5
122
+ 6.23,1,8,1
123
+ 6.53,1,8,1
124
+ 6.25,1,8,3
125
+ 6.14,3,1,3
126
+ 6.48,3,1,3
127
+ 6.02,1,1,3
128
+ 6.03,1,1,3
129
+ 6.13,3,1,3
130
+ 6.07,3,1,3
131
+ 6.09,3,1,3
132
+ 5.66,5,5,5
133
+ 6.2,3,1,1
134
+ 6.13,1,1,3
135
+ 5.99,1,1,3
136
+ 6.13,1,1,1
137
+ 6.01,7,1,3
138
+ 6.11,1,1,3
139
+ 6.17,1,1,1
140
+ 6.19,3,1,1
141
+ 6.18,1,1,3
142
+ 6.09,1,1,3
143
+ 5.74,5,5,5
144
+ 6.19,7,1,3
145
+ 6.29,1,1,3
146
+ 6.25,1,1,1
147
+ 6.24,1,1,1
148
+ 6.33,3,1,1
149
+ 6.23,3,1,3
150
+ 6.35,1,8,1
151
+ 6.32,1,1,1
152
+ 6.37,1,1,1
153
+ 6.21,1,1,1
154
+ 5.88,5,5,5
155
+ 6.2,1,1,1
156
+ 6.58,1,8,1
157
+ 6.44,1,8,1
158
+ 6.48,1,8,1
159
+ 7.4,2,8,1
160
+ 6.38,1,7,1
161
+ 6.44,1,8,1
162
+ 6.65,1,8,1
163
+ 6.86,1,8,1
164
+ 6.71,1,3,1
165
+ 6.1,5,5,5
166
+ 6.88,7,3,1
167
+ 6.72,7,3,1
168
+ 6.3,7,3,1
169
+ 6.65,1,7,1
170
+ 6.34,1,8,1
171
+ 7.22,1,3,1
172
+ 6.5,1,3,3
173
+ 6.52,1,3,1
174
+ 6.53,7,3,1
175
+ 6.43,7,3,1
176
+ 5.89,5,5,5
177
+ 6.5,7,3,3
178
+ 6.48,7,3,3
179
+ 6.5,7,3,1
180
+ 6.38,1,7,1
181
+ 6.18,3,1,3
182
+ 6.39,1,7,1
183
+ 6.45,1,8,1
184
+ 6.86,2,7,1
185
+ 6.29,2,7,1
186
+ 6.49,1,7,1
@@ -0,0 +1,191 @@
1
+ import warnings
2
+ import numpy as np
3
+ import pandas as pd
4
+ from typing import Sequence
5
+ from scipy.stats import f, ncf
6
+ import matplotlib.pyplot as plt
7
+
8
+
9
+ from pathlib import Path
10
def load_example_data():
    """Read the example CSV shipped next to this module.

    Returns:
        pandas.DataFrame: the parsed contents of
        ``example_data/disease.csv`` located in this module's directory.
    """
    package_dir = Path(__file__).parent
    return pd.read_csv(package_dir / "example_data" / "disease.csv")
14
+
15
+
16
+ def _plot_value(ax, interaction_df, ecological_df, value_fontsize=10):
17
+ length = len(interaction_df.index)
18
+ for i in range(length):
19
+ for j in range(length):
20
+ if not pd.isna(interaction_df.iloc[i, j]):
21
+ num = str(round(interaction_df.iloc[i, j], 2))
22
+ mark = num[-2:] if 3 == len(num) else num[-3:]
23
+ if 'Y'==ecological_df.iloc[i, j]:
24
+ ax.text(j, i, mark, ha="center", va="center", color="r", fontsize=value_fontsize)
25
+ else:
26
+ ax.text(j, i, mark, ha="center", va="center", color="k", fontsize=value_fontsize)
27
+
28
+
29
class GeoDetector(object):
    """Factor, interaction and ecological detectors built on the q statistic.

    The q statistic of a factor is ``1 - SSW / SST``, where ``SSW`` is the sum
    of the within-stratum sums of squares of ``y`` (strata are the distinct
    values of the factor) and ``SST`` is the total sum of squares of ``y``.

    Attributes set by the detector methods (all ``None`` until computed):
        factor_df: result of :meth:`factor_dector`.
        interaction_df: result of :meth:`interaction_detector`.
        interaction_relationship_df: labels produced by
            ``interaction_detector(relationship=True)``.
        ecological_df: result of :meth:`ecological_detector`.
    """

    def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str]):
        """Store the data and validate it.

        Args:
            df: input data; must not contain null values.
            y: name of the response column.
            factors: names of the stratifying columns (must not include ``y``).

        Raises:
            ValueError: see :meth:`_check_data`.
        """
        self.df = df
        self.y = y
        self.factors = factors
        self._check_data(df, y, factors)
        self.factor_df, self.interaction_df, self.ecological_df = None, None, None
        self.interaction_relationship_df = None

    def _check_data(self, df, y, factors):
        """Validate column names, factor dtypes and absence of nulls.

        Raises:
            ValueError: if a factor or ``y`` is not a column of ``df``, if
                ``y`` is listed among the factors, or if ``df`` contains any
                null value.

        Warns:
            UserWarning: for every factor whose dtype is not integer, object
                or string.
        """
        for factor in factors:
            if factor not in df.columns:
                # The original message never interpolated the offending name.
                raise ValueError('Factor [{}] is not in data'.format(factor))

        for factor in factors:
            # Check the dtype of the column.
            dtype = df[factor].dtype
            is_discrete = (
                pd.api.types.is_integer_dtype(dtype)
                or pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype)
            )
            if not is_discrete:
                # Warn when the column is neither integer nor string typed.
                warnings.warn(f"Factor '{factor}' is not of type 'int' or 'str'.")

        if y not in df.columns:
            raise ValueError('Factor [{}] is not in data'.format(y))

        if y in factors:
            raise ValueError("Y variable should not in Factor variables. ")

        if df.isnull().values.any():
            raise ValueError("data hava some objects with value NULL")

    @classmethod
    def _cal_ssw(cls, df: pd.DataFrame, y, factor, extra_factor=None):
        """Sum per-stratum statistics of ``y``.

        Strata are the groups of ``factor`` (crossed with ``extra_factor``
        when it is given and differs from ``factor``).

        Returns:
            tuple ``(strataVarSum, lamda_1st_sum, lamda_2nd_sum)`` where, summed
            over strata with size ``n`` and mean ``m``:
            ``strataVarSum`` is the within-stratum sum of squares
            (``(n - 1) * var``, taken as 0 for single-row strata),
            ``lamda_1st_sum`` is the sum of ``m ** 2`` and
            ``lamda_2nd_sum`` is the sum of ``sqrt(n) * m``.
        """
        keys = [factor]
        if extra_factor is not None and extra_factor != factor:
            keys.append(extra_factor)

        grouped = df.groupby(keys)[y]
        sizes = grouped.size()
        means = grouped.mean()
        # The sample variance of a single observation is NaN; such strata
        # contribute no within-stratum variation.
        strata_ss = ((sizes - 1) * grouped.var(ddof=1)).where(sizes > 1, 0.0)

        strataVarSum = strata_ss.sum()
        lamda_1st_sum = np.square(means).sum()
        lamda_2nd_sum = (np.sqrt(sizes) * means).sum()
        return strataVarSum, lamda_1st_sum, lamda_2nd_sum

    @classmethod
    def _cal_q(cls, df, y, factor, extra_factor=None):
        """Return ``(q, lamda_1st_sum, lamda_2nd_sum)`` for the given strata.

        ``q`` is ``1 - SSW / SST`` with ``SST = (N - 1) * var(y)``; the two
        lambda sums are passed through from :meth:`_cal_ssw`.
        """
        strataVarSum, lamda_1st_sum, lamda_2nd_sum = cls._cal_ssw(df, y, factor, extra_factor)
        TotalVar = (df.shape[0] - 1) * df[y].var(ddof=1)
        q = 1 - strataVarSum / TotalVar
        return q, lamda_1st_sum, lamda_2nd_sum

    def factor_dector(self):
        """Compute the q statistic and its p value for every factor.

        The p value is the survival function of a non-central F distribution
        with ``(L - 1, N - L)`` degrees of freedom (``L`` strata, ``N`` rows)
        evaluated at ``F = (N - L) * q / ((L - 1) * (1 - q))``.

        Returns:
            pandas.DataFrame (float32) with rows ``"q statistic"`` and
            ``"p value"`` and one column per factor; also stored in
            ``self.factor_df``.
        """
        factors = list(self.factors)
        # Filled in a NumPy buffer so the declared float32 dtype is kept no
        # matter how pandas treats item assignment of float64 scalars.
        values = np.full((2, len(factors)), np.nan, dtype="float32")
        N_var = self.df[self.y].var(ddof=1)
        N_popu = self.df.shape[0]
        for col, factor in enumerate(factors):
            N_stra = self.df[factor].unique().shape[0]
            q, lamda_1st_sum, lamda_2nd_sum = self._cal_q(self.df, self.y, factor)

            # Non-centrality parameter.
            lamda = (lamda_1st_sum - np.square(lamda_2nd_sum) / N_popu) / N_var
            # F value.
            F_value = (N_popu - N_stra) * q / ((N_stra - 1) * (1 - q))
            # p value.
            p_value = ncf.sf(F_value, N_stra - 1, N_popu - N_stra, nc=lamda)

            values[0, col] = q
            values[1, col] = p_value

        self.factor_df = pd.DataFrame(values, index=["q statistic", "p value"], columns=factors)
        return self.factor_df

    def factor_detector(self):
        """Correctly spelled alias of :meth:`factor_dector`."""
        return self.factor_dector()

    @classmethod
    def _interaction_relationship(cls, df):
        """Label every factor pair by how the interaction q compares to the single q values.

        ``df`` is a square frame whose diagonal holds the single-factor q
        values and whose lower triangle holds the interaction q values.

        With ``lo = min(q1, q2)``, ``hi = max(q1, q2)`` and ``i_q`` the
        interaction value, the labels are:

        * ``i_q > q1 + q2``  -> ``"Enhance, nonlinear"``
        * ``i_q == q1 + q2`` -> ``"Independent"`` (exact equality)
        * ``i_q > hi``       -> ``"Enhance, bi-"``
        * ``lo < i_q <= hi`` -> ``"Weaken, uni-"``
        * ``i_q <= lo``      -> ``"Weaken, nonlinear"``

        The branches are mutually exclusive, so "Independent" is no longer
        overwritten and the boundary ``i_q == hi`` always receives a label.
        Pairs with a missing value are left as NaN.

        Returns:
            pandas.DataFrame with the same index/columns as ``df`` whose lower
            triangle holds the labels.
        """
        out_df = pd.DataFrame(index=df.index, columns=df.columns)
        length = len(df.index)
        for i in range(length):
            for j in range(i + 1, length):
                factor1, factor2 = df.index[i], df.index[j]
                i_q = df.loc[factor2, factor1]
                q1 = df.loc[factor1, factor1]
                q2 = df.loc[factor2, factor2]
                if pd.isna(i_q) or pd.isna(q1) or pd.isna(q2):
                    continue

                lower, upper, total = min(q1, q2), max(q1, q2), q1 + q2
                if i_q > total and i_q > upper:
                    outputRls = "Enhance, nonlinear"
                elif i_q == total:
                    outputRls = "Independent"
                elif i_q > upper:
                    outputRls = "Enhance, bi-"
                elif i_q > lower:
                    outputRls = "Weaken, uni-"
                else:
                    outputRls = "Weaken, nonlinear"

                out_df.loc[factor2, factor1] = outputRls
        return out_df

    def interaction_detector(self, relationship=False):
        """Compute the q statistic of every pair of factors.

        The lower triangle (including the diagonal, which holds the
        single-factor q values) of the result is filled; the upper triangle
        stays NaN.

        Args:
            relationship: when true, also compute the interaction labels.

        Returns:
            ``interaction_df`` (float32), or the tuple
            ``(interaction_df, interaction_relationship_df)`` when
            ``relationship`` is true. Both are also stored on ``self``.
        """
        factors = list(self.factors)
        length = len(factors)
        values = np.full((length, length), np.nan, dtype="float32")
        for i in range(length):
            for j in range(i + 1):
                q, _, _ = self._cal_q(self.df, self.y, factors[i], factors[j])
                values[i, j] = q
        self.interaction_df = pd.DataFrame(values, index=factors, columns=factors)

        if relationship:
            self.interaction_relationship_df = self._interaction_relationship(self.interaction_df)
            return self.interaction_df, self.interaction_relationship_df
        return self.interaction_df

    def ecological_detector(self):
        """Compare the within-stratum sums of squares of every pair of factors.

        For factors ``i > j`` the statistic
        ``(dfn * (dfd - 1) * SSW_i) / (dfd * (dfn - 1) * SSW_j)`` is compared
        with the 5% quantile of an F distribution; the cell is ``'Y'`` when
        the statistic is below that quantile and ``'N'`` otherwise.

        Returns:
            pandas.DataFrame (object dtype) whose strict lower triangle holds
            ``'Y'``/``'N'`` and whose other cells are NaN; also stored in
            ``self.ecological_df``.
        """
        factors = list(self.factors)
        length = len(factors)
        # Object dtype: the cells hold the strings 'Y'/'N', not numbers.
        values = np.full((length, length), np.nan, dtype=object)

        # Each factor's SSW and degrees of freedom are needed for many pairs;
        # compute them once.
        ssw = [self._cal_ssw(self.df, self.y, factor)[0] for factor in factors]
        dof = [self.df[factor].notna().sum() - 1 for factor in factors]

        for i in range(1, length):
            ssw1, dfn = ssw[i], dof[i]
            for j in range(0, i):
                ssw2, dfd = ssw[j], dof[j]
                fval = (dfn * (dfd - 1) * ssw1) / (dfd * (dfn - 1) * ssw2)
                # NOTE(review): both degrees of freedom passed to the quantile
                # are dfn, as in the original code; confirm whether the second
                # should be dfd (they coincide when the data has no nulls).
                values[i, j] = 'Y' if fval < f.ppf(0.05, dfn, dfn) else 'N'

        self.ecological_df = pd.DataFrame(values, index=factors, columns=factors)
        return self.ecological_df

    def plot(self, tick_fontsize=10, value_fontsize=10, colorbar_fontsize=10, show=True):
        """Draw the interaction q values as a heatmap.

        The interaction and ecological detectors are run first when their
        results are not available yet. Cell labels are red where the
        ecological detector result is ``'Y'``.

        Args:
            tick_fontsize: font size of the axis tick labels.
            value_fontsize: font size of the per-cell labels.
            colorbar_fontsize: font size of the colorbar tick labels.
            show: call ``plt.show()`` before returning.

        Returns:
            The matplotlib axes holding the heatmap.
        """
        if self.interaction_df is None:
            self.interaction_detector()
        if self.ecological_df is None:
            self.ecological_detector()

        fig, ax = plt.subplots(constrained_layout=True)

        im = ax.imshow(self.interaction_df.values, cmap="YlGnBu", vmin=0, vmax=1)
        _plot_value(ax, self.interaction_df, self.ecological_df, value_fontsize=value_fontsize)

        ax.set_xticks(np.arange(len(self.factors)))
        ax.set_yticks(np.arange(len(self.factors)))
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        ax.set_xticklabels(self.factors, fontsize=tick_fontsize)
        ax.set_yticklabels(self.factors, rotation=45, fontsize=tick_fontsize)
        ax.tick_params(axis='y', pad=0.1)

        colorbar = fig.colorbar(im, ax=ax, shrink=0.9, pad=0.01, aspect=25, extend="both")
        colorbar.ax.tick_params(labelsize=colorbar_fontsize)

        if show:
            plt.show()
        return ax
@@ -0,0 +1,63 @@
1
+ Metadata-Version: 2.3
2
+ Name: py_geodetector
3
+ Version: 0.1.1
4
+ Summary: A simple Python package for the geodetector
5
+ Project-URL: Homepage, https://github.com/djw-easy/GeoDetector
6
+ Project-URL: Bug Tracker, https://github.com/djw-easy/GeoDetector/issues
7
+ Author-email: djw <djweasy@163.com>
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+
14
+ # A simple Python package for the geodetector
15
+
16
+ # Install
17
+
18
+ ```
19
+ pip install py-geodetector
20
+ ```
21
+
22
+ # Usage
23
+
24
+ A quick example of geodetector usage is given in ./example.ipynb.
25
+
26
+ ```python
27
+ from py_geodetector import load_example_data, GeoDetector
28
+
29
+ # load example data
30
+ df = load_example_data()
31
+
32
+ gd = GeoDetector(df, y="incidence", factors=["type", "region", "level"])
33
+ # factor detect
34
+ factor_df = gd.factor_dector()
35
+
36
+ # interaction detect
37
+ interaction_df = gd.interaction_detector()
38
+ # or you can generate the interaction relationship at the same time
39
+ interaction_df, interaction_relationship_df = gd.interaction_detector(relationship=True)
40
+
41
+ # ecological detect
42
+ ecological_df = gd.ecological_detector()
43
+
44
+ # plot
45
+ # use a heatmap to visualize the interaction detect result,
46
+ # red text means that the ecological detection results show a significant difference
47
+ gd.plot(value_fontsize=14, tick_fontsize=16, colorbar_fontsize=14);
48
+ ```
49
+
50
+ # Reference
51
+
52
+ ```
53
+ @article{wang2010geographical,
54
+ title={Geographical detectors-based health risk assessment and its application in the neural tube defects study of the Heshun Region, China},
55
+ author={Wang, Jin-Feng and Li, Xin-Hu and Christakos, George and Liao, Yi-Lan and Zhang, Tin and Gu, Xue and Zheng, Xiao-Ying},
56
+ journal={International Journal of Geographical Information Science},
57
+ volume={24},
58
+ number={1},
59
+ pages={107-127},
60
+ year={2010},
61
+ publisher={Taylor \& Francis}
62
+ }
63
+ ```
@@ -0,0 +1,6 @@
1
+ py_geodetector/__init__.py,sha256=bhyJq1ipXNRhG-zPbF_5YUbJ_sana5rJ-FG-ogBNh_U,57
2
+ py_geodetector/geodetector.py,sha256=R-3OlNrgghwZTdieIbhB5J3N3nl2-ix6bjwjyVmh1E8,8240
3
+ py_geodetector/example_data/disease.csv,sha256=sodkE21Xw-eSlGWxLE1by7hSHQk2FjNxb4ncWdyWLmE,2231
4
+ py_geodetector-0.1.1.dist-info/METADATA,sha256=ILLj1V2F_SoPoiJCdaAjmRCT_W4gSeLqCeKAmoznUL0,1853
5
+ py_geodetector-0.1.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
6
+ py_geodetector-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.25.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any