PyPI - py-geodetector - Versions diffs - 0.1.1__tar.gz → 0.1.3__tar.gz - Mend

py-geodetector 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: py_geodetector
-Version: 0.1.1
+Version: 0.1.3
 Summary: A simple Python package for the geodetector
 Project-URL: Homepage, https://github.com/djw-easy/GeoDetector
 Project-URL: Bug Tracker, https://github.com/djw-easy/GeoDetector/issues
@@ -41,6 +41,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
 # ecological detect
 ecological_df = gd.ecological_detector()
+# risk detect
+risk_result = gd.risk_detector()
 # plot
 # use a heatmap visualize the interaction detect result,
 # red text means that the ecological detection results show a significant difference

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/README.md RENAMED Viewed

@@ -28,6 +28,9 @@ interaction_df, interaction_relationship_df = gd.interaction_detector(relationsh
 # ecological detect
 ecological_df = gd.ecological_detector()
+# risk detect
+risk_result = gd.risk_detector()
 # plot
 # use a heatmap visualize the interaction detect result,
 # red text means that the ecological detection results show a significant difference

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/example.ipynb RENAMED Viewed

@@ -13,6 +13,15 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "from src.py_geodetector import GeoDetector, load_example_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -90,7 +99,7 @@
        "4       6.49     3       2      4"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -102,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -111,7 +120,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -163,7 +172,7 @@
        "p value      0.363236  0.000117  0.040804"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -174,7 +183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -233,7 +242,7 @@
        "level   0.663524  0.713597  0.606709"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -244,7 +253,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -303,7 +312,7 @@
        "level   Enhance, bi-  Enhance, bi-   NaN"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -314,7 +323,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -373,7 +382,7 @@
        "level     Y      N    NaN"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -384,7 +393,138 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r = gd.risk_detector()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "type\n",
+       "1    6.340000\n",
+       "2    6.687500\n",
+       "3    6.583279\n",
+       "5    5.843810\n",
+       "7    6.347073\n",
+       "Name: incidence, dtype: float64"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r['type']['risk']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>5</th>\n",
+       "      <th>7</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>True</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>True</td>\n",
+       "      <td>True</td>\n",
+       "      <td>True</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>True</td>\n",
+       "      <td>True</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       1      2     3     5   7\n",
+       "1    NaN    NaN   NaN   NaN NaN\n",
+       "2   True    NaN   NaN   NaN NaN\n",
+       "3   True  False   NaN   NaN NaN\n",
+       "5   True   True  True   NaN NaN\n",
+       "7  False   True  True  True NaN"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r['type']['ttest_stra']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/pyproject.toml RENAMED Viewed

@@ -11,7 +11,7 @@ build-backend = "hatchling.build"
 [project]
 name = "py_geodetector"
-version = "0.1.1"
+version = "0.1.3"
 authors = [
     { name = "djw", email = "djweasy@163.com" },
 ]

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/src/py_geodetector/geodetector.py RENAMED Viewed

@@ -2,8 +2,8 @@ import warnings
 import numpy as np
 import pandas as pd
 from typing import Sequence
-from scipy.stats import f, ncf
 import matplotlib.pyplot as plt
+from scipy.stats import f, levene, ncf, ttest_ind
 from pathlib import Path
@@ -27,10 +27,11 @@ def _plot_value(ax, interaction_df, ecological_df, value_fontsize=10):
 class GeoDetector(object):
-    def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str]):
+    def __init__(self, df: pd.DataFrame, y: str, factors: Sequence[str], alpha=0.05):
         self.df = df
         self.y = y
         self.factors = factors
+        self.alpha = alpha
         self._check_data(df, y, factors)
         self.factor_df, self.interaction_df, self.ecological_df = None, None, None
@@ -155,11 +156,44 @@ class GeoDetector(object):
                 ssw2, _, _ = self._cal_ssw(self.df, self.y, self.factors[j])
                 dfd = self.df[self.factors[j]].notna().sum()-1
                 fval = (dfn*(dfd-1)*ssw1)/(dfd*(dfn-1)*ssw2)
-                if fval<f.ppf(0.05, dfn, dfn):
+                if fval<f.ppf(self.alpha, dfn, dfn):
                     self.ecological_df.loc[self.factors[i], self.factors[j]] = 'Y'
                 else:
                     self.ecological_df.loc[self.factors[i], self.factors[j]] = 'N'
         return self.ecological_df
+    def risk_detector(self):
+        """
+        Compares the difference of average values between sub-groups
+        Reference:
+            https://github.com/gsnrguo/QGIS-Geographical-detector/blob/main/gd_core/geodetector.py
+        """
+        risk_result = dict()
+        for factor in self.factors:
+            risk_name = self.df.groupby(factor)[self.y].mean()
+            strata = np.sort(self.df[factor].unique())
+            t_test = np.empty((len(strata), len(strata)))
+            t_test.fill(np.nan)
+            t_test_strata = pd.DataFrame(t_test, index=strata, columns=strata)
+            for i in range(len(strata) - 1):
+                for j in range(i + 1, len(strata)):
+                    y_i = self.df.loc[self.df[factor] == strata[i], [self.y]]
+                    y_j = self.df.loc[self.df[factor] == strata[j], [self.y]]
+                    y_i = np.array(y_i).reshape(-1)
+                    y_j = np.array(y_j).reshape(-1)
+                    # hypothesis testing of variance homogeneity
+                    levene_result = levene(y_i, y_j)
+                    if levene_result.pvalue < self.alpha:
+                        # variance non-homogeneous
+                        ttest_result = ttest_ind(y_i, y_j, equal_var=False)
+                    else:
+                        ttest_result = ttest_ind(y_i, y_j)
+                    t_test_strata.iloc[j, i] = ttest_result.pvalue <= self.alpha
+            risk_factor = dict(risk=risk_name, ttest_stra=t_test_strata)
+            risk_result[factor] = risk_factor
+        return risk_result
     def plot(self, tick_fontsize=10, value_fontsize=10, colorbar_fontsize=10, show=True):
         if isinstance(self.interaction_df, type(None)):
@@ -188,4 +222,4 @@ class GeoDetector(object):
             plt.show()
             return ax
         else:
-            return ax
+            return ax

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/.gitignore RENAMED Viewed

File without changes

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/src/__init__.py RENAMED Viewed

File without changes

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/src/py_geodetector/__init__.py RENAMED Viewed

File without changes

{py_geodetector-0.1.1 → py_geodetector-0.1.3}/src/py_geodetector/example_data/disease.csv RENAMED Viewed

File without changes

py-geodetector 0.1.1__tar.gz → 0.1.3__tar.gz

py-geodetector 0.1.1__tar.gz → 0.1.3__tar.gz