PyPI - python-fedci - Versions diffs - 0.1.3.tar.gz → 0.1.5.tar.gz - Mend

python-fedci 0.1.3.tar.gz → 0.1.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{python_fedci-0.1.3 → python_fedci-0.1.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fedci
-Version: 0.1.3
+Version: 0.1.5
 Summary: A small package for federated independence tests
 Author-email: Maximilian Hahn <max.hahn@gmx.de>
 License: SPDX-License-Identifier: AGPL-3.0-or-later

{python_fedci-0.1.3 → python_fedci-0.1.5}/fedci/client.py RENAMED Viewed

@@ -7,13 +7,17 @@ import statsmodels.api as sm
 import statsmodels.genmod.families.family as fam
 from scipy.special import softmax
-from .env import get_env_additive_masking, get_env_client_heterogeniety, get_env_fit_intercept
+from .env import (
+    get_env_additive_masking,
+    get_env_client_heterogeniety,
+    get_env_fit_intercept,
+)
 from .utils import (
     BetaUpdateData,
     VariableType,
     categorical_separator,
-    constant_colname,
     client_colname,
+    constant_colname,
     ordinal_separator,
     polars_dtype_map,
 )
@@ -99,7 +103,7 @@ class ComputationHelper:
         family: DistributionalFamily,
     ):
         eta = np.zeros_like(y)
-        if get_env_client_heterogeniety()==1:
+        if get_env_client_heterogeniety() == 1:
             alpha = ComputationHelper.fit_local_alpha(y=y, offset=eta, family=family)
             eta += alpha
         mu: np.ndarray = family.inverse_link(eta)
@@ -115,11 +119,11 @@ class ComputationHelper:
             "xwx": xwx,
             "xwz": xwz,
             "rss": 0,
-            "n": 0,
+            "n": y.shape[0],
         }
         if family == Gaussian:
             result["rss"] = np.sum((y - mu) ** 2).item()
-            result["n"] = y.shape[0]
+            # result["n"] = y.shape[0]
         return result
     @staticmethod
@@ -128,7 +132,7 @@ class ComputationHelper:
         family: DistributionalFamily,
     ):
         eta = np.zeros_like(y)
-        if get_env_client_heterogeniety()==1:
+        if get_env_client_heterogeniety() == 1:
             alpha = ComputationHelper.fit_local_alpha(y=y, offset=eta, family=family)
             eta += alpha
         else:
@@ -146,7 +150,7 @@ class ComputationHelper:
     ):
         eta: np.ndarray = X @ beta
-        if get_env_client_heterogeniety()==1:
+        if get_env_client_heterogeniety() == 1:
             alpha = ComputationHelper.fit_local_alpha(y=y, offset=eta, family=family)
         else:
             alpha = np.zeros_like(eta)
@@ -188,10 +192,10 @@ class ComputationHelper:
         xwx, xwz = ComputationHelper.get_irls_step(y, X, eta, mu, alpha, family)
         llf: float = family.loglik(y, mu)
         # only use non-dummy values in rss and n for gaussian regression
-        result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": 0}
+        result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": y.shape[0]}
         if family == Gaussian:
             result["rss"] = np.sum((y - mu) ** 2).item()
-            result["n"] = y.shape[0]
+            # result["n"] = y.shape[0]
         return result
     @staticmethod
@@ -357,7 +361,7 @@ class CategoricalComputationUnit(ComputationUnit):
             # Reshape beta (K x (J-1))
             beta = beta.reshape(num_features, -1, order="F")
-        if get_env_client_heterogeniety()==1:
+        if get_env_client_heterogeniety() == 1:
             if X is None:
                 offset = np.zeros_like(y)
             else:
@@ -385,7 +389,7 @@ class CategoricalComputationUnit(ComputationUnit):
             xwx = np.empty((0, 0))
             xwz = np.empty((0, 0))
-            return {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": 0}
+            return {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": y.shape[0]}
         # Compute eta and mu
         eta = X @ beta  # N x (J-1)
@@ -424,11 +428,17 @@ class CategoricalComputationUnit(ComputationUnit):
             XWX += Xi.T @ Wi @ Xi
             XWz += Xi.T @ Wi @ z_i
-        logprob = np.log(np.clip(mu, 1e-8, 1-1e-8))
+        logprob = np.log(np.clip(mu, 1e-8, 1 - 1e-8))
         llf = np.sum(y_full * logprob)
         # only use non-dummy values in rss and n for gaussian regression
-        return {"llf": llf, "xwx": XWX, "xwz": XWz.reshape(-1, 1), "rss": 0, "n": 0}
+        return {
+            "llf": llf,
+            "xwx": XWX,
+            "xwz": XWz.reshape(-1, 1),
+            "rss": 0,
+            "n": y.shape[0],
+        }
 class OrdinalComputationUnit:  # (ComputationUnit):
@@ -534,7 +544,7 @@ class OrdinalComputationUnit:  # (ComputationUnit):
             llf += np.sum(np.log(np.take(mu_diff, current_level_indices.nonzero()[0])))
         mu_diff = mus_diff[-1]
         llf += np.sum(np.log(np.take(mu_diff, reference_level_indices.nonzero()[0])))
-        result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": 0}
+        result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": y.shape[0]}
         return result
@@ -554,8 +564,10 @@ class Client:
         self.id = id
         self.data: pl.DataFrame = data
-        if get_env_client_heterogeniety()==2:
-            self.data = self.data.with_columns(pl.lit(str(self.id)).alias(client_colname))
+        if get_env_client_heterogeniety() == 2:
+            self.data = self.data.with_columns(
+                pl.lit(str(self.id)).alias(client_colname)
+            )
         self.schema: Dict[str, VariableType] = {
             column: polars_dtype_map[dtype]
@@ -792,10 +804,7 @@ class Client:
                 new_cond_vars.append(constant_colname)
             cond_vars = new_cond_vars
             result = regression_computation_map[self.schema[resp_var]].compute(
-                self.expanded_data,
-                resp_var,
-                cond_vars,
-                beta
+                self.expanded_data, resp_var, cond_vars, beta
             )
             if len(self.contributing_clients) > 0:
                 result = self.apply_masks(test_key, result)

{python_fedci-0.1.3 → python_fedci-0.1.5}/fedci/testing.py RENAMED Viewed

@@ -30,6 +30,7 @@ class RegressionTest:
         self.num_classes, self.num_parameters = params
         self.dof = self.num_classes * self.num_parameters
         self.beta = np.zeros((self.dof, 1))
+        self.num_samples = None
         # self.beta = np.random.randn(self.dof, 1)
         self.alpha = 1.0
@@ -132,6 +133,10 @@ class RegressionTest:
         xwx = sum([_update.xwx for _update in update])
         xwz = sum([_update.xwz for _update in update])
         n = int(np.sum([_update.n for _update in update]).item())
+        assert self.num_samples is None or self.num_samples == n, (
+            "Number of supporting samples changed during test"
+        )
+        self.num_samples = n
         if abs(llf) < 1e-10:
             self.early_stop = True
@@ -232,8 +237,9 @@ class LikelihoodRatioTest:
         self.bad_fit = False
         self.p_value: Optional[float] = None
-        self.chi2stat = None
-        self.dof = None
+        self.chi2stat: Optional[float] = None
+        self.dof: Optional[int] = None
+        self.num_samples: Optional[int] = None
     def __repr__(self):
         if self.p_value is None:
@@ -324,6 +330,11 @@ class LikelihoodRatioTest:
                     for _update in update
                 ]
             )
+        if self.num_samples is None:
+            assert (
+                self.restricted_test.num_samples == self.unrestricted_test.num_samples
+            ), "Restricted and Unrestricted Test mustn't differ in supporting samples"
+            self.num_samples = self.restricted_test.num_samples
     def get_iterations(self):
         return max([self.restricted_test.iterations, self.unrestricted_test.iterations])
@@ -336,8 +347,8 @@ class LikelihoodRatioTest:
         t1_dof = self.unrestricted_test.dof
         self.chi2stat = 2 * (t1_llf - t0_llf)
-        self.dofs = t1_dof - t0_dof
-        self.p_value = scipy.stats.chi2.sf(self.chi2stat, self.dofs).item()
+        self.dof = t1_dof - t0_dof
+        self.p_value = scipy.stats.chi2.sf(self.chi2stat, self.dof).item()
         if get_env_debug() >= 2:
             print(
@@ -371,6 +382,7 @@ class SymmetricLikelihoodRatioTest:
             self.lrt2: LikelihoodRatioTest = lrt1
         self.p_value: Optional[float] = None
+        self.num_samples: Optional[int] = None
     def __repr__(self):
         if self.p_value is None:
@@ -418,6 +430,11 @@ class SymmetricLikelihoodRatioTest:
             self.lrt2.update_parameters(
                 [_update[self.lrt2.response] for _update in update]
             )
+        if self.num_samples is None:
+            assert self.lrt1.num_samples == self.lrt2.num_samples, (
+                "Restricted and Unrestricted Test mustn't differ in supporting samples"
+            )
+            self.num_samples = self.lrt1.num_samples
     def get_iterations(self):
         return max([self.lrt1.get_iterations(), self.lrt2.get_iterations()])

{python_fedci-0.1.3 → python_fedci-0.1.5}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "python-fedci"
 authors = [{ name = "Maximilian Hahn", email = "max.hahn@gmx.de" }]
-version = "0.1.3"
+version = "0.1.5"
 license = { file = "LICENSE" }
 description = "A small package for federated independence tests"
 readme = "README.md"

{python_fedci-0.1.3 → python_fedci-0.1.5}/python_fedci.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fedci
-Version: 0.1.3
+Version: 0.1.5
 Summary: A small package for federated independence tests
 Author-email: Maximilian Hahn <max.hahn@gmx.de>
 License: SPDX-License-Identifier: AGPL-3.0-or-later