python-fedci 0.1.3__tar.gz → 0.1.5__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fedci
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: A small package for federated independence tests
5
5
  Author-email: Maximilian Hahn <max.hahn@gmx.de>
6
6
  License: SPDX-License-Identifier: AGPL-3.0-or-later
@@ -7,13 +7,17 @@ import statsmodels.api as sm
7
7
  import statsmodels.genmod.families.family as fam
8
8
  from scipy.special import softmax
9
9
 
10
- from .env import get_env_additive_masking, get_env_client_heterogeniety, get_env_fit_intercept
10
+ from .env import (
11
+ get_env_additive_masking,
12
+ get_env_client_heterogeniety,
13
+ get_env_fit_intercept,
14
+ )
11
15
  from .utils import (
12
16
  BetaUpdateData,
13
17
  VariableType,
14
18
  categorical_separator,
15
- constant_colname,
16
19
  client_colname,
20
+ constant_colname,
17
21
  ordinal_separator,
18
22
  polars_dtype_map,
19
23
  )
@@ -99,7 +103,7 @@ class ComputationHelper:
99
103
  family: DistributionalFamily,
100
104
  ):
101
105
  eta = np.zeros_like(y)
102
- if get_env_client_heterogeniety()==1:
106
+ if get_env_client_heterogeniety() == 1:
103
107
  alpha = ComputationHelper.fit_local_alpha(y=y, offset=eta, family=family)
104
108
  eta += alpha
105
109
  mu: np.ndarray = family.inverse_link(eta)
@@ -115,11 +119,11 @@ class ComputationHelper:
115
119
  "xwx": xwx,
116
120
  "xwz": xwz,
117
121
  "rss": 0,
118
- "n": 0,
122
+ "n": y.shape[0],
119
123
  }
120
124
  if family == Gaussian:
121
125
  result["rss"] = np.sum((y - mu) ** 2).item()
122
- result["n"] = y.shape[0]
126
+ # result["n"] = y.shape[0]
123
127
  return result
124
128
 
125
129
  @staticmethod
@@ -128,7 +132,7 @@ class ComputationHelper:
128
132
  family: DistributionalFamily,
129
133
  ):
130
134
  eta = np.zeros_like(y)
131
- if get_env_client_heterogeniety()==1:
135
+ if get_env_client_heterogeniety() == 1:
132
136
  alpha = ComputationHelper.fit_local_alpha(y=y, offset=eta, family=family)
133
137
  eta += alpha
134
138
  else:
@@ -146,7 +150,7 @@ class ComputationHelper:
146
150
  ):
147
151
  eta: np.ndarray = X @ beta
148
152
 
149
- if get_env_client_heterogeniety()==1:
153
+ if get_env_client_heterogeniety() == 1:
150
154
  alpha = ComputationHelper.fit_local_alpha(y=y, offset=eta, family=family)
151
155
  else:
152
156
  alpha = np.zeros_like(eta)
@@ -188,10 +192,10 @@ class ComputationHelper:
188
192
  xwx, xwz = ComputationHelper.get_irls_step(y, X, eta, mu, alpha, family)
189
193
  llf: float = family.loglik(y, mu)
190
194
  # only use non-dummy values in rss and n for gaussian regression
191
- result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": 0}
195
+ result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": y.shape[0]}
192
196
  if family == Gaussian:
193
197
  result["rss"] = np.sum((y - mu) ** 2).item()
194
- result["n"] = y.shape[0]
198
+ # result["n"] = y.shape[0]
195
199
  return result
196
200
 
197
201
  @staticmethod
@@ -357,7 +361,7 @@ class CategoricalComputationUnit(ComputationUnit):
357
361
  # Reshape beta (K x (J-1))
358
362
  beta = beta.reshape(num_features, -1, order="F")
359
363
 
360
- if get_env_client_heterogeniety()==1:
364
+ if get_env_client_heterogeniety() == 1:
361
365
  if X is None:
362
366
  offset = np.zeros_like(y)
363
367
  else:
@@ -385,7 +389,7 @@ class CategoricalComputationUnit(ComputationUnit):
385
389
  xwx = np.empty((0, 0))
386
390
  xwz = np.empty((0, 0))
387
391
 
388
- return {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": 0}
392
+ return {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": y.shape[0]}
389
393
 
390
394
  # Compute eta and mu
391
395
  eta = X @ beta # N x (J-1)
@@ -424,11 +428,17 @@ class CategoricalComputationUnit(ComputationUnit):
424
428
  XWX += Xi.T @ Wi @ Xi
425
429
  XWz += Xi.T @ Wi @ z_i
426
430
 
427
- logprob = np.log(np.clip(mu, 1e-8, 1-1e-8))
431
+ logprob = np.log(np.clip(mu, 1e-8, 1 - 1e-8))
428
432
  llf = np.sum(y_full * logprob)
429
433
 
430
434
  # only use non-dummy values in rss and n for gaussian regression
431
- return {"llf": llf, "xwx": XWX, "xwz": XWz.reshape(-1, 1), "rss": 0, "n": 0}
435
+ return {
436
+ "llf": llf,
437
+ "xwx": XWX,
438
+ "xwz": XWz.reshape(-1, 1),
439
+ "rss": 0,
440
+ "n": y.shape[0],
441
+ }
432
442
 
433
443
 
434
444
  class OrdinalComputationUnit: # (ComputationUnit):
@@ -534,7 +544,7 @@ class OrdinalComputationUnit: # (ComputationUnit):
534
544
  llf += np.sum(np.log(np.take(mu_diff, current_level_indices.nonzero()[0])))
535
545
  mu_diff = mus_diff[-1]
536
546
  llf += np.sum(np.log(np.take(mu_diff, reference_level_indices.nonzero()[0])))
537
- result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": 0}
547
+ result = {"llf": llf, "xwx": xwx, "xwz": xwz, "rss": 0, "n": y.shape[0]}
538
548
  return result
539
549
 
540
550
 
@@ -554,8 +564,10 @@ class Client:
554
564
  self.id = id
555
565
  self.data: pl.DataFrame = data
556
566
 
557
- if get_env_client_heterogeniety()==2:
558
- self.data = self.data.with_columns(pl.lit(str(self.id)).alias(client_colname))
567
+ if get_env_client_heterogeniety() == 2:
568
+ self.data = self.data.with_columns(
569
+ pl.lit(str(self.id)).alias(client_colname)
570
+ )
559
571
 
560
572
  self.schema: Dict[str, VariableType] = {
561
573
  column: polars_dtype_map[dtype]
@@ -792,10 +804,7 @@ class Client:
792
804
  new_cond_vars.append(constant_colname)
793
805
  cond_vars = new_cond_vars
794
806
  result = regression_computation_map[self.schema[resp_var]].compute(
795
- self.expanded_data,
796
- resp_var,
797
- cond_vars,
798
- beta
807
+ self.expanded_data, resp_var, cond_vars, beta
799
808
  )
800
809
  if len(self.contributing_clients) > 0:
801
810
  result = self.apply_masks(test_key, result)
@@ -30,6 +30,7 @@ class RegressionTest:
30
30
  self.num_classes, self.num_parameters = params
31
31
  self.dof = self.num_classes * self.num_parameters
32
32
  self.beta = np.zeros((self.dof, 1))
33
+ self.num_samples = None
33
34
  # self.beta = np.random.randn(self.dof, 1)
34
35
  self.alpha = 1.0
35
36
 
@@ -132,6 +133,10 @@ class RegressionTest:
132
133
  xwx = sum([_update.xwx for _update in update])
133
134
  xwz = sum([_update.xwz for _update in update])
134
135
  n = int(np.sum([_update.n for _update in update]).item())
136
+ assert self.num_samples is None or self.num_samples == n, (
137
+ "Number of supporting samples changed during test"
138
+ )
139
+ self.num_samples = n
135
140
 
136
141
  if abs(llf) < 1e-10:
137
142
  self.early_stop = True
@@ -232,8 +237,9 @@ class LikelihoodRatioTest:
232
237
  self.bad_fit = False
233
238
 
234
239
  self.p_value: Optional[float] = None
235
- self.chi2stat = None
236
- self.dof = None
240
+ self.chi2stat: Optional[float] = None
241
+ self.dof: Optional[int] = None
242
+ self.num_samples: Optional[int] = None
237
243
 
238
244
  def __repr__(self):
239
245
  if self.p_value is None:
@@ -324,6 +330,11 @@ class LikelihoodRatioTest:
324
330
  for _update in update
325
331
  ]
326
332
  )
333
+ if self.num_samples is None:
334
+ assert (
335
+ self.restricted_test.num_samples == self.unrestricted_test.num_samples
336
+ ), "Restricted and Unrestricted Test mustn't differ in supporting samples"
337
+ self.num_samples = self.restricted_test.num_samples
327
338
 
328
339
  def get_iterations(self):
329
340
  return max([self.restricted_test.iterations, self.unrestricted_test.iterations])
@@ -336,8 +347,8 @@ class LikelihoodRatioTest:
336
347
  t1_dof = self.unrestricted_test.dof
337
348
 
338
349
  self.chi2stat = 2 * (t1_llf - t0_llf)
339
- self.dofs = t1_dof - t0_dof
340
- self.p_value = scipy.stats.chi2.sf(self.chi2stat, self.dofs).item()
350
+ self.dof = t1_dof - t0_dof
351
+ self.p_value = scipy.stats.chi2.sf(self.chi2stat, self.dof).item()
341
352
 
342
353
  if get_env_debug() >= 2:
343
354
  print(
@@ -371,6 +382,7 @@ class SymmetricLikelihoodRatioTest:
371
382
  self.lrt2: LikelihoodRatioTest = lrt1
372
383
 
373
384
  self.p_value: Optional[float] = None
385
+ self.num_samples: Optional[int] = None
374
386
 
375
387
  def __repr__(self):
376
388
  if self.p_value is None:
@@ -418,6 +430,11 @@ class SymmetricLikelihoodRatioTest:
418
430
  self.lrt2.update_parameters(
419
431
  [_update[self.lrt2.response] for _update in update]
420
432
  )
433
+ if self.num_samples is None:
434
+ assert self.lrt1.num_samples == self.lrt2.num_samples, (
435
+ "Restricted and Unrestricted Test mustn't differ in supporting samples"
436
+ )
437
+ self.num_samples = self.lrt1.num_samples
421
438
 
422
439
  def get_iterations(self):
423
440
  return max([self.lrt1.get_iterations(), self.lrt2.get_iterations()])
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "python-fedci"
7
7
  authors = [{ name = "Maximilian Hahn", email = "max.hahn@gmx.de" }]
8
- version = "0.1.3"
8
+ version = "0.1.5"
9
9
  license = { file = "LICENSE" }
10
10
  description = "A small package for federated independence tests"
11
11
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fedci
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: A small package for federated independence tests
5
5
  Author-email: Maximilian Hahn <max.hahn@gmx.de>
6
6
  License: SPDX-License-Identifier: AGPL-3.0-or-later
File without changes
File without changes
File without changes
File without changes
File without changes