cbps 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cbps/__init__.py +3462 -0
  2. cbps/constants.py +46 -0
  3. cbps/core/__init__.py +93 -0
  4. cbps/core/cbps_binary.py +1943 -0
  5. cbps/core/cbps_continuous.py +945 -0
  6. cbps/core/cbps_multitreat.py +1123 -0
  7. cbps/core/cbps_optimal.py +507 -0
  8. cbps/core/results.py +1447 -0
  9. cbps/data/Blackwell.csv +571 -0
  10. cbps/data/LaLonde.csv +3213 -0
  11. cbps/data/npcbps_continuous_sim.csv +501 -0
  12. cbps/data/nsw.csv +723 -0
  13. cbps/data/nsw_dw.csv +446 -0
  14. cbps/data/political_ads_urban_niebler.csv +16266 -0
  15. cbps/data/psid_controls.csv +2491 -0
  16. cbps/data/psid_controls2.csv +254 -0
  17. cbps/data/psid_controls3.csv +129 -0
  18. cbps/data/simulation_dgp1_seed12345.csv +201 -0
  19. cbps/data/simulation_dgp2_seed12345.csv +201 -0
  20. cbps/data/simulation_dgp3_seed12345.csv +201 -0
  21. cbps/data/simulation_dgp4_seed12345.csv +201 -0
  22. cbps/datasets/__init__.py +78 -0
  23. cbps/datasets/blackwell.py +112 -0
  24. cbps/datasets/continuous.py +223 -0
  25. cbps/datasets/lalonde.py +272 -0
  26. cbps/datasets/npcbps_sim.py +101 -0
  27. cbps/diagnostics/__init__.py +101 -0
  28. cbps/diagnostics/balance.py +760 -0
  29. cbps/diagnostics/balance_cbmsm_addon.py +162 -0
  30. cbps/diagnostics/continuous_diagnostics.py +259 -0
  31. cbps/diagnostics/normality.py +173 -0
  32. cbps/diagnostics/ocbps_conditions.py +197 -0
  33. cbps/diagnostics/overlap.py +198 -0
  34. cbps/diagnostics/plots.py +1193 -0
  35. cbps/diagnostics/weights_diag.py +205 -0
  36. cbps/highdim/__init__.py +84 -0
  37. cbps/highdim/gmm_loss.py +340 -0
  38. cbps/highdim/hdcbps.py +1078 -0
  39. cbps/highdim/lasso_utils.py +498 -0
  40. cbps/highdim/weight_funcs.py +298 -0
  41. cbps/inference/__init__.py +42 -0
  42. cbps/inference/asyvar.py +621 -0
  43. cbps/inference/vcov_outcome.py +217 -0
  44. cbps/iv/__init__.py +48 -0
  45. cbps/iv/cbiv.py +2603 -0
  46. cbps/logging_config.py +45 -0
  47. cbps/msm/__init__.py +45 -0
  48. cbps/msm/cbmsm.py +1871 -0
  49. cbps/msm/rank_diagnostics.py +112 -0
  50. cbps/nonparametric/__init__.py +58 -0
  51. cbps/nonparametric/cholesky_whitening.py +232 -0
  52. cbps/nonparametric/empirical_likelihood.py +339 -0
  53. cbps/nonparametric/npcbps.py +1036 -0
  54. cbps/nonparametric/taylor_approx.py +207 -0
  55. cbps/py.typed +0 -0
  56. cbps/sklearn/__init__.py +42 -0
  57. cbps/sklearn/estimator.py +378 -0
  58. cbps/utils/__init__.py +82 -0
  59. cbps/utils/formula.py +415 -0
  60. cbps/utils/helpers.py +378 -0
  61. cbps/utils/numerics.py +438 -0
  62. cbps/utils/r_compat.py +109 -0
  63. cbps/utils/validation.py +224 -0
  64. cbps/utils/variance_transform.py +483 -0
  65. cbps/utils/weights.py +586 -0
  66. cbps-0.2.0.dist-info/METADATA +1090 -0
  67. cbps-0.2.0.dist-info/RECORD +70 -0
  68. cbps-0.2.0.dist-info/WHEEL +5 -0
  69. cbps-0.2.0.dist-info/licenses/LICENSE +661 -0
  70. cbps-0.2.0.dist-info/top_level.txt +1 -0
cbps/utils/weights.py ADDED
@@ -0,0 +1,586 @@
1
+ """
2
+ Propensity Score Weight Computation
3
+
4
+ This module provides functions for computing inverse probability weights
5
+ (IPW) for different treatment types and target estimands within the
6
+ CBPS framework.
7
+
8
+ Supported weight types:
9
+
10
+ - **ATE weights**: Average Treatment Effect weights for binary treatments
11
+ - **ATT weights**: Average Treatment Effect on Treated weights
12
+ - **Continuous treatment**: Weighted treatment variable for balance conditions
13
+ - **Standardized weights**: Group-normalized weights (Hajek estimator)
14
+ - **WeightNormalizer**: Unified class encapsulating the standardization flow
15
+
16
+ Mathematical Framework
17
+ ----------------------
18
+ For binary treatments with propensity score π(X):
19
+
20
+ **ATE** (Imai & Ratkovic 2014, Eq. 10)::
21
+
22
+ w_i = T_i/π_i + (1-T_i)/(1-π_i)
23
+
24
+ **ATT** (Imai & Ratkovic 2014, Eq. 11)::
25
+
26
+ w_i = (N/N₁) × (T_i - π_i)/(1 - π_i)
27
+
28
+ For continuous treatments with generalized propensity score f(T|X):
29
+
30
+ **Stabilized** (Fong et al. 2018, Eq. 2)::
31
+
32
+ w_i = f(T_i) / f(T_i|X_i)
33
+
34
+ Standardization Protocol
35
+ ------------------------
36
+ All normalization follows a strict four-step order:
37
+
38
+ 1. Compute raw weights from propensity scores.
39
+ 2. Apply sampling weights (if provided).
40
+ 3. Group-wise normalization (each group sums to 1).
41
+ 4. Validate result (no NaN/Inf).
42
+
43
+ Functions
44
+ ---------
45
+ compute_ate_weights
46
+ Compute ATE inverse probability weights.
47
+ compute_att_weights
48
+ Compute ATT inverse probability weights.
49
+ compute_continuous_weights
50
+ Compute stabilized continuous treatment weights.
51
+ standardize_weights
52
+ Normalize weights by treatment group.
53
+
54
+ Classes
55
+ -------
56
+ WeightNormalizer
57
+ Unified standardization class with validate/normalize methods.
58
+
59
+ References
60
+ ----------
61
+ Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
62
+ Journal of the Royal Statistical Society, Series B 76(1), 243-263.
63
+
64
+ Fong, C., Hazlett, C., and Imai, K. (2018). Covariate balancing propensity
65
+ score for a continuous treatment. The Annals of Applied Statistics, 12(1),
66
+ 156-177.
67
+ """
68
+
69
+ import warnings
70
+ from typing import Optional
71
+
72
+ import numpy as np
73
+
74
+
75
class WeightNormalizer:
    """
    Unified weight normalization for CBPS estimators.

    Encapsulates the Hajek-style standardization flow for inverse probability
    weights, ensuring a consistent step ordering across ATE and ATT estimands.

    Standardization Steps (applied in strict order)
    ------------------------------------------------
    1. Compute raw weights from propensity scores (done by the caller).
    2. Apply sampling weights (if provided).
    3. Group-wise normalization (treated sum → 1, control sum → 1).
    4. Validate result (no NaN/Inf, correct signs).

    Mathematical Reference
    ----------------------
    **ATE weights** (Imai & Ratkovic 2014, Eq. 10)::

        w_i = T_i / π_i + (1 - T_i) / (1 - π_i)

    After Hajek normalization each group sums to 1.

    **ATT weights** (Imai & Ratkovic 2014, Eq. 11)::

        w_i = (N / N₁) × (T_i - π_i) / (1 - π_i)

    Raw treated weights are the constant N/N₁; raw control weights are
    negative and are normalized by their absolute-value sum.

    References
    ----------
    Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
    Journal of the Royal Statistical Society, Series B 76(1), 243-263.
    """

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @staticmethod
    def normalize_ate(
        weights: np.ndarray,
        treat: np.ndarray,
        sample_weights: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """
        ATE standardization (Hajek-style group normalization).

        Each treatment group's weights are rescaled so that their sum
        equals 1.

        Parameters
        ----------
        weights : np.ndarray
            Raw ATE weights (e.g., from ``compute_ate_weights``), shape (n,).
            Any array-like of real numbers is accepted.
        treat : np.ndarray
            Binary treatment indicator (0/1), shape (n,).
        sample_weights : np.ndarray or None, optional
            Sampling weights, shape (n,). If None, uniform weights are used.

        Returns
        -------
        np.ndarray
            Float array of standardized weights (sample weights included),
            shape (n,). ``w[treat==1].sum() ≈ 1`` and ``w[treat==0].sum() ≈ 1``
            whenever the corresponding group has a positive weight sum.
            The input arrays are never modified.

        Raises
        ------
        ValueError
            If the resulting weights contain NaN or Inf (see ``validate``).

        Notes
        -----
        A group whose weighted sum is not strictly positive (e.g. an empty
        group) is left un-normalized instead of being divided by zero.
        Units whose ``treat`` value is neither 0 nor 1 keep their
        sample-weighted raw weight.
        """
        treat = np.asarray(treat)
        n = len(treat)
        if sample_weights is None:
            sample_weights = np.ones(n)

        # Step 2: apply sample weights. Force float dtype: with integer
        # inputs the in-place division below would otherwise raise a
        # NumPy casting error. The product is a fresh array, so the
        # caller's data is never mutated.
        w = np.asarray(weights, dtype=float) * np.asarray(
            sample_weights, dtype=float
        )

        # Step 3: group-wise normalization
        treat_mask = treat == 1
        ctrl_mask = treat == 0

        sum_treat = np.sum(w[treat_mask])
        sum_ctrl = np.sum(w[ctrl_mask])

        # Avoid division by zero when a group is empty
        if sum_treat > 0:
            w[treat_mask] /= sum_treat
        if sum_ctrl > 0:
            w[ctrl_mask] /= sum_ctrl

        # Step 4: validate
        WeightNormalizer.validate(w, allow_negative=False)

        return w

    @staticmethod
    def normalize_att(
        weights: np.ndarray,
        treat: np.ndarray,
        probs: np.ndarray,
        sample_weights: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """
        ATT standardization.

        Treated weights are divided by their sum; control weights are
        replaced by their absolute values and divided by the sum of those
        absolute values, so both groups sum to 1.

        Parameters
        ----------
        weights : np.ndarray
            Raw ATT weights (e.g., from ``compute_att_weights``), shape (n,).
            Control weights are expected to be negative.
        treat : np.ndarray
            Binary treatment indicator (0/1), shape (n,).
        probs : np.ndarray
            Propensity scores, shape (n,). Not used by the computation;
            the parameter is part of the existing signature.
        sample_weights : np.ndarray or None, optional
            Sampling weights, shape (n,). If None, uniform weights are used.

        Returns
        -------
        np.ndarray
            Float array of standardized ATT weights, shape (n,). Treated
            group sums to 1 and control group sums to 1 whenever the
            respective normalizer is strictly positive.

        Raises
        ------
        ValueError
            If the resulting weights contain NaN or Inf (see ``validate``).

        Notes
        -----
        If the treated sum is not strictly positive the treated weights are
        returned un-normalized; if the control absolute sum is zero the
        control weights are returned as absolute values only. Units whose
        ``treat`` value is neither 0 nor 1 keep their sample-weighted raw
        weight.
        """
        treat = np.asarray(treat)
        n = len(treat)
        if sample_weights is None:
            sample_weights = np.ones(n)

        treat_mask = treat == 1
        ctrl_mask = treat == 0

        # Step 2: apply sample weights (float result, inputs untouched)
        w = np.asarray(weights, dtype=float) * np.asarray(
            sample_weights, dtype=float
        )

        # Step 3: group-wise normalization
        sum_treat = np.sum(w[treat_mask])
        ctrl_abs = np.abs(w[ctrl_mask])
        sum_ctrl_abs = np.sum(ctrl_abs)

        # Start from the sample-weighted input rather than uninitialised
        # memory so that every entry of the result is well defined, even
        # for units that belong to neither mask.
        out = w.copy()
        if sum_treat > 0:
            out[treat_mask] = w[treat_mask] / sum_treat

        if sum_ctrl_abs > 0:
            out[ctrl_mask] = ctrl_abs / sum_ctrl_abs
        else:
            out[ctrl_mask] = ctrl_abs

        # Step 4: validate
        WeightNormalizer.validate(out, allow_negative=False)

        return out

    @staticmethod
    def validate(weights: np.ndarray, allow_negative: bool = False) -> bool:
        """Validate weight vector.

        Parameters
        ----------
        weights : np.ndarray
            Weight vector (or numeric array-like) to validate.
        allow_negative : bool, default=False
            If False, a warning is issued when negative weights are
            detected. Set to True for balance condition weights (which
            can be negative by design).

        Returns
        -------
        bool
            True if no exception was raised. Negative weights only
            trigger a warning; the return value is still True.

        Raises
        ------
        ValueError
            If weights contain NaN or Inf values.
        """
        weights = np.asarray(weights)

        if np.any(np.isnan(weights)):
            raise ValueError(
                "Weights contain NaN values. Check propensity score estimation."
            )
        if np.any(np.isinf(weights)):
            raise ValueError(
                "Weights contain Inf values. Propensity scores may be too "
                "close to 0 or 1."
            )
        if not allow_negative and np.any(weights < 0):
            n_neg = int(np.sum(weights < 0))
            min_val = float(np.min(weights))
            warnings.warn(
                f"Detected {n_neg} negative weight(s) (min={min_val:.6g}). "
                f"IPW weights should be non-negative; this may indicate "
                f"numerical issues in propensity score estimation.",
                stacklevel=2,
            )
        return True
288
+
289
+
290
def compute_ate_weights(
    treat: np.ndarray,
    probs: np.ndarray
) -> np.ndarray:
    """
    Compute raw (unstandardized) ATE inverse probability weights.

    Evaluates the IPW formula of Imai & Ratkovic (2014, Eq. 10)
    element-wise:

        w_i = T_i / π_i + (1 - T_i) / (1 - π_i)

    Parameters
    ----------
    treat : np.ndarray
        Binary treatment indicator (0/1), shape (n,).
    probs : np.ndarray
        Propensity scores, shape (n,). Callers should clip them into the
        open interval (0, 1) beforehand; no clipping is done here.

    Returns
    -------
    np.ndarray
        Unstandardized ATE weights, shape (n,). For 0/1 treatments and
        propensity scores strictly inside (0, 1) every weight is positive.

    Notes
    -----
    With a 0/1 treatment exactly one of the two terms is non-zero:

    - Treated units (T=1): weight = 1/π(X)
    - Control units (T=0): weight = 1/(1-π(X))

    Use ``WeightNormalizer.normalize_ate`` for Hajek-style group
    normalization.

    Examples
    --------
    >>> import numpy as np
    >>> treat = np.array([1, 0, 1, 0])
    >>> probs = np.array([0.6, 0.4, 0.7, 0.3])
    >>> w = compute_ate_weights(treat, probs)
    >>> bool(np.all(w > 0))
    True

    References
    ----------
    Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
    Journal of the Royal Statistical Society, Series B 76(1), 243-263.
    """
    # Contribution of treated units: T / π
    treated_term = treat / probs
    # Contribution of control units: (1 - T) / (1 - π)
    control_term = (1 - treat) / (1 - probs)

    return treated_term + control_term
345
+
346
+
347
def compute_att_weights(
    treat: np.ndarray,
    probs: np.ndarray,
    sample_weights: np.ndarray
) -> np.ndarray:
    """
    Compute raw (unstandardized) ATT inverse probability weights.

    Evaluates the ATT formula of Imai & Ratkovic (2014, Eq. 11)
    element-wise:

        w_i = (N / N₁) × (T_i - π_i) / (1 - π_i)

    where N is the number of observations and N₁ is the sum of the
    sampling weights of the treated units.

    Parameters
    ----------
    treat : np.ndarray
        Binary treatment indicator (0/1), shape (n,).
    probs : np.ndarray
        Propensity scores, shape (n,).
    sample_weights : np.ndarray
        Sampling weights normalized to sum to n, shape (n,). Only the
        entries of treated units are used (to form N₁).

    Returns
    -------
    np.ndarray
        Unstandardized ATT weights, shape (n,).
        Control unit weights are negative by construction.

    Notes
    -----
    Sign pattern by treatment status:

    - Treated (T=1): w = N/N₁ (positive constant)
    - Control (T=0): w = -(N/N₁) × π/(1-π) (negative)

    Use ``WeightNormalizer.normalize_att`` for Hajek-style normalization
    that takes absolute values of control weights and normalizes each group.

    Examples
    --------
    >>> import numpy as np
    >>> treat = np.array([1, 1, 0, 0])
    >>> probs = np.array([0.6, 0.7, 0.4, 0.3])
    >>> sw = np.ones(4)
    >>> w = compute_att_weights(treat, probs, sw)
    >>> bool(all(w[treat == 1] > 0))  # Treated positive
    True
    >>> bool(all(w[treat == 0] < 0))  # Control negative
    True

    References
    ----------
    Imai, K. and Ratkovic, M. (2014). Covariate balancing propensity score.
    Journal of the Royal Statistical Society, Series B 76(1), 243-263.
    """
    # N₁: sampling-weight mass of the treated group
    is_treated = treat == 1
    treated_mass = np.sum(sample_weights[is_treated])

    # N / N₁ scaling factor shared by every unit
    scale = len(treat) / treated_mass

    # (T - π) is negative for controls, hence negative control weights
    return scale * (treat - probs) / (1 - probs)
409
+
410
+
411
def compute_continuous_weights(
    Ttilde: np.ndarray,
    stabilizers: np.ndarray,
    log_density: np.ndarray
) -> np.ndarray:
    """
    Compute the weighted standardized treatment for continuous CBPS.

    Returns the product T̃ × w, where w is the stabilized weight
    evaluated in log space:

        T̃_i × w_i = T̃_i × exp(log f(T̃_i) - log f(T̃_i|X_i))

    This quantity enters the CBPS balance condition E[T* × w × X*] = 0
    (Fong et al., 2018, Eq. 2).

    Parameters
    ----------
    Ttilde : np.ndarray
        Standardized treatment (mean=0, std=1), shape (n,).
    stabilizers : np.ndarray
        Log marginal density log f(T̃), shape (n,).
    log_density : np.ndarray
        Log conditional density log f(T̃|X) (GPS), shape (n,).

    Returns
    -------
    np.ndarray
        Weighted treatment T̃ × w, shape (n,).
        Note: this is NOT the weight itself; the stabilized weight is
        w = f(T̃)/f(T̃|X) = exp(stabilizers - log_density).

    Notes
    -----
    **Numerical stability**: the log-density difference is clipped to
    [-50, 50] before exponentiation to prevent overflow.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import norm
    >>> Ttilde = np.array([0.0, 1.0, -1.0])
    >>> stabilizers = norm.logpdf(Ttilde, 0, 1)  # Marginal density
    >>> log_density = norm.logpdf(Ttilde, 0, 1)  # Same as marginal (no confounding)
    >>> Tw = compute_continuous_weights(Ttilde, stabilizers, log_density)
    >>> bool(np.all(np.isfinite(Tw)))
    True

    References
    ----------
    Fong, C., Hazlett, C., and Imai, K. (2018). Covariate balancing propensity
    score for a continuous treatment. The Annals of Applied Statistics, 12(1),
    156-177.
    """
    # Log of the stabilized weight, bounded so exp() cannot overflow
    log_ratio = np.clip(stabilizers - log_density, -50, 50)

    return Ttilde * np.exp(log_ratio)
475
+
476
+
477
def standardize_weights(
    weights: np.ndarray,
    treat: np.ndarray,
    probs: np.ndarray,
    sample_weights: np.ndarray,
    att: int = 0,
    standardize: bool = True
) -> np.ndarray:
    """
    Normalize IPW weights by treatment group.

    Applies group-wise normalization so that weights within each treatment
    group sum to 1, or returns unnormalized Horvitz-Thompson weights.

    The weights are rebuilt from the propensity scores instead of being
    derived from the *weights* argument, to maintain backward compatibility
    with R's ``CBPS`` package (v0.23).

    Parameters
    ----------
    weights : np.ndarray
        Raw weights (unused, kept for API compatibility), shape (n,).
    treat : np.ndarray
        Binary treatment indicator (0/1), shape (n,).
    probs : np.ndarray
        Propensity scores, shape (n,).
    sample_weights : np.ndarray
        Sampling weights (normalized to sum to n), shape (n,).
    att : int, default=0
        Target estimand: 0=ATE, 1=ATT, 2=ATT with reversed encoding.
        Any truthy value selects the ATT formulas.
    standardize : bool, default=True
        If True, apply group-wise normalization (Hajek estimator).
        If False, return Horvitz-Thompson weights.

    Returns
    -------
    np.ndarray
        Final weights multiplied by sample_weights, shape (n,).

    Notes
    -----
    **Computation order**:

    1. Compute the per-group normalizers (1 when ``standardize`` is False).
    2. Rebuild the weights from the propensity scores and divide each
       group by its normalizer.
    3. Multiply by ``sample_weights``.

    No validation of the result is performed here; use
    ``WeightNormalizer.validate`` if finite / non-negative weights must
    be enforced.

    **ATE** (Imai & Ratkovic 2014, Eq. 10):
        Treated: 1/π normalized; Control: 1/(1-π) normalized.

    **ATT** (Imai & Ratkovic 2014, Eq. 11):
        Treated: N/N₁ normalized; Control: |N/N₁ × (T-π)/(1-π)| normalized.
        N₁ is the *unweighted* number of treated units.

    **Empty groups**: a normalizer that is exactly zero (e.g. a treatment
    group with no units) is skipped, leaving that group un-normalized
    instead of turning every weight into NaN through 0/0. This mirrors
    the empty-group guard in ``WeightNormalizer``. The ATT formulas
    remain undefined when there are no treated units (N₁ = 0).

    Examples
    --------
    >>> import numpy as np
    >>> treat = np.array([1, 0, 1, 0])
    >>> probs = np.array([0.6, 0.4, 0.7, 0.3])
    >>> sw = np.ones(4)
    >>> w = np.ones(4)
    >>> w_std = standardize_weights(w, treat, probs, sw, att=0, standardize=True)
    >>> bool(np.isclose(w_std[treat==1].sum(), 1.0))  # Treated group normalized
    True
    >>> bool(np.isclose(w_std[treat==0].sum(), 1.0))  # Control group normalized
    True
    """
    n = len(treat)
    # Use unweighted count for ATT normalization, matching R's CBPSBinary.R
    # R redefines n.t = sum(treat==1) (unweighted) before weight standardization
    n_t_unweighted = np.sum(treat == 1)

    if standardize:
        # Per-group normalizers: sample-weighted sums of the raw weights
        if att:  # ATT branch
            norm1 = np.sum(treat * sample_weights * n / n_t_unweighted)
            norm2 = np.sum((1 - treat) * sample_weights * n / n_t_unweighted *
                           (treat - probs) / (1 - probs))
        else:  # ATE branch
            norm1 = np.sum(treat * sample_weights / probs)
            norm2 = np.sum((1 - treat) * sample_weights / (1 - probs))

        # A zero normalizer means the group contributes nothing; dividing
        # by it would yield 0/0 = NaN for *every* unit (the other group's
        # term is an exact zero), so leave that group un-normalized.
        if norm1 == 0:
            norm1 = 1.0
        if norm2 == 0:
            norm2 = 1.0
    else:
        # Horvitz-Thompson weights (no normalization)
        norm1 = 1.0
        norm2 = 1.0

    # Rebuild the weights from the propensity scores (the *weights*
    # argument is intentionally not used)
    if att:  # ATT branch
        # Treated term + |control term|; norm2 is negative here because the
        # raw control weights are, and abs() restores a positive weight
        weights_std = (
            (treat == 1) * n / n_t_unweighted / norm1 +
            np.abs((treat == 0) * n / n_t_unweighted *
                   (treat - probs) / (1 - probs) / norm2)
        )
    else:  # ATE branch
        # Standard form, no abs() needed (1/π and 1/(1-π) always positive)
        weights_std = (
            (treat == 1) / probs / norm1 +
            (treat == 0) / (1 - probs) / norm2
        )

    # Apply the sampling weights last
    weights_std = weights_std * sample_weights

    return weights_std