PyPI - pyleebounds - Versions diffs - 0.1.0__tar.gz - Mend

pyleebounds 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

pyleebounds-0.1.0/LICENSE +21 -0
pyleebounds-0.1.0/PKG-INFO +196 -0
pyleebounds-0.1.0/README.md +155 -0
pyleebounds-0.1.0/pyleebounds/__init__.py +14 -0
pyleebounds-0.1.0/pyleebounds/lee_bounds.py +291 -0
pyleebounds-0.1.0/pyleebounds.egg-info/PKG-INFO +196 -0
pyleebounds-0.1.0/pyleebounds.egg-info/SOURCES.txt +11 -0
pyleebounds-0.1.0/pyleebounds.egg-info/dependency_links.txt +1 -0
pyleebounds-0.1.0/pyleebounds.egg-info/not-zip-safe +1 -0
pyleebounds-0.1.0/pyleebounds.egg-info/requires.txt +12 -0
pyleebounds-0.1.0/pyleebounds.egg-info/top_level.txt +1 -0
pyleebounds-0.1.0/setup.cfg +4 -0
pyleebounds-0.1.0/setup.py +65 -0

pyleebounds-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Vasil (Vasco) Yasenov
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pyleebounds-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,196 @@
+Metadata-Version: 2.4
+Name: pyleebounds
+Version: 0.1.0
+Summary: Python package for Lee 2009 treatment effect bounds under sample selection
+Home-page: https://github.com/vyasenov/pyleebounds
+Author: Vasco Yasenov
+Author-email:
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: pandas>=1.3.0
+Requires-Dist: matplotlib>=3.3.0
+Requires-Dist: seaborn>=0.11.0
+Requires-Dist: scipy>=1.7.0
+Provides-Extra: dev
+Requires-Dist: pytest>=6.0; extra == "dev"
+Requires-Dist: pytest-cov>=2.0; extra == "dev"
+Requires-Dist: black>=21.0; extra == "dev"
+Requires-Dist: flake8>=3.8; extra == "dev"
+Requires-Dist: mypy>=0.800; extra == "dev"
+Dynamic: author
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+# pyleebounds
+A Python package for estimating treatment effect bounds under sample selection, based on the method of Lee (2009). This approach is especially useful when selection into the observed sample (e.g., post-treatment employment) differs by treatment status and may introduce bias in outcome comparisons.
+## Installation
+You can install the package using pip:
+```bash
+pip install pyleebounds
+```
+## Features
+* Sharp nonparametric bounds on treatment effects under endogenous sample selection
+* Automatically handles non-random attrition or truncation (e.g. only observing outcomes for employed individuals)
+* Bootstrap confidence intervals
+* Seamless integration with Pandas
+## Quick Start
+```python
+import pandas as pd
+import numpy as np
+from pyleebounds import LeeBounds
+# Generate synthetic data
+np.random.seed(1988)
+n = 1000
+# Treatment assignment (random)
+D = np.random.binomial(1, 0.5, n)
+# Potential outcomes (e.g., wages)
+Y0 = np.random.normal(50, 10, n)  # Control potential outcome
+treatment_effect = np.random.normal(5, 3, n)  # Treatment effect
+Y1 = Y0 + treatment_effect  # Treated potential outcome
+Y = D * Y1 + (1 - D) * Y0  # Actual outcome
+# Selection mechanism (e.g., employment)
+# Higher wages and treatment increase employment probability
+employment_prob = 0.3 + 0.4 * (Y > 50) + 0.2 * D
+employment_prob = np.clip(employment_prob, 0, 1)
+S = np.random.binomial(1, employment_prob, n)
+# Create DataFrame
+df = pd.DataFrame({
+    'Y': Y,  # outcome variable
+    'D': D,  # treatment indicator (1 = treated, 0 = control)
+    'S': S   # selection indicator (1 = observed, 0 = missing/selected out)
+})
+# Initialize and fit Lee bounds estimator
+lb = LeeBounds()
+results = lb.fit(df, outcome='Y', treatment='D', selection='S')
+# View summary
+print(results.summary())
+# Plot estimated bounds
+results.plot()
+```
+## Examples
+You can find detailed usage examples in the  `examples/` directory.
+## Background
+### Why Treatment Bounds?
+In many applied settings, outcomes are observed only for a selected subset of the population—e.g., wages are observed only for employed individuals. If treatment affects selection (e.g., job training increases employment), naïvely comparing outcomes may confound treatment effects with selection effects.
+Lee (2009) offers a way to partially identify treatment effects by trimming the treated group's distribution to match the control group’s selection rate under plausible assumptions.
+---
+### Notation
+Let's establish the following notation:
+* $Y$: observed *continuous* outcome
+* $D \in \{0,1\}$: treatment indicator (1 = treated)
+* $S \in \{0,1\}$: selection indicator (1 = observed)
+* $Y(0), Y(1)$: potential outcomes under control/treatment
+* $S(0), S(1)$: potential selection statuses
+* $p_1 = \Pr(S=1 \mid D=1)$, $p_0 = \Pr(S=1 \mid D=0)$: selection rates
+For each unit we observe $\left(D, S, Y\times S \right)$.
+---
+### Assumptions
+1. Monotonicity: Treatment weakly increases the probability of being observed $$S(1)\geq S(0).$$
+2. Exogeneity: Treatment is randomly assigned or unconfounded $$\left(Y(0),Y(1),S(0),S(1)\right) \perp D.$$
+---
+### Main Result
+To adjust for differential selection, Lee (2009) suggested trimming the treated group’s outcome distribution among those with $S=1$. We then compute bounds on the average treatment effect (ATE) for the always-observed subpopulation (units with $S(0)=S(1)=1$, i.e., those who would be observed regardless of treatment status) as:
+$$
+ATE \in \left[ \underline{\Delta}, \overline{\Delta} \right],
+$$
+where:
+$$
+\underline{\Delta} = \mathbb{E}[Y \mid Y\leq q^{\frac{p_0}{p_1}}, D=1, S=1] - \mathbb{E}[Y \mid D=0, S=1]
+$$
+$$
+\overline{\Delta} = \mathbb{E}[Y \mid Y\geq q^{1-\frac{p_0}{p_1}}, D=1, S=1] - \mathbb{E}[Y \mid D=0, S=1]
+$$
+Here $q^{u}$ represents the $u$th quantile of $Y|D=1,S=1$. These form sharp bounds under the stated assumptions.
+These bounds can be tightened in presence of additional covariates $X$, but this package does not offer that functionality. See also Semenova (2020).
+---
+### Confidence Intervals
+Since the Lee bounds involve non-differentiable operations (quantile trimming), variance formulas are complex. Instead, this package provides bootstrap confidence intervals computed as follows:
+1. Resample units with replacement, stratified by treatment group.
+2. Compute Lee bounds for each bootstrap sample.
+3. Construct percentile intervals using the empirical bootstrap distribution.
+## References
+* Lee, D. S. (2009). *Training, wages, and sample selection: Estimating sharp bounds on treatment effects*. *The Review of Economic Studies*, 76(3), 1071–1102.
+* Semenova, V. (2020). Generalized lee bounds. arXiv preprint arXiv:2008.12720.
+* Tauchmann, H. (2014). Lee (2009) treatment-effect bounds for nonrandom sample selection. The Stata Journal, 14(4), 884-894.
+## License
+This project is licensed under the MIT License – see the [LICENSE](LICENSE) file for details.
+## Citation
+To cite this package in publications, use the following BibTeX entry:
+```bibtex
+@misc{yasenov2025pyleebounds,
+  author       = {Vasco Yasenov},
+  title        = {pyleebounds: Python Tools for Estimating Treatment Effect Bounds under Sample Selection},
+  year         = {2025},
+  howpublished = {\url{https://github.com/vyasenov/pyleebounds}},
+  note         = {Version 0.1.0}
+}
+```

pyleebounds-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,155 @@
+# pyleebounds
+A Python package for estimating treatment effect bounds under sample selection, based on the method of Lee (2009). This approach is especially useful when selection into the observed sample (e.g., post-treatment employment) differs by treatment status and may introduce bias in outcome comparisons.
+## Installation
+You can install the package using pip:
+```bash
+pip install pyleebounds
+```
+## Features
+* Sharp nonparametric bounds on treatment effects under endogenous sample selection
+* Automatically handles non-random attrition or truncation (e.g. only observing outcomes for employed individuals)
+* Bootstrap confidence intervals
+* Seamless integration with Pandas
+## Quick Start
+```python
+import pandas as pd
+import numpy as np
+from pyleebounds import LeeBounds
+# Generate synthetic data
+np.random.seed(1988)
+n = 1000
+# Treatment assignment (random)
+D = np.random.binomial(1, 0.5, n)
+# Potential outcomes (e.g., wages)
+Y0 = np.random.normal(50, 10, n)  # Control potential outcome
+treatment_effect = np.random.normal(5, 3, n)  # Treatment effect
+Y1 = Y0 + treatment_effect  # Treated potential outcome
+Y = D * Y1 + (1 - D) * Y0  # Actual outcome
+# Selection mechanism (e.g., employment)
+# Higher wages and treatment increase employment probability
+employment_prob = 0.3 + 0.4 * (Y > 50) + 0.2 * D
+employment_prob = np.clip(employment_prob, 0, 1)
+S = np.random.binomial(1, employment_prob, n)
+# Create DataFrame
+df = pd.DataFrame({
+    'Y': Y,  # outcome variable
+    'D': D,  # treatment indicator (1 = treated, 0 = control)
+    'S': S   # selection indicator (1 = observed, 0 = missing/selected out)
+})
+# Initialize and fit Lee bounds estimator
+lb = LeeBounds()
+results = lb.fit(df, outcome='Y', treatment='D', selection='S')
+# View summary
+print(results.summary())
+# Plot estimated bounds
+results.plot()
+```
+## Examples
+You can find detailed usage examples in the  `examples/` directory.
+## Background
+### Why Treatment Bounds?
+In many applied settings, outcomes are observed only for a selected subset of the population—e.g., wages are observed only for employed individuals. If treatment affects selection (e.g., job training increases employment), naïvely comparing outcomes may confound treatment effects with selection effects.
+Lee (2009) offers a way to partially identify treatment effects by trimming the treated group's distribution to match the control group’s selection rate under plausible assumptions.
+---
+### Notation
+Let's establish the following notation:
+* $Y$: observed *continuous* outcome
+* $D \in \{0,1\}$: treatment indicator (1 = treated)
+* $S \in \{0,1\}$: selection indicator (1 = observed)
+* $Y(0), Y(1)$: potential outcomes under control/treatment
+* $S(0), S(1)$: potential selection statuses
+* $p_1 = \Pr(S=1 \mid D=1)$, $p_0 = \Pr(S=1 \mid D=0)$: selection rates
+For each unit we observe $\left(D, S, Y\times S \right)$.
+---
+### Assumptions
+1. Monotonicity: Treatment weakly increases the probability of being observed $$S(1)\geq S(0).$$
+2. Exogeneity: Treatment is randomly assigned or unconfounded $$\left(Y(0),Y(1),S(0),S(1)\right) \perp D.$$
+---
+### Main Result
+To adjust for differential selection, Lee (2009) suggested trimming the treated group’s outcome distribution among those with $S=1$. We then compute bounds on the average treatment effect (ATE) for the always-observed subpopulation (units with $S(0)=S(1)=1$, i.e., those who would be observed regardless of treatment status) as:
+$$
+ATE \in \left[ \underline{\Delta}, \overline{\Delta} \right],
+$$
+where:
+$$
+\underline{\Delta} = \mathbb{E}[Y \mid Y\leq q^{\frac{p_0}{p_1}}, D=1, S=1] - \mathbb{E}[Y \mid D=0, S=1]
+$$
+$$
+\overline{\Delta} = \mathbb{E}[Y \mid Y\geq q^{1-\frac{p_0}{p_1}}, D=1, S=1] - \mathbb{E}[Y \mid D=0, S=1]
+$$
+Here $q^{u}$ represents the $u$th quantile of $Y|D=1,S=1$. These form sharp bounds under the stated assumptions.
+These bounds can be tightened in presence of additional covariates $X$, but this package does not offer that functionality. See also Semenova (2020).
+---
+### Confidence Intervals
+Since the Lee bounds involve non-differentiable operations (quantile trimming), variance formulas are complex. Instead, this package provides bootstrap confidence intervals computed as follows:
+1. Resample units with replacement, stratified by treatment group.
+2. Compute Lee bounds for each bootstrap sample.
+3. Construct percentile intervals using the empirical bootstrap distribution.
+## References
+* Lee, D. S. (2009). *Training, wages, and sample selection: Estimating sharp bounds on treatment effects*. *The Review of Economic Studies*, 76(3), 1071–1102.
+* Semenova, V. (2020). Generalized lee bounds. arXiv preprint arXiv:2008.12720.
+* Tauchmann, H. (2014). Lee (2009) treatment-effect bounds for nonrandom sample selection. The Stata Journal, 14(4), 884-894.
+## License
+This project is licensed under the MIT License – see the [LICENSE](LICENSE) file for details.
+## Citation
+To cite this package in publications, use the following BibTeX entry:
+```bibtex
+@misc{yasenov2025pyleebounds,
+  author       = {Vasco Yasenov},
+  title        = {pyleebounds: Python Tools for Estimating Treatment Effect Bounds under Sample Selection},
+  year         = {2025},
+  howpublished = {\url{https://github.com/vyasenov/pyleebounds}},
+  note         = {Version 0.1.0}
+}
+```

pyleebounds-0.1.0/pyleebounds/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+pyleebounds: Python package for Lee 2009 treatment effect bounds under sample selection.
+This package implements the method from Lee (2009) for estimating sharp bounds
+on treatment effects when selection into the post-treatment sample is endogenous.
+"""
+__version__ = "0.1.0"
+__author__ = "Vasco Yasenov"
+__email__ = ""
+from .lee_bounds import LeeBounds
+__all__ = ["LeeBounds"]

pyleebounds-0.1.0/pyleebounds/lee_bounds.py ADDED Viewed

@@ -0,0 +1,291 @@
+"""
+Implementation of Lee (2009) treatment effect bounds under sample selection.
+"""
+import numpy as np
+import pandas as pd
+from typing import Tuple, Dict, Any
+import matplotlib.pyplot as plt
+class LeeBounds:
+    """
+    Lee (2009) treatment effect bounds estimator.
+    Implements the method from Lee (2009) for estimating sharp bounds on treatment
+    effects when selection into the post-treatment sample is endogenous.
+    Parameters
+    ----------
+    None
+    References
+    ----------
+    Lee, D. S. (2009). Training, wages, and sample selection: Estimating sharp
+    bounds on treatment effects. The Review of Economic Studies, 76(3), 1071-1102.
+    """
+    def __init__(self, trim_method: str = 'quantile'):
+        self.trim_method = trim_method
+        self.fitted = False
+        self.results = None
+    def fit(self, data: pd.DataFrame, outcome: str, treatment: str,
+            selection: str) -> 'LeeBoundsResults':
+        """
+        Fit Lee bounds estimator to the data.
+        Parameters
+        ----------
+        data : pd.DataFrame
+            Input data containing outcome, treatment, and selection variables
+        outcome : str
+            Name of the outcome variable column
+        treatment : str
+            Name of the treatment indicator column (0=control, 1=treated)
+        selection : str
+            Name of the selection indicator column (0=missing, 1=observed)
+        Returns
+        -------
+        LeeBoundsResults
+            Results object containing bounds and summary statistics
+        """
+        # Validate inputs
+        required_cols = [outcome, treatment, selection]
+        missing_cols = [col for col in required_cols if col not in data.columns]
+        if missing_cols:
+            raise ValueError(f"Missing columns: {missing_cols}")
+        # Extract data
+        Y = data[outcome].values
+        D = data[treatment].values
+        S = data[selection].values
+        # Remove missing values
+        valid_mask = ~(np.isnan(Y) | np.isnan(D) | np.isnan(S))
+        Y, D, S = Y[valid_mask], D[valid_mask], S[valid_mask]
+        # Validate data types and values
+        self._validate_variables(Y, D, S)
+        # Calculate selection rates
+        p1 = np.mean(S[D == 1])  # Selection rate in treated group
+        p0 = np.mean(S[D == 0])  # Selection rate in control group
+        if p1 <= p0:
+            raise ValueError("Selection rate in treated group must be greater than control group")
+        # Calculate bounds
+        lower_bound, upper_bound = self._compute_bounds(Y, D, S, p1, p0)
+        # Store results
+        self.results = LeeBoundsResults(
+            lower_bound=lower_bound,
+            upper_bound=upper_bound,
+            p1=p1,
+            p0=p0,
+            trim_proportion=p1 - p0,
+            n_treated=np.sum(D == 1),
+            n_control=np.sum(D == 0),
+            n_treated_selected=np.sum((D == 1) & (S == 1)),
+            n_control_selected=np.sum((D == 0) & (S == 1))
+        )
+        self.fitted = True
+        return self.results
+    def _compute_bounds(self, Y: np.ndarray, D: np.ndarray, S: np.ndarray,
+                       p1: float, p0: float) -> Tuple[float, float]:
+        """
+        Compute Lee bounds using trimming approach.
+        Parameters
+        ----------
+        Y : np.ndarray
+            Outcome values
+        D : np.ndarray
+            Treatment indicators
+        S : np.ndarray
+            Selection indicators
+        p1 : float
+            Selection rate in treated group
+        p0 : float
+            Selection rate in control group
+        Returns
+        -------
+        Tuple[float, float]
+            (lower_bound, upper_bound)
+        """
+        # Control group mean (among selected)
+        control_mean = np.mean(Y[(D == 0) & (S == 1)])
+        # Treated group (among selected)
+        treated_selected = Y[(D == 1) & (S == 1)]
+        # Trim proportion
+        trim_prop = p1 - p0
+        # For Lee bounds, we trim the treated group to match control selection rate
+        # Lower bound: trim from top (keep lowest outcomes)
+        # Upper bound: trim from bottom (keep highest outcomes)
+        n_trim = int(len(treated_selected) * trim_prop)
+        if n_trim > 0:
+            sorted_treated = np.sort(treated_selected)
+            # Lower bound: keep bottom (1 - trim_prop) of observations
+            lower_trimmed_mean = np.mean(sorted_treated[:-n_trim])
+            # Upper bound: keep top (1 - trim_prop) of observations
+            upper_trimmed_mean = np.mean(sorted_treated[n_trim:])
+        else:
+            lower_trimmed_mean = upper_trimmed_mean = np.mean(treated_selected)
+        lower_bound = lower_trimmed_mean - control_mean
+        upper_bound = upper_trimmed_mean - control_mean
+        return lower_bound, upper_bound
+    def _validate_variables(self, Y: np.ndarray, D: np.ndarray, S: np.ndarray) -> None:
+        """
+        Validate that variables have correct types and values.
+        Parameters
+        ----------
+        Y : np.ndarray
+            Outcome variable
+        D : np.ndarray
+            Treatment variable
+        S : np.ndarray
+            Selection variable
+        Raises
+        ------
+        ValueError
+            If validation fails
+        """
+        # Check that D is binary with values 0 and 1
+        unique_d = np.unique(D)
+        if not np.array_equal(unique_d, np.array([0, 1])):
+            raise ValueError(f"Treatment variable D must be binary (0, 1). Found values: {unique_d}")
+        # Check that S is binary with values 0 and 1
+        unique_s = np.unique(S)
+        if not np.array_equal(unique_s, np.array([0, 1])):
+            raise ValueError(f"Selection variable S must be binary (0, 1). Found values: {unique_s}")
+        # Check that Y is continuous (not all integers)
+        if len(np.unique(Y)) < len(Y) * 0.1:  # If less than 10% unique values, likely discrete
+            raise ValueError("Outcome variable Y should be continuous. Consider if this is appropriate.")
+        # Check for reasonable sample sizes
+        if len(Y) < 10:
+            raise ValueError("Sample size too small. Need at least 10 observations.")
+        # Check that we have both treatment groups
+        if np.sum(D == 0) == 0:
+            raise ValueError("No control observations (D=0) found.")
+        if np.sum(D == 1) == 0:
+            raise ValueError("No treated observations (D=1) found.")
+        # Check that we have selected observations in both groups
+        if np.sum((D == 0) & (S == 1)) == 0:
+            raise ValueError("No selected control observations (D=0, S=1) found.")
+        if np.sum((D == 1) & (S == 1)) == 0:
+            raise ValueError("No selected treated observations (D=1, S=1) found.")
+    def bootstrap(self, data: pd.DataFrame, outcome: str, treatment: str,
+                  selection: str, n_bootstrap: int = 500,
+                  ci_level: float = 0.95) -> Dict[str, Any]:
+        """
+        Compute bootstrap confidence intervals for the bounds.
+        Parameters
+        ----------
+        data : pd.DataFrame
+            Input data
+        outcome : str
+            Outcome variable name
+        treatment : str
+            Treatment variable name
+        selection : str
+            Selection variable name
+        n_bootstrap : int
+            Number of bootstrap samples
+        ci_level : float
+            Confidence level (e.g., 0.95 for 95% CI)
+        Returns
+        -------
+        Dict[str, Any]
+            Dictionary containing bootstrap results
+        """
+        lower_bounds = []
+        upper_bounds = []
+        for _ in range(n_bootstrap):
+            # Bootstrap sample
+            boot_idx = np.random.choice(len(data), size=len(data), replace=True)
+            boot_data = data.iloc[boot_idx].reset_index(drop=True)
+            try:
+                # Fit Lee bounds on bootstrap sample
+                lb = LeeBounds()
+                results = lb.fit(boot_data, outcome, treatment, selection)
+                lower_bounds.append(results.lower_bound)
+                upper_bounds.append(results.upper_bound)
+            except:
+                # Skip if bootstrap sample fails
+                continue
+        # Calculate confidence intervals
+        alpha = 1 - ci_level
+        lower_ci = np.percentile(lower_bounds, [alpha/2*100, (1-alpha/2)*100])
+        upper_ci = np.percentile(upper_bounds, [alpha/2*100, (1-alpha/2)*100])
+        return {
+            'lower_bound_ci': lower_ci,
+            'upper_bound_ci': upper_ci,
+            'lower_bounds': lower_bounds,
+            'upper_bounds': upper_bounds,
+            'ci_level': ci_level
+        }
+class LeeBoundsResults:
+    """
+    Results from Lee bounds estimation.
+    """
+    def __init__(self, lower_bound: float, upper_bound: float, p1: float,
+                 p0: float, trim_proportion: float, n_treated: int, n_control: int,
+                 n_treated_selected: int, n_control_selected: int):
+        self.lower_bound = lower_bound
+        self.upper_bound = upper_bound
+        self.p1 = p1
+        self.p0 = p0
+        self.trim_proportion = trim_proportion
+        self.n_treated = n_treated
+        self.n_control = n_control
+        self.n_treated_selected = n_treated_selected
+        self.n_control_selected = n_control_selected
+    def summary(self) -> str:
+        """
+        Return a summary of the results.
+        Returns
+        -------
+        str
+            Formatted summary string
+        """
+        summary = f"""
+Lee (2009) Treatment Effect Bounds
+=================================
+Treatment Effect Bounds:
+- Lower bound: {self.lower_bound:.4f}
+- Upper bound: {self.upper_bound:.4f}
+- Bound width: {self.upper_bound - self.lower_bound:.4f}
+"""
+        return summary

pyleebounds-0.1.0/pyleebounds.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,196 @@
+Metadata-Version: 2.4
+Name: pyleebounds
+Version: 0.1.0
+Summary: Python package for Lee 2009 treatment effect bounds under sample selection
+Home-page: https://github.com/vyasenov/pyleebounds
+Author: Vasco Yasenov
+Author-email:
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: pandas>=1.3.0
+Requires-Dist: matplotlib>=3.3.0
+Requires-Dist: seaborn>=0.11.0
+Requires-Dist: scipy>=1.7.0
+Provides-Extra: dev
+Requires-Dist: pytest>=6.0; extra == "dev"
+Requires-Dist: pytest-cov>=2.0; extra == "dev"
+Requires-Dist: black>=21.0; extra == "dev"
+Requires-Dist: flake8>=3.8; extra == "dev"
+Requires-Dist: mypy>=0.800; extra == "dev"
+Dynamic: author
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+# pyleebounds
+A Python package for estimating treatment effect bounds under sample selection, based on the method of Lee (2009). This approach is especially useful when selection into the observed sample (e.g., post-treatment employment) differs by treatment status and may introduce bias in outcome comparisons.
+## Installation
+You can install the package using pip:
+```bash
+pip install pyleebounds
+```
+## Features
+* Sharp nonparametric bounds on treatment effects under endogenous sample selection
+* Automatically handles non-random attrition or truncation (e.g. only observing outcomes for employed individuals)
+* Bootstrap confidence intervals
+* Seamless integration with Pandas
+## Quick Start
+```python
+import pandas as pd
+import numpy as np
+from pyleebounds import LeeBounds
+# Generate synthetic data
+np.random.seed(1988)
+n = 1000
+# Treatment assignment (random)
+D = np.random.binomial(1, 0.5, n)
+# Potential outcomes (e.g., wages)
+Y0 = np.random.normal(50, 10, n)  # Control potential outcome
+treatment_effect = np.random.normal(5, 3, n)  # Treatment effect
+Y1 = Y0 + treatment_effect  # Treated potential outcome
+Y = D * Y1 + (1 - D) * Y0  # Actual outcome
+# Selection mechanism (e.g., employment)
+# Higher wages and treatment increase employment probability
+employment_prob = 0.3 + 0.4 * (Y > 50) + 0.2 * D
+employment_prob = np.clip(employment_prob, 0, 1)
+S = np.random.binomial(1, employment_prob, n)
+# Create DataFrame
+df = pd.DataFrame({
+    'Y': Y,  # outcome variable
+    'D': D,  # treatment indicator (1 = treated, 0 = control)
+    'S': S   # selection indicator (1 = observed, 0 = missing/selected out)
+})
+# Initialize and fit Lee bounds estimator
+lb = LeeBounds()
+results = lb.fit(df, outcome='Y', treatment='D', selection='S')
+# View summary
+print(results.summary())
+# Plot estimated bounds
+results.plot()
+```
+## Examples
+You can find detailed usage examples in the  `examples/` directory.
+## Background
+### Why Treatment Bounds?
+In many applied settings, outcomes are observed only for a selected subset of the population—e.g., wages are observed only for employed individuals. If treatment affects selection (e.g., job training increases employment), naïvely comparing outcomes may confound treatment effects with selection effects.
+Lee (2009) offers a way to partially identify treatment effects by trimming the treated group's distribution to match the control group’s selection rate under plausible assumptions.
+---
+### Notation
+Let's establish the following notation:
+* $Y$: observed *continuous* outcome
+* $D \in \{0,1\}$: treatment indicator (1 = treated)
+* $S \in \{0,1\}$: selection indicator (1 = observed)
+* $Y(0), Y(1)$: potential outcomes under control/treatment
+* $S(0), S(1)$: potential selection statuses
+* $p_1 = \Pr(S=1 \mid D=1)$, $p_0 = \Pr(S=1 \mid D=0)$: selection rates
+For each unit we observe $\left(D, S, Y\times S \right)$.
+---
+### Assumptions
+1. Monotonicity: Treatment weakly increases the probability of being observed $$S(1)\geq S(0).$$
+2. Exogeneity: Treatment is randomly assigned or unconfounded $$\left(Y(0),Y(1),S(0),S(1)\right) \perp D.$$
+---
+### Main Result
+To adjust for differential selection, Lee (2009) suggested trimming the treated group’s outcome distribution among those with $S=1$. We then compute bounds on the average treatment effect (ATE) for the always-observed subpopulation (units with $S(0)=S(1)=1$, i.e., those who would be observed regardless of treatment status) as:
+$$
+ATE \in \left[ \underline{\Delta}, \overline{\Delta} \right],
+$$
+where:
+$$
+\underline{\Delta} = \mathbb{E}[Y \mid Y\leq q^{\frac{p_0}{p_1}}, D=1, S=1] - \mathbb{E}[Y \mid D=0, S=1]
+$$
+$$
+\overline{\Delta} = \mathbb{E}[Y \mid Y\geq q^{1-\frac{p_0}{p_1}}, D=1, S=1] - \mathbb{E}[Y \mid D=0, S=1]
+$$
+Here $q^{u}$ represents the $u$th quantile of $Y|D=1,S=1$. These form sharp bounds under the stated assumptions.
+These bounds can be tightened in presence of additional covariates $X$, but this package does not offer that functionality. See also Semenova (2020).
+---
+### Confidence Intervals
+Since the Lee bounds involve non-differentiable operations (quantile trimming), variance formulas are complex. Instead, this package provides bootstrap confidence intervals computed as follows:
+1. Resample units with replacement, stratified by treatment group.
+2. Compute Lee bounds for each bootstrap sample.
+3. Construct percentile intervals using the empirical bootstrap distribution.
+## References
+* Lee, D. S. (2009). *Training, wages, and sample selection: Estimating sharp bounds on treatment effects*. *The Review of Economic Studies*, 76(3), 1071–1102.
+* Semenova, V. (2020). Generalized lee bounds. arXiv preprint arXiv:2008.12720.
+* Tauchmann, H. (2014). Lee (2009) treatment-effect bounds for nonrandom sample selection. The Stata Journal, 14(4), 884-894.
+## License
+This project is licensed under the MIT License – see the [LICENSE](LICENSE) file for details.
+## Citation
+To cite this package in publications, use the following BibTeX entry:
+```bibtex
+@misc{yasenov2025pyleebounds,
+  author       = {Vasco Yasenov},
+  title        = {pyleebounds: Python Tools for Estimating Treatment Effect Bounds under Sample Selection},
+  year         = {2025},
+  howpublished = {\url{https://github.com/vyasenov/pyleebounds}},
+  note         = {Version 0.1.0}
+}
+```

pyleebounds-0.1.0/pyleebounds.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,11 @@
+LICENSE
+README.md
+setup.py
+pyleebounds/__init__.py
+pyleebounds/lee_bounds.py
+pyleebounds.egg-info/PKG-INFO
+pyleebounds.egg-info/SOURCES.txt
+pyleebounds.egg-info/dependency_links.txt
+pyleebounds.egg-info/not-zip-safe
+pyleebounds.egg-info/requires.txt
+pyleebounds.egg-info/top_level.txt

pyleebounds-0.1.0/pyleebounds.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

pyleebounds-0.1.0/pyleebounds.egg-info/not-zip-safe ADDED Viewed

	@@ -0,0 +1 @@
1	+

pyleebounds-0.1.0/pyleebounds.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,12 @@
+numpy>=1.20.0
+pandas>=1.3.0
+matplotlib>=3.3.0
+seaborn>=0.11.0
+scipy>=1.7.0
+[dev]
+pytest>=6.0
+pytest-cov>=2.0
+black>=21.0
+flake8>=3.8
+mypy>=0.800

pyleebounds-0.1.0/pyleebounds.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ pyleebounds

pyleebounds-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

pyleebounds-0.1.0/setup.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""
+Setup script for pyleebounds package.
+"""
+from setuptools import setup, find_packages
+import os
+def read_readme():
+    """Read README.md file."""
+    try:
+        with open("README.md", "r", encoding="utf-8") as fh:
+            return fh.read()
+    except FileNotFoundError:
+        return "Python package for Lee 2009 treatment effect bounds under sample selection"
+def read_requirements():
+    """Read requirements.txt file."""
+    try:
+        with open("requirements.txt", "r", encoding="utf-8") as fh:
+            return [line.strip() for line in fh if line.strip() and not line.startswith("#")]
+    except FileNotFoundError:
+        # Default requirements if file not found
+        return [
+            "numpy>=1.20.0",
+            "pandas>=1.3.0",
+            "matplotlib>=3.3.0",
+            "seaborn>=0.11.0",
+            "scipy>=1.7.0"
+        ]
+setup(
+    name="pyleebounds",
+    version="0.1.0",
+    author="Vasco Yasenov",
+    author_email="",
+    description="Python package for Lee 2009 treatment effect bounds under sample selection",
+    long_description=read_readme(),
+    long_description_content_type="text/markdown",
+    url="https://github.com/vyasenov/pyleebounds",
+    packages=find_packages(),
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Science/Research",
+        "Topic :: Scientific/Engineering :: Information Analysis",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+    ],
+    python_requires=">=3.8",
+    install_requires=read_requirements(),
+    extras_require={
+        "dev": [
+            "pytest>=6.0",
+            "pytest-cov>=2.0",
+            "black>=21.0",
+            "flake8>=3.8",
+            "mypy>=0.800",
+        ],
+    },
+    include_package_data=True,
+    zip_safe=False,
+)