pycreditools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ """
2
+ pycreditools: A Python library for credit risk policy simulation and analysis.
3
+ """
4
+
5
+ from ._types import SimulationMethod, ClusteringMethod, Quadrant, StageDirection, PolicySummary
6
+ from .stages import Stage, CutoffStage, FilterStage, RateStage
7
+ from .stress import StressScenario, AggravationStress, MonotonicStress, CustomStress
8
+ from .policy import CreditPolicy
9
+ from .simulation import CreditSimResults, run_simulation
10
+ from .performance import summarize_results, compare_policies
11
+ from .analysis import run_tradeoff_analysis
12
+ from .grouping import find_risk_groups, RiskGroupResult, GroupingRecipe
13
+ from .screening import screen_risk_segments, ScreeningResult, ScreeningRecipe
14
+ from .sample_data import generate_sample_data
15
+
16
# Names re-exported as the public API of the package, grouped by the
# submodule they are imported from above.
__all__ = [
    # ._types
    "SimulationMethod",
    "ClusteringMethod",
    "Quadrant",
    "StageDirection",
    "PolicySummary",
    # .stages
    "Stage",
    "CutoffStage",
    "FilterStage",
    "RateStage",
    # .stress
    "StressScenario",
    "AggravationStress",
    "MonotonicStress",
    "CustomStress",
    # .policy
    "CreditPolicy",
    # .simulation
    "CreditSimResults",
    "run_simulation",
    # .performance
    "summarize_results",
    "compare_policies",
    # .analysis
    "run_tradeoff_analysis",
    # .grouping
    "find_risk_groups",
    "RiskGroupResult",
    "GroupingRecipe",
    # .screening
    "screen_risk_segments",
    "ScreeningResult",
    "ScreeningRecipe",
    # .sample_data
    "generate_sample_data",
]
@@ -0,0 +1,5 @@
1
+ from .ward import ward_cluster
2
+ from .iv import iv_cluster
3
+ from .tier_metrics import calculate_tier_metrics
4
+
5
# Public API of this sub-package: the functions imported from .ward, .iv and .tier_metrics.
__all__ = ["ward_cluster", "iv_cluster", "calculate_tier_metrics"]
@@ -0,0 +1,167 @@
1
+ from __future__ import annotations
2
+ import numpy as np
3
+
4
def iv_cluster(
    pd_values: np.ndarray,
    volumes: np.ndarray,
    max_groups: int,
    min_vol_ratio: float,
    lambda_cross: float = 0.5,
    lambda_vol: float = 0.2,
    monthly_vols: np.ndarray | None = None,
    monthly_bads: np.ndarray | None = None,
) -> np.ndarray:
    """
    IV-based agglomerative clustering of adjacent bins with constraints.

    Every bin starts as its own group. On each pass the adjacent pair with the
    lowest merge cost is merged. Merging stops once no forced merge is pending
    AND at most ``max_groups`` groups remain. Costs, from most to least urgent:

    * either side has zero volume            -> -1e9 (forced)
    * left PD >= right PD (non-monotonic)     -> -1e6 (forced)
    * otherwise                               -> IV lost by the merge
      + ``lambda_cross`` * (months where the left monthly PD >= the right one)
      + ``lambda_vol``   * (std of the merged group's monthly PD)
    * for non-zero-volume pairs, an extra -1000 when either side holds less
      than ``min_vol_ratio`` of the total volume (forced)

    A merge counts as forced when its cost is below -100.

    Args:
        pd_values: float64[n_bins] - mean PD per bin (any array-like is accepted)
        volumes: int64[n_bins] - volume per bin (any array-like is accepted)
        max_groups: upper bound on the number of output clusters; forced merges
            may leave fewer groups than this
        min_vol_ratio: min fraction of total volume per cluster
        lambda_cross: penalty weight for vintage crossings
        lambda_vol: penalty weight for PD volatility
        monthly_vols: int64[n_bins, n_months]
        monthly_bads: int64[n_bins, n_months]
            The monthly penalties are only applied when both monthly arrays
            are supplied.

    Returns:
        int64[n_bins] - 1-based group assignments
    """
    pd_values = np.asarray(pd_values, dtype=np.float64)
    n_bins = len(pd_values)
    if n_bins == 0:
        return np.array([], dtype=np.int64)

    # Work on private float copies so the caller's arrays are never mutated.
    active = np.ones(n_bins, dtype=bool)
    current_vol = np.array(volumes, dtype=np.float64)
    current_bads = pd_values * current_vol

    total_vol = current_vol.sum()
    total_bads = current_bads.sum()
    total_goods = total_vol - total_bads

    if monthly_vols is not None and monthly_bads is not None:
        curr_m_vols = np.array(monthly_vols, dtype=np.float64)
        curr_m_bads = np.array(monthly_bads, dtype=np.float64)
    else:
        curr_m_vols = None
        curr_m_bads = None

    # group_ids[i] is the index of the surviving group that bin i belongs to.
    group_ids = np.arange(n_bins)
    n_active = n_bins

    def calc_iv(bads, vols):
        """IV contribution of one group; 0.0 whenever a share is non-positive."""
        if total_goods <= 0 or total_bads <= 0:
            return 0.0
        goods = vols - bads
        p_b = bads / total_bads
        p_g = goods / total_goods
        if p_b <= 0 or p_g <= 0:
            return 0.0
        return (p_g - p_b) * np.log(p_g / p_b)

    while n_active > 1:
        active_indices = np.flatnonzero(active)

        min_cost = np.inf
        best_merge_idx = -1  # position in active_indices of the left group

        for i in range(len(active_indices) - 1):
            idx1 = active_indices[i]
            idx2 = active_indices[i + 1]

            v1 = current_vol[idx1]
            v2 = current_vol[idx2]
            b1 = current_bads[idx1]
            b2 = current_bads[idx2]

            if v1 == 0 or v2 == 0:
                # Empty groups must always be absorbed first.
                cost = -1e9
            else:
                p1 = b1 / v1
                p2 = b2 / v2

                if p1 >= p2:
                    # Monotonicity violation: prioritise over normal merges.
                    cost = -1e6
                else:
                    iv_loss = (
                        calc_iv(b1, v1)
                        + calc_iv(b2, v2)
                        - calc_iv(b1 + b2, v1 + v2)
                    )

                    cross_penalty = 0.0
                    volatility_penalty = 0.0

                    if curr_m_vols is not None and curr_m_bads is not None:
                        mv1 = curr_m_vols[idx1]
                        mv2 = curr_m_vols[idx2]
                        mb1 = curr_m_bads[idx1]
                        mb2 = curr_m_bads[idx2]

                        # Volatility of the merged group's monthly PD.
                        mv = mv1 + mv2
                        mb = mb1 + mb2
                        valid = mv > 0
                        if valid.any():
                            volatility_penalty = np.std(mb[valid] / mv[valid])

                        # Crossings are counted between the two groups being
                        # merged, over months where both have volume.
                        v_valid = (mv1 > 0) & (mv2 > 0)
                        if v_valid.any():
                            mp1 = mb1[v_valid] / mv1[v_valid]
                            mp2 = mb2[v_valid] / mv2[v_valid]
                            cross_penalty = np.sum(mp1 >= mp2)

                    cost = (
                        iv_loss
                        + lambda_cross * cross_penalty
                        + lambda_vol * volatility_penalty
                    )

                # Force merge if volume is below threshold; the bonus is large
                # but smaller than the monotonicity priority.
                if (v1 / total_vol < min_vol_ratio) or (v2 / total_vol < min_vol_ratio):
                    cost -= 1000.0

            if cost < min_cost:
                min_cost = cost
                best_merge_idx = i

        # Stop when no forced merge (cost < -100) is pending and the group
        # budget is respected.
        if min_cost >= -100 and n_active <= max_groups:
            break

        # Every cost was NaN (e.g. NaN PDs), so no candidate was selected.
        # Merge the left-most adjacent pair instead of letting index -1 wrap
        # around and glue the last group onto the first one.
        if best_merge_idx < 0:
            best_merge_idx = 0

        # Execute merge: fold the right group into the left one.
        idx1 = active_indices[best_merge_idx]
        idx2 = active_indices[best_merge_idx + 1]

        current_vol[idx1] += current_vol[idx2]
        current_bads[idx1] += current_bads[idx2]

        if curr_m_vols is not None and curr_m_bads is not None:
            curr_m_vols[idx1] += curr_m_vols[idx2]
            curr_m_bads[idx1] += curr_m_bads[idx2]

        active[idx2] = False
        group_ids[group_ids == idx2] = idx1
        n_active -= 1

    # Remap surviving group indices to 1-based sequential integers: the running
    # count of active flags at a survivor's index is exactly its rank.
    return np.cumsum(active)[group_ids].astype(np.int64)
@@ -0,0 +1,103 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
def calculate_tier_metrics(
    values: np.ndarray,
    groups: np.ndarray,
    defaults: np.ndarray,
    n_bins: int,
) -> pd.DataFrame:
    """
    Fast screening metrics: IV and PD spread of a variable within each risk group.

    Observations with a NaN in ``values``, ``groups`` or ``defaults`` are
    dropped. Within each group the remaining observations are sorted by value
    and split by rank into ``n_bins`` near-equal bins. Ties are kept in input
    order (stable sort), so the split is reproducible. A group whose
    observations were all dropped is still reported, with ``tier_vol`` 0.

    Args:
        values: float64[n_obs] - candidate variable values
        groups: int64[n_obs] - risk group assignments
        defaults: int64[n_obs] - default flags (0/1)
        n_bins: number of quantile bins per group; with ``n_bins <= 0`` no bin
            is formed (iv 0.0, pd_min/pd_max NaN, pd_spread 0.0)

    Returns:
        DataFrame with columns: risk_group, iv, pd_min, pd_max, pd_spread, tier_vol
    """
    columns = ["risk_group", "iv", "pd_min", "pd_max", "pd_spread", "tier_vol"]

    values = np.asarray(values)
    groups = np.asarray(groups)
    defaults = np.asarray(defaults)

    # Taken before filtering so groups with only invalid rows still get a row.
    unique_groups = np.unique(groups)

    # Exclude NaNs from calculations.
    valid_mask = ~(np.isnan(values) | np.isnan(defaults) | np.isnan(groups))
    values = values[valid_mask]
    groups = groups[valid_mask].astype(np.int64)
    defaults = defaults[valid_mask].astype(np.int64)

    results = []
    for g in unique_groups:
        if np.isnan(g):
            continue

        g_mask = groups == g
        g_values = values[g_mask]
        g_defaults = defaults[g_mask]
        tier_vol = len(g_values)

        iv_sum = 0.0
        pd_min = np.nan
        pd_max = np.nan
        pd_spread = 0.0

        if tier_vol > 0 and n_bins > 0:
            total_bads = g_defaults.sum()
            total_goods = tier_vol - total_bads

            # Stable sort: tied values keep their input order.
            sorted_defaults = g_defaults[np.argsort(g_values, kind="stable")]

            # Rank-based binning: position k goes to bin (k * n_bins) // tier_vol,
            # so duplicates are split by position rather than by value.
            bin_assignments = (np.arange(tier_vol) * n_bins) // tier_vol

            # One pass per statistic instead of one boolean mask per bin.
            bin_vols = np.bincount(bin_assignments, minlength=n_bins)
            bin_bads = np.bincount(
                bin_assignments, weights=sorted_defaults, minlength=n_bins
            )

            occupied = bin_vols > 0
            b_vol = bin_vols[occupied]
            b_bads = bin_bads[occupied]
            b_goods = b_vol - b_bads

            bin_pds = b_bads / b_vol

            # Additive (+0.5 / +1.0) smoothing keeps the log finite for bins
            # that contain no bads or no goods.
            p_b = (b_bads + 0.5) / (total_bads + 1.0)
            p_g = (b_goods + 0.5) / (total_goods + 1.0)
            iv_sum = float(np.sum((p_g - p_b) * np.log(p_g / p_b)))

            pd_min = bin_pds.min()
            pd_max = bin_pds.max()
            pd_spread = pd_max - pd_min

        results.append({
            "risk_group": g,
            "iv": iv_sum,
            "pd_min": pd_min,
            "pd_max": pd_max,
            "pd_spread": pd_spread,
            "tier_vol": tier_vol,
        })

    if not results:
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(results)
@@ -0,0 +1,155 @@
1
+ from __future__ import annotations
2
+ import numpy as np
3
+
4
def ward_cluster(
    pd_values: np.ndarray,
    volumes: np.ndarray,
    max_groups: int,
    min_vol_ratio: float,
    max_crossings: int,
    use_volume_weights: bool = True,
    monthly_vols: np.ndarray | None = None,
    monthly_bads: np.ndarray | None = None,
) -> np.ndarray:
    """
    Ward agglomerative clustering of adjacent bins with credit-risk constraints.

    Every bin starts as its own group. On each pass the adjacent pair with the
    lowest cost is merged. Merging stops once no constraint is violated AND at
    most ``max_groups`` groups remain. The base cost is the linkage distance
    ``delta``; a violated constraint shifts it to a negative priority band:

    * Priority 0, either side has zero volume:          -2e9 + delta
    * Priority 1, left PD >= right PD (non-monotonic):  -1e9 + delta
    * Priority 2, either side below ``min_vol_ratio``:  -1e6 + delta
    * Priority 3, monthly crossings > ``max_crossings``: -1e3 + delta

    Args:
        pd_values: float64[n_bins] - mean PD per bin (any array-like is accepted)
        volumes: int64[n_bins] - volume per bin (any array-like is accepted)
        max_groups: max number of output clusters
        min_vol_ratio: min fraction of total volume per cluster
        max_crossings: max vintage inversions between adjacent groups
        use_volume_weights: if False, performs pure distance-based linkage
            (merged PDs are still volume-weighted averages)
        monthly_vols: int64[n_bins, n_months]
        monthly_bads: int64[n_bins, n_months]
            The crossing constraint is only checked when both monthly arrays
            are supplied.

    Returns:
        int64[n_bins] - 1-based group assignments
    """
    # Private float copies: the caller's arrays are never mutated.
    current_pd = np.array(pd_values, dtype=np.float64)
    n_bins = len(current_pd)
    if n_bins == 0:
        return np.array([], dtype=np.int64)

    # Masking is used instead of a linked list: the pass is O(n) either way.
    active = np.ones(n_bins, dtype=bool)
    current_vol = np.array(volumes, dtype=np.float64)
    total_vol = current_vol.sum()

    if monthly_vols is not None and monthly_bads is not None:
        curr_m_vols = np.array(monthly_vols, dtype=np.float64)
        curr_m_bads = np.array(monthly_bads, dtype=np.float64)
    else:
        curr_m_vols = None
        curr_m_bads = None

    # group_ids tracks which current cluster index each original bin belongs
    # to; initially bin i belongs to cluster i.
    group_ids = np.arange(n_bins)
    n_active = n_bins

    while n_active > 1:
        active_indices = np.flatnonzero(active)

        min_cost = np.inf
        best_merge_idx = -1  # index in active_indices of the left group

        for i in range(len(active_indices) - 1):
            idx1 = active_indices[i]
            idx2 = active_indices[i + 1]

            v1 = current_vol[idx1]
            v2 = current_vol[idx2]
            p1 = current_pd[idx1]
            p2 = current_pd[idx2]

            # Linkage distance
            if not use_volume_weights:
                delta = (p1 - p2) ** 2
            elif v1 + v2 == 0:
                delta = 0.0
            else:
                delta = (v1 * v2) / (v1 + v2) * (p1 - p2) ** 2

            cost = delta

            if v1 == 0 or v2 == 0:
                # Priority 0: zero volume
                cost = -2e9 + delta
            elif p1 >= p2:
                # Priority 1: monotonicity violation
                cost = -1e9 + delta
            elif (v1 / total_vol) < min_vol_ratio or (v2 / total_vol) < min_vol_ratio:
                # Priority 2: volume below min_vol_ratio
                cost = -1e6 + delta
            elif curr_m_vols is not None and curr_m_bads is not None:
                # Priority 3: crossings, counted over months where both groups
                # have volume.
                mv1 = curr_m_vols[idx1]
                mv2 = curr_m_vols[idx2]
                valid_months = (mv1 > 0) & (mv2 > 0)
                if valid_months.any():
                    mp1 = curr_m_bads[idx1][valid_months] / mv1[valid_months]
                    mp2 = curr_m_bads[idx2][valid_months] / mv2[valid_months]
                    if np.sum(mp1 >= mp2) > max_crossings:
                        cost = -1e3 + delta

            if cost < min_cost:
                min_cost = cost
                best_merge_idx = i

        # Stopping condition: no constraint violated AND n_active <= max_groups
        if min_cost >= 0 and n_active <= max_groups:
            break

        # Every cost was NaN (e.g. NaN PDs), so no candidate was selected.
        # Merge the left-most adjacent pair instead of letting index -1 wrap
        # around and glue the last group onto the first one.
        if best_merge_idx < 0:
            best_merge_idx = 0

        # Execute merge: fold idx2 into idx1.
        idx1 = active_indices[best_merge_idx]
        idx2 = active_indices[best_merge_idx + 1]

        v1 = current_vol[idx1]
        v2 = current_vol[idx2]

        if v1 + v2 > 0:
            # Volume-weighted mean PD of the merged group.
            current_pd[idx1] = (current_pd[idx1] * v1 + current_pd[idx2] * v2) / (v1 + v2)
        else:
            current_pd[idx1] = 0.0

        current_vol[idx1] = v1 + v2

        if curr_m_vols is not None and curr_m_bads is not None:
            curr_m_vols[idx1] += curr_m_vols[idx2]
            curr_m_bads[idx1] += curr_m_bads[idx2]

        active[idx2] = False
        group_ids[group_ids == idx2] = idx1
        n_active -= 1

    # Remap active groups to 1-based sequential integers: the running count of
    # active flags at a survivor's index is exactly its rank.
    return np.cumsum(active)[group_ids].astype(np.int64)
@@ -0,0 +1,32 @@
1
+ import pandas as pd
2
+ from typing import Callable, Iterable, Any, Optional
3
+
4
def parallel_map(
    fn: Callable[[Any], Any],
    items: Iterable[Any],
    parallel: bool = False,
    n_workers: Optional[int] = None,
    desc: Optional[str] = None
) -> list[Any]:
    """Apply ``fn`` to every element of ``items`` and return the results in order.

    With ``parallel`` set, the work is dispatched to a
    ``concurrent.futures.ProcessPoolExecutor`` with ``n_workers`` processes;
    otherwise it runs serially in the calling process. ``desc`` is accepted
    but not used by this function.
    """
    if parallel:
        from concurrent.futures import ProcessPoolExecutor

        # The pool is shut down when the ``with`` block exits.
        with ProcessPoolExecutor(max_workers=n_workers) as pool:
            return list(pool.map(fn, items))

    collected = []
    for element in items:
        collected.append(fn(element))
    return collected
20
+
21
def parallel_map_df(
    fn: Callable[[Any], pd.DataFrame],
    items: Iterable[Any],
    parallel: bool = False,
    n_workers: Optional[int] = None,
    desc: Optional[str] = None
) -> pd.DataFrame:
    """Run ``fn`` over ``items`` via ``parallel_map`` and stack the frames.

    The per-item DataFrames are concatenated with a fresh integer index.
    An empty DataFrame is returned when there is nothing to concatenate.
    """
    frames = parallel_map(fn, items, parallel, n_workers, desc)
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
pycreditools/_types.py ADDED
@@ -0,0 +1,28 @@
1
+ from enum import Enum
2
+ from typing import TypedDict
3
+
4
class SimulationMethod(str, Enum):
    """Identifiers for the available simulation methods."""
    # Members also subclass ``str``, so they compare equal to their plain
    # string values (e.g. ``SimulationMethod.ANALYTICAL == "analytical"``).
    ANALYTICAL = "analytical"
    STOCHASTIC = "stochastic"
7
+
8
class ClusteringMethod(str, Enum):
    """Identifiers for the available clustering methods."""
    # NOTE(review): presumably these select ward_cluster / iv_cluster —
    # confirm against the caller that dispatches on this enum.
    WARD = "ward"
    IV = "iv"
11
+
12
class Quadrant(str, Enum):
    """Four-way keep/swap classification labels."""
    # NOTE(review): presumably compares an old vs. new approval decision
    # (kept approved, newly approved, newly declined, kept declined) —
    # confirm against the code that assigns these labels.
    KEEP_IN = "keep_in"
    SWAP_IN = "swap_in"
    SWAP_OUT = "swap_out"
    KEEP_OUT = "keep_out"
17
+
18
class StageDirection(str, Enum):
    """Comparison direction used by a stage."""
    # NOTE(review): presumably "greater than or equal" / "less than or equal"
    # — confirm against the stages module.
    GTE = "gte"
    LTE = "lte"
21
+
22
class PolicySummary(TypedDict):
    """Schema for simulation summary outputs."""
    # NOTE(review): the field meanings below are inferred from the names only —
    # confirm against the code that builds these summaries.
    scenario: str  # presumably the scenario label
    applicants: int  # presumably the applicant count
    approved: float  # presumably approved volume or share
    hired: float  # presumably hired (converted) volume or share
    bad_rate: float  # presumably the default rate
@@ -0,0 +1,96 @@
1
+ import pandas as pd
2
+ import itertools
3
+ from typing import Any
4
+ import copy
5
+
6
+ from .policy import CreditPolicy
7
+ from .stages import CutoffStage, RateStage, FilterStage
8
+ from .stress import AggravationStress
9
+ from .simulation import run_simulation, SimulationMethod
10
+
11
def _run_tradeoff_point(
    params: dict[str, Any],
    data: pd.DataFrame,
    base_policy: CreditPolicy,
) -> dict[str, Any]:
    """Simulate one grid point; return its params plus approval and default rates.

    Defined at module level (not as a closure) so it can be pickled and sent
    to worker processes by ``run_tradeoff_analysis(parallel=True)``.
    """
    import dataclasses

    # Never mutate the caller's template policy.
    temp_policy = copy.deepcopy(base_policy)

    # 1. Handle cutoffs: "<column>_cutoff" -> cutoff on <column>.
    # Only the trailing suffix is stripped, so a column name that itself
    # contains "_cutoff" is preserved. Columns absent from ``data`` are ignored.
    actual_cutoffs = {}
    for key, value in params.items():
        if not key.endswith("_cutoff"):
            continue
        col_name = key.removesuffix("_cutoff")
        if col_name in data.columns:
            actual_cutoffs[col_name] = value

    if actual_cutoffs:
        temp_policy = temp_policy.add_stage(
            CutoffStage(name="dynamic_cutoffs", cutoffs=actual_cutoffs)
        )

    # 2. Handle aggravation factor: replaces all stress scenarios.
    if "aggravation_factor" in params:
        agg_stress = AggravationStress(factor=params["aggravation_factor"])
        temp_policy = dataclasses.replace(temp_policy, stress_scenarios=(agg_stress,))

    # 3. Handle dynamic base rates: "<stage name>_base_rate" -> RateStage.base_rate.
    base_rate_params = {
        key: value for key, value in params.items() if key.endswith("_base_rate")
    }
    if base_rate_params:
        stages_list = list(temp_policy.stages)
        for key, value in base_rate_params.items():
            stage_name = key.removesuffix("_base_rate")
            for i, stage in enumerate(stages_list):
                if stage.name == stage_name and isinstance(stage, RateStage):
                    # NOTE(review): only name, base_rate and variable are
                    # carried over; confirm RateStage has no other fields
                    # that should be preserved.
                    stages_list[i] = RateStage(
                        name=stage.name, base_rate=value, variable=stage.variable
                    )
        temp_policy = dataclasses.replace(temp_policy, stages=tuple(stages_list))

    # Run simulation
    sim_results = run_simulation(data, temp_policy, method=SimulationMethod.ANALYTICAL)
    final_data = sim_results.data

    app_sum = final_data["new_approval"].sum()
    total = len(final_data)
    approval_rate = app_sum / total if total > 0 else 0.0

    if app_sum > 0:
        # Default rate among approved applicants.
        bad_rate = (
            final_data["simulated_default"] * final_data["new_approval"]
        ).sum() / app_sum
    else:
        bad_rate = 0.0

    result = dict(params)
    result["approval_rate"] = approval_rate
    result["default_rate"] = bad_rate
    return result


def run_tradeoff_analysis(
    data: pd.DataFrame,
    base_policy: CreditPolicy,
    vary_params: dict[str, list[Any]],
    parallel: bool = False,
) -> pd.DataFrame:
    """Run a trade-off analysis simulation over a grid of parameters.

    Every combination of the values in ``vary_params`` is simulated with the
    analytical method on a deep copy of ``base_policy``. Recognised keys:

    * ``"<column>_cutoff"``: adds a ``CutoffStage`` on ``<column>``
      (ignored if the column is not in ``data``).
    * ``"aggravation_factor"``: replaces the policy's stress scenarios with
      a single ``AggravationStress``.
    * ``"<stage name>_base_rate"``: overrides the base rate of the
      ``RateStage`` with that name.

    Args:
        data: Applicant data.
        base_policy: The template policy.
        vary_params: Dictionary mapping parameter names to lists of values.
        parallel: Whether to run in parallel using concurrent.futures.
            ``data`` and ``base_policy`` are pickled and sent to the worker
            processes, so both must be picklable.

    Returns:
        DataFrame with one row per grid point: the parameter values plus
        ``approval_rate`` and ``default_rate``.
    """
    keys = list(vary_params.keys())
    values = list(vary_params.values())

    # Create parameter grid (Cartesian product of all value lists).
    grid = [dict(zip(keys, combo)) for combo in itertools.product(*values)]

    if parallel:
        import concurrent.futures
        import functools

        # A partial over a module-level function is picklable; a nested
        # closure is not and cannot be submitted to a process pool.
        worker = functools.partial(
            _run_tradeoff_point, data=data, base_policy=base_policy
        )
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = list(executor.map(worker, grid))
    else:
        results = [_run_tradeoff_point(p, data, base_policy) for p in grid]

    return pd.DataFrame(results)