PyPI - pycreditools - Versions diffs - 0.1.0__tar.gz - Mend

pycreditools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

pycreditools-0.1.0/.github/workflows/workflow.yaml +34 -0
pycreditools-0.1.0/.gitignore +38 -0
pycreditools-0.1.0/LICENSE +21 -0
pycreditools-0.1.0/PKG-INFO +155 -0
pycreditools-0.1.0/README.md +113 -0
pycreditools-0.1.0/pyproject.toml +75 -0
pycreditools-0.1.0/src/pycreditools/__init__.py +43 -0
pycreditools-0.1.0/src/pycreditools/_kernels/__init__.py +5 -0
pycreditools-0.1.0/src/pycreditools/_kernels/iv.py +167 -0
pycreditools-0.1.0/src/pycreditools/_kernels/tier_metrics.py +103 -0
pycreditools-0.1.0/src/pycreditools/_kernels/ward.py +155 -0
pycreditools-0.1.0/src/pycreditools/_parallel.py +32 -0
pycreditools-0.1.0/src/pycreditools/_types.py +28 -0
pycreditools-0.1.0/src/pycreditools/analysis.py +96 -0
pycreditools-0.1.0/src/pycreditools/grouping.py +222 -0
pycreditools-0.1.0/src/pycreditools/performance.py +141 -0
pycreditools-0.1.0/src/pycreditools/policy.py +133 -0
pycreditools-0.1.0/src/pycreditools/py.typed +1 -0
pycreditools-0.1.0/src/pycreditools/sample_data.py +98 -0
pycreditools-0.1.0/src/pycreditools/screening.py +224 -0
pycreditools-0.1.0/src/pycreditools/simulation.py +185 -0
pycreditools-0.1.0/src/pycreditools/stages.py +175 -0
pycreditools-0.1.0/src/pycreditools/stress.py +119 -0
pycreditools-0.1.0/test_script.py +24 -0
pycreditools-0.1.0/tests/test_grouping.py +36 -0

pycreditools-0.1.0/.github/workflows/workflow.yaml ADDED Viewed

@@ -0,0 +1,34 @@
+name: Publish to PyPI
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+jobs:
+  pypi-publish:
+    name: Build and publish to PyPI
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/pycreditools
+    permissions:
+      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install pypa/build
+        run: python -m pip install build
+      - name: Build a binary wheel and a source tarball
+        run: python -m build
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

pycreditools-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,38 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Pytest
+.pytest_cache/
+# Virtual Environments
+venv/
+.venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+# IDE
+.idea/
+.vscode/
+*.swp

pycreditools-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Matheus Pasche
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pycreditools-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,155 @@
+Metadata-Version: 2.4
+Name: pycreditools
+Version: 0.1.0
+Summary: Credit Risk Simulation and Policy Optimization — Python edition of creditools
+Project-URL: Homepage, https://github.com/matheuspasche/pycreditools
+Project-URL: Repository, https://github.com/matheuspasche/pycreditools
+Project-URL: Issues, https://github.com/matheuspasche/pycreditools/issues
+Author-email: Matheus Pasche <matheuspasche@outlook.com>
+License: MIT
+License-File: LICENSE
+Keywords: credit-risk,credit-scoring,information-value,policy-optimization,risk-management,simulation,ward-clustering
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Financial and Insurance Industry
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Office/Business :: Financial
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Requires-Dist: numpy>=1.24
+Requires-Dist: pandas>=2.0
+Provides-Extra: accel
+Requires-Dist: numba>=0.58; extra == 'accel'
+Provides-Extra: all
+Requires-Dist: joblib>=1.3; extra == 'all'
+Requires-Dist: numba>=0.58; extra == 'all'
+Requires-Dist: plotly>=5.0; extra == 'all'
+Provides-Extra: dev
+Requires-Dist: pytest-cov>=4.0; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == 'dev'
+Requires-Dist: ruff>=0.4; extra == 'dev'
+Provides-Extra: parallel
+Requires-Dist: joblib>=1.3; extra == 'parallel'
+Provides-Extra: viz
+Requires-Dist: plotly>=5.0; extra == 'viz'
+Description-Content-Type: text/markdown
+<div align="center">
+  <h1>📊 PyCrediTools</h1>
+  <p><i>Credit Risk Simulation and Policy Optimization for Python</i></p>
+  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
+  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://opensource.org/licenses/MIT)
+  [![Status](https://img.shields.io/badge/status-alpha-orange.svg)]()
+</div>
+---
+**PyCrediTools** é uma biblioteca de ponta projetada para equipes de Risco de Crédito. Traduzida e evoluída a partir do pacote fundacional em R, ela fornece motores computacionais para **Simulação de Funis de Crédito (Trade-off de Políticas)** e **Agrupamento Autónomo de Risco (Risk Clustering)**.
+Esqueça as aproximações por tentativas e erros. Com o PyCrediTools, você pode testar cortes de score contra taxas de aprovação, encontrar a política que maximiza a receita mantendo a inadimplência dentro do apetite ao risco, e recriar dinamicamente as faixas de Rating de forma matematicamente ótima.
+---
+## 🚀 Instalação
+Atualmente em fase final de testes, o pacote pode ser instalado diretamente do GitHub:
+```bash
+pip install git+https://github.com/matheuspasche/pycreditools.git
+```
+*(Em breve estará disponível no PyPI via `pip install pycreditools`)*
+---
+## 💡 Core Features
+- **Credit Policy Simulation**: Monte estágios rigorosos (Filtros duros, Regras de Corte de Score, Probabilidades Variáveis) e estresse a carteira sob diferentes condições económicas (agravamentos macro, declínios monotónicos).
+- **Automated Risk Clustering**: Agrupe milhares de combinações de scores numa arquitetura compacta de "Ratings de Risco". O algoritmo respeita limitações de negócio rigorosas (Tolerância a inversão de safra, Exigência Mínima de Volume).
+- **Distance Linkage Engine**: (Novo!) Uma evolução do algoritmo Ward tradicional que prioriza a simetria orgânica e o distanciamento da probabilidade de inadimplência em vez da densidade volumétrica da carteira.
+---
+## 📖 Quickstart (Exemplo de Uso)
+O uso típico envolve duas fases: Simular a política para gerar a "População Aprovada", e depois agrupar essa população em Ratings estruturais.
+### 1. Simulação do Funil
+```python
+import pandas as pd
+from pycreditools.policy import CreditPolicy
+from pycreditools.stages import CutoffStage
+from pycreditools.simulation import simulate_policy
+# Carregar Dados (O seu histórico de propostas e performance real)
+df = pd.read_csv("minha_base.csv")
+# Criar a Política
+policy = (
+    CreditPolicy(score_cols=["meu_score_novo"], actual_default_col="inadimplencia")
+    .add_stage(CutoffStage("Aprovacao_Score", cutoffs={"meu_score_novo": 650}))
+)
+# Simular aprovação
+df_simulado = simulate_policy(df, policy)
+df_aprovados = df_simulado[df_simulado["_approved"]]
+```
+### 2. Agrupamento Ótimo de Risco (Clustering)
+Vamos pedir ao motor que encontre o número **ótimo** de curvas de risco (até um máximo de 5 grupos), garantindo que **nunca se cruzem no tempo** (`max_crossings=0`).
+```python
+from pycreditools.grouping import find_risk_groups
+clustering = find_risk_groups(
+    df_aprovados,
+    score_cols="meu_score_novo",
+    default_col="inadimplencia",
+    time_col="safra_mes",       # Para matriz de temporalidade
+    bins=20,                    # Granularidade da pesquisa
+    max_groups=5,               # Teto Máximo
+    method="distance",          # Heurística pura de Distância
+    max_crossings=0,            # Tolerância Zero a inversão de curvas
+    min_vol_ratio=0.05          # Cada Rating deve ter >5% do volume
+)
+# Aplicar o modelo (nova coluna "risk_rating" gerada)
+df_final = clustering.predict(df_aprovados)
+print(f"O algoritmo agrupou os scores em {clustering.n_groups} Ratings de Risco Perfeitos.")
+print(df_final.groupby("risk_rating")["inadimplencia"].mean())
+```
+---
+## 🧠 Algoritmos de Agrupamento
+Ao invocar o `find_risk_groups`, o motor aceita dois métodos principais (`method="ward"` ou `method="distance"`):
+### Ward Method Tradicional (`method="ward"`)
+Pesquisa aglomerativa que funde micro-faixas usando o critério de variância espacial (Ward). Este método tende a produzir faixas de risco **igualmente densas** em termos de volume (Ratings com 20% do volume cada, mesmo que o risco não esteja bem distribuído).
+### Distance Linkage Autónomo (`method="distance"`)
+Um critério de custo inovador que ignora o volume na hora de medir as pontes matemáticas, penalizando unicamente o `(Risco 1 - Risco 2)^2`. A consequência brilhante disto é que os Ratings finais ficam distribuídos pelas faixas de probabilidade de forma **perfeitamente equidistante**, independentemente de um Rating ficar com 30% da carteira e outro com 8%. Ideal para mapas visuais limpos e estabilidade de risco orgânica.
+---
+## 🛠️ Contribuir e Desenvolver
+Para correr a suite de testes e submeter pull requests:
+```bash
+git clone https://github.com/matheuspasche/pycreditools.git
+cd pycreditools
+pip install -e .[dev]
+pytest tests/
+```
+## 📜 Licença
+Distribuído sob licença MIT. Desenvolvido para a engenharia financeira moderna.

pycreditools-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,113 @@
+<div align="center">
+  <h1>📊 PyCrediTools</h1>
+  <p><i>Credit Risk Simulation and Policy Optimization for Python</i></p>
+  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
+  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://opensource.org/licenses/MIT)
+  [![Status](https://img.shields.io/badge/status-alpha-orange.svg)]()
+</div>
+---
+**PyCrediTools** é uma biblioteca de ponta projetada para equipes de Risco de Crédito. Traduzida e evoluída a partir do pacote fundacional em R, ela fornece motores computacionais para **Simulação de Funis de Crédito (Trade-off de Políticas)** e **Agrupamento Autónomo de Risco (Risk Clustering)**.
+Esqueça as aproximações por tentativas e erros. Com o PyCrediTools, você pode testar cortes de score contra taxas de aprovação, encontrar a política que maximiza a receita mantendo a inadimplência dentro do apetite ao risco, e recriar dinamicamente as faixas de Rating de forma matematicamente ótima.
+---
+## 🚀 Instalação
+Atualmente em fase final de testes, o pacote pode ser instalado diretamente do GitHub:
+```bash
+pip install git+https://github.com/matheuspasche/pycreditools.git
+```
+*(Em breve estará disponível no PyPI via `pip install pycreditools`)*
+---
+## 💡 Core Features
+- **Credit Policy Simulation**: Monte estágios rigorosos (Filtros duros, Regras de Corte de Score, Probabilidades Variáveis) e estresse a carteira sob diferentes condições económicas (agravamentos macro, declínios monotónicos).
+- **Automated Risk Clustering**: Agrupe milhares de combinações de scores numa arquitetura compacta de "Ratings de Risco". O algoritmo respeita limitações de negócio rigorosas (Tolerância a inversão de safra, Exigência Mínima de Volume).
+- **Distance Linkage Engine**: (Novo!) Uma evolução do algoritmo Ward tradicional que prioriza a simetria orgânica e o distanciamento da probabilidade de inadimplência em vez da densidade volumétrica da carteira.
+---
+## 📖 Quickstart (Exemplo de Uso)
+O uso típico envolve duas fases: Simular a política para gerar a "População Aprovada", e depois agrupar essa população em Ratings estruturais.
+### 1. Simulação do Funil
+```python
+import pandas as pd
+from pycreditools.policy import CreditPolicy
+from pycreditools.stages import CutoffStage
+from pycreditools.simulation import simulate_policy
+# Carregar Dados (O seu histórico de propostas e performance real)
+df = pd.read_csv("minha_base.csv")
+# Criar a Política
+policy = (
+    CreditPolicy(score_cols=["meu_score_novo"], actual_default_col="inadimplencia")
+    .add_stage(CutoffStage("Aprovacao_Score", cutoffs={"meu_score_novo": 650}))
+)
+# Simular aprovação
+df_simulado = simulate_policy(df, policy)
+df_aprovados = df_simulado[df_simulado["_approved"]]
+```
+### 2. Agrupamento Ótimo de Risco (Clustering)
+Vamos pedir ao motor que encontre o número **ótimo** de curvas de risco (até um máximo de 5 grupos), garantindo que **nunca se cruzem no tempo** (`max_crossings=0`).
+```python
+from pycreditools.grouping import find_risk_groups
+clustering = find_risk_groups(
+    df_aprovados,
+    score_cols="meu_score_novo",
+    default_col="inadimplencia",
+    time_col="safra_mes",       # Para matriz de temporalidade
+    bins=20,                    # Granularidade da pesquisa
+    max_groups=5,               # Teto Máximo
+    method="distance",          # Heurística pura de Distância
+    max_crossings=0,            # Tolerância Zero a inversão de curvas
+    min_vol_ratio=0.05          # Cada Rating deve ter >5% do volume
+)
+# Aplicar o modelo (nova coluna "risk_rating" gerada)
+df_final = clustering.predict(df_aprovados)
+print(f"O algoritmo agrupou os scores em {clustering.n_groups} Ratings de Risco Perfeitos.")
+print(df_final.groupby("risk_rating")["inadimplencia"].mean())
+```
+---
+## 🧠 Algoritmos de Agrupamento
+Ao invocar o `find_risk_groups`, o motor aceita dois métodos principais (`method="ward"` ou `method="distance"`):
+### Ward Method Tradicional (`method="ward"`)
+Pesquisa aglomerativa que funde micro-faixas usando o critério de variância espacial (Ward). Este método tende a produzir faixas de risco **igualmente densas** em termos de volume (Ratings com 20% do volume cada, mesmo que o risco não esteja bem distribuído).
+### Distance Linkage Autónomo (`method="distance"`)
+Um critério de custo inovador que ignora o volume na hora de medir as pontes matemáticas, penalizando unicamente o `(Risco 1 - Risco 2)^2`. A consequência brilhante disto é que os Ratings finais ficam distribuídos pelas faixas de probabilidade de forma **perfeitamente equidistante**, independentemente de um Rating ficar com 30% da carteira e outro com 8%. Ideal para mapas visuais limpos e estabilidade de risco orgânica.
+---
+## 🛠️ Contribuir e Desenvolver
+Para correr a suite de testes e submeter pull requests:
+```bash
+git clone https://github.com/matheuspasche/pycreditools.git
+cd pycreditools
+pip install -e .[dev]
+pytest tests/
+```
+## 📜 Licença
+Distribuído sob licença MIT. Desenvolvido para a engenharia financeira moderna.

pycreditools-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,75 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "pycreditools"
+version = "0.1.0"
+description = "Credit Risk Simulation and Policy Optimization — Python edition of creditools"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.10"
+authors = [
+    {name = "Matheus Pasche", email = "matheuspasche@outlook.com"},
+]
+keywords = [
+    "credit-risk",
+    "simulation",
+    "policy-optimization",
+    "risk-management",
+    "ward-clustering",
+    "information-value",
+    "credit-scoring",
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Financial and Insurance Industry",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+    "Topic :: Office/Business :: Financial",
+    "Typing :: Typed",
+]
+dependencies = [
+    "pandas>=2.0",
+    "numpy>=1.24",
+]
+[project.optional-dependencies]
+viz = ["plotly>=5.0"]
+parallel = ["joblib>=1.3"]
+accel = ["numba>=0.58"]
+all = [
+    "plotly>=5.0",
+    "joblib>=1.3",
+    "numba>=0.58",
+]
+dev = [
+    "pytest>=7.0",
+    "pytest-cov>=4.0",
+    "ruff>=0.4",
+]
+[project.urls]
+Homepage = "https://github.com/matheuspasche/pycreditools"
+Repository = "https://github.com/matheuspasche/pycreditools"
+Issues = "https://github.com/matheuspasche/pycreditools/issues"
+[tool.hatch.build.targets.wheel]
+packages = ["src/pycreditools"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-v --tb=short"
+[tool.ruff]
+target-version = "py310"
+line-length = 100
+[tool.ruff.lint]
+select = ["E", "F", "I", "W", "UP"]

pycreditools-0.1.0/src/pycreditools/__init__.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""
+pycreditools: A Python library for credit risk policy simulation and analysis.
+"""
+from ._types import SimulationMethod, ClusteringMethod, Quadrant, StageDirection, PolicySummary
+from .stages import Stage, CutoffStage, FilterStage, RateStage
+from .stress import StressScenario, AggravationStress, MonotonicStress, CustomStress
+from .policy import CreditPolicy
+from .simulation import CreditSimResults, run_simulation
+from .performance import summarize_results, compare_policies
+from .analysis import run_tradeoff_analysis
+from .grouping import find_risk_groups, RiskGroupResult, GroupingRecipe
+from .screening import screen_risk_segments, ScreeningResult, ScreeningRecipe
+from .sample_data import generate_sample_data
+__all__ = [
+    "SimulationMethod",
+    "ClusteringMethod",
+    "Quadrant",
+    "StageDirection",
+    "PolicySummary",
+    "Stage",
+    "CutoffStage",
+    "FilterStage",
+    "RateStage",
+    "StressScenario",
+    "AggravationStress",
+    "MonotonicStress",
+    "CustomStress",
+    "CreditPolicy",
+    "CreditSimResults",
+    "run_simulation",
+    "summarize_results",
+    "compare_policies",
+    "run_tradeoff_analysis",
+    "find_risk_groups",
+    "RiskGroupResult",
+    "GroupingRecipe",
+    "screen_risk_segments",
+    "ScreeningResult",
+    "ScreeningRecipe",
+    "generate_sample_data",
+]

pycreditools-0.1.0/src/pycreditools/_kernels/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .ward import ward_cluster
+from .iv import iv_cluster
+from .tier_metrics import calculate_tier_metrics
+__all__ = ["ward_cluster", "iv_cluster", "calculate_tier_metrics"]

pycreditools-0.1.0/src/pycreditools/_kernels/iv.py ADDED Viewed

@@ -0,0 +1,167 @@
+from __future__ import annotations
+import numpy as np
# Merge priority tiers. A candidate merge is ranked by ``(tier, cost)``, so a
# lower tier always wins regardless of how large the numeric cost/penalty is.
_TIER_EMPTY = 0  # one of the two groups has zero volume
_TIER_MONOTONIC = 1  # left PD >= right PD (PD must strictly increase)
_TIER_LOW_VOLUME = 2  # one of the two groups is below ``min_vol_ratio``
_TIER_OPTIONAL = 3  # nothing forces the merge; only done to reach max_groups


def _monthly_penalties(mv1, mb1, mv2, mb2):
    """Return ``(crossings, volatility)`` for merging two adjacent groups.

    ``crossings`` counts the months (with volume on both sides) in which the
    left group's monthly PD is >= the right group's. ``volatility`` is the
    standard deviation of the merged group's monthly PD over months that have
    any volume. Both are 0.0 when no month qualifies.
    """
    volatility = 0.0
    merged_vol = mv1 + mv2
    observed = merged_vol > 0
    if observed.any():
        volatility = float(np.std((mb1 + mb2)[observed] / merged_vol[observed]))

    crossings = 0.0
    comparable = (mv1 > 0) & (mv2 > 0)
    if comparable.any():
        left_pd = mb1[comparable] / mv1[comparable]
        right_pd = mb2[comparable] / mv2[comparable]
        crossings = float(np.sum(left_pd >= right_pd))
    return crossings, volatility


def iv_cluster(
    pd_values: np.ndarray,
    volumes: np.ndarray,
    max_groups: int,
    min_vol_ratio: float,
    lambda_cross: float = 0.5,
    lambda_vol: float = 0.2,
    monthly_vols: np.ndarray | None = None,
    monthly_bads: np.ndarray | None = None,
) -> np.ndarray:
    """
    IV-based agglomerative clustering of adjacent bins, with constraints.

    Adjacent bins are merged greedily. Merges are mandatory, in this priority
    order, while any of the following holds for a pair of neighbours:
    one side has zero volume; the left PD is >= the right PD; one side holds
    less than ``min_vol_ratio`` of the total volume. Once no mandatory merge
    remains, the cheapest merge (IV loss plus penalties) is applied until at
    most ``max_groups`` groups are left.

    Args:
        pd_values: float[n_bins] - mean PD per bin (array-like).
        volumes: int[n_bins] - volume per bin (array-like).
        max_groups: upper bound on the number of output clusters; mandatory
            merges may leave fewer groups than this.
        min_vol_ratio: min fraction of total volume per cluster.
        lambda_cross: penalty weight for vintage crossings.
        lambda_vol: penalty weight for PD volatility.
        monthly_vols: int[n_bins, n_months]; monthly penalties are only
            applied when both monthly arrays are given.
        monthly_bads: int[n_bins, n_months].
    Returns:
        int64[n_bins] - 1-based group assignments.
    Raises:
        ValueError: if ``volumes`` does not match ``pd_values`` in shape, or
            the monthly arrays disagree with each other or with ``n_bins``.
    """
    pd_arr = np.asarray(pd_values, dtype=np.float64)
    n_bins = len(pd_arr)
    if n_bins == 0:
        return np.array([], dtype=np.int64)

    # Working copies: the caller's arrays are never mutated.
    current_vol = np.array(volumes, dtype=np.float64)
    if current_vol.shape != pd_arr.shape:
        raise ValueError(
            f"volumes has shape {current_vol.shape}, expected {pd_arr.shape}"
        )
    current_bads = pd_arr * current_vol

    total_vol = current_vol.sum()
    total_bads = current_bads.sum()
    total_goods = total_vol - total_bads

    use_monthly = monthly_vols is not None and monthly_bads is not None
    if use_monthly:
        month_vol = np.array(monthly_vols, dtype=np.float64)
        month_bad = np.array(monthly_bads, dtype=np.float64)
        if month_vol.shape != month_bad.shape or month_vol.shape[:1] != (n_bins,):
            raise ValueError(
                "monthly_vols and monthly_bads must share a shape of "
                f"({n_bins}, n_months); got {month_vol.shape} and {month_bad.shape}"
            )

    def iv_term(bads, vols):
        # Information-value contribution of a single group; defined as 0 when
        # either distribution share is non-positive (log undefined).
        if total_goods <= 0 or total_bads <= 0:
            return 0.0
        share_bad = bads / total_bads
        share_good = (vols - bads) / total_goods
        if share_bad <= 0 or share_good <= 0:
            return 0.0
        return (share_good - share_bad) * np.log(share_good / share_bad)

    active = np.ones(n_bins, dtype=bool)
    group_ids = np.arange(n_bins)  # representative bin index of each bin's group

    while True:
        active_idx = np.flatnonzero(active)
        if len(active_idx) <= 1:
            break

        best_key = None
        best_pos = -1
        for pos in range(len(active_idx) - 1):
            left = active_idx[pos]
            right = active_idx[pos + 1]
            v1, v2 = current_vol[left], current_vol[right]
            b1, b2 = current_bads[left], current_bads[right]

            if v1 == 0 or v2 == 0:
                key = (_TIER_EMPTY, 0.0)
            elif b1 / v1 >= b2 / v2:
                key = (_TIER_MONOTONIC, 0.0)
            else:
                iv_loss = iv_term(b1, v1) + iv_term(b2, v2) - iv_term(b1 + b2, v1 + v2)
                crossings = 0.0
                volatility = 0.0
                if use_monthly:
                    crossings, volatility = _monthly_penalties(
                        month_vol[left], month_bad[left],
                        month_vol[right], month_bad[right],
                    )
                cost = iv_loss + lambda_cross * crossings + lambda_vol * volatility
                undersized = (v1 / total_vol < min_vol_ratio) or (
                    v2 / total_vol < min_vol_ratio
                )
                # The tier, not an offset on the cost, marks the merge as
                # mandatory, so a large penalty can never hide it.
                key = (_TIER_LOW_VOLUME if undersized else _TIER_OPTIONAL, cost)

            # Strict comparison: on ties the left-most pair wins.
            if best_key is None or key < best_key:
                best_key = key
                best_pos = pos

        # Stop once nothing is mandatory and the group budget is met.
        if best_key[0] == _TIER_OPTIONAL and len(active_idx) <= max_groups:
            break

        keep = active_idx[best_pos]
        drop = active_idx[best_pos + 1]
        current_vol[keep] += current_vol[drop]
        current_bads[keep] += current_bads[drop]
        if use_monthly:
            month_vol[keep] += month_vol[drop]
            month_bad[keep] += month_bad[drop]
        active[drop] = False
        group_ids[group_ids == drop] = keep

    # Remap representative indices to 1-based sequential labels. Every value in
    # group_ids is a surviving index and survivors are sorted ascending, so
    # searchsorted yields each group's exact rank.
    survivors = np.flatnonzero(active)
    return (np.searchsorted(survivors, group_ids) + 1).astype(np.int64)