proscore 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. {proscore-0.2.0/src/proscore.egg-info → proscore-0.2.2}/PKG-INFO +56 -2
  2. {proscore-0.2.0 → proscore-0.2.2}/README.md +55 -1
  3. {proscore-0.2.0 → proscore-0.2.2}/pyproject.toml +1 -1
  4. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/__init__.py +37 -1
  5. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/_pipeline_config.py +91 -25
  6. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/evaluate/__init__.py +6 -1
  7. proscore-0.2.2/src/proscore/evaluate/_diagnose.py +620 -0
  8. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/inspect/__init__.py +2 -2
  9. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/inspect/_stability.py +87 -45
  10. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/report/_builder.py +36 -0
  11. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/rules/_miner.py +16 -0
  12. {proscore-0.2.0 → proscore-0.2.2/src/proscore.egg-info}/PKG-INFO +56 -2
  13. {proscore-0.2.0 → proscore-0.2.2}/src/proscore.egg-info/SOURCES.txt +2 -0
  14. proscore-0.2.2/tests/test_diagnose.py +279 -0
  15. {proscore-0.2.0 → proscore-0.2.2}/tests/test_inspect.py +26 -10
  16. {proscore-0.2.0 → proscore-0.2.2}/tests/test_pipeline_rules.py +4 -0
  17. {proscore-0.2.0 → proscore-0.2.2}/tests/test_rules.py +30 -0
  18. {proscore-0.2.0 → proscore-0.2.2}/LICENSE +0 -0
  19. {proscore-0.2.0 → proscore-0.2.2}/setup.cfg +0 -0
  20. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/__main__.py +0 -0
  21. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/_data/__init__.py +0 -0
  22. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/_spec.py +0 -0
  23. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/__init__.py +0 -0
  24. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_adjust.py +0 -0
  25. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_base.py +0 -0
  26. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_binning.py +0 -0
  27. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_categorical.py +0 -0
  28. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_chi.py +0 -0
  29. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_distance.py +0 -0
  30. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_frequency.py +0 -0
  31. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_tree.py +0 -0
  32. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/binning/_woe.py +0 -0
  33. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/evaluate/_metrics.py +0 -0
  34. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/inspect/_correlation.py +0 -0
  35. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/inspect/_detect.py +0 -0
  36. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/inspect/_quality.py +0 -0
  37. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/modeling/__init__.py +0 -0
  38. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/modeling/_scorecard.py +0 -0
  39. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/monitor/__init__.py +0 -0
  40. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/monitor/_monitor.py +0 -0
  41. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/report/__init__.py +0 -0
  42. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/rules/__init__.py +0 -0
  43. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/selection/__init__.py +0 -0
  44. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/selection/_filter.py +0 -0
  45. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/selection/_screen.py +0 -0
  46. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/selection/_stepwise.py +0 -0
  47. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/transform/__init__.py +0 -0
  48. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/transform/_woe.py +0 -0
  49. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/utils/__init__.py +0 -0
  50. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/utils/_config.py +0 -0
  51. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/utils/_exceptions.py +0 -0
  52. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/utils/_presets.py +0 -0
  53. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/utils/_psi.py +0 -0
  54. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/viz/__init__.py +0 -0
  55. {proscore-0.2.0 → proscore-0.2.2}/src/proscore/viz/_plots.py +0 -0
  56. {proscore-0.2.0 → proscore-0.2.2}/src/proscore.egg-info/dependency_links.txt +0 -0
  57. {proscore-0.2.0 → proscore-0.2.2}/src/proscore.egg-info/entry_points.txt +0 -0
  58. {proscore-0.2.0 → proscore-0.2.2}/src/proscore.egg-info/requires.txt +0 -0
  59. {proscore-0.2.0 → proscore-0.2.2}/src/proscore.egg-info/top_level.txt +0 -0
  60. {proscore-0.2.0 → proscore-0.2.2}/tests/test_binning.py +0 -0
  61. {proscore-0.2.0 → proscore-0.2.2}/tests/test_docs_examples.py +0 -0
  62. {proscore-0.2.0 → proscore-0.2.2}/tests/test_evaluate.py +0 -0
  63. {proscore-0.2.0 → proscore-0.2.2}/tests/test_evaluate_period.py +0 -0
  64. {proscore-0.2.0 → proscore-0.2.2}/tests/test_filter.py +0 -0
  65. {proscore-0.2.0 → proscore-0.2.2}/tests/test_pipeline.py +0 -0
  66. {proscore-0.2.0 → proscore-0.2.2}/tests/test_presets.py +0 -0
  67. {proscore-0.2.0 → proscore-0.2.2}/tests/test_report.py +0 -0
  68. {proscore-0.2.0 → proscore-0.2.2}/tests/test_scorecard.py +0 -0
  69. {proscore-0.2.0 → proscore-0.2.2}/tests/test_screen.py +0 -0
  70. {proscore-0.2.0 → proscore-0.2.2}/tests/test_spec.py +0 -0
  71. {proscore-0.2.0 → proscore-0.2.2}/tests/test_stepwise.py +0 -0
  72. {proscore-0.2.0 → proscore-0.2.2}/tests/test_transform.py +0 -0
  73. {proscore-0.2.0 → proscore-0.2.2}/tests/test_woe.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proscore
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Production-grade scorecard development toolkit
5
5
  Author: Liqiwei
6
6
  License-Expression: MIT
@@ -45,10 +45,47 @@ Dynamic: license-file
45
45
  **生产级评分卡开发工具包**
46
46
  端到端的确定性评分卡建模管线,为银行和金融机构的信用评分卡建模场景设计, 满足对可解释性、合规性和稳定性的要求。
47
47
 
48
+ ## Why ProScore
49
+
50
+ ProScore 不是通用机器学习框架,而是面向金融评分卡落地的**工程化工具包**。
51
+ 目标是把“能建模”升级为“可评审、可复现、可上线、可监控”。
52
+
53
+ 适合以下场景:
54
+
55
+ - 银行/消金/互金团队做信用评分卡开发与迭代
56
+ - 研发与业务分析师需要通过 Python + Excel 协同建模
57
+ - 需要输出监管/评审材料,并建立投产后监控闭环
58
+
59
+ ## 核心亮点
60
+
61
+ 1. **单调性工程化(关键差异)**
62
+ - 支持变量级单调方向配置(increasing/decreasing/u/inverted_u/none)
63
+ - 支持自动单调调整,减少人工反复调箱
64
+ - 单调配置可模板化复用,跨项目保持一致性
65
+
66
+ 2. **端到端确定性流程**
67
+ - `detect -> prefilter -> bin -> refine -> transform -> select -> fit -> evaluate -> diagnose -> report -> monitor`
68
+ - 同样输入得到同样输出,便于审计、复盘和团队协作
69
+
70
+ 3. **三种使用方式统一口径**
71
+ - 模块化 API(灵活)
72
+ - 链式 API(高效)
73
+ - Excel 配置驱动(零代码)
74
+ - 三种入口共享同一建模逻辑,减少“口径不一致”
75
+
76
+ 4. **诊断与报告一体化**
77
+ - `diagnose()` 提供 4 层结构化诊断(区分力/过拟合/稳定性/变量质量)
78
+ - 支持阈值自定义(`thresholds=...`)
79
+ - `ReportBuilder` 自动纳入诊断章节,提升评审效率
80
+
81
+ 5. **投产后监控闭环**
82
+ - 支持 PSI、KS 衰减、规则告警、分期追踪
83
+ - 帮助形成“上线—监控—重训”的持续运营机制
48
84
  ---
49
85
 
50
86
  ## 目录
51
87
 
88
+ - [入门教程(Notebook)](#入门教程notebook)
52
89
  - [三种使用方式](#三种使用方式)
53
90
  - [核心功能概览](#核心功能概览)
54
91
  - [安装](#安装)
@@ -57,6 +94,17 @@ Dynamic: license-file
57
94
 
58
95
  ---
59
96
 
97
+ ## 入门教程(Notebook)
98
+
99
+ 推荐按下面顺序阅读,先跑通再深入:
100
+
101
+ | Notebook | 适合谁 | 你会得到什么 |
102
+ |----------|--------|--------------|
103
+ | [**ProScore快速开始**](notebooks/ProScore快速开始.ipynb) | 第一次上手 | 5–10 分钟链式单路径,只看 KS/AUC/PSI、入模变量、诊断摘要 |
104
+ | [**ProScore完整建模流程**](notebooks/ProScore完整建模流程.ipynb) | 准备落地生产 | 模块化 + 链式对照、CFG 参数单一真源、规则挖掘、监控、报告、诊断 |
105
+
106
+ > 快速开始刻意保持精简(不含规则挖掘等可选步骤);完整版是权威样例,含 `[主线]` / `[可选]` 章节导航与一致性断言。
107
+
60
108
  ## 三种使用方式
61
109
 
62
110
  ProScore 提供三种递进的使用方式,从零代码到完全自定义,按需选择。
@@ -106,7 +154,11 @@ p = (
106
154
 
107
155
  > `train` 必传,`test` 和 `oot` 可选。分箱/WOE 只在 train 上拟合;逐步回归用 test 监控过拟合;OOT 仅用于最终评估。
108
156
  >
109
- > 完整教程见 [notebooks/ProScore完整建模流程.ipynb](notebooks/ProScore完整建模流程.ipynb)
157
+ > Notebook 教程见上方 [入门教程](#入门教程notebook)
158
+ >
159
+ > **诊断增强**(v0.2+):`.evaluate().diagnose()` 生成 4 层结构化健康报告(含根因变量),支持 `thresholds=...` 自定义阈值。
160
+ >
161
+ > **参数单一真源(推荐)**:`CFG` + `PipelineSpec`(`apply(spec)`)确保模块化与链式同参同结果,详见 [pipeline-spec.md](docs/使用指南/pipeline-spec.md)。
110
162
 
111
163
  ### C. Excel 配置驱动
112
164
 
@@ -138,9 +190,11 @@ proscore run my_project/pipeline_template.xlsx --output-script run.py
138
190
  |------------|-----------------------------------------------|---------------------------------------|
139
191
  | 数据探查 | IV/AUC/KS 三指标 + PSI 时序稳定性 + 相关性/VIF | 快速筛选优质变量,识别分布漂移风险 |
140
192
  | 分箱 | 4 种单调趋势 + 5 种分箱方法 + 两阶段趋势校验 | 确保 WOE 趋势符合业务逻辑,满足监管 |
193
+ | 规则挖掘 | 单变量/交叉规则 + Lift/Precision/Recall 联合筛选 | 产出可解释策略规则,与评分卡变量互斥 |
141
194
  | 逐步回归 | 双向选择 + 五重约束(p值/符号/VIF/相关/来源) | 严谨的多重共线性控制与维度归属管理 |
142
195
  | 模型监控 | Score/Feature PSI + 规则引擎告警 + JSON 持久化 | 投产后持续验证,自动风险预警 |
143
196
  | 报告生成 | 7 章自动 Markdown 报告(含图表) | 银保监合规文档一键生成 |
197
+ | 模型诊断 | 4 层健康检查 + 根因定位 + 可自定义阈值 | 投产前自动风险识别,支持策略微调 |
144
198
 
145
199
  ### 设计原则
146
200
 
@@ -7,10 +7,47 @@
7
7
  **生产级评分卡开发工具包**
8
8
  端到端的确定性评分卡建模管线,为银行和金融机构的信用评分卡建模场景设计, 满足对可解释性、合规性和稳定性的要求。
9
9
 
10
+ ## Why ProScore
11
+
12
+ ProScore 不是通用机器学习框架,而是面向金融评分卡落地的**工程化工具包**。
13
+ 目标是把“能建模”升级为“可评审、可复现、可上线、可监控”。
14
+
15
+ 适合以下场景:
16
+
17
+ - 银行/消金/互金团队做信用评分卡开发与迭代
18
+ - 研发与业务分析师需要通过 Python + Excel 协同建模
19
+ - 需要输出监管/评审材料,并建立投产后监控闭环
20
+
21
+ ## 核心亮点
22
+
23
+ 1. **单调性工程化(关键差异)**
24
+ - 支持变量级单调方向配置(increasing/decreasing/u/inverted_u/none)
25
+ - 支持自动单调调整,减少人工反复调箱
26
+ - 单调配置可模板化复用,跨项目保持一致性
27
+
28
+ 2. **端到端确定性流程**
29
+ - `detect -> prefilter -> bin -> refine -> transform -> select -> fit -> evaluate -> diagnose -> report -> monitor`
30
+ - 同样输入得到同样输出,便于审计、复盘和团队协作
31
+
32
+ 3. **三种使用方式统一口径**
33
+ - 模块化 API(灵活)
34
+ - 链式 API(高效)
35
+ - Excel 配置驱动(零代码)
36
+ - 三种入口共享同一建模逻辑,减少“口径不一致”
37
+
38
+ 4. **诊断与报告一体化**
39
+ - `diagnose()` 提供 4 层结构化诊断(区分力/过拟合/稳定性/变量质量)
40
+ - 支持阈值自定义(`thresholds=...`)
41
+ - `ReportBuilder` 自动纳入诊断章节,提升评审效率
42
+
43
+ 5. **投产后监控闭环**
44
+ - 支持 PSI、KS 衰减、规则告警、分期追踪
45
+ - 帮助形成“上线—监控—重训”的持续运营机制
10
46
  ---
11
47
 
12
48
  ## 目录
13
49
 
50
+ - [入门教程(Notebook)](#入门教程notebook)
14
51
  - [三种使用方式](#三种使用方式)
15
52
  - [核心功能概览](#核心功能概览)
16
53
  - [安装](#安装)
@@ -19,6 +56,17 @@
19
56
 
20
57
  ---
21
58
 
59
+ ## 入门教程(Notebook)
60
+
61
+ 推荐按下面顺序阅读,先跑通再深入:
62
+
63
+ | Notebook | 适合谁 | 你会得到什么 |
64
+ |----------|--------|--------------|
65
+ | [**ProScore快速开始**](notebooks/ProScore快速开始.ipynb) | 第一次上手 | 5–10 分钟链式单路径,只看 KS/AUC/PSI、入模变量、诊断摘要 |
66
+ | [**ProScore完整建模流程**](notebooks/ProScore完整建模流程.ipynb) | 准备落地生产 | 模块化 + 链式对照、CFG 参数单一真源、规则挖掘、监控、报告、诊断 |
67
+
68
+ > 快速开始刻意保持精简(不含规则挖掘等可选步骤);完整版是权威样例,含 `[主线]` / `[可选]` 章节导航与一致性断言。
69
+
22
70
  ## 三种使用方式
23
71
 
24
72
  ProScore 提供三种递进的使用方式,从零代码到完全自定义,按需选择。
@@ -68,7 +116,11 @@ p = (
68
116
 
69
117
  > `train` 必传,`test` 和 `oot` 可选。分箱/WOE 只在 train 上拟合;逐步回归用 test 监控过拟合;OOT 仅用于最终评估。
70
118
  >
71
- > 完整教程见 [notebooks/ProScore完整建模流程.ipynb](notebooks/ProScore完整建模流程.ipynb)
119
+ > Notebook 教程见上方 [入门教程](#入门教程notebook)
120
+ >
121
+ > **诊断增强**(v0.2+):`.evaluate().diagnose()` 生成 4 层结构化健康报告(含根因变量),支持 `thresholds=...` 自定义阈值。
122
+ >
123
+ > **参数单一真源(推荐)**:`CFG` + `PipelineSpec`(`apply(spec)`)确保模块化与链式同参同结果,详见 [pipeline-spec.md](docs/使用指南/pipeline-spec.md)。
72
124
 
73
125
  ### C. Excel 配置驱动
74
126
 
@@ -100,9 +152,11 @@ proscore run my_project/pipeline_template.xlsx --output-script run.py
100
152
  |------------|-----------------------------------------------|---------------------------------------|
101
153
  | 数据探查 | IV/AUC/KS 三指标 + PSI 时序稳定性 + 相关性/VIF | 快速筛选优质变量,识别分布漂移风险 |
102
154
  | 分箱 | 4 种单调趋势 + 5 种分箱方法 + 两阶段趋势校验 | 确保 WOE 趋势符合业务逻辑,满足监管 |
155
+ | 规则挖掘 | 单变量/交叉规则 + Lift/Precision/Recall 联合筛选 | 产出可解释策略规则,与评分卡变量互斥 |
103
156
  | 逐步回归 | 双向选择 + 五重约束(p值/符号/VIF/相关/来源) | 严谨的多重共线性控制与维度归属管理 |
104
157
  | 模型监控 | Score/Feature PSI + 规则引擎告警 + JSON 持久化 | 投产后持续验证,自动风险预警 |
105
158
  | 报告生成 | 7 章自动 Markdown 报告(含图表) | 银保监合规文档一键生成 |
159
+ | 模型诊断 | 4 层健康检查 + 根因定位 + 可自定义阈值 | 投产前自动风险识别,支持策略微调 |
106
160
 
107
161
  ### 设计原则
108
162
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "proscore"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Production-grade scorecard development toolkit"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -19,7 +19,7 @@ from proscore.rules import RuleMiner
19
19
  from proscore.selection import Filter, StepwiseSelector, assess_screen
20
20
  from proscore.transform import WOETransformer
21
21
 
22
- __version__ = "0.2.0"
22
+ __version__ = "0.2.2"
23
23
 
24
24
 
25
25
  class ProScore:
@@ -426,6 +426,42 @@ class ProScore:
426
426
  )
427
427
  return self
428
428
 
429
+ def diagnose(self, *, print_report: bool = True, **kwargs) -> ProScore:
430
+ """Run model health diagnosis (post-evaluate) and optionally print a formatted report.
431
+
432
+ By default prints the human-readable report (for notebook / interactive use).
433
+ Set ``print_report=False`` to obtain the :class:`~proscore.evaluate.DiagnosisReport`
434
+ silently via the ``diagnosis_`` property.
435
+
436
+ Pass additional artefacts for deeper root-cause analysis, or override thresholds::
437
+
438
+ p.diagnose(
439
+ binning=p.binner_,
440
+ selector=p.selector_,
441
+ stability=stability_result,
442
+ period_eval=period_result,
443
+ thresholds={"discrimination": {"ks_critical": 0.18}},
444
+ )
445
+ """
446
+ from proscore.evaluate import diagnose as _diagnose
447
+
448
+ report = _diagnose(
449
+ self.eval_result,
450
+ binning=kwargs.pop("binning", self._binner),
451
+ selector=kwargs.pop("selector", self._selector),
452
+ y_train=self._train_y(),
453
+ **kwargs,
454
+ )
455
+ if print_report:
456
+ print(report)
457
+ self._diagnosis = report
458
+ return self
459
+
460
+ @property
461
+ def diagnosis_(self):
462
+ """The :class:`DiagnosisReport` from the last :meth:`diagnose` call."""
463
+ return getattr(self, "_diagnosis", None)
464
+
429
465
  # ── properties ────────────────────────────────────────────────────────────
430
466
 
431
467
  @property
@@ -20,6 +20,46 @@ from typing import Any
20
20
  import numpy as np
21
21
  import pandas as pd
22
22
 
23
+
24
+ def _train_test_from_dev_pool(
25
+ dev_pool: pd.DataFrame,
26
+ *,
27
+ target: str | None,
28
+ train_ratio: float,
29
+ random_state: int,
30
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
31
+ """Stratified train/test split when *target* has ≥2 samples per class."""
32
+ if (
33
+ target
34
+ and target in dev_pool.columns
35
+ and len(dev_pool) > 1
36
+ and dev_pool[target].nunique() >= 2
37
+ and int(dev_pool[target].value_counts().min()) >= 2
38
+ ):
39
+ from sklearn.model_selection import train_test_split
40
+
41
+ tr, te = train_test_split(
42
+ dev_pool,
43
+ train_size=train_ratio,
44
+ stratify=dev_pool[target],
45
+ random_state=random_state,
46
+ )
47
+ return tr.reset_index(drop=True), te.reset_index(drop=True)
48
+
49
+ n = len(dev_pool)
50
+ rng = np.random.RandomState(random_state)
51
+ idx = rng.permutation(n)
52
+ split = int(n * train_ratio)
53
+ if n > 1:
54
+ split = max(1, min(n - 1, split))
55
+ else:
56
+ split = n
57
+ return (
58
+ dev_pool.iloc[idx[:split]].reset_index(drop=True),
59
+ dev_pool.iloc[idx[split:]].reset_index(drop=True),
60
+ )
61
+
62
+
23
63
  # ── constants ────────────────────────────────────────────────────────────────
24
64
 
25
65
  _DEFAULT_GLOBAL = {
@@ -99,6 +139,8 @@ _PARAM_SPEC = {
99
139
  "变量不足 n_min 时是否强制补齐"),
100
140
  "perturbation": ("on", ["on", "off"], "str",
101
141
  "是否启用扰动搜索"),
142
+ "max_iter_round": (100, None, "int", 2, 200,
143
+ "逐步回归最大迭代轮数"),
102
144
  "odds": (20, None, "int", 10, 100,
103
145
  "基准好坏比(1:20 ≈ 坏账率 4.8%)"),
104
146
  "pdo": (20, None, "int", 10, 50,
@@ -115,6 +157,10 @@ _PARAM_SPEC = {
115
157
  "决策树最大深度(tree 模式)", "rules"),
116
158
  "rm_min_lift": (3.0, None, "float", 1.0, 10.0,
117
159
  "最小 Lift(precision / 整体坏账率)", "rules"),
160
+ "rm_min_precision": (None, None, "float", 0.0, 1.0,
161
+ "最小 Precision(留空表示不启用)", "rules"),
162
+ "rm_min_recall": (None, None, "float", 0.0, 1.0,
163
+ "最小 Recall(留空表示不启用)", "rules"),
118
164
  "rm_min_hit_rate": (0.02, None, "float", 0.001, 0.5,
119
165
  "最小命中率(覆盖样本占比)", "rules"),
120
166
  "rm_max_hit_rate": (0.20, None, "float", 0.01, 0.8,
@@ -307,7 +353,8 @@ class PipelineConfig:
307
353
  target[key] = spec[0]
308
354
  elif section == "modeling" and (stage is None):
309
355
  if key in ("n_min", "n_max", "pvalue_threshold", "coef_sign",
310
- "force_fill", "perturbation", "odds", "pdo", "base_score"):
356
+ "force_fill", "perturbation", "max_iter_round",
357
+ "odds", "pdo", "base_score"):
311
358
  target[key] = spec[0]
312
359
  elif section == "rules" and stage == "rules":
313
360
  target[key.removeprefix("rm_")] = spec[0]
@@ -321,7 +368,8 @@ class PipelineConfig:
321
368
  # ── Rules section: bare Excel keys → rm_ prefixed _PARAM_SPEC keys ──
322
369
  if section == "rules":
323
370
  valid = ("method", "max_depth", "max_tree_depth",
324
- "min_lift", "min_hit_rate", "max_hit_rate",
371
+ "min_lift", "min_precision", "min_recall",
372
+ "min_hit_rate", "max_hit_rate",
325
373
  "max_rules", "random_state", "export_csv")
326
374
  # Accept both bare keys and legacy rm_ prefixed keys
327
375
  bare_key = key.removeprefix("rm_")
@@ -351,7 +399,8 @@ class PipelineConfig:
351
399
  continue
352
400
  if section == "modeling":
353
401
  valid = ("n_min", "n_max", "pvalue_threshold", "coef_sign",
354
- "force_fill", "perturbation", "odds", "pdo", "base_score")
402
+ "force_fill", "perturbation", "max_iter_round",
403
+ "odds", "pdo", "base_score")
355
404
  if key not in valid:
356
405
  continue
357
406
 
@@ -546,12 +595,14 @@ class PipelineConfig:
546
595
 
547
596
  def _load_data(self) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame | None]:
548
597
  """Load and split data according to config."""
549
- np.random.seed(int(self.global_cfg.get("random_seed", 42)))
598
+ seed = int(self.global_cfg.get("random_seed", 42))
599
+ np.random.seed(seed)
550
600
 
551
601
  fpath = self.data_cfg["data_file"]
552
602
  time_col = self.data_cfg.get("time_col")
553
603
  id_col = self.data_cfg.get("id_col")
554
604
  train_ratio = float(self.data_cfg.get("train_ratio", 0.7))
605
+ target = self.data_cfg.get("target")
555
606
 
556
607
  # Load
557
608
  if str(fpath).endswith((".xlsx", ".xls")):
@@ -602,12 +653,13 @@ class PipelineConfig:
602
653
  # Drop time_col from dev_pool
603
654
  dev_pool = dev_pool.drop(columns=[time_col], errors="ignore")
604
655
 
605
- # Random split within dev pool
606
- n = len(dev_pool)
607
- idx = np.random.permutation(n)
608
- split = int(n * train_ratio)
609
- train = dev_pool.iloc[idx[:split]].reset_index(drop=True)
610
- test = dev_pool.iloc[idx[split:]].reset_index(drop=True)
656
+ tgt = str(target).strip() if target else None
657
+ train, test = _train_test_from_dev_pool(
658
+ dev_pool,
659
+ target=tgt if tgt else None,
660
+ train_ratio=train_ratio,
661
+ random_state=seed,
662
+ )
611
663
  if oot is not None:
612
664
  oot = oot.reset_index(drop=True)
613
665
 
@@ -754,10 +806,19 @@ class PipelineConfig:
754
806
  return result
755
807
 
756
808
  def _build_prefilter_kw(self) -> dict[str, Any]:
757
- kw: dict[str, Any] = {}
809
+ kw: dict[str, Any] = {
810
+ # 粗筛不做 IV/PSI(与链式 Notebook 常见写法一致;精筛在 refine)
811
+ "iv_range": None,
812
+ "max_psi": None,
813
+ }
814
+ cfg = self.screening_cfg
758
815
  for key in ("max_missing_rate", "max_one_value_rate"):
759
- if key in self.screening_cfg:
760
- kw[key] = self.screening_cfg[key]
816
+ if key in cfg:
817
+ kw[key] = cfg[key]
818
+ if cfg.get("max_corr") is not None:
819
+ kw["max_corr"] = float(cfg["max_corr"])
820
+ if cfg.get("max_vif") is not None:
821
+ kw["max_vif"] = float(cfg["max_vif"])
761
822
  return kw
762
823
 
763
824
  def _build_binning_kw(self) -> dict[str, Any]:
@@ -794,7 +855,7 @@ class PipelineConfig:
794
855
  def _build_select_kw(self) -> dict[str, Any]:
795
856
  kw: dict[str, Any] = {}
796
857
  cfg = self.modeling_cfg
797
- for key in ("n_min", "n_max", "pvalue_threshold"):
858
+ for key in ("n_min", "n_max", "pvalue_threshold", "max_iter_round"):
798
859
  if key in cfg:
799
860
  kw[key] = cfg[key]
800
861
  cs = cfg.get("coef_sign", "positive")
@@ -816,7 +877,8 @@ class PipelineConfig:
816
877
  kw: dict[str, Any] = {}
817
878
  cfg = self.rules_cfg
818
879
  for key in ("method", "max_depth", "max_tree_depth", "min_lift",
819
- "min_hit_rate", "max_hit_rate", "max_rules", "random_state"):
880
+ "min_precision", "min_recall", "min_hit_rate",
881
+ "max_hit_rate", "max_rules", "random_state"):
820
882
  if key in cfg:
821
883
  kw[key] = cfg[key]
822
884
  return kw
@@ -873,6 +935,7 @@ class PipelineConfig:
873
935
  _w("import numpy as np")
874
936
  _w("import pandas as pd")
875
937
  _w("import proscore as ps")
938
+ _w("from proscore._pipeline_config import _train_test_from_dev_pool")
876
939
  _w("")
877
940
  _w(f"np.random.seed({seed})")
878
941
  _w("")
@@ -890,6 +953,7 @@ class PipelineConfig:
890
953
  oot_start = self.data_cfg.get("oot_start")
891
954
  oot_end = self.data_cfg.get("oot_end")
892
955
  _w("dev_pool = df.copy()")
956
+ _w("oot = None")
893
957
  if dev_start:
894
958
  _w(f"dev_pool = dev_pool[dev_pool[{time_col!r}] >= pd.Timestamp({str(dev_start)!r})]")
895
959
  if dev_end:
@@ -901,16 +965,16 @@ class PipelineConfig:
901
965
  _w("oot = df[oot_mask].drop(columns=[" + repr(time_col) + "]).reset_index(drop=True)")
902
966
  _w(f"dev_pool = dev_pool.drop(columns=[{time_col!r}])")
903
967
  _w("")
904
- _w("idx = np.random.permutation(len(dev_pool))")
905
- _w(f"n_train = int(len(dev_pool) * {train_ratio})")
906
- _w("train = dev_pool.iloc[idx[:n_train]].reset_index(drop=True)")
907
- _w("test = dev_pool.iloc[idx[n_train:]].reset_index(drop=True)")
908
968
  else:
909
- _w("idx = np.random.permutation(len(df))")
910
- _w(f"n_train = int(len(df) * {train_ratio})")
911
- _w("train = df.iloc[idx[:n_train]].reset_index(drop=True)")
912
- _w("test = df.iloc[idx[n_train:]].reset_index(drop=True)")
969
+ _w("dev_pool = df.copy()")
913
970
  _w("oot = None")
971
+ _w("")
972
+
973
+ tgt_py = repr(str(target)) if (target and str(target).strip()) else "None"
974
+ _w(
975
+ f"train, test = _train_test_from_dev_pool(dev_pool, target={tgt_py}, "
976
+ f"train_ratio={float(train_ratio)}, random_state={int(seed)})"
977
+ )
914
978
 
915
979
  _w("")
916
980
  _w("# ── 建模流水线 ──")
@@ -1125,11 +1189,13 @@ def generate_template(out_dir: str = ".") -> str:
1125
1189
  # ── Modeling ────────────────────────────────────────────────────────
1126
1190
  _write_params_sheet(writer, "Modeling",
1127
1191
  ["n_min", "n_max", "pvalue_threshold", "coef_sign",
1128
- "force_fill", "perturbation", "odds", "pdo", "base_score"])
1192
+ "force_fill", "perturbation", "max_iter_round",
1193
+ "odds", "pdo", "base_score"])
1129
1194
 
1130
1195
  # ── Rules ───────────────────────────────────────────────────────────
1131
1196
  _write_params_sheet(writer, "Rules",
1132
- ["method", "max_depth", "max_tree_depth", "min_lift", "min_hit_rate",
1197
+ ["method", "max_depth", "max_tree_depth", "min_lift",
1198
+ "min_precision", "min_recall", "min_hit_rate",
1133
1199
  "max_hit_rate", "max_rules", "random_state", "export_csv"],
1134
1200
  section="rules")
1135
1201
 
@@ -5,9 +5,14 @@ from __future__ import annotations
5
5
  import importlib
6
6
 
7
7
  _metrics = importlib.import_module("proscore.evaluate._metrics")
8
+ _diagnose = importlib.import_module("proscore.evaluate._diagnose")
8
9
 
9
10
  evaluate = _metrics.evaluate
10
11
  evaluate_by_period = getattr(_metrics, "evaluate_by_period", None)
12
+ diagnose = _diagnose.diagnose
13
+ DiagnosisReport = _diagnose.DiagnosisReport
14
+ DiagnosisIssue = _diagnose.DiagnosisIssue
15
+ DEFAULT_THRESHOLDS = _diagnose.DEFAULT_THRESHOLDS
11
16
 
12
17
  if evaluate_by_period is None:
13
18
  raise ImportError(
@@ -16,4 +21,4 @@ if evaluate_by_period is None:
16
21
  "(Kernel → Restart) to clear cached imports."
17
22
  )
18
23
 
19
- __all__ = ["evaluate", "evaluate_by_period"]
24
+ __all__ = ["evaluate", "evaluate_by_period", "diagnose", "DiagnosisReport", "DiagnosisIssue", "DEFAULT_THRESHOLDS"]