proscore 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proscore-0.1.0/LICENSE +21 -0
- proscore-0.1.0/PKG-INFO +192 -0
- proscore-0.1.0/README.md +154 -0
- proscore-0.1.0/pyproject.toml +81 -0
- proscore-0.1.0/setup.cfg +4 -0
- proscore-0.1.0/src/proscore/__init__.py +469 -0
- proscore-0.1.0/src/proscore/__main__.py +68 -0
- proscore-0.1.0/src/proscore/_data/__init__.py +93 -0
- proscore-0.1.0/src/proscore/_pipeline_config.py +1133 -0
- proscore-0.1.0/src/proscore/binning/__init__.py +8 -0
- proscore-0.1.0/src/proscore/binning/_adjust.py +280 -0
- proscore-0.1.0/src/proscore/binning/_base.py +42 -0
- proscore-0.1.0/src/proscore/binning/_binning.py +774 -0
- proscore-0.1.0/src/proscore/binning/_categorical.py +112 -0
- proscore-0.1.0/src/proscore/binning/_chi.py +197 -0
- proscore-0.1.0/src/proscore/binning/_distance.py +23 -0
- proscore-0.1.0/src/proscore/binning/_frequency.py +38 -0
- proscore-0.1.0/src/proscore/binning/_tree.py +34 -0
- proscore-0.1.0/src/proscore/binning/_woe.py +76 -0
- proscore-0.1.0/src/proscore/evaluate/__init__.py +19 -0
- proscore-0.1.0/src/proscore/evaluate/_metrics.py +331 -0
- proscore-0.1.0/src/proscore/inspect/__init__.py +9 -0
- proscore-0.1.0/src/proscore/inspect/_correlation.py +117 -0
- proscore-0.1.0/src/proscore/inspect/_detect.py +213 -0
- proscore-0.1.0/src/proscore/inspect/_quality.py +394 -0
- proscore-0.1.0/src/proscore/inspect/_stability.py +259 -0
- proscore-0.1.0/src/proscore/modeling/__init__.py +3 -0
- proscore-0.1.0/src/proscore/modeling/_scorecard.py +213 -0
- proscore-0.1.0/src/proscore/monitor/__init__.py +9 -0
- proscore-0.1.0/src/proscore/monitor/_monitor.py +549 -0
- proscore-0.1.0/src/proscore/report/__init__.py +5 -0
- proscore-0.1.0/src/proscore/report/_builder.py +1132 -0
- proscore-0.1.0/src/proscore/selection/__init__.py +11 -0
- proscore-0.1.0/src/proscore/selection/_filter.py +448 -0
- proscore-0.1.0/src/proscore/selection/_screen.py +83 -0
- proscore-0.1.0/src/proscore/selection/_stepwise.py +623 -0
- proscore-0.1.0/src/proscore/transform/__init__.py +3 -0
- proscore-0.1.0/src/proscore/transform/_woe.py +255 -0
- proscore-0.1.0/src/proscore/utils/__init__.py +62 -0
- proscore-0.1.0/src/proscore/utils/_config.py +5 -0
- proscore-0.1.0/src/proscore/utils/_exceptions.py +14 -0
- proscore-0.1.0/src/proscore/utils/_presets.py +135 -0
- proscore-0.1.0/src/proscore/utils/_psi.py +49 -0
- proscore-0.1.0/src/proscore/viz/__init__.py +15 -0
- proscore-0.1.0/src/proscore/viz/_plots.py +269 -0
- proscore-0.1.0/src/proscore.egg-info/PKG-INFO +192 -0
- proscore-0.1.0/src/proscore.egg-info/SOURCES.txt +63 -0
- proscore-0.1.0/src/proscore.egg-info/dependency_links.txt +1 -0
- proscore-0.1.0/src/proscore.egg-info/entry_points.txt +2 -0
- proscore-0.1.0/src/proscore.egg-info/requires.txt +16 -0
- proscore-0.1.0/src/proscore.egg-info/top_level.txt +1 -0
- proscore-0.1.0/tests/test_binning.py +149 -0
- proscore-0.1.0/tests/test_docs_examples.py +212 -0
- proscore-0.1.0/tests/test_evaluate.py +85 -0
- proscore-0.1.0/tests/test_evaluate_period.py +51 -0
- proscore-0.1.0/tests/test_filter.py +71 -0
- proscore-0.1.0/tests/test_inspect.py +151 -0
- proscore-0.1.0/tests/test_pipeline.py +109 -0
- proscore-0.1.0/tests/test_presets.py +104 -0
- proscore-0.1.0/tests/test_report.py +101 -0
- proscore-0.1.0/tests/test_scorecard.py +80 -0
- proscore-0.1.0/tests/test_screen.py +48 -0
- proscore-0.1.0/tests/test_stepwise.py +137 -0
- proscore-0.1.0/tests/test_transform.py +71 -0
- proscore-0.1.0/tests/test_woe.py +44 -0
proscore-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ProScore contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
proscore-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: proscore
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Production-grade scorecard development toolkit
|
|
5
|
+
Author: Liqiwei
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/lqwzlh/proscore
|
|
8
|
+
Project-URL: Repository, https://github.com/lqwzlh/proscore
|
|
9
|
+
Project-URL: Issues, https://github.com/lqwzlh/proscore/issues
|
|
10
|
+
Keywords: scorecard,credit-risk,woe,binning,credit-scoring,risk-modeling,financial,banking,logistic-regression
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.20
|
|
25
|
+
Requires-Dist: pandas>=1.5
|
|
26
|
+
Requires-Dist: scikit-learn>=1.2
|
|
27
|
+
Requires-Dist: statsmodels>=0.13
|
|
28
|
+
Provides-Extra: inspect-ml
|
|
29
|
+
Requires-Dist: xgboost>=2.0; extra == "inspect-ml"
|
|
30
|
+
Requires-Dist: lightgbm>=4.0; extra == "inspect-ml"
|
|
31
|
+
Provides-Extra: excel
|
|
32
|
+
Requires-Dist: openpyxl>=3.0; extra == "excel"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
36
|
+
Requires-Dist: ruff; extra == "dev"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# ProScore
|
|
40
|
+
|
|
41
|
+
[PyPI version](https://pypi.org/project/proscore/)
|
|
42
|
+
[Python versions](https://pypi.org/project/proscore/)
|
|
43
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
44
|
+
|
|
45
|
+
**生产级评分卡开发工具包**
|
|
46
|
+
端到端的确定性评分卡建模管线,为银行和金融机构的信用评分卡建模场景设计,满足对可解释性、合规性和稳定性的要求。
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## 目录
|
|
51
|
+
|
|
52
|
+
- [三种使用方式](#三种使用方式)
|
|
53
|
+
- [核心功能概览](#核心功能概览)
|
|
54
|
+
- [安装](#安装)
|
|
55
|
+
- [依赖](#依赖)
|
|
56
|
+
- [License](#license)
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## 三种使用方式
|
|
61
|
+
|
|
62
|
+
ProScore 提供三种递进的使用方式,从完全自定义到零代码,按需选择。
|
|
63
|
+
|
|
64
|
+
| 方式 | 适合 | 门槛 |
|
|
65
|
+
|------|------|------|
|
|
66
|
+
| [A. 模块独立使用](#a-模块独立使用) | 需要自定义逻辑、部分环节手动干预 | 熟悉 Python |
|
|
67
|
+
| [B. 链式 API](#b-链式-api) | 标准建模流程,一行到底 | 会写 Python |
|
|
68
|
+
| [C. Excel 配置驱动](#c-excel-配置驱动) | 业务人员零代码,一套 Excel 跑到底 | 会填 Excel |
|
|
69
|
+
|
|
70
|
+
### A. 模块独立使用
|
|
71
|
+
|
|
72
|
+
每个模块可单独 `import`,适合在任意环节插入自定义逻辑。
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from proscore.inspect import detect, quality
|
|
76
|
+
from proscore.selection import Filter
|
|
77
|
+
from proscore.binning import Binning
|
|
78
|
+
from proscore.transform import WOETransformer
|
|
79
|
+
# ... 按需组合
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
详见 [docs/使用指南/](docs/使用指南/index.md) 各模块手册。
|
|
83
|
+
|
|
84
|
+
### B. 链式 API
|
|
85
|
+
|
|
86
|
+
标准建模流程,数据切分与模型边界严格分离(Train/Test/OOT)。
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
import proscore as ps
|
|
90
|
+
|
|
91
|
+
p = (
|
|
92
|
+
ps.ProScore()
|
|
93
|
+
.read(train=df_train, test=df_test, oot=df_oot, target="bad_flag")
|
|
94
|
+
.detect()
|
|
95
|
+
.prefilter()
|
|
96
|
+
.bin(method="chi", n_bins=5)
|
|
97
|
+
.refine(iv_range=(0.02, None))
|
|
98
|
+
.transform()
|
|
99
|
+
.select(method="stepwise")
|
|
100
|
+
.fit(odds=20, pdo=20, base_score=600)
|
|
101
|
+
.scorecard()
|
|
102
|
+
.evaluate() # 自动汇报 train / test / oot 三列指标
|
|
103
|
+
)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
> `train` 必传,`test` 和 `oot` 可选。分箱/WOE 只在 train 上拟合;逐步回归用 test 监控过拟合;OOT 仅用于最终评估。
|
|
107
|
+
>
|
|
108
|
+
> 完整教程见 [notebooks/ProScore完整建模流程.ipynb](notebooks/ProScore完整建模流程.ipynb)
|
|
109
|
+
|
|
110
|
+
### C. Excel 配置驱动
|
|
111
|
+
|
|
112
|
+
拿模板填参数,一行命令跑通全流程。**不需要写一行代码。**
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# 1. 获取空白配置模板(二选一)
|
|
116
|
+
cp examples/pipeline_template.xlsx ./my_project/ # 克隆仓库后可直接复制
|
|
117
|
+
# 或: proscore template ./my_project/
|
|
118
|
+
|
|
119
|
+
# 2. 打开 Excel,填 data_file、target、time_col 等参数
|
|
120
|
+
|
|
121
|
+
# 3. 运行
|
|
122
|
+
proscore run my_project/pipeline_template.xlsx
|
|
123
|
+
|
|
124
|
+
# 可选:导出等效 Python 脚本
|
|
125
|
+
proscore run my_project/pipeline_template.xlsx --output-script run.py
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
模板含 7 个 Sheet(Global / Data / Steps / Binning / Screening / Modeling / Variables),每个参数带中文说明、可选范围和默认值。留空 = 使用默认值。无 OOT 时最少只需填 `data_file`、`target`、`time_col` 3 个格子;有 OOT 时再补充时间切分参数。
|
|
129
|
+
|
|
130
|
+
详细参数说明见 [docs/使用指南/pipeline-config.md](docs/使用指南/pipeline-config.md)
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## 核心功能概览
|
|
135
|
+
|
|
136
|
+
| 模块 | 核心能力 | 业务价值 |
|
|
137
|
+
|------------|-----------------------------------------------|---------------------------------------|
|
|
138
|
+
| 数据探查 | IV/AUC/KS 三指标 + PSI 时序稳定性 + 相关性/VIF | 快速筛选优质变量,识别分布漂移风险 |
|
|
139
|
+
| 分箱 | 4 种单调趋势 + 5 种分箱方法 + 两阶段趋势校验 | 确保 WOE 趋势符合业务逻辑,满足监管 |
|
|
140
|
+
| 逐步回归 | 双向选择 + 五重约束(p值/符号/VIF/相关/来源) | 严谨的多重共线性控制与维度归属管理 |
|
|
141
|
+
| 模型监控 | Score/Feature PSI + 规则引擎告警 + JSON 持久化 | 投产后持续验证,自动风险预警 |
|
|
142
|
+
| 报告生成 | 7 章自动 Markdown 报告(含图表) | 银保监合规文档一键生成 |
|
|
143
|
+
|
|
144
|
+
### 设计原则
|
|
145
|
+
|
|
146
|
+
- **确定性**:相同输入 → 相同输出,不依赖随机优化器。
|
|
147
|
+
- **sklearn 风格**:统一 `fit()` / `transform()` 接口。
|
|
148
|
+
- **生产就绪**:内置 unseen 处理、inf 容错、分箱序列化。
|
|
149
|
+
- **轻量核心**:仅 numpy/pandas/scikit-learn/statsmodels,XGBoost/LightGBM 为可选依赖。
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## 安装
|
|
154
|
+
|
|
155
|
+
核心依赖仅需 numpy、pandas、scikit-learn、statsmodels,无重依赖:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
pip install proscore
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
如需使用 XGBoost 或 LightGBM 作为变量质量评估的备选估计器(在 `inspect.quality()` 中启用 `estimator="xgb"` 或 `estimator="lgb"`),安装对应的可选依赖组:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pip install proscore[inspect-ml]
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
> 为什么是可选?XGBoost 和 LightGBM 体积较大,且涉及平台相关的编译依赖。绝大多数场景下,默认的逻辑回归估计器已经足够可靠。仅在需要用树模型对变量进行非线性排序时才需安装。
|
|
168
|
+
>
|
|
169
|
+
> 后续 AI / LLM 相关功能同样会以可选依赖组方式发布(如 `proscore[ai]`),不强制安装,不拖累核心包体积。
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## 依赖
|
|
174
|
+
|
|
175
|
+
- Python >= 3.9
|
|
176
|
+
- numpy >= 1.20
|
|
177
|
+
- pandas >= 1.5
|
|
178
|
+
- scikit-learn >= 1.2
|
|
179
|
+
- statsmodels >= 0.13
|
|
180
|
+
|
|
181
|
+
**可选依赖**:
|
|
182
|
+
|
|
183
|
+
| 依赖组 | 安装命令 | 用途 |
|
|
184
|
+
|--------|---------|------|
|
|
185
|
+
| `inspect-ml` | `pip install proscore[inspect-ml]` | XGBoost / LightGBM 用于变量质量评估 |
|
|
186
|
+
| `excel` | `pip install proscore[excel]` | openpyxl,用于 `proscore run` 和 `load_presets()` |
|
|
187
|
+
|
|
188
|
+
> `proscore` 命令由 `[project.scripts]` 注册,安装后即可使用;其中 `proscore run` 需要读取 Excel 配置,请同时安装 `proscore[excel]`。
|
|
189
|
+
|
|
190
|
+
## License
|
|
191
|
+
|
|
192
|
+
MIT
|
proscore-0.1.0/README.md
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# ProScore
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/proscore/)
|
|
4
|
+
[Python versions](https://pypi.org/project/proscore/)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
**生产级评分卡开发工具包**
|
|
8
|
+
端到端的确定性评分卡建模管线,为银行和金融机构的信用评分卡建模场景设计,满足对可解释性、合规性和稳定性的要求。
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## 目录
|
|
13
|
+
|
|
14
|
+
- [三种使用方式](#三种使用方式)
|
|
15
|
+
- [核心功能概览](#核心功能概览)
|
|
16
|
+
- [安装](#安装)
|
|
17
|
+
- [依赖](#依赖)
|
|
18
|
+
- [License](#license)
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 三种使用方式
|
|
23
|
+
|
|
24
|
+
ProScore 提供三种递进的使用方式,从完全自定义到零代码,按需选择。
|
|
25
|
+
|
|
26
|
+
| 方式 | 适合 | 门槛 |
|
|
27
|
+
|------|------|------|
|
|
28
|
+
| [A. 模块独立使用](#a-模块独立使用) | 需要自定义逻辑、部分环节手动干预 | 熟悉 Python |
|
|
29
|
+
| [B. 链式 API](#b-链式-api) | 标准建模流程,一行到底 | 会写 Python |
|
|
30
|
+
| [C. Excel 配置驱动](#c-excel-配置驱动) | 业务人员零代码,一套 Excel 跑到底 | 会填 Excel |
|
|
31
|
+
|
|
32
|
+
### A. 模块独立使用
|
|
33
|
+
|
|
34
|
+
每个模块可单独 `import`,适合在任意环节插入自定义逻辑。
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from proscore.inspect import detect, quality
|
|
38
|
+
from proscore.selection import Filter
|
|
39
|
+
from proscore.binning import Binning
|
|
40
|
+
from proscore.transform import WOETransformer
|
|
41
|
+
# ... 按需组合
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
详见 [docs/使用指南/](docs/使用指南/index.md) 各模块手册。
|
|
45
|
+
|
|
46
|
+
### B. 链式 API
|
|
47
|
+
|
|
48
|
+
标准建模流程,数据切分与模型边界严格分离(Train/Test/OOT)。
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import proscore as ps
|
|
52
|
+
|
|
53
|
+
p = (
|
|
54
|
+
ps.ProScore()
|
|
55
|
+
.read(train=df_train, test=df_test, oot=df_oot, target="bad_flag")
|
|
56
|
+
.detect()
|
|
57
|
+
.prefilter()
|
|
58
|
+
.bin(method="chi", n_bins=5)
|
|
59
|
+
.refine(iv_range=(0.02, None))
|
|
60
|
+
.transform()
|
|
61
|
+
.select(method="stepwise")
|
|
62
|
+
.fit(odds=20, pdo=20, base_score=600)
|
|
63
|
+
.scorecard()
|
|
64
|
+
.evaluate() # 自动汇报 train / test / oot 三列指标
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
> `train` 必传,`test` 和 `oot` 可选。分箱/WOE 只在 train 上拟合;逐步回归用 test 监控过拟合;OOT 仅用于最终评估。
|
|
69
|
+
>
|
|
70
|
+
> 完整教程见 [notebooks/ProScore完整建模流程.ipynb](notebooks/ProScore完整建模流程.ipynb)
|
|
71
|
+
|
|
72
|
+
### C. Excel 配置驱动
|
|
73
|
+
|
|
74
|
+
拿模板填参数,一行命令跑通全流程。**不需要写一行代码。**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# 1. 获取空白配置模板(二选一)
|
|
78
|
+
cp examples/pipeline_template.xlsx ./my_project/ # 克隆仓库后可直接复制
|
|
79
|
+
# 或: proscore template ./my_project/
|
|
80
|
+
|
|
81
|
+
# 2. 打开 Excel,填 data_file、target、time_col 等参数
|
|
82
|
+
|
|
83
|
+
# 3. 运行
|
|
84
|
+
proscore run my_project/pipeline_template.xlsx
|
|
85
|
+
|
|
86
|
+
# 可选:导出等效 Python 脚本
|
|
87
|
+
proscore run my_project/pipeline_template.xlsx --output-script run.py
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
模板含 7 个 Sheet(Global / Data / Steps / Binning / Screening / Modeling / Variables),每个参数带中文说明、可选范围和默认值。留空 = 使用默认值。无 OOT 时最少只需填 `data_file`、`target`、`time_col` 3 个格子;有 OOT 时再补充时间切分参数。
|
|
91
|
+
|
|
92
|
+
详细参数说明见 [docs/使用指南/pipeline-config.md](docs/使用指南/pipeline-config.md)
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## 核心功能概览
|
|
97
|
+
|
|
98
|
+
| 模块 | 核心能力 | 业务价值 |
|
|
99
|
+
|------------|-----------------------------------------------|---------------------------------------|
|
|
100
|
+
| 数据探查 | IV/AUC/KS 三指标 + PSI 时序稳定性 + 相关性/VIF | 快速筛选优质变量,识别分布漂移风险 |
|
|
101
|
+
| 分箱 | 4 种单调趋势 + 5 种分箱方法 + 两阶段趋势校验 | 确保 WOE 趋势符合业务逻辑,满足监管 |
|
|
102
|
+
| 逐步回归 | 双向选择 + 五重约束(p值/符号/VIF/相关/来源) | 严谨的多重共线性控制与维度归属管理 |
|
|
103
|
+
| 模型监控 | Score/Feature PSI + 规则引擎告警 + JSON 持久化 | 投产后持续验证,自动风险预警 |
|
|
104
|
+
| 报告生成 | 7 章自动 Markdown 报告(含图表) | 银保监合规文档一键生成 |
|
|
105
|
+
|
|
106
|
+
### 设计原则
|
|
107
|
+
|
|
108
|
+
- **确定性**:相同输入 → 相同输出,不依赖随机优化器。
|
|
109
|
+
- **sklearn 风格**:统一 `fit()` / `transform()` 接口。
|
|
110
|
+
- **生产就绪**:内置 unseen 处理、inf 容错、分箱序列化。
|
|
111
|
+
- **轻量核心**:仅 numpy/pandas/scikit-learn/statsmodels,XGBoost/LightGBM 为可选依赖。
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## 安装
|
|
116
|
+
|
|
117
|
+
核心依赖仅需 numpy、pandas、scikit-learn、statsmodels,无重依赖:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
pip install proscore
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
如需使用 XGBoost 或 LightGBM 作为变量质量评估的备选估计器(在 `inspect.quality()` 中启用 `estimator="xgb"` 或 `estimator="lgb"`),安装对应的可选依赖组:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
pip install proscore[inspect-ml]
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
> 为什么是可选?XGBoost 和 LightGBM 体积较大,且涉及平台相关的编译依赖。绝大多数场景下,默认的逻辑回归估计器已经足够可靠。仅在需要用树模型对变量进行非线性排序时才需安装。
|
|
130
|
+
>
|
|
131
|
+
> 后续 AI / LLM 相关功能同样会以可选依赖组方式发布(如 `proscore[ai]`),不强制安装,不拖累核心包体积。
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## 依赖
|
|
136
|
+
|
|
137
|
+
- Python >= 3.9
|
|
138
|
+
- numpy >= 1.20
|
|
139
|
+
- pandas >= 1.5
|
|
140
|
+
- scikit-learn >= 1.2
|
|
141
|
+
- statsmodels >= 0.13
|
|
142
|
+
|
|
143
|
+
**可选依赖**:
|
|
144
|
+
|
|
145
|
+
| 依赖组 | 安装命令 | 用途 |
|
|
146
|
+
|--------|---------|------|
|
|
147
|
+
| `inspect-ml` | `pip install proscore[inspect-ml]` | XGBoost / LightGBM 用于变量质量评估 |
|
|
148
|
+
| `excel` | `pip install proscore[excel]` | openpyxl,用于 `proscore run` 和 `load_presets()` |
|
|
149
|
+
|
|
150
|
+
> `proscore` 命令由 `[project.scripts]` 注册,安装后即可使用;其中 `proscore run` 需要读取 Excel 配置,请同时安装 `proscore[excel]`。
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
|
|
154
|
+
MIT
|
proscore-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3"]  # PEP 639 `license = "MIT"` / `license-files` require setuptools >= 77
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "proscore"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Production-grade scorecard development toolkit"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Liqiwei"},
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.9",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
26
|
+
]
|
|
27
|
+
keywords = [
|
|
28
|
+
"scorecard",
|
|
29
|
+
"credit-risk",
|
|
30
|
+
"woe",
|
|
31
|
+
"binning",
|
|
32
|
+
"credit-scoring",
|
|
33
|
+
"risk-modeling",
|
|
34
|
+
"financial",
|
|
35
|
+
"banking",
|
|
36
|
+
"logistic-regression",
|
|
37
|
+
]
|
|
38
|
+
requires-python = ">=3.9"
|
|
39
|
+
dependencies = [
|
|
40
|
+
"numpy>=1.20",
|
|
41
|
+
"pandas>=1.5",
|
|
42
|
+
"scikit-learn>=1.2",
|
|
43
|
+
"statsmodels>=0.13",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[project.urls]
|
|
47
|
+
Homepage = "https://github.com/lqwzlh/proscore"
|
|
48
|
+
Repository = "https://github.com/lqwzlh/proscore"
|
|
49
|
+
Issues = "https://github.com/lqwzlh/proscore/issues"
|
|
50
|
+
|
|
51
|
+
[project.scripts]
|
|
52
|
+
proscore = "proscore.__main__:main"
|
|
53
|
+
|
|
54
|
+
[project.optional-dependencies]
|
|
55
|
+
inspect-ml = [
|
|
56
|
+
"xgboost>=2.0",
|
|
57
|
+
"lightgbm>=4.0",
|
|
58
|
+
]
|
|
59
|
+
excel = [
|
|
60
|
+
"openpyxl>=3.0",
|
|
61
|
+
]
|
|
62
|
+
dev = [
|
|
63
|
+
"pytest>=7.0",
|
|
64
|
+
"pytest-cov",
|
|
65
|
+
"ruff",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[tool.setuptools.packages.find]
|
|
69
|
+
where = ["src"]
|
|
70
|
+
|
|
71
|
+
[tool.pytest.ini_options]
|
|
72
|
+
testpaths = ["tests"]
|
|
73
|
+
python_files = ["test_*.py"]
|
|
74
|
+
addopts = "-v --tb=short"
|
|
75
|
+
|
|
76
|
+
[tool.ruff]
|
|
77
|
+
target-version = "py39"
|
|
78
|
+
line-length = 100
|
|
79
|
+
|
|
80
|
+
[tool.ruff.lint]
|
|
81
|
+
select = ["E", "F", "W", "I"]
|
proscore-0.1.0/setup.cfg
ADDED