predikit 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- predikit-0.4.1/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- predikit-0.4.1/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- {predikit-0.4.0 → predikit-0.4.1}/.github/workflows/publish.yml +4 -0
- {predikit-0.4.0 → predikit-0.4.1}/.github/workflows/test.yml +24 -0
- predikit-0.4.1/.pre-commit-config.yaml +15 -0
- predikit-0.4.1/CHANGELOG.md +39 -0
- predikit-0.4.1/CLAUDE.md +71 -0
- predikit-0.4.1/CONTRIBUTING.md +66 -0
- {predikit-0.4.0 → predikit-0.4.1}/PKG-INFO +27 -7
- {predikit-0.4.0 → predikit-0.4.1}/README.md +23 -6
- {predikit-0.4.0 → predikit-0.4.1}/examples/03_orlando_real_estate.py +2 -2
- {predikit-0.4.0 → predikit-0.4.1}/pyproject.toml +21 -1
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/__init__.py +3 -3
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/cli.py +19 -5
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/coerce.py +4 -2
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/ensemble.py +9 -6
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/exporters/langchain.py +5 -4
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/exporters/openai.py +1 -0
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/introspect.py +1 -3
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/loaders/mlflow.py +5 -3
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/loaders/snowflake.py +8 -4
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/registry.py +8 -9
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/tool.py +14 -7
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_cli.py +8 -7
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_confidence.py +23 -8
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_ensemble.py +52 -11
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_loaders_mlflow.py +23 -11
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_loaders_snowflake.py +8 -1
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_logging.py +12 -9
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_tool.py +1 -0
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_weighted_ensemble.py +57 -16
- {predikit-0.4.0 → predikit-0.4.1}/.claude/settings.local.json +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/.gitignore +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/LICENSE +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/examples/01_basic_sklearn.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/examples/02_xgboost_regression.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/examples/04_confidence_routing.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/examples/05_multi_model_ensemble.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/examples/06_mlflow_loader.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/examples/07_snowflake_loader.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/exceptions.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/exporters/__init__.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/src/predikit/loaders/__init__.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/tests/__init__.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_coerce.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_exporters_openai.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_introspect.py +0 -0
- {predikit-0.4.0 → predikit-0.4.1}/tests/test_registry.py +0 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Create a report to help us improve
|
|
4
|
+
title: ''
|
|
5
|
+
labels: ''
|
|
6
|
+
assignees: ''
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
**Describe the bug**
|
|
11
|
+
A clear and concise description of what the bug is.
|
|
12
|
+
|
|
13
|
+
**To Reproduce**
|
|
14
|
+
Steps to reproduce the behavior:
|
|
15
|
+
1. Go to '...'
|
|
16
|
+
2. Click on '....'
|
|
17
|
+
3. Scroll down to '....'
|
|
18
|
+
4. See error
|
|
19
|
+
|
|
20
|
+
**Expected behavior**
|
|
21
|
+
A clear and concise description of what you expected to happen.
|
|
22
|
+
|
|
23
|
+
**Screenshots**
|
|
24
|
+
If applicable, add screenshots to help explain your problem.
|
|
25
|
+
|
|
26
|
+
**Desktop (please complete the following information):**
|
|
27
|
+
- OS: [e.g. iOS]
|
|
28
|
+
- Browser [e.g. chrome, safari]
|
|
29
|
+
- Version [e.g. 22]
|
|
30
|
+
|
|
31
|
+
**Smartphone (please complete the following information):**
|
|
32
|
+
- Device: [e.g. iPhone6]
|
|
33
|
+
- OS: [e.g. iOS8.1]
|
|
34
|
+
- Browser [e.g. stock browser, safari]
|
|
35
|
+
- Version [e.g. 22]
|
|
36
|
+
|
|
37
|
+
**Additional context**
|
|
38
|
+
Add any other context about the problem here.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest an idea for this project
|
|
4
|
+
title: ''
|
|
5
|
+
labels: ''
|
|
6
|
+
assignees: ''
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
**Is your feature request related to a problem? Please describe.**
|
|
11
|
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
|
12
|
+
|
|
13
|
+
**Describe the solution you'd like**
|
|
14
|
+
A clear and concise description of what you want to happen.
|
|
15
|
+
|
|
16
|
+
**Describe alternatives you've considered**
|
|
17
|
+
A clear and concise description of any alternative solutions or features you've considered.
|
|
18
|
+
|
|
19
|
+
**Additional context**
|
|
20
|
+
Add any other context or screenshots about the feature request here.
|
|
@@ -6,7 +6,31 @@ on:
|
|
|
6
6
|
pull_request:
|
|
7
7
|
branches: [main]
|
|
8
8
|
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
9
12
|
jobs:
|
|
13
|
+
lint:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.12"
|
|
21
|
+
|
|
22
|
+
- name: Install dev dependencies
|
|
23
|
+
run: pip install -e ".[dev]"
|
|
24
|
+
|
|
25
|
+
- name: Ruff lint
|
|
26
|
+
run: ruff check src/ tests/
|
|
27
|
+
|
|
28
|
+
- name: Ruff format check
|
|
29
|
+
run: ruff format --check src/ tests/
|
|
30
|
+
|
|
31
|
+
- name: Mypy
|
|
32
|
+
run: mypy src/predikit
|
|
33
|
+
|
|
10
34
|
test:
|
|
11
35
|
runs-on: ubuntu-latest
|
|
12
36
|
strategy:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.5.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
10
|
+
rev: v1.10.0
|
|
11
|
+
hooks:
|
|
12
|
+
- id: mypy
|
|
13
|
+
additional_dependencies:
|
|
14
|
+
- pydantic>=2.0
|
|
15
|
+
- numpy>=1.24
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [0.4.1] - 2026-06-02
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- `ruff` (lint + format) and `mypy` (type checking) configured in `pyproject.toml`
|
|
13
|
+
- `.pre-commit-config.yaml` — ruff and mypy hooks run automatically before every commit
|
|
14
|
+
- Lint CI job in GitHub Actions — runs `ruff check`, `ruff format --check`, and `mypy` on every push and PR
|
|
15
|
+
- `CONTRIBUTING.md` — development setup, code style, and PR guidelines
|
|
16
|
+
- `CHANGELOG.md`
|
|
17
|
+
- `CLAUDE.md` — project context for Claude Code
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
- Bumped version to `0.4.1`
|
|
21
|
+
- Added `ruff>=0.4.0`, `mypy>=1.10`, and `pre-commit>=3.0` to `[dev]` extras
|
|
22
|
+
|
|
23
|
+
## [0.4.0] - 2026-05-01
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- `ModelEnsemble` with `weighted_mean` and `weighted_vote` strategies
|
|
27
|
+
- `ainvoke()` async wrapper on `ModelTool` (runs blocking predict in a thread pool)
|
|
28
|
+
- Verbose logging via `verbose=True` on `ModelTool`
|
|
29
|
+
- Snowflake Model Registry loader (`from_snowflake`)
|
|
30
|
+
- MLflow Model Registry loader (`from_mlflow`)
|
|
31
|
+
- `predikit inspect` CLI command
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
- Moved to `src/` layout with hatchling build backend
|
|
35
|
+
- Upgraded Pydantic dependency to v2
|
|
36
|
+
|
|
37
|
+
## [0.3.x] and earlier
|
|
38
|
+
|
|
39
|
+
See [GitHub Releases](https://github.com/Tejas-TA/predikit/releases) for earlier history.
|
predikit-0.4.1/CLAUDE.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# predikit — Claude Code context
|
|
2
|
+
|
|
3
|
+
## What this is
|
|
4
|
+
|
|
5
|
+
predikit wraps fitted scikit-learn / XGBoost models as LLM-callable tools with auto-generated JSON schemas, typed I/O via Pydantic, and first-class support for OpenAI function calling and LangChain.
|
|
6
|
+
|
|
7
|
+
## Project layout
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
src/predikit/ Core package (installed as `predikit`)
|
|
11
|
+
tool.py ModelTool — main public class
|
|
12
|
+
registry.py ToolRegistry — groups multiple tools for bulk export
|
|
13
|
+
ensemble.py ModelEnsemble — multi-model reconciliation strategies
|
|
14
|
+
exceptions.py LowConfidenceError
|
|
15
|
+
coerce.py LLM-string → Python type coercion (bool, int, float, str)
|
|
16
|
+
introspect.py Extract sklearn model metadata (feature names, task type)
|
|
17
|
+
cli.py `predikit inspect` CLI (click + joblib)
|
|
18
|
+
exporters/
|
|
19
|
+
openai.py .to_openai() — OpenAI function-calling schema
|
|
20
|
+
langchain.py .to_langchain() — LangChain StructuredTool
|
|
21
|
+
loaders/
|
|
22
|
+
mlflow.py from_mlflow() — load from MLflow Model Registry
|
|
23
|
+
snowflake.py from_snowflake() — load from Snowflake Model Registry
|
|
24
|
+
tests/ pytest suite (mirrors src/ layout)
|
|
25
|
+
examples/ Standalone usage scripts (01–07)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Commands
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Install for development
|
|
32
|
+
pip install -e ".[dev]"
|
|
33
|
+
|
|
34
|
+
# Run tests
|
|
35
|
+
pytest --cov=src/predikit --cov-report=term-missing
|
|
36
|
+
|
|
37
|
+
# Lint (check only)
|
|
38
|
+
ruff check src/ tests/
|
|
39
|
+
|
|
40
|
+
# Format (check only)
|
|
41
|
+
ruff format --check src/ tests/
|
|
42
|
+
|
|
43
|
+
# Lint + format (auto-fix)
|
|
44
|
+
ruff check --fix src/ tests/
|
|
45
|
+
ruff format src/ tests/
|
|
46
|
+
|
|
47
|
+
# Type check
|
|
48
|
+
mypy src/predikit
|
|
49
|
+
|
|
50
|
+
# All checks via pre-commit
|
|
51
|
+
pre-commit run --all-files
|
|
52
|
+
|
|
53
|
+
# Install pre-commit hooks
|
|
54
|
+
pre-commit install
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Key conventions
|
|
58
|
+
|
|
59
|
+
- **Public API** — `ModelTool`, `ToolRegistry`, `ModelEnsemble`, `LowConfidenceError` are all re-exported from `predikit/__init__.py`. Everything else is internal.
|
|
60
|
+
- **Optional imports** — heavy optional deps (`langchain`, `xgboost`, `mlflow`, `snowflake`) are imported inside functions/methods, never at module level, so missing extras don't break core imports.
|
|
61
|
+
- **Feature name matching** — inputs map to model features by column name (using `feature_names_in_` from sklearn), not by position. Mismatches raise a `ValueError` with a clear diff.
|
|
62
|
+
- **Bool coercion** — `coerce_value()` converts LLM strings (`"yes"`, `"true"`, `"1"`, `"on"`) to Python bools before Pydantic validation.
|
|
63
|
+
- **Async** — `ainvoke()` is a thin wrapper over `invoke()` via `loop.run_in_executor`. No async-native I/O involved.
|
|
64
|
+
- **Ensemble strategies** — `"collect"`, `"mean"`, `"vote"`, `"weighted_mean"`, `"weighted_vote"`. Weighted strategies take a `weights` list parallel to `tools`.
|
|
65
|
+
|
|
66
|
+
## Release process
|
|
67
|
+
|
|
68
|
+
1. Bump `version` in `pyproject.toml` and `__version__` in `src/predikit/__init__.py`.
|
|
69
|
+
2. Add a section to `CHANGELOG.md`.
|
|
70
|
+
3. Commit, push to `main`, create a GitHub Release with the version tag (e.g. `v0.4.1`).
|
|
71
|
+
4. `publish.yml` auto-triggers a PyPI upload via OIDC trusted publishing.
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Contributing to predikit
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in contributing.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
- Python 3.10+
|
|
8
|
+
- [pre-commit](https://pre-commit.com/) (`pip install pre-commit`)
|
|
9
|
+
|
|
10
|
+
## Development setup
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
git clone https://github.com/Tejas-TA/predikit
|
|
14
|
+
cd predikit
|
|
15
|
+
pip install -e ".[dev]"
|
|
16
|
+
pre-commit install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
`pre-commit install` wires up ruff and mypy to run automatically before every commit.
|
|
20
|
+
|
|
21
|
+
## Running tests
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pytest --cov=src/predikit --cov-report=term-missing
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Code style
|
|
28
|
+
|
|
29
|
+
This project uses [ruff](https://docs.astral.sh/ruff/) for linting and formatting, and [mypy](https://mypy.readthedocs.io/) for type checking. Linting, formatting, and type checking are all configured in `pyproject.toml`.
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Check and auto-fix
|
|
33
|
+
ruff check --fix src/ tests/
|
|
34
|
+
ruff format src/ tests/
|
|
35
|
+
|
|
36
|
+
# Type check
|
|
37
|
+
mypy src/predikit
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Or run everything at once via pre-commit:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pre-commit run --all-files
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
CI enforces all three checks on every push and PR (see `.github/workflows/test.yml`).
|
|
47
|
+
|
|
48
|
+
## Pull request guidelines
|
|
49
|
+
|
|
50
|
+
- Target `main`.
|
|
51
|
+
- Keep changes focused — one feature or fix per PR.
|
|
52
|
+
- Add or update tests for any changed behaviour.
|
|
53
|
+
- Run `pre-commit run --all-files` before pushing; CI will catch failures regardless.
|
|
54
|
+
|
|
55
|
+
## Adding a new exporter or loader
|
|
56
|
+
|
|
57
|
+
- Exporters live in `src/predikit/exporters/` and should expose a single function consumed by `ModelTool`.
|
|
58
|
+
- Loaders live in `src/predikit/loaders/` and should return a `ModelTool` instance.
|
|
59
|
+
- Any new optional dependency must be added to both the relevant optional group and `dev` in `pyproject.toml`, and imported inside the function body (not at module level).
|
|
60
|
+
|
|
61
|
+
## Release process (maintainers)
|
|
62
|
+
|
|
63
|
+
1. Bump `version` in `pyproject.toml` and `__version__` in `src/predikit/__init__.py`.
|
|
64
|
+
2. Add a section to `CHANGELOG.md`.
|
|
65
|
+
3. Merge to `main`, create a GitHub Release tagged `vX.Y.Z`.
|
|
66
|
+
4. `publish.yml` automatically publishes to PyPI via OIDC trusted publishing.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: predikit
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Turn any trained sklearn/XGBoost model into an LLM-callable tool with auto-generated schemas and typed I/O.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Tejas-TA/predikit
|
|
6
6
|
Project-URL: Repository, https://github.com/Tejas-TA/predikit
|
|
@@ -31,9 +31,12 @@ Requires-Dist: click>=8.0; extra == 'dev'
|
|
|
31
31
|
Requires-Dist: joblib>=1.2; extra == 'dev'
|
|
32
32
|
Requires-Dist: langchain-core>=0.1; extra == 'dev'
|
|
33
33
|
Requires-Dist: mlflow>=2.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
34
35
|
Requires-Dist: pandas>=1.5; extra == 'dev'
|
|
36
|
+
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
35
37
|
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
36
38
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
37
40
|
Requires-Dist: xgboost>=1.7; extra == 'dev'
|
|
38
41
|
Provides-Extra: langchain
|
|
39
42
|
Requires-Dist: langchain-core>=0.1; extra == 'langchain'
|
|
@@ -49,9 +52,23 @@ Description-Content-Type: text/markdown
|
|
|
49
52
|
[](https://pypi.org/project/predikit/)
|
|
50
53
|
[](https://www.python.org/)
|
|
51
54
|
[](LICENSE)
|
|
55
|
+
[](https://github.com/Tejas-TA/predikit/actions/workflows/test.yml)
|
|
56
|
+
[](https://github.com/astral-sh/ruff)
|
|
52
57
|
|
|
58
|
+
### 📈 Project Traffic
|
|
59
|
+
Detailed breakdown of downloads by version, region, and platform:
|
|
53
60
|
|
|
54
|
-
|
|
61
|
+
[](https://pepy.tech/project/predikit)
|
|
62
|
+
|
|
63
|
+
## Table of Contents
|
|
64
|
+
- [Install](#install)
|
|
65
|
+
- [30-second example](#30-second-example)
|
|
66
|
+
- [Core API](#core-api)
|
|
67
|
+
- [Cookbook](#cookbook)
|
|
68
|
+
- [Contributing](#contributing)
|
|
69
|
+
- [License](#license)
|
|
70
|
+
|
|
71
|
+
## Turn any trained scikit-learn or XGBoost model into an LLM-callable tool — auto-generated JSON schemas, typed I/O, zero boilerplate.
|
|
55
72
|
|
|
56
73
|
```python
|
|
57
74
|
tool = ModelTool(model=clf, name="classify_iris", ...)
|
|
@@ -333,7 +350,7 @@ tool = from_snowflake(
|
|
|
333
350
|
model_name="VACATION_CHURN",
|
|
334
351
|
model_version="V3",
|
|
335
352
|
name="churn_risk",
|
|
336
|
-
description="
|
|
353
|
+
description="Churn classifier.",
|
|
337
354
|
input_schema=MemberInput,
|
|
338
355
|
output_name="churn_probability",
|
|
339
356
|
output_description="Churn probability 0–1",
|
|
@@ -351,10 +368,13 @@ See [`examples/03_orlando_real_estate.py`](examples/03_orlando_real_estate.py) f
|
|
|
351
368
|
|
|
352
369
|
Planned for later releases:
|
|
353
370
|
|
|
354
|
-
- HuggingFace / PyTorch / TensorFlow support
|
|
355
|
-
-
|
|
356
|
-
-
|
|
357
|
-
|
|
371
|
+
- HuggingFace / PyTorch / TensorFlow model support
|
|
372
|
+
- Streaming inference support
|
|
373
|
+
- OpenAI Assistants API integration
|
|
374
|
+
|
|
375
|
+
## Contributing
|
|
376
|
+
|
|
377
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, code style, and PR guidelines. The [CHANGELOG](CHANGELOG.md) tracks notable changes per release.
|
|
358
378
|
|
|
359
379
|
## License
|
|
360
380
|
|
|
@@ -2,9 +2,23 @@
|
|
|
2
2
|
[](https://pypi.org/project/predikit/)
|
|
3
3
|
[](https://www.python.org/)
|
|
4
4
|
[](LICENSE)
|
|
5
|
+
[](https://github.com/Tejas-TA/predikit/actions/workflows/test.yml)
|
|
6
|
+
[](https://github.com/astral-sh/ruff)
|
|
5
7
|
|
|
8
|
+
### 📈 Project Traffic
|
|
9
|
+
Detailed breakdown of downloads by version, region, and platform:
|
|
6
10
|
|
|
7
|
-
|
|
11
|
+
[](https://pepy.tech/project/predikit)
|
|
12
|
+
|
|
13
|
+
## Table of Contents
|
|
14
|
+
- [Install](#install)
|
|
15
|
+
- [30-second example](#30-second-example)
|
|
16
|
+
- [Core API](#core-api)
|
|
17
|
+
- [Cookbook](#cookbook)
|
|
18
|
+
- [Contributing](#contributing)
|
|
19
|
+
- [License](#license)
|
|
20
|
+
|
|
21
|
+
## Turn any trained scikit-learn or XGBoost model into an LLM-callable tool — auto-generated JSON schemas, typed I/O, zero boilerplate.
|
|
8
22
|
|
|
9
23
|
```python
|
|
10
24
|
tool = ModelTool(model=clf, name="classify_iris", ...)
|
|
@@ -286,7 +300,7 @@ tool = from_snowflake(
|
|
|
286
300
|
model_name="VACATION_CHURN",
|
|
287
301
|
model_version="V3",
|
|
288
302
|
name="churn_risk",
|
|
289
|
-
description="
|
|
303
|
+
description="Churn classifier.",
|
|
290
304
|
input_schema=MemberInput,
|
|
291
305
|
output_name="churn_probability",
|
|
292
306
|
output_description="Churn probability 0–1",
|
|
@@ -304,10 +318,13 @@ See [`examples/03_orlando_real_estate.py`](examples/03_orlando_real_estate.py) f
|
|
|
304
318
|
|
|
305
319
|
Planned for later releases:
|
|
306
320
|
|
|
307
|
-
- HuggingFace / PyTorch / TensorFlow support
|
|
308
|
-
-
|
|
309
|
-
-
|
|
310
|
-
|
|
321
|
+
- HuggingFace / PyTorch / TensorFlow model support
|
|
322
|
+
- Streaming inference support
|
|
323
|
+
- OpenAI Assistants API integration
|
|
324
|
+
|
|
325
|
+
## Contributing
|
|
326
|
+
|
|
327
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, code style, and PR guidelines. The [CHANGELOG](CHANGELOG.md) tracks notable changes per release.
|
|
311
328
|
|
|
312
329
|
## License
|
|
313
330
|
|
|
@@ -5,7 +5,7 @@ Trains an XGBoost model on synthetic Orlando-area housing data,
|
|
|
5
5
|
wraps it as an LLM-callable tool, and shows the full end-to-end flow:
|
|
6
6
|
schema generation → registry export → direct invocation → callable.
|
|
7
7
|
|
|
8
|
-
Requires: pip install
|
|
8
|
+
Requires: pip install xgboost
|
|
9
9
|
"""
|
|
10
10
|
import json
|
|
11
11
|
|
|
@@ -17,7 +17,7 @@ from sklearn.model_selection import train_test_split
|
|
|
17
17
|
try:
|
|
18
18
|
from xgboost import XGBRegressor
|
|
19
19
|
except ImportError:
|
|
20
|
-
raise SystemExit("XGBoost not installed. Run: pip install
|
|
20
|
+
raise SystemExit("XGBoost not installed. Run: pip install xgboost")
|
|
21
21
|
|
|
22
22
|
from predikit import ModelTool, ToolRegistry
|
|
23
23
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "predikit"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.4.1"
|
|
8
8
|
description = "Turn any trained sklearn/XGBoost model into an LLM-callable tool with auto-generated schemas and typed I/O."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -45,6 +45,9 @@ dev = [
|
|
|
45
45
|
"mlflow>=2.0",
|
|
46
46
|
"click>=8.0",
|
|
47
47
|
"joblib>=1.2",
|
|
48
|
+
"ruff>=0.4.0",
|
|
49
|
+
"mypy>=1.10",
|
|
50
|
+
"pre-commit>=3.0",
|
|
48
51
|
]
|
|
49
52
|
|
|
50
53
|
[project.scripts]
|
|
@@ -61,3 +64,20 @@ packages = ["src/predikit"]
|
|
|
61
64
|
|
|
62
65
|
[tool.pytest.ini_options]
|
|
63
66
|
testpaths = ["tests"]
|
|
67
|
+
|
|
68
|
+
[tool.ruff]
|
|
69
|
+
line-length = 100
|
|
70
|
+
target-version = "py310"
|
|
71
|
+
|
|
72
|
+
[tool.ruff.lint]
|
|
73
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
74
|
+
ignore = ["E501"]
|
|
75
|
+
|
|
76
|
+
[tool.ruff.lint.isort]
|
|
77
|
+
known-first-party = ["predikit"]
|
|
78
|
+
|
|
79
|
+
[tool.mypy]
|
|
80
|
+
python_version = "3.10"
|
|
81
|
+
warn_return_any = true
|
|
82
|
+
warn_unused_configs = true
|
|
83
|
+
ignore_missing_imports = true
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from .tool import ModelTool
|
|
2
|
-
from .registry import ToolRegistry
|
|
3
1
|
from .ensemble import ModelEnsemble
|
|
4
2
|
from .exceptions import LowConfidenceError
|
|
3
|
+
from .registry import ToolRegistry
|
|
4
|
+
from .tool import ModelTool
|
|
5
5
|
|
|
6
6
|
__all__ = ["ModelTool", "ToolRegistry", "ModelEnsemble", "LowConfidenceError"]
|
|
7
|
-
__version__ = "0.4.0"
|
|
7
|
+
__version__ = "0.4.1"
|
|
@@ -1,31 +1,41 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import json
|
|
3
4
|
import sys
|
|
4
5
|
|
|
5
6
|
try:
|
|
6
7
|
import click
|
|
8
|
+
|
|
7
9
|
_CLICK_AVAILABLE = True
|
|
8
10
|
except ImportError:
|
|
9
11
|
_CLICK_AVAILABLE = False
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
if _CLICK_AVAILABLE:
|
|
15
|
+
|
|
13
16
|
@click.group()
|
|
14
17
|
def cli() -> None:
|
|
15
18
|
"""predikit — ML model utilities for LLM agents."""
|
|
16
19
|
|
|
17
20
|
@cli.command()
|
|
18
21
|
@click.argument("model_path", type=click.Path(exists=True))
|
|
19
|
-
@click.option(
|
|
20
|
-
|
|
22
|
+
@click.option(
|
|
23
|
+
"--name", default="model", show_default=True, help="Tool name used in schema generation."
|
|
24
|
+
)
|
|
25
|
+
@click.option(
|
|
26
|
+
"--description", default="ML model prediction", show_default=True, help="Tool description."
|
|
27
|
+
)
|
|
21
28
|
def inspect(model_path: str, name: str, description: str) -> None:
|
|
22
29
|
"""Inspect a saved model file and print its metadata and OpenAI schema."""
|
|
23
30
|
try:
|
|
24
31
|
import joblib
|
|
25
|
-
except ImportError:
|
|
26
|
-
raise click.ClickException(
|
|
32
|
+
except ImportError as err:
|
|
33
|
+
raise click.ClickException(
|
|
34
|
+
"joblib is required. Install it with: pip install predikit[cli]"
|
|
35
|
+
) from err
|
|
27
36
|
|
|
28
37
|
from pydantic import create_model
|
|
38
|
+
|
|
29
39
|
from .introspect import introspect
|
|
30
40
|
from .tool import ModelTool
|
|
31
41
|
|
|
@@ -64,9 +74,13 @@ if _CLICK_AVAILABLE:
|
|
|
64
74
|
click.echo("\nOpenAI schema: unavailable (fit model with a named DataFrame to enable)")
|
|
65
75
|
|
|
66
76
|
else:
|
|
77
|
+
|
|
67
78
|
def cli() -> None: # type: ignore[misc]
|
|
68
79
|
"""Fallback when click is not installed."""
|
|
69
|
-
print(
|
|
80
|
+
print(
|
|
81
|
+
"Error: 'click' is required. Install it with: pip install predikit[cli]",
|
|
82
|
+
file=sys.stderr,
|
|
83
|
+
)
|
|
70
84
|
sys.exit(1)
|
|
71
85
|
|
|
72
86
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Any
|
|
2
|
-
from pydantic import BaseModel
|
|
3
2
|
|
|
3
|
+
from pydantic import BaseModel
|
|
4
4
|
|
|
5
5
|
_BOOL_TRUE = {"true", "1", "yes", "on"}
|
|
6
6
|
_BOOL_FALSE = {"false", "0", "no", "off"}
|
|
@@ -18,7 +18,9 @@ def coerce_value(value: Any, target_type: type) -> Any:
|
|
|
18
18
|
return True
|
|
19
19
|
if low in _BOOL_FALSE:
|
|
20
20
|
return False
|
|
21
|
-
raise ValueError(
|
|
21
|
+
raise ValueError(
|
|
22
|
+
f"Cannot interpret {value!r} as bool. Expected one of: true/false, yes/no, 1/0, on/off"
|
|
23
|
+
)
|
|
22
24
|
return bool(value)
|
|
23
25
|
|
|
24
26
|
if target_type is int:
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import asyncio
|
|
3
4
|
from collections import Counter
|
|
4
|
-
from
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
|
-
from .tool import ModelTool
|
|
7
|
-
from .exporters.openai import to_openai_schema
|
|
8
8
|
from .exporters.langchain import to_langchain_tool
|
|
9
|
+
from .exporters.openai import to_openai_schema
|
|
10
|
+
from .tool import ModelTool
|
|
9
11
|
|
|
10
12
|
_VALID_STRATEGIES = {"collect", "mean", "weighted_mean", "vote", "weighted_vote"}
|
|
11
13
|
|
|
@@ -71,14 +73,14 @@ class ModelEnsemble:
|
|
|
71
73
|
if self.strategy == "weighted_mean":
|
|
72
74
|
numeric = [float(v) for v in values]
|
|
73
75
|
total = sum(weights)
|
|
74
|
-
return {output_name: sum(w * v for w, v in zip(weights, numeric)) / total}
|
|
76
|
+
return {output_name: sum(w * v for w, v in zip(weights, numeric, strict=True)) / total}
|
|
75
77
|
|
|
76
78
|
if self.strategy == "vote":
|
|
77
79
|
return {output_name: Counter(values).most_common(1)[0][0]}
|
|
78
80
|
|
|
79
81
|
if self.strategy == "weighted_vote":
|
|
80
82
|
tally: dict[Any, float] = {}
|
|
81
|
-
for w, v in zip(weights, values):
|
|
83
|
+
for w, v in zip(weights, values, strict=True):
|
|
82
84
|
tally[v] = tally.get(v, 0.0) + w
|
|
83
85
|
return {output_name: max(tally, key=tally.__getitem__)}
|
|
84
86
|
|
|
@@ -88,12 +90,13 @@ class ModelEnsemble:
|
|
|
88
90
|
def to_openai(self) -> dict:
|
|
89
91
|
return to_openai_schema(self)
|
|
90
92
|
|
|
91
|
-
def to_langchain(self):
|
|
93
|
+
def to_langchain(self) -> Any:
|
|
92
94
|
return to_langchain_tool(self)
|
|
93
95
|
|
|
94
96
|
def to_callable(self) -> Callable[..., dict]:
|
|
95
97
|
def _fn(**kwargs) -> dict:
|
|
96
98
|
return self.invoke(kwargs)
|
|
99
|
+
|
|
97
100
|
_fn.__name__ = self.name
|
|
98
101
|
_fn.__doc__ = self.description
|
|
99
102
|
return _fn
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
3
4
|
|
|
4
5
|
if TYPE_CHECKING:
|
|
5
6
|
from ..tool import ModelTool
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
def to_langchain_tool(tool: ModelTool):
|
|
9
|
+
def to_langchain_tool(tool: ModelTool) -> Any:
|
|
9
10
|
"""Convert a ModelTool to a LangChain StructuredTool."""
|
|
10
11
|
try:
|
|
11
12
|
from langchain_core.tools import StructuredTool
|
|
12
|
-
except ImportError:
|
|
13
|
+
except ImportError as err:
|
|
13
14
|
raise ImportError(
|
|
14
15
|
"langchain-core is required. Install with: pip install predikit[langchain]"
|
|
15
|
-
)
|
|
16
|
+
) from err
|
|
16
17
|
|
|
17
18
|
def _run(**kwargs) -> dict:
|
|
18
19
|
return tool.invoke(kwargs)
|
|
@@ -8,9 +8,7 @@ def introspect(model: Any) -> dict:
|
|
|
8
8
|
meta["feature_names"] = (
|
|
9
9
|
list(model.feature_names_in_) if hasattr(model, "feature_names_in_") else None
|
|
10
10
|
)
|
|
11
|
-
meta["n_features"] = (
|
|
12
|
-
int(model.n_features_in_) if hasattr(model, "n_features_in_") else None
|
|
13
|
-
)
|
|
11
|
+
meta["n_features"] = int(model.n_features_in_) if hasattr(model, "n_features_in_") else None
|
|
14
12
|
|
|
15
13
|
if hasattr(model, "classes_"):
|
|
16
14
|
meta["task"] = "classification"
|