llm-io-normalizer 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_io_normalizer-0.1.0/LICENSE +21 -0
- llm_io_normalizer-0.1.0/PKG-INFO +190 -0
- llm_io_normalizer-0.1.0/README.md +161 -0
- llm_io_normalizer-0.1.0/pyproject.toml +69 -0
- llm_io_normalizer-0.1.0/setup.cfg +4 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/__init__.py +4 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/adapters/__init__.py +3 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/adapters/openai_compatible.py +326 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/errors.py +10 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/gateway.py +18 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/normalizers/__init__.py +10 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/normalizers/json_output.py +41 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/normalizers/reasoning.py +80 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/py.typed +0 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer/schemas.py +90 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer.egg-info/PKG-INFO +190 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer.egg-info/SOURCES.txt +21 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer.egg-info/dependency_links.txt +1 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer.egg-info/requires.txt +7 -0
- llm_io_normalizer-0.1.0/src/llm_io_normalizer.egg-info/top_level.txt +1 -0
- llm_io_normalizer-0.1.0/tests/test_gateway_stream_extraction.py +17 -0
- llm_io_normalizer-0.1.0/tests/test_json_output.py +13 -0
- llm_io_normalizer-0.1.0/tests/test_reasoning_normalizer.py +23 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 llm-io-normalizer contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-io-normalizer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight Model I/O normalization layer for OpenAI-compatible LLM calls.
|
|
5
|
+
Author: llm-io-normalizer contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/wanghesong2019/llm-io-normalizer
|
|
8
|
+
Project-URL: Repository, https://github.com/wanghesong2019/llm-io-normalizer
|
|
9
|
+
Project-URL: Issues, https://github.com/wanghesong2019/llm-io-normalizer/issues
|
|
10
|
+
Keywords: llm,model-io,openai-compatible,reasoning,streaming,normalization,judge-model,tested-model,structured-output
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: openai>=1.0.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
26
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
27
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# llm-io-normalizer
|
|
31
|
+
|
|
32
|
+
`llm-io-normalizer` is a lightweight **Model I/O normalization layer** for OpenAI-compatible LLM calls.
|
|
33
|
+
It is built for applications that call both **tested models** and **judge models** and need a stable result contract instead of provider-specific response parsing.
|
|
34
|
+
|
|
35
|
+
It normalizes common LLM response differences such as:
|
|
36
|
+
|
|
37
|
+
- `message.content` vs `message.reasoning`
|
|
38
|
+
- `delta.content` vs `delta.reasoning` / `delta.reasoning_content`
|
|
39
|
+
- `<think>...</think>` reasoning mixed into normal content
|
|
40
|
+
- stream vs non-stream completion behavior
|
|
41
|
+
- successful HTTP responses that contain reasoning but no final answer
|
|
42
|
+
- final JSON extraction from model output
|
|
43
|
+
|
|
44
|
+
The public contract is intentionally small and stable:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
result.answer_text
|
|
48
|
+
result.reasoning_text
|
|
49
|
+
result.ok
|
|
50
|
+
result.error_type
|
|
51
|
+
result.error_message
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Business code should depend on these normalized fields instead of reading raw provider fields directly.
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
From PyPI:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install llm-io-normalizer
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
From a local checkout:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
For development:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install -e ".[dev]"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Quick start
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import asyncio
|
|
80
|
+
|
|
81
|
+
from llm_io_normalizer import LLMGateway, LLMRequest, LLMRole
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def main() -> None:
|
|
85
|
+
gateway = LLMGateway()
|
|
86
|
+
|
|
87
|
+
result = await gateway.generate(
|
|
88
|
+
LLMRequest(
|
|
89
|
+
role=LLMRole.JUDGE_MODEL,
|
|
90
|
+
model_name="your-model-name",
|
|
91
|
+
base_url="https://example.com/v1",
|
|
92
|
+
api_key="YOUR_API_KEY",
|
|
93
|
+
messages=[
|
|
94
|
+
{"role": "system", "content": "You are a strict JSON judge."},
|
|
95
|
+
{"role": "user", "content": "Return {\"result\": 2}."},
|
|
96
|
+
],
|
|
97
|
+
# Judge models should usually prefer stable non-stream output.
|
|
98
|
+
stream=False,
|
|
99
|
+
enable_thinking=False,
|
|
100
|
+
temperature=0,
|
|
101
|
+
max_tokens=1024,
|
|
102
|
+
)
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
result.require_ok()
|
|
106
|
+
print(result.answer_text)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
asyncio.run(main())
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Core concepts
|
|
113
|
+
|
|
114
|
+
### Tested model calls
|
|
115
|
+
|
|
116
|
+
`LLMRole.TESTED_MODEL` is for the model being evaluated or observed.
|
|
117
|
+
|
|
118
|
+
Default behavior:
|
|
119
|
+
|
|
120
|
+
- streams by default unless `stream=False` is provided
|
|
121
|
+
- can collect native reasoning fields from compatible providers
|
|
122
|
+
- can split `<think>...</think>` blocks out of the answer
|
|
123
|
+
- returns clean `answer_text` and separate `reasoning_text`
|
|
124
|
+
- can retry without thinking when the provider returns no final answer
|
|
125
|
+
|
|
126
|
+
### Judge model calls
|
|
127
|
+
|
|
128
|
+
`LLMRole.JUDGE_MODEL` is for scoring, evaluation, moderation, ranking, or structured judgment tasks.
|
|
129
|
+
|
|
130
|
+
Default behavior:
|
|
131
|
+
|
|
132
|
+
- uses non-stream mode by default unless `stream=True` is provided
|
|
133
|
+
- is designed for stable final output, especially JSON scoring results
|
|
134
|
+
- usually pairs well with `enable_thinking=False` and `temperature=0`
|
|
135
|
+
- marks the call as `ok=False` with `error_type="EMPTY_ANSWER"` when the provider returns reasoning but no final answer
|
|
136
|
+
|
|
137
|
+
## JSON output helper
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from llm_io_normalizer.normalizers import extract_json_object
|
|
141
|
+
|
|
142
|
+
obj = extract_json_object('```json\n{"result": 2}\n```')
|
|
143
|
+
assert obj == {"result": 2}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Examples
|
|
147
|
+
|
|
148
|
+
The `examples/` directory contains runnable examples for:
|
|
149
|
+
|
|
150
|
+
- tested-model calls
|
|
151
|
+
- judge-model calls
|
|
152
|
+
- a tested-model → judge-model evaluation pipeline
|
|
153
|
+
|
|
154
|
+
Use environment variables for provider credentials and endpoints when running examples:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
export LLM_BASE_URL="https://your-provider.example/v1"
|
|
158
|
+
export LLM_API_KEY="your-api-key"
|
|
159
|
+
export LLM_MODEL="your-model-name"
|
|
160
|
+
python examples/judge_model.py
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Development
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
pip install -e ".[dev]"
|
|
167
|
+
ruff check .
|
|
168
|
+
pytest
|
|
169
|
+
python -m build
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Release
|
|
173
|
+
|
|
174
|
+
Recommended PyPI release mode is **Trusted Publishing** from GitHub Actions.
|
|
175
|
+
Configure the PyPI project to trust this repository workflow, then publish a GitHub release tag such as `v0.1.0`.
|
|
176
|
+
|
|
177
|
+
## Scope
|
|
178
|
+
|
|
179
|
+
This package is intentionally **not** a full API gateway.
|
|
180
|
+
It does not implement authentication, rate limiting, billing, routing dashboards, or multi-tenant governance.
|
|
181
|
+
Those can be handled by an outer gateway such as Kong, APISIX, Envoy, Portkey, or other infrastructure.
|
|
182
|
+
|
|
183
|
+
`llm-io-normalizer` focuses on the reusable Python SDK layer:
|
|
184
|
+
|
|
185
|
+
- Model I/O normalization
|
|
186
|
+
- reasoning / answer separation
|
|
187
|
+
- stream / non-stream fallback
|
|
188
|
+
- tested-model and judge-model call policies
|
|
189
|
+
- unified result/error contract
|
|
190
|
+
- simple JSON object extraction from model output
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# llm-io-normalizer
|
|
2
|
+
|
|
3
|
+
`llm-io-normalizer` is a lightweight **Model I/O normalization layer** for OpenAI-compatible LLM calls.
|
|
4
|
+
It is built for applications that call both **tested models** and **judge models** and need a stable result contract instead of provider-specific response parsing.
|
|
5
|
+
|
|
6
|
+
It normalizes common LLM response differences such as:
|
|
7
|
+
|
|
8
|
+
- `message.content` vs `message.reasoning`
|
|
9
|
+
- `delta.content` vs `delta.reasoning` / `delta.reasoning_content`
|
|
10
|
+
- `<think>...</think>` reasoning mixed into normal content
|
|
11
|
+
- stream vs non-stream completion behavior
|
|
12
|
+
- successful HTTP responses that contain reasoning but no final answer
|
|
13
|
+
- final JSON extraction from model output
|
|
14
|
+
|
|
15
|
+
The public contract is intentionally small and stable:
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
result.answer_text
|
|
19
|
+
result.reasoning_text
|
|
20
|
+
result.ok
|
|
21
|
+
result.error_type
|
|
22
|
+
result.error_message
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Business code should depend on these normalized fields instead of reading raw provider fields directly.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
From PyPI:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install llm-io-normalizer
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
From a local checkout:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install -e .
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
For development:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install -e ".[dev]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick start
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import asyncio
|
|
51
|
+
|
|
52
|
+
from llm_io_normalizer import LLMGateway, LLMRequest, LLMRole
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
async def main() -> None:
|
|
56
|
+
gateway = LLMGateway()
|
|
57
|
+
|
|
58
|
+
result = await gateway.generate(
|
|
59
|
+
LLMRequest(
|
|
60
|
+
role=LLMRole.JUDGE_MODEL,
|
|
61
|
+
model_name="your-model-name",
|
|
62
|
+
base_url="https://example.com/v1",
|
|
63
|
+
api_key="YOUR_API_KEY",
|
|
64
|
+
messages=[
|
|
65
|
+
{"role": "system", "content": "You are a strict JSON judge."},
|
|
66
|
+
{"role": "user", "content": "Return {\"result\": 2}."},
|
|
67
|
+
],
|
|
68
|
+
# Judge models should usually prefer stable non-stream output.
|
|
69
|
+
stream=False,
|
|
70
|
+
enable_thinking=False,
|
|
71
|
+
temperature=0,
|
|
72
|
+
max_tokens=1024,
|
|
73
|
+
)
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
result.require_ok()
|
|
77
|
+
print(result.answer_text)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
asyncio.run(main())
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Core concepts
|
|
84
|
+
|
|
85
|
+
### Tested model calls
|
|
86
|
+
|
|
87
|
+
`LLMRole.TESTED_MODEL` is for the model being evaluated or observed.
|
|
88
|
+
|
|
89
|
+
Default behavior:
|
|
90
|
+
|
|
91
|
+
- streams by default unless `stream=False` is provided
|
|
92
|
+
- can collect native reasoning fields from compatible providers
|
|
93
|
+
- can split `<think>...</think>` blocks out of the answer
|
|
94
|
+
- returns clean `answer_text` and separate `reasoning_text`
|
|
95
|
+
- can retry without thinking when the provider returns no final answer
|
|
96
|
+
|
|
97
|
+
### Judge model calls
|
|
98
|
+
|
|
99
|
+
`LLMRole.JUDGE_MODEL` is for scoring, evaluation, moderation, ranking, or structured judgment tasks.
|
|
100
|
+
|
|
101
|
+
Default behavior:
|
|
102
|
+
|
|
103
|
+
- uses non-stream mode by default unless `stream=True` is provided
|
|
104
|
+
- is designed for stable final output, especially JSON scoring results
|
|
105
|
+
- usually pairs well with `enable_thinking=False` and `temperature=0`
|
|
106
|
+
- marks the call as `ok=False` with `error_type="EMPTY_ANSWER"` when the provider returns reasoning but no final answer
|
|
107
|
+
|
|
108
|
+
## JSON output helper
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from llm_io_normalizer.normalizers import extract_json_object
|
|
112
|
+
|
|
113
|
+
obj = extract_json_object('```json\n{"result": 2}\n```')
|
|
114
|
+
assert obj == {"result": 2}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Examples
|
|
118
|
+
|
|
119
|
+
The `examples/` directory contains runnable examples for:
|
|
120
|
+
|
|
121
|
+
- tested-model calls
|
|
122
|
+
- judge-model calls
|
|
123
|
+
- a tested-model → judge-model evaluation pipeline
|
|
124
|
+
|
|
125
|
+
Use environment variables for provider credentials and endpoints when running examples:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
export LLM_BASE_URL="https://your-provider.example/v1"
|
|
129
|
+
export LLM_API_KEY="your-api-key"
|
|
130
|
+
export LLM_MODEL="your-model-name"
|
|
131
|
+
python examples/judge_model.py
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Development
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
pip install -e ".[dev]"
|
|
138
|
+
ruff check .
|
|
139
|
+
pytest
|
|
140
|
+
python -m build
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Release
|
|
144
|
+
|
|
145
|
+
Recommended PyPI release mode is **Trusted Publishing** from GitHub Actions.
|
|
146
|
+
Configure the PyPI project to trust this repository workflow, then publish a GitHub release tag such as `v0.1.0`.
|
|
147
|
+
|
|
148
|
+
## Scope
|
|
149
|
+
|
|
150
|
+
This package is intentionally **not** a full API gateway.
|
|
151
|
+
It does not implement authentication, rate limiting, billing, routing dashboards, or multi-tenant governance.
|
|
152
|
+
Those can be handled by an outer gateway such as Kong, APISIX, Envoy, Portkey, or other infrastructure.
|
|
153
|
+
|
|
154
|
+
`llm-io-normalizer` focuses on the reusable Python SDK layer:
|
|
155
|
+
|
|
156
|
+
- Model I/O normalization
|
|
157
|
+
- reasoning / answer separation
|
|
158
|
+
- stream / non-stream fallback
|
|
159
|
+
- tested-model and judge-model call policies
|
|
160
|
+
- unified result/error contract
|
|
161
|
+
- simple JSON object extraction from model output
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "llm-io-normalizer"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A lightweight Model I/O normalization layer for OpenAI-compatible LLM calls."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "llm-io-normalizer contributors"}
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"llm",
|
|
17
|
+
"model-io",
|
|
18
|
+
"openai-compatible",
|
|
19
|
+
"reasoning",
|
|
20
|
+
"streaming",
|
|
21
|
+
"normalization",
|
|
22
|
+
"judge-model",
|
|
23
|
+
"tested-model",
|
|
24
|
+
"structured-output"
|
|
25
|
+
]
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Development Status :: 3 - Alpha",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
34
|
+
"Typing :: Typed",
|
|
35
|
+
]
|
|
36
|
+
dependencies = [
|
|
37
|
+
"openai>=1.0.0",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=8",
|
|
43
|
+
"pytest-asyncio>=0.23",
|
|
44
|
+
"ruff>=0.6",
|
|
45
|
+
"build>=1.2",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/wanghesong2019/llm-io-normalizer"
|
|
50
|
+
Repository = "https://github.com/wanghesong2019/llm-io-normalizer"
|
|
51
|
+
Issues = "https://github.com/wanghesong2019/llm-io-normalizer/issues"
|
|
52
|
+
|
|
53
|
+
[tool.setuptools.packages.find]
|
|
54
|
+
where = ["src"]
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.package-data]
|
|
57
|
+
llm_io_normalizer = ["py.typed"]
|
|
58
|
+
|
|
59
|
+
[tool.pytest.ini_options]
|
|
60
|
+
asyncio_mode = "auto"
|
|
61
|
+
testpaths = ["tests"]
|
|
62
|
+
|
|
63
|
+
[tool.ruff]
|
|
64
|
+
line-length = 100
|
|
65
|
+
src = ["src", "tests", "examples"]
|
|
66
|
+
|
|
67
|
+
[tool.ruff.lint]
|
|
68
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
69
|
+
ignore = ["E501"]
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from llm_io_normalizer.normalizers import normalize_reasoning_answer
|
|
9
|
+
from llm_io_normalizer.schemas import LLMMode, LLMRequest, LLMResult, LLMRole
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OpenAICompatibleGateway:
|
|
15
|
+
"""OpenAI-compatible adapter with Model IO normalization.
|
|
16
|
+
|
|
17
|
+
This adapter intentionally keeps provider-specific return fields out of
|
|
18
|
+
business code. It collects content/reasoning from common stream and
|
|
19
|
+
non-stream locations, then normalizes them into answer_text/reasoning_text.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def __init__(self, *, default_timeout: float | None = 120) -> None:
|
|
23
|
+
self.default_timeout = default_timeout
|
|
24
|
+
|
|
25
|
+
async def generate(self, request: LLMRequest) -> LLMResult:
    """Run one completion and apply the request's fallback policies.

    The call is made in stream or non-stream mode as chosen by
    ``_select_stream_default``. Two recoveries are attempted, both only when
    the failed result carries ``error_type == "EMPTY_ANSWER"``:

    * stream -> non-stream, when ``request.fallback_to_non_stream`` is set;
    * thinking -> no thinking, for tested models in non-stream mode, when
      ``request.retry_without_thinking_when_empty`` is set and
      ``request.enable_thinking is True``.

    Args:
        request: The normalized request describing model, messages and policy.

    Returns:
        The first successful result, otherwise the result of the last attempt.
    """
    role = request.normalized_role()
    use_stream = self._select_stream_default(request, role)

    if use_stream:
        result = await self._generate_stream(request)
        if result.ok:
            return result

        if request.fallback_to_non_stream and result.error_type == "EMPTY_ANSWER":
            logger.warning(
                "[LLMGatewayFallback] model=%s role=%s action=retry_non_stream reason=%s",
                request.model_name,
                role.value,
                result.error_message,
            )
            # Re-enter generate() with streaming pinned off instead of calling
            # _generate_non_stream() directly. Otherwise the retry-without-thinking
            # policy below is never reached for requests that started in stream
            # mode, which is the default for tested models.
            # Assumes with_updates(stream=False) yields a request whose
            # ``stream`` is False, so the recursion takes the non-stream path.
            return await self.generate(request.with_updates(stream=False))
        return result

    result = await self._generate_non_stream(request)
    if result.ok:
        return result

    # For tested models, if thinking consumed the final answer, retry once
    # without thinking. The retried request has enable_thinking=False, so this
    # branch cannot fire a second time.
    if (
        role == LLMRole.TESTED_MODEL
        and request.retry_without_thinking_when_empty
        and request.enable_thinking is True
        and result.error_type == "EMPTY_ANSWER"
    ):
        logger.warning(
            "[LLMGatewayFallback] model=%s role=%s action=retry_without_thinking",
            request.model_name,
            role.value,
        )
        return await self.generate(request.with_updates(enable_thinking=False))

    return result
|
|
63
|
+
|
|
64
|
+
def _select_stream_default(self, request: LLMRequest, role: LLMRole) -> bool:
|
|
65
|
+
if request.stream is not None:
|
|
66
|
+
return bool(request.stream)
|
|
67
|
+
# Judge calls should prefer non-stream because final JSON should be stable.
|
|
68
|
+
if role == LLMRole.JUDGE_MODEL:
|
|
69
|
+
return False
|
|
70
|
+
return True
|
|
71
|
+
|
|
72
|
+
def _client(self, request: LLMRequest):
    """Create an ``openai.OpenAI`` client configured from the request.

    Only settings that are actually present are forwarded: ``api_key`` and
    ``base_url`` when truthy, and a timeout when either the request or this
    gateway provides one (the request's value takes precedence).

    Args:
        request: Request supplying ``api_key``, ``base_url`` and ``timeout``.

    Returns:
        A new ``OpenAI`` client instance.
    """
    # Imported lazily so the module can be imported without touching openai.
    from openai import OpenAI

    options: dict[str, Any] = {}
    for name in ("api_key", "base_url"):
        value = getattr(request, name)
        if value:
            options[name] = value

    effective_timeout = self.default_timeout if request.timeout is None else request.timeout
    if effective_timeout is not None:
        options["timeout"] = effective_timeout
    return OpenAI(**options)
|
|
84
|
+
|
|
85
|
+
def _extra_body(self, request: LLMRequest) -> dict[str, Any] | None:
|
|
86
|
+
body = dict(request.extra_body or {})
|
|
87
|
+
if request.enable_thinking is not None:
|
|
88
|
+
body.setdefault("enable_thinking", request.enable_thinking)
|
|
89
|
+
return body or None
|
|
90
|
+
|
|
91
|
+
def _common_kwargs(self, request: LLMRequest) -> dict[str, Any]:
|
|
92
|
+
kwargs: dict[str, Any] = {
|
|
93
|
+
"model": request.model_name,
|
|
94
|
+
"messages": request.messages,
|
|
95
|
+
}
|
|
96
|
+
if request.temperature is not None:
|
|
97
|
+
kwargs["temperature"] = request.temperature
|
|
98
|
+
if request.top_p is not None:
|
|
99
|
+
kwargs["top_p"] = request.top_p
|
|
100
|
+
if request.max_tokens is not None:
|
|
101
|
+
kwargs["max_tokens"] = request.max_tokens
|
|
102
|
+
extra_body = self._extra_body(request)
|
|
103
|
+
if extra_body is not None:
|
|
104
|
+
kwargs["extra_body"] = extra_body
|
|
105
|
+
return kwargs
|
|
106
|
+
|
|
107
|
+
async def _generate_non_stream(self, request: LLMRequest) -> LLMResult:
    """Perform a single non-streaming completion and normalize the response.

    The blocking provider call runs in a worker thread. Any exception from it
    is logged and converted into a ``PROVIDER_ERROR`` result; a response with
    no choices becomes an ``EMPTY_CHOICES`` result. Otherwise the first
    choice is handed to ``_finalize_result``.

    Args:
        request: The request to send.

    Returns:
        The normalized result for this attempt.
    """
    client = self._client(request)
    call_kwargs = self._common_kwargs(request)
    call_kwargs["stream"] = False

    logger.info(
        "[LLMGatewayRequest] mode=non_stream role=%s model=%s messages=%s",
        request.normalized_role().value,
        request.model_name,
        len(request.messages),
    )

    def _call_provider() -> Any:
        return client.chat.completions.create(**call_kwargs)

    try:
        response = await asyncio.to_thread(_call_provider)
    except Exception as exc:
        logger.exception("[LLMGatewayError] mode=non_stream model=%s", request.model_name)
        return LLMResult(
            ok=False,
            model_name=request.model_name,
            mode=LLMMode.NON_STREAM,
            error_type="PROVIDER_ERROR",
            error_message=str(exc),
        )

    choices = response.choices
    if not choices:
        return LLMResult(
            ok=False,
            model_name=request.model_name,
            mode=LLMMode.NON_STREAM,
            error_type="EMPTY_CHOICES",
            error_message="Provider returned no choices.",
        )

    # Only the first choice is considered.
    first = choices[0]
    message = first.message
    finish_reason = getattr(first, "finish_reason", None)

    return self._finalize_result(
        request=request,
        mode=LLMMode.NON_STREAM,
        content_text=getattr(message, "content", None) or "",
        native_reasoning_text=self._extract_message_reasoning(message),
        finish_reason=finish_reason,
        usage=self._safe_model_dump(getattr(response, "usage", None)),
        raw_chunks_sample=[],
    )
|
|
157
|
+
|
|
158
|
+
async def _generate_stream(self, request: LLMRequest) -> LLMResult:
    """Perform a streaming completion and normalize the accumulated output.

    The stream is both opened and fully consumed inside a worker thread.
    Content and reasoning deltas of the first choice of every chunk are
    accumulated, the last non-empty ``finish_reason`` is kept, and the first
    three chunks are kept as a raw sample.

    Args:
        request: The request to send.

    Returns:
        The normalized result, with ``metadata["chunk_count"]`` set to the
        number of chunks received. If the provider call or the iteration
        raises, a ``PROVIDER_ERROR`` result carrying the chunks sampled so far.
    """
    client = self._client(request)
    kwargs = self._common_kwargs(request)
    kwargs["stream"] = True

    logger.info(
        "[LLMGatewayRequest] mode=stream role=%s model=%s messages=%s",
        request.normalized_role().value,
        request.model_name,
        len(request.messages),
    )

    content_parts: list[str] = []
    reasoning_parts: list[str] = []
    raw_chunks_sample: list[dict[str, Any]] = []
    finish_reason: str | None = None
    usage: dict[str, Any] = {}
    chunk_count = 0

    def _consume_stream() -> None:
        """Open the stream and drain it; runs in a worker thread."""
        nonlocal finish_reason, usage, chunk_count
        for chunk in client.chat.completions.create(**kwargs):
            chunk_count += 1
            if len(raw_chunks_sample) < 3:
                raw_chunks_sample.append(self._safe_model_dump(chunk))

            # Usage, when a provider sends it, may arrive on a chunk that has
            # no choices, so read it before the choices guard below.
            chunk_usage = getattr(chunk, "usage", None)
            if chunk_usage:
                usage = self._safe_model_dump(chunk_usage)

            if not getattr(chunk, "choices", None):
                continue
            choice = chunk.choices[0]
            if getattr(choice, "finish_reason", None):
                finish_reason = choice.finish_reason
            delta = getattr(choice, "delta", None)
            if delta is None:
                continue

            content_parts.append(self._extract_delta_content(delta))
            reasoning_parts.append(self._extract_delta_reasoning(delta))

    try:
        # Iterating the synchronous stream blocks on network reads for every
        # chunk, so the whole loop (not just the initial create() call) must
        # stay off the event loop thread.
        await asyncio.to_thread(_consume_stream)
    except Exception as exc:
        logger.exception("[LLMGatewayError] mode=stream model=%s", request.model_name)
        return LLMResult(
            ok=False,
            model_name=request.model_name,
            mode=LLMMode.STREAM,
            error_type="PROVIDER_ERROR",
            error_message=str(exc),
            raw_chunks_sample=raw_chunks_sample,
        )

    result = self._finalize_result(
        request=request,
        mode=LLMMode.STREAM,
        content_text="".join(content_parts),
        native_reasoning_text="".join(reasoning_parts),
        finish_reason=finish_reason,
        usage=usage,
        raw_chunks_sample=raw_chunks_sample,
    )
    result.metadata.update({"chunk_count": chunk_count})
    return result
|
|
216
|
+
|
|
217
|
+
def _finalize_result(
    self,
    *,
    request: LLMRequest,
    mode: LLMMode,
    content_text: str,
    native_reasoning_text: str,
    finish_reason: str | None,
    usage: dict[str, Any],
    raw_chunks_sample: list[dict[str, Any]],
) -> LLMResult:
    """Turn raw collected text into the final ``LLMResult``.

    The content and native reasoning are passed through
    ``normalize_reasoning_answer``. The result is ``ok`` only when the
    normalized answer contains non-whitespace text; otherwise it is marked
    ``EMPTY_ANSWER`` with a message that says whether the provider sent
    native reasoning.

    Args:
        request: The originating request (model name and role are read).
        mode: Whether the text came from a stream or non-stream call.
        content_text: Raw answer content as collected from the provider.
        native_reasoning_text: Reasoning collected from dedicated fields.
        finish_reason: Finish reason reported by the provider, if any.
        usage: Usage mapping to attach to the result.
        raw_chunks_sample: Sample of raw stream chunks to attach.

    Returns:
        The populated result; ``metadata`` holds the role value.
    """
    role_value = request.normalized_role().value
    normalized = normalize_reasoning_answer(
        content_text=content_text,
        native_reasoning_text=native_reasoning_text,
        role=role_value,
    )
    answer, reasoning = normalized.answer_text, normalized.reasoning_text

    if answer.strip():
        ok, error_type, error_message = True, None, None
    else:
        ok, error_type = False, "EMPTY_ANSWER"
        error_message = (
            "Provider returned reasoning but no final answer content."
            if native_reasoning_text.strip()
            else "Provider returned no final answer content."
        )

    logger.info(
        "[LLMGatewayResult] mode=%s role=%s model=%s ok=%s answer_len=%s reasoning_len=%s parse=%s",
        mode.value,
        role_value,
        request.model_name,
        ok,
        len(answer or ""),
        len(reasoning or ""),
        normalized.parse_method,
    )

    return LLMResult(
        ok=ok,
        model_name=request.model_name,
        mode=mode,
        answer_text=answer,
        reasoning_text=reasoning,
        raw_text=normalized.raw_text,
        content_text=content_text,
        native_reasoning_text=native_reasoning_text,
        finish_reason=finish_reason,
        usage=usage,
        parse_method=normalized.parse_method,
        confidence=normalized.confidence,
        error_type=error_type,
        error_message=error_message,
        raw_chunks_sample=raw_chunks_sample,
        metadata={"role": role_value},
    )
|
|
275
|
+
|
|
276
|
+
def _extract_message_reasoning(self, msg: Any) -> str:
|
|
277
|
+
parts: list[str] = []
|
|
278
|
+
for field in ("reasoning", "reasoning_content", "thoughts", "reason"):
|
|
279
|
+
value = getattr(msg, field, None)
|
|
280
|
+
if value:
|
|
281
|
+
parts.append(str(value))
|
|
282
|
+
if isinstance(msg, dict):
|
|
283
|
+
for field in ("reasoning", "reasoning_content", "thoughts", "reason"):
|
|
284
|
+
if msg.get(field):
|
|
285
|
+
parts.append(str(msg[field]))
|
|
286
|
+
return "".join(parts)
|
|
287
|
+
|
|
288
|
+
def _extract_delta_content(self, delta: Any) -> str:
|
|
289
|
+
value = getattr(delta, "content", None)
|
|
290
|
+
if value:
|
|
291
|
+
return str(value)
|
|
292
|
+
if isinstance(delta, dict) and delta.get("content"):
|
|
293
|
+
return str(delta["content"])
|
|
294
|
+
return ""
|
|
295
|
+
|
|
296
|
+
def _extract_delta_reasoning(self, delta: Any) -> str:
|
|
297
|
+
parts: list[str] = []
|
|
298
|
+
for field in ("reasoning", "reasoning_content", "thoughts", "reason"):
|
|
299
|
+
value = getattr(delta, field, None)
|
|
300
|
+
if value:
|
|
301
|
+
parts.append(str(value))
|
|
302
|
+
if isinstance(delta, dict):
|
|
303
|
+
for field in ("reasoning", "reasoning_content", "thoughts", "reason"):
|
|
304
|
+
if delta.get(field):
|
|
305
|
+
parts.append(str(delta[field]))
|
|
306
|
+
return "".join(parts)
|
|
307
|
+
|
|
308
|
+
def _safe_model_dump(self, obj: Any) -> dict[str, Any]:
|
|
309
|
+
if obj is None:
|
|
310
|
+
return {}
|
|
311
|
+
if isinstance(obj, dict):
|
|
312
|
+
return obj
|
|
313
|
+
if hasattr(obj, "model_dump"):
|
|
314
|
+
try:
|
|
315
|
+
return obj.model_dump()
|
|
316
|
+
except Exception:
|
|
317
|
+
pass
|
|
318
|
+
if hasattr(obj, "dict"):
|
|
319
|
+
try:
|
|
320
|
+
return obj.dict()
|
|
321
|
+
except Exception:
|
|
322
|
+
pass
|
|
323
|
+
try:
|
|
324
|
+
return json.loads(json.dumps(obj, default=str))
|
|
325
|
+
except Exception:
|
|
326
|
+
return {"repr": repr(obj)}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
class LLMGatewayError(RuntimeError):
    """Root of the gateway's exception hierarchy.

    Catching this type also catches every more specific gateway error that
    derives from it.
    """
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LLMEmptyOutputError(LLMGatewayError):
    """Signals an HTTP-successful provider response with no usable final answer."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LLMProviderError(LLMGatewayError):
    """Signals that the call to the provider itself failed."""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from llm_io_normalizer.adapters import OpenAICompatibleGateway
|
|
4
|
+
from llm_io_normalizer.schemas import LLMRequest, LLMResult
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LLMGateway:
    """Facade for Model IO normalization.

    The first release delegates to an OpenAI-compatible adapter. Additional
    adapters can be added without changing business code.
    """

    def __init__(self, adapter: OpenAICompatibleGateway | None = None) -> None:
        """Store the adapter that will serve every ``generate`` call.

        Args:
            adapter: Adapter to delegate to. When ``None``, a default
                ``OpenAICompatibleGateway()`` is created.
        """
        # Test against None explicitly: ``adapter or Default()`` would
        # silently replace a caller-supplied adapter that happens to be
        # falsy (e.g. one defining ``__bool__`` or ``__len__``).
        self.adapter = OpenAICompatibleGateway() if adapter is None else adapter

    async def generate(self, request: LLMRequest) -> LLMResult:
        """Forward *request* to the adapter and return its result unchanged."""
        return await self.adapter.generate(request)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from .json_output import extract_json_object
|
|
2
|
+
from .reasoning import NormalizedText, normalize_reasoning_answer, split_think_tag, strip_think_tags
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"NormalizedText",
|
|
6
|
+
"extract_json_object",
|
|
7
|
+
"normalize_reasoning_answer",
|
|
8
|
+
"split_think_tag",
|
|
9
|
+
"strip_think_tags",
|
|
10
|
+
]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_json_object(text: str) -> dict[str, Any] | None:
|
|
9
|
+
"""Extract and parse the first JSON object from model output."""
|
|
10
|
+
if not text:
|
|
11
|
+
return None
|
|
12
|
+
|
|
13
|
+
cleaned = text.strip()
|
|
14
|
+
try:
|
|
15
|
+
parsed = json.loads(cleaned)
|
|
16
|
+
if isinstance(parsed, dict):
|
|
17
|
+
return parsed
|
|
18
|
+
except Exception:
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
# Remove markdown fences if present.
|
|
22
|
+
cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.IGNORECASE).strip()
|
|
23
|
+
cleaned = re.sub(r"\s*```$", "", cleaned).strip()
|
|
24
|
+
try:
|
|
25
|
+
parsed = json.loads(cleaned)
|
|
26
|
+
if isinstance(parsed, dict):
|
|
27
|
+
return parsed
|
|
28
|
+
except Exception:
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
# Last resort: find the first {...} block.
|
|
32
|
+
match = re.search(r"\{.*\}", cleaned, flags=re.DOTALL)
|
|
33
|
+
if not match:
|
|
34
|
+
return None
|
|
35
|
+
try:
|
|
36
|
+
parsed = json.loads(match.group(0))
|
|
37
|
+
if isinstance(parsed, dict):
|
|
38
|
+
return parsed
|
|
39
|
+
except Exception:
|
|
40
|
+
return None
|
|
41
|
+
return None
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
# Matches one complete <think>...</think> block, case-insensitively and across
# newlines; group 1 is the block's inner text without surrounding whitespace.
_THINK_RE = re.compile(
    r"<think>\s*(.*?)\s*</think>",
    flags=re.DOTALL | re.IGNORECASE,
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class NormalizedText:
    """Immutable result of separating model reasoning from the final answer."""

    # Reasoning portion of the output ("" when none was found).
    reasoning_text: str
    # Final answer with the reasoning removed.
    answer_text: str
    # The content text the split was computed from.
    raw_text: str
    # Label of the strategy that produced this result; this module emits
    # "think_tag", "native_reasoning" and "content_only".
    parse_method: str
    # Score attached by the strategy (this module uses 0.95 and 0.70).
    confidence: float
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def strip_think_tags(text: str) -> str:
    """Remove think blocks and any leftover think tags from *text*.

    Complete ``<think>...</think>`` blocks are dropped together with their
    contents. Unpaired ``<think>`` / ``</think>`` tags are then removed
    while the text around them is kept.

    Returns:
        The cleaned text with surrounding whitespace stripped, or ``""``
        for empty input.
    """
    if not text:
        return ""
    without_blocks = _THINK_RE.sub("", text)
    # Stray tags are matched case-insensitively, like _THINK_RE itself, so
    # that e.g. an unpaired "<THINK>" does not survive in the output.
    return re.sub(r"</?think>", "", without_blocks, flags=re.IGNORECASE).strip()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def split_think_tag(raw_text: str) -> NormalizedText | None:
    """Split '<think>...</think>final answer' style output.

    Every complete think block is treated as reasoning: the inner texts are
    joined with a blank line into ``reasoning_text`` and all blocks are
    removed from the answer. Unpaired think tags left in the answer are
    dropped case-insensitively.

    Args:
        raw_text: Model output; ``None`` or ``""`` is treated as empty.

    Returns:
        A ``NormalizedText`` with ``parse_method="think_tag"``, or ``None``
        when *raw_text* contains no complete think block.
    """
    raw_text = raw_text or ""
    segments = [found.group(1).strip() for found in _THINK_RE.finditer(raw_text)]
    if not segments:
        return None

    # Removing only the first block would leak the reasoning of any later
    # block into the answer, so all of them are cut out here.
    answer = _THINK_RE.sub("", raw_text)
    answer = re.sub(r"</?think>", "", answer, flags=re.IGNORECASE).strip()

    return NormalizedText(
        reasoning_text="\n\n".join(segment for segment in segments if segment),
        answer_text=answer,
        raw_text=raw_text,
        parse_method="think_tag",
        confidence=0.95,
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def normalize_reasoning_answer(
    *,
    content_text: str,
    native_reasoning_text: str = "",
    role: str = "tested_model",
) -> NormalizedText:
    """Normalize provider-specific content/reasoning channels.

    Resolution order:

    1. A non-blank native reasoning channel wins: it becomes the reasoning
       and the answer is ``content_text`` with think tags stripped.
    2. Otherwise, if the content holds ``<think>...</think>``, it is split.
    3. Otherwise the whole (stripped) content is the answer.

    Args:
        content_text: Text from the provider's content channel.
        native_reasoning_text: Text from a dedicated reasoning channel.
        role: Accepted for interface compatibility; this function does not
            read it.

    Returns:
        The normalized split; ``raw_text`` is always the content text.
    """
    content = content_text or ""
    native = (native_reasoning_text or "").strip()

    if native:
        # Do not merge reasoning into answer; keep final answer clean.
        return NormalizedText(
            reasoning_text=native,
            answer_text=strip_think_tags(content),
            raw_text=content,
            parse_method="native_reasoning",
            confidence=0.95,
        )

    tagged = split_think_tag(content)
    if tagged is None:
        return NormalizedText(
            reasoning_text="",
            answer_text=content.strip(),
            raw_text=content,
            parse_method="content_only",
            confidence=0.70,
        )
    return tagged
|
|
File without changes
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field, replace
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class LLMRole(str, Enum):
    """Role-specific call policy.

    Members are ``str`` instances, so they compare equal to their values.
    """

    # The model being evaluated or observed.
    TESTED_MODEL = "tested_model"
    # The model that scores or judges output.
    JUDGE_MODEL = "judge_model"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LLMMode(str, Enum):
    """Completion mode label; members are ``str`` instances equal to their values."""

    STREAM = "stream"
    NON_STREAM = "non_stream"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class LLMRequest:
    """Portable request contract for an LLM call.

    `role` is deliberately part of the request because tested-model calls and
    judge-model calls should use different defaults.

    Instances are frozen; use ``with_updates`` to derive a modified copy.
    """

    # --- What to call ---------------------------------------------------
    model_name: str
    messages: list[dict[str, Any]]
    role: LLMRole | str = LLMRole.TESTED_MODEL

    # --- Endpoint / credentials -----------------------------------------
    api_key: str | None = None
    base_url: str | None = None

    # --- Generation options (None leaves the value unset on the request) -
    stream: bool | None = None
    enable_thinking: bool | None = None
    temperature: float | None = None
    top_p: float | None = None
    max_tokens: int | None = None

    # --- Output / transport ---------------------------------------------
    response_format: Literal["text", "json"] = "text"
    timeout: float | None = None
    extra_body: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    # Fallback behavior
    fallback_to_non_stream: bool = True
    retry_without_thinking_when_empty: bool = True

    def normalized_role(self) -> LLMRole:
        """Return ``role`` as an ``LLMRole`` member.

        A plain string is converted through ``LLMRole(...)``, which raises
        ``ValueError`` for a value that is not a member.
        """
        current = self.role
        return current if isinstance(current, LLMRole) else LLMRole(current)

    def with_updates(self, **updates: Any) -> LLMRequest:
        """Return a copy of this request with the given fields replaced."""
        return replace(self, **updates)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass(frozen=True)
class LLMResult:
    """Portable result contract returned by gateway.generate()."""

    # --- Outcome ---------------------------------------------------------
    ok: bool
    model_name: str
    mode: LLMMode | str

    # --- Normalized text -------------------------------------------------
    answer_text: str = ""
    reasoning_text: str = ""
    raw_text: str = ""

    # Low-level raw channels before final normalization
    content_text: str = ""
    native_reasoning_text: str = ""

    # --- Provider details --------------------------------------------------
    finish_reason: str | None = None
    usage: dict[str, Any] = field(default_factory=dict)

    # --- How the text was normalized ---------------------------------------
    parse_method: str | None = None
    confidence: float | None = None

    # --- Failure details, reported by require_ok() -------------------------
    error_type: str | None = None
    error_message: str | None = None

    # --- Diagnostics -------------------------------------------------------
    raw_chunks_sample: list[dict[str, Any]] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def require_ok(self) -> LLMResult:
        """Return this result when ``ok`` is true, otherwise raise.

        Raises:
            RuntimeError: When ``ok`` is false; the message carries
                ``error_type`` and ``error_message``.
        """
        if self.ok:
            return self
        raise RuntimeError(f"LLM call failed: {self.error_type}: {self.error_message}")
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-io-normalizer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight Model I/O normalization layer for OpenAI-compatible LLM calls.
|
|
5
|
+
Author: llm-io-normalizer contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/wanghesong2019/llm-io-normalizer
|
|
8
|
+
Project-URL: Repository, https://github.com/wanghesong2019/llm-io-normalizer
|
|
9
|
+
Project-URL: Issues, https://github.com/wanghesong2019/llm-io-normalizer/issues
|
|
10
|
+
Keywords: llm,model-io,openai-compatible,reasoning,streaming,normalization,judge-model,tested-model,structured-output
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: openai>=1.0.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
26
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
27
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# llm-io-normalizer
|
|
31
|
+
|
|
32
|
+
`llm-io-normalizer` is a lightweight **Model I/O normalization layer** for OpenAI-compatible LLM calls.
|
|
33
|
+
It is built for applications that call both **tested models** and **judge models** and need a stable result contract instead of provider-specific response parsing.
|
|
34
|
+
|
|
35
|
+
It normalizes common LLM response differences such as:
|
|
36
|
+
|
|
37
|
+
- `message.content` vs `message.reasoning`
|
|
38
|
+
- `delta.content` vs `delta.reasoning` / `delta.reasoning_content`
|
|
39
|
+
- `<think>...</think>` reasoning mixed into normal content
|
|
40
|
+
- stream vs non-stream completion behavior
|
|
41
|
+
- successful HTTP responses that contain reasoning but no final answer
|
|
42
|
+
- final JSON extraction from model output
|
|
43
|
+
|
|
44
|
+
The public contract is intentionally small and stable:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
result.answer_text
|
|
48
|
+
result.reasoning_text
|
|
49
|
+
result.ok
|
|
50
|
+
result.error_type
|
|
51
|
+
result.error_message
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Business code should depend on these normalized fields instead of reading raw provider fields directly.
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
After publishing to PyPI:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install llm-io-normalizer
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
From a local checkout:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
For development:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install -e ".[dev]"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Quick start
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import asyncio
|
|
80
|
+
|
|
81
|
+
from llm_io_normalizer import LLMGateway, LLMRequest, LLMRole
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def main() -> None:
|
|
85
|
+
gateway = LLMGateway()
|
|
86
|
+
|
|
87
|
+
result = await gateway.generate(
|
|
88
|
+
LLMRequest(
|
|
89
|
+
role=LLMRole.JUDGE_MODEL,
|
|
90
|
+
model_name="your-model-name",
|
|
91
|
+
base_url="https://example.com/v1",
|
|
92
|
+
api_key="YOUR_API_KEY",
|
|
93
|
+
messages=[
|
|
94
|
+
{"role": "system", "content": "You are a strict JSON judge."},
|
|
95
|
+
{"role": "user", "content": "Return {\"result\": 2}."},
|
|
96
|
+
],
|
|
97
|
+
# Judge models should usually prefer stable non-stream output.
|
|
98
|
+
stream=False,
|
|
99
|
+
enable_thinking=False,
|
|
100
|
+
temperature=0,
|
|
101
|
+
max_tokens=1024,
|
|
102
|
+
)
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
result.require_ok()
|
|
106
|
+
print(result.answer_text)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
asyncio.run(main())
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Core concepts
|
|
113
|
+
|
|
114
|
+
### Tested model calls
|
|
115
|
+
|
|
116
|
+
`LLMRole.TESTED_MODEL` is for the model being evaluated or observed.
|
|
117
|
+
|
|
118
|
+
Default behavior:
|
|
119
|
+
|
|
120
|
+
- streams by default unless `stream=False` is provided
|
|
121
|
+
- can collect native reasoning fields from compatible providers
|
|
122
|
+
- can split `<think>...</think>` blocks out of the answer
|
|
123
|
+
- returns clean `answer_text` and separate `reasoning_text`
|
|
124
|
+
- can retry without thinking when the provider returns no final answer
|
|
125
|
+
|
|
126
|
+
### Judge model calls
|
|
127
|
+
|
|
128
|
+
`LLMRole.JUDGE_MODEL` is for scoring, evaluation, moderation, ranking, or structured judgment tasks.
|
|
129
|
+
|
|
130
|
+
Default behavior:
|
|
131
|
+
|
|
132
|
+
- uses non-stream mode by default unless `stream=True` is provided
|
|
133
|
+
- is designed for stable final output, especially JSON scoring results
|
|
134
|
+
- usually pairs well with `enable_thinking=False` and `temperature=0`
|
|
135
|
+
- marks the call as `ok=False` with `error_type="EMPTY_ANSWER"` when the provider returns reasoning but no final answer
|
|
136
|
+
|
|
137
|
+
## JSON output helper
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from llm_io_normalizer.normalizers import extract_json_object
|
|
141
|
+
|
|
142
|
+
obj = extract_json_object('```json\n{"result": 2}\n```')
|
|
143
|
+
assert obj == {"result": 2}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Examples
|
|
147
|
+
|
|
148
|
+
The `examples/` directory contains runnable examples for:
|
|
149
|
+
|
|
150
|
+
- tested-model calls
|
|
151
|
+
- judge-model calls
|
|
152
|
+
- a tested-model → judge-model evaluation pipeline
|
|
153
|
+
|
|
154
|
+
Use environment variables for provider credentials and endpoints when running examples:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
export LLM_BASE_URL="https://your-provider.example/v1"
|
|
158
|
+
export LLM_API_KEY="your-api-key"
|
|
159
|
+
export LLM_MODEL="your-model-name"
|
|
160
|
+
python examples/judge_model.py
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Development
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
pip install -e ".[dev]"
|
|
167
|
+
ruff check .
|
|
168
|
+
pytest
|
|
169
|
+
python -m build
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Release
|
|
173
|
+
|
|
174
|
+
Recommended PyPI release mode is **Trusted Publishing** from GitHub Actions.
|
|
175
|
+
Configure the PyPI project to trust this repository workflow, then publish a GitHub release tag such as `v0.1.0`.
|
|
176
|
+
|
|
177
|
+
## Scope
|
|
178
|
+
|
|
179
|
+
This package is intentionally **not** a full API gateway.
|
|
180
|
+
It does not implement authentication, rate limiting, billing, routing dashboards, or multi-tenant governance.
|
|
181
|
+
Those can be handled by an outer gateway such as Kong, APISIX, Envoy, Portkey, or other infrastructure.
|
|
182
|
+
|
|
183
|
+
`llm-io-normalizer` focuses on the reusable Python SDK layer:
|
|
184
|
+
|
|
185
|
+
- Model I/O normalization
|
|
186
|
+
- reasoning / answer separation
|
|
187
|
+
- stream / non-stream fallback
|
|
188
|
+
- tested-model and judge-model call policies
|
|
189
|
+
- unified result/error contract
|
|
190
|
+
- simple JSON object extraction from model output
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/llm_io_normalizer/__init__.py
|
|
5
|
+
src/llm_io_normalizer/errors.py
|
|
6
|
+
src/llm_io_normalizer/gateway.py
|
|
7
|
+
src/llm_io_normalizer/py.typed
|
|
8
|
+
src/llm_io_normalizer/schemas.py
|
|
9
|
+
src/llm_io_normalizer.egg-info/PKG-INFO
|
|
10
|
+
src/llm_io_normalizer.egg-info/SOURCES.txt
|
|
11
|
+
src/llm_io_normalizer.egg-info/dependency_links.txt
|
|
12
|
+
src/llm_io_normalizer.egg-info/requires.txt
|
|
13
|
+
src/llm_io_normalizer.egg-info/top_level.txt
|
|
14
|
+
src/llm_io_normalizer/adapters/__init__.py
|
|
15
|
+
src/llm_io_normalizer/adapters/openai_compatible.py
|
|
16
|
+
src/llm_io_normalizer/normalizers/__init__.py
|
|
17
|
+
src/llm_io_normalizer/normalizers/json_output.py
|
|
18
|
+
src/llm_io_normalizer/normalizers/reasoning.py
|
|
19
|
+
tests/test_gateway_stream_extraction.py
|
|
20
|
+
tests/test_json_output.py
|
|
21
|
+
tests/test_reasoning_normalizer.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
llm_io_normalizer
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from types import SimpleNamespace
|
|
2
|
+
|
|
3
|
+
from llm_io_normalizer.adapters.openai_compatible import OpenAICompatibleGateway
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_extract_delta_content_and_reasoning_attributes():
    """Attribute-style deltas yield their content and all reasoning fields joined."""
    chunk_delta = SimpleNamespace(content="answer", reasoning="think", reasoning_content="think2")
    adapter = OpenAICompatibleGateway()

    content = adapter._extract_delta_content(chunk_delta)
    reasoning = adapter._extract_delta_reasoning(chunk_delta)

    assert content == "answer"
    assert reasoning == "thinkthink2"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_extract_delta_dict():
    """Dict-style deltas are read through their keys."""
    chunk_delta = {"content": "answer", "reasoning": "think"}
    adapter = OpenAICompatibleGateway()

    content = adapter._extract_delta_content(chunk_delta)
    reasoning = adapter._extract_delta_reasoning(chunk_delta)

    assert content == "answer"
    assert reasoning == "think"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from llm_io_normalizer.normalizers import extract_json_object
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_extract_json_object_plain():
    """A bare JSON object is parsed as-is."""
    parsed = extract_json_object('{"结果": 2}')
    assert parsed == {"结果": 2}
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_extract_json_object_fenced():
    """A JSON object wrapped in a markdown code fence is still extracted."""
    parsed = extract_json_object('```json\n{"结果": 0}\n```')
    assert parsed == {"结果": 0}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_extract_json_object_embedded():
    """A JSON object surrounded by prose is still extracted."""
    parsed = extract_json_object('分析如下 {"结果": 1} 结束')
    assert parsed == {"结果": 1}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from llm_io_normalizer.normalizers import normalize_reasoning_answer, split_think_tag
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_split_think_tag():
    """A leading think block becomes the reasoning; the remainder is the answer."""
    outcome = split_think_tag("<think>plan</think>final")

    assert outcome is not None
    observed = (outcome.reasoning_text, outcome.answer_text, outcome.parse_method)
    assert observed[0] == "plan"
    assert observed[1] == "final"
    assert observed[2] == "think_tag"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_native_reasoning_wins():
    """A native reasoning channel is used as the reasoning; content is the answer."""
    outcome = normalize_reasoning_answer(content_text="final", native_reasoning_text="thought")

    observed = (outcome.reasoning_text, outcome.answer_text, outcome.parse_method)
    assert observed[0] == "thought"
    assert observed[1] == "final"
    assert observed[2] == "native_reasoning"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_content_only():
    """Content without reasoning of any kind is returned as the answer."""
    outcome = normalize_reasoning_answer(content_text="hello")

    observed = (outcome.reasoning_text, outcome.answer_text, outcome.parse_method)
    assert observed[0] == ""
    assert observed[1] == "hello"
    assert observed[2] == "content_only"
|