ff-aitoolkit 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ff_aitoolkit-0.2.0/.gitignore +28 -0
- ff_aitoolkit-0.2.0/LICENSE +21 -0
- ff_aitoolkit-0.2.0/PKG-INFO +159 -0
- ff_aitoolkit-0.2.0/README.md +119 -0
- ff_aitoolkit-0.2.0/pyproject.toml +71 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/__init__.py +66 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/config.py +107 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/embeddings/__init__.py +5 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/embeddings/client.py +133 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/exceptions.py +35 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/integrations/__init__.py +1 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/integrations/langchain.py +69 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/llm/__init__.py +5 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/llm/client.py +230 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/py.typed +0 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/rag/__init__.py +25 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/rag/agent.py +165 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/rag/query_expansion.py +147 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/rag/retriever.py +141 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/rag/vector_store.py +245 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/retry.py +51 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/stt/__init__.py +5 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/stt/client.py +147 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/tts/__init__.py +10 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/tts/audio.py +68 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/tts/client.py +219 -0
- ff_aitoolkit-0.2.0/src/aitoolkit/types.py +66 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
*.egg
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.eggs/
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# Local secrets / config
|
|
16
|
+
.env
|
|
17
|
+
|
|
18
|
+
# Tooling caches
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
.coverage
|
|
23
|
+
htmlcov/
|
|
24
|
+
|
|
25
|
+
# Editors / OS
|
|
26
|
+
.vscode/
|
|
27
|
+
.idea/
|
|
28
|
+
.DS_Store
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Faisal Fida
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ff-aitoolkit
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Centralized AI clients (LLM, embeddings, STT, TTS, RAG) for self-hosted OpenAI-compatible GPU services.
|
|
5
|
+
Project-URL: Homepage, https://github.com/CNIT-Organization/aitoolkit
|
|
6
|
+
Project-URL: Repository, https://github.com/CNIT-Organization/aitoolkit
|
|
7
|
+
Project-URL: Issues, https://github.com/CNIT-Organization/aitoolkit/issues
|
|
8
|
+
Author: Faisal Fida
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,embeddings,llm,openai,rag,self-hosted,stt,tts
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: httpx>=0.27
|
|
23
|
+
Requires-Dist: loguru>=0.7
|
|
24
|
+
Requires-Dist: openai>=1.50
|
|
25
|
+
Requires-Dist: pydantic-settings>=2.3
|
|
26
|
+
Requires-Dist: pydantic>=2.7
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Requires-Dist: langchain-core>=0.3; extra == 'all'
|
|
29
|
+
Requires-Dist: langchain-openai>=0.2; extra == 'all'
|
|
30
|
+
Requires-Dist: qdrant-client>=1.12; extra == 'all'
|
|
31
|
+
Requires-Dist: redis>=5.0; extra == 'all'
|
|
32
|
+
Provides-Extra: cache
|
|
33
|
+
Requires-Dist: redis>=5.0; extra == 'cache'
|
|
34
|
+
Provides-Extra: langchain
|
|
35
|
+
Requires-Dist: langchain-core>=0.3; extra == 'langchain'
|
|
36
|
+
Requires-Dist: langchain-openai>=0.2; extra == 'langchain'
|
|
37
|
+
Provides-Extra: rag
|
|
38
|
+
Requires-Dist: qdrant-client>=1.12; extra == 'rag'
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
# aitoolkit
|
|
42
|
+
|
|
43
|
+
Centralized AI clients — **LLM, Embeddings, Speech-to-Text, Text-to-Speech, and
|
|
44
|
+
RAG** — targeting self-hosted, OpenAI-compatible services. Reusable across
|
|
45
|
+
projects via a single git install.
|
|
46
|
+
|
|
47
|
+
## Why
|
|
48
|
+
|
|
49
|
+
Replaces scattered, provider-specific AI code (cloud-LLM wrappers, local Whisper,
|
|
50
|
+
ad-hoc LangChain) with one thin, stable, dependency-light package. The core is
|
|
51
|
+
**100% LangChain-free**; LangChain is an opt-in extra used only where LangGraph
|
|
52
|
+
orchestration needs it.
|
|
53
|
+
|
|
54
|
+
## Design principles
|
|
55
|
+
|
|
56
|
+
1. **Depend on stable interfaces, hide volatile implementations.** The public API
|
|
57
|
+
is a small set of clients + plain types; provider SDKs stay internal.
|
|
58
|
+
2. **Core is dependency-light and LangChain-free.** Heavy/optional things (qdrant,
|
|
59
|
+
redis, langchain) live behind extras and import lazily.
|
|
60
|
+
3. **No project specifics in the package.** Hosts, model ids, voices, collection
|
|
61
|
+
names, and keywords are parameters/config — never hardcoded.
|
|
62
|
+
4. **OpenAI-compatible first.** LLM, embeddings and STT use the `openai` SDK; only
|
|
63
|
+
TTS is a small custom `httpx` client.
|
|
64
|
+
5. **Async-first**, with sync convenience wrappers where ergonomics demand it.
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# core only (LLM, embeddings, STT, TTS)
|
|
70
|
+
pip install "ff-aitoolkit @ git+https://github.com/CNIT-Organization/aitoolkit.git@v0.2.0"
|
|
71
|
+
|
|
72
|
+
# with RAG (Qdrant) + caching + LangChain bridge
|
|
73
|
+
pip install "ff-aitoolkit[all] @ git+https://github.com/CNIT-Organization/aitoolkit.git@v0.2.0"
|
|
74
|
+
|
|
75
|
+
# pick exactly what a service needs
|
|
76
|
+
pip install "ff-aitoolkit[rag,cache] @ git+...@v0.2.0"
|
|
77
|
+
pip install "ff-aitoolkit[rag,langchain] @ git+...@v0.2.0"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Extras: `rag` (qdrant-client) · `cache` (redis) · `langchain` (langchain-core +
|
|
81
|
+
langchain-openai) · `all`.
|
|
82
|
+
|
|
83
|
+
## Configuration
|
|
84
|
+
|
|
85
|
+
All config is environment-driven (`AITOOLKIT_*`, see [.env.example](.env.example)),
|
|
86
|
+
but every client also accepts explicit overrides, so no env is strictly required.
|
|
87
|
+
|
|
88
|
+
## Quick start
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import asyncio
|
|
92
|
+
from aitoolkit import get_llm_client, get_embeddings_client, get_stt_client, get_tts_client
|
|
93
|
+
|
|
94
|
+
async def main():
|
|
95
|
+
llm = get_llm_client()
|
|
96
|
+
print(await llm.chat("Say hello in one short sentence."))
|
|
97
|
+
|
|
98
|
+
async for tok in llm.stream("Count to five."):
|
|
99
|
+
print(tok, end="", flush=True)
|
|
100
|
+
|
|
101
|
+
emb = get_embeddings_client()
|
|
102
|
+
vecs = await emb.aembed_documents(["first document", "second document"])
|
|
103
|
+
print("dim:", emb.dimension)
|
|
104
|
+
|
|
105
|
+
stt = get_stt_client()
|
|
106
|
+
result = await stt.transcribe("audio.wav", language="en")
|
|
107
|
+
print(result.text)
|
|
108
|
+
|
|
109
|
+
tts = get_tts_client()
|
|
110
|
+
audio = await tts.synthesize("Hello world", voice="your-voice-id")
|
|
111
|
+
open("out.wav", "wb").write(audio)
|
|
112
|
+
|
|
113
|
+
# multi-speaker: synthesize each turn with its own voice and stitch to one WAV
|
|
114
|
+
dialogue = await tts.synthesize_dialogue([
|
|
115
|
+
{"voice_id": "voice-a", "text": "Welcome to the overview."},
|
|
116
|
+
{"voice_id": "voice-b", "text": "Let's dive in."},
|
|
117
|
+
])
|
|
118
|
+
open("dialogue.wav", "wb").write(dialogue)
|
|
119
|
+
|
|
120
|
+
asyncio.run(main())
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Structured output
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from pydantic import BaseModel
|
|
127
|
+
class Flashcard(BaseModel):
|
|
128
|
+
question: str
|
|
129
|
+
answer: str
|
|
130
|
+
|
|
131
|
+
card = await get_llm_client().chat_structured(Flashcard, "Make a flashcard about the water cycle.")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### RAG (`aitoolkit[rag]`)
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from aitoolkit.rag import get_rag_agent
|
|
138
|
+
agent = get_rag_agent(collection_name="documents")
|
|
139
|
+
await agent.add_documents(["chunk 1", "chunk 2"], file_id="doc-42")
|
|
140
|
+
answer = await agent.answer_question("What does the document say about safety?")
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### LangChain bridge (`aitoolkit[langchain]`)
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from aitoolkit.integrations.langchain import to_chat_model, LangChainEmbeddings
|
|
147
|
+
chat_model = to_chat_model(temperature=0.3) # a LangChain BaseChatModel for LangGraph
|
|
148
|
+
embeddings = LangChainEmbeddings() # a LangChain Embeddings
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Testing
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pip install -e ".[all]" --group dev
|
|
155
|
+
pytest # unit tests (mocked)
|
|
156
|
+
AITOOLKIT_RUN_LIVE=1 pytest # also run live smoke tests against your endpoints
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Live tests auto-skip when the configured endpoints are unreachable.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# aitoolkit
|
|
2
|
+
|
|
3
|
+
Centralized AI clients — **LLM, Embeddings, Speech-to-Text, Text-to-Speech, and
|
|
4
|
+
RAG** — targeting self-hosted, OpenAI-compatible services. Reusable across
|
|
5
|
+
projects via a single git install.
|
|
6
|
+
|
|
7
|
+
## Why
|
|
8
|
+
|
|
9
|
+
Replaces scattered, provider-specific AI code (cloud-LLM wrappers, local Whisper,
|
|
10
|
+
ad-hoc LangChain) with one thin, stable, dependency-light package. The core is
|
|
11
|
+
**100% LangChain-free**; LangChain is an opt-in extra used only where LangGraph
|
|
12
|
+
orchestration needs it.
|
|
13
|
+
|
|
14
|
+
## Design principles
|
|
15
|
+
|
|
16
|
+
1. **Depend on stable interfaces, hide volatile implementations.** The public API
|
|
17
|
+
is a small set of clients + plain types; provider SDKs stay internal.
|
|
18
|
+
2. **Core is dependency-light and LangChain-free.** Heavy/optional things (qdrant,
|
|
19
|
+
redis, langchain) live behind extras and import lazily.
|
|
20
|
+
3. **No project specifics in the package.** Hosts, model ids, voices, collection
|
|
21
|
+
names, and keywords are parameters/config — never hardcoded.
|
|
22
|
+
4. **OpenAI-compatible first.** LLM, embeddings and STT use the `openai` SDK; only
|
|
23
|
+
TTS is a small custom `httpx` client.
|
|
24
|
+
5. **Async-first**, with sync convenience wrappers where ergonomics demand it.
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# core only (LLM, embeddings, STT, TTS)
|
|
30
|
+
pip install "ff-aitoolkit @ git+https://github.com/CNIT-Organization/aitoolkit.git@v0.2.0"
|
|
31
|
+
|
|
32
|
+
# with RAG (Qdrant) + caching + LangChain bridge
|
|
33
|
+
pip install "ff-aitoolkit[all] @ git+https://github.com/CNIT-Organization/aitoolkit.git@v0.2.0"
|
|
34
|
+
|
|
35
|
+
# pick exactly what a service needs
|
|
36
|
+
pip install "ff-aitoolkit[rag,cache] @ git+...@v0.2.0"
|
|
37
|
+
pip install "ff-aitoolkit[rag,langchain] @ git+...@v0.2.0"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Extras: `rag` (qdrant-client) · `cache` (redis) · `langchain` (langchain-core +
|
|
41
|
+
langchain-openai) · `all`.
|
|
42
|
+
|
|
43
|
+
## Configuration
|
|
44
|
+
|
|
45
|
+
All config is environment-driven (`AITOOLKIT_*`, see [.env.example](.env.example)),
|
|
46
|
+
but every client also accepts explicit overrides, so no env is strictly required.
|
|
47
|
+
|
|
48
|
+
## Quick start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import asyncio
|
|
52
|
+
from aitoolkit import get_llm_client, get_embeddings_client, get_stt_client, get_tts_client
|
|
53
|
+
|
|
54
|
+
async def main():
|
|
55
|
+
llm = get_llm_client()
|
|
56
|
+
print(await llm.chat("Say hello in one short sentence."))
|
|
57
|
+
|
|
58
|
+
async for tok in llm.stream("Count to five."):
|
|
59
|
+
print(tok, end="", flush=True)
|
|
60
|
+
|
|
61
|
+
emb = get_embeddings_client()
|
|
62
|
+
vecs = await emb.aembed_documents(["first document", "second document"])
|
|
63
|
+
print("dim:", emb.dimension)
|
|
64
|
+
|
|
65
|
+
stt = get_stt_client()
|
|
66
|
+
result = await stt.transcribe("audio.wav", language="en")
|
|
67
|
+
print(result.text)
|
|
68
|
+
|
|
69
|
+
tts = get_tts_client()
|
|
70
|
+
audio = await tts.synthesize("Hello world", voice="your-voice-id")
|
|
71
|
+
open("out.wav", "wb").write(audio)
|
|
72
|
+
|
|
73
|
+
# multi-speaker: synthesize each turn with its own voice and stitch to one WAV
|
|
74
|
+
dialogue = await tts.synthesize_dialogue([
|
|
75
|
+
{"voice_id": "voice-a", "text": "Welcome to the overview."},
|
|
76
|
+
{"voice_id": "voice-b", "text": "Let's dive in."},
|
|
77
|
+
])
|
|
78
|
+
open("dialogue.wav", "wb").write(dialogue)
|
|
79
|
+
|
|
80
|
+
asyncio.run(main())
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Structured output
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from pydantic import BaseModel
|
|
87
|
+
class Flashcard(BaseModel):
|
|
88
|
+
question: str
|
|
89
|
+
answer: str
|
|
90
|
+
|
|
91
|
+
card = await get_llm_client().chat_structured(Flashcard, "Make a flashcard about the water cycle.")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### RAG (`aitoolkit[rag]`)
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from aitoolkit.rag import get_rag_agent
|
|
98
|
+
agent = get_rag_agent(collection_name="documents")
|
|
99
|
+
await agent.add_documents(["chunk 1", "chunk 2"], file_id="doc-42")
|
|
100
|
+
answer = await agent.answer_question("What does the document say about safety?")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### LangChain bridge (`aitoolkit[langchain]`)
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from aitoolkit.integrations.langchain import to_chat_model, LangChainEmbeddings
|
|
107
|
+
chat_model = to_chat_model(temperature=0.3) # a LangChain BaseChatModel for LangGraph
|
|
108
|
+
embeddings = LangChainEmbeddings() # a LangChain Embeddings
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Testing
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
pip install -e ".[all]" --group dev
|
|
115
|
+
pytest # unit tests (mocked)
|
|
116
|
+
AITOOLKIT_RUN_LIVE=1 pytest # also run live smoke tests against your endpoints
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Live tests auto-skip when the configured endpoints are unreachable.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ff-aitoolkit"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Centralized AI clients (LLM, embeddings, STT, TTS, RAG) for self-hosted OpenAI-compatible GPU services."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [{ name = "Faisal Fida" }]
|
|
14
|
+
keywords = ["llm", "embeddings", "stt", "tts", "rag", "openai", "self-hosted", "ai"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
# Core stays light and 100% LangChain-free. Heavy/optional deps live in extras.
|
|
28
|
+
dependencies = [
|
|
29
|
+
"openai>=1.50",
|
|
30
|
+
"httpx>=0.27",
|
|
31
|
+
"pydantic>=2.7",
|
|
32
|
+
"pydantic-settings>=2.3",
|
|
33
|
+
"loguru>=0.7",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
# Retrieval-augmented generation (Qdrant vector store).
|
|
38
|
+
rag = ["qdrant-client>=1.12"]
|
|
39
|
+
# Query result caching for the retriever.
|
|
40
|
+
cache = ["redis>=5.0"]
|
|
41
|
+
# Return LangChain BaseChatModel / Embeddings for LangGraph consumers (e.g. notebook).
|
|
42
|
+
langchain = ["langchain-core>=0.3", "langchain-openai>=0.2"]
|
|
43
|
+
# Everything.
|
|
44
|
+
all = ["ff-aitoolkit[rag,cache,langchain]"]
|
|
45
|
+
|
|
46
|
+
[project.urls]
|
|
47
|
+
Homepage = "https://github.com/CNIT-Organization/aitoolkit"
|
|
48
|
+
Repository = "https://github.com/CNIT-Organization/aitoolkit"
|
|
49
|
+
Issues = "https://github.com/CNIT-Organization/aitoolkit/issues"
|
|
50
|
+
|
|
51
|
+
[dependency-groups]
|
|
52
|
+
dev = [
|
|
53
|
+
"pytest>=8.0",
|
|
54
|
+
"pytest-asyncio>=0.23",
|
|
55
|
+
"respx>=0.21",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[tool.hatch.build.targets.wheel]
|
|
59
|
+
packages = ["src/aitoolkit"]
|
|
60
|
+
|
|
61
|
+
[tool.hatch.build.targets.sdist]
|
|
62
|
+
include = ["src/aitoolkit", "README.md", "LICENSE"]
|
|
63
|
+
|
|
64
|
+
[tool.pytest.ini_options]
|
|
65
|
+
asyncio_mode = "auto"
|
|
66
|
+
testpaths = ["tests"]
|
|
67
|
+
filterwarnings = ["ignore::DeprecationWarning"]
|
|
68
|
+
|
|
69
|
+
[tool.ruff]
|
|
70
|
+
line-length = 100
|
|
71
|
+
target-version = "py310"
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""aitoolkit — centralized AI clients for self-hosted OpenAI-compatible services.
|
|
2
|
+
|
|
3
|
+
Core capabilities (LLM, embeddings, STT, TTS) are always available. RAG and the
|
|
4
|
+
LangChain bridge live behind extras and are imported from their own subpackages
|
|
5
|
+
(``aitoolkit.rag``, ``aitoolkit.integrations.langchain``) so that importing the
|
|
6
|
+
top-level package never forces an optional dependency.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from aitoolkit.config import AIToolkitSettings, configure, get_settings
|
|
12
|
+
from aitoolkit.embeddings import EmbeddingsClient, get_embeddings_client
|
|
13
|
+
from aitoolkit.exceptions import (
|
|
14
|
+
AIToolkitError,
|
|
15
|
+
ConfigurationError,
|
|
16
|
+
EmbeddingsError,
|
|
17
|
+
LLMError,
|
|
18
|
+
STTError,
|
|
19
|
+
TTSError,
|
|
20
|
+
VectorStoreError,
|
|
21
|
+
)
|
|
22
|
+
from aitoolkit.llm import LLMClient, get_llm_client
|
|
23
|
+
from aitoolkit.stt import STTClient, get_stt_client
|
|
24
|
+
from aitoolkit.tts import TTSClient, concat_wav, get_tts_client
|
|
25
|
+
from aitoolkit.types import (
|
|
26
|
+
ChatMessage,
|
|
27
|
+
DialogueTurn,
|
|
28
|
+
RetrievedChunk,
|
|
29
|
+
TranscriptionResult,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
__version__ = "0.2.0"
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"__version__",
|
|
36
|
+
# config
|
|
37
|
+
"AIToolkitSettings",
|
|
38
|
+
"configure",
|
|
39
|
+
"get_settings",
|
|
40
|
+
# llm
|
|
41
|
+
"LLMClient",
|
|
42
|
+
"get_llm_client",
|
|
43
|
+
# embeddings
|
|
44
|
+
"EmbeddingsClient",
|
|
45
|
+
"get_embeddings_client",
|
|
46
|
+
# stt
|
|
47
|
+
"STTClient",
|
|
48
|
+
"get_stt_client",
|
|
49
|
+
# tts
|
|
50
|
+
"TTSClient",
|
|
51
|
+
"get_tts_client",
|
|
52
|
+
"concat_wav",
|
|
53
|
+
# types
|
|
54
|
+
"ChatMessage",
|
|
55
|
+
"DialogueTurn",
|
|
56
|
+
"RetrievedChunk",
|
|
57
|
+
"TranscriptionResult",
|
|
58
|
+
# exceptions
|
|
59
|
+
"AIToolkitError",
|
|
60
|
+
"ConfigurationError",
|
|
61
|
+
"LLMError",
|
|
62
|
+
"EmbeddingsError",
|
|
63
|
+
"STTError",
|
|
64
|
+
"TTSError",
|
|
65
|
+
"VectorStoreError",
|
|
66
|
+
]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Central configuration for aitoolkit.
|
|
2
|
+
|
|
3
|
+
All settings are read from ``AITOOLKIT_*`` environment variables (or a ``.env``
|
|
4
|
+
file) but every client also accepts explicit overrides, so the package can be
|
|
5
|
+
used with zero environment configuration.
|
|
6
|
+
|
|
7
|
+
Defaults intentionally point at ``localhost`` — they are NOT specific to any one
|
|
8
|
+
deployment. Production endpoints are supplied by the consuming application via
|
|
9
|
+
environment variables (see ``.env.example``).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from functools import lru_cache
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from pydantic import Field
|
|
18
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
19
|
+
|
|
20
|
+
# A placeholder key for OpenAI-compatible servers that perform no app-layer auth
|
|
21
|
+
# (our GPU services are firewalled, not key-gated). The openai SDK requires a
|
|
22
|
+
# non-empty key, so we provide one.
|
|
23
|
+
_NO_AUTH = "no-auth"  # default value of the llm/embeddings/stt ``*_api_key`` settings
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AIToolkitSettings(BaseSettings):
    """Environment-backed settings shared by all aitoolkit clients.

    Each field is filled from the matching ``AITOOLKIT_``-prefixed environment
    variable or from a UTF-8 ``.env`` file; keys that do not map to a field are
    ignored. Fields that are not supplied keep the defaults declared below.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file_encoding="utf-8",
        env_file=".env",
        env_prefix="AITOOLKIT_",
    )

    # ---- LLM endpoint (vLLM, OpenAI-compatible) ----
    llm_base_url: str = "http://localhost:8000/v1"
    llm_api_key: str = _NO_AUTH
    # Deliberately empty: set AITOOLKIT_LLM_MODEL to the model you serve.
    llm_model: str = ""
    llm_temperature: float = 0.2
    llm_timeout: float = 60.0
    llm_max_retries: int = 2

    # ---- Embeddings endpoint (TEI, OpenAI-compatible) ----
    embeddings_base_url: str = "http://localhost:8001/v1"
    embeddings_api_key: str = _NO_AUTH
    # Deliberately empty: set AITOOLKIT_EMBEDDINGS_MODEL to the model you serve.
    embeddings_model: str = ""
    # TEI only takes modest batches, so the default is conservative but tunable.
    embeddings_batch_size: int = 32
    embeddings_timeout: float = 60.0

    # ---- Speech-to-text endpoint (faster-whisper, OpenAI-compatible) ----
    stt_base_url: str = "http://localhost:8003/v1"
    stt_api_key: str = _NO_AUTH
    stt_model: str = "whisper-1"
    stt_language: Optional[str] = None
    stt_timeout: float = 120.0

    # ---- Text-to-speech endpoint (custom /api/tts) ----
    tts_base_url: str = "http://localhost:8002"
    tts_default_voice: Optional[str] = None
    tts_timeout: float = 120.0

    # ---- Vector store (Qdrant) ----
    qdrant_url: str = "http://localhost:6333"
    qdrant_collection: str = "documents"
    # Fixed vector size; None means it is detected from the embedding model.
    qdrant_vector_size: Optional[int] = None
    # qdrant-client emits a UserWarning when the server's minor version differs
    # from its own by more than one. Self-hosted servers often lag the client;
    # set this to False to silence the check when the API surface in use is stable.
    qdrant_check_compatibility: bool = True

    # ---- Retriever cache (Redis, optional) ----
    redis_url: Optional[str] = None
    cache_ttl: int = 3600
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Settings instance installed by configure(); while it is None, get_settings()
# falls back to the cached environment-derived settings.
_override: Optional[AIToolkitSettings] = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@lru_cache(maxsize=1)
def _env_settings() -> AIToolkitSettings:
    """Return the settings derived from ``AITOOLKIT_*`` environment variables.

    The result is memoized, so the settings object is constructed on the first
    call only and every later call returns that same instance.
    """
    env_derived = AIToolkitSettings()
    return env_derived
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_settings() -> AIToolkitSettings:
    """Return the settings object used process-wide.

    An instance installed through :func:`configure` takes precedence; when none
    has been installed, the cached settings built from ``AITOOLKIT_*``
    environment variables are returned instead.
    """
    if _override is not None:
        return _override
    return _env_settings()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def configure(settings: AIToolkitSettings) -> None:
    """Make ``settings`` the process-wide settings singleton.

    This lets the consuming application own its configuration outright rather
    than depending on environment variables and the generic defaults of this
    package. Intended to be called once at startup, before any client is
    created.

    Args:
        settings: The settings instance to install.
    """
    # Rebind the module-level slot instead of creating a function-local name.
    global _override
    _override = settings
|