clovis 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clovis-0.3.0/PKG-INFO +98 -0
- clovis-0.3.0/README.md +74 -0
- {clovis-0.1.0 → clovis-0.3.0}/pyproject.toml +2 -2
- {clovis-0.1.0 → clovis-0.3.0}/ruvector.db +0 -0
- {clovis-0.1.0 → clovis-0.3.0}/src/clovis/__init__.py +1 -1
- clovis-0.3.0/src/clovis/_cli.py +81 -0
- clovis-0.3.0/src/clovis/_client.py +170 -0
- clovis-0.3.0/src/clovis/_server.py +58 -0
- clovis-0.3.0/test_live.py +108 -0
- clovis-0.1.0/PKG-INFO +0 -79
- clovis-0.1.0/README.md +0 -55
- clovis-0.1.0/src/clovis/_cli.py +0 -114
- clovis-0.1.0/src/clovis/_client.py +0 -68
- clovis-0.1.0/src/clovis/_completions.py +0 -96
- clovis-0.1.0/src/clovis/_conversation.py +0 -65
- clovis-0.1.0/src/clovis/_models.py +0 -75
- clovis-0.1.0/src/clovis/_server.py +0 -117
clovis-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clovis
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: cloooooo — personal LLM client, prompt/context/thinking interface over local Ollama
|
|
5
|
+
Author: Clovis Sfeir
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: ai,llm,local-ai,ollama,openai
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: fastapi>=0.111
|
|
18
|
+
Requires-Dist: httpx>=0.27
|
|
19
|
+
Requires-Dist: pydantic>=2.0
|
|
20
|
+
Requires-Dist: rich>=13.0
|
|
21
|
+
Requires-Dist: typer>=0.12
|
|
22
|
+
Requires-Dist: uvicorn[standard]>=0.30
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# clovis
|
|
26
|
+
|
|
27
|
+
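Client Python personnel pour un LLM : par défaut il interroge l'API publique `https://cloooooo.com`, ou directement une instance [Ollama](https://ollama.com) locale avec `cloooooo(local=True)` ou la variable `CLOVIS_OLLAMA_URL`. Interface ultra-simple : `prompt`, `negative_prompt`, `thinking`, `context`.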
|
|
28
|
+
|
|
29
|
+
## Install
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install clovis
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from clovis import cloooooo
|
|
39
|
+
|
|
40
|
+
ai = cloooooo()
|
|
41
|
+
|
|
42
|
+
# Appel direct
|
|
43
|
+
print(ai("Explique les trous noirs"))
|
|
44
|
+
|
|
45
|
+
# Avec options
|
|
46
|
+
print(ai(
|
|
47
|
+
"Génère un poème sur la mer",
|
|
48
|
+
negative_prompt="pas de rimes",
|
|
49
|
+
thinking=True,
|
|
50
|
+
context="Tu es un poète du 19e siècle.",
|
|
51
|
+
))
|
|
52
|
+
|
|
53
|
+
# Streaming token par token
|
|
54
|
+
for token in ai.stream("Raconte une histoire courte"):
|
|
55
|
+
print(token, end="", flush=True)
|
|
56
|
+
|
|
57
|
+
# Conversation avec mémoire
|
|
58
|
+
conv = ai.conversation(context="Tu es un expert en finance.")
|
|
59
|
+
conv("Explique le CAPM")
|
|
60
|
+
conv("Et ses limites ?") # se souvient de la réponse précédente
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## CLI
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
clovis "Explique les trous noirs" # question directe
|
|
67
|
+
clovis "Génère un poème" --no "sans rimes" # avec negative prompt
|
|
68
|
+
clovis "Résous ce problème" --think # mode réflexion
|
|
69
|
+
clovis repl # conversation interactive
|
|
70
|
+
clovis serve --port 8000 # démarre le serveur API
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## API server
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
clovis serve --port 8000 --key sk-montoken
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Requête :
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
curl -X POST http://localhost:8000/ia \
|
|
83
|
+
-H "Authorization: Bearer sk-montoken" \
|
|
84
|
+
-H "Content-Type: application/json" \
|
|
85
|
+
-d '{"prompt": "Bonjour !", "thinking": false}'
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Réponse : `{"response": "Bonjour ! Comment puis-je t'aider ?"}`
|
|
89
|
+
|
|
90
|
+
Streaming : ajouter `"stream": true` → réponse en `text/plain` token par token.
|
|
91
|
+
|
|
92
|
+
## Config
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
export CLOVIS_MODEL="qwen3-72b-q5km" # modèle Ollama
|
|
96
|
+
export CLOVIS_OLLAMA_URL="http://localhost:11434"
|
|
97
|
+
export CLOVIS_API_KEY="sk-..." # clé API pour le serveur
|
|
98
|
+
```
|
clovis-0.3.0/README.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# clovis
|
|
2
|
+
|
|
3
|
+
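Client Python personnel pour un LLM : par défaut il interroge l'API publique `https://cloooooo.com`, ou directement une instance [Ollama](https://ollama.com) locale avec `cloooooo(local=True)` ou la variable `CLOVIS_OLLAMA_URL`. Interface ultra-simple : `prompt`, `negative_prompt`, `thinking`, `context`.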
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install clovis
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from clovis import cloooooo
|
|
15
|
+
|
|
16
|
+
ai = cloooooo()
|
|
17
|
+
|
|
18
|
+
# Appel direct
|
|
19
|
+
print(ai("Explique les trous noirs"))
|
|
20
|
+
|
|
21
|
+
# Avec options
|
|
22
|
+
print(ai(
|
|
23
|
+
"Génère un poème sur la mer",
|
|
24
|
+
negative_prompt="pas de rimes",
|
|
25
|
+
thinking=True,
|
|
26
|
+
context="Tu es un poète du 19e siècle.",
|
|
27
|
+
))
|
|
28
|
+
|
|
29
|
+
# Streaming token par token
|
|
30
|
+
for token in ai.stream("Raconte une histoire courte"):
|
|
31
|
+
print(token, end="", flush=True)
|
|
32
|
+
|
|
33
|
+
# Conversation avec mémoire
|
|
34
|
+
conv = ai.conversation(context="Tu es un expert en finance.")
|
|
35
|
+
conv("Explique le CAPM")
|
|
36
|
+
conv("Et ses limites ?") # se souvient de la réponse précédente
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## CLI
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
clovis "Explique les trous noirs" # question directe
|
|
43
|
+
clovis "Génère un poème" --no "sans rimes" # avec negative prompt
|
|
44
|
+
clovis "Résous ce problème" --think # mode réflexion
|
|
45
|
+
clovis repl # conversation interactive
|
|
46
|
+
clovis serve --port 8000 # démarre le serveur API
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## API server
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
clovis serve --port 8000 --key sk-montoken
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Requête :
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
curl -X POST http://localhost:8000/ia \
|
|
59
|
+
-H "Authorization: Bearer sk-montoken" \
|
|
60
|
+
-H "Content-Type: application/json" \
|
|
61
|
+
-d '{"prompt": "Bonjour !", "thinking": false}'
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Réponse : `{"response": "Bonjour ! Comment puis-je t'aider ?"}`
|
|
65
|
+
|
|
66
|
+
Streaming : ajouter `"stream": true` → réponse en `text/plain` token par token.
|
|
67
|
+
|
|
68
|
+
## Config
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
export CLOVIS_MODEL="qwen3-72b-q5km" # modèle Ollama
|
|
72
|
+
export CLOVIS_OLLAMA_URL="http://localhost:11434"
|
|
73
|
+
export CLOVIS_API_KEY="sk-..." # clé API pour le serveur
|
|
74
|
+
```
|
|
@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "clovis"
|
|
7
|
-
version = "0.
|
|
8
|
-
description = "cloooooo — personal LLM client,
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "cloooooo — personal LLM client, prompt/context/thinking interface over local Ollama"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
11
11
|
authors = [{ name = "Clovis Sfeir" }]
|
|
Binary file
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.live import Live
|
|
9
|
+
from rich.markdown import Markdown
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(help="cloooooo — LLM personnel", add_completion=True)
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _ai():
    """Return a client built with default settings.

    The client module is imported inside the function, i.e. only when a
    command actually needs a client.
    """
    from ._client import cloooooo as client_cls

    return client_cls()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    prompt: Optional[str] = typer.Argument(None),
    negative: Optional[str] = typer.Option(None, "--no", "-n", help="Ce qu'il faut éviter"),
    thinking: bool = typer.Option(False, "--think", "-t", help="Active le mode réflexion"),
    context: Optional[str] = typer.Option(None, "--ctx", "-c", help="System prompt"),
):
    """Envoie un prompt directement : clovis \"Bonjour\""""
    # The docstring above is left untouched: Typer presumably surfaces it as
    # the command's help text, so it is runtime output rather than a comment.
    #
    # NOTE(review): an optional positional argument on a group callback may
    # consume the subcommand name (`clovis repl` -> prompt="repl") — confirm
    # against Click/Typer argument parsing before relying on subcommands.

    # A subcommand (repl / serve) was selected: nothing to do here.
    subcommand_selected = ctx.invoked_subcommand is not None
    if subcommand_selected:
        return

    # No prompt and no subcommand: show the help and stop.
    if not prompt:
        console.print(ctx.get_help())
        return

    client = _ai()
    pieces: list[str] = []
    with Live(console=console, refresh_per_second=20) as live:
        for token in client.stream(
            prompt,
            negative_prompt=negative,
            thinking=thinking,
            context=context,
        ):
            # Re-render the whole answer as Markdown after every new token.
            pieces.append(token)
            live.update(Markdown("".join(pieces)))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@app.command()
def repl(
    context: Optional[str] = typer.Option(None, "--ctx", "-c", help="System prompt"),
):
    """Conversation interactive."""
    # The docstring above is left untouched: Typer presumably surfaces it as
    # the command's help text, so it is runtime output rather than a comment.

    # Imported here so this function does not depend on the module's import block.
    from rich.text import Text

    ai = _ai()
    conv = ai.conversation(context=context)
    console.print("[bold]cloooooo[/] — [dim]Ctrl+C pour quitter · /reset pour vider[/]\n")

    while True:
        try:
            prompt = typer.prompt("Vous")
        except (typer.Abort, KeyboardInterrupt):
            console.print("\n[dim]Au revoir.[/]")
            break

        # "/reset" is a REPL command: clear the history instead of sending it.
        if prompt.strip() == "/reset":
            conv.reset()
            console.print("[dim]Historique vidé.[/]")
            continue

        text = ""
        console.print("[bold cyan]cloooooo[/] ", end="")
        with Live(console=console, refresh_per_second=20) as live:
            for token in conv.stream(prompt):
                text += token
                # Wrap the reply in Text so it is displayed literally. A bare
                # str handed to Live.update() is parsed as Rich console markup,
                # so model output such as "[i]" or "[/INST]" would be restyled,
                # dropped, or raise a MarkupError.
                live.update(Text(text))
        console.print()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@app.command()
def serve(
    port: int = typer.Option(8000, "--port", "-p"),
    host: str = typer.Option("0.0.0.0", "--host"),
    api_key: Optional[str] = typer.Option(None, "--key", envvar="CLOVIS_API_KEY"),
):
    """Lance le serveur sur /ia."""
    # The docstring above is left untouched: Typer presumably surfaces it as
    # the command's help text, so it is runtime output rather than a comment.

    # The server module is imported only when this command runs.
    from . import _server as server_module

    server_module.start_server(host=host, port=port, api_key=api_key)
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from typing import Iterator, Optional
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
_SERVER_URL = "https://cloooooo.com" # API publique par défaut
|
|
10
|
+
_OLLAMA_URL = "http://localhost:11434" # fallback local
|
|
11
|
+
_MODEL = "qwen3:14b"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _build_messages(
|
|
15
|
+
prompt: str,
|
|
16
|
+
context: Optional[str],
|
|
17
|
+
negative_prompt: Optional[str],
|
|
18
|
+
thinking: bool,
|
|
19
|
+
history: list[dict],
|
|
20
|
+
) -> list[dict]:
|
|
21
|
+
system_parts = []
|
|
22
|
+
if context:
|
|
23
|
+
system_parts.append(context)
|
|
24
|
+
if negative_prompt:
|
|
25
|
+
system_parts.append(f"Évite absolument dans ta réponse : {negative_prompt}")
|
|
26
|
+
if thinking:
|
|
27
|
+
system_parts.append("Réfléchis étape par étape avant de répondre.")
|
|
28
|
+
|
|
29
|
+
messages = []
|
|
30
|
+
if system_parts:
|
|
31
|
+
messages.append({"role": "system", "content": "\n".join(system_parts)})
|
|
32
|
+
messages.extend(history)
|
|
33
|
+
messages.append({"role": "user", "content": prompt})
|
|
34
|
+
return messages
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Conversation:
    """Multi-turn chat that resends the earlier exchanges with every prompt.

    Each completed exchange is stored as a user message followed by an
    assistant message, and the stored list is passed to ``_build_messages``
    on the next call.
    """

    def __init__(self, ai: "cloooooo", context: Optional[str] = None) -> None:
        """Bind the conversation to a client and an optional system context."""
        self._ai = ai
        self._context = context
        self._history: list[dict] = []

    def _compose(self, prompt: str, negative_prompt: Optional[str], thinking: bool) -> list[dict]:
        """Build the message list for ``prompt`` on top of the stored history."""
        return _build_messages(prompt, self._context, negative_prompt, thinking, self._history)

    def _remember(self, prompt: str, answer: str) -> None:
        """Append one finished user/assistant exchange to the history."""
        self._history.extend(
            (
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": answer},
            )
        )

    def __call__(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False) -> str:
        """Send ``prompt``, record the exchange and return the full reply."""
        answer = self._ai._send(self._compose(prompt, negative_prompt, thinking))
        self._remember(prompt, answer)
        return answer

    def stream(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False) -> Iterator[str]:
        """Yield the reply token by token.

        The exchange is recorded only after the underlying stream has been
        consumed to the end.
        """
        outgoing = self._compose(prompt, negative_prompt, thinking)
        received = []
        for token in self._ai._stream(outgoing):
            received.append(token)
            yield token
        self._remember(prompt, "".join(received))

    def reset(self) -> None:
        """Forget every stored exchange (the context is kept)."""
        del self._history[:]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class cloooooo:
    """Client for a cloooooo ``/ia`` server or a direct Ollama instance.

    Usage::

        from clovis import cloooooo

        ai = cloooooo()                     # -> cloooooo.com (no configuration needed)
        ai = cloooooo(local=True)           # -> localhost:11434
        ai = cloooooo(server="http://...")  # -> custom server

        print(ai("Explain black holes"))
        print(ai("Write a poem", negative_prompt="no rhymes", thinking=True))
        for token in ai.stream("Tell a story"):
            print(token, end="", flush=True)
        conv = ai.conversation(context="You are a finance expert")
        conv("Explain the CAPM")
        conv("And its limits?")

    The instance owns an ``httpx.Client``; call :meth:`close` or use the
    instance as a context manager to release it.
    """

    # Timeout, in seconds, applied to every HTTP request the client issues.
    _TIMEOUT = 120

    def __init__(
        self,
        server: Optional[str] = None,
        *,
        local: bool = False,
        ollama_url: Optional[str] = None,
        model: str = _MODEL,
    ) -> None:
        """Select the backend and open the HTTP client.

        Args:
            server: Base URL of a server exposing ``/ia``.
            local: Force direct Ollama mode.
            ollama_url: Base URL of the Ollama instance; implies Ollama mode.
            model: Model name sent to Ollama; the ``CLOVIS_MODEL``
                environment variable takes precedence when set.
        """
        # Server URL priority: server arg > CLOVIS_SERVER env > cloooooo.com.
        # local=True, ollama_url, or CLOVIS_OLLAMA_URL selects direct Ollama mode.
        env_server = os.getenv("CLOVIS_SERVER")
        env_ollama = os.getenv("CLOVIS_OLLAMA_URL")

        if local or ollama_url or env_ollama:
            self._mode = "ollama"
            base_url = ollama_url or env_ollama or _OLLAMA_URL
        else:
            self._mode = "server"
            base_url = server or env_server or _SERVER_URL

        # Drop trailing slashes so f"{url}/ia" never produces "//ia".
        self._url = base_url.rstrip("/")
        # Set in both modes so the attribute always exists (it used to be
        # missing in server mode and reading it raised AttributeError).
        self._model = os.getenv("CLOVIS_MODEL", model)
        self._http = httpx.Client()

    # --- resource management ---

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._http.close()

    def __enter__(self) -> "cloooooo":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # --- public API ---

    def __call__(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False, context: Optional[str] = None) -> str:
        """Send one prompt and return the complete reply."""
        if self._mode == "server":
            return self._call_server(prompt, negative_prompt=negative_prompt, thinking=thinking, context=context)
        messages = _build_messages(prompt, context, negative_prompt, thinking, [])
        return self._send(messages)

    def stream(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False, context: Optional[str] = None) -> Iterator[str]:
        """Send one prompt and yield the reply piece by piece."""
        if self._mode == "server":
            yield from self._stream_server(prompt, negative_prompt=negative_prompt, thinking=thinking, context=context)
            return
        messages = _build_messages(prompt, context, negative_prompt, thinking, [])
        yield from self._stream(messages)

    def conversation(self, context: Optional[str] = None) -> Conversation:
        """Start a conversation that keeps its own history."""
        return Conversation(self, context=context)

    # --- server mode (<base url>/ia) ---

    @staticmethod
    def _without_none(options: dict) -> dict:
        """Return ``options`` without the entries whose value is None."""
        return {key: value for key, value in options.items() if value is not None}

    @staticmethod
    def _flatten_for_server(messages: list[dict]) -> dict:
        """Convert a chat message list into a stateless ``/ia`` payload.

        The ``/ia`` request carries only a prompt and a context, so system
        messages and earlier turns are folded into ``context`` and the last
        non-system message becomes ``prompt``.

        Raises:
            ValueError: If ``messages`` holds no non-system message.
        """
        context_parts = [m["content"] for m in messages if m.get("role") == "system"]
        turns = [m for m in messages if m.get("role") != "system"]
        if not turns:
            raise ValueError("messages must contain at least one non-system message")

        *earlier, current = turns
        if earlier:
            transcript = "\n".join(f"{m['role']}: {m['content']}" for m in earlier)
            context_parts.append(f"Historique de la conversation :\n{transcript}")

        payload = {"prompt": current["content"]}
        if context_parts:
            payload["context"] = "\n".join(context_parts)
        return payload

    def _post_ia(self, payload: dict) -> str:
        """POST ``payload`` to ``/ia`` and return the ``response`` field."""
        resp = self._http.post(f"{self._url}/ia", json=payload, timeout=self._TIMEOUT)
        resp.raise_for_status()
        return resp.json()["response"]

    def _stream_ia(self, payload: dict) -> Iterator[str]:
        """POST ``payload`` to ``/ia`` and yield each non-empty text chunk."""
        with self._http.stream("POST", f"{self._url}/ia", json=payload, timeout=self._TIMEOUT) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_text():
                if chunk:
                    yield chunk

    def _call_server(self, prompt: str, **kwargs) -> str:
        """Send one prompt to ``/ia``; options whose value is None are omitted."""
        return self._post_ia({"prompt": prompt, **self._without_none(kwargs)})

    def _stream_server(self, prompt: str, **kwargs) -> Iterator[str]:
        """Streaming variant of :meth:`_call_server`."""
        yield from self._stream_ia({"prompt": prompt, "stream": True, **self._without_none(kwargs)})

    # --- message-level transport (used by Conversation) ---

    def _send(self, messages: list[dict]) -> str:
        """Send a full message list and return the reply.

        In server mode the messages are flattened for ``/ia``; previously this
        always targeted Ollama's ``/api/chat``, which broke conversations on
        the default (server) backend.
        """
        if self._mode == "server":
            return self._post_ia(self._flatten_for_server(messages))
        resp = self._http.post(
            f"{self._url}/api/chat",
            json={"model": self._model, "messages": messages, "stream": False, "think": False},
            timeout=self._TIMEOUT,
        )
        resp.raise_for_status()
        return resp.json()["message"]["content"]

    def _stream(self, messages: list[dict]) -> Iterator[str]:
        """Streaming variant of :meth:`_send`."""
        if self._mode == "server":
            yield from self._stream_ia({**self._flatten_for_server(messages), "stream": True})
            return
        with self._http.stream(
            "POST",
            f"{self._url}/api/chat",
            json={"model": self._model, "messages": messages, "stream": True, "think": False},
            timeout=self._TIMEOUT,
        ) as resp:
            resp.raise_for_status()
            # Each non-empty line is parsed as one JSON object.
            for line in resp.iter_lines():
                if not line:
                    continue
                data = json.loads(line)
                token = data.get("message", {}).get("content", "")
                if token:
                    yield token
                if data.get("done"):
                    break

    @classmethod
    def serve(cls, port: int = 8000, host: str = "0.0.0.0", api_key: Optional[str] = None) -> None:
        """Start the HTTP API server via ``_server.start_server``."""
        from ._server import start_server

        start_server(host=host, port=port, api_key=api_key)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import uvicorn
|
|
7
|
+
from fastapi import Depends, FastAPI, HTTPException
|
|
8
|
+
from fastapi.responses import StreamingResponse
|
|
9
|
+
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
|
|
12
|
+
from ._client import cloooooo
|
|
13
|
+
|
|
14
|
+
_bearer = HTTPBearer(auto_error=False)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class IARequest(BaseModel):
    """JSON body accepted by the ``/ia`` endpoint."""

    # Text of the user prompt (the only required field).
    prompt: str
    # Optional description of what the answer must avoid.
    negative_prompt: str | None = None
    # Ask for the "reason step by step" instruction.
    thinking: bool = False
    # Optional system instructions.
    context: str | None = None
    # When true, the endpoint returns a streaming text/plain response.
    stream: bool = False
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_app(api_key: Optional[str] = None) -> FastAPI:
    """Create the FastAPI application exposing ``POST /ia`` and ``GET /``.

    Args:
        api_key: When set, ``/ia`` requires ``Authorization: Bearer <api_key>``;
            when empty or None the endpoint is unauthenticated.

    Returns:
        The configured application.
    """
    # Imported here so this function does not depend on the module's import block.
    import secrets

    app = FastAPI(title="cloooooo", version="0.1.0")
    # Use the Ollama backend explicitly. A bare cloooooo() resolves to server
    # mode, which would make this API forward every request to the public
    # service (or to itself when CLOVIS_SERVER points here).
    ai = cloooooo(local=True)

    def _check_key(creds: Optional[HTTPAuthorizationCredentials] = Depends(_bearer)):
        """Reject the request unless the bearer token matches ``api_key``."""
        if not api_key:
            return
        supplied = creds.credentials if creds else ""
        # Constant-time comparison; encoding to bytes also accepts non-ASCII keys.
        if not secrets.compare_digest(supplied.encode(), api_key.encode()):
            raise HTTPException(
                status_code=401,
                detail="Clé API invalide",
                headers={"WWW-Authenticate": "Bearer"},
            )

    # Plain ``def`` on purpose: the client performs blocking HTTP calls, and
    # FastAPI runs non-async handlers in a worker thread instead of on the
    # event loop.
    @app.post("/ia", dependencies=[Depends(_check_key)])
    def ia(req: IARequest):
        kwargs = dict(
            negative_prompt=req.negative_prompt,
            thinking=req.thinking,
            context=req.context,
        )
        if req.stream:
            return StreamingResponse(ai.stream(req.prompt, **kwargs), media_type="text/plain")

        return {"response": ai(req.prompt, **kwargs)}

    @app.get("/")
    def root():
        return {"status": "ok", "endpoint": "/ia"}

    return app
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def start_server(host: str = "0.0.0.0", port: int = 8000, api_key: Optional[str] = None) -> None:
    """Build the application, print its endpoint URL and run it with uvicorn.

    Args:
        host: Interface to bind.
        port: TCP port to listen on.
        api_key: Forwarded to ``build_app``.
    """
    application = build_app(api_key=api_key)
    endpoint = f"http://{host}:{port}/ia"
    print(f"cloooooo API → {endpoint}")
    uvicorn.run(application, host=host, port=port)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Test live end-to-end — lance avec : python3 test_live.py"""
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_import():
    """Check that the package imports and a default client can be built."""
    from clovis import cloooooo

    ai = cloooooo()
    # The client assigns `_model` only in Ollama mode, so a default
    # (server-mode) instance may not have it; read it defensively instead of
    # failing with AttributeError.
    model = getattr(ai, "_model", None)
    print(f" import OK — modèle: {model}, url: {ai._url}")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_basic_call():
    """A plain prompt returns a non-empty string."""
    from clovis import cloooooo

    resp = cloooooo()('Réponds uniquement par le mot "ok".')
    assert isinstance(resp, str)
    assert resp
    print(f" appel OK — réponse: {resp!r}")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_negative_prompt():
    """A prompt with a negative prompt returns a non-empty string."""
    from clovis import cloooooo

    client = cloooooo()
    resp = client(
        "Présente-toi en 10 mots.",
        negative_prompt="ne mentionne pas ton nom",
    )
    assert isinstance(resp, str)
    assert resp
    print(f" negative_prompt OK — réponse: {resp!r}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_thinking():
    """A prompt sent with thinking=True returns a non-empty string."""
    from clovis import cloooooo

    client = cloooooo()
    resp = client("Combien font 17 × 23 ?", thinking=True)
    assert isinstance(resp, str)
    assert resp
    print(f" thinking OK — réponse: {resp!r}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_context():
    """A prompt sent with a system context returns a non-empty string."""
    from clovis import cloooooo

    client = cloooooo()
    persona = "Tu es un pirate des Caraïbes. Réponds toujours en argot de marin."
    resp = client("Comment ça va ?", context=persona)
    assert isinstance(resp, str)
    assert resp
    print(f" context OK — réponse: {resp!r}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_stream():
    """Streaming yields at least one piece and a non-empty joined text."""
    from clovis import cloooooo

    client = cloooooo()
    tokens = [piece for piece in client.stream("Compte jusqu'à 5, un nombre par ligne.")]
    assert tokens
    full = "".join(tokens)
    assert full
    print(f" stream OK — {len(tokens)} tokens, texte: {full!r}")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_conversation():
    """The second turn of a conversation can recall a fact from the first."""
    from clovis import cloooooo

    conv = cloooooo().conversation(context="Réponds toujours en une seule phrase courte.")
    conv("Mon prénom est Clovis.")
    r2 = conv("Quel est mon prénom ?")
    # Case-insensitive match; it also covers the exact spelling "Clovis".
    assert "clovis" in r2.lower(), f"Prénom pas mémorisé: {r2!r}"
    print(f" conversation OK — mémoire: {r2!r}")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_conversation_stream():
    """Streaming through a conversation produces non-empty text."""
    from clovis import cloooooo

    conv = cloooooo().conversation()
    tokens = [piece for piece in conv.stream("Dis bonjour en 3 langues.")]
    full = "".join(tokens)
    assert full
    print(f" conversation stream OK — {len(tokens)} tokens")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_conversation_reset():
    """reset() leaves the conversation with an empty history."""
    from clovis import cloooooo

    conv = cloooooo().conversation()
    conv("Mon prénom est Clovis.")
    conv.reset()
    assert not len(conv._history)
    print(" conversation reset OK")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
    # Run every live test in order, keep going after a failure, and exit
    # non-zero when at least one test failed.
    tests = [
        test_import,
        test_basic_call,
        test_negative_prompt,
        test_thinking,
        test_context,
        test_stream,
        test_conversation,
        test_conversation_stream,
        test_conversation_reset,
    ]
    failures = 0
    for case in tests:
        print(f"\n{case.__name__}")
        try:
            case()
        except Exception as exc:
            failures += 1
            print(f" FAILED: {exc}")
            import traceback

            traceback.print_exc()

    passed = len(tests) - failures
    print(f"\n{'='*40}")
    print(f"Résultat: {passed}/{len(tests)} tests passés")
    sys.exit(1 if failures else 0)
|
clovis-0.1.0/PKG-INFO
DELETED
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: clovis
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: cloooooo — personal LLM client, OpenAI-compatible interface over local Ollama
|
|
5
|
-
Author: Clovis Sfeir
|
|
6
|
-
License: MIT
|
|
7
|
-
Keywords: ai,llm,local-ai,ollama,openai
|
|
8
|
-
Classifier: Development Status :: 3 - Alpha
|
|
9
|
-
Classifier: Intended Audience :: Developers
|
|
10
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
-
Requires-Python: >=3.10
|
|
17
|
-
Requires-Dist: fastapi>=0.111
|
|
18
|
-
Requires-Dist: httpx>=0.27
|
|
19
|
-
Requires-Dist: pydantic>=2.0
|
|
20
|
-
Requires-Dist: rich>=13.0
|
|
21
|
-
Requires-Dist: typer>=0.12
|
|
22
|
-
Requires-Dist: uvicorn[standard]>=0.30
|
|
23
|
-
Description-Content-Type: text/markdown
|
|
24
|
-
|
|
25
|
-
# clovis
|
|
26
|
-
|
|
27
|
-
OpenAI-compatible Python client over a local [Ollama](https://ollama.com) instance.
|
|
28
|
-
|
|
29
|
-
## Install
|
|
30
|
-
|
|
31
|
-
```bash
|
|
32
|
-
pip install clovis
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
## Usage
|
|
36
|
-
|
|
37
|
-
```python
|
|
38
|
-
from clovis import cloooooo
|
|
39
|
-
|
|
40
|
-
client = cloooooo() # connects to localhost:11434 by default
|
|
41
|
-
|
|
42
|
-
# Chat
|
|
43
|
-
resp = client.chat.completions.create(
|
|
44
|
-
model="qwen3-72b",
|
|
45
|
-
messages=[{"role": "user", "content": "Bonjour !"}]
|
|
46
|
-
)
|
|
47
|
-
print(resp.choices[0].message.content)
|
|
48
|
-
|
|
49
|
-
# Streaming
|
|
50
|
-
for chunk in client.chat.completions.create(
|
|
51
|
-
messages=[{"role": "user", "content": "Écris un poème"}],
|
|
52
|
-
stream=True,
|
|
53
|
-
):
|
|
54
|
-
print(chunk.choices[0].delta.get("content", ""), end="", flush=True)
|
|
55
|
-
|
|
56
|
-
# Conversation with auto history
|
|
57
|
-
with client.conversation(system="Tu es un expert en finance.") as conv:
|
|
58
|
-
print(conv.chat("Explique le CAPM"))
|
|
59
|
-
print(conv.chat("Et ses limites ?")) # remembers context
|
|
60
|
-
|
|
61
|
-
# Start API server
|
|
62
|
-
cloooooo.serve(port=8000, api_key="sk-...")
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
## CLI
|
|
66
|
-
|
|
67
|
-
```bash
|
|
68
|
-
clovis "Explique les trous noirs" # direct question
|
|
69
|
-
clovis repl # interactive conversation
|
|
70
|
-
clovis serve --port 8000 # start API server
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
## Config
|
|
74
|
-
|
|
75
|
-
```bash
|
|
76
|
-
export CLOVIS_MODEL="qwen3-72b"
|
|
77
|
-
export CLOVIS_OLLAMA_URL="http://localhost:11434"
|
|
78
|
-
export CLOVIS_API_KEY="sk-..."
|
|
79
|
-
```
|
clovis-0.1.0/README.md
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
# clovis
|
|
2
|
-
|
|
3
|
-
OpenAI-compatible Python client over a local [Ollama](https://ollama.com) instance.
|
|
4
|
-
|
|
5
|
-
## Install
|
|
6
|
-
|
|
7
|
-
```bash
|
|
8
|
-
pip install clovis
|
|
9
|
-
```
|
|
10
|
-
|
|
11
|
-
## Usage
|
|
12
|
-
|
|
13
|
-
```python
|
|
14
|
-
from clovis import cloooooo
|
|
15
|
-
|
|
16
|
-
client = cloooooo() # connects to localhost:11434 by default
|
|
17
|
-
|
|
18
|
-
# Chat
|
|
19
|
-
resp = client.chat.completions.create(
|
|
20
|
-
model="qwen3-72b",
|
|
21
|
-
messages=[{"role": "user", "content": "Bonjour !"}]
|
|
22
|
-
)
|
|
23
|
-
print(resp.choices[0].message.content)
|
|
24
|
-
|
|
25
|
-
# Streaming
|
|
26
|
-
for chunk in client.chat.completions.create(
|
|
27
|
-
messages=[{"role": "user", "content": "Écris un poème"}],
|
|
28
|
-
stream=True,
|
|
29
|
-
):
|
|
30
|
-
print(chunk.choices[0].delta.get("content", ""), end="", flush=True)
|
|
31
|
-
|
|
32
|
-
# Conversation with auto history
|
|
33
|
-
with client.conversation(system="Tu es un expert en finance.") as conv:
|
|
34
|
-
print(conv.chat("Explique le CAPM"))
|
|
35
|
-
print(conv.chat("Et ses limites ?")) # remembers context
|
|
36
|
-
|
|
37
|
-
# Start API server
|
|
38
|
-
cloooooo.serve(port=8000, api_key="sk-...")
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
## CLI
|
|
42
|
-
|
|
43
|
-
```bash
|
|
44
|
-
clovis "Explique les trous noirs" # direct question
|
|
45
|
-
clovis repl # interactive conversation
|
|
46
|
-
clovis serve --port 8000 # start API server
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
## Config
|
|
50
|
-
|
|
51
|
-
```bash
|
|
52
|
-
export CLOVIS_MODEL="qwen3-72b"
|
|
53
|
-
export CLOVIS_OLLAMA_URL="http://localhost:11434"
|
|
54
|
-
export CLOVIS_API_KEY="sk-..."
|
|
55
|
-
```
|
clovis-0.1.0/src/clovis/_cli.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import sys
|
|
5
|
-
from typing import Optional
|
|
6
|
-
|
|
7
|
-
import typer
|
|
8
|
-
from rich.console import Console
|
|
9
|
-
from rich.live import Live
|
|
10
|
-
from rich.markdown import Markdown
|
|
11
|
-
|
|
12
|
-
app = typer.Typer(help="cloooooo — personal LLM CLI")
|
|
13
|
-
console = Console()
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _get_client():
|
|
17
|
-
from ._client import cloooooo
|
|
18
|
-
return cloooooo(
|
|
19
|
-
api_key=os.getenv("CLOVIS_API_KEY"),
|
|
20
|
-
ollama_url=os.getenv("CLOVIS_OLLAMA_URL", "http://localhost:11434"),
|
|
21
|
-
model=os.getenv("CLOVIS_MODEL", "qwen3-72b-q5km"),
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@app.command(name="chat")
|
|
26
|
-
def cmd_chat(
|
|
27
|
-
prompt: str = typer.Argument(..., help="Message à envoyer"),
|
|
28
|
-
model: Optional[str] = typer.Option(None, "--model", "-m"),
|
|
29
|
-
no_stream: bool = typer.Option(False, "--no-stream"),
|
|
30
|
-
markdown: bool = typer.Option(True, "--markdown/--no-markdown"),
|
|
31
|
-
):
|
|
32
|
-
"""Envoie un message au LLM."""
|
|
33
|
-
client = _get_client()
|
|
34
|
-
if model:
|
|
35
|
-
client.model = model
|
|
36
|
-
|
|
37
|
-
if no_stream:
|
|
38
|
-
resp = client.chat.completions.create(
|
|
39
|
-
messages=[{"role": "user", "content": prompt}]
|
|
40
|
-
)
|
|
41
|
-
text = resp.choices[0].message.content
|
|
42
|
-
console.print(Markdown(text) if markdown else text)
|
|
43
|
-
return
|
|
44
|
-
|
|
45
|
-
text = ""
|
|
46
|
-
with Live(console=console, refresh_per_second=15) as live:
|
|
47
|
-
for chunk in client.chat.completions.create(
|
|
48
|
-
messages=[{"role": "user", "content": prompt}],
|
|
49
|
-
stream=True,
|
|
50
|
-
):
|
|
51
|
-
text += chunk.choices[0].delta.get("content", "")
|
|
52
|
-
live.update(Markdown(text) if markdown else text)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
@app.command(name="serve")
|
|
56
|
-
def cmd_serve(
|
|
57
|
-
port: int = typer.Option(8000, "--port", "-p"),
|
|
58
|
-
host: str = typer.Option("0.0.0.0", "--host"),
|
|
59
|
-
api_key: Optional[str] = typer.Option(None, "--api-key", envvar="CLOVIS_API_KEY"),
|
|
60
|
-
ollama_url: str = typer.Option("http://localhost:11434", "--ollama-url", envvar="CLOVIS_OLLAMA_URL"),
|
|
61
|
-
):
|
|
62
|
-
"""Lance le serveur API OpenAI-compatible."""
|
|
63
|
-
from ._server import start_server
|
|
64
|
-
console.print(f"[bold green]cloooooo API[/] démarré sur [bold]http://{host}:{port}[/]")
|
|
65
|
-
if api_key:
|
|
66
|
-
console.print(f"[dim]Clé API : {api_key[:12]}...[/]")
|
|
67
|
-
start_server(host=host, port=port, ollama_url=ollama_url, api_key=api_key)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
@app.command(name="repl")
def cmd_repl(
    system: Optional[str] = typer.Option(None, "--system", "-s", help="System prompt"),
    model: Optional[str] = typer.Option(None, "--model", "-m"),
):
    """Lance une conversation interactive (REPL)."""
    # The docstring is displayed by Typer as the command help, so it is kept
    # verbatim (English: "Start an interactive conversation (REPL).").
    #
    # system: optional system prompt that seeds the conversation.
    # model:  optional override of the client's configured model.
    client = _get_client()
    if model:
        client.model = model

    session = client.conversation(system=system)
    console.print("[bold]cloooooo REPL[/] — [dim]Ctrl+C pour quitter, /reset pour vider l'historique[/]\n")

    while True:
        try:
            user_input = typer.prompt("You")
        except (typer.Abort, KeyboardInterrupt):
            # Aborting the prompt is the way out of the loop.
            console.print("\n[dim]Au revoir.[/]")
            return

        if user_input.strip() == "/reset":
            # Built-in command: clear the history and ask again.
            session.reset()
            console.print("[dim]Historique réinitialisé.[/]")
        else:
            # Stream the reply, repainting the live region with the text
            # accumulated so far.
            reply_so_far = ""
            console.print("[bold cyan]cloooooo[/] ", end="")
            with Live(console=console, refresh_per_second=15) as live:
                for token in session.stream(user_input):
                    reply_so_far += token
                    live.update(reply_so_far)
            console.print()
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
# Allow: clovis "question" (shortcut without subcommand)
# No docstring is added here on purpose: Typer would display it as the
# application's help text.
# NOTE(review): a positional argument declared on the group callback may
# capture the first word of `clovis chat ...` before subcommand dispatch --
# confirm against Click's group argument parsing.
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    prompt: Optional[str] = typer.Argument(None),
):
    # A subcommand was selected: it handles everything itself.
    if ctx.invoked_subcommand is not None:
        return

    # Bare invocation without a prompt: show the help text.
    if not prompt:
        console.print(ctx.get_help())
        return

    # Bare invocation with a prompt: same as `chat` with its default options.
    cmd_chat(prompt=prompt, model=None, no_stream=False, markdown=True)
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
import httpx
|
|
7
|
-
|
|
8
|
-
from ._completions import Chat
|
|
9
|
-
from ._conversation import Conversation
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class cloooooo:
    """
    Personal LLM client — OpenAI-compatible interface over a local Ollama instance.

    Settings are resolved in this order: explicit argument, environment
    variable (``CLOVIS_API_KEY``, ``CLOVIS_OLLAMA_URL``, ``CLOVIS_MODEL``),
    built-in default.

    Usage::

        from clovis import cloooooo

        client = cloooooo(api_key="sk-clovis-...")
        resp = client.chat.completions.create(
            model="qwen3-72b",
            messages=[{"role": "user", "content": "Bonjour !"}]
        )
        print(resp.choices[0].message.content)

    The client owns an ``httpx.Client``; call :meth:`close` or use the
    instance as a context manager to release it.
    """

    _DEFAULT_OLLAMA_URL = "http://localhost:11434"
    _DEFAULT_MODEL = "qwen3-72b-q5km"

    def __init__(
        self,
        api_key: Optional[str] = None,
        ollama_url: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        """Create a client.

        Args:
            api_key: API key; falls back to ``CLOVIS_API_KEY``.
            ollama_url: Base URL of the Ollama instance; falls back to
                ``CLOVIS_OLLAMA_URL`` and then ``http://localhost:11434``.
            model: Default model name; falls back to ``CLOVIS_MODEL`` and
                then ``qwen3-72b-q5km``.
        """
        self.api_key = api_key or os.getenv("CLOVIS_API_KEY")
        # The parameters default to None (not to the literal defaults) so the
        # environment variables are consulted when the caller passes nothing;
        # with non-empty string defaults the `or os.getenv(...)` fallback
        # could never trigger.
        self.ollama_url = self._setting(
            ollama_url, "CLOVIS_OLLAMA_URL", self._DEFAULT_OLLAMA_URL
        )
        self.model = self._setting(model, "CLOVIS_MODEL", self._DEFAULT_MODEL)

        self._http = httpx.Client()
        self.chat = Chat(self)

    @staticmethod
    def _setting(explicit: Optional[str], env_name: str, fallback: str) -> str:
        """Return the explicit value, else the env var, else the fallback.

        Empty strings are treated as "not set" at every level.
        """
        return explicit or os.getenv(env_name) or fallback

    def conversation(
        self,
        system: Optional[str] = None,
        model: Optional[str] = None,
    ) -> "Conversation":
        """Return a new conversation with persistent history."""
        return Conversation(self, system=system, model=model)

    @classmethod
    def serve(
        cls,
        port: int = 8000,
        host: str = "0.0.0.0",
        ollama_url: str = "http://localhost:11434",
        api_key: Optional[str] = None,
    ) -> None:
        """Start an OpenAI-compatible HTTP API server."""
        # Imported lazily so the server module is only loaded when serving.
        from ._server import start_server

        start_server(host=host, port=port, ollama_url=ollama_url, api_key=api_key)

    def models(self) -> list[str]:
        """List available models from Ollama.

        Raises:
            httpx.HTTPStatusError: if Ollama answers with an error status.
        """
        resp = self._http.get(f"{self.ollama_url}/api/tags", timeout=10)
        resp.raise_for_status()
        return [m["name"] for m in resp.json().get("models", [])]

    def close(self) -> None:
        """Close the underlying HTTP client and its pooled connections."""
        self._http.close()

    def __enter__(self) -> "cloooooo":
        return self

    def __exit__(self, *_) -> None:
        self.close()

    def __repr__(self) -> str:
        return f"cloooooo(model={self.model!r}, ollama_url={self.ollama_url!r})"
|
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
from typing import TYPE_CHECKING, Iterator, Optional
|
|
5
|
-
|
|
6
|
-
import httpx
|
|
7
|
-
|
|
8
|
-
from ._models import (
|
|
9
|
-
ChatCompletion,
|
|
10
|
-
ChatCompletionChunk,
|
|
11
|
-
ChatCompletionRequest,
|
|
12
|
-
DeltaChoice,
|
|
13
|
-
Message,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
if TYPE_CHECKING:
|
|
17
|
-
from ._client import cloooooo
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class ChatCompletions:
    """Implements ``client.chat.completions`` by calling Ollama's ``/api/chat``.

    The owning client supplies the default model, the Ollama base URL and the
    shared HTTP client.
    """

    def __init__(self, client: "cloooooo") -> None:
        self._client = client

    def create(
        self,
        model: Optional[str] = None,
        messages: Optional[list[dict]] = None,
        stream: bool = False,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        top_p: float = 0.9,
        **_,
    ) -> ChatCompletion | Iterator[ChatCompletionChunk]:
        """Send a chat request.

        Args:
            model: Model name; the client's default is used when omitted.
            messages: Chat turns as dicts; each is validated through
                ``Message``. ``None`` is treated as an empty list.
            stream: When true, return an iterator of chunks instead of a
                single completion.
            temperature: Forwarded as the ``temperature`` option.
            max_tokens: Forwarded as ``num_predict`` when truthy.
            top_p: Forwarded as the ``top_p`` option.
            **_: Any other keyword argument is accepted and ignored.

        Returns:
            A ``ChatCompletion``, or an iterator of ``ChatCompletionChunk``
            when ``stream`` is true.
        """
        model = model or self._client.model

        options = {"temperature": temperature, "top_p": top_p}
        if max_tokens:
            options["num_predict"] = max_tokens

        payload = {
            "model": model,
            # Round-trip through Message so malformed turns fail validation
            # here rather than at the server.
            "messages": [Message(**raw).model_dump() for raw in (messages or [])],
            "stream": stream,
            "options": options,
        }

        handler = self._stream if stream else self._sync
        return handler(model, payload)

    def _sync(self, model: str, payload: dict) -> ChatCompletion:
        """POST the payload and convert the single JSON reply."""
        url = f"{self._client.ollama_url}/api/chat"
        response = self._client._http.post(url, json=payload, timeout=600)
        response.raise_for_status()
        return ChatCompletion.from_ollama(response.json(), model)

    def _stream(self, model: str, payload: dict) -> Iterator[ChatCompletionChunk]:
        """POST the payload and yield one chunk per non-empty response line."""
        url = f"{self._client.ollama_url}/api/chat"
        shared_id = None
        with self._client._http.stream(
            "POST", url, json=payload, timeout=600
        ) as response:
            response.raise_for_status()
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                event = json.loads(raw_line)
                text = event.get("message", {}).get("content", "")
                finished = event.get("done", False)

                piece = ChatCompletionChunk(
                    model=model,
                    choices=[
                        DeltaChoice(
                            delta={"content": text} if text else {},
                            finish_reason="stop" if finished else None,
                        )
                    ],
                )
                # Every chunk of one stream carries the id generated for the
                # first chunk.
                if shared_id is None:
                    shared_id = piece.id
                else:
                    piece.id = shared_id

                yield piece
                if finished:
                    break
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class Chat:
    """Namespace object that gives the client its ``chat.completions`` path."""

    def __init__(self, client: "cloooooo") -> None:
        # The completions facade keeps the reference to the owning client.
        completions = ChatCompletions(client)
        self.completions = completions
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING, Iterator, Optional
|
|
4
|
-
|
|
5
|
-
from ._models import ChatCompletionChunk, Message
|
|
6
|
-
|
|
7
|
-
if TYPE_CHECKING:
|
|
8
|
-
from ._client import cloooooo
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class Conversation:
    """Persistent conversation with automatic history.

    Every successful exchange appends the user turn and the assistant reply
    to an internal list that is sent along with the next request. A request
    that fails, or a stream that is abandoned before it finishes, leaves the
    history exactly as it was, so a retry does not send the question twice.
    """

    def __init__(
        self,
        client: "cloooooo",
        system: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        """Create a conversation.

        Args:
            client: Client whose ``chat.completions.create`` is used.
            system: Optional system prompt stored as the first history entry.
            model: Model name; defaults to ``client.model``.
        """
        self._client = client
        self._model = model or client.model
        self._history: list[dict] = []
        if system:
            self._history.append({"role": "system", "content": system})

    def chat(self, message: str, **kwargs) -> str:
        """Send *message* and return the complete assistant reply.

        Extra keyword arguments are forwarded to ``create``. If the request
        raises, the pending user turn is removed before re-raising.
        """
        user_turn = {"role": "user", "content": message}
        self._history.append(user_turn)
        try:
            resp = self._client.chat.completions.create(
                model=self._model,
                messages=self._history,
                **kwargs,
            )
            content = resp.choices[0].message.content
        except Exception:
            self._discard(user_turn)
            raise
        self._history.append({"role": "assistant", "content": content})
        return content

    def stream(self, message: str, **kwargs) -> Iterator[str]:
        """Send *message* and yield the reply token by token.

        The assistant turn is recorded once the stream has been consumed to
        the end. If the request raises or the generator is closed early, the
        pending user turn is removed instead.
        """
        user_turn = {"role": "user", "content": message}
        self._history.append(user_turn)
        pieces: list[str] = []
        completed = False
        try:
            for chunk in self._client.chat.completions.create(
                model=self._model,
                messages=self._history,
                stream=True,
                **kwargs,
            ):
                token = chunk.choices[0].delta.get("content", "")
                pieces.append(token)
                yield token
            completed = True
        finally:
            # Runs on normal exhaustion, on errors and on generator close.
            if completed:
                self._history.append(
                    {"role": "assistant", "content": "".join(pieces)}
                )
            else:
                self._discard(user_turn)

    def _discard(self, turn: dict) -> None:
        """Remove *turn* if it is still the last history entry."""
        # Identity check: never pop something else, e.g. after a reset().
        if self._history and self._history[-1] is turn:
            self._history.pop()

    def reset(self, keep_system: bool = True) -> None:
        """Clear the history, keeping the leading system prompt by default."""
        if keep_system and self._history and self._history[0]["role"] == "system":
            self._history = [self._history[0]]
        else:
            self._history = []

    @property
    def history(self) -> list[dict]:
        """Return a shallow copy of the history list."""
        return list(self._history)

    def __enter__(self) -> "Conversation":
        return self

    def __exit__(self, *_) -> None:
        # Nothing to release; context-manager support is for scoping only.
        pass
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import time
|
|
4
|
-
import uuid
|
|
5
|
-
from typing import Iterator, Literal, Optional
|
|
6
|
-
|
|
7
|
-
from pydantic import BaseModel, Field
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class Message(BaseModel):
    """One chat turn: who speaks and what is said."""

    # Speaker of the turn; validation accepts exactly these three values.
    role: Literal["system", "user", "assistant"]
    # Text of the turn.
    content: str
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class ChatCompletionRequest(BaseModel):
    """Request body accepted by the ``/v1/chat/completions`` endpoint."""

    # Name of the model to run (required).
    model: str
    # Conversation so far (required).
    messages: list[Message]
    # Whether the reply should be streamed.
    stream: bool = False
    # Sampling options and their defaults.
    temperature: float = 0.7
    max_tokens: Optional[int] = None
    top_p: float = 0.9
    # NOTE(review): the request-building code visible in this file forwards
    # temperature, top_p and max_tokens but never reads top_k -- confirm
    # whether it should be forwarded as well.
    top_k: int = 20
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class Choice(BaseModel):
    """One complete answer inside a non-streamed ``ChatCompletion``."""

    # Position of this choice in the ``choices`` list.
    index: int = 0
    # The generated message.
    message: Message
    # Why generation ended; "stop" unless the caller says otherwise.
    finish_reason: str = "stop"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class DeltaChoice(BaseModel):
    """One incremental piece of an answer inside a ``ChatCompletionChunk``."""

    # Position of this choice in the ``choices`` list.
    index: int = 0
    # Incremental payload as a plain dict; the producers in this file pass
    # either {"content": <text>} or an empty dict.
    delta: dict
    # None while the stream is running; set on the closing chunk.
    finish_reason: Optional[str] = None
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class Usage(BaseModel):
    """Token accounting attached to a completion; every counter defaults to 0."""

    # Tokens consumed by the prompt.
    prompt_tokens: int = 0
    # Tokens produced in the reply.
    completion_tokens: int = 0
    # Sum reported for the exchange.
    total_tokens: int = 0
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class ChatCompletion(BaseModel):
    """OpenAI-style response object for a non-streamed chat completion."""

    # "chatcmpl-" followed by 12 hex characters, generated per instance.
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: str = "chat.completion"
    # Creation time in whole seconds since the epoch.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: list[Choice]
    usage: Usage

    @classmethod
    def from_ollama(cls, data: dict, model: str) -> "ChatCompletion":
        """Build a completion from the JSON body of an Ollama chat reply.

        Args:
            data: Decoded reply; must contain ``message.role`` and
                ``message.content``. ``done``, ``done_reason``,
                ``prompt_eval_count`` and ``eval_count`` are optional.
            model: Model name to report in the result.

        Returns:
            A ``ChatCompletion`` with a single choice and the token usage.

        Raises:
            KeyError: if ``data`` has no ``message`` entry with role/content.
        """
        # Prefer the reason reported by Ollama: a finished reply always has
        # done=True, so `done` alone cannot tell a natural stop from a reply
        # cut off at the token limit. Fall back to the `done` flag otherwise.
        reported = data.get("done_reason")
        if reported in ("stop", "length"):
            finish_reason = reported
        else:
            finish_reason = "stop" if data.get("done") else "length"

        # `or 0` also covers counters that are present but null.
        prompt_tokens = data.get("prompt_eval_count") or 0
        completion_tokens = data.get("eval_count") or 0

        return cls(
            model=model,
            choices=[Choice(
                message=Message(
                    role=data["message"]["role"],
                    content=data["message"]["content"],
                ),
                finish_reason=finish_reason,
            )],
            usage=Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
class ChatCompletionChunk(BaseModel):
    """OpenAI-style object for one streamed piece of a chat completion."""

    # "chatcmpl-" followed by 12 hex characters, generated per instance.
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: str = "chat.completion.chunk"
    # Creation time in whole seconds since the epoch.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: list[DeltaChoice]
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import time
|
|
5
|
-
import uuid
|
|
6
|
-
from typing import Optional
|
|
7
|
-
|
|
8
|
-
import httpx
|
|
9
|
-
import uvicorn
|
|
10
|
-
from fastapi import Depends, FastAPI, HTTPException, Request
|
|
11
|
-
from fastapi.responses import StreamingResponse
|
|
12
|
-
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
|
13
|
-
|
|
14
|
-
from ._models import ChatCompletionRequest
|
|
15
|
-
|
|
16
|
-
_bearer = HTTPBearer(auto_error=False)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def build_app(ollama_url: str, api_key: Optional[str]) -> FastAPI:
    """Build the OpenAI-compatible FastAPI application.

    Args:
        ollama_url: Base URL of the Ollama instance requests are proxied to.
        api_key: When set, every endpoint requires this value as a bearer
            token; when falsy, no authentication is enforced.

    Returns:
        An app exposing ``GET /v1/models`` and ``POST /v1/chat/completions``.
    """
    # Local import keeps the new dependency inside this block.
    import secrets

    app = FastAPI(title="cloooooo API", version="0.1.0")

    def _check_key(creds: Optional[HTTPAuthorizationCredentials] = Depends(_bearer)):
        """Reject the request with 401 unless the bearer token matches."""
        if not api_key:
            return
        supplied = creds.credentials if creds else ""
        # Constant-time comparison so response timing does not reveal how
        # much of a guessed key is correct; bytes are used because
        # compare_digest only accepts ASCII-only str arguments.
        if not secrets.compare_digest(supplied.encode(), api_key.encode()):
            raise HTTPException(status_code=401, detail="Invalid API key")

    def _as_http_error(exc: httpx.HTTPError) -> HTTPException:
        """Translate an upstream failure into a response for our client."""
        if isinstance(exc, httpx.HTTPStatusError):
            # Ollama answered with an error: relay its status and body.
            return HTTPException(
                status_code=exc.response.status_code,
                detail=exc.response.text,
            )
        # Transport-level failure (connection refused, timeout, ...).
        return HTTPException(status_code=502, detail=f"Ollama request failed: {exc}")

    @app.get("/v1/models", dependencies=[Depends(_check_key)])
    def list_models():
        """Return the models known to Ollama in OpenAI list format."""
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=10)
            # Without this check an upstream error body would be reported as
            # an empty model list.
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            raise _as_http_error(exc) from exc
        models = [m["name"] for m in resp.json().get("models", [])]
        return {
            "object": "list",
            "data": [{"id": m, "object": "model", "owned_by": "clovis"} for m in models],
        }

    @app.post("/v1/chat/completions", dependencies=[Depends(_check_key)])
    async def chat_completions(req: ChatCompletionRequest, raw: Request):
        """Proxy a chat request to Ollama, streamed or not."""
        payload = {
            "model": req.model,
            "messages": [m.model_dump() for m in req.messages],
            "stream": req.stream,
            "options": {
                "temperature": req.temperature,
                "top_p": req.top_p,
                **({"num_predict": req.max_tokens} if req.max_tokens else {}),
            },
        }

        if req.stream:
            return StreamingResponse(
                _stream_ollama(ollama_url, payload, req.model),
                media_type="text/event-stream",
            )

        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(
                    f"{ollama_url}/api/chat", json=payload, timeout=600
                )
                resp.raise_for_status()
        except httpx.HTTPError as exc:
            raise _as_http_error(exc) from exc
        data = resp.json()

        # Report a truncated reply as "length" when Ollama says so; anything
        # else keeps the previous "stop".
        reported = data.get("done_reason")
        finish_reason = reported if reported in ("stop", "length") else "stop"
        prompt_tokens = data.get("prompt_eval_count") or 0
        completion_tokens = data.get("eval_count") or 0

        cid = f"chatcmpl-{uuid.uuid4().hex[:12]}"
        return {
            "id": cid,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": req.model,
            "choices": [{
                "index": 0,
                "message": data["message"],
                "finish_reason": finish_reason,
            }],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }

    return app
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
async def _stream_ollama(ollama_url: str, payload: dict, model: str):
    """Relay a streamed Ollama chat reply as OpenAI-style server-sent events.

    Args:
        ollama_url: Base URL of the Ollama instance.
        payload: JSON body to POST to ``/api/chat``.
        model: Model name to report in every chunk.

    Yields:
        ``data: <json>\\n\\n`` events, one per non-empty upstream line, and a
        final ``data: [DONE]\\n\\n``. Upstream failures are reported as a
        ``{"error": {...}}`` event: the response has already started by the
        time they are detected, so the HTTP status can no longer change.
    """
    cid = f"chatcmpl-{uuid.uuid4().hex[:12]}"

    def _event(body: dict) -> str:
        # One server-sent event carrying a JSON document.
        return f"data: {json.dumps(body)}\n\n"

    def _error_event(message: str, code: int) -> str:
        return _event(
            {"error": {"message": message, "type": "upstream_error", "code": code}}
        )

    try:
        async with httpx.AsyncClient() as client:
            async with client.stream("POST", f"{ollama_url}/api/chat", json=payload, timeout=600) as resp:
                if resp.status_code >= 400:
                    # Previously the error body was parsed like a normal line
                    # and surfaced as an empty chunk.
                    detail = (await resp.aread()).decode("utf-8", errors="replace")
                    yield _error_event(detail, resp.status_code)
                else:
                    async for line in resp.aiter_lines():
                        if not line:
                            continue
                        data = json.loads(line)
                        if "error" in data:
                            # Failure reported by Ollama in the middle of the
                            # stream: pass it on and stop.
                            yield _error_event(str(data["error"]), 502)
                            break
                        token = data.get("message", {}).get("content", "")
                        done = data.get("done", False)

                        yield _event({
                            "id": cid,
                            "object": "chat.completion.chunk",
                            "created": int(time.time()),
                            "model": model,
                            "choices": [{
                                "index": 0,
                                "delta": {"content": token} if token else {},
                                "finish_reason": "stop" if done else None,
                            }],
                        })
                        if done:
                            break
    except httpx.HTTPError as exc:
        # Connection refused, timeout, broken stream, ...
        yield _error_event(f"Ollama request failed: {exc}", 502)
    yield "data: [DONE]\n\n"
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def start_server(
    host: str = "0.0.0.0",
    port: int = 8000,
    ollama_url: str = "http://localhost:11434",
    api_key: Optional[str] = None,
) -> None:
    """Build the API application and run it under uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
        ollama_url: Base URL of the Ollama instance, passed to ``build_app``.
        api_key: Optional bearer token, passed to ``build_app``.
    """
    uvicorn.run(
        build_app(ollama_url=ollama_url, api_key=api_key),
        host=host,
        port=port,
    )
|