clovis 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: clovis
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: cloooooo — personal LLM client, prompt/context/thinking interface over local Ollama
5
5
  Author: Clovis Sfeir
6
6
  License: MIT
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
16
  Requires-Python: >=3.10
17
+ Requires-Dist: ddgs>=0.1
17
18
  Requires-Dist: fastapi>=0.111
18
19
  Requires-Dist: httpx>=0.27
19
20
  Requires-Dist: pydantic>=2.0
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "clovis"
7
- version = "0.3.0"
7
+ version = "0.4.0"
8
8
  description = "cloooooo — personal LLM client, prompt/context/thinking interface over local Ollama"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -28,6 +28,7 @@ dependencies = [
28
28
  "typer>=0.12",
29
29
  "pydantic>=2.0",
30
30
  "rich>=13.0",
31
+ "ddgs>=0.1",
31
32
  ]
32
33
 
33
34
  [project.scripts]
Binary file
@@ -1,4 +1,4 @@
1
1
  from ._client import cloooooo
2
2
 
3
3
  __all__ = ["cloooooo"]
4
- __version__ = "0.3.0"
4
+ __version__ = "0.4.0"
@@ -8,14 +8,13 @@ import httpx
8
8
 
9
9
  _SERVER_URL = "https://cloooooo.com" # API publique par défaut
10
10
  _OLLAMA_URL = "http://localhost:11434" # fallback local
11
- _MODEL = "qwen3:14b"
11
+ _MODEL = "qwen3-32b"
12
12
 
13
13
 
14
14
  def _build_messages(
15
15
  prompt: str,
16
16
  context: Optional[str],
17
17
  negative_prompt: Optional[str],
18
- thinking: bool,
19
18
  history: list[dict],
20
19
  ) -> list[dict]:
21
20
  system_parts = []
@@ -23,8 +22,6 @@ def _build_messages(
23
22
  system_parts.append(context)
24
23
  if negative_prompt:
25
24
  system_parts.append(f"Évite absolument dans ta réponse : {negative_prompt}")
26
- if thinking:
27
- system_parts.append("Réfléchis étape par étape avant de répondre.")
28
25
 
29
26
  messages = []
30
27
  if system_parts:
@@ -41,15 +38,15 @@ class Conversation:
41
38
  self._history: list[dict] = []
42
39
 
43
40
  def __call__(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False) -> str:
44
- messages = _build_messages(prompt, self._context, negative_prompt, thinking, self._history)
45
- reply = self._ai._send(messages)
41
+ messages = _build_messages(prompt, self._context, negative_prompt, self._history)
42
+ reply = self._ai._send(messages, think=thinking)
46
43
  self._history += [{"role": "user", "content": prompt}, {"role": "assistant", "content": reply}]
47
44
  return reply
48
45
 
49
46
  def stream(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False) -> Iterator[str]:
50
- messages = _build_messages(prompt, self._context, negative_prompt, thinking, self._history)
47
+ messages = _build_messages(prompt, self._context, negative_prompt, self._history)
51
48
  full = ""
52
- for token in self._ai._stream(messages):
49
+ for token in self._ai._stream(messages, think=thinking):
53
50
  full += token
54
51
  yield token
55
52
  self._history += [{"role": "user", "content": prompt}, {"role": "assistant", "content": full}]
@@ -96,22 +93,86 @@ class cloooooo:
96
93
 
97
94
  self._http = httpx.Client()
98
95
 
99
- def __call__(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False, context: Optional[str] = None) -> str:
96
+ def __call__(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False, context: Optional[str] = None, search: bool = False) -> str:
100
97
  if self._mode == "server":
101
- return self._call_server(prompt, negative_prompt=negative_prompt, thinking=thinking, context=context)
102
- messages = _build_messages(prompt, context, negative_prompt, thinking, [])
103
- return self._send(messages)
104
-
105
- def stream(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False, context: Optional[str] = None) -> Iterator[str]:
98
+ return self._call_server(prompt, negative_prompt=negative_prompt, thinking=thinking, context=context, search=search)
99
+ if search:
100
+ from ._search import web_search
101
+ extra = web_search(prompt)
102
+ if extra:
103
+ context = f"{extra}\n\n{context}" if context else extra
104
+ messages = _build_messages(prompt, context, negative_prompt, [])
105
+ return self._send(messages, think=thinking)
106
+
107
+ def stream(self, prompt: str, *, negative_prompt: Optional[str] = None, thinking: bool = False, context: Optional[str] = None, search: bool = False) -> Iterator[str]:
106
108
  if self._mode == "server":
107
- yield from self._stream_server(prompt, negative_prompt=negative_prompt, thinking=thinking, context=context)
109
+ yield from self._stream_server(prompt, negative_prompt=negative_prompt, thinking=thinking, context=context, search=search)
108
110
  return
109
- messages = _build_messages(prompt, context, negative_prompt, thinking, [])
110
- yield from self._stream(messages)
111
+ if search:
112
+ from ._search import web_search
113
+ extra = web_search(prompt)
114
+ if extra:
115
+ context = f"{extra}\n\n{context}" if context else extra
116
+ messages = _build_messages(prompt, context, negative_prompt, [])
117
+ yield from self._stream(messages, think=thinking)
111
118
 
112
119
  def conversation(self, context: Optional[str] = None) -> Conversation:
113
120
  return Conversation(self, context=context)
114
121
 
122
+ def deep_think(
123
+ self,
124
+ prompt: str,
125
+ *,
126
+ max_iterations: int = 4,
127
+ searches_per_step: int = 3,
128
+ on_progress: "Optional[callable]" = None,
129
+ ) -> str:
130
+ if self._mode == "server":
131
+ resp = self._http.post(
132
+ f"{self._url}/deep_think",
133
+ json={"prompt": prompt, "max_iterations": max_iterations, "searches_per_step": searches_per_step},
134
+ timeout=600,
135
+ )
136
+ resp.raise_for_status()
137
+ return resp.json()["response"]
138
+ from ._deep_think import deep_think as _dt
139
+ return _dt(
140
+ prompt,
141
+ ollama_url=self._url,
142
+ model=self._model,
143
+ max_iterations=max_iterations,
144
+ searches_per_step=searches_per_step,
145
+ on_progress=on_progress,
146
+ )
147
+
148
+ def deep_think_stream(
149
+ self,
150
+ prompt: str,
151
+ *,
152
+ max_iterations: int = 4,
153
+ searches_per_step: int = 3,
154
+ ) -> Iterator[str]:
155
+ if self._mode == "server":
156
+ with self._http.stream(
157
+ "POST",
158
+ f"{self._url}/deep_think",
159
+ json={"prompt": prompt, "max_iterations": max_iterations, "searches_per_step": searches_per_step, "stream": True},
160
+ timeout=600,
161
+ ) as resp:
162
+ resp.raise_for_status()
163
+ for chunk in resp.iter_text():
164
+ if chunk:
165
+ yield chunk
166
+ return
167
+ from ._deep_think import deep_think_stream as _dts
168
+ yield from _dts(
169
+ prompt,
170
+ ollama_url=self._url,
171
+ model=self._model,
172
+ max_iterations=max_iterations,
173
+ searches_per_step=searches_per_step,
174
+ )
175
+
115
176
  # --- mode server (cloooooo.com/ia) ---
116
177
 
117
178
  def _call_server(self, prompt: str, **kwargs) -> str:
@@ -137,20 +198,20 @@ class cloooooo:
137
198
 
138
199
  # --- mode ollama (local) ---
139
200
 
140
- def _send(self, messages: list[dict]) -> str:
201
+ def _send(self, messages: list[dict], think: bool = False) -> str:
141
202
  resp = self._http.post(
142
203
  f"{self._url}/api/chat",
143
- json={"model": self._model, "messages": messages, "stream": False, "think": False},
204
+ json={"model": self._model, "messages": messages, "stream": False, "think": think},
144
205
  timeout=120,
145
206
  )
146
207
  resp.raise_for_status()
147
208
  return resp.json()["message"]["content"]
148
209
 
149
- def _stream(self, messages: list[dict]) -> Iterator[str]:
210
+ def _stream(self, messages: list[dict], think: bool = False) -> Iterator[str]:
150
211
  with self._http.stream(
151
212
  "POST",
152
213
  f"{self._url}/api/chat",
153
- json={"model": self._model, "messages": messages, "stream": True, "think": False},
214
+ json={"model": self._model, "messages": messages, "stream": True, "think": think},
154
215
  timeout=120,
155
216
  ) as resp:
156
217
  resp.raise_for_status()
@@ -0,0 +1,288 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from typing import Iterator
6
+
7
+ import httpx
8
+
9
+ _PLAN_SYSTEM = """Tu es un assistant de recherche expert.
10
+ Réponds UNIQUEMENT en JSON valide, sans markdown, sans explication."""
11
+
12
+ _REFLECT_SYSTEM = """Tu es un analyste de recherche critique.
13
+ Réponds UNIQUEMENT en JSON valide, sans markdown, sans explication."""
14
+
15
+ _SYNTH_SYSTEM = """Tu es un expert en synthèse d'informations.
16
+ Tu dois produire une réponse complète, structurée et approfondie en te basant uniquement sur les recherches fournies."""
17
+
18
+
19
def _llm_json(prompt: str, system: str, ollama_url: str, model: str, timeout: int = 60) -> dict:
    """Send one system + user exchange to Ollama and return the reply as a JSON object.

    The request is non-streaming, disables thinking and asks for JSON output
    through the ``format`` field.

    Args:
        prompt: Content of the user message.
        system: Content of the system message.
        ollama_url: Base URL of the Ollama server; ``/api/chat`` is appended.
        model: Model name sent in the request body.
        timeout: Timeout passed to ``httpx.post``.

    Returns:
        The decoded reply, guaranteed to be a ``dict``.

    Raises:
        ValueError: If the reply is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass) or decodes to something other than an
            object. Callers use ``.get`` on the result, so a list or scalar
            would otherwise surface later as an unrelated ``AttributeError``.

    Errors raised by ``resp.raise_for_status()`` propagate unchanged.
    """
    resp = httpx.post(
        f"{ollama_url}/api/chat",
        json={
            "model": model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ],
            "stream": False,
            "think": False,
            "format": "json",
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    raw = resp.json()["message"]["content"]
    # Strip markdown fences in case the model wraps its answer anyway.
    raw = re.sub(r"^```(?:json)?\n?", "", raw.strip())
    raw = re.sub(r"\n?```$", "", raw.strip())
    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object from the model, got {type(parsed).__name__}"
        )
    return parsed
40
+
41
+
42
def _llm_text(messages: list[dict], ollama_url: str, model: str, timeout: int = 120, think: bool = False) -> str:
    """Post a non-streaming chat request to Ollama and return the reply text.

    Args:
        messages: Chat messages forwarded as-is in the request body.
        ollama_url: Base URL of the Ollama server; ``/api/chat`` is appended.
        model: Model name sent in the request body.
        timeout: Timeout passed to ``httpx.post``.
        think: Value of the ``think`` field of the request.

    Returns:
        The ``message.content`` field of the JSON response.
    """
    endpoint = f"{ollama_url}/api/chat"
    body = {"model": model, "messages": messages, "stream": False, "think": think}
    reply = httpx.post(endpoint, json=body, timeout=timeout)
    reply.raise_for_status()
    payload = reply.json()
    return payload["message"]["content"]
50
+
51
+
52
def _llm_stream(messages: list[dict], ollama_url: str, model: str, think: bool = False) -> Iterator[str]:
    """Stream a chat completion from Ollama, yielding each non-empty content piece.

    The response is read line by line; every non-blank line is decoded as one
    JSON object. Iteration ends at the first object whose ``done`` field is
    truthy, or when the response body is exhausted.

    Args:
        messages: Chat messages forwarded as-is in the request body.
        ollama_url: Base URL of the Ollama server; ``/api/chat`` is appended.
        model: Model name sent in the request body.
        think: Value of the ``think`` field of the request.

    Yields:
        The ``message.content`` string of each streamed object, when non-empty.
    """
    request_body = {"model": model, "messages": messages, "stream": True, "think": think}
    with httpx.stream(
        "POST",
        f"{ollama_url}/api/chat",
        json=request_body,
        timeout=300,
    ) as response:
        response.raise_for_status()
        for raw_line in response.iter_lines():
            if raw_line:
                event = json.loads(raw_line)
                piece = event.get("message", {}).get("content", "")
                if piece:
                    yield piece
                if event.get("done"):
                    return
69
+
70
+
71
def _plan(prompt: str, n_queries: int, ollama_url: str, model: str) -> list[str]:
    """Ask the model for up to *n_queries* web-search queries covering *prompt*.

    Args:
        prompt: The question to research.
        n_queries: Number of queries requested; also the maximum returned.
        ollama_url: Base URL of the Ollama server, forwarded to ``_llm_json``.
        model: Model name, forwarded to ``_llm_json``.

    Returns:
        At most *n_queries* non-empty, whitespace-trimmed query strings. The
        list is empty when the reply has no usable ``queries`` field.
    """
    # Build the example from n_queries so it never contradicts the
    # "exactement {n_queries}" instruction (identical to the former literal
    # when n_queries == 3).
    example = json.dumps(
        {"queries": [f"requête{i}" for i in range(1, n_queries + 1)]},
        ensure_ascii=False,
    )
    result = _llm_json(
        f"""Question à approfondir : {prompt}

Génère exactement {n_queries} requêtes de recherche web complémentaires et diversifiées pour rassembler toutes les informations nécessaires.

Réponds avec ce JSON :
{example}""",
        _PLAN_SYSTEM,
        ollama_url,
        model,
    )

    queries = result.get("queries") if isinstance(result, dict) else None
    if isinstance(queries, str):
        # A lone string would otherwise be sliced into single characters.
        queries = [queries]
    elif not isinstance(queries, list):
        return []
    # Model output is untrusted: keep only real, non-blank strings.
    cleaned = [q.strip() for q in queries if isinstance(q, str) and q.strip()]
    return cleaned[:n_queries]
85
+
86
+
87
def _reflect(
    prompt: str,
    accumulated: list[str],
    iteration: int,
    max_iterations: int,
    n_queries: int,
    ollama_url: str,
    model: str,
) -> tuple[bool, list[str]]:
    """Ask the model whether the collected findings suffice, and what to search next.

    Args:
        prompt: The original question.
        accumulated: Summaries gathered so far; only the last six are sent.
        iteration: Current iteration number, shown to the model.
        max_iterations: Iteration budget, shown to the model.
        n_queries: Number of follow-up queries requested; also the maximum returned.
        ollama_url: Base URL of the Ollama server, forwarded to ``_llm_json``.
        model: Model name, forwarded to ``_llm_json``.

    Returns:
        ``(satisfied, follow_up_queries)``. ``satisfied`` is always a real
        ``bool``; the queries are non-empty, whitespace-trimmed strings.
    """
    context_summary = "\n\n---\n\n".join(accumulated[-6:])  # keep the 6 most recent blocks
    # Example list sized from n_queries (identical to the former literal when
    # n_queries == 3).
    example_queries = json.dumps([f"q{i}" for i in range(1, n_queries + 1)])
    result = _llm_json(
        f"""Question initiale : {prompt}

Informations collectées jusqu'ici (itération {iteration}/{max_iterations}) :
{context_summary}

Analyse :
1. Est-ce qu'on a suffisamment d'informations pour répondre complètement et avec précision ?
2. Quelles lacunes importantes subsistent ?
3. Génère {n_queries} nouvelles requêtes pour combler ces lacunes.

Réponds avec ce JSON :
{{"satisfied": true/false, "missing": "description des lacunes", "follow_up_queries": {example_queries}}}""",
        _REFLECT_SYSTEM,
        ollama_url,
        model,
        timeout=90,
    )
    if not isinstance(result, dict):
        return False, []

    verdict = result.get("satisfied", False)
    if isinstance(verdict, str):
        # A model may answer "false" as a string; any non-empty string is
        # truthy, which would end the research loop prematurely.
        satisfied = verdict.strip().lower() in {"true", "yes", "oui", "1"}
    else:
        satisfied = bool(verdict)

    follow_ups = result.get("follow_up_queries")
    if isinstance(follow_ups, str):
        # A lone string would otherwise be sliced into single characters.
        follow_ups = [follow_ups]
    elif not isinstance(follow_ups, list):
        follow_ups = []
    queries = [q.strip() for q in follow_ups if isinstance(q, str) and q.strip()]
    return satisfied, queries[:n_queries]
119
+
120
+
121
def _extract(
    prompt: str, search_results: str, ollama_url: str, model: str
) -> str:
    """Condense raw search results into the few points relevant to *prompt*.

    Args:
        prompt: The question the results should answer.
        search_results: Text of the search results to summarize.
        ollama_url: Base URL of the Ollama server, forwarded to ``_llm_text``.
        model: Model name, forwarded to ``_llm_text``.

    Returns:
        The text returned by ``_llm_text`` for the extraction request.
    """
    system_message = {
        "role": "system",
        "content": "Tu es un extracteur d'information précis. Résume uniquement ce qui est pertinent pour la question.",
    }
    user_message = {
        "role": "user",
        "content": f"""Question : {prompt}

Résultats de recherche :
{search_results}

Extrais et résume les informations clés et pertinentes en 3-5 points.""",
    }
    conversation = [system_message, user_message]
    return _llm_text(conversation, ollama_url, model, timeout=90)
145
+
146
+
147
def deep_think(
    prompt: str,
    ollama_url: str = "http://localhost:11434",
    model: str = "qwen3-32b",
    max_iterations: int = 4,
    searches_per_step: int = 3,
    on_progress: "callable | None" = None,
) -> str:
    """Run a multi-iteration web-research loop and return the synthesized answer.

    Pipeline: plan -> search -> extract -> reflect -> (repeat) -> synthesize.

    Args:
        prompt: The question to research.
        ollama_url: Base URL of the Ollama server used for every model call.
        model: Model name used for every model call.
        max_iterations: Maximum number of search/extract rounds.
        searches_per_step: Number of queries requested per round.
        on_progress: Optional callback invoked with one progress string per step.

    Returns:
        The final answer produced by the synthesis call.
    """
    from ._search import web_search

    def notify(message: str) -> None:
        # Progress reporting is optional; do nothing without a callback.
        if on_progress:
            on_progress(message)

    findings: list[str] = []

    # Step 1: planning
    notify("[plan] Génération du plan de recherche...")
    pending = _plan(prompt, searches_per_step, ollama_url, model)
    notify(f"[plan] {len(pending)} requêtes générées : {pending}")

    round_no = 0
    while round_no < max_iterations:
        round_no += 1
        notify(f"[iter {round_no}/{max_iterations}] Recherche en cours...")

        # Step 2: search
        hits = []
        for query in pending:
            notify(f"[search] → {query}")
            found = web_search(query, max_results=5)
            if found:
                hits.append(found)

        if not hits:
            notify("[search] Aucun résultat trouvé, arrêt.")
            break

        # Step 3: extraction
        notify("[extract] Analyse des résultats...")
        digest = _extract(prompt, "\n\n".join(hits), ollama_url, model)
        findings.append(f"=== Itération {round_no} ===\n{digest}")
        notify(f"[extract] ✓ {len(digest)} chars extraits")

        # Step 4: reflection (skipped on the last round, nothing would use it)
        if round_no >= max_iterations:
            break
        notify("[reflect] Analyse des lacunes...")
        satisfied, pending = _reflect(
            prompt, findings, round_no, max_iterations, searches_per_step, ollama_url, model
        )
        notify(f"[reflect] Satisfait={satisfied}, nouvelles requêtes={pending}")
        if satisfied:
            notify(f"[reflect] Recherche jugée complète à l'itération {round_no}.")
            break

    # Step 5: final synthesis
    notify("[synthesize] Génération de la réponse finale...")
    full_context = "\n\n".join(findings)
    system_message = {"role": "system", "content": _SYNTH_SYSTEM}
    user_message = {
        "role": "user",
        "content": f"""Question : {prompt}

Résultats de recherche approfondis ({len(findings)} itérations) :
{full_context}

Fournis une réponse complète, structurée, sourcée et approfondie à cette question.
Utilise des titres, des points clés, et cite les faits importants trouvés dans la recherche.""",
    }
    answer = _llm_text([system_message, user_message], ollama_url, model, timeout=300, think=True)
    notify(f"[synthesize] ✓ Réponse générée ({len(answer)} chars)")
    return answer
226
+
227
+
228
def deep_think_stream(
    prompt: str,
    ollama_url: str = "http://localhost:11434",
    model: str = "qwen3:14b",
    max_iterations: int = 4,
    searches_per_step: int = 3,
) -> Iterator[str]:
    """Streaming research loop: yield progress lines, then the final answer pieces.

    Progress lines start with ``[`` and end with a newline; the synthesized
    answer follows, streamed piece by piece from ``_llm_stream``.

    Args:
        prompt: The question to research.
        ollama_url: Base URL of the Ollama server used for every model call.
        model: Model name used for every model call.
        max_iterations: Maximum number of search/extract rounds.
        searches_per_step: Number of queries requested per round.

    Yields:
        Progress strings, then the pieces of the final answer.
    """
    # NOTE(review): the default model here is "qwen3:14b" while deep_think()
    # defaults to "qwen3-32b"; kept as-is to preserve the interface -- confirm
    # which one is intended.
    from ._search import web_search

    all_context: list[str] = []

    yield "[plan] Génération du plan de recherche...\n"
    queries = _plan(prompt, searches_per_step, ollama_url, model)
    # Queries come from model output; str() each item so an unexpected
    # non-string entry cannot make join() raise TypeError mid-stream.
    listed = ", ".join(map(str, queries))
    yield f"[plan] Requêtes : {listed}\n"

    for iteration in range(1, max_iterations + 1):
        yield f"[iter {iteration}/{max_iterations}] Recherche...\n"

        raw_results = []
        for q in queries:
            yield f"[search] → {q}\n"
            result = web_search(q, max_results=5)
            if result:
                raw_results.append(result)

        # Nothing found this round: stop searching and go to synthesis.
        if not raw_results:
            break

        combined = "\n\n".join(raw_results)
        yield "[extract] Analyse...\n"
        summary = _extract(prompt, combined, ollama_url, model)
        all_context.append(f"=== Itération {iteration} ===\n{summary}")

        # No reflection on the last round: its follow-up queries would be unused.
        if iteration < max_iterations:
            satisfied, queries = _reflect(
                prompt, all_context, iteration, max_iterations, searches_per_step, ollama_url, model
            )
            yield f"[reflect] Satisfait={satisfied}\n"
            if satisfied:
                break

    yield "[synthesize] Génération de la réponse finale...\n\n"

    full_context = "\n\n".join(all_context)
    final_messages = [
        {"role": "system", "content": _SYNTH_SYSTEM},
        {
            "role": "user",
            "content": f"""Question : {prompt}

Résultats de recherche approfondis ({len(all_context)} itérations) :
{full_context}

Fournis une réponse complète, structurée, sourcée et approfondie.""",
        },
    ]
    yield from _llm_stream(final_messages, ollama_url, model, think=True)
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+
4
def web_search(query: str, max_results: int = 4) -> str:
    """Return a context block listing DuckDuckGo text results for *query*.

    Best-effort by design: any failure yields an empty string instead of an
    exception, so callers can simply skip the search context.

    Args:
        query: Search query.
        max_results: Maximum number of results requested from ``DDGS.text``.

    Returns:
        A header line followed by one ``- title: body`` line per result, or
        ``""`` when the query is blank, ``ddgs`` is not installed, the search
        raises, or no result carries a title or body.
    """
    # A blank query cannot produce useful results; skip the network call.
    if not isinstance(query, str) or not query.strip():
        return ""

    try:
        from ddgs import DDGS
    except ImportError:
        return ""

    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=max_results))
    except Exception:
        # Deliberate best-effort: network/provider errors mean "no context".
        return ""

    lines = []
    for r in results:
        # Use .get so a result missing a field does not raise KeyError outside
        # the best-effort try block above.
        title = r.get("title") or ""
        body = r.get("body") or ""
        if title or body:
            lines.append(f"- {title}: {body}")

    if not lines:
        return ""
    return "Résultats de recherche web (utilise ces informations pour répondre) :\n" + "\n".join(lines)
@@ -14,17 +14,25 @@ from ._client import cloooooo
14
14
  _bearer = HTTPBearer(auto_error=False)
15
15
 
16
16
 
17
+ class DeepThinkRequest(BaseModel):
18
+ prompt: str
19
+ max_iterations: int = 4
20
+ searches_per_step: int = 3
21
+ stream: bool = False
22
+
23
+
17
24
  class IARequest(BaseModel):
18
25
  prompt: str
19
26
  negative_prompt: Optional[str] = None
20
27
  thinking: bool = False
21
28
  context: Optional[str] = None
22
29
  stream: bool = False
30
+ search: bool = False
23
31
 
24
32
 
25
33
  def build_app(api_key: Optional[str] = None) -> FastAPI:
26
- app = FastAPI(title="cloooooo", version="0.1.0")
27
- ai = cloooooo()
34
+ app = FastAPI(title="cloooooo", version="0.4.0")
35
+ ai = cloooooo(local=True)
28
36
 
29
37
  def _check_key(creds: Optional[HTTPAuthorizationCredentials] = Depends(_bearer)):
30
38
  if api_key and (not creds or creds.credentials != api_key):
@@ -32,10 +40,17 @@ def build_app(api_key: Optional[str] = None) -> FastAPI:
32
40
 
33
41
  @app.post("/ia", dependencies=[Depends(_check_key)])
34
42
  async def ia(req: IARequest):
43
+ context = req.context
44
+ if req.search:
45
+ from ._search import web_search
46
+ search_ctx = web_search(req.prompt)
47
+ if search_ctx:
48
+ context = f"{search_ctx}\n\n{context}" if context else search_ctx
49
+
35
50
  kwargs = dict(
36
51
  negative_prompt=req.negative_prompt,
37
52
  thinking=req.thinking,
38
- context=req.context,
53
+ context=context,
39
54
  )
40
55
  if req.stream:
41
56
  def generate():
@@ -45,9 +60,23 @@ def build_app(api_key: Optional[str] = None) -> FastAPI:
45
60
 
46
61
  return {"response": ai(req.prompt, **kwargs)}
47
62
 
63
+ @app.post("/deep_think", dependencies=[Depends(_check_key)])
64
+ async def deep_think_endpoint(req: DeepThinkRequest):
65
+ from ._deep_think import deep_think as _dt, deep_think_stream as _dts
66
+ ollama_url = ai._url
67
+ model = ai._model
68
+ if req.stream:
69
+ def generate():
70
+ yield from _dts(req.prompt, ollama_url=ollama_url, model=model,
71
+ max_iterations=req.max_iterations, searches_per_step=req.searches_per_step)
72
+ return StreamingResponse(generate(), media_type="text/plain")
73
+ answer = _dt(req.prompt, ollama_url=ollama_url, model=model,
74
+ max_iterations=req.max_iterations, searches_per_step=req.searches_per_step)
75
+ return {"response": answer}
76
+
48
77
  @app.get("/")
49
78
  def root():
50
- return {"status": "ok", "endpoint": "/ia"}
79
+ return {"status": "ok", "endpoints": ["/ia", "/deep_think"]}
51
80
 
52
81
  return app
53
82
 
File without changes
File without changes
File without changes