agent-devkit 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -11
- package/package.json +1 -1
- package/runtime/README.md +20 -10
- package/runtime/cli/README.md +14 -6
- package/runtime/cli/aikit/__init__.py +1 -1
- package/runtime/cli/aikit/app_home.py +1 -0
- package/runtime/cli/aikit/cli_parser.py +1 -1
- package/runtime/cli/aikit/embedded_mini_brain.py +351 -0
- package/runtime/cli/aikit/interactive_wizard.py +6 -8
- package/runtime/cli/aikit/llm.py +28 -2
- package/runtime/cli/aikit/local_llm.py +19 -4
- package/runtime/cli/aikit/local_llm_operator.py +15 -5
- package/runtime/cli/aikit/mini_brain.py +56 -44
- package/runtime/cli/aikit/model_router.py +42 -9
- package/runtime/cli/aikit/natural_prompt_runtime.py +69 -1
- package/runtime/cli/aikit/onboarding.py +3 -3
- package/runtime/cli/aikit/review_gate.py +14 -2
- package/runtime/models/qwen2.5-0.5b-instruct/manifest.json +30 -0
- package/runtime/scripts/release-catalog-snapshot.json +1 -1
package/README.md
CHANGED
|
@@ -30,7 +30,7 @@ agent doctor
|
|
|
30
30
|
Expected version for this release:
|
|
31
31
|
|
|
32
32
|
```text
|
|
33
|
-
agent 0.3.0
|
|
33
|
+
agent 0.3.1
|
|
34
34
|
```
|
|
35
35
|
|
|
36
36
|
## Quick Start
|
|
@@ -45,8 +45,12 @@ agent llm list
|
|
|
45
45
|
agent commands list
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
-
Agent DevKit `v0.3.
|
|
49
|
-
|
|
48
|
+
Agent DevKit `v0.3.1` also includes the embedded Qwen2.5-0.5B mini-brain
|
|
49
|
+
contract for local bootstrap conversations without Ollama, Claude, Codex or API
|
|
50
|
+
keys. The npm package stays small; `agent setup mini-brain --yes` downloads the
|
|
51
|
+
GGUF into `.agent-devkit/models` after explicit opt-in.
|
|
52
|
+
The `v0.3.0` deterministic runtime discovery and integration commands remain
|
|
53
|
+
available:
|
|
50
54
|
|
|
51
55
|
```bash
|
|
52
56
|
agent roadmap
|
|
@@ -73,14 +77,15 @@ Run a natural-language task:
|
|
|
73
77
|
agent "analise o problema relatado no card 9900"
|
|
74
78
|
```
|
|
75
79
|
|
|
76
|
-
Natural-language mode
|
|
77
|
-
|
|
78
|
-
|
|
80
|
+
Natural-language mode can start with the embedded mini-brain. Stronger
|
|
81
|
+
coordinator/reviewer backends remain optional for higher-level work.
|
|
82
|
+
Deterministic commands such as `agent agents list`, `agent capabilities list`,
|
|
83
|
+
`agent doctor`, `agent provider` and `agent run` do not require an external LLM.
|
|
79
84
|
|
|
80
85
|
Running `agent` without arguments starts the local onboarding status and wizard:
|
|
81
86
|
memory, personality, LLM backends, Ollama, toolchain, sources and next actions.
|
|
82
|
-
Use `agent onboard minimal` for identity, coordinator LLM,
|
|
83
|
-
|
|
87
|
+
Use `agent onboard minimal` for identity, optional coordinator LLM, installable
|
|
88
|
+
mini-brain and local memory. Use `agent onboard complete` to include toolchain,
|
|
84
89
|
providers/sources, specialist catalog, local automations, tasks, notifications,
|
|
85
90
|
knowledge and shared memory. Both commands return plans; external installs
|
|
86
91
|
still require explicit opt-in.
|
|
@@ -97,6 +102,7 @@ Useful operational commands:
|
|
|
97
102
|
agent plan "analyze Azure card 7914"
|
|
98
103
|
agent execute --dry-run "summarize these logs"
|
|
99
104
|
agent workflow install daily-pr-review --dry-run
|
|
105
|
+
agent setup mini-brain --yes
|
|
100
106
|
agent local-llm doctor
|
|
101
107
|
agent local-llm install qwen3:0.6b --dry-run
|
|
102
108
|
agent skill create my-skill --description "Local skill"
|
|
@@ -224,9 +230,11 @@ agent llm configure openrouter --api-key-env OPENROUTER_API_KEY --model openai/g
|
|
|
224
230
|
agent llm doctor openrouter
|
|
225
231
|
```
|
|
226
232
|
|
|
227
|
-
###
|
|
233
|
+
### Embedded mini-brain and Ollama local backend
|
|
228
234
|
|
|
229
235
|
```bash
|
|
236
|
+
agent setup mini-brain --yes
|
|
237
|
+
agent local-llm doctor
|
|
230
238
|
agent ollama status
|
|
231
239
|
agent ollama models
|
|
232
240
|
agent ollama pull qwen3:0.6b --dry-run
|
|
@@ -236,8 +244,11 @@ agent llm configure ollama --base-url http://localhost:11434/v1 --model qwen3:0.
|
|
|
236
244
|
agent llm doctor ollama
|
|
237
245
|
```
|
|
238
246
|
|
|
239
|
-
|
|
240
|
-
|
|
247
|
+
Agent DevKit includes an installable embedded mini-brain for initial
|
|
248
|
+
conversation, onboarding and setup without external authentication. The GGUF is
|
|
249
|
+
downloaded to `.agent-devkit/models` only after opt-in. Ollama is still treated
|
|
250
|
+
as an optional operational worker for repetitive local tasks. Codex and Claude
|
|
251
|
+
remain the preferred coordinators and reviewers for high-level planning,
|
|
241
252
|
software changes, documents, automation decisions and final review.
|
|
242
253
|
|
|
243
254
|
### Switch or override the backend
|
package/package.json
CHANGED
package/runtime/README.md
CHANGED
|
@@ -65,8 +65,8 @@ agent secrets doctor
|
|
|
65
65
|
agent mcp tools
|
|
66
66
|
```
|
|
67
67
|
|
|
68
|
-
`agent onboard minimal` planeja o setup essencial: identidade, coordenador LLM
|
|
69
|
-
mini-cerebro
|
|
68
|
+
`agent onboard minimal` planeja o setup essencial: identidade, coordenador LLM
|
|
69
|
+
opcional, mini-cerebro local instalavel sob demanda e memoria local. `agent onboard complete`
|
|
70
70
|
inclui tambem toolchain, providers/sources, catalogo de agentes, automacoes
|
|
71
71
|
locais, tarefas, notificacoes, knowledge e memoria compartilhada. Ambos
|
|
72
72
|
retornam plano deterministico; instalacoes externas continuam exigindo opt-in.
|
|
@@ -247,14 +247,23 @@ Uso:
|
|
|
247
247
|
agent "roteie este pedido para o agente especialista adequado"
|
|
248
248
|
```
|
|
249
249
|
|
|
250
|
-
###
|
|
250
|
+
### Mini cerebro embarcado e Ollama local
|
|
251
251
|
|
|
252
|
-
O Agent DevKit
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
252
|
+
O Agent DevKit vem com um mini cerebro local baseado no contrato
|
|
253
|
+
`Qwen/Qwen2.5-0.5B-Instruct` para conversa inicial, onboarding, setup e tarefas
|
|
254
|
+
simples sem depender de Claude, Codex, API externa ou Ollama. O pacote npm
|
|
255
|
+
inclui o manifest do modelo; o GGUF e baixado para `.agent-devkit/models` sob
|
|
256
|
+
demanda com `agent setup mini-brain --yes`.
|
|
257
|
+
|
|
258
|
+
Ollama continua suportado como pool opcional de workers locais. O Agent DevKit
|
|
259
|
+
consegue diagnosticar Ollama, listar modelos, planejar pull e usar o backend
|
|
260
|
+
local como trabalhador operacional quando ele estiver configurado ou tiver
|
|
261
|
+
modelos instalados. Claude/Codex continuam sendo os coordenadores e revisores
|
|
262
|
+
preferenciais para decisao, especificacao e entrega final.
|
|
256
263
|
|
|
257
264
|
```bash
|
|
265
|
+
agent setup mini-brain --yes
|
|
266
|
+
agent local-llm doctor
|
|
258
267
|
agent ollama status
|
|
259
268
|
agent ollama models
|
|
260
269
|
agent ollama pull qwen3:0.6b --dry-run
|
|
@@ -325,9 +334,10 @@ executa a task primaria pelo runner existente e revisa a conclusao pelo
|
|
|
325
334
|
`review_gate`.
|
|
326
335
|
|
|
327
336
|
Para tarefas operacionais como resumo, classificacao, extracao e normalizacao,
|
|
328
|
-
o runtime pode
|
|
329
|
-
|
|
330
|
-
|
|
337
|
+
o runtime pode usar o mini cerebro embarcado para bootstrap/conversa simples ou
|
|
338
|
+
delegar uma subtarefa limitada ao `local-llm-operator` usando Ollama quando
|
|
339
|
+
disponivel. O resultado local aparece em `local_llm_execution` e e usado apenas
|
|
340
|
+
como contexto de apoio pelo coordenador principal.
|
|
331
341
|
|
|
332
342
|
Quando `review_gate.required = true`, o Agent DevKit exige uma segunda revisao
|
|
333
343
|
concreta pelo `execution-reviewer`, preferindo `claude-code` ou `codex-cli`.
|
package/runtime/cli/README.md
CHANGED
|
@@ -209,8 +209,8 @@ agent onboard minimal
|
|
|
209
209
|
agent onboard complete
|
|
210
210
|
```
|
|
211
211
|
|
|
212
|
-
`minimal` cobre identidade, coordenador LLM, mini-cerebro
|
|
213
|
-
|
|
212
|
+
`minimal` cobre identidade, coordenador LLM opcional, mini-cerebro local
|
|
213
|
+
instalavel sob demanda e memoria local. `complete` inclui tambem toolchain, providers/sources,
|
|
214
214
|
catalogo de agentes, automacoes locais, tarefas, notificacoes, knowledge e
|
|
215
215
|
memoria compartilhada. Instalacoes externas continuam exigindo opt-in.
|
|
216
216
|
|
|
@@ -232,9 +232,12 @@ remoto continua exigindo provider, criptografia e opt-in explicito.
|
|
|
232
232
|
|
|
233
233
|
## Backends LLM
|
|
234
234
|
|
|
235
|
-
O modo `agent "<prompt>"`
|
|
235
|
+
O modo `agent "<prompt>"` consegue conversar e orientar setup com o mini cerebro
|
|
236
|
+
local depois que ele for instalado com opt-in. Para coordenacao/revisao mais forte, o Agent DevKit suporta estas
|
|
236
237
|
familias de backend:
|
|
237
238
|
|
|
239
|
+
- Mini cerebro local instalavel (`embedded-mini-brain`) para onboarding, setup e
|
|
240
|
+
conversa simples sem autenticacao externa.
|
|
238
241
|
- CLIs oficiais autenticadas fora do Agent DevKit (`codex-cli` e
|
|
239
242
|
`claude-code`).
|
|
240
243
|
- APIs configuradas por referencia a variavel de ambiente (`openai`,
|
|
@@ -321,6 +324,8 @@ agent llm doctor openrouter
|
|
|
321
324
|
### Ollama local
|
|
322
325
|
|
|
323
326
|
```bash
|
|
327
|
+
agent setup mini-brain --yes
|
|
328
|
+
agent local-llm doctor
|
|
324
329
|
agent ollama status
|
|
325
330
|
agent ollama models
|
|
326
331
|
agent ollama pull qwen3:0.6b --dry-run
|
|
@@ -330,12 +335,15 @@ agent llm configure ollama --base-url http://localhost:11434/v1 --model qwen3:0.
|
|
|
330
335
|
agent llm doctor ollama
|
|
331
336
|
```
|
|
332
337
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
338
|
+
O mini cerebro embarcado e a base inicial para conversa/setup sem dependencia
|
|
339
|
+
externa. Ollama e tratado como executor operacional local opcional. Codex e
|
|
340
|
+
Claude continuam como coordenadores/revisores preferenciais para decisao,
|
|
341
|
+
especificacao, codigo, documentos, automacoes e fechamento de entrega.
|
|
336
342
|
|
|
337
343
|
Backends suportados no MVP:
|
|
338
344
|
|
|
345
|
+
- `embedded-mini-brain`: mini cerebro local embarcado para bootstrap e tarefas
|
|
346
|
+
simples.
|
|
339
347
|
- `openai`: API OpenAI ou endpoint OpenAI-compatible.
|
|
340
348
|
- `anthropic`: API Anthropic.
|
|
341
349
|
- `openrouter`: API OpenRouter.
|
|
@@ -246,7 +246,7 @@ def build_parser(prog: str | None = None) -> argparse.ArgumentParser:
|
|
|
246
246
|
setup_parser.add_argument("--json", action="store_true", default=argparse.SUPPRESS, help=argparse.SUPPRESS)
|
|
247
247
|
setup_parser.add_argument("--dry-run", action="store_true", help="show setup plan without installing external tools")
|
|
248
248
|
setup_parser.add_argument("--yes", action="store_true", help="confirm setup actions")
|
|
249
|
-
setup_parser.add_argument("--set-default", action="store_true", help="make the mini-brain
|
|
249
|
+
setup_parser.add_argument("--set-default", action="store_true", help="make the embedded mini-brain the default LLM")
|
|
250
250
|
setup_parser.add_argument("action", nargs="?", default="plan", choices=["plan", "personality", "mini-brain"])
|
|
251
251
|
|
|
252
252
|
alias_parser = subparsers.add_parser("alias", help="manage local command aliases for agent")
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""Embedded mini-brain runtime backed by an on-demand GGUF model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
import urllib.error
|
|
11
|
+
import urllib.request
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from cli.aikit.app_home import app_path, ensure_app_home
|
|
16
|
+
from cli.aikit.identity import identity_system_prompt
|
|
17
|
+
from cli.aikit.runtime_paths import ROOT
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Identity of the embedded backend and of the model it wraps.
EMBEDDED_BACKEND_ID = "embedded-mini-brain"
EMBEDDED_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
EMBEDDED_MODEL_NAME = "qwen2.5-0.5b-instruct"

# Default download source, plus the size and digest expected for the GGUF file.
EMBEDDED_MODEL_SOURCE = "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q2_k.gguf"
EMBEDDED_MODEL_SIZE_BYTES = 415_182_688
EMBEDDED_MODEL_SHA256 = "9ee36184e616dfc76df4f5dd66f908dbde6979524ae36e6cefb67f532f798cb8"

# Filesystem locations: the model directory/file are resolved through
# app_path(), while the manifest is looked up under ROOT.
EMBEDDED_MODEL_PATH = app_path("models", EMBEDDED_MODEL_NAME)
EMBEDDED_MODEL_FILE = EMBEDDED_MODEL_PATH / "qwen2.5-0.5b-instruct-q2_k.gguf"
EMBEDDED_MANIFEST_PATH = ROOT / "models" / EMBEDDED_MODEL_NAME / "manifest.json"

# Inference runtime and the pip requirement used to install it.
EMBEDDED_RUNTIME = "llama-cpp-python"
EMBEDDED_RUNTIME_REQUIREMENT = "llama-cpp-python>=0.3.9"

# Generation defaults and the cap applied to returned text.
EMBEDDED_MAX_RESPONSE_CHARS = 2000
DEFAULT_MAX_TOKENS = 220
DEFAULT_CONTEXT_TOKENS = 2048

# Names of the environment variables that alter this module's behavior.
SMOKE_RESPONSE_ENV = "AGENT_DEVKIT_EMBEDDED_SMOKE_RESPONSE"
SOURCE_ENV = "AGENT_DEVKIT_EMBEDDED_MODEL_SOURCE"
SKIP_DEP_INSTALL_ENV = "AGENT_DEVKIT_EMBEDDED_SKIP_DEP_INSTALL"

# Process-wide cache for the loaded Llama instance (filled by load_llama()).
_LLAMA_CACHE: Any | None = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def embedded_mini_brain_status() -> dict[str, Any]:
    """Report whether the embedded mini-brain can run local inference.

    Checks the packaged manifest, the downloaded GGUF file (presence and
    SHA-256 digest) and the llama_cpp dependency. When the smoke-response
    environment variable is set, the model file is treated as valid without
    comparing its digest.

    Returns:
        A dict describing the backend. ``status`` is one of ``"ok"``,
        ``"not-installed"``, ``"invalid-model"`` or ``"dependency-missing"``
        and ``message`` explains that specific status.
    """
    manifest_exists = EMBEDDED_MANIFEST_PATH.exists()
    model_exists = EMBEDDED_MODEL_FILE.exists()
    model_sha256 = None
    if model_exists:
        try:
            model_sha256 = sha256_file(EMBEDDED_MODEL_FILE)
        except OSError:
            # An unreadable file cannot be validated; report it as invalid
            # instead of crashing status/doctor commands.
            model_sha256 = None
    smoke_mode = bool(os.environ.get(SMOKE_RESPONSE_ENV))
    model_file_valid = smoke_mode or (model_exists and model_sha256 == EMBEDDED_MODEL_SHA256)
    dependency = llama_cpp_dependency_status()
    available = model_file_valid and dependency["status"] == "ok"

    if available:
        status = "ok"
    elif not model_exists:
        status = "not-installed"
    elif not model_file_valid:
        status = "invalid-model"
    else:
        # The model is valid but the backend is unavailable, so the only
        # remaining cause is the runtime dependency.
        status = "dependency-missing"

    # One message per status so an invalid checksum is not reported as
    # "not installed".
    messages = {
        "ok": "Embedded Qwen2.5 mini-brain is available for real local inference.",
        "not-installed": "Embedded mini-brain model is not installed. Run: agent setup mini-brain --yes",
        "invalid-model": (
            "Embedded mini-brain model file failed SHA-256 validation. "
            "Run: agent setup mini-brain --yes"
        ),
        "dependency-missing": (
            "Embedded mini-brain model is installed but the llama_cpp runtime is missing. "
            "Run: agent setup mini-brain --yes"
        ),
    }

    return {
        "kind": "embedded-mini-brain",
        "id": EMBEDDED_BACKEND_ID,
        "status": status,
        "available": available,
        "configured": model_file_valid,
        "provider": EMBEDDED_BACKEND_ID,
        "runtime": EMBEDDED_RUNTIME,
        "runtime_requirement": EMBEDDED_RUNTIME_REQUIREMENT,
        "model": EMBEDDED_MODEL_ID,
        "hf_model": EMBEDDED_MODEL_ID,
        "model_name": EMBEDDED_MODEL_NAME,
        "model_path": str(EMBEDDED_MODEL_PATH),
        "model_file": str(EMBEDDED_MODEL_FILE),
        "model_file_present": model_exists,
        "model_file_valid": model_file_valid,
        "model_file_sha256": model_sha256,
        "smoke_mode": smoke_mode,
        "model_size_bytes": EMBEDDED_MODEL_SIZE_BYTES,
        "download_url": model_source(),
        "sha256": EMBEDDED_MODEL_SHA256,
        "manifest_path": str(EMBEDDED_MANIFEST_PATH),
        "manifest_present": manifest_exists,
        "dependency": dependency,
        "auth": "none",
        "stored_secret": False,
        "install_command": "agent setup mini-brain --yes",
        "message": messages[status],
    }
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def invoke_embedded_mini_brain(prompt: str, *, public_name: str = "Agent DevKit") -> str:
    """Run one chat completion on the embedded model and return its text.

    Args:
        prompt: User message sent to the model.
        public_name: Name used in the system prompt and in the smoke-mode
            reply prefix.

    Returns:
        The stripped completion text, truncated to
        ``EMBEDDED_MAX_RESPONSE_CHARS``. In smoke mode the configured smoke
        response is returned without loading the model.

    Raises:
        EmbeddedMiniBrainError: If the backend is unavailable, a sampling
            override in the environment is not numeric, or the model returns
            an unexpected or empty payload.
    """
    status = embedded_mini_brain_status()
    if not status["available"]:
        raise EmbeddedMiniBrainError(status["message"])
    smoke_response = os.environ.get(SMOKE_RESPONSE_ENV)
    if smoke_response:
        return f"{public_name}: {smoke_response}"[:EMBEDDED_MAX_RESPONSE_CHARS]

    # Parse overrides before loading the model so a malformed value surfaces
    # as the module's own error type rather than a bare ValueError.
    try:
        max_tokens = int(os.environ.get("AGENT_DEVKIT_EMBEDDED_MAX_TOKENS", str(DEFAULT_MAX_TOKENS)))
        temperature = float(os.environ.get("AGENT_DEVKIT_EMBEDDED_TEMPERATURE", "0.2"))
        top_p = float(os.environ.get("AGENT_DEVKIT_EMBEDDED_TOP_P", "0.9"))
        repeat_penalty = float(os.environ.get("AGENT_DEVKIT_EMBEDDED_REPEAT_PENALTY", "1.08"))
    except ValueError as exc:
        raise EmbeddedMiniBrainError(
            "AGENT_DEVKIT_EMBEDDED_* sampling overrides must be numeric."
        ) from exc

    llama = load_llama()
    payload = llama.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": embedded_system_prompt(public_name),
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
    )
    try:
        raw_content = payload["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise EmbeddedMiniBrainError("Embedded mini-brain returned an unexpected response shape.") from exc
    # A null content must count as empty; str(None) would otherwise produce
    # the literal reply "None".
    content = "" if raw_content is None else str(raw_content).strip()
    if not content:
        raise EmbeddedMiniBrainError("Embedded mini-brain returned an empty response.")
    return content[:EMBEDDED_MAX_RESPONSE_CHARS]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def embedded_backend_doctor() -> dict[str, Any]:
    """Build the doctor entry for the embedded backend.

    Returns:
        A dict combining fixed backend metadata with the ``status``,
        ``configured``, ``model_file`` and ``message`` fields taken from
        ``embedded_mini_brain_status()``.
    """
    snapshot = embedded_mini_brain_status()
    report: dict[str, Any] = {
        "id": EMBEDDED_BACKEND_ID,
        "display_name": "Embedded mini-brain",
        "kind": "embedded-local",
    }
    # Live values are copied from the status snapshot; insertion order is
    # kept the same as the reported key order.
    report["status"] = snapshot["status"]
    report["configured"] = snapshot["configured"]
    report["model"] = EMBEDDED_MODEL_ID
    report["model_file"] = snapshot["model_file"]
    report["runtime"] = EMBEDDED_RUNTIME
    report["auth_status"] = "none"
    report["message"] = snapshot["message"]
    return report
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def embedded_backend_config() -> dict[str, Any]:
    """Return the static configuration entry for the embedded backend.

    Returns:
        A dict with the backend kind, auth mode, model id, runtime name and
        the model file path as a string.
    """
    entry: dict[str, Any] = dict(
        kind="embedded-local",
        auth="none",
        model=EMBEDDED_MODEL_ID,
        runtime=EMBEDDED_RUNTIME,
    )
    entry["model_file"] = str(EMBEDDED_MODEL_FILE)
    return entry
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def setup_embedded_mini_brain(*, dry_run: bool = False, yes: bool = False) -> dict[str, Any]:
    """Plan or perform installation of the embedded mini-brain.

    Args:
        dry_run: When true, only return the plan (``status="planned"``).
        yes: Confirmation flag; without it (and without ``dry_run``) the
            result is ``status="needs-confirmation"`` with ``exit_code`` 2.

    Returns:
        An install report dict. When confirmed, it additionally carries the
        ``download`` and ``dependency_install`` step results, and ``ok``
        reflects whether the backend is available afterwards.
    """
    snapshot_before = embedded_mini_brain_status()
    install_plan = embedded_install_plan()

    confirmed = yes and not dry_run
    if not confirmed:
        awaiting_yes = not (dry_run or yes)
        if dry_run:
            outcome = "planned"
        else:
            outcome = "needs-confirmation"
        return {
            "kind": "embedded-mini-brain-install",
            "status": outcome,
            "ok": bool(dry_run),
            "exit_code": 2 if awaiting_yes else 0,
            "dry_run": dry_run,
            "yes": yes,
            "before": snapshot_before,
            # Nothing was changed, so the "after" snapshot is the same one.
            "after": snapshot_before,
            "plan": install_plan,
            "message": "Use --yes to download the embedded mini-brain model and install its local runtime.",
        }

    # Confirmed path: prepare directories, fetch the model, then the runtime.
    ensure_app_home()
    EMBEDDED_MODEL_PATH.mkdir(parents=True, exist_ok=True)
    download_step = ensure_model_file()
    dependency_step = ensure_llama_cpp_dependency()

    snapshot_after = embedded_mini_brain_status()
    succeeded = snapshot_after.get("available") is True
    return {
        "kind": "embedded-mini-brain-install",
        "status": "ok" if succeeded else "failed",
        "ok": succeeded,
        "exit_code": 0 if succeeded else 1,
        "dry_run": False,
        "yes": True,
        "before": snapshot_before,
        "after": snapshot_after,
        "plan": install_plan,
        "download": download_step,
        "dependency_install": dependency_step,
    }
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def embedded_install_plan() -> dict[str, Any]:
    """Describe what installing the embedded mini-brain would do.

    Returns:
        A dict with the provider/model identity, the download URL, expected
        size and digest, the destination file, the runtime requirement and
        the list of paths reported under ``writes``.
    """
    destination = str(EMBEDDED_MODEL_FILE)
    reported_writes = [destination, str(app_path("python"))]
    plan: dict[str, Any] = {
        "provider": EMBEDDED_BACKEND_ID,
        "model": EMBEDDED_MODEL_ID,
        "model_name": EMBEDDED_MODEL_NAME,
        "download_url": model_source(),
        "size_bytes": EMBEDDED_MODEL_SIZE_BYTES,
        "sha256": EMBEDDED_MODEL_SHA256,
    }
    plan["destination"] = destination
    plan["runtime_requirement"] = EMBEDDED_RUNTIME_REQUIREMENT
    plan["writes"] = reported_writes
    return plan
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def embedded_system_prompt(public_name: str) -> str:
    """Compose the system prompt sent to the embedded model.

    Args:
        public_name: Name passed to ``identity_system_prompt``.

    Returns:
        The identity prompt followed by the fixed mini-brain instructions,
        one per line.
    """
    fixed_rules = (
        "Voce e o mini cerebro local embarcado do Agent DevKit.",
        "Responda em portugues claro quando o usuario escrever em portugues.",
        "Voce pode conversar, orientar onboarding/setup, explicar capacidades e preparar tarefas simples.",
        "Nao finja ser Claude, Codex, OpenAI ou Ollama.",
        "Nao aprove escrita externa, operacoes destrutivas, decisoes finais de seguranca ou revisoes finais.",
        "Quando a tarefa exigir alto julgamento, diga que pode acionar Claude, Codex, Ollama ou APIs se configurados.",
    )
    identity_line = identity_system_prompt(name=public_name)
    return "\n".join((identity_line, *fixed_rules))
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def load_llama() -> Any:
    """Return the cached ``Llama`` instance, creating it on first use.

    The model file must exist and match ``EMBEDDED_MODEL_SHA256`` before it
    is handed to llama_cpp.

    Returns:
        The ``llama_cpp.Llama`` object stored in the module-level cache.

    Raises:
        EmbeddedMiniBrainError: If llama_cpp cannot be imported, the model
            file is missing or fails validation, an environment override is
            not an integer, or the model cannot be loaded.
    """
    global _LLAMA_CACHE
    if _LLAMA_CACHE is not None:
        return _LLAMA_CACHE
    try:
        from llama_cpp import Llama  # type: ignore
    except (ImportError, OSError, RuntimeError) as exc:
        # NOTE(review): a broken native build is assumed to fail at import
        # time with OSError/RuntimeError rather than ImportError; confirm
        # against the installed llama-cpp-python version.
        raise EmbeddedMiniBrainError("llama-cpp-python is required for embedded mini-brain inference.") from exc
    if not EMBEDDED_MODEL_FILE.exists():
        raise EmbeddedMiniBrainError(f"Embedded model file not found: {EMBEDDED_MODEL_FILE}")
    if sha256_file(EMBEDDED_MODEL_FILE) != EMBEDDED_MODEL_SHA256:
        raise EmbeddedMiniBrainError(f"Embedded model file failed SHA-256 validation: {EMBEDDED_MODEL_FILE}")

    default_threads = max(1, min(4, os.cpu_count() or 1))
    try:
        n_ctx = int(os.environ.get("AGENT_DEVKIT_EMBEDDED_N_CTX", str(DEFAULT_CONTEXT_TOKENS)))
        n_threads = int(os.environ.get("AGENT_DEVKIT_EMBEDDED_THREADS", str(default_threads)))
    except ValueError as exc:
        raise EmbeddedMiniBrainError(
            "AGENT_DEVKIT_EMBEDDED_N_CTX and AGENT_DEVKIT_EMBEDDED_THREADS must be integers."
        ) from exc

    try:
        llama = Llama(
            model_path=str(EMBEDDED_MODEL_FILE),
            n_ctx=n_ctx,
            n_threads=n_threads,
            verbose=os.environ.get("AGENT_DEVKIT_EMBEDDED_VERBOSE") == "1",
        )
    except (OSError, RuntimeError, ValueError) as exc:
        # Keep loader failures inside the module's error type so callers
        # that only handle EmbeddedMiniBrainError do not crash.
        raise EmbeddedMiniBrainError(f"Embedded model could not be loaded: {exc}") from exc

    # Cache only after a successful load so a failed attempt can be retried.
    _LLAMA_CACHE = llama
    return llama
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def llama_cpp_dependency_status() -> dict[str, Any]:
    """Report whether the ``llama_cpp`` module can be imported.

    Returns:
        A dict with ``status`` (``"ok"`` or ``"missing"``), the module and
        package names, and either ``mode="smoke"`` (when the smoke-response
        variable is set), the detected ``version``, or an ``error`` string
        when the import failed for a reason other than the package being
        absent.
    """
    if os.environ.get(SMOKE_RESPONSE_ENV):
        # Smoke mode never imports the runtime, so report it as satisfied.
        return {
            "status": "ok",
            "module": "llama_cpp",
            "package": "llama-cpp-python",
            "mode": "smoke",
        }
    try:
        import llama_cpp  # type: ignore
    except ImportError:
        return {
            "status": "missing",
            "module": "llama_cpp",
            "package": "llama-cpp-python",
        }
    except (OSError, RuntimeError) as exc:
        # NOTE(review): an installed package with a broken native library is
        # assumed to raise OSError/RuntimeError on import; it is reported as
        # unusable so status/doctor commands keep working.
        return {
            "status": "missing",
            "module": "llama_cpp",
            "package": "llama-cpp-python",
            "error": str(exc),
        }
    return {
        "status": "ok",
        "module": "llama_cpp",
        "package": "llama-cpp-python",
        "version": getattr(llama_cpp, "__version__", None),
    }
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def ensure_model_file() -> dict[str, Any]:
    """Make sure the validated GGUF model file exists at its destination.

    The source from ``model_source()`` is copied when it is an existing local
    path and fetched with ``urllib`` otherwise. Data is written to a ``.part``
    file that replaces the destination only after its SHA-256 digest matches;
    the ``.part`` file is removed on every failure path.

    Returns:
        A result dict whose ``status`` is ``"skipped"`` (smoke mode),
        ``"already-installed"``, ``"downloaded"`` or ``"failed"``; ``ok``
        mirrors success and failures carry a ``message``.
    """
    if os.environ.get(SMOKE_RESPONSE_ENV):
        return {
            "status": "skipped",
            "ok": True,
            "model_file": str(EMBEDDED_MODEL_FILE),
            "reason": "smoke-mode",
        }
    if EMBEDDED_MODEL_FILE.exists() and sha256_file(EMBEDDED_MODEL_FILE) == EMBEDDED_MODEL_SHA256:
        return {
            "status": "already-installed",
            "ok": True,
            "model_file": str(EMBEDDED_MODEL_FILE),
            "sha256": EMBEDDED_MODEL_SHA256,
        }
    partial = EMBEDDED_MODEL_FILE.with_suffix(EMBEDDED_MODEL_FILE.suffix + ".part")
    source = model_source()

    def discard_partial() -> None:
        # Best-effort cleanup: a leftover .part file only wastes disk space,
        # so a failure to delete it must not mask the real error.
        try:
            partial.unlink()
        except OSError:
            pass

    try:
        local_source = Path(source).expanduser()
        if local_source.exists():
            shutil.copyfile(local_source, partial)
        else:
            with urllib.request.urlopen(source, timeout=120) as response, partial.open("wb") as target:
                shutil.copyfileobj(response, target)
    except (OSError, urllib.error.URLError, ValueError) as exc:
        # ValueError covers a source override that is neither an existing
        # path nor a URL urlopen accepts.
        discard_partial()
        return {
            "status": "failed",
            "ok": False,
            "model_file": str(EMBEDDED_MODEL_FILE),
            "source": source,
            "message": str(exc),
        }
    actual_sha = sha256_file(partial)
    if actual_sha != EMBEDDED_MODEL_SHA256:
        discard_partial()
        return {
            "status": "failed",
            "ok": False,
            "model_file": str(EMBEDDED_MODEL_FILE),
            "source": source,
            "sha256": actual_sha,
            "expected_sha256": EMBEDDED_MODEL_SHA256,
            "message": "Downloaded embedded model failed SHA-256 validation.",
        }
    try:
        partial.replace(EMBEDDED_MODEL_FILE)
    except OSError as exc:
        discard_partial()
        return {
            "status": "failed",
            "ok": False,
            "model_file": str(EMBEDDED_MODEL_FILE),
            "source": source,
            "message": str(exc),
        }
    return {
        "status": "downloaded",
        "ok": True,
        "model_file": str(EMBEDDED_MODEL_FILE),
        "source": source,
        "sha256": EMBEDDED_MODEL_SHA256,
    }
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def ensure_llama_cpp_dependency() -> dict[str, Any]:
    """Install llama-cpp-python with pip when it is not importable.

    Installation is skipped when the dependency is already usable or when the
    skip environment variable is set to ``"1"``.

    Returns:
        A result dict whose ``status`` is ``"already-installed"``,
        ``"skipped"``, ``"installed"`` or ``"failed"``. Results of a pip run
        include the command, exit code and the tail of stdout/stderr; a pip
        run that could not start or timed out has ``exit_code`` ``None`` and
        a ``message``.
    """
    current = llama_cpp_dependency_status()
    if current.get("status") == "ok":
        return {"status": "already-installed", "ok": True, "dependency": current}
    if os.environ.get(SKIP_DEP_INSTALL_ENV) == "1":
        return {"status": "skipped", "ok": True, "dependency": current, "reason": "disabled-by-env"}
    command = [sys.executable, "-m", "pip", "install", EMBEDDED_RUNTIME_REQUIREMENT]
    try:
        process = subprocess.run(
            command,
            check=False,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=900,
        )
    except subprocess.TimeoutExpired as exc:
        # Report the timeout as a failed step instead of aborting setup.
        return {
            "status": "failed",
            "ok": False,
            "command": command,
            "exit_code": None,
            "stdout": "",
            "stderr": "",
            "message": f"pip install timed out after {exc.timeout} seconds.",
        }
    except OSError as exc:
        return {
            "status": "failed",
            "ok": False,
            "command": command,
            "exit_code": None,
            "stdout": "",
            "stderr": "",
            "message": str(exc),
        }
    if process.returncode == 0:
        # The package was installed while this interpreter is running, so
        # refresh the import finders before the caller tries to import it.
        import importlib

        importlib.invalidate_caches()
    return {
        "status": "installed" if process.returncode == 0 else "failed",
        "ok": process.returncode == 0,
        "command": command,
        "exit_code": process.returncode,
        "stdout": process.stdout[-4000:],
        "stderr": process.stderr[-4000:],
    }
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def model_source() -> str:
    """Return the model source, preferring a non-empty environment override.

    Returns:
        The value of the ``SOURCE_ENV`` environment variable when it is set
        to a non-empty string, otherwise ``EMBEDDED_MODEL_SOURCE``.
    """
    override = os.environ.get(SOURCE_ENV)
    if override:
        return override
    return EMBEDDED_MODEL_SOURCE
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def sha256_file(path: "str | os.PathLike[str]") -> str:
    """Return the hexadecimal SHA-256 digest of a file.

    The file is read in 1 MiB chunks so large model files are never loaded
    into memory at once.

    Args:
        path: File to hash, given as a ``Path``, another path-like object or
            a plain string.

    Returns:
        The lowercase hex digest of the file contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    # Path() accepts str and os.PathLike alike, so existing Path callers are
    # unaffected.
    with Path(path).open("rb") as stream:
        for chunk in iter(lambda: stream.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class EmbeddedMiniBrainError(RuntimeError):
    """Error signalling that the embedded mini-brain could not run inference."""
|
|
@@ -8,7 +8,7 @@ from typing import Any
|
|
|
8
8
|
from cli.aikit.core.requests import AgentPromptRequest
|
|
9
9
|
from cli.aikit.core.runtime import run_agent_prompt
|
|
10
10
|
from cli.aikit.llm import BACKENDS, configure_backend
|
|
11
|
-
from cli.aikit.mini_brain import DEFAULT_OLLAMA_MODEL
|
|
11
|
+
from cli.aikit.mini_brain import DEFAULT_OLLAMA_MODEL
|
|
12
12
|
from cli.aikit.ollama import ollama_status
|
|
13
13
|
from cli.aikit.onboarding import onboarding_status
|
|
14
14
|
from cli.aikit.personality import load_personality, update_personality
|
|
@@ -104,11 +104,9 @@ def run_interactive_onboarding(result: dict[str, Any]) -> dict[str, Any]:
|
|
|
104
104
|
print("\nOllama nao foi encontrado.")
|
|
105
105
|
if command:
|
|
106
106
|
print(f"Instalacao sugerida: {command}")
|
|
107
|
-
print("
|
|
108
|
-
elif ask_yes_no(f"Deseja
|
|
109
|
-
|
|
110
|
-
setup = setup_mini_brain(yes=True, set_default=set_default)
|
|
111
|
-
print(setup.get("message") or f"Mini cerebro: {setup.get('status')}")
|
|
107
|
+
print("O mini cerebro embarcado ja funciona; instale Ollama apenas se quiser workers locais adicionais.")
|
|
108
|
+
elif ask_yes_no(f"Deseja instalar o modelo Ollama opcional {DEFAULT_OLLAMA_MODEL} para workers locais?", default=False):
|
|
109
|
+
print("Rode: agent local-llm install " + DEFAULT_OLLAMA_MODEL + " --yes")
|
|
112
110
|
|
|
113
111
|
fresh = onboarding_status(ROOT)
|
|
114
112
|
toolchain = fresh.get("toolchain") if isinstance(fresh.get("toolchain"), dict) else {}
|
|
@@ -129,7 +127,7 @@ def run_interactive_onboarding(result: dict[str, Any]) -> dict[str, Any]:
|
|
|
129
127
|
|
|
130
128
|
def choose_onboarding_mode() -> str:
|
|
131
129
|
print("\nModos de onboarding:")
|
|
132
|
-
print("1. minimo: identidade,
|
|
130
|
+
print("1. minimo: identidade, mini-cerebro local embarcado e memoria")
|
|
133
131
|
print("2. completo: minimo + toolchain, sources, notificacoes, knowledge e memorias")
|
|
134
132
|
print("3. pular")
|
|
135
133
|
answer = ask_text("Escolha o modo", default="minimo").strip().lower()
|
|
@@ -161,7 +159,7 @@ def configure_personality_interactively(agent: dict[str, Any]) -> None:
|
|
|
161
159
|
|
|
162
160
|
|
|
163
161
|
def configure_llm_interactively() -> None:
|
|
164
|
-
print("\nNenhum backend LLM coordenador utilizavel foi detectado.")
|
|
162
|
+
print("\nNenhum backend LLM coordenador externo utilizavel foi detectado.")
|
|
165
163
|
print("Opcoes: claude-code, codex-cli, ollama, openai, anthropic, openrouter, pular")
|
|
166
164
|
choice = ask_text("Qual backend deseja configurar primeiro?", default="pular").strip().lower()
|
|
167
165
|
if choice in {"", "pular", "skip", "cancelar", "cancel"}:
|
package/runtime/cli/aikit/llm.py
CHANGED
|
@@ -14,6 +14,14 @@ from pathlib import Path
|
|
|
14
14
|
from typing import Any
|
|
15
15
|
|
|
16
16
|
from cli.aikit.app_home import app_home, config_path as app_config_path, ensure_app_home
|
|
17
|
+
from cli.aikit.embedded_mini_brain import (
|
|
18
|
+
EMBEDDED_BACKEND_ID,
|
|
19
|
+
EMBEDDED_MODEL_ID,
|
|
20
|
+
EmbeddedMiniBrainError,
|
|
21
|
+
embedded_backend_config,
|
|
22
|
+
embedded_backend_doctor,
|
|
23
|
+
invoke_embedded_mini_brain,
|
|
24
|
+
)
|
|
17
25
|
from cli.aikit.identity import host_cli_prompt, identity_system_prompt
|
|
18
26
|
|
|
19
27
|
|
|
@@ -33,6 +41,14 @@ class LlmBackend:
|
|
|
33
41
|
|
|
34
42
|
|
|
35
43
|
BACKENDS: dict[str, LlmBackend] = {
|
|
44
|
+
EMBEDDED_BACKEND_ID: LlmBackend(
|
|
45
|
+
id=EMBEDDED_BACKEND_ID,
|
|
46
|
+
display_name="Embedded mini-brain",
|
|
47
|
+
kind="embedded-local",
|
|
48
|
+
auth="none",
|
|
49
|
+
default_model=EMBEDDED_MODEL_ID,
|
|
50
|
+
notes="Uses the Agent DevKit embedded mini-brain for setup, onboarding and low-risk conversation.",
|
|
51
|
+
),
|
|
36
52
|
"openai": LlmBackend(
|
|
37
53
|
id="openai",
|
|
38
54
|
display_name="OpenAI API",
|
|
@@ -100,7 +116,7 @@ BACKENDS: dict[str, LlmBackend] = {
|
|
|
100
116
|
|
|
101
117
|
ENV_VAR_NAME_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
102
118
|
DEFAULT_AGENT_TIMEOUT_SECONDS = 120
|
|
103
|
-
DEFAULT_FALLBACK_ORDER = ("claude-code", "codex-cli", "openai", "anthropic", "openrouter", "ollama")
|
|
119
|
+
DEFAULT_FALLBACK_ORDER = ("claude-code", "codex-cli", "openai", "anthropic", "openrouter", "ollama", EMBEDDED_BACKEND_ID)
|
|
104
120
|
|
|
105
121
|
|
|
106
122
|
def config_home() -> Path:
|
|
@@ -323,6 +339,8 @@ def normalize_backend_order(order: str | list[str] | tuple[str, ...]) -> list[st
|
|
|
323
339
|
|
|
324
340
|
|
|
325
341
|
def default_backend_config(backend: LlmBackend) -> dict[str, Any]:
|
|
342
|
+
if backend.id == EMBEDDED_BACKEND_ID:
|
|
343
|
+
return embedded_backend_config()
|
|
326
344
|
entry: dict[str, Any] = {"kind": backend.kind, "auth": backend.auth}
|
|
327
345
|
if backend.auth == "api-key-env":
|
|
328
346
|
entry["api_key_ref"] = f"env:{backend.api_key_env}"
|
|
@@ -346,7 +364,8 @@ def doctor_backends(backend_id: str | None = None) -> dict[str, Any]:
|
|
|
346
364
|
|
|
347
365
|
checks = [doctor_backend(BACKENDS[item], config) for item in ids]
|
|
348
366
|
status = "ok"
|
|
349
|
-
|
|
367
|
+
missing_statuses = {"missing", "not-installed", "dependency-missing", "invalid-model"}
|
|
368
|
+
if any(item["status"] in missing_statuses for item in checks):
|
|
350
369
|
status = "partial" if not backend_id else "missing"
|
|
351
370
|
if any(item["status"] == "error" for item in checks):
|
|
352
371
|
status = "error"
|
|
@@ -361,6 +380,8 @@ def doctor_backends(backend_id: str | None = None) -> dict[str, Any]:
|
|
|
361
380
|
|
|
362
381
|
|
|
363
382
|
def doctor_backend(backend: LlmBackend, config: dict[str, Any]) -> dict[str, Any]:
|
|
383
|
+
if backend.id == EMBEDDED_BACKEND_ID:
|
|
384
|
+
return embedded_backend_doctor()
|
|
364
385
|
configured = config.get("llm", {}).get("backends", {}).get(backend.id, {})
|
|
365
386
|
if not isinstance(configured, dict):
|
|
366
387
|
configured = {}
|
|
@@ -616,6 +637,11 @@ class LlmPolicyError(LlmInvocationError):
|
|
|
616
637
|
def invoke_resolved_backend(backend: dict[str, Any], prompt: str, *, public_name: str = "Agent DevKit") -> str:
|
|
617
638
|
kind = backend.get("kind")
|
|
618
639
|
backend_id = backend.get("id")
|
|
640
|
+
if kind == "embedded-local" and backend_id == EMBEDDED_BACKEND_ID:
|
|
641
|
+
try:
|
|
642
|
+
return invoke_embedded_mini_brain(prompt, public_name=public_name)
|
|
643
|
+
except EmbeddedMiniBrainError as exc:
|
|
644
|
+
raise LlmInvocationError(str(exc)) from exc
|
|
619
645
|
if kind == "openai-compatible":
|
|
620
646
|
return invoke_openai_compatible(backend, prompt, public_name=public_name)
|
|
621
647
|
if kind == "anthropic":
|
|
@@ -6,6 +6,7 @@ import shutil
|
|
|
6
6
|
import subprocess
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
+
from cli.aikit.embedded_mini_brain import EMBEDDED_BACKEND_ID, EMBEDDED_MODEL_ID, embedded_mini_brain_status
|
|
9
10
|
from cli.aikit.mini_brain import DEFAULT_OLLAMA_MODEL, mini_brain_contract
|
|
10
11
|
from cli.aikit.model_router import build_model_plan
|
|
11
12
|
from cli.aikit.ollama import ollama_models, ollama_pull, ollama_status
|
|
@@ -28,7 +29,9 @@ def local_llm_list() -> dict[str, Any]:
|
|
|
28
29
|
"kind": "local-llm",
|
|
29
30
|
"schema_version": LOCAL_LLM_SCHEMA_VERSION,
|
|
30
31
|
"status": "ok",
|
|
31
|
-
"provider":
|
|
32
|
+
"provider": EMBEDDED_BACKEND_ID,
|
|
33
|
+
"optional_providers": ["ollama"],
|
|
34
|
+
"embedded": embedded_mini_brain_status(),
|
|
32
35
|
"mini_brain": contract,
|
|
33
36
|
"workers": [{"id": worker_id, "purpose": purpose} for worker_id, purpose in LOCAL_WORKERS],
|
|
34
37
|
"models": {
|
|
@@ -42,12 +45,14 @@ def local_llm_doctor() -> dict[str, Any]:
|
|
|
42
45
|
status = ollama_status()
|
|
43
46
|
contract = mini_brain_contract(ollama_payload=status)
|
|
44
47
|
model_plan = build_model_plan("resuma estes logs operacionais")
|
|
45
|
-
ok =
|
|
48
|
+
ok = contract.get("available") is True
|
|
46
49
|
return {
|
|
47
50
|
"kind": "local-llm-doctor",
|
|
48
51
|
"schema_version": LOCAL_LLM_SCHEMA_VERSION,
|
|
49
52
|
"status": "ok" if ok else "partial",
|
|
50
|
-
"provider":
|
|
53
|
+
"provider": EMBEDDED_BACKEND_ID,
|
|
54
|
+
"optional_providers": ["ollama"],
|
|
55
|
+
"embedded": embedded_mini_brain_status(),
|
|
51
56
|
"ollama": status,
|
|
52
57
|
"mini_brain": contract,
|
|
53
58
|
"model_plan": {
|
|
@@ -63,9 +68,19 @@ def local_llm_doctor() -> dict[str, Any]:
|
|
|
63
68
|
|
|
64
69
|
def local_llm_models() -> dict[str, Any]:
|
|
65
70
|
payload = ollama_models()
|
|
71
|
+
embedded = embedded_mini_brain_status()
|
|
66
72
|
payload["kind"] = "local-llm-models"
|
|
67
73
|
payload["schema_version"] = LOCAL_LLM_SCHEMA_VERSION
|
|
68
|
-
payload["provider"] =
|
|
74
|
+
payload["provider"] = EMBEDDED_BACKEND_ID
|
|
75
|
+
payload["embedded"] = {
|
|
76
|
+
"status": embedded.get("status"),
|
|
77
|
+
"provider": EMBEDDED_BACKEND_ID,
|
|
78
|
+
"model": EMBEDDED_MODEL_ID,
|
|
79
|
+
"installed": embedded.get("model_file_valid") is True,
|
|
80
|
+
"available": embedded.get("available") is True,
|
|
81
|
+
"install_command": embedded.get("install_command"),
|
|
82
|
+
}
|
|
83
|
+
payload["optional_provider"] = "ollama"
|
|
69
84
|
return payload
|
|
70
85
|
|
|
71
86
|
|
|
@@ -24,7 +24,7 @@ FORBIDDEN_DELEGATION_MARKERS = (
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def maybe_delegate_local_llm(prompt: str, model_plan: dict[str, Any]) -> dict[str, Any]:
|
|
27
|
-
"""Execute a bounded operational task with
|
|
27
|
+
"""Execute a bounded operational task with the selected local worker."""
|
|
28
28
|
delegation = model_plan.get("delegation") if isinstance(model_plan.get("delegation"), dict) else {}
|
|
29
29
|
if model_plan.get("strategy") != "mini-brain":
|
|
30
30
|
return skipped(
|
|
@@ -38,6 +38,12 @@ def maybe_delegate_local_llm(prompt: str, model_plan: dict[str, Any]) -> dict[st
|
|
|
38
38
|
"High-risk tasks cannot be delegated to local LLM workers.",
|
|
39
39
|
model_plan=model_plan,
|
|
40
40
|
)
|
|
41
|
+
if model_plan.get("local_llm_role") != "operational-worker":
|
|
42
|
+
return skipped(
|
|
43
|
+
"not-operational-worker",
|
|
44
|
+
"The embedded mini-brain is acting as the bootstrap coordinator, not as a delegated worker.",
|
|
45
|
+
model_plan=model_plan,
|
|
46
|
+
)
|
|
41
47
|
if int(model_plan.get("max_llm_calls") or 0) <= 0:
|
|
42
48
|
return skipped(
|
|
43
49
|
"llm-budget-not-available",
|
|
@@ -50,9 +56,10 @@ def maybe_delegate_local_llm(prompt: str, model_plan: dict[str, Any]) -> dict[st
|
|
|
50
56
|
if any(marker in lowered for marker in FORBIDDEN_DELEGATION_MARKERS):
|
|
51
57
|
return skipped("forbidden", "Prompt contains an action that local LLM workers cannot execute.", model_plan=model_plan)
|
|
52
58
|
delegated_prompt = build_delegated_prompt(prompt, model_plan)
|
|
59
|
+
provider = str(model_plan.get("local_llm_provider") or "ollama")
|
|
53
60
|
result = invoke_agent_prompt(
|
|
54
61
|
delegated_prompt,
|
|
55
|
-
|
|
62
|
+
provider,
|
|
56
63
|
public_name="Local LLM Operator",
|
|
57
64
|
allow_fallback=False,
|
|
58
65
|
)
|
|
@@ -64,7 +71,7 @@ def maybe_delegate_local_llm(prompt: str, model_plan: dict[str, Any]) -> dict[st
|
|
|
64
71
|
"status": "ok" if result.get("ok") else result.get("status", "failed"),
|
|
65
72
|
"ok": bool(result.get("ok")),
|
|
66
73
|
"llm_backend": result.get("llm_backend"),
|
|
67
|
-
"model_provider":
|
|
74
|
+
"model_provider": provider,
|
|
68
75
|
"mini_brain": summarize_mini_brain(model_plan.get("mini_brain")),
|
|
69
76
|
"strategy": model_plan.get("strategy"),
|
|
70
77
|
"risk": model_plan.get("risk"),
|
|
@@ -108,7 +115,7 @@ def enrich_prompt_with_local_result(prompt: str, local_execution: dict[str, Any]
|
|
|
108
115
|
[
|
|
109
116
|
prompt,
|
|
110
117
|
"",
|
|
111
|
-
"Contexto operacional produzido pelo local-llm-operator/
|
|
118
|
+
f"Contexto operacional produzido pelo local-llm-operator/{local_execution.get('model_provider') or local_execution.get('llm_backend') or 'local'}:",
|
|
112
119
|
str(local_execution["response"]),
|
|
113
120
|
"",
|
|
114
121
|
"Use esse contexto apenas como apoio. A decisao, resposta final e revisao continuam sob responsabilidade do coordenador.",
|
|
@@ -131,7 +138,10 @@ def skipped(reason: str, message: str, *, model_plan: dict[str, Any]) -> dict[st
|
|
|
131
138
|
"strategy": model_plan.get("strategy"),
|
|
132
139
|
"risk": model_plan.get("risk"),
|
|
133
140
|
"confidence": model_plan.get("confidence"),
|
|
134
|
-
"requires_review": bool(
|
|
141
|
+
"requires_review": bool(
|
|
142
|
+
model_plan.get("local_llm_role") == "operational-worker"
|
|
143
|
+
and (model_plan.get("local_llm_recommended") or model_plan.get("local_llm_selected"))
|
|
144
|
+
),
|
|
135
145
|
}
|
|
136
146
|
|
|
137
147
|
|
|
@@ -5,14 +5,20 @@ from __future__ import annotations
|
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from cli.aikit.embedded_mini_brain import (
|
|
9
|
+
EMBEDDED_BACKEND_ID,
|
|
10
|
+
EMBEDDED_MODEL_ID,
|
|
11
|
+
embedded_mini_brain_status,
|
|
12
|
+
setup_embedded_mini_brain,
|
|
13
|
+
)
|
|
8
14
|
from cli.aikit.llm import BACKENDS, configure_backend, doctor_backend, load_config, save_config
|
|
9
|
-
from cli.aikit.ollama import
|
|
15
|
+
from cli.aikit.ollama import ollama_status
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
MINI_BRAIN_CONFIG_KEY = "mini_brain"
|
|
13
|
-
DEFAULT_HF_MODEL =
|
|
19
|
+
DEFAULT_HF_MODEL = EMBEDDED_MODEL_ID
|
|
14
20
|
DEFAULT_OLLAMA_MODEL = "qwen3:0.6b"
|
|
15
|
-
DEFAULT_PROVIDER =
|
|
21
|
+
DEFAULT_PROVIDER = EMBEDDED_BACKEND_ID
|
|
16
22
|
DEFAULT_BASE_URL = "http://localhost:11434/v1"
|
|
17
23
|
ALLOWED_TASKS = [
|
|
18
24
|
"setup_help",
|
|
@@ -50,14 +56,15 @@ def mini_brain_contract(
|
|
|
50
56
|
) -> dict[str, Any]:
|
|
51
57
|
config = load_config() if config is None else config
|
|
52
58
|
stored = config.get(MINI_BRAIN_CONFIG_KEY) if isinstance(config.get(MINI_BRAIN_CONFIG_KEY), dict) else {}
|
|
53
|
-
enabled = bool(stored.get("enabled"))
|
|
59
|
+
enabled = bool(stored.get("enabled", True))
|
|
54
60
|
provider = stored.get("provider") or stored.get("runtime") or DEFAULT_PROVIDER
|
|
55
61
|
hf_model = stored.get("hf_model") or stored.get("model") or DEFAULT_HF_MODEL
|
|
56
62
|
ollama_model = stored.get("ollama_model") or DEFAULT_OLLAMA_MODEL
|
|
63
|
+
embedded = embedded_mini_brain_status()
|
|
57
64
|
ollama_payload = ollama_status() if ollama_payload is None else ollama_payload
|
|
58
65
|
ollama_backend = doctor_backend(BACKENDS["ollama"], config) if ollama_backend is None else ollama_backend
|
|
59
|
-
|
|
60
|
-
runtime_available =
|
|
66
|
+
ollama_configured = ollama_backend.get("configured") is True
|
|
67
|
+
runtime_available = embedded.get("available") is True
|
|
61
68
|
available = enabled and provider == DEFAULT_PROVIDER and runtime_available
|
|
62
69
|
status = "ok" if available else "disabled" if not enabled else "unavailable"
|
|
63
70
|
return {
|
|
@@ -65,7 +72,8 @@ def mini_brain_contract(
|
|
|
65
72
|
"status": status,
|
|
66
73
|
"enabled": enabled,
|
|
67
74
|
"available": available,
|
|
68
|
-
"configured":
|
|
75
|
+
"configured": available,
|
|
76
|
+
"embedded_configured": provider == DEFAULT_PROVIDER,
|
|
69
77
|
"provider": provider,
|
|
70
78
|
"runtime": provider,
|
|
71
79
|
"hf_model": hf_model,
|
|
@@ -76,6 +84,7 @@ def mini_brain_contract(
|
|
|
76
84
|
"limits": dict_value(stored.get("limits"), DEFAULT_LIMITS),
|
|
77
85
|
"guardrails": list_value(stored.get("guardrails"), DEFAULT_GUARDRAILS),
|
|
78
86
|
"stored_secret": False,
|
|
87
|
+
"embedded": embedded,
|
|
79
88
|
"ollama": {
|
|
80
89
|
"status": ollama_payload.get("status"),
|
|
81
90
|
"daemon": (ollama_payload.get("daemon") or {}).get("status")
|
|
@@ -87,6 +96,7 @@ def mini_brain_contract(
|
|
|
87
96
|
"status": ollama_backend.get("status"),
|
|
88
97
|
"model": ollama_backend.get("model"),
|
|
89
98
|
"base_url": ollama_backend.get("base_url"),
|
|
99
|
+
"configured": ollama_configured,
|
|
90
100
|
},
|
|
91
101
|
}
|
|
92
102
|
|
|
@@ -98,6 +108,7 @@ def setup_mini_brain(
|
|
|
98
108
|
set_default: bool = False,
|
|
99
109
|
model: str = DEFAULT_OLLAMA_MODEL,
|
|
100
110
|
) -> dict[str, Any]:
|
|
111
|
+
embedded = embedded_mini_brain_status()
|
|
101
112
|
if dry_run or not yes:
|
|
102
113
|
status = "planned" if dry_run else "needs-confirmation"
|
|
103
114
|
needs_confirmation = not dry_run and not yes
|
|
@@ -110,53 +121,46 @@ def setup_mini_brain(
|
|
|
110
121
|
"yes": yes,
|
|
111
122
|
"stored_secret": False,
|
|
112
123
|
"mini_brain": planned_contract(model=model),
|
|
113
|
-
"
|
|
124
|
+
"embedded": embedded,
|
|
125
|
+
"embedded_install": setup_embedded_mini_brain(dry_run=True, yes=False),
|
|
126
|
+
"ollama_setup": {
|
|
127
|
+
"status": "skipped",
|
|
128
|
+
"ok": True,
|
|
129
|
+
"provider": "ollama",
|
|
130
|
+
"model": model,
|
|
131
|
+
"message": "Ollama is optional; use `agent local-llm install` to add local worker models.",
|
|
132
|
+
},
|
|
114
133
|
"next_steps": ["agent setup mini-brain --yes"],
|
|
115
|
-
"message": "Use --yes to
|
|
134
|
+
"message": "Use --yes to download and enable the embedded Qwen2.5-0.5B mini-brain.",
|
|
116
135
|
}
|
|
117
136
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
if
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
toolchain_install = install_toolchain(None, "ollama", dry_run=False, yes=True)
|
|
124
|
-
if toolchain_install.get("status") == "installed":
|
|
125
|
-
pull = ollama_pull(model, yes=True, dry_run=False)
|
|
126
|
-
if not pull.get("ok"):
|
|
127
|
-
payload = {
|
|
137
|
+
embedded_install = setup_embedded_mini_brain(dry_run=False, yes=True)
|
|
138
|
+
embedded = embedded_mini_brain_status()
|
|
139
|
+
if embedded_install.get("ok") is not True:
|
|
140
|
+
return {
|
|
128
141
|
"kind": "mini-brain-setup",
|
|
129
142
|
"status": "failed",
|
|
130
143
|
"ok": False,
|
|
131
|
-
"exit_code":
|
|
144
|
+
"exit_code": embedded_install.get("exit_code", 1),
|
|
132
145
|
"dry_run": False,
|
|
133
146
|
"yes": True,
|
|
134
147
|
"stored_secret": False,
|
|
135
|
-
"mini_brain":
|
|
136
|
-
"
|
|
137
|
-
"
|
|
138
|
-
"
|
|
148
|
+
"mini_brain": mini_brain_contract(),
|
|
149
|
+
"embedded": embedded,
|
|
150
|
+
"embedded_install": embedded_install,
|
|
151
|
+
"ollama_setup": {
|
|
152
|
+
"status": "skipped",
|
|
153
|
+
"ok": True,
|
|
154
|
+
"provider": "ollama",
|
|
155
|
+
"model": model,
|
|
156
|
+
"message": "Ollama remains optional for additional local worker models.",
|
|
157
|
+
},
|
|
158
|
+
"message": "Embedded mini-brain setup failed before the backend could be enabled.",
|
|
139
159
|
}
|
|
140
|
-
|
|
141
|
-
payload["toolchain_install"] = toolchain_install
|
|
142
|
-
payload["next_steps"] = [
|
|
143
|
-
"Review `agent toolchain doctor ollama`.",
|
|
144
|
-
"Run `agent toolchain install ollama --yes` if you approve external installation.",
|
|
145
|
-
"Then run `agent setup mini-brain --yes` again.",
|
|
146
|
-
]
|
|
147
|
-
return payload
|
|
148
|
-
|
|
149
|
-
existing_config = load_config()
|
|
150
|
-
existing_ollama = (
|
|
151
|
-
existing_config.get("llm", {}).get("backends", {}).get(DEFAULT_PROVIDER)
|
|
152
|
-
if isinstance(existing_config.get("llm"), dict)
|
|
153
|
-
else {}
|
|
154
|
-
)
|
|
155
|
-
existing_base_url = existing_ollama.get("base_url") if isinstance(existing_ollama, dict) else None
|
|
160
|
+
|
|
156
161
|
configured = configure_backend(
|
|
157
162
|
DEFAULT_PROVIDER,
|
|
158
|
-
|
|
159
|
-
model=model,
|
|
163
|
+
model=DEFAULT_HF_MODEL,
|
|
160
164
|
set_default=set_default,
|
|
161
165
|
)
|
|
162
166
|
config = load_config()
|
|
@@ -172,8 +176,15 @@ def setup_mini_brain(
|
|
|
172
176
|
"stored_secret": False,
|
|
173
177
|
"config_path": str(written_path),
|
|
174
178
|
"mini_brain": contract,
|
|
175
|
-
"
|
|
176
|
-
"
|
|
179
|
+
"embedded": embedded,
|
|
180
|
+
"embedded_install": embedded_install,
|
|
181
|
+
"ollama_setup": {
|
|
182
|
+
"status": "skipped",
|
|
183
|
+
"ok": True,
|
|
184
|
+
"provider": "ollama",
|
|
185
|
+
"model": model,
|
|
186
|
+
"message": "Ollama remains optional for additional local worker models.",
|
|
187
|
+
},
|
|
177
188
|
"llm_configure": configured,
|
|
178
189
|
"next_steps": ["Use low-risk setup, wizard and summary prompts normally."],
|
|
179
190
|
}
|
|
@@ -196,6 +207,7 @@ def planned_contract(*, model: str = DEFAULT_OLLAMA_MODEL) -> dict[str, Any]:
|
|
|
196
207
|
"limits": dict(DEFAULT_LIMITS),
|
|
197
208
|
"guardrails": list(DEFAULT_GUARDRAILS),
|
|
198
209
|
"stored_secret": False,
|
|
210
|
+
"embedded": embedded_mini_brain_status(),
|
|
199
211
|
}
|
|
200
212
|
|
|
201
213
|
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import re
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from cli.aikit.embedded_mini_brain import EMBEDDED_BACKEND_ID
|
|
8
9
|
from cli.aikit.llm import BACKENDS, doctor_backend, llm_preference, load_config
|
|
9
10
|
from cli.aikit.mini_brain import mini_brain_contract
|
|
10
11
|
from cli.aikit.ollama import ollama_status
|
|
@@ -14,6 +15,9 @@ from cli.aikit.write_policy import normalize_write_policy, write_policy_public_f
|
|
|
14
15
|
OPERATIONAL_PATTERN = re.compile(
|
|
15
16
|
r"(?i)\b(resum\w*|sumari\w*|classifi\w*|extra(?:i|ir|ia|cao|ção)\w*|normaliz\w*|compar\w*|logs?|rascunho|agrupe|agrupar)\b"
|
|
16
17
|
)
|
|
18
|
+
SIMPLE_CHAT_SETUP_PATTERN = re.compile(
|
|
19
|
+
r"(?i)\b(ol[aá]|oi|bom dia|boa tarde|boa noite|ajuda|help|comec(?:ar|o)|começ(?:ar|o)|setup|onboard|configur|instal|usar)\b"
|
|
20
|
+
)
|
|
17
21
|
HIGH_LEVEL_PATTERN = re.compile(
|
|
18
22
|
r"(?i)\b(arquitet|decid|aprovar|reprovar|especifica|requisit|implemente|codigo|c[oó]digo|documento|automac|deploy|seguran)\b"
|
|
19
23
|
)
|
|
@@ -48,7 +52,9 @@ def build_model_plan(
|
|
|
48
52
|
mini_brain = mini_brain_contract(config=config, ollama_payload=ollama, ollama_backend=ollama_backend)
|
|
49
53
|
local_available = mini_brain.get("available") is True
|
|
50
54
|
operational = bool(OPERATIONAL_PATTERN.search(prompt))
|
|
55
|
+
simple_chat_setup = bool(SIMPLE_CHAT_SETUP_PATTERN.search(prompt))
|
|
51
56
|
high_level = bool(HIGH_LEVEL_PATTERN.search(prompt))
|
|
57
|
+
local_provider = select_local_provider(ollama_payload=ollama, ollama_backend=ollama_backend, mini_brain=mini_brain)
|
|
52
58
|
policy = choose_model_strategy(
|
|
53
59
|
prompt,
|
|
54
60
|
route=route,
|
|
@@ -56,10 +62,12 @@ def build_model_plan(
|
|
|
56
62
|
specialist_tasks=specialist_tasks or [],
|
|
57
63
|
configuration_tasks=configuration_tasks or [],
|
|
58
64
|
operational=operational,
|
|
65
|
+
simple_chat_setup=simple_chat_setup,
|
|
59
66
|
high_level=high_level,
|
|
60
67
|
local_available=local_available,
|
|
61
68
|
)
|
|
62
69
|
use_local = policy["strategy"] == "mini-brain" and local_available
|
|
70
|
+
delegate_local = use_local and operational
|
|
63
71
|
return {
|
|
64
72
|
"kind": "model-plan",
|
|
65
73
|
"status": "planned",
|
|
@@ -73,22 +81,30 @@ def build_model_plan(
|
|
|
73
81
|
"max_llm_calls": policy["max_llm_calls"],
|
|
74
82
|
"intent": route.get("intent") if route else "llm",
|
|
75
83
|
"primary_coordinators": coordinator_order(preference),
|
|
76
|
-
"local_llm_role": "operational-worker",
|
|
84
|
+
"local_llm_role": "operational-worker" if operational else "bootstrap-coordinator",
|
|
77
85
|
"local_llm_available": local_available,
|
|
78
|
-
"local_llm_provider":
|
|
79
|
-
"local_llm_backend_configured": ollama_backend.get("
|
|
86
|
+
"local_llm_provider": local_provider,
|
|
87
|
+
"local_llm_backend_configured": ollama_backend.get("configured") is True if local_provider == "ollama" else True,
|
|
80
88
|
"local_llm_runtime": {
|
|
81
|
-
"
|
|
89
|
+
"provider": local_provider,
|
|
90
|
+
"binary_status": ollama.get("status") if local_provider == "ollama" else "embedded",
|
|
82
91
|
"backend_status": ollama_backend.get("status"),
|
|
83
|
-
"model": mini_brain.get("ollama_model") or ollama_backend.get("model"),
|
|
84
|
-
"base_url": ollama_backend.get("base_url"),
|
|
92
|
+
"model": (mini_brain.get("ollama_model") or ollama_backend.get("model")) if local_provider == "ollama" else mini_brain.get("hf_model"),
|
|
93
|
+
"base_url": ollama_backend.get("base_url") if local_provider == "ollama" else None,
|
|
94
|
+
},
|
|
95
|
+
"optional_local_providers": {
|
|
96
|
+
"ollama": {
|
|
97
|
+
"status": ollama.get("status"),
|
|
98
|
+
"backend_status": ollama_backend.get("status"),
|
|
99
|
+
"model_count": ollama.get("model_count"),
|
|
100
|
+
}
|
|
85
101
|
},
|
|
86
102
|
"mini_brain": mini_brain,
|
|
87
103
|
"local_llm_recommended": operational,
|
|
88
104
|
"local_llm_selected": use_local,
|
|
89
105
|
"delegation": {
|
|
90
|
-
"allowed": policy["strategy"] == "mini-brain",
|
|
91
|
-
"selected":
|
|
106
|
+
"allowed": policy["strategy"] == "mini-brain" and operational,
|
|
107
|
+
"selected": delegate_local,
|
|
92
108
|
"reason": local_reason(
|
|
93
109
|
operational=operational,
|
|
94
110
|
local_available=local_available,
|
|
@@ -110,6 +126,7 @@ def choose_model_strategy(
|
|
|
110
126
|
specialist_tasks: list[dict[str, Any]],
|
|
111
127
|
configuration_tasks: list[dict[str, Any]],
|
|
112
128
|
operational: bool,
|
|
129
|
+
simple_chat_setup: bool,
|
|
113
130
|
high_level: bool,
|
|
114
131
|
local_available: bool,
|
|
115
132
|
) -> dict[str, Any]:
|
|
@@ -154,7 +171,7 @@ def choose_model_strategy(
|
|
|
154
171
|
max_llm_calls=0,
|
|
155
172
|
matrix="Conhecida + estruturada + baixo risco -> automacao",
|
|
156
173
|
)
|
|
157
|
-
if operational and not high_level:
|
|
174
|
+
if (operational or simple_chat_setup) and not high_level:
|
|
158
175
|
return policy(
|
|
159
176
|
"mini-brain" if local_available else "external-llm",
|
|
160
177
|
"The prompt is operational and low-risk; local mini-brain is preferred when available.",
|
|
@@ -241,3 +258,19 @@ def local_reason(*, operational: bool, local_available: bool, high_level: bool,
|
|
|
241
258
|
if operational and not local_available:
|
|
242
259
|
return "Task is operational, but the local mini-brain is not enabled or available; coordinator/API fallback should execute."
|
|
243
260
|
return "Task requires coordinator-level reasoning or review."
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def select_local_provider(
|
|
264
|
+
*,
|
|
265
|
+
ollama_payload: dict[str, Any],
|
|
266
|
+
ollama_backend: dict[str, Any],
|
|
267
|
+
mini_brain: dict[str, Any],
|
|
268
|
+
) -> str:
|
|
269
|
+
ollama_ready = (
|
|
270
|
+
ollama_payload.get("status") == "ok"
|
|
271
|
+
and ollama_backend.get("status") == "ok"
|
|
272
|
+
and (ollama_backend.get("configured") is True or int(ollama_payload.get("model_count") or 0) > 0)
|
|
273
|
+
)
|
|
274
|
+
if ollama_ready:
|
|
275
|
+
return "ollama"
|
|
276
|
+
return str(mini_brain.get("provider") or EMBEDDED_BACKEND_ID)
|
|
@@ -138,6 +138,42 @@ def local_capabilities_help_response(prompt: str, *, name: str) -> dict[str, Any
|
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
|
|
141
|
+
def embedded_mini_brain_install_response(prompt: str, *, name: str, model_plan: dict[str, Any]) -> dict[str, Any]:
|
|
142
|
+
embedded = (
|
|
143
|
+
((model_plan.get("mini_brain") or {}).get("embedded") or {})
|
|
144
|
+
if isinstance(model_plan.get("mini_brain"), dict)
|
|
145
|
+
else {}
|
|
146
|
+
)
|
|
147
|
+
status = embedded.get("status") or "not-installed"
|
|
148
|
+
response = (
|
|
149
|
+
f"Eu sou {name}. Consigo orientar o setup inicial localmente, mas o mini-cerebro local ainda nao esta instalado "
|
|
150
|
+
f"(status: {status}). Para habilitar conversa local sem Claude, Codex, Ollama ou API externa, execute "
|
|
151
|
+
"`agent setup mini-brain --yes`. Sem esse download, posso continuar com onboarding, memoria, wizards e "
|
|
152
|
+
"capabilities deterministicas."
|
|
153
|
+
)
|
|
154
|
+
return {
|
|
155
|
+
"kind": "agent",
|
|
156
|
+
"status": "needs-setup",
|
|
157
|
+
"ok": False,
|
|
158
|
+
"requires_llm": False,
|
|
159
|
+
"prompt_received": True,
|
|
160
|
+
"prompt_length": len(prompt),
|
|
161
|
+
"mode": "embedded-mini-brain-not-installed",
|
|
162
|
+
"identity": {"name": name, "source": "local"},
|
|
163
|
+
"llm_backend": "embedded-mini-brain",
|
|
164
|
+
"mini_brain": model_plan.get("mini_brain"),
|
|
165
|
+
"response": response,
|
|
166
|
+
"message": "Embedded mini-brain is not installed yet.",
|
|
167
|
+
"next_steps": [
|
|
168
|
+
"agent setup mini-brain --dry-run",
|
|
169
|
+
"agent setup mini-brain --yes",
|
|
170
|
+
"agent llm configure claude-code --set-default",
|
|
171
|
+
"agent llm configure codex-cli --set-default",
|
|
172
|
+
],
|
|
173
|
+
"exit_code": 2,
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
|
|
141
177
|
def agent_requires_llm(args: argparse.Namespace) -> dict[str, Any]:
|
|
142
178
|
prompt = " ".join(args.prompt).strip()
|
|
143
179
|
return run_agent_prompt_request(
|
|
@@ -237,9 +273,15 @@ def run_agent_prompt_request(request: AgentPromptRequest) -> dict[str, Any]:
|
|
|
237
273
|
)
|
|
238
274
|
local_llm_execution = maybe_delegate_local_llm(prompt, model_plan)
|
|
239
275
|
coordinator_prompt = enrich_prompt_with_local_result(contextual_prompt, local_llm_execution)
|
|
276
|
+
requested_backend = request.llm
|
|
277
|
+
if should_prompt_for_embedded_install(model_plan, requested_backend=request.llm):
|
|
278
|
+
result = embedded_mini_brain_install_response(prompt, name=name, model_plan=model_plan)
|
|
279
|
+
return finalize_agent_session(result, session, prompt, backend="embedded-mini-brain")
|
|
280
|
+
if should_use_embedded_coordinator(model_plan, requested_backend=request.llm):
|
|
281
|
+
requested_backend = "embedded-mini-brain"
|
|
240
282
|
result = invoke_agent_prompt(
|
|
241
283
|
coordinator_prompt,
|
|
242
|
-
|
|
284
|
+
requested_backend,
|
|
243
285
|
public_name=name,
|
|
244
286
|
allow_fallback=not request.no_llm_fallback,
|
|
245
287
|
)
|
|
@@ -278,6 +320,32 @@ def run_agent_prompt_request(request: AgentPromptRequest) -> dict[str, Any]:
|
|
|
278
320
|
return finalize_agent_session(result, session, prompt, backend=result.get("llm_backend") or request.llm)
|
|
279
321
|
|
|
280
322
|
|
|
323
|
+
def should_use_embedded_coordinator(model_plan: dict[str, Any], *, requested_backend: str | None) -> bool:
|
|
324
|
+
if requested_backend:
|
|
325
|
+
return False
|
|
326
|
+
return (
|
|
327
|
+
model_plan.get("strategy") == "mini-brain"
|
|
328
|
+
and model_plan.get("local_llm_provider") == "embedded-mini-brain"
|
|
329
|
+
and model_plan.get("risk") == "low"
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def should_prompt_for_embedded_install(model_plan: dict[str, Any], *, requested_backend: str | None) -> bool:
|
|
334
|
+
if requested_backend:
|
|
335
|
+
return False
|
|
336
|
+
embedded = (
|
|
337
|
+
((model_plan.get("mini_brain") or {}).get("embedded") or {})
|
|
338
|
+
if isinstance(model_plan.get("mini_brain"), dict)
|
|
339
|
+
else {}
|
|
340
|
+
)
|
|
341
|
+
return (
|
|
342
|
+
model_plan.get("strategy") in {"mini-brain", "external-llm"}
|
|
343
|
+
and model_plan.get("local_llm_provider") == "embedded-mini-brain"
|
|
344
|
+
and embedded.get("available") is not True
|
|
345
|
+
and model_plan.get("fallback") == "configure-local-mini-brain-or-use-external-llm"
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
|
|
281
349
|
def mark_review_task_needs_review(execution_plan: dict[str, Any], review_result: dict[str, Any]) -> dict[str, Any]:
|
|
282
350
|
task = dict(execution_plan.get("review_task") or {})
|
|
283
351
|
if task:
|
|
@@ -106,16 +106,16 @@ def onboarding_plan(root: Path, mode: str) -> dict[str, Any]:
|
|
|
106
106
|
),
|
|
107
107
|
plan_step(
|
|
108
108
|
"coordinator-llm",
|
|
109
|
-
"Registrar Claude Code, Codex CLI ou API como coordenador/planejador/revisor.",
|
|
109
|
+
"Registrar Claude Code, Codex CLI ou API como coordenador/planejador/revisor opcional para tarefas de alto nivel.",
|
|
110
110
|
"agent llm list",
|
|
111
111
|
write_policy="local_config_write",
|
|
112
112
|
),
|
|
113
113
|
plan_step(
|
|
114
114
|
"mini-brain",
|
|
115
|
-
"
|
|
115
|
+
"Validar o mini cerebro embarcado Qwen2.5-0.5B para conversa simples, setup e tarefas operacionais leves.",
|
|
116
116
|
"agent setup mini-brain --dry-run",
|
|
117
117
|
write_policy="local_config_write",
|
|
118
|
-
model="
|
|
118
|
+
model="Qwen/Qwen2.5-0.5B-Instruct",
|
|
119
119
|
),
|
|
120
120
|
plan_step(
|
|
121
121
|
"sessions-and-memory",
|
|
@@ -24,13 +24,13 @@ def build_review_gate(
|
|
|
24
24
|
if route:
|
|
25
25
|
required = True
|
|
26
26
|
reasons.append("deterministic-route")
|
|
27
|
-
if model_plan and (model_plan
|
|
27
|
+
if model_plan and local_worker_review_required(model_plan):
|
|
28
28
|
required = True
|
|
29
29
|
reasons.append("local-llm")
|
|
30
30
|
if model_plan and model_plan.get("strategy") == "human":
|
|
31
31
|
required = True
|
|
32
32
|
reasons.append("human-strategy")
|
|
33
|
-
if model_plan and model_plan
|
|
33
|
+
if model_plan and mini_brain_review_required(model_plan):
|
|
34
34
|
required = True
|
|
35
35
|
reasons.append("mini-brain")
|
|
36
36
|
if model_plan and model_plan.get("risk") == "high":
|
|
@@ -50,6 +50,18 @@ def build_review_gate(
|
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def local_worker_review_required(model_plan: dict[str, Any]) -> bool:
|
|
54
|
+
if not (model_plan.get("local_llm_selected") or model_plan.get("local_llm_recommended")):
|
|
55
|
+
return False
|
|
56
|
+
return model_plan.get("local_llm_provider") == "ollama" or model_plan.get("risk") != "low"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def mini_brain_review_required(model_plan: dict[str, Any]) -> bool:
|
|
60
|
+
if model_plan.get("strategy") != "mini-brain":
|
|
61
|
+
return False
|
|
62
|
+
return model_plan.get("risk") != "low" or model_plan.get("local_llm_provider") == "ollama"
|
|
63
|
+
|
|
64
|
+
|
|
53
65
|
def mark_reviewed(payload: dict[str, Any], *, reviewer: str | None = None, notes: str | None = None) -> dict[str, Any]:
|
|
54
66
|
gate = dict(payload)
|
|
55
67
|
if gate.get("required"):
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "agent-devkit.embedded-model/v1",
|
|
3
|
+
"model_id": "Qwen/Qwen2.5-0.5B-Instruct",
|
|
4
|
+
"model_name": "qwen2.5-0.5b-instruct",
|
|
5
|
+
"artifact": {
|
|
6
|
+
"filename": "qwen2.5-0.5b-instruct-q2_k.gguf",
|
|
7
|
+
"format": "gguf",
|
|
8
|
+
"quantization": "q2_k",
|
|
9
|
+
"size_bytes": 415182688,
|
|
10
|
+
"sha256": "9ee36184e616dfc76df4f5dd66f908dbde6979524ae36e6cefb67f532f798cb8",
|
|
11
|
+
"source": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q2_k.gguf"
|
|
12
|
+
},
|
|
13
|
+
"provider": "embedded-mini-brain",
|
|
14
|
+
"license": "apache-2.0",
|
|
15
|
+
"runtime": "llama-cpp-python",
|
|
16
|
+
"purpose": [
|
|
17
|
+
"setup_help",
|
|
18
|
+
"wizard_conversation",
|
|
19
|
+
"intent_classification",
|
|
20
|
+
"command_explanation",
|
|
21
|
+
"short_error_summary"
|
|
22
|
+
],
|
|
23
|
+
"guardrails": [
|
|
24
|
+
"no_secrets",
|
|
25
|
+
"low_risk_only",
|
|
26
|
+
"no_external_writes",
|
|
27
|
+
"coordinator_review_required"
|
|
28
|
+
],
|
|
29
|
+
"notes": "This manifest declares the embedded mini-brain artifact downloaded on demand into .agent-devkit/models. Ollama remains an optional local worker pool for additional models."
|
|
30
|
+
}
|