maestro-bundle 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +91 -0
  2. package/package.json +25 -0
  3. package/src/cli.mjs +212 -0
  4. package/templates/bundle-ai-agents/.spec/constitution.md +33 -0
  5. package/templates/bundle-ai-agents/AGENTS.md +140 -0
  6. package/templates/bundle-ai-agents/skills/agent-orchestration/SKILL.md +132 -0
  7. package/templates/bundle-ai-agents/skills/api-design/SKILL.md +100 -0
  8. package/templates/bundle-ai-agents/skills/clean-architecture/SKILL.md +99 -0
  9. package/templates/bundle-ai-agents/skills/context-engineering/SKILL.md +98 -0
  10. package/templates/bundle-ai-agents/skills/database-modeling/SKILL.md +59 -0
  11. package/templates/bundle-ai-agents/skills/docker-containerization/SKILL.md +114 -0
  12. package/templates/bundle-ai-agents/skills/eval-testing/SKILL.md +115 -0
  13. package/templates/bundle-ai-agents/skills/memory-management/SKILL.md +106 -0
  14. package/templates/bundle-ai-agents/skills/prompt-engineering/SKILL.md +66 -0
  15. package/templates/bundle-ai-agents/skills/rag-pipeline/SKILL.md +128 -0
  16. package/templates/bundle-ai-agents/skills/testing-strategy/SKILL.md +95 -0
  17. package/templates/bundle-base/AGENTS.md +118 -0
  18. package/templates/bundle-base/skills/branch-strategy/SKILL.md +42 -0
  19. package/templates/bundle-base/skills/code-review/SKILL.md +54 -0
  20. package/templates/bundle-base/skills/commit-pattern/SKILL.md +58 -0
  21. package/templates/bundle-data-pipeline/.spec/constitution.md +32 -0
  22. package/templates/bundle-data-pipeline/AGENTS.md +115 -0
  23. package/templates/bundle-data-pipeline/skills/data-preprocessing/SKILL.md +75 -0
  24. package/templates/bundle-data-pipeline/skills/docker-containerization/SKILL.md +114 -0
  25. package/templates/bundle-data-pipeline/skills/feature-engineering/SKILL.md +76 -0
  26. package/templates/bundle-data-pipeline/skills/mlops-pipeline/SKILL.md +77 -0
  27. package/templates/bundle-data-pipeline/skills/model-training/SKILL.md +68 -0
  28. package/templates/bundle-data-pipeline/skills/rag-pipeline/SKILL.md +128 -0
  29. package/templates/bundle-frontend-spa/.spec/constitution.md +32 -0
  30. package/templates/bundle-frontend-spa/AGENTS.md +107 -0
  31. package/templates/bundle-frontend-spa/skills/authentication/SKILL.md +90 -0
  32. package/templates/bundle-frontend-spa/skills/component-design/SKILL.md +115 -0
  33. package/templates/bundle-frontend-spa/skills/e2e-testing/SKILL.md +101 -0
  34. package/templates/bundle-frontend-spa/skills/integration-api/SKILL.md +95 -0
  35. package/templates/bundle-frontend-spa/skills/react-patterns/SKILL.md +130 -0
  36. package/templates/bundle-frontend-spa/skills/responsive-layout/SKILL.md +65 -0
  37. package/templates/bundle-frontend-spa/skills/state-management/SKILL.md +86 -0
  38. package/templates/bundle-jhipster-microservices/.spec/constitution.md +37 -0
  39. package/templates/bundle-jhipster-microservices/AGENTS.md +307 -0
  40. package/templates/bundle-jhipster-microservices/skills/ci-cd-pipeline/SKILL.md +112 -0
  41. package/templates/bundle-jhipster-microservices/skills/clean-architecture/SKILL.md +99 -0
  42. package/templates/bundle-jhipster-microservices/skills/ddd-tactical/SKILL.md +138 -0
  43. package/templates/bundle-jhipster-microservices/skills/jhipster-angular/SKILL.md +97 -0
  44. package/templates/bundle-jhipster-microservices/skills/jhipster-docker-k8s/SKILL.md +183 -0
  45. package/templates/bundle-jhipster-microservices/skills/jhipster-entities/SKILL.md +87 -0
  46. package/templates/bundle-jhipster-microservices/skills/jhipster-gateway/SKILL.md +96 -0
  47. package/templates/bundle-jhipster-microservices/skills/jhipster-kafka/SKILL.md +145 -0
  48. package/templates/bundle-jhipster-microservices/skills/jhipster-registry/SKILL.md +83 -0
  49. package/templates/bundle-jhipster-microservices/skills/jhipster-service/SKILL.md +131 -0
  50. package/templates/bundle-jhipster-microservices/skills/testing-strategy/SKILL.md +95 -0
  51. package/templates/bundle-jhipster-monorepo/.spec/constitution.md +32 -0
  52. package/templates/bundle-jhipster-monorepo/AGENTS.md +227 -0
  53. package/templates/bundle-jhipster-monorepo/skills/clean-architecture/SKILL.md +99 -0
  54. package/templates/bundle-jhipster-monorepo/skills/ddd-tactical/SKILL.md +138 -0
  55. package/templates/bundle-jhipster-monorepo/skills/jhipster-angular/SKILL.md +166 -0
  56. package/templates/bundle-jhipster-monorepo/skills/jhipster-entities/SKILL.md +141 -0
  57. package/templates/bundle-jhipster-monorepo/skills/jhipster-liquibase/SKILL.md +95 -0
  58. package/templates/bundle-jhipster-monorepo/skills/jhipster-security/SKILL.md +89 -0
  59. package/templates/bundle-jhipster-monorepo/skills/jhipster-spring/SKILL.md +155 -0
  60. package/templates/bundle-jhipster-monorepo/skills/testing-strategy/SKILL.md +95 -0
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: api-design
3
+ description: Criar APIs REST com FastAPI ou Spring Boot seguindo padrões de versionamento, paginação, error handling e documentação. Use quando for criar endpoints, definir contratos de API, ou estruturar controllers.
4
+ ---
5
+
6
+ # API Design
7
+
8
+ ## Padrões REST
9
+
10
+ | Operação | Método | Path | Status | Body |
11
+ |---|---|---|---|---|
12
+ | Listar | GET | `/api/v1/demands` | 200 | Lista paginada |
13
+ | Buscar | GET | `/api/v1/demands/{id}` | 200 | Objeto |
14
+ | Criar | POST | `/api/v1/demands` | 201 | Objeto criado |
15
+ | Atualizar | PUT | `/api/v1/demands/{id}` | 200 | Objeto atualizado |
16
+ | Patch | PATCH | `/api/v1/demands/{id}` | 200 | Objeto atualizado |
17
+ | Deletar | DELETE | `/api/v1/demands/{id}` | 204 | Vazio |
18
+
19
+ ## FastAPI — Template
20
+
21
+ ```python
22
+ from fastapi import APIRouter, Depends, HTTPException, Query
23
+
24
+ router = APIRouter(prefix="/api/v1/demands", tags=["demands"])
25
+
26
+ @router.get("/", response_model=PaginatedResponse[DemandResponse])
27
+ async def list_demands(
28
+ page: int = Query(1, ge=1),
29
+ size: int = Query(20, ge=1, le=100),
30
+ status: str | None = None,
31
+ use_case: ListDemands = Depends()
32
+ ):
33
+ result = use_case.execute(page=page, size=size, status=status)
34
+ return PaginatedResponse(
35
+ items=result.items,
36
+ total=result.total,
37
+ page=page,
38
+ size=size
39
+ )
40
+
41
+ @router.post("/", response_model=DemandResponse, status_code=201)
42
+ async def create_demand(
43
+ body: CreateDemandRequest,
44
+ use_case: CreateDemand = Depends()
45
+ ):
46
+ return use_case.execute(body)
47
+
48
+ @router.get("/{demand_id}", response_model=DemandResponse)
49
+ async def get_demand(
50
+ demand_id: str,
51
+ use_case: GetDemand = Depends()
52
+ ):
53
+ result = use_case.execute(demand_id)
54
+ if not result:
55
+ raise HTTPException(status_code=404, detail="Demand not found")
56
+ return result
57
+ ```
58
+
59
+ ## Error Handling padronizado
60
+
61
+ ```python
62
+ class ErrorResponse(BaseModel):
63
+ error: str
64
+ message: str
65
+ details: list[str] | None = None
66
+
67
+ @app.exception_handler(DomainException)
68
+ async def domain_exception_handler(request, exc: DomainException):
69
+ return JSONResponse(
70
+ status_code=422,
71
+ content=ErrorResponse(error="domain_error", message=str(exc)).dict()
72
+ )
73
+
74
+ @app.exception_handler(NotFoundException)
75
+ async def not_found_handler(request, exc: NotFoundException):
76
+ return JSONResponse(
77
+ status_code=404,
78
+ content=ErrorResponse(error="not_found", message=str(exc)).dict()
79
+ )
80
+ ```
81
+
82
+ ## Paginação
83
+
84
+ ```python
85
+ class PaginatedResponse(BaseModel, Generic[T]):
86
+ items: list[T]
87
+ total: int
88
+ page: int
89
+ size: int
90
+ pages: int
91
+
92
+ @model_validator(mode="before")
93
+ def calc_pages(cls, values):
94
+ values["pages"] = ceil(values["total"] / values["size"])
95
+ return values
96
+ ```
97
+
98
+ ## Versionamento
99
+
100
+ Usar path-based: `/api/v1/`, `/api/v2/`. Manter v1 funcionando até todos os clientes migrarem.
@@ -0,0 +1,99 @@
1
+ ---
2
+ name: clean-architecture
3
+ description: Implementar Clean Architecture com camadas de domínio, aplicação e infraestrutura. Use quando for criar módulos, organizar código em camadas, separar regras de negócio de infraestrutura, ou estruturar um projeto novo.
4
+ ---
5
+
6
+ # Clean Architecture
7
+
8
+ ## Camadas
9
+
10
+ ```
11
+ ┌──────────────────────────────┐
12
+ │ API / CLI │ ← Controllers, Routers
13
+ ├──────────────────────────────┤
14
+ │ APPLICATION │ ← Use Cases, DTOs
15
+ ├──────────────────────────────┤
16
+ │ DOMAIN │ ← Entities, VOs, Events, Repos (interface)
17
+ ├──────────────────────────────┤
18
+ │ INFRASTRUCTURE │ ← DB, HTTP clients, Frameworks
19
+ └──────────────────────────────┘
20
+
21
+ Dependency Rule: setas apontam para DENTRO (infra → domain)
22
+ Domain NUNCA importa de infrastructure
23
+ ```
24
+
25
+ ## Domain Layer
26
+
27
+ ```python
28
+ # domain/entities/demand.py
29
+ class Demand:
30
+ def __init__(self, id: DemandId, description: str):
31
+ self._id = id
32
+ self._description = description
33
+ self._status = DemandStatus.CREATED
34
+ self._events: list[DomainEvent] = []
35
+
36
+ def decompose(self, planner: TaskPlanner) -> list[Task]:
37
+ if self._status != DemandStatus.CREATED:
38
+ raise DemandAlreadyDecomposedException(self._id)
39
+ tasks = planner.plan(self._description)
40
+ self._status = DemandStatus.PLANNED
41
+ self._events.append(DemandDecomposed(self._id, [t.id for t in tasks]))
42
+ return tasks
43
+
44
+ @property
45
+ def pending_events(self) -> list[DomainEvent]:
46
+ return list(self._events)
47
+
48
+ # domain/repositories/demand_repository.py (PORT - apenas interface)
49
+ class DemandRepository(ABC):
50
+ @abstractmethod
51
+ def find_by_id(self, id: DemandId) -> Demand: ...
52
+ @abstractmethod
53
+ def save(self, demand: Demand) -> None: ...
54
+ ```
55
+
56
+ ## Application Layer
57
+
58
+ ```python
59
+ # application/use_cases/decompose_demand.py
60
+ class DecomposeDemand:
61
+ def __init__(self, repo: DemandRepository, planner: TaskPlanner, event_bus: EventBus):
62
+ self._repo = repo
63
+ self._planner = planner
64
+ self._event_bus = event_bus
65
+
66
+ def execute(self, demand_id: str) -> DecomposeDemandOutput:
67
+ demand = self._repo.find_by_id(DemandId(demand_id))
68
+ tasks = demand.decompose(self._planner)
69
+ self._repo.save(demand)
70
+ for event in demand.pending_events:
71
+ self._event_bus.publish(event)
72
+ return DecomposeDemandOutput(tasks=[TaskDTO.from_entity(t) for t in tasks])
73
+ ```
74
+
75
+ ## Infrastructure Layer
76
+
77
+ ```python
78
+ # infrastructure/persistence/pg_demand_repository.py (ADAPTER)
79
+ class PgDemandRepository(DemandRepository):
80
+ def __init__(self, session: Session):
81
+ self._session = session
82
+
83
+ def find_by_id(self, id: DemandId) -> Demand:
84
+ model = self._session.query(DemandModel).get(str(id))
85
+ if not model:
86
+ raise DemandNotFoundException(id)
87
+ return self._to_entity(model)
88
+
89
+ def save(self, demand: Demand) -> None:
90
+ model = self._to_model(demand)
91
+ self._session.merge(model)
92
+ self._session.commit()
93
+ ```
94
+
95
+ ## Regra de ouro
96
+
97
+ Use Case orquestra → Entity contém regra → Repository persiste
98
+
99
+ Nunca colocar regra de negócio no Controller, Repository ou "Service" genérico.
@@ -0,0 +1,98 @@
1
+ ---
2
+ name: context-engineering
3
+ description: Implementar as 4 estratégias de context engineering (Write, Select, Compress, Isolate) para agentes. Use quando precisar gerenciar a janela de contexto, otimizar o que o agente recebe, ou reduzir custos de tokens.
4
+ ---
5
+
6
+ # Context Engineering
7
+
8
+ As 4 estratégias conforme Anthropic:
9
+
10
+ ## 1. Write Context — Memória Persistente
11
+
12
+ O que o agente "sabe" antes de começar.
13
+
14
+ ```
15
+ CLAUDE.md → Padrões do projeto, arquitetura, decisões
16
+ agents.md → Comportamento específico do agente
17
+ skills/SKILL.md → Capacidades on-demand
18
+ memory/ → Aprendizados de execuções anteriores
19
+ ```
20
+
21
+ **Regra:** CLAUDE.md deve ter no máximo 2000 tokens. Se precisar de mais, mover o conteúdo excedente para skills, que são carregadas on-demand.
22
+
23
+ ## 2. Select Context — Retrieval Inteligente
24
+
25
+ Injetar apenas o contexto relevante para a task atual.
26
+
27
+ ```python
28
+ def select_context(task: Task, retriever) -> str:
29
+ # Buscar skills relevantes para a task
30
+ relevant_skills = retriever.invoke(task.description)
31
+
32
+ # Buscar código relacionado no repo
33
+ related_code = code_search(task.description, worktree_path)
34
+
35
+ # Buscar decisões anteriores similares
36
+ past_decisions = memory_store.search(task.description, k=3)
37
+
38
+ return format_context(relevant_skills, related_code, past_decisions)
39
+ ```
40
+
41
+ **Regra:** Nunca preencher mais de 30% da janela de contexto com material selecionado. Deixar espaço para o agente raciocinar.
42
+
43
+ ## 3. Compress Context — Resumo Eficiente
44
+
45
+ Reduzir informação sem perder o essencial.
46
+
47
+ ```python
48
+ async def compress_code(code: str, max_tokens: int = 2000) -> str:
49
+ if count_tokens(code) <= max_tokens:
50
+ return code
51
+
52
+ summary = await llm.ainvoke(f"""
53
+ Resuma este código mantendo:
54
+ - Assinaturas de funções/classes
55
+ - Tipos de entrada/saída
56
+ - Lógica principal (sem detalhes de implementação)
57
+ - Imports relevantes
58
+
59
+ Código:
60
+ {code}
61
+ """)
62
+ return summary.content
63
+ ```
64
+
65
+ **Regra:** Comprimir apenas quando necessário. Código que o agente vai modificar deve estar completo, não comprimido.
66
+
67
+ ## 4. Isolate Context — Escopo por Agente
68
+
69
+ Cada agente vê apenas o que precisa.
70
+
71
+ ```python
72
+ agent_contexts = {
73
+ "frontend": {
74
+ "sees": ["src/features/", "src/shared/", "package.json"],
75
+ "not_sees": ["src/domain/", "infra/", "alembic/"],
76
+ "skills": ["react-patterns", "component-design"],
77
+ },
78
+ "backend": {
79
+ "sees": ["src/domain/", "src/application/", "src/api/"],
80
+ "not_sees": ["src/features/", "node_modules/"],
81
+ "skills": ["clean-architecture", "ddd-tactical"],
82
+ }
83
+ }
84
+ ```
85
+
86
+ **Regra:** Agentes nunca compartilham janela de contexto. Comunicação via mensagens estruturadas, não via contexto compartilhado.
87
+
88
+ ## Budget de contexto
89
+
90
+ Para um modelo com 200k tokens:
91
+
92
+ | Componente | % | Tokens |
93
+ |---|---|---|
94
+ | System prompt + CLAUDE.md | 5% | 10k |
95
+ | Skills carregadas | 10% | 20k |
96
+ | Código relevante (Select) | 25% | 50k |
97
+ | Histórico de conversa | 15% | 30k |
98
+ | **Espaço para raciocínio** | **45%** | **90k** |
@@ -0,0 +1,59 @@
1
+ ---
2
+ name: database-modeling
3
+ description: Modelar banco de dados PostgreSQL com migrations, índices, e pgvector. Use quando for criar tabelas, definir schema, criar migrations, ou otimizar queries.
4
+ ---
5
+
6
+ # Modelagem de Banco — PostgreSQL
7
+
8
+ ## Convenções
9
+
10
+ - Nomes de tabelas: `snake_case`, plural (`demands`, `tasks`, `agents`)
11
+ - PKs: `id UUID DEFAULT gen_random_uuid()`
12
+ - FKs: `<tabela_singular>_id` (ex: `demand_id`)
13
+ - Timestamps: `created_at`, `updated_at` com default `NOW()`
14
+ - Soft delete: `deleted_at TIMESTAMP NULL`
15
+
16
+ ## Migration com Alembic
17
+
18
+ ```python
19
+ # alembic/versions/001_create_demands.py
20
+ def upgrade():
21
+ op.create_table(
22
+ 'demands',
23
+ sa.Column('id', sa.UUID(), primary_key=True, server_default=sa.text('gen_random_uuid()')),
24
+ sa.Column('description', sa.Text(), nullable=False),
25
+ sa.Column('status', sa.VARCHAR(20), nullable=False, server_default='created'),
26
+ sa.Column('requester', sa.VARCHAR(100), nullable=False),
27
+ sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('NOW()')),
28
+ sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('NOW()')),
29
+ )
30
+
31
+ def downgrade():
32
+ op.drop_table('demands')
33
+ ```
34
+
35
+ ## Índices
36
+
37
+ ```sql
38
+ -- Queries frequentes devem ter índice
39
+ CREATE INDEX idx_tasks_status ON tasks(status) WHERE status != 'completed';
40
+ CREATE INDEX idx_tasks_demand ON tasks(demand_id);
41
+ CREATE INDEX idx_tracking_events_demand_agent ON tracking_events(demand_id, agent_id, created_at DESC);
42
+
43
+ -- pgvector para busca semântica
44
+ CREATE INDEX idx_embeddings_vector ON bundle_embeddings
45
+ USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 200);
46
+
47
+ -- Full-text search
48
+ ALTER TABLE bundles ADD COLUMN search_vector tsvector
49
+ GENERATED ALWAYS AS (to_tsvector('portuguese', name || ' ' || description)) STORED;
50
+ CREATE INDEX idx_bundles_search ON bundles USING GIN(search_vector);
51
+ ```
52
+
53
+ ## Anti-patterns a evitar
54
+
55
+ - Não usar CASCADE DELETE sem pensar nas consequências
56
+ - Não criar índice em toda coluna (custo de escrita)
57
+ - Não fazer SELECT * em produção
58
+ - Não ignorar EXPLAIN ANALYZE para queries lentas
59
+ - Não alterar schema sem migration
@@ -0,0 +1,114 @@
1
+ ---
2
+ name: docker-containerization
3
+ description: Criar Dockerfiles otimizados com multi-stage build, security hardening e docker-compose para desenvolvimento. Use quando for containerizar aplicações, criar Dockerfiles, ou configurar ambiente de dev.
4
+ ---
5
+
6
+ # Docker Containerization
7
+
8
+ ## Dockerfile Python — Multi-stage
9
+
10
+ ```dockerfile
11
+ # === Build stage ===
12
+ FROM python:3.11-slim AS builder
13
+ WORKDIR /app
14
+ RUN apt-get update && apt-get install -y --no-install-recommends gcc && rm -rf /var/lib/apt/lists/*
15
+ COPY requirements.txt .
16
+ RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
17
+
18
+ # === Runtime stage ===
19
+ FROM python:3.11-slim
20
+ WORKDIR /app
21
+ RUN groupadd -r appuser && useradd -r -g appuser appuser
22
+ COPY --from=builder /install /usr/local
23
+ COPY src/ ./src/
24
+ USER appuser
25
+ EXPOSE 8000
26
+ HEALTHCHECK --interval=30s --timeout=5s CMD curl -f http://localhost:8000/health || exit 1
27
+ CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
28
+ ```
29
+
30
+ ## Dockerfile React — Multi-stage
31
+
32
+ ```dockerfile
33
+ FROM node:20-slim AS builder
34
+ WORKDIR /app
35
+ COPY package*.json ./
36
+ RUN npm ci
37
+ COPY . .
38
+ RUN npm run build
39
+
40
+ FROM nginx:alpine
41
+ COPY --from=builder /app/dist /usr/share/nginx/html
42
+ COPY nginx.conf /etc/nginx/conf.d/default.conf
43
+ EXPOSE 80
44
+ ```
45
+
46
+ ## Docker Compose — Dev
47
+
48
+ ```yaml
49
+ # docker-compose.dev.yml
50
+ services:
51
+ api:
52
+ build:
53
+ context: .
54
+ dockerfile: docker/Dockerfile.api
55
+ ports:
56
+ - "8000:8000"
57
+ environment:
58
+ - DATABASE_URL=postgresql://maestro:maestro@postgres/maestro
59
+ - REDIS_URL=redis://redis:6379
60
+ volumes:
61
+ - ./src:/app/src # Hot reload
62
+ depends_on:
63
+ postgres:
64
+ condition: service_healthy
65
+
66
+ postgres:
67
+ image: pgvector/pgvector:pg16
68
+ environment:
69
+ POSTGRES_DB: maestro
70
+ POSTGRES_USER: maestro
71
+ POSTGRES_PASSWORD: maestro
72
+ ports:
73
+ - "5432:5432"
74
+ volumes:
75
+ - pgdata:/var/lib/postgresql/data
76
+ healthcheck:
77
+ test: ["CMD-SHELL", "pg_isready -U maestro"]
78
+ interval: 5s
79
+ timeout: 5s
80
+ retries: 5
81
+
82
+ redis:
83
+ image: redis:7-alpine
84
+ ports:
85
+ - "6379:6379"
86
+
87
+ minio:
88
+ image: minio/minio
89
+ command: server /data --console-address ":9001"
90
+ ports:
91
+ - "9000:9000"
92
+ - "9001:9001"
93
+ environment:
94
+ MINIO_ROOT_USER: minioadmin
95
+ MINIO_ROOT_PASSWORD: minioadmin
96
+
97
+ volumes:
98
+ pgdata:
99
+ ```
100
+
101
+ ## .dockerignore
102
+
103
+ ```
104
+ .git
105
+ node_modules
106
+ __pycache__
107
+ *.pyc
108
+ .env
109
+ .venv
110
+ dist
111
+ build
112
+ coverage
113
+ .pytest_cache
114
+ ```
@@ -0,0 +1,115 @@
1
+ ---
2
+ name: eval-testing
3
+ description: Criar framework de avaliação para agentes de AI com LLM-as-judge, rule-based evals e golden datasets. Use quando precisar testar agentes, avaliar qualidade de RAG, ou criar benchmarks de compliance.
4
+ ---
5
+
6
+ # Avaliação de Agentes
7
+
8
+ ## Tipos de eval
9
+
10
+ | Tipo | Quando usar | Velocidade |
11
+ |---|---|---|
12
+ | Rule-based | Validar formato, estrutura, compliance | Rápido |
13
+ | LLM-as-judge | Avaliar qualidade, coerência, utilidade | Lento |
14
+ | Golden dataset | Comparar com respostas esperadas | Médio |
15
+ | RAGAS | Métricas de RAG (faithfulness, relevancy) | Médio |
16
+
17
+ ## 1. Rule-based Evals
18
+
19
+ ```python
20
+ class ComplianceEvaluator:
21
+ """Verifica se o código segue o bundle"""
22
+
23
+ def evaluate(self, code: str, bundle: Bundle) -> EvalResult:
24
+ checks = []
25
+ checks.append(self._check_max_lines(code, max=500))
26
+ checks.append(self._check_no_hardcoded_secrets(code))
27
+ checks.append(self._check_function_length(code, max=20))
28
+ checks.append(self._check_naming_convention(code))
29
+ checks.append(self._check_test_coverage(code, min=80))
30
+
31
+ score = sum(c.passed for c in checks) / len(checks)
32
+ return EvalResult(score=score, checks=checks)
33
+
34
+ def _check_max_lines(self, code: str, max: int) -> Check:
35
+ lines = len(code.split('\n'))
36
+ return Check(
37
+ name="max_lines",
38
+ passed=lines <= max,
39
+ detail=f"{lines}/{max} linhas"
40
+ )
41
+ ```
42
+
43
+ ## 2. LLM-as-Judge
44
+
45
+ ```python
46
+ JUDGE_PROMPT = """
47
+ Avalie o código abaixo nos critérios:
48
+ 1. Clareza (1-5): O código é fácil de entender?
49
+ 2. Correção (1-5): O código faz o que deveria?
50
+ 3. Padrões (1-5): Segue Clean Architecture e DDD?
51
+ 4. Testes (1-5): Os testes são adequados?
52
+
53
+ Código:
54
+ {code}
55
+
56
+ Responda em JSON:
57
+ {{"clareza": X, "correcao": X, "padroes": X, "testes": X, "justificativa": "..."}}
58
+ """
59
+
60
+ async def llm_judge(code: str) -> dict:
61
+ response = await llm.ainvoke(JUDGE_PROMPT.format(code=code))
62
+ return json.loads(response.content)
63
+ ```
64
+
65
+ ## 3. Golden Dataset
66
+
67
+ ```json
68
+ {
69
+ "evals": [
70
+ {
71
+ "id": "eval-001",
72
+ "prompt": "Crie um endpoint GET /api/v1/demands que retorna lista paginada",
73
+ "expected": {
74
+ "has_controller": true,
75
+ "has_use_case": true,
76
+ "has_repository": true,
77
+ "has_pagination": true,
78
+ "follows_clean_arch": true
79
+ }
80
+ }
81
+ ]
82
+ }
83
+ ```
84
+
85
+ ## 4. Runner de avaliação
86
+
87
+ ```python
88
+ async def run_evals(agent, eval_set: list[dict]) -> BenchmarkResult:
89
+ results = []
90
+ for eval_case in eval_set:
91
+ output = await agent.ainvoke(eval_case["prompt"])
92
+
93
+ rule_score = rule_evaluator.evaluate(output)
94
+ judge_score = await llm_judge(output)
95
+
96
+ results.append({
97
+ "eval_id": eval_case["id"],
98
+ "rule_score": rule_score,
99
+ "judge_score": judge_score,
100
+ "output": output
101
+ })
102
+
103
+ return BenchmarkResult(results=results, aggregate=aggregate(results))
104
+ ```
105
+
106
+ ## No CI/CD
107
+
108
+ ```yaml
109
+ eval:
110
+ stage: test
111
+ script:
112
+ - python -m evals.run_evals --dataset evals/golden_dataset.json
113
+ - python -m evals.check_threshold --min-score 0.8
114
+ allow_failure: false
115
+ ```
@@ -0,0 +1,106 @@
1
+ ---
2
+ name: memory-management
3
+ description: Implementar memória de curto, médio e longo prazo para agentes usando LangGraph Store e checkpointers. Use quando precisar que agentes lembrem de interações anteriores, persistam estado, ou aprendam com execuções passadas.
4
+ ---
5
+
6
+ # Gerenciamento de Memória
7
+
8
+ ## 3 Níveis de Memória
9
+
10
+ | Nível | Duração | Mecanismo | Exemplo |
11
+ |---|---|---|---|
12
+ | Curto prazo | 1 sessão | Context window | Mensagens da conversa atual |
13
+ | Médio prazo | 1 demanda | Checkpointer | Estado entre nós do grafo |
14
+ | Longo prazo | Permanente | Store | Padrões aprendidos, preferências |
15
+
16
+ ## Curto Prazo — Context Window
17
+
18
+ Gerenciado automaticamente pelo LangGraph. Usar `add_messages` para acumular.
19
+
20
+ ```python
21
+ class AgentState(TypedDict):
22
+ messages: Annotated[list, add_messages] # Acumula automaticamente
23
+ ```
24
+
25
+ ## Médio Prazo — Checkpointer
26
+
27
+ Persiste o estado do grafo entre invocações da mesma demanda.
28
+
29
+ ```python
30
+ from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
31
+
32
+ checkpointer = AsyncPostgresSaver.from_conn_string(DATABASE_URL)
33
+
34
+ graph = StateGraph(OrchestratorState)
35
+ # ... definir nós e edges ...
36
+ app = graph.compile(checkpointer=checkpointer)
37
+
38
+ # Usar thread_id consistente por demanda
39
+ config = {"configurable": {"thread_id": f"demand-{demand_id}"}}
40
+ result = await app.ainvoke({"messages": [...]}, config=config)
41
+
42
+ # Próxima invocação com mesmo thread_id retoma do estado salvo
43
+ result2 = await app.ainvoke({"messages": [nova_msg]}, config=config)
44
+ ```
45
+
46
+ ## Longo Prazo — Store
47
+
48
+ Persiste conhecimento entre demandas diferentes.
49
+
50
+ ```python
51
+ from langgraph.store.postgres import AsyncPostgresStore
52
+
53
+ store = AsyncPostgresStore.from_conn_string(DATABASE_URL)
54
+
55
+ # Salvar aprendizado
56
+ await store.aput(
57
+ namespace=("agent", "backend", "patterns"),
58
+ key="spring-crud-pattern",
59
+ value={
60
+ "pattern": "Usar record para DTO, entity para domínio",
61
+ "learned_from": "demand-123",
62
+ "confidence": 0.95,
63
+ "created_at": "2026-03-27"
64
+ }
65
+ )
66
+
67
+ # Buscar aprendizados relevantes
68
+ results = await store.asearch(
69
+ namespace=("agent", "backend", "patterns"),
70
+ query="como criar DTO para API REST",
71
+ limit=5
72
+ )
73
+ ```
74
+
75
+ ## Memória no Deep Agent
76
+
77
+ ```python
78
+ from deepagents import create_deep_agent
79
+ from deepagents.backends import FilesystemBackend
80
+ from langgraph.checkpoint.postgres import PostgresSaver
81
+ from langgraph.store.postgres import PostgresStore
82
+
83
+ agent = create_deep_agent(
84
+ model="claude-sonnet-4-5-20250929",
85
+ backend=FilesystemBackend(root_dir=".", virtual_mode=True),
86
+ checkpointer=PostgresSaver(conn_string=DATABASE_URL),
87
+ store=PostgresStore(conn_string=DATABASE_URL),
88
+ system_prompt="Você é um agente backend..."
89
+ )
90
+ ```
91
+
92
+ ## Limpeza de memória
93
+
94
+ Memórias envelhecem. Implementar decay:
95
+
96
+ ```python
97
+ async def cleanup_stale_memories(store, max_age_days: int = 90):
98
+ """Remove memórias antigas ou com baixa confiança"""
99
+ cutoff = datetime.now() - timedelta(days=max_age_days)
100
+ memories = await store.alist(namespace=("agent",))
101
+ for mem in memories:
102
+ if mem.value.get("created_at", "") < cutoff.isoformat():
103
+ await store.adelete(namespace=mem.namespace, key=mem.key)
104
+ elif mem.value.get("confidence", 1.0) < 0.3:
105
+ await store.adelete(namespace=mem.namespace, key=mem.key)
106
+ ```