agentgear-ai 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentgear/__init__.py +18 -0
- agentgear/cli/__init__.py +1 -0
- agentgear/cli/main.py +125 -0
- agentgear/sdk/__init__.py +6 -0
- agentgear/sdk/client.py +276 -0
- agentgear/sdk/decorators.py +65 -0
- agentgear/sdk/integrations/openai.py +52 -0
- agentgear/sdk/prompt.py +23 -0
- agentgear/sdk/trace.py +59 -0
- agentgear/server/__init__.py +1 -0
- agentgear/server/app/__init__.py +1 -0
- agentgear/server/app/api/__init__.py +1 -0
- agentgear/server/app/api/auth.py +156 -0
- agentgear/server/app/api/datasets.py +185 -0
- agentgear/server/app/api/evaluations.py +69 -0
- agentgear/server/app/api/evaluators.py +157 -0
- agentgear/server/app/api/llm_models.py +39 -0
- agentgear/server/app/api/metrics.py +18 -0
- agentgear/server/app/api/projects.py +139 -0
- agentgear/server/app/api/prompts.py +227 -0
- agentgear/server/app/api/runs.py +75 -0
- agentgear/server/app/api/seed.py +106 -0
- agentgear/server/app/api/settings.py +135 -0
- agentgear/server/app/api/spans.py +56 -0
- agentgear/server/app/api/tokens.py +67 -0
- agentgear/server/app/api/users.py +116 -0
- agentgear/server/app/auth.py +80 -0
- agentgear/server/app/config.py +26 -0
- agentgear/server/app/db.py +41 -0
- agentgear/server/app/deps.py +46 -0
- agentgear/server/app/main.py +77 -0
- agentgear/server/app/migrations.py +88 -0
- agentgear/server/app/models.py +339 -0
- agentgear/server/app/schemas.py +343 -0
- agentgear/server/app/utils/email.py +30 -0
- agentgear/server/app/utils/llm.py +27 -0
- agentgear/server/static/assets/index-BAAzXAln.js +121 -0
- agentgear/server/static/assets/index-CE45MZx1.css +1 -0
- agentgear/server/static/index.html +13 -0
- agentgear_ai-0.1.16.dist-info/METADATA +387 -0
- agentgear_ai-0.1.16.dist-info/RECORD +44 -0
- agentgear_ai-0.1.16.dist-info/WHEEL +4 -0
- agentgear_ai-0.1.16.dist-info/entry_points.txt +2 -0
- agentgear_ai-0.1.16.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import secrets
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from fastapi import APIRouter, Depends, HTTPException, status
|
|
5
|
+
from sqlalchemy.orm import Session
|
|
6
|
+
|
|
7
|
+
from agentgear.server.app import schemas
|
|
8
|
+
from agentgear.server.app.auth import generate_token, hash_password
|
|
9
|
+
from agentgear.server.app.config import get_settings
|
|
10
|
+
from agentgear.server.app.db import get_db
|
|
11
|
+
from agentgear.server.app.models import AdminUser, APIKey, Project
|
|
12
|
+
|
|
13
|
+
router = APIRouter(prefix="/api/auth", tags=["auth"])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _ensure_default_project(db: Session) -> Project:
    """Return the project named "Default Project", creating it if it is missing.

    A newly created project is committed and refreshed before it is returned.
    """
    default_project = db.query(Project).filter(Project.name == "Default Project").first()
    if not default_project:
        default_project = Project(
            name="Default Project",
            description="Default project for AgentGear UI",
        )
        db.add(default_project)
        db.commit()
        db.refresh(default_project)
    return default_project
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _issue_token(db: Session, project: Project, role: str = "user") -> str:
    """Persist a new API key for ``project`` and return the raw token.

    ``generate_token()`` yields a (raw, hashed) pair; the hashed value is
    stored on the ``APIKey`` row and the raw value is handed to the caller.
    """
    raw_token, token_hash = generate_token()
    granted_scopes = [
        "runs.write",
        "prompts.read",
        "prompts.write",
        "tokens.manage",
        "datasets.read",
        "datasets.write",
        "evaluations.read",
        "evaluations.write",
    ]
    db.add(
        APIKey(
            project_id=project.id,
            key_hash=token_hash,
            scopes=granted_scopes,
            role=role,
        )
    )
    db.commit()
    return raw_token
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _login_success(db: Session, username: str, role: str = "user", project_id: str | None = None) -> schemas.AuthResponse:
    """Issue a token for a successfully authenticated user.

    The token is bound to the project identified by ``project_id``; when no
    id is given, or no project with that id exists, the default project is
    used instead.
    """
    target_project = None
    if project_id:
        target_project = db.query(Project).filter(Project.id == project_id).first()
    if not target_project:
        # Covers both "no project_id supplied" and "project_id not found".
        target_project = _ensure_default_project(db)

    issued = _issue_token(db, target_project, role)
    return schemas.AuthResponse(
        token=issued,
        project_id=target_project.id,
        username=username,
        role=role,
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@router.get("/status", response_model=schemas.AuthStatus)
def auth_status(db: Session = Depends(get_db)):
    """Report whether an admin is configured and where the credentials come from.

    ``mode`` is "env" when both admin settings are present, "db" when an
    ``AdminUser`` row exists, and "none" otherwise.
    """
    cfg = get_settings()
    from_env = bool(cfg.admin_username and cfg.admin_password)
    db_admin = db.query(AdminUser).first()
    default_project = db.query(Project).filter(Project.name == "Default Project").first()

    if from_env:
        mode = "env"
    elif db_admin:
        mode = "db"
    else:
        mode = "none"

    fallback_username = db_admin.username if db_admin else None
    return schemas.AuthStatus(
        configured=from_env or db_admin is not None,
        mode=mode,
        username=cfg.admin_username or fallback_username,
        project_id=default_project.id if default_project else None,
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@router.post("/setup", response_model=schemas.AuthResponse, status_code=status.HTTP_201_CREATED)
def setup_admin(payload: schemas.AuthSetup, db: Session = Depends(get_db)):
    """Create the first admin account and log it in.

    Raises:
        HTTPException: 400 when admin credentials are supplied via settings,
            or when an ``AdminUser`` row already exists.
    """
    cfg = get_settings()
    if cfg.admin_username and cfg.admin_password:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Setup is disabled when admin credentials are provided via environment.",
        )

    if db.query(AdminUser).first():
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Admin already configured")

    new_salt = secrets.token_hex(8)
    db.add(
        AdminUser(
            username=payload.username,
            password_hash=hash_password(payload.password, new_salt),
            salt=new_salt,
        )
    )
    db.commit()
    return _login_success(db, payload.username, role="admin")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _secrets_match(supplied, expected) -> bool:
    """Compare two secret values in constant time; ``None`` never matches."""
    if supplied is None or expected is None:
        return False
    if isinstance(supplied, str):
        supplied = supplied.encode("utf-8")
    if isinstance(expected, str):
        expected = expected.encode("utf-8")
    try:
        return secrets.compare_digest(supplied, expected)
    except TypeError:
        # Unexpected value types: fall back to plain equality rather than 500.
        return supplied == expected


@router.post("/login", response_model=schemas.AuthResponse)
def login(payload: schemas.AuthLogin, db: Session = Depends(get_db)):
    """Authenticate a username/password pair and return a fresh token.

    Credentials are checked, in order, against the admin configured in
    settings (when both settings are present), the ``AdminUser`` table, and
    the ``User`` table. Secret comparisons use ``secrets.compare_digest`` so
    the response time does not depend on how many leading characters match.

    Raises:
        HTTPException: 401 when no source accepts the credentials.
    """
    settings = get_settings()
    env_mode = settings.admin_username and settings.admin_password
    if env_mode:
        # Evaluate both comparisons unconditionally so a wrong username is
        # not distinguishable from a wrong password by timing.
        username_ok = _secrets_match(payload.username, settings.admin_username)
        password_ok = _secrets_match(payload.password, settings.admin_password)
        if not (username_ok and password_ok):
            raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")
        return _login_success(db, settings.admin_username, role="admin")

    admin: Optional[AdminUser] = db.query(AdminUser).filter(AdminUser.username == payload.username).first()
    if admin and _secrets_match(hash_password(payload.password, admin.salt), admin.password_hash):
        return _login_success(db, admin.username, role="admin")

    # Check standard users
    from agentgear.server.app.models import User

    user: Optional[User] = db.query(User).filter(User.username == payload.username).first()
    if user and _secrets_match(hash_password(payload.password, user.salt), user.password_hash):
        return _login_success(db, user.username, role=user.role, project_id=user.project_id)

    raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class ForgotPasswordRequest(schemas.BaseModel):
    # Request body for POST /forgot-password.
    # `email` is matched against User.email to find the account to reset.
    email: str
|
|
108
|
+
|
|
109
|
+
@router.post("/forgot-password", status_code=status.HTTP_200_OK)
def forgot_password(payload: ForgotPasswordRequest, db: Session = Depends(get_db)):
    """Reset a user's password to a random temporary one and email it.

    The new hash is committed only after the email has been handed to
    ``send_email`` successfully; if sending fails the change is rolled back,
    so the user keeps their previous password instead of being locked out.

    Raises:
        HTTPException: 500 when no SMTP configuration is available or the
            email cannot be sent.
    """
    import logging
    from html import escape

    from agentgear.server.app.models import User, SMTPSettings
    from agentgear.server.app.utils.email import send_email

    user = db.query(User).filter(User.email == payload.email).first()
    if not user:
        # Same-shaped success response so unknown addresses are not an error.
        return {"message": "If an account with that email exists, a reset code has been sent."}

    # Prefer the SMTP settings of the user's own project, then any enabled one.
    smtp = None
    if user.project_id:
        smtp = db.query(SMTPSettings).filter(SMTPSettings.project_id == user.project_id).first()

    if not smtp or not smtp.enabled:
        smtp = db.query(SMTPSettings).filter(SMTPSettings.enabled == True).first()  # noqa: E712

    if not smtp:
        raise HTTPException(status_code=500, detail="SMTP not configured. Contact admin.")

    # 12 random bytes -> 16 URL-safe characters; the previous 4-byte value
    # was small enough to be guessable by brute force.
    temp_pass = secrets.token_urlsafe(12)
    user.salt = secrets.token_hex(8)
    user.password_hash = hash_password(temp_pass, user.salt)
    db.add(user)

    subject = "AgentGear Password Reset"
    # The username is user-controlled data, so escape it before embedding in HTML.
    body = f"""
    <p>Hello {escape(str(user.username))},</p>
    <p>Your password has been reset.</p>
    <p><strong>New Password:</strong> {escape(temp_pass)}</p>
    <p>Please login and change your password immediately.</p>
    """

    try:
        send_email(smtp, [user.email], subject, body)
    except Exception as e:
        # Discard the pending password change: the user never received it.
        db.rollback()
        logging.exception("Failed to send reset email: %s", e)
        raise HTTPException(status_code=500, detail="Failed to send email.")

    db.commit()
    return {"message": "Password reset email sent."}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status, UploadFile, File
|
|
4
|
+
from sqlalchemy.orm import Session
|
|
5
|
+
|
|
6
|
+
from agentgear.server.app import schemas
|
|
7
|
+
from agentgear.server.app.config import get_settings
|
|
8
|
+
from agentgear.server.app.db import get_db
|
|
9
|
+
from agentgear.server.app.models import Dataset, DatasetExample, Project
|
|
10
|
+
from agentgear.server.app.deps import require_scopes
|
|
11
|
+
|
|
12
|
+
router = APIRouter(prefix="/api/datasets", tags=["datasets"])
|
|
13
|
+
|
|
14
|
+
@router.post("", response_model=schemas.DatasetRead, status_code=status.HTTP_201_CREATED)
def create_dataset(
    payload: schemas.DatasetCreate,
    request: Request,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.write"])),
):
    """Create a dataset in the payload's project, or in the caller's project.

    Raises:
        HTTPException: 400 when no project id can be determined; 403 when,
            outside local mode, the caller's project differs from the target.
    """
    settings = get_settings()

    # An explicit project id in the payload wins over the one on the request.
    pid = payload.project_id or request.state.project_id
    if not pid:
        raise HTTPException(status_code=400, detail="Project ID required")

    strict = not settings.local_mode
    if strict and request.state.project_id and request.state.project_id != pid:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    new_dataset = Dataset(
        project_id=pid,
        name=payload.name,
        description=payload.description,
        tags=payload.tags,
        metadata_=payload.metadata,
    )
    db.add(new_dataset)
    db.commit()
    db.refresh(new_dataset)
    return new_dataset
|
|
43
|
+
|
|
44
|
+
@router.get("", response_model=List[schemas.DatasetRead])
def list_datasets(
    project_id: str | None = Query(default=None),
    request: Request = None,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.read"])),
):
    """List datasets, newest first.

    Outside local mode a caller with a project on the request sees only that
    project's datasets; otherwise the optional ``project_id`` query filter
    applies.
    """
    settings = get_settings()

    # Work out which project (if any) restricts the listing.
    if not settings.local_mode and request.state.project_id:
        scope = request.state.project_id
    elif project_id:
        scope = project_id
    else:
        scope = None

    query = db.query(Dataset)
    if scope is not None:
        query = query.filter(Dataset.project_id == scope)
    return query.order_by(Dataset.created_at.desc()).all()
|
|
61
|
+
|
|
62
|
+
@router.get("/{dataset_id}", response_model=schemas.DatasetRead)
def get_dataset(
    dataset_id: str,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.read"])),
    request: Request = None,
):
    """Return one dataset by id.

    Applies the same project restriction as ``create_dataset`` and
    ``list_datasets``: outside local mode, a caller bound to a project may
    only read datasets of that project.

    Raises:
        HTTPException: 404 when the dataset does not exist; 403 when it
            belongs to a different project than the caller's.
    """
    dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    # `request` is None only when the function is called directly, not via HTTP.
    if request is not None and not get_settings().local_mode:
        caller_project = request.state.project_id
        if caller_project and caller_project != dataset.project_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    return dataset
|
|
72
|
+
|
|
73
|
+
@router.post("/{dataset_id}/examples", response_model=schemas.DatasetExampleRead)
def create_example(
    dataset_id: str,
    payload: schemas.DatasetExampleCreate,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.write"])),
    request: Request = None,
):
    """Add a single example to an existing dataset.

    Applies the same project restriction as ``create_dataset``: outside
    local mode, a caller bound to a project may only write to datasets of
    that project.

    Raises:
        HTTPException: 404 when the dataset does not exist; 403 when it
            belongs to a different project than the caller's.
    """
    dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    # `request` is None only when the function is called directly, not via HTTP.
    if request is not None and not get_settings().local_mode:
        caller_project = request.state.project_id
        if caller_project and caller_project != dataset.project_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    example = DatasetExample(
        dataset_id=dataset_id,
        input_text=payload.input_text,
        expected_output=payload.expected_output,
        metadata_=payload.metadata,
    )
    db.add(example)
    db.commit()
    db.refresh(example)
    return example
|
|
94
|
+
|
|
95
|
+
@router.get("/{dataset_id}/examples", response_model=List[schemas.DatasetExampleRead])
def list_examples(
    dataset_id: str,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.read"])),
    request: Request = None,
):
    """List every example stored under ``dataset_id``.

    An unknown dataset id still yields an empty list. Outside local mode, a
    caller bound to a project is refused when the dataset belongs to a
    different project, matching the restriction in ``list_datasets``.

    Raises:
        HTTPException: 403 when the dataset belongs to another project.
    """
    # `request` is None only when the function is called directly, not via HTTP.
    if request is not None and not get_settings().local_mode and request.state.project_id:
        dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first()
        if dataset and dataset.project_id != request.state.project_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    return db.query(DatasetExample).filter(DatasetExample.dataset_id == dataset_id).all()
|
|
102
|
+
|
|
103
|
+
@router.delete("/{dataset_id}/examples/{example_id}", status_code=204)
def delete_example(
    dataset_id: str,
    example_id: str,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.write"])),
    request: Request = None,
):
    """Delete one example, which must belong to the given dataset.

    Outside local mode, a caller bound to a project is refused when the
    dataset belongs to a different project, matching the restriction in
    ``create_dataset``.

    Raises:
        HTTPException: 404 when no such example exists in the dataset; 403
            when the dataset belongs to another project.
    """
    example = (
        db.query(DatasetExample)
        .filter(DatasetExample.id == example_id, DatasetExample.dataset_id == dataset_id)
        .first()
    )
    if not example:
        raise HTTPException(status_code=404, detail="Example not found")

    # `request` is None only when the function is called directly, not via HTTP.
    if request is not None and not get_settings().local_mode and request.state.project_id:
        dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first()
        if dataset and dataset.project_id != request.state.project_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    db.delete(example)
    db.commit()
|
|
116
|
+
|
|
117
|
+
def _example_from_record(record):
    """Map one CSV row or JSON object to example fields; None if it has no input."""
    if not isinstance(record, dict):
        raise ValueError("each record must be an object with an input field")
    input_text = record.get("input") or record.get("input_text") or record.get("prompt")
    if not input_text:
        return None
    expected_output = record.get("output") or record.get("expected_output") or record.get("completion")
    return {"input_text": input_text, "expected_output": expected_output}


@router.post("/{dataset_id}/upload", status_code=201)
async def upload_dataset_file(
    dataset_id: str,
    file: UploadFile = File(...),
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["datasets.write"])),
    request: Request = None,
):
    """Bulk-import examples into a dataset from an uploaded .csv or .json file.

    CSV files need a header row; JSON files must hold a list of objects. In
    both, the input is read from ``input``/``input_text``/``prompt`` and the
    expected output from ``output``/``expected_output``/``completion``.
    Records without an input are skipped.

    Raises:
        HTTPException: 404 when the dataset does not exist; 403 when it
            belongs to another project than the caller's; 400 for an
            unsupported extension or unparseable content.
    """
    import csv
    import io
    import json

    dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        raise HTTPException(status_code=404, detail="Dataset not found")

    # `request` is None only when the function is called directly, not via HTTP.
    if request is not None and not get_settings().local_mode:
        caller_project = request.state.project_id
        if caller_project and caller_project != dataset.project_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    # The filename may be absent; treat that as an unsupported type, not a crash.
    filename = (file.filename or "").lower()
    if not filename.endswith((".csv", ".json")):
        # Raised outside the try below so the message is not re-wrapped.
        raise HTTPException(status_code=400, detail="Unsupported file type. Use .csv or .json")

    content = await file.read()

    try:
        if filename.endswith(".csv"):
            # utf-8-sig strips a leading BOM, which would otherwise corrupt
            # the first header name and cause every row to be skipped.
            records = csv.DictReader(io.StringIO(content.decode("utf-8-sig")))
        else:
            data = json.loads(content)
            # Non-list JSON carries no examples.
            records = data if isinstance(data, list) else []

        # DictReader is lazy, so iteration must stay inside the try.
        examples_to_add = [
            example
            for example in map(_example_from_record, records)
            if example is not None
        ]
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to parse file: {str(e)}")

    if not examples_to_add:
        return {"message": "No valid examples found", "count": 0}

    # Bulk insert
    objects = [
        DatasetExample(
            dataset_id=dataset_id,
            input_text=ex["input_text"],
            expected_output=ex.get("expected_output"),
        )
        for ex in examples_to_add
    ]
    db.add_all(objects)
    db.commit()

    return {"message": "Successfully uploaded examples", "count": len(objects)}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
|
4
|
+
from sqlalchemy.orm import Session
|
|
5
|
+
|
|
6
|
+
from agentgear.server.app import schemas
|
|
7
|
+
from agentgear.server.app.config import get_settings
|
|
8
|
+
from agentgear.server.app.db import get_db
|
|
9
|
+
from agentgear.server.app.models import Evaluation, Project, Trace
|
|
10
|
+
from agentgear.server.app.deps import require_scopes
|
|
11
|
+
|
|
12
|
+
router = APIRouter(prefix="/api/scores", tags=["evaluations"])
|
|
13
|
+
|
|
14
|
+
@router.post("", response_model=schemas.EvaluationRead, status_code=status.HTTP_201_CREATED)
def create_score(
    payload: schemas.EvaluationCreate,
    request: Request,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["evaluations.write"])),
):
    """Record an evaluation score.

    Raises:
        HTTPException: 403 when, outside local mode, the caller's project
            differs from ``payload.project_id``; 404 when ``payload.trace_id``
            is given but no such trace exists.
    """
    strict = not get_settings().local_mode
    if strict and request.state.project_id and request.state.project_id != payload.project_id:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    # Only the trace target is validated; run_id and span_id are stored as given.
    if payload.trace_id:
        found = db.query(Trace).filter(Trace.id == payload.trace_id).first()
        if not found:
            raise HTTPException(status_code=404, detail="Trace not found")

    record = Evaluation(
        project_id=payload.project_id,
        trace_id=payload.trace_id,
        run_id=payload.run_id,
        span_id=payload.span_id,
        evaluator_type=payload.evaluator_type,
        score=payload.score,
        max_score=payload.max_score,
        passed=payload.passed,
        comments=payload.comments,
        metadata_=payload.metadata,
    )
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
|
|
48
|
+
|
|
49
|
+
@router.get("", response_model=List[schemas.EvaluationRead])
def list_scores(
    trace_id: str | None = Query(default=None),
    project_id: str | None = Query(default=None),
    request: Request = None,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["evaluations.read"])),
):
    """List evaluations, newest first, optionally narrowed by project and trace.

    Outside local mode the caller's own project (when set on the request)
    takes precedence over the ``project_id`` query parameter.
    """
    settings = get_settings()

    criteria = []
    if not settings.local_mode and request.state.project_id:
        criteria.append(Evaluation.project_id == request.state.project_id)
    elif project_id:
        criteria.append(Evaluation.project_id == project_id)
    if trace_id:
        criteria.append(Evaluation.trace_id == trace_id)

    query = db.query(Evaluation)
    for criterion in criteria:
        query = query.filter(criterion)
    return query.order_by(Evaluation.created_at.desc()).all()
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
|
3
|
+
from sqlalchemy.orm import Session
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from agentgear.server.app import schemas
|
|
7
|
+
from agentgear.server.app.config import get_settings
|
|
8
|
+
from agentgear.server.app.db import get_db
|
|
9
|
+
from agentgear.server.app.models import Evaluator, Trace, Run, Span, Project
|
|
10
|
+
from agentgear.server.app.deps import require_scopes
|
|
11
|
+
|
|
12
|
+
router = APIRouter(prefix="/api/evaluators", tags=["evaluators"])
|
|
13
|
+
|
|
14
|
+
class EvaluateRequest(BaseModel):
    # Request body for POST /{evaluator_id}/run: names the record to evaluate.
    # run_evaluation checks span_id first and falls back to trace_id.

    trace_id: Optional[str] = None
    # NOTE(review): run_id is accepted but run_evaluation never reads it;
    # confirm whether run-level evaluation is still planned.
    run_id: Optional[str] = None
    span_id: Optional[str] = None
|
|
25
|
+
|
|
26
|
+
@router.post("", response_model=schemas.EvaluatorRead, status_code=status.HTTP_201_CREATED)
def create_evaluator(
    payload: schemas.EvaluatorCreate,
    request: Request,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["evaluations.write"])),
):
    """Create an evaluator in the payload's project, or in the caller's project.

    Outside local mode a caller bound to a project may not create an
    evaluator in a different project; ``list_evaluators`` already restricts
    reads the same way.

    Raises:
        HTTPException: 400 when no project id can be determined; 403 when
            the caller's project differs from the target project.
    """
    settings = get_settings()
    pid = payload.project_id or request.state.project_id
    if not pid:
        raise HTTPException(status_code=400, detail="Project ID required")

    # Previously `settings` was fetched but never consulted, so a payload
    # could name any project regardless of the caller's own.
    if not settings.local_mode and request.state.project_id and request.state.project_id != pid:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    obj = Evaluator(
        project_id=pid,
        name=payload.name,
        prompt_template=payload.prompt_template,
        model=payload.model,
        config=payload.config,
    )
    db.add(obj)
    db.commit()
    db.refresh(obj)
    return obj
|
|
49
|
+
|
|
50
|
+
@router.get("", response_model=List[schemas.EvaluatorRead])
def list_evaluators(
    request: Request,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["evaluations.read"])),
):
    """List evaluators; outside local mode, only those of the caller's project."""
    settings = get_settings()
    caller_project = request.state.project_id

    restrict = not settings.local_mode and caller_project
    evaluators = db.query(Evaluator)
    if restrict:
        evaluators = evaluators.filter(Evaluator.project_id == caller_project)
    return evaluators.all()
|
|
62
|
+
|
|
63
|
+
def _extract_unit_score(text):
    """Return the first standalone number between 0 and 1 in ``text``, else None.

    The lookarounds reject digits that are part of a larger number, so
    "1.5", "3.0" or "10" yield None instead of being misread as 1.0 or 0.0.
    """
    import re

    if not isinstance(text, str):
        return None
    match = re.search(r"(?<![\w.])(?:0(?:\.\d+)?|1(?:\.0+)?)(?!\.?\w)", text)
    return float(match.group(0)) if match else None


@router.post("/{evaluator_id}/run")
async def run_evaluation(
    evaluator_id: str,
    target: EvaluateRequest,
    db: Session = Depends(get_db),
    _: None = Depends(require_scopes(["evaluations.write"])),
    request: Request = None,
):
    """Run an LLM-as-a-judge evaluator against a span or trace and store the result.

    The evaluator's prompt template has ``{{input}}`` and ``{{output}}``
    replaced with the target's input/output text, the prompt is sent to the
    model, and the reply is saved as an ``Evaluation``. The score is the
    first standalone 0-1 number found in the reply, or None.

    Raises:
        HTTPException: 404 when the evaluator, span or trace is missing; 403
            when the evaluator belongs to another project than the caller's;
            400 when neither ``span_id`` nor ``trace_id`` is given; 500 when
            the model call or the save fails.
    """
    import asyncio
    import logging
    import os

    evaluator = db.query(Evaluator).filter(Evaluator.id == evaluator_id).first()
    if not evaluator:
        raise HTTPException(status_code=404, detail="Evaluator not found")

    # Same project restriction list_evaluators applies to reads.
    # `request` is None only when the function is called directly, not via HTTP.
    if request is not None and not get_settings().local_mode:
        caller_project = request.state.project_id
        if caller_project and caller_project != evaluator.project_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Project mismatch")

    # Fetch context; span_id takes precedence over trace_id.
    if target.span_id:
        span = db.query(Span).filter(Span.id == target.span_id).first()
        if not span:
            raise HTTPException(status_code=404, detail="Span not found")
        # Span payloads are stringified whole rather than parsed for text.
        input_text = str(span.request_payload)
        output_text = str(span.response_payload)
    elif target.trace_id:
        trace = db.query(Trace).filter(Trace.id == target.trace_id).first()
        if not trace:
            raise HTTPException(status_code=404, detail="Trace not found")
        input_text = trace.input_text
        output_text = trace.output_text
    else:
        raise HTTPException(status_code=400, detail="Target trace_id or span_id required")

    # Simple template substitution
    prompt = evaluator.prompt_template.replace("{{input}}", str(input_text)).replace("{{output}}", str(output_text))

    try:
        from agentgear.server.app.models import Evaluation
        from agentgear.server.app.utils.llm import call_llm

        # The provider is fixed and the key comes from the environment.
        provider = "openai"
        api_key = os.getenv("OPENAI_API_KEY", "")
        messages = [{"role": "user", "content": prompt}]

        # call_llm is synchronous; run it in a worker thread so this
        # coroutine does not block the event loop while waiting on the model.
        result_text = await asyncio.to_thread(
            call_llm,
            provider=provider,
            api_key=api_key,
            model_name=evaluator.model,
            messages=messages,
            config=evaluator.config,
        )

        evaluation = Evaluation(
            project_id=evaluator.project_id,
            trace_id=target.trace_id,
            span_id=target.span_id,
            evaluator_type="llm_as_a_judge",
            score=_extract_unit_score(result_text),
            comments=result_text,
            metadata_={"evaluator_id": evaluator.id, "model": evaluator.model},
        )
        db.add(evaluation)
        db.commit()
        db.refresh(evaluation)

        return evaluation

    except Exception as e:
        logging.exception("Evaluator run failed")
        raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends, HTTPException, status
|
|
4
|
+
from sqlalchemy.orm import Session
|
|
5
|
+
|
|
6
|
+
from agentgear.server.app import schemas
|
|
7
|
+
from agentgear.server.app.db import get_db
|
|
8
|
+
from agentgear.server.app.models import LLMModel
|
|
9
|
+
|
|
10
|
+
router = APIRouter(prefix="/api/models", tags=["models"])
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@router.get("", response_model=List[schemas.LLMModelRead])
def list_models(db: Session = Depends(get_db)):
    """Return every registered LLM model.

    NOTE(review): no scope dependency is declared on this route here;
    confirm access control is applied where the router is mounted.
    """
    all_models = db.query(LLMModel)
    return all_models.all()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@router.post("", response_model=schemas.LLMModelRead, status_code=status.HTTP_201_CREATED)
def create_model(payload: schemas.LLMModelCreate, db: Session = Depends(get_db)):
    """Register an LLM model from the payload and return the stored row.

    NOTE(review): no scope dependency is declared on this route here;
    confirm access control is applied where the router is mounted.
    """
    fields = {
        "name": payload.name,
        "provider": payload.provider,
        "api_key": payload.api_key,
        "base_url": payload.base_url,
        "config": payload.config,
    }
    created = LLMModel(**fields)
    db.add(created)
    db.commit()
    db.refresh(created)
    return created
|
|
31
|
+
|
|
32
|
+
@router.delete("/{model_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_model(model_id: str, db: Session = Depends(get_db)):
    """Delete the LLM model with the given id.

    Raises:
        HTTPException: 404 when no model has that id.

    NOTE(review): no scope dependency is declared on this route here;
    confirm access control is applied where the router is mounted.
    """
    doomed = db.query(LLMModel).filter(LLMModel.id == model_id).first()
    if doomed:
        db.delete(doomed)
        db.commit()
        return
    raise HTTPException(status_code=404, detail="Model not found")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from fastapi import APIRouter, Depends
|
|
2
|
+
from sqlalchemy import func
|
|
3
|
+
from sqlalchemy.orm import Session
|
|
4
|
+
|
|
5
|
+
from agentgear.server.app import schemas
|
|
6
|
+
from agentgear.server.app.db import get_db
|
|
7
|
+
from agentgear.server.app.models import Project, Prompt, Run, Span
|
|
8
|
+
|
|
9
|
+
router = APIRouter(prefix="/api/metrics", tags=["metrics"])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@router.get("/summary", response_model=schemas.MetricsSummary)
def metrics_summary(db: Session = Depends(get_db)):
    """Return row counts for runs, spans, prompts and projects."""

    def _row_count(model) -> int:
        # A missing scalar result is reported as zero.
        return db.query(func.count(model.id)).scalar() or 0

    return schemas.MetricsSummary(
        runs=_row_count(Run),
        spans=_row_count(Span),
        prompts=_row_count(Prompt),
        projects=_row_count(Project),
    )
|