evalvault 1.63.1__py3-none-any.whl → 1.64.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/main.py +147 -9
- evalvault/adapters/inbound/api/routers/config.py +6 -1
- evalvault/adapters/inbound/api/routers/knowledge.py +62 -6
- evalvault/adapters/outbound/methods/external_command.py +22 -1
- evalvault/adapters/outbound/tracker/langfuse_adapter.py +28 -8
- evalvault/adapters/outbound/tracker/log_sanitizer.py +93 -0
- evalvault/adapters/outbound/tracker/mlflow_adapter.py +3 -2
- evalvault/adapters/outbound/tracker/phoenix_adapter.py +51 -25
- evalvault/config/secret_manager.py +118 -0
- evalvault/config/settings.py +141 -1
- {evalvault-1.63.1.dist-info → evalvault-1.64.0.dist-info}/METADATA +8 -1
- {evalvault-1.63.1.dist-info → evalvault-1.64.0.dist-info}/RECORD +15 -13
- {evalvault-1.63.1.dist-info → evalvault-1.64.0.dist-info}/WHEEL +0 -0
- {evalvault-1.63.1.dist-info → evalvault-1.64.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.63.1.dist-info → evalvault-1.64.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -2,14 +2,59 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import hashlib
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from collections import defaultdict, deque
|
|
5
9
|
from contextlib import asynccontextmanager
|
|
6
10
|
from typing import Annotated
|
|
7
11
|
|
|
8
|
-
from fastapi import Depends, FastAPI, Request
|
|
12
|
+
from fastapi import Depends, FastAPI, HTTPException, Request, Security
|
|
9
13
|
from fastapi.middleware.cors import CORSMiddleware
|
|
14
|
+
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
|
15
|
+
from starlette.responses import JSONResponse
|
|
10
16
|
|
|
11
17
|
from evalvault.adapters.inbound.api.adapter import WebUIAdapter, create_adapter
|
|
12
|
-
from evalvault.config.settings import get_settings
|
|
18
|
+
from evalvault.config.settings import Settings, get_settings, is_production_profile
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RateLimiter:
    """Sliding-window request limiter kept in process memory.

    Each key owns a deque of ``time.monotonic()`` timestamps for the requests
    that were let through, plus a running count of how often that key has been
    refused.

    NOTE: per-key state is never evicted, so memory grows with the number of
    distinct keys seen by this process.
    """

    def __init__(self) -> None:
        self._requests: dict[str, deque[float]] = defaultdict(deque)
        self._blocked_counts: dict[str, int] = defaultdict(int)

    def check(self, key: str, limit: int, window_seconds: int) -> tuple[bool, int | None, int]:
        """Record a request for ``key`` and decide whether it is allowed.

        Args:
            key: Bucket identifier.
            limit: Maximum number of accepted requests inside one window.
            window_seconds: Window length in seconds; values below 1 are
                treated as 1.

        Returns:
            ``(allowed, retry_after, blocked_count)``. ``retry_after`` is
            ``None`` when the request is allowed, otherwise the number of whole
            seconds (at least 1) until the oldest recorded request leaves the
            window. ``blocked_count`` is the total number of refusals recorded
            for ``key`` so far.
        """
        from math import ceil

        now = time.monotonic()
        window = max(window_seconds, 1)
        queue = self._requests[key]
        # Discard timestamps that have aged out of the window.
        while queue and now - queue[0] >= window:
            queue.popleft()
        if len(queue) >= limit:
            self._blocked_counts[key] += 1
            # Round up: truncating would advertise a retry time at which the
            # oldest request is still inside the window.
            retry_after = ceil(window - (now - queue[0])) if queue else window
            return False, max(retry_after, 1), self._blocked_counts[key]
        queue.append(now)
        # .get() avoids inserting a zero entry for keys that were never blocked.
        return True, None, self._blocked_counts.get(key, 0)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
rate_limiter = RateLimiter()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _hash_token(token: str) -> str:
|
|
46
|
+
return hashlib.sha256(token.encode("utf-8")).hexdigest()[:8]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _rate_limit_key(request: Request) -> str:
|
|
50
|
+
auth_header = request.headers.get("Authorization", "")
|
|
51
|
+
if auth_header.lower().startswith("bearer "):
|
|
52
|
+
token = auth_header[7:].strip()
|
|
53
|
+
if token:
|
|
54
|
+
return f"token:{_hash_token(token)}"
|
|
55
|
+
client = request.client
|
|
56
|
+
host = client.host if client else "unknown"
|
|
57
|
+
return f"ip:{host}"
|
|
13
58
|
|
|
14
59
|
|
|
15
60
|
@asynccontextmanager
|
|
@@ -23,6 +68,31 @@ async def lifespan(app: FastAPI):
|
|
|
23
68
|
pass
|
|
24
69
|
|
|
25
70
|
|
|
71
|
+
auth_scheme = HTTPBearer(auto_error=False)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _normalize_api_tokens(raw_tokens: str | None) -> set[str]:
|
|
75
|
+
if not raw_tokens:
|
|
76
|
+
return set()
|
|
77
|
+
return {token.strip() for token in raw_tokens.split(",") if token.strip()}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def require_api_token(
    credentials: Annotated[HTTPAuthorizationCredentials | None, Security(auth_scheme)],
    settings: Settings = Depends(get_settings),
) -> str | None:
    """Dependency that enforces bearer-token authentication when tokens are configured.

    Returns:
        ``None`` when ``settings.api_auth_tokens`` yields no tokens (the check
        is then skipped), otherwise the presented token.

    Raises:
        HTTPException: 401 when tokens are configured and the request has no
            credentials or a token that is not in the configured set.
    """
    allowed = _normalize_api_tokens(settings.api_auth_tokens)
    if not allowed:
        return None
    if credentials is not None and credentials.credentials in allowed:
        return credentials.credentials
    raise HTTPException(
        status_code=401,
        detail="Invalid or missing API token",
        headers={"WWW-Authenticate": "Bearer"},
    )
|
|
94
|
+
|
|
95
|
+
|
|
26
96
|
def create_app() -> FastAPI:
|
|
27
97
|
"""Create and configure the FastAPI application."""
|
|
28
98
|
app = FastAPI(
|
|
@@ -32,10 +102,46 @@ def create_app() -> FastAPI:
|
|
|
32
102
|
lifespan=lifespan,
|
|
33
103
|
)
|
|
34
104
|
|
|
105
|
+
@app.middleware("http")
async def rate_limit_middleware(request: Request, call_next):
    """Apply the module-level rate limiter to requests under ``/api/``.

    Requests pass straight through when rate limiting is disabled in settings
    or the path does not start with ``/api/``. A refused request gets a 429
    JSON response with a ``Retry-After`` header; once a key's refusal count
    reaches ``settings.rate_limit_block_threshold`` each refusal is also logged.
    """
    settings = get_settings()
    if not (settings.rate_limit_enabled and request.url.path.startswith("/api/")):
        return await call_next(request)

    # Clamp both settings to at least 1.
    limit = max(settings.rate_limit_requests, 1)
    window_seconds = max(settings.rate_limit_window_seconds, 1)
    key = _rate_limit_key(request)
    allowed, retry_after, blocked_count = rate_limiter.check(key, limit, window_seconds)
    if allowed:
        return await call_next(request)

    if blocked_count >= settings.rate_limit_block_threshold:
        logger.warning(
            "Rate limit blocked request",
            extra={"rate_limit_key": key, "blocked_count": blocked_count},
        )
    headers = {"Retry-After": str(retry_after)} if retry_after else None
    return JSONResponse(
        status_code=429,
        content={"detail": "Rate limit exceeded"},
        headers=headers,
    )
|
|
136
|
+
|
|
35
137
|
settings = get_settings()
|
|
36
138
|
cors_origins = [
|
|
37
139
|
origin.strip() for origin in (settings.cors_origins or "").split(",") if origin.strip()
|
|
38
|
-
]
|
|
140
|
+
]
|
|
141
|
+
if not cors_origins:
|
|
142
|
+
if is_production_profile(settings.evalvault_profile):
|
|
143
|
+
raise RuntimeError("CORS_ORIGINS must be set for production profile.")
|
|
144
|
+
cors_origins = ["http://localhost:5173"]
|
|
39
145
|
|
|
40
146
|
# Configure CORS
|
|
41
147
|
app.add_middleware(
|
|
@@ -48,12 +154,44 @@ def create_app() -> FastAPI:
|
|
|
48
154
|
|
|
49
155
|
from .routers import benchmark, config, domain, knowledge, pipeline, runs
|
|
50
156
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
app.include_router(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
157
|
+
auth_dependencies = [Depends(require_api_token)]
|
|
158
|
+
|
|
159
|
+
app.include_router(
|
|
160
|
+
runs.router,
|
|
161
|
+
prefix="/api/v1/runs",
|
|
162
|
+
tags=["runs"],
|
|
163
|
+
dependencies=auth_dependencies,
|
|
164
|
+
)
|
|
165
|
+
app.include_router(
|
|
166
|
+
benchmark.router,
|
|
167
|
+
prefix="/api/v1/benchmarks",
|
|
168
|
+
tags=["benchmarks"],
|
|
169
|
+
dependencies=auth_dependencies,
|
|
170
|
+
)
|
|
171
|
+
app.include_router(
|
|
172
|
+
knowledge.router,
|
|
173
|
+
prefix="/api/v1/knowledge",
|
|
174
|
+
tags=["knowledge"],
|
|
175
|
+
dependencies=auth_dependencies,
|
|
176
|
+
)
|
|
177
|
+
app.include_router(
|
|
178
|
+
pipeline.router,
|
|
179
|
+
prefix="/api/v1/pipeline",
|
|
180
|
+
tags=["pipeline"],
|
|
181
|
+
dependencies=auth_dependencies,
|
|
182
|
+
)
|
|
183
|
+
app.include_router(
|
|
184
|
+
domain.router,
|
|
185
|
+
prefix="/api/v1/domain",
|
|
186
|
+
tags=["domain"],
|
|
187
|
+
dependencies=auth_dependencies,
|
|
188
|
+
)
|
|
189
|
+
app.include_router(
|
|
190
|
+
config.router,
|
|
191
|
+
prefix="/api/v1/config",
|
|
192
|
+
tags=["config"],
|
|
193
|
+
dependencies=auth_dependencies,
|
|
194
|
+
)
|
|
57
195
|
|
|
58
196
|
@app.get("/health")
|
|
59
197
|
def health_check():
|
|
@@ -28,6 +28,9 @@ def get_config():
|
|
|
28
28
|
"phoenix_api_token",
|
|
29
29
|
"postgres_password",
|
|
30
30
|
"postgres_connection_string",
|
|
31
|
+
"api_auth_tokens",
|
|
32
|
+
"knowledge_read_tokens",
|
|
33
|
+
"knowledge_write_tokens",
|
|
31
34
|
}
|
|
32
35
|
)
|
|
33
36
|
|
|
@@ -80,7 +83,6 @@ def update_config(
|
|
|
80
83
|
payload: ConfigUpdateRequest,
|
|
81
84
|
adapter: AdapterDep,
|
|
82
85
|
):
|
|
83
|
-
"""Update runtime configuration (non-secret fields only)."""
|
|
84
86
|
updates = payload.model_dump(exclude_unset=True)
|
|
85
87
|
if not updates:
|
|
86
88
|
return get_config()
|
|
@@ -96,6 +98,9 @@ def update_config(
|
|
|
96
98
|
"phoenix_api_token",
|
|
97
99
|
"postgres_password",
|
|
98
100
|
"postgres_connection_string",
|
|
101
|
+
"api_auth_tokens",
|
|
102
|
+
"knowledge_read_tokens",
|
|
103
|
+
"knowledge_write_tokens",
|
|
99
104
|
}
|
|
100
105
|
)
|
|
101
106
|
|
|
@@ -2,10 +2,11 @@ import shutil
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
|
-
from fastapi import APIRouter, BackgroundTasks, File, HTTPException, UploadFile
|
|
5
|
+
from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, Request, UploadFile
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
8
|
from evalvault.adapters.outbound.kg.parallel_kg_builder import ParallelKGBuilder
|
|
9
|
+
from evalvault.config.settings import Settings, get_settings
|
|
9
10
|
|
|
10
11
|
router = APIRouter(tags=["knowledge"])
|
|
11
12
|
|
|
@@ -18,6 +19,47 @@ KG_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
18
19
|
KG_JOBS: dict[str, dict[str, Any]] = {}
|
|
19
20
|
|
|
20
21
|
|
|
22
|
+
def _normalize_tokens(raw_tokens: str | None) -> set[str]:
|
|
23
|
+
if not raw_tokens:
|
|
24
|
+
return set()
|
|
25
|
+
return {token.strip() for token in raw_tokens.split(",") if token.strip()}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _extract_bearer_token(request: Request) -> str | None:
|
|
29
|
+
auth_header = request.headers.get("Authorization", "")
|
|
30
|
+
if not auth_header:
|
|
31
|
+
return None
|
|
32
|
+
prefix = "bearer "
|
|
33
|
+
if auth_header.lower().startswith(prefix):
|
|
34
|
+
return auth_header[len(prefix) :].strip()
|
|
35
|
+
return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _require_knowledge_read_token(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> None:
    """Dependency guarding knowledge read endpoints.

    Either a read token or a write token is accepted. The check is skipped
    entirely when neither token list is configured.

    Raises:
        HTTPException: 403 when tokens are configured and the request's bearer
            token is missing or not among them.
    """
    read_tokens = _normalize_tokens(settings.knowledge_read_tokens)
    write_tokens = _normalize_tokens(settings.knowledge_write_tokens)
    accepted = read_tokens | write_tokens
    if not accepted:
        return
    token = _extract_bearer_token(request)
    if token is not None and token in accepted:
        return
    raise HTTPException(status_code=403, detail="Invalid or missing knowledge read token")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _require_knowledge_write_token(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> None:
    """Dependency guarding knowledge write endpoints.

    The check is skipped when no write tokens are configured.

    Raises:
        HTTPException: 403 when write tokens are configured and the request's
            bearer token is missing or not one of them.
    """
    accepted = _normalize_tokens(settings.knowledge_write_tokens)
    if not accepted:
        return
    token = _extract_bearer_token(request)
    if token is not None and token in accepted:
        return
    raise HTTPException(status_code=403, detail="Invalid or missing knowledge write token")
|
|
61
|
+
|
|
62
|
+
|
|
21
63
|
class BuildKGRequest(BaseModel):
|
|
22
64
|
workers: int = 4
|
|
23
65
|
batch_size: int = 32
|
|
@@ -26,7 +68,10 @@ class BuildKGRequest(BaseModel):
|
|
|
26
68
|
|
|
27
69
|
|
|
28
70
|
@router.post("/upload")
|
|
29
|
-
async def upload_files(
|
|
71
|
+
async def upload_files(
|
|
72
|
+
files: list[UploadFile] = File(...),
|
|
73
|
+
_: None = Depends(_require_knowledge_write_token),
|
|
74
|
+
):
|
|
30
75
|
"""Upload documents for Knowledge Graph building."""
|
|
31
76
|
uploaded = []
|
|
32
77
|
for file in files:
|
|
@@ -40,7 +85,9 @@ async def upload_files(files: list[UploadFile] = File(...)):
|
|
|
40
85
|
|
|
41
86
|
|
|
42
87
|
@router.get("/files")
|
|
43
|
-
def list_files(
|
|
88
|
+
def list_files(
|
|
89
|
+
_: None = Depends(_require_knowledge_read_token),
|
|
90
|
+
):
|
|
44
91
|
"""List uploaded files."""
|
|
45
92
|
files = []
|
|
46
93
|
if DATA_DIR.exists():
|
|
@@ -49,7 +96,11 @@ def list_files():
|
|
|
49
96
|
|
|
50
97
|
|
|
51
98
|
@router.post("/build", status_code=202)
|
|
52
|
-
async def build_knowledge_graph(
|
|
99
|
+
async def build_knowledge_graph(
|
|
100
|
+
request: BuildKGRequest,
|
|
101
|
+
background_tasks: BackgroundTasks,
|
|
102
|
+
_: None = Depends(_require_knowledge_write_token),
|
|
103
|
+
):
|
|
53
104
|
"""Trigger background Knowledge Graph construction."""
|
|
54
105
|
job_id = f"kg_build_{len(KG_JOBS) + 1}"
|
|
55
106
|
KG_JOBS[job_id] = {"status": "pending", "progress": "0%", "details": "Queued"}
|
|
@@ -121,7 +172,10 @@ async def build_knowledge_graph(request: BuildKGRequest, background_tasks: Backg
|
|
|
121
172
|
|
|
122
173
|
|
|
123
174
|
@router.get("/jobs/{job_id}")
|
|
124
|
-
def get_job_status(
|
|
175
|
+
def get_job_status(
|
|
176
|
+
job_id: str,
|
|
177
|
+
_: None = Depends(_require_knowledge_read_token),
|
|
178
|
+
):
|
|
125
179
|
job = KG_JOBS.get(job_id)
|
|
126
180
|
if not job:
|
|
127
181
|
raise HTTPException(status_code=404, detail="Job not found")
|
|
@@ -129,7 +183,9 @@ def get_job_status(job_id: str):
|
|
|
129
183
|
|
|
130
184
|
|
|
131
185
|
@router.get("/stats")
|
|
132
|
-
def get_graph_stats(
|
|
186
|
+
def get_graph_stats(
|
|
187
|
+
_: None = Depends(_require_knowledge_read_token),
|
|
188
|
+
):
|
|
133
189
|
"""Get statistics of the built Knowledge Graph."""
|
|
134
190
|
# Try to load from memory DB or default output JSON
|
|
135
191
|
# For now, we'll try to load the JSON if it exists, or just return empty
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import json
|
|
6
6
|
import os
|
|
7
7
|
import subprocess
|
|
8
|
+
import warnings
|
|
8
9
|
from collections.abc import Sequence
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import Any
|
|
@@ -18,7 +19,9 @@ class ExternalCommandMethod(RagMethodPort):
|
|
|
18
19
|
|
|
19
20
|
name = "external_command"
|
|
20
21
|
version = "0.1.0"
|
|
21
|
-
description =
|
|
22
|
+
description = (
|
|
23
|
+
"Execute a method in a separate process (shell=True requires a trusted command string)."
|
|
24
|
+
)
|
|
22
25
|
tags = ("external", "isolation")
|
|
23
26
|
|
|
24
27
|
def __init__(
|
|
@@ -67,6 +70,7 @@ class ExternalCommandMethod(RagMethodPort):
|
|
|
67
70
|
)
|
|
68
71
|
|
|
69
72
|
command = self._build_command(runtime)
|
|
73
|
+
self._validate_shell_usage(command)
|
|
70
74
|
result = subprocess.run( # noqa: S603 - user-controlled command by design
|
|
71
75
|
command,
|
|
72
76
|
cwd=self._workdir,
|
|
@@ -104,6 +108,23 @@ class ExternalCommandMethod(RagMethodPort):
|
|
|
104
108
|
except KeyError as exc:
|
|
105
109
|
raise ValueError(f"Unknown command placeholder: {exc}") from exc
|
|
106
110
|
|
|
111
|
+
def _validate_shell_usage(self, command: list[str] | str) -> None:
|
|
112
|
+
if not self._shell:
|
|
113
|
+
return
|
|
114
|
+
if not isinstance(command, str):
|
|
115
|
+
raise ValueError(
|
|
116
|
+
"shell=True requires a single command string; list arguments are rejected."
|
|
117
|
+
)
|
|
118
|
+
if not command.strip():
|
|
119
|
+
raise ValueError("shell=True requires a non-empty command string.")
|
|
120
|
+
if "\n" in command or "\r" in command:
|
|
121
|
+
raise ValueError("shell=True command must not contain newlines.")
|
|
122
|
+
warnings.warn(
|
|
123
|
+
"shell=True executes through the system shell. Use only trusted commands.",
|
|
124
|
+
RuntimeWarning,
|
|
125
|
+
stacklevel=2,
|
|
126
|
+
)
|
|
127
|
+
|
|
107
128
|
@staticmethod
|
|
108
129
|
def _load_payload(path: Path) -> Any:
|
|
109
130
|
if not path.exists():
|
|
@@ -4,6 +4,13 @@ from typing import Any
|
|
|
4
4
|
|
|
5
5
|
from langfuse import Langfuse
|
|
6
6
|
|
|
7
|
+
from evalvault.adapters.outbound.tracker.log_sanitizer import (
|
|
8
|
+
MAX_CONTEXT_CHARS,
|
|
9
|
+
MAX_LOG_CHARS,
|
|
10
|
+
sanitize_payload,
|
|
11
|
+
sanitize_text,
|
|
12
|
+
sanitize_text_list,
|
|
13
|
+
)
|
|
7
14
|
from evalvault.config.phoenix_support import extract_phoenix_links
|
|
8
15
|
from evalvault.domain.entities import EvaluationRun
|
|
9
16
|
from evalvault.ports.outbound.tracker_port import TrackerPort
|
|
@@ -88,21 +95,31 @@ class LangfuseAdapter(TrackerPort):
|
|
|
88
95
|
raise ValueError(f"Trace not found: {trace_id}")
|
|
89
96
|
|
|
90
97
|
trace_or_span = self._traces[trace_id]
|
|
98
|
+
safe_input = (
|
|
99
|
+
sanitize_payload(input_data, max_chars=MAX_LOG_CHARS)
|
|
100
|
+
if input_data is not None
|
|
101
|
+
else None
|
|
102
|
+
)
|
|
103
|
+
safe_output = (
|
|
104
|
+
sanitize_payload(output_data, max_chars=MAX_LOG_CHARS)
|
|
105
|
+
if output_data is not None
|
|
106
|
+
else None
|
|
107
|
+
)
|
|
91
108
|
# Support both old and new Langfuse API
|
|
92
109
|
if hasattr(trace_or_span, "start_span"):
|
|
93
110
|
# Langfuse 3.x: create nested span
|
|
94
111
|
child_span = trace_or_span.start_span(
|
|
95
112
|
name=name,
|
|
96
|
-
input=
|
|
97
|
-
output=
|
|
113
|
+
input=safe_input,
|
|
114
|
+
output=safe_output,
|
|
98
115
|
)
|
|
99
116
|
child_span.end()
|
|
100
117
|
else:
|
|
101
118
|
# Langfuse 2.x: use span method on trace
|
|
102
119
|
trace_or_span.span(
|
|
103
120
|
name=name,
|
|
104
|
-
input=
|
|
105
|
-
output=
|
|
121
|
+
input=safe_input,
|
|
122
|
+
output=safe_output,
|
|
106
123
|
)
|
|
107
124
|
|
|
108
125
|
def log_score(
|
|
@@ -377,10 +394,13 @@ class LangfuseAdapter(TrackerPort):
|
|
|
377
394
|
# Span input: test case data (question, answer, contexts, ground_truth)
|
|
378
395
|
span_input = {
|
|
379
396
|
"test_case_id": result.test_case_id,
|
|
380
|
-
"question": result.question,
|
|
381
|
-
"answer": result.answer,
|
|
382
|
-
"contexts":
|
|
383
|
-
|
|
397
|
+
"question": sanitize_text(result.question, max_chars=MAX_LOG_CHARS),
|
|
398
|
+
"answer": sanitize_text(result.answer, max_chars=MAX_LOG_CHARS),
|
|
399
|
+
"contexts": sanitize_text_list(
|
|
400
|
+
result.contexts,
|
|
401
|
+
max_chars=MAX_CONTEXT_CHARS,
|
|
402
|
+
),
|
|
403
|
+
"ground_truth": sanitize_text(result.ground_truth, max_chars=MAX_LOG_CHARS),
|
|
384
404
|
}
|
|
385
405
|
|
|
386
406
|
# Span output: evaluation results
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
MASK_TOKEN = "[REDACTED]"
|
|
7
|
+
MAX_LOG_CHARS = 1000
|
|
8
|
+
MAX_CONTEXT_CHARS = 500
|
|
9
|
+
MAX_LIST_ITEMS = 20
|
|
10
|
+
MAX_PAYLOAD_DEPTH = 2
|
|
11
|
+
|
|
12
|
+
_EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
|
|
13
|
+
_PHONE_PATTERN = re.compile(
|
|
14
|
+
r"\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{2,4}\)?[-.\s]?)?\d{3,4}[-.\s]?\d{4}\b"
|
|
15
|
+
)
|
|
16
|
+
_SSN_PATTERN = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
|
|
17
|
+
_CARD_PATTERN = re.compile(r"\b(?:\d[ -]*?){13,16}\b")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _mask_pii(text: str) -> str:
|
|
21
|
+
text = _EMAIL_PATTERN.sub(MASK_TOKEN, text)
|
|
22
|
+
text = _PHONE_PATTERN.sub(MASK_TOKEN, text)
|
|
23
|
+
text = _SSN_PATTERN.sub(MASK_TOKEN, text)
|
|
24
|
+
text = _CARD_PATTERN.sub(MASK_TOKEN, text)
|
|
25
|
+
return text
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _truncate(text: str, max_chars: int) -> str:
|
|
29
|
+
if max_chars <= 0:
|
|
30
|
+
return ""
|
|
31
|
+
if len(text) <= max_chars:
|
|
32
|
+
return text
|
|
33
|
+
if max_chars <= 3:
|
|
34
|
+
return text[:max_chars]
|
|
35
|
+
return f"{text[: max_chars - 3]}..."
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def sanitize_text(value: str | None, *, max_chars: int = MAX_LOG_CHARS) -> str | None:
    """Mask PII in ``value`` and truncate the result to ``max_chars``.

    ``None`` is returned unchanged; any other non-string value is converted
    with ``str()`` first.
    """
    if value is None:
        return None
    text = value if isinstance(value, str) else str(value)
    masked = _mask_pii(text)
    return _truncate(masked, max_chars)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def sanitize_text_list(
    values: list[str] | tuple[str, ...] | None,
    *,
    max_items: int = MAX_LIST_ITEMS,
    max_chars: int = MAX_CONTEXT_CHARS,
) -> list[str]:
    """Sanitize the first ``max_items`` entries of ``values``.

    Each kept entry goes through ``sanitize_text`` with ``max_chars``; entries
    that sanitize to ``None`` or ``""`` become ``""``. A falsy ``values``
    yields an empty list.
    """
    if not values:
        return []
    cleaned: list[str] = []
    for entry in list(values)[:max_items]:
        safe = sanitize_text(entry, max_chars=max_chars)
        cleaned.append(safe or "")
    return cleaned
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def sanitize_payload(
    value: Any,
    *,
    max_chars: int = MAX_LOG_CHARS,
    max_items: int = MAX_LIST_ITEMS,
    max_depth: int = MAX_PAYLOAD_DEPTH,
) -> Any:
    """Recursively sanitize an arbitrary payload.

    - ``None``, booleans and numbers are returned unchanged.
    - Strings go through ``sanitize_text``.
    - Dicts keep their keys as-is and have their first ``max_items`` values
      sanitized; lists, tuples and sets become a list of their first
      ``max_items`` sanitized elements.
    - Anything else, and any container reached once ``max_depth`` is used up,
      is stringified and sanitized as text.
    """
    if value is None:
        return None
    if isinstance(value, str):
        return sanitize_text(value, max_chars=max_chars)
    if isinstance(value, bool | int | float):
        return value

    is_mapping = isinstance(value, dict)
    is_sequence = isinstance(value, list | tuple | set)
    if max_depth <= 0 or not (is_mapping or is_sequence):
        return sanitize_text(str(value), max_chars=max_chars)

    def _descend(item: Any) -> Any:
        # One level deeper with the same size limits.
        return sanitize_payload(
            item,
            max_chars=max_chars,
            max_items=max_items,
            max_depth=max_depth - 1,
        )

    if is_mapping:
        return {key: _descend(item) for key, item in list(value.items())[:max_items]}
    return [_descend(item) for item in list(value)[:max_items]]
|
|
@@ -4,6 +4,7 @@ import json
|
|
|
4
4
|
import tempfile
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
|
+
from evalvault.adapters.outbound.tracker.log_sanitizer import MAX_LOG_CHARS, sanitize_payload
|
|
7
8
|
from evalvault.domain.entities import EvaluationRun
|
|
8
9
|
from evalvault.ports.outbound.tracker_port import TrackerPort
|
|
9
10
|
|
|
@@ -85,8 +86,8 @@ class MLflowAdapter(TrackerPort):
|
|
|
85
86
|
# Store span data as JSON artifact
|
|
86
87
|
span_data = {
|
|
87
88
|
"name": name,
|
|
88
|
-
"input": input_data,
|
|
89
|
-
"output": output_data,
|
|
89
|
+
"input": sanitize_payload(input_data, max_chars=MAX_LOG_CHARS),
|
|
90
|
+
"output": sanitize_payload(output_data, max_chars=MAX_LOG_CHARS),
|
|
90
91
|
}
|
|
91
92
|
|
|
92
93
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
@@ -9,6 +9,13 @@ from datetime import datetime
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any
|
|
10
10
|
|
|
11
11
|
from evalvault.adapters.outbound.tracer.open_rag_trace_helpers import serialize_json
|
|
12
|
+
from evalvault.adapters.outbound.tracker.log_sanitizer import (
|
|
13
|
+
MAX_CONTEXT_CHARS,
|
|
14
|
+
MAX_LOG_CHARS,
|
|
15
|
+
sanitize_payload,
|
|
16
|
+
sanitize_text,
|
|
17
|
+
sanitize_text_list,
|
|
18
|
+
)
|
|
12
19
|
from evalvault.domain.entities import (
|
|
13
20
|
EvaluationRun,
|
|
14
21
|
GenerationData,
|
|
@@ -171,9 +178,11 @@ class PhoenixAdapter(TrackerPort):
|
|
|
171
178
|
|
|
172
179
|
with self._tracer.start_span(name, context=context) as span:
|
|
173
180
|
if input_data is not None:
|
|
174
|
-
|
|
181
|
+
safe_input = sanitize_payload(input_data, max_chars=MAX_LOG_CHARS)
|
|
182
|
+
span.set_attribute("input", json.dumps(safe_input, default=str))
|
|
175
183
|
if output_data is not None:
|
|
176
|
-
|
|
184
|
+
safe_output = sanitize_payload(output_data, max_chars=MAX_LOG_CHARS)
|
|
185
|
+
span.set_attribute("output", json.dumps(safe_output, default=str))
|
|
177
186
|
|
|
178
187
|
def log_score(
|
|
179
188
|
self,
|
|
@@ -368,12 +377,20 @@ class PhoenixAdapter(TrackerPort):
|
|
|
368
377
|
context=context,
|
|
369
378
|
) as span:
|
|
370
379
|
# Input data
|
|
371
|
-
|
|
372
|
-
|
|
380
|
+
safe_question = sanitize_text(result.question, max_chars=MAX_LOG_CHARS) or ""
|
|
381
|
+
safe_answer = sanitize_text(result.answer, max_chars=MAX_LOG_CHARS) or ""
|
|
382
|
+
span.set_attribute("input.question", safe_question)
|
|
383
|
+
span.set_attribute("input.answer", safe_answer)
|
|
373
384
|
if result.contexts:
|
|
374
|
-
|
|
385
|
+
safe_contexts = sanitize_text_list(
|
|
386
|
+
result.contexts,
|
|
387
|
+
max_chars=MAX_CONTEXT_CHARS,
|
|
388
|
+
)
|
|
389
|
+
span.set_attribute("input.contexts", json.dumps(safe_contexts))
|
|
375
390
|
if result.ground_truth:
|
|
376
|
-
|
|
391
|
+
safe_ground_truth = sanitize_text(result.ground_truth, max_chars=MAX_LOG_CHARS)
|
|
392
|
+
if safe_ground_truth:
|
|
393
|
+
span.set_attribute("input.ground_truth", safe_ground_truth)
|
|
377
394
|
|
|
378
395
|
# Metrics
|
|
379
396
|
span.set_attribute("output.all_passed", result.all_passed)
|
|
@@ -468,8 +485,10 @@ class PhoenixAdapter(TrackerPort):
|
|
|
468
485
|
|
|
469
486
|
# Set query
|
|
470
487
|
if data.query:
|
|
471
|
-
|
|
472
|
-
|
|
488
|
+
safe_query = sanitize_text(data.query, max_chars=MAX_LOG_CHARS)
|
|
489
|
+
if safe_query:
|
|
490
|
+
span.set_attribute("retrieval.query", safe_query)
|
|
491
|
+
span.set_attribute("input.value", safe_query)
|
|
473
492
|
|
|
474
493
|
span.set_attribute("spec.version", "0.1")
|
|
475
494
|
span.set_attribute("rag.module", "retrieve")
|
|
@@ -495,11 +514,14 @@ class PhoenixAdapter(TrackerPort):
|
|
|
495
514
|
event_attrs["doc.rerank_rank"] = doc.rerank_rank
|
|
496
515
|
if doc.chunk_id:
|
|
497
516
|
event_attrs["doc.chunk_id"] = doc.chunk_id
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
517
|
+
safe_preview = (
|
|
518
|
+
sanitize_text(doc.content, max_chars=MAX_CONTEXT_CHARS) if doc.content else ""
|
|
519
|
+
)
|
|
520
|
+
if safe_preview:
|
|
521
|
+
event_attrs["doc.preview"] = safe_preview
|
|
501
522
|
if doc.metadata:
|
|
502
|
-
|
|
523
|
+
safe_metadata = sanitize_payload(doc.metadata, max_chars=MAX_LOG_CHARS)
|
|
524
|
+
event_attrs["doc.metadata"] = json.dumps(safe_metadata, default=str)
|
|
503
525
|
span.add_event(f"retrieved_doc_{i}", attributes=event_attrs)
|
|
504
526
|
|
|
505
527
|
def log_generation(
|
|
@@ -544,9 +566,8 @@ class PhoenixAdapter(TrackerPort):
|
|
|
544
566
|
span.set_attribute(key, value)
|
|
545
567
|
|
|
546
568
|
# Set prompt/response (truncate if too long)
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
response = data.response[:max_len] if data.response else ""
|
|
569
|
+
prompt = sanitize_text(data.prompt, max_chars=MAX_LOG_CHARS) or ""
|
|
570
|
+
response = sanitize_text(data.response, max_chars=MAX_LOG_CHARS) or ""
|
|
550
571
|
if prompt:
|
|
551
572
|
span.set_attribute("generation.prompt", prompt)
|
|
552
573
|
span.set_attribute("input.value", prompt)
|
|
@@ -559,24 +580,28 @@ class PhoenixAdapter(TrackerPort):
|
|
|
559
580
|
|
|
560
581
|
# Set prompt template if available
|
|
561
582
|
if data.prompt_template:
|
|
562
|
-
|
|
583
|
+
safe_template = sanitize_text(data.prompt_template, max_chars=MAX_LOG_CHARS)
|
|
584
|
+
if safe_template:
|
|
585
|
+
span.set_attribute("generation.prompt_template", safe_template)
|
|
563
586
|
|
|
564
587
|
def log_rag_trace(self, data: RAGTraceData) -> str:
|
|
565
588
|
"""Log a full RAG trace (retrieval + generation) to Phoenix."""
|
|
566
589
|
|
|
567
590
|
self._ensure_initialized()
|
|
568
591
|
metadata = {"event_type": "rag_trace", "total_time_ms": data.total_time_ms}
|
|
569
|
-
|
|
570
|
-
|
|
592
|
+
safe_query = sanitize_text(data.query, max_chars=MAX_LOG_CHARS)
|
|
593
|
+
if safe_query:
|
|
594
|
+
metadata["query"] = safe_query
|
|
571
595
|
if data.metadata:
|
|
572
|
-
|
|
596
|
+
safe_metadata = sanitize_payload(data.metadata, max_chars=MAX_LOG_CHARS)
|
|
597
|
+
metadata.update(safe_metadata)
|
|
573
598
|
|
|
574
599
|
should_end = False
|
|
575
600
|
trace_id = data.trace_id
|
|
576
601
|
if trace_id and trace_id in self._active_spans:
|
|
577
602
|
span = self._active_spans[trace_id]
|
|
578
603
|
else:
|
|
579
|
-
trace_name = f"rag-trace-{(
|
|
604
|
+
trace_name = f"rag-trace-{(safe_query or 'run')[:12]}"
|
|
580
605
|
trace_id = self.start_trace(trace_name, metadata=metadata)
|
|
581
606
|
span = self._active_spans[trace_id]
|
|
582
607
|
should_end = True
|
|
@@ -589,12 +614,13 @@ class PhoenixAdapter(TrackerPort):
|
|
|
589
614
|
if data.generation:
|
|
590
615
|
self.log_generation(trace_id, data.generation)
|
|
591
616
|
if data.final_answer:
|
|
592
|
-
preview = data.final_answer
|
|
593
|
-
|
|
594
|
-
|
|
617
|
+
preview = sanitize_text(data.final_answer, max_chars=MAX_LOG_CHARS)
|
|
618
|
+
if preview:
|
|
619
|
+
span.set_attribute("rag.final_answer", preview)
|
|
620
|
+
span.set_attribute("output.value", preview)
|
|
595
621
|
|
|
596
|
-
if
|
|
597
|
-
span.set_attribute("input.value",
|
|
622
|
+
if safe_query:
|
|
623
|
+
span.set_attribute("input.value", safe_query)
|
|
598
624
|
|
|
599
625
|
span.set_attribute("spec.version", "0.1")
|
|
600
626
|
span.set_attribute("rag.module", "custom.pipeline")
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Protocol
|
|
7
|
+
|
|
8
|
+
SECRET_REF_PREFIX = "secret://"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SecretProvider(Protocol):
    """Structural interface for objects that can look up a secret by name."""

    def get_secret(self, name: str) -> str:
        """Return the secret value identified by ``name``."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SecretProviderError(RuntimeError):
    """Raised when a secret provider is misconfigured or cannot return a secret."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class EnvSecretProvider:
    """Secret provider that reads values from process environment variables."""

    def get_secret(self, name: str) -> str:
        """Return the environment variable ``name``.

        Raises:
            SecretProviderError: If the variable is not set.
        """
        if (secret := os.environ.get(name)) is None:
            raise SecretProviderError(f"Missing secret in environment: {name}")
        return secret
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class AwsSecretsManagerProvider:
    """Secret provider backed by AWS Secrets Manager via ``boto3``."""

    # Region passed to the boto3 client; None leaves the choice to boto3.
    region_name: str | None = None

    def get_secret(self, name: str) -> str:
        """Fetch secret ``name`` and return it as text.

        ``SecretString`` is preferred; otherwise ``SecretBinary`` is
        base64-decoded and interpreted as UTF-8.

        Raises:
            SecretProviderError: If ``boto3`` is not installed or the response
                carries neither a string nor a binary value.
        """
        try:
            import boto3  # type: ignore
        except ImportError as exc:
            raise SecretProviderError("boto3 is required for AWS Secrets Manager") from exc

        client = boto3.client("secretsmanager", region_name=self.region_name)
        response = client.get_secret_value(SecretId=name)

        text_value = response.get("SecretString")
        if text_value is not None:
            return text_value

        blob = response.get("SecretBinary")
        if blob is None:
            raise SecretProviderError("Empty secret value returned from AWS Secrets Manager")
        # NOTE(review): boto3 may already hand back raw bytes here - confirm
        # that this additional base64 decode is required.
        return base64.b64decode(blob).decode("utf-8")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class GcpSecretManagerProvider:
    """Secret provider backed by Google Cloud Secret Manager."""

    def get_secret(self, name: str) -> str:
        """Access the secret version ``name`` and return its payload as UTF-8 text.

        Raises:
            SecretProviderError: If ``google-cloud-secret-manager`` is not
                installed.
        """
        try:
            from google.cloud import secretmanager  # type: ignore
        except ImportError as exc:
            raise SecretProviderError(
                "google-cloud-secret-manager is required for GCP Secret Manager"
            ) from exc

        client = secretmanager.SecretManagerServiceClient()
        version = client.access_secret_version(request={"name": name})
        payload_bytes = version.payload.data
        return payload_bytes.decode("utf-8")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class VaultSecretProvider:
    """Secret provider that reads KV v2 secrets from Vault via ``hvac``."""

    def get_secret(self, name: str) -> str:
        """Read the secret at path ``name`` and return a single value as text.

        The ``value`` key is used when present; otherwise the secret must
        contain exactly one key, whose value is returned.

        Raises:
            SecretProviderError: If ``hvac`` is not installed, the client is
                not authenticated, the payload is empty, or the payload has
                several keys and none is named ``value``.
        """
        try:
            import hvac  # type: ignore
        except ImportError as exc:
            raise SecretProviderError("hvac is required for Vault secret access") from exc

        client = hvac.Client()
        if not client.is_authenticated():
            raise SecretProviderError("Vault client authentication failed")

        response = client.secrets.kv.v2.read_secret_version(path=name)
        payload = response.get("data", {}).get("data", {})
        if not payload:
            raise SecretProviderError("Vault secret payload is empty")
        if "value" in payload:
            return str(payload["value"])
        if len(payload) != 1:
            raise SecretProviderError("Vault secret has multiple keys; specify 'value' key")
        (only_value,) = payload.values()
        return str(only_value)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_secret_reference(value: str | None) -> bool:
    """Return True when ``value`` is a non-empty string starting with ``SECRET_REF_PREFIX``."""
    if not value:
        # None and "" can never be references.
        return False
    return value.startswith(SECRET_REF_PREFIX)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def parse_secret_reference(value: str) -> str:
    """Return the secret name in ``value``: the text after ``SECRET_REF_PREFIX``, stripped.

    A value that does not start with the prefix is only stripped of
    surrounding whitespace.
    """
    without_prefix = value.removeprefix(SECRET_REF_PREFIX)
    return without_prefix.strip()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def build_secret_provider(provider_name: str | None) -> SecretProvider:
    """Create the secret provider selected by ``provider_name``.

    The name is matched case-insensitively after trimming whitespace.
    Accepted names: ``env``; ``aws`` / ``aws-secrets-manager`` /
    ``secretsmanager``; ``gcp`` / ``gcp-secret-manager`` / ``secretmanager``;
    ``vault`` / ``hashicorp-vault``. The AWS provider is given the value of
    the ``AWS_REGION`` environment variable (or ``None``) as its region.

    Raises:
        SecretProviderError: If the name is missing/blank or not recognised.
    """
    key = (provider_name or "").strip().lower()
    if key == "":
        raise SecretProviderError("Secret provider is not configured.")

    # Factories are zero-argument callables so AWS_REGION is read only when
    # the AWS provider is actually selected.
    make_aws = lambda: AwsSecretsManagerProvider(  # noqa: E731
        region_name=os.environ.get("AWS_REGION")
    )
    factories = {
        "env": EnvSecretProvider,
        "aws": make_aws,
        "aws-secrets-manager": make_aws,
        "secretsmanager": make_aws,
        "gcp": GcpSecretManagerProvider,
        "gcp-secret-manager": GcpSecretManagerProvider,
        "secretmanager": GcpSecretManagerProvider,
        "vault": VaultSecretProvider,
        "hashicorp-vault": VaultSecretProvider,
    }

    factory = factories.get(key)
    if factory is None:
        raise SecretProviderError(f"Unknown secret provider: {provider_name}")
    return factory()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def resolve_secret_reference(
    value: str,
    provider: SecretProvider,
    cache: dict[str, str] | None = None,
) -> str:
    """Resolve the secret reference ``value`` to its secret through ``provider``.

    Args:
        value: Reference string; its secret name is extracted with
            ``parse_secret_reference``.
        provider: Object whose ``get_secret(name)`` returns the secret.
        cache: Optional mapping of secret name to resolved value. When given,
            a cached value is returned without calling the provider, and
            fresh lookups are stored in it.

    Raises:
        SecretProviderError: If the reference contains no secret name.
    """
    name = parse_secret_reference(value)
    if not name:
        raise SecretProviderError("Secret reference must include a name.")

    caching = cache is not None
    if caching and name in cache:
        return cache[name]

    resolved = provider.get_secret(name)
    if caching:
        cache[name] = resolved
    return resolved
|
evalvault/config/settings.py
CHANGED
|
@@ -3,9 +3,16 @@
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from pydantic import Field
|
|
6
|
+
from pydantic import Field, PrivateAttr
|
|
7
7
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
8
8
|
|
|
9
|
+
from evalvault.config.secret_manager import (
|
|
10
|
+
SecretProviderError,
|
|
11
|
+
build_secret_provider,
|
|
12
|
+
is_secret_reference,
|
|
13
|
+
resolve_secret_reference,
|
|
14
|
+
)
|
|
15
|
+
|
|
9
16
|
|
|
10
17
|
def _detect_repo_root(start: Path, max_depth: int = 6) -> Path | None:
|
|
11
18
|
current = start
|
|
@@ -38,6 +45,75 @@ def _ensure_http_scheme(url_value: str) -> str:
|
|
|
38
45
|
return f"http://{value}"
|
|
39
46
|
|
|
40
47
|
|
|
48
|
+
def is_production_profile(profile_name: str | None) -> bool:
    """Return True when ``profile_name`` is ``"prod"``, ignoring case and surrounding whitespace."""
    if not profile_name:
        return False
    return profile_name.strip().lower() == "prod"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _parse_cors_origins(cors_origins: str | None) -> list[str]:
    """Split a comma-separated origins string into trimmed, non-empty entries.

    ``None`` or an empty string yields an empty list.
    """
    if not cors_origins:
        return []
    trimmed = map(str.strip, cors_origins.split(","))
    return [entry for entry in trimmed if entry]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Names of Settings attributes that are checked for secret references and,
# when they hold one, replaced with the resolved secret value.
SECRET_REFERENCE_FIELDS = (
    # API / knowledge endpoint tokens
    "api_auth_tokens",
    "knowledge_read_tokens",
    "knowledge_write_tokens",
    # LLM provider keys
    "openai_api_key",
    "anthropic_api_key",
    "azure_api_key",
    "vllm_api_key",
    # Tracker credentials
    "langfuse_public_key",
    "langfuse_secret_key",
    "phoenix_api_token",
    # Postgres credentials
    "postgres_password",
    "postgres_connection_string",
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _origin_targets_localhost(origin: str) -> bool:
    """Return True when the host of ``origin`` is ``localhost``, ``*.localhost`` or ``127.0.0.1``."""
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import urlsplit

    # A bare "host:port" has no "//" marking the authority, so urlsplit would
    # not see a hostname; prefix one in that case.
    candidate = origin if "://" in origin else f"//{origin}"
    try:
        hostname = urlsplit(candidate).hostname
    except ValueError:
        hostname = None

    if not hostname:
        # Could not parse: stay conservative and fall back to substring matching.
        return "localhost" in origin or "127.0.0.1" in origin

    hostname = hostname.rstrip(".")
    return hostname in {"localhost", "127.0.0.1"} or hostname.endswith(".localhost")


def _validate_production_settings(settings: "Settings") -> None:
    """Fail fast when the ``prod`` profile lacks required configuration.

    Returns immediately unless ``settings.evalvault_profile`` is the
    production profile. Otherwise the following are required:

    * ``api_auth_tokens``
    * ``openai_api_key`` when ``llm_provider`` is ``"openai"``
    * both Langfuse keys when ``tracker_provider`` is ``"langfuse"``
    * ``mlflow_tracking_uri`` when ``tracker_provider`` is ``"mlflow"``
    * ``postgres_password`` when a Postgres host is set without a connection string
    * at least one CORS origin, none of which points at localhost

    The localhost check compares the parsed hostname of each origin rather
    than searching for a substring, so origins such as
    ``https://notlocalhost.example.com`` are no longer rejected by mistake.

    Raises:
        ValueError: If a CORS origin targets localhost, or if any required
            setting is missing (the message lists them, sorted and de-duplicated).
    """
    if not is_production_profile(settings.evalvault_profile):
        return

    missing: list[str] = []

    if not settings.api_auth_tokens:
        missing.append("API_AUTH_TOKENS")

    if settings.llm_provider == "openai" and not settings.openai_api_key:
        missing.append("OPENAI_API_KEY")

    if settings.tracker_provider == "langfuse":
        if not settings.langfuse_public_key:
            missing.append("LANGFUSE_PUBLIC_KEY")
        if not settings.langfuse_secret_key:
            missing.append("LANGFUSE_SECRET_KEY")

    if settings.tracker_provider == "mlflow" and not settings.mlflow_tracking_uri:
        missing.append("MLFLOW_TRACKING_URI")

    if (
        settings.postgres_connection_string is None
        and settings.postgres_host
        and not settings.postgres_password
    ):
        missing.append("POSTGRES_PASSWORD")

    cors_origins = _parse_cors_origins(settings.cors_origins)
    if not cors_origins:
        missing.append("CORS_ORIGINS")
    elif any(_origin_targets_localhost(origin) for origin in cors_origins):
        # Raised before the aggregated "missing" error, as in the original.
        raise ValueError("Production profile forbids localhost in CORS_ORIGINS.")

    if missing:
        raise ValueError(
            "Missing required settings for prod profile: " + ", ".join(sorted(set(missing)))
        )
|
|
115
|
+
|
|
116
|
+
|
|
41
117
|
class Settings(BaseSettings):
|
|
42
118
|
"""Application configuration settings."""
|
|
43
119
|
|
|
@@ -48,6 +124,8 @@ class Settings(BaseSettings):
|
|
|
48
124
|
extra="ignore",
|
|
49
125
|
)
|
|
50
126
|
|
|
127
|
+
_secret_cache: dict[str, str] = PrivateAttr(default_factory=dict)
|
|
128
|
+
|
|
51
129
|
# Profile Configuration (YAML 기반 모델 프로필)
|
|
52
130
|
evalvault_profile: str | None = Field(
|
|
53
131
|
default=None,
|
|
@@ -58,6 +136,45 @@ class Settings(BaseSettings):
|
|
|
58
136
|
default="http://localhost:5173,http://127.0.0.1:5173",
|
|
59
137
|
description="Comma-separated list of allowed CORS origins.",
|
|
60
138
|
)
|
|
139
|
+
secret_provider: str | None = Field(
|
|
140
|
+
default=None,
|
|
141
|
+
description="Secret provider name for secret:// references (env/aws/gcp/vault).",
|
|
142
|
+
)
|
|
143
|
+
secret_cache_enabled: bool = Field(
|
|
144
|
+
default=True,
|
|
145
|
+
description="Cache resolved secret references in memory.",
|
|
146
|
+
)
|
|
147
|
+
api_auth_tokens: str | None = Field(
|
|
148
|
+
default=None,
|
|
149
|
+
description=(
|
|
150
|
+
"Comma-separated list of API bearer tokens for FastAPI auth. "
|
|
151
|
+
"Leave empty to disable authentication."
|
|
152
|
+
),
|
|
153
|
+
)
|
|
154
|
+
knowledge_read_tokens: str | None = Field(
|
|
155
|
+
default=None,
|
|
156
|
+
description="Comma-separated read tokens for knowledge endpoints.",
|
|
157
|
+
)
|
|
158
|
+
knowledge_write_tokens: str | None = Field(
|
|
159
|
+
default=None,
|
|
160
|
+
description="Comma-separated write tokens for knowledge endpoints.",
|
|
161
|
+
)
|
|
162
|
+
rate_limit_enabled: bool = Field(
|
|
163
|
+
default=False,
|
|
164
|
+
description="Enable API rate limiting for /api routes.",
|
|
165
|
+
)
|
|
166
|
+
rate_limit_requests: int = Field(
|
|
167
|
+
default=120,
|
|
168
|
+
description="Max requests allowed within rate_limit_window_seconds.",
|
|
169
|
+
)
|
|
170
|
+
rate_limit_window_seconds: int = Field(
|
|
171
|
+
default=60,
|
|
172
|
+
description="Window size for rate limit checks in seconds.",
|
|
173
|
+
)
|
|
174
|
+
rate_limit_block_threshold: int = Field(
|
|
175
|
+
default=10,
|
|
176
|
+
description="Log suspicious activity after this many rate limit blocks.",
|
|
177
|
+
)
|
|
61
178
|
evalvault_db_path: str = Field(
|
|
62
179
|
default="data/db/evalvault.db",
|
|
63
180
|
description="SQLite database path for API/CLI storage.",
|
|
@@ -71,6 +188,26 @@ class Settings(BaseSettings):
|
|
|
71
188
|
self.evalvault_db_path = _resolve_storage_path(self.evalvault_db_path)
|
|
72
189
|
self.evalvault_memory_db_path = _resolve_storage_path(self.evalvault_memory_db_path)
|
|
73
190
|
self.ollama_base_url = _ensure_http_scheme(self.ollama_base_url)
|
|
191
|
+
self._resolve_secret_references()
|
|
192
|
+
|
|
193
|
+
def _resolve_secret_references(self) -> None:
    """Replace secret references held in ``SECRET_REFERENCE_FIELDS`` with resolved values.

    Does nothing when none of those fields holds a string recognised by
    ``is_secret_reference``. Otherwise a provider is built from
    ``self.secret_provider`` and each referencing field is overwritten in
    place. Lookups share ``self._secret_cache`` when
    ``self.secret_cache_enabled`` is true.

    Raises:
        ValueError: If the secret provider cannot be built.
    """
    references_present = False
    for field_name in SECRET_REFERENCE_FIELDS:
        candidate = getattr(self, field_name, None)
        if isinstance(candidate, str) and is_secret_reference(candidate):
            references_present = True
            break
    if not references_present:
        return

    try:
        provider = build_secret_provider(self.secret_provider)
    except SecretProviderError as exc:
        raise ValueError(str(exc)) from exc

    # NOTE(review): only provider construction errors are converted to
    # ValueError; a SecretProviderError raised while resolving a reference
    # propagates unchanged - confirm this is intended.
    cache = self._secret_cache if self.secret_cache_enabled else None

    for field_name in SECRET_REFERENCE_FIELDS:
        current = getattr(self, field_name, None)
        if not isinstance(current, str):
            continue
        if not is_secret_reference(current):
            continue
        setattr(self, field_name, resolve_secret_reference(current, provider, cache))
|
|
74
211
|
|
|
75
212
|
# LLM Provider Selection
|
|
76
213
|
llm_provider: str = Field(
|
|
@@ -314,6 +451,8 @@ def get_settings() -> Settings:
|
|
|
314
451
|
if _settings.evalvault_profile:
|
|
315
452
|
_settings = apply_profile(_settings, _settings.evalvault_profile)
|
|
316
453
|
|
|
454
|
+
_validate_production_settings(_settings)
|
|
455
|
+
|
|
317
456
|
return _settings
|
|
318
457
|
|
|
319
458
|
|
|
@@ -346,6 +485,7 @@ def apply_runtime_overrides(overrides: dict[str, object]) -> Settings:
|
|
|
346
485
|
updated = Settings.model_validate(payload)
|
|
347
486
|
if updated.evalvault_profile:
|
|
348
487
|
updated = apply_profile(updated, updated.evalvault_profile)
|
|
488
|
+
_validate_production_settings(updated)
|
|
349
489
|
for key, value in updated.model_dump().items():
|
|
350
490
|
setattr(settings, key, value)
|
|
351
491
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evalvault
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.64.0
|
|
4
4
|
Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
|
|
5
5
|
Project-URL: Homepage, https://github.com/ntts9990/EvalVault
|
|
6
6
|
Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
|
|
@@ -111,6 +111,10 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'phoenix'
|
|
|
111
111
|
Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'phoenix'
|
|
112
112
|
Provides-Extra: postgres
|
|
113
113
|
Requires-Dist: psycopg[binary]>=3.0.0; extra == 'postgres'
|
|
114
|
+
Provides-Extra: secrets
|
|
115
|
+
Requires-Dist: boto3; extra == 'secrets'
|
|
116
|
+
Requires-Dist: google-cloud-secret-manager; extra == 'secrets'
|
|
117
|
+
Requires-Dist: hvac; extra == 'secrets'
|
|
114
118
|
Provides-Extra: timeseries
|
|
115
119
|
Requires-Dist: aeon>=1.3.0; extra == 'timeseries'
|
|
116
120
|
Requires-Dist: numba>=0.55.0; extra == 'timeseries'
|
|
@@ -175,6 +179,9 @@ uv run evalvault run --mode simple tests/fixtures/e2e/insurance_qa_korean.json \
|
|
|
175
179
|
--auto-analyze
|
|
176
180
|
```
|
|
177
181
|
|
|
182
|
+
- API 인증을 쓰려면 `.env`에 `API_AUTH_TOKENS`를 설정하세요.
|
|
183
|
+
- `secret://` 참조를 쓰면 `SECRET_PROVIDER`와 `--extra secrets`가 필요합니다.
|
|
184
|
+
- 레이트리밋은 `RATE_LIMIT_ENABLED`로 활성화합니다.
|
|
178
185
|
- 결과는 기본 DB(`data/db/evalvault.db`)에 저장되어 `history`, Web UI, 비교 분석에서 재사용됩니다.
|
|
179
186
|
- `--db`를 생략해도 기본 경로로 저장되며, 모든 데이터가 자동으로 엑셀로 내보내집니다.
|
|
180
187
|
- `--auto-analyze`는 요약 리포트 + 모듈별 아티팩트를 함께 생성합니다.
|
|
@@ -6,12 +6,12 @@ evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
|
|
|
6
6
|
evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
|
|
7
7
|
evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
|
|
8
8
|
evalvault/adapters/inbound/api/adapter.py,sha256=tYkJciUUFOK80QcSwzrqkXP1G4qUFItFV7uBYbjBGqU,68473
|
|
9
|
-
evalvault/adapters/inbound/api/main.py,sha256=
|
|
9
|
+
evalvault/adapters/inbound/api/main.py,sha256=lRuyg3aBs5jIk7tq4p4d7jrRkFpV_brZypoOq8s56Rk,6896
|
|
10
10
|
evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
|
|
11
11
|
evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
|
|
12
|
-
evalvault/adapters/inbound/api/routers/config.py,sha256=
|
|
12
|
+
evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
|
|
13
13
|
evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
|
|
14
|
-
evalvault/adapters/inbound/api/routers/knowledge.py,sha256=
|
|
14
|
+
evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
|
|
15
15
|
evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPee4OMwfxVdvRWHJ2_qYBF0,17175
|
|
16
16
|
evalvault/adapters/inbound/api/routers/runs.py,sha256=rydOvwWk24QIYafu3XYS3oL_VVCE_jHDmjADhA19T1s,40059
|
|
17
17
|
evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
|
|
@@ -148,7 +148,7 @@ evalvault/adapters/outbound/llm/token_aware_chat.py,sha256=yYmynaniNrYxtvXL6ejTE
|
|
|
148
148
|
evalvault/adapters/outbound/llm/vllm_adapter.py,sha256=OKb3Nda9OLMmHdvLjvkeJcQVeXf-B8TDibmAs7PS7kg,5157
|
|
149
149
|
evalvault/adapters/outbound/methods/__init__.py,sha256=3vyE9w3Ex2oMaO4ZE7Fy6xlHhJ6YQXHQNCvBiW9X2lM,345
|
|
150
150
|
evalvault/adapters/outbound/methods/baseline_oracle.py,sha256=oUsF5sIiPY5vuDtrz0Ki05SnPlnVzn7APERP5v1KpPM,1308
|
|
151
|
-
evalvault/adapters/outbound/methods/external_command.py,sha256=
|
|
151
|
+
evalvault/adapters/outbound/methods/external_command.py,sha256=hsWaqMG0u2JhsS736n0t8sobrGSJMNNp1tUL_M4zgyg,6118
|
|
152
152
|
evalvault/adapters/outbound/methods/registry.py,sha256=Znd35eouoe8k2E0NfDpVlDBSNAAWmyQkqBhAwVWllGI,7635
|
|
153
153
|
evalvault/adapters/outbound/nlp/__init__.py,sha256=9MQMIjEUU03T0ZZtG-Wjz0Bt2-esGEcfv1kT9W6_CBY,40
|
|
154
154
|
evalvault/adapters/outbound/nlp/korean/__init__.py,sha256=3ZVFHDxS6jzXat-WhTvW3hnbGNaeFhhWVVN1TtEOlnE,2267
|
|
@@ -180,9 +180,10 @@ evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py,sha256=LFnk-3FSL
|
|
|
180
180
|
evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py,sha256=D48Mbj-ioDKztjhV9513Q5DiUNiVdO60B_2sWMFEmnI,3520
|
|
181
181
|
evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py,sha256=inmTAolAVsm0IrszE9VTJoI7HSvGGAnGNZVu_vZRAGg,741
|
|
182
182
|
evalvault/adapters/outbound/tracker/__init__.py,sha256=Suu5BznOK5uTuD5_jS8JMZd8RPfQNlddLxHCBvMTm_4,358
|
|
183
|
-
evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=
|
|
184
|
-
evalvault/adapters/outbound/tracker/
|
|
185
|
-
evalvault/adapters/outbound/tracker/
|
|
183
|
+
evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=HmuMVUfDYjqNqHZGZMRybhrgca_EmeENuX7DfP-L5Fg,18504
|
|
184
|
+
evalvault/adapters/outbound/tracker/log_sanitizer.py,sha256=ilKTTSzsHslQYc-elnWu0Z3HKNNw1D1iI0_cCvYbo1M,2653
|
|
185
|
+
evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=m4xj3XBULFYg27U3twKrldLhbLyLNefezmb2pCpHJrw,7180
|
|
186
|
+
evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=sz5TyWC67e3YbQd2y-ogU9_66rilLdf8TbC-7bN_JR0,24316
|
|
186
187
|
evalvault/config/__init__.py,sha256=UCgeDx62M2gOuFvdN29wWwny2fdH4bPY_uUC3-42eDw,1297
|
|
187
188
|
evalvault/config/agent_types.py,sha256=EP2Pv3ZtOzDXIvIa-Hnd1to9JIbMUtGitrlwzZtx0Ys,13418
|
|
188
189
|
evalvault/config/domain_config.py,sha256=rOgNA2T8NWlDzcEFC0shdUCCww0lI1E5fUm5QrKQSZI,9264
|
|
@@ -190,7 +191,8 @@ evalvault/config/instrumentation.py,sha256=L8on9HjB6Ji8cSOJ6Pepsopfg9okDNMWF7LKZ
|
|
|
190
191
|
evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJIgBSrN2o,582
|
|
191
192
|
evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
|
|
192
193
|
evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
|
|
193
|
-
evalvault/config/
|
|
194
|
+
evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
|
|
195
|
+
evalvault/config/settings.py,sha256=JKJf8t20sOHYnHoCfTxqupQixNgfmWYJhChiGMNz-W0,17617
|
|
194
196
|
evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
|
|
195
197
|
evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
196
198
|
evalvault/domain/entities/__init__.py,sha256=RZi_6oQcq-2-sJcydfKOSr03vFxo-mF7CGHN9Ma4Cdg,3379
|
|
@@ -304,8 +306,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
|
|
|
304
306
|
evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
|
|
305
307
|
evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
|
|
306
308
|
evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
|
|
307
|
-
evalvault-1.
|
|
308
|
-
evalvault-1.
|
|
309
|
-
evalvault-1.
|
|
310
|
-
evalvault-1.
|
|
311
|
-
evalvault-1.
|
|
309
|
+
evalvault-1.64.0.dist-info/METADATA,sha256=DcFREpjg4tyoNf8FXTK632rgrOsWuFjSGnVBBQ4LeQ4,24276
|
|
310
|
+
evalvault-1.64.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
311
|
+
evalvault-1.64.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
|
|
312
|
+
evalvault-1.64.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
|
|
313
|
+
evalvault-1.64.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|