evalvault 1.63.0__py3-none-any.whl → 1.64.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,14 +2,59 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import hashlib
6
+ import logging
7
+ import time
8
+ from collections import defaultdict, deque
5
9
  from contextlib import asynccontextmanager
6
10
  from typing import Annotated
7
11
 
8
- from fastapi import Depends, FastAPI, Request
12
+ from fastapi import Depends, FastAPI, HTTPException, Request, Security
9
13
  from fastapi.middleware.cors import CORSMiddleware
14
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
15
+ from starlette.responses import JSONResponse
10
16
 
11
17
  from evalvault.adapters.inbound.api.adapter import WebUIAdapter, create_adapter
12
- from evalvault.config.settings import get_settings
18
+ from evalvault.config.settings import Settings, get_settings, is_production_profile
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class RateLimiter:
    """In-memory sliding-window rate limiter keyed by caller identity.

    Timestamps (``time.monotonic``) are kept per key in a deque; requests
    older than the window are evicted on each check.  State lives in
    process memory only, so limits reset on restart and are per-worker.
    """

    def __init__(self) -> None:
        # key -> monotonic timestamps of requests still inside the window
        self._requests: dict[str, deque[float]] = defaultdict(deque)
        # key -> cumulative count of rejected requests (never reset)
        self._blocked_counts: dict[str, int] = defaultdict(int)

    def check(self, key: str, limit: int, window_seconds: int) -> tuple[bool, int | None, int]:
        """Record one request attempt for *key*.

        Returns ``(allowed, retry_after_seconds, blocked_count)`` where
        ``retry_after_seconds`` is ``None`` when allowed and always >= 1
        when blocked.
        """
        now = time.monotonic()
        window = max(window_seconds, 1)
        timestamps = self._requests[key]
        # Evict timestamps that have aged out of the sliding window.
        while timestamps and now - timestamps[0] >= window:
            timestamps.popleft()
        if len(timestamps) < limit:
            timestamps.append(now)
            return True, None, self._blocked_counts[key]
        self._blocked_counts[key] += 1
        if timestamps:
            retry_after = int(window - (now - timestamps[0]))
        else:
            retry_after = window
        return False, max(retry_after, 1), self._blocked_counts[key]


# Process-wide limiter shared by the rate-limit middleware.
rate_limiter = RateLimiter()
43
+
44
+
45
+ def _hash_token(token: str) -> str:
46
+ return hashlib.sha256(token.encode("utf-8")).hexdigest()[:8]
47
+
48
+
49
+ def _rate_limit_key(request: Request) -> str:
50
+ auth_header = request.headers.get("Authorization", "")
51
+ if auth_header.lower().startswith("bearer "):
52
+ token = auth_header[7:].strip()
53
+ if token:
54
+ return f"token:{_hash_token(token)}"
55
+ client = request.client
56
+ host = client.host if client else "unknown"
57
+ return f"ip:{host}"
13
58
 
14
59
 
15
60
  @asynccontextmanager
@@ -23,6 +68,31 @@ async def lifespan(app: FastAPI):
23
68
  pass
24
69
 
25
70
 
71
# Bearer-token extractor. auto_error=False so missing credentials reach
# require_api_token (which decides whether auth is enforced at all)
# instead of FastAPI rejecting the request automatically.
auth_scheme = HTTPBearer(auto_error=False)
72
+
73
+
74
+ def _normalize_api_tokens(raw_tokens: str | None) -> set[str]:
75
+ if not raw_tokens:
76
+ return set()
77
+ return {token.strip() for token in raw_tokens.split(",") if token.strip()}
78
+
79
+
80
def require_api_token(
    credentials: Annotated[HTTPAuthorizationCredentials | None, Security(auth_scheme)],
    settings: Settings = Depends(get_settings),
) -> str | None:
    """FastAPI dependency enforcing bearer-token auth on API routers.

    Auth is disabled (returns None) when ``settings.api_auth_tokens`` is
    empty; otherwise the supplied bearer token must match one of the
    configured tokens or a 401 with ``WWW-Authenticate: Bearer`` is raised.
    Returns the accepted token string on success.
    """
    import hmac  # local: only needed when auth is enabled

    tokens = _normalize_api_tokens(settings.api_auth_tokens)
    if not tokens:
        return None
    supplied = credentials.credentials if credentials is not None else None
    # Compare in constant time against every configured token so response
    # timing cannot leak how much of a guessed token matched (the original
    # plain set-membership check was not constant-time).
    if supplied is None or not any(
        hmac.compare_digest(supplied, token) for token in tokens
    ):
        raise HTTPException(
            status_code=401,
            detail="Invalid or missing API token",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return supplied
94
+
95
+
26
96
  def create_app() -> FastAPI:
27
97
  """Create and configure the FastAPI application."""
28
98
  app = FastAPI(
@@ -32,10 +102,46 @@ def create_app() -> FastAPI:
32
102
  lifespan=lifespan,
33
103
  )
34
104
 
105
+ @app.middleware("http")
106
+ async def rate_limit_middleware(request: Request, call_next):
107
+ settings = get_settings()
108
+ if not settings.rate_limit_enabled:
109
+ return await call_next(request)
110
+ if not request.url.path.startswith("/api/"):
111
+ return await call_next(request)
112
+ limit = max(settings.rate_limit_requests, 1)
113
+ window_seconds = max(settings.rate_limit_window_seconds, 1)
114
+ key = _rate_limit_key(request)
115
+ allowed, retry_after, blocked_count = rate_limiter.check(
116
+ key,
117
+ limit,
118
+ window_seconds,
119
+ )
120
+ if not allowed:
121
+ if blocked_count >= settings.rate_limit_block_threshold:
122
+ logger.warning(
123
+ "Rate limit blocked request",
124
+ extra={
125
+ "rate_limit_key": key,
126
+ "blocked_count": blocked_count,
127
+ },
128
+ )
129
+ headers = {"Retry-After": str(retry_after)} if retry_after else None
130
+ return JSONResponse(
131
+ status_code=429,
132
+ content={"detail": "Rate limit exceeded"},
133
+ headers=headers,
134
+ )
135
+ return await call_next(request)
136
+
35
137
  settings = get_settings()
36
138
  cors_origins = [
37
139
  origin.strip() for origin in (settings.cors_origins or "").split(",") if origin.strip()
38
- ] or ["http://localhost:5173"]
140
+ ]
141
+ if not cors_origins:
142
+ if is_production_profile(settings.evalvault_profile):
143
+ raise RuntimeError("CORS_ORIGINS must be set for production profile.")
144
+ cors_origins = ["http://localhost:5173"]
39
145
 
40
146
  # Configure CORS
41
147
  app.add_middleware(
@@ -48,12 +154,44 @@ def create_app() -> FastAPI:
48
154
 
49
155
  from .routers import benchmark, config, domain, knowledge, pipeline, runs
50
156
 
51
- app.include_router(runs.router, prefix="/api/v1/runs", tags=["runs"])
52
- app.include_router(benchmark.router, prefix="/api/v1/benchmarks", tags=["benchmarks"])
53
- app.include_router(knowledge.router, prefix="/api/v1/knowledge", tags=["knowledge"])
54
- app.include_router(pipeline.router, prefix="/api/v1/pipeline", tags=["pipeline"])
55
- app.include_router(domain.router, prefix="/api/v1/domain", tags=["domain"])
56
- app.include_router(config.router, prefix="/api/v1/config", tags=["config"])
157
+ auth_dependencies = [Depends(require_api_token)]
158
+
159
+ app.include_router(
160
+ runs.router,
161
+ prefix="/api/v1/runs",
162
+ tags=["runs"],
163
+ dependencies=auth_dependencies,
164
+ )
165
+ app.include_router(
166
+ benchmark.router,
167
+ prefix="/api/v1/benchmarks",
168
+ tags=["benchmarks"],
169
+ dependencies=auth_dependencies,
170
+ )
171
+ app.include_router(
172
+ knowledge.router,
173
+ prefix="/api/v1/knowledge",
174
+ tags=["knowledge"],
175
+ dependencies=auth_dependencies,
176
+ )
177
+ app.include_router(
178
+ pipeline.router,
179
+ prefix="/api/v1/pipeline",
180
+ tags=["pipeline"],
181
+ dependencies=auth_dependencies,
182
+ )
183
+ app.include_router(
184
+ domain.router,
185
+ prefix="/api/v1/domain",
186
+ tags=["domain"],
187
+ dependencies=auth_dependencies,
188
+ )
189
+ app.include_router(
190
+ config.router,
191
+ prefix="/api/v1/config",
192
+ tags=["config"],
193
+ dependencies=auth_dependencies,
194
+ )
57
195
 
58
196
  @app.get("/health")
59
197
  def health_check():
@@ -28,6 +28,9 @@ def get_config():
28
28
  "phoenix_api_token",
29
29
  "postgres_password",
30
30
  "postgres_connection_string",
31
+ "api_auth_tokens",
32
+ "knowledge_read_tokens",
33
+ "knowledge_write_tokens",
31
34
  }
32
35
  )
33
36
 
@@ -80,7 +83,6 @@ def update_config(
80
83
  payload: ConfigUpdateRequest,
81
84
  adapter: AdapterDep,
82
85
  ):
83
- """Update runtime configuration (non-secret fields only)."""
84
86
  updates = payload.model_dump(exclude_unset=True)
85
87
  if not updates:
86
88
  return get_config()
@@ -96,6 +98,9 @@ def update_config(
96
98
  "phoenix_api_token",
97
99
  "postgres_password",
98
100
  "postgres_connection_string",
101
+ "api_auth_tokens",
102
+ "knowledge_read_tokens",
103
+ "knowledge_write_tokens",
99
104
  }
100
105
  )
101
106
 
@@ -2,10 +2,11 @@ import shutil
2
2
  from pathlib import Path
3
3
  from typing import Any
4
4
 
5
- from fastapi import APIRouter, BackgroundTasks, File, HTTPException, UploadFile
5
+ from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, Request, UploadFile
6
6
  from pydantic import BaseModel
7
7
 
8
8
  from evalvault.adapters.outbound.kg.parallel_kg_builder import ParallelKGBuilder
9
+ from evalvault.config.settings import Settings, get_settings
9
10
 
10
11
  router = APIRouter(tags=["knowledge"])
11
12
 
@@ -18,6 +19,47 @@ KG_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
18
19
  KG_JOBS: dict[str, dict[str, Any]] = {}
19
20
 
20
21
 
22
+ def _normalize_tokens(raw_tokens: str | None) -> set[str]:
23
+ if not raw_tokens:
24
+ return set()
25
+ return {token.strip() for token in raw_tokens.split(",") if token.strip()}
26
+
27
+
28
+ def _extract_bearer_token(request: Request) -> str | None:
29
+ auth_header = request.headers.get("Authorization", "")
30
+ if not auth_header:
31
+ return None
32
+ prefix = "bearer "
33
+ if auth_header.lower().startswith(prefix):
34
+ return auth_header[len(prefix) :].strip()
35
+ return None
36
+
37
+
38
def _require_knowledge_read_token(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> None:
    """Dependency guarding read-only knowledge endpoints.

    No-op when neither read nor write tokens are configured (auth
    disabled).  A valid write token also grants read access.  Raises a
    403 when the bearer token is missing or does not match.
    """
    import hmac  # local: only needed when token auth is enabled

    read_tokens = _normalize_tokens(settings.knowledge_read_tokens)
    write_tokens = _normalize_tokens(settings.knowledge_write_tokens)
    allowed = read_tokens | write_tokens
    if not allowed:
        return
    token = _extract_bearer_token(request)
    # Constant-time comparison so timing cannot leak token prefixes
    # (the original plain membership test was not constant-time).
    if token is None or not any(hmac.compare_digest(token, t) for t in allowed):
        raise HTTPException(status_code=403, detail="Invalid or missing knowledge read token")
49
+
50
+
51
def _require_knowledge_write_token(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> None:
    """Dependency guarding mutating knowledge endpoints (upload/build).

    No-op when no write tokens are configured (auth disabled).  Raises a
    403 when the bearer token is missing or does not match a configured
    write token; read tokens do NOT grant write access.
    """
    import hmac  # local: only needed when token auth is enabled

    write_tokens = _normalize_tokens(settings.knowledge_write_tokens)
    if not write_tokens:
        return
    token = _extract_bearer_token(request)
    # Constant-time comparison so timing cannot leak token prefixes
    # (the original plain membership test was not constant-time).
    if token is None or not any(hmac.compare_digest(token, t) for t in write_tokens):
        raise HTTPException(status_code=403, detail="Invalid or missing knowledge write token")
61
+
62
+
21
63
  class BuildKGRequest(BaseModel):
22
64
  workers: int = 4
23
65
  batch_size: int = 32
@@ -26,7 +68,10 @@ class BuildKGRequest(BaseModel):
26
68
 
27
69
 
28
70
  @router.post("/upload")
29
- async def upload_files(files: list[UploadFile] = File(...)):
71
+ async def upload_files(
72
+ files: list[UploadFile] = File(...),
73
+ _: None = Depends(_require_knowledge_write_token),
74
+ ):
30
75
  """Upload documents for Knowledge Graph building."""
31
76
  uploaded = []
32
77
  for file in files:
@@ -40,7 +85,9 @@ async def upload_files(files: list[UploadFile] = File(...)):
40
85
 
41
86
 
42
87
  @router.get("/files")
43
- def list_files():
88
+ def list_files(
89
+ _: None = Depends(_require_knowledge_read_token),
90
+ ):
44
91
  """List uploaded files."""
45
92
  files = []
46
93
  if DATA_DIR.exists():
@@ -49,7 +96,11 @@ def list_files():
49
96
 
50
97
 
51
98
  @router.post("/build", status_code=202)
52
- async def build_knowledge_graph(request: BuildKGRequest, background_tasks: BackgroundTasks):
99
+ async def build_knowledge_graph(
100
+ request: BuildKGRequest,
101
+ background_tasks: BackgroundTasks,
102
+ _: None = Depends(_require_knowledge_write_token),
103
+ ):
53
104
  """Trigger background Knowledge Graph construction."""
54
105
  job_id = f"kg_build_{len(KG_JOBS) + 1}"
55
106
  KG_JOBS[job_id] = {"status": "pending", "progress": "0%", "details": "Queued"}
@@ -121,7 +172,10 @@ async def build_knowledge_graph(request: BuildKGRequest, background_tasks: Backg
121
172
 
122
173
 
123
174
  @router.get("/jobs/{job_id}")
124
- def get_job_status(job_id: str):
175
+ def get_job_status(
176
+ job_id: str,
177
+ _: None = Depends(_require_knowledge_read_token),
178
+ ):
125
179
  job = KG_JOBS.get(job_id)
126
180
  if not job:
127
181
  raise HTTPException(status_code=404, detail="Job not found")
@@ -129,7 +183,9 @@ def get_job_status(job_id: str):
129
183
 
130
184
 
131
185
  @router.get("/stats")
132
- def get_graph_stats():
186
+ def get_graph_stats(
187
+ _: None = Depends(_require_knowledge_read_token),
188
+ ):
133
189
  """Get statistics of the built Knowledge Graph."""
134
190
  # Try to load from memory DB or default output JSON
135
191
  # For now, we'll try to load the JSON if it exists, or just return empty
@@ -213,7 +213,9 @@ def register_run_commands(
213
213
  None,
214
214
  "--output",
215
215
  "-o",
216
- help="Output file for results (JSON format).",
216
+ help=(
217
+ "Output file for results (JSON format). If .xlsx/.xls, exports Excel via DB save."
218
+ ),
217
219
  ),
218
220
  auto_analyze: bool = typer.Option(
219
221
  False,
@@ -813,6 +815,27 @@ def register_run_commands(
813
815
  if db_path is None:
814
816
  db_path = Path(settings.evalvault_db_path)
815
817
 
818
+ excel_output: Path | None = None
819
+ if output and output.suffix.lower() in {".xlsx", ".xls"}:
820
+ excel_output = output
821
+ output = None
822
+ if db_path is None:
823
+ print_cli_error(
824
+ console,
825
+ "엑셀 출력은 DB 저장이 필요합니다.",
826
+ fixes=["--db <sqlite_path> 옵션을 함께 지정하세요."],
827
+ )
828
+ raise typer.Exit(1)
829
+ print_cli_warning(
830
+ console,
831
+ "엑셀 출력은 DB 저장이 필수이며, 지정한 경로로만 저장됩니다.",
832
+ tips=[
833
+ f"DB 저장 경로: {db_path}",
834
+ "기본 DB 엑셀은 생성하지 않습니다.",
835
+ "필요 시 --db로 경로를 변경하세요.",
836
+ ],
837
+ )
838
+
816
839
  # Override model if specified
817
840
  if model:
818
841
  if _is_oss_open_model(model) and settings.llm_provider != "vllm":
@@ -1954,8 +1977,23 @@ def register_run_commands(
1954
1977
  console,
1955
1978
  storage_cls=SQLiteStorageAdapter,
1956
1979
  prompt_bundle=prompt_bundle,
1980
+ export_excel=excel_output is None,
1957
1981
  )
1958
1982
  _log_duration(console, verbose, "DB 저장 완료", db_started_at)
1983
+ if excel_output:
1984
+ excel_started_at = datetime.now()
1985
+ _log_timestamp(console, verbose, f"엑셀 저장 시작 ({excel_output})")
1986
+ try:
1987
+ storage = SQLiteStorageAdapter(db_path=db_path)
1988
+ storage.export_run_to_excel(result.run_id, excel_output)
1989
+ console.print(f"[green]Excel export saved: {excel_output}[/green]")
1990
+ except Exception as exc:
1991
+ print_cli_warning(
1992
+ console,
1993
+ "엑셀 내보내기에 실패했습니다.",
1994
+ tips=[str(exc)],
1995
+ )
1996
+ _log_duration(console, verbose, "엑셀 저장 완료", excel_started_at)
1959
1997
  if output:
1960
1998
  output_started_at = datetime.now()
1961
1999
  _log_timestamp(console, verbose, f"결과 저장 시작 ({output})")
@@ -2060,7 +2098,9 @@ def register_run_commands(
2060
2098
  None,
2061
2099
  "--output",
2062
2100
  "-o",
2063
- help="Output file for results (JSON format).",
2101
+ help=(
2102
+ "Output file for results (JSON format). If .xlsx/.xls, exports Excel via DB save."
2103
+ ),
2064
2104
  ),
2065
2105
  auto_analyze: bool = typer.Option(
2066
2106
  False,
@@ -2344,7 +2384,9 @@ def register_run_commands(
2344
2384
  None,
2345
2385
  "--output",
2346
2386
  "-o",
2347
- help="Output file for results (JSON format).",
2387
+ help=(
2388
+ "Output file for results (JSON format). If .xlsx/.xls, exports Excel via DB save."
2389
+ ),
2348
2390
  ),
2349
2391
  auto_analyze: bool = typer.Option(
2350
2392
  False,
@@ -430,6 +430,7 @@ def _save_to_db(
430
430
  *,
431
431
  storage_cls: type[SQLiteStorageAdapter] = SQLiteStorageAdapter,
432
432
  prompt_bundle: PromptSetBundle | None = None,
433
+ export_excel: bool = True,
433
434
  ) -> None:
434
435
  """Persist evaluation run (and optional prompt set) to SQLite database."""
435
436
  with console.status(f"[bold green]Saving to database {db_path}..."):
@@ -443,16 +444,17 @@ def _save_to_db(
443
444
  result.run_id,
444
445
  prompt_bundle.prompt_set.prompt_set_id,
445
446
  )
446
- excel_path = db_path.parent / f"evalvault_run_{result.run_id}.xlsx"
447
- try:
448
- storage.export_run_to_excel(result.run_id, excel_path)
449
- console.print(f"[green]Excel export saved: {excel_path}[/green]")
450
- except Exception as exc:
451
- print_cli_warning(
452
- console,
453
- "엑셀 내보내기에 실패했습니다.",
454
- tips=[str(exc)],
455
- )
447
+ if export_excel:
448
+ excel_path = db_path.parent / f"evalvault_run_{result.run_id}.xlsx"
449
+ try:
450
+ storage.export_run_to_excel(result.run_id, excel_path)
451
+ console.print(f"[green]Excel export saved: {excel_path}[/green]")
452
+ except Exception as exc:
453
+ print_cli_warning(
454
+ console,
455
+ "엑셀 내보내기에 실패했습니다.",
456
+ tips=[str(exc)],
457
+ )
456
458
  console.print(f"[green]Results saved to database: {db_path}[/green]")
457
459
  console.print(f"[dim]Run ID: {result.run_id}[/dim]")
458
460
  if prompt_bundle:
@@ -11,7 +11,7 @@ import logging
11
11
  import re
12
12
  from collections.abc import Callable, Sequence
13
13
  from dataclasses import dataclass, field
14
- from typing import TYPE_CHECKING, Any, cast
14
+ from typing import TYPE_CHECKING, Any
15
15
 
16
16
  import numpy as np
17
17
  from scipy import stats
@@ -9,7 +9,7 @@ import logging
9
9
  from collections.abc import Sequence
10
10
  from dataclasses import dataclass, field
11
11
  from pathlib import Path
12
- from typing import TYPE_CHECKING, Any
12
+ from typing import Any
13
13
 
14
14
  import yaml
15
15
 
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import json
6
6
  import os
7
7
  import subprocess
8
+ import warnings
8
9
  from collections.abc import Sequence
9
10
  from pathlib import Path
10
11
  from typing import Any
@@ -18,7 +19,9 @@ class ExternalCommandMethod(RagMethodPort):
18
19
 
19
20
  name = "external_command"
20
21
  version = "0.1.0"
21
- description = "Execute a method in a separate process."
22
+ description = (
23
+ "Execute a method in a separate process (shell=True requires a trusted command string)."
24
+ )
22
25
  tags = ("external", "isolation")
23
26
 
24
27
  def __init__(
@@ -67,6 +70,7 @@ class ExternalCommandMethod(RagMethodPort):
67
70
  )
68
71
 
69
72
  command = self._build_command(runtime)
73
+ self._validate_shell_usage(command)
70
74
  result = subprocess.run( # noqa: S603 - user-controlled command by design
71
75
  command,
72
76
  cwd=self._workdir,
@@ -104,6 +108,23 @@ class ExternalCommandMethod(RagMethodPort):
104
108
  except KeyError as exc:
105
109
  raise ValueError(f"Unknown command placeholder: {exc}") from exc
106
110
 
111
+ def _validate_shell_usage(self, command: list[str] | str) -> None:
112
+ if not self._shell:
113
+ return
114
+ if not isinstance(command, str):
115
+ raise ValueError(
116
+ "shell=True requires a single command string; list arguments are rejected."
117
+ )
118
+ if not command.strip():
119
+ raise ValueError("shell=True requires a non-empty command string.")
120
+ if "\n" in command or "\r" in command:
121
+ raise ValueError("shell=True command must not contain newlines.")
122
+ warnings.warn(
123
+ "shell=True executes through the system shell. Use only trusted commands.",
124
+ RuntimeWarning,
125
+ stacklevel=2,
126
+ )
127
+
107
128
  @staticmethod
108
129
  def _load_payload(path: Path) -> Any:
109
130
  if not path.exists():
@@ -622,7 +622,8 @@ class BaseSQLStorageAdapter(ABC):
622
622
  if isinstance(row, dict):
623
623
  return dict(row)
624
624
  if hasattr(row, "keys"):
625
- return {key: row[key] for key in row}
625
+ keys = row.keys()
626
+ return {key: row[key] for key in keys}
626
627
  try:
627
628
  return dict(row)
628
629
  except Exception:
@@ -4,6 +4,13 @@ from typing import Any
4
4
 
5
5
  from langfuse import Langfuse
6
6
 
7
+ from evalvault.adapters.outbound.tracker.log_sanitizer import (
8
+ MAX_CONTEXT_CHARS,
9
+ MAX_LOG_CHARS,
10
+ sanitize_payload,
11
+ sanitize_text,
12
+ sanitize_text_list,
13
+ )
7
14
  from evalvault.config.phoenix_support import extract_phoenix_links
8
15
  from evalvault.domain.entities import EvaluationRun
9
16
  from evalvault.ports.outbound.tracker_port import TrackerPort
@@ -88,21 +95,31 @@ class LangfuseAdapter(TrackerPort):
88
95
  raise ValueError(f"Trace not found: {trace_id}")
89
96
 
90
97
  trace_or_span = self._traces[trace_id]
98
+ safe_input = (
99
+ sanitize_payload(input_data, max_chars=MAX_LOG_CHARS)
100
+ if input_data is not None
101
+ else None
102
+ )
103
+ safe_output = (
104
+ sanitize_payload(output_data, max_chars=MAX_LOG_CHARS)
105
+ if output_data is not None
106
+ else None
107
+ )
91
108
  # Support both old and new Langfuse API
92
109
  if hasattr(trace_or_span, "start_span"):
93
110
  # Langfuse 3.x: create nested span
94
111
  child_span = trace_or_span.start_span(
95
112
  name=name,
96
- input=input_data,
97
- output=output_data,
113
+ input=safe_input,
114
+ output=safe_output,
98
115
  )
99
116
  child_span.end()
100
117
  else:
101
118
  # Langfuse 2.x: use span method on trace
102
119
  trace_or_span.span(
103
120
  name=name,
104
- input=input_data,
105
- output=output_data,
121
+ input=safe_input,
122
+ output=safe_output,
106
123
  )
107
124
 
108
125
  def log_score(
@@ -377,10 +394,13 @@ class LangfuseAdapter(TrackerPort):
377
394
  # Span input: test case data (question, answer, contexts, ground_truth)
378
395
  span_input = {
379
396
  "test_case_id": result.test_case_id,
380
- "question": result.question,
381
- "answer": result.answer,
382
- "contexts": result.contexts,
383
- "ground_truth": result.ground_truth,
397
+ "question": sanitize_text(result.question, max_chars=MAX_LOG_CHARS),
398
+ "answer": sanitize_text(result.answer, max_chars=MAX_LOG_CHARS),
399
+ "contexts": sanitize_text_list(
400
+ result.contexts,
401
+ max_chars=MAX_CONTEXT_CHARS,
402
+ ),
403
+ "ground_truth": sanitize_text(result.ground_truth, max_chars=MAX_LOG_CHARS),
384
404
  }
385
405
 
386
406
  # Span output: evaluation results
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Any
5
+
6
+ MASK_TOKEN = "[REDACTED]"
7
+ MAX_LOG_CHARS = 1000
8
+ MAX_CONTEXT_CHARS = 500
9
+ MAX_LIST_ITEMS = 20
10
+ MAX_PAYLOAD_DEPTH = 2
11
+
12
+ _EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
13
+ _PHONE_PATTERN = re.compile(
14
+ r"\b(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{2,4}\)?[-.\s]?)?\d{3,4}[-.\s]?\d{4}\b"
15
+ )
16
+ _SSN_PATTERN = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
17
+ _CARD_PATTERN = re.compile(r"\b(?:\d[ -]*?){13,16}\b")
18
+
19
+
20
+ def _mask_pii(text: str) -> str:
21
+ text = _EMAIL_PATTERN.sub(MASK_TOKEN, text)
22
+ text = _PHONE_PATTERN.sub(MASK_TOKEN, text)
23
+ text = _SSN_PATTERN.sub(MASK_TOKEN, text)
24
+ text = _CARD_PATTERN.sub(MASK_TOKEN, text)
25
+ return text
26
+
27
+
28
+ def _truncate(text: str, max_chars: int) -> str:
29
+ if max_chars <= 0:
30
+ return ""
31
+ if len(text) <= max_chars:
32
+ return text
33
+ if max_chars <= 3:
34
+ return text[:max_chars]
35
+ return f"{text[: max_chars - 3]}..."
36
+
37
+
38
+ def sanitize_text(value: str | None, *, max_chars: int = MAX_LOG_CHARS) -> str | None:
39
+ if value is None:
40
+ return None
41
+ if not isinstance(value, str):
42
+ value = str(value)
43
+ return _truncate(_mask_pii(value), max_chars)
44
+
45
+
46
+ def sanitize_text_list(
47
+ values: list[str] | tuple[str, ...] | None,
48
+ *,
49
+ max_items: int = MAX_LIST_ITEMS,
50
+ max_chars: int = MAX_CONTEXT_CHARS,
51
+ ) -> list[str]:
52
+ if not values:
53
+ return []
54
+ trimmed = list(values)[:max_items]
55
+ return [sanitize_text(item, max_chars=max_chars) or "" for item in trimmed]
56
+
57
+
58
+ def sanitize_payload(
59
+ value: Any,
60
+ *,
61
+ max_chars: int = MAX_LOG_CHARS,
62
+ max_items: int = MAX_LIST_ITEMS,
63
+ max_depth: int = MAX_PAYLOAD_DEPTH,
64
+ ) -> Any:
65
+ if value is None:
66
+ return None
67
+ if isinstance(value, str):
68
+ return sanitize_text(value, max_chars=max_chars)
69
+ if isinstance(value, bool | int | float):
70
+ return value
71
+ if max_depth <= 0:
72
+ return sanitize_text(str(value), max_chars=max_chars)
73
+ if isinstance(value, dict):
74
+ return {
75
+ key: sanitize_payload(
76
+ item,
77
+ max_chars=max_chars,
78
+ max_items=max_items,
79
+ max_depth=max_depth - 1,
80
+ )
81
+ for key, item in list(value.items())[:max_items]
82
+ }
83
+ if isinstance(value, list | tuple | set):
84
+ return [
85
+ sanitize_payload(
86
+ item,
87
+ max_chars=max_chars,
88
+ max_items=max_items,
89
+ max_depth=max_depth - 1,
90
+ )
91
+ for item in list(value)[:max_items]
92
+ ]
93
+ return sanitize_text(str(value), max_chars=max_chars)
@@ -4,6 +4,7 @@ import json
4
4
  import tempfile
5
5
  from typing import Any
6
6
 
7
+ from evalvault.adapters.outbound.tracker.log_sanitizer import MAX_LOG_CHARS, sanitize_payload
7
8
  from evalvault.domain.entities import EvaluationRun
8
9
  from evalvault.ports.outbound.tracker_port import TrackerPort
9
10
 
@@ -85,8 +86,8 @@ class MLflowAdapter(TrackerPort):
85
86
  # Store span data as JSON artifact
86
87
  span_data = {
87
88
  "name": name,
88
- "input": input_data,
89
- "output": output_data,
89
+ "input": sanitize_payload(input_data, max_chars=MAX_LOG_CHARS),
90
+ "output": sanitize_payload(output_data, max_chars=MAX_LOG_CHARS),
90
91
  }
91
92
 
92
93
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
@@ -9,6 +9,13 @@ from datetime import datetime
9
9
  from typing import TYPE_CHECKING, Any
10
10
 
11
11
  from evalvault.adapters.outbound.tracer.open_rag_trace_helpers import serialize_json
12
+ from evalvault.adapters.outbound.tracker.log_sanitizer import (
13
+ MAX_CONTEXT_CHARS,
14
+ MAX_LOG_CHARS,
15
+ sanitize_payload,
16
+ sanitize_text,
17
+ sanitize_text_list,
18
+ )
12
19
  from evalvault.domain.entities import (
13
20
  EvaluationRun,
14
21
  GenerationData,
@@ -171,9 +178,11 @@ class PhoenixAdapter(TrackerPort):
171
178
 
172
179
  with self._tracer.start_span(name, context=context) as span:
173
180
  if input_data is not None:
174
- span.set_attribute("input", json.dumps(input_data, default=str))
181
+ safe_input = sanitize_payload(input_data, max_chars=MAX_LOG_CHARS)
182
+ span.set_attribute("input", json.dumps(safe_input, default=str))
175
183
  if output_data is not None:
176
- span.set_attribute("output", json.dumps(output_data, default=str))
184
+ safe_output = sanitize_payload(output_data, max_chars=MAX_LOG_CHARS)
185
+ span.set_attribute("output", json.dumps(safe_output, default=str))
177
186
 
178
187
  def log_score(
179
188
  self,
@@ -368,12 +377,20 @@ class PhoenixAdapter(TrackerPort):
368
377
  context=context,
369
378
  ) as span:
370
379
  # Input data
371
- span.set_attribute("input.question", result.question or "")
372
- span.set_attribute("input.answer", result.answer or "")
380
+ safe_question = sanitize_text(result.question, max_chars=MAX_LOG_CHARS) or ""
381
+ safe_answer = sanitize_text(result.answer, max_chars=MAX_LOG_CHARS) or ""
382
+ span.set_attribute("input.question", safe_question)
383
+ span.set_attribute("input.answer", safe_answer)
373
384
  if result.contexts:
374
- span.set_attribute("input.contexts", json.dumps(result.contexts))
385
+ safe_contexts = sanitize_text_list(
386
+ result.contexts,
387
+ max_chars=MAX_CONTEXT_CHARS,
388
+ )
389
+ span.set_attribute("input.contexts", json.dumps(safe_contexts))
375
390
  if result.ground_truth:
376
- span.set_attribute("input.ground_truth", result.ground_truth)
391
+ safe_ground_truth = sanitize_text(result.ground_truth, max_chars=MAX_LOG_CHARS)
392
+ if safe_ground_truth:
393
+ span.set_attribute("input.ground_truth", safe_ground_truth)
377
394
 
378
395
  # Metrics
379
396
  span.set_attribute("output.all_passed", result.all_passed)
@@ -468,8 +485,10 @@ class PhoenixAdapter(TrackerPort):
468
485
 
469
486
  # Set query
470
487
  if data.query:
471
- span.set_attribute("retrieval.query", data.query)
472
- span.set_attribute("input.value", data.query)
488
+ safe_query = sanitize_text(data.query, max_chars=MAX_LOG_CHARS)
489
+ if safe_query:
490
+ span.set_attribute("retrieval.query", safe_query)
491
+ span.set_attribute("input.value", safe_query)
473
492
 
474
493
  span.set_attribute("spec.version", "0.1")
475
494
  span.set_attribute("rag.module", "retrieve")
@@ -495,11 +514,14 @@ class PhoenixAdapter(TrackerPort):
495
514
  event_attrs["doc.rerank_rank"] = doc.rerank_rank
496
515
  if doc.chunk_id:
497
516
  event_attrs["doc.chunk_id"] = doc.chunk_id
498
- preview = doc.content[:200] if doc.content else ""
499
- if preview:
500
- event_attrs["doc.preview"] = preview
517
+ safe_preview = (
518
+ sanitize_text(doc.content, max_chars=MAX_CONTEXT_CHARS) if doc.content else ""
519
+ )
520
+ if safe_preview:
521
+ event_attrs["doc.preview"] = safe_preview
501
522
  if doc.metadata:
502
- event_attrs["doc.metadata"] = json.dumps(doc.metadata, default=str)
523
+ safe_metadata = sanitize_payload(doc.metadata, max_chars=MAX_LOG_CHARS)
524
+ event_attrs["doc.metadata"] = json.dumps(safe_metadata, default=str)
503
525
  span.add_event(f"retrieved_doc_{i}", attributes=event_attrs)
504
526
 
505
527
  def log_generation(
@@ -544,9 +566,8 @@ class PhoenixAdapter(TrackerPort):
544
566
  span.set_attribute(key, value)
545
567
 
546
568
  # Set prompt/response (truncate if too long)
547
- max_len = 10000
548
- prompt = data.prompt[:max_len] if data.prompt else ""
549
- response = data.response[:max_len] if data.response else ""
569
+ prompt = sanitize_text(data.prompt, max_chars=MAX_LOG_CHARS) or ""
570
+ response = sanitize_text(data.response, max_chars=MAX_LOG_CHARS) or ""
550
571
  if prompt:
551
572
  span.set_attribute("generation.prompt", prompt)
552
573
  span.set_attribute("input.value", prompt)
@@ -559,24 +580,28 @@ class PhoenixAdapter(TrackerPort):
559
580
 
560
581
  # Set prompt template if available
561
582
  if data.prompt_template:
562
- span.set_attribute("generation.prompt_template", data.prompt_template[:max_len])
583
+ safe_template = sanitize_text(data.prompt_template, max_chars=MAX_LOG_CHARS)
584
+ if safe_template:
585
+ span.set_attribute("generation.prompt_template", safe_template)
563
586
 
564
587
  def log_rag_trace(self, data: RAGTraceData) -> str:
565
588
  """Log a full RAG trace (retrieval + generation) to Phoenix."""
566
589
 
567
590
  self._ensure_initialized()
568
591
  metadata = {"event_type": "rag_trace", "total_time_ms": data.total_time_ms}
569
- if data.query:
570
- metadata["query"] = data.query
592
+ safe_query = sanitize_text(data.query, max_chars=MAX_LOG_CHARS)
593
+ if safe_query:
594
+ metadata["query"] = safe_query
571
595
  if data.metadata:
572
- metadata.update(data.metadata)
596
+ safe_metadata = sanitize_payload(data.metadata, max_chars=MAX_LOG_CHARS)
597
+ metadata.update(safe_metadata)
573
598
 
574
599
  should_end = False
575
600
  trace_id = data.trace_id
576
601
  if trace_id and trace_id in self._active_spans:
577
602
  span = self._active_spans[trace_id]
578
603
  else:
579
- trace_name = f"rag-trace-{(data.query or 'run')[:12]}"
604
+ trace_name = f"rag-trace-{(safe_query or 'run')[:12]}"
580
605
  trace_id = self.start_trace(trace_name, metadata=metadata)
581
606
  span = self._active_spans[trace_id]
582
607
  should_end = True
@@ -589,12 +614,13 @@ class PhoenixAdapter(TrackerPort):
589
614
  if data.generation:
590
615
  self.log_generation(trace_id, data.generation)
591
616
  if data.final_answer:
592
- preview = data.final_answer[:1000]
593
- span.set_attribute("rag.final_answer", preview)
594
- span.set_attribute("output.value", preview)
617
+ preview = sanitize_text(data.final_answer, max_chars=MAX_LOG_CHARS)
618
+ if preview:
619
+ span.set_attribute("rag.final_answer", preview)
620
+ span.set_attribute("output.value", preview)
595
621
 
596
- if data.query:
597
- span.set_attribute("input.value", data.query)
622
+ if safe_query:
623
+ span.set_attribute("input.value", safe_query)
598
624
 
599
625
  span.set_attribute("spec.version", "0.1")
600
626
  span.set_attribute("rag.module", "custom.pipeline")
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import os
5
+ from dataclasses import dataclass
6
+ from typing import Protocol
7
+
8
+ SECRET_REF_PREFIX = "secret://"
9
+
10
+
11
class SecretProvider(Protocol):
    """Structural interface for objects that can look up a named secret."""

    def get_secret(self, name: str) -> str: ...
13
+
14
+
15
class SecretProviderError(RuntimeError):
    """Raised when a secret provider is misconfigured or a lookup fails."""
17
+
18
+
19
@dataclass
class EnvSecretProvider:
    """Secret provider that reads secrets from process environment variables."""

    def get_secret(self, name: str) -> str:
        """Return the value of environment variable *name*.

        Raises:
            SecretProviderError: if the variable is not set at all
                (an empty string is still returned as-is).
        """
        value = os.environ.get(name)
        if value is not None:
            return value
        raise SecretProviderError(f"Missing secret in environment: {name}")
26
+
27
+
28
@dataclass
class AwsSecretsManagerProvider:
    """Secret provider backed by AWS Secrets Manager (requires the ``boto3`` extra)."""

    # Region passed to the boto3 client; None lets boto3 use its own resolution chain.
    region_name: str | None = None

    def get_secret(self, name: str) -> str:
        """Fetch secret *name* from AWS Secrets Manager.

        Prefers the string payload; falls back to decoding the binary payload
        as base64-encoded UTF-8 text.

        Raises:
            SecretProviderError: if boto3 is unavailable or both payloads are empty.
        """
        try:
            import boto3  # type: ignore
        except ImportError as exc:
            raise SecretProviderError("boto3 is required for AWS Secrets Manager") from exc
        client = boto3.client("secretsmanager", region_name=self.region_name)
        payload = client.get_secret_value(SecretId=name)
        text = payload.get("SecretString")
        if text is not None:
            return text
        blob = payload.get("SecretBinary")
        if blob is None:
            raise SecretProviderError("Empty secret value returned from AWS Secrets Manager")
        # NOTE(review): matches the official AWS sample, which assumes the stored
        # binary payload is itself base64-encoded text — confirm for your secrets.
        return base64.b64decode(blob).decode("utf-8")
45
+
46
+
47
@dataclass
class GcpSecretManagerProvider:
    """Secret provider backed by GCP Secret Manager (requires google-cloud-secret-manager)."""

    def get_secret(self, name: str) -> str:
        """Fetch a secret version payload from GCP Secret Manager.

        *name* is passed through as the request's ``name`` field, so it is
        presumably a full secret-version resource path — verify against callers.

        Raises:
            SecretProviderError: if the GCP client library is not installed.
        """
        try:
            from google.cloud import secretmanager  # type: ignore
        except ImportError as exc:
            raise SecretProviderError(
                "google-cloud-secret-manager is required for GCP Secret Manager"
            ) from exc
        client = secretmanager.SecretManagerServiceClient()
        access_response = client.access_secret_version(request={"name": name})
        return access_response.payload.data.decode("utf-8")
59
+
60
+
61
@dataclass
class VaultSecretProvider:
    """Secret provider backed by HashiCorp Vault KV v2 (requires ``hvac``)."""

    def get_secret(self, name: str) -> str:
        """Read secret at KV-v2 path *name* from Vault.

        The secret's ``value`` key wins; a single-key payload is also accepted.

        Raises:
            SecretProviderError: if hvac is missing, auth fails, the payload is
                empty, or it is ambiguous (multiple keys without ``value``).
        """
        try:
            import hvac  # type: ignore
        except ImportError as exc:
            raise SecretProviderError("hvac is required for Vault secret access") from exc
        # hvac.Client() with no args relies on VAULT_ADDR / VAULT_TOKEN env config.
        client = hvac.Client()
        if not client.is_authenticated():
            raise SecretProviderError("Vault client authentication failed")
        secret_response = client.secrets.kv.v2.read_secret_version(path=name)
        payload = secret_response.get("data", {}).get("data", {})
        if not payload:
            raise SecretProviderError("Vault secret payload is empty")
        if "value" in payload:
            return str(payload["value"])
        if len(payload) == 1:
            (only_value,) = payload.values()
            return str(only_value)
        raise SecretProviderError("Vault secret has multiple keys; specify 'value' key")
80
+
81
+
82
def is_secret_reference(value: str | None) -> bool:
    """Return True when *value* is a non-empty string using the ``secret://`` scheme."""
    if not value:
        return False
    return value.startswith(SECRET_REF_PREFIX)
84
+
85
+
86
def parse_secret_reference(value: str) -> str:
    """Strip the ``secret://`` prefix and surrounding whitespace, yielding the secret name."""
    without_prefix = value.removeprefix(SECRET_REF_PREFIX)
    return without_prefix.strip()
88
+
89
+
90
def build_secret_provider(provider_name: str | None) -> SecretProvider:
    """Instantiate the secret provider selected by *provider_name*.

    Accepted names (case-insensitive, whitespace-trimmed): ``env``, the AWS
    aliases ``aws``/``aws-secrets-manager``/``secretsmanager``, the GCP aliases
    ``gcp``/``gcp-secret-manager``/``secretmanager``, and
    ``vault``/``hashicorp-vault``.

    Raises:
        SecretProviderError: when the name is empty or unrecognized.
    """
    normalized = (provider_name or "").strip().lower()
    if not normalized:
        raise SecretProviderError("Secret provider is not configured.")
    factories = {
        "env": EnvSecretProvider,
        "aws": lambda: AwsSecretsManagerProvider(region_name=os.environ.get("AWS_REGION")),
        "aws-secrets-manager": lambda: AwsSecretsManagerProvider(
            region_name=os.environ.get("AWS_REGION")
        ),
        "secretsmanager": lambda: AwsSecretsManagerProvider(
            region_name=os.environ.get("AWS_REGION")
        ),
        "gcp": GcpSecretManagerProvider,
        "gcp-secret-manager": GcpSecretManagerProvider,
        "secretmanager": GcpSecretManagerProvider,
        "vault": VaultSecretProvider,
        "hashicorp-vault": VaultSecretProvider,
    }
    factory = factories.get(normalized)
    if factory is None:
        raise SecretProviderError(f"Unknown secret provider: {provider_name}")
    return factory()
103
+
104
+
105
def resolve_secret_reference(
    value: str,
    provider: SecretProvider,
    cache: dict[str, str] | None = None,
) -> str:
    """Resolve a ``secret://`` reference through *provider*.

    When *cache* is supplied, each secret name is fetched at most once and
    subsequent lookups are served from the cache.

    Raises:
        SecretProviderError: when the reference carries no name after the prefix.
    """
    secret_name = parse_secret_reference(value)
    if not secret_name:
        raise SecretProviderError("Secret reference must include a name.")
    if cache is None:
        return provider.get_secret(secret_name)
    if secret_name not in cache:
        cache[secret_name] = provider.get_secret(secret_name)
    return cache[secret_name]
@@ -3,9 +3,16 @@
3
3
  from pathlib import Path
4
4
  from typing import Any
5
5
 
6
- from pydantic import Field
6
+ from pydantic import Field, PrivateAttr
7
7
  from pydantic_settings import BaseSettings, SettingsConfigDict
8
8
 
9
+ from evalvault.config.secret_manager import (
10
+ SecretProviderError,
11
+ build_secret_provider,
12
+ is_secret_reference,
13
+ resolve_secret_reference,
14
+ )
15
+
9
16
 
10
17
  def _detect_repo_root(start: Path, max_depth: int = 6) -> Path | None:
11
18
  current = start
@@ -38,6 +45,75 @@ def _ensure_http_scheme(url_value: str) -> str:
38
45
  return f"http://{value}"
39
46
 
40
47
 
48
def is_production_profile(profile_name: str | None) -> bool:
    """Return True when *profile_name* selects the production ("prod") profile."""
    if profile_name is None:
        return False
    return profile_name.strip().lower() == "prod"
50
+
51
+
52
+ def _parse_cors_origins(cors_origins: str | None) -> list[str]:
53
+ if not cors_origins:
54
+ return []
55
+ return [origin.strip() for origin in cors_origins.split(",") if origin.strip()]
56
+
57
+
58
# Settings fields whose values may be secret:// references; resolved in order.
SECRET_REFERENCE_FIELDS = (
    "api_auth_tokens",
    "knowledge_read_tokens",
    "knowledge_write_tokens",
    "openai_api_key",
    "anthropic_api_key",
    "azure_api_key",
    "vllm_api_key",
    "langfuse_public_key",
    "langfuse_secret_key",
    "phoenix_api_token",
    "postgres_password",
    "postgres_connection_string",
)
72
+
73
+
74
def _validate_production_settings(settings: "Settings") -> None:
    """Enforce hard requirements when the ``prod`` profile is active.

    Collects every missing required variable into one ValueError so operators
    see the full list at once; loopback CORS origins fail fast instead.

    Fix: the original localhost check used plain substring matching, which
    wrongly rejected legitimate origins merely containing "localhost" (e.g.
    ``https://notlocalhost.example.com``). We now parse each origin and compare
    its hostname; substring matching is kept only as a fallback for origins
    that lack a scheme (where urlparse yields no hostname).

    Raises:
        ValueError: when required settings are missing or CORS allows loopback.
    """
    if not is_production_profile(settings.evalvault_profile):
        return

    from urllib.parse import urlparse

    missing: list[str] = []

    if not settings.api_auth_tokens:
        missing.append("API_AUTH_TOKENS")

    if settings.llm_provider == "openai" and not settings.openai_api_key:
        missing.append("OPENAI_API_KEY")

    if settings.tracker_provider == "langfuse":
        if not settings.langfuse_public_key:
            missing.append("LANGFUSE_PUBLIC_KEY")
        if not settings.langfuse_secret_key:
            missing.append("LANGFUSE_SECRET_KEY")

    if settings.tracker_provider == "mlflow" and not settings.mlflow_tracking_uri:
        missing.append("MLFLOW_TRACKING_URI")

    # A full connection string supersedes discrete credentials.
    if (
        settings.postgres_connection_string is None
        and settings.postgres_host
        and not settings.postgres_password
    ):
        missing.append("POSTGRES_PASSWORD")

    cors_origins = _parse_cors_origins(settings.cors_origins)
    if not cors_origins:
        missing.append("CORS_ORIGINS")
    else:
        loopback_hosts = {"localhost", "127.0.0.1", "::1"}
        for origin in cors_origins:
            host = urlparse(origin).hostname
            if host is not None:
                blocked = host.lower() in loopback_hosts
            else:
                # No parseable hostname (scheme-less origin): keep the original
                # conservative substring check.
                blocked = any(candidate in origin for candidate in loopback_hosts)
            if blocked:
                raise ValueError("Production profile forbids localhost in CORS_ORIGINS.")

    if missing:
        raise ValueError(
            "Missing required settings for prod profile: " + ", ".join(sorted(set(missing)))
        )
115
+
116
+
41
117
  class Settings(BaseSettings):
42
118
  """Application configuration settings."""
43
119
 
@@ -48,6 +124,8 @@ class Settings(BaseSettings):
48
124
  extra="ignore",
49
125
  )
50
126
 
127
+ _secret_cache: dict[str, str] = PrivateAttr(default_factory=dict)
128
+
51
129
  # Profile Configuration (YAML 기반 모델 프로필)
52
130
  evalvault_profile: str | None = Field(
53
131
  default=None,
@@ -58,6 +136,45 @@ class Settings(BaseSettings):
58
136
  default="http://localhost:5173,http://127.0.0.1:5173",
59
137
  description="Comma-separated list of allowed CORS origins.",
60
138
  )
139
+ secret_provider: str | None = Field(
140
+ default=None,
141
+ description="Secret provider name for secret:// references (env/aws/gcp/vault).",
142
+ )
143
+ secret_cache_enabled: bool = Field(
144
+ default=True,
145
+ description="Cache resolved secret references in memory.",
146
+ )
147
+ api_auth_tokens: str | None = Field(
148
+ default=None,
149
+ description=(
150
+ "Comma-separated list of API bearer tokens for FastAPI auth. "
151
+ "Leave empty to disable authentication."
152
+ ),
153
+ )
154
+ knowledge_read_tokens: str | None = Field(
155
+ default=None,
156
+ description="Comma-separated read tokens for knowledge endpoints.",
157
+ )
158
+ knowledge_write_tokens: str | None = Field(
159
+ default=None,
160
+ description="Comma-separated write tokens for knowledge endpoints.",
161
+ )
162
+ rate_limit_enabled: bool = Field(
163
+ default=False,
164
+ description="Enable API rate limiting for /api routes.",
165
+ )
166
+ rate_limit_requests: int = Field(
167
+ default=120,
168
+ description="Max requests allowed within rate_limit_window_seconds.",
169
+ )
170
+ rate_limit_window_seconds: int = Field(
171
+ default=60,
172
+ description="Window size for rate limit checks in seconds.",
173
+ )
174
+ rate_limit_block_threshold: int = Field(
175
+ default=10,
176
+ description="Log suspicious activity after this many rate limit blocks.",
177
+ )
61
178
  evalvault_db_path: str = Field(
62
179
  default="data/db/evalvault.db",
63
180
  description="SQLite database path for API/CLI storage.",
@@ -71,6 +188,26 @@ class Settings(BaseSettings):
71
188
  self.evalvault_db_path = _resolve_storage_path(self.evalvault_db_path)
72
189
  self.evalvault_memory_db_path = _resolve_storage_path(self.evalvault_memory_db_path)
73
190
  self.ollama_base_url = _ensure_http_scheme(self.ollama_base_url)
191
+ self._resolve_secret_references()
192
+
193
+ def _resolve_secret_references(self) -> None:
194
+ secret_values = [
195
+ value
196
+ for value in (getattr(self, field, None) for field in SECRET_REFERENCE_FIELDS)
197
+ if isinstance(value, str)
198
+ ]
199
+ if not any(is_secret_reference(value) for value in secret_values):
200
+ return
201
+ try:
202
+ provider = build_secret_provider(self.secret_provider)
203
+ except SecretProviderError as exc:
204
+ raise ValueError(str(exc)) from exc
205
+ cache = self._secret_cache if self.secret_cache_enabled else None
206
+ for field in SECRET_REFERENCE_FIELDS:
207
+ value = getattr(self, field, None)
208
+ if isinstance(value, str) and is_secret_reference(value):
209
+ resolved = resolve_secret_reference(value, provider, cache)
210
+ setattr(self, field, resolved)
74
211
 
75
212
  # LLM Provider Selection
76
213
  llm_provider: str = Field(
@@ -314,6 +451,8 @@ def get_settings() -> Settings:
314
451
  if _settings.evalvault_profile:
315
452
  _settings = apply_profile(_settings, _settings.evalvault_profile)
316
453
 
454
+ _validate_production_settings(_settings)
455
+
317
456
  return _settings
318
457
 
319
458
 
@@ -346,6 +485,7 @@ def apply_runtime_overrides(overrides: dict[str, object]) -> Settings:
346
485
  updated = Settings.model_validate(payload)
347
486
  if updated.evalvault_profile:
348
487
  updated = apply_profile(updated, updated.evalvault_profile)
488
+ _validate_production_settings(updated)
349
489
  for key, value in updated.model_dump().items():
350
490
  setattr(settings, key, value)
351
491
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evalvault
3
- Version: 1.63.0
3
+ Version: 1.64.0
4
4
  Summary: RAG evaluation system using Ragas with Phoenix/Langfuse tracing
5
5
  Project-URL: Homepage, https://github.com/ntts9990/EvalVault
6
6
  Project-URL: Documentation, https://github.com/ntts9990/EvalVault#readme
@@ -111,6 +111,10 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == 'phoenix'
111
111
  Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'phoenix'
112
112
  Provides-Extra: postgres
113
113
  Requires-Dist: psycopg[binary]>=3.0.0; extra == 'postgres'
114
+ Provides-Extra: secrets
115
+ Requires-Dist: boto3; extra == 'secrets'
116
+ Requires-Dist: google-cloud-secret-manager; extra == 'secrets'
117
+ Requires-Dist: hvac; extra == 'secrets'
114
118
  Provides-Extra: timeseries
115
119
  Requires-Dist: aeon>=1.3.0; extra == 'timeseries'
116
120
  Requires-Dist: numba>=0.55.0; extra == 'timeseries'
@@ -175,6 +179,9 @@ uv run evalvault run --mode simple tests/fixtures/e2e/insurance_qa_korean.json \
175
179
  --auto-analyze
176
180
  ```
177
181
 
182
+ - API 인증을 쓰려면 `.env`에 `API_AUTH_TOKENS`를 설정하세요.
183
+ - `secret://` 참조를 쓰면 `SECRET_PROVIDER`와 `--extra secrets`가 필요합니다.
184
+ - 레이트리밋은 `RATE_LIMIT_ENABLED`로 활성화합니다.
178
185
  - 결과는 기본 DB(`data/db/evalvault.db`)에 저장되어 `history`, Web UI, 비교 분석에서 재사용됩니다.
179
186
  - `--db`를 생략해도 기본 경로로 저장되며, 모든 데이터가 자동으로 엑셀로 내보내집니다.
180
187
  - `--auto-analyze`는 요약 리포트 + 모듈별 아티팩트를 함께 생성합니다.
@@ -6,12 +6,12 @@ evalvault/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
6
6
  evalvault/adapters/inbound/__init__.py,sha256=SG1svel1PwqetnqVpKFLSv612_WwGwLTbFpYgwk6FMw,166
7
7
  evalvault/adapters/inbound/api/__init__.py,sha256=LeVVttCA3tLKoHA2PO4z3y8VkfVcf3Bq8CZSzo91lf4,34
8
8
  evalvault/adapters/inbound/api/adapter.py,sha256=tYkJciUUFOK80QcSwzrqkXP1G4qUFItFV7uBYbjBGqU,68473
9
- evalvault/adapters/inbound/api/main.py,sha256=KdlAxKn0QfGI3UuoTrBDBbUs2xCvP8lnWOY1ce3svcU,2619
9
+ evalvault/adapters/inbound/api/main.py,sha256=lRuyg3aBs5jIk7tq4p4d7jrRkFpV_brZypoOq8s56Rk,6896
10
10
  evalvault/adapters/inbound/api/routers/__init__.py,sha256=q07_YF9TnBl68bqcRCvhPU4-zRTyvmPoHVehwO6W7QM,19
11
11
  evalvault/adapters/inbound/api/routers/benchmark.py,sha256=yevntbZcNtMvbVODsITUBgR1Ka4pdFQrXBJJ4K4Jyr4,4477
12
- evalvault/adapters/inbound/api/routers/config.py,sha256=CN-FH2cn0Ive-BD3WacWY6PFfuMtZEHP5_out3fvST4,3957
12
+ evalvault/adapters/inbound/api/routers/config.py,sha256=LygN0fVMr8NFtj5zuQXnVFhoafx56Txa98vpwtPa4Jc,4104
13
13
  evalvault/adapters/inbound/api/routers/domain.py,sha256=RsR7GIFMjccDN7vpG1uDyk9n1DnCTH18JDGAX7o4Qqc,3648
14
- evalvault/adapters/inbound/api/routers/knowledge.py,sha256=7mgyoUM1PepFb4X8_Ntn0vd7ZZYcNbM3_9nyD10g4Aw,5307
14
+ evalvault/adapters/inbound/api/routers/knowledge.py,sha256=yb_e7OEPtwldOAzHTGiWe7jShHw2JdpOFnzGPMceRsg,7109
15
15
  evalvault/adapters/inbound/api/routers/pipeline.py,sha256=8UgQzNFHcuqS61s69mOrPee4OMwfxVdvRWHJ2_qYBF0,17175
16
16
  evalvault/adapters/inbound/api/routers/runs.py,sha256=rydOvwWk24QIYafu3XYS3oL_VVCE_jHDmjADhA19T1s,40059
17
17
  evalvault/adapters/inbound/cli/__init__.py,sha256=a42flC5NK-VfbdbBrE49IrUL5zAyKdXZYJVM6E3NTE0,675
@@ -36,8 +36,8 @@ evalvault/adapters/inbound/cli/commands/method.py,sha256=OWdoofhvsDJchgNKnGGjXfI
36
36
  evalvault/adapters/inbound/cli/commands/phoenix.py,sha256=LQi3KTLq1ybjjBuz92oQ6lYyBS3mHrCHk0qe-7bqB4U,15611
37
37
  evalvault/adapters/inbound/cli/commands/pipeline.py,sha256=NeqWLzO9kRDuZd0pHAIHglP3F7VzoNOU4JI0QcSZ120,7788
38
38
  evalvault/adapters/inbound/cli/commands/prompts.py,sha256=lddde5VbjYaqN_9gHPLNu6DWpg5fE-KqZzjN-XYwvJw,27153
39
- evalvault/adapters/inbound/cli/commands/run.py,sha256=5rWCh8dTVqRgoiKu2Kd_53PxeIh0GRIkULl3GSpoSiU,117412
40
- evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=cc6oZHJSBJM9cxr928zq3sGrIh73u2vD0z2j9IzbPo4,40236
39
+ evalvault/adapters/inbound/cli/commands/run.py,sha256=X19rgXhajhvZNA4c0JMmzmPatTxhZgfapuW07bZL9xA,119265
40
+ evalvault/adapters/inbound/cli/commands/run_helpers.py,sha256=hu2TioocitUZzGR7HUwZ6gOeEJSvt5tGNjwXOlo4Eic,40336
41
41
  evalvault/adapters/inbound/cli/commands/stage.py,sha256=oRC9c5CysLX90Iy5Ba1pc_00DaOBS78lcBvzkbdrGRM,17123
42
42
  evalvault/adapters/inbound/cli/utils/__init__.py,sha256=QPNKneZS-Z-tTnYYxtgJXgcJWY6puUlRQcKrn7Mlv1M,685
43
43
  evalvault/adapters/inbound/cli/utils/analysis_io.py,sha256=RHkKEq4e-PtbtRDlXAJWU80RYHNPw-O5V9_GujdaGfc,13393
@@ -127,8 +127,8 @@ evalvault/adapters/outbound/domain_memory/domain_memory_schema.sql,sha256=APlNhJ
127
127
  evalvault/adapters/outbound/domain_memory/sqlite_adapter.py,sha256=RWobnFgvxiItxFAr6niY89sT19O-cnExTbP0I7UAY78,85186
128
128
  evalvault/adapters/outbound/improvement/__init__.py,sha256=tXA6vaZOLvqwJpyjGMiC8WrvszMmvUPzJnHjvJhQxSI,1143
129
129
  evalvault/adapters/outbound/improvement/insight_generator.py,sha256=U16l0euCZy0_08Zb_i0eijXSjS5t-iq0iMUfttwPqgI,17636
130
- evalvault/adapters/outbound/improvement/pattern_detector.py,sha256=4Pc5yrsi2warhKdpWxL0Ba9Ms2sCvFeRVWU8jTeALZ8,24608
131
- evalvault/adapters/outbound/improvement/playbook_loader.py,sha256=zXDpiTpYWtQvVrDeo149YHKIyhF6nUP34j0FVnlBCJo,7471
130
+ evalvault/adapters/outbound/improvement/pattern_detector.py,sha256=uFFjWNy8A4KIihw_ANtL6At73RirwNnFnN4rFsEvcXk,24602
131
+ evalvault/adapters/outbound/improvement/playbook_loader.py,sha256=keheUoJn--cjSbdngEAUlkhrc_dYqdrlW_iZAI2R4Y4,7456
132
132
  evalvault/adapters/outbound/improvement/stage_metric_playbook_loader.py,sha256=JdmXQsackWqeWTnULE4gfTK8vAikGR27h-TVc03CGXk,1706
133
133
  evalvault/adapters/outbound/kg/__init__.py,sha256=fUCKOV080ZjiEob9s4TmXWf-IDa6GbIFQMLfH6gFCKg,567
134
134
  evalvault/adapters/outbound/kg/graph_rag_retriever.py,sha256=_6qd8p_2TpHnppv8LUQQPxUdTPoE0QTQ-rCVnY1ap6c,18658
@@ -148,7 +148,7 @@ evalvault/adapters/outbound/llm/token_aware_chat.py,sha256=yYmynaniNrYxtvXL6ejTE
148
148
  evalvault/adapters/outbound/llm/vllm_adapter.py,sha256=OKb3Nda9OLMmHdvLjvkeJcQVeXf-B8TDibmAs7PS7kg,5157
149
149
  evalvault/adapters/outbound/methods/__init__.py,sha256=3vyE9w3Ex2oMaO4ZE7Fy6xlHhJ6YQXHQNCvBiW9X2lM,345
150
150
  evalvault/adapters/outbound/methods/baseline_oracle.py,sha256=oUsF5sIiPY5vuDtrz0Ki05SnPlnVzn7APERP5v1KpPM,1308
151
- evalvault/adapters/outbound/methods/external_command.py,sha256=gR2mlgr-SCAO3cS3I7pYgS8hL8JE8Y-0VZIhg7USazY,5287
151
+ evalvault/adapters/outbound/methods/external_command.py,sha256=hsWaqMG0u2JhsS736n0t8sobrGSJMNNp1tUL_M4zgyg,6118
152
152
  evalvault/adapters/outbound/methods/registry.py,sha256=Znd35eouoe8k2E0NfDpVlDBSNAAWmyQkqBhAwVWllGI,7635
153
153
  evalvault/adapters/outbound/nlp/__init__.py,sha256=9MQMIjEUU03T0ZZtG-Wjz0Bt2-esGEcfv1kT9W6_CBY,40
154
154
  evalvault/adapters/outbound/nlp/korean/__init__.py,sha256=3ZVFHDxS6jzXat-WhTvW3hnbGNaeFhhWVVN1TtEOlnE,2267
@@ -167,7 +167,7 @@ evalvault/adapters/outbound/report/dashboard_generator.py,sha256=Dcu18NTK4lS8XNK
167
167
  evalvault/adapters/outbound/report/llm_report_generator.py,sha256=HUDA_IPBbl54cyEjTTJzdKTQ6H4IoZi-1VBdVmZf0uI,26593
168
168
  evalvault/adapters/outbound/report/markdown_adapter.py,sha256=5PS72h_qe4ZtYs-umhX5TqQL2k5SuDaCUc6rRw9AKRw,16761
169
169
  evalvault/adapters/outbound/storage/__init__.py,sha256=n5R6thAPTx1leSwv6od6nBWcLWFa-UYD6cOLzN89T8I,614
170
- evalvault/adapters/outbound/storage/base_sql.py,sha256=kWYaiUq5D35iMx34cX3_mjhRZoEXfgQR-tSk3UhbvcE,40792
170
+ evalvault/adapters/outbound/storage/base_sql.py,sha256=7jWtmNDBHncLDABf5ewwQJnfhFjySTfpfDJmEbPBD1w,40823
171
171
  evalvault/adapters/outbound/storage/benchmark_storage_adapter.py,sha256=Qgf9xSSIkYQRpG4uLzcUdoYO9LTQDQ4tFRkkMYer-WA,9803
172
172
  evalvault/adapters/outbound/storage/postgres_adapter.py,sha256=HLaoQ3YJDFwOxeY0S92oPIqb-7EgWSasgt89RM86vr0,47148
173
173
  evalvault/adapters/outbound/storage/postgres_schema.sql,sha256=A9MfO0pjf4kjxoRj2KPI0Gg1cbX13I2YE3oieT-PGiI,8906
@@ -180,9 +180,10 @@ evalvault/adapters/outbound/tracer/open_rag_trace_decorators.py,sha256=LFnk-3FSL
180
180
  evalvault/adapters/outbound/tracer/open_rag_trace_helpers.py,sha256=D48Mbj-ioDKztjhV9513Q5DiUNiVdO60B_2sWMFEmnI,3520
181
181
  evalvault/adapters/outbound/tracer/phoenix_tracer_adapter.py,sha256=inmTAolAVsm0IrszE9VTJoI7HSvGGAnGNZVu_vZRAGg,741
182
182
  evalvault/adapters/outbound/tracker/__init__.py,sha256=Suu5BznOK5uTuD5_jS8JMZd8RPfQNlddLxHCBvMTm_4,358
183
- evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=Gejd3fOBwShfjbtjVcZK9sCJKRz6oB3OaN6KukOYN38,17782
184
- evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=Wee1S7OPemPt5SoIdwBHuBdnXmLxNd3lcgQ9NNMKcDQ,7000
185
- evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=TNGU1RqpWwEEw5uQfx7-ClAh4C7wITwu_-X-fyVsCgc,22888
183
+ evalvault/adapters/outbound/tracker/langfuse_adapter.py,sha256=HmuMVUfDYjqNqHZGZMRybhrgca_EmeENuX7DfP-L5Fg,18504
184
+ evalvault/adapters/outbound/tracker/log_sanitizer.py,sha256=ilKTTSzsHslQYc-elnWu0Z3HKNNw1D1iI0_cCvYbo1M,2653
185
+ evalvault/adapters/outbound/tracker/mlflow_adapter.py,sha256=m4xj3XBULFYg27U3twKrldLhbLyLNefezmb2pCpHJrw,7180
186
+ evalvault/adapters/outbound/tracker/phoenix_adapter.py,sha256=sz5TyWC67e3YbQd2y-ogU9_66rilLdf8TbC-7bN_JR0,24316
186
187
  evalvault/config/__init__.py,sha256=UCgeDx62M2gOuFvdN29wWwny2fdH4bPY_uUC3-42eDw,1297
187
188
  evalvault/config/agent_types.py,sha256=EP2Pv3ZtOzDXIvIa-Hnd1to9JIbMUtGitrlwzZtx0Ys,13418
188
189
  evalvault/config/domain_config.py,sha256=rOgNA2T8NWlDzcEFC0shdUCCww0lI1E5fUm5QrKQSZI,9264
@@ -190,7 +191,8 @@ evalvault/config/instrumentation.py,sha256=L8on9HjB6Ji8cSOJ6Pepsopfg9okDNMWF7LKZ
190
191
  evalvault/config/langfuse_support.py,sha256=DEzVMfMGGf1V45W_2oUG-NCDfsYI4UUdnYJIgBSrN2o,582
191
192
  evalvault/config/model_config.py,sha256=KlzDbGyDLeOGE7ElekFFk5YjjT5u8i6KO2B4EyZkLnI,3542
192
193
  evalvault/config/phoenix_support.py,sha256=e6RPWd6Qb7KU6Q8pLaYTpJGWULtvEEU6B0xHWyVyOH0,13604
193
- evalvault/config/settings.py,sha256=T92GShlYKDaVinwbsbWX2DmNfm91Cvcvh8Te8pNOTsw,12875
194
+ evalvault/config/secret_manager.py,sha256=YjPMuNqeBrAR2BzCJvsBNUExaU4TBSFyZ8kVYZZifqA,4172
195
+ evalvault/config/settings.py,sha256=JKJf8t20sOHYnHoCfTxqupQixNgfmWYJhChiGMNz-W0,17617
194
196
  evalvault/config/playbooks/improvement_playbook.yaml,sha256=9F9WVVCydFfz6zUuGYzZ4PKdW1LLtcBKVF36T7xT764,26965
195
197
  evalvault/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
198
  evalvault/domain/entities/__init__.py,sha256=RZi_6oQcq-2-sJcydfKOSr03vFxo-mF7CGHN9Ma4Cdg,3379
@@ -304,8 +306,8 @@ evalvault/reports/__init__.py,sha256=Bb1X4871msAN8I6PM6nKGED3psPwZt88hXZBAOdH06Y
304
306
  evalvault/reports/release_notes.py,sha256=pZj0PBFT-4F_Ty-Kv5P69BuoOnmTCn4kznDcORFJd0w,4011
305
307
  evalvault/scripts/__init__.py,sha256=NwEeIFQbkX4ml2R_PhtIoNtArDSX_suuoymgG_7Kwso,89
306
308
  evalvault/scripts/regression_runner.py,sha256=SxZori5BZ8jVQ057Mf5V5FPgIVDccrV5oRONmnhuk8w,8438
307
- evalvault-1.63.0.dist-info/METADATA,sha256=Kscv51ExIOOosrBnBXI5S1_3V0S2t2nCZhfyssREdg4,23879
308
- evalvault-1.63.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
309
- evalvault-1.63.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
310
- evalvault-1.63.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
311
- evalvault-1.63.0.dist-info/RECORD,,
309
+ evalvault-1.64.0.dist-info/METADATA,sha256=DcFREpjg4tyoNf8FXTK632rgrOsWuFjSGnVBBQ4LeQ4,24276
310
+ evalvault-1.64.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
311
+ evalvault-1.64.0.dist-info/entry_points.txt,sha256=Oj9Xc5gYcyUYYNmQfWI8NYGw7nN-3M-h2ipHIMlVn6o,65
312
+ evalvault-1.64.0.dist-info/licenses/LICENSE.md,sha256=3RNWY4jjtrQ_yYa-D-7I3XO12Ti7YzxsLV_dpykujvo,11358
313
+ evalvault-1.64.0.dist-info/RECORD,,