evalvault 1.73.2__py3-none-any.whl → 1.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalvault/adapters/inbound/api/adapter.py +66 -17
- evalvault/adapters/inbound/api/routers/calibration.py +9 -9
- evalvault/adapters/inbound/api/routers/chat.py +604 -37
- evalvault/adapters/inbound/api/routers/domain.py +10 -5
- evalvault/adapters/inbound/api/routers/pipeline.py +3 -3
- evalvault/adapters/inbound/api/routers/runs.py +23 -4
- evalvault/adapters/inbound/cli/commands/analyze.py +10 -12
- evalvault/adapters/inbound/cli/commands/benchmark.py +10 -8
- evalvault/adapters/inbound/cli/commands/calibrate.py +2 -7
- evalvault/adapters/inbound/cli/commands/calibrate_judge.py +2 -7
- evalvault/adapters/inbound/cli/commands/compare.py +2 -7
- evalvault/adapters/inbound/cli/commands/debug.py +3 -2
- evalvault/adapters/inbound/cli/commands/domain.py +12 -12
- evalvault/adapters/inbound/cli/commands/experiment.py +9 -8
- evalvault/adapters/inbound/cli/commands/gate.py +3 -2
- evalvault/adapters/inbound/cli/commands/graph_rag.py +2 -2
- evalvault/adapters/inbound/cli/commands/history.py +3 -12
- evalvault/adapters/inbound/cli/commands/method.py +1 -2
- evalvault/adapters/inbound/cli/commands/ops.py +2 -2
- evalvault/adapters/inbound/cli/commands/pipeline.py +2 -2
- evalvault/adapters/inbound/cli/commands/profile_difficulty.py +3 -12
- evalvault/adapters/inbound/cli/commands/prompts.py +4 -18
- evalvault/adapters/inbound/cli/commands/regress.py +5 -4
- evalvault/adapters/inbound/cli/commands/run.py +42 -31
- evalvault/adapters/inbound/cli/commands/run_helpers.py +24 -15
- evalvault/adapters/inbound/cli/commands/stage.py +6 -25
- evalvault/adapters/inbound/cli/utils/options.py +10 -4
- evalvault/adapters/inbound/mcp/tools.py +11 -8
- evalvault/adapters/outbound/analysis/embedding_analyzer_module.py +17 -1
- evalvault/adapters/outbound/analysis/embedding_searcher_module.py +14 -0
- evalvault/adapters/outbound/domain_memory/__init__.py +8 -4
- evalvault/adapters/outbound/domain_memory/factory.py +68 -0
- evalvault/adapters/outbound/domain_memory/postgres_adapter.py +1062 -0
- evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql +177 -0
- evalvault/adapters/outbound/llm/vllm_adapter.py +23 -0
- evalvault/adapters/outbound/nlp/korean/dense_retriever.py +10 -7
- evalvault/adapters/outbound/nlp/korean/toolkit.py +15 -4
- evalvault/adapters/outbound/ops/__init__.py +5 -0
- evalvault/adapters/outbound/ops/report_renderer.py +159 -0
- evalvault/adapters/outbound/retriever/pgvector_store.py +165 -0
- evalvault/adapters/outbound/storage/base_sql.py +3 -2
- evalvault/adapters/outbound/storage/factory.py +53 -0
- evalvault/adapters/outbound/storage/postgres_adapter.py +90 -0
- evalvault/adapters/outbound/storage/postgres_schema.sql +15 -0
- evalvault/adapters/outbound/storage/schema.sql +14 -0
- evalvault/adapters/outbound/storage/sqlite_adapter.py +77 -0
- evalvault/config/settings.py +31 -7
- evalvault/domain/entities/ops_report.py +40 -0
- evalvault/domain/services/domain_learning_hook.py +2 -1
- evalvault/domain/services/ops_report_service.py +192 -0
- evalvault/ports/inbound/web_port.py +3 -1
- evalvault/ports/outbound/storage_port.py +2 -0
- evalvault-1.75.0.dist-info/METADATA +221 -0
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/RECORD +57 -48
- evalvault-1.73.2.dist-info/METADATA +0 -585
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/WHEEL +0 -0
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/entry_points.txt +0 -0
- {evalvault-1.73.2.dist-info → evalvault-1.75.0.dist-info}/licenses/LICENSE.md +0 -0
evalvault/adapters/outbound/domain_memory/postgres_domain_memory_schema.sql (new file)
@@ -0,0 +1,177 @@
+-- EvalVault Domain Memory Schema (PostgreSQL)
+-- Based on "Memory in the Age of AI Agents: A Survey" framework
+-- Forms: Flat (Phase 1), Planar/Hierarchical (Phase 2-3)
+-- Functions: Factual, Experiential, Working layers
+-- Dynamics: Formation, Evolution, Retrieval strategies
+
+-- =========================================================================
+-- Factual Layer - verified domain facts (SPO triples)
+-- =========================================================================
+
+CREATE TABLE IF NOT EXISTS factual_facts (
+    fact_id TEXT PRIMARY KEY,
+    subject TEXT NOT NULL,                 -- entity name
+    predicate TEXT NOT NULL,               -- relation type
+    object TEXT NOT NULL,                  -- target entity
+    language TEXT DEFAULT 'ko',            -- language code (ko, en)
+    domain TEXT DEFAULT 'default',         -- domain (insurance, legal, medical)
+    fact_type TEXT DEFAULT 'verified',     -- verified, inferred, contradictory
+    verification_score REAL DEFAULT 1.0,   -- 0.0-1.0
+    verification_count INTEGER DEFAULT 1,
+    source_document_ids TEXT,              -- JSON array of document IDs
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    last_verified TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    abstraction_level INTEGER DEFAULT 0
+);
+
+-- Indexes for search optimization
+CREATE INDEX IF NOT EXISTS idx_facts_domain_lang ON factual_facts(domain, language);
+CREATE INDEX IF NOT EXISTS idx_facts_subject ON factual_facts(subject);
+CREATE INDEX IF NOT EXISTS idx_facts_predicate ON factual_facts(predicate);
+CREATE INDEX IF NOT EXISTS idx_facts_object ON factual_facts(object);
+CREATE INDEX IF NOT EXISTS idx_facts_triple ON factual_facts(subject, predicate, object);
+CREATE INDEX IF NOT EXISTS idx_facts_verification_score ON factual_facts(verification_score DESC);
+CREATE INDEX IF NOT EXISTS idx_facts_last_verified ON factual_facts(last_verified DESC);
+
+-- GIN indexes for text search (ILIKE-based)
+CREATE INDEX IF NOT EXISTS idx_facts_subject_gin ON factual_facts USING GIN (to_tsvector('english', subject));
+CREATE INDEX IF NOT EXISTS idx_facts_predicate_gin ON factual_facts USING GIN (to_tsvector('english', predicate));
+CREATE INDEX IF NOT EXISTS idx_facts_object_gin ON factual_facts USING GIN (to_tsvector('english', object));
+
+-- =========================================================================
+-- Experiential Layer - patterns learned from evaluations
+-- =========================================================================
+
+CREATE TABLE IF NOT EXISTS learning_memories (
+    learning_id TEXT PRIMARY KEY,
+    run_id TEXT NOT NULL,                  -- originating evaluation run ID
+    domain TEXT DEFAULT 'default',
+    language TEXT DEFAULT 'ko',
+    entity_type_reliability TEXT,          -- JSON: {entity_type: reliability_score}
+    relation_type_reliability TEXT,        -- JSON: {relation_type: reliability_score}
+    failed_patterns TEXT,                  -- JSON array of failed patterns
+    successful_patterns TEXT,              -- JSON array of successful patterns
+    faithfulness_by_entity_type TEXT,      -- JSON: {entity_type: faithfulness_score}
+    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_learnings_domain_lang ON learning_memories(domain, language);
+CREATE INDEX IF NOT EXISTS idx_learnings_run_id ON learning_memories(run_id);
+CREATE INDEX IF NOT EXISTS idx_learnings_timestamp ON learning_memories(timestamp DESC);
+
+-- =========================================================================
+-- Behavior Layer - Metacognitive Reuse (reusable behaviors)
+-- =========================================================================
+
+CREATE TABLE IF NOT EXISTS behavior_entries (
+    behavior_id TEXT PRIMARY KEY,
+    description TEXT NOT NULL,
+    trigger_pattern TEXT,                  -- trigger condition (regex or keyword)
+    action_sequence TEXT,                  -- JSON array of action steps
+    success_rate REAL DEFAULT 0.0,         -- historical success rate
+    token_savings INTEGER DEFAULT 0,       -- number of tokens saved
+    applicable_languages TEXT DEFAULT '["ko", "en"]',  -- JSON array
+    domain TEXT DEFAULT 'default',
+    last_used TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    use_count INTEGER DEFAULT 0,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_behaviors_domain ON behavior_entries(domain);
+CREATE INDEX IF NOT EXISTS idx_behaviors_success_rate ON behavior_entries(success_rate DESC);
+CREATE INDEX IF NOT EXISTS idx_behaviors_use_count ON behavior_entries(use_count DESC);
+CREATE INDEX IF NOT EXISTS idx_behaviors_last_used ON behavior_entries(last_used DESC);
+
+-- GIN indexes for behavior search
+CREATE INDEX IF NOT EXISTS idx_behaviors_description_gin ON behavior_entries USING GIN (to_tsvector('english', description));
+CREATE INDEX IF NOT EXISTS idx_behaviors_trigger_gin ON behavior_entries USING GIN (to_tsvector('english', trigger_pattern));
+
+-- =========================================================================
+-- Working Layer - active context for the current session
+-- =========================================================================
+
+CREATE TABLE IF NOT EXISTS memory_contexts (
+    session_id TEXT PRIMARY KEY,
+    domain TEXT DEFAULT 'default',
+    language TEXT DEFAULT 'ko',
+    active_entities TEXT,                  -- JSON array of entity names
+    entity_type_distribution TEXT,         -- JSON: {entity_type: count}
+    current_quality_metrics TEXT,          -- JSON: {metric_name: value}
+    started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_contexts_domain ON memory_contexts(domain);
+CREATE INDEX IF NOT EXISTS idx_contexts_updated_at ON memory_contexts(updated_at DESC);
+
+-- =========================================================================
+-- Fact Sources - relationships between facts and documents (Phase 2)
+-- =========================================================================
+
+CREATE TABLE IF NOT EXISTS fact_sources (
+    id SERIAL PRIMARY KEY,
+    fact_id TEXT NOT NULL,
+    document_id TEXT NOT NULL,
+    extraction_confidence REAL DEFAULT 1.0,
+    extracted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    FOREIGN KEY (fact_id) REFERENCES factual_facts(fact_id) ON DELETE CASCADE,
+    UNIQUE(fact_id, document_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_fact_sources_fact_id ON fact_sources(fact_id);
+CREATE INDEX IF NOT EXISTS idx_fact_sources_document_id ON fact_sources(document_id);
+
+-- =========================================================================
+-- Memory Evolution Log - tracking memory changes (Phase 2)
+-- =========================================================================
+
+CREATE TABLE IF NOT EXISTS memory_evolution_log (
+    id SERIAL PRIMARY KEY,
+    operation TEXT NOT NULL,               -- consolidate, update, forget, decay
+    target_type TEXT NOT NULL,             -- fact, learning, behavior
+    target_id TEXT NOT NULL,
+    details TEXT,                          -- JSON: operation-specific details
+    performed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_evolution_log_operation ON memory_evolution_log(operation);
+CREATE INDEX IF NOT EXISTS idx_evolution_log_target ON memory_evolution_log(target_type, target_id);
+CREATE INDEX IF NOT EXISTS idx_evolution_log_performed_at ON memory_evolution_log(performed_at DESC);
+
+-- =========================================================================
+-- Phase 5: Planar Form - KG Integration
+-- =========================================================================
+
+-- KG Entity binding table for explicit KG links
+CREATE TABLE IF NOT EXISTS fact_kg_bindings (
+    id SERIAL PRIMARY KEY,
+    fact_id TEXT NOT NULL,
+    kg_entity_id TEXT NOT NULL,            -- KG entity name/ID
+    kg_relation_type TEXT,                 -- KG relation type
+    binding_confidence REAL DEFAULT 1.0,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    FOREIGN KEY (fact_id) REFERENCES factual_facts(fact_id) ON DELETE CASCADE,
+    UNIQUE(fact_id, kg_entity_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_kg_bindings_fact_id ON fact_kg_bindings(fact_id);
+CREATE INDEX IF NOT EXISTS idx_kg_bindings_kg_entity ON fact_kg_bindings(kg_entity_id);
+CREATE INDEX IF NOT EXISTS idx_kg_bindings_relation_type ON fact_kg_bindings(kg_relation_type);
+
+-- =========================================================================
+-- Phase 5: Hierarchical Form - Summary Layers
+-- =========================================================================
+
+-- Fact hierarchy table for parent-child relationships
+CREATE TABLE IF NOT EXISTS fact_hierarchy (
+    id SERIAL PRIMARY KEY,
+    parent_fact_id TEXT NOT NULL,
+    child_fact_id TEXT NOT NULL,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    FOREIGN KEY (parent_fact_id) REFERENCES factual_facts(fact_id) ON DELETE CASCADE,
+    FOREIGN KEY (child_fact_id) REFERENCES factual_facts(fact_id) ON DELETE CASCADE,
+    UNIQUE(parent_fact_id, child_fact_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_hierarchy_parent ON fact_hierarchy(parent_fact_id);
+CREATE INDEX IF NOT EXISTS idx_hierarchy_child ON fact_hierarchy(child_fact_id);
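To make the factual layer concrete, here is a minimal sketch (not part of the package) that writes and reads one SPO triple against this schema with psycopg; the DSN and the fact values are placeholders, and the shipped postgres_adapter.py presumably wraps the same operations behind its own API.

import json
import psycopg

# Placeholder DSN; the real adapter reads its connection settings from EvalVault config.
with psycopg.connect("postgresql://localhost/evalvault") as conn:
    conn.execute(
        """
        INSERT INTO factual_facts (fact_id, subject, predicate, object, domain, source_document_ids)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT (fact_id) DO UPDATE SET
            verification_count = factual_facts.verification_count + 1,
            last_verified = CURRENT_TIMESTAMP
        """,
        ("fact-001", "암보험", "covers", "진단비", "insurance", json.dumps(["doc-17"])),
    )
    # Retrieval strategy hinted at by the indexes: filter by domain, rank by verification score.
    facts = conn.execute(
        "SELECT subject, predicate, object, verification_score "
        "FROM factual_facts WHERE domain = %s ORDER BY verification_score DESC",
        ("insurance",),
    ).fetchall()
    conn.commit()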
evalvault/adapters/outbound/llm/vllm_adapter.py
@@ -64,6 +64,29 @@ class VLLMAdapter(BaseLLMAdapter):
         """Get the embedding model name being used."""
         return self._embedding_model_name
 
+    def embed_sync(
+        self,
+        *,
+        texts: list[str],
+        model: str | None = None,
+        dimension: int | None = None,
+    ) -> list[list[float]]:
+        """Synchronous embedding call using OpenAI-compatible API."""
+        embed_base_url = self._settings.vllm_embedding_base_url or self._settings.vllm_base_url
+        client = OpenAI(
+            base_url=embed_base_url,
+            api_key=self._settings.vllm_api_key or "local",
+            timeout=self._settings.vllm_timeout,
+        )
+        payload: dict[str, Any] = {
+            "model": model or self._embedding_model_name,
+            "input": texts,
+        }
+        if dimension is not None:
+            payload["dimensions"] = dimension
+        response = client.embeddings.create(**payload)
+        return [item.embedding for item in response.data]
+
     async def agenerate_text(
         self,
         prompt: str,
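A rough usage sketch for the new synchronous entry point, assuming a reachable vLLM endpoint configured through Settings (the example texts are placeholders); dimension is optional and is forwarded as the OpenAI-compatible "dimensions" field.

from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
from evalvault.config.settings import Settings

settings = Settings()  # assumes vllm_base_url / vllm_embedding_model etc. are set in the environment
adapter = VLLMAdapter(settings)

# Keyword-only API; returns one embedding vector per input text.
vectors = adapter.embed_sync(texts=["보험금 청구 절차", "claim procedure"], dimension=256)
print(len(vectors), len(vectors[0]))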
evalvault/adapters/outbound/nlp/korean/dense_retriever.py
@@ -141,8 +141,8 @@ class KoreanDenseRetriever:
         },
     }
 
-    # Default model:
-    DEFAULT_MODEL = "
+    # Default model: BAAI/bge-m3 (multilingual default)
+    DEFAULT_MODEL = "BAAI/bge-m3"
 
     def __init__(
         self,
@@ -175,7 +175,7 @@ class KoreanDenseRetriever:
             device: Device (auto, cpu, cuda, mps)
             batch_size: Encoding batch size
                 - If set to 0 or below, chosen automatically by a simple heuristic
-
+            ollama_adapter: OpenAI-compatible embedding adapter (Ollama/vLLM)
             matryoshka_dim: Matryoshka dimension (Qwen3-Embedding only)
                 - None: use the model's recommended dimension
                 - 256: for development (speed first)
@@ -237,12 +237,12 @@ class KoreanDenseRetriever:
         self._query_cache_size = max(query_cache_size, 0)
         self._search_cache_size = max(search_cache_size, 0)
 
-        # Validate
+        # Validate embedding adapter for OpenAI-compatible embedding models
         model_info = self.SUPPORTED_MODELS.get(self._model_name)
         if model_info and model_info.get("type") == "ollama" and not self._ollama_adapter:
             raise ValueError(
-                f"
-                "Create one with: OllamaAdapter(settings)"
+                f"embedding adapter is required for model '{self._model_name}'. "
+                "Create one with: OllamaAdapter(settings) or VLLMAdapter(settings)"
             )
 
         # Auto-select matryoshka dimension if not specified
@@ -362,7 +362,10 @@ class KoreanDenseRetriever:
             return
 
         model_info = self.SUPPORTED_MODELS.get(self._model_name)
-
+        if model_info is None and self._ollama_adapter is not None:
+            model_type = "ollama"
+        else:
+            model_type = model_info["type"] if model_info else "sentence-transformers"
 
         # Ollama models use adapter directly - no model loading needed
         if model_type == "ollama":
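A sketch of what the new fallback enables (not package code, and it assumes a reachable vLLM endpoint): a model name that is absent from SUPPORTED_MODELS but arrives with an OpenAI-compatible adapter is now routed down the adapter-backed ("ollama") path instead of the sentence-transformers path.

from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
from evalvault.adapters.outbound.nlp.korean.dense_retriever import KoreanDenseRetriever
from evalvault.config.settings import Settings

settings = Settings()
adapter = VLLMAdapter(settings)

# Unlisted model name + adapter -> resolved as an adapter-backed ("ollama") model.
retriever = KoreanDenseRetriever(
    model_name=settings.vllm_embedding_model,
    ollama_adapter=adapter,
)
embed_fn = retriever.get_embedding_func()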
evalvault/adapters/outbound/nlp/korean/toolkit.py
@@ -76,10 +76,21 @@ class KoreanNLPToolkit(KoreanNLPToolkitPort):
 
         embedding_func = None
         try:
-
-
-
-
+            if embedding_profile == "vllm":
+                from evalvault.adapters.outbound.llm.vllm_adapter import VLLMAdapter
+                from evalvault.config.settings import Settings
+
+                settings = Settings()
+                adapter = ollama_adapter or VLLMAdapter(settings)
+                dense_retriever = KoreanDenseRetriever(
+                    model_name=settings.vllm_embedding_model,
+                    ollama_adapter=adapter,
+                )
+            else:
+                dense_retriever = KoreanDenseRetriever(
+                    profile=embedding_profile,
+                    ollama_adapter=ollama_adapter,
+                )
             embedding_func = dense_retriever.get_embedding_func()
             if verbose:
                 logger.info(
evalvault/adapters/outbound/ops/report_renderer.py (new file)
@@ -0,0 +1,159 @@
+from __future__ import annotations
+
+import json
+
+from evalvault.domain.entities.ops_report import OpsReport
+from evalvault.domain.entities.stage import StageMetric, StageSummary
+
+
+def render_markdown(report: OpsReport) -> str:
+    lines: list[str] = []
+    lines.append("# Ops Report")
+    lines.append("")
+    lines.extend(_render_run_summary(report.run_summary, report.metadata))
+    lines.append("")
+    lines.extend(_render_ops_kpis(report.ops_kpis))
+    lines.append("")
+    lines.extend(_render_stage_summary(report.stage_summary))
+    lines.append("")
+    lines.extend(_render_bottlenecks(report.bottlenecks))
+    lines.append("")
+    lines.extend(_render_recommendations(report.recommendations))
+    lines.append("")
+    lines.extend(_render_failing_metrics(report.stage_metrics))
+    return "\n".join(lines).strip()
+
+
+def render_json(report: OpsReport) -> str:
+    payload = report.to_dict()
+    return json.dumps(payload, ensure_ascii=True, indent=2)
+
+
+def _render_run_summary(summary: dict[str, object], metadata: dict[str, object]) -> list[str]:
+    run_id = summary.get("run_id", "-")
+    dataset = summary.get("dataset_name", "-")
+    version = summary.get("dataset_version", "-")
+    model = summary.get("model_name", "-")
+    started = summary.get("started_at", "-")
+    finished = summary.get("finished_at", "-")
+    duration = summary.get("duration_seconds", "-")
+    total_cases = summary.get("total_test_cases", "-")
+    pass_rate = summary.get("pass_rate", "-")
+    total_tokens = summary.get("total_tokens", "-")
+    total_cost = summary.get("total_cost_usd", "-")
+
+    lines = [
+        "## Run Summary",
+        f"- run_id: {run_id}",
+        f"- dataset: {dataset} ({version})",
+        f"- model: {model}",
+        f"- started_at: {started}",
+        f"- finished_at: {finished}",
+        f"- duration_seconds: {duration}",
+        f"- total_test_cases: {total_cases}",
+        f"- pass_rate: {pass_rate}",
+        f"- total_tokens: {total_tokens}",
+        f"- total_cost_usd: {total_cost}",
+    ]
+    trace_links: list[str] = []
+    if metadata.get("langfuse_trace_url"):
+        trace_links.append(f"langfuse_trace_url={metadata['langfuse_trace_url']}")
+    if metadata.get("phoenix_trace_url"):
+        trace_links.append(f"phoenix_trace_url={metadata['phoenix_trace_url']}")
+    if trace_links:
+        lines.append(f"- trace_links: {', '.join(trace_links)}")
+    return lines
+
+
+def _render_stage_summary(summary: StageSummary | None) -> list[str]:
+    lines = ["## Stage Summary"]
+    if summary is None:
+        lines.append("- no stage events found")
+        return lines
+    lines.append(f"- total_events: {summary.total_events}")
+    if summary.missing_required_stage_types:
+        missing = ", ".join(summary.missing_required_stage_types)
+        lines.append(f"- missing_required_stage_types: {missing}")
+    if summary.stage_type_counts:
+        lines.append("- stage_type_counts:")
+        for stage_type, count in summary.stage_type_counts.items():
+            lines.append(f"  - {stage_type}: {count}")
+    if summary.stage_type_avg_durations:
+        lines.append("- stage_type_avg_durations_ms:")
+        for stage_type, duration in summary.stage_type_avg_durations.items():
+            lines.append(f"  - {stage_type}: {duration:.3f}")
+    return lines
+
+
+def _render_ops_kpis(kpis: dict[str, object]) -> list[str]:
+    lines = ["## Ops KPIs"]
+    lines.append(f"- total_test_cases: {kpis.get('total_test_cases', '-')}")
+    lines.append(f"- pass_rate: {kpis.get('pass_rate', '-')}")
+    lines.append(f"- failure_rate: {kpis.get('failure_rate', '-')}")
+    lines.append(f"- stage_error_rate: {kpis.get('stage_error_rate', '-')}")
+    lines.append(f"- stage_error_severity: {kpis.get('stage_error_severity', '-')}")
+    lines.append(f"- duration_seconds: {kpis.get('duration_seconds', '-')}")
+    lines.append(f"- total_tokens: {kpis.get('total_tokens', '-')}")
+    lines.append(f"- total_cost_usd: {kpis.get('total_cost_usd', '-')}")
+    lines.append(f"- avg_latency_ms: {kpis.get('avg_latency_ms', '-')}")
+    lines.append(f"- p95_latency_ms: {kpis.get('p95_latency_ms', '-')}")
+    lines.append(f"- avg_tokens_per_case: {kpis.get('avg_tokens_per_case', '-')}")
+    lines.append(f"- avg_cost_per_case_usd: {kpis.get('avg_cost_per_case_usd', '-')}")
+    return lines
+
+
+def _render_bottlenecks(bottlenecks: list[dict[str, object]]) -> list[str]:
+    lines = ["## Ops Signals"]
+    if not bottlenecks:
+        lines.append("- none")
+        return lines
+    for entry in bottlenecks:
+        entry_type = entry.get("type", "unknown")
+        if entry_type == "latency":
+            stage_type = entry.get("stage_type", "-")
+            duration = entry.get("avg_duration_ms", "-")
+            lines.append(f"- latency: {stage_type} avg_duration_ms={duration}")
+        elif entry_type == "missing_stage":
+            stage_type = entry.get("stage_type", "-")
+            lines.append(f"- missing_stage: {stage_type}")
+        else:
+            lines.append(f"- {entry_type}: {entry}")
+    return lines
+
+
+def _render_recommendations(recommendations: list[str]) -> list[str]:
+    lines = ["## Recommendations"]
+    if not recommendations:
+        lines.append("- none")
+        return lines
+    for item in recommendations:
+        lines.append(f"- {item}")
+    return lines
+
+
+def _render_failing_metrics(metrics: list[StageMetric]) -> list[str]:
+    lines = ["## Failing Stage Metrics"]
+    failing = [metric for metric in metrics if metric.passed is False]
+    if not failing:
+        lines.append("- none")
+        return lines
+
+    failing_sorted = sorted(failing, key=_metric_severity, reverse=True)[:20]
+    for metric in failing_sorted:
+        threshold = metric.threshold if metric.threshold is not None else "-"
+        lines.append(
+            f"- {metric.metric_name}: score={metric.score} threshold={threshold} "
+            f"stage_id={metric.stage_id}"
+        )
+    return lines
+
+
+def _metric_severity(metric: StageMetric) -> float:
+    if metric.threshold is None:
+        return 0.0
+    comparison = None
+    if isinstance(metric.evidence, dict):
+        comparison = metric.evidence.get("comparison")
+    if isinstance(comparison, str) and comparison.lower() in {"max", "<=", "le"}:
+        return max(metric.score - metric.threshold, 0.0)
+    return max(metric.threshold - metric.score, 0.0)
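A hypothetical usage sketch only: the OpsReport field names below are inferred from how the renderer reads the report, and the real constructor (defined in ops_report.py, not shown in this diff) may differ.

from evalvault.adapters.outbound.ops.report_renderer import render_json, render_markdown
from evalvault.domain.entities.ops_report import OpsReport

# Field names inferred from the renderer; treat this construction as illustrative, not canonical.
report = OpsReport(
    run_summary={"run_id": "run-123", "dataset_name": "ko-insurance", "dataset_version": "v2",
                 "model_name": "local-llm", "pass_rate": 0.92},
    ops_kpis={"total_test_cases": 50, "pass_rate": 0.92, "p95_latency_ms": 840},
    stage_summary=None,  # rendered as "- no stage events found"
    bottlenecks=[{"type": "latency", "stage_type": "retrieval", "avg_duration_ms": 412.5}],
    recommendations=["Cache retrieval results for repeated queries"],
    stage_metrics=[],
    metadata={},
)
print(render_markdown(report))
print(render_json(report))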
evalvault/adapters/outbound/retriever/pgvector_store.py (new file)
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+import logging
+from collections.abc import Iterable
+from dataclasses import dataclass
+
+import psycopg
+from psycopg.rows import dict_row
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class PgvectorResult:
+    doc_id: int
+    content: str
+    score: float
+
+
+class PgvectorStore:
+    def __init__(
+        self,
+        conn_string: str,
+        *,
+        distance: str = "cosine",
+        index_type: str = "hnsw",
+        index_lists: int = 100,
+        hnsw_m: int = 16,
+        hnsw_ef_construction: int = 64,
+    ) -> None:
+        self._conn_string = conn_string
+        self._distance = distance
+        self._index_type = index_type
+        self._index_lists = index_lists
+        self._hnsw_m = hnsw_m
+        self._hnsw_ef_construction = hnsw_ef_construction
+
+    def _vector_ops(self) -> str:
+        if self._distance == "ip":
+            return "vector_ip_ops"
+        if self._distance == "l2":
+            return "vector_l2_ops"
+        return "vector_cosine_ops"
+
+    def _connect(self) -> psycopg.Connection:
+        conn = psycopg.connect(self._conn_string, row_factory=dict_row)
+        try:
+            from pgvector.psycopg import register_vector
+
+            register_vector(conn)
+        except Exception as exc:  # pragma: no cover - runtime dependency
+            logger.warning("Failed to register pgvector type: %s", exc)
+        return conn
+
+    def ensure_schema(self, *, dimension: int) -> None:
+        sql = f"""
+        CREATE EXTENSION IF NOT EXISTS vector;
+
+        CREATE TABLE IF NOT EXISTS rag_documents (
+            id BIGSERIAL PRIMARY KEY,
+            source TEXT NOT NULL,
+            source_hash TEXT NOT NULL,
+            doc_id INTEGER NOT NULL,
+            content TEXT NOT NULL,
+            embedding VECTOR({dimension}),
+            metadata JSONB,
+            created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+        );
+
+        CREATE INDEX IF NOT EXISTS idx_rag_documents_source ON rag_documents(source);
+        CREATE INDEX IF NOT EXISTS idx_rag_documents_doc_id ON rag_documents(doc_id);
+        CREATE UNIQUE INDEX IF NOT EXISTS idx_rag_documents_source_doc_id
+            ON rag_documents(source, doc_id);
+        """
+        with self._connect() as conn:
+            conn.execute(sql)
+            if self._index_type != "none":
+                opclass = self._vector_ops()
+                if self._index_type == "ivfflat":
+                    index_sql = (
+                        "CREATE INDEX IF NOT EXISTS idx_rag_documents_embedding "
+                        f"ON rag_documents USING ivfflat (embedding {opclass}) "
+                        f"WITH (lists = {self._index_lists});"
+                    )
+                    try:
+                        conn.execute(index_sql)
+                    except Exception as exc:  # pragma: no cover - runtime dependency
+                        logger.warning("Failed to create ivfflat index: %s", exc)
+                elif self._index_type == "hnsw":
+                    index_sql = (
+                        "CREATE INDEX IF NOT EXISTS idx_rag_documents_embedding "
+                        f"ON rag_documents USING hnsw (embedding {opclass}) "
+                        f"WITH (m = {self._hnsw_m}, ef_construction = {self._hnsw_ef_construction});"
+                    )
+                    try:
+                        conn.execute(index_sql)
+                    except Exception as exc:  # pragma: no cover - runtime dependency
+                        logger.warning("Failed to create hnsw index: %s", exc)
+            conn.commit()
+
+    def get_source_state(self, *, source: str) -> tuple[str | None, int]:
+        with self._connect() as conn:
+            row = conn.execute(
+                """
+                SELECT source_hash, COUNT(*) AS total
+                FROM rag_documents
+                WHERE source = %s
+                GROUP BY source_hash
+                ORDER BY total DESC
+                LIMIT 1
+                """,
+                (source,),
+            ).fetchone()
+        if not row:
+            return None, 0
+        return row["source_hash"], int(row["total"])
+
+    def replace_documents(
+        self,
+        *,
+        source: str,
+        source_hash: str,
+        documents: Iterable[str],
+        embeddings: Iterable[list[float]],
+    ) -> None:
+        rows = list(zip(documents, embeddings, strict=True))
+        with self._connect() as conn:
+            conn.execute("DELETE FROM rag_documents WHERE source = %s", (source,))
+            with conn.cursor() as cursor:
+                cursor.executemany(
+                    """
+                    INSERT INTO rag_documents (source, source_hash, doc_id, content, embedding)
+                    VALUES (%s, %s, %s, %s, %s)
+                    """,
+                    [
+                        (source, source_hash, index, content, embedding)
+                        for index, (content, embedding) in enumerate(rows)
+                    ],
+                )
+            conn.commit()
+
+    def search(
+        self, *, source: str, query_embedding: list[float], top_k: int
+    ) -> list[PgvectorResult]:
+        if self._distance == "ip":
+            operator = "<#>"
+        elif self._distance == "l2":
+            operator = "<->"
+        else:
+            operator = "<=>"
+
+        sql = (
+            f"SELECT doc_id, content, embedding {operator} %s::vector AS score "
+            f"FROM rag_documents WHERE source = %s ORDER BY embedding {operator} %s::vector LIMIT %s"
+        )
+
+        with self._connect() as conn:
+            rows = conn.execute(sql, (query_embedding, source, query_embedding, top_k)).fetchall()
+
+        return [
+            PgvectorResult(
+                doc_id=int(row["doc_id"]), content=row["content"], score=float(row["score"])
+            )
+            for row in rows
+        ]
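A minimal end-to-end sketch of the new store, using a placeholder DSN and toy 3-dimensional vectors; in the package the embeddings would come from an embedding adapter (e.g. embed_sync above) rather than hard-coded lists.

from evalvault.adapters.outbound.retriever.pgvector_store import PgvectorStore

store = PgvectorStore("postgresql://localhost/evalvault", distance="cosine", index_type="hnsw")
store.ensure_schema(dimension=3)  # toy dimension; use the embedding model's real size

docs = ["암보험은 진단비를 보장한다", "실손보험은 치료비를 보장한다"]
vecs = [[0.1, 0.2, 0.3], [0.2, 0.1, 0.0]]
store.replace_documents(source="policy.md", source_hash="abc123", documents=docs, embeddings=vecs)

# Cosine distance: smaller score means a closer match.
hits = store.search(source="policy.md", query_embedding=[0.1, 0.2, 0.25], top_k=1)
for hit in hits:
    print(hit.doc_id, round(hit.score, 3), hit.content)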
evalvault/adapters/outbound/storage/base_sql.py
@@ -247,7 +247,7 @@ class SQLQueries:
         return "SELECT run_id FROM evaluation_runs WHERE 1=1"
 
     def list_runs_ordering(self) -> str:
-        return f" ORDER BY started_at DESC LIMIT {self.placeholder}"
+        return f" ORDER BY started_at DESC LIMIT {self.placeholder} OFFSET {self.placeholder}"
 
     def upsert_regression_baseline(self) -> str:
         raise NotImplementedError("Override in subclass")
@@ -394,6 +394,7 @@ class BaseSQLStorageAdapter(ABC):
     def list_runs(
         self,
         limit: int = 100,
+        offset: int = 0,
        dataset_name: str | None = None,
        model_name: str | None = None,
     ) -> list[EvaluationRun]:
@@ -410,7 +411,7 @@
            params.append(model_name)
 
        query += self.queries.list_runs_ordering()
-        params.
+        params.extend([limit, offset])
 
        cursor = self._execute(conn, query, params)
        run_ids = [row["run_id"] for row in cursor.fetchall()]
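Together these changes give list_runs offset-based pagination. A small sketch, assuming "storage" is any concrete adapter built on BaseSQLStorageAdapter (SQLite or Postgres; its construction is not shown in this diff):

page_size = 20
# Second page of runs for one dataset, newest first (ORDER BY started_at DESC).
runs = storage.list_runs(limit=page_size, offset=page_size, dataset_name="ko-insurance")
for run in runs:
    print(run.run_id)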