eval-ai-library 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eval-ai-library might be problematic.

Files changed (29)
  1. eval_ai_library-0.3.0.dist-info/METADATA +1042 -0
  2. eval_ai_library-0.3.0.dist-info/RECORD +34 -0
  3. eval_lib/__init__.py +19 -6
  4. eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py +8 -3
  5. eval_lib/agent_metrics/role_adherence_metric/role_adherence.py +12 -4
  6. eval_lib/agent_metrics/task_success_metric/task_success_rate.py +23 -23
  7. eval_lib/agent_metrics/tools_correctness_metric/tool_correctness.py +8 -2
  8. eval_lib/datagenerator/datagenerator.py +208 -12
  9. eval_lib/datagenerator/document_loader.py +29 -29
  10. eval_lib/evaluate.py +0 -22
  11. eval_lib/llm_client.py +223 -78
  12. eval_lib/metric_pattern.py +208 -152
  13. eval_lib/metrics/answer_precision_metric/answer_precision.py +8 -3
  14. eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py +7 -2
  15. eval_lib/metrics/bias_metric/bias.py +12 -2
  16. eval_lib/metrics/contextual_precision_metric/contextual_precision.py +9 -4
  17. eval_lib/metrics/contextual_recall_metric/contextual_recall.py +7 -3
  18. eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py +8 -2
  19. eval_lib/metrics/custom_metric/custom_eval.py +237 -204
  20. eval_lib/metrics/faithfulness_metric/faithfulness.py +7 -2
  21. eval_lib/metrics/geval/geval.py +8 -2
  22. eval_lib/metrics/restricted_refusal_metric/restricted_refusal.py +7 -3
  23. eval_lib/metrics/toxicity_metric/toxicity.py +8 -2
  24. eval_lib/utils.py +44 -29
  25. eval_ai_library-0.2.2.dist-info/METADATA +0 -779
  26. eval_ai_library-0.2.2.dist-info/RECORD +0 -34
  27. {eval_ai_library-0.2.2.dist-info → eval_ai_library-0.3.0.dist-info}/WHEEL +0 -0
  28. {eval_ai_library-0.2.2.dist-info → eval_ai_library-0.3.0.dist-info}/licenses/LICENSE +0 -0
  29. {eval_ai_library-0.2.2.dist-info → eval_ai_library-0.3.0.dist-info}/top_level.txt +0 -0
eval_lib/datagenerator/document_loader.py CHANGED
@@ -2,22 +2,41 @@
 from __future__ import annotations
 from pathlib import Path
 from typing import List
-
 from langchain_core.documents import Document
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-
-# LangChain loaders (keeping the existing ones)
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.document_loaders import Docx2txtLoader
 from langchain_community.document_loaders import TextLoader
 
 import html2text
 import markdown
-
+from pptx import Presentation
+from striprtf.striprtf import rtf_to_text
+from pypdf import PdfReader
+import fitz
+import zipfile
+from xml.etree import ElementTree as ET
+import pandas as pd
+import yaml
+import pytesseract
+from PIL import Image
+import io as _io
+import pytesseract
+from PIL import Image
+import io as _io
+import docx  # python-docx
+import mammoth
 import io
 import json
 import zipfile
 
+try:
+    import textract
+    HAS_TEXTRACT = True
+except ImportError:
+    HAS_TEXTRACT = False
+    textract = None
+
 # ---------------------------
 # Helper functions
 # ---------------------------
@@ -33,7 +52,7 @@ def _read_bytes(p: Path) -> bytes:
 
 def _csv_tsv_to_text(p: Path) -> str:
     try:
-        import pandas as pd
+
         sep = "," if p.suffix.lower() == ".csv" else "\t"
         df = pd.read_csv(str(p), dtype=str, sep=sep,
                          encoding="utf-8", engine="python")
@@ -50,7 +69,6 @@ def _csv_tsv_to_text(p: Path) -> str:
 
 def _xlsx_to_text(p: Path) -> str:
     try:
-        import pandas as pd
         df = pd.read_excel(str(p), dtype=str, engine="openpyxl")
         df = df.fillna("")
         buf = io.StringIO()
@@ -62,7 +80,6 @@ def _xlsx_to_text(p: Path) -> str:
 
 def _pptx_to_text(p: Path) -> str:
     try:
-        from pptx import Presentation
         prs = Presentation(str(p))
         texts = []
         for slide in prs.slides:
@@ -96,7 +113,6 @@ def _json_to_text(p: Path) -> str:
 
 def _yaml_to_text(p: Path) -> str:
     try:
-        import yaml
         data = yaml.safe_load(_read_text(p))
         return json.dumps(data, ensure_ascii=False, indent=2)
     except Exception:
@@ -105,7 +121,6 @@ def _yaml_to_text(p: Path) -> str:
 
 def _xml_to_text(p: Path) -> str:
     try:
-        from xml.etree import ElementTree as ET
         tree = ET.parse(str(p))
         root = tree.getroot()
         lines = []
@@ -125,7 +140,7 @@ def _xml_to_text(p: Path) -> str:
 
 def _rtf_to_text(p: Path) -> str:
     try:
-        from striprtf.striprtf import rtf_to_text
+
         return rtf_to_text(_read_text(p))
     except Exception:
         return ""
@@ -153,7 +168,6 @@ def _odt_to_text(p: Path) -> str:
 
 def _pdf_text_pypdf(p: Path) -> str:
     try:
-        from pypdf import PdfReader  # <- specifically pypdf
         reader = PdfReader(str(p))
         texts = []
         for page in reader.pages:
@@ -167,7 +181,6 @@ def _pdf_text_pypdf(p: Path) -> str:
 
 def _pdf_text_pymupdf(p: Path) -> str:
     try:
-        import fitz  # PyMuPDF
         text_parts = []
         with fitz.open(str(p)) as doc:
             for page in doc:
@@ -182,11 +195,6 @@ def _pdf_text_pymupdf(p: Path) -> str:
 def _pdf_ocr_via_pymupdf(p: Path) -> str:
     """Render pages via PyMuPDF and OCR pytesseract. Will work if pytesseract + tesseract are installed."""
     try:
-        import fitz  # PyMuPDF
-        import pytesseract
-        from PIL import Image
-        import io as _io
-
         texts = []
         zoom = 2.0
         mat = fitz.Matrix(zoom, zoom)
@@ -208,9 +216,6 @@ def _pdf_ocr_via_pymupdf(p: Path) -> str:
 
 def _ocr_image_bytes(img_bytes: bytes) -> str:
     try:
-        import pytesseract
-        from PIL import Image
-        import io as _io
         img = Image.open(_io.BytesIO(img_bytes))
         return pytesseract.image_to_string(img) or ""
     except Exception:
@@ -223,7 +228,6 @@ def _ocr_image_bytes(img_bytes: bytes) -> str:
 
 def _docx_to_text_python_docx(p: Path) -> str:
     try:
-        import docx  # python-docx
         d = docx.Document(str(p))
         parts = []
         for para in d.paragraphs:
@@ -242,7 +246,6 @@ def _docx_to_text_python_docx(p: Path) -> str:
 
 def _docx_to_text_mammoth(p: Path) -> str:
     try:
-        import mammoth
         with open(str(p), "rb") as f:
             result = mammoth.extract_raw_text(f)
             return (result.value or "").strip()
@@ -253,19 +256,16 @@ def _docx_to_text_mammoth(p: Path) -> str:
 
 def _docx_to_text_zipxml(p: Path) -> str:
     """No external dependencies: read word/document.xml and pull out all w:t elements."""
-    import zipfile
-    from xml.etree import ElementTree as ET
+
     texts = []
     with zipfile.ZipFile(str(p)) as z:
-        # main document
         if "word/document.xml" in z.namelist():
             with z.open("word/document.xml") as f:
                 root = ET.parse(f).getroot()
                 for el in root.iter():
-                    tag = el.tag.rsplit("}", 1)[-1]  # strip namespace
+                    tag = el.tag.rsplit("}", 1)[-1]
                    if tag == "t" and el.text and el.text.strip():
                         texts.append(el.text.strip())
-        # headers/footers may also contain text
         for name in z.namelist():
             if name.startswith("word/header") and name.endswith(".xml"):
                 with z.open(name) as f:
@@ -287,9 +287,9 @@ def _docx_to_text_zipxml(p: Path) -> str:
 
 
 def _doc_to_text_textract(p: Path) -> str:
-    """For legacy .doc files. Works if textract and system binaries (antiword/catdoc) are installed."""
+    if not HAS_TEXTRACT:
+        return ""
     try:
-        import textract
         return textract.process(str(p)).decode("utf-8", errors="ignore")
     except Exception:
         return ""
eval_lib/evaluate.py CHANGED
@@ -183,18 +183,6 @@ async def evaluate(
     _print_summary(results, total_cost, total_time,
                    total_passed, total_tests)
 
-    # Print detailed results if requested
-    if verbose:
-        print(f"{Colors.BOLD}Detailed Results:{Colors.ENDC}")
-        for idx, (meta, tc_list) in enumerate(results, 1):
-            print(f"\n{Colors.DIM}{'─'*70}{Colors.ENDC}")
-            print(f"{Colors.BOLD}Test Case {idx}:{Colors.ENDC}")
-            for tc in tc_list:
-                tc_dict = asdict(tc)
-                # Pretty print with indentation
-                print(json.dumps(tc_dict, indent=2, ensure_ascii=False))
-            print(f"{Colors.DIM}{'─'*70}{Colors.ENDC}\n")
-
     return results
 
 
@@ -322,14 +310,4 @@ async def evaluate_conversations(
     _print_summary(results, total_cost, total_time,
                    total_passed, total_conversations)
 
-    # Print detailed results if requested
-    if verbose:
-        print(f"{Colors.BOLD}Detailed Results:{Colors.ENDC}")
-        for idx, (_, conv_list) in enumerate(results, 1):
-            print(f"\n{Colors.DIM}{'─'*70}{Colors.ENDC}")
-            print(f"{Colors.BOLD}Conversation {idx}:{Colors.ENDC}")
-            for conv in conv_list:
-                print(json.dumps(asdict(conv), indent=2, ensure_ascii=False))
-            print(f"{Colors.DIM}{'─'*70}{Colors.ENDC}\n")
-
     return results
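
Both hunks remove the built-in verbose dump, so 0.3.0 no longer prints detailed results itself. Callers that want the old output can reproduce it from the return value; a minimal sketch, assuming the shape visible in the removed code (a list of (meta, test_case_list) tuples whose items are dataclasses):

import json
from dataclasses import asdict


def print_detailed(results) -> None:
    """Pretty-print evaluate() results, mirroring the removed verbose block."""
    for idx, (meta, tc_list) in enumerate(results, 1):
        print(f"Test Case {idx}:")
        for tc in tc_list:
            # asdict works because each test case is a dataclass
            print(json.dumps(asdict(tc), indent=2, ensure_ascii=False))

# usage: results = await evaluate(...); print_detailed(results)
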
eval_lib/llm_client.py CHANGED
@@ -13,6 +13,11 @@ from types import SimpleNamespace
 from .price import model_pricing
 
 
+class LLMConfigurationError(Exception):
+    """Raised when LLM client configuration is missing or invalid."""
+    pass
+
+
 class Provider(str, Enum):
     OPENAI = "openai"
     AZURE = "azure"
@@ -45,12 +50,59 @@ class LLMDescriptor:
         return f"{self.provider}:{self.model}"
 
 
+def _check_env_var(var_name: str, provider: str, required: bool = True) -> Optional[str]:
+    """
+    Check if environment variable is set and return its value.
+
+    Args:
+        var_name: Name of the environment variable
+        provider: Provider name for error message
+        required: Whether this variable is required
+
+    Returns:
+        Value of the environment variable or None if not required
+
+    Raises:
+        LLMConfigurationError: If required variable is missing
+    """
+    value = os.getenv(var_name)
+    if required and not value:
+        raise LLMConfigurationError(
+            f"❌ Missing {provider} configuration!\n\n"
+            f"Environment variable '{var_name}' is not set.\n\n"
+            f"To fix this, set the environment variable:\n"
+            f"  export {var_name}='your-api-key-here'\n\n"
+            f"Or add it to your .env file:\n"
+            f"  {var_name}=your-api-key-here\n\n"
+            f"📖 Documentation: https://github.com/meshkovQA/Eval-ai-library#environment-variables"
+        )
+    return value
+
+
 @functools.cache
 def _get_client(provider: Provider):
+    """
+    Get or create LLM client for the specified provider.
+
+    Args:
+        provider: LLM provider enum
+
+    Returns:
+        Configured client instance
+
+    Raises:
+        LLMConfigurationError: If required configuration is missing
+        ValueError: If provider is not supported
+    """
     if provider == Provider.OPENAI:
+        _check_env_var("OPENAI_API_KEY", "OpenAI")
         return openai.AsyncOpenAI()
 
     if provider == Provider.AZURE:
+        _check_env_var("AZURE_OPENAI_API_KEY", "Azure OpenAI")
+        _check_env_var("AZURE_OPENAI_ENDPOINT", "Azure OpenAI")
+        # AZURE_OPENAI_DEPLOYMENT is checked at call time; it is not required here
+
         return AsyncAzureOpenAI(
             api_key=os.getenv("AZURE_OPENAI_API_KEY"),
             azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
@@ -58,20 +110,28 @@ def _get_client(provider: Provider):
         )
 
     if provider == Provider.GOOGLE:
+        _check_env_var("GOOGLE_API_KEY", "Google Gemini")
         return genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
 
     if provider == Provider.OLLAMA:
+        # Ollama can run without an API key (local server)
+        api_key = _check_env_var(
+            "OLLAMA_API_KEY", "Ollama", required=False) or "ollama"
+        base_url = _check_env_var(
+            "OLLAMA_API_BASE_URL", "Ollama", required=False) or "http://localhost:11434/v1"
+
         return openai.AsyncOpenAI(
-            api_key=os.getenv("OLLAMA_API_KEY"),
-            base_url=os.getenv("OLLAMA_API_BASE_URL")
+            api_key=api_key,
+            base_url=base_url
         )
 
     if provider == Provider.ANTHROPIC:
+        _check_env_var("ANTHROPIC_API_KEY", "Anthropic Claude")
         return anthropic.AsyncAnthropic(
             api_key=os.getenv("ANTHROPIC_API_KEY"),
         )
 
-    raise ValueError(f"Unsupported provider {provider}")
+    raise ValueError(f"Unsupported provider: {provider}")
 
 
 async def _openai_chat_complete(
@@ -80,17 +140,25 @@ async def _openai_chat_complete(
     messages: list[dict[str, str]],
     temperature: float,
 ):
-    """
-    Plain OpenAI.
-    """
-    response = await client.chat.completions.create(
-        model=llm.model,
-        messages=messages,
-        temperature=temperature,
-    )
-    text = response.choices[0].message.content.strip()
-    cost = _calculate_cost(llm, response.usage)
-    return text, cost
+    """OpenAI chat completion."""
+    try:
+        response = await client.chat.completions.create(
+            model=llm.model,
+            messages=messages,
+            temperature=temperature,
+        )
+        text = response.choices[0].message.content.strip()
+        cost = _calculate_cost(llm, response.usage)
+        return text, cost
+    except Exception as e:
+        if "API key" in str(e) or "authentication" in str(e).lower():
+            raise LLMConfigurationError(
+                f"❌ OpenAI API authentication failed!\n\n"
+                f"Error: {str(e)}\n\n"
+                f"Please check that your OPENAI_API_KEY is valid.\n"
+                f"Get your API key at: https://platform.openai.com/api-keys"
            )
+        raise
 
 
 async def _azure_chat_complete(
@@ -99,17 +167,36 @@ async def _azure_chat_complete(
     messages: list[dict[str, str]],
     temperature: float,
 ):
-
+    """Azure OpenAI chat completion."""
     deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT") or llm.model
 
-    response = await client.chat.completions.create(
-        model=deployment_name,
-        messages=messages,
-        temperature=temperature,
-    )
-    text = response.choices[0].message.content.strip()
-    cost = _calculate_cost(llm, response.usage)
-    return text, cost
+    if not deployment_name:
+        raise LLMConfigurationError(
+            f"❌ Missing Azure OpenAI deployment name!\n\n"
+            f"Please set AZURE_OPENAI_DEPLOYMENT environment variable.\n"
+            f"Example: export AZURE_OPENAI_DEPLOYMENT='gpt-4o'"
+        )
+
+    try:
+        response = await client.chat.completions.create(
+            model=deployment_name,
+            messages=messages,
+            temperature=temperature,
+        )
+        text = response.choices[0].message.content.strip()
+        cost = _calculate_cost(llm, response.usage)
+        return text, cost
+    except Exception as e:
+        if "API key" in str(e) or "authentication" in str(e).lower():
+            raise LLMConfigurationError(
+                f"❌ Azure OpenAI authentication failed!\n\n"
+                f"Error: {str(e)}\n\n"
+                f"Please check your Azure OpenAI configuration:\n"
+                f"  - AZURE_OPENAI_API_KEY\n"
+                f"  - AZURE_OPENAI_ENDPOINT\n"
+                f"  - AZURE_OPENAI_DEPLOYMENT"
+            )
+        raise
 
 
 async def _google_chat_complete(
@@ -118,27 +205,35 @@ async def _google_chat_complete(
     messages: list[dict[str, str]],
     temperature: float,
 ):
-    """
-    Google GenAI / Gemini 2.x
-    """
+    """Google GenAI / Gemini chat completion."""
     prompt = "\n".join(m["content"] for m in messages)
 
-    response = await client.aio.models.generate_content(
-        model=llm.model,
-        contents=prompt,
-        config=GenerateContentConfig(temperature=temperature),
-    )
+    try:
+        response = await client.aio.models.generate_content(
+            model=llm.model,
+            contents=prompt,
+            config=GenerateContentConfig(temperature=temperature),
+        )
 
-    text = response.text.strip()
+        text = response.text.strip()
 
-    um = response.usage_metadata
-    usage = SimpleNamespace(
-        prompt_tokens=um.prompt_token_count,
-        completion_tokens=um.candidates_token_count,
-    )
+        um = response.usage_metadata
+        usage = SimpleNamespace(
+            prompt_tokens=um.prompt_token_count,
+            completion_tokens=um.candidates_token_count,
+        )
 
-    cost = _calculate_cost(llm, usage)
-    return text, cost
+        cost = _calculate_cost(llm, usage)
+        return text, cost
+    except Exception as e:
+        if "API key" in str(e) or "authentication" in str(e).lower() or "credentials" in str(e).lower():
+            raise LLMConfigurationError(
+                f"❌ Google Gemini API authentication failed!\n\n"
+                f"Error: {str(e)}\n\n"
+                f"Please check that your GOOGLE_API_KEY is valid.\n"
+                f"Get your API key at: https://aistudio.google.com/apikey"
            )
+        raise
 
 
 async def _ollama_chat_complete(
@@ -147,14 +242,29 @@ async def _ollama_chat_complete(
     messages: list[dict[str, str]],
     temperature: float,
 ):
-    response = await client.chat.completions.create(
-        model=llm.model,
-        messages=messages,
-        temperature=temperature,
-    )
-    text = response.choices[0].message.content.strip()
-    cost = _calculate_cost(llm, response.usage)
-    return text, cost
+    """Ollama (local) chat completion."""
+    try:
+        response = await client.chat.completions.create(
+            model=llm.model,
+            messages=messages,
+            temperature=temperature,
+        )
+        text = response.choices[0].message.content.strip()
+        cost = _calculate_cost(llm, response.usage)
+        return text, cost
+    except Exception as e:
+        if "Connection" in str(e) or "refused" in str(e).lower():
+            raise LLMConfigurationError(
+                f"❌ Cannot connect to Ollama server!\n\n"
+                f"Error: {str(e)}\n\n"
+                f"Make sure Ollama is running:\n"
+                f"  1. Install Ollama: https://ollama.ai/download\n"
+                f"  2. Start Ollama: ollama serve\n"
+                f"  3. Pull model: ollama pull {llm.model}\n\n"
+                f"Or set OLLAMA_API_BASE_URL to your Ollama server:\n"
+                f"  export OLLAMA_API_BASE_URL='http://localhost:11434/v1'"
            )
+        raise
 
 
 async def _anthropic_chat_complete(
@@ -163,23 +273,31 @@ async def _anthropic_chat_complete(
     messages: list[dict[str, str]],
     temperature: float,
 ):
-    """
-    Anthropic Claude chat completion.
-    """
-    response = await client.messages.create(
-        model=llm.model,
-        messages=messages,
-        temperature=temperature,
-        max_tokens=4096,  # Default max tokens for Claude
-    )
-    if isinstance(response.content, list):
-        text = "".join(
-            block.text for block in response.content if block.type == "text").strip()
-    else:
-        text = response.content.strip()
-
-    cost = _calculate_cost(llm, response.usage)
-    return text, cost
+    """Anthropic Claude chat completion."""
+    try:
+        response = await client.messages.create(
+            model=llm.model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=4096,
+        )
+        if isinstance(response.content, list):
+            text = "".join(
+                block.text for block in response.content if block.type == "text").strip()
+        else:
+            text = response.content.strip()
+
+        cost = _calculate_cost(llm, response.usage)
+        return text, cost
+    except Exception as e:
+        if "API key" in str(e) or "authentication" in str(e).lower():
+            raise LLMConfigurationError(
+                f"❌ Anthropic Claude API authentication failed!\n\n"
+                f"Error: {str(e)}\n\n"
+                f"Please check that your ANTHROPIC_API_KEY is valid.\n"
+                f"Get your API key at: https://console.anthropic.com/settings/keys"
            )
+        raise
 
 
 _HELPERS = {
@@ -196,20 +314,33 @@ async def chat_complete(
     messages: list[dict[str, str]],
     temperature: float = 0.0,
 ):
+    """
+    Complete a chat conversation using the specified LLM.
+
+    Args:
+        llm: LLM specification (e.g., "gpt-4o-mini", "openai:gpt-4o", or LLMDescriptor)
+        messages: List of message dicts with "role" and "content"
+        temperature: Sampling temperature (0.0-2.0)
+
+    Returns:
+        Tuple of (response_text, cost_in_usd)
+
+    Raises:
+        LLMConfigurationError: If required API keys or configuration are missing
+        ValueError: If provider is not supported
+    """
     llm = LLMDescriptor.parse(llm)
     helper = _HELPERS.get(llm.provider)
 
     if helper is None:
-        raise ValueError(f"Unsupported provider {llm.provider}")
+        raise ValueError(f"Unsupported provider: {llm.provider}")
 
     client = _get_client(llm.provider)
     return await helper(client, llm, messages, temperature)
 
 
 def _calculate_cost(llm: LLMDescriptor, usage) -> Optional[float]:
-    """
-    Calculate the cost of the LLM usage based on the model and usage data.
-    """
+    """Calculate the cost of the LLM usage based on the model and usage data."""
     if llm.provider == Provider.OLLAMA:
         return 0.0
     if not usage:
@@ -219,7 +350,7 @@ def _calculate_cost(llm: LLMDescriptor, usage) -> Optional[float]:
     if not price:
         return None
 
-    prompt = getattr(usage, "prompt_tokens", 0)
+    prompt = getattr(usage, "prompt_tokens", 0)
     completion = getattr(usage, "completion_tokens", 0)
 
     return round(
@@ -242,6 +373,10 @@ async def get_embeddings(
 
     Returns:
         Tuple of (embeddings_list, total_cost)
+
+    Raises:
+        LLMConfigurationError: If required API keys are missing
+        ValueError: If non-OpenAI provider is specified
     """
     llm = LLMDescriptor.parse(model)
 
@@ -259,16 +394,26 @@ async def _openai_get_embeddings(
     texts: list[str],
 ) -> tuple[list[list[float]], Optional[float]]:
     """OpenAI embeddings implementation."""
-    response = await client.embeddings.create(
-        model=llm.model,
-        input=texts,
-        encoding_format="float"
-    )
-
-    embeddings = [data.embedding for data in response.data]
-    cost = _calculate_embedding_cost(llm, response.usage)
+    try:
+        response = await client.embeddings.create(
+            model=llm.model,
+            input=texts,
+            encoding_format="float"
+        )
 
-    return embeddings, cost
+        embeddings = [data.embedding for data in response.data]
+        cost = _calculate_embedding_cost(llm, response.usage)
+
+        return embeddings, cost
+    except Exception as e:
+        if "API key" in str(e) or "authentication" in str(e).lower():
+            raise LLMConfigurationError(
+                f"❌ OpenAI API authentication failed for embeddings!\n\n"
+                f"Error: {str(e)}\n\n"
+                f"Please check that your OPENAI_API_KEY is valid.\n"
+                f"Get your API key at: https://platform.openai.com/api-keys"
            )
+        raise


def _calculate_embedding_cost(llm: LLMDescriptor, usage) -> Optional[float]:
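
Taken together, the 0.3.0 changes move configuration failures up front: _get_client validates environment variables before any request, and each provider helper re-raises authentication and connection errors as LLMConfigurationError with remediation steps. A minimal usage sketch, assuming the import path follows the module layout above (eval_lib.llm_client):

import asyncio
# Assumed import path, derived from the eval_lib/llm_client.py location shown above.
from eval_lib.llm_client import chat_complete, LLMConfigurationError


async def main() -> None:
    try:
        text, cost = await chat_complete(
            llm="openai:gpt-4o-mini",  # provider:model form parsed by LLMDescriptor.parse
            messages=[{"role": "user", "content": "Say hello."}],
            temperature=0.0,
        )
        print(text, f"(cost: ${cost})")
    except LLMConfigurationError as e:
        # Raised before any network call when e.g. OPENAI_API_KEY is unset
        print(e)

asyncio.run(main())

Because _get_client is wrapped in functools.cache, the environment is validated once per provider and the client instance is reused across subsequent calls.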