eval-ai-library 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eval-ai-library might be problematic.
- {eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/METADATA +1 -1
- {eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/RECORD +13 -13
- eval_lib/__init__.py +8 -6
- eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py +1 -0
- eval_lib/agent_metrics/role_adherence_metric/role_adherence.py +1 -0
- eval_lib/agent_metrics/task_success_metric/task_success_rate.py +1 -0
- eval_lib/llm_client.py +0 -2
- eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py +1 -0
- eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py +1 -0
- eval_lib/metrics/custom_metric/custom_eval.py +1 -0
- {eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/WHEEL +0 -0
- {eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/top_level.txt +0 -0
{eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/RECORD
CHANGED
@@ -1,34 +1,34 @@
-eval_ai_library-0.3.
-eval_lib/__init__.py,sha256=
+eval_ai_library-0.3.2.dist-info/licenses/LICENSE,sha256=rK9uLDgWNrCHNdp-Zma_XghDE7Fs0u0kDi3WMcmYx6w,1074
+eval_lib/__init__.py,sha256=IeDW5pLarPmHCBJu-6vFX71g9VxZTS46UkHjDkrU_Gw,3043
 eval_lib/evaluate.py,sha256=GjlXZb5dnl44LCaJwdkyGCYcC50zoNZn3NrofzNAVJ0,11490
 eval_lib/evaluation_schema.py,sha256=7IDd_uozqewhh7k0p1hKut_20udvRxxkV6thclxKUg0,1904
-eval_lib/llm_client.py,sha256=
+eval_lib/llm_client.py,sha256=3eMcarKLkDLDVh4AOxgWbaIzXlzpqsmEfJXNTBonNic,13633
 eval_lib/metric_pattern.py,sha256=wULgMNDeAqJC_Qjglo7bYzY2eGhA_PmY_hA_qGfg0sI,11730
 eval_lib/price.py,sha256=jbmkkUTxPuXrkSHuaJYPl7jSzfDIzQ9p_swWWs26UJ0,1986
 eval_lib/py.typed,sha256=8PjyZ1aVoQpRVvt71muvuq5qE-jTFZkK-GLHkhdebmc,26
 eval_lib/testcases_schema.py,sha256=qI4o6kX0jH1DR3sHGXUnu3Cyt2oq7rGlsMlOaXSt6F4,696
 eval_lib/utils.py,sha256=-hwagFFn3_QjgyLqF8Qx7JIkpgOEI8-F14eycog3bgc,3141
 eval_lib/agent_metrics/__init__.py,sha256=20Y4BsicD2s7OkOBQPBvB2JKStBDtplv52_q6q35Vgo,525
-eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py,sha256=
-eval_lib/agent_metrics/role_adherence_metric/role_adherence.py,sha256=
-eval_lib/agent_metrics/task_success_metric/task_success_rate.py,sha256=
+eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py,sha256=7j89HOTsu0rMoFnznTjMl-tqQpnZlS6ZIdrHPueEbb8,8289
+eval_lib/agent_metrics/role_adherence_metric/role_adherence.py,sha256=kJsYj9H3W3Mw2iBqj3Br_glP8gU6_diFPiJhRSnHGxg,9225
+eval_lib/agent_metrics/task_success_metric/task_success_rate.py,sha256=v5cO07cymo9GWSZ34ryAx3ya4DDBiRWih9w0bm_j_R8,12497
 eval_lib/agent_metrics/tools_correctness_metric/tool_correctness.py,sha256=qbVMtD6EWKah27FogLEiEh6pBX-k2wwKbwM_kFkvYeQ,4220
 eval_lib/datagenerator/datagenerator.py,sha256=NQZIQuSCmryxIT3lTLS1PpJjENmGqARtR-zTiQ8OvRk,15513
 eval_lib/datagenerator/document_loader.py,sha256=vnQUz_Dxb3SxcVPUmMXZe-rgfPp6OfHb6D2Ie9iqPms,17025
 eval_lib/datagenerator/prompts.py,sha256=iQAYitAbLud3vWJnXGh_OCF4syWYS_S71zZEPI6qYAU,7213
 eval_lib/metrics/__init__.py,sha256=3qClCCjPXt5i0c38g5krfuQnqlAXEl-jhAHy1C_ICMY,1213
 eval_lib/metrics/answer_precision_metric/answer_precision.py,sha256=AxPmwzGFU7tnTrrZuQZ7ow4nNSD-blDHdAGwhMHMxjM,15040
-eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py,sha256
+eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py,sha256=-Xb9I-BVMDf5E55FbJzP6IyvD6IVTUPBI-uCrRnEboc,8522
 eval_lib/metrics/bias_metric/bias.py,sha256=BVH8xlTUTRfVG_F1kauwpGAkVKBkUWhM9rUsrrLhpRU,4020
 eval_lib/metrics/contextual_precision_metric/contextual_precision.py,sha256=CQOb6uR2KeffTkhPSqZae56sX5tXMr0pJVM5W_wU1fU,3993
 eval_lib/metrics/contextual_recall_metric/contextual_recall.py,sha256=iw73_hGLWklHZSBkCRkPDNUt1xD5dknA_7CZ6Efkf5w,3913
-eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py,sha256=
-eval_lib/metrics/custom_metric/custom_eval.py,sha256=
+eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py,sha256=G1cYlA95YNcpEqQsALVi6ZbyNzWr9ccO2DATcsES5pk,6546
+eval_lib/metrics/custom_metric/custom_eval.py,sha256=Ov3-i6IytaJXlzcMgp46SRSeb8scyhqyuR2BqUtDFoM,11385
 eval_lib/metrics/faithfulness_metric/faithfulness.py,sha256=OqamlhTOps7d-NOStSIK7Tq-UAJXNql8VKjgtuqlDhA,5930
 eval_lib/metrics/geval/geval.py,sha256=mNciHXnqU2drOJsWlYmbwftGiKM89-Ykw2f6XneIGBM,10629
 eval_lib/metrics/restricted_refusal_metric/restricted_refusal.py,sha256=4QqYgGMcp6W9Lw-v4s0AlUhMSOKvBOEgnLvhqVXaT9I,4286
 eval_lib/metrics/toxicity_metric/toxicity.py,sha256=rBE1_fvpbCRdBpBep1y1LTIhofKR8GD4Eh76EOYzxL0,4076
-eval_ai_library-0.3.
-eval_ai_library-0.3.
-eval_ai_library-0.3.
-eval_ai_library-0.3.
+eval_ai_library-0.3.2.dist-info/METADATA,sha256=7kIgiDelzM1wbCdKdy4PXuFudIZ3UCUTPXhlVccFe9k,37706
+eval_ai_library-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+eval_ai_library-0.3.2.dist-info/top_level.txt,sha256=uQHpEd2XI0oZgq1eCww9zMvVgDJgwXMWkCD45fYUzEg,9
+eval_ai_library-0.3.2.dist-info/RECORD,,
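Each row in RECORD has the form path,sha256=<digest>,<size-in-bytes>, where the digest is the unpadded URL-safe base64 encoding of the file's SHA-256 hash, so every content change above surfaces as a new digest and size. A minimal sketch of how such a row can be checked against an installed file (standard library only; the relative path assumes the script runs from the installed site-packages directory):

import base64
import hashlib

def record_digest(path: str) -> str:
    # RECORD-style digest: unpadded URL-safe base64 of the SHA-256 hash
    with open(path, "rb") as f:
        raw = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# Compare against the py.typed row above, which is unchanged in 0.3.2
print(record_digest("eval_lib/py.typed") == "8PjyZ1aVoQpRVvt71muvuq5qE-jTFZkK-GLHkhdebmc")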
eval_lib/__init__.py
CHANGED
@@ -7,7 +7,7 @@ A powerful library for evaluating AI models with support for multiple LLM provid
 and a wide range of evaluation metrics for RAG systems and AI agents.
 """
 
-__version__ = "0.3.0"
+__version__ = "0.3.2"
 __author__ = "Aleksandr Meshkov"
 
 # Core evaluation functions
@@ -68,12 +68,14 @@ from eval_lib.agent_metrics import (
 
 def __getattr__(name):
     """
-
-    DataGenerator is imported only when it is actually used.
+    Lazy loading for data generation components.
     """
-    if name == "
-        from eval_lib.datagenerator.datagenerator import
-        return
+    if name == "DatasetGenerator":
+        from eval_lib.datagenerator.datagenerator import DatasetGenerator
+        return DatasetGenerator
+    if name == "DataGenerator":  # Alias for DatasetGenerator
+        from eval_lib.datagenerator.datagenerator import DatasetGenerator
+        return DatasetGenerator
     if name == "DocumentLoader":
         from eval_lib.datagenerator.document_loader import DocumentLoader
         return DocumentLoader
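The reworked __getattr__ hook is a PEP 562 module-level attribute hook: DatasetGenerator is imported only on first access, and the old DataGenerator name is kept as a backward-compatible alias. A minimal usage sketch, assuming only what the diff shows (the class's constructor is not part of this diff):

# Both names go through the module-level __getattr__ added in 0.3.2,
# so the datagenerator module stays unimported until first access.
from eval_lib import DataGenerator, DatasetGenerator

assert DataGenerator is DatasetGenerator  # alias resolves to the same class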
eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py
CHANGED
@@ -217,6 +217,7 @@ Verdicts:
     "final_score": final_score,
     "comment_final_score": f"Weighted average of verdict scores using softmax aggregation (temperature={self.temperature}).",
     "threshold": self.threshold,
+    "temperature": self.temperature,
     "success": success,
     "comment_success": "Whether the retention score meets the required threshold.",
     "final_reason": summary,
eval_lib/agent_metrics/role_adherence_metric/role_adherence.py
CHANGED
@@ -239,6 +239,7 @@ Return JSON array:
     "final_score": final_score,
     "comment_final_score": f"Weighted average of verdict scores using softmax aggregation (temperature={self.temperature}).",
     "threshold": self.threshold,
+    "temperature": self.temperature,
     "success": success,
     "comment_success": "Whether the role adherence score meets the required threshold.",
     "final_reason": summary,
eval_lib/agent_metrics/task_success_metric/task_success_rate.py
CHANGED
@@ -328,6 +328,7 @@ Criteria: [
     "final_score": final_score,
     "comment_final_score": f"Weighted average of verdict scores using softmax aggregation (temperature={self.temperature}).",
     "threshold": self.threshold,
+    "temperature": self.temperature,
     "success": success,
     "comment_success": "Whether the task success score meets the required threshold.",
     "final_reason": summary,
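All three agent metrics above now surface the temperature used for the softmax aggregation their comment strings describe. As a point of reference, a softmax-weighted average of verdict scores typically looks like the sketch below; this is an illustrative reconstruction, not eval_lib's actual code, and the scores/temperature inputs are assumed:

import math

def softmax_aggregate(scores: list[float], temperature: float = 1.0) -> float:
    # Lower temperature weights the aggregate toward the highest verdict
    # scores; a very high temperature approaches a plain arithmetic mean.
    weights = [math.exp(s / temperature) for s in scores]
    total = sum(weights)
    return sum(w * s for w, s in zip(weights, scores)) / total

# Example: three verdict scores aggregated at temperature 0.5
print(softmax_aggregate([0.9, 0.6, 0.8], temperature=0.5))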
eval_lib/llm_client.py
CHANGED
@@ -101,7 +101,6 @@ def _get_client(provider: Provider):
     if provider == Provider.AZURE:
         _check_env_var("AZURE_OPENAI_API_KEY", "Azure OpenAI")
         _check_env_var("AZURE_OPENAI_ENDPOINT", "Azure OpenAI")
-        # AZURE_OPENAI_DEPLOYMENT is checked at call time; it is not required here
 
         return AsyncAzureOpenAI(
             api_key=os.getenv("AZURE_OPENAI_API_KEY"),
@@ -114,7 +113,6 @@ def _get_client(provider: Provider):
         return genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
 
     if provider == Provider.OLLAMA:
-        # Ollama can work without a key (local server)
         api_key = _check_env_var(
             "OLLAMA_API_KEY", "Ollama", required=False) or "ollama"
         base_url = _check_env_var(
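The call sites above imply that _check_env_var raises for missing required settings and returns None for optional ones, which is why the Ollama branch can fall back to a placeholder key for a local server. A hypothetical reconstruction consistent with those call sites (the real helper in eval_lib/llm_client.py is not shown in this diff and may differ):

import os

def _check_env_var(name: str, provider: str, required: bool = True) -> str | None:
    # Raise early for required provider credentials; return None so
    # optional settings can use an `or` fallback at the call site.
    value = os.getenv(name)
    if value is None and required:
        raise EnvironmentError(f"{name} must be set to use the {provider} provider")
    return value

# Mirrors the Ollama branch: a local server accepts any placeholder key
api_key = _check_env_var("OLLAMA_API_KEY", "Ollama", required=False) or "ollama"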
eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py
CHANGED
@@ -181,6 +181,7 @@ class AnswerRelevancyMetric(MetricPattern):
     "final_score": final_score,
     "comment_final_score": "Score based on the proportion of relevant statements.",
     "threshold": self.threshold,
+    "temperature": self.temperature,
     "success": success,
     "comment_success": "Whether the score exceeds the pass threshold.",
     "final_reason": summary_reason,
eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py
CHANGED
@@ -155,6 +155,7 @@ class ContextualRelevancyMetric(MetricPattern):
     "final_score": score,
     "comment_final_score": "Weighted support score from context.",
     "threshold": self.threshold,
+    "temperature": self.temperature,
     "success": success,
     "comment_success": "Whether the score exceeds the threshold.",
     "final_reason": summary,
eval_lib/metrics/custom_metric/custom_eval.py
CHANGED
@@ -312,6 +312,7 @@ JSON:"""
     "final_score": final_score,
     "comment_final_score": f"Weighted average of verdict scores calculated using softmax aggregation (temperature={self.temperature}).",
     "threshold": self.threshold,
+    "temperature": self.temperature,
     "success": success,
     "comment_success": "Whether the final score meets the required threshold.",
     "summary": summary,
{eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/WHEEL
File without changes
{eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/licenses/LICENSE
File without changes
{eval_ai_library-0.3.0.dist-info → eval_ai_library-0.3.2.dist-info}/top_level.txt
File without changes