eval-ai-library 0.3.10__tar.gz → 0.3.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eval-ai-library might be problematic. Click here for more details.
- {eval_ai_library-0.3.10/eval_ai_library.egg-info → eval_ai_library-0.3.12}/PKG-INFO +1 -1
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12/eval_ai_library.egg-info}/PKG-INFO +1 -1
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/__init__.py +1 -1
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/cli.py +20 -3
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/dashboard_server.py +1 -1
- eval_ai_library-0.3.12/eval_lib/html.py +49 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/llm_client.py +32 -4
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/pyproject.toml +1 -1
- eval_ai_library-0.3.10/eval_lib/html.py +0 -736
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/LICENSE +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/MANIFEST.in +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/README.md +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_ai_library.egg-info/SOURCES.txt +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_ai_library.egg-info/dependency_links.txt +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_ai_library.egg-info/entry_points.txt +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_ai_library.egg-info/requires.txt +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_ai_library.egg-info/top_level.txt +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/agent_metrics/__init__.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/agent_metrics/role_adherence_metric/role_adherence.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/agent_metrics/task_success_metric/task_success_rate.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/agent_metrics/tools_correctness_metric/tool_correctness.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/datagenerator/datagenerator.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/datagenerator/document_loader.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/datagenerator/prompts.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/evaluate.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/evaluation_schema.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metric_pattern.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/__init__.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/answer_precision_metric/answer_precision.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/bias_metric/bias.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/contextual_precision_metric/contextual_precision.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/contextual_recall_metric/contextual_recall.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/custom_metric/custom_eval.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/faithfulness_metric/faithfulness.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/geval/geval.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/restricted_refusal_metric/restricted_refusal.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/toxicity_metric/toxicity.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/price.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/py.typed +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/testcases_schema.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/utils.py +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/setup.cfg +0 -0
- {eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/setup.py +0 -0
|
@@ -7,7 +7,7 @@ A powerful library for evaluating AI models with support for multiple LLM provid
|
|
|
7
7
|
and a wide range of evaluation metrics for RAG systems and AI agents.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
__version__ = "0.3.10"
|
|
10
|
+
__version__ = "0.3.12"
|
|
11
11
|
__author__ = "Aleksandr Meshkov"
|
|
12
12
|
|
|
13
13
|
# Core evaluation functions
|
|
@@ -6,6 +6,8 @@ Command-line interface for Eval AI Library
|
|
|
6
6
|
import argparse
|
|
7
7
|
import sys
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
import os
|
|
10
|
+
import json
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
def run_dashboard():
|
|
@@ -70,8 +72,11 @@ def run_dashboard():
|
|
|
70
72
|
print(f" Press Ctrl+C to stop\n")
|
|
71
73
|
print("="*70 + "\n")
|
|
72
74
|
|
|
73
|
-
|
|
75
|
+
static_folder = os.path.join(os.path.dirname(__file__), 'static')
|
|
76
|
+
|
|
77
|
+
app = Flask(__name__, static_folder=static_folder)
|
|
74
78
|
app.config['WTF_CSRF_ENABLED'] = False
|
|
79
|
+
app.config['JSON_SORT_KEYS'] = False
|
|
75
80
|
|
|
76
81
|
@app.route('/')
|
|
77
82
|
def index():
|
|
@@ -93,7 +98,13 @@ def run_dashboard():
|
|
|
93
98
|
cache = get_fresh_cache()
|
|
94
99
|
latest = cache.get_latest()
|
|
95
100
|
if latest:
|
|
96
|
-
|
|
101
|
+
json_str = json.dumps(latest, ensure_ascii=False, sort_keys=False)
|
|
102
|
+
from flask import Response
|
|
103
|
+
return Response(
|
|
104
|
+
json_str,
|
|
105
|
+
mimetype='application/json',
|
|
106
|
+
headers={'Content-Type': 'application/json; charset=utf-8'}
|
|
107
|
+
)
|
|
97
108
|
return jsonify({'error': 'No results available'}), 404
|
|
98
109
|
|
|
99
110
|
@app.route('/api/sessions')
|
|
@@ -114,7 +125,13 @@ def run_dashboard():
|
|
|
114
125
|
cache = get_fresh_cache()
|
|
115
126
|
session = cache.get_by_session(session_id)
|
|
116
127
|
if session:
|
|
117
|
-
|
|
128
|
+
json_str = json.dumps(session, ensure_ascii=False, sort_keys=False)
|
|
129
|
+
from flask import Response
|
|
130
|
+
return Response(
|
|
131
|
+
json_str,
|
|
132
|
+
mimetype='application/json',
|
|
133
|
+
headers={'Content-Type': 'application/json; charset=utf-8'}
|
|
134
|
+
)
|
|
118
135
|
return jsonify({'error': 'Session not found'}), 404
|
|
119
136
|
|
|
120
137
|
@app.route('/api/clear')
|
|
@@ -31,7 +31,7 @@ class DashboardCache:
|
|
|
31
31
|
try:
|
|
32
32
|
with open(self.cache_file, 'w', encoding='utf-8') as f:
|
|
33
33
|
json.dump(self.results_history, f,
|
|
34
|
-
indent=2, ensure_ascii=False)
|
|
34
|
+
indent=2, ensure_ascii=False, sort_keys=False)
|
|
35
35
|
except Exception as e:
|
|
36
36
|
print(f"Warning: Could not save cache: {e}")
|
|
37
37
|
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
HTML_TEMPLATE = """
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html lang="en">
|
|
4
|
+
<head>
|
|
5
|
+
<meta charset="UTF-8">
|
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
7
|
+
<title>Eval AI Library - Dashboard</title>
|
|
8
|
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
|
9
|
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
|
10
|
+
<link rel="stylesheet" href="{{ url_for('static', filename='dashboard.css') }}">
|
|
11
|
+
</head>
|
|
12
|
+
<body>
|
|
13
|
+
<div class="container">
|
|
14
|
+
<header>
|
|
15
|
+
<div>
|
|
16
|
+
<h1>Eval AI Library Dashboard</h1>
|
|
17
|
+
<div class="timestamp" id="timestamp">Loading...</div>
|
|
18
|
+
</div>
|
|
19
|
+
<div class="controls">
|
|
20
|
+
<select id="sessionSelect" onchange="loadSession()">
|
|
21
|
+
<option value="">Loading sessions...</option>
|
|
22
|
+
</select>
|
|
23
|
+
<button onclick="refreshData()">Refresh</button>
|
|
24
|
+
<button class="primary" onclick="clearCache()">Clear Cache</button>
|
|
25
|
+
</div>
|
|
26
|
+
</header>
|
|
27
|
+
|
|
28
|
+
<div id="content" class="loading">
|
|
29
|
+
Loading data...
|
|
30
|
+
</div>
|
|
31
|
+
</div>
|
|
32
|
+
|
|
33
|
+
<!-- Modal for detailed information -->
|
|
34
|
+
<div id="detailsModal" class="modal">
|
|
35
|
+
<div class="modal-content">
|
|
36
|
+
<div class="modal-header">
|
|
37
|
+
<div class="test-status">
|
|
38
|
+
<h2 id="modalTitle">Test Details</h2>
|
|
39
|
+
</div>
|
|
40
|
+
<span class="close" onclick="closeModal()">×</span>
|
|
41
|
+
</div>
|
|
42
|
+
<div class="modal-body" id="modalBody"></div>
|
|
43
|
+
</div>
|
|
44
|
+
</div>
|
|
45
|
+
|
|
46
|
+
<script src="{{ url_for('static', filename='dashboard.js') }}"></script>
|
|
47
|
+
</body>
|
|
48
|
+
</html>
|
|
49
|
+
"""
|
|
@@ -47,6 +47,29 @@ class CustomLLMClient(ABC):
|
|
|
47
47
|
"""
|
|
48
48
|
pass
|
|
49
49
|
|
|
50
|
+
async def get_embeddings(
|
|
51
|
+
self,
|
|
52
|
+
texts: list[str],
|
|
53
|
+
model: str = "text-embedding-3-small"
|
|
54
|
+
) -> tuple[list[list[float]], Optional[float]]:
|
|
55
|
+
"""
|
|
56
|
+
Get embeddings for texts (optional implementation).
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
texts: List of texts to embed
|
|
60
|
+
model: Embedding model name
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
Tuple of (embeddings_list, cost_in_usd)
|
|
64
|
+
|
|
65
|
+
Raises:
|
|
66
|
+
NotImplementedError: If custom client doesn't support embeddings
|
|
67
|
+
"""
|
|
68
|
+
raise NotImplementedError(
|
|
69
|
+
f"{self.__class__.__name__} does not support embeddings. "
|
|
70
|
+
"Implement get_embeddings() method or use OpenAI for embeddings."
|
|
71
|
+
)
|
|
72
|
+
|
|
50
73
|
@abstractmethod
|
|
51
74
|
def get_model_name(self) -> str:
|
|
52
75
|
"""Return the model name for logging/tracking purposes."""
|
|
@@ -405,14 +428,14 @@ def _calculate_cost(llm: LLMDescriptor, usage) -> Optional[float]:
|
|
|
405
428
|
|
|
406
429
|
|
|
407
430
|
async def get_embeddings(
|
|
408
|
-
model: str | tuple[str, str] | LLMDescriptor,
|
|
431
|
+
model: str | tuple[str, str] | LLMDescriptor | CustomLLMClient,
|
|
409
432
|
texts: list[str],
|
|
410
433
|
) -> tuple[list[list[float]], Optional[float]]:
|
|
411
434
|
"""
|
|
412
|
-
Get embeddings for a list of texts
|
|
435
|
+
Get embeddings for a list of texts.
|
|
413
436
|
|
|
414
437
|
Args:
|
|
415
|
-
model: Model specification
|
|
438
|
+
model: Model specification or CustomLLMClient instance
|
|
416
439
|
texts: List of texts to embed
|
|
417
440
|
|
|
418
441
|
Returns:
|
|
@@ -420,8 +443,13 @@ async def get_embeddings(
|
|
|
420
443
|
|
|
421
444
|
Raises:
|
|
422
445
|
LLMConfigurationError: If required API keys are missing
|
|
423
|
-
ValueError: If
|
|
446
|
+
ValueError: If provider doesn't support embeddings
|
|
447
|
+
NotImplementedError: If CustomLLMClient doesn't implement get_embeddings
|
|
424
448
|
"""
|
|
449
|
+
# Handle custom LLM clients
|
|
450
|
+
if isinstance(model, CustomLLMClient):
|
|
451
|
+
return await model.get_embeddings(texts)
|
|
452
|
+
|
|
425
453
|
llm = LLMDescriptor.parse(model)
|
|
426
454
|
|
|
427
455
|
if llm.provider != Provider.OPENAI:
|
|
@@ -1,736 +0,0 @@
|
|
|
1
|
-
HTML_TEMPLATE = """
|
|
2
|
-
<!DOCTYPE html>
|
|
3
|
-
<html lang="en">
|
|
4
|
-
<head>
|
|
5
|
-
<meta charset="UTF-8">
|
|
6
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
7
|
-
<title>Eval AI Library - Interactive Dashboard</title>
|
|
8
|
-
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
|
9
|
-
<style>
|
|
10
|
-
* {
|
|
11
|
-
margin: 0;
|
|
12
|
-
padding: 0;
|
|
13
|
-
box-sizing: border-box;
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
body {
|
|
17
|
-
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
|
18
|
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
|
19
|
-
padding: 20px;
|
|
20
|
-
min-height: 100vh;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
.container {
|
|
24
|
-
max-width: 1400px;
|
|
25
|
-
margin: 0 auto;
|
|
26
|
-
background: white;
|
|
27
|
-
border-radius: 20px;
|
|
28
|
-
padding: 30px;
|
|
29
|
-
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
header {
|
|
33
|
-
display: flex;
|
|
34
|
-
justify-content: space-between;
|
|
35
|
-
align-items: center;
|
|
36
|
-
margin-bottom: 40px;
|
|
37
|
-
padding-bottom: 20px;
|
|
38
|
-
border-bottom: 3px solid #667eea;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
h1 {
|
|
42
|
-
color: #667eea;
|
|
43
|
-
font-size: 2.5em;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
.controls {
|
|
47
|
-
display: flex;
|
|
48
|
-
gap: 10px;
|
|
49
|
-
align-items: center;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
select, button {
|
|
53
|
-
padding: 10px 20px;
|
|
54
|
-
border-radius: 8px;
|
|
55
|
-
border: 2px solid #667eea;
|
|
56
|
-
background: white;
|
|
57
|
-
color: #667eea;
|
|
58
|
-
font-weight: 600;
|
|
59
|
-
cursor: pointer;
|
|
60
|
-
transition: all 0.3s;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
button:hover {
|
|
64
|
-
background: #667eea;
|
|
65
|
-
color: white;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
.timestamp {
|
|
69
|
-
color: #666;
|
|
70
|
-
font-size: 0.9em;
|
|
71
|
-
margin-left: 20px;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
.summary {
|
|
75
|
-
display: grid;
|
|
76
|
-
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
|
77
|
-
gap: 20px;
|
|
78
|
-
margin-bottom: 40px;
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
.summary-card {
|
|
82
|
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
|
83
|
-
color: white;
|
|
84
|
-
padding: 25px;
|
|
85
|
-
border-radius: 15px;
|
|
86
|
-
text-align: center;
|
|
87
|
-
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
|
|
88
|
-
transition: transform 0.3s;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
.summary-card:hover {
|
|
92
|
-
transform: translateY(-5px);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
.summary-card h3 {
|
|
96
|
-
font-size: 0.9em;
|
|
97
|
-
margin-bottom: 10px;
|
|
98
|
-
opacity: 0.9;
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
.summary-card .value {
|
|
102
|
-
font-size: 2em;
|
|
103
|
-
font-weight: bold;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
.metrics-grid {
|
|
107
|
-
display: grid;
|
|
108
|
-
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
|
109
|
-
gap: 20px;
|
|
110
|
-
margin-bottom: 40px;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
.metric-card {
|
|
114
|
-
background: #f8f9fa;
|
|
115
|
-
border-radius: 15px;
|
|
116
|
-
padding: 20px;
|
|
117
|
-
box-shadow: 0 3px 10px rgba(0,0,0,0.1);
|
|
118
|
-
transition: transform 0.3s;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
.metric-card:hover {
|
|
122
|
-
transform: translateY(-5px);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
.metric-card h3 {
|
|
126
|
-
color: #667eea;
|
|
127
|
-
margin-bottom: 15px;
|
|
128
|
-
font-size: 1.1em;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
.metric-score {
|
|
132
|
-
font-size: 2.5em;
|
|
133
|
-
font-weight: bold;
|
|
134
|
-
color: #764ba2;
|
|
135
|
-
margin-bottom: 15px;
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
.metric-details p {
|
|
139
|
-
margin: 8px 0;
|
|
140
|
-
color: #555;
|
|
141
|
-
font-size: 0.9em;
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
.charts {
|
|
145
|
-
display: grid;
|
|
146
|
-
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
|
|
147
|
-
gap: 30px;
|
|
148
|
-
margin-bottom: 40px;
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
.chart-container {
|
|
152
|
-
background: #f8f9fa;
|
|
153
|
-
border-radius: 15px;
|
|
154
|
-
padding: 20px;
|
|
155
|
-
box-shadow: 0 3px 10px rgba(0,0,0,0.1);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
.chart-container h2 {
|
|
159
|
-
color: #667eea;
|
|
160
|
-
margin-bottom: 20px;
|
|
161
|
-
font-size: 1.3em;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
table {
|
|
165
|
-
width: 100%;
|
|
166
|
-
border-collapse: collapse;
|
|
167
|
-
background: white;
|
|
168
|
-
border-radius: 10px;
|
|
169
|
-
overflow: hidden;
|
|
170
|
-
box-shadow: 0 3px 10px rgba(0,0,0,0.1);
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
th {
|
|
174
|
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
|
175
|
-
color: white;
|
|
176
|
-
padding: 15px;
|
|
177
|
-
text-align: left;
|
|
178
|
-
font-weight: 600;
|
|
179
|
-
cursor: pointer;
|
|
180
|
-
user-select: none;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
th:hover {
|
|
184
|
-
background: linear-gradient(135deg, #5568d3 0%, #653a8b 100%);
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
td {
|
|
188
|
-
padding: 12px 15px;
|
|
189
|
-
border-bottom: 1px solid #eee;
|
|
190
|
-
font-size: 0.9em;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
tr.success {
|
|
194
|
-
background: #f0fdf4;
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
tr.failed {
|
|
198
|
-
background: #fef2f2;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
tr:hover {
|
|
202
|
-
background: #f8f9fa !important;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
.reason {
|
|
206
|
-
max-width: 300px;
|
|
207
|
-
color: #666;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
.view-details-btn {
|
|
211
|
-
background: #667eea;
|
|
212
|
-
color: white;
|
|
213
|
-
border: none;
|
|
214
|
-
padding: 5px 12px;
|
|
215
|
-
border-radius: 5px;
|
|
216
|
-
cursor: pointer;
|
|
217
|
-
font-size: 0.85em;
|
|
218
|
-
transition: all 0.3s;
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
.view-details-btn:hover {
|
|
222
|
-
background: #5568d3;
|
|
223
|
-
transform: scale(1.05);
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
/* Modal styles */
|
|
227
|
-
.modal {
|
|
228
|
-
display: none;
|
|
229
|
-
position: fixed;
|
|
230
|
-
z-index: 1000;
|
|
231
|
-
left: 0;
|
|
232
|
-
top: 0;
|
|
233
|
-
width: 100%;
|
|
234
|
-
height: 100%;
|
|
235
|
-
overflow: auto;
|
|
236
|
-
background-color: rgba(0,0,0,0.7);
|
|
237
|
-
animation: fadeIn 0.3s;
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
@keyframes fadeIn {
|
|
241
|
-
from { opacity: 0; }
|
|
242
|
-
to { opacity: 1; }
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
.modal-content {
|
|
246
|
-
background-color: #fefefe;
|
|
247
|
-
margin: 2% auto;
|
|
248
|
-
padding: 30px;
|
|
249
|
-
border-radius: 15px;
|
|
250
|
-
width: 90%;
|
|
251
|
-
max-width: 900px;
|
|
252
|
-
max-height: 90vh;
|
|
253
|
-
overflow-y: auto;
|
|
254
|
-
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
|
|
255
|
-
animation: slideIn 0.3s;
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
@keyframes slideIn {
|
|
259
|
-
from {
|
|
260
|
-
transform: translateY(-50px);
|
|
261
|
-
opacity: 0;
|
|
262
|
-
}
|
|
263
|
-
to {
|
|
264
|
-
transform: translateY(0);
|
|
265
|
-
opacity: 1;
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
.modal-header {
|
|
270
|
-
display: flex;
|
|
271
|
-
justify-content: space-between;
|
|
272
|
-
align-items: center;
|
|
273
|
-
margin-bottom: 20px;
|
|
274
|
-
padding-bottom: 15px;
|
|
275
|
-
border-bottom: 2px solid #667eea;
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
.modal-header h2 {
|
|
279
|
-
color: #667eea;
|
|
280
|
-
margin: 0;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
.close {
|
|
284
|
-
color: #aaa;
|
|
285
|
-
font-size: 35px;
|
|
286
|
-
font-weight: bold;
|
|
287
|
-
cursor: pointer;
|
|
288
|
-
transition: color 0.3s;
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
.close:hover {
|
|
292
|
-
color: #667eea;
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
.detail-section {
|
|
296
|
-
margin: 20px 0;
|
|
297
|
-
padding: 15px;
|
|
298
|
-
background: #f8f9fa;
|
|
299
|
-
border-radius: 10px;
|
|
300
|
-
border-left: 4px solid #667eea;
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
.detail-section h3 {
|
|
304
|
-
color: #667eea;
|
|
305
|
-
margin-bottom: 10px;
|
|
306
|
-
font-size: 1.1em;
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
.detail-section pre {
|
|
310
|
-
background: white;
|
|
311
|
-
padding: 15px;
|
|
312
|
-
border-radius: 8px;
|
|
313
|
-
overflow-x: auto;
|
|
314
|
-
font-size: 0.85em;
|
|
315
|
-
line-height: 1.5;
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
.detail-section p {
|
|
319
|
-
margin: 8px 0;
|
|
320
|
-
color: #555;
|
|
321
|
-
line-height: 1.6;
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
.badge {
|
|
325
|
-
display: inline-block;
|
|
326
|
-
padding: 4px 10px;
|
|
327
|
-
border-radius: 12px;
|
|
328
|
-
font-size: 0.8em;
|
|
329
|
-
font-weight: 600;
|
|
330
|
-
margin-right: 8px;
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
.badge-success {
|
|
334
|
-
background: #d1fae5;
|
|
335
|
-
color: #065f46;
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
.badge-failed {
|
|
339
|
-
background: #fee2e2;
|
|
340
|
-
color: #991b1b;
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
.loading {
|
|
344
|
-
text-align: center;
|
|
345
|
-
padding: 40px;
|
|
346
|
-
color: #667eea;
|
|
347
|
-
font-size: 1.2em;
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
.no-data {
|
|
351
|
-
text-align: center;
|
|
352
|
-
padding: 60px;
|
|
353
|
-
color: #999;
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
.no-data h2 {
|
|
357
|
-
color: #667eea;
|
|
358
|
-
margin-bottom: 20px;
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
@media (max-width: 768px) {
|
|
362
|
-
.charts {
|
|
363
|
-
grid-template-columns: 1fr;
|
|
364
|
-
}
|
|
365
|
-
|
|
366
|
-
.metrics-grid {
|
|
367
|
-
grid-template-columns: 1fr;
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
header {
|
|
371
|
-
flex-direction: column;
|
|
372
|
-
gap: 15px;
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
.modal-content {
|
|
376
|
-
width: 95%;
|
|
377
|
-
margin: 5% auto;
|
|
378
|
-
padding: 20px;
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
</style>
|
|
382
|
-
</head>
|
|
383
|
-
<body>
|
|
384
|
-
<div class="container">
|
|
385
|
-
<header>
|
|
386
|
-
<div>
|
|
387
|
-
<h1>📊 Eval AI Library Dashboard</h1>
|
|
388
|
-
<span class="timestamp" id="timestamp">Loading...</span>
|
|
389
|
-
</div>
|
|
390
|
-
<div class="controls">
|
|
391
|
-
<select id="sessionSelect" onchange="loadSession()">
|
|
392
|
-
<option value="">Loading sessions...</option>
|
|
393
|
-
</select>
|
|
394
|
-
<button onclick="refreshData()">🔄 Refresh</button>
|
|
395
|
-
<button onclick="clearCache()">🗑️ Clear Cache</button>
|
|
396
|
-
</div>
|
|
397
|
-
</header>
|
|
398
|
-
|
|
399
|
-
<div id="content" class="loading">
|
|
400
|
-
Loading data...
|
|
401
|
-
</div>
|
|
402
|
-
</div>
|
|
403
|
-
|
|
404
|
-
<!-- Modal для детальной информации -->
|
|
405
|
-
<div id="detailsModal" class="modal">
|
|
406
|
-
<div class="modal-content">
|
|
407
|
-
<div class="modal-header">
|
|
408
|
-
<h2>📋 Evaluation Details</h2>
|
|
409
|
-
<span class="close" onclick="closeModal()">×</span>
|
|
410
|
-
</div>
|
|
411
|
-
<div id="modalBody"></div>
|
|
412
|
-
</div>
|
|
413
|
-
</div>
|
|
414
|
-
|
|
415
|
-
<script>
|
|
416
|
-
let currentData = null;
|
|
417
|
-
let scoresChart = null;
|
|
418
|
-
let successChart = null;
|
|
419
|
-
|
|
420
|
-
// Загрузить список сессий
|
|
421
|
-
async function loadSessions() {
|
|
422
|
-
try {
|
|
423
|
-
const response = await fetch('/api/sessions');
|
|
424
|
-
const sessions = await response.json();
|
|
425
|
-
|
|
426
|
-
const select = document.getElementById('sessionSelect');
|
|
427
|
-
select.innerHTML = '<option value="latest">Latest Results</option>';
|
|
428
|
-
|
|
429
|
-
sessions.reverse().forEach(session => {
|
|
430
|
-
const option = document.createElement('option');
|
|
431
|
-
option.value = session.session_id;
|
|
432
|
-
option.textContent = `${session.session_id} (${session.timestamp}) - ${session.total_tests} tests`;
|
|
433
|
-
select.appendChild(option);
|
|
434
|
-
});
|
|
435
|
-
} catch (error) {
|
|
436
|
-
console.error('Error loading sessions:', error);
|
|
437
|
-
}
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
// Загрузить данные сессии
|
|
441
|
-
async function loadSession() {
|
|
442
|
-
const select = document.getElementById('sessionSelect');
|
|
443
|
-
const sessionId = select.value;
|
|
444
|
-
|
|
445
|
-
try {
|
|
446
|
-
let url = '/api/latest';
|
|
447
|
-
if (sessionId && sessionId !== 'latest') {
|
|
448
|
-
url = `/api/session/${sessionId}`;
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
const response = await fetch(url);
|
|
452
|
-
if (!response.ok) {
|
|
453
|
-
showNoData();
|
|
454
|
-
return;
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
const session = await response.json();
|
|
458
|
-
currentData = session.data;
|
|
459
|
-
renderDashboard(session);
|
|
460
|
-
} catch (error) {
|
|
461
|
-
console.error('Error loading session:', error);
|
|
462
|
-
showNoData();
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
// Показать "нет данных"
|
|
467
|
-
function showNoData() {
|
|
468
|
-
document.getElementById('content').innerHTML = `
|
|
469
|
-
<div class="no-data">
|
|
470
|
-
<h2>No evaluation results available</h2>
|
|
471
|
-
<p>Run an evaluation with <code>show_dashboard=True</code> to see results here.</p>
|
|
472
|
-
</div>
|
|
473
|
-
`;
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
// Отрисовать дашборд
|
|
477
|
-
function renderDashboard(session) {
|
|
478
|
-
const data = session.data;
|
|
479
|
-
document.getElementById('timestamp').textContent = `Generated: ${session.timestamp}`;
|
|
480
|
-
|
|
481
|
-
const metricsLabels = Object.keys(data.metrics_summary);
|
|
482
|
-
const metricsScores = metricsLabels.map(m => data.metrics_summary[m].avg_score);
|
|
483
|
-
const metricsSuccessRates = metricsLabels.map(m => data.metrics_summary[m].success_rate);
|
|
484
|
-
|
|
485
|
-
let metricCards = '';
|
|
486
|
-
for (const [metricName, metricData] of Object.entries(data.metrics_summary)) {
|
|
487
|
-
metricCards += `
|
|
488
|
-
<div class="metric-card">
|
|
489
|
-
<h3>${metricName}</h3>
|
|
490
|
-
<div class="metric-score">${metricData.avg_score.toFixed(3)}</div>
|
|
491
|
-
<div class="metric-details">
|
|
492
|
-
<p>✅ Passed: ${metricData.passed}</p>
|
|
493
|
-
<p>❌ Failed: ${metricData.failed}</p>
|
|
494
|
-
<p>📊 Success Rate: ${metricData.success_rate.toFixed(1)}%</p>
|
|
495
|
-
<p>🎯 Threshold: ${metricData.threshold}</p>
|
|
496
|
-
<p>🤖 Model: ${metricData.model}</p>
|
|
497
|
-
<p>💰 Total Cost: $${metricData.total_cost.toFixed(6)}</p>
|
|
498
|
-
</div>
|
|
499
|
-
</div>
|
|
500
|
-
`;
|
|
501
|
-
}
|
|
502
|
-
|
|
503
|
-
let tableRows = '';
|
|
504
|
-
data.test_cases.forEach((testCase, tcIdx) => {
|
|
505
|
-
testCase.metrics.forEach((metric, mIdx) => {
|
|
506
|
-
const statusEmoji = metric.success ? '✅' : '❌';
|
|
507
|
-
const statusClass = metric.success ? 'success' : 'failed';
|
|
508
|
-
|
|
509
|
-
tableRows += `
|
|
510
|
-
<tr class="${statusClass}">
|
|
511
|
-
<td>${testCase.test_index}</td>
|
|
512
|
-
<td>${testCase.input}</td>
|
|
513
|
-
<td>${metric.name}</td>
|
|
514
|
-
<td>${metric.score.toFixed(3)}</td>
|
|
515
|
-
<td>${metric.threshold}</td>
|
|
516
|
-
<td>${statusEmoji}</td>
|
|
517
|
-
<td>${metric.evaluation_model}</td>
|
|
518
|
-
<td>$${(metric.evaluation_cost || 0).toFixed(6)}</td>
|
|
519
|
-
<td>
|
|
520
|
-
<button class="view-details-btn" onclick="showDetails(${tcIdx}, ${mIdx})">
|
|
521
|
-
View Details
|
|
522
|
-
</button>
|
|
523
|
-
</td>
|
|
524
|
-
</tr>
|
|
525
|
-
`;
|
|
526
|
-
});
|
|
527
|
-
});
|
|
528
|
-
|
|
529
|
-
document.getElementById('content').innerHTML = `
|
|
530
|
-
<div class="summary">
|
|
531
|
-
<div class="summary-card">
|
|
532
|
-
<h3>Total Tests</h3>
|
|
533
|
-
<div class="value">${data.total_tests}</div>
|
|
534
|
-
</div>
|
|
535
|
-
<div class="summary-card">
|
|
536
|
-
<h3>Total Cost</h3>
|
|
537
|
-
<div class="value">$${data.total_cost.toFixed(6)}</div>
|
|
538
|
-
</div>
|
|
539
|
-
<div class="summary-card">
|
|
540
|
-
<h3>Metrics</h3>
|
|
541
|
-
<div class="value">${metricsLabels.length}</div>
|
|
542
|
-
</div>
|
|
543
|
-
</div>
|
|
544
|
-
|
|
545
|
-
<h2 style="color: #667eea; margin-bottom: 20px;">📈 Metrics Summary</h2>
|
|
546
|
-
<div class="metrics-grid">
|
|
547
|
-
${metricCards}
|
|
548
|
-
</div>
|
|
549
|
-
|
|
550
|
-
<h2 style="color: #667eea; margin-bottom: 20px;">📊 Charts</h2>
|
|
551
|
-
<div class="charts">
|
|
552
|
-
<div class="chart-container">
|
|
553
|
-
<h2>Average Scores by Metric</h2>
|
|
554
|
-
<canvas id="scoresChart"></canvas>
|
|
555
|
-
</div>
|
|
556
|
-
<div class="chart-container">
|
|
557
|
-
<h2>Success Rate by Metric</h2>
|
|
558
|
-
<canvas id="successChart"></canvas>
|
|
559
|
-
</div>
|
|
560
|
-
</div>
|
|
561
|
-
|
|
562
|
-
<h2 style="color: #667eea; margin: 40px 0 20px 0;">📋 Detailed Results</h2>
|
|
563
|
-
<table>
|
|
564
|
-
<thead>
|
|
565
|
-
<tr>
|
|
566
|
-
<th>Test #</th>
|
|
567
|
-
<th>Input</th>
|
|
568
|
-
<th>Metric</th>
|
|
569
|
-
<th>Score</th>
|
|
570
|
-
<th>Threshold</th>
|
|
571
|
-
<th>Status</th>
|
|
572
|
-
<th>Model</th>
|
|
573
|
-
<th>Cost</th>
|
|
574
|
-
<th>Actions</th>
|
|
575
|
-
</tr>
|
|
576
|
-
</thead>
|
|
577
|
-
<tbody>
|
|
578
|
-
${tableRows}
|
|
579
|
-
</tbody>
|
|
580
|
-
</table>
|
|
581
|
-
`;
|
|
582
|
-
|
|
583
|
-
renderCharts(metricsLabels, metricsScores, metricsSuccessRates);
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
// Показать детали в модальном окне
|
|
587
|
-
function showDetails(testCaseIdx, metricIdx) {
|
|
588
|
-
const testCase = currentData.test_cases[testCaseIdx];
|
|
589
|
-
const metric = testCase.metrics[metricIdx];
|
|
590
|
-
|
|
591
|
-
const statusBadge = metric.success
|
|
592
|
-
? '<span class="badge badge-success">✅ PASSED</span>'
|
|
593
|
-
: '<span class="badge badge-failed">❌ FAILED</span>';
|
|
594
|
-
|
|
595
|
-
let modalContent = `
|
|
596
|
-
<div class="detail-section">
|
|
597
|
-
<h3>Test Case #${testCase.test_index}</h3>
|
|
598
|
-
<p><strong>Input:</strong> ${testCase.input_full}</p>
|
|
599
|
-
<p><strong>Actual Output:</strong> ${testCase.actual_output_full || 'N/A'}</p>
|
|
600
|
-
<p><strong>Expected Output:</strong> ${testCase.expected_output_full || 'N/A'}</p>
|
|
601
|
-
</div>
|
|
602
|
-
|
|
603
|
-
<div class="detail-section">
|
|
604
|
-
<h3>Metric: ${metric.name}</h3>
|
|
605
|
-
${statusBadge}
|
|
606
|
-
<p><strong>Score:</strong> ${metric.score.toFixed(3)} / ${metric.threshold}</p>
|
|
607
|
-
<p><strong>Model:</strong> ${metric.evaluation_model}</p>
|
|
608
|
-
<p><strong>Cost:</strong> $${(metric.evaluation_cost || 0).toFixed(6)}</p>
|
|
609
|
-
</div>
|
|
610
|
-
|
|
611
|
-
<div class="detail-section">
|
|
612
|
-
<h3>Reason</h3>
|
|
613
|
-
<p>${metric.reason_full || metric.reason}</p>
|
|
614
|
-
</div>
|
|
615
|
-
`;
|
|
616
|
-
|
|
617
|
-
// Добавляем retrieval context если есть
|
|
618
|
-
if (testCase.retrieval_context && testCase.retrieval_context.length > 0) {
|
|
619
|
-
modalContent += `
|
|
620
|
-
<div class="detail-section">
|
|
621
|
-
<h3>Retrieval Context (${testCase.retrieval_context.length} chunks)</h3>
|
|
622
|
-
${testCase.retrieval_context.map((ctx, idx) => `
|
|
623
|
-
<p><strong>Chunk ${idx + 1}:</strong></p>
|
|
624
|
-
<p style="margin-left: 20px; color: #666;">${ctx.substring(0, 300)}${ctx.length > 300 ? '...' : ''}</p>
|
|
625
|
-
`).join('')}
|
|
626
|
-
</div>
|
|
627
|
-
`;
|
|
628
|
-
}
|
|
629
|
-
|
|
630
|
-
// Добавляем evaluation log если есть
|
|
631
|
-
if (metric.evaluation_log) {
|
|
632
|
-
modalContent += `
|
|
633
|
-
<div class="detail-section">
|
|
634
|
-
<h3>Evaluation Log</h3>
|
|
635
|
-
<pre>${JSON.stringify(metric.evaluation_log, null, 2)}</pre>
|
|
636
|
-
</div>
|
|
637
|
-
`;
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
document.getElementById('modalBody').innerHTML = modalContent;
|
|
641
|
-
document.getElementById('detailsModal').style.display = 'block';
|
|
642
|
-
}
|
|
643
|
-
|
|
644
|
-
// Закрыть модальное окно
|
|
645
|
-
function closeModal() {
|
|
646
|
-
document.getElementById('detailsModal').style.display = 'none';
|
|
647
|
-
}
|
|
648
|
-
|
|
649
|
-
// Закрытие по клику вне модального окна
|
|
650
|
-
window.onclick = function(event) {
|
|
651
|
-
const modal = document.getElementById('detailsModal');
|
|
652
|
-
if (event.target == modal) {
|
|
653
|
-
closeModal();
|
|
654
|
-
}
|
|
655
|
-
}
|
|
656
|
-
|
|
657
|
-
// Отрисовать графики
|
|
658
|
-
function renderCharts(labels, scores, successRates) {
|
|
659
|
-
if (scoresChart) scoresChart.destroy();
|
|
660
|
-
if (successChart) successChart.destroy();
|
|
661
|
-
|
|
662
|
-
const scoresCtx = document.getElementById('scoresChart').getContext('2d');
|
|
663
|
-
scoresChart = new Chart(scoresCtx, {
|
|
664
|
-
type: 'bar',
|
|
665
|
-
data: {
|
|
666
|
-
labels: labels,
|
|
667
|
-
datasets: [{
|
|
668
|
-
label: 'Average Score',
|
|
669
|
-
data: scores,
|
|
670
|
-
backgroundColor: 'rgba(102, 126, 234, 0.8)',
|
|
671
|
-
borderColor: 'rgba(102, 126, 234, 1)',
|
|
672
|
-
borderWidth: 2
|
|
673
|
-
}]
|
|
674
|
-
},
|
|
675
|
-
options: {
|
|
676
|
-
responsive: true,
|
|
677
|
-
scales: {
|
|
678
|
-
y: {
|
|
679
|
-
beginAtZero: true,
|
|
680
|
-
max: 1.0
|
|
681
|
-
}
|
|
682
|
-
}
|
|
683
|
-
}
|
|
684
|
-
});
|
|
685
|
-
|
|
686
|
-
const successCtx = document.getElementById('successChart').getContext('2d');
|
|
687
|
-
successChart = new Chart(successCtx, {
|
|
688
|
-
type: 'doughnut',
|
|
689
|
-
data: {
|
|
690
|
-
labels: labels,
|
|
691
|
-
datasets: [{
|
|
692
|
-
label: 'Success Rate (%)',
|
|
693
|
-
data: successRates,
|
|
694
|
-
backgroundColor: [
|
|
695
|
-
'rgba(102, 126, 234, 0.8)',
|
|
696
|
-
'rgba(118, 75, 162, 0.8)',
|
|
697
|
-
'rgba(237, 100, 166, 0.8)',
|
|
698
|
-
'rgba(255, 154, 158, 0.8)',
|
|
699
|
-
'rgba(250, 208, 196, 0.8)'
|
|
700
|
-
],
|
|
701
|
-
borderWidth: 2
|
|
702
|
-
}]
|
|
703
|
-
},
|
|
704
|
-
options: {
|
|
705
|
-
responsive: true
|
|
706
|
-
}
|
|
707
|
-
});
|
|
708
|
-
}
|
|
709
|
-
|
|
710
|
-
// Обновить данные
|
|
711
|
-
function refreshData() {
|
|
712
|
-
loadSessions();
|
|
713
|
-
loadSession();
|
|
714
|
-
}
|
|
715
|
-
|
|
716
|
-
// Очистить кеш
|
|
717
|
-
async function clearCache() {
|
|
718
|
-
if (confirm('Are you sure you want to clear all cached results?')) {
|
|
719
|
-
try {
|
|
720
|
-
await fetch('/api/clear');
|
|
721
|
-
alert('Cache cleared!');
|
|
722
|
-
refreshData();
|
|
723
|
-
} catch (error) {
|
|
724
|
-
console.error('Error clearing cache:', error);
|
|
725
|
-
alert('Error clearing cache');
|
|
726
|
-
}
|
|
727
|
-
}
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
// Инициализация
|
|
731
|
-
loadSessions();
|
|
732
|
-
loadSession();
|
|
733
|
-
</script>
|
|
734
|
-
</body>
|
|
735
|
-
</html>
|
|
736
|
-
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_ai_library.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/custom_metric/custom_eval.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_ai_library-0.3.10 → eval_ai_library-0.3.12}/eval_lib/metrics/toxicity_metric/toxicity.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|