eval_ai_library-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eval-ai-library might be problematic.

Files changed (34)
  1. eval_ai_library-0.1.0.dist-info/METADATA +753 -0
  2. eval_ai_library-0.1.0.dist-info/RECORD +34 -0
  3. eval_ai_library-0.1.0.dist-info/WHEEL +5 -0
  4. eval_ai_library-0.1.0.dist-info/licenses/LICENSE +21 -0
  5. eval_ai_library-0.1.0.dist-info/top_level.txt +1 -0
  6. eval_lib/__init__.py +122 -0
  7. eval_lib/agent_metrics/__init__.py +12 -0
  8. eval_lib/agent_metrics/knowledge_retention_metric/knowledge_retention.py +231 -0
  9. eval_lib/agent_metrics/role_adherence_metric/role_adherence.py +251 -0
  10. eval_lib/agent_metrics/task_success_metric/task_success_rate.py +347 -0
  11. eval_lib/agent_metrics/tools_correctness_metric/tool_correctness.py +106 -0
  12. eval_lib/datagenerator/datagenerator.py +230 -0
  13. eval_lib/datagenerator/document_loader.py +510 -0
  14. eval_lib/datagenerator/prompts.py +192 -0
  15. eval_lib/evaluate.py +335 -0
  16. eval_lib/evaluation_schema.py +63 -0
  17. eval_lib/llm_client.py +286 -0
  18. eval_lib/metric_pattern.py +229 -0
  19. eval_lib/metrics/__init__.py +25 -0
  20. eval_lib/metrics/answer_precision_metric/answer_precision.py +405 -0
  21. eval_lib/metrics/answer_relevancy_metric/answer_relevancy.py +195 -0
  22. eval_lib/metrics/bias_metric/bias.py +114 -0
  23. eval_lib/metrics/contextual_precision_metric/contextual_precision.py +102 -0
  24. eval_lib/metrics/contextual_recall_metric/contextual_recall.py +91 -0
  25. eval_lib/metrics/contextual_relevancy_metric/contextual_relevancy.py +169 -0
  26. eval_lib/metrics/custom_metric/custom_eval.py +303 -0
  27. eval_lib/metrics/faithfulness_metric/faithfulness.py +140 -0
  28. eval_lib/metrics/geval/geval.py +326 -0
  29. eval_lib/metrics/restricted_refusal_metric/restricted_refusal.py +102 -0
  30. eval_lib/metrics/toxicity_metric/toxicity.py +113 -0
  31. eval_lib/price.py +37 -0
  32. eval_lib/py.typed +1 -0
  33. eval_lib/testcases_schema.py +27 -0
  34. eval_lib/utils.py +99 -0
eval_lib/datagenerator/document_loader.py
@@ -0,0 +1,510 @@
+ # document_loader.py
+ from __future__ import annotations
+ from pathlib import Path
+ from typing import List
+
+ from langchain_core.documents import Document
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+ # LangChain loaders (keep the existing ones)
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.document_loaders import Docx2txtLoader
+ from langchain_community.document_loaders import TextLoader
+
+ import html2text
+ import markdown
+
+ import io
+ import json
+ import zipfile
+
+ # ---------------------------
+ # Helper functions
+ # ---------------------------
+
+
+ def _read_text(p: Path) -> str:
+     return p.read_text(encoding="utf-8", errors="ignore")
+
+
+ def _read_bytes(p: Path) -> bytes:
+     return p.read_bytes()
+
+
+ def _csv_tsv_to_text(p: Path) -> str:
+     try:
+         import pandas as pd
+         sep = "," if p.suffix.lower() == ".csv" else "\t"
+         df = pd.read_csv(str(p), dtype=str, sep=sep,
+                          encoding="utf-8", engine="python")
+         df = df.fillna("")
+         buf = io.StringIO()
+         df.to_csv(buf, index=False)
+         return buf.getvalue()
+     except Exception:
+         try:
+             return _read_text(p)
+         except Exception:
+             return ""
+
+
+ def _xlsx_to_text(p: Path) -> str:
+     try:
+         import pandas as pd
+         df = pd.read_excel(str(p), dtype=str, engine="openpyxl")
+         df = df.fillna("")
+         buf = io.StringIO()
+         df.to_csv(buf, index=False)
+         return buf.getvalue()
+     except Exception:
+         return ""
+
+
+ def _pptx_to_text(p: Path) -> str:
+     try:
+         from pptx import Presentation
+         prs = Presentation(str(p))
+         texts = []
+         for slide in prs.slides:
+             for shape in slide.shapes:
+                 if hasattr(shape, "text") and shape.text:
+                     texts.append(shape.text)
+         return "\n".join(texts)
+     except Exception:
+         return ""
+
+
+ def _json_to_text(p: Path) -> str:
+     try:
+         data = json.loads(_read_text(p))
+
+         def flatten(obj, prefix=""):
+             lines = []
+             if isinstance(obj, dict):
+                 for k, v in obj.items():
+                     lines += flatten(v, f"{prefix}{k}.")
+             elif isinstance(obj, list):
+                 for i, v in enumerate(obj):
+                     lines += flatten(v, f"{prefix}{i}.")
+             else:
+                 lines.append(f"{prefix[:-1]}: {obj}")
+             return lines
+         return "\n".join(flatten(data))
+     except Exception:
+         return _read_text(p)
+
+
+ def _yaml_to_text(p: Path) -> str:
+     try:
+         import yaml
+         data = yaml.safe_load(_read_text(p))
+         return json.dumps(data, ensure_ascii=False, indent=2)
+     except Exception:
+         return _read_text(p)
+
+
+ def _xml_to_text(p: Path) -> str:
+     try:
+         from xml.etree import ElementTree as ET
+         tree = ET.parse(str(p))
+         root = tree.getroot()
+         lines = []
+
+         def walk(node, prefix=""):
+             text = (node.text or "").strip()
+             tag = node.tag
+             if text:
+                 lines.append(f"{prefix}{tag}: {text}")
+             for child in node:
+                 walk(child, prefix + tag + ".")
+         walk(root)
+         return "\n".join(lines)
+     except Exception:
+         return _read_text(p)
+
+
+ def _rtf_to_text(p: Path) -> str:
+     try:
+         from striprtf.striprtf import rtf_to_text
+         return rtf_to_text(_read_text(p))
+     except Exception:
+         return ""
+
+
+ def _odt_to_text(p: Path) -> str:
+     try:
+         with zipfile.ZipFile(str(p)) as z:
+             from xml.etree import ElementTree as ET
+             with z.open("content.xml") as f:
+                 tree = ET.parse(f)
+                 root = tree.getroot()
+                 texts = []
+                 for elem in root.iter():
+                     if elem.text and elem.text.strip():
+                         texts.append(elem.text.strip())
+                 return "\n".join(texts)
+     except Exception:
+         return ""
+
+ # ---------------------------
+ # PDF: LangChain -> pypdf -> PyMuPDF -> OCR(PyMuPDF+pytesseract)
+ # ---------------------------
+
+
+ def _pdf_text_pypdf(p: Path) -> str:
+     try:
+         from pypdf import PdfReader  # <- pypdf specifically
+         reader = PdfReader(str(p))
+         texts = []
+         for page in reader.pages:
+             t = page.extract_text() or ""
+             if t.strip():
+                 texts.append(t)
+         return "\n".join(texts)
+     except Exception:
+         return ""
+
+
+ def _pdf_text_pymupdf(p: Path) -> str:
+     try:
+         import fitz  # PyMuPDF
+         text_parts = []
+         with fitz.open(str(p)) as doc:
+             for page in doc:
+                 t = page.get_text("text") or ""
+                 if t.strip():
+                     text_parts.append(t)
+         return "\n".join(text_parts)
+     except Exception:
+         return ""
+
+
+ def _pdf_ocr_via_pymupdf(p: Path) -> str:
+     """Render pages with PyMuPDF and OCR them with pytesseract. Works only if pytesseract and the tesseract binary are installed."""
+     try:
+         import fitz  # PyMuPDF
+         import pytesseract
+         from PIL import Image
+         import io as _io
+
+         texts = []
+         zoom = 2.0
+         mat = fitz.Matrix(zoom, zoom)
+         with fitz.open(str(p)) as doc:
+             for page in doc:
+                 pix = page.get_pixmap(matrix=mat, alpha=False)
+                 img = Image.open(_io.BytesIO(pix.tobytes("png")))
+                 t = pytesseract.image_to_string(img) or ""
+                 if t.strip():
+                     texts.append(t)
+         return "\n".join(texts)
+     except Exception:
+         return ""
+
+ # ---------------------------
+ # Images (OCR)
+ # ---------------------------
+
+
+ def _ocr_image_bytes(img_bytes: bytes) -> str:
+     try:
+         import pytesseract
+         from PIL import Image
+         import io as _io
+         img = Image.open(_io.BytesIO(img_bytes))
+         return pytesseract.image_to_string(img) or ""
+     except Exception:
+         return ""
+
+ # ---------------------------
+ # Docx
+ # ---------------------------
+
+
+ def _docx_to_text_python_docx(p: Path) -> str:
+     try:
+         import docx  # python-docx
+         d = docx.Document(str(p))
+         parts = []
+         for para in d.paragraphs:
+             if para.text:
+                 parts.append(para.text)
+         # also grab text from tables
+         for tbl in d.tables:
+             for row in tbl.rows:
+                 cells = [cell.text for cell in row.cells]
+                 if any(c.strip() for c in cells):
+                     parts.append("\t".join(cells))
+         return "\n".join(parts)
+     except Exception:
+         return ""
+
+
+ def _docx_to_text_mammoth(p: Path) -> str:
+     try:
+         import mammoth
+         with open(str(p), "rb") as f:
+             result = mammoth.extract_raw_text(f)
+         return (result.value or "").strip()
+     except Exception:
+         return ""
+
+
+ def _docx_to_text_zipxml(p: Path) -> str:
+     """Dependency-free fallback: read word/document.xml and pull out every w:t element."""
+     try:
+         import zipfile
+         from xml.etree import ElementTree as ET
+         texts = []
+         with zipfile.ZipFile(str(p)) as z:
+             # main document body
+             if "word/document.xml" in z.namelist():
+                 with z.open("word/document.xml") as f:
+                     root = ET.parse(f).getroot()
+                     for el in root.iter():
+                         tag = el.tag.rsplit("}", 1)[-1]  # strip the namespace
+                         if tag == "t" and el.text and el.text.strip():
+                             texts.append(el.text.strip())
+             # headers/footers can contain text too
+             for name in z.namelist():
+                 if name.startswith("word/header") and name.endswith(".xml"):
+                     with z.open(name) as f:
+                         root = ET.parse(f).getroot()
+                         for el in root.iter():
+                             tag = el.tag.rsplit("}", 1)[-1]
+                             if tag == "t" and el.text and el.text.strip():
+                                 texts.append(el.text.strip())
+                 if name.startswith("word/footer") and name.endswith(".xml"):
+                     with z.open(name) as f:
+                         root = ET.parse(f).getroot()
+                         for el in root.iter():
+                             tag = el.tag.rsplit("}", 1)[-1]
+                             if tag == "t" and el.text and el.text.strip():
+                                 texts.append(el.text.strip())
+         return "\n".join(texts)
+     except Exception:
+         return ""
+
+
+ def _doc_to_text_textract(p: Path) -> str:
+     """For legacy .doc files. Works if textract and its system binaries (antiword/catdoc) are installed."""
+     try:
+         import textract
+         return textract.process(str(p)).decode("utf-8", errors="ignore")
+     except Exception:
+         return ""
+
+ # ---------------------------
+ # General functions (extended)
+ # ---------------------------
+
+
+ def load_documents(file_paths: List[str]) -> List[Document]:
+     documents: List[Document] = []
+
+     for path in map(Path, file_paths):
+         ext = path.suffix.lower()
+
+         try:
+             # ---- PDF ----
+             if ext == ".pdf":
+                 used_langchain = False
+                 # 1) LangChain PyPDFLoader
+                 try:
+                     docs = PyPDFLoader(str(path)).load()
+                     if docs and any((d.page_content or "").strip() for d in docs):
+                         documents += docs
+                         used_langchain = True
+                 except Exception:
+                     used_langchain = False
+
+                 if not used_langchain:
+                     # 2) pypdf
+                     text = _pdf_text_pypdf(path)
+                     if not text.strip():
+                         # 3) PyMuPDF
+                         text = _pdf_text_pymupdf(path)
+                     if not text.strip():
+                         # 4) OCR via PyMuPDF
+                         text = _pdf_ocr_via_pymupdf(path)
+
+                     if text.strip():
+                         documents.append(Document(page_content=text, metadata={
+                             "source": str(path), "filetype": "pdf"}))
+                     else:
+                         print(
+                             f"⚠️ PDF has no extractable text (maybe scanned): {path.name}")
+
+             # ---- DOCX ----
+             elif ext == ".docx":
+                 # 1) Try the standard Docx2txtLoader first
+                 added = False
+                 try:
+                     docs = Docx2txtLoader(str(path)).load()
+                     # Docx2txt sometimes returns a Document with an empty page_content
+                     docs = [d for d in docs if (d.page_content or "").strip()]
+                     if docs:
+                         documents += docs
+                         added = True
+                 except Exception:
+                     added = False
+
+                 if not added:
+                     # 2) python-docx
+                     text = _docx_to_text_python_docx(path)
+                     if not text.strip():
+                         # 3) mammoth
+                         text = _docx_to_text_mammoth(path)
+                     if not text.strip():
+                         # 4) zip+xml fallback
+                         text = _docx_to_text_zipxml(path)
+
+                     if text.strip():
+                         documents.append(Document(
+                             page_content=text,
+                             metadata={"source": str(path), "filetype": "docx"}
+                         ))
+                     else:
+                         print(f"⚠️ DOCX produced no text: {path.name}")
+
+             elif ext == ".doc":
+                 # legacy format
+                 text = _doc_to_text_textract(path)
+                 if text.strip():
+                     documents.append(Document(
+                         page_content=text,
+                         metadata={"source": str(path), "filetype": "doc"}
+                     ))
+                 else:
+                     print(
+                         f"⚠️ .DOC not extractable (install textract/antiword?): {path.name}")
+
+             # ---- TXT ----
+             elif ext == ".txt":
+                 documents += TextLoader(str(path), encoding="utf-8").load()
+
+             # ---- HTML ----
+             elif ext in (".html", ".htm"):
+                 html = _read_text(path)
+                 text = html2text.html2text(html)
+                 documents.append(Document(page_content=text, metadata={
+                     "source": str(path), "filetype": "html"}))
+
+             # ---- Markdown ----
+             elif ext == ".md":
+                 md = _read_text(path)
+                 html = markdown.markdown(md)
+                 text = html2text.html2text(html)
+                 documents.append(Document(page_content=text, metadata={
+                     "source": str(path), "filetype": "md"}))
+
+             # ---- CSV / TSV ----
+             elif ext in (".csv", ".tsv"):
+                 text = _csv_tsv_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": ext.lstrip(".")}))
+                 else:
+                     print(f"⚠️ Empty CSV/TSV: {path.name}")
+
+             # ---- XLSX ----
+             elif ext == ".xlsx":
+                 text = _xlsx_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "xlsx"}))
+                 else:
+                     print(f"⚠️ Empty XLSX: {path.name}")
+
+             # ---- PPTX ----
+             elif ext == ".pptx":
+                 text = _pptx_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "pptx"}))
+                 else:
+                     print(f"⚠️ Empty PPTX: {path.name}")
+
+             # ---- JSON ----
+             elif ext == ".json":
+                 text = _json_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "json"}))
+                 else:
+                     print(f"⚠️ Empty JSON: {path.name}")
+
+             # ---- YAML / YML ----
+             elif ext in (".yaml", ".yml"):
+                 text = _yaml_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "yaml"}))
+                 else:
+                     print(f"⚠️ Empty YAML: {path.name}")
+
+             # ---- XML ----
+             elif ext == ".xml":
+                 text = _xml_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "xml"}))
+                 else:
+                     print(f"⚠️ Empty XML: {path.name}")
+
+             # ---- RTF ----
+             elif ext == ".rtf":
+                 text = _rtf_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "rtf"}))
+                 else:
+                     print(f"⚠️ Empty RTF: {path.name}")
+
+             # ---- ODT ----
+             elif ext == ".odt":
+                 text = _odt_to_text(path)
+                 if text.strip():
+                     documents.append(Document(page_content=text, metadata={
+                         "source": str(path), "filetype": "odt"}))
+                 else:
+                     print(f"⚠️ Empty ODT: {path.name}")
+
+             # ---- Images (OCR) ----
+             elif ext in (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp"):
+                 txt = _ocr_image_bytes(_read_bytes(path))
+                 if txt.strip():
+                     documents.append(Document(page_content=txt, metadata={
+                         "source": str(path), "filetype": "image"}))
+                 else:
+                     print(f"⚠️ Image has no OCR text: {path.name}")
+
+             else:
+                 print(f"⚠️ Unsupported format: {path.name} — skipped")
+
+         except Exception as exc:
+             print(f"❌ Error reading {path.name}: {exc}")
+
+     return documents
+
+
+ def chunk_documents(
+     docs: List[Document],
+     chunk_size: int = 1024,
+     chunk_overlap: int = 100,
+ ) -> List[Document]:
+
+     splitter = RecursiveCharacterTextSplitter(
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap,
+         separators=["\n\n", "\n", " ", ""],
+     )
+
+     chunks: List[Document] = []
+     for doc in docs:
+         for i, chunk_text in enumerate(splitter.split_text(doc.page_content)):
+             meta = dict(doc.metadata)
+             meta.update({"chunk_index": i})  # FIX
+             chunks.append(Document(page_content=chunk_text, metadata=meta))
+
+     return chunks
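
For orientation, a minimal usage sketch of the two public functions above (the import path follows the package layout listed earlier; the file names are only illustrative):

    from eval_lib.datagenerator.document_loader import load_documents, chunk_documents

    # Illustrative inputs; unsupported or empty files are skipped with a console warning.
    docs = load_documents(["report.pdf", "notes.docx", "data.csv"])

    # Split into overlapping chunks; each chunk keeps its source metadata plus a chunk_index.
    chunks = chunk_documents(docs, chunk_size=1024, chunk_overlap=100)
    for c in chunks[:3]:
        print(c.metadata["source"], c.metadata["chunk_index"], len(c.page_content))
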
eval_lib/datagenerator/prompts.py
@@ -0,0 +1,192 @@
+ def get_question_style_guidance(question_openness: str, question_length: str) -> str:
+     """Generate guidance based on question style parameters."""
+     guidance = []
+
+     if question_openness == "open":
+         guidance.append(
+             "- Favor open-ended questions that allow for detailed, explanatory responses")
+         guidance.append(
+             "- Include 'how', 'why', 'explain', 'describe' type questions")
+     elif question_openness == "closed":
+         guidance.append(
+             "- Focus on specific, factual questions with definitive answers")
+         guidance.append(
+             "- Include yes/no questions, specific data requests, and factual lookups")
+     else:  # mixed
+         guidance.append("- Mix both open-ended and closed questions")
+         guidance.append(
+             "- Balance exploratory questions with specific factual queries")
+
+     if question_length == "short":
+         guidance.append("- Keep inputs concise and direct (1-2 sentences)")
+     elif question_length == "long":
+         guidance.append(
+             "- Include detailed context and background in inputs (3+ sentences)")
+         guidance.append(
+             "- Provide scenarios with multiple parts or complex requirements")
+     else:  # mixed
+         guidance.append(
+             "- Vary input length from brief queries to detailed scenarios")
+
+     return "\n".join(guidance) if guidance else ""
+
+
+ def get_trap_guidance(trap_density: float) -> str:
+     """Generate guidance for trap questions based on density."""
+     if trap_density == 0:
+         return "**No Trap Questions:** All questions should be answerable using the provided information."
+
+     trap_percentage = int(trap_density * 100)
+
+     return f"""**Trap Questions ({trap_percentage}% of total):**
+ Create realistic scenarios where:
+ - The user asks about information NOT present in the reference material
+ - Questions contain subtle factual errors or misconceptions
+ - Requests involve information that would require knowledge beyond the provided context
+ - Make traps subtle and realistic - they should feel like genuine user mistakes or knowledge gaps
+ - The agent should be able to politely indicate the limitation or correct the misconception"""
+
+
+ def dataset_generation_prompt(
+     chunk: str,
+     rows_per_chunk: int,
+     agent_description: str,
+     input_format: str,
+     expected_output_format: str,
+     test_types: list[str],
+     question_length: str,
+     question_openness: str,
+     trap_density: float,
+     language: str
+ ) -> str:
+     """
+     Create a more realistic and detailed prompt for dataset generation.
+     """
+
+     question_style_guidance = get_question_style_guidance(
+         question_openness, question_length)
+
+     trap_guidance = get_trap_guidance(trap_density)
+
+     prompt = f"""You are an expert test case designer creating realistic evaluation scenarios for an AI agent.
+
+ **Agent Context:**
+ {agent_description}
+
+ **Input Format:** {input_format}
+ **Expected Output Format:** {expected_output_format}
+
+ **Reference Information:**
+ \"\"\"{chunk}\"\"\"
+
+ **Your Task:**
+ Create {rows_per_chunk} realistic test cases that would naturally occur when evaluating this agent. Each test case should:
+
+ 1. **Reflect Real User Interactions:** Generate inputs that actual users would provide in real scenarios
+ 2. **Be Contextually Relevant:** Use the reference information naturally, as if a user discovered it and wants to interact with the agent about it
+ 3. **Vary in Complexity:** Include both straightforward and nuanced scenarios
+ 4. **Test Different Capabilities:** Cover various aspects of the agent's expected functionality
+
+ **Test Types Available:** {', '.join(test_types)}
+
+ **Question Characteristics:**
+ - **Length:** {question_length}
+ - **Style:** {question_openness}
+ {question_style_guidance}
+
+ **Quality Requirements:**
+ - Use natural, conversational language that real users would employ
+ - Avoid academic or artificial phrasing
+ - Include context and background when users would naturally provide it
+ - Make questions specific enough to have clear, verifiable answers
+ - Ensure variety in question types and complexity levels
+
+ {trap_guidance}
+
+ **Output Format:**
+ Generate exactly {rows_per_chunk} JSON objects in an array. Each object must have:
+ - "input": The user's natural input/question
+ - "expected_output": The ideal agent response
+ - "test_type": One of {test_types}
+ - "is_trap": boolean indicating if this is a trap question
+
+ **Language:** {language}
+
+ Return ONLY the JSON array, no additional text or formatting."""
+
+     return prompt
+
+
+ def dataset_generation_from_scratch_prompt(
+     max_rows: int,
+     agent_description: str,
+     input_format: str,
+     expected_output_format: str,
+     test_types: list[str],
+     question_length: str,
+     question_openness: str,
+     trap_density: float,
+     language: str
+ ) -> str:
+     """
+     Create a realistic prompt for generating a dataset from scratch, without reference material.
+     """
+
+     question_style_guidance = get_question_style_guidance(
+         question_openness, question_length)
+     trap_guidance = get_trap_guidance(trap_density)
+
+     prompt = f"""You are an expert test case designer creating comprehensive evaluation scenarios for an AI agent.
+
+ **Agent Context:**
+ {agent_description}
+
+ **Input Format:** {input_format}
+ **Expected Output Format:** {expected_output_format}
+
+ **Your Task:**
+ Create {max_rows} diverse, realistic test cases that thoroughly evaluate this agent's capabilities. Design scenarios that would naturally occur in real-world usage.
+
+ **Test Design Principles:**
+
+ 1. **Real-World Scenarios:** Create inputs that actual users would provide in genuine situations
+ 2. **Comprehensive Coverage:** Test different aspects of the agent's functionality and knowledge domains
+ 3. **Varied Complexity:** Include simple queries, moderate challenges, and complex multi-step scenarios
+ 4. **Edge Cases:** Include boundary conditions and unusual but valid requests
+ 5. **Common Use Cases:** Focus on frequent user interaction patterns
+
+ **Test Types Available:** {', '.join(test_types)}
+
+ **Question Characteristics:**
+ - **Length:** {question_length}
+ - **Style:** {question_openness}
+ {question_style_guidance}
+
+ **Quality Requirements:**
+ - Use natural, conversational language that real users would employ
+ - Avoid repetitive patterns or artificial academic phrasing
+ - Include appropriate context when users would naturally provide it
+ - Create specific, testable scenarios with clear success criteria
+ - Ensure broad coverage of the agent's expected capabilities
+ - Make each test case unique and valuable for evaluation
+
+ {trap_guidance}
+
+ **Diversity Guidelines:**
+ - Vary question topics and domains relevant to the agent
+ - Include different user personas and interaction styles
+ - Test both common workflows and edge cases
+ - Balance straightforward requests with more complex scenarios
+
+ **Output Format:**
+ Generate exactly {max_rows} JSON objects in an array. Each object must have:
+ - "input": The user's natural input/question
+ - "expected_output": The ideal agent response
+ - "test_type": One of {test_types}
+ - "is_trap": boolean indicating if this is a trap question
+
+ **Language:** {language}
+
+ Return ONLY the JSON array, no additional text or formatting."""
+
+     return prompt
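
A quick sketch of how these prompt builders might be invoked (the argument values are made up for illustration; only the signatures come from the code above):

    from eval_lib.datagenerator.prompts import dataset_generation_prompt

    prompt = dataset_generation_prompt(
        chunk="Refund requests are processed within 14 days of purchase.",
        rows_per_chunk=3,
        agent_description="Customer-support assistant for an online store.",
        input_format="free-form user question",
        expected_output_format="short factual answer",
        test_types=["factual", "reasoning"],
        question_length="mixed",
        question_openness="mixed",
        trap_density=0.2,
        language="English",
    )
    # The returned string is meant to be sent to an LLM, which should reply with a JSON
    # array of {"input", "expected_output", "test_type", "is_trap"} objects.
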