deepeval 3.7.1__py3-none-any.whl → 3.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/evaluate/compare.py +6 -2
- deepeval/synthesizer/config.py +9 -0
- deepeval/synthesizer/schema.py +23 -0
- deepeval/synthesizer/synthesizer.py +1137 -2
- deepeval/synthesizer/templates/__init__.py +11 -2
- deepeval/synthesizer/templates/template.py +554 -1
- deepeval/synthesizer/templates/template_extraction.py +32 -0
- deepeval/synthesizer/templates/template_prompt.py +262 -0
- {deepeval-3.7.1.dist-info → deepeval-3.7.2.dist-info}/METADATA +2 -1
- {deepeval-3.7.1.dist-info → deepeval-3.7.2.dist-info}/RECORD +14 -14
- {deepeval-3.7.1.dist-info → deepeval-3.7.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.1.dist-info → deepeval-3.7.2.dist-info}/WHEEL +0 -0
- {deepeval-3.7.1.dist-info → deepeval-3.7.2.dist-info}/entry_points.txt +0 -0
|
@@ -37,6 +37,69 @@ class PromptSynthesizerTemplate:
|
|
|
37
37
|
JSON:
|
|
38
38
|
"""
|
|
39
39
|
|
|
40
|
+
@staticmethod
|
|
41
|
+
def generate_synthetic_conversational_scenarios(
|
|
42
|
+
scenario: str,
|
|
43
|
+
conversational_task: str,
|
|
44
|
+
participant_roles: str,
|
|
45
|
+
num_goldens: int,
|
|
46
|
+
):
|
|
47
|
+
return f"""
|
|
48
|
+
Generate a series of conversational SCENARIOS from scratch based on the provided scenario description,
|
|
49
|
+
conversational task, and participant roles.
|
|
50
|
+
|
|
51
|
+
A SCENARIO is a narrative description of a situation in which a conversation naturally occurs.
|
|
52
|
+
It is NOT a question, NOT a prompt, and NOT a user query. It MUST purely describe context.
|
|
53
|
+
|
|
54
|
+
Each scenario MUST depict a realistic MULTI-TURN conversational situation involving the given participants.
|
|
55
|
+
|
|
56
|
+
**
|
|
57
|
+
IMPORTANT FORMAT:
|
|
58
|
+
- Only return JSON
|
|
59
|
+
- JSON MUST contain: {{ "data": [ {{ "scenario": "..." }}, ... ] }}
|
|
60
|
+
- You MUST TRY to generate {num_goldens} items
|
|
61
|
+
**
|
|
62
|
+
|
|
63
|
+
Example of GOOD scenarios (situational descriptions):
|
|
64
|
+
- "During a late afternoon code review session, a junior engineer asks their senior engineer why an async function is inconsistent, leading to a detailed back-and-forth about race conditions."
|
|
65
|
+
- "While preparing for a sprint demo, a senior engineer helps a junior engineer interpret stack traces, prompting a step-by-step explanation."
|
|
66
|
+
|
|
67
|
+
Example of BAD scenarios (DO NOT DO):
|
|
68
|
+
- "Why does my async function return inconsistent results?" (This is a prompt)
|
|
69
|
+
- "Explain how to debug race conditions." (Instruction)
|
|
70
|
+
- "What is the freezing point of water?" (Question)
|
|
71
|
+
|
|
72
|
+
CRITICAL REQUIREMENTS:
|
|
73
|
+
- Scenario MUST be a narrative description of a SITUATION.
|
|
74
|
+
- Scenario MUST involve these participant roles: {participant_roles}
|
|
75
|
+
- Scenario MUST align with this conversational task: {conversational_task}
|
|
76
|
+
- Scenario MUST feel natural, real-world, and MULTI-TURN.
|
|
77
|
+
- Scenario MUST NOT contain:
|
|
78
|
+
• direct questions
|
|
79
|
+
• instructions
|
|
80
|
+
• tasks
|
|
81
|
+
• explicit prompts
|
|
82
|
+
• standalone facts
|
|
83
|
+
- Scenario MUST be grounded in the meaning of the provided base scenario description.
|
|
84
|
+
|
|
85
|
+
You MUST TRY to generate {num_goldens} high-quality, non-repetitive scenarios.
|
|
86
|
+
**
|
|
87
|
+
|
|
88
|
+
Base Scenario Description:
|
|
89
|
+
{scenario}
|
|
90
|
+
|
|
91
|
+
Conversational Task:
|
|
92
|
+
{conversational_task}
|
|
93
|
+
|
|
94
|
+
Participant Roles:
|
|
95
|
+
{participant_roles}
|
|
96
|
+
|
|
97
|
+
Num Scenarios:
|
|
98
|
+
{num_goldens}
|
|
99
|
+
|
|
100
|
+
JSON:
|
|
101
|
+
"""
|
|
102
|
+
|
|
40
103
|
|
|
41
104
|
######################################################################################################
|
|
42
105
|
##### Approach similar to https://github.com/nlpxucan/WizardLM/blob/main/Evol_Instruct/depth.py ######
|
|
@@ -282,3 +345,202 @@ class PromptEvolutionTemplate:
|
|
|
282
345
|
Rewritten Input:
|
|
283
346
|
"""
|
|
284
347
|
)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class ConversationalPromptEvolutionTemplate:
|
|
351
|
+
|
|
352
|
+
base_instruction = """I want you to act as a conversational scenario rewriter.
|
|
353
|
+
Your objective is to rewrite the given `Scenario`. You MUST complicate the `Scenario` using the following method:"""
|
|
354
|
+
|
|
355
|
+
@staticmethod
|
|
356
|
+
def reasoning_evolution(scenario):
|
|
357
|
+
return (
|
|
358
|
+
ConversationalPromptEvolutionTemplate.base_instruction
|
|
359
|
+
+ f"""
|
|
360
|
+
1. Rewrite `Scenario` to force participants into multi-step conversational reasoning.
|
|
361
|
+
2. Add layered inferences or analytical leaps required in dialogue.
|
|
362
|
+
3. `Rewritten Scenario` must stay concise, human-readable, and remain a conversation setup.
|
|
363
|
+
4. Do NOT exceed **15 words**.
|
|
364
|
+
|
|
365
|
+
**
|
|
366
|
+
EXAMPLES
|
|
367
|
+
|
|
368
|
+
Example scenario:
|
|
369
|
+
Two students discuss climate change.
|
|
370
|
+
Example rewritten scenario:
|
|
371
|
+
Two students debate climate impacts, tracing cause-effect chains across multiple evidence sources.
|
|
372
|
+
|
|
373
|
+
--------------------------
|
|
374
|
+
|
|
375
|
+
Example scenario:
|
|
376
|
+
A doctor explains treatment options.
|
|
377
|
+
Example rewritten scenario:
|
|
378
|
+
Doctor and patient reason through symptoms requiring sequential diagnostic logic.
|
|
379
|
+
|
|
380
|
+
--------------------------
|
|
381
|
+
|
|
382
|
+
Scenario:
|
|
383
|
+
{scenario}
|
|
384
|
+
Rewritten Scenario:
|
|
385
|
+
"""
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
@staticmethod
|
|
389
|
+
def concretizing_evolution(scenario):
|
|
390
|
+
return (
|
|
391
|
+
ConversationalPromptEvolutionTemplate.base_instruction
|
|
392
|
+
+ f"""
|
|
393
|
+
1. Replace broad conversation setup with a **more specific, concrete** conversational scene.
|
|
394
|
+
2. Add real-world detail (location, constraint, specific topic).
|
|
395
|
+
3. Keep under **15 words**, concise, and still a dialogue setup.
|
|
396
|
+
|
|
397
|
+
**
|
|
398
|
+
EXAMPLES
|
|
399
|
+
|
|
400
|
+
Example scenario:
|
|
401
|
+
Two engineers talk about safety.
|
|
402
|
+
Example rewritten scenario:
|
|
403
|
+
Two engineers argue over failing brake-system logs during late-night review.
|
|
404
|
+
|
|
405
|
+
--------------------------
|
|
406
|
+
|
|
407
|
+
Example scenario:
|
|
408
|
+
Two friends discuss exercise.
|
|
409
|
+
Example rewritten scenario:
|
|
410
|
+
Two friends compare heart-rate sensor issues during a marathon-training chat.
|
|
411
|
+
|
|
412
|
+
--------------------------
|
|
413
|
+
|
|
414
|
+
Scenario:
|
|
415
|
+
{scenario}
|
|
416
|
+
Rewritten Scenario:
|
|
417
|
+
"""
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
@staticmethod
|
|
421
|
+
def constrained_evolution(scenario):
|
|
422
|
+
return (
|
|
423
|
+
ConversationalPromptEvolutionTemplate.base_instruction
|
|
424
|
+
+ f"""
|
|
425
|
+
1. Add at least one new constraint shaping the conversation.
|
|
426
|
+
2. Constraint must significantly affect the dialogue.
|
|
427
|
+
3. Keep under **15 words**, concise, conversational.
|
|
428
|
+
|
|
429
|
+
**
|
|
430
|
+
EXAMPLES
|
|
431
|
+
|
|
432
|
+
Example scenario:
|
|
433
|
+
Two coworkers plan a report.
|
|
434
|
+
Example rewritten scenario:
|
|
435
|
+
Two coworkers plan a report with strict no-internet constraint.
|
|
436
|
+
|
|
437
|
+
--------------------------
|
|
438
|
+
|
|
439
|
+
Example scenario:
|
|
440
|
+
A teacher reviews homework.
|
|
441
|
+
Example rewritten scenario:
|
|
442
|
+
Teacher and student discuss homework under urgent submission deadline.
|
|
443
|
+
|
|
444
|
+
--------------------------
|
|
445
|
+
|
|
446
|
+
Scenario:
|
|
447
|
+
{scenario}
|
|
448
|
+
Rewritten Scenario:
|
|
449
|
+
"""
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
@staticmethod
|
|
453
|
+
def comparative_question_evolution(scenario):
|
|
454
|
+
return (
|
|
455
|
+
ConversationalPromptEvolutionTemplate.base_instruction
|
|
456
|
+
+ f"""
|
|
457
|
+
1. Rewrite `Scenario` so the conversation centers on comparing two+ items.
|
|
458
|
+
2. Must highlight similarities/differences through dialogue.
|
|
459
|
+
3. Keep under **15 words**, concise, conversational.
|
|
460
|
+
|
|
461
|
+
**
|
|
462
|
+
EXAMPLES
|
|
463
|
+
|
|
464
|
+
Example scenario:
|
|
465
|
+
Two analysts discuss tools.
|
|
466
|
+
Example rewritten scenario:
|
|
467
|
+
Two analysts compare legacy analytics pipeline vs. new automated system.
|
|
468
|
+
|
|
469
|
+
--------------------------
|
|
470
|
+
|
|
471
|
+
Example scenario:
|
|
472
|
+
Two students study history.
|
|
473
|
+
Example rewritten scenario:
|
|
474
|
+
Two students contrast Renaissance ideals with Enlightenment philosophies.
|
|
475
|
+
|
|
476
|
+
--------------------------
|
|
477
|
+
|
|
478
|
+
Scenario:
|
|
479
|
+
{scenario}
|
|
480
|
+
Rewritten Scenario:
|
|
481
|
+
"""
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
@staticmethod
|
|
485
|
+
def hypothetical_scenario_evolution(scenario):
|
|
486
|
+
return (
|
|
487
|
+
ConversationalPromptEvolutionTemplate.base_instruction
|
|
488
|
+
+ f"""
|
|
489
|
+
1. Rewrite `Scenario` to introduce a hypothetical twist derived from the setup.
|
|
490
|
+
2. The hypothetical MUST drive the conversation.
|
|
491
|
+
3. Keep under **15 words**, concise, conversational.
|
|
492
|
+
|
|
493
|
+
**
|
|
494
|
+
EXAMPLES
|
|
495
|
+
|
|
496
|
+
Example scenario:
|
|
497
|
+
Two scientists discuss pollution.
|
|
498
|
+
Example rewritten scenario:
|
|
499
|
+
Two scientists debate effects if emissions doubled overnight.
|
|
500
|
+
|
|
501
|
+
--------------------------
|
|
502
|
+
|
|
503
|
+
Example scenario:
|
|
504
|
+
A medic trains a recruit.
|
|
505
|
+
Example rewritten scenario:
|
|
506
|
+
Medic and recruit plan response to hypothetical antibiotic-resistant outbreak.
|
|
507
|
+
|
|
508
|
+
--------------------------
|
|
509
|
+
|
|
510
|
+
Scenario:
|
|
511
|
+
{scenario}
|
|
512
|
+
Rewritten Scenario:
|
|
513
|
+
"""
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
@staticmethod
|
|
517
|
+
def in_breadth_evolution(scenario):
|
|
518
|
+
return (
|
|
519
|
+
ConversationalPromptEvolutionTemplate.base_instruction
|
|
520
|
+
+ f"""
|
|
521
|
+
1. Rewrite `Scenario` into a new conversation within the same domain.
|
|
522
|
+
2. The new conversation must explore a rarer, niche angle.
|
|
523
|
+
3. Keep under **15 words**, concise, conversational.
|
|
524
|
+
|
|
525
|
+
**
|
|
526
|
+
EXAMPLES
|
|
527
|
+
|
|
528
|
+
Example scenario:
|
|
529
|
+
Two doctors discuss patient care.
|
|
530
|
+
Example rewritten scenario:
|
|
531
|
+
Two doctors debate rare autoimmune disorder diagnostics.
|
|
532
|
+
|
|
533
|
+
--------------------------
|
|
534
|
+
|
|
535
|
+
Example scenario:
|
|
536
|
+
Two programmers discuss bugs.
|
|
537
|
+
Example rewritten scenario:
|
|
538
|
+
Two programmers examine obscure concurrency race-condition failures.
|
|
539
|
+
|
|
540
|
+
--------------------------
|
|
541
|
+
|
|
542
|
+
Scenario:
|
|
543
|
+
{scenario}
|
|
544
|
+
Rewritten Scenario:
|
|
545
|
+
"""
|
|
546
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepeval
|
|
3
|
-
Version: 3.7.1
|
|
3
|
+
Version: 3.7.2
|
|
4
4
|
Summary: The LLM Evaluation Framework
|
|
5
5
|
Home-page: https://github.com/confident-ai/deepeval
|
|
6
6
|
License: Apache-2.0
|
|
@@ -439,6 +439,7 @@ Using `.env.local` or `.env` is optional. If they are missing, DeepEval uses you
|
|
|
439
439
|
```bash
|
|
440
440
|
cp .env.example .env.local
|
|
441
441
|
# then edit .env.local (ignored by git)
|
|
442
|
+
```
|
|
442
443
|
|
|
443
444
|
<br />
|
|
444
445
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
deepeval/__init__.py,sha256=tle4lT4FONApg3OeztGPEdrpGMEGLWajyGTu7bEd3s0,2976
|
|
2
|
-
deepeval/_version.py,sha256=
|
|
2
|
+
deepeval/_version.py,sha256=uv4OEocrxlEkjp4YMUEXl0-kwRJuwkdtGlNshton0Mg,27
|
|
3
3
|
deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
|
|
4
4
|
deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
|
|
5
5
|
deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
|
|
@@ -161,7 +161,7 @@ deepeval/dataset/utils.py,sha256=MRiqwt-3E5WNCHtP2kY7P1PeRtFMRpGoy3r75tJ2QFg,791
|
|
|
161
161
|
deepeval/errors.py,sha256=FfhtULNIQqHpKVqCr-xlvTtLxkNj40qVU89sXYKuDrA,754
|
|
162
162
|
deepeval/evaluate/__init__.py,sha256=315IaMiYEz7oJhZ4kPTBfeCNd1xF-wWVU6KOQnrKQpE,291
|
|
163
163
|
deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
|
|
164
|
-
deepeval/evaluate/compare.py,sha256=
|
|
164
|
+
deepeval/evaluate/compare.py,sha256=AJ3kR4Th44MTuL0juvvNwPdel1Q8tw37IYUgLdy2qrw,16575
|
|
165
165
|
deepeval/evaluate/configs.py,sha256=QfWjaWNxLsgEe8-5j4PIs5WcSyEckiWt0qdpXSpl57M,928
|
|
166
166
|
deepeval/evaluate/evaluate.py,sha256=CLc-5rlHcBkJcakYXf9twaF6G8chp95gCBe8V4B-cVo,10684
|
|
167
167
|
deepeval/evaluate/execute.py,sha256=cXX4W4jX_Uly73WBLEduLv6Dqs32AWHGsgyp9tZmV1Q,134219
|
|
@@ -461,13 +461,13 @@ deepeval/synthesizer/base_synthesizer.py,sha256=ua7HDq1lemeFH5FuWdGJxMGFH5QA2CSY
|
|
|
461
461
|
deepeval/synthesizer/chunking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
462
462
|
deepeval/synthesizer/chunking/context_generator.py,sha256=ucwa-7BcPSHHf4Tu31dfyJ63Bg9HdNhGLAW5_QsfFX4,38052
|
|
463
463
|
deepeval/synthesizer/chunking/doc_chunker.py,sha256=DvJmbA_NnZBGCZgxKQsxlIC29kca_d7J-Dxq9SyfzX0,10612
|
|
464
|
-
deepeval/synthesizer/config.py,sha256=
|
|
465
|
-
deepeval/synthesizer/schema.py,sha256=
|
|
466
|
-
deepeval/synthesizer/synthesizer.py,sha256=
|
|
467
|
-
deepeval/synthesizer/templates/__init__.py,sha256=
|
|
468
|
-
deepeval/synthesizer/templates/template.py,sha256=
|
|
469
|
-
deepeval/synthesizer/templates/template_extraction.py,sha256=
|
|
470
|
-
deepeval/synthesizer/templates/template_prompt.py,sha256=
|
|
464
|
+
deepeval/synthesizer/config.py,sha256=rg9HzN5w_82tpzBALgt__NkAcmh0vDEoORJpjsRLWMY,2207
|
|
465
|
+
deepeval/synthesizer/schema.py,sha256=TFCIvXeL0TOKqfjMm2qgR4hFcvvFaPEZdQ1xTnRJqPs,1294
|
|
466
|
+
deepeval/synthesizer/synthesizer.py,sha256=QF02PSedde8t5Q0R9WpkWeD4tVD8fo4m4pJFu0H7jbc,110316
|
|
467
|
+
deepeval/synthesizer/templates/__init__.py,sha256=9UhfJFwPEdLWmxJz3ksNJps-jGYJFJnJP1U-x7j0By4,319
|
|
468
|
+
deepeval/synthesizer/templates/template.py,sha256=ri3dX2gzxNmL8qlkl47HD8kecmqMBF5mE-mraZvf1xU,65008
|
|
469
|
+
deepeval/synthesizer/templates/template_extraction.py,sha256=jmvr8AOOUzDgsHYIOsq-NaxlRQ5GygK16TTRGxBXDyM,3508
|
|
470
|
+
deepeval/synthesizer/templates/template_prompt.py,sha256=bzfC71AXZqBrmoDWmBvuIQKD6hPJZ0ZAWX4hy-lPlnQ,21478
|
|
471
471
|
deepeval/synthesizer/types.py,sha256=wUZntvCAE29sM9K8hk9RPwUpkTip1ObOCExyMEo3sME,493
|
|
472
472
|
deepeval/synthesizer/utils.py,sha256=o-9z5gApQcHqDqusgrD0LagXWAju17LVc27BxtaA7og,1018
|
|
473
473
|
deepeval/telemetry.py,sha256=0k6oVhRBi1Ap6JC6SNe_9-nBnlG-uReUoS3KTrPayhk,21716
|
|
@@ -505,8 +505,8 @@ deepeval/tracing/tracing.py,sha256=VWu5z6fxWP6KfDaw5ZmIkN-0yNDqe1CVHa0e9_Wjrgg,4
|
|
|
505
505
|
deepeval/tracing/types.py,sha256=WhnxefUc5I8jcAOBQ-tsZ8_zVZfGqSvCWHD5XUN6Ggw,6040
|
|
506
506
|
deepeval/tracing/utils.py,sha256=mdvhYAxDNsdnusaEXJd-c-_O2Jn6S3xSuzRvLO1Jz4U,5684
|
|
507
507
|
deepeval/utils.py,sha256=zy9RR0bt3YMzWVlJc5Rl6eU5RyeW2uEjMfwD1sdgPr4,23234
|
|
508
|
-
deepeval-3.7.1.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
509
|
-
deepeval-3.7.
|
|
510
|
-
deepeval-3.7.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
511
|
-
deepeval-3.7.1.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
|
|
512
|
-
deepeval-3.7.1.dist-info/RECORD,,
|
|
508
|
+
deepeval-3.7.2.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
509
|
+
deepeval-3.7.2.dist-info/METADATA,sha256=-p5kW-oBcfsEi5_uiLnCn_godaoKAeRDCgzRdpOsWUI,18743
|
|
510
|
+
deepeval-3.7.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
511
|
+
deepeval-3.7.2.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
|
|
512
|
+
deepeval-3.7.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|