ragbits-evaluate 0.0.8.dev23005__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. ragbits_evaluate-0.0.8.dev23005/.gitignore +116 -0
  2. ragbits_evaluate-0.0.8.dev23005/CHANGELOG.md +244 -0
  3. ragbits_evaluate-0.0.8.dev23005/PKG-INFO +58 -0
  4. ragbits_evaluate-0.0.8.dev23005/README.md +23 -0
  5. ragbits_evaluate-0.0.8.dev23005/pyproject.toml +68 -0
  6. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/__init__.py +0 -0
  7. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/__init__.py +122 -0
  8. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/context.py +140 -0
  9. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/conversation.py +515 -0
  10. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/deepeval_evaluator.py +92 -0
  11. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/logger.py +165 -0
  12. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/metrics/__init__.py +19 -0
  13. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/metrics/builtin.py +221 -0
  14. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/metrics/collectors.py +142 -0
  15. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/models.py +37 -0
  16. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/results.py +200 -0
  17. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/scenarios.py +129 -0
  18. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/agent_simulation/simulation.py +245 -0
  19. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/cli.py +150 -0
  20. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/config.py +11 -0
  21. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/__init__.py +3 -0
  22. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/base.py +95 -0
  23. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/document_search.py +61 -0
  24. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/exceptions.py +25 -0
  25. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/gaia.py +78 -0
  26. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/hotpot_qa.py +95 -0
  27. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/human_eval.py +70 -0
  28. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataloaders/question_answer.py +56 -0
  29. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/__init__.py +0 -0
  30. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/pipeline.py +141 -0
  31. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/prompts/__init__.py +0 -0
  32. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/prompts/corpus_generation.py +21 -0
  33. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/prompts/qa.py +83 -0
  34. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/__init__.py +0 -0
  35. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/corpus_generation.py +67 -0
  36. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/filter/__init__.py +0 -0
  37. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/filter/base.py +43 -0
  38. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/filter/dont_know.py +34 -0
  39. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py +0 -0
  40. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/text_generation/base.py +66 -0
  41. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/tasks/text_generation/qa.py +96 -0
  42. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/dataset_generator/utils.py +43 -0
  43. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/evaluator.py +244 -0
  44. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/factories/__init__.py +42 -0
  45. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/__init__.py +3 -0
  46. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/base.py +89 -0
  47. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/document_search.py +90 -0
  48. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/gaia.py +84 -0
  49. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/hotpot_qa.py +51 -0
  50. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/human_eval.py +105 -0
  51. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/metrics/question_answer.py +205 -0
  52. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/optimizer.py +210 -0
  53. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/__init__.py +37 -0
  54. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/base.py +64 -0
  55. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/document_search.py +106 -0
  56. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/gaia.py +249 -0
  57. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/hotpot_qa.py +342 -0
  58. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/human_eval.py +323 -0
  59. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/pipelines/question_answer.py +96 -0
  60. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/py.typed +0 -0
  61. ragbits_evaluate-0.0.8.dev23005/src/ragbits/evaluate/utils.py +160 -0
  62. ragbits_evaluate-0.0.8.dev23005/tests/cli/test_run_evaluation.py +50 -0
  63. ragbits_evaluate-0.0.8.dev23005/tests/unit/test_agent_simulation_context.py +395 -0
  64. ragbits_evaluate-0.0.8.dev23005/tests/unit/test_agent_simulation_metrics.py +360 -0
  65. ragbits_evaluate-0.0.8.dev23005/tests/unit/test_agent_simulation_results.py +406 -0
  66. ragbits_evaluate-0.0.8.dev23005/tests/unit/test_evaluator.py +198 -0
  67. ragbits_evaluate-0.0.8.dev23005/tests/unit/test_metrics.py +217 -0
  68. ragbits_evaluate-0.0.8.dev23005/tests/unit/test_optimizer.py +124 -0
@@ -0,0 +1,116 @@
1
+ # Directories
2
+ .vscode/
3
+ .idea/
4
+ .neptune/
5
+ .pytest_cache/
6
+ .mypy_cache/
7
+ venv/
8
+ .venv/
9
+ __pycache__/
10
+ **.egg-info/
11
+ .deepeval/
12
+
13
+ # Local cursor rules
14
+ .cursor/rules/local/
15
+
16
+ # Byte-compiled / optimized / DLL files
17
+ __pycache__/
18
+ *.py[cod]
19
+ *$py.class
20
+
21
+ # C extensions
22
+ *.so
23
+
24
+ # Distribution / packaging
25
+ .Python
26
+ env/
27
+ build/
28
+ develop-eggs/
29
+ dist/
30
+ downloads/
31
+ eggs/
32
+ .eggs/
33
+ lib/
34
+ lib64/
35
+ parts/
36
+ sdist/
37
+ var/
38
+ *.egg-info/
39
+ .installed.cfg
40
+ *.egg
41
+
42
+ # Sphinx documentation
43
+ docs/_build/
44
+ public/
45
+ # autogenerated package license table
46
+ docs/licenses_table.rst
47
+
48
+ # license dump file
49
+ licenses.txt
50
+
51
+ # File formats
52
+ *.onnx
53
+ *.pyc
54
+ *.pt
55
+ *.pth
56
+ *.pkl
57
+ *.mar
58
+ *.torchscript
59
+ **/.ipynb_checkpoints
60
+ **/dist/
61
+ **/checkpoints/
62
+ **/outputs/
63
+ **/multirun/
64
+
65
+ # Other env files
66
+ .python-version
67
+ pyvenv.cfg
68
+ pip-selfcheck.json
69
+
70
+ # Unit test / coverage reports
71
+ htmlcov/
72
+ .tox/
73
+ .coverage
74
+ .coverage.*
75
+ .cache
76
+ nosetests.xml
77
+ coverage.xml
78
+ *,cover
79
+ .hypothesis/
80
+
81
+ # dotenv
82
+ .env
83
+
84
+ # coverage and pytest reports
85
+ coverage.xml
86
+ report.xml
87
+
88
+ # CMake
89
+ cmake-build-*/
90
+
91
+ # Terraform
92
+ **/.terraform.lock.hcl
93
+ **/.terraform
94
+
95
+ # mkdocs generated files
96
+ site/
97
+
98
+ # build artifacts
99
+ dist/
100
+
101
+ # examples
102
+ chroma/
103
+ qdrant/
104
+
105
+ .aider*
106
+
107
+ .DS_Store
108
+ node_modules/
109
+
110
+ lazygit
111
+
112
+ lazygit.tar.gz
113
+
114
+ # chat conversation logs
115
+ duet_conversation.log
116
+ worktrees/
@@ -0,0 +1,244 @@
1
+ # CHANGELOG
2
+
3
+ ## Unreleased
4
+
5
+ - Feat: introduce agent evaluation pipelines and metrics (HotpotQA, HumanEval, GAIA) (#829)
6
+
7
+ - Feat: introduce agent simulation module with utilities for agent-to-agent conversation and evaluation scenarios (#857)
8
+
9
+ - Feat: add structured results to agent simulation with `SimulationResult`, `TurnResult`, `TaskResult`, and `ConversationMetrics` models (#885)
10
+
11
+ - Feat: add `DomainContext` for domain-specific goal checking in agent simulation (currency, locale, business rules) (#884)
12
+
13
+ - Feat: add `DataSnapshot` for data-grounded simulated user requests (prevents unrealistic requests for non-existent items) (#883)
14
+
15
+ - Feat: add metrics collection system for agent simulation (`MetricCollector` protocol, `LatencyMetricCollector`, `TokenUsageMetricCollector`, `ToolUsageMetricCollector`) (#882)
16
+
17
+ - Feat: add support for response adapters from `ragbits.chat.adapters` in agent simulation, enabling production chat interfaces to be used directly without wrapper classes
18
+
19
+ ## 1.3.0 (2025-09-11)
20
+
21
+ ### Changed
22
+
23
+ - ragbits-core updated to version v1.3.0
24
+
25
+ - Optional parallel batches execution in ragbits.evaluate.Evaluator (#769)
26
+
27
+ ## 1.2.2 (2025-08-08)
28
+
29
+ ### Changed
30
+
31
+ - ragbits-core updated to version v1.2.2
32
+
33
+ ## 1.2.1 (2025-08-04)
34
+
35
+ ### Changed
36
+
37
+ - ragbits-core updated to version v1.2.1
38
+
39
+ ## 1.2.0 (2025-08-01)
40
+
41
+ ### Changed
42
+
43
+ - ragbits-core updated to version v1.2.0
44
+
45
+ ## 1.1.0 (2025-07-09)
46
+
47
+ ### Changed
48
+
49
+ - ragbits-core updated to version v1.1.0
50
+
51
+ - Update qa data loader docstring (#565)
52
+ - Fix deadlock on qa metrics compute (#609)
53
+ - Upgrade distilabel version to 1.5.0 (#682)
54
+
55
+ ## 1.0.0 (2025-06-04)
56
+
57
+ ### Changed
58
+
59
+ - ragbits-core updated to version v1.0.0
60
+
61
+ ## 0.20.1 (2025-06-04)
62
+
63
+ ### Changed
64
+
65
+ - ragbits-core updated to version v0.20.1
66
+
67
+ ## 0.20.0 (2025-06-03)
68
+
69
+ ### Changed
70
+
71
+ - ragbits-core updated to version v0.20.0
72
+
73
+ ## 0.19.1 (2025-05-27)
74
+
75
+ ### Changed
76
+
77
+ - ragbits-core updated to version v0.19.1
78
+
79
+ ## 0.19.0 (2025-05-27)
80
+
81
+ ### Changed
82
+
83
+ - ragbits-core updated to version v0.19.0
84
+
85
+ - Add evals for question answering (#577)
86
+ - Add support for slicing dataset (#576)
87
+ - Separate load and map ops in data loaders (#576)
88
+
89
+ ## 0.18.0 (2025-05-22)
90
+
91
+ ### Changed
92
+
93
+ - ragbits-core updated to version v0.18.0
94
+
95
+ - Add support for custom column names in evaluation dataset (#566)
96
+ - Add support for reference document ids and page numbers in evaluation dataset (#566)
97
+ - BREAKING CHANGE: Adjust eval pipeline interface to batch processing (#555)
98
+ - Rename DocumentMeta create_text_document_from_literal to from_literal (#561)
99
+ - Adjust typing for DocumentSearch (#554)
100
+
101
+ ## 0.17.1 (2025-05-09)
102
+
103
+ ### Changed
104
+
105
+ - ragbits-core updated to version v0.17.1
106
+
107
+ ## 0.17.0 (2025-05-06)
108
+
109
+ ### Changed
110
+
111
+ - ragbits-core updated to version v0.17.0
112
+
113
+ - Add tests for ragbits-evaluate package (#390)
114
+ - Integrate sources with dataloaders (#529)
115
+
116
+ ## 0.16.0 (2025-04-29)
117
+
118
+ ### Changed
119
+
120
+ - ragbits-core updated to version v0.16.0
121
+
122
+ ## 0.15.0 (2025-04-28)
123
+
124
+ ### Changed
125
+
126
+ - ragbits-core updated to version v0.15.0
127
+
128
+ ## 0.14.0 (2025-04-22)
129
+
130
+ ### Changed
131
+
132
+ - ragbits-core updated to version v0.14.0
133
+
134
+ - move sources from ragbits-document-search to ragbits-core (#496)
135
+
136
+ ## 0.13.0 (2025-04-02)
137
+
138
+ ### Changed
139
+
140
+ - ragbits-core updated to version v0.13.0
141
+
142
+ ## 0.12.0 (2025-03-25)
143
+
144
+ ### Changed
145
+
146
+ - ragbits-core updated to version v0.12.0
147
+
148
+ ## 0.11.0 (2025-03-25)
149
+
150
+ ### Changed
151
+
152
+ - ragbits-core updated to version v0.11.0
153
+
154
+ ## 0.10.2 (2025-03-21)
155
+
156
+ ### Changed
157
+
158
+ - ragbits-core updated to version v0.10.2
159
+
160
+ ## 0.10.1 (2025-03-19)
161
+
162
+ ### Changed
163
+
164
+ - ragbits-core updated to version v0.10.1
165
+
166
+ ## 0.10.0 (2025-03-17)
167
+
168
+ ### Changed
169
+
170
+ - ragbits-core updated to version v0.10.0
171
+
172
+ - Compatibility with the new Vector Store interface from ragbits-core (#288)
173
+ - chore: fix typo in README.
174
+ - fix typos in doc strings
175
+
176
+ ## 0.9.0 (2025-02-25)
177
+
178
+ ### Changed
179
+
180
+ - ragbits-core updated to version v0.9.0
181
+ - Add cli for document search evaluation (#356)
182
+ - Add local data loader (#334).
183
+
184
+ ## 0.8.0 (2025-01-29)
185
+
186
+ ### Changed
187
+
188
+ - ragbits-core updated to version v0.8.0
189
+
190
+ ## 0.7.0 (2025-01-21)
191
+
192
+ ### Added
193
+
194
+ - Simplified interface to document-search evaluation (#258).
195
+
196
+ ### Changed
197
+
198
+ - ragbits-core updated to version v0.7.0
199
+
200
+ ## 0.6.0 (2024-12-27)
201
+
202
+ ### Changed
203
+
204
+ - ragbits-core updated to version v0.6.0
205
+
206
+ ## 0.5.1 (2024-12-09)
207
+
208
+ ### Changed
209
+
210
+ - ragbits-core updated to version v0.5.1
211
+ - document search evaluation now returns all Element types, rather than only TextElements (#241).
212
+
213
+ ## 0.5.0 (2024-12-05)
214
+
215
+ ### Changed
216
+
217
+ - ragbits-core updated to version v0.5.0
218
+
219
+ ## 0.4.0 (2024-11-27)
220
+
221
+ ### Added
222
+
223
+ - Introduced optimization with optuna (#177).
224
+ - Add synthetic data generation pipeline (#165).
225
+
226
+ ### Changed
227
+
228
+ - ragbits-core updated to version v0.4.0
229
+
230
+ ## 0.3.0 (2024-11-06)
231
+
232
+ ### Changed
233
+
234
+ - ragbits-core updated to version v0.3.0
235
+
236
+ ## 0.2.0 (2024-10-23)
237
+
238
+ - Initial release of the package.
239
+ - Evaluation pipeline framework with capability to define evaluators & metrics.
240
+ - Evaluation pipeline for `ragbits-document-search`.
241
+
242
+ ### Changed
243
+
244
+ - ragbits-core updated to version v0.2.0
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragbits-evaluate
3
+ Version: 0.0.8.dev23005
4
+ Summary: Evaluation module for Ragbits components
5
+ Project-URL: Homepage, https://github.com/deepsense-ai/ragbits
6
+ Project-URL: Bug Reports, https://github.com/deepsense-ai/ragbits/issues
7
+ Project-URL: Documentation, https://ragbits.deepsense.ai/
8
+ Project-URL: Source, https://github.com/deepsense-ai/ragbits
9
+ Author-email: "deepsense.ai" <ragbits@deepsense.ai>
10
+ License-Expression: MIT
11
+ Keywords: Evaluation,GenAI,Generative AI,LLMs,Large Language Models,RAG,Retrieval Augmented Generation
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Natural Language :: English
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: datasets<4.0.0,>=3.0.1
26
+ Requires-Dist: deepeval<3.0.0,>=2.0.0
27
+ Requires-Dist: distilabel<2.0.0,>=1.5.0
28
+ Requires-Dist: hydra-core<2.0.0,>=1.3.2
29
+ Requires-Dist: neptune[optuna]<2.0.0,>=1.12.0
30
+ Requires-Dist: optuna<5.0.0,>=4.0.0
31
+ Requires-Dist: ragbits-core==0.0.8.dev23005
32
+ Provides-Extra: relari
33
+ Requires-Dist: continuous-eval<1.0.0,>=0.3.12; extra == 'relari'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # Ragbits Evaluate
37
+
38
+ Ragbits Evaluate is a package that contains tools for evaluating the performance of AI pipelines defined with Ragbits components. It also helps with automatically finding the best hyperparameter configurations for them.
39
+
40
+ ## Installation
41
+
42
+ To install the Ragbits Evaluate package, run:
43
+
44
+ ```sh
45
+ pip install ragbits-evaluate
46
+ ```
47
+
48
+ <!--
49
+ TODO: Add a minimalistic example inspired by the Quickstart chapter on Ragbits Evaluate once it is ready.
50
+ -->
51
+
52
+ ## Documentation
53
+ <!--
54
+ TODO:
55
+ * Add link to the Quickstart chapter on Ragbits Evaluate once it is ready.
56
+ * Add link to API Reference once classes from the Evaluate package are added to the API Reference.
57
+ -->
58
+ * [How-To Guides - Evaluate](https://ragbits.deepsense.ai/how-to/evaluate/optimize/)
@@ -0,0 +1,23 @@
1
+ # Ragbits Evaluate
2
+
3
+ Ragbits Evaluate is a package that contains tools for evaluating the performance of AI pipelines defined with Ragbits components. It also helps with automatically finding the best hyperparameter configurations for them.
4
+
5
+ ## Installation
6
+
7
+ To install the Ragbits Evaluate package, run:
8
+
9
+ ```sh
10
+ pip install ragbits-evaluate
11
+ ```
12
+
13
+ <!--
14
+ TODO: Add a minimalistic example inspired by the Quickstart chapter on Ragbits Evaluate once it is ready.
15
+ -->
16
+
17
+ ## Documentation
18
+ <!--
19
+ TODO:
20
+ * Add link to the Quickstart chapter on Ragbits Evaluate once it is ready.
21
+ * Add link to API Reference once classes from the Evaluate package are added to the API Reference.
22
+ -->
23
+ * [How-To Guides - Evaluate](https://ragbits.deepsense.ai/how-to/evaluate/optimize/)
@@ -0,0 +1,68 @@
1
+ [project]
2
+ name = "ragbits-evaluate"
3
+ version = "0.0.8.dev23005"
4
+ description = "Evaluation module for Ragbits components"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+
8
+ license = "MIT"
9
+ authors = [
10
+ { name = "deepsense.ai", email = "ragbits@deepsense.ai"}
11
+ ]
12
+ keywords = [
13
+ "Retrieval Augmented Generation",
14
+ "RAG",
15
+ "Large Language Models",
16
+ "LLMs",
17
+ "Generative AI",
18
+ "GenAI",
19
+ "Evaluation"
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 4 - Beta",
23
+ "Environment :: Console",
24
+ "Intended Audience :: Science/Research",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Natural Language :: English",
27
+ "Operating System :: OS Independent",
28
+ "Programming Language :: Python :: 3.10",
29
+ "Programming Language :: Python :: 3.11",
30
+ "Programming Language :: Python :: 3.12",
31
+ "Programming Language :: Python :: 3.13",
32
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
33
+ "Topic :: Software Development :: Libraries :: Python Modules",
34
+ ]
35
+ dependencies = ["hydra-core>=1.3.2,<2.0.0", "neptune[optuna]>=1.12.0,<2.0.0", "optuna>=4.0.0,<5.0.0", "distilabel>=1.5.0,<2.0.0", "datasets>=3.0.1,<4.0.0", "ragbits-core==0.0.8.dev23005", "deepeval>=2.0.0,<3.0.0"]
36
+
37
+ [project.urls]
38
+ "Homepage" = "https://github.com/deepsense-ai/ragbits"
39
+ "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues"
40
+ "Documentation" = "https://ragbits.deepsense.ai/"
41
+ "Source" = "https://github.com/deepsense-ai/ragbits"
42
+
43
+ [project.optional-dependencies]
44
+ relari = [
45
+ "continuous-eval>=0.3.12,<1.0.0",
46
+ ]
47
+
48
+ [tool.uv]
49
+ dev-dependencies = [
50
+ "pre-commit~=3.8.0",
51
+ "pytest~=8.3.3",
52
+ "pytest-cov~=5.0.0",
53
+ "pytest-asyncio~=0.24.0",
54
+ "pip-licenses>=4.0.0,<5.0.0"
55
+ ]
56
+
57
+ [build-system]
58
+ requires = ["hatchling"]
59
+ build-backend = "hatchling.build"
60
+
61
+ [tool.hatch.metadata]
62
+ allow-direct-references = true
63
+
64
+ [tool.hatch.build.targets.wheel]
65
+ packages = ["src/ragbits"]
66
+
67
+ [tool.pytest.ini_options]
68
+ asyncio_mode = "auto"
@@ -0,0 +1,122 @@
1
+ """Agent simulation utilities for evaluation scenarios.
2
+
3
+ This module uses lazy imports for components that require optional dependencies
4
+ (ragbits-agents, ragbits-chat) to allow importing result models independently.
5
+ """
6
+
7
+ from typing import TYPE_CHECKING
8
+
9
+ # Import context, metrics, and result models eagerly - they have no external dependencies
10
+ # Adapters are re-exported from ragbits.chat.adapters for convenience
11
+ from ragbits.chat.adapters import (
12
+ AdapterContext,
13
+ AdapterPipeline,
14
+ BaseAdapter,
15
+ ChatResponseAdapter,
16
+ FilterAdapter,
17
+ ResponseAdapter,
18
+ TextAccumulatorAdapter,
19
+ ToolCallAccumulatorAdapter,
20
+ ToolResultTextAdapter,
21
+ UsageAggregatorAdapter,
22
+ )
23
+ from ragbits.evaluate.agent_simulation.context import DataSnapshot, DomainContext
24
+ from ragbits.evaluate.agent_simulation.metrics import (
25
+ CompositeMetricCollector,
26
+ LatencyMetricCollector,
27
+ MetricCollector,
28
+ TokenUsageMetricCollector,
29
+ ToolUsageMetricCollector,
30
+ )
31
+ from ragbits.evaluate.agent_simulation.results import (
32
+ ConversationMetrics,
33
+ SimulationResult,
34
+ SimulationStatus,
35
+ TaskResult,
36
+ TurnResult,
37
+ )
38
+
39
+ if TYPE_CHECKING:
40
+ from ragbits.evaluate.agent_simulation.conversation import (
41
+ run_scenario_matrix,
42
+ run_simulation,
43
+ run_simulations_concurrent,
44
+ )
45
+ from ragbits.evaluate.agent_simulation.deepeval_evaluator import DeepEvalEvaluator
46
+ from ragbits.evaluate.agent_simulation.logger import ConversationLogger
47
+ from ragbits.evaluate.agent_simulation.models import Personality, Scenario, Task, Turn
48
+ from ragbits.evaluate.agent_simulation.scenarios import load_personalities, load_scenarios
49
+ from ragbits.evaluate.agent_simulation.simulation import GoalChecker, SimulatedUser
50
+
51
+ __all__ = [
52
+ # Adapters
53
+ "AdapterContext",
54
+ "AdapterPipeline",
55
+ "BaseAdapter",
56
+ "ChatResponseAdapter",
57
+ "FilterAdapter",
58
+ "ResponseAdapter",
59
+ "TextAccumulatorAdapter",
60
+ "ToolCallAccumulatorAdapter",
61
+ "ToolResultTextAdapter",
62
+ "UsageAggregatorAdapter",
63
+ # Metrics
64
+ "CompositeMetricCollector",
65
+ "LatencyMetricCollector",
66
+ "MetricCollector",
67
+ "TokenUsageMetricCollector",
68
+ "ToolUsageMetricCollector",
69
+ # Context
70
+ "DataSnapshot",
71
+ "DomainContext",
72
+ # Results
73
+ "ConversationMetrics",
74
+ "SimulationResult",
75
+ "SimulationStatus",
76
+ "TaskResult",
77
+ "TurnResult",
78
+ # Components (lazy loaded)
79
+ "ConversationLogger",
80
+ "DeepEvalEvaluator",
81
+ "GoalChecker",
82
+ "Personality",
83
+ "Scenario",
84
+ "SimulatedUser",
85
+ "Task",
86
+ "Turn",
87
+ # Functions (lazy loaded)
88
+ "load_personalities",
89
+ "load_scenarios",
90
+ "run_scenario_matrix",
91
+ "run_simulation",
92
+ "run_simulations_concurrent",
93
+ ]
94
+
95
+
96
+ def __getattr__(name: str) -> object:
97
+ """Lazy import for components with optional dependencies."""
98
+ if name in ("run_simulation", "run_simulations_concurrent", "run_scenario_matrix"):
99
+ from ragbits.evaluate.agent_simulation import conversation
100
+
101
+ return getattr(conversation, name)
102
+ if name == "DeepEvalEvaluator":
103
+ from ragbits.evaluate.agent_simulation.deepeval_evaluator import DeepEvalEvaluator
104
+
105
+ return DeepEvalEvaluator
106
+ if name == "ConversationLogger":
107
+ from ragbits.evaluate.agent_simulation.logger import ConversationLogger
108
+
109
+ return ConversationLogger
110
+ if name in ("Personality", "Scenario", "Task", "Turn"):
111
+ from ragbits.evaluate.agent_simulation import models
112
+
113
+ return getattr(models, name)
114
+ if name in ("load_personalities", "load_scenarios"):
115
+ from ragbits.evaluate.agent_simulation import scenarios
116
+
117
+ return getattr(scenarios, name)
118
+ if name in ("GoalChecker", "SimulatedUser"):
119
+ from ragbits.evaluate.agent_simulation import simulation
120
+
121
+ return getattr(simulation, name)
122
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")