levelapp 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of levelapp might be problematic. Click here for more details.
- {levelapp-0.1.4 → levelapp-0.1.5}/PKG-INFO +146 -31
- {levelapp-0.1.4 → levelapp-0.1.5}/README.md +145 -30
- {levelapp-0.1.4 → levelapp-0.1.5}/examples/README.md +35 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/loader.py +4 -4
- levelapp-0.1.5/levelapp/config/api_config.yaml +156 -0
- levelapp-0.1.5/levelapp/config/dashq_api.yaml +94 -0
- levelapp-0.1.5/levelapp/config/endpoint_.py +382 -0
- levelapp-0.1.5/levelapp/config/endpoints.yaml +47 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/session.py +8 -0
- levelapp-0.1.5/levelapp/endpoint/client.py +102 -0
- levelapp-0.1.5/levelapp/endpoint/manager.py +114 -0
- levelapp-0.1.5/levelapp/endpoint/parsers.py +120 -0
- levelapp-0.1.5/levelapp/endpoint/schemas.py +38 -0
- levelapp-0.1.5/levelapp/endpoint/tester.py +53 -0
- levelapp-0.1.5/levelapp/endpoint/usage_example.py +39 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/evaluator/evaluator.py +9 -1
- levelapp-0.1.5/levelapp/repository/filesystem.py +203 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/simulator/schemas.py +4 -4
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/simulator/simulator.py +57 -43
- levelapp-0.1.5/levelapp/simulator/utils.py +134 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/base.py +33 -2
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/config.py +6 -2
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/context.py +3 -1
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/runtime.py +3 -3
- {levelapp-0.1.4 → levelapp-0.1.5}/pyproject.toml +67 -67
- {levelapp-0.1.4 → levelapp-0.1.5}/src/data/conversation_example_1.json +4 -4
- levelapp-0.1.5/src/data/workflow_config.yaml +118 -0
- levelapp-0.1.5/src/level_app/main_session.py +142 -0
- levelapp-0.1.5/tests/endpoint/test_client.py +113 -0
- levelapp-0.1.5/tests/endpoint/test_parsers.py +105 -0
- levelapp-0.1.5/tests/repository/test_filesystem.py +102 -0
- levelapp-0.1.5/tests/test_session.py +0 -0
- levelapp-0.1.5/tests/test_simulator.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/uv.lock +752 -752
- levelapp-0.1.4/levelapp/config/endpoint_.py +0 -62
- levelapp-0.1.4/levelapp/simulator/utils.py +0 -257
- levelapp-0.1.4/src/data/workflow_config.yaml +0 -41
- levelapp-0.1.4/src/level_app/main_session.py +0 -73
- {levelapp-0.1.4 → levelapp-0.1.5}/.gitignore +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/.python-version +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/LICENSE +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/MANIFEST.in +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/Makefile +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/docs/media/simulator-module-diagram.PNG +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/docs/media/simulator-sequence-diagram.png +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/examples/conversation_script.json +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/examples/example_chatbot.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/examples/example_evaluation.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/examples/workflow_configuration.yaml +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/logger.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/monitor.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/sanitizer.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/anthropic.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/ionos.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/mistral.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/openai.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/comparator.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/extractor.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/schemas.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/scorer.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/utils.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/config/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/config/endpoint.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/config/prompts.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/base.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/schemas.py +0 -0
- {levelapp-0.1.4/levelapp/plugins → levelapp-0.1.5/levelapp/endpoint}/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/evaluator/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/embedding.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/exact.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/fuzzy.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/token.py +0 -0
- {levelapp-0.1.4/tests → levelapp-0.1.5/levelapp/plugins}/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/repository/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/repository/firestore.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/simulator/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/factory.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/registration.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/make.bat +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/project_structure.txt +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/data/endpoint_configuration.yaml +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/data/evaluation_results.json +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/data/payload_example_1.yaml +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/data/payload_example_2.yaml +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/data/workflow_config_2.json +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/main.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/main_monitoring.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/main_simulator.py +0 -0
- /levelapp-0.1.4/tests/test_comparator.py → /levelapp-0.1.5/tests/__init__.py +0 -0
- /levelapp-0.1.4/tests/test_session.py → /levelapp-0.1.5/tests/repository/__init__.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_anthropic.py +0 -0
- /levelapp-0.1.4/tests/test_simulator.py → /levelapp-0.1.5/tests/test_comparator.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_ionos.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_mistral.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_monitoring.py +0 -0
- {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_openai.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: levelapp
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: LevelApp is an evaluation framework for AI/LLM-based software applications. [Powered by Norma]
|
|
5
5
|
Project-URL: Homepage, https://github.com/levelapp-org
|
|
6
6
|
Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
|
|
@@ -114,23 +114,54 @@ evaluation:
|
|
|
114
114
|
field_2 : LEVENSHTEIN
|
|
115
115
|
|
|
116
116
|
reference_data:
|
|
117
|
-
path:
|
|
117
|
+
path: "../data/conversation_example_1.json"
|
|
118
118
|
data:
|
|
119
119
|
|
|
120
120
|
endpoint:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
121
|
+
name: conversational-agent
|
|
122
|
+
base_url: http://127.0.0.1:8000
|
|
123
|
+
path: /v1/chat
|
|
124
|
+
method: POST
|
|
125
|
+
timeout: 60
|
|
126
|
+
retry_count: 3
|
|
127
|
+
retry_backoff: 0.5
|
|
128
|
+
headers:
|
|
129
|
+
- name: model_id
|
|
130
|
+
value: meta-llama/Meta-Llama-3-8B-Instruct
|
|
131
|
+
secure: false
|
|
132
|
+
- name: x-api-key
|
|
133
|
+
value: API_KEY # Load from .env file using python-dotenv.
|
|
134
|
+
secure: true
|
|
135
|
+
- name: Content-Type
|
|
136
|
+
value: application/json
|
|
137
|
+
secure: false
|
|
138
|
+
request_schema:
|
|
139
|
+
# Static field to be included in every request.
|
|
140
|
+
- field_path: message.source
|
|
141
|
+
value: system
|
|
142
|
+
value_type: static
|
|
143
|
+
required: true
|
|
144
|
+
|
|
145
|
+
# Dynamic field to be populated from runtime context.
|
|
146
|
+
- field_path: message.text
|
|
147
|
+
value: message_text # the key from the runtime context.
|
|
148
|
+
value_type: dynamic
|
|
149
|
+
required: true
|
|
150
|
+
|
|
151
|
+
# Env-based field (from OS environment variables).
|
|
152
|
+
- field_path: metadata.env
|
|
153
|
+
value: ENV_VAR_NAME
|
|
154
|
+
value_type: env
|
|
155
|
+
required: false
|
|
156
|
+
|
|
157
|
+
response_mapping:
|
|
158
|
+
# Map the response fields that will be extracted.
|
|
159
|
+
- field_path: reply.text
|
|
160
|
+
extract_as: agent_reply # The simulator requires this key: 'agent_reply'.
|
|
161
|
+
- field_path: reply.metadata
|
|
162
|
+
extract_as: generated_metadata # The simulator requires this key: 'generated_metadata'.
|
|
163
|
+
- field_path: reply.guardrail_flag
|
|
164
|
+
extract_as: guardrail_flag # The simulator requires this key: 'guardrail_flag'.
|
|
134
165
|
|
|
135
166
|
repository:
|
|
136
167
|
type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
|
|
@@ -138,8 +169,8 @@ repository:
|
|
|
138
169
|
database_name: ""
|
|
139
170
|
```
|
|
140
171
|
|
|
141
|
-
- **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL,
|
|
142
|
-
- **Placeholders**: For
|
|
172
|
+
- **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, headers, request/response payload schema).
|
|
173
|
+
- **Placeholders**: For dynamic request schema fields, the `value` entry names the key in the runtime context whose value populates the field at runtime (e.g., `context = {'message_text': "Hello, world!"}`).
|
|
143
174
|
- **Secrets**: Store API keys in `.env` and load via `python-dotenv` (e.g., `API_KEY=your_key_here`).
|
|
144
175
|
|
|
145
176
|
For conversation scripts (used in Simulator), provide a JSON file with this schema:
|
|
@@ -154,16 +185,14 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
|
|
|
154
185
|
"reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
|
|
155
186
|
"interaction_type": "initial",
|
|
156
187
|
"reference_metadata": {},
|
|
157
|
-
"guardrail_flag": false
|
|
158
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
188
|
+
"guardrail_flag": false
|
|
159
189
|
},
|
|
160
190
|
{
|
|
161
191
|
"user_message": "I need to see a cardiologist.",
|
|
162
192
|
"reference_reply": "When would you like to schedule your appointment?",
|
|
163
193
|
"interaction_type": "intermediate",
|
|
164
194
|
"reference_metadata": {},
|
|
165
|
-
"guardrail_flag": false
|
|
166
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
195
|
+
"guardrail_flag": false
|
|
167
196
|
},
|
|
168
197
|
{
|
|
169
198
|
"user_message": "I would like to book it for next Monday morning.",
|
|
@@ -174,8 +203,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
|
|
|
174
203
|
"date": "next Monday",
|
|
175
204
|
"time": "10 AM"
|
|
176
205
|
},
|
|
177
|
-
"guardrail_flag": false
|
|
178
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
206
|
+
"guardrail_flag": false
|
|
179
207
|
},
|
|
180
208
|
{
|
|
181
209
|
"id": "f4f2dd35-71d7-4b75-ba2b-93a4f546004a",
|
|
@@ -183,8 +211,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
|
|
|
183
211
|
"reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
|
|
184
212
|
"interaction_type": "final",
|
|
185
213
|
"reference_metadata": {},
|
|
186
|
-
"guardrail_flag": false
|
|
187
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
214
|
+
"guardrail_flag": false
|
|
188
215
|
}
|
|
189
216
|
],
|
|
190
217
|
"description": "A conversation about booking a doctor appointment.",
|
|
@@ -245,11 +272,90 @@ if __name__ == "__main__":
|
|
|
245
272
|
|
|
246
273
|
|
|
247
274
|
config_dict = {
|
|
248
|
-
"process": {
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
275
|
+
"process": {
|
|
276
|
+
"project_name": "test-project",
|
|
277
|
+
"workflow_type": "SIMULATOR", # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
|
|
278
|
+
"evaluation_params": {
|
|
279
|
+
"attempts": 1, # Add the number of simulation attempts.
|
|
280
|
+
}
|
|
281
|
+
},
|
|
282
|
+
"evaluation": {
|
|
283
|
+
"evaluators": ["JUDGE", "REFERENCE"], # Select from the following: JUDGE, REFERENCE, RAG.
|
|
284
|
+
"providers": ["openai", "ionos"],
|
|
285
|
+
"metrics_map": {
|
|
286
|
+
"field_1": "EXACT",
|
|
287
|
+
"field_2": "LEVENSHTEIN"
|
|
288
|
+
}
|
|
289
|
+
},
|
|
290
|
+
"reference_data": {
|
|
291
|
+
"path": "../data/conversation_example_1.json",
|
|
292
|
+
"data": None
|
|
293
|
+
},
|
|
294
|
+
"endpoint": {
|
|
295
|
+
"name": "conversational-agent",
|
|
296
|
+
"base_url": "http://127.0.0.1:8000",
|
|
297
|
+
"path": "/v1/chat",
|
|
298
|
+
"method": "POST",
|
|
299
|
+
"timeout": 60,
|
|
300
|
+
"retry_count": 3,
|
|
301
|
+
"retry_backoff": 0.5,
|
|
302
|
+
"headers": [
|
|
303
|
+
{
|
|
304
|
+
"name": "model_id",
|
|
305
|
+
"value": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
306
|
+
"secure": False
|
|
307
|
+
},
|
|
308
|
+
{
|
|
309
|
+
"name": "x-api-key",
|
|
310
|
+
"value": "API_KEY", # Load from .env file using python-dotenv.
|
|
311
|
+
"secure": True
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
"name": "Content-Type",
|
|
315
|
+
"value": "application/json",
|
|
316
|
+
"secure": False
|
|
317
|
+
}
|
|
318
|
+
],
|
|
319
|
+
"request_schema": [
|
|
320
|
+
{
|
|
321
|
+
"field_path": "message.source",
|
|
322
|
+
"value": "system",
|
|
323
|
+
"value_type": "static",
|
|
324
|
+
"required": True
|
|
325
|
+
},
|
|
326
|
+
{
|
|
327
|
+
"field_path": "message.text",
|
|
328
|
+
"value": "message_text", # the key from the runtime context.
|
|
329
|
+
"value_type": "dynamic",
|
|
330
|
+
"required": True
|
|
331
|
+
},
|
|
332
|
+
{
|
|
333
|
+
"field_path": "metadata.env",
|
|
334
|
+
"value": "ENV_VAR_NAME",
|
|
335
|
+
"value_type": "env",
|
|
336
|
+
"required": False
|
|
337
|
+
}
|
|
338
|
+
],
|
|
339
|
+
"response_mapping": [
|
|
340
|
+
{
|
|
341
|
+
"field_path": "reply.text",
|
|
342
|
+
"extract_as": "agent_reply" # Remember that the simulator requires this key: 'agent_reply'.
|
|
343
|
+
},
|
|
344
|
+
{
|
|
345
|
+
"field_path": "reply.metadata",
|
|
346
|
+
"extract_as": "generated_metadata" # Remember that the simulator requires this key: 'generated_metadata'.
|
|
347
|
+
},
|
|
348
|
+
{
|
|
349
|
+
"field_path": "reply.guardrail_flag",
|
|
350
|
+
"extract_as": "guardrail_flag" # Remember that the simulator requires this key: 'guardrail_flag'.
|
|
351
|
+
}
|
|
352
|
+
]
|
|
353
|
+
},
|
|
354
|
+
"repository": {
|
|
355
|
+
"type": "FIRESTORE", # Pick one of the following: FIRESTORE, FILESYSTEM
|
|
356
|
+
"project_id": "(default)",
|
|
357
|
+
"database_name": ""
|
|
358
|
+
}
|
|
253
359
|
}
|
|
254
360
|
|
|
255
361
|
content = {
|
|
@@ -275,9 +381,18 @@ if __name__ == "__main__":
|
|
|
275
381
|
# Load reference data from dict variable
|
|
276
382
|
config.set_reference_data(content=content)
|
|
277
383
|
|
|
278
|
-
evaluation_session = EvaluationSession(
|
|
384
|
+
evaluation_session = EvaluationSession(
|
|
385
|
+
session_name="test-session",
|
|
386
|
+
workflow_config=config,
|
|
387
|
+
enable_monitoring=True # To disable the monitoring aspect, set this to False.
|
|
388
|
+
)
|
|
279
389
|
|
|
280
390
|
with evaluation_session as session:
|
|
391
|
+
# Optional: Run connectivity test before the full evaluation
|
|
392
|
+
test_results = session.run_connectivity_test(
|
|
393
|
+
context={"user_message": "I want to book an appointment with a dentist."}
|
|
394
|
+
)
|
|
395
|
+
print(f"Connectivity Test Results:\n{test_results}\n---")
|
|
281
396
|
session.run()
|
|
282
397
|
results = session.workflow.collect_results()
|
|
283
398
|
print("Results:", results)
|
|
@@ -62,23 +62,54 @@ evaluation:
|
|
|
62
62
|
field_2 : LEVENSHTEIN
|
|
63
63
|
|
|
64
64
|
reference_data:
|
|
65
|
-
path:
|
|
65
|
+
path: "../data/conversation_example_1.json"
|
|
66
66
|
data:
|
|
67
67
|
|
|
68
68
|
endpoint:
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
69
|
+
name: conversational-agent
|
|
70
|
+
base_url: http://127.0.0.1:8000
|
|
71
|
+
path: /v1/chat
|
|
72
|
+
method: POST
|
|
73
|
+
timeout: 60
|
|
74
|
+
retry_count: 3
|
|
75
|
+
retry_backoff: 0.5
|
|
76
|
+
headers:
|
|
77
|
+
- name: model_id
|
|
78
|
+
value: meta-llama/Meta-Llama-3-8B-Instruct
|
|
79
|
+
secure: false
|
|
80
|
+
- name: x-api-key
|
|
81
|
+
value: API_KEY # Load from .env file using python-dotenv.
|
|
82
|
+
secure: true
|
|
83
|
+
- name: Content-Type
|
|
84
|
+
value: application/json
|
|
85
|
+
secure: false
|
|
86
|
+
request_schema:
|
|
87
|
+
# Static field to be included in every request.
|
|
88
|
+
- field_path: message.source
|
|
89
|
+
value: system
|
|
90
|
+
value_type: static
|
|
91
|
+
required: true
|
|
92
|
+
|
|
93
|
+
# Dynamic field to be populated from runtime context.
|
|
94
|
+
- field_path: message.text
|
|
95
|
+
value: message_text # the key from the runtime context.
|
|
96
|
+
value_type: dynamic
|
|
97
|
+
required: true
|
|
98
|
+
|
|
99
|
+
# Env-based field (from OS environment variables).
|
|
100
|
+
- field_path: metadata.env
|
|
101
|
+
value: ENV_VAR_NAME
|
|
102
|
+
value_type: env
|
|
103
|
+
required: false
|
|
104
|
+
|
|
105
|
+
response_mapping:
|
|
106
|
+
# Map the response fields that will be extracted.
|
|
107
|
+
- field_path: reply.text
|
|
108
|
+
extract_as: agent_reply # The simulator requires this key: 'agent_reply'.
|
|
109
|
+
- field_path: reply.metadata
|
|
110
|
+
extract_as: generated_metadata # The simulator requires this key: 'generated_metadata'.
|
|
111
|
+
- field_path: reply.guardrail_flag
|
|
112
|
+
extract_as: guardrail_flag # The simulator requires this key: 'guardrail_flag'.
|
|
82
113
|
|
|
83
114
|
repository:
|
|
84
115
|
type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
|
|
@@ -86,8 +117,8 @@ repository:
|
|
|
86
117
|
database_name: ""
|
|
87
118
|
```
|
|
88
119
|
|
|
89
|
-
- **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL,
|
|
90
|
-
- **Placeholders**: For
|
|
120
|
+
- **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, headers, request/response payload schema).
|
|
121
|
+
- **Placeholders**: For dynamic request schema fields, the `value` entry names the key in the runtime context whose value populates the field at runtime (e.g., `context = {'message_text': "Hello, world!"}`).
|
|
91
122
|
- **Secrets**: Store API keys in `.env` and load via `python-dotenv` (e.g., `API_KEY=your_key_here`).
|
|
92
123
|
|
|
93
124
|
For conversation scripts (used in Simulator), provide a JSON file with this schema:
|
|
@@ -102,16 +133,14 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
|
|
|
102
133
|
"reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
|
|
103
134
|
"interaction_type": "initial",
|
|
104
135
|
"reference_metadata": {},
|
|
105
|
-
"guardrail_flag": false
|
|
106
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
136
|
+
"guardrail_flag": false
|
|
107
137
|
},
|
|
108
138
|
{
|
|
109
139
|
"user_message": "I need to see a cardiologist.",
|
|
110
140
|
"reference_reply": "When would you like to schedule your appointment?",
|
|
111
141
|
"interaction_type": "intermediate",
|
|
112
142
|
"reference_metadata": {},
|
|
113
|
-
"guardrail_flag": false
|
|
114
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
143
|
+
"guardrail_flag": false
|
|
115
144
|
},
|
|
116
145
|
{
|
|
117
146
|
"user_message": "I would like to book it for next Monday morning.",
|
|
@@ -122,8 +151,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
|
|
|
122
151
|
"date": "next Monday",
|
|
123
152
|
"time": "10 AM"
|
|
124
153
|
},
|
|
125
|
-
"guardrail_flag": false
|
|
126
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
154
|
+
"guardrail_flag": false
|
|
127
155
|
},
|
|
128
156
|
{
|
|
129
157
|
"id": "f4f2dd35-71d7-4b75-ba2b-93a4f546004a",
|
|
@@ -131,8 +159,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
|
|
|
131
159
|
"reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
|
|
132
160
|
"interaction_type": "final",
|
|
133
161
|
"reference_metadata": {},
|
|
134
|
-
"guardrail_flag": false
|
|
135
|
-
"request_payload": {"user_id": "0001", "user_role": "ADMIN"}
|
|
162
|
+
"guardrail_flag": false
|
|
136
163
|
}
|
|
137
164
|
],
|
|
138
165
|
"description": "A conversation about booking a doctor appointment.",
|
|
@@ -193,11 +220,90 @@ if __name__ == "__main__":
|
|
|
193
220
|
|
|
194
221
|
|
|
195
222
|
config_dict = {
|
|
196
|
-
"process": {
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
223
|
+
"process": {
|
|
224
|
+
"project_name": "test-project",
|
|
225
|
+
"workflow_type": "SIMULATOR", # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
|
|
226
|
+
"evaluation_params": {
|
|
227
|
+
"attempts": 1, # Add the number of simulation attempts.
|
|
228
|
+
}
|
|
229
|
+
},
|
|
230
|
+
"evaluation": {
|
|
231
|
+
"evaluators": ["JUDGE", "REFERENCE"], # Select from the following: JUDGE, REFERENCE, RAG.
|
|
232
|
+
"providers": ["openai", "ionos"],
|
|
233
|
+
"metrics_map": {
|
|
234
|
+
"field_1": "EXACT",
|
|
235
|
+
"field_2": "LEVENSHTEIN"
|
|
236
|
+
}
|
|
237
|
+
},
|
|
238
|
+
"reference_data": {
|
|
239
|
+
"path": "../data/conversation_example_1.json",
|
|
240
|
+
"data": None
|
|
241
|
+
},
|
|
242
|
+
"endpoint": {
|
|
243
|
+
"name": "conversational-agent",
|
|
244
|
+
"base_url": "http://127.0.0.1:8000",
|
|
245
|
+
"path": "/v1/chat",
|
|
246
|
+
"method": "POST",
|
|
247
|
+
"timeout": 60,
|
|
248
|
+
"retry_count": 3,
|
|
249
|
+
"retry_backoff": 0.5,
|
|
250
|
+
"headers": [
|
|
251
|
+
{
|
|
252
|
+
"name": "model_id",
|
|
253
|
+
"value": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
254
|
+
"secure": False
|
|
255
|
+
},
|
|
256
|
+
{
|
|
257
|
+
"name": "x-api-key",
|
|
258
|
+
"value": "API_KEY", # Load from .env file using python-dotenv.
|
|
259
|
+
"secure": True
|
|
260
|
+
},
|
|
261
|
+
{
|
|
262
|
+
"name": "Content-Type",
|
|
263
|
+
"value": "application/json",
|
|
264
|
+
"secure": False
|
|
265
|
+
}
|
|
266
|
+
],
|
|
267
|
+
"request_schema": [
|
|
268
|
+
{
|
|
269
|
+
"field_path": "message.source",
|
|
270
|
+
"value": "system",
|
|
271
|
+
"value_type": "static",
|
|
272
|
+
"required": True
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"field_path": "message.text",
|
|
276
|
+
"value": "message_text", # the key from the runtime context.
|
|
277
|
+
"value_type": "dynamic",
|
|
278
|
+
"required": True
|
|
279
|
+
},
|
|
280
|
+
{
|
|
281
|
+
"field_path": "metadata.env",
|
|
282
|
+
"value": "ENV_VAR_NAME",
|
|
283
|
+
"value_type": "env",
|
|
284
|
+
"required": False
|
|
285
|
+
}
|
|
286
|
+
],
|
|
287
|
+
"response_mapping": [
|
|
288
|
+
{
|
|
289
|
+
"field_path": "reply.text",
|
|
290
|
+
"extract_as": "agent_reply" # Remember that the simulator requires this key: 'agent_reply'.
|
|
291
|
+
},
|
|
292
|
+
{
|
|
293
|
+
"field_path": "reply.metadata",
|
|
294
|
+
"extract_as": "generated_metadata" # Remember that the simulator requires this key: 'generated_metadata'.
|
|
295
|
+
},
|
|
296
|
+
{
|
|
297
|
+
"field_path": "reply.guardrail_flag",
|
|
298
|
+
"extract_as": "guardrail_flag" # Remember that the simulator requires this key: 'guardrail_flag'.
|
|
299
|
+
}
|
|
300
|
+
]
|
|
301
|
+
},
|
|
302
|
+
"repository": {
|
|
303
|
+
"type": "FIRESTORE", # Pick one of the following: FIRESTORE, FILESYSTEM
|
|
304
|
+
"project_id": "(default)",
|
|
305
|
+
"database_name": ""
|
|
306
|
+
}
|
|
201
307
|
}
|
|
202
308
|
|
|
203
309
|
content = {
|
|
@@ -223,9 +329,18 @@ if __name__ == "__main__":
|
|
|
223
329
|
# Load reference data from dict variable
|
|
224
330
|
config.set_reference_data(content=content)
|
|
225
331
|
|
|
226
|
-
evaluation_session = EvaluationSession(
|
|
332
|
+
evaluation_session = EvaluationSession(
|
|
333
|
+
session_name="test-session",
|
|
334
|
+
workflow_config=config,
|
|
335
|
+
enable_monitoring=True # To disable the monitoring aspect, set this to False.
|
|
336
|
+
)
|
|
227
337
|
|
|
228
338
|
with evaluation_session as session:
|
|
339
|
+
# Optional: Run connectivity test before the full evaluation
|
|
340
|
+
test_results = session.run_connectivity_test(
|
|
341
|
+
context={"user_message": "I want to book an appointment with a dentist."}
|
|
342
|
+
)
|
|
343
|
+
print(f"Connectivity Test Results:\n{test_results}\n---")
|
|
229
344
|
session.run()
|
|
230
345
|
results = session.workflow.collect_results()
|
|
231
346
|
print("Results:", results)
|
|
@@ -138,6 +138,41 @@ endpoint:
|
|
|
138
138
|
agent_reply: "${generated_reply}" # Map to your response field
|
|
139
139
|
generated_metadata: "${metadata}" # e.g., extracted entities
|
|
140
140
|
|
|
141
|
+
endpoint:
|
|
142
|
+
name: conversational-agent
|
|
143
|
+
base_url: "http://127.0.0.1:8000" # Your chatbot's API base URL
|
|
144
|
+
path: "/chat" # Endpoint path (full URL = base_url + path)
|
|
145
|
+
method: POST # HTTP method
|
|
146
|
+
timeout: 60 # Timeout in seconds
|
|
147
|
+
retry_count: 3 # Number of retries on failure
|
|
148
|
+
retry_backoff: 0.5 # Backoff factor for retries
|
|
149
|
+
# Optional authentication headers
|
|
150
|
+
headers:
|
|
151
|
+
- name: model_id
|
|
152
|
+
value: meta-llama/Meta-Llama-3-8B-Instruct
|
|
153
|
+
secure: false
|
|
154
|
+
- name: x-api-key
|
|
155
|
+
value: API_KEY # Load from .env file using python-dotenv.
|
|
156
|
+
secure: true
|
|
157
|
+
- name: Content-Type
|
|
158
|
+
value: application/json
|
|
159
|
+
secure: false
|
|
160
|
+
request_schema:
|
|
161
|
+
# Static field to be included in every request.
|
|
162
|
+
- field_path: message
|
|
163
|
+
value: system
|
|
164
|
+
value_type: dynamic
|
|
165
|
+
required: true
|
|
166
|
+
|
|
167
|
+
response_mapping:
|
|
168
|
+
# Map the response fields that will be extracted.
|
|
169
|
+
- field_path: reply
|
|
170
|
+
extract_as: agent_reply # The simulator requires this key: 'agent_reply'.
|
|
171
|
+
- field_path: reply.metadata
|
|
172
|
+
extract_as: generated_metadata # The simulator requires this key: 'generated_metadata'.
|
|
173
|
+
- field_path: reply.guardrail_flag
|
|
174
|
+
extract_as: guardrail_flag # The simulator requires this key: 'guardrail_flag'.
|
|
175
|
+
|
|
141
176
|
# REPOSITORY SECTION (Optional):
|
|
142
177
|
repository:
|
|
143
178
|
type: FILESYSTEM # Or FIRESTORE/MONGODB for persistence
|
|
@@ -111,7 +111,7 @@ class DynamicModelBuilder:
|
|
|
111
111
|
"""
|
|
112
112
|
if isinstance(value, Mapping):
|
|
113
113
|
nested_model = self.create_dynamic_model(model_name=f"{model_name}_{key}", data=value)
|
|
114
|
-
return nested_model,
|
|
114
|
+
return Optional[nested_model], None
|
|
115
115
|
|
|
116
116
|
elif isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
|
|
117
117
|
if not value:
|
|
@@ -119,15 +119,15 @@ class DynamicModelBuilder:
|
|
|
119
119
|
|
|
120
120
|
elif isinstance(value[0], Mapping):
|
|
121
121
|
nested_model = self.create_dynamic_model(model_name=f"{model_name}_{key}", data=value[0])
|
|
122
|
-
return List[nested_model],
|
|
122
|
+
return Optional[List[nested_model]], None
|
|
123
123
|
|
|
124
124
|
else:
|
|
125
125
|
field_type = type(value[0]) if value[0] is not None else Any
|
|
126
|
-
return List[field_type],
|
|
126
|
+
return Optional[List[field_type]], None
|
|
127
127
|
|
|
128
128
|
else:
|
|
129
129
|
field_type = Optional[type(value)] if value is not None else Optional[Any]
|
|
130
|
-
return field_type,
|
|
130
|
+
return field_type, None
|
|
131
131
|
|
|
132
132
|
def create_dynamic_model(self, model_name: str, data: Any) -> Type[BaseModel]:
|
|
133
133
|
"""
|