levelapp 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of levelapp might be problematic. Click here for more details.

Files changed (104) hide show
  1. {levelapp-0.1.4 → levelapp-0.1.5}/PKG-INFO +146 -31
  2. {levelapp-0.1.4 → levelapp-0.1.5}/README.md +145 -30
  3. {levelapp-0.1.4 → levelapp-0.1.5}/examples/README.md +35 -0
  4. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/loader.py +4 -4
  5. levelapp-0.1.5/levelapp/config/api_config.yaml +156 -0
  6. levelapp-0.1.5/levelapp/config/dashq_api.yaml +94 -0
  7. levelapp-0.1.5/levelapp/config/endpoint_.py +382 -0
  8. levelapp-0.1.5/levelapp/config/endpoints.yaml +47 -0
  9. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/session.py +8 -0
  10. levelapp-0.1.5/levelapp/endpoint/client.py +102 -0
  11. levelapp-0.1.5/levelapp/endpoint/manager.py +114 -0
  12. levelapp-0.1.5/levelapp/endpoint/parsers.py +120 -0
  13. levelapp-0.1.5/levelapp/endpoint/schemas.py +38 -0
  14. levelapp-0.1.5/levelapp/endpoint/tester.py +53 -0
  15. levelapp-0.1.5/levelapp/endpoint/usage_example.py +39 -0
  16. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/evaluator/evaluator.py +9 -1
  17. levelapp-0.1.5/levelapp/repository/filesystem.py +203 -0
  18. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/simulator/schemas.py +4 -4
  19. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/simulator/simulator.py +57 -43
  20. levelapp-0.1.5/levelapp/simulator/utils.py +134 -0
  21. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/base.py +33 -2
  22. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/config.py +6 -2
  23. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/context.py +3 -1
  24. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/runtime.py +3 -3
  25. {levelapp-0.1.4 → levelapp-0.1.5}/pyproject.toml +67 -67
  26. {levelapp-0.1.4 → levelapp-0.1.5}/src/data/conversation_example_1.json +4 -4
  27. levelapp-0.1.5/src/data/workflow_config.yaml +118 -0
  28. levelapp-0.1.5/src/level_app/main_session.py +142 -0
  29. levelapp-0.1.5/tests/endpoint/test_client.py +113 -0
  30. levelapp-0.1.5/tests/endpoint/test_parsers.py +105 -0
  31. levelapp-0.1.5/tests/repository/test_filesystem.py +102 -0
  32. levelapp-0.1.5/tests/test_session.py +0 -0
  33. levelapp-0.1.5/tests/test_simulator.py +0 -0
  34. {levelapp-0.1.4 → levelapp-0.1.5}/uv.lock +752 -752
  35. levelapp-0.1.4/levelapp/config/endpoint_.py +0 -62
  36. levelapp-0.1.4/levelapp/simulator/utils.py +0 -257
  37. levelapp-0.1.4/src/data/workflow_config.yaml +0 -41
  38. levelapp-0.1.4/src/level_app/main_session.py +0 -73
  39. {levelapp-0.1.4 → levelapp-0.1.5}/.gitignore +0 -0
  40. {levelapp-0.1.4 → levelapp-0.1.5}/.python-version +0 -0
  41. {levelapp-0.1.4 → levelapp-0.1.5}/LICENSE +0 -0
  42. {levelapp-0.1.4 → levelapp-0.1.5}/MANIFEST.in +0 -0
  43. {levelapp-0.1.4 → levelapp-0.1.5}/Makefile +0 -0
  44. {levelapp-0.1.4 → levelapp-0.1.5}/docs/media/simulator-module-diagram.PNG +0 -0
  45. {levelapp-0.1.4 → levelapp-0.1.5}/docs/media/simulator-sequence-diagram.png +0 -0
  46. {levelapp-0.1.4 → levelapp-0.1.5}/examples/conversation_script.json +0 -0
  47. {levelapp-0.1.4 → levelapp-0.1.5}/examples/example_chatbot.py +0 -0
  48. {levelapp-0.1.4 → levelapp-0.1.5}/examples/example_evaluation.py +0 -0
  49. {levelapp-0.1.4 → levelapp-0.1.5}/examples/workflow_configuration.yaml +0 -0
  50. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/__init__.py +0 -0
  51. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/__init__.py +0 -0
  52. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/logger.py +0 -0
  53. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/monitor.py +0 -0
  54. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/aspects/sanitizer.py +0 -0
  55. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/__init__.py +0 -0
  56. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/anthropic.py +0 -0
  57. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/ionos.py +0 -0
  58. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/mistral.py +0 -0
  59. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/clients/openai.py +0 -0
  60. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/__init__.py +0 -0
  61. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/comparator.py +0 -0
  62. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/extractor.py +0 -0
  63. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/schemas.py +0 -0
  64. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/scorer.py +0 -0
  65. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/comparator/utils.py +0 -0
  66. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/config/__init__.py +0 -0
  67. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/config/endpoint.py +0 -0
  68. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/config/prompts.py +0 -0
  69. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/__init__.py +0 -0
  70. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/base.py +0 -0
  71. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/core/schemas.py +0 -0
  72. {levelapp-0.1.4/levelapp/plugins → levelapp-0.1.5/levelapp/endpoint}/__init__.py +0 -0
  73. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/evaluator/__init__.py +0 -0
  74. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/__init__.py +0 -0
  75. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/embedding.py +0 -0
  76. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/exact.py +0 -0
  77. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/fuzzy.py +0 -0
  78. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/metrics/token.py +0 -0
  79. {levelapp-0.1.4/tests → levelapp-0.1.5/levelapp/plugins}/__init__.py +0 -0
  80. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/repository/__init__.py +0 -0
  81. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/repository/firestore.py +0 -0
  82. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/simulator/__init__.py +0 -0
  83. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/__init__.py +0 -0
  84. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/factory.py +0 -0
  85. {levelapp-0.1.4 → levelapp-0.1.5}/levelapp/workflow/registration.py +0 -0
  86. {levelapp-0.1.4 → levelapp-0.1.5}/make.bat +0 -0
  87. {levelapp-0.1.4 → levelapp-0.1.5}/project_structure.txt +0 -0
  88. {levelapp-0.1.4 → levelapp-0.1.5}/src/data/endpoint_configuration.yaml +0 -0
  89. {levelapp-0.1.4 → levelapp-0.1.5}/src/data/evaluation_results.json +0 -0
  90. {levelapp-0.1.4 → levelapp-0.1.5}/src/data/payload_example_1.yaml +0 -0
  91. {levelapp-0.1.4 → levelapp-0.1.5}/src/data/payload_example_2.yaml +0 -0
  92. {levelapp-0.1.4 → levelapp-0.1.5}/src/data/workflow_config_2.json +0 -0
  93. {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/__init__.py +0 -0
  94. {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/main.py +0 -0
  95. {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/main_monitoring.py +0 -0
  96. {levelapp-0.1.4 → levelapp-0.1.5}/src/level_app/main_simulator.py +0 -0
  97. /levelapp-0.1.4/tests/test_comparator.py → /levelapp-0.1.5/tests/__init__.py +0 -0
  98. /levelapp-0.1.4/tests/test_session.py → /levelapp-0.1.5/tests/repository/__init__.py +0 -0
  99. {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_anthropic.py +0 -0
  100. /levelapp-0.1.4/tests/test_simulator.py → /levelapp-0.1.5/tests/test_comparator.py +0 -0
  101. {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_ionos.py +0 -0
  102. {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_mistral.py +0 -0
  103. {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_monitoring.py +0 -0
  104. {levelapp-0.1.4 → levelapp-0.1.5}/tests/test_openai.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: levelapp
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]
5
5
  Project-URL: Homepage, https://github.com/levelapp-org
6
6
  Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
@@ -114,23 +114,54 @@ evaluation:
114
114
  field_2 : LEVENSHTEIN
115
115
 
116
116
  reference_data:
117
- path:
117
+ path: "../data/conversation_example_1.json"
118
118
  data:
119
119
 
120
120
  endpoint:
121
- base_url: "http://127.0.0.1:8000"
122
- url_path: ''
123
- api_key: "<API-KEY>"
124
- bearer_token: "<BEARER-TOKEN>"
125
- model_id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
126
- default_request_payload_template:
127
- # Change the user message field name only according to the request payload schema (example: 'prompt' to 'message').
128
- prompt: "${user_message}"
129
- details: "${request_payload}" # Rest of the request payload data.
130
- default_response_payload_template:
131
- # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
132
- agent_reply: "${agent_reply}"
133
- generated_metadata: "${generated_metadata}"
121
+ name: conversational-agent
122
+ base_url: http://127.0.0.1:8000
123
+ path: /v1/chat
124
+ method: POST
125
+ timeout: 60
126
+ retry_count: 3
127
+ retry_backoff: 0.5
128
+ headers:
129
+ - name: model_id
130
+ value: meta-llama/Meta-Llama-3-8B-Instruct
131
+ secure: false
132
+ - name: x-api-key
133
+ value: API_KEY # Load from .env file using python-dotenv.
134
+ secure: true
135
+ - name: Content-Type
136
+ value: application/json
137
+ secure: false
138
+ request_schema:
139
+ # Static field to be included in every request.
140
+ - field_path: message.source
141
+ value: system
142
+ value_type: static
143
+ required: true
144
+
145
+ # Dynamic field to be populated from runtime context.
146
+ - field_path: message.text
147
+ value: message_text # the key from the runtime context.
148
+ value_type: dynamic
149
+ required: true
150
+
151
+ # Env-based field (from OS environment variables).
152
+ - field_path: metadata.env
153
+ value: ENV_VAR_NAME
154
+ value_type: env
155
+ required: false
156
+
157
+ response_mapping:
158
+ # Map the response fields that will be extracted.
159
+ - field_path: reply.text
160
+ extract_as: agent_reply # The simulator requires this key: 'agent_reply'.
161
+ - field_path: reply.metadata
162
+ extract_as: generated_metadata # The simulator requires this key: 'generated_metadata'.
163
+ - field_path: reply.guardrail_flag
164
+ extract_as: guardrail_flag # The simulator requires this key: 'guardrail_flag'.
134
165
 
135
166
  repository:
136
167
  type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
@@ -138,8 +169,8 @@ repository:
138
169
  database_name: ""
139
170
  ```
140
171
 
141
- - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, auth, payload templates).
142
- - **Placeholders**: For the request payload, change the field names (e.g., 'prompt' to 'message') according to your API specs. For the response payload, change the place holders values (e.g., `${agent_reply}` to `${generated_reply}`).
172
+ - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, headers, request/response payload schema).
173
+ - **Placeholders**: For dynamic request schema fields, the `value` entry names the runtime-context key whose value is injected into the field at request time (e.g., with `value: message_text`, pass `context = {'message_text': "Hello, world!"}`).
143
174
  - **Secrets**: Store API keys in `.env` and load via `python-dotenv` (e.g., `API_KEY=your_key_here`).
144
175
 
145
176
  For conversation scripts (used in Simulator), provide a JSON file with this schema:
@@ -154,16 +185,14 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
154
185
  "reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
155
186
  "interaction_type": "initial",
156
187
  "reference_metadata": {},
157
- "guardrail_flag": false,
158
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
188
+ "guardrail_flag": false
159
189
  },
160
190
  {
161
191
  "user_message": "I need to see a cardiologist.",
162
192
  "reference_reply": "When would you like to schedule your appointment?",
163
193
  "interaction_type": "intermediate",
164
194
  "reference_metadata": {},
165
- "guardrail_flag": false,
166
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
195
+ "guardrail_flag": false
167
196
  },
168
197
  {
169
198
  "user_message": "I would like to book it for next Monday morning.",
@@ -174,8 +203,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
174
203
  "date": "next Monday",
175
204
  "time": "10 AM"
176
205
  },
177
- "guardrail_flag": false,
178
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
206
+ "guardrail_flag": false
179
207
  },
180
208
  {
181
209
  "id": "f4f2dd35-71d7-4b75-ba2b-93a4f546004a",
@@ -183,8 +211,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
183
211
  "reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
184
212
  "interaction_type": "final",
185
213
  "reference_metadata": {},
186
- "guardrail_flag": false,
187
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
214
+ "guardrail_flag": false
188
215
  }
189
216
  ],
190
217
  "description": "A conversation about booking a doctor appointment.",
@@ -245,11 +272,90 @@ if __name__ == "__main__":
245
272
 
246
273
 
247
274
  config_dict = {
248
- "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
249
- "evaluation": {"evaluators": ["JUDGE", "REFERENCE"], "providers": ["openai", "ionos"], "metrics_map": {"field_1": "EXACT"}},
250
- "reference_data": {"path": "", "data": {}},
251
- "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
252
- "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
275
+ "process": {
276
+ "project_name": "test-project",
277
+ "workflow_type": "SIMULATOR", # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
278
+ "evaluation_params": {
279
+ "attempts": 1, # Add the number of simulation attempts.
280
+ }
281
+ },
282
+ "evaluation": {
283
+ "evaluators": ["JUDGE", "REFERENCE"], # Select from the following: JUDGE, REFERENCE, RAG.
284
+ "providers": ["openai", "ionos"],
285
+ "metrics_map": {
286
+ "field_1": "EXACT",
287
+ "field_2": "LEVENSHTEIN"
288
+ }
289
+ },
290
+ "reference_data": {
291
+ "path": "../data/conversation_example_1.json",
292
+ "data": None
293
+ },
294
+ "endpoint": {
295
+ "name": "conversational-agent",
296
+ "base_url": "http://127.0.0.1:8000",
297
+ "path": "/v1/chat",
298
+ "method": "POST",
299
+ "timeout": 60,
300
+ "retry_count": 3,
301
+ "retry_backoff": 0.5,
302
+ "headers": [
303
+ {
304
+ "name": "model_id",
305
+ "value": "meta-llama/Meta-Llama-3.1-8B-Instruct",
306
+ "secure": False
307
+ },
308
+ {
309
+ "name": "x-api-key",
310
+ "value": "API_KEY", # Load from .env file using python-dotenv.
311
+ "secure": True
312
+ },
313
+ {
314
+ "name": "Content-Type",
315
+ "value": "application/json",
316
+ "secure": False
317
+ }
318
+ ],
319
+ "request_schema": [
320
+ {
321
+ "field_path": "message.source",
322
+ "value": "system",
323
+ "value_type": "static",
324
+ "required": True
325
+ },
326
+ {
327
+ "field_path": "message.text",
328
+ "value": "message_text", # the key from the runtime context.
329
+ "value_type": "dynamic",
330
+ "required": True
331
+ },
332
+ {
333
+ "field_path": "metadata.env",
334
+ "value": "ENV_VAR_NAME",
335
+ "value_type": "env",
336
+ "required": False
337
+ }
338
+ ],
339
+ "response_mapping": [
340
+ {
341
+ "field_path": "reply.text",
342
+ "extract_as": "agent_reply" # Remember that the simulator requires this key: 'agent_reply'.
343
+ },
344
+ {
345
+ "field_path": "reply.metadata",
346
+ "extract_as": "generated_metadata" # Remember that the simulator requires this key: 'generated_metadata'.
347
+ },
348
+ {
349
+ "field_path": "reply.guardrail_flag",
350
+ "extract_as": "guardrail_flag" # Remember that the simulator requires this key: 'guardrail_flag'.
351
+ }
352
+ ]
353
+ },
354
+ "repository": {
355
+ "type": "FIRESTORE", # Pick one of the following: FIRESTORE, FILESYSTEM
356
+ "project_id": "(default)",
357
+ "database_name": ""
358
+ }
253
359
  }
254
360
 
255
361
  content = {
@@ -275,9 +381,18 @@ if __name__ == "__main__":
275
381
  # Load reference data from dict variable
276
382
  config.set_reference_data(content=content)
277
383
 
278
- evaluation_session = EvaluationSession(session_name="test-session-2", workflow_config=config)
384
+ evaluation_session = EvaluationSession(
385
+ session_name="test-session",
386
+ workflow_config=config,
387
+ enable_monitoring=True # To disable the monitoring aspect, set this to False.
388
+ )
279
389
 
280
390
  with evaluation_session as session:
391
+ # Optional: Run connectivity test before the full evaluation
392
+ test_results = session.run_connectivity_test(
393
+ context={"user_message": "I want to book an appointment with a dentist."}
394
+ )
395
+ print(f"Connectivity Test Results:\n{test_results}\n---")
281
396
  session.run()
282
397
  results = session.workflow.collect_results()
283
398
  print("Results:", results)
@@ -62,23 +62,54 @@ evaluation:
62
62
  field_2 : LEVENSHTEIN
63
63
 
64
64
  reference_data:
65
- path:
65
+ path: "../data/conversation_example_1.json"
66
66
  data:
67
67
 
68
68
  endpoint:
69
- base_url: "http://127.0.0.1:8000"
70
- url_path: ''
71
- api_key: "<API-KEY>"
72
- bearer_token: "<BEARER-TOKEN>"
73
- model_id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
74
- default_request_payload_template:
75
- # Change the user message field name only according to the request payload schema (example: 'prompt' to 'message').
76
- prompt: "${user_message}"
77
- details: "${request_payload}" # Rest of the request payload data.
78
- default_response_payload_template:
79
- # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
80
- agent_reply: "${agent_reply}"
81
- generated_metadata: "${generated_metadata}"
69
+ name: conversational-agent
70
+ base_url: http://127.0.0.1:8000
71
+ path: /v1/chat
72
+ method: POST
73
+ timeout: 60
74
+ retry_count: 3
75
+ retry_backoff: 0.5
76
+ headers:
77
+ - name: model_id
78
+ value: meta-llama/Meta-Llama-3-8B-Instruct
79
+ secure: false
80
+ - name: x-api-key
81
+ value: API_KEY # Load from .env file using python-dotenv.
82
+ secure: true
83
+ - name: Content-Type
84
+ value: application/json
85
+ secure: false
86
+ request_schema:
87
+ # Static field to be included in every request.
88
+ - field_path: message.source
89
+ value: system
90
+ value_type: static
91
+ required: true
92
+
93
+ # Dynamic field to be populated from runtime context.
94
+ - field_path: message.text
95
+ value: message_text # the key from the runtime context.
96
+ value_type: dynamic
97
+ required: true
98
+
99
+ # Env-based field (from OS environment variables).
100
+ - field_path: metadata.env
101
+ value: ENV_VAR_NAME
102
+ value_type: env
103
+ required: false
104
+
105
+ response_mapping:
106
+ # Map the response fields that will be extracted.
107
+ - field_path: reply.text
108
+ extract_as: agent_reply # The simulator requires this key: 'agent_reply'.
109
+ - field_path: reply.metadata
110
+ extract_as: generated_metadata # The simulator requires this key: 'generated_metadata'.
111
+ - field_path: reply.guardrail_flag
112
+ extract_as: guardrail_flag # The simulator requires this key: 'guardrail_flag'.
82
113
 
83
114
  repository:
84
115
  type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
@@ -86,8 +117,8 @@ repository:
86
117
  database_name: ""
87
118
  ```
88
119
 
89
- - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, auth, payload templates).
90
- - **Placeholders**: For the request payload, change the field names (e.g., 'prompt' to 'message') according to your API specs. For the response payload, change the place holders values (e.g., `${agent_reply}` to `${generated_reply}`).
120
+ - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, headers, request/response payload schema).
121
+ - **Placeholders**: For dynamic request schema fields, the `value` entry names the runtime-context key whose value is injected into the field at request time (e.g., with `value: message_text`, pass `context = {'message_text': "Hello, world!"}`).
91
122
  - **Secrets**: Store API keys in `.env` and load via `python-dotenv` (e.g., `API_KEY=your_key_here`).
92
123
 
93
124
  For conversation scripts (used in Simulator), provide a JSON file with this schema:
@@ -102,16 +133,14 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
102
133
  "reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
103
134
  "interaction_type": "initial",
104
135
  "reference_metadata": {},
105
- "guardrail_flag": false,
106
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
136
+ "guardrail_flag": false
107
137
  },
108
138
  {
109
139
  "user_message": "I need to see a cardiologist.",
110
140
  "reference_reply": "When would you like to schedule your appointment?",
111
141
  "interaction_type": "intermediate",
112
142
  "reference_metadata": {},
113
- "guardrail_flag": false,
114
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
143
+ "guardrail_flag": false
115
144
  },
116
145
  {
117
146
  "user_message": "I would like to book it for next Monday morning.",
@@ -122,8 +151,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
122
151
  "date": "next Monday",
123
152
  "time": "10 AM"
124
153
  },
125
- "guardrail_flag": false,
126
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
154
+ "guardrail_flag": false
127
155
  },
128
156
  {
129
157
  "id": "f4f2dd35-71d7-4b75-ba2b-93a4f546004a",
@@ -131,8 +159,7 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
131
159
  "reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
132
160
  "interaction_type": "final",
133
161
  "reference_metadata": {},
134
- "guardrail_flag": false,
135
- "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
162
+ "guardrail_flag": false
136
163
  }
137
164
  ],
138
165
  "description": "A conversation about booking a doctor appointment.",
@@ -193,11 +220,90 @@ if __name__ == "__main__":
193
220
 
194
221
 
195
222
  config_dict = {
196
- "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
197
- "evaluation": {"evaluators": ["JUDGE", "REFERENCE"], "providers": ["openai", "ionos"], "metrics_map": {"field_1": "EXACT"}},
198
- "reference_data": {"path": "", "data": {}},
199
- "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
200
- "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
223
+ "process": {
224
+ "project_name": "test-project",
225
+ "workflow_type": "SIMULATOR", # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
226
+ "evaluation_params": {
227
+ "attempts": 1, # Add the number of simulation attempts.
228
+ }
229
+ },
230
+ "evaluation": {
231
+ "evaluators": ["JUDGE", "REFERENCE"], # Select from the following: JUDGE, REFERENCE, RAG.
232
+ "providers": ["openai", "ionos"],
233
+ "metrics_map": {
234
+ "field_1": "EXACT",
235
+ "field_2": "LEVENSHTEIN"
236
+ }
237
+ },
238
+ "reference_data": {
239
+ "path": "../data/conversation_example_1.json",
240
+ "data": None
241
+ },
242
+ "endpoint": {
243
+ "name": "conversational-agent",
244
+ "base_url": "http://127.0.0.1:8000",
245
+ "path": "/v1/chat",
246
+ "method": "POST",
247
+ "timeout": 60,
248
+ "retry_count": 3,
249
+ "retry_backoff": 0.5,
250
+ "headers": [
251
+ {
252
+ "name": "model_id",
253
+ "value": "meta-llama/Meta-Llama-3.1-8B-Instruct",
254
+ "secure": False
255
+ },
256
+ {
257
+ "name": "x-api-key",
258
+ "value": "API_KEY", # Load from .env file using python-dotenv.
259
+ "secure": True
260
+ },
261
+ {
262
+ "name": "Content-Type",
263
+ "value": "application/json",
264
+ "secure": False
265
+ }
266
+ ],
267
+ "request_schema": [
268
+ {
269
+ "field_path": "message.source",
270
+ "value": "system",
271
+ "value_type": "static",
272
+ "required": True
273
+ },
274
+ {
275
+ "field_path": "message.text",
276
+ "value": "message_text", # the key from the runtime context.
277
+ "value_type": "dynamic",
278
+ "required": True
279
+ },
280
+ {
281
+ "field_path": "metadata.env",
282
+ "value": "ENV_VAR_NAME",
283
+ "value_type": "env",
284
+ "required": False
285
+ }
286
+ ],
287
+ "response_mapping": [
288
+ {
289
+ "field_path": "reply.text",
290
+ "extract_as": "agent_reply" # Remember that the simulator requires this key: 'agent_reply'.
291
+ },
292
+ {
293
+ "field_path": "reply.metadata",
294
+ "extract_as": "generated_metadata" # Remember that the simulator requires this key: 'generated_metadata'.
295
+ },
296
+ {
297
+ "field_path": "reply.guardrail_flag",
298
+ "extract_as": "guardrail_flag" # Remember that the simulator requires this key: 'guardrail_flag'.
299
+ }
300
+ ]
301
+ },
302
+ "repository": {
303
+ "type": "FIRESTORE", # Pick one of the following: FIRESTORE, FILESYSTEM
304
+ "project_id": "(default)",
305
+ "database_name": ""
306
+ }
201
307
  }
202
308
 
203
309
  content = {
@@ -223,9 +329,18 @@ if __name__ == "__main__":
223
329
  # Load reference data from dict variable
224
330
  config.set_reference_data(content=content)
225
331
 
226
- evaluation_session = EvaluationSession(session_name="test-session-2", workflow_config=config)
332
+ evaluation_session = EvaluationSession(
333
+ session_name="test-session",
334
+ workflow_config=config,
335
+ enable_monitoring=True # To disable the monitoring aspect, set this to False.
336
+ )
227
337
 
228
338
  with evaluation_session as session:
339
+ # Optional: Run connectivity test before the full evaluation
340
+ test_results = session.run_connectivity_test(
341
+ context={"user_message": "I want to book an appointment with a dentist."}
342
+ )
343
+ print(f"Connectivity Test Results:\n{test_results}\n---")
229
344
  session.run()
230
345
  results = session.workflow.collect_results()
231
346
  print("Results:", results)
@@ -138,6 +138,41 @@ endpoint:
138
138
  agent_reply: "${generated_reply}" # Map to your response field
139
139
  generated_metadata: "${metadata}" # e.g., extracted entities
140
140
 
141
+ endpoint:
142
+ name: conversational-agent
143
+ base_url: "http://127.0.0.1:8000" # Your chatbot's API base URL
144
+ path: "/chat" # Endpoint path (full URL = base_url + path)
145
+ method: POST # HTTP method
146
+ timeout: 60 # Timeout in seconds
147
+ retry_count: 3 # Number of retries on failure
148
+ retry_backoff: 0.5 # Backoff factor for retries
149
+ # Optional authentication headers
150
+ headers:
151
+ - name: model_id
152
+ value: meta-llama/Meta-Llama-3-8B-Instruct
153
+ secure: false
154
+ - name: x-api-key
155
+ value: API_KEY # Load from .env file using python-dotenv.
156
+ secure: true
157
+ - name: Content-Type
158
+ value: application/json
159
+ secure: false
160
+ request_schema:
161
+ # Dynamic field to be populated from the runtime context ('value' is the context key).
162
+ - field_path: message
163
+ value: system
164
+ value_type: dynamic
165
+ required: true
166
+
167
+ response_mapping:
168
+ # Map the response fields that will be extracted.
169
+ - field_path: reply
170
+ extract_as: agent_reply # The simulator requires this key: 'agent_reply'.
171
+ - field_path: reply.metadata
172
+ extract_as: generated_metadata # The simulator requires this key: 'generated_metadata'.
173
+ - field_path: reply.guardrail_flag
174
+ extract_as: guardrail_flag # The simulator requires this key: 'guardrail_flag'.
175
+
141
176
  # REPOSITORY SECTION (Optional):
142
177
  repository:
143
178
  type: FILESYSTEM # Or FIRESTORE/MONGODB for persistence
@@ -111,7 +111,7 @@ class DynamicModelBuilder:
111
111
  """
112
112
  if isinstance(value, Mapping):
113
113
  nested_model = self.create_dynamic_model(model_name=f"{model_name}_{key}", data=value)
114
- return nested_model, ...
114
+ return Optional[nested_model], None
115
115
 
116
116
  elif isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
117
117
  if not value:
@@ -119,15 +119,15 @@ class DynamicModelBuilder:
119
119
 
120
120
  elif isinstance(value[0], Mapping):
121
121
  nested_model = self.create_dynamic_model(model_name=f"{model_name}_{key}", data=value[0])
122
- return List[nested_model], ...
122
+ return Optional[List[nested_model]], None
123
123
 
124
124
  else:
125
125
  field_type = type(value[0]) if value[0] is not None else Any
126
- return List[field_type], ...
126
+ return Optional[List[field_type]], None
127
127
 
128
128
  else:
129
129
  field_type = Optional[type(value)] if value is not None else Optional[Any]
130
- return field_type, ...
130
+ return field_type, None
131
131
 
132
132
  def create_dynamic_model(self, model_name: str, data: Any) -> Type[BaseModel]:
133
133
  """