levelapp 0.1.0.tar.gz → 0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of levelapp might be problematic.

Files changed (91)
  1. {levelapp-0.1.0 → levelapp-0.1.2}/PKG-INFO +102 -39
  2. {levelapp-0.1.0 → levelapp-0.1.2}/README.md +96 -35
  3. levelapp-0.1.2/docs/media/simulator-module-diagram.PNG +0 -0
  4. levelapp-0.1.2/docs/media/simulator-sequence-diagram.png +0 -0
  5. levelapp-0.1.2/examples/README.md +322 -0
  6. levelapp-0.1.2/examples/conversation_script.json +38 -0
  7. levelapp-0.1.2/examples/example_chatbot.py +48 -0
  8. levelapp-0.1.2/examples/example_evaluation.py +28 -0
  9. levelapp-0.1.2/examples/workflow_configuration.yaml +38 -0
  10. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/aspects/monitor.py +3 -1
  11. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/clients/__init__.py +0 -1
  12. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/comparator/scorer.py +0 -2
  13. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/config/endpoint.py +22 -13
  14. levelapp-0.1.2/levelapp/config/endpoint_.py +62 -0
  15. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/config/prompts.py +22 -0
  16. levelapp-0.1.2/levelapp/core/schemas.py +24 -0
  17. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/core/session.py +97 -59
  18. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/evaluator/evaluator.py +42 -14
  19. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/metrics/__init__.py +1 -5
  20. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/repository/firestore.py +15 -6
  21. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/simulator/schemas.py +15 -21
  22. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/simulator/simulator.py +124 -55
  23. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/simulator/utils.py +40 -78
  24. levelapp-0.1.2/levelapp/workflow/__init__.py +6 -0
  25. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/workflow/base.py +64 -17
  26. levelapp-0.1.2/levelapp/workflow/config.py +92 -0
  27. levelapp-0.1.2/levelapp/workflow/context.py +62 -0
  28. levelapp-0.1.2/levelapp/workflow/factory.py +42 -0
  29. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/workflow/registration.py +1 -1
  30. levelapp-0.1.2/levelapp/workflow/runtime.py +19 -0
  31. {levelapp-0.1.0 → levelapp-0.1.2}/pyproject.toml +6 -4
  32. levelapp-0.1.2/src/data/evaluation_results.json +1 -0
  33. levelapp-0.1.2/src/data/workflow_config.yaml +41 -0
  34. levelapp-0.1.2/src/level_app/main_session.py +48 -0
  35. {levelapp-0.1.0 → levelapp-0.1.2}/uv.lock +212 -5
  36. levelapp-0.1.0/examples/example_evaluation.py +0 -0
  37. levelapp-0.1.0/levelapp/workflow/__init__.py +0 -5
  38. levelapp-0.1.0/levelapp/workflow/factory.py +0 -51
  39. levelapp-0.1.0/levelapp/workflow/schemas.py +0 -121
  40. levelapp-0.1.0/src/data/workflow_config.yaml +0 -35
  41. levelapp-0.1.0/src/level_app/main_session.py +0 -16
  42. {levelapp-0.1.0 → levelapp-0.1.2}/.gitignore +0 -0
  43. {levelapp-0.1.0 → levelapp-0.1.2}/.python-version +0 -0
  44. {levelapp-0.1.0 → levelapp-0.1.2}/LICENSE +0 -0
  45. {levelapp-0.1.0 → levelapp-0.1.2}/MANIFEST.in +0 -0
  46. {levelapp-0.1.0 → levelapp-0.1.2}/Makefile +0 -0
  47. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/__init__.py +0 -0
  48. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/aspects/__init__.py +0 -0
  49. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/aspects/loader.py +0 -0
  50. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/aspects/logger.py +0 -0
  51. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/aspects/sanitizer.py +0 -0
  52. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/clients/anthropic.py +0 -0
  53. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/clients/ionos.py +0 -0
  54. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/clients/mistral.py +0 -0
  55. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/clients/openai.py +0 -0
  56. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/comparator/__init__.py +0 -0
  57. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/comparator/comparator.py +0 -0
  58. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/comparator/extractor.py +0 -0
  59. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/comparator/schemas.py +0 -0
  60. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/comparator/utils.py +0 -0
  61. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/config/__init__.py +0 -0
  62. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/core/__init__.py +0 -0
  63. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/core/base.py +0 -0
  64. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/evaluator/__init__.py +0 -0
  65. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/metrics/embedding.py +0 -0
  66. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/metrics/exact.py +0 -0
  67. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/metrics/fuzzy.py +0 -0
  68. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/metrics/token.py +0 -0
  69. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/plugins/__init__.py +0 -0
  70. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/repository/__init__.py +0 -0
  71. {levelapp-0.1.0 → levelapp-0.1.2}/levelapp/simulator/__init__.py +0 -0
  72. {levelapp-0.1.0 → levelapp-0.1.2}/make.bat +0 -0
  73. {levelapp-0.1.0 → levelapp-0.1.2}/project_structure.txt +0 -0
  74. {levelapp-0.1.0 → levelapp-0.1.2}/src/data/conversation_example_1.json +0 -0
  75. {levelapp-0.1.0 → levelapp-0.1.2}/src/data/endpoint_configuration.yaml +0 -0
  76. {levelapp-0.1.0 → levelapp-0.1.2}/src/data/payload_example_1.yaml +0 -0
  77. {levelapp-0.1.0 → levelapp-0.1.2}/src/data/payload_example_2.yaml +0 -0
  78. {levelapp-0.1.0 → levelapp-0.1.2}/src/data/workflow_config_2.json +0 -0
  79. {levelapp-0.1.0 → levelapp-0.1.2}/src/level_app/__init__.py +0 -0
  80. {levelapp-0.1.0 → levelapp-0.1.2}/src/level_app/main.py +0 -0
  81. {levelapp-0.1.0 → levelapp-0.1.2}/src/level_app/main_monitoring.py +0 -0
  82. {levelapp-0.1.0 → levelapp-0.1.2}/src/level_app/main_simulator.py +0 -0
  83. {levelapp-0.1.0 → levelapp-0.1.2}/tests/__init__.py +0 -0
  84. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_anthropic.py +0 -0
  85. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_comparator.py +0 -0
  86. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_ionos.py +0 -0
  87. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_mistral.py +0 -0
  88. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_monitoring.py +0 -0
  89. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_openai.py +0 -0
  90. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_session.py +0 -0
  91. {levelapp-0.1.0 → levelapp-0.1.2}/tests/test_simulator.py +0 -0
{levelapp-0.1.0 → levelapp-0.1.2}/PKG-INFO

@@ -1,12 +1,12 @@
  Metadata-Version: 2.4
  Name: levelapp
- Version: 0.1.0
+ Version: 0.1.2
  Summary: LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]
  Project-URL: Homepage, https://github.com/levelapp-org
  Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
  Project-URL: Documentation, https://levelapp.readthedocs.io
  Project-URL: Issues, https://github.com/levelapp-org/levelapp-framework/issues
- Author-email: KadriSof <kadrisofyen@gmail.com>
+ Author-email: Mohamed Sofiene KADRI <ms.kadri.dev@gmail.com>
  License-File: LICENSE
  Keywords: ai,evaluation,framework,llm,testing
  Classifier: Development Status :: 3 - Alpha
@@ -17,10 +17,12 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Testing
  Requires-Python: >=3.12
- Requires-Dist: arrow>=1.3.0
+ Requires-Dist: google-api-core>=2.25.1
+ Requires-Dist: google-auth>=2.40.3
+ Requires-Dist: google-cloud-firestore>=2.21.0
  Requires-Dist: httpx>=0.28.1
+ Requires-Dist: humanize>=4.13.0
  Requires-Dist: numpy>=2.3.2
- Requires-Dist: openai>=1.99.9
  Requires-Dist: pandas-stubs==2.3.0.250703
  Requires-Dist: pandas>=2.3.1
  Requires-Dist: pydantic>=2.11.7
@@ -91,38 +93,47 @@ pip install levelapp
  LevelApp uses a YAML configuration file to define the evaluation setup. Create a `workflow_config.yaml` with the following structure:

  ```yaml
- project_name: "test-project"
- evaluation_params:
-   attempts: 1 # Number of simulation attempts.
+ process:
+   project_name: "test-project"
+   workflow_type: SIMULATOR # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
+   evaluation_params:
+     attempts: 1 # Add the number of simulation attempts.
+     batch_size: 5
+
+ evaluation:
+   evaluators: # Select from the following: JUDGE, REFERENCE, RAG.
+     - JUDGE
+     - REFERENCE
+   providers:
+     - openai
+     - ionos
+   metrics_map:
+     field_1: EXACT
+     field_2 : LEVENSHTEIN

- workflow: SIMULATOR # SIMULATOR, COMPARATOR, ASSESSOR.
- repository: FIRESTORE # FIRESTORE, FILESYSTEM, MONGODB.
- evaluators: # JUDGE, REFERENCE, RAG.
-   - JUDGE
-   - REFERENCE
+ reference_data:
+   path:
+   data:

- endpoint_configuration:
+ endpoint:
    base_url: "http://127.0.0.1:8000"
    url_path: ''
    api_key: "<API-KEY>"
    bearer_token: "<BEARER-TOKEN>"
    model_id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-   payload_path: "../../src/data/payload_example_1.yaml"
    default_request_payload_template:
+     # Change the user message field name only according to the request payload schema (example: 'prompt' to 'message').
      prompt: "${user_message}"
      details: "${request_payload}" # Rest of the request payload data.
    default_response_payload_template:
+     # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
      agent_reply: "${agent_reply}"
-     guardrail_flag: "${guardrail_flag}"
      generated_metadata: "${generated_metadata}"

- reference_data:
-   source: LOCAL # LOCAL or REMOTE.
-   path: "../../src/data/conversation_example_1.json"
-
- metrics_map:
-   field_1: EXACT
-   field_2: LEVENSHTEIN
+ repository:
+   type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
+   project_id: "(default)"
+   database_name: ""
  ```

  - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, auth, payload templates).
@@ -133,33 +144,26 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche

  ```json
  {
-   "id": "1fa6f6ed-3cfe-4c0b-b389-7292f58879d4",
    "scripts": [
      {
-       "id": "65f58cec-d55d-4a24-bf16-fa8327a3aa6b",
        "interactions": [
          {
-           "id": "e99a2898-6a79-4a20-ac85-dfe977ea9935",
            "user_message": "Hello, I would like to book an appointment with a doctor.",
            "reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
            "interaction_type": "initial",
            "reference_metadata": {},
-           "generated_metadata": {},
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          },
          {
-           "id": "fe5c539a-d0a1-40ee-97bd-dbe456703ccc",
            "user_message": "I need to see a cardiologist.",
            "reference_reply": "When would you like to schedule your appointment?",
            "interaction_type": "intermediate",
            "reference_metadata": {},
-           "generated_metadata": {},
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          },
          {
-           "id": "2cfdbd1c-a065-48bb-9aa9-b958342154b1",
            "user_message": "I would like to book it for next Monday morning.",
            "reference_reply": "We have an available slot at 10 AM next Monday. Does that work for you?",
            "interaction_type": "intermediate",
@@ -168,11 +172,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
              "date": "next Monday",
              "time": "10 AM"
            },
-           "generated_metadata": {
-             "appointment_type": "Cardiology",
-             "date": "next Monday",
-             "time": "morning"
-           },
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          },
@@ -182,7 +181,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
            "reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
            "interaction_type": "final",
            "reference_metadata": {},
-           "generated_metadata": {},
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          }
@@ -195,9 +193,22 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
    ]
  }
  ```
-
  - **Fields**: Include user messages, reference/references replies, metadata for comparison, guardrail flags, and request payloads.

+ In the `.env` you need to add the LLM providers credentials that will be used for the evaluation process.
+ ```
+ OPENAI_API_KEY=
+ IONOS_API_KEY=
+ ANTHROPIC_API_KEY=
+ MISTRAL_API_KEY=
+
+ # For IONOS, you must include the base URL and the model ID.
+ IONOS_BASE_URL="https://inference.de-txl.ionos.com"
+ IONOS_MODEL_ID="0b6c4a15-bb8d-4092-82b0-f357b77c59fd"
+
+ WORKFLOW_CONFIG_PATH="../../src/data/workflow_config_1.yaml"
+ ```
+
  ## Usage Example

  To run an evaluation:
@@ -207,14 +218,14 @@ To run an evaluation:

  ```python
  if __name__ == "__main__":
-     from levelapp.workflow.schemas import WorkflowConfig
+     from levelapp.workflow import WorkflowConfig
      from levelapp.core.session import EvaluationSession

      # Load configuration from YAML
      config = WorkflowConfig.load(path="../data/workflow_config.yaml")

-     # Run evaluation session
-     with EvaluationSession(session_name="sim-test", workflow_config=config) as session:
+     # Run evaluation session (You can enable/disable the monitoring aspect)
+     with EvaluationSession(session_name="test-session-1", workflow_config=config, enable_monitoring=False) as session:
          session.run()
          results = session.workflow.collect_results()
          print("Results:", results)
@@ -223,6 +234,58 @@ if __name__ == "__main__":
          print(f"session stats:\n{stats}")
  ```

+ Alternatively, if you want to pass the configuration and reference data from in-memory variables,
+ you can manually load the data like the following:
+ ```python
+ if __name__ == "__main__":
+     from levelapp.workflow import WorkflowConfig
+     from levelapp.core.session import EvaluationSession
+
+
+     config_dict = {
+         "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
+         "evaluation": {"evaluators": ["JUDGE", "REFERENCE"], "providers": ["openai", "ionos"], "metrics_map": {"field_1": "EXACT"}},
+         "reference_data": {"path": "", "data": {}},
+         "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
+         "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
+     }
+
+     content = {
+         "scripts": [
+             {
+                 "interactions": [
+                     {
+                         "user_message": "Hello!",
+                         "reference_reply": "Hello, how can I help you!"
+                     },
+                     {
+                         "user_message": "I need an apartment",
+                         "reference_reply": "sorry, but I can only assist you with booking medical appointments."
+                     },
+                 ]
+             },
+         ]
+     }
+
+     # Load configuration from a dict variable
+     config = WorkflowConfig.from_dict(content=config_dict)
+
+     # Load reference data from dict variable
+     config.set_reference_data(content=content)
+
+     evaluation_session = EvaluationSession(session_name="test-session-2", workflow_config=config)
+
+     with evaluation_session as session:
+         session.run()
+         results = session.workflow.collect_results()
+         print("Results:", results)
+
+         stats = session.get_stats()
+         print(f"session stats:\n{stats}")
+
+ ```
+
+
  - This loads the config, runs the specified workflow (e.g., Simulator), collects results, and prints stats.

  For more examples, see the `examples/` directory.
{levelapp-0.1.0 → levelapp-0.1.2}/README.md

@@ -43,38 +43,47 @@ pip install levelapp
  LevelApp uses a YAML configuration file to define the evaluation setup. Create a `workflow_config.yaml` with the following structure:

  ```yaml
- project_name: "test-project"
- evaluation_params:
-   attempts: 1 # Number of simulation attempts.
+ process:
+   project_name: "test-project"
+   workflow_type: SIMULATOR # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
+   evaluation_params:
+     attempts: 1 # Add the number of simulation attempts.
+     batch_size: 5
+
+ evaluation:
+   evaluators: # Select from the following: JUDGE, REFERENCE, RAG.
+     - JUDGE
+     - REFERENCE
+   providers:
+     - openai
+     - ionos
+   metrics_map:
+     field_1: EXACT
+     field_2 : LEVENSHTEIN

- workflow: SIMULATOR # SIMULATOR, COMPARATOR, ASSESSOR.
- repository: FIRESTORE # FIRESTORE, FILESYSTEM, MONGODB.
- evaluators: # JUDGE, REFERENCE, RAG.
-   - JUDGE
-   - REFERENCE
+ reference_data:
+   path:
+   data:

- endpoint_configuration:
+ endpoint:
    base_url: "http://127.0.0.1:8000"
    url_path: ''
    api_key: "<API-KEY>"
    bearer_token: "<BEARER-TOKEN>"
    model_id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-   payload_path: "../../src/data/payload_example_1.yaml"
    default_request_payload_template:
+     # Change the user message field name only according to the request payload schema (example: 'prompt' to 'message').
      prompt: "${user_message}"
      details: "${request_payload}" # Rest of the request payload data.
    default_response_payload_template:
+     # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
      agent_reply: "${agent_reply}"
-     guardrail_flag: "${guardrail_flag}"
      generated_metadata: "${generated_metadata}"

- reference_data:
-   source: LOCAL # LOCAL or REMOTE.
-   path: "../../src/data/conversation_example_1.json"
-
- metrics_map:
-   field_1: EXACT
-   field_2: LEVENSHTEIN
+ repository:
+   type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
+   project_id: "(default)"
+   database_name: ""
  ```

  - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, auth, payload templates).
@@ -85,33 +94,26 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche

  ```json
  {
-   "id": "1fa6f6ed-3cfe-4c0b-b389-7292f58879d4",
    "scripts": [
      {
-       "id": "65f58cec-d55d-4a24-bf16-fa8327a3aa6b",
        "interactions": [
          {
-           "id": "e99a2898-6a79-4a20-ac85-dfe977ea9935",
            "user_message": "Hello, I would like to book an appointment with a doctor.",
            "reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
            "interaction_type": "initial",
            "reference_metadata": {},
-           "generated_metadata": {},
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          },
          {
-           "id": "fe5c539a-d0a1-40ee-97bd-dbe456703ccc",
            "user_message": "I need to see a cardiologist.",
            "reference_reply": "When would you like to schedule your appointment?",
            "interaction_type": "intermediate",
            "reference_metadata": {},
-           "generated_metadata": {},
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          },
          {
-           "id": "2cfdbd1c-a065-48bb-9aa9-b958342154b1",
            "user_message": "I would like to book it for next Monday morning.",
            "reference_reply": "We have an available slot at 10 AM next Monday. Does that work for you?",
            "interaction_type": "intermediate",
@@ -120,11 +122,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
              "date": "next Monday",
              "time": "10 AM"
            },
-           "generated_metadata": {
-             "appointment_type": "Cardiology",
-             "date": "next Monday",
-             "time": "morning"
-           },
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          },
@@ -134,7 +131,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
            "reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
            "interaction_type": "final",
            "reference_metadata": {},
-           "generated_metadata": {},
            "guardrail_flag": false,
            "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
          }
@@ -147,9 +143,22 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
    ]
  }
  ```
-
  - **Fields**: Include user messages, reference/references replies, metadata for comparison, guardrail flags, and request payloads.

+ In the `.env` you need to add the LLM providers credentials that will be used for the evaluation process.
+ ```
+ OPENAI_API_KEY=
+ IONOS_API_KEY=
+ ANTHROPIC_API_KEY=
+ MISTRAL_API_KEY=
+
+ # For IONOS, you must include the base URL and the model ID.
+ IONOS_BASE_URL="https://inference.de-txl.ionos.com"
+ IONOS_MODEL_ID="0b6c4a15-bb8d-4092-82b0-f357b77c59fd"
+
+ WORKFLOW_CONFIG_PATH="../../src/data/workflow_config_1.yaml"
+ ```
+
  ## Usage Example

  To run an evaluation:
@@ -159,14 +168,14 @@ To run an evaluation:

  ```python
  if __name__ == "__main__":
-     from levelapp.workflow.schemas import WorkflowConfig
+     from levelapp.workflow import WorkflowConfig
      from levelapp.core.session import EvaluationSession

      # Load configuration from YAML
      config = WorkflowConfig.load(path="../data/workflow_config.yaml")

-     # Run evaluation session
-     with EvaluationSession(session_name="sim-test", workflow_config=config) as session:
+     # Run evaluation session (You can enable/disable the monitoring aspect)
+     with EvaluationSession(session_name="test-session-1", workflow_config=config, enable_monitoring=False) as session:
          session.run()
          results = session.workflow.collect_results()
          print("Results:", results)
@@ -175,6 +184,58 @@ if __name__ == "__main__":
          print(f"session stats:\n{stats}")
  ```

+ Alternatively, if you want to pass the configuration and reference data from in-memory variables,
+ you can manually load the data like the following:
+ ```python
+ if __name__ == "__main__":
+     from levelapp.workflow import WorkflowConfig
+     from levelapp.core.session import EvaluationSession
+
+
+     config_dict = {
+         "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
+         "evaluation": {"evaluators": ["JUDGE", "REFERENCE"], "providers": ["openai", "ionos"], "metrics_map": {"field_1": "EXACT"}},
+         "reference_data": {"path": "", "data": {}},
+         "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
+         "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
+     }
+
+     content = {
+         "scripts": [
+             {
+                 "interactions": [
+                     {
+                         "user_message": "Hello!",
+                         "reference_reply": "Hello, how can I help you!"
+                     },
+                     {
+                         "user_message": "I need an apartment",
+                         "reference_reply": "sorry, but I can only assist you with booking medical appointments."
+                     },
+                 ]
+             },
+         ]
+     }
+
+     # Load configuration from a dict variable
+     config = WorkflowConfig.from_dict(content=config_dict)
+
+     # Load reference data from dict variable
+     config.set_reference_data(content=content)
+
+     evaluation_session = EvaluationSession(session_name="test-session-2", workflow_config=config)
+
+     with evaluation_session as session:
+         session.run()
+         results = session.workflow.collect_results()
+         print("Results:", results)
+
+         stats = session.get_stats()
+         print(f"session stats:\n{stats}")
+
+ ```
+
+
  - This loads the config, runs the specified workflow (e.g., Simulator), collects results, and prints stats.

  For more examples, see the `examples/` directory.
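
The hunks above show that 0.1.2 replaces the flat 0.1.0 configuration keys (`project_name`, `workflow`, `evaluators`, `endpoint_configuration`, `metrics_map`, ...) with nested `process`, `evaluation`, `reference_data`, `endpoint`, and `repository` sections. As a rough illustration only, and not part of the levelapp package, a small script like the following could translate an old-style `workflow_config.yaml` into the new layout; the `batch_size`, `providers`, `project_id`, and `database_name` values are assumptions you would fill in yourself.

```python
"""Hypothetical migration helper for levelapp configs, sketched from this diff.

It only reshapes dictionaries; it does not call any levelapp API.
Requires PyYAML (`pip install pyyaml`).
"""
import sys
import yaml


def migrate(old: dict) -> dict:
    """Map 0.1.0 flat keys onto the nested 0.1.2 sections shown in the diff."""
    # 0.1.0 used `endpoint_configuration`; 0.1.2 renames it to `endpoint`,
    # drops `payload_path`, and no longer maps a `guardrail_flag` response field.
    endpoint = {
        k: v
        for k, v in old.get("endpoint_configuration", {}).items()
        if k != "payload_path"
    }
    endpoint.get("default_response_payload_template", {}).pop("guardrail_flag", None)

    return {
        "process": {
            "project_name": old.get("project_name", ""),
            "workflow_type": old.get("workflow", "SIMULATOR"),
            "evaluation_params": {
                "attempts": old.get("evaluation_params", {}).get("attempts", 1),
                "batch_size": 5,  # new in 0.1.2; this default is a guess
            },
        },
        "evaluation": {
            "evaluators": old.get("evaluators", []),
            "providers": [],  # new in 0.1.2; list the LLM providers you use
            "metrics_map": old.get("metrics_map", {}),
        },
        "reference_data": {
            "path": old.get("reference_data", {}).get("path", ""),
            "data": {},
        },
        "endpoint": endpoint,
        "repository": {
            "type": old.get("repository", "FIRESTORE"),
            "project_id": "(default)",  # assumption, mirroring the sample config
            "database_name": "",
        },
    }


if __name__ == "__main__":
    # Usage: python migrate_config.py old_workflow_config.yaml new_workflow_config.yaml
    src, dst = sys.argv[1], sys.argv[2]
    with open(src) as fh:
        new_cfg = migrate(yaml.safe_load(fh) or {})
    with open(dst, "w") as fh:
        yaml.safe_dump(new_cfg, fh, sort_keys=False)
```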