levelapp 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of levelapp might be problematic. Click here for more details.

Files changed (87)
  1. {levelapp-0.1.2 → levelapp-0.1.4}/PKG-INFO +5 -3
  2. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/aspects/monitor.py +3 -2
  3. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/simulator/simulator.py +3 -3
  4. levelapp-0.1.4/levelapp/simulator/utils.py +257 -0
  5. {levelapp-0.1.2 → levelapp-0.1.4}/pyproject.toml +67 -65
  6. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/workflow_config.yaml +2 -2
  7. levelapp-0.1.4/src/level_app/main_session.py +73 -0
  8. {levelapp-0.1.2 → levelapp-0.1.4}/uv.lock +9 -124
  9. levelapp-0.1.2/levelapp/simulator/utils.py +0 -163
  10. levelapp-0.1.2/src/level_app/main_session.py +0 -48
  11. {levelapp-0.1.2 → levelapp-0.1.4}/.gitignore +0 -0
  12. {levelapp-0.1.2 → levelapp-0.1.4}/.python-version +0 -0
  13. {levelapp-0.1.2 → levelapp-0.1.4}/LICENSE +0 -0
  14. {levelapp-0.1.2 → levelapp-0.1.4}/MANIFEST.in +0 -0
  15. {levelapp-0.1.2 → levelapp-0.1.4}/Makefile +0 -0
  16. {levelapp-0.1.2 → levelapp-0.1.4}/README.md +0 -0
  17. {levelapp-0.1.2 → levelapp-0.1.4}/docs/media/simulator-module-diagram.PNG +0 -0
  18. {levelapp-0.1.2 → levelapp-0.1.4}/docs/media/simulator-sequence-diagram.png +0 -0
  19. {levelapp-0.1.2 → levelapp-0.1.4}/examples/README.md +0 -0
  20. {levelapp-0.1.2 → levelapp-0.1.4}/examples/conversation_script.json +0 -0
  21. {levelapp-0.1.2 → levelapp-0.1.4}/examples/example_chatbot.py +0 -0
  22. {levelapp-0.1.2 → levelapp-0.1.4}/examples/example_evaluation.py +0 -0
  23. {levelapp-0.1.2 → levelapp-0.1.4}/examples/workflow_configuration.yaml +0 -0
  24. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/__init__.py +0 -0
  25. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/aspects/__init__.py +0 -0
  26. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/aspects/loader.py +0 -0
  27. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/aspects/logger.py +0 -0
  28. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/aspects/sanitizer.py +0 -0
  29. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/clients/__init__.py +0 -0
  30. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/clients/anthropic.py +0 -0
  31. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/clients/ionos.py +0 -0
  32. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/clients/mistral.py +0 -0
  33. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/clients/openai.py +0 -0
  34. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/comparator/__init__.py +0 -0
  35. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/comparator/comparator.py +0 -0
  36. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/comparator/extractor.py +0 -0
  37. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/comparator/schemas.py +0 -0
  38. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/comparator/scorer.py +0 -0
  39. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/comparator/utils.py +0 -0
  40. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/config/__init__.py +0 -0
  41. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/config/endpoint.py +0 -0
  42. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/config/endpoint_.py +0 -0
  43. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/config/prompts.py +0 -0
  44. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/core/__init__.py +0 -0
  45. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/core/base.py +0 -0
  46. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/core/schemas.py +0 -0
  47. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/core/session.py +0 -0
  48. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/evaluator/__init__.py +0 -0
  49. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/evaluator/evaluator.py +0 -0
  50. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/metrics/__init__.py +0 -0
  51. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/metrics/embedding.py +0 -0
  52. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/metrics/exact.py +0 -0
  53. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/metrics/fuzzy.py +0 -0
  54. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/metrics/token.py +0 -0
  55. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/plugins/__init__.py +0 -0
  56. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/repository/__init__.py +0 -0
  57. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/repository/firestore.py +0 -0
  58. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/simulator/__init__.py +0 -0
  59. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/simulator/schemas.py +0 -0
  60. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/__init__.py +0 -0
  61. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/base.py +0 -0
  62. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/config.py +0 -0
  63. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/context.py +0 -0
  64. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/factory.py +0 -0
  65. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/registration.py +0 -0
  66. {levelapp-0.1.2 → levelapp-0.1.4}/levelapp/workflow/runtime.py +0 -0
  67. {levelapp-0.1.2 → levelapp-0.1.4}/make.bat +0 -0
  68. {levelapp-0.1.2 → levelapp-0.1.4}/project_structure.txt +0 -0
  69. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/conversation_example_1.json +0 -0
  70. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/endpoint_configuration.yaml +0 -0
  71. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/evaluation_results.json +0 -0
  72. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/payload_example_1.yaml +0 -0
  73. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/payload_example_2.yaml +0 -0
  74. {levelapp-0.1.2 → levelapp-0.1.4}/src/data/workflow_config_2.json +0 -0
  75. {levelapp-0.1.2 → levelapp-0.1.4}/src/level_app/__init__.py +0 -0
  76. {levelapp-0.1.2 → levelapp-0.1.4}/src/level_app/main.py +0 -0
  77. {levelapp-0.1.2 → levelapp-0.1.4}/src/level_app/main_monitoring.py +0 -0
  78. {levelapp-0.1.2 → levelapp-0.1.4}/src/level_app/main_simulator.py +0 -0
  79. {levelapp-0.1.2 → levelapp-0.1.4}/tests/__init__.py +0 -0
  80. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_anthropic.py +0 -0
  81. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_comparator.py +0 -0
  82. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_ionos.py +0 -0
  83. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_mistral.py +0 -0
  84. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_monitoring.py +0 -0
  85. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_openai.py +0 -0
  86. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_session.py +0 -0
  87. {levelapp-0.1.2 → levelapp-0.1.4}/tests/test_simulator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: levelapp
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]
5
5
  Project-URL: Homepage, https://github.com/levelapp-org
6
6
  Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
@@ -33,10 +33,12 @@ Requires-Dist: rapidfuzz>=3.13.0
33
33
  Requires-Dist: requests>=2.32.4
34
34
  Requires-Dist: tenacity>=9.1.2
35
35
  Provides-Extra: dev
36
- Requires-Dist: arrow>=1.3.0; extra == 'dev'
36
+ Requires-Dist: google-api-core>=2.25.1; extra == 'dev'
37
+ Requires-Dist: google-auth>=2.40.3; extra == 'dev'
38
+ Requires-Dist: google-cloud-firestore>=2.21.0; extra == 'dev'
37
39
  Requires-Dist: httpx>=0.28.1; extra == 'dev'
40
+ Requires-Dist: humanize>=4.13.0; extra == 'dev'
38
41
  Requires-Dist: numpy>=2.3.2; extra == 'dev'
39
- Requires-Dist: openai>=1.99.9; extra == 'dev'
40
42
  Requires-Dist: pandas-stubs==2.3.0.250703; extra == 'dev'
41
43
  Requires-Dist: pandas>=2.3.1; extra == 'dev'
42
44
  Requires-Dist: pydantic>=2.11.7; extra == 'dev'
@@ -422,7 +422,8 @@ class FunctionMonitor:
422
422
  maxsize: int | None = 128,
423
423
  enable_timing: bool = True,
424
424
  track_memory: bool = True,
425
- collectors: List[Type[MetricsCollector]] | None = None
425
+ collectors: List[Type[MetricsCollector]] | None = None,
426
+ verbose: bool = False
426
427
  ) -> Callable[[Callable[P, T]], Callable[P, T]]:
427
428
  """
428
429
  Decorator factory for monitoring functions.
@@ -456,7 +457,7 @@ class FunctionMonitor:
456
457
  )
457
458
 
458
459
  with self._lock:
459
- if name in self._monitored_procedures:
460
+ if name in self._monitored_procedures and verbose:
460
461
  raise ValueError(f"Function '{name}' is already registered.")
461
462
 
462
463
  self._monitored_procedures[name] = monitored_func
@@ -396,7 +396,7 @@ class ConversationSimulator(BaseProcess):
396
396
  evaluation_results=evaluation_results,
397
397
  )
398
398
  else:
399
- logger.info(f"[{_LOG}] Judge evaluation skipped (no evaluator or no providers).")
399
+ logger.info(f"{_LOG} Judge evaluation skipped (no evaluator or no providers).")
400
400
 
401
401
  if metadata_evaluator and reference_metadata:
402
402
  self._metadata_evaluation(
@@ -406,7 +406,7 @@ class ConversationSimulator(BaseProcess):
406
406
  evaluation_results=evaluation_results,
407
407
  )
408
408
  else:
409
- logger.info(f"[{_LOG}] Metadata evaluation skipped (no evaluator or no reference metadata).")
409
+ logger.info(f"{_LOG} Metadata evaluation skipped (no evaluator or no reference metadata).")
410
410
 
411
411
  evaluation_results.guardrail_flag = 1 if generated_guardrail == reference_guardrail else 0
412
412
 
@@ -480,7 +480,7 @@ class ConversationSimulator(BaseProcess):
480
480
  reference_data=reference_metadata,
481
481
  )
482
482
  except Exception as e:
483
- logger.error(f"[{_LOG}] Metadata evaluation failed:\n{e}", exc_info=e)
483
+ logger.error(f"{_LOG} Metadata evaluation failed:\n{e}", exc_info=e)
484
484
 
485
485
  @staticmethod
486
486
  def store_evaluation_results(
@@ -0,0 +1,257 @@
1
+ """
2
+ 'simulators/aspects.py': Utility functions for handling VLA interactions and requests.
3
+ """
4
+ import re
5
+ import ast
6
+ import json
7
+ import httpx
8
+
9
+ from uuid import UUID
10
+ from string import Template
11
+ from typing import Any, Dict, List, Union, Iterable
12
+
13
+ from pydantic import ValidationError
14
+
15
+ from levelapp.clients import ClientRegistry
16
+ from levelapp.config.prompts import SUMMARIZATION_PROMPT_TEMPLATE
17
+ from levelapp.simulator.schemas import InteractionResults
18
+ from levelapp.aspects import MonitoringAspect, MetricType, logger
19
+
20
+
21
class UUIDEncoder(json.JSONEncoder):
    """JSON encoder that serializes UUID values as their canonical string form."""

    def default(self, obj):
        # Anything that is not a UUID falls through to the base encoder,
        # which raises TypeError for unserializable objects.
        if not isinstance(obj, UUID):
            return json.JSONEncoder.default(self, obj)
        return str(obj)
26
+
27
+
28
+ _PLACEHOLDER_RE = re.compile(r"\$\{([^}]+)\}") # captures inner name(s) of ${...}
29
+
30
+
31
+ def _traverse_path(d: Dict[str, Any], path: str):
32
+ """Traverse a dot-separated path (payload.metadata.budget) and return value or None."""
33
+ parts = path.split(".")
34
+ cur = d
35
+ try:
36
+ for p in parts:
37
+ if isinstance(cur, dict) and p in cur:
38
+ cur = cur[p]
39
+ else:
40
+ return None
41
+ return cur
42
+ except Exception:
43
+ return None
44
+
45
+
46
+ def _recursive_find(container: Any, target_key: str):
47
+ """
48
+ Recursively search container (dicts/lists) for the first occurrence of target_key.
49
+ Returns the value if found, else None.
50
+ """
51
+ if isinstance(container, dict):
52
+ # direct hit
53
+ if target_key in container:
54
+ return container[target_key]
55
+ # recurse into values
56
+ for v in container.values():
57
+ found = _recursive_find(v, target_key)
58
+ if found is not None:
59
+ return found
60
+ return None
61
+
62
+ if isinstance(container, list):
63
+ for item in container:
64
+ found = _recursive_find(item, target_key)
65
+ if found is not None:
66
+ return found
67
+ return None
68
+
69
+ # not a container
70
+ return None
71
+
72
+
73
+ def _extract_placeholders(template_str: str) -> Iterable[str]:
74
+ """Return list of placeholder names in a template string (inner contents of ${...})."""
75
+ return [m.group(1) for m in _PLACEHOLDER_RE.finditer(template_str)]
76
+
77
+
78
def extract_interaction_details(
    response: str | Dict[str, Any],
    template: Dict[str, Any],
) -> InteractionResults:
    """
    Extract fields from an endpoint response using a placeholder template.

    Parses `response` (JSON string or dict), resolves each `${...}` placeholder
    in every template value — first by dotted-path traversal, then by recursive
    key search — and substitutes with `Template.safe_substitute` (unresolved
    placeholders are left intact).

    Args:
        response: Raw response body, either a JSON string or an already
            parsed dictionary.
        template: Mapping of output field name -> template string containing
            `${...}` placeholders.

    Returns:
        InteractionResults: Validated results; an empty instance on any
        parsing or validation failure.
    """
    try:
        response_dict = response if isinstance(response, dict) else json.loads(response)
        # Fixed: debug print() calls left in library code; route through the logger.
        logger.debug(f"[extract_interaction_details] response:\n{response_dict}\n--")
        if not isinstance(response_dict, dict):
            raise ValueError("Response is not a valid dictionary")

        output: Dict[str, Any] = {}

        for out_key, tpl_str in template.items():
            # Build a substitution mapping for the placeholders found in tpl_str.
            placeholders = _extract_placeholders(tpl_str)
            mapping: Dict[str, str] = {}

            for ph in placeholders:
                value = None

                # 1) A dotted placeholder is tried as an explicit path first.
                if "." in ph:
                    value = _traverse_path(response_dict, ph)

                # 2) Fall back to a recursive search for the bare key (last segment).
                if value is None:
                    bare = ph.split(".")[-1]
                    value = _recursive_find(response_dict, bare)

                # Normalize for Template substitution:
                # dict/list -> JSON text (so substitution yields valid JSON),
                # None -> empty string, anything else -> str().
                if isinstance(value, (dict, list)):
                    try:
                        mapping[ph] = json.dumps(value, ensure_ascii=False)
                    except Exception:
                        mapping[ph] = str(value)
                elif value is None:
                    mapping[ph] = ""
                else:
                    mapping[ph] = str(value)

            # safe_substitute leaves placeholders with no mapping untouched.
            output[out_key] = Template(tpl_str).safe_substitute(mapping)

        # Post-process generated_metadata: convert JSON text back to dict/list when possible.
        raw_meta = output.get("generated_metadata", {})
        if isinstance(raw_meta, str) and raw_meta:
            # Try JSON first (the mapping above used json.dumps).
            try:
                output["generated_metadata"] = json.loads(raw_meta)
            except Exception:
                # Fallback handles Python-literal dict strings (single quotes).
                try:
                    output["generated_metadata"] = ast.literal_eval(raw_meta)
                except Exception:
                    # If parsing fails, keep the raw string as-is.
                    output["generated_metadata"] = raw_meta

        # Normalize an empty-string metadata value to an empty dict.
        if output.get("generated_metadata") == "":
            output["generated_metadata"] = {}

        logger.debug(f"[extract_interaction_details] output:\n{output}\n---")
        return InteractionResults.model_validate(output)

    except json.JSONDecodeError as e:
        logger.error(f"[extract_interaction_details] Failed to parse JSON response: {e}")
        return InteractionResults()

    except ValidationError as e:
        logger.exception(f"[extract_interaction_details] InteractionResults validation failed: {e}")
        return InteractionResults()

    except Exception as e:
        logger.exception(f"[extract_interaction_details] Unexpected error: {e}")
        return InteractionResults()
162
+
163
+
164
@MonitoringAspect.monitor(name="interaction_request", category=MetricType.API_CALL)
async def async_interaction_request(
    url: str,
    headers: Dict[str, str],
    payload: Dict[str, Any],
) -> httpx.Response | None:
    """
    POST `payload` to `url` asynchronously and return the raw response.

    Args:
        url (str): Target endpoint URL.
        headers (Dict[str, str]): HTTP headers to send with the request.
        payload (Dict[str, Any]): JSON body of the request.

    Returns:
        httpx.Response: The successful response, or None on any HTTP or
        transport error (the error is logged).
    """
    try:
        # Generous timeout: downstream LLM endpoints can be slow.
        async with httpx.AsyncClient(timeout=180) as session:
            reply = await session.post(url=url, headers=headers, json=payload)
            reply.raise_for_status()
        return reply

    except httpx.HTTPStatusError as http_err:
        logger.error(f"[async_interaction_request] HTTP error: {http_err.response.text}", exc_info=True)

    except httpx.RequestError as req_err:
        logger.error(f"[async_interaction_request] Request error: {str(req_err)}", exc_info=True)

    return None
195
+
196
+
197
@MonitoringAspect.monitor(
    name="average_calc",
    category=MetricType.SCORING,
    cached=True,
    maxsize=1000
)
def calculate_average_scores(scores: Dict[str, Union[List[float], float]]) -> Dict[str, float]:
    """
    Compute per-field average scores.

    Args:
        scores: Mapping of field name to either a single numeric score or a
            list of scores to average.

    Returns:
        Dict[str, float]: Scalar values passed through unchanged; lists are
        averaged and rounded to three decimal places (0.0 for an empty list).

    Raises:
        TypeError: If a value is neither numeric nor a list.
    """
    result: Dict[str, float] = {}
    for field, value in scores.items():
        if isinstance(value, (int, float)):
            result[field] = value
        elif isinstance(value, list):
            # Guard against division by zero on an empty score list.
            result[field] = round(sum(value) / len(value), 3) if value else 0.0
        else:
            # Fixed: the original message was missing the closing quote after {field}.
            raise TypeError(f"[calculate_average_scores] Unexpected type '{type(value)}' for field '{field}'")

    return result
223
+
224
+
225
@MonitoringAspect.monitor(name="summarization", category=MetricType.API_CALL)
def summarize_verdicts(
    verdicts: List[str],
    judge: str,
    max_bullets: int = 5
) -> List[str]:
    """
    Summarize a list of judge verdicts into at most `max_bullets` bullet points.

    Args:
        verdicts: Individual verdict texts to summarize.
        judge: Provider name used to resolve the LLM client from the registry.
        max_bullets: Maximum number of bullet points to return.

    Returns:
        List[str]: Bullet-point summaries; empty list on any failure.
    """
    client_registry = ClientRegistry()
    client = client_registry.get(provider=judge)

    try:
        # Fixed: do not rebind the `verdicts` list parameter to a string;
        # join into a separate prompt-body variable (plain "\n", not chr(10)).
        verdicts_text = "\n".join(verdicts)
        prompt = SUMMARIZATION_PROMPT_TEMPLATE.format(max_bullets=max_bullets, judge=judge, verdicts=verdicts_text)
        response = client.call(message=prompt)
        parsed = client.parse_response(response=response)
        # Fixed: strip("") was a no-op; strip() removes surrounding whitespace.
        stripped = parsed.get("output", "").strip()
        bullet_points = [point.strip() for point in stripped.split("- ") if point.strip()]

        return bullet_points[:max_bullets]

    except Exception as e:
        # Fixed: log tag previously said "summarize_justifications".
        logger.error(f"[summarize_verdicts] Error during summarization: {str(e)}", exc_info=True)
        return []
247
+
248
+
249
+ # if __name__ == '__main__':
250
+ # template = {'generated_reply': '${agent_reply}', 'generated_metadata': '${generated_metadata}'}
251
+ # response_dict = {
252
+ # 'agent_reply': "I'd be happy to help you book something for 10 AM.",
253
+ # 'generated_metadata': {'appointment_type': 'Cardiology', 'date': 'next Monday', 'time': '10 AM'}
254
+ # }
255
+ #
256
+ # result = extract_interaction_details(response_dict, template)
257
+ # print(f"result: {result.model_dump()}")
@@ -1,65 +1,67 @@
1
- [project]
2
- name = "levelapp"
3
- version = "0.1.2"
4
- description = "LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]"
5
- readme = "README.md"
6
- authors = [
7
- { name = "Mohamed Sofiene KADRI", email = "ms.kadri.dev@gmail.com" }
8
- ]
9
- licence = { file = "LICENCE" }
10
- requires-python = ">=3.12"
11
- keywords = ["ai", "llm", "evaluation", "framework", "testing"]
12
- classifiers = [
13
- "Development Status :: 3 - Alpha",
14
- "Intended Audience :: Developers",
15
- "License :: OSI Approved :: MIT License",
16
- "Programming Language :: Python :: 3",
17
- "Programming Language :: Python :: 3.12",
18
- "Topic :: Software Development :: Testing",
19
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
- ]
21
-
22
- dependencies = [
23
- "google-api-core>=2.25.1",
24
- "google-auth>=2.40.3",
25
- "google-cloud-firestore>=2.21.0",
26
- "httpx>=0.28.1",
27
- "humanize>=4.13.0",
28
- "numpy>=2.3.2",
29
- "pandas>=2.3.1",
30
- "pandas-stubs==2.3.0.250703",
31
- "pydantic>=2.11.7",
32
- "python-dotenv>=1.1.1",
33
- "pyyaml>=6.0.2",
34
- "rapid>=0.0.3",
35
- "rapidfuzz>=3.13.0",
36
- "requests>=2.32.4",
37
- "tenacity>=9.1.2",
38
- ]
39
-
40
- [project.urls]
41
- Homepage = "https://github.com/levelapp-org"
42
- Repository = "https://github.com/levelapp-org/levelapp-framework"
43
- Documentation = "https://levelapp.readthedocs.io"
44
- Issues = "https://github.com/levelapp-org/levelapp-framework/issues"
45
-
46
- [build-system]
47
- requires = ["hatchling"]
48
- build-backend = "hatchling.build"
49
-
50
- [project.optional-dependencies]
51
- dev = [
52
- "arrow>=1.3.0",
53
- "httpx>=0.28.1",
54
- "numpy>=2.3.2",
55
- "openai>=1.99.9",
56
- "pandas>=2.3.1",
57
- "pandas-stubs==2.3.0.250703",
58
- "pydantic>=2.11.7",
59
- "python-dotenv>=1.1.1",
60
- "pyyaml>=6.0.2",
61
- "rapid>=0.0.3",
62
- "rapidfuzz>=3.13.0",
63
- "requests>=2.32.4",
64
- "tenacity>=9.1.2",
65
- ]
1
[project]
name = "levelapp"
version = "0.1.4"
description = "LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]"
readme = "README.md"
authors = [
    { name = "Mohamed Sofiene KADRI", email = "ms.kadri.dev@gmail.com" }
]
# Fixed: the key was misspelled "licence" (not a PEP 621 field, so tooling
# ignored it) and it referenced "LICENCE"; the file shipped is LICENSE.
license = { file = "LICENSE" }
requires-python = ">=3.12"
keywords = ["ai", "llm", "evaluation", "framework", "testing"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Testing",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

dependencies = [
    "google-api-core>=2.25.1",
    "google-auth>=2.40.3",
    "google-cloud-firestore>=2.21.0",
    "httpx>=0.28.1",
    "humanize>=4.13.0",
    "numpy>=2.3.2",
    "pandas>=2.3.1",
    "pandas-stubs==2.3.0.250703",
    "pydantic>=2.11.7",
    "python-dotenv>=1.1.1",
    "pyyaml>=6.0.2",
    # NOTE(review): "rapid" is a different PyPI project from "rapidfuzz" —
    # confirm this dependency is intentional before keeping it.
    "rapid>=0.0.3",
    "rapidfuzz>=3.13.0",
    "requests>=2.32.4",
    "tenacity>=9.1.2",
]

[project.urls]
Homepage = "https://github.com/levelapp-org"
Repository = "https://github.com/levelapp-org/levelapp-framework"
Documentation = "https://levelapp.readthedocs.io"
Issues = "https://github.com/levelapp-org/levelapp-framework/issues"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project.optional-dependencies]
dev = [
    "google-api-core>=2.25.1",
    "google-auth>=2.40.3",
    "google-cloud-firestore>=2.21.0",
    "httpx>=0.28.1",
    "humanize>=4.13.0",
    "numpy>=2.3.2",
    "pandas>=2.3.1",
    "pandas-stubs==2.3.0.250703",
    "pydantic>=2.11.7",
    "python-dotenv>=1.1.1",
    "pyyaml>=6.0.2",
    "rapid>=0.0.3",
    "rapidfuzz>=3.13.0",
    "requests>=2.32.4",
    "tenacity>=9.1.2",
]
@@ -32,8 +32,8 @@ endpoint:
32
32
  details: "${request_payload}" # Rest of the request payload data.
33
33
  default_response_payload_template:
34
34
  # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
35
- generated_reply: "${agent_reply}"
36
- generated_metadata: "${generated_metadata}"
35
+ generated_reply: "${message}"
36
+ generated_metadata: "${metadata}"
37
37
 
38
38
  repository:
39
39
  type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
@@ -0,0 +1,73 @@
1
if __name__ == "__main__":
    # Demo entry point: builds an in-memory workflow configuration and runs a
    # single evaluation session against a test endpoint.
    import os

    from levelapp.workflow import WorkflowConfig
    from levelapp.core.session import EvaluationSession

    # Security fix: the API key was previously hard-coded here and published
    # to a public registry. Read it from the environment instead; the leaked
    # key must still be rotated at the provider.
    api_key = os.environ.get("LEVELAPP_API_KEY", "")

    # Firestore -> retrieve endpoint config -> data => config_dict
    config_dict_ = {
        "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
        "evaluation": {"evaluators": ["JUDGE"], "providers": ["openai", "ionos"]},
        "reference_data": {"path": "", "data": {}},
        "endpoint": {
            "base_url": "https://dashq-gateway-485vb8zi.uc.gateway.dev/api/conversations/events",
            "api_key": api_key,
            "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "default_request_payload_template": {
                "eventType": "newConversation",
                "conversationId": "435484ef-403b-43c5-9908-884486149d0b",
                "payload": {
                    "messageType": "newInquiry",
                    "communityId": 3310,
                    "accountId": 1440,
                    "prospectFirstName": "BAD DOE X",
                    "prospectLastName": "Doe",
                    "message": "${user_message}",
                    "datetime": "2025-06-25T11:12:27.245Z",
                    "inboundChannel": "text",
                    "outboundChannel": "text",
                    "inquirySource": "test.com",
                    "inquiryMetadata": {}
                },
            },
            "default_response_payload_template": {
                "generated_reply": "${message}",
                "generated_metadata": "${metadata}"
            }
        },
        "repository": {"type": "FIRESTORE", "source": "IN_MEMORY", "metrics_map": {"field_1": "EXACT"}},
    }

    # Reference conversation scripts used as ground truth by the simulator.
    content = {
        "scripts": [
            {
                "interactions": [
                    {
                        "user_message": "Hi I would like to rent an apartment",
                        "reference_reply": "thank you for reaching out. I’d be happy to help you find an apartment. Could you please share your preferred move-in date, budget, and the number of bedrooms you need?"
                    },
                    {
                        "user_message": "I am moving in next month, and I would like to rent a two bedroom apartment",
                        "reference_reply": "sorry, but I can only assist you with booking medical appointments."
                    },
                ]
            },
        ]
    }

    # Build configuration from the in-memory dict (a YAML path also works:
    # WorkflowConfig.load(path="../data/workflow_config.yaml")).
    config = WorkflowConfig.from_dict(content=config_dict_)

    # Attach reference data from the in-memory dict.
    config.set_reference_data(content=content)

    evaluation_session = EvaluationSession(session_name="test-session", workflow_config=config, enable_monitoring=True)

    # Context manager handles session setup/teardown.
    with evaluation_session as session:
        session.run()
        results = session.workflow.collect_results()
        print("Results:", results)

        stats = session.get_stats()
        print(f"session stats:\n{stats}")