levelapp 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. levelapp/__init__.py +0 -0
  2. levelapp/aspects/__init__.py +8 -0
  3. levelapp/aspects/loader.py +253 -0
  4. levelapp/aspects/logger.py +59 -0
  5. levelapp/aspects/monitor.py +617 -0
  6. levelapp/aspects/sanitizer.py +168 -0
  7. levelapp/clients/__init__.py +122 -0
  8. levelapp/clients/anthropic.py +112 -0
  9. levelapp/clients/gemini.py +130 -0
  10. levelapp/clients/groq.py +101 -0
  11. levelapp/clients/huggingface.py +162 -0
  12. levelapp/clients/ionos.py +126 -0
  13. levelapp/clients/mistral.py +106 -0
  14. levelapp/clients/openai.py +116 -0
  15. levelapp/comparator/__init__.py +5 -0
  16. levelapp/comparator/comparator.py +232 -0
  17. levelapp/comparator/extractor.py +108 -0
  18. levelapp/comparator/schemas.py +61 -0
  19. levelapp/comparator/scorer.py +269 -0
  20. levelapp/comparator/utils.py +136 -0
  21. levelapp/config/__init__.py +5 -0
  22. levelapp/config/endpoint.py +199 -0
  23. levelapp/config/prompts.py +57 -0
  24. levelapp/core/__init__.py +0 -0
  25. levelapp/core/base.py +386 -0
  26. levelapp/core/schemas.py +24 -0
  27. levelapp/core/session.py +336 -0
  28. levelapp/endpoint/__init__.py +0 -0
  29. levelapp/endpoint/client.py +188 -0
  30. levelapp/endpoint/client_test.py +41 -0
  31. levelapp/endpoint/manager.py +114 -0
  32. levelapp/endpoint/parsers.py +119 -0
  33. levelapp/endpoint/schemas.py +38 -0
  34. levelapp/endpoint/tester.py +52 -0
  35. levelapp/evaluator/__init__.py +3 -0
  36. levelapp/evaluator/evaluator.py +307 -0
  37. levelapp/metrics/__init__.py +63 -0
  38. levelapp/metrics/embedding.py +56 -0
  39. levelapp/metrics/embeddings/__init__.py +0 -0
  40. levelapp/metrics/embeddings/sentence_transformer.py +30 -0
  41. levelapp/metrics/embeddings/torch_based.py +56 -0
  42. levelapp/metrics/exact.py +182 -0
  43. levelapp/metrics/fuzzy.py +80 -0
  44. levelapp/metrics/token.py +103 -0
  45. levelapp/plugins/__init__.py +0 -0
  46. levelapp/repository/__init__.py +3 -0
  47. levelapp/repository/filesystem.py +203 -0
  48. levelapp/repository/firestore.py +291 -0
  49. levelapp/simulator/__init__.py +3 -0
  50. levelapp/simulator/schemas.py +116 -0
  51. levelapp/simulator/simulator.py +531 -0
  52. levelapp/simulator/utils.py +134 -0
  53. levelapp/visualization/__init__.py +7 -0
  54. levelapp/visualization/charts.py +358 -0
  55. levelapp/visualization/dashboard.py +240 -0
  56. levelapp/visualization/exporter.py +167 -0
  57. levelapp/visualization/templates/base.html +158 -0
  58. levelapp/visualization/templates/comparator_dashboard.html +57 -0
  59. levelapp/visualization/templates/simulator_dashboard.html +111 -0
  60. levelapp/workflow/__init__.py +6 -0
  61. levelapp/workflow/base.py +192 -0
  62. levelapp/workflow/config.py +96 -0
  63. levelapp/workflow/context.py +64 -0
  64. levelapp/workflow/factory.py +42 -0
  65. levelapp/workflow/registration.py +6 -0
  66. levelapp/workflow/runtime.py +19 -0
  67. levelapp-0.1.15.dist-info/METADATA +571 -0
  68. levelapp-0.1.15.dist-info/RECORD +70 -0
  69. levelapp-0.1.15.dist-info/WHEEL +4 -0
  70. levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,136 @@
1
+ """levelapp/comparator/aspects.py:"""
2
+
3
+ import re
4
+ import json
5
+ import logging
6
+ import pandas as pd
7
+
8
+ from typing import List, Dict, Any, Literal, Union
9
+ from pathlib import Path
10
+
11
+
12
def format_evaluation_results(
    evaluation_results: List[tuple],
    output_type: Literal["json", "csv"] = "json"
) -> Union[List[Dict[str, Any]], pd.DataFrame, None]:
    """
    Shape raw evaluation tuples for JSON (list of dicts) or CSV (DataFrame) use.

    Args:
        evaluation_results: List of evaluation result tuples.
        output_type: 'json' returns List[dict]; 'csv' returns a DataFrame.

    Returns:
        Formatted evaluation data, or None when the input is empty.
    """
    if not evaluation_results:
        logging.warning("No evaluation data to format.")
        return None

    rows = []
    for record in evaluation_results:
        # Explicit unpacking documents the expected 8-tuple layout and raises
        # on malformed records instead of silently mis-mapping fields.
        (field_name, ref_values, ext_values,
         e_metric, e_scores, s_metric, s_scores, threshold) = record
        rows.append({
            "field_name": field_name,
            "reference_values": ref_values,
            "extracted_values": ext_values,
            "entity_metric": e_metric,
            "entity_scores": e_scores,
            "set_metric": s_metric,
            "set_scores": s_scores,
            "threshold": threshold,
        })

    if output_type == "csv":
        return pd.DataFrame(rows)
    return rows
46
+
47
+
48
def store_evaluation_output(
    formatted_data: Union[pd.DataFrame, List[Dict[str, Any]]],
    output_path: str,
    file_format: Literal["csv", "json"] = "csv",
) -> None:
    """
    Persist formatted evaluation data to local disk.

    Args:
        formatted_data: Output from `format_evaluation_results`.
        output_path: File path prefix (no extension).
        file_format: 'csv' or 'json'.

    Note:
        Type/format mismatches (wrong data type for the chosen format, or an
        unsupported format) are caught and logged — they do not propagate.
    """
    # A DataFrame has no unambiguous truth value (`bool(df)` raises ValueError),
    # so emptiness must be tested via length — the previous `if not formatted_data`
    # crashed for every DataFrame before reaching the CSV branch.
    if formatted_data is None or len(formatted_data) == 0:
        logging.warning("No data provided for local storage.")
        return

    try:
        if file_format == "csv":
            if not isinstance(formatted_data, pd.DataFrame):
                raise TypeError("CSV output requires a pandas DataFrame.")
            path = f"{output_path}.csv"
            formatted_data.to_csv(path, index=False)

        elif file_format == "json":
            if not isinstance(formatted_data, list):
                raise TypeError("JSON output requires a list of dictionaries.")
            path = f"{output_path}.json"
            with open(path, "w", encoding="utf-8") as f:
                json.dump(formatted_data, f, indent=2, ensure_ascii=False)

        else:
            raise ValueError(f"Unsupported file format: {file_format}")

        logging.info(f"Evaluation data saved to {path}")

    except Exception as e:
        # Best-effort persistence: failures are reported, not raised.
        logging.error(f"Failed to save evaluation output: {e}")
89
+
90
+
91
def safe_load_json_file(file_path: Union[str, Path]) -> Any:
    """
    Load a potentially malformed JSON file by pre-sanitizing its content at the byte/text level.

    The file is read as raw bytes, decoded as UTF-8 with undecodable sequences
    replaced (U+FFFD), then run through `_clean_malformed_json_text` before parsing.

    Args:
        file_path: Path to the potentially malformed JSON file.

    Returns:
        Parsed JSON content (as a Python dict or list).

    Raises:
        ValueError: If JSON parsing fails even after pre-sanitization.
    """
    with open(file_path, "rb") as f:
        raw_bytes = f.read()

    # Decode defensively: invalid byte sequences become U+FFFD instead of raising.
    raw_text = raw_bytes.decode("utf-8", errors="replace")
    sanitized_text = _clean_malformed_json_text(raw_text)

    try:
        return json.loads(sanitized_text)

    except json.JSONDecodeError as e:
        # Chain the original error so the failing position/context is preserved.
        raise ValueError(f"Failed to decode JSON after sanitization: {e}") from e
115
+
116
+
117
+ def _clean_malformed_json_text(text: str) -> str:
118
+ """
119
+ Remove common forms of JSON text corruption before parsing.
120
+
121
+ Args:
122
+ text: Raw JSON string content.
123
+
124
+ Returns:
125
+ A sanitized string safe for json.loads() parsing.
126
+ """
127
+ # Strip BOM (please do not delete this comment)
128
+ text = text.lstrip('\ufeff')
129
+
130
+ # Remove non-printable control characters except \t, \n, \r (please do not delete this comment)
131
+ text = re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "", text)
132
+
133
+ # Remove invalid characters (like \uFFFD or strange CP1252 remnants) (please do not delete this comment)
134
+ text = text.replace("\ufffd", "?")
135
+
136
+ return text
@@ -0,0 +1,5 @@
1
from .endpoint import EndpointConfig
from .prompts import EVAL_PROMPT_TEMPLATE


# Public API of levelapp.config: re-exported here so callers can import
# directly from the package (e.g. `from levelapp.config import EndpointConfig`).
__all__ = ['EndpointConfig', 'EVAL_PROMPT_TEMPLATE']
@@ -0,0 +1,199 @@
1
+ """levelapp/config/endpoint.py"""
2
+ import os
3
+ import json
4
+ import yaml
5
+
6
+ from string import Template
7
+ from dotenv import load_dotenv
8
+
9
+ from enum import Enum
10
+ from typing import Literal, Dict, Any
11
+ from pydantic import BaseModel, HttpUrl, SecretStr, Field, computed_field
12
+
13
+ from levelapp.aspects import logger
14
+
15
+
16
class TemplateType(Enum):
    """Selects which payload template kind to load: request or response."""
    REQUEST = "request"
    RESPONSE = "response"
19
+
20
+
21
class EndpointConfig(BaseModel):
    """
    Configuration class for user system's endpoint.

    Parameters:
        base_url (HttpUrl): The base url of the endpoint.
        method (Literal['POST', 'GET']): The HTTP method to use (POST or GET).
        api_key (SecretStr): The API key to use.
        bearer_token (SecretStr): The Bearer token to use.
        model_id (str): The model to use (if applicable).
        default_request_payload_template (Dict[str, Any]): The request payload template.
        default_response_payload_template (Dict[str, Any]): The response payload template.
        variables (Dict[str, Any]): The variables to populate the payload templates.

    Note:
        Either use the provided configuration YAML file, supplying the fields
        listed above, or manually configure the model instance by assigning the
        proper values to the model fields.

        You can also provide in the .env file the path for the payload template
        (ENDPOINT_PAYLOAD_PATH) and the response template (ENDPOINT_RESPONSE_PATH)
        separately. The files can be either YAML or JSON only.
    """
    # NOTE(review): runs once at class-definition (import) time, not per
    # instance — a module-level side effect, presumably so ENDPOINT_*_PATH
    # env vars are available before `load_template` runs. Confirm intentional.
    load_dotenv()

    # Required
    method: Literal["POST", "GET"] = Field(default="POST")
    # NOTE(review): `default=HttpUrl` stores the HttpUrl *class object* as the
    # default (pydantic does not validate defaults), so `full_url` on an
    # unconfigured instance stringifies the class instead of a URL. Flagged
    # only — left unchanged here to preserve current behavior.
    base_url: HttpUrl = Field(default=HttpUrl)
    url_path: str = Field(default='')  # appended verbatim to base_url by `full_url`

    # Auth — all optional; whichever is set is reflected in `headers`.
    api_key: SecretStr | None = Field(default=None)
    bearer_token: SecretStr | None = Field(default=None)
    model_id: str | None = Field(default='')

    # Data — inline payload templates; when empty, `load_template` is consulted.
    default_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
    default_response_payload_template: Dict[str, Any] = Field(default_factory=dict)

    # Variables substituted into `$placeholder` slots of the templates.
    variables: Dict[str, Any] = Field(default_factory=dict)

    @computed_field()
    @property
    def full_url(self) -> str:
        """Full request URL: the stringified base URL with `url_path` appended."""
        return str(self.base_url) + self.url_path

    @computed_field()
    @property
    def headers(self) -> Dict[str, Any]:
        """HTTP headers derived from the configured model id and credentials."""
        headers: Dict[str, Any] = {"Content-Type": "application/json"}
        if self.model_id:
            headers["x-model-id"] = self.model_id
        if self.bearer_token:
            headers["Authorization"] = f"Bearer {self.bearer_token.get_secret_value()}"
        if self.api_key:
            headers["x-api-key"] = self.api_key.get_secret_value()
        return headers

    @computed_field
    @property
    def request_payload(self) -> Dict[str, Any]:
        """
        Return fully prepared payload depending on template or full payload.

        Returns:
            request payload (Dict[str, Any]): Populated request payload template.

        Note:
            NOTE(review): accessing this property consumes `self.variables`
            (cleared at the end), so it is not idempotent — a second read
            returns the raw template. Confirm callers read it exactly once
            per populated `variables`.
        """
        # First, we check if we have variables to populate the template with. If not, we return the template as is.
        if not self.variables:
            return self.default_request_payload_template

        # Fall back to the file-based template when no inline template was set.
        if not self.default_request_payload_template:
            base_template = self.load_template(template_type=TemplateType.REQUEST)
        else:
            base_template = self.default_request_payload_template

        # Second, replace the placeholders with the variables
        payload = self._replace_placeholders(obj=base_template, variables=self.variables)

        # Third, merge the "request_payload" if present in variables
        additional_payload_data = self.variables.get("request_payload", {})
        if additional_payload_data:
            payload.update(additional_payload_data)

        self.variables.clear()

        return payload

    @computed_field
    @property
    def response_payload(self) -> Dict[str, Any]:
        """Populated response payload template; consumes `self.variables` like `request_payload`."""
        if not self.variables:
            return self.default_response_payload_template

        if not self.default_response_payload_template:
            base_template = self.load_template(template_type=TemplateType.RESPONSE)
        else:
            base_template = self.default_response_payload_template

        response_payload = self._replace_placeholders(obj=base_template, variables=self.variables)
        # NOTE(review): clears shared state — reading `request_payload` and
        # `response_payload` in sequence means the second sees empty `variables`.
        self.variables.clear()

        return response_payload

    @staticmethod
    def _replace_placeholders(obj: Any, variables: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively replace placeholders in payload template with variables."""
        def _replace(_obj):
            if isinstance(_obj, str):
                # safe_substitute leaves unknown $placeholders intact instead of raising.
                subst = Template(_obj).safe_substitute(variables)
                if '$' in subst:
                    # NOTE(review): a literal '$' in the result also triggers
                    # this warning, not only unresolved placeholders.
                    logger.warning(f"[EndpointConfig] Unsubstituted placeholder in payload:\n{subst}\n\n")
                return subst

            elif isinstance(_obj, dict):
                return {k: _replace(v) for k, v in _obj.items()}

            elif isinstance(_obj, list):
                return [_replace(v) for v in _obj]

            # Non-container, non-string leaves pass through unchanged.
            return _obj

        return _replace(obj)

    @staticmethod
    def load_template(
        template_type: TemplateType = TemplateType.REQUEST,
        path: str | None = None
    ) -> Dict[str, Any]:
        """
        Load request/response payload template from JSON/YAML file.

        Args:
            template_type (TemplateType): The type of template to load (REQUEST or RESPONSE).
            path (str): The path of the payload template file to load.

        Returns:
            Payload template (Dict[str, Any]): Payload template.

        Raises:
            FileNotFoundError: If no template file exists at the resolved path.
            ValueError: On YAML/JSON parse errors, unsupported extensions, or
                any other unexpected failure.
            IOError: If reading the file fails.
        """
        try:
            # If no path was provided, we check the env. variables.
            if not path:
                env_var = "ENDPOINT_PAYLOAD_PATH" if template_type == TemplateType.REQUEST else "ENDPOINT_RESPONSE_PATH"
                path = os.getenv(env_var, '')

            if not os.path.exists(path):
                raise FileNotFoundError(f"The provide payload template file path '{path}' does not exist.")

            # Extension decides the parser; anything else is rejected.
            with open(path, "r", encoding="utf-8") as f:
                if path.endswith((".yaml", ".yml")):
                    data = yaml.safe_load(f)

                elif path.endswith(".json"):
                    data = json.load(f)

                else:
                    raise ValueError("[EndpointConfig] Unsupported file format.")

            return data

        except FileNotFoundError as e:
            # NOTE(review): the FileNotFoundError raised above carries only a
            # message, so `e.filename` is None here and the re-wrapped message
            # reads "file 'None' not found". Confirm and fix upstream.
            raise FileNotFoundError(f"[EndpointConfig] Payload template file '{e.filename}' not found in path.")

        except yaml.YAMLError as e:
            raise ValueError(f"[EndpointConfig] Error parsing YAML file:\n{e}")

        except json.JSONDecodeError as e:
            raise ValueError(f"[EndpointConfig] Error parsing JSON file:\n{e}")

        except IOError as e:
            raise IOError(f"[EndpointConfig] Error reading file:\n{e}")

        except Exception as e:
            raise ValueError(f"[EndpointConfig] Unexpected error loading configuration:\n{e}")
@@ -0,0 +1,57 @@
1
# Prompt for an LLM judge that compares an agent reply against a reference
# reply for the same user message. Filled via str.format with keys:
# user_input, reference_text, generated_text. The judge must answer with a
# single one-line JSON object: score (0-3), label, justification, evidence.
EVAL_PROMPT_TEMPLATE = """
You are an impartial evaluator for a conversational system.
Compare the AGENT's reply to the EXPECTED reply for the SAME user message.

Consider only:
1) Semantic Coverage — does the AGENT cover the key points in EXPECTED?
2) Faithfulness — no contradictions or invented details relative to EXPECTED.
3) Appropriateness — tone/format suitable for the user message.
Ignore minor wording/punctuation differences. Do NOT reward verbosity.

Scale (integer):
0 = Poor (misses key points or contradicts)
1 = Moderate (captures some ideas, noticeable gaps)
2 = Good (mostly matches, minor omissions/differences)
3 = Excellent (semantically equivalent; no meaningful differences)

USER_MESSAGE:
\"\"\"{user_input}\"\"\"

EXPECTED (reference reply):
\"\"\"{reference_text}\"\"\"

AGENT (model reply):
\"\"\"{generated_text}\"\"\"

Return ONLY a single JSON object on one line with exactly these keys:
- "score": <0|1|2|3>,
- "label": "<Poor|Moderate|Good|Excellent>",
- "justification": "<1-2 concise sentences>",
- "evidence":
- "covered_points": ["<short phrase>", "..."], // <=3 items
- "missing_or_wrong": ["<short phrase>", "..."] // <=3 items

Do NOT include any additional text, explanations, or formatting (e.g., "JSON object:", ```json or ```, or markdown).
"""
36
+
37
+
38
# Prompt for aggregating LLM-judge verdicts into a short list of
# negative-feedback bullet points. Filled via str.format with keys:
# max_bullets, judge, verdicts.
SUMMARIZATION_PROMPT_TEMPLATE = """
You are reviewing evaluation justifications from LLM judges about replies generated by a virtual assistant.
Interpret the context from the verdicts: (e.g., real-estate leasing, medical appointment scheduling, etc.).

Each justification contains the judge's assessment of how well the assistant's response matched the expected reply.
Your task is to **identify and summarize only the negative points**, such as:
- Errors or inaccuracies
- Misunderstandings or misinterpretations
- Missing or incomplete information
- Failure to meet expectations or requirements

**Instructions:**
- Return up to {max_bullets} concise bullet points.
- Start each point with "- " and focus on clarity and relevance.
- Avoid redundancy and prioritize actionable feedback.

---
- Judge: {judge}
- Verdicts: {verdicts}
"""
File without changes