levelapp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of levelapp might be problematic.

Files changed (46)
  1. levelapp/__init__.py +0 -0
  2. levelapp/aspects/__init__.py +8 -0
  3. levelapp/aspects/loader.py +253 -0
  4. levelapp/aspects/logger.py +59 -0
  5. levelapp/aspects/monitor.py +614 -0
  6. levelapp/aspects/sanitizer.py +168 -0
  7. levelapp/clients/__init__.py +119 -0
  8. levelapp/clients/anthropic.py +112 -0
  9. levelapp/clients/ionos.py +116 -0
  10. levelapp/clients/mistral.py +106 -0
  11. levelapp/clients/openai.py +102 -0
  12. levelapp/comparator/__init__.py +5 -0
  13. levelapp/comparator/comparator.py +232 -0
  14. levelapp/comparator/extractor.py +108 -0
  15. levelapp/comparator/schemas.py +61 -0
  16. levelapp/comparator/scorer.py +271 -0
  17. levelapp/comparator/utils.py +136 -0
  18. levelapp/config/__init__.py +5 -0
  19. levelapp/config/endpoint.py +190 -0
  20. levelapp/config/prompts.py +35 -0
  21. levelapp/core/__init__.py +0 -0
  22. levelapp/core/base.py +386 -0
  23. levelapp/core/session.py +214 -0
  24. levelapp/evaluator/__init__.py +3 -0
  25. levelapp/evaluator/evaluator.py +265 -0
  26. levelapp/metrics/__init__.py +67 -0
  27. levelapp/metrics/embedding.py +2 -0
  28. levelapp/metrics/exact.py +182 -0
  29. levelapp/metrics/fuzzy.py +80 -0
  30. levelapp/metrics/token.py +103 -0
  31. levelapp/plugins/__init__.py +0 -0
  32. levelapp/repository/__init__.py +3 -0
  33. levelapp/repository/firestore.py +282 -0
  34. levelapp/simulator/__init__.py +3 -0
  35. levelapp/simulator/schemas.py +89 -0
  36. levelapp/simulator/simulator.py +441 -0
  37. levelapp/simulator/utils.py +201 -0
  38. levelapp/workflow/__init__.py +5 -0
  39. levelapp/workflow/base.py +113 -0
  40. levelapp/workflow/factory.py +51 -0
  41. levelapp/workflow/registration.py +6 -0
  42. levelapp/workflow/schemas.py +121 -0
  43. levelapp-0.1.0.dist-info/METADATA +254 -0
  44. levelapp-0.1.0.dist-info/RECORD +46 -0
  45. levelapp-0.1.0.dist-info/WHEEL +4 -0
  46. levelapp-0.1.0.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,271 @@
+ """'comparator/scorer.py':"""
+ import numpy as np
+
+ from collections import namedtuple
+ from typing import List, Dict, Callable, cast, Protocol
+
+ from rapidfuzz import distance, process, utils, fuzz
+
+ from levelapp.comparator.schemas import MetricConfig, EntityMetric, SetMetric
+ from levelapp.aspects import logger
+
+ ComputedScores = namedtuple(
+     typename="ComputedScores",
+     field_names=["ref", "ext", "e_metric", "e_score"],
+ )
+ ComparisonResults = namedtuple(
+     typename="ComparisonResults",
+     field_names=["ref", "ext", "e_metric", "e_score", "s_metric", "s_score"]
+ )
+
+
+ class Scorer(Protocol):
+     def __call__(self, ref: str, ext: str) -> float:
+         ...
+
+
+ class MetricsManager:
+     """Manages scorer registration, score computation, and metric configuration."""
+
+     def __init__(self, metrics_mapping: Dict[str, MetricConfig] | None = None):
+         self._scorers: Dict[str, Callable] = {}
+         self._metrics_mapping = metrics_mapping or {}
+         self._initialize_scorers()
+
+     @property
+     def metrics_mapping(self) -> Dict[str, MetricConfig]:
+         return self._metrics_mapping
+
+     @metrics_mapping.setter
+     def metrics_mapping(self, value: Dict[str, MetricConfig]):
+         self._metrics_mapping = value
+
+     def _initialize_scorers(self) -> None:
+         """Register built-in scorers, clearing any residual state first."""
+         self._scorers.clear()
+
+         self.register_scorer(
+             EntityMetric.LEV_NORM.value,
+             distance.Levenshtein.normalized_similarity
+         )
+         self.register_scorer(
+             EntityMetric.JARO_WINKLER.value,
+             distance.JaroWinkler.normalized_similarity,
+         )
+         self.register_scorer(
+             EntityMetric.TOKEN_SET_RATIO.value,
+             fuzz.token_set_ratio,
+         )
+         self.register_scorer(
+             EntityMetric.WRATIO.value,
+             fuzz.WRatio
+         )
+
+     def register_scorer(self, name: str, scorer: Callable) -> None:
+         """
+         Register a scorer.
+
+         Args:
+             name (str): name of the scorer.
+             scorer (Callable): scorer to register.
+
+         Raises:
+             ValueError: if the scorer is not a callable.
+         """
+         if not callable(scorer):
+             raise ValueError(f"[MetricsManager] Scorer '{name}' is not callable.")
+         self._scorers[name] = scorer
+         logger.info(f"[MetricsManager] Registered scorer: {name}")
+
+     def get_scorer(self, name: str) -> Callable:
+         """
+         Retrieve a scorer by name.
+
+         Args:
+             name (str): name of the scorer.
+
+         Returns:
+             Callable: scorer.
+
+         Raises:
+             ValueError: if the passed name is not registered.
+         """
+         try:
+             scorer = self._scorers[name]
+             logger.info(f"[get_scorer] Retrieved scorer: {name}")
+             return scorer
+
+         except KeyError:
+             raise ValueError(f"[MetricsManager] '{name}' is not registered")
+
+     def get_metrics_config(self, field: str) -> MetricConfig:
+         """
+         Retrieve the metrics configuration for a given field.
+
+         Args:
+             field (str): field name.
+
+         Returns:
+             MetricConfig: metrics configuration for the given field.
+         """
+         default_config = MetricConfig(
+             field_name=field,
+             entity_metric=EntityMetric.LEV_NORM,
+             set_metric=SetMetric.ACCURACY,
+             threshold=1
+         )
+         return self._metrics_mapping.get(field, default_config)
+
+     def compute_entity_scores(
+         self,
+         reference_seq: List[str],
+         extracted_seq: List[str],
+         scorer: EntityMetric = EntityMetric.LEV_NORM,
+         pairwise: bool = True
+     ) -> List[ComputedScores]:
+         """
+         Compute the distance/similarity between reference and extracted sequence entities.
+
+         Args:
+             reference_seq (List[str]): The reference sequence.
+             extracted_seq (List[str]): The extracted sequence.
+             scorer (EntityMetric): Entity metric to use (e.g., Levenshtein, Jaro-Winkler).
+             pairwise (bool): Whether to use pairwise distances or not.
+
+         Returns:
+             List[ComputedScores]: List of (reference, extracted, metric, score) named tuples.
+         """
+         if not reference_seq or not extracted_seq:
+             return [
+                 ComputedScores(
+                     ref=reference_seq,
+                     ext=extracted_seq,
+                     e_metric=scorer.value,
+                     e_score=np.nan,
+                 )
+             ]
+
+         if scorer not in EntityMetric.list():
+             logger.warning(f"[MetricsManager] Scorer name <{scorer}> is not supported.")
+             raise ValueError(f"[MetricsManager] Scorer <{scorer}> is not registered.")
+
+         max_len = max(len(reference_seq), len(extracted_seq))
+         reference_padded = reference_seq + [""] * (max_len - len(reference_seq))
+         extracted_padded = extracted_seq + [""] * (max_len - len(extracted_seq))
+
+         scorer_func = cast(Callable, self.get_scorer(name=scorer.value))
+
+         if pairwise:
+             scores_ = process.cpdist(
+                 queries=reference_padded,
+                 choices=extracted_padded,
+                 scorer=scorer_func,
+                 processor=utils.default_process,
+                 workers=-1,
+             )
+             scores = scores_.flatten()
+             res = [
+                 ComputedScores(
+                     ref=reference_padded[i],
+                     ext=extracted_padded[i],
+                     e_metric=scorer.value,
+                     e_score=scores[i]
+                 ) for i in range(len(scores))
+             ]
+
+         else:
+             scores_ = process.cdist(
+                 queries=reference_padded,
+                 choices=extracted_padded,
+                 scorer=scorer_func,
+                 processor=utils.default_process,
+                 workers=-1,
+             )
+             scores = np.max(scores_, axis=1)
+             max_idx = np.argmax(scores_, axis=1)
+             res = [
+                 ComputedScores(
+                     ref=reference_padded[i],
+                     ext=extracted_padded[max_idx[i]],
+                     e_metric=scorer.value,
+                     e_score=scores[i]
+                 ) for i in range(len(scores))
+             ]
+
+         return res
+
+     @staticmethod
+     def compute_set_scores(
+         data: List[ComputedScores],
+         scorer: SetMetric = SetMetric.F1_SCORE,
+         threshold: float = 1.0,
+     ) -> ComparisonResults:
+         """
+         Compute set-level evaluation metrics from entity similarity scores.
+
+         Args:
+             data: List of ComputedScores tuples (reference, extracted, entity metric, score).
+             scorer: Set metric to compute.
+             threshold: Similarity threshold for considering a match.
+
+         Returns:
+             ComparisonResults: Named tuple containing references, extractions, entity scores, and the set-level score.
+         """
+         if not data:
+             return ComparisonResults("", "", None, None, None, None)
+
+         ref = [_.ref for _ in data]
+         ext = [_.ext for _ in data]
+         entity_scores = np.array([_.e_score for _ in data], dtype=np.float32)
+         entity_metric = data[0].e_metric
+
+         matches = np.count_nonzero(entity_scores >= threshold)
+
+         if len(data) == 1:
+             entity_scores = entity_scores.tolist()
+             set_scores = np.array(
+                 [1 if score >= threshold else 0 for score in entity_scores], dtype=np.float32
+             ).tolist()
+             return ComparisonResults(
+                 ref=ref,
+                 ext=ext,
+                 e_metric=entity_metric,
+                 e_score=entity_scores,
+                 s_metric=None,
+                 s_score=set_scores
+             )
+
+         tp = matches
+         fp = len(ref) - int(matches)
+         fn = len(ext) - int(matches)
+
+         if scorer == SetMetric.ACCURACY:
+             accuracy = (tp / len(entity_scores)) if len(entity_scores) > 0 else 0.0
+             return ComparisonResults(
+                 ref=ref,
+                 ext=ext,
+                 e_metric=entity_metric,
+                 e_score=entity_scores,
+                 s_metric=scorer.value,
+                 s_score=accuracy
+             )
+
+         if scorer == SetMetric.F1_SCORE:
+             precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+             recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+             f1 = (
+                 2 * (precision * recall) / (precision + recall)
+                 if (precision + recall) > 0
+                 else 0.0
+             )
+             return ComparisonResults(
+                 ref=ref,
+                 ext=ext,
+                 e_metric=entity_metric,
+                 e_score=entity_scores,
+                 s_metric=scorer.value,
+                 s_score=f1
+             )
+
+         raise ValueError(f"[MetricsManager] Unsupported set metric: {scorer}")
@@ -0,0 +1,136 @@
+ """levelapp/comparator/utils.py"""
+
+ import re
+ import json
+ import logging
+ import pandas as pd
+
+ from typing import List, Dict, Any, Literal, Union
+ from pathlib import Path
+
+
+ def format_evaluation_results(
+     evaluation_results: List[tuple],
+     output_type: Literal["json", "csv"] = "json"
+ ) -> Union[List[Dict[str, Any]], pd.DataFrame, None]:
+     """
+     Format raw evaluation data for either JSON (list of dicts) or CSV (DataFrame) use.
+
+     Args:
+         evaluation_results: List of evaluation result tuples.
+         output_type: 'json' returns List[dict]; 'csv' returns a DataFrame.
+
+     Returns:
+         Formatted evaluation data, or None for empty input.
+     """
+     if not evaluation_results:
+         logging.warning("No evaluation data to format.")
+         return None
+
+     rows = [
+         {
+             "field_name": field_name,
+             "reference_values": ref_values,
+             "extracted_values": ext_values,
+             "entity_metric": e_metric,
+             "entity_scores": e_scores,
+             "set_metric": s_metric,
+             "set_scores": s_scores,
+             "threshold": threshold,
+         }
+         for (field_name, ref_values, ext_values, e_metric, e_scores, s_metric, s_scores, threshold)
+         in evaluation_results
+     ]
+
+     return pd.DataFrame(rows) if output_type == "csv" else rows
+
+
+ def store_evaluation_output(
+     formatted_data: Union[pd.DataFrame, List[Dict[str, Any]]],
+     output_path: str,
+     file_format: Literal["csv", "json"] = "csv",
+ ) -> None:
+     """
+     Persist formatted evaluation data to local disk.
+
+     Args:
+         formatted_data: Output from `format_evaluation_results`.
+         output_path: File path prefix (no extension).
+         file_format: 'csv' or 'json'.
+
+     Note:
+         Unsupported formats, mismatched data types, and I/O failures are logged rather than propagated.
+     """
+     if formatted_data is None or len(formatted_data) == 0:
+         logging.warning("No data provided for local storage.")
+         return
+
+     try:
+         if file_format == "csv":
+             if not isinstance(formatted_data, pd.DataFrame):
+                 raise TypeError("CSV output requires a pandas DataFrame.")
+             path = f"{output_path}.csv"
+             formatted_data.to_csv(path, index=False)
+
+         elif file_format == "json":
+             if not isinstance(formatted_data, list):
+                 raise TypeError("JSON output requires a list of dictionaries.")
+             path = f"{output_path}.json"
+             with open(path, "w", encoding="utf-8") as f:
+                 json.dump(formatted_data, f, indent=2, ensure_ascii=False)
+
+         else:
+             raise ValueError(f"Unsupported file format: {file_format}")
+
+         logging.info(f"Evaluation data saved to {path}")
+
+     except Exception as e:
+         logging.error(f"Failed to save evaluation output: {e}")
+
+
+ def safe_load_json_file(file_path: Union[str, Path]) -> Any:
+     """
+     Load a potentially malformed JSON file by pre-sanitizing its content at the byte/text level.
+
+     Args:
+         file_path: Path to the potentially malformed JSON file.
+
+     Returns:
+         Parsed JSON content (as a Python dict or list).
+
+     Raises:
+         ValueError: If JSON parsing fails even after pre-sanitization.
+     """
+     with open(file_path, "rb") as f:
+         raw_bytes = f.read()
+
+     raw_text = raw_bytes.decode("utf-8", errors="replace")
+     sanitized_text = _clean_malformed_json_text(raw_text)
+
+     try:
+         return json.loads(sanitized_text)
+
+     except json.JSONDecodeError as e:
+         raise ValueError(f"Failed to decode JSON after sanitization: {e}")
+
+
+ def _clean_malformed_json_text(text: str) -> str:
+     """
+     Remove common forms of JSON text corruption before parsing.
+
+     Args:
+         text: Raw JSON string content.
+
+     Returns:
+         A sanitized string safe for json.loads() parsing.
+     """
+     # Strip BOM (please do not delete this comment)
+     text = text.lstrip('\ufeff')
+
+     # Remove non-printable control characters except \t, \n, \r (please do not delete this comment)
+     text = re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "", text)
+
+     # Remove invalid characters (like \uFFFD or strange CP1252 remnants) (please do not delete this comment)
+     text = text.replace("\ufffd", "?")
+
+     return text
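
A short sketch of how the two helpers above fit together, not part of the wheel contents. The 8-tuple layout mirrors the unpacking in format_evaluation_results; the field names, values, and output path are made up for illustration, and the import path assumes the module lives at levelapp/comparator/utils.py as the file listing suggests.

# Hypothetical sketch: formatting and persisting comparison output.
from levelapp.comparator.utils import format_evaluation_results, store_evaluation_output

results = [
    # (field_name, ref_values, ext_values, e_metric, e_scores, s_metric, s_scores, threshold)
    ("city", ["Berlin"], ["berlin"], "levenshtein", [1.0], "accuracy", 1.0, 0.9),
]

rows = format_evaluation_results(results, output_type="json")   # list of dicts
store_evaluation_output(rows, output_path="eval_run_01", file_format="json")

df = format_evaluation_results(results, output_type="csv")      # pandas DataFrame
store_evaluation_output(df, output_path="eval_run_01", file_format="csv")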
@@ -0,0 +1,5 @@
+ from .endpoint import EndpointConfig
+ from .prompts import EVAL_PROMPT_TEMPLATE
+
+
+ __all__ = ['EndpointConfig', 'EVAL_PROMPT_TEMPLATE']
@@ -0,0 +1,190 @@
+ """levelapp/config/endpoint.py"""
+ import os
+ import json
+ import yaml
+
+ from string import Template
+ from dotenv import load_dotenv
+
+ from enum import Enum
+ from typing import Literal, Dict, Any
+ from pydantic import BaseModel, HttpUrl, SecretStr, Field, computed_field
+
+ from levelapp.aspects import logger
+
+
+ class TemplateType(Enum):
+     REQUEST = "request"
+     RESPONSE = "response"
+
+
+ class EndpointConfig(BaseModel):
+     """
+     Configuration class for the user system's endpoint.
+
+     Parameters:
+         base_url (HttpUrl): The base URL of the endpoint.
+         method (Literal['POST', 'GET']): The HTTP method to use (POST or GET).
+         api_key (SecretStr): The API key to use.
+         bearer_token (SecretStr): The Bearer token to use.
+         model_id (str): The model to use (if applicable).
+         default_request_payload_template (Dict[str, Any]): The payload template to use.
+         generated_request_payload_template (Dict[str, Any]): The payload template generated from a provided file.
+         variables (Dict[str, Any]): The variables used to populate the payload template.
+
+     Note:
+         Either configure the endpoint through the provided YAML configuration file, supplying the fields listed
+         above, or configure the model instance manually by assigning the proper values to the model fields.
+         You can also provide the path to the payload template (ENDPOINT_PAYLOAD_PATH) and the response template
+         (ENDPOINT_RESPONSE_PATH) separately in the .env file. The template files can be either YAML or JSON only.
+     """
+     load_dotenv()
+
+     # Required
+     method: Literal["POST", "GET"] = Field(default="POST")
+     base_url: HttpUrl = Field(...)
+     url_path: str = Field(default='')
+
+     # Auth
+     api_key: SecretStr | None = Field(default=None)
+     bearer_token: SecretStr | None = Field(default=None)
+     model_id: str | None = Field(default='')
+
+     # Data
+     default_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
+     generated_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
+     default_response_payload_template: Dict[str, Any] = Field(default_factory=dict)
+     generated_response_payload_template: Dict[str, Any] = Field(default_factory=dict)
+
+     # Variables
+     variables: Dict[str, Any] = Field(default_factory=dict)
+
+     @computed_field()
+     @property
+     def full_url(self) -> str:
+         return str(self.base_url) + self.url_path
+
+     @computed_field()
+     @property
+     def headers(self) -> Dict[str, Any]:
+         headers: Dict[str, Any] = {"Content-Type": "application/json"}
+         if self.model_id:
+             headers["x-model-id"] = self.model_id
+         if self.bearer_token:
+             headers["Authorization"] = f"Bearer {self.bearer_token.get_secret_value()}"
+         if self.api_key:
+             headers["x-api-key"] = self.api_key.get_secret_value()
+         return headers
+
+     @computed_field
+     @property
+     def request_payload(self) -> Dict[str, Any]:
+         """Return the fully prepared payload, depending on the template or full payload."""
+         # First, load the request payload template (either from the YAML config file or from a specific template)
+         if not self.variables:
+             return self.default_request_payload_template
+
+         if not self.default_request_payload_template:
+             self.load_template(template_type=TemplateType.REQUEST)
+             base_template = self.generated_request_payload_template
+         else:
+             base_template = self.default_request_payload_template
+
+         # Second, replace the placeholders with the variables
+         payload = self._replace_placeholders(obj=base_template, variables=self.variables)
+
+         # Third, merge the "request_payload" if present in variables
+         additional_payload_data = self.variables.get("request_payload", {})
+         if additional_payload_data:
+             payload.update(additional_payload_data)
+
+         self.variables.clear()
+
+         return payload
+
+     @computed_field
+     @property
+     def response_payload(self) -> Dict[str, Any]:
+         if not self.variables:
+             return self.default_response_payload_template
+
+         if not self.default_response_payload_template:
+             self.load_template(template_type=TemplateType.RESPONSE)
+             base_template = self.generated_response_payload_template
+         else:
+             base_template = self.default_response_payload_template
+
+         response_payload = self._replace_placeholders(obj=base_template, variables=self.variables)
+         self.variables.clear()
+
+         return response_payload
+
+     @staticmethod
+     def _replace_placeholders(obj: Any, variables: Dict[str, Any]) -> Dict[str, Any]:
+         """Recursively replace placeholders in the payload template with variables."""
+         def _replace(_obj):
+             if isinstance(_obj, str):
+                 subst = Template(_obj).safe_substitute(variables)
+                 if '$' in subst:
+                     logger.warning(f"[EndpointConfig] Unsubstituted placeholder in payload:\n{subst}\n\n")
+                 return subst
+
+             elif isinstance(_obj, dict):
+                 return {k: _replace(v) for k, v in _obj.items()}
+
+             elif isinstance(_obj, list):
+                 return [_replace(v) for v in _obj]
+
+             return _obj
+
+         return _replace(obj)
+
+     def load_template(
+         self,
+         template_type: TemplateType = TemplateType.REQUEST,
+         path: str | None = None
+     ) -> Dict[str, Any]:
+         try:
+             if not path:
+                 env_var = "ENDPOINT_PAYLOAD_PATH" if template_type == TemplateType.REQUEST else "ENDPOINT_RESPONSE_PATH"
+                 path = os.getenv(env_var, '')
+
+             if not os.path.exists(path):
+                 raise FileNotFoundError(f"The provided payload template file path '{path}' does not exist.")
+
+             with open(path, "r", encoding="utf-8") as f:
+                 if path.endswith((".yaml", ".yml")):
+                     data = yaml.safe_load(f)
+
+                 elif path.endswith(".json"):
+                     data = json.load(f)
+
+                 else:
+                     raise ValueError("[EndpointConfig] Unsupported file format.")
+
+             if template_type == TemplateType.REQUEST:
+                 self.generated_request_payload_template = data
+             else:
+                 self.generated_response_payload_template = data
+             return data
+
+         except FileNotFoundError as e:
+             raise FileNotFoundError(f"[EndpointConfig] Payload template file not found: {e}")
+
+         except yaml.YAMLError as e:
+             raise ValueError(f"[EndpointConfig] Error parsing YAML file:\n{e}")
+
+         except json.JSONDecodeError as e:
+             raise ValueError(f"[EndpointConfig] Error parsing JSON file:\n{e}")
+
+         except IOError as e:
+             raise IOError(f"[EndpointConfig] Error reading file:\n{e}")
+
+         except Exception as e:
+             raise ValueError(f"[EndpointConfig] Unexpected error loading configuration:\n{e}")
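
A minimal sketch of configuring EndpointConfig in code rather than via YAML, not part of the wheel contents. Placeholders use string.Template syntax ($name), matching _replace_placeholders above; the URL, token, template keys, and variable names are illustrative only, and pydantic is assumed to coerce the plain strings into HttpUrl/SecretStr.

# Hypothetical sketch: building a request payload with EndpointConfig (assumptions noted above).
from levelapp.config.endpoint import EndpointConfig

config = EndpointConfig(
    base_url="https://api.example.com",
    url_path="v1/chat",
    bearer_token="dummy-token",
    default_request_payload_template={
        "model": "$model_id",
        "messages": [{"role": "user", "content": "$user_input"}],
    },
    variables={"model_id": "my-model", "user_input": "Hello!"},
)

print(config.full_url)          # base_url joined with url_path
print(config.headers)           # Content-Type plus Authorization header
print(config.request_payload)   # template with $model_id / $user_input substituted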
@@ -0,0 +1,35 @@
+ EVAL_PROMPT_TEMPLATE = """
+ You are an impartial evaluator for a conversational system.
+ Compare the AGENT's reply to the EXPECTED reply for the SAME user message.
+
+ Consider only:
+ 1) Semantic Coverage — does the AGENT cover the key points in EXPECTED?
+ 2) Faithfulness — no contradictions or invented details relative to EXPECTED.
+ 3) Appropriateness — tone/format suitable for the user message.
+ Ignore minor wording/punctuation differences. Do NOT reward verbosity.
+
+ Scale (integer):
+ 0 = Poor (misses key points or contradicts)
+ 1 = Moderate (captures some ideas, noticeable gaps)
+ 2 = Good (mostly matches, minor omissions/differences)
+ 3 = Excellent (semantically equivalent; no meaningful differences)
+
+ USER_MESSAGE:
+ \"\"\"{user_input}\"\"\"
+
+ EXPECTED (reference reply):
+ \"\"\"{reference_text}\"\"\"
+
+ AGENT (model reply):
+ \"\"\"{generated_text}\"\"\"
+
+ Return ONLY a single JSON object on one line with exactly these keys:
+ - "score": <0|1|2|3>,
+ - "label": "<Poor|Moderate|Good|Excellent>",
+ - "justification": "<1-2 concise sentences>",
+ - "evidence":
+ - "covered_points": ["<short phrase>", "..."], // <=3 items
+ - "missing_or_wrong": ["<short phrase>", "..."] // <=3 items
+
+ Do NOT include any additional text, explanations, or formatting (e.g., "JSON object:", ```json or ```, or markdown).
+ """
File without changes