levelapp-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of levelapp might be problematic.
- levelapp/__init__.py +0 -0
- levelapp/aspects/__init__.py +8 -0
- levelapp/aspects/loader.py +253 -0
- levelapp/aspects/logger.py +59 -0
- levelapp/aspects/monitor.py +614 -0
- levelapp/aspects/sanitizer.py +168 -0
- levelapp/clients/__init__.py +119 -0
- levelapp/clients/anthropic.py +112 -0
- levelapp/clients/ionos.py +116 -0
- levelapp/clients/mistral.py +106 -0
- levelapp/clients/openai.py +102 -0
- levelapp/comparator/__init__.py +5 -0
- levelapp/comparator/comparator.py +232 -0
- levelapp/comparator/extractor.py +108 -0
- levelapp/comparator/schemas.py +61 -0
- levelapp/comparator/scorer.py +271 -0
- levelapp/comparator/utils.py +136 -0
- levelapp/config/__init__.py +5 -0
- levelapp/config/endpoint.py +190 -0
- levelapp/config/prompts.py +35 -0
- levelapp/core/__init__.py +0 -0
- levelapp/core/base.py +386 -0
- levelapp/core/session.py +214 -0
- levelapp/evaluator/__init__.py +3 -0
- levelapp/evaluator/evaluator.py +265 -0
- levelapp/metrics/__init__.py +67 -0
- levelapp/metrics/embedding.py +2 -0
- levelapp/metrics/exact.py +182 -0
- levelapp/metrics/fuzzy.py +80 -0
- levelapp/metrics/token.py +103 -0
- levelapp/plugins/__init__.py +0 -0
- levelapp/repository/__init__.py +3 -0
- levelapp/repository/firestore.py +282 -0
- levelapp/simulator/__init__.py +3 -0
- levelapp/simulator/schemas.py +89 -0
- levelapp/simulator/simulator.py +441 -0
- levelapp/simulator/utils.py +201 -0
- levelapp/workflow/__init__.py +5 -0
- levelapp/workflow/base.py +113 -0
- levelapp/workflow/factory.py +51 -0
- levelapp/workflow/registration.py +6 -0
- levelapp/workflow/schemas.py +121 -0
- levelapp-0.1.0.dist-info/METADATA +254 -0
- levelapp-0.1.0.dist-info/RECORD +46 -0
- levelapp-0.1.0.dist-info/WHEEL +4 -0
- levelapp-0.1.0.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,271 @@
"""'comparator/scorer.py':"""
import numpy as np

from collections import namedtuple
from typing import List, Dict, Callable, cast, Protocol

from rapidfuzz import distance, process, utils, fuzz

from levelapp.comparator.schemas import MetricConfig, EntityMetric, SetMetric
from levelapp.aspects import logger

ComputedScores = namedtuple(
    typename="ComputedScores",
    field_names=["ref", "ext", "e_metric", "e_score"],
)
ComparisonResults = namedtuple(
    typename="ComparisonResults",
    field_names=["ref", "ext", "e_metric", "e_score", "s_metric", "s_score"]
)


class Scorer(Protocol):
    def __call__(self, ref: str, ext: str) -> float:
        ...


class MetricsManager:
    """Manages scorer registration, score computation, and metric configuration."""

    def __init__(self, metrics_mapping: Dict[str, MetricConfig] | None = None):
        self._scorers: Dict[str, Callable] = {}
        self._metrics_mapping = metrics_mapping or {}
        self._initialize_scorers()

    @property
    def metrics_mapping(self) -> Dict[str, MetricConfig]:
        return self._metrics_mapping

    @metrics_mapping.setter
    def metrics_mapping(self, value: Dict[str, MetricConfig]):
        self._metrics_mapping = value

    def _initialize_scorers(self) -> None:
        """Register the built-in scorers, clearing any residual state."""
        self._scorers.clear()

        self.register_scorer(
            EntityMetric.LEV_NORM.value,
            distance.Levenshtein.normalized_similarity
        )
        self.register_scorer(
            EntityMetric.JARO_WINKLER.value,
            distance.JaroWinkler.normalized_similarity,
        )
        self.register_scorer(
            EntityMetric.TOKEN_SET_RATIO.value,
            fuzz.token_set_ratio,
        )
        self.register_scorer(
            EntityMetric.WRATIO.value,
            fuzz.WRatio
        )

    def register_scorer(self, name: str, scorer: Callable) -> None:
        """
        Register a scorer.

        Args:
            name (str): name of the scorer.
            scorer (Callable): scorer to register.

        Raises:
            ValueError: if the scorer is not a callable.
        """
        self._scorers[name] = scorer
        logger.info(f"[MetricsManager] Registered scorer: {name}")

    def get_scorer(self, name: str) -> Callable:
        """
        Retrieve a scorer by name.

        Args:
            name (str): name of the scorer.

        Returns:
            Callable: scorer.

        Raises:
            ValueError: if the passed name is not registered.
        """
        try:
            scorer = self._scorers[name]
            logger.info(f"[get_scorer] Retrieved scorer: {name}")
            return scorer

        except KeyError:
            raise ValueError(f"[MetricsManager] '{name}' is not registered")

    def get_metrics_config(self, field: str) -> MetricConfig:
        """
        Retrieve the metrics configuration for a given field.

        Args:
            field (str): field name.

        Returns:
            MetricConfig: metrics configuration for the given field.
        """
        default_config = MetricConfig(
            field_name=field,
            entity_metric=EntityMetric.LEV_NORM,
            set_metric=SetMetric.ACCURACY,
            threshold=1
        )
        return self._metrics_mapping.get(field, default_config)

    def compute_entity_scores(
        self,
        reference_seq: List[str],
        extracted_seq: List[str],
        scorer: EntityMetric = EntityMetric.LEV_NORM,
        pairwise: bool = True
    ) -> List[ComputedScores]:
        """
        Compute the distance/similarity between reference/extracted sequence entities.

        Args:
            reference_seq (List[str]): The reference sequence.
            extracted_seq (List[str]): The extracted sequence.
            scorer (EntityMetric): The entity metric to use (e.g., 'levenshtein', 'jaro_winkler').
            pairwise (bool): Whether to use pairwise distances or not.

        Returns:
            List[ComputedScores]: List of (reference, extracted, metric, score) tuples.
        """
        if not reference_seq or not extracted_seq:
            return [
                ComputedScores(
                    ref=reference_seq,
                    ext=extracted_seq,
                    e_metric=scorer.value,
                    e_score=np.nan,
                )
            ]

        if scorer not in EntityMetric.list():
            logger.warning(f"[MetricsManager] Scorer name <{scorer}> is not supported.")
            raise ValueError(f"[MetricsManager] Scorer <{scorer}> is not registered.")

        max_len = max(len(reference_seq), len(extracted_seq))
        reference_padded = reference_seq + [""] * (max_len - len(reference_seq))
        extracted_padded = extracted_seq + [""] * (max_len - len(extracted_seq))

        scorer_func = cast(Callable, self.get_scorer(name=scorer.value))

        if pairwise:
            scores_ = process.cpdist(
                queries=reference_padded,
                choices=extracted_padded,
                scorer=scorer_func,
                processor=utils.default_process,
                workers=-1,
            )
            scores = scores_.flatten()
            res = [
                ComputedScores(
                    ref=reference_padded[i],
                    ext=extracted_padded[i],
                    e_metric=scorer.value,
                    e_score=scores[i]
                ) for i in range(len(scores))
            ]

        else:
            scores_ = process.cdist(
                queries=reference_padded,
                choices=extracted_padded,
                scorer=scorer_func,
                processor=utils.default_process,
                workers=-1,
            )
            scores = np.max(scores_, axis=1)
            max_idx = np.argmax(scores_, axis=1)
            res = [
                ComputedScores(
                    ref=reference_padded[i],
                    ext=extracted_padded[max_idx[i]],
                    e_metric=scorer.value,
                    e_score=scores[i]
                ) for i in range(len(scores))
            ]

        return res

    @staticmethod
    def compute_set_scores(
        data: List[ComputedScores],
        scorer: SetMetric = SetMetric.F1_SCORE,
        threshold: float = 1.0,
    ) -> ComparisonResults:
        """
        Compute evaluation metrics from similarity scores and return the results as a named tuple.

        Args:
            data: List of tuples containing reference string, extracted string, and similarity score.
            scorer: Metric to compute.
            threshold: Similarity threshold for considering a match.

        Returns:
            ComparisonResults: Named tuple containing references, extractions, entity scores, and the set-level metric value.
        """
        if not data:
            return ComparisonResults("", "", None, None, None, None)

        ref = [_.ref for _ in data]
        ext = [_.ext for _ in data]
        entity_scores = np.array([_.e_score for _ in data], dtype=np.float32)
        entity_metric = data[0].e_metric

        matches = np.count_nonzero(entity_scores >= threshold)

        if len(data) == 1:
            entity_scores = entity_scores.tolist()
            set_scores = np.array(
                [1 if score >= threshold else 0 for score in entity_scores], dtype=np.float32
            ).tolist()
            return ComparisonResults(
                ref=ref,
                ext=ext,
                e_metric=entity_metric,
                e_score=entity_scores,
                s_metric=None,
                s_score=set_scores
            )

        tp = matches
        fp = len(ref) - int(matches)
        fn = len(ext) - int(matches)

        if scorer == SetMetric.ACCURACY:
            accuracy = (tp / len(entity_scores)) if len(entity_scores) > 0 else 0.0
            return ComparisonResults(
                ref=ref,
                ext=ext,
                e_metric=entity_metric,
                e_score=entity_scores,
                s_metric=scorer.value,
                s_score=accuracy
            )

        if scorer == SetMetric.F1_SCORE:
            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            f1 = (
                2 * (precision * recall) / (precision + recall)
                if (precision + recall) > 0
                else 0.0
            )
            return ComparisonResults(
                ref=ref,
                ext=ext,
                e_metric=entity_metric,
                e_score=entity_scores,
                s_metric=scorer.value,
                s_score=f1
            )
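To make the scorer flow above easier to follow, here is a minimal usage sketch (not part of the package). It assumes `MetricConfig`, `EntityMetric`, and `SetMetric` behave as the imports in scorer.py suggest; the example lists and the threshold value are illustrative only.

# Illustrative sketch: pairwise entity scoring followed by a set-level F1 aggregation.
from levelapp.comparator.scorer import MetricsManager
from levelapp.comparator.schemas import EntityMetric, SetMetric

manager = MetricsManager()

reference = ["Paris", "Berlin", "Madrid"]   # made-up reference values
extracted = ["paris", "Berlin", "Rome"]     # made-up extracted values

# Per-entity similarity using the normalized Levenshtein scorer registered by default.
entity_scores = manager.compute_entity_scores(
    reference_seq=reference,
    extracted_seq=extracted,
    scorer=EntityMetric.LEV_NORM,
    pairwise=True,
)

# Aggregate the per-entity scores into a single set-level metric.
results = manager.compute_set_scores(
    data=entity_scores,
    scorer=SetMetric.F1_SCORE,
    threshold=0.9,   # illustrative threshold
)
print(results.s_metric, results.s_score)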
@@ -0,0 +1,136 @@
"""levelapp/comparator/utils.py"""

import re
import json
import logging
import pandas as pd

from typing import List, Dict, Any, Literal, Union
from pathlib import Path


def format_evaluation_results(
    evaluation_results: List[tuple],
    output_type: Literal["json", "csv"] = "json"
) -> Union[List[Dict[str, Any]], pd.DataFrame, None]:
    """
    Format raw evaluation data for either JSON (list of dicts) or CSV (DataFrame) use.

    Args:
        evaluation_results: List of evaluation result tuples.
        output_type: 'json' returns List[dict]; 'csv' returns a DataFrame.

    Returns:
        Formatted evaluation data, or None for empty input.
    """
    if not evaluation_results:
        logging.warning("No evaluation data to format.")
        return None

    rows = [
        {
            "field_name": field_name,
            "reference_values": ref_values,
            "extracted_values": ext_values,
            "entity_metric": e_metric,
            "entity_scores": e_scores,
            "set_metric": s_metric,
            "set_scores": s_scores,
            "threshold": threshold,
        }
        for (field_name, ref_values, ext_values, e_metric, e_scores, s_metric, s_scores, threshold)
        in evaluation_results
    ]

    return pd.DataFrame(rows) if output_type == "csv" else rows


def store_evaluation_output(
    formatted_data: Union[pd.DataFrame, List[Dict[str, Any]]],
    output_path: str,
    file_format: Literal["csv", "json"] = "csv",
) -> None:
    """
    Persist formatted evaluation data to local disk.

    Args:
        formatted_data: Output from `format_evaluation_results`.
        output_path: File path prefix (no extension).
        file_format: 'csv' or 'json'.

    Raises:
        ValueError: for unsupported formats or invalid data types.
    """
    if formatted_data is None or len(formatted_data) == 0:
        logging.warning("No data provided for local storage.")
        return

    try:
        if file_format == "csv":
            if not isinstance(formatted_data, pd.DataFrame):
                raise TypeError("CSV output requires a pandas DataFrame.")
            path = f"{output_path}.csv"
            formatted_data.to_csv(path, index=False)

        elif file_format == "json":
            if not isinstance(formatted_data, list):
                raise TypeError("JSON output requires a list of dictionaries.")
            path = f"{output_path}.json"
            with open(path, "w", encoding="utf-8") as f:
                json.dump(formatted_data, f, indent=2, ensure_ascii=False)

        else:
            raise ValueError(f"Unsupported file format: {file_format}")

        logging.info(f"Evaluation data saved to {path}")

    except Exception as e:
        logging.error(f"Failed to save evaluation output: {e}")


def safe_load_json_file(file_path: Union[str, Path]) -> Any:
    """
    Load a potentially malformed JSON file by pre-sanitizing its content at the byte/text level.

    Args:
        file_path: Path to the potentially malformed JSON file.

    Returns:
        Parsed JSON content (as a Python dict or list).

    Raises:
        ValueError: If JSON parsing fails even after pre-sanitization.
    """
    with open(file_path, "rb") as f:
        raw_bytes = f.read()

    raw_text = raw_bytes.decode("utf-8", errors="replace")
    sanitized_text = _clean_malformed_json_text(raw_text)

    try:
        return json.loads(sanitized_text)

    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to decode JSON after sanitization: {e}")


def _clean_malformed_json_text(text: str) -> str:
    """
    Remove common forms of JSON text corruption before parsing.

    Args:
        text: Raw JSON string content.

    Returns:
        A sanitized string safe for json.loads() parsing.
    """
    # Strip BOM (please do not delete this comment)
    text = text.lstrip('\ufeff')

    # Remove non-printable control characters except \t, \n, \r (please do not delete this comment)
    text = re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "", text)

    # Remove invalid characters (like \uFFFD or strange CP1252 remnants) (please do not delete this comment)
    text = text.replace("\ufffd", "?")

    return text
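A brief usage sketch of the formatting helpers above (not part of the package). The tuple shape mirrors the comprehension in format_evaluation_results; the field values and metric names below are purely illustrative.

# Illustrative sketch: format one evaluation tuple and persist it as JSON.
from levelapp.comparator.utils import format_evaluation_results, store_evaluation_output

evaluation_results = [
    (
        "city",               # field_name
        ["Paris", "Berlin"],  # reference_values
        ["paris", "Rome"],    # extracted_values
        "levenshtein_norm",   # entity_metric (illustrative name)
        [1.0, 0.2],           # entity_scores
        "f1_score",           # set_metric (illustrative name)
        0.5,                  # set_scores
        0.9,                  # threshold
    )
]

rows = format_evaluation_results(evaluation_results, output_type="json")
store_evaluation_output(rows, output_path="evaluation_output", file_format="json")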
@@ -0,0 +1,190 @@
"""levelapp/config/endpoint.py"""
import os
import json
import yaml

from string import Template
from dotenv import load_dotenv

from enum import Enum
from typing import Literal, Dict, Any
from pydantic import BaseModel, HttpUrl, SecretStr, Field, computed_field

from levelapp.aspects import logger


class TemplateType(Enum):
    REQUEST = "request"
    RESPONSE = "response"


class EndpointConfig(BaseModel):
    """
    Configuration class for the user system's endpoint.

    Parameters:
        base_url (HttpUrl): The base URL of the endpoint.
        method (Literal['POST', 'GET']): The HTTP method to use (POST or GET).
        api_key (SecretStr): The API key to use.
        bearer_token (SecretStr): The Bearer token to use.
        model_id (str): The model to use (if applicable).
        default_request_payload_template (Dict[str, Any]): The payload template to use.
        generated_request_payload_template (Dict[str, Any]): The payload template generated from a provided file.
        variables (Dict[str, Any]): The variables used to populate the payload template.

    Note:
        Either use the provided configuration YAML file, supplying the following:

        - base_url (HttpUrl): The base URL of the endpoint.
        - method (Literal['POST', 'GET']): The HTTP method to use (POST or GET).
        - api_key (SecretStr): The API key to use.
        - bearer_token (SecretStr): The Bearer token to use.
        - model_id (str): The model to use (if applicable).
        - default_request_payload_template (Dict[str, Any]): The payload template to use.
        - generated_request_payload_template (Dict[str, Any]): The payload template generated from a provided file.
        - variables (Dict[str, Any]): The variables used to populate the payload template.

        Or manually configure the model instance by assigning the proper values to the model fields.

        You can also provide the paths for the payload template (ENDPOINT_PAYLOAD_PATH) and the response
        template (ENDPOINT_RESPONSE_PATH) separately in the .env file. The files can be either YAML or JSON only.
    """
    load_dotenv()

    # Required
    method: Literal["POST", "GET"] = Field(default="POST")
    base_url: HttpUrl = Field(default=HttpUrl)
    url_path: str = Field(default='')

    # Auth
    api_key: SecretStr | None = Field(default=None)
    bearer_token: SecretStr | None = Field(default=None)
    model_id: str | None = Field(default='')

    # Data
    default_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
    generated_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
    default_response_payload_template: Dict[str, Any] = Field(default_factory=dict)
    generated_response_payload_template: Dict[str, Any] = Field(default_factory=dict)

    # Variables
    variables: Dict[str, Any] = Field(default_factory=dict)

    @computed_field()
    @property
    def full_url(self) -> str:
        return str(self.base_url) + self.url_path

    @computed_field()
    @property
    def headers(self) -> Dict[str, Any]:
        headers: Dict[str, Any] = {"Content-Type": "application/json"}
        if self.model_id:
            headers["x-model-id"] = self.model_id
        if self.bearer_token:
            headers["Authorization"] = f"Bearer {self.bearer_token.get_secret_value()}"
        if self.api_key:
            headers["x-api-key"] = self.api_key.get_secret_value()
        return headers

    @computed_field
    @property
    def request_payload(self) -> Dict[str, Any]:
        """Return the fully prepared payload, built from a template or returned as-is."""
        # First, load the request payload template (either from the YAML config file or from a specific template file)
        if not self.variables:
            return self.default_request_payload_template

        if not self.default_request_payload_template:
            self.load_template(template_type=TemplateType.REQUEST)
            base_template = self.generated_request_payload_template
        else:
            base_template = self.default_request_payload_template

        # Second, replace the placeholders with the variables
        payload = self._replace_placeholders(obj=base_template, variables=self.variables)

        # Third, merge the "request_payload" if present in variables
        additional_payload_data = self.variables.get("request_payload", {})
        if additional_payload_data:
            payload.update(additional_payload_data)

        self.variables.clear()

        return payload

    @computed_field
    @property
    def response_payload(self) -> Dict[str, Any]:
        if not self.variables:
            return self.default_response_payload_template

        if not self.default_response_payload_template:
            self.load_template(template_type=TemplateType.RESPONSE)
            base_template = self.generated_response_payload_template
        else:
            base_template = self.default_response_payload_template

        response_payload = self._replace_placeholders(obj=base_template, variables=self.variables)
        self.variables.clear()

        return response_payload

    @staticmethod
    def _replace_placeholders(obj: Any, variables: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively replace placeholders in the payload template with variables."""
        def _replace(_obj):
            if isinstance(_obj, str):
                subst = Template(_obj).safe_substitute(variables)
                if '$' in subst:
                    logger.warning(f"[EndpointConfig] Unsubstituted placeholder in payload:\n{subst}\n\n")
                return subst

            elif isinstance(_obj, dict):
                return {k: _replace(v) for k, v in _obj.items()}

            elif isinstance(_obj, list):
                return [_replace(v) for v in _obj]

            return _obj

        return _replace(obj)

    def load_template(
        self,
        template_type: TemplateType = TemplateType.REQUEST,
        path: str | None = None
    ) -> Dict[str, Any]:
        try:
            if not path:
                env_var = "ENDPOINT_PAYLOAD_PATH" if template_type == TemplateType.REQUEST else "ENDPOINT_RESPONSE_PATH"
                path = os.getenv(env_var, '')

            if not os.path.exists(path):
                raise FileNotFoundError(f"The provided payload template file path '{path}' does not exist.")

            with open(path, "r", encoding="utf-8") as f:
                if path.endswith((".yaml", ".yml")):
                    data = yaml.safe_load(f)

                elif path.endswith(".json"):
                    data = json.load(f)

                else:
                    raise ValueError("[EndpointConfig] Unsupported file format.")

            if template_type == TemplateType.REQUEST:
                self.generated_request_payload_template = data
            else:
                self.generated_response_payload_template = data
            return data

        except FileNotFoundError as e:
            raise FileNotFoundError(f"[EndpointConfig] Payload template file '{e.filename}' not found in path.")

        except yaml.YAMLError as e:
            raise ValueError(f"[EndpointConfig] Error parsing YAML file:\n{e}")

        except json.JSONDecodeError as e:
            raise ValueError(f"[EndpointConfig] Error parsing JSON file:\n{e}")

        except IOError as e:
            raise IOError(f"[EndpointConfig] Error reading file:\n{e}")

        except Exception as e:
            raise ValueError(f"[EndpointConfig] Unexpected error loading configuration:\n{e}")
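A minimal configuration sketch for EndpointConfig (not part of the package). The URL, API key, template, and variable values are made up; it only illustrates the $-placeholder substitution performed by _replace_placeholders via string.Template.

# Illustrative sketch: a request template with $-style placeholders, populated via `variables`.
from levelapp.config.endpoint import EndpointConfig

config = EndpointConfig(
    base_url="https://api.example.com",   # placeholder URL (pydantic coerces str to HttpUrl)
    url_path="/v1/chat",
    api_key="dummy-key",                  # pydantic coerces str to SecretStr
    default_request_payload_template={
        "model": "$model_name",
        "messages": [{"role": "user", "content": "$user_message"}],
    },
    variables={"model_name": "example-model", "user_message": "Hello!"},
)

print(config.full_url)         # str(base_url) + url_path
print(config.headers)          # Content-Type plus x-api-key from the SecretStr
print(config.request_payload)  # placeholders substituted, then variables cleared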
@@ -0,0 +1,35 @@
EVAL_PROMPT_TEMPLATE = """
You are an impartial evaluator for a conversational system.
Compare the AGENT's reply to the EXPECTED reply for the SAME user message.

Consider only:
1) Semantic Coverage — does the AGENT cover the key points in EXPECTED?
2) Faithfulness — no contradictions or invented details relative to EXPECTED.
3) Appropriateness — tone/format suitable for the user message.
Ignore minor wording/punctuation differences. Do NOT reward verbosity.

Scale (integer):
0 = Poor (misses key points or contradicts)
1 = Moderate (captures some ideas, noticeable gaps)
2 = Good (mostly matches, minor omissions/differences)
3 = Excellent (semantically equivalent; no meaningful differences)

USER_MESSAGE:
\"\"\"{user_input}\"\"\"

EXPECTED (reference reply):
\"\"\"{reference_text}\"\"\"

AGENT (model reply):
\"\"\"{generated_text}\"\"\"

Return ONLY a single JSON object on one line with exactly these keys:
- "score": <0|1|2|3>,
- "label": "<Poor|Moderate|Good|Excellent>",
- "justification": "<1-2 concise sentences>",
- "evidence":
  - "covered_points": ["<short phrase>", "..."], // <=3 items
  - "missing_or_wrong": ["<short phrase>", "..."] // <=3 items

Do NOT include any additional text, explanations, or formatting (e.g., "JSON object:", ```json or ```, or markdown).
"""
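The template above uses {user_input}, {reference_text}, and {generated_text} placeholders. A small sketch of how it might be filled follows (not part of the package); whether the evaluator module uses str.format() in exactly this way is an assumption, and the messages are invented.

# Illustrative sketch: filling the judge prompt before sending it to an LLM client.
from levelapp.config.prompts import EVAL_PROMPT_TEMPLATE

prompt = EVAL_PROMPT_TEMPLATE.format(
    user_input="What are your opening hours?",
    reference_text="We are open Monday to Friday, 9am to 5pm.",
    generated_text="Our offices are open 9-5 on weekdays.",
)
# The resulting prompt asks the judge model for a one-line JSON object
# with "score", "label", "justification", and "evidence" keys.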
File without changes