levelapp 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- levelapp/__init__.py +0 -0
- levelapp/aspects/__init__.py +8 -0
- levelapp/aspects/loader.py +253 -0
- levelapp/aspects/logger.py +59 -0
- levelapp/aspects/monitor.py +617 -0
- levelapp/aspects/sanitizer.py +168 -0
- levelapp/clients/__init__.py +122 -0
- levelapp/clients/anthropic.py +112 -0
- levelapp/clients/gemini.py +130 -0
- levelapp/clients/groq.py +101 -0
- levelapp/clients/huggingface.py +162 -0
- levelapp/clients/ionos.py +126 -0
- levelapp/clients/mistral.py +106 -0
- levelapp/clients/openai.py +116 -0
- levelapp/comparator/__init__.py +5 -0
- levelapp/comparator/comparator.py +232 -0
- levelapp/comparator/extractor.py +108 -0
- levelapp/comparator/schemas.py +61 -0
- levelapp/comparator/scorer.py +269 -0
- levelapp/comparator/utils.py +136 -0
- levelapp/config/__init__.py +5 -0
- levelapp/config/endpoint.py +199 -0
- levelapp/config/prompts.py +57 -0
- levelapp/core/__init__.py +0 -0
- levelapp/core/base.py +386 -0
- levelapp/core/schemas.py +24 -0
- levelapp/core/session.py +336 -0
- levelapp/endpoint/__init__.py +0 -0
- levelapp/endpoint/client.py +188 -0
- levelapp/endpoint/client_test.py +41 -0
- levelapp/endpoint/manager.py +114 -0
- levelapp/endpoint/parsers.py +119 -0
- levelapp/endpoint/schemas.py +38 -0
- levelapp/endpoint/tester.py +52 -0
- levelapp/evaluator/__init__.py +3 -0
- levelapp/evaluator/evaluator.py +307 -0
- levelapp/metrics/__init__.py +63 -0
- levelapp/metrics/embedding.py +56 -0
- levelapp/metrics/embeddings/__init__.py +0 -0
- levelapp/metrics/embeddings/sentence_transformer.py +30 -0
- levelapp/metrics/embeddings/torch_based.py +56 -0
- levelapp/metrics/exact.py +182 -0
- levelapp/metrics/fuzzy.py +80 -0
- levelapp/metrics/token.py +103 -0
- levelapp/plugins/__init__.py +0 -0
- levelapp/repository/__init__.py +3 -0
- levelapp/repository/filesystem.py +203 -0
- levelapp/repository/firestore.py +291 -0
- levelapp/simulator/__init__.py +3 -0
- levelapp/simulator/schemas.py +116 -0
- levelapp/simulator/simulator.py +531 -0
- levelapp/simulator/utils.py +134 -0
- levelapp/visualization/__init__.py +7 -0
- levelapp/visualization/charts.py +358 -0
- levelapp/visualization/dashboard.py +240 -0
- levelapp/visualization/exporter.py +167 -0
- levelapp/visualization/templates/base.html +158 -0
- levelapp/visualization/templates/comparator_dashboard.html +57 -0
- levelapp/visualization/templates/simulator_dashboard.html +111 -0
- levelapp/workflow/__init__.py +6 -0
- levelapp/workflow/base.py +192 -0
- levelapp/workflow/config.py +96 -0
- levelapp/workflow/context.py +64 -0
- levelapp/workflow/factory.py +42 -0
- levelapp/workflow/registration.py +6 -0
- levelapp/workflow/runtime.py +19 -0
- levelapp-0.1.15.dist-info/METADATA +571 -0
- levelapp-0.1.15.dist-info/RECORD +70 -0
- levelapp-0.1.15.dist-info/WHEEL +4 -0
- levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0

levelapp/comparator/comparator.py
@@ -0,0 +1,232 @@
+"""'comparator/service.py':"""
+from collections.abc import Mapping
+from typing import Any, Dict, List, Tuple, Literal
+
+from pydantic import BaseModel
+
+from levelapp.core.base import BaseProcess
+from levelapp.comparator.extractor import DataExtractor
+from levelapp.comparator.scorer import MetricsManager, ComparisonResults
+from levelapp.comparator.schemas import EntityMetric, SetMetric, MetricConfig
+from levelapp.comparator.utils import format_evaluation_results
+
+
+class MetadataComparator(BaseProcess):
+    """Metadata comparator component."""
+
+    def __init__(
+        self,
+        reference: BaseModel | None = None,
+        generated: BaseModel | None = None,
+        metrics_manager: MetricsManager | None = None,
+    ):
+        """
+        Initialize the MetadataComparator.
+
+        Args:
+            reference (BaseModel): Reference BaseModel
+            generated (BaseModel): Extracted BaseModel
+            metrics_manager (MetricsManager): MetricsManager
+        """
+        self.extractor = DataExtractor()
+
+        self._reference = reference
+        self._generated = generated
+        self._metrics_manager = metrics_manager
+
+        self._evaluation_data: List[
+            Tuple[str, list[str], list[str], Any, Any, Any, Any, float]
+        ] = []
+
+    @property
+    def reference_data(self) -> BaseModel:
+        return self._reference
+
+    @property
+    def generated_data(self) -> BaseModel:
+        return self._generated
+
+    @property
+    def metrics_manager(self) -> MetricsManager:
+        return self._metrics_manager
+
+    @reference_data.setter
+    def reference_data(self, value: BaseModel):
+        self._reference = value
+
+    @generated_data.setter
+    def generated_data(self, value: BaseModel):
+        self._generated = value
+
+    @metrics_manager.setter
+    def metrics_manager(self, value: MetricsManager):
+        self._metrics_manager = value
+
+    def _get_score(self, field: str) -> Tuple[EntityMetric, SetMetric, float]:
+        """
+        Retrieve the scoring metric and threshold for a given field.
+
+        Args:
+            field: The field for which to retrieve the metric and threshold.
+
+        Returns:
+            A tuple containing the scoring metric and its threshold.
+        """
+        if self._metrics_manager:
+            config = self._metrics_manager.get_metrics_config(field=field)
+        else:
+            config = MetricConfig()
+
+        return config.entity_metric, config.set_metric, config.threshold
+
+    def _format_results(
+        self,
+        output_type: Literal["json", "csv"] = "json"
+    ) -> Dict[int, Any]:
+        """
+        Format the internal evaluation data for reporting or storage.
+
+        Args:
+            output_type: 'json' returns a list of dictionaries; 'csv' returns a DataFrame.
+
+        Returns:
+            Formatted evaluation results or None if no data.
+        """
+        formatted_results = format_evaluation_results(self._evaluation_data, output_type=output_type)
+
+        return dict(enumerate(formatted_results))
+
+    def evaluate(
+        self,
+        reference_list: List[str],
+        extracted_list: List[str],
+        entity_metric: EntityMetric,
+        set_metric: SetMetric,
+        threshold: float,
+    ) -> ComparisonResults:
+        """
+        Evaluates pairwise similarity between elements in two lists using fuzzy matching.
+
+        Args:
+            reference_list: Ground-truth list of strings.
+            extracted_list: Extracted list of strings to compare.
+            entity_metric (EntityMetric): entity-level comparison metric.
+            set_metric (SetMetric): set-level comparison metric.
+            threshold: Similarity threshold (0–100) for considering a match.
+
+        Returns:
+            A dict with accuracy, precision, recall, and F1-score.
+        """
+        if not (reference_list or extracted_list):
+            return ComparisonResults("", "", entity_metric.value, None, set_metric.value, None)
+
+        scores = self._metrics_manager.compute_entity_scores(
+            reference_seq=reference_list,
+            extracted_seq=extracted_list,
+            scorer=entity_metric,
+            pairwise=False
+        )
+
+        return self._metrics_manager.compute_set_scores(
+            data=scores,
+            scorer=set_metric,
+            threshold=threshold,
+        )
+
+    def _recursive_compare(
+        self,
+        ref_node: Any,
+        ext_node: Any,
+        results: Dict[str, Dict[str, float]],
+        prefix: str = "",
+        threshold: float = 99.0,
+    ) -> None:
+        """
+        Recursively compare extracted vs. reference metadata nodes.
+
+        Args:
+            ref_node: dict or list (from deep_extract reference metadata)
+            ext_node: dict or list (from deep_extract extracted metadata)
+            results: Dict to accumulate comp_results keyed by hierarchical attribute paths.
+            prefix: str, current path prefix to form hierarchical keys.
+        """
+        # Case 1: Both nodes are dicts -> recurse on keys
+        if isinstance(ref_node, Mapping) and isinstance(ext_node, Mapping):
+            all_keys = set(ref_node.keys())
+            for key in all_keys:
+                new_prefix = f"{prefix}.{key}" if prefix else key
+                ref_subnode = ref_node.get(key, [])
+                ext_subnode = ext_node.get(key, [])
+                self._recursive_compare(
+                    ref_node=ref_subnode,
+                    ext_node=ext_subnode,
+                    results=results,
+                    prefix=new_prefix,
+                    threshold=threshold,
+                )
+
+        # Case 2: Leaf nodes (lists) -> evaluate directly
+        else:
+            # Defensive: convert to list if not list
+            ref_list = ref_node if isinstance(ref_node, list) else [ref_node]
+            ext_list = ext_node if isinstance(ext_node, list) else [ext_node]
+
+            # Convert all to strings for consistent fuzzy matching
+            ref_list_str = list(map(str, ref_list))
+            ext_list_str = list(map(str, ext_list))
+
+            entity_metric_, set_metric_, threshold = self._get_score(field=prefix)
+
+            # Evaluate similarity metrics
+            comp_results = self.evaluate(
+                reference_list=ref_list_str,
+                extracted_list=ext_list_str,
+                entity_metric=entity_metric_,
+                set_metric=set_metric_,
+                threshold=threshold,
+            )
+
+            if comp_results:
+                self._evaluation_data.append(
+                    (
+                        prefix,
+                        ref_list_str,
+                        ext_list_str,
+                        comp_results.e_metric,
+                        comp_results.e_score,
+                        comp_results.s_metric,
+                        comp_results.s_score,
+                        threshold,
+                    )
+                )
+
+            results[prefix] = comp_results or {"accuracy": 0}
+
+    def run(self, indexed_mode: bool = False) -> Dict[int, Any]:
+        """
+        Launch a metadata comparison process between reference and extracted data.
+
+        Args:
+            indexed_mode: Flag to use indexed mode for metadata extraction.
+
+        Returns:
+            Dictionary with comparison results, keyed by attribute paths.
+        """
+        self._evaluation_data.clear()
+
+        ref_data = self.extractor.deep_extract(model=self.reference_data, indexed=indexed_mode)
+        ext_data = self.extractor.deep_extract(model=self.generated_data, indexed=indexed_mode)
+
+        results: Dict[str, Dict[str, float]] = {}
+
+        self._recursive_compare(
+            ref_node=ref_data,
+            ext_node=ext_data,
+            results=results,
+            prefix="",
+            threshold=1,
+        )
+
+        formatted_results = self._format_results()
+
+        return formatted_results
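
The 232-line module above corresponds to levelapp/comparator/comparator.py in the file list (its docstring still says 'comparator/service.py'). The following is a rough usage sketch, not part of the package: the Invoice model, its values, and the per-field threshold are invented, the import path assumes the class lives in levelapp.comparator.comparator, and it assumes BaseProcess allows MetadataComparator to be instantiated directly.

from typing import List
from pydantic import BaseModel

from levelapp.comparator.comparator import MetadataComparator
from levelapp.comparator.scorer import MetricsManager
from levelapp.comparator.schemas import MetricConfig, EntityMetric, SetMetric


class Invoice(BaseModel):  # hypothetical metadata schema
    vendor: str
    tags: List[str]


reference = Invoice(vendor="Acme GmbH", tags=["paid", "recurring"])
generated = Invoice(vendor="ACME GmbH", tags=["paid"])

# Per-field override; fields without an entry fall back to get_metrics_config() defaults.
manager = MetricsManager(
    metrics_mapping={
        "vendor": MetricConfig(
            field_name="vendor",
            entity_metric=EntityMetric.JARO_WINKLER,
            set_metric=SetMetric.ACCURACY,
            threshold=0.9,  # jaro-winkler similarities are on a 0-1 scale
        )
    }
)

comparator = MetadataComparator(
    reference=reference, generated=generated, metrics_manager=manager
)
rows = comparator.run(indexed_mode=False)  # dict of formatted result rows keyed by index

In this sketch, run() flattens both models with DataExtractor, resolves per-field metrics through the MetricsManager, scores each attribute path recursively, and returns the formatted evaluation rows keyed by row index.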

levelapp/comparator/extractor.py
@@ -0,0 +1,108 @@
+"""levelapp/comparator/extractor.py"""
+
+from collections import defaultdict
+from collections.abc import Sequence
+from typing import List, Dict, Any
+from pydantic import BaseModel
+
+
+class DataExtractor:
+    """
+    Extracts primitive values from nested Pydantic models, dicts, and sequences.
+    """
+    def deep_extract(
+        self, model: BaseModel,
+        indexed: bool = False
+    ) -> Dict[str, List[str]]:
+        """
+        Extracts data in a recursive way from pydantic model.
+
+        Args:
+            model: An instance of a BaseModel.
+            indexed: Switch parameter to select the extraction approach.
+
+        Returns:
+            A dictionary where keys are attribute names and values are lists of string values.
+        """
+        result: Dict[str, List[str]] = defaultdict(list)
+        for field_name, field_info in type(model).model_fields.items():
+            field_value = getattr(model, field_name)
+            self._extract_field_values(
+                value=field_value, prefix=field_name, result=result, indexed=indexed
+            )
+
+        return result
+
+    def _extract_field_values(
+        self,
+        value: Any,
+        prefix: str,
+        result: Dict[str, List[str]],
+        indexed: bool = False,
+    ) -> None:
+        """
+        Recursively extract values from a field, storing them in result with field path as key.
+
+        Args:
+            value: The value to extract (BaseModel, dict, list, or primitive).
+            prefix: The current field path (e.g., 'documents.tribunal_members').
+            result: Dictionary to store field paths and their value lists.
+            indexed: Switch parameter to select the extraction approach.
+        """
+        if isinstance(value, BaseModel):
+            self._handle_model(model=value, prefix=prefix, result=result)
+
+        elif isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
+            self._handle_sequence(
+                sequence=value, prefix=prefix, result=result, indexed=indexed
+            )
+
+        else:
+            result[prefix].append(value)
+
+    def _handle_model(
+        self, model: BaseModel, prefix: str, result: Dict[str, List[str]]
+    ) -> None:
+        """
+        Extract values from a Pydantic model recursively.
+
+        Args:
+            model: Pydantic BaseModel instance.
+            prefix: Current field path.
+            result: Dictionary to store field paths and value lists.
+        """
+        for field_name, field_info in type(model).model_fields.items():
+            field_value = getattr(model, field_name)
+            new_prefix = f"{prefix}.{field_name}" if prefix else field_name
+            self._extract_field_values(
+                value=field_value, prefix=new_prefix, result=result
+            )
+
+    def _handle_sequence(
+        self,
+        sequence: Sequence,
+        prefix: str,
+        result: Dict[str, List[str]],
+        indexed: bool = False,
+    ) -> None:
+        """
+        Extract values from a sequence (list or tuple) recursively.
+
+        Args:
+            sequence: List or tuple of values.
+            prefix: Current field path.
+            result: Dictionary to store field paths and value lists.
+            indexed: Switch parameter to select the extraction approach.
+        """
+        if not sequence:
+            result[prefix] = []
+
+        if indexed:
+            for i, item in enumerate(sequence):
+                new_prefix = f"{prefix}[{i}]" if prefix else f"[{i}]"
+                self._extract_field_values(value=item, prefix=new_prefix, result=result)
+        else:
+            for i, item in enumerate(sequence):
+                self._extract_field_values(
+                    value=item, prefix=prefix, result=result, indexed=indexed
+                )
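
A small illustrative sketch of the extractor (not from the package; the Person and Address models are hypothetical): deep_extract flattens a nested model into a mapping from attribute paths to value lists, and indexed=True switches list handling from pooling values under one key to per-index keys.

from typing import List
from pydantic import BaseModel

from levelapp.comparator.extractor import DataExtractor


class Address(BaseModel):  # hypothetical nested model
    city: str


class Person(BaseModel):  # hypothetical top-level model
    name: str
    tags: List[str]
    address: Address


person = Person(name="Ada", tags=["x", "y"], address=Address(city="Paris"))
extractor = DataExtractor()

print(dict(extractor.deep_extract(model=person)))
# {'name': ['Ada'], 'tags': ['x', 'y'], 'address.city': ['Paris']}

print(dict(extractor.deep_extract(model=person, indexed=True)))
# {'name': ['Ada'], 'tags[0]': ['x'], 'tags[1]': ['y'], 'address.city': ['Paris']}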

levelapp/comparator/schemas.py
@@ -0,0 +1,61 @@
+"""'comparator/schemas.py': Defines Pydantic models for extracted metadata."""
+
+from enum import Enum
+
+from pydantic import BaseModel, Field
+from rapidfuzz import fuzz, utils
+
+
+class AttrCompMixin:
+    def __eq__(self, other) -> bool:
+        if not isinstance(other, type(self)):
+            return False
+
+        attr_name = next(iter(self.__dict__.keys()))
+        _cond = (
+            fuzz.ratio(
+                s1=getattr(self, attr_name),
+                s2=getattr(other, attr_name),
+                processor=utils.default_process,
+            )
+            > 99
+        )
+        return _cond
+
+
+class CompScoreMixin:
+    def comp_score(self, other) -> float:
+        attr_name = next(iter(self.__dict__.keys()))
+        _score = fuzz.ratio(
+            s1=getattr(self, attr_name),
+            s2=getattr(other, attr_name),
+            processor=utils.default_process,
+        )
+        return _score
+
+
+class EntityMetric(str, Enum):
+    WRATIO = "wratio"
+    LEV_NORM = "lev-norm"
+    JARO_WINKLER = "jaro-winkler"
+    TOKEN_SORT_RATIO = "token-sort-ratio"
+    TOKEN_SET_RATIO = "token-set-ratio"
+
+    @classmethod
+    def list(cls):
+        return [field.value for field in cls]
+
+
+class SetMetric(str, Enum):
+    ACCURACY = "accuracy"
+    F1_SCORE = "f1-score"
+
+
+class MetricConfig(BaseModel):
+    """
+    Configuration for a field's comparison metric.
+    """
+    field_name: str = Field(default="token-set-ratio", description="Name of the field")
+    entity_metric: EntityMetric = Field(default=EntityMetric.LEV_NORM, description="Entity level metric")
+    set_metric: SetMetric = Field(default=SetMetric.ACCURACY, description="Set level metric")
+    threshold: float = Field(default=50, ge=0, le=100, description="Match threshold")
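
A minimal sketch of how these pieces behave (not from the package; the City class is hypothetical): AttrCompMixin gives a class fuzzy equality on its first instance attribute, using fuzz.ratio with default preprocessing and a fixed cut-off of 99; CompScoreMixin exposes the raw ratio; and MetricConfig() supplies the defaults used when a field has no explicit configuration.

from levelapp.comparator.schemas import AttrCompMixin, CompScoreMixin, MetricConfig


class City(AttrCompMixin, CompScoreMixin):  # hypothetical value object
    def __init__(self, name: str):
        self.name = name


print(City("Berlin") == City("berlin."))          # True: preprocessed ratio is 100 (> 99)
print(City("Berlin").comp_score(City("Brelin")))  # roughly 83.3

print(MetricConfig())
# field_name='token-set-ratio' entity_metric=<EntityMetric.LEV_NORM: 'lev-norm'> ...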

levelapp/comparator/scorer.py
@@ -0,0 +1,269 @@
+"""'comparator/scorer.py':"""
+import numpy as np
+
+from collections import namedtuple
+from typing import List, Dict, Callable, cast, Protocol
+
+from rapidfuzz import distance, process, utils, fuzz
+
+from levelapp.comparator.schemas import MetricConfig, EntityMetric, SetMetric
+from levelapp.aspects import logger
+
+ComputedScores = namedtuple(
+    typename="ComputedScores",
+    field_names=["ref", "ext", "e_metric", "e_score"],
+)
+ComparisonResults = namedtuple(
+    typename="ComparisonResults",
+    field_names=["ref", "ext", "e_metric", "e_score", "s_metric", "s_score"]
+)
+
+
+class Scorer(Protocol):
+    def __call__(self, ref: str, ext: str) -> float:
+        ...
+
+
+class MetricsManager:
+    """Manages scorer registration, score computation, metric configuration."""
+
+    def __init__(self, metrics_mapping: Dict[str, MetricConfig] | None = None):
+        self._scorers: Dict[str, Callable] = {}
+        self._metrics_mapping = metrics_mapping or {}
+        self._initialize_scorers()
+
+    @property
+    def metrics_mapping(self) -> Dict[str, MetricConfig]:
+        return self._metrics_mapping
+
+    @metrics_mapping.setter
+    def metrics_mapping(self, value: Dict[str, MetricConfig]):
+        self._metrics_mapping = value
+
+    def _initialize_scorers(self) -> None:
+        """Register existing scorers to prevent residual state."""
+        self._scorers.clear()
+
+        self.register_scorer(
+            EntityMetric.LEV_NORM.value,
+            distance.Levenshtein.normalized_similarity
+        )
+        self.register_scorer(
+            EntityMetric.JARO_WINKLER.value,
+            distance.JaroWinkler.normalized_similarity,
+        )
+        self.register_scorer(
+            EntityMetric.TOKEN_SET_RATIO.value,
+            fuzz.token_set_ratio,
+        )
+        self.register_scorer(
+            EntityMetric.TOKEN_SORT_RATIO.value,
+            fuzz.token_sort_ratio,
+        )
+
+        self.register_scorer(
+            EntityMetric.WRATIO.value,
+            fuzz.WRatio
+        )
+
+    def register_scorer(self, name: str, scorer: Callable) -> None:
+        """
+        Register a scorer
+
+        Args:
+            name (str): name of the scorer.
+            scorer (Callable): scorer to register.
+
+        Raises:
+            ValueError: if the scorer is not a callable.
+        """
+        self._scorers[name] = scorer
+
+    def get_scorer(self, name: str) -> Callable:
+        """
+        Retrieve a scorer by name.
+
+        Args:
+            name (str): name of the scorer.
+
+        Returns:
+            Callable: scorer.
+
+        Raises:
+            ValueError: if the passed name is not registered.
+        """
+        try:
+            scorer = self._scorers[name]
+            return scorer
+
+        except KeyError:
+            raise ValueError(f"[MetricsManager] '{name}' is not registered")
+
+    def get_metrics_config(self, field: str) -> MetricConfig:
+        """
+        Retrieve the metrics configuration for a given field.
+
+        Args:
+            field (str): field name.
+
+        Returns:
+            MetricConfig: metrics configuration for the given field.
+        """
+        default_config = MetricConfig(
+            field_name=field,
+            entity_metric=EntityMetric.TOKEN_SET_RATIO,
+            set_metric=SetMetric.ACCURACY,
+            threshold=0.5
+        )
+        return self._metrics_mapping.get(field, default_config)
+
+    def compute_entity_scores(
+        self,
+        reference_seq: List[str],
+        extracted_seq: List[str],
+        scorer: EntityMetric = EntityMetric.LEV_NORM,
+        pairwise: bool = True
+    ) -> List[ComputedScores]:
+        """
+        Compute the distance/similarity between ref/seq sequence entities.
+
+        Args:
+            reference_seq (List[str]): The reference sequence.
+            extracted_seq (List[str]): The extracted sequence.
+            scorer (str): Name of the scorer to use (e.g., 'levenshtein', 'jaro_winkler').
+            pairwise (bool): Whether to use pairwise distances or not.
+
+        Returns:
+            List[Tuple[str, str, np.float32]]: List of (reference, extracted, score) tuples.
+        """
+        if not reference_seq or not extracted_seq:
+            return [
+                ComputedScores(
+                    ref=reference_seq,
+                    ext=extracted_seq,
+                    e_metric=scorer.value,
+                    e_score=np.nan,
+                )
+            ]
+
+        if scorer not in EntityMetric.list():
+            logger.warning(f"[MetricsManager] Scorer name <{scorer}> is not supported.")
+            raise ValueError(f"[MetricsManager] Scorer <{scorer}> is not registered.")
+
+        max_len = max(len(reference_seq), len(extracted_seq))
+        reference_padded = reference_seq + [""] * (max_len - len(reference_seq))
+        extracted_padded = extracted_seq + [""] * (max_len - len(extracted_seq))
+
+        scorer_func = cast(Callable, self.get_scorer(name=scorer.value))
+
+        if pairwise:
+            scores_ = process.cpdist(
+                queries=reference_padded,
+                choices=extracted_padded,
+                scorer=scorer_func,
+                processor=utils.default_process,
+                workers=-1,
+            )
+            scores = scores_.flatten()
+            res = [
+                ComputedScores(
+                    ref=reference_padded[i],
+                    ext=extracted_padded[i],
+                    e_metric=scorer.value,
+                    e_score=scores[i]
+                ) for i in range(len(scores))
+            ]
+
+        else:
+            scores_ = process.cdist(
+                queries=reference_padded,
+                choices=extracted_padded,
+                scorer=scorer_func,
+                processor=utils.default_process,
+                workers=-1,
+            )
+            scores = np.max(scores_, axis=1)
+            max_idx = np.argmax(scores_, axis=1)
+            res = [
+                ComputedScores(
+                    ref=reference_padded[i],
+                    ext=extracted_padded[max_idx[i]],
+                    e_metric=scorer.value,
+                    e_score=scores[i]
+                ) for i in range(len(scores))
+            ]
+
+        return res
+
+    @staticmethod
+    def compute_set_scores(
+        data: List[ComputedScores],
+        scorer: SetMetric = SetMetric.F1_SCORE,
+        threshold: float = 1.0,
+    ) -> ComparisonResults:
+        """
+        Compute evaluation metrics from similarity scores and return results as named tuples.
+
+        Args:
+            data: List of tuples containing reference string, extracted string, and similarity score.
+            scorer: Metric to compute.
+            threshold: Similarity threshold for considering a match.
+
+        Returns:
+            List[ComparisonResults]: List of named tuples containing reference, extracted, score, and metric value.
+        """
+        if not data:
+            return ComparisonResults("", "", None, None, None, None)
+
+        ref = [_.ref for _ in data]
+        ext = [_.ext for _ in data]
+        entity_scores = np.array([_.e_score for _ in data], dtype=np.float32)
+        entity_metric = data[0].e_metric
+
+        matches = np.count_nonzero(entity_scores >= threshold)
+
+        if len(data) == 1:
+            entity_scores = entity_scores.tolist()
+            set_scores = np.array(
+                [1 if score >= threshold else 0 for score in entity_scores], dtype=np.float32
+            ).tolist()
+            return ComparisonResults(
+                ref=ref,
+                ext=ext,
+                e_metric=entity_metric,
+                e_score=entity_scores,
+                s_metric=None,
+                s_score=set_scores
+            )
+
+        tp = matches
+        fp = len(ref) - int(matches)
+        fn = len(ext) - int(matches)
+
+        if scorer == SetMetric.ACCURACY:
+            accuracy = (tp / len(entity_scores)) if len(entity_scores) > 0 else 0.0
+            return ComparisonResults(
+                ref=ref,
+                ext=ext,
+                e_metric=entity_metric,
+                e_score=entity_scores,
+                s_metric=scorer.value,
+                s_score=accuracy
+            )
+
+        if scorer == SetMetric.F1_SCORE:
+            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+            f1 = (
+                2 * (precision * recall) / (precision + recall)
+                if (precision + recall) > 0
+                else 0.0
+            )
+            return ComparisonResults(
+                ref=ref,
+                ext=ext,
+                e_metric=entity_metric,
+                e_score=entity_scores,
+                s_metric=scorer.value,
+                s_score=f1
+            )
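
A rough sketch of driving the scorer directly (not from the package; the name lists are invented). Note that the registered entity scorers are not on one scale: 'lev-norm' and 'jaro-winkler' return similarities in [0, 1], while 'token-set-ratio' and 'wratio' return values in [0, 100], so the threshold passed to compute_set_scores should match the chosen entity metric.

from levelapp.comparator.scorer import MetricsManager
from levelapp.comparator.schemas import EntityMetric, SetMetric

manager = MetricsManager()

entity_scores = manager.compute_entity_scores(
    reference_seq=["John Smith", "Jane Doe"],
    extracted_seq=["Jon Smith", "Jane Doe"],
    scorer=EntityMetric.LEV_NORM,
    pairwise=False,  # best match per reference entry via process.cdist
)

results = manager.compute_set_scores(
    data=entity_scores,
    scorer=SetMetric.F1_SCORE,
    threshold=0.85,  # 0-1 scale to match lev-norm similarities
)
print(results.s_metric, results.s_score)  # f1-score 1.0 for these inputs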