levelapp-0.1.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. levelapp/__init__.py +0 -0
  2. levelapp/aspects/__init__.py +8 -0
  3. levelapp/aspects/loader.py +253 -0
  4. levelapp/aspects/logger.py +59 -0
  5. levelapp/aspects/monitor.py +617 -0
  6. levelapp/aspects/sanitizer.py +168 -0
  7. levelapp/clients/__init__.py +122 -0
  8. levelapp/clients/anthropic.py +112 -0
  9. levelapp/clients/gemini.py +130 -0
  10. levelapp/clients/groq.py +101 -0
  11. levelapp/clients/huggingface.py +162 -0
  12. levelapp/clients/ionos.py +126 -0
  13. levelapp/clients/mistral.py +106 -0
  14. levelapp/clients/openai.py +116 -0
  15. levelapp/comparator/__init__.py +5 -0
  16. levelapp/comparator/comparator.py +232 -0
  17. levelapp/comparator/extractor.py +108 -0
  18. levelapp/comparator/schemas.py +61 -0
  19. levelapp/comparator/scorer.py +269 -0
  20. levelapp/comparator/utils.py +136 -0
  21. levelapp/config/__init__.py +5 -0
  22. levelapp/config/endpoint.py +199 -0
  23. levelapp/config/prompts.py +57 -0
  24. levelapp/core/__init__.py +0 -0
  25. levelapp/core/base.py +386 -0
  26. levelapp/core/schemas.py +24 -0
  27. levelapp/core/session.py +336 -0
  28. levelapp/endpoint/__init__.py +0 -0
  29. levelapp/endpoint/client.py +188 -0
  30. levelapp/endpoint/client_test.py +41 -0
  31. levelapp/endpoint/manager.py +114 -0
  32. levelapp/endpoint/parsers.py +119 -0
  33. levelapp/endpoint/schemas.py +38 -0
  34. levelapp/endpoint/tester.py +52 -0
  35. levelapp/evaluator/__init__.py +3 -0
  36. levelapp/evaluator/evaluator.py +307 -0
  37. levelapp/metrics/__init__.py +63 -0
  38. levelapp/metrics/embedding.py +56 -0
  39. levelapp/metrics/embeddings/__init__.py +0 -0
  40. levelapp/metrics/embeddings/sentence_transformer.py +30 -0
  41. levelapp/metrics/embeddings/torch_based.py +56 -0
  42. levelapp/metrics/exact.py +182 -0
  43. levelapp/metrics/fuzzy.py +80 -0
  44. levelapp/metrics/token.py +103 -0
  45. levelapp/plugins/__init__.py +0 -0
  46. levelapp/repository/__init__.py +3 -0
  47. levelapp/repository/filesystem.py +203 -0
  48. levelapp/repository/firestore.py +291 -0
  49. levelapp/simulator/__init__.py +3 -0
  50. levelapp/simulator/schemas.py +116 -0
  51. levelapp/simulator/simulator.py +531 -0
  52. levelapp/simulator/utils.py +134 -0
  53. levelapp/visualization/__init__.py +7 -0
  54. levelapp/visualization/charts.py +358 -0
  55. levelapp/visualization/dashboard.py +240 -0
  56. levelapp/visualization/exporter.py +167 -0
  57. levelapp/visualization/templates/base.html +158 -0
  58. levelapp/visualization/templates/comparator_dashboard.html +57 -0
  59. levelapp/visualization/templates/simulator_dashboard.html +111 -0
  60. levelapp/workflow/__init__.py +6 -0
  61. levelapp/workflow/base.py +192 -0
  62. levelapp/workflow/config.py +96 -0
  63. levelapp/workflow/context.py +64 -0
  64. levelapp/workflow/factory.py +42 -0
  65. levelapp/workflow/registration.py +6 -0
  66. levelapp/workflow/runtime.py +19 -0
  67. levelapp-0.1.15.dist-info/METADATA +571 -0
  68. levelapp-0.1.15.dist-info/RECORD +70 -0
  69. levelapp-0.1.15.dist-info/WHEEL +4 -0
  70. levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,232 @@
+ """'comparator/service.py':"""
+ from collections.abc import Mapping
+ from typing import Any, Dict, List, Tuple, Literal
+
+ from pydantic import BaseModel
+
+ from levelapp.core.base import BaseProcess
+ from levelapp.comparator.extractor import DataExtractor
+ from levelapp.comparator.scorer import MetricsManager, ComparisonResults
+ from levelapp.comparator.schemas import EntityMetric, SetMetric, MetricConfig
+ from levelapp.comparator.utils import format_evaluation_results
+
+
+ class MetadataComparator(BaseProcess):
+     """Metadata comparator component."""
+
+     def __init__(
+         self,
+         reference: BaseModel | None = None,
+         generated: BaseModel | None = None,
+         metrics_manager: MetricsManager | None = None,
+     ):
+         """
+         Initialize the MetadataComparator.
+
+         Args:
+             reference (BaseModel): Reference BaseModel.
+             generated (BaseModel): Extracted BaseModel.
+             metrics_manager (MetricsManager): MetricsManager instance.
+         """
+         self.extractor = DataExtractor()
+
+         self._reference = reference
+         self._generated = generated
+         self._metrics_manager = metrics_manager
+
+         self._evaluation_data: List[
+             Tuple[str, list[str], list[str], Any, Any, Any, Any, float]
+         ] = []
+
+     @property
+     def reference_data(self) -> BaseModel:
+         return self._reference
+
+     @property
+     def generated_data(self) -> BaseModel:
+         return self._generated
+
+     @property
+     def metrics_manager(self) -> MetricsManager:
+         return self._metrics_manager
+
+     @reference_data.setter
+     def reference_data(self, value: BaseModel):
+         self._reference = value
+
+     @generated_data.setter
+     def generated_data(self, value: BaseModel):
+         self._generated = value
+
+     @metrics_manager.setter
+     def metrics_manager(self, value: MetricsManager):
+         self._metrics_manager = value
+
+     def _get_score(self, field: str) -> Tuple[EntityMetric, SetMetric, float]:
+         """
+         Retrieve the scoring metrics and threshold for a given field.
+
+         Args:
+             field: The field for which to retrieve the metrics and threshold.
+
+         Returns:
+             A tuple containing the entity metric, the set metric, and the match threshold.
+         """
+         if self._metrics_manager:
+             config = self._metrics_manager.get_metrics_config(field=field)
+         else:
+             config = MetricConfig()
+
+         return config.entity_metric, config.set_metric, config.threshold
+
+     def _format_results(
+         self,
+         output_type: Literal["json", "csv"] = "json"
+     ) -> Dict[int, Any]:
+         """
+         Format the internal evaluation data for reporting or storage.
+
+         Args:
+             output_type: 'json' returns a list of dictionaries; 'csv' returns a DataFrame.
+
+         Returns:
+             Formatted evaluation results, keyed by row index.
+         """
+         formatted_results = format_evaluation_results(self._evaluation_data, output_type=output_type)
+
+         return dict(enumerate(formatted_results))
+
+     def evaluate(
+         self,
+         reference_list: List[str],
+         extracted_list: List[str],
+         entity_metric: EntityMetric,
+         set_metric: SetMetric,
+         threshold: float,
+     ) -> ComparisonResults:
+         """
+         Evaluates pairwise similarity between elements in two lists using fuzzy matching.
+
+         Args:
+             reference_list: Ground-truth list of strings.
+             extracted_list: Extracted list of strings to compare.
+             entity_metric (EntityMetric): Entity-level comparison metric.
+             set_metric (SetMetric): Set-level comparison metric.
+             threshold: Similarity threshold (0–100) for considering a match.
+
+         Returns:
+             A ComparisonResults named tuple with entity-level and set-level scores.
+         """
+         if not (reference_list or extracted_list):
+             return ComparisonResults("", "", entity_metric.value, None, set_metric.value, None)
+
+         scores = self._metrics_manager.compute_entity_scores(
+             reference_seq=reference_list,
+             extracted_seq=extracted_list,
+             scorer=entity_metric,
+             pairwise=False
+         )
+
+         return self._metrics_manager.compute_set_scores(
+             data=scores,
+             scorer=set_metric,
+             threshold=threshold,
+         )
+
+     def _recursive_compare(
+         self,
+         ref_node: Any,
+         ext_node: Any,
+         results: Dict[str, Dict[str, float]],
+         prefix: str = "",
+         threshold: float = 99.0,
+     ) -> None:
+         """
+         Recursively compare extracted vs. reference metadata nodes.
+
+         Args:
+             ref_node: dict or list (from deep_extract reference metadata).
+             ext_node: dict or list (from deep_extract extracted metadata).
+             results: Dict accumulating comparison results keyed by hierarchical attribute paths.
+             prefix: Current path prefix used to form hierarchical keys.
+             threshold: Fallback similarity threshold for considering a match.
+         """
+         # Case 1: Both nodes are dicts -> recurse on keys
+         if isinstance(ref_node, Mapping) and isinstance(ext_node, Mapping):
+             all_keys = set(ref_node.keys())
+             for key in all_keys:
+                 new_prefix = f"{prefix}.{key}" if prefix else key
+                 ref_subnode = ref_node.get(key, [])
+                 ext_subnode = ext_node.get(key, [])
+                 self._recursive_compare(
+                     ref_node=ref_subnode,
+                     ext_node=ext_subnode,
+                     results=results,
+                     prefix=new_prefix,
+                     threshold=threshold,
+                 )
+
+         # Case 2: Leaf nodes (lists) -> evaluate directly
+         else:
+             # Defensive: convert to list if not list
+             ref_list = ref_node if isinstance(ref_node, list) else [ref_node]
+             ext_list = ext_node if isinstance(ext_node, list) else [ext_node]
+
+             # Convert all to strings for consistent fuzzy matching
+             ref_list_str = list(map(str, ref_list))
+             ext_list_str = list(map(str, ext_list))
+
+             entity_metric_, set_metric_, threshold = self._get_score(field=prefix)
+
+             # Evaluate similarity metrics
+             comp_results = self.evaluate(
+                 reference_list=ref_list_str,
+                 extracted_list=ext_list_str,
+                 entity_metric=entity_metric_,
+                 set_metric=set_metric_,
+                 threshold=threshold,
+             )
+
+             if comp_results:
+                 self._evaluation_data.append(
+                     (
+                         prefix,
+                         ref_list_str,
+                         ext_list_str,
+                         comp_results.e_metric,
+                         comp_results.e_score,
+                         comp_results.s_metric,
+                         comp_results.s_score,
+                         threshold,
+                     )
+                 )
+
+             results[prefix] = comp_results or {"accuracy": 0}
+
+     def run(self, indexed_mode: bool = False) -> Dict[int, Any]:
+         """
+         Launch a metadata comparison process between reference and extracted data.
+
+         Args:
+             indexed_mode: Flag to use indexed mode for metadata extraction.
+
+         Returns:
+             Dictionary of formatted comparison results, keyed by row index.
+         """
+         self._evaluation_data.clear()
+
+         ref_data = self.extractor.deep_extract(model=self.reference_data, indexed=indexed_mode)
+         ext_data = self.extractor.deep_extract(model=self.generated_data, indexed=indexed_mode)
+
+         results: Dict[str, Dict[str, float]] = {}
+
+         self._recursive_compare(
+             ref_node=ref_data,
+             ext_node=ext_data,
+             results=results,
+             prefix="",
+             threshold=1,
+         )
+
+         formatted_results = self._format_results()
+
+         return formatted_results
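For orientation, here is a minimal usage sketch of the MetadataComparator added above. The InvoiceMeta model, its field values, and the import path levelapp.comparator.comparator are assumptions based on the file list, not taken from the package itself:

    from typing import List
    from pydantic import BaseModel

    from levelapp.comparator.comparator import MetadataComparator
    from levelapp.comparator.scorer import MetricsManager

    class InvoiceMeta(BaseModel):
        # Hypothetical schema standing in for the user's metadata model
        vendor: str
        line_items: List[str]

    reference = InvoiceMeta(vendor="Acme Corp", line_items=["paper", "ink"])
    generated = InvoiceMeta(vendor="ACME Corp.", line_items=["paper", "toner"])

    comparator = MetadataComparator(
        reference=reference,
        generated=generated,
        metrics_manager=MetricsManager(),
    )
    # run() flattens both models, compares them field by field,
    # and returns the formatted rows keyed by index
    results = comparator.run()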
@@ -0,0 +1,108 @@
+ """levelapp/comparator/extractor.py"""
+
+ from collections import defaultdict
+ from collections.abc import Sequence
+ from typing import List, Dict, Any
+ from pydantic import BaseModel
+
+
+ class DataExtractor:
+     """
+     Extracts primitive values from nested Pydantic models, dicts, and sequences.
+     """
+     def deep_extract(
+         self, model: BaseModel,
+         indexed: bool = False
+     ) -> Dict[str, List[str]]:
+         """
+         Extracts data recursively from a pydantic model.
+
+         Args:
+             model: An instance of a BaseModel.
+             indexed: Switch parameter to select the extraction approach.
+
+         Returns:
+             A dictionary where keys are attribute names and values are lists of string values.
+         """
+         result: Dict[str, List[str]] = defaultdict(list)
+         for field_name, field_info in type(model).model_fields.items():
+             field_value = getattr(model, field_name)
+             self._extract_field_values(
+                 value=field_value, prefix=field_name, result=result, indexed=indexed
+             )
+
+         return result
+
+     def _extract_field_values(
+         self,
+         value: Any,
+         prefix: str,
+         result: Dict[str, List[str]],
+         indexed: bool = False,
+     ) -> None:
+         """
+         Recursively extract values from a field, storing them in result with the field path as key.
+
+         Args:
+             value: The value to extract (BaseModel, dict, list, or primitive).
+             prefix: The current field path (e.g., 'documents.tribunal_members').
+             result: Dictionary to store field paths and their value lists.
+             indexed: Switch parameter to select the extraction approach.
+         """
+         if isinstance(value, BaseModel):
+             self._handle_model(model=value, prefix=prefix, result=result)
+
+         elif isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
+             self._handle_sequence(
+                 sequence=value, prefix=prefix, result=result, indexed=indexed
+             )
+
+         else:
+             result[prefix].append(value)
+
+     def _handle_model(
+         self, model: BaseModel, prefix: str, result: Dict[str, List[str]]
+     ) -> None:
+         """
+         Extract values from a Pydantic model recursively.
+
+         Args:
+             model: Pydantic BaseModel instance.
+             prefix: Current field path.
+             result: Dictionary to store field paths and value lists.
+         """
+         for field_name, field_info in type(model).model_fields.items():
+             field_value = getattr(model, field_name)
+             new_prefix = f"{prefix}.{field_name}" if prefix else field_name
+             self._extract_field_values(
+                 value=field_value, prefix=new_prefix, result=result
+             )
+
+     def _handle_sequence(
+         self,
+         sequence: Sequence,
+         prefix: str,
+         result: Dict[str, List[str]],
+         indexed: bool = False,
+     ) -> None:
+         """
+         Extract values from a sequence (list or tuple) recursively.
+
+         Args:
+             sequence: List or tuple of values.
+             prefix: Current field path.
+             result: Dictionary to store field paths and value lists.
+             indexed: Switch parameter to select the extraction approach.
+         """
+         if not sequence:
+             result[prefix] = []
+
+         if indexed:
+             for i, item in enumerate(sequence):
+                 new_prefix = f"{prefix}[{i}]" if prefix else f"[{i}]"
+                 self._extract_field_values(value=item, prefix=new_prefix, result=result)
+         else:
+             for item in sequence:
+                 self._extract_field_values(
+                     value=item, prefix=prefix, result=result, indexed=indexed
+                 )
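To make the flattening behaviour concrete, a small sketch of DataExtractor.deep_extract follows. The Person and Address models are hypothetical, and the commented outputs show the expected shape rather than verified package output:

    from typing import List
    from pydantic import BaseModel

    from levelapp.comparator.extractor import DataExtractor

    class Address(BaseModel):
        city: str

    class Person(BaseModel):
        name: str
        addresses: List[Address]

    person = Person(name="Ada", addresses=[Address(city="Paris"), Address(city="Lyon")])
    extractor = DataExtractor()

    # Default mode merges repeated sub-fields under one dotted path:
    # {'name': ['Ada'], 'addresses.city': ['Paris', 'Lyon']}
    flat = extractor.deep_extract(model=person)

    # Indexed mode keeps one path per sequence element:
    # {'name': ['Ada'], 'addresses[0].city': ['Paris'], 'addresses[1].city': ['Lyon']}
    indexed = extractor.deep_extract(model=person, indexed=True)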
@@ -0,0 +1,61 @@
+ """'comparator/schemas.py': Defines Pydantic models for extracted metadata."""
+
+ from enum import Enum
+
+ from pydantic import BaseModel, Field
+ from rapidfuzz import fuzz, utils
+
+
+ class AttrCompMixin:
+     def __eq__(self, other) -> bool:
+         if not isinstance(other, type(self)):
+             return False
+
+         attr_name = next(iter(self.__dict__.keys()))
+         _cond = (
+             fuzz.ratio(
+                 s1=getattr(self, attr_name),
+                 s2=getattr(other, attr_name),
+                 processor=utils.default_process,
+             )
+             > 99
+         )
+         return _cond
+
+
+ class CompScoreMixin:
+     def comp_score(self, other) -> float:
+         attr_name = next(iter(self.__dict__.keys()))
+         _score = fuzz.ratio(
+             s1=getattr(self, attr_name),
+             s2=getattr(other, attr_name),
+             processor=utils.default_process,
+         )
+         return _score
+
+
+ class EntityMetric(str, Enum):
+     WRATIO = "wratio"
+     LEV_NORM = "lev-norm"
+     JARO_WINKLER = "jaro-winkler"
+     TOKEN_SORT_RATIO = "token-sort-ratio"
+     TOKEN_SET_RATIO = "token-set-ratio"
+
+     @classmethod
+     def list(cls):
+         return [field.value for field in cls]
+
+
+ class SetMetric(str, Enum):
+     ACCURACY = "accuracy"
+     F1_SCORE = "f1-score"
+
+
+ class MetricConfig(BaseModel):
+     """
+     Configuration for a field's comparison metric.
+     """
+     field_name: str = Field(default="token-set-ratio", description="Name of the field")
+     entity_metric: EntityMetric = Field(default=EntityMetric.LEV_NORM, description="Entity level metric")
+     set_metric: SetMetric = Field(default=SetMetric.ACCURACY, description="Set level metric")
+     threshold: float = Field(default=50, ge=0, le=100, description="Match threshold")
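A per-field metrics mapping built from these schemas could look like the following sketch; the field names and threshold values are illustrative:

    from levelapp.comparator.schemas import EntityMetric, SetMetric, MetricConfig

    metrics_mapping = {
        "vendor": MetricConfig(
            field_name="vendor",
            entity_metric=EntityMetric.JARO_WINKLER,
            set_metric=SetMetric.ACCURACY,
            threshold=85,
        ),
        "line_items": MetricConfig(
            field_name="line_items",
            entity_metric=EntityMetric.TOKEN_SET_RATIO,
            set_metric=SetMetric.F1_SCORE,
            threshold=70,
        ),
    }

Such a mapping is what MetricsManager (see scorer.py below) accepts to override the default per-field configuration.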
@@ -0,0 +1,269 @@
+ """'comparator/scorer.py':"""
+ import numpy as np
+
+ from collections import namedtuple
+ from typing import List, Dict, Callable, cast, Protocol
+
+ from rapidfuzz import distance, process, utils, fuzz
+
+ from levelapp.comparator.schemas import MetricConfig, EntityMetric, SetMetric
+ from levelapp.aspects import logger
+
+ ComputedScores = namedtuple(
+     typename="ComputedScores",
+     field_names=["ref", "ext", "e_metric", "e_score"],
+ )
+ ComparisonResults = namedtuple(
+     typename="ComparisonResults",
+     field_names=["ref", "ext", "e_metric", "e_score", "s_metric", "s_score"]
+ )
+
+
+ class Scorer(Protocol):
+     def __call__(self, ref: str, ext: str) -> float:
+         ...
+
+
+ class MetricsManager:
+     """Manages scorer registration, score computation, and metric configuration."""
+
+     def __init__(self, metrics_mapping: Dict[str, MetricConfig] | None = None):
+         self._scorers: Dict[str, Callable] = {}
+         self._metrics_mapping = metrics_mapping or {}
+         self._initialize_scorers()
+
+     @property
+     def metrics_mapping(self) -> Dict[str, MetricConfig]:
+         return self._metrics_mapping
+
+     @metrics_mapping.setter
+     def metrics_mapping(self, value: Dict[str, MetricConfig]):
+         self._metrics_mapping = value
+
+     def _initialize_scorers(self) -> None:
+         """Re-register the built-in scorers to prevent residual state."""
+         self._scorers.clear()
+
+         self.register_scorer(
+             EntityMetric.LEV_NORM.value,
+             distance.Levenshtein.normalized_similarity
+         )
+         self.register_scorer(
+             EntityMetric.JARO_WINKLER.value,
+             distance.JaroWinkler.normalized_similarity,
+         )
+         self.register_scorer(
+             EntityMetric.TOKEN_SET_RATIO.value,
+             fuzz.token_set_ratio,
+         )
+         self.register_scorer(
+             EntityMetric.TOKEN_SORT_RATIO.value,
+             fuzz.token_sort_ratio,
+         )
+         self.register_scorer(
+             EntityMetric.WRATIO.value,
+             fuzz.WRatio
+         )
+
+     def register_scorer(self, name: str, scorer: Callable) -> None:
+         """
+         Register a scorer.
+
+         Args:
+             name (str): name of the scorer.
+             scorer (Callable): scorer to register.
+
+         Raises:
+             ValueError: if the scorer is not a callable.
+         """
+         if not callable(scorer):
+             raise ValueError(f"[MetricsManager] Scorer '{name}' must be callable.")
+
+         self._scorers[name] = scorer
+
+     def get_scorer(self, name: str) -> Callable:
+         """
+         Retrieve a scorer by name.
+
+         Args:
+             name (str): name of the scorer.
+
+         Returns:
+             Callable: scorer.
+
+         Raises:
+             ValueError: if the passed name is not registered.
+         """
+         try:
+             return self._scorers[name]
+
+         except KeyError:
+             raise ValueError(f"[MetricsManager] '{name}' is not registered")
+
+     def get_metrics_config(self, field: str) -> MetricConfig:
+         """
+         Retrieve the metrics configuration for a given field.
+
+         Args:
+             field (str): field name.
+
+         Returns:
+             MetricConfig: metrics configuration for the given field.
+         """
+         default_config = MetricConfig(
+             field_name=field,
+             entity_metric=EntityMetric.TOKEN_SET_RATIO,
+             set_metric=SetMetric.ACCURACY,
+             threshold=0.5
+         )
+         return self._metrics_mapping.get(field, default_config)
+
+     def compute_entity_scores(
+         self,
+         reference_seq: List[str],
+         extracted_seq: List[str],
+         scorer: EntityMetric = EntityMetric.LEV_NORM,
+         pairwise: bool = True
+     ) -> List[ComputedScores]:
+         """
+         Compute the distance/similarity between reference and extracted sequence entities.
+
+         Args:
+             reference_seq (List[str]): The reference sequence.
+             extracted_seq (List[str]): The extracted sequence.
+             scorer (EntityMetric): Entity-level metric to use (e.g., 'lev-norm', 'jaro-winkler').
+             pairwise (bool): Whether to compare elements index by index or by best match.
+
+         Returns:
+             List[ComputedScores]: List of (reference, extracted, metric, score) named tuples.
+         """
+         if not reference_seq or not extracted_seq:
+             return [
+                 ComputedScores(
+                     ref=reference_seq,
+                     ext=extracted_seq,
+                     e_metric=scorer.value,
+                     e_score=np.nan,
+                 )
+             ]
+
+         if scorer not in EntityMetric.list():
+             logger.warning(f"[MetricsManager] Scorer name <{scorer}> is not supported.")
+             raise ValueError(f"[MetricsManager] Scorer <{scorer}> is not registered.")
+
+         max_len = max(len(reference_seq), len(extracted_seq))
+         reference_padded = reference_seq + [""] * (max_len - len(reference_seq))
+         extracted_padded = extracted_seq + [""] * (max_len - len(extracted_seq))
+
+         scorer_func = cast(Callable, self.get_scorer(name=scorer.value))
+
+         if pairwise:
+             scores_ = process.cpdist(
+                 queries=reference_padded,
+                 choices=extracted_padded,
+                 scorer=scorer_func,
+                 processor=utils.default_process,
+                 workers=-1,
+             )
+             scores = scores_.flatten()
+             res = [
+                 ComputedScores(
+                     ref=reference_padded[i],
+                     ext=extracted_padded[i],
+                     e_metric=scorer.value,
+                     e_score=scores[i]
+                 ) for i in range(len(scores))
+             ]
+
+         else:
+             scores_ = process.cdist(
+                 queries=reference_padded,
+                 choices=extracted_padded,
+                 scorer=scorer_func,
+                 processor=utils.default_process,
+                 workers=-1,
+             )
+             scores = np.max(scores_, axis=1)
+             max_idx = np.argmax(scores_, axis=1)
+             res = [
+                 ComputedScores(
+                     ref=reference_padded[i],
+                     ext=extracted_padded[max_idx[i]],
+                     e_metric=scorer.value,
+                     e_score=scores[i]
+                 ) for i in range(len(scores))
+             ]
+
+         return res
+
+     @staticmethod
+     def compute_set_scores(
+         data: List[ComputedScores],
+         scorer: SetMetric = SetMetric.F1_SCORE,
+         threshold: float = 1.0,
+     ) -> ComparisonResults:
+         """
+         Compute evaluation metrics from similarity scores and return the results as a named tuple.
+
+         Args:
+             data: List of ComputedScores tuples containing reference string, extracted string, and similarity score.
+             scorer: Metric to compute.
+             threshold: Similarity threshold for considering a match.
+
+         Returns:
+             ComparisonResults: Named tuple containing references, extracted values, entity scores, and the set-level metric value.
+         """
+         if not data:
+             return ComparisonResults("", "", None, None, None, None)
+
+         ref = [_.ref for _ in data]
+         ext = [_.ext for _ in data]
+         entity_scores = np.array([_.e_score for _ in data], dtype=np.float32)
+         entity_metric = data[0].e_metric
+
+         matches = np.count_nonzero(entity_scores >= threshold)
+
+         if len(data) == 1:
+             entity_scores = entity_scores.tolist()
+             set_scores = np.array(
+                 [1 if score >= threshold else 0 for score in entity_scores], dtype=np.float32
+             ).tolist()
+             return ComparisonResults(
+                 ref=ref,
+                 ext=ext,
+                 e_metric=entity_metric,
+                 e_score=entity_scores,
+                 s_metric=None,
+                 s_score=set_scores
+             )
+
+         tp = matches
+         fp = len(ref) - int(matches)
+         fn = len(ext) - int(matches)
+
+         if scorer == SetMetric.ACCURACY:
+             accuracy = (tp / len(entity_scores)) if len(entity_scores) > 0 else 0.0
+             return ComparisonResults(
+                 ref=ref,
+                 ext=ext,
+                 e_metric=entity_metric,
+                 e_score=entity_scores,
+                 s_metric=scorer.value,
+                 s_score=accuracy
+             )
+
+         if scorer == SetMetric.F1_SCORE:
+             precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+             recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+             f1 = (
+                 2 * (precision * recall) / (precision + recall)
+                 if (precision + recall) > 0
+                 else 0.0
+             )
+             return ComparisonResults(
+                 ref=ref,
+                 ext=ext,
+                 e_metric=entity_metric,
+                 e_score=entity_scores,
+                 s_metric=scorer.value,
+                 s_score=f1
+             )
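Finally, a hedged sketch of driving the MetricsManager directly, outside the comparator; the input strings and the 0.8 threshold are illustrative, not values used by the package:

    from levelapp.comparator.schemas import EntityMetric, SetMetric
    from levelapp.comparator.scorer import MetricsManager

    manager = MetricsManager()

    # Element-by-element comparison with a normalized Levenshtein score (0-1 scale)
    entity_scores = manager.compute_entity_scores(
        reference_seq=["Acme Corp", "Paris"],
        extracted_seq=["ACME Corp.", "Lyon"],
        scorer=EntityMetric.LEV_NORM,
        pairwise=True,
    )

    # Aggregate the entity scores into a set-level F1 score
    result = manager.compute_set_scores(
        data=entity_scores,
        scorer=SetMetric.F1_SCORE,
        threshold=0.8,
    )
    print(result.s_metric, result.s_score)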