docling-metrics-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Metric types and evaluation utilities for Docling document processing."""
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Annotated, Iterable, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseInputSample(BaseModel):
    """Base class for input samples to metrics."""

    # NOTE: the class docstring is intentionally kept short and unchanged;
    # pydantic exposes a model's docstring as its JSON-schema description.

    # Identifier that ties together the samples being compared, e.g. a
    # ground-truth sample and the prediction made for it.
    id: Annotated[
        str,
        Field(
            description=(
                "Unique sample identifier, shared between ground-truth and predictions"
            ),
        ),
    ]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BaseSampleResult(BaseModel):
    """Output of a single sample evaluation."""

    # NOTE: the class docstring is intentionally left unchanged; pydantic
    # exposes a model's docstring as its JSON-schema description.

    # Identifier copied from the evaluated input pair, so that each result can
    # be traced back to the samples that produced it.
    id: Annotated[
        str,
        Field(description="Sample identifier from the evaluated input pair"),
    ]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BaseAggregateResult(BaseModel):
    """Output of aggregating multiple sample results."""

    # NOTE: the class docstring is intentionally left unchanged; pydantic
    # exposes a model's docstring as its JSON-schema description.

    # How many per-sample results were folded into this aggregate.
    sample_count: Annotated[
        int,
        Field(description="Number of samples that contributed to this result"),
    ]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BaseMetric(ABC):
    """Abstract base class defining the interface for all metrics.

    A concrete metric must override all three abstract methods below; until it
    does, the class cannot be instantiated.
    """

    @abstractmethod
    def evaluate_sample(
        self, sample_a: BaseInputSample, sample_b: BaseInputSample
    ) -> BaseSampleResult:
        """Evaluate a single sample pair.

        Args:
            sample_a: First sample of the pair.
            sample_b: Second sample of the pair.

        Returns:
            The per-sample result for this pair.
        """
        ...

    @abstractmethod
    def aggregate(
        self, results: Iterable[BaseSampleResult]
    ) -> Optional[BaseAggregateResult]:
        """Aggregate multiple sample results.

        Args:
            results: Per-sample results to combine.

        Returns:
            The aggregated result, or ``None``. The interface does not specify
            when ``None`` is returned; that is left to the implementation.
        """
        ...

    @abstractmethod
    def evaluate_dataset(
        self, sample_pairs: Iterable[tuple[BaseInputSample, BaseInputSample]]
    ) -> BaseAggregateResult:
        """Evaluate an entire dataset.

        Args:
            sample_pairs: The dataset as an iterable of two-sample pairs, each
                pair in the same ``(sample_a, sample_b)`` form accepted by
                ``evaluate_sample``.

        Returns:
            The aggregate result for the whole dataset.
        """
        ...
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: docling-metrics-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Metric types and evaluation utilities for Docling document processing
|
|
5
|
+
Project-URL: homepage, https://github.com/docling-project/docling-metrics
|
|
6
|
+
Project-URL: repository, https://github.com/docling-project/docling-metrics
|
|
7
|
+
Project-URL: issues, https://github.com/docling-project/docling-metrics/issues
|
|
8
|
+
Author-email: Christoph Auer <cau@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Keywords: docling,evaluation,metrics,types
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: <4.0,>=3.10
|
|
19
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# docling-metrics-core
|
|
23
|
+
|
|
24
|
+
A minimal interface for computing and aggregating metrics on paired data samples.
|
|
25
|
+
|
|
26
|
+
## Overview
|
|
27
|
+
|
|
28
|
+
This package provides base types for building metrics that:
|
|
29
|
+
- Evaluate pairs of input samples (e.g., ground-truth vs. prediction)
|
|
30
|
+
- Produce per-sample results traceable by sample ID
|
|
31
|
+
- Aggregate results across multiple samples
|
|
32
|
+
|
|
33
|
+
## Core Types
|
|
34
|
+
|
|
35
|
+
- **`BaseInputSample`** — Input data with a unique `id` shared by both samples of a pair (e.g., ground-truth and prediction)
|
|
36
|
+
- **`BaseSampleResult`** — Output of evaluating a single sample pair
|
|
37
|
+
- **`BaseAggregateResult`** — Output of aggregating multiple sample results
|
|
38
|
+
- **`BaseMetric`** — Abstract interface defining `evaluate_sample`, `aggregate`, and `evaluate_dataset`
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install docling-metrics-core
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Requirements
|
|
47
|
+
|
|
48
|
+
- Python >=3.10,<4.0
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
*Coming soon*
|
|
53
|
+
|
|
54
|
+
## Development
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Install in development mode
|
|
58
|
+
uv sync
|
|
59
|
+
|
|
60
|
+
# Run tests
|
|
61
|
+
uv run pytest
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## License
|
|
65
|
+
|
|
66
|
+
MIT
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
docling_metrics_core/__init__.py,sha256=CVtjMCCcdikE0CCYPb-Hd2W-IUivENSTKIlZ02mltRE,77
|
|
2
|
+
docling_metrics_core/base_types.py,sha256=Tre6ei2NdYINJDJJKChyLkL9I6BT6cRS0wraZk1Bp3c,1450
|
|
3
|
+
docling_metrics_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
docling_metrics_core-0.1.0.dist-info/METADATA,sha256=tXbloYmuM2qHOYjN4qeucpWtvWn1rMm_yEQx7vZqQxQ,2104
|
|
5
|
+
docling_metrics_core-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
+
docling_metrics_core-0.1.0.dist-info/RECORD,,
|